rework the verifier to prepare for loop cutting
[ajla.git] / ipret.c
blobe1383f1fbf6ef52e88abd275d6b11fcdeffcb654
1 /*
2 * Copyright (C) 2024 Mikulas Patocka
4 * This file is part of Ajla.
6 * Ajla is free software: you can redistribute it and/or modify it under the
7 * terms of the GNU General Public License as published by the Free Software
8 * Foundation, either version 3 of the License, or (at your option) any later
9 * version.
11 * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
12 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
13 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along with
16 * Ajla. If not, see <https://www.gnu.org/licenses/>.
19 #include "ajla.h"
21 #ifndef FILE_OMIT
23 #include "mem_al.h"
24 #include "data.h"
25 #include "array.h"
26 #include "code-op.h"
27 #include "funct.h"
28 #include "arithm-b.h"
29 #include "arithm-i.h"
30 #include "arithm-r.h"
31 #include "tick.h"
32 #include "task.h"
33 #include "ipfn.h"
34 #include "ipio.h"
35 #include "util.h"
36 #include "os.h"
37 #include "codegen.h"
39 #include "ipret.h"
41 #if defined(HAVE_COMPUTED_GOTO) && !defined(DEBUG_TRACE)
42 #define COMPUTED_GOTO
43 /*#define COMPUTED_GOTO_RELATIVE*/
44 #endif
46 #if defined(C_LITTLE_ENDIAN)
47 #define get_lo(p) (((const unsigned char *)(p))[0])
48 #elif defined(C_BIG_ENDIAN)
49 #define get_lo(p) (((const unsigned char *)(p))[1])
50 #else
51 #define get_lo(p) (*(p) & 0xff)
52 #endif
54 #if defined(C_LITTLE_ENDIAN)
55 #define get_hi(p) (((const unsigned char *)(p))[1])
56 #elif defined(C_BIG_ENDIAN)
57 #define get_hi(p) (((const unsigned char *)(p))[0])
58 #else
59 #define get_hi(p) (*(p) >> 8)
60 #endif
62 #define ADVANCE_IP(n) (ip += (n))
65 #define op_add(type, utype, op1, op2) op1 + op2
66 #define op_subtract(type, utype, op1, op2) op1 - op2
67 #define op_multiply(type, utype, op1, op2) op1 * op2
68 #define op_divide(type, utype, op1, op2) op1 / op2
69 /* EMX has a bug - fmod doesn't return NaN */
70 #if defined(_MSC_VER)
71 #define op_modulo(type, utype, op1, op2) (!isnan_any(type, op1, op2) && cat(isfinite_,type)(op1) && !cat(isfinite_,type)(op2) ? op1 :\
72 !isnan_any(type, op1, op2) && op1 == 0 && op2 != 0 ? op1 :\
73 cat(mathfunc_,type)(fmod)(op1, op2))
74 #elif defined(HAVE_BUGGY_FMOD)
75 #define op_modulo(type, utype, op1, op2) (op2 == 0 ? 0./0. : cat(mathfunc_,type)(fmod)(op1, op2))
76 #else
77 #define op_modulo(type, utype, op1, op2) cat(mathfunc_,type)(fmod)(op1, op2)
78 #endif
79 #define op_atan2(type, utype, op1, op2) cat(mathfunc_,type)(atan2)(op1, op2)
80 #define op_and(type, utype, op1, op2) op1 & op2
81 #define op_or(type, utype, op1, op2) op1 | op2
82 #define op_xor(type, utype, op1, op2) op1 ^ op2
83 #define op_shl(type, utype, op1, op2) op1 << (op2 & (sizeof(utype) * 8 - 1))
84 #define op_shr(type, utype, op1, op2) \
85 RIGHT_SHIFT_KEEPS_SIGN || (type)op1 >= 0 ? \
86 (utype)((type)op1 >> (op2 & (sizeof(utype) * 8 - 1))) \
87 : \
88 ~(~(utype)op1 >> (op2 & (sizeof(utype) * 8 - 1)))
89 #define op_ushr(type, utype, op1, op2) op1 >> (op2 & (sizeof(utype) * 8 - 1))
90 #define op_equal(type, utype, op1, op2) op1 == op2
91 #define op_not_equal(type, utype, op1, op2) op1 != op2
92 #define op_less(type, utype, op1, op2) (type)op1 < (type)op2
93 #define op_less_equal(type, utype, op1, op2) (type)op1 <= (type)op2
94 #define op_greater(type, utype, op1, op2) (type)op1 > (type)op2
95 #define op_greater_equal(type, utype, op1, op2) (type)op1 >= (type)op2
96 #define op_uless(type, utype, op1, op2) op1 < op2
97 #define op_uless_equal(type, utype, op1, op2) op1 <= op2
98 #define op_ugreater(type, utype, op1, op2) op1 > op2
99 #define op_ugreater_equal(type, utype, op1, op2) op1 >= op2
100 #define op_not(type, utype, op1) ~op1
101 #define op_neg(type, utype, op1) -op1
102 #define op_sqrt(type, utype, op1) cat(mathfunc_,type)(sqrt)(op1)
103 #define op_cbrt(type, utype, op1) cat(mathfunc_,type)(cbrt)(op1)
104 #define op_sin(type, utype, op1) cat(mathfunc_,type)(sin)(op1)
105 #define op_cos(type, utype, op1) cat(mathfunc_,type)(cos)(op1)
106 #define op_tan(type, utype, op1) cat(mathfunc_,type)(tan)(op1)
107 #define op_asin(type, utype, op1) cat(mathfunc_,type)(asin)(op1)
108 #define op_acos(type, utype, op1) cat(mathfunc_,type)(acos)(op1)
109 #define op_atan(type, utype, op1) cat(mathfunc_,type)(atan)(op1)
110 #define op_sinh(type, utype, op1) cat(mathfunc_,type)(sinh)(op1)
111 #define op_cosh(type, utype, op1) cat(mathfunc_,type)(cosh)(op1)
112 #define op_tanh(type, utype, op1) cat(mathfunc_,type)(tanh)(op1)
113 #define op_asinh(type, utype, op1) cat(mathfunc_,type)(asinh)(op1)
114 #define op_acosh(type, utype, op1) cat(mathfunc_,type)(acosh)(op1)
115 #define op_atanh(type, utype, op1) cat(mathfunc_,type)(atanh)(op1)
116 #define op_exp2(type, utype, op1) cat(mathfunc_,type)(exp2)(op1)
117 #define op_exp(type, utype, op1) cat(mathfunc_,type)(exp)(op1)
118 #define op_exp10(type, utype, op1) cat(mathfunc_,type)(exp10)(op1)
119 #define op_log2(type, utype, op1) cat(mathfunc_,type)(log2)(op1)
120 #define op_log(type, utype, op1) cat(mathfunc_,type)(log)(op1)
121 #define op_log10(type, utype, op1) cat(mathfunc_,type)(log10)(op1)
122 #define op_round(type, utype, op1) cat(mathfunc_,type)(rint)(op1)
123 #define op_ceil(type, utype, op1) cat(mathfunc_,type)(ceil)(op1)
124 #define op_floor(type, utype, op1) cat(mathfunc_,type)(floor)(op1)
125 #define op_trunc(type, utype, op1) cat(mathfunc_,type)(trunc)(op1)
126 #define op_fract(type, utype, op1) cat(mathfunc_,type)(fract)(op1)
127 #define op_mantissa(type, utype, op1) cat(mathfunc_,type)(mantissa)(op1)
128 #define op_exponent(type, utype, op1) cat(mathfunc_,type)(exponent)(op1)
130 #define generate_fixed_binary(type, utype, op) \
131 static ipret_inline bool cat4(FIXED_binary_,op,_,type) \
132 (const utype *op1, const utype *op2, utype *res)\
134 *(utype *)res = cat(op_,op)(type, utype, (*(const utype *)op1), (*(const utype *)op2));\
135 return true; \
138 #define generate_fixed_binary_logical(type, utype, op) \
139 static ipret_inline bool cat4(FIXED_binary_,op,_,type) \
140 (const utype *op1, const utype *op2, ajla_flat_option_t *res)\
142 *(ajla_flat_option_t *)res = cat(op_,op)(type, utype, (*(const utype *)op1), (*(const utype *)op2));\
143 return true; \
146 #define generate_fixed_unary(type, utype, op) \
147 static ipret_inline void cat4(FIXED_unary_,op,_,type) \
148 (const utype *op1, utype *res) \
150 *(utype *)res = cat(op_,op)(type, utype, (*(utype *)op1)); \
153 #define generate_fixed_ldc(type, utype, sz, bits) \
154 static ipret_inline ip_t cat(fixed_ldc_,type) \
155 (utype *res, const code_t *ip, bool small)\
157 if (small && sz > 2) { \
158 *res = (utype)(int16_t)ip[0]; \
159 return 1; \
161 *res = (utype)cat(get_unaligned_,bits)(ip); \
162 return (sz + 1) / 2; \
165 #define generate_fixed_functions(n, type, utype, sz, bits) \
166 generate_fixed_binary(type, utype, add) \
167 generate_fixed_binary(type, utype, subtract) \
168 generate_fixed_binary(type, utype, multiply) \
169 generate_fixed_binary(type, utype, and) \
170 generate_fixed_binary(type, utype, or) \
171 generate_fixed_binary(type, utype, xor) \
172 generate_fixed_binary(type, utype, shl) \
173 generate_fixed_binary(type, utype, shr) \
174 generate_fixed_binary(type, utype, ushr) \
175 generate_fixed_binary_logical(type, utype, equal) \
176 generate_fixed_binary_logical(type, utype, not_equal) \
177 generate_fixed_binary_logical(type, utype, less) \
178 generate_fixed_binary_logical(type, utype, less_equal) \
179 generate_fixed_binary_logical(type, utype, greater) \
180 generate_fixed_binary_logical(type, utype, greater_equal) \
181 generate_fixed_binary_logical(type, utype, uless) \
182 generate_fixed_binary_logical(type, utype, uless_equal) \
183 generate_fixed_binary_logical(type, utype, ugreater) \
184 generate_fixed_binary_logical(type, utype, ugreater_equal) \
185 generate_fixed_unary(type, utype, not) \
186 generate_fixed_unary(type, utype, neg) \
187 generate_fixed_ldc(type, utype, sz, bits)
188 for_all_fixed(generate_fixed_functions)
189 #undef generate_fixed_functions
192 #define generate_int_binary(type, utype, op, operator) \
193 static ipret_inline bool \
194 cat4(INT_binary_,op,_,type)(const void *op1, const void *op2, void *res)\
196 *cast_ptr(type *, res) = \
197 *cast_ptr(const type *, op1) operator \
198 *cast_ptr(const type *, op2); \
199 return true; \
202 #define generate_int_binary_logical(type, utype, op, operator) \
203 static ipret_inline bool \
204 cat4(INT_binary_,op,_,type)(const void *op1, const void *op2, ajla_flat_option_t *res)\
206 *res = *cast_ptr(const type *, op1) operator \
207 *cast_ptr(const type *, op2); \
208 return true; \
211 #define generate_int_ldc(type, utype, bits) \
212 static ipret_inline ip_t cat(int_ldc_,type) \
213 (type *res, const code_t *ip, bool small)\
215 return cat(fixed_ldc_,type)(cast_ptr(utype *, res), ip, small); \
219 #define generate_int_functions(typeid, type, utype, sz, bits) \
220 generate_int_binary(type, utype, and, &) \
221 generate_int_binary(type, utype, or, |) \
222 generate_int_binary(type, utype, xor, ^) \
223 generate_int_binary_logical(type, utype, equal, ==) \
224 generate_int_binary_logical(type, utype, not_equal, !=) \
225 generate_int_binary_logical(type, utype, less, <) \
226 generate_int_binary_logical(type, utype, less_equal, <=) \
227 generate_int_binary_logical(type, utype, greater, >) \
228 generate_int_binary_logical(type, utype, greater_equal, >=) \
229 generate_int_ldc(type, utype, bits)
230 for_all_int(generate_int_functions, for_all_empty)
231 #undef generate_int_binary_functions
234 #if defined(use_is_macros)
235 #ifdef HAVE_REAL_GNUC
236 #define isnan_any(type, a, b) (unlikely(isunordered(b, a)))
237 #else
238 #define isnan_any(type, a, b) (unlikely(isunordered(a, b)))
239 #endif
240 #else
241 #define isnan_any(type, a, b) (unlikely(cat(isnan_,type)(a)) || unlikely(cat(isnan_,type)(b)))
242 #endif
244 #if REAL_MASK & 0x1
245 static attr_always_inline bool do_nextafter_real16_t(real16_t attr_unused x, int attr_unused dir, real16_t attr_unused *res)
247 return false;
249 #endif
251 #if REAL_MASK & 0x2
252 static attr_always_inline bool do_nextafter_real32_t(real32_t attr_unused x, int attr_unused dir, real32_t attr_unused *res)
254 #ifdef HAVE_NEXTAFTERF
255 *res = nextafterf(x, HUGE_VALF * dir);
256 return true;
257 #else
258 return false;
259 #endif
261 #endif
263 #if REAL_MASK & 0x4
264 static attr_always_inline bool do_nextafter_real64_t(real64_t attr_unused x, int attr_unused dir, real64_t attr_unused *res)
266 #ifdef HAVE_NEXTAFTER
267 *res = nextafter(x, HUGE_VAL * dir);
268 return true;
269 #else
270 return false;
271 #endif
273 #endif
275 #if REAL_MASK & 0x8
276 static attr_always_inline bool do_nextafter_real80_t(real80_t attr_unused x, int dir, real80_t attr_unused *res)
278 #ifdef HAVE_NEXTAFTERL
279 *res = nextafterl(x, HUGE_VALL * dir);
280 return true;
281 #else
282 return false;
283 #endif
285 #endif
287 #if REAL_MASK & 0x10
288 static attr_always_inline bool do_nextafter_real128_t(real128_t attr_unused x, int dir, real128_t attr_unused *res)
290 #ifndef HAVE_NATIVE_FLOAT128
291 #ifdef HAVE_NEXTAFTERL
292 *res = nextafterl(x, HUGE_VALL * dir);
293 return true;
294 #else
295 return false;
296 #endif
297 #else
298 *res = nextafterq(x, HUGE_VAL * dir);
299 return true;
300 #endif
302 #endif
304 #define generate_real_binary(type, ntype, pack, unpack, op) \
305 static ipret_inline bool cat4(REAL_binary_,op,_,type) \
306 (const type *op1, const type *op2, type *res)\
308 *res = pack(cat(op_,op)(ntype, ntype, (unpack(*op1)), (unpack(*op2))));\
309 return true; \
312 #define generate_real_binary_logical(type, ntype, pack, unpack, op) \
313 static ipret_inline bool cat4(REAL_binary_,op,_,type) \
314 (const type *op1, const type *op2, ajla_flat_option_t *res)\
316 ntype o1 = unpack(*op1); \
317 ntype o2 = unpack(*op2); \
318 if (isnan_any(ntype, o1, o2)) \
319 return false; \
320 *res = cat(op_real_,op)(ntype, ntype, o1, o2); \
321 return true; \
324 #define generate_real_unary(n, type, ntype, pack, unpack, op, op_n) \
325 static ipret_inline void cat4(REAL_unary_,op,_,type) \
326 (const type *op1, type *res) \
328 if (!n && REAL16_T_IS_UINT16_T) { \
329 if (!op_n) { \
330 *(uint16_t *)res = *(uint16_t *)op1 ^ 0x8000U; \
331 return; \
334 *res = pack(cat(op_,op)(type, type, (unpack(*op1)))); \
337 /* EMX has a bug - modf(infinity) return NaN instead of 0. */
338 #ifdef HAVE_BUGGY_MODF
339 #define need_modf_hack true
340 #else
341 #define need_modf_hack false
342 #endif
344 #define generate_real_fns(n, type, ntype, pack, unpack) \
345 static ipret_inline bool cat(REAL_binary_power_,type) \
346 (const type *op1, const type *op2, type *res)\
348 ntype o1 = unpack(*op1); \
349 ntype o2 = unpack(*op2); \
350 ntype r; \
351 if (unlikely(isnan_any(ntype, o1, o2))) \
352 return false; \
353 r = cat(mathfunc_,type)(pow)(o1, o2); \
354 *res = pack(r); \
355 return true; \
357 static ipret_inline bool cat(REAL_binary_ldexp_,type) \
358 (const type *op1, const type *op2, type *res)\
360 ntype m; \
361 ntype o1 = unpack(*op1); \
362 ntype o2 = unpack(*op2); \
363 if (unlikely(isnan_any(ntype, o1, o2))) \
364 return false; \
365 if (likely(o2 >= (ntype)sign_bit(int)) && likely(o2 <= (ntype)signed_maximum(int)) && likely(o2 == (int)o2)) {\
366 *res = pack(cat(mathfunc_,type)(ldexp)(o1, (int)o2)); \
367 } else { \
368 m = cat(mathfunc_,type)(exp2)(o2); \
369 m *= o1; \
370 *res = pack(m); \
372 return true; \
374 static ipret_inline void cat(REAL_unary_fract_,type) \
375 (const type *op1, type *res) \
377 ntype m = unpack(*op1); \
378 union { \
379 ntype i; \
380 float f; \
381 } u; \
382 if (need_modf_hack) { \
383 if (likely(!cat(isnan_,ntype)(m)) && unlikely(!cat(isfinite_,ntype)(m))) {\
384 *res = pack(m >= 0 ? 0. : -0.); \
385 return; \
388 *res = pack(cat(mathfunc_,type)(modf)(m, (void *)&u)); \
390 static ipret_inline void cat(REAL_unary_mantissa_,type) \
391 (const type *op1, type *res) \
393 int i; \
394 *res = pack(cat(mathfunc_,type)(frexp)(unpack(*op1), &i)); \
396 static ipret_inline void cat(REAL_unary_exponent_,type) \
397 (const type *op1, type *res) \
399 int i; \
400 ntype m = cat(mathfunc_,type)(frexp)(unpack(*op1), &i); \
401 if (unlikely(cat(isnan_,ntype)(m))) { \
402 *res = pack(m); \
403 return; \
405 if (unlikely(!cat(isfinite_,ntype)(m))) { \
406 *res = pack((ntype)0.); \
407 return; \
409 *res = pack((ntype)i); \
411 static ipret_inline type cat(REAL_unary_next_prev_number_,type) \
412 (type op1, int dir) \
414 int ex, bit; \
415 volatile ntype m, mm, n1; \
416 volatile type res, o; \
417 if (unlikely(cat(isnan_,type)(op1))) \
418 return op1; \
419 n1 = unpack(op1); \
420 if (unlikely(!cat(isfinite_,type)(op1))) { \
421 if ((n1 >= 0) == (dir >= 0)) \
422 return op1; \
423 m = cat(mathfunc_,ntype)(ldexp)(1, cat(bits_,type)) - 1;\
424 while (1) { \
425 mm = m * 2; \
426 res = pack(mm); \
427 if (unlikely(!cat(isfinite_,type)(res))) \
428 break; \
429 m = mm; \
431 return pack(m * -dir); \
433 if (unlikely(!n1)) { \
434 res = pack(1); \
435 o = pack(1); \
436 while (1) { \
437 o = pack(unpack(o) * 0.5); \
438 m = unpack(o); \
439 if (m == 0) \
440 break; \
441 res = o; \
443 return pack(unpack(res) * dir); \
445 m = cat(mathfunc_,type)(frexp)(n1, &ex); \
446 bit = cat(bits_,type) + 1; \
447 again: \
448 mm = m + cat(mathfunc_,ntype)(ldexp)(dir, -bit); \
449 o = pack(cat(mathfunc_,ntype)(ldexp)(mm, ex)); \
450 res = o; \
451 if (unpack(res) == n1) { \
452 bit--; \
453 goto again; \
455 return res; \
457 static ipret_inline void cat(REAL_unary_next_number_,type) \
458 (const type *op1, type *res) \
460 if (cat(do_nextafter_,type)(*op1, 1, res)) \
461 return; \
462 *res = cat(REAL_unary_next_prev_number_,type)(*op1, 1); \
464 static ipret_inline void cat(REAL_unary_prev_number_,type) \
465 (const type *op1, type *res) \
467 if (cat(do_nextafter_,type)(*op1, -1, res)) \
468 return; \
469 *res = cat(REAL_unary_next_prev_number_,type)(*op1, -1); \
472 #define generate_real_unary_logical(n, type, ntype, pack, unpack, op, op_n)\
473 static ipret_inline void cat4(REAL_unary_,op,_,type) \
474 (const type *op1, ajla_flat_option_t *res)\
476 *res = cat(isnan_,type)(*op1); \
479 #define op_real_equal op_equal
480 #if defined(use_is_macros) && defined(ARCH_X86)
481 #define op_real_not_equal(type, utype, op1, op2) islessgreater(op1, op2)
482 #else
483 #define op_real_not_equal op_not_equal
484 #endif
485 #if defined(use_is_macros)
486 #define op_real_less(type, utype, op1, op2) isless(op1, op2)
487 #else
488 #define op_real_less op_less
489 #endif
490 #if defined(use_is_macros)
491 #define op_real_less_equal(type, utype, op1, op2) islessequal(op1, op2)
492 #else
493 #define op_real_less_equal op_less_equal
494 #endif
495 #if defined(use_is_macros)
496 #define op_real_greater(type, utype, op1, op2) isgreater(op1, op2)
497 #else
498 #define op_real_greater op_greater
499 #endif
500 #if defined(use_is_macros)
501 #define op_real_greater_equal(type, utype, op1, op2) isgreaterequal(op1, op2)
502 #else
503 #define op_real_greater_equal op_greater_equal
504 #endif
506 #define generate_real_ldc(n, rtype, ntype, pack, unpack) \
507 static ipret_inline size_t cat(fixed_ldc_,rtype) \
508 (rtype *res, const code_t *ip, bool attr_unused shrt)\
510 memcpy(res, ip, sizeof(rtype)); \
511 return round_up(sizeof(rtype), sizeof(code_t)) / sizeof(code_t);\
514 #define generate_real_int(type, ntype, pack, unpack) \
515 static ipret_inline bool cat(REAL_unary_to_int_,type)(const type *val, int_default_t *r)\
517 ntype val1; \
518 val1 = unpack(*val); \
519 if (likely(val1 > (ntype)sign_bit(int_default_t)) && likely(val1 < (ntype)signed_maximum(int_default_t))) {\
520 *r = val1; \
521 return true; \
523 return false; \
525 static ipret_inline void cat(REAL_unary_from_int_,type)(const int_default_t *val, type *r)\
527 *r = pack(*val); \
530 #define generate_real_functions(n, type, ntype, pack, unpack) \
531 generate_real_binary(type, ntype, pack, unpack, add) \
532 generate_real_binary(type, ntype, pack, unpack, subtract) \
533 generate_real_binary(type, ntype, pack, unpack, multiply) \
534 generate_real_binary(type, ntype, pack, unpack, divide) \
535 generate_real_binary(type, ntype, pack, unpack, modulo) \
536 generate_real_binary(type, ntype, pack, unpack, atan2) \
537 generate_real_binary_logical(type, ntype, pack, unpack, equal) \
538 generate_real_binary_logical(type, ntype, pack, unpack, not_equal) \
539 generate_real_binary_logical(type, ntype, pack, unpack, less) \
540 generate_real_binary_logical(type, ntype, pack, unpack, less_equal) \
541 generate_real_binary_logical(type, ntype, pack, unpack, greater) \
542 generate_real_binary_logical(type, ntype, pack, unpack, greater_equal) \
543 generate_real_unary(n, type, ntype, pack, unpack, neg, 0) \
544 generate_real_unary(n, type, ntype, pack, unpack, sqrt, 1) \
545 generate_real_unary(n, type, ntype, pack, unpack, cbrt, 1) \
546 generate_real_unary(n, type, ntype, pack, unpack, sin, 1) \
547 generate_real_unary(n, type, ntype, pack, unpack, cos, 1) \
548 generate_real_unary(n, type, ntype, pack, unpack, tan, 1) \
549 generate_real_unary(n, type, ntype, pack, unpack, asin, 1) \
550 generate_real_unary(n, type, ntype, pack, unpack, acos, 1) \
551 generate_real_unary(n, type, ntype, pack, unpack, atan, 1) \
552 generate_real_unary(n, type, ntype, pack, unpack, sinh, 1) \
553 generate_real_unary(n, type, ntype, pack, unpack, cosh, 1) \
554 generate_real_unary(n, type, ntype, pack, unpack, tanh, 1) \
555 generate_real_unary(n, type, ntype, pack, unpack, asinh, 1) \
556 generate_real_unary(n, type, ntype, pack, unpack, acosh, 1) \
557 generate_real_unary(n, type, ntype, pack, unpack, atanh, 1) \
558 generate_real_unary(n, type, ntype, pack, unpack, exp2, 1) \
559 generate_real_unary(n, type, ntype, pack, unpack, exp, 1) \
560 generate_real_unary(n, type, ntype, pack, unpack, exp10, 1) \
561 generate_real_unary(n, type, ntype, pack, unpack, log2, 1) \
562 generate_real_unary(n, type, ntype, pack, unpack, log, 1) \
563 generate_real_unary(n, type, ntype, pack, unpack, log10, 1) \
564 generate_real_unary(n, type, ntype, pack, unpack, round, 1) \
565 generate_real_unary(n, type, ntype, pack, unpack, ceil, 1) \
566 generate_real_unary(n, type, ntype, pack, unpack, floor, 1) \
567 generate_real_unary(n, type, ntype, pack, unpack, trunc, 1) \
568 generate_real_fns(n, type, ntype, pack, unpack) \
569 generate_real_int(type, ntype, pack, unpack) \
570 generate_real_unary_logical(n, type, ntype, pack, unpack, is_exception, 0)\
571 generate_real_ldc(n, type, ntype, pack, unpack)
573 for_all_real(generate_real_functions, for_all_empty)
574 #undef generate_real_functions
577 static inline frame_s *frame_build(frame_s *fp, struct data *function, ajla_error_t *mayfail)
579 frame_t new_frame_slots = da(function,function)->frame_slots;
580 if (likely(new_frame_slots <= get_frame(fp)->available_slots)) {
581 frame_s *new_fp = cast_ptr(frame_s *, cast_ptr(char *, fp) - new_frame_slots * slot_size);
582 get_frame(new_fp)->available_slots = get_frame(fp)->available_slots - new_frame_slots;
583 get_frame(new_fp)->function = function;
584 return new_fp;
585 } else {
586 return stack_expand(fp, function, mayfail);
591 #define ipret_checkpoint_forced \
592 do { \
593 void *ex_ = ipret_tick(fp, ip); \
594 RELOAD_EX_POSITION(ex_); \
595 } while (0)
598 #define OPCODE_ARG_MODE(opcode) ((opcode) + ARG_MODE * OPCODE_MODE_MULT)
600 #define EMIT_FUNCTIONS
601 #include "ipret.inc"
603 void attr_hot_fastcall run(frame_s *fp_, ip_t ip_)
605 ajla_error_t ajla_error;
606 tick_stamp_t ts;
607 #if defined(DEBUG) && !defined(COMPUTED_GOTO)
608 const code_t *last_stack[20];
609 #endif
611 register frame_s *fp
612 #if ((defined(INLINE_ASM_GCC_I386) && !defined(__PIC__)) || defined(INLINE_ASM_GCC_X32)) && defined(__OPTIMIZE__) && defined(HAVE_REAL_GNUC)
614 * GCC usually uses ebp for the variable fp. It is bad choice because
615 * ebp can't be used as a base register without immediate offset. So,
616 * the assembler adds offset 0 to every instruction using ebp as base.
617 * Doing two additions and one shift in one instruction is too much and
618 * it causes performance drop on both Intel and AMD architectures.
620 __asm__("ebx")
621 #endif
622 #if defined(INLINE_ASM_GCC_X86_64) && defined(__OPTIMIZE__) && defined(HAVE_REAL_GNUC)
623 /*__asm__("rbx")*/
624 #endif
625 #if defined(INLINE_ASM_GCC_ARM_THUMB2) && defined(__OPTIMIZE__) && defined(HAVE_REAL_GNUC)
626 /* peg this to a register in lower bank to reduce code size and improve performance */
627 __asm__("r6")
628 #endif
630 register const code_t *ip
631 #if defined(INLINE_ASM_GCC_ARM_THUMB2) && defined(__OPTIMIZE__) && defined(HAVE_REAL_GNUC) && 0
632 /* don't use it for now, it causes too much register pressure */
633 __asm__("r5")
634 #endif
636 code_t code;
638 #ifdef COMPUTED_GOTO
639 const void *next_label;
640 #ifdef COMPUTED_GOTO_RELATIVE
641 static const int dispatch[OPCODE_MODE_MULT * ARG_MODE_N - (OPCODE_MODE_MULT - OPCODE_N)] = {
642 #define DEFINE_OPCODE_START_LBL(opcode, lbl) \
643 [OPCODE_ARG_MODE(opcode)] = (const char *)&&cat(label_,lbl) - (const char *)&&label_unknown,
644 #else
645 static const void *dispatch[OPCODE_MODE_MULT * ARG_MODE_N - (OPCODE_MODE_MULT - OPCODE_N)] = {
646 #define DEFINE_OPCODE_START_LBL(opcode, lbl) \
647 [OPCODE_ARG_MODE(opcode)] = &&cat(label_,lbl),
648 #endif
649 #include "ipret.inc"
650 #ifdef COMPUTED_GOTO_RELATIVE
652 #else
654 #endif
655 #endif
657 #if defined(DEBUG) && !defined(COMPUTED_GOTO)
658 memset(last_stack, 0, sizeof last_stack);
659 #endif
661 fp = fp_;
662 ip = &da(get_frame(fp)->function,function)->code[ip_];
663 tick_start(&ts);
665 #define RELOAD_EX_POSITION(ex) \
666 do { \
667 if ((ex) != POINTER_FOLLOW_THUNK_EXIT) { \
668 ajla_assert((ex) != POINTER_FOLLOW_THUNK_RETRY && (ex) != POINTER_FOLLOW_THUNK_EXCEPTION && (ex) != POINTER_FOLLOW_THUNK_GO, (file_line, "RELOAD_EX_POSITION: invalid pointer: %p", (ex)));\
669 fp = cast_ptr(struct execution_control *, (ex))->current_frame;\
670 ip = da(get_frame(fp)->function,function)->code + cast_ptr(struct execution_control *, (ex))->current_ip;\
671 tick_start(&ts); \
672 goto next_code; \
673 } else { \
674 goto exit_ipret; \
676 } while (0)
678 #ifdef COMPUTED_GOTO
680 #ifdef COMPUTED_GOTO_RELATIVE
681 #define GOTO_NEXT(opcode) \
682 code = *ip; \
683 next_label = (const char *)&&label_unknown + dispatch[code & OPCODE_MASK];\
684 ASM_PREVENT_JOIN(OPCODE_ARG_MODE(opcode)); \
685 goto *(void *)next_label;
686 #else
687 #define GOTO_NEXT(opcode) \
688 code = *ip; \
689 next_label = dispatch[code & OPCODE_MASK]; \
690 ASM_PREVENT_JOIN(OPCODE_ARG_MODE(opcode)); \
691 goto *(void *)next_label;
692 #endif
694 next_code:
695 #define ARG_MODE 0
696 GOTO_NEXT(-1);
697 #undef ARG_MODE
699 #define EMIT_CODE
700 #define START_BLOCK(declarations) { declarations
701 #define END_BLOCK() }
702 #define DEFINE_LABEL(lbl, code) \
703 lbl: do { \
704 code \
705 } while (0); \
706 GOTO_NEXT(-2);
707 #define DEFINE_OPCODE_START_LBL(opcode, lbl) \
708 cat(label_,lbl): do {
709 #define DEFINE_OPCODE_END(opcode) \
710 } while (0); \
711 GOTO_NEXT(OPCODE_ARG_MODE(opcode));
712 #include "ipret.inc"
714 #ifdef COMPUTED_GOTO_RELATIVE
715 label_unknown:
716 internal(file_line, "run: invalid opcode %04x", (int)code);
717 #endif
719 #else
720 next_code:
721 code = *ip;
722 #if defined(DEBUG) && !defined(COMPUTED_GOTO)
723 memmove(last_stack + 1, last_stack, (sizeof last_stack) - sizeof(*last_stack));
724 last_stack[0] = ip;
725 #endif
726 #ifdef DEBUG_TRACE
727 if (unlikely(load_relaxed(&trace_enabled))) {
728 struct stack_trace st;
729 const char *fn = "";
730 unsigned ln = 0;
731 stack_trace_capture(&st, fp, ip, 1);
732 if (st.trace_n >= 1) {
733 fn = st.trace[0].function_name;
734 ln = st.trace[0].line;
736 #define xip(n) (frame_ip(fp, ip) + n >= da(get_frame(fp)->function,function)->code_size ? 0xffff : ip[n])
737 trace("%-24s %-5u %-32s at %u %p %p %04x %04x %04x %04x %04x %04x %04x %04x", fn, ln, decode_opcode(code, true), frame_ip(fp, ip), fp, frame_execution_control(fp), xip(1), xip(2), xip(3), xip(4), xip(5), xip(6), xip(7), xip(8));
738 #undef xip
739 stack_trace_free(&st);
741 #endif
742 switch (code & OPCODE_MASK) {
743 #define EMIT_CODE
744 #define START_BLOCK(declarations) { declarations
745 #define END_BLOCK() }
746 #define DEFINE_LABEL(lbl, code) \
747 lbl: { \
748 code \
750 break;
751 #define DEFINE_OPCODE_START_LBL(opcode, lbl) \
752 case OPCODE_ARG_MODE(opcode): {
753 #define DEFINE_OPCODE_END(opcode) \
755 break;
756 #include "ipret.inc"
757 default:
758 #if defined(HAVE___BUILTIN_UNREACHABLE) && !defined(DEBUG)
759 __builtin_unreachable();
760 #else
762 ip_t l = ip - da(get_frame(fp)->function,function)->code;
763 ip_t x;
764 for (x = 0; x <= l; x++) {
765 code_t v = da(get_frame(fp)->function,function)->code[x];
766 const char *opc = decode_opcode(v, true);
767 char c = ' ';
768 #if defined(DEBUG) && !defined(COMPUTED_GOTO)
769 size_t lso;
770 for (lso = 0; lso < n_array_elements(last_stack); lso++)
771 if (&da(get_frame(fp)->function,function)->code[x] == last_stack[lso])
772 c = '*';
773 #endif
774 if (opc)
775 debug("%c %04x (%s)", c, v, opc);
776 else
777 debug("%c %04x", c, v);
779 internal(file_line, "run: invalid opcode %04x (mode %x, int %x, real %x, bool %x, extra %x)", code, OPCODE_MODE_MULT, OPCODE_INT_OP, OPCODE_REAL_OP, OPCODE_BOOL_OP, OPCODE_EXTRA);
781 #endif
783 goto next_code;
784 #endif
786 exit_ipret:;
790 #ifdef HAVE_CODEGEN
792 static void cg_upcall_mem_copy(void *dest, const void *src, size_t size)
794 memcpy(dest, src, size);
797 static void cg_upcall_mem_clear(void *ptr, size_t len)
799 memset(ptr, 0, len);
802 static void cg_upcall_pointer_dereference(pointer_t_upcall ptr)
804 pointer_dereference(ptr);
807 static void cg_upcall_pointer_reference_owned(pointer_t_upcall ptr)
809 pointer_reference_owned(ptr);
812 static pointer_t cg_upcall_ipret_copy_variable_to_pointer_noderef(frame_s *src_fp, uintptr_t src_slot)
814 return ipret_copy_variable_to_pointer(src_fp, src_slot, false);
817 static pointer_t cg_upcall_ipret_copy_variable_to_pointer_deref(frame_s *src_fp, uintptr_t src_slot)
819 pointer_t ptr = ipret_copy_variable_to_pointer(src_fp, src_slot, true);
820 *frame_pointer(src_fp, src_slot) = pointer_empty();
821 return ptr;
824 static pointer_t cg_upcall_flat_to_data(frame_s *fp, uintptr_t slot)
826 const struct type *type = frame_get_type_of_local(fp, slot);
827 const unsigned char *flat = frame_var(fp, slot);
828 return flat_to_data(type, flat);
831 static unsigned char *cg_upcall_data_alloc_function_reference_mayfail(uintptr_t n_curried_arguments)
833 ajla_error_t sink;
834 return cast_ptr(unsigned char *, data_alloc_function_reference_mayfail(n_curried_arguments, &sink pass_file_line));
837 static unsigned char *cg_upcall_data_alloc_record_mayfail(frame_s *fp, uintptr_t slot)
839 ajla_error_t sink;
840 const struct type *type = frame_get_type_of_local(fp, slot);
841 return cast_ptr(unsigned char *, data_alloc_record_mayfail(type_def(type,record), &sink pass_file_line));
844 static unsigned char *cg_upcall_data_alloc_option_mayfail(void)
846 ajla_error_t sink;
847 return cast_ptr(unsigned char *, data_alloc(option, &sink));
850 static unsigned char *cg_upcall_data_alloc_array_flat_tag_mayfail(uintptr_t tag, int_default_t_upcall n_entries)
852 ajla_error_t sink;
853 const struct type *type = type_get_from_tag(tag);
854 return cast_ptr(unsigned char *, data_alloc_array_flat_mayfail(type, n_entries, n_entries, false, &sink pass_file_line));
857 static unsigned char *cg_upcall_data_alloc_array_flat_slot_mayfail(frame_s *fp, uintptr_t slot, int_default_t_upcall n_entries)
859 ajla_error_t sink;
860 const struct type *type = frame_get_type_of_local(fp, slot);
861 return cast_ptr(unsigned char *, data_alloc_array_flat_mayfail(type, n_entries, n_entries, false, &sink pass_file_line));
864 static unsigned char *cg_upcall_data_alloc_array_flat_types_ptr_mayfail(frame_s *fp, uintptr_t local_type, int_default_t_upcall n_allocated, int_default_t_upcall n_used)
866 ajla_error_t sink;
867 const struct type *type = da_type(get_frame(fp)->function, local_type);
868 return cast_ptr(unsigned char *, data_alloc_array_flat_mayfail(type, n_allocated, n_used, false, &sink pass_file_line));
871 static unsigned char *cg_upcall_data_alloc_array_pointers_mayfail(int_default_t_upcall n_allocated, int_default_t_upcall n_used)
873 ajla_error_t sink;
874 return cast_ptr(unsigned char *, data_alloc_array_pointers_mayfail(n_allocated, n_used, &sink pass_file_line));
877 static pointer_t cg_upcall_array_create_flat(frame_s *fp, int_default_t_upcall length, uintptr_t content_slot)
879 array_index_t idx;
880 const struct type *content_type = frame_get_type_of_local(fp, content_slot);
881 index_from_int(&idx, length);
882 return array_create(idx, content_type, frame_var(fp, content_slot), pointer_empty());
885 static pointer_t cg_upcall_array_create_pointers(frame_s *fp, uintptr_t ip_offset, uintptr_t length_slot, pointer_t_upcall ptr)
887 array_index_t idx;
888 int_default_t length = *frame_slot(fp, length_slot, int_default_t);
889 if (unlikely(length < 0)) {
890 code_t *ip;
891 pointer_dereference(ptr);
892 ip = da(get_frame(fp)->function,function)->code + ip_offset;
893 return pointer_error(error_ajla(EC_SYNC, AJLA_ERROR_NEGATIVE_INDEX), fp, ip pass_file_line);
895 index_from_int(&idx, length);
896 return array_create(idx, NULL, NULL, ptr);
899 static pointer_t cg_upcall_array_create_sparse(int_default_t_upcall length, pointer_t_upcall ptr)
901 array_index_t idx;
902 index_from_int(&idx, length);
903 return array_create_sparse(idx, ptr);
906 static pointer_t cg_upcall_array_sub(pointer_t_upcall array, int_default_t_upcall start, int_default_t_upcall end, bool deref)
908 pointer_t res_ptr;
909 ajla_error_t err;
910 struct data *d, *s;
911 array_index_t idx_start, idx_end, idx_len, idx_array_len;
912 if (unlikely((start | end) < 0))
913 goto fail1;
914 if (unlikely(start > end))
915 goto fail1;
916 if (unlikely(pointer_is_thunk(array)))
917 goto fail1;
918 index_from_int(&idx_start, start);
919 index_from_int(&idx_end, end);
920 index_from_int(&idx_len, end - start);
921 d = pointer_get_data(array);
922 if (unlikely(da_tag(d) == DATA_TAG_array_incomplete))
923 goto fail2;
924 idx_array_len = array_len(d);
925 if (unlikely(!index_ge_index(idx_array_len, idx_end))) {
926 index_free(&idx_array_len);
927 goto fail2;
929 index_free(&idx_array_len);
930 index_free(&idx_end);
931 s = array_sub(d, idx_start, idx_len, deref, &err);
932 if (unlikely(!s)) {
933 res_ptr = pointer_error(err, NULL, 0 pass_file_line);
934 } else {
935 res_ptr = pointer_data(s);
937 return res_ptr;
938 fail2:
939 index_free(&idx_start);
940 index_free(&idx_end);
941 index_free(&idx_len);
942 fail1:
943 return pointer_empty();
946 static pointer_t cg_upcall_array_skip(pointer_t_upcall array, int_default_t_upcall start, bool deref)
948 pointer_t res_ptr;
949 ajla_error_t err;
950 struct data *d, *s;
951 array_index_t idx_start, idx_array_len;
952 if (unlikely(start < 0))
953 goto fail1;
954 if (unlikely(pointer_is_thunk(array)))
955 goto fail1;
956 d = pointer_get_data(array);
957 if (unlikely(da_tag(d) == DATA_TAG_array_incomplete))
958 goto fail1;
959 index_from_int(&idx_start, start);
960 idx_array_len = array_len(d);
961 if (unlikely(!index_ge_index(idx_array_len, idx_start))) {
962 goto fail2;
964 index_sub_int(&idx_array_len, start);
965 s = array_sub(d, idx_start, idx_array_len, deref, &err);
966 if (unlikely(!s)) {
967 res_ptr = pointer_error(err, NULL, 0 pass_file_line);
968 } else {
969 res_ptr = pointer_data(s);
971 return res_ptr;
972 fail2:
973 index_free(&idx_array_len);
974 index_free(&idx_start);
975 fail1:
976 return pointer_empty();
979 static pointer_t cg_upcall_array_join(pointer_t_upcall ptr1, pointer_t_upcall ptr2)
981 ajla_error_t err;
982 struct data *d1 = pointer_get_data(ptr1);
983 struct data *d2 = pointer_get_data(ptr2);
984 struct data *d = array_join(d1, d2, &err);
985 if (unlikely(!d))
986 return pointer_error(err, NULL, NULL pass_file_line);
987 return pointer_data(d);
990 static void *cg_upcall_ipret_io(frame_s *fp, uintptr_t ip_offset, uintptr_t code_params)
992 void *ret;
993 code_t *ip = da(get_frame(fp)->function,function)->code + ip_offset;
994 unsigned char io_code = code_params >> 24;
995 unsigned char n_outputs = code_params >> 16;
996 unsigned char n_inputs = code_params >> 8;
997 unsigned char n_params = code_params;
998 /*debug("cg_upcall_ipret_io start: %p, %u %u %u %u", ip, io_code, n_outputs, n_inputs, n_params);*/
999 ret = ipret_io(fp, ip, io_code, n_outputs, n_inputs, n_params);
1000 /*debug("cg_upcall_ipret_io end: %u %u %u %u -> %p", io_code, n_outputs, n_inputs, n_params, ret);*/
1001 return ret;
1004 static int_default_t cg_upcall_ipret_system_property(int_default_t_upcall idx)
1006 return ipret_system_property(idx);
1009 #define f(n, s, u, sz, bits) \
1010 static bool cat(INT_binary_const_,s)(const s *v1, int_default_t_upcall v2, s *r, bool (*op)(const void *, const void *, void *))\
1012 s c = v2; \
1013 return op(v1, &c, r); \
1015 for_all_int(f, for_all_empty)
1016 #undef f
1018 #define f(n, s, u, sz, bits) \
1019 static bool cat(FIXED_uto_int_,s)(const u *v1, int_default_t *r) \
1021 int_default_t ret; \
1022 ret = (int_default_t)*v1; \
1023 if (unlikely((u)ret != *v1) || unlikely(ret < 0)) \
1024 return false; \
1025 *r = ret; \
1026 return true; \
1028 static bool cat(FIXED_ufrom_int_,s)(const int_default_t *v1, u *r) \
1030 u ret; \
1031 ret = (u)*v1; \
1032 if (unlikely((int_default_t)ret != *v1) || unlikely(*v1 < 0)) \
1033 return false; \
1034 *r = ret; \
1035 return true; \
1037 for_all_fixed(f)
1038 #undef f
1040 #ifdef DEBUG_UPCALL
1041 static void cg_upcall_debug(unsigned long x1, unsigned long x2, unsigned long x3, unsigned long x4)
1043 debug("cg upcall: %lx, %lx, %lx, %lx", x1, x2, x3, x4);
1045 #endif
1047 #endif
1049 #define nf(n, t) NULL,
1051 struct cg_upcall_vector_s cg_upcall_vector = {
1053 #ifdef HAVE_CODEGEN
1054 cg_upcall_mem_copy,
1055 cg_upcall_mem_clear,
1056 cg_upcall_pointer_dereference,
1057 cg_upcall_pointer_reference_owned,
1058 cg_upcall_ipret_copy_variable_to_pointer_noderef,
1059 cg_upcall_ipret_copy_variable_to_pointer_deref,
1060 cg_upcall_flat_to_data,
1061 cg_upcall_data_alloc_function_reference_mayfail,
1062 cg_upcall_data_alloc_record_mayfail,
1063 cg_upcall_data_alloc_option_mayfail,
1064 cg_upcall_data_alloc_array_flat_tag_mayfail,
1065 cg_upcall_data_alloc_array_flat_slot_mayfail,
1066 cg_upcall_data_alloc_array_flat_types_ptr_mayfail,
1067 cg_upcall_data_alloc_array_pointers_mayfail,
1068 cg_upcall_array_create_flat,
1069 cg_upcall_array_create_pointers,
1070 cg_upcall_array_create_sparse,
1071 cg_upcall_array_sub,
1072 cg_upcall_array_skip,
1073 cg_upcall_array_join,
1074 cg_upcall_ipret_io,
1075 cg_upcall_ipret_system_property,
1076 #define f(n, s, u, sz, bits) \
1077 cat(INT_binary_const_,s),
1078 for_all_int(f, for_all_empty)
1079 #undef f
1080 cat(FIXED_binary_add_,TYPE_INT_MAX),
1081 cat(FIXED_binary_subtract_,TYPE_INT_MAX),
1082 #define f(n, s, u, sz, bits) \
1083 cat(FIXED_binary_multiply_,s),
1084 for_all_fixed(f)
1085 #undef f
1086 #define f(n, s, u, sz, bits) \
1087 cat(FIXED_binary_divide_,s),
1088 for_all_fixed(f)
1089 #undef f
1090 #define f(n, s, u, sz, bits) \
1091 cat(FIXED_binary_udivide_,s),
1092 for_all_fixed(f)
1093 #undef f
1094 #define f(n, s, u, sz, bits) \
1095 cat(FIXED_binary_modulo_,s),
1096 for_all_fixed(f)
1097 #undef f
1098 #define f(n, s, u, sz, bits) \
1099 cat(FIXED_binary_umodulo_,s),
1100 for_all_fixed(f)
1101 #undef f
1102 #define f(n, s, u, sz, bits) \
1103 cat(FIXED_binary_power_,s),
1104 for_all_fixed(f)
1105 #undef f
1106 cat(FIXED_binary_shl_,TYPE_INT_MAX),
1107 cat(FIXED_binary_shr_,TYPE_INT_MAX),
1108 cat(FIXED_binary_ushr_,TYPE_INT_MAX),
1109 cat(FIXED_binary_rol_,TYPE_INT_MAX),
1110 cat(FIXED_binary_ror_,TYPE_INT_MAX),
1111 cat(FIXED_binary_bts_,TYPE_INT_MAX),
1112 cat(FIXED_binary_btr_,TYPE_INT_MAX),
1113 cat(FIXED_binary_btc_,TYPE_INT_MAX),
1114 cat(FIXED_binary_less_,TYPE_INT_MAX),
1115 cat(FIXED_binary_less_equal_,TYPE_INT_MAX),
1116 cat(FIXED_binary_greater_,TYPE_INT_MAX),
1117 cat(FIXED_binary_greater_equal_,TYPE_INT_MAX),
1118 cat(FIXED_binary_uless_,TYPE_INT_MAX),
1119 cat(FIXED_binary_uless_equal_,TYPE_INT_MAX),
1120 cat(FIXED_binary_ugreater_,TYPE_INT_MAX),
1121 cat(FIXED_binary_ugreater_equal_,TYPE_INT_MAX),
1122 cat(FIXED_binary_bt_,TYPE_INT_MAX),
1123 cat(FIXED_unary_neg_,TYPE_INT_MAX),
1124 #define f(n, s, u, sz, bits) \
1125 cat(FIXED_unary_bswap_,s),
1126 for_all_fixed(f)
1127 #undef f
1128 #define f(n, s, u, sz, bits) \
1129 cat(FIXED_unary_brev_,s),
1130 for_all_fixed(f)
1131 #undef f
1132 #define f(n, s, u, sz, bits) \
1133 cat(FIXED_unary_bsf_,s),
1134 for_all_fixed(f)
1135 #undef f
1136 #define f(n, s, u, sz, bits) \
1137 cat(FIXED_unary_bsr_,s),
1138 for_all_fixed(f)
1139 #undef f
1140 #define f(n, s, u, sz, bits) \
1141 cat(FIXED_unary_popcnt_,s),
1142 for_all_fixed(f)
1143 #undef f
1144 #define f(n, s, u, sz, bits) \
1145 cat(FIXED_uto_int_,s),
1146 for_all_fixed(f)
1147 #undef f
1148 #define f(n, s, u, sz, bits) \
1149 cat(FIXED_ufrom_int_,s),
1150 for_all_fixed(f)
1151 #undef f
1152 cat(INT_binary_add_,TYPE_INT_MAX),
1153 cat(INT_binary_subtract_,TYPE_INT_MAX),
1154 #define f(n, s, u, sz, bits) \
1155 cat(INT_binary_multiply_,s),
1156 for_all_int(f, for_all_empty)
1157 #undef f
1158 #define f(n, s, u, sz, bits) \
1159 cat(INT_binary_divide_,s),
1160 for_all_int(f, for_all_empty)
1161 #undef f
1162 #define f(n, s, u, sz, bits) \
1163 cat(INT_binary_modulo_,s),
1164 for_all_int(f, for_all_empty)
1165 #undef f
1166 #define f(n, s, u, sz, bits) \
1167 cat(INT_binary_power_,s),
1168 for_all_int(f, for_all_empty)
1169 #undef f
1170 cat(INT_binary_shl_,TYPE_INT_MAX),
1171 cat(INT_binary_shr_,TYPE_INT_MAX),
1172 cat(INT_binary_bts_,TYPE_INT_MAX),
1173 cat(INT_binary_btr_,TYPE_INT_MAX),
1174 cat(INT_binary_btc_,TYPE_INT_MAX),
1175 cat(INT_binary_bt_,TYPE_INT_MAX),
1176 cat(INT_unary_neg_,TYPE_INT_MAX),
1177 #define f(n, s, u, sz, bits) \
1178 cat(INT_unary_bsf_,s),
1179 for_all_int(f, for_all_empty)
1180 #undef f
1181 #define f(n, s, u, sz, bits) \
1182 cat(INT_unary_bsr_,s),
1183 for_all_int(f, for_all_empty)
1184 #undef f
1185 #define f(n, s, u, sz, bits) \
1186 cat(INT_unary_popcnt_,s),
1187 for_all_int(f, for_all_empty)
1188 #undef f
1189 #define f(n, t, nt, pack, unpack) \
1190 cat(REAL_binary_add_,t),
1191 for_all_real(f, nf)
1192 #undef f
1193 #define f(n, t, nt, pack, unpack) \
1194 cat(REAL_binary_subtract_,t),
1195 for_all_real(f, nf)
1196 #undef f
1197 #define f(n, t, nt, pack, unpack) \
1198 cat(REAL_binary_multiply_,t),
1199 for_all_real(f, nf)
1200 #undef f
1201 #define f(n, t, nt, pack, unpack) \
1202 cat(REAL_binary_divide_,t),
1203 for_all_real(f, nf)
1204 #undef f
1205 #define f(n, t, nt, pack, unpack) \
1206 cat(REAL_binary_modulo_,t),
1207 for_all_real(f, nf)
1208 #undef f
1209 #define f(n, t, nt, pack, unpack) \
1210 cat(REAL_binary_power_,t),
1211 for_all_real(f, nf)
1212 #undef f
1213 #define f(n, t, nt, pack, unpack) \
1214 cat(REAL_binary_ldexp_,t),
1215 for_all_real(f, nf)
1216 #undef f
1217 #define f(n, t, nt, pack, unpack) \
1218 cat(REAL_binary_atan2_,t),
1219 for_all_real(f, nf)
1220 #undef f
1221 #define f(n, t, nt, pack, unpack) \
1222 cat(REAL_binary_equal_,t),
1223 for_all_real(f, nf)
1224 #undef f
1225 #define f(n, t, nt, pack, unpack) \
1226 cat(REAL_binary_not_equal_,t),
1227 for_all_real(f, nf)
1228 #undef f
1229 #define f(n, t, nt, pack, unpack) \
1230 cat(REAL_binary_less_,t),
1231 for_all_real(f, nf)
1232 #undef f
1233 #define f(n, t, nt, pack, unpack) \
1234 cat(REAL_binary_less_equal_,t),
1235 for_all_real(f, nf)
1236 #undef f
1237 #define f(n, t, nt, pack, unpack) \
1238 cat(REAL_binary_greater_,t),
1239 for_all_real(f, nf)
1240 #undef f
1241 #define f(n, t, nt, pack, unpack) \
1242 cat(REAL_binary_greater_equal_,t),
1243 for_all_real(f, nf)
1244 #undef f
1245 #define f(n, t, nt, pack, unpack) \
1246 cat(REAL_unary_neg_,t),
1247 for_all_real(f, nf)
1248 #undef f
1249 #define f(n, t, nt, pack, unpack) \
1250 cat(REAL_unary_sqrt_,t),
1251 for_all_real(f, nf)
1252 #undef f
1253 #define f(n, t, nt, pack, unpack) \
1254 cat(REAL_unary_cbrt_,t),
1255 for_all_real(f, nf)
1256 #undef f
1257 #define f(n, t, nt, pack, unpack) \
1258 cat(REAL_unary_sin_,t),
1259 for_all_real(f, nf)
1260 #undef f
1261 #define f(n, t, nt, pack, unpack) \
1262 cat(REAL_unary_cos_,t),
1263 for_all_real(f, nf)
1264 #undef f
1265 #define f(n, t, nt, pack, unpack) \
1266 cat(REAL_unary_tan_,t),
1267 for_all_real(f, nf)
1268 #undef f
1269 #define f(n, t, nt, pack, unpack) \
1270 cat(REAL_unary_asin_,t),
1271 for_all_real(f, nf)
1272 #undef f
1273 #define f(n, t, nt, pack, unpack) \
1274 cat(REAL_unary_acos_,t),
1275 for_all_real(f, nf)
1276 #undef f
1277 #define f(n, t, nt, pack, unpack) \
1278 cat(REAL_unary_atan_,t),
1279 for_all_real(f, nf)
1280 #undef f
1281 #define f(n, t, nt, pack, unpack) \
1282 cat(REAL_unary_sinh_,t),
1283 for_all_real(f, nf)
1284 #undef f
1285 #define f(n, t, nt, pack, unpack) \
1286 cat(REAL_unary_cosh_,t),
1287 for_all_real(f, nf)
1288 #undef f
1289 #define f(n, t, nt, pack, unpack) \
1290 cat(REAL_unary_tanh_,t),
1291 for_all_real(f, nf)
1292 #undef f
1293 #define f(n, t, nt, pack, unpack) \
1294 cat(REAL_unary_asinh_,t),
1295 for_all_real(f, nf)
1296 #undef f
1297 #define f(n, t, nt, pack, unpack) \
1298 cat(REAL_unary_acosh_,t),
1299 for_all_real(f, nf)
1300 #undef f
1301 #define f(n, t, nt, pack, unpack) \
1302 cat(REAL_unary_atanh_,t),
1303 for_all_real(f, nf)
1304 #undef f
1305 #define f(n, t, nt, pack, unpack) \
1306 cat(REAL_unary_exp2_,t),
1307 for_all_real(f, nf)
1308 #undef f
1309 #define f(n, t, nt, pack, unpack) \
1310 cat(REAL_unary_exp_,t),
1311 for_all_real(f, nf)
1312 #undef f
1313 #define f(n, t, nt, pack, unpack) \
1314 cat(REAL_unary_exp10_,t),
1315 for_all_real(f, nf)
1316 #undef f
1317 #define f(n, t, nt, pack, unpack) \
1318 cat(REAL_unary_log2_,t),
1319 for_all_real(f, nf)
1320 #undef f
1321 #define f(n, t, nt, pack, unpack) \
1322 cat(REAL_unary_log_,t),
1323 for_all_real(f, nf)
1324 #undef f
1325 #define f(n, t, nt, pack, unpack) \
1326 cat(REAL_unary_log10_,t),
1327 for_all_real(f, nf)
1328 #undef f
1329 #define f(n, t, nt, pack, unpack) \
1330 cat(REAL_unary_round_,t),
1331 for_all_real(f, nf)
1332 #undef f
1333 #define f(n, t, nt, pack, unpack) \
1334 cat(REAL_unary_ceil_,t),
1335 for_all_real(f, nf)
1336 #undef f
1337 #define f(n, t, nt, pack, unpack) \
1338 cat(REAL_unary_floor_,t),
1339 for_all_real(f, nf)
1340 #undef f
1341 #define f(n, t, nt, pack, unpack) \
1342 cat(REAL_unary_trunc_,t),
1343 for_all_real(f, nf)
1344 #undef f
1345 #define f(n, t, nt, pack, unpack) \
1346 cat(REAL_unary_fract_,t),
1347 for_all_real(f, nf)
1348 #undef f
1349 #define f(n, t, nt, pack, unpack) \
1350 cat(REAL_unary_mantissa_,t),
1351 for_all_real(f, nf)
1352 #undef f
1353 #define f(n, t, nt, pack, unpack) \
1354 cat(REAL_unary_exponent_,t),
1355 for_all_real(f, nf)
1356 #undef f
1357 #define f(n, t, nt, pack, unpack) \
1358 cat(REAL_unary_next_number_,t),
1359 for_all_real(f, nf)
1360 #undef f
1361 #define f(n, t, nt, pack, unpack) \
1362 cat(REAL_unary_prev_number_,t),
1363 for_all_real(f, nf)
1364 #undef f
1365 #define f(n, t, nt, pack, unpack) \
1366 cat(REAL_unary_to_int_,t),
1367 for_all_real(f, nf)
1368 #undef f
1369 #define f(n, t, nt, pack, unpack) \
1370 cat(REAL_unary_from_int_,t),
1371 for_all_real(f, nf)
1372 #undef f
1373 #define f(n, t, nt, pack, unpack) \
1374 cat(REAL_unary_is_exception_,t),
1375 for_all_real(f, nf)
1376 #undef f
1377 #endif
1378 #ifdef DEBUG_UPCALL
1379 cg_upcall_debug,
1380 #endif
1383 #if defined(HAVE_CODEGEN)
1384 uint32_t hacked_upcall_map = 0;
1385 static size_t hacked_upcall_size[32];
1386 #endif
1388 void name(ipret_init)(void)
1390 #if 0
1391 unsigned __int128 a = ((unsigned __int128)0x285C1889155FULL << 64) + 0xC6DCBCCF1106E0C5ULL;
1392 unsigned __int128 b = 0x374AC42721E9E9BFULL;
1393 unsigned __int128 c = 1;
1394 char *s;
1395 FIXED_binary_power_int128_t(&a, &b, &c);
1396 s = str_from_signed(c, 16);
1397 debug("%s", s);
1398 mem_free(s);
1399 #endif
1400 #if 0
1401 int i;
1402 for (i = 0; i < OPCODE_MODE_MULT * ARG_MODE_N - (OPCODE_MODE_MULT - OPCODE_N); i++) {
1403 debug("%04x - %s", i, decode_opcode(i, true));
1405 #endif
1406 tick_stamp_ptr = &tick_stamp;
1407 #if defined(HAVE_CODEGEN) && defined(ARCH_X86_64) && !defined(ARCH_X86_WIN_ABI) && (!defined(POINTER_COMPRESSION) || POINTER_COMPRESSION == 3)
1408 if (!offsetof(struct data, refcount_) && REFCOUNT_STEP == 256) {
1409 const char *id = "codegen";
1410 void *pde = (void *)pointer_dereference_;
1411 void *icvtp = (void *)ipret_copy_variable_to_pointer;
1412 void *cuftd = (void *)cg_upcall_flat_to_data;
1413 char *c;
1414 size_t cs;
1415 unsigned idx;
1417 str_init(&c, &cs);
1418 #ifndef POINTER_COMPRESSION
1419 if (cpu_test_feature(CPU_FEATURE_apx)) {
1420 str_add_hex(&c, &cs, "4889d04883e0fe488b084881f9fffeffff0f87a40000004881f9ff0000000f8798000000d5085662f4bc18fff762d4ac18fff162fcf410fff062fce410fff262fcd410fff462fcc410fff662dcb410fff062dca410fff262dc9410fff462dc8410fff648b8000000000000000048be00000000000000004889d7ffd062dc8c108fc762dc9c108fc562dcac108fc362dcbc108fc162fccc108fc762fcdc108fc562fcec108fc362fcfc108fc162d4b4188fc262d4c4188fc0d5085ec3f04881280001000073f548810000010000e952ffffff");
1421 memcpy(&c[0x65], &pde, 8);
1422 memcpy(&c[0x6f], &id, 8);
1423 } else {
1424 str_add_hex(&c, &cs, "4889d04883e0fe488b084881f9fffeffff77324881f9ff000000772a565741504151415248b8000000000000000048be00000000000000004889d7ffd0415a415941585f5ec3f04881280001000073f548810000010000ebc3");
1425 memcpy(&c[0x26], &pde, 8);
1426 memcpy(&c[0x30], &id, 8);
1428 #else
1429 if (cpu_test_feature(CPU_FEATURE_apx)) {
1430 str_add_hex(&c, &cs, "89d083e0fe48c1e003488b084881f9fffeffff0f87a30000004881f9ff0000000f8797000000d5085662f4bc18fff762d4ac18fff162fcf410fff062fce410fff262fcd410fff462fcc410fff662dcb410fff062dca410fff262dc9410fff462dc8410fff648b8000000000000000048be000000000000000089d7ffd062dc8c108fc762dc9c108fc562dcac108fc362dcbc108fc162fccc108fc762fcdc108fc562fcec108fc362fcfc108fc162d4b4188fc262d4c4188fc0d5085ec3f04881280001000073f548810000010000e953ffffff");
1431 memcpy(&c[0x67], &pde, 8);
1432 memcpy(&c[0x71], &id, 8);
1433 } else {
1434 str_add_hex(&c, &cs, "89d083e0fe48c1e003488b084881f9fffeffff77314881f9ff0000007729565741504151415248b8000000000000000048be000000000000000089d7ffd0415a415941585f5ec3f04881280001000073f548810000010000ebc4");
1435 memcpy(&c[0x28], &pde, 8);
1436 memcpy(&c[0x32], &id, 8);
1438 #endif
1439 array_finish(char, &c, &cs);
1440 cg_upcall_vector.cg_upcall_pointer_dereference = os_code_map(cast_ptr(uint8_t *, c), cs, NULL);
1441 idx = offsetof(struct cg_upcall_vector_s, cg_upcall_pointer_dereference) / sizeof(void *);
1442 hacked_upcall_map |= 1U << idx;
1443 hacked_upcall_size[idx] = cs;
1445 str_init(&c, &cs);
1446 #ifndef POINTER_COMPRESSION
1447 str_add_hex(&c, &cs, "4883e2fe488b02483dfffeffff7708f048810200010000c3");
1448 #else
1449 str_add_hex(&c, &cs, "83e2fe48c1e203488b02483dfffeffff7708f048810200010000c30f1f440000");
1450 #endif
1451 array_finish(char, &c, &cs);
1452 cg_upcall_vector.cg_upcall_pointer_reference_owned = os_code_map(cast_ptr(uint8_t *, c), cs, NULL);
1453 idx = offsetof(struct cg_upcall_vector_s, cg_upcall_pointer_reference_owned) / sizeof(void *);
1454 hacked_upcall_map |= 1U << idx;
1455 hacked_upcall_size[idx] = cs;
1457 str_init(&c, &cs);
1458 if (cpu_test_feature(CPU_FEATURE_apx)) {
1459 str_add_hex(&c, &cs, "d5085662f4bc18fff762d4ac18fff162fcf410fff062fce410fff262fcd410fff462fcc410fff662dcb410fff062dca410fff262dc9410fff462dc8410fff64889d789ce31d248b80000000000000000ffd062dc8c108fc762dc9c108fc562dcac108fc362dcbc108fc162fccc108fc762fcdc108fc562fcec108fc362fcfc108fc162d4b4188fc262d4c4188fc0d5085ec3");
1460 memcpy(&c[0x48], &icvtp, 8);
1461 } else {
1462 str_add_hex(&c, &cs, "56574150415141524889d789ce31d248b80000000000000000ffd0415a415941585f5ec3");
1463 memcpy(&c[0x11], &icvtp, 8);
1465 array_finish(char, &c, &cs);
1466 cg_upcall_vector.cg_upcall_ipret_copy_variable_to_pointer_noderef = os_code_map(cast_ptr(uint8_t *, c), cs, NULL);
1467 idx = offsetof(struct cg_upcall_vector_s, cg_upcall_ipret_copy_variable_to_pointer_noderef) / sizeof(void *);
1468 hacked_upcall_map |= 1U << idx;
1469 hacked_upcall_size[idx] = cs;
1471 str_init(&c, &cs);
1472 #ifndef POINTER_COMPRESSION
1473 if (cpu_test_feature(CPU_FEATURE_apx)) {
1474 str_add_hex(&c, &cs, "d5085662f4bc18fff762d4ac18fff162fcf410fff062fce410fff262fcd410fff462fcc410fff662dcb410fff062dca410fff262dc9410fff462dc8410fff662f4f418fff24889d789ceba0100000048b80000000000000000ffd062f4ec188fc148c704ca0000000062dc8c108fc762dc9c108fc562dcac108fc362dcbc108fc162fccc108fc762fcdc108fc562fcec108fc362fcfc108fc162d4b4188fc262d4c4188fc0d5085ec3");
1475 memcpy(&c[0x51], &icvtp, 8);
1476 } else {
1477 str_add_hex(&c, &cs, "565741504151415252514889d789ceba0100000048b80000000000000000ffd0595a48c704ca00000000415a415941585f5ec3");
1478 memcpy(&c[0x16], &icvtp, 8);
1480 #else
1481 if (cpu_test_feature(CPU_FEATURE_apx)) {
1482 str_add_hex(&c, &cs, "d5085662f4bc18fff762d4ac18fff162fcf410fff062fce410fff262fcd410fff462fcc410fff662dcb410fff062dca410fff262dc9410fff462dc8410fff662f4f418fff24889d789ceba0100000048b80000000000000000ffd062f4ec188fc1c7048a0000000062dc8c108fc762dc9c108fc562dcac108fc362dcbc108fc162fccc108fc762fcdc108fc562fcec108fc362fcfc108fc162d4b4188fc262d4c4188fc0d5085ec3");
1483 memcpy(&c[0x51], &icvtp, 8);
1484 } else {
1485 str_add_hex(&c, &cs, "565741504151415252514889d789ceba0100000048b80000000000000000ffd0595ac7048a00000000415a415941585f5ec3");
1486 memcpy(&c[0x16], &icvtp, 8);
1488 #endif
1489 array_finish(char, &c, &cs);
1490 cg_upcall_vector.cg_upcall_ipret_copy_variable_to_pointer_deref = os_code_map(cast_ptr(uint8_t *, c), cs, NULL);
1491 idx = offsetof(struct cg_upcall_vector_s, cg_upcall_ipret_copy_variable_to_pointer_deref) / sizeof(void *);
1492 hacked_upcall_map |= 1U << idx;
1493 hacked_upcall_size[idx] = cs;
1495 str_init(&c, &cs);
1496 if (cpu_test_feature(CPU_FEATURE_apx)) {
1497 str_add_hex(&c, &cs, "d5085662f4bc18fff762d4ac18fff162fcf410fff062fce410fff262fcd410fff462fcc410fff662dcb410fff062dca410fff262dc9410fff462dc8410fff64889d789ce48b80000000000000000ffd062dc8c108fc762dc9c108fc562dcac108fc362dcbc108fc162fccc108fc762fcdc108fc562fcec108fc362fcfc108fc162d4b4188fc262d4c4188fc0d5085ec3");
1498 memcpy(&c[0x46], &cuftd, 8);
1499 } else {
1500 str_add_hex(&c, &cs, "56574150415141524889d789ce48b80000000000000000ffd0415a415941585f5ec3");
1501 memcpy(&c[0xf], &cuftd, 8);
1503 array_finish(char, &c, &cs);
1504 cg_upcall_vector.cg_upcall_flat_to_data = os_code_map(cast_ptr(uint8_t *, c), cs, NULL);
1505 idx = offsetof(struct cg_upcall_vector_s, cg_upcall_flat_to_data) / sizeof(void *);
1506 hacked_upcall_map |= 1U << idx;
1507 hacked_upcall_size[idx] = cs;
1509 #endif
1512 void name(ipret_done)(void)
1514 #if defined(HAVE_CODEGEN)
1515 while (hacked_upcall_map) {
1516 unsigned idx = low_bit(hacked_upcall_map);
1517 hacked_upcall_map &= hacked_upcall_map - 1;
1518 os_code_unmap(*((void **)&cg_upcall_vector + idx), hacked_upcall_size[idx]);
1520 #endif
1523 #endif