/*
 * Copyright (C) 2024 Mikulas Patocka
 *
 * This file is part of Ajla.
 *
 * Ajla is free software: you can redistribute it and/or modify it under the
 * terms of the GNU General Public License as published by the Free Software
 * Foundation, either version 3 of the License, or (at your option) any later
 * version.
 *
 * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with
 * Ajla. If not, see <https://www.gnu.org/licenses/>.
 */
#define X86_ALU_RM8_R8 0x00
#define X86_ALU_RM16_R16 0x01
#define X86_ALU_R8_RM8 0x02
#define X86_ALU_R16_RM16 0x03
#define X86_ALU_AL_IMM8 0x04
#define X86_ALU_AX_IMM16 0x05
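/*
 * The six forms above are the ADD encodings; the other ALU operations
 * (OR, ADC, SBB, AND, SUB, XOR, CMP) reuse the same six forms offset by
 * 8 * alu, with alu running from 0 (ADD) to 7 (CMP). For example,
 * "cmp eax, ecx" is 0x39 0xc8: X86_ALU_RM16_R16 + 8 * 7, then ModRM 0xc8
 * (mod 3, reg = ecx, rm = eax).
 */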
#define X86_REX_B 0x01
#define X86_REX_X 0x02
#define X86_REX_R 0x04
#define X86_REX_W 0x08
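/*
 * A REX prefix is a single byte 0x40 | W<<3 | R<<2 | X<<1 | B: W selects
 * 64-bit operand size, while R, X and B supply the fourth bit of the ModRM
 * reg field, the SIB index and the ModRM rm / SIB base respectively.
 * For example, 0x4c 0x89 0xc3 is "mov rbx, r8": REX.W|REX.R, opcode
 * X86_MOV_RM16_R16, ModRM 0xc3 with reg = r8 and rm = rbx.
 */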
#define X86_INC_R16 0x40
#define X86_DEC_R16 0x48
#define X86_PUSH_R16 0x50
#define X86_POP_R16 0x58
#define X86_MOVSXD 0x63
#define X86_OP_SIZE 0x66
#define X86_PUSH_IMM16 0x68
#define X86_IMUL_R16_RM16_IMM16 0x69
#define X86_PUSH_IMM8 0x6a
#define X86_IMUL_R16_RM16_IMM8 0x6b
#define X86_JCC_8 0x70
#define X86_ALU_RM8_IMM8 0x80
#define X86_ALU_RM16_IMM16 0x81
#define X86_ALU_RM16_IMM8 0x83
#define X86_TEST_RM8_R8 0x84
#define X86_TEST_RM16_R16 0x85
#define X86_MOV_RM8_R8 0x88
#define X86_MOV_RM16_R16 0x89
#define X86_MOV_R8_RM8 0x8a
#define X86_MOV_R16_RM16 0x8b
#define X86_LEA_R16_RM16 0x8d
#define X86_MOV_AL_M16 0xa0
#define X86_MOV_AX_M16 0xa1
#define X86_MOV_M16_AL 0xa2
#define X86_MOV_M16_AX 0xa3
#define X86_MOVSB 0xa4
#define X86_TEST_AL_IMM8 0xa8
#define X86_TEST_AX_IMM16 0xa9
#define X86_STOSB 0xaa
#define X86_MOV_R16_IMM16 0xb8
#define X86_ROT_RM8_IMM8 0xc0
#define X86_ROT_RM16_IMM8 0xc1
#define X86_RET_IMM16 0xc2
#define X86_VEX_3 0xc4
#define X86_VEX_2 0xc5
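/*
 * 0xc5 introduces the two-byte VEX form (one payload byte: ~R, ~vvvv, L, pp)
 * and 0xc4 the three-byte form (~R ~X ~B mmmmm, then W ~vvvv L pp). The AVX
 * path in cgen_rm_insn below uses the short form only when X, B and W are
 * all clear and the opcode map is 0F, and falls back to 0xc4 otherwise.
 */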
#define X86_MOV_RM8_IMM8 0xc6
#define X86_MOV_RM16_IMM16 0xc7
#define X86_MOV_R16_IMM16_REG 0x0

#define X86_8F_POP 0x0

#define X86_ROT_RM8_1 0xd0
#define X86_ROT_RM16_1 0xd1
#define X86_ROT_RM8_CL 0xd2
#define X86_ROT_RM16_CL 0xd3
#define X86_JMP_16 0xe9
#define X86_JMP_8 0xeb
#define X86_REPNE 0xf2

#define X86_F6_TEST_RM8_IMM8 0x0
#define X86_F6_NOT_RM8 0x2
#define X86_F6_NEG_RM8 0x3
#define X86_F6_MUL_RM8 0x4
#define X86_F6_IMUL_RM8 0x5
#define X86_F6_DIV_RM8 0x6
#define X86_F6_IDIV_RM8 0x7

#define X86_F7_TEST_RM16_IMM16 0x0
#define X86_F7_NOT_RM16 0x2
#define X86_F7_NEG_RM16 0x3
#define X86_F7_MUL_RM16 0x4
#define X86_F7_IMUL_RM16 0x5
#define X86_F7_DIV_RM16 0x6
#define X86_F7_IDIV_RM16 0x7

#define X86_FE_INC_RM8 0x0
#define X86_FE_DEC_RM8 0x1

#define X86_FF_INC_RM16 0x0
#define X86_FF_DEC_RM16 0x1
#define X86_FF_CALL_INDIRECT 0x2
#define X86_FF_JMP_INDIRECT 0x4
#define X86_FF_PUSH 0x6

#define X86_0F_MOVSS_X128_M32 0x10
#define X86_0F_MOVSS_M32_X128 0x11
#define X86_0F_MOVAPS_X128_M128 0x28
#define X86_0F_MOVAPS_M128_X128 0x29
#define X86_0F_CVTSI2SS_X128_RM32 0x2a
#define X86_0F_CVTTSS2SI_X128_RM32 0x2c
#define X86_0F_UCOMISS_X128_RM32 0x2e
#define X86_0F_38 0x38
#define X86_0F_3A 0x3a
#define X86_0F_CMOVCC_R16_RM16 0x40
#define X86_0F_SQRTPS_X128_M32 0x51
#define X86_0F_ANDPS_X128_M128 0x54
#define X86_0F_ANDNPS_X128_M128 0x55
#define X86_0F_ORPS_X128_M128 0x56
#define X86_0F_XORPS_X128_M128 0x57
#define X86_0F_ADDPS_X128_M32 0x58
#define X86_0F_MULPS_X128_M32 0x59
#define X86_0F_SUBPS_X128_M32 0x5c
#define X86_0F_DIVPS_X128_M32 0x5e
#define X86_0F_JCC_16 0x80
#define X86_0F_SETCC_RM8 0x90
#define X86_0F_BT_RM16_R16 0xa3
#define X86_0F_BTS_RM16_R16 0xab
#define X86_0F_BTR_RM16_R16 0xb3
#define X86_0F_BTX_RM16_IMM8 0xba
#define X86_0F_BTX_BT_RM16_IMM8 0x4
#define X86_0F_BTX_BTS_RM16_IMM8 0x5
#define X86_0F_BTX_BTR_RM16_IMM8 0x6
#define X86_0F_BTX_BTC_RM16_IMM8 0x7
#define X86_0F_BSWAP 0xc8
#define X86_0F_BTC_RM16_R16 0xbb
#define X86_0F_IMUL_R16_RM16 0xaf
#define X86_0F_MOVZX_R16_RM8 0xb6
#define X86_0F_MOVZX_R16_RM16 0xb7
#define X86_0F_POPCNT_R16_RM16 0xb8
#define X86_0F_BSF_R16_RM16 0xbc
#define X86_0F_BSR_R16_RM16 0xbd
#define X86_0F_MOVSX_R16_RM8 0xbe
#define X86_0F_MOVSX_R16_RM16 0xbf
#define X86_0F_PINSRW_X128_RM16_IMM8 0xc4

#define X86_0F_38_CVTPH2PS_X128_RM64 0x13

#define X86_0F_3A_ROUNDSS_X128_M32 0x0a
#define X86_0F_3A_ROUNDSD_X128_M64 0x0b
#define X86_0F_3A_PEXTRW_RM16_X128_IMM8 0x15
#define X86_0F_3A_CVTPS2PH_RM64_X128 0x1d

#define X87_FLD_RM32 0xd9
#define X87_FLD_RM32_X 0x0
#define X87_FLDCW 0xd9
#define X87_FLDCW_X 0x5
#define X87_FILD_M32 0xdb
#define X87_FILD_M32_X 0x0
#define X87_FISTTP_M32 0xdb
#define X87_FISTTP_M32_X 0x1
#define X87_FISTP_M32 0xdb
#define X87_FISTP_M32_X 0x3
#define X87_FLD_M80 0xdb
#define X87_FLD_M80_X 0x5
#define X87_FLD_M64 0xdd
#define X87_FLD_M64_X 0x0
#define X87_FSTP_M32 0xd9
#define X87_FSTP_M32_X 0x3
#define X87_FSTP_M80 0xdb
#define X87_FSTP_M80_X 0x7
#define X87_FSTP_RM64 0xdd
#define X87_FSTP_RM64_X 0x3
#define X87_FALU_ST_RM32 0xd8
#define X87_FCHS 0xd9
#define X87_FCHS_2 0xe0
#define X87_FSQRT 0xd9
#define X87_FSQRT_2 0xfa
#define X87_FRNDINT 0xd9
#define X87_FRNDINT_2 0xfc
#define X87_FALU_ST_M64 0xdc
#define X87_FALU_STi_ST 0xdc
#define X87_FISTTP_M64 0xdd
#define X87_FISTTP_M64_X 0x1
#define X87_FALUP_STi_ST0 0xde
#define X87_ALU_ADD 0x0
#define X87_ALU_MUL 0x1
#define X87_ALU_FCOM 0x2
#define X87_ALU_FCOMP 0x3
#define X87_ALU_SUBR 0x4
#define X87_ALU_SUB 0x5
#define X87_ALU_DIVR 0x6
#define X87_ALU_DIV 0x7
#define X87_FCOMPP 0xde
#define X87_FCOMPP_2 0xd9
#define X87_FILD_M16 0xdf
#define X87_FILD_M16_X 0x0
#define X87_FISTTP_M16 0xdf
#define X87_FISTTP_M16_X 0x1
#define X87_FISTP_M16 0xdf
#define X87_FISTP_M16_X 0x3
#define X87_FILD_M64 0xdf
#define X87_FILD_M64_X 0x5
#define X87_FISTP_M64 0xdf
#define X87_FISTP_M64_X 0x7
#define X87_FNSTSW 0xdf
#define X87_FNSTSW_2 0xe0
#define X87_FCOMIP 0xdf
#define X87_FCOMIP_2 0xf0
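/*
 * The x87 defines come in pairs: the plain name is the opcode byte and the
 * _X or _2 companion is either the /digit for the ModRM reg field or the
 * second opcode byte. For example, FLDCW m16 is encoded as 0xd9 /5
 * (X87_FLDCW, X87_FLDCW_X) and FCHS as the fixed sequence 0xd9 0xe0.
 */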
#define SSE_PREFIX_NONE 0
#define SSE_PREFIX_66 1
#define SSE_PREFIX_F3 2
#define SSE_PREFIX_F2 3
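/*
 * The SSE_PREFIX_* values double as the VEX "pp" field: 0 = no prefix,
 * 1 = 0x66, 2 = 0xf3, 3 = 0xf2. In legacy encoding the corresponding
 * prefix byte is emitted literally (see the switch in cgen_rm_insn).
 */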
#define PREFIX_NONE 0
#define PREFIX_0F_38 2
#define PREFIX_0F_3A 3
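/*
 * PREFIX_* selects the opcode map and is ORed directly into the VEX
 * "mmmmm" field (0x0f -> 1, 0x0f 0x38 -> 2, 0x0f 0x3a -> 3); in legacy
 * encoding the escape bytes are emitted explicitly instead.
 */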
#define cgen_rex(rex) internal(file_line, "cgen_rex: attempting to generate rex in 32-bit mode: %02x", rex)
#define cgen_rex(rex) cgen_one(rex)
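/*
 * The two cgen_rex definitions are presumably selected by a 32-bit/64-bit
 * conditional elided from this excerpt: on 32-bit targets a REX prefix can
 * never be valid, so cgen_rex traps with internal(); on 64-bit targets it
 * simply emits the byte.
 */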
static bool attr_w cgen_rm_insn(struct codegen_context *ctx, int16_t sse_prefix, uint8_t prefix, uint8_t opcode, unsigned size, bool reg_is_reg, uint8_t reg, uint8_t *arg)
	uint8_t rex, mod, rm;
	int64_t imm = 0; /* avoid warning */
	if (unlikely(R_IS_XMM(reg)))
	if (unlikely(R_IS_XMM(arg[0]))) {
		arg_reg = arg[0] - R_XMM0;
	if (unlikely(!R_IS_GPR(reg)))
		internal(file_line, "cgen_rm_insn: invalid register %02x", reg);
	if (size == OP_SIZE_8)
	uint8_t *imm_ptr = arg + arg_size(arg[0]) - 8;
	imm = get_imm(imm_ptr);
	if (unlikely(!imm_is_32bit(imm)))
		internal(file_line, "cgen_rm_insn: immediate out of range: %"PRIxMAX"", (uintmax_t)imm);
	if (arg[0] == ARG_ADDRESS_0) {
	} else if (imm >= -0x80 && imm <= 0x7f) {
	if ((arg[1] & 7) == 0x5 && addr_size == 0)
	else if (addr_size == 1)
	if (arg[0] == ARG_ADDRESS_1) {
		if ((arg[1] & 7) == 0x4) {
	if (arg[0] >= ARG_ADDRESS_1_2 && arg[0] <= ARG_ADDRESS_1_8) {
		if (unlikely(arg[1] == R_SP))
			internal(file_line, "cgen_rm_insn: attempting to scale SP");
		sib = ((arg[0] - ARG_ADDRESS_1) << 6) | ((arg[1] & 7) << 3) | 0x5;
	if (arg[0] >= ARG_ADDRESS_2 && arg[0] <= ARG_ADDRESS_2_8) {
		if (unlikely(arg[2] == R_SP))
			internal(file_line, "cgen_rm_insn: attempting to scale SP");
		sib = ((arg[0] - ARG_ADDRESS_2) << 6) | ((arg[2] & 7) << 3) | (arg[1] & 7);
	internal(file_line, "cgen_rm_insn: invalid argument %02x", arg[0]);
	if (unlikely(sse_prefix >= 0)) {
		if (likely(cpu_test_feature(CPU_FEATURE_avx))) {
			if ((rex & (X86_REX_X | X86_REX_B | X86_REX_W)) == 0 && prefix == PREFIX_0F) {
				cgen_one((~rex & X86_REX_R) << 5 | (~(sse_prefix >> 8) & 0xf) << 3 | (sse_prefix & 3));
			cgen_one((~rex & (X86_REX_R | X86_REX_X | X86_REX_B)) << 5 | prefix);
			cgen_one((rex & X86_REX_W) << 4 | (~(sse_prefix >> 8) & 0xf) << 3 | (sse_prefix & 3));
		switch (sse_prefix & 3) {
			case SSE_PREFIX_66: cgen_one(X86_OP_SIZE); break;
			case SSE_PREFIX_F3: cgen_one(X86_REPE); break;
			case SSE_PREFIX_F2: cgen_one(X86_REPNE); break;
	if (size == OP_SIZE_2)
		cgen_one(X86_OP_SIZE);
	if (rex != X86_REX || (size == OP_SIZE_1 && ((reg_is_reg && !reg_is_fp(reg) && reg >= 4) || (mod == 0xc0 && !reg_is_fp(arg[0]) && arg[0] >= 4))))
	internal(file_line, "cgen_rm_insn: invalid prefix %u", prefix);
	cgen_one(mod | ((reg & 7) << 3) | (rm & 7));
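/*
 * The ModRM byte packs mod (bits 7-6), reg (bits 5-3) and rm (bits 2-0).
 * For example, mod = 0x40 with rm = 5 addresses [rbp + disp8], while
 * rm = 4 escapes to a SIB byte built the same way above (scale in bits
 * 7-6, index in 5-3, base in 2-0).
 */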
static bool attr_w cgen_sse_insn(struct codegen_context *ctx, unsigned sse_prefix, unsigned sse_op_map, uint8_t opcode, bool wide, uint8_t reg, uint8_t reg2, uint8_t *arg)
	g(cgen_rm_insn(ctx, sse_prefix + (reg2 << 8), sse_op_map, opcode, !wide ? OP_SIZE_4 : OP_SIZE_8, true, reg, arg));

static bool attr_w cgen_push(struct codegen_context *ctx)
	uint8_t *arg1 = ctx->code_position;
	ctx->code_position += arg_size(*arg1);
	if (likely(R_IS_GPR(arg1[0]))) {
		cgen_rex(X86_REX | X86_REX_B);
		cgen_one(X86_PUSH_R16 + (arg1[0] & 7));
	if (arg1[0] == ARG_IMM) {
		imm = get_imm(&arg1[1]);
		if (unlikely(!imm_is_32bit(imm)))
			internal(file_line, "cgen_push: immediate out of range: %"PRIxMAX"", (uintmax_t)imm);
		if (imm >= -0x80 && imm <= 0x7f) {
			cgen_one(X86_PUSH_IMM8);
		cgen_one(X86_PUSH_IMM16);
	g(cgen_rm_insn(ctx, -1, PREFIX_NONE, X86_FF, OP_SIZE_4, false, X86_FF_PUSH, arg1));

static bool attr_w cgen_pop(struct codegen_context *ctx)
	uint8_t *arg1 = ctx->code_position;
	ctx->code_position += arg_size(*arg1);
	if (likely(R_IS_GPR(arg1[0]))) {
		cgen_rex(X86_REX | X86_REX_B);
		cgen_one(X86_POP_R16 + (arg1[0] & 7));
	g(cgen_rm_insn(ctx, -1, PREFIX_NONE, X86_8F, OP_SIZE_4, false, X86_8F_POP, arg1));

static bool attr_w cgen_mov(struct codegen_context *ctx, unsigned size)
	uint8_t *arg1 = ctx->code_position;
	uint8_t *arg2 = arg1 + arg_size(*arg1);
	ctx->code_position = arg2 + arg_size(*arg2);
	if (arg2[0] == ARG_IMM) {
		imm = get_imm(&arg2[1]);
		if (R_IS_GPR(arg1[0])) {
			if (imm >= 0 && imm < 0x100000000LL)
			cgen_one(X86_MOV_R16_IMM16 + (arg1[0] & 7));
			if (imm >= ~(int64_t)0x7fffffff && imm < 0) {
			cgen_one(X86_MOV_R16_IMM16 + (arg1[0] & 7));
		if (size < OP_SIZE_4) {
			g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_MOV_RM8_IMM8 : X86_MOV_RM16_IMM16, size, false, X86_MOV_R16_IMM16_REG, arg1));
			if (size == OP_SIZE_1)
		if (unlikely(!imm_is_32bit(imm)))
			internal(file_line, "cgen_mov: immediate out of range: %"PRIxMAX"", (uintmax_t)imm);
		g(cgen_rm_insn(ctx, -1, PREFIX_NONE, X86_MOV_RM16_IMM16, maximum(size, OP_SIZE_4), false, X86_MOV_R16_IMM16_REG, arg1));
	if (arg1[0] == R_AX && size >= OP_SIZE_4 && arg2[0] == ARG_ADDRESS_0) {
		imm = get_imm(&arg2[1]);
		if (size == OP_SIZE_8)
			cgen_rex(X86_REX | X86_REX_W);
		cgen_one(X86_MOV_AX_M16);
	if (arg1[0] == ARG_ADDRESS_0 && arg2[0] == R_AX) {
		uint8_t code = size == OP_SIZE_1 ? X86_MOV_M16_AL : X86_MOV_M16_AX;
		imm = get_imm(&arg1[1]);
		if (size == OP_SIZE_2)
			cgen_one(X86_OP_SIZE);
		if (size == OP_SIZE_8)
			cgen_rex(X86_REX | X86_REX_W);
	if (R_IS_XMM(arg1[0]) && ARG_IS_ADDRESS(arg2[0])) {
		if (size == OP_SIZE_2) {
			g(cgen_sse_insn(ctx, SSE_PREFIX_66, PREFIX_0F, X86_0F_PINSRW_X128_RM16_IMM8, false, arg1[0], R_XMM7, arg2));
		if (size == OP_SIZE_16) {
			g(cgen_sse_insn(ctx, SSE_PREFIX_NONE, PREFIX_0F, X86_0F_MOVAPS_X128_M128, false, arg1[0], 0, arg2));
		g(cgen_sse_insn(ctx, size == OP_SIZE_4 ? SSE_PREFIX_F3 : SSE_PREFIX_F2, PREFIX_0F, X86_0F_MOVSS_X128_M32, false, arg1[0], 0, arg2));
	if (ARG_IS_ADDRESS(arg1[0]) && R_IS_XMM(arg2[0])) {
		if (size == OP_SIZE_2) {
			g(cgen_sse_insn(ctx, SSE_PREFIX_66, PREFIX_0F_3A, X86_0F_3A_PEXTRW_RM16_X128_IMM8, false, arg2[0], 0, arg1));
		if (size == OP_SIZE_16) {
			g(cgen_sse_insn(ctx, SSE_PREFIX_NONE, PREFIX_0F, X86_0F_MOVAPS_M128_X128, false, arg2[0], 0, arg1));
		g(cgen_sse_insn(ctx, size == OP_SIZE_4 ? SSE_PREFIX_F3 : SSE_PREFIX_F2, PREFIX_0F, X86_0F_MOVSS_M32_X128, false, arg2[0], 0, arg1));
	if (R_IS_XMM(arg1[0]) && R_IS_XMM(arg2[0])) {
		g(cgen_sse_insn(ctx, SSE_PREFIX_NONE, PREFIX_0F, X86_0F_MOVAPS_X128_M128, false, arg1[0], 0, arg2));
	if (!R_IS_GPR(arg1[0]) && unlikely(!R_IS_GPR(arg2[0]))) {
		/*debug("%s", da(ctx->fn,function)->function_name);*/
		internal(file_line, "cgen_mov: two addresses not supported");
	if (!R_IS_GPR(arg1[0])) {
		uint8_t code = size == OP_SIZE_1 ? X86_MOV_RM8_R8 : X86_MOV_RM16_R16;
		g(cgen_rm_insn(ctx, -1, PREFIX_NONE, code, size, true, arg2[0], arg1));
	} else if (size >= OP_SIZE_4) {
		g(cgen_rm_insn(ctx, -1, PREFIX_NONE, X86_MOV_R16_RM16, size, true, arg1[0], arg2));
		uint8_t code = size == OP_SIZE_1 ? X86_0F_MOVZX_R16_RM8 : X86_0F_MOVZX_R16_RM16;
		g(cgen_rm_insn(ctx, -1, PREFIX_0F, code, size, false, arg1[0], arg2));

static bool attr_w cgen_movsx(struct codegen_context *ctx, unsigned size)
	uint8_t *arg1, *arg2;
	if (unlikely(size == OP_SIZE_NATIVE)) {
		g(cgen_mov(ctx, size));
	arg1 = ctx->code_position;
	arg2 = arg1 + arg_size(*arg1);
	ctx->code_position = arg2 + arg_size(*arg2);
	if (size <= OP_SIZE_2) {
		g(cgen_rm_insn(ctx, -1, PREFIX_0F, size == OP_SIZE_1 ? X86_0F_MOVSX_R16_RM8 : X86_0F_MOVSX_R16_RM16, OP_SIZE_NATIVE, true, arg1[0], arg2));
		g(cgen_rm_insn(ctx, -1, PREFIX_NONE, X86_MOVSXD, OP_SIZE_NATIVE, true, arg1[0], arg2));

static bool attr_w cgen_lea(struct codegen_context *ctx, unsigned size)
	uint8_t *arg1, *arg2, *arg3;
	arg1 = ctx->code_position;
	arg2 = arg1 + arg_size(*arg1);
	arg3 = arg2 + arg_size(*arg2);
	ctx->code_position = arg3 + arg_size(*arg3);
	if (arg3[0] == ARG_IMM) {
		if (arg2[0] == ARG_SHIFTED_REGISTER) {
			if (unlikely((arg2[1] & ARG_SHIFT_MODE) != ARG_SHIFT_LSL) ||
			    unlikely((arg2[1] & ARG_SHIFT_AMOUNT) > 3))
			addr[0] = ARG_ADDRESS_1 + (arg2[1] & ARG_SHIFT_AMOUNT);
			memcpy(&addr[2], &arg3[1], 8);
			addr[0] = ARG_ADDRESS_1;
			memcpy(&addr[2], &arg3[1], 8);
	} else if (R_IS_GPR(arg3[0])) {
		addr[0] = ARG_ADDRESS_2;
		memset(&addr[3], 0, 8);
	} else if (arg3[0] == ARG_SHIFTED_REGISTER) {
		if (unlikely((arg3[1] & ARG_SHIFT_MODE) != ARG_SHIFT_LSL) ||
		    unlikely((arg3[1] & ARG_SHIFT_AMOUNT) > 3))
		addr[0] = ARG_ADDRESS_2 + (arg3[1] & ARG_SHIFT_AMOUNT);
		memset(&addr[3], 0, 8);
		internal(file_line, "cgen_lea: invalid argument %02x, %02x, %02x", arg1[0], arg2[0], arg3[0]);
	g(cgen_rm_insn(ctx, -1, PREFIX_NONE, X86_LEA_R16_RM16, size, true, arg1[0], addr));

static bool attr_w cgen_alu(struct codegen_context *ctx, unsigned size, unsigned alu)
	uint8_t *arg1, *arg2, *arg3;
	arg1 = ctx->code_position;
	arg2 = arg1 + arg_size(*arg1);
	arg3 = arg2 + arg_size(*arg2);
	ctx->code_position = arg3 + arg_size(*arg3);
	if (unlikely(arg_size(*arg1) != arg_size(*arg2)))
		internal(file_line, "cgen_alu: three-operand mode not supported");
	if (unlikely(memcmp(arg1, arg2, arg_size(*arg1))))
		internal(file_line, "cgen_alu: three-operand mode not supported");
	arg1 = ctx->code_position;
	arg3 = arg2 + arg_size(*arg2);
	ctx->code_position = arg3 + arg_size(*arg3);
	if (unlikely(alu == ALU_MUL)) {
		if (unlikely(arg3[0] == ARG_IMM)) {
			imm = get_imm(&arg3[1]);
			if (unlikely(!imm_is_32bit(imm)))
				internal(file_line, "cgen_alu: immediate out of range: %"PRIxMAX"", (uintmax_t)imm);
			code = imm_is_8bit(imm) ? X86_IMUL_R16_RM16_IMM8 : X86_IMUL_R16_RM16_IMM16;
			g(cgen_rm_insn(ctx, -1, PREFIX_NONE, code, size, true, arg1[0], arg2));
			if (code == X86_IMUL_R16_RM16_IMM8) {
			} else if (size == OP_SIZE_2) {
		if (unlikely(size == OP_SIZE_1)) {
			if (unlikely(arg1[0] != R_AX))
				internal(file_line, "cgen_alu: imul with unsupported register");
			g(cgen_rm_insn(ctx, -1, PREFIX_NONE, X86_F6, size, false, X86_F6_IMUL_RM8, arg3));
		if (unlikely(!R_IS_GPR(arg1[0])))
			internal(file_line, "cgen_alu: invalid multiply args");
		g(cgen_rm_insn(ctx, -1, PREFIX_0F, X86_0F_IMUL_R16_RM16, size, true, arg1[0], arg3));
	if (arg3[0] == ARG_IMM) {
		imm = get_imm(&arg3[1]);
		if (unlikely(!imm_is_32bit(imm)))
			internal(file_line, "cgen_alu: immediate out of range: %"PRIxMAX"", (uintmax_t)imm);
		if (arg1[0] == R_AX) {
			if (imm_is_8bit(imm) && size >= OP_SIZE_4)
			code = size == OP_SIZE_1 ? X86_ALU_AL_IMM8 : X86_ALU_AX_IMM16;
			if (size == OP_SIZE_2)
				cgen_one(X86_OP_SIZE);
			if (size == OP_SIZE_8)
				cgen_rex(X86_REX | X86_REX_W);
		bit8 = imm_is_8bit(imm);
		code = size == OP_SIZE_1 ? X86_ALU_RM8_IMM8 : bit8 ? X86_ALU_RM16_IMM8 : X86_ALU_RM16_IMM16;
		g(cgen_rm_insn(ctx, -1, PREFIX_NONE, code, size, false, alu, arg1));
		if (bit8 || size == OP_SIZE_1) {
		} else if (size == OP_SIZE_2) {
	if (R_IS_XMM(arg1[0]) && size == OP_SIZE_16) {
		case ALU_AND: code = X86_0F_ANDPS_X128_M128; break;
		case ALU_ANDN: code = X86_0F_ANDNPS_X128_M128; break;
		case ALU_OR: code = X86_0F_ORPS_X128_M128; break;
		case ALU_XOR: code = X86_0F_XORPS_X128_M128; break;
		default: internal(file_line, "invalid sse alu: %u", alu);
		g(cgen_sse_insn(ctx, SSE_PREFIX_NONE, PREFIX_0F, code, false, arg1[0], arg2[0], arg3));
	if (!R_IS_GPR(arg1[0]) && unlikely(!R_IS_GPR(arg3[0])))
		internal(file_line, "cgen_alu: two addresses not supported");
	if (!R_IS_GPR(arg1[0])) {
		uint8_t code = size == OP_SIZE_1 ? X86_ALU_RM8_R8 : X86_ALU_RM16_R16;
		g(cgen_rm_insn(ctx, -1, PREFIX_NONE, code, size, true, arg3[0], arg1));
		uint8_t code = size == OP_SIZE_1 ? X86_ALU_R8_RM8 : X86_ALU_R16_RM16;
		g(cgen_rm_insn(ctx, -1, PREFIX_NONE, code, size, true, arg1[0], arg3));

static bool attr_w cgen_alu1(struct codegen_context *ctx, unsigned size, unsigned alu, unsigned writes_flags)
	uint8_t *arg1 = ctx->code_position;
	uint8_t *arg2 = arg1 + arg_size(*arg1);
	ctx->code_position = arg2 + arg_size(*arg2);
	if (alu == ALU1_NOT || alu == ALU1_NEG || alu == ALU1_INC || alu == ALU1_DEC || alu == ALU1_BSWAP) {
		if (unlikely(arg1[0] != arg2[0]))
			internal(file_line, "cgen_alu1: arguments mismatch");
	g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_F6 : X86_F7, size, false, X86_F6_NOT_RM8, arg1));
	g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_F6 : X86_F7, size, false, X86_F6_NEG_RM8, arg1));
	if (writes_flags & 2) {
		g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_ALU_RM8_IMM8 : X86_ALU_RM16_IMM8, size, false, ALU_ADD, arg1));
	if (R_IS_GPR(arg1[0]) && size >= OP_SIZE_2) {
		if (size == OP_SIZE_2)
			cgen_one(X86_OP_SIZE);
		cgen_one(X86_INC_R16 + arg1[0]);
	g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_FE : X86_FF, size, false, X86_FE_INC_RM8, arg1));
	if (writes_flags & 2) {
		g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_ALU_RM8_IMM8 : X86_ALU_RM16_IMM8, size, false, ALU_SUB, arg1));
	if (R_IS_GPR(arg1[0]) && size >= OP_SIZE_2) {
		if (size == OP_SIZE_2)
			cgen_one(X86_OP_SIZE);
		cgen_one(X86_DEC_R16 + arg1[0]);
	g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_FE : X86_FF, size, false, X86_FE_DEC_RM8, arg1));
	if (unlikely(size <= OP_SIZE_2))
		internal(file_line, "cgen_alu1: bytes or words not supported with this operation");
	if (!R_IS_GPR(arg1[0]))
		internal(file_line, "cgen_alu1: bswap needs a register");
	if (size == OP_SIZE_8)
	cgen_one(X86_0F_BSWAP + (arg1[0] & 7));
	if (unlikely(size == OP_SIZE_1))
		internal(file_line, "cgen_alu1: bytes not supported with this operation");
	if (alu == ALU1_POPCNT || alu == ALU1_LZCNT)
	g(cgen_rm_insn(ctx, -1, PREFIX_0F, alu == ALU1_BSF ? X86_0F_BSF_R16_RM16 : alu == ALU1_BSR || alu == ALU1_LZCNT ? X86_0F_BSR_R16_RM16 : X86_0F_POPCNT_R16_RM16, size, true, arg1[0], arg2));
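	/*
	 * POPCNT (0xf3 0x0f 0xb8) and LZCNT (0xf3 0x0f 0xbd) are BSF/BSR-style
	 * encodings distinguished only by a mandatory 0xf3 prefix, which the
	 * ALU1_POPCNT/ALU1_LZCNT branch above presumably emits (X86_REPE)
	 * before the 0x0f escape, while plain BSF/BSR do not.
	 */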
	internal(file_line, "cgen_alu1: invalid operation %u", alu);

static bool attr_w cgen_test(struct codegen_context *ctx, unsigned size)
	uint8_t *arg1, *arg2;
	arg1 = ctx->code_position;
	arg2 = arg1 + arg_size(*arg1);
	ctx->code_position = arg2 + arg_size(*arg2);
	if (arg2[0] == ARG_IMM) {
		imm = get_imm(&arg2[1]);
		if (arg1[0] == R_AX) {
			if (size == OP_SIZE_1) {
				cgen_one(X86_TEST_AL_IMM8);
			} else if (size == OP_SIZE_2) {
				cgen_one(X86_OP_SIZE);
				cgen_one(X86_TEST_AX_IMM16);
			} else if (size == OP_SIZE_4) {
				cgen_one(X86_TEST_AX_IMM16);
				if (unlikely(!imm_is_32bit(imm)))
					internal(file_line, "cgen_test: immediate out of range: %"PRIxMAX"", (uintmax_t)imm);
				cgen_rex(X86_REX | X86_REX_W);
				cgen_one(X86_TEST_AX_IMM16);
		g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_F6 : X86_F7, size, false, X86_F6_TEST_RM8_IMM8, arg1));
		if (size == OP_SIZE_1) {
		} else if (size == OP_SIZE_2) {
		} else if (size == OP_SIZE_4) {
		if (unlikely(!imm_is_32bit(imm)))
			internal(file_line, "cgen_test: immediate out of range: %"PRIxMAX"", (uintmax_t)imm);
	if (!R_IS_GPR(arg1[0]) && unlikely(!R_IS_GPR(arg2[0])))
		internal(file_line, "cgen_test: two addresses not supported");
	if (!R_IS_GPR(arg1[0])) {
		g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_TEST_RM8_R8 : X86_TEST_RM16_R16, size, true, arg2[0], arg1));
		g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_TEST_RM8_R8 : X86_TEST_RM16_R16, size, true, arg1[0], arg2));

static bool attr_w cgen_lea3(struct codegen_context *ctx, unsigned size, unsigned shift)
	uint8_t *arg1, *arg2, *arg3, *arg4;
	arg1 = ctx->code_position;
	arg2 = arg1 + arg_size(*arg1);
	arg3 = arg2 + arg_size(*arg2);
	arg4 = arg3 + arg_size(*arg3);
	ctx->code_position = arg4 + arg_size(*arg4);
	if (unlikely(!R_IS_GPR(arg1[0])) || unlikely(!R_IS_GPR(arg2[0])) || unlikely(!R_IS_GPR(arg3[0])) || unlikely(arg4[0] != ARG_IMM))
		internal(file_line, "cgen_lea3: invalid arguments");
	addr[0] = ARG_ADDRESS_2 + shift;
	memcpy(&addr[3], &arg4[1], 8);
	g(cgen_rm_insn(ctx, -1, PREFIX_NONE, X86_LEA_R16_RM16, size, true, arg1[0], addr));

static bool attr_w cgen_rot(struct codegen_context *ctx, unsigned size, uint8_t rot)
	uint8_t *arg1 = ctx->code_position;
	uint8_t *arg2 = arg1 + arg_size(*arg1);
	uint8_t *arg3 = arg2 + arg_size(*arg2);
	ctx->code_position = arg3 + arg_size(*arg3);
	if (arg1[0] != arg2[0])
		internal(file_line, "cgen_rot: invalid arguments: %x, %02x, %02x, %02x", rot, arg1[0], arg2[0], arg3[0]);
	if (arg3[0] == R_CX) {
		g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_ROT_RM8_CL : X86_ROT_RM16_CL, size, false, rot, arg1));
	} else if (likely(arg3[0] == ARG_IMM)) {
		imm = get_imm(&arg3[1]);
		g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_ROT_RM8_1 : X86_ROT_RM16_1, size, false, rot, arg1));
		g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_ROT_RM8_IMM8 : X86_ROT_RM16_IMM8, size, false, rot, arg1));
		internal(file_line, "cgen_rot: invalid argument %02x", arg3[0]);

static bool attr_w cgen_btxt(struct codegen_context *ctx, unsigned size, uint8_t bt, uint8_t *arg1, uint8_t *arg2)
	if (arg2[0] == ARG_IMM) {
		g(cgen_rm_insn(ctx, -1, PREFIX_0F, X86_0F_BTX_RM16_IMM8, size, false, X86_0F_BTX_BT_RM16_IMM8 + bt, arg1));
	g(cgen_rm_insn(ctx, -1, PREFIX_0F, X86_0F_BT_RM16_R16 + bt * 8, size, true, arg2[0], arg1));
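/*
 * BT, BTS, BTR and BTC with a register bit index are 0x0f 0xa3, 0xab, 0xb3
 * and 0xbb, spaced eight apart, hence the "bt * 8" stride above; the
 * immediate forms share the single opcode 0x0f 0xba and are selected by the
 * /4 through /7 reg-field values.
 */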
static bool attr_w cgen_bt(struct codegen_context *ctx, unsigned size)
	uint8_t *arg1 = ctx->code_position;
	uint8_t *arg2 = arg1 + arg_size(*arg1);
	ctx->code_position = arg2 + arg_size(*arg2);
	return cgen_btxt(ctx, size, BTX_BT, arg1, arg2);

static bool attr_w cgen_btx(struct codegen_context *ctx, unsigned size, uint8_t bt)
	uint8_t *arg1 = ctx->code_position;
	uint8_t *arg2 = arg1 + arg_size(*arg1);
	uint8_t *arg3 = arg2 + arg_size(*arg2);
	ctx->code_position = arg3 + arg_size(*arg3);
	if (arg1[0] != arg2[0])
		internal(file_line, "cgen_btx: invalid arguments");
	return cgen_btxt(ctx, size, bt, arg1, arg3);

static bool attr_w cgen_mul_l(struct codegen_context *ctx, unsigned size, bool sgn)
	uint8_t *arg1, *arg2, *arg3, *arg4;
	arg1 = ctx->code_position;
	arg2 = arg1 + arg_size(*arg1);
	arg3 = arg2 + arg_size(*arg2);
	arg4 = arg3 + arg_size(*arg3);
	ctx->code_position = arg4 + arg_size(*arg4);
	reg_up = size == OP_SIZE_1 ? R_AX : R_DX;
	if (unlikely(arg1[0] != R_AX) || unlikely(arg2[0] != reg_up) || unlikely(arg3[0] != R_AX) || unlikely(arg4[0] == ARG_IMM))
		internal(file_line, "cgen_mul_l: invalid mul arguments");
	g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_F6 : X86_F7, size, false, !sgn ? X86_F6_MUL_RM8 : X86_F6_IMUL_RM8, arg4));

static bool attr_w cgen_div_l(struct codegen_context *ctx, unsigned size, bool sgn)
	uint8_t *arg1, *arg2, *arg3, *arg4, *arg5;
	arg1 = ctx->code_position;
	arg2 = arg1 + arg_size(*arg1);
	arg3 = arg2 + arg_size(*arg2);
	arg4 = arg3 + arg_size(*arg3);
	arg5 = arg4 + arg_size(*arg4);
	ctx->code_position = arg5 + arg_size(*arg5);
	reg_up = size == OP_SIZE_1 ? R_AX : R_DX;
	if (unlikely(arg1[0] != R_AX) || unlikely(arg2[0] != reg_up) || unlikely(arg3[0] != R_AX) || unlikely(arg4[0] != reg_up) || unlikely(arg5[0] == ARG_IMM))
		internal(file_line, "cgen_div_l: invalid div arguments");
	g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_F6 : X86_F7, size, false, !sgn ? X86_F6_DIV_RM8 : X86_F6_IDIV_RM8, arg5));

static bool attr_w cgen_cmov(struct codegen_context *ctx, unsigned size, unsigned cond)
	uint8_t *arg1 = ctx->code_position;
	uint8_t *arg2 = arg1 + arg_size(*arg1);
	uint8_t *arg3 = arg2 + arg_size(*arg2);
	ctx->code_position = arg3 + arg_size(*arg3);
	if (unlikely(arg1[0] != arg2[0]))
		internal(file_line, "cgen_cmov: invalid arguments");
	g(cgen_rm_insn(ctx, -1, PREFIX_0F, X86_0F_CMOVCC_R16_RM16 + cond, size, true, arg1[0], arg3));

static bool attr_w cgen_memcpy(struct codegen_context *ctx)
	int64_t disp_dest, disp_src;
	uint8_t *arg1, *arg2, *arg3;
	arg1 = ctx->code_position;
	arg2 = arg1 + arg_size(*arg1);
	arg3 = arg2 + arg_size(*arg2);
	ctx->code_position = arg3 + arg_size(*arg3);
	if (unlikely(arg1[0] != ARG_ADDRESS_1_POST_I) || unlikely(arg2[0] != ARG_ADDRESS_1_POST_I) || unlikely(arg3[0] != R_CX))
	if (unlikely(arg1[1] != R_DI) || unlikely(arg2[1] != R_SI))
	disp_dest = get_imm(&arg1[2]);
	disp_src = get_imm(&arg2[2]);
	if (unlikely(disp_dest != 0) || unlikely(disp_src != 0))
	cgen_one(X86_MOVSB);
	internal(file_line, "cgen_memcpy: invalid arguments %02x, %02x, %02x", *arg1, *arg2, *arg3);

static bool attr_w cgen_memset(struct codegen_context *ctx)
	uint8_t *arg1, *arg2, *arg3;
	arg1 = ctx->code_position;
	arg2 = arg1 + arg_size(*arg1);
	arg3 = arg2 + arg_size(*arg2);
	ctx->code_position = arg3 + arg_size(*arg3);
	if (unlikely(arg1[0] != ARG_ADDRESS_1_POST_I) || unlikely(arg2[0] != R_CX) || unlikely(arg3[0] != R_AX))
	if (unlikely(arg1[1] != R_DI))
	disp_dest = get_imm(&arg1[2]);
	if (unlikely(disp_dest != 0))
	cgen_one(X86_STOSB);
	internal(file_line, "cgen_memset: invalid arguments %02x, %02x, %02x", *arg1, *arg2, *arg3);

static bool attr_w cgen_sse_cmp(struct codegen_context *ctx, unsigned size)
	uint8_t *arg1 = ctx->code_position;
	uint8_t *arg2 = arg1 + arg_size(*arg1);
	ctx->code_position = arg2 + arg_size(*arg2);
	g(cgen_sse_insn(ctx, size == OP_SIZE_4 ? SSE_PREFIX_NONE : SSE_PREFIX_66, PREFIX_0F, X86_0F_UCOMISS_X128_RM32, false, arg1[0], 0, arg2));

static bool attr_w cgen_sse_alu(struct codegen_context *ctx, unsigned size, unsigned alu)
	uint8_t *arg1 = ctx->code_position;
	uint8_t *arg2 = arg1 + arg_size(*arg1);
	uint8_t *arg3 = arg2 + arg_size(*arg2);
	ctx->code_position = arg3 + arg_size(*arg3);
	case FP_ALU_ADD: opcode = X86_0F_ADDPS_X128_M32; break;
	case FP_ALU_SUB: opcode = X86_0F_SUBPS_X128_M32; break;
	case FP_ALU_MUL: opcode = X86_0F_MULPS_X128_M32; break;
	case FP_ALU_DIV: opcode = X86_0F_DIVPS_X128_M32; break;
	default: internal(file_line, "cgen_sse_alu: invalid alu %u", alu);
	g(cgen_sse_insn(ctx, size == OP_SIZE_4 ? SSE_PREFIX_F3 : SSE_PREFIX_F2, PREFIX_0F, opcode, false, arg1[0], arg2[0], arg3));

static bool attr_w cgen_sse_alu1(struct codegen_context *ctx, unsigned size, unsigned alu)
	unsigned sse_pfx, sse_op_map;
	uint8_t *arg1 = ctx->code_position;
	uint8_t *arg2 = arg1 + arg_size(*arg1);
	ctx->code_position = arg2 + arg_size(*arg2);
	case FP_ALU1_SQRT:
		if (size == OP_SIZE_4) {
			sse_pfx = SSE_PREFIX_F3;
		} else if (size == OP_SIZE_8) {
			sse_pfx = SSE_PREFIX_F2;
		sse_op_map = PREFIX_0F;
		opcode = X86_0F_SQRTPS_X128_M32;
	case FP_ALU1_TRUNC:
		sse_pfx = SSE_PREFIX_66;
		sse_op_map = PREFIX_0F_3A;
		if (size == OP_SIZE_4) {
			opcode = X86_0F_3A_ROUNDSS_X128_M32;
		} else if (size == OP_SIZE_8) {
			opcode = X86_0F_3A_ROUNDSD_X128_M64;
	default: internal(file_line, "cgen_sse_alu1: invalid alu %u, %u", alu, size);
	g(cgen_sse_insn(ctx, sse_pfx, sse_op_map, opcode, false, arg1[0], arg1[0], arg2));
	if (OP_IS_ROUND(alu))
		cgen_one(alu - FP_ALU1_ROUND);
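/*
 * ROUNDSS/ROUNDSD take a trailing imm8 selecting the rounding mode
 * (0 = nearest, 1 = down, 2 = up, 3 = truncate); the FP_ALU1_ROUND family
 * is evidently laid out so that "alu - FP_ALU1_ROUND" yields exactly that
 * immediate.
 */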
static bool attr_w cgen_sse_from_int(struct codegen_context *ctx, unsigned int_op_size, unsigned fp_op_size)
	uint8_t *arg1 = ctx->code_position;
	uint8_t *arg2 = arg1 + arg_size(*arg1);
	ctx->code_position = arg2 + arg_size(*arg2);
	g(cgen_sse_insn(ctx, fp_op_size == OP_SIZE_4 ? SSE_PREFIX_F3 : SSE_PREFIX_F2, PREFIX_0F, X86_0F_CVTSI2SS_X128_RM32, int_op_size == OP_SIZE_8, arg1[0], R_XMM7, arg2));

static bool attr_w cgen_sse_to_int(struct codegen_context *ctx, unsigned int_op_size, unsigned fp_op_size)
	uint8_t *arg1 = ctx->code_position;
	uint8_t *arg2 = arg1 + arg_size(*arg1);
	ctx->code_position = arg2 + arg_size(*arg2);
	g(cgen_sse_insn(ctx, fp_op_size == OP_SIZE_4 ? SSE_PREFIX_F3 : SSE_PREFIX_F2, PREFIX_0F, X86_0F_CVTTSS2SI_X128_RM32, int_op_size == OP_SIZE_8, arg1[0], 0, arg2));

static bool attr_w cgen_sse_cvt(struct codegen_context *ctx, unsigned from_op_size, unsigned to_op_size)
	uint8_t *arg1 = ctx->code_position;
	uint8_t *arg2 = arg1 + arg_size(*arg1);
	ctx->code_position = arg2 + arg_size(*arg2);
	if (from_op_size == OP_SIZE_2 && to_op_size == OP_SIZE_4) {
		g(cgen_sse_insn(ctx, SSE_PREFIX_66, PREFIX_0F_38, X86_0F_38_CVTPH2PS_X128_RM64, false, arg1[0], 0, arg2));
	} else if (from_op_size == OP_SIZE_4 && to_op_size == OP_SIZE_2) {
		g(cgen_sse_insn(ctx, SSE_PREFIX_66, PREFIX_0F_3A, X86_0F_3A_CVTPS2PH_RM64_X128, false, arg2[0], 0, arg1));
		internal(file_line, "cgen_sse_cvt: unsupported arguments %u, %u", from_op_size, to_op_size);

static bool attr_w cgen_x87_fld(struct codegen_context *ctx, unsigned size)
	uint8_t *arg1 = ctx->code_position;
	ctx->code_position = arg1 + arg_size(*arg1);
	if (arg1[0] >= R_ST0 && arg1[0] <= R_ST7)
		c1 = X87_FLD_RM32; c2 = X87_FLD_RM32_X; break;
		c1 = X87_FLD_M64; c2 = X87_FLD_M64_X; break;
		c1 = X87_FLD_M80; c2 = X87_FLD_M80_X; break;
		internal(file_line, "cgen_x87_fld: invalid size %u", size);
	g(cgen_rm_insn(ctx, -1, PREFIX_NONE, c1, OP_SIZE_4, false, c2, arg1));

static bool attr_w cgen_x87_fild(struct codegen_context *ctx, unsigned size)
	uint8_t *arg1 = ctx->code_position;
	ctx->code_position = arg1 + arg_size(*arg1);
		c1 = X87_FILD_M16; c2 = X87_FILD_M16_X; break;
		c1 = X87_FILD_M32; c2 = X87_FILD_M32_X; break;
		c1 = X87_FILD_M64; c2 = X87_FILD_M64_X; break;
		internal(file_line, "cgen_x87_fild: invalid size %u", size);
	g(cgen_rm_insn(ctx, -1, PREFIX_NONE, c1, OP_SIZE_4, false, c2, arg1));

static bool attr_w cgen_x87_fstp(struct codegen_context *ctx, unsigned size)
	uint8_t *arg1 = ctx->code_position;
	ctx->code_position = arg1 + arg_size(*arg1);
	if (arg1[0] >= R_ST0 && arg1[0] <= R_ST7)
		c1 = X87_FSTP_M32; c2 = X87_FSTP_M32_X; break;
		c1 = X87_FSTP_RM64; c2 = X87_FSTP_RM64_X; break;
		c1 = X87_FSTP_M80; c2 = X87_FSTP_M80_X; break;
		internal(file_line, "cgen_x87_fstp: invalid size %u", size);
	g(cgen_rm_insn(ctx, -1, PREFIX_NONE, c1, OP_SIZE_4, false, c2, arg1));

static bool attr_w cgen_x87_fistp(struct codegen_context *ctx, unsigned size)
	uint8_t *arg1 = ctx->code_position;
	ctx->code_position = arg1 + arg_size(*arg1);
		c1 = X87_FISTP_M16; c2 = X87_FISTP_M16_X; break;
		c1 = X87_FISTP_M32; c2 = X87_FISTP_M32_X; break;
		c1 = X87_FISTP_M64; c2 = X87_FISTP_M64_X; break;
		internal(file_line, "cgen_x87_fistp: invalid size %u", size);
	g(cgen_rm_insn(ctx, -1, PREFIX_NONE, c1, OP_SIZE_4, false, c2, arg1));

static bool attr_w cgen_x87_fisttp(struct codegen_context *ctx, unsigned size)
	uint8_t *arg1 = ctx->code_position;
	ctx->code_position = arg1 + arg_size(*arg1);
		c1 = X87_FISTTP_M16; c2 = X87_FISTTP_M16_X; break;
		c1 = X87_FISTTP_M32; c2 = X87_FISTTP_M32_X; break;
		c1 = X87_FISTTP_M64; c2 = X87_FISTTP_M64_X; break;
		internal(file_line, "cgen_x87_fisttp: invalid size %u", size);
	g(cgen_rm_insn(ctx, -1, PREFIX_NONE, c1, OP_SIZE_4, false, c2, arg1));

static bool attr_w cgen_x87_fcomp(struct codegen_context *ctx, unsigned size)
	uint8_t *arg1 = ctx->code_position;
	ctx->code_position = arg1 + arg_size(*arg1);
	if (arg1[0] < ARG_REGS_MAX) {
		c1 = X87_FALU_ST_RM32;
	} else switch (size) {
		c1 = X87_FALU_ST_RM32; break;
		c1 = X87_FALU_ST_M64; break;
		internal(file_line, "cgen_x87_fcomp: invalid size %u", size);
	g(cgen_rm_insn(ctx, -1, PREFIX_NONE, c1, OP_SIZE_4, false, c2, arg1));

static bool attr_w cgen_x87_alu(struct codegen_context *ctx, unsigned size, unsigned aux)
	uint8_t *arg1 = ctx->code_position;
	ctx->code_position = arg1 + arg_size(*arg1);
	if (arg1[0] < ARG_REGS_MAX) {
		c1 = X87_FALU_ST_RM32;
	} else switch (size) {
		c1 = X87_FALU_ST_RM32; break;
		c1 = X87_FALU_ST_M64; break;
		internal(file_line, "cgen_x87_alu: invalid size %u", size);
		c2 = X87_ALU_ADD; break;
		c2 = X87_ALU_SUB; break;
		c2 = X87_ALU_MUL; break;
		c2 = X87_ALU_DIV; break;
		internal(file_line, "cgen_x87_alu: invalid operation %u", aux);
	g(cgen_rm_insn(ctx, -1, PREFIX_NONE, c1, OP_SIZE_4, false, c2, arg1));

static bool attr_w cgen_x87_alup(struct codegen_context *ctx, unsigned aux)
	uint8_t *arg1 = ctx->code_position;
	ctx->code_position = arg1 + arg_size(*arg1);
		c2 = X87_ALU_ADD; break;
		c2 = X87_ALU_SUB; break;
		c2 = X87_ALU_MUL; break;
		c2 = X87_ALU_DIV; break;
		internal(file_line, "cgen_x87_alup: invalid operation %u", aux);
	g(cgen_rm_insn(ctx, -1, PREFIX_NONE, X87_FALUP_STi_ST0, OP_SIZE_4, false, c2, arg1));
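/*
 * Jump displacements are relative to the end of the instruction, i.e. to
 * the end of the 1-byte (JMP_SHORT) or 4-byte displacement field itself,
 * which is why resolve_relocation below subtracts reloc->position plus
 * 1 or 4 from the label position.
 */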
static bool attr_w resolve_relocation(struct codegen_context *ctx, struct relocation *reloc)
	int64_t offs = (int64_t)ctx->label_to_pos[reloc->label_id] - (int64_t)(reloc->position + (reloc->length == JMP_SHORT ? 1 : 4));
	switch (reloc->length) {
		if (!imm_is_8bit(offs))
		memcpy(ctx->mcode + reloc->position, &i8, 1);
		if (!imm_is_32bit(offs))
		memcpy(ctx->mcode + reloc->position, &i32, 4);
		internal(file_line, "resolve_relocation: invalid relocation length %u", reloc->length);

static bool attr_w cgen_insn(struct codegen_context *ctx, uint32_t insn)
	/*debug("insn: %08x", insn);*/
	switch (insn_opcode(insn)) {
		imm16 = cget_two(ctx);
		cgen_one(X86_RET_IMM16);
	case INSN_CALL_INDIRECT:
		g(cgen_rm_insn(ctx, -1, PREFIX_NONE, X86_FF, OP_SIZE_4, false, X86_FF_CALL_INDIRECT, ctx->code_position));
		ctx->code_position += arg_size(*ctx->code_position);
		g(cgen_mov(ctx, insn_op_size(insn)));
		g(cgen_movsx(ctx, insn_op_size(insn)));
		g(cgen_alu(ctx, insn_op_size(insn), 7));
		g(cgen_test(ctx, insn_op_size(insn)));
	case INSN_ALU_FLAGS:
		if (unlikely(insn_op_size(insn) < OP_SIZE_4))
		if (!insn_writes_flags(insn) && insn_op_size(insn) <= OP_SIZE_8) {
			if (unlikely(insn_aux(insn) != ALU_ADD))
			g(cgen_lea(ctx, insn_op_size(insn)));
		g(cgen_alu(ctx, insn_op_size(insn), insn_aux(insn)));
	case INSN_ALU_PARTIAL:
	case INSN_ALU_FLAGS_PARTIAL:
		if (unlikely(insn_op_size(insn) >= OP_SIZE_4))
		g(cgen_alu(ctx, insn_op_size(insn), insn_aux(insn)));
	case INSN_ALU1_FLAGS:
		if (unlikely(insn_op_size(insn) < OP_SIZE_4))
		g(cgen_alu1(ctx, insn_op_size(insn), insn_aux(insn), insn_writes_flags(insn)));
	case INSN_ALU1_PARTIAL:
	case INSN_ALU1_FLAGS_PARTIAL:
		if (unlikely(insn_op_size(insn) >= OP_SIZE_4))
		g(cgen_alu1(ctx, insn_op_size(insn), insn_aux(insn), insn_writes_flags(insn)));
		if (unlikely(insn_op_size(insn) < OP_SIZE_4))
		g(cgen_lea3(ctx, insn_op_size(insn), insn_aux(insn)));
		if (unlikely(insn_op_size(insn) < OP_SIZE_4))
		g(cgen_rot(ctx, insn_op_size(insn), insn_aux(insn)));
	case INSN_ROT_PARTIAL:
		if (unlikely(insn_op_size(insn) >= OP_SIZE_4))
		g(cgen_rot(ctx, insn_op_size(insn), insn_aux(insn)));
		if (unlikely(insn_op_size(insn) == OP_SIZE_1) || unlikely(!insn_writes_flags(insn)))
		g(cgen_bt(ctx, insn_op_size(insn)));
		if (unlikely(insn_op_size(insn) == OP_SIZE_1) || unlikely(!insn_writes_flags(insn)))
		g(cgen_btx(ctx, insn_op_size(insn), insn_aux(insn)));
		g(cgen_mul_l(ctx, insn_op_size(insn), insn_aux(insn)));
		g(cgen_div_l(ctx, insn_op_size(insn), insn_aux(insn)));
		if (unlikely(insn_op_size(insn) <= OP_SIZE_2))
		if (insn_op_size(insn) == OP_SIZE_8)
			cgen_rex(X86_REX | X86_REX_W);
		if (unlikely(cget_one(ctx) != R_AX))
		if (unlikely(cget_one(ctx) != R_AX))
	case INSN_CBW_PARTIAL:
		if (unlikely(insn_op_size(insn) != OP_SIZE_2))
		if (unlikely(cget_one(ctx) != R_AX))
		if (unlikely(cget_one(ctx) != R_AX))
		cgen_one(X86_OP_SIZE);
		if (unlikely(insn_op_size(insn) <= OP_SIZE_2))
		if (unlikely(cget_one(ctx) != R_DX))
		if (unlikely(cget_one(ctx) != R_AX))
		if (insn_op_size(insn) == OP_SIZE_8)
			cgen_rex(X86_REX | X86_REX_W);
	case INSN_CWD_PARTIAL:
		if (unlikely(insn_op_size(insn) != OP_SIZE_2))
		if (unlikely(cget_one(ctx) != R_DX))
		if (unlikely(cget_one(ctx) != R_AX))
		if (unlikely(cget_one(ctx) != R_DX))
		cgen_one(X86_OP_SIZE);
		if (unlikely(insn_op_size(insn) != OP_SIZE_1))
		g(cgen_rm_insn(ctx, -1, PREFIX_0F, X86_0F_SETCC_RM8 + (insn_aux(insn) & 0xf), OP_SIZE_1, false, 0, ctx->code_position));
		ctx->code_position += arg_size(*ctx->code_position);
	case INSN_SET_COND_PARTIAL:
		if (unlikely(insn_op_size(insn) != OP_SIZE_1))
		g(cgen_rm_insn(ctx, -1, PREFIX_0F, X86_0F_SETCC_RM8 + (insn_aux(insn) & 0xf), OP_SIZE_1, false, 0, ctx->code_position));
		ctx->code_position += arg_size(*ctx->code_position);
		ctx->code_position += arg_size(*ctx->code_position);
		if (unlikely(insn_op_size(insn) == OP_SIZE_1))
		g(cgen_cmov(ctx, insn_op_size(insn), insn_aux(insn)));
		g(cgen_memcpy(ctx));
		g(cgen_memset(ctx));
		g(cgen_sse_cmp(ctx, insn_op_size(insn)));
		g(cgen_sse_alu(ctx, insn_op_size(insn), insn_aux(insn)));
		g(cgen_sse_alu1(ctx, insn_op_size(insn), insn_aux(insn)));
	case INSN_FP_FROM_INT32:
	case INSN_FP_FROM_INT64:
		g(cgen_sse_from_int(ctx, insn_opcode(insn) == INSN_FP_FROM_INT32 ? OP_SIZE_4 : OP_SIZE_8, insn_op_size(insn)));
	case INSN_FP_TO_INT32:
	case INSN_FP_TO_INT64:
		g(cgen_sse_to_int(ctx, insn_opcode(insn) == INSN_FP_TO_INT32 ? OP_SIZE_4 : OP_SIZE_8, insn_op_size(insn)));
		g(cgen_sse_cvt(ctx, insn_op_size(insn), insn_aux(insn)));
		g(cgen_x87_fld(ctx, insn_op_size(insn)));
		g(cgen_x87_fild(ctx, insn_op_size(insn)));
		g(cgen_x87_fstp(ctx, insn_op_size(insn)));
	case INSN_X87_FISTP:
		g(cgen_x87_fistp(ctx, insn_op_size(insn)));
	case INSN_X87_FISTTP:
		g(cgen_x87_fisttp(ctx, insn_op_size(insn)));
	case INSN_X87_FCOMP:
		g(cgen_x87_fcomp(ctx, insn_op_size(insn)));
	case INSN_X87_FCOMPP:
		cgen_one(X87_FCOMPP);
		cgen_one(X87_FCOMPP_2);
	case INSN_X87_FCOMIP:
		imm8 = cget_one(ctx);
		cgen_one(X87_FCOMIP);
		cgen_one(X87_FCOMIP_2 + (imm8 & 7));
		g(cgen_x87_alu(ctx, insn_op_size(insn), insn_aux(insn)));
		g(cgen_x87_alup(ctx, insn_aux(insn)));
		cgen_one(X87_FCHS_2);
	case INSN_X87_FSQRT:
		cgen_one(X87_FSQRT);
		cgen_one(X87_FSQRT_2);
	case INSN_X87_FRNDINT:
		cgen_one(X87_FRNDINT);
		cgen_one(X87_FRNDINT_2);
	case INSN_X87_FNSTSW:
		if (unlikely(cget_one(ctx) != R_AX))
		if (unlikely(cget_one(ctx) != R_AX))
		cgen_one(X87_FNSTSW);
		cgen_one(X87_FNSTSW_2);
	case INSN_X87_FLDCW:
		g(cgen_rm_insn(ctx, -1, PREFIX_NONE, X87_FLDCW, OP_SIZE_4, false, X87_FLDCW_X, ctx->code_position));
		ctx->code_position += arg_size(*ctx->code_position);
		if (insn_jump_size(insn) == JMP_SHORT || insn_jump_size(insn) == JMP_SHORTEST) {
			cgen_one(X86_JMP_8);
			g(add_relocation(ctx, JMP_SHORT, 0, NULL));
		} else if (likely(insn_jump_size(insn) == JMP_LONG)) {
			cgen_one(X86_JMP_16);
			g(add_relocation(ctx, JMP_LONG, 0, NULL));
		if (insn_jump_size(insn) == JMP_SHORT || insn_jump_size(insn) == JMP_SHORTEST) {
			cgen_one(X86_JCC_8 + (insn_aux(insn) & 0xf));
			g(add_relocation(ctx, JMP_SHORT, 0, NULL));
		} else if (likely(insn_jump_size(insn) == JMP_LONG)) {
			cgen_one(X86_0F_JCC_16 + (insn_aux(insn) & 0xf));
			g(add_relocation(ctx, JMP_LONG, 0, NULL));
	case INSN_JMP_INDIRECT:
		g(cgen_rm_insn(ctx, -1, PREFIX_NONE, X86_FF, OP_SIZE_4, false, X86_FF_JMP_INDIRECT, ctx->code_position));
		ctx->code_position += arg_size(*ctx->code_position);
		internal(file_line, "cgen_insn: invalid insn %08lx", (unsigned long)insn);