2 * Copyright (C) 2024 Mikulas Patocka
4 * This file is part of Ajla.
6 * Ajla is free software: you can redistribute it and/or modify it under the
7 * terms of the GNU General Public License as published by the Free Software
8 * Foundation, either version 3 of the License, or (at your option) any later
11 * Ajla is distributed in the hope that it will be useful, but WITHOUT ANY
12 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
13 * A PARTICULAR PURPOSE. See the GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along with
16 * Ajla. If not, see <https://www.gnu.org/licenses/>.
/* Base opcodes of the two-operand ALU instruction forms (referenced by cgen_alu). */
19 #define X86_ALU_RM8_R8 0x00
20 #define X86_ALU_RM16_R16 0x01
21 #define X86_ALU_R8_RM8 0x02
22 #define X86_ALU_R16_RM16 0x03
23 #define X86_ALU_AL_IMM8 0x04
24 #define X86_ALU_AX_IMM16 0x05
/* Bit flags of the REX prefix; callers OR them into X86_REX and pass the result to cgen_rex(). */
31 #define X86_REX_B 0x01
32 #define X86_REX_X 0x02
33 #define X86_REX_R 0x04
34 #define X86_REX_W 0x08
/* Primary opcode and prefix byte values; several are bases to which the low three bits of a register number are added. */
35 #define X86_INC_R16 0x40
36 #define X86_DEC_R16 0x48
37 #define X86_PUSH_R16 0x50
38 #define X86_POP_R16 0x58
39 #define X86_MOVSXD 0x63
42 #define X86_OP_SIZE 0x66
43 #define X86_PUSH_IMM16 0x68
44 #define X86_IMUL_R16_RM16_IMM16 0x69
45 #define X86_PUSH_IMM8 0x6a
46 #define X86_IMUL_R16_RM16_IMM8 0x6b
47 #define X86_JCC_8 0x70
48 #define X86_ALU_RM8_IMM8 0x80
49 #define X86_ALU_RM16_IMM16 0x81
50 #define X86_ALU_RM16_IMM8 0x83
51 #define X86_TEST_RM8_R8 0x84
52 #define X86_TEST_RM16_R16 0x85
53 #define X86_MOV_RM8_R8 0x88
54 #define X86_MOV_RM16_R16 0x89
55 #define X86_MOV_R8_RM8 0x8a
56 #define X86_MOV_R16_RM16 0x8b
57 #define X86_LEA_R16_RM16 0x8d
60 #define X86_MOV_AL_M16 0xa0
61 #define X86_MOV_AX_M16 0xa1
62 #define X86_MOV_M16_AL 0xa2
63 #define X86_MOV_M16_AX 0xa3
64 #define X86_MOVSB 0xa4
65 #define X86_TEST_AL_IMM8 0xa8
66 #define X86_TEST_AX_IMM16 0xa9
67 #define X86_STOSB 0xaa
68 #define X86_MOV_R16_IMM16 0xb8
69 #define X86_ROT_RM8_IMM8 0xc0
70 #define X86_ROT_RM16_IMM8 0xc1
71 #define X86_RET_IMM16 0xc2
73 #define X86_VEX_3 0xc4
74 #define X86_VEX_2 0xc5
75 #define X86_MOV_RM8_IMM8 0xc6
76 #define X86_MOV_RM16_IMM16 0xc7
/* ModRM reg-field value passed by cgen_mov together with X86_MOV_RM8_IMM8 / X86_MOV_RM16_IMM16 */
77 #define X86_MOV_R16_IMM16_REG 0x0
/* ModRM reg-field value passed by cgen_pop together with opcode X86_8F */
79 #define X86_8F_POP 0x0
80 #define X86_ROT_RM8_1 0xd0
81 #define X86_ROT_RM16_1 0xd1
82 #define X86_ROT_RM8_CL 0xd2
83 #define X86_ROT_RM16_CL 0xd3
84 #define X86_JMP_16 0xe9
85 #define X86_JMP_8 0xeb
86 #define X86_REPNE 0xf2
/* ModRM reg-field values used with opcode X86_F6 (passed as `reg` with reg_is_reg == false). */
89 #define X86_F6_TEST_RM8_IMM8 0x0
90 #define X86_F6_NOT_RM8 0x2
91 #define X86_F6_NEG_RM8 0x3
92 #define X86_F6_MUL_RM8 0x4
93 #define X86_F6_IMUL_RM8 0x5
94 #define X86_F6_DIV_RM8 0x6
95 #define X86_F6_IDIV_RM8 0x7
/* The same values named for opcode X86_F7; the callers in this file pass the X86_F6_* names for both opcodes. */
97 #define X86_F7_TEST_RM16_IMM16 0x0
98 #define X86_F7_NOT_RM16 0x2
99 #define X86_F7_NEG_RM16 0x3
100 #define X86_F7_MUL_RM16 0x4
101 #define X86_F7_IMUL_RM16 0x5
102 #define X86_F7_DIV_RM16 0x6
103 #define X86_F7_IDIV_RM16 0x7
/* ModRM reg-field values used with opcode X86_FE. */
105 #define X86_FE_INC_RM8 0x0
106 #define X86_FE_DEC_RM8 0x1
/* ModRM reg-field values used with opcode X86_FF. */
108 #define X86_FF_INC_RM16 0x0
109 #define X86_FF_DEC_RM16 0x1
110 #define X86_FF_CALL_INDIRECT 0x2
111 #define X86_FF_JMP_INDIRECT 0x4
112 #define X86_FF_PUSH 0x6
/* Opcodes of the 0F map (emitted with prefix == PREFIX_0F). */
114 #define X86_0F_MOVSS_X128_M32 0x10
115 #define X86_0F_MOVSS_M32_X128 0x11
116 #define X86_0F_MOVAPS_X128_M128 0x28
117 #define X86_0F_MOVAPS_M128_X128 0x29
118 #define X86_0F_CVTSI2SS_X128_RM32 0x2a
119 #define X86_0F_CVTTSS2SI_X128_RM32 0x2c
120 #define X86_0F_UCOMISS_X128_RM32 0x2e
121 #define X86_0F_38 0x38
122 #define X86_0F_3A 0x3a
/* base value; cgen_cmov adds the condition code */
123 #define X86_0F_CMOVCC_R16_RM16 0x40
124 #define X86_0F_SQRTPS_X128_M32 0x51
125 #define X86_0F_ANDPS_X128_M128 0x54
126 #define X86_0F_ANDNPS_X128_M128 0x55
127 #define X86_0F_ORPS_X128_M128 0x56
128 #define X86_0F_XORPS_X128_M128 0x57
129 #define X86_0F_ADDPS_X128_M32 0x58
130 #define X86_0F_MULPS_X128_M32 0x59
131 #define X86_0F_SUBPS_X128_M32 0x5c
132 #define X86_0F_DIVPS_X128_M32 0x5e
133 #define X86_0F_MOVD_X128_RM32 0x6e
134 #define X86_0F_MOVD_RM32_X128 0x7e
135 #define X86_0F_JCC_16 0x80
136 #define X86_0F_SETCC_RM8 0x90
/* register-count bit-test opcodes; cgen_btxt computes X86_0F_BT_RM16_R16 + bt * 8 */
137 #define X86_0F_BT_RM16_R16 0xa3
138 #define X86_0F_BTS_RM16_R16 0xab
139 #define X86_0F_BTR_RM16_R16 0xb3
/* immediate bit-test opcode and its ModRM reg-field values; cgen_btxt adds bt to the BT value */
140 #define X86_0F_BTX_RM16_IMM8 0xba
141 #define X86_0F_BTX_BT_RM16_IMM8 0x4
142 #define X86_0F_BTX_BTS_RM16_IMM8 0x5
143 #define X86_0F_BTX_BTR_RM16_IMM8 0x6
144 #define X86_0F_BTX_BTC_RM16_IMM8 0x7
/* base value; cgen_alu1 adds the low three bits of the register number */
145 #define X86_0F_BSWAP 0xc8
146 #define X86_0F_BTC_RM16_R16 0xbb
147 #define X86_0F_IMUL_R16_RM16 0xaf
148 #define X86_0F_MOVZX_R16_RM8 0xb6
149 #define X86_0F_MOVZX_R16_RM16 0xb7
150 #define X86_0F_POPCNT_R16_RM16 0xb8
151 #define X86_0F_BSF_R16_RM16 0xbc
152 #define X86_0F_BSR_R16_RM16 0xbd
153 #define X86_0F_MOVSX_R16_RM8 0xbe
154 #define X86_0F_MOVSX_R16_RM16 0xbf
155 #define X86_0F_PINSRW_X128_RM16_IMM8 0xc4
/* Opcodes of the 0F 38 map (emitted with prefix == PREFIX_0F_38). */
157 #define X86_0F_38_CVTPH2PS_X128_RM64 0x13
158 #define X86_0F_38_ROTX 0xf7
/* Opcodes of the 0F 3A map (emitted with prefix == PREFIX_0F_3A). */
160 #define X86_0F_3A_ROUNDSS_X128_M32 0x0a
161 #define X86_0F_3A_ROUNDSD_X128_M64 0x0b
162 #define X86_0F_3A_PEXTRW_RM16_X128_IMM8 0x15
163 #define X86_0F_3A_CVTPS2PH_RM64_X128 0x1d
/*
 * x87 opcodes. A name with the _X suffix is the ModRM reg-field value that
 * accompanies the opcode byte of the same name (see cgen_x87_fld and
 * cgen_x87_fild). NOTE(review): the _2 names are presumably the second
 * opcode byte of two-byte instructions; confirm against their users.
 */
165 #define X87_FLD_RM32 0xd9
166 #define X87_FLD_RM32_X 0x0
167 #define X87_FLDCW 0xd9
168 #define X87_FLDCW_X 0x5
169 #define X87_FILD_M32 0xdb
170 #define X87_FILD_M32_X 0x0
171 #define X87_FISTTP_M32 0xdb
172 #define X87_FISTTP_M32_X 0x1
173 #define X87_FISTP_M32 0xdb
174 #define X87_FISTP_M32_X 0x3
175 #define X87_FLD_M80 0xdb
176 #define X87_FLD_M80_X 0x5
177 #define X87_FLD_M64 0xdd
178 #define X87_FLD_M64_X 0x0
179 #define X87_FSTP_M32 0xd9
180 #define X87_FSTP_M32_X 0x3
181 #define X87_FSTP_M80 0xdb
182 #define X87_FSTP_M80_X 0x7
183 #define X87_FSTP_RM64 0xdd
184 #define X87_FSTP_RM64_X 0x3
185 #define X87_FALU_ST_RM32 0xd8
186 #define X87_FCHS 0xd9
187 #define X87_FCHS_2 0xe0
188 #define X87_FSQRT 0xd9
189 #define X87_FSQRT_2 0xfa
190 #define X87_FRNDINT 0xd9
191 #define X87_FRNDINT_2 0xfc
192 #define X87_FALU_ST_M64 0xdc
193 #define X87_FALU_STi_ST 0xdc
194 #define X87_FISTTP_M64 0xdd
195 #define X87_FISTTP_M64_X 0x1
196 #define X87_FALUP_STi_ST0 0xde
/* NOTE(review): presumably the operation selectors for the X87_FALU* opcodes above; confirm against their users. */
197 #define X87_ALU_ADD 0x0
198 #define X87_ALU_MUL 0x1
199 #define X87_ALU_FCOM 0x2
200 #define X87_ALU_FCOMP 0x3
201 #define X87_ALU_SUBR 0x4
202 #define X87_ALU_SUB 0x5
203 #define X87_ALU_DIVR 0x6
204 #define X87_ALU_DIV 0x7
205 #define X87_FCOMPP 0xde
206 #define X87_FCOMPP_2 0xd9
207 #define X87_FILD_M16 0xdf
208 #define X87_FILD_M16_X 0x0
209 #define X87_FISTTP_M16 0xdf
210 #define X87_FISTTP_M16_X 0x1
211 #define X87_FISTP_M16 0xdf
212 #define X87_FISTP_M16_X 0x3
213 #define X87_FILD_M64 0xdf
214 #define X87_FILD_M64_X 0x5
215 #define X87_FISTP_M64 0xdf
216 #define X87_FISTP_M64_X 0x7
217 #define X87_FNSTSW 0xdf
218 #define X87_FNSTSW_2 0xe0
219 #define X87_FCOMIP 0xdf
220 #define X87_FCOMIP_2 0xf0
/*
 * Selectors kept in the low two bits of cgen_rm_insn's sse_prefix argument;
 * cgen_rm_insn maps 66, F3 and F2 to X86_OP_SIZE, X86_REPE and X86_REPNE.
 */
222 #define SSE_PREFIX_NONE 0
223 #define SSE_PREFIX_66 1
224 #define SSE_PREFIX_F3 2
225 #define SSE_PREFIX_F2 3
/* Opcode-map selectors passed as cgen_rm_insn's `prefix` argument. */
227 #define PREFIX_NONE 0
229 #define PREFIX_0F_38 2
230 #define PREFIX_0F_3A 3
/*
 * cgen_rex: one variant raises an internal error ("attempting to generate rex
 * in 32-bit mode"), the other emits the prefix byte with cgen_one().
 */
233 #define cgen_rex(rex) internal(file_line, "cgen_rex: attempting to generate rex in 32-bit mode: %02x", rex)
235 #define cgen_rex(rex) cgen_one(rex)
/* Flag added to sse_prefix to request VEX encoding regardless of the CPU_FEATURE_avx test (used by cgen_rot). */
238 #define force_vex 0x10000
/*
 * Emit one instruction whose operand is encoded with a ModRM byte.
 *
 * sse_prefix  negative for an ordinary instruction. When non-negative, the
 *             low two bits hold an SSE_PREFIX_* selector, the bits from 8
 *             upwards hold the additional register that is written
 *             (inverted) into the VEX prefix, and force_vex requests VEX
 *             encoding even when CPU_FEATURE_avx is not reported.
 * prefix      opcode-map selector (PREFIX_*).
 * opcode      opcode byte within that map.
 * size        operand size (OP_SIZE_*); OP_SIZE_2 adds the X86_OP_SIZE prefix
 *             for ordinary instructions.
 * reg_is_reg  true when `reg` names a register, false when it is an opcode
 *             extension for the ModRM reg field.
 * reg         value whose low three bits go into bits 3-5 of the ModRM byte.
 * arg         encoded r/m operand: a register or an ARG_ADDRESS_* descriptor
 *             followed by its registers and displacement.
 *
 * Operands that cannot be encoded are reported through internal().
 */
240 static bool attr_w cgen_rm_insn(struct codegen_context *ctx, int32_t sse_prefix, uint8_t prefix, uint8_t opcode, unsigned size, bool reg_is_reg, uint8_t reg, uint8_t *arg)
242 uint8_t rex, mod, rm;
244 int64_t imm = 0; /* avoid warning */
249 if (unlikely(R_IS_XMM(reg)))
251 if (unlikely(R_IS_XMM(arg[0]))) {
252 arg_reg = arg[0] - R_XMM0;
255 if (unlikely(!R_IS_GPR(reg)))
256 internal(file_line, "cgen_rm_insn: invalid register %02x", reg);
260 if (size == OP_SIZE_8)
/* the displacement occupies the last 8 bytes of the encoded address operand */
271 uint8_t *imm_ptr = arg + arg_size(arg[0]) - 8;
272 imm = get_imm(imm_ptr);
273 if (unlikely(!imm_is_32bit(imm)))
274 internal(file_line, "cgen_rm_insn: immediate out of range: %"PRIxMAX"", (uintmax_t)imm);
275 if (arg[0] == ARG_ADDRESS_0) {
285 } else if (imm >= -0x80 && imm <= 0x7f) {
290 if ((arg[1] & 7) == 0x5 && addr_size == 0)
294 else if (addr_size == 1)
298 if (arg[0] == ARG_ADDRESS_1) {
/* a segment register in the base slot is emitted as a segment-override prefix byte */
299 if (reg_is_segment(arg[1])) {
300 static const uint8_t segments[6] = { X86_ES, X86_CS, X86_SS, X86_DS, X86_FS, X86_GS };
301 cgen_one(segments[arg[1] - R_ES]);
306 if ((arg[1] & 7) == 0x4) {
/* scaled index without a base register */
314 if (arg[0] >= ARG_ADDRESS_1_2 && arg[0] <= ARG_ADDRESS_1_8) {
315 if (unlikely(arg[1] == R_SP))
316 internal(file_line, "cgen_rm_insn: attempting to scale SP");
322 sib = ((arg[0] - ARG_ADDRESS_1) << 6) | ((arg[1] & 7) << 3) | 0x5;
/* base register arg[1] plus scaled index register arg[2] */
325 if (arg[0] >= ARG_ADDRESS_2 && arg[0] <= ARG_ADDRESS_2_8) {
326 if (unlikely(arg[2] == R_SP))
327 internal(file_line, "cgen_rm_insn: attempting to scale SP");
333 sib = ((arg[0] - ARG_ADDRESS_2) << 6) | ((arg[2] & 7) << 3) | (arg[1] & 7);
336 internal(file_line, "cgen_rm_insn: invalid argument %02x", arg[0]);
340 if (unlikely(sse_prefix >= 0)) {
341 if (likely(cpu_test_feature(CPU_FEATURE_avx)) || (sse_prefix & force_vex)) {
/* the short VEX form is usable only without REX.X/B/W and with the 0F map */
342 if ((rex & (X86_REX_X | X86_REX_B | X86_REX_W)) == 0 && prefix == PREFIX_0F) {
344 cgen_one((~rex & X86_REX_R) << 5 | (~(sse_prefix >> 8) & 0xf) << 3 | (sse_prefix & 3));
347 cgen_one((~rex & (X86_REX_R | X86_REX_X | X86_REX_B)) << 5 | prefix);
348 cgen_one((rex & X86_REX_W) << 4 | (~(sse_prefix >> 8) & 0xf) << 3 | (sse_prefix & 3));
/* translate the SSE_PREFIX_* selector into its prefix byte */
352 switch (sse_prefix & 3) {
353 case SSE_PREFIX_66: cgen_one(X86_OP_SIZE); break;
354 case SSE_PREFIX_F3: cgen_one(X86_REPE); break;
355 case SSE_PREFIX_F2: cgen_one(X86_REPNE); break;
358 if (size == OP_SIZE_2)
359 cgen_one(X86_OP_SIZE);
361 need_rex = rex != X86_REX;
/* byte-sized operands in registers numbered 4 and above also request a REX prefix */
362 need_rex |= size == OP_SIZE_1 && ((reg_is_reg && !reg_is_fp(reg) && reg >= 4) || (mod == 0xc0 && !reg_is_fp(arg[0]) && arg[0] >= 4));
363 if (prefix == PREFIX_0F && (opcode == X86_0F_MOVZX_R16_RM8 || opcode == X86_0F_MOVSX_R16_RM8)) {
364 need_rex |= mod == 0xc0 && arg[0] >= 4;
384 internal(file_line, "cgen_rm_insn: invalid prefix %u", prefix);
/* ModRM byte: mod | reg << 3 | rm */
388 cgen_one(mod | ((reg & 7) << 3) | (rm & 7));
/*
 * Wrapper around cgen_rm_insn for SSE/VEX instructions: reg2 is packed into
 * bits 8 and up of the sse_prefix argument, `wide` selects OP_SIZE_8 instead
 * of OP_SIZE_4, and `reg` is always passed as a real register.
 */
402 static bool attr_w cgen_sse_insn(struct codegen_context *ctx, unsigned sse_prefix, unsigned sse_op_map, uint8_t opcode, bool wide, uint8_t reg, uint8_t reg2, uint8_t *arg)
404 g(cgen_rm_insn(ctx, sse_prefix + (reg2 << 8), sse_op_map, opcode, !wide ? OP_SIZE_4 : OP_SIZE_8, true, reg, arg));
/*
 * Emit a PUSH of the single operand found at ctx->code_position and advance
 * past it. A general-purpose register uses the one-byte form
 * X86_PUSH_R16 + (reg & 7); an immediate must fit in 32 bits and uses
 * X86_PUSH_IMM8 when it lies in -0x80..0x7f, with X86_PUSH_IMM16 as the other
 * immediate form; remaining operands go through cgen_rm_insn with opcode
 * X86_FF and extension X86_FF_PUSH.
 */
408 static bool attr_w cgen_push(struct codegen_context *ctx)
410 uint8_t *arg1 = ctx->code_position;
411 ctx->code_position += arg_size(*arg1);
412 if (likely(R_IS_GPR(arg1[0]))) {
414 cgen_rex(X86_REX | X86_REX_B);
415 cgen_one(X86_PUSH_R16 + (arg1[0] & 7));
418 if (arg1[0] == ARG_IMM) {
420 imm = get_imm(&arg1[1]);
421 if (unlikely(!imm_is_32bit(imm)))
422 internal(file_line, "cgen_push: immediate out of range: %"PRIxMAX"", (uintmax_t)imm);
423 if (imm >= -0x80 && imm <= 0x7f) {
424 cgen_one(X86_PUSH_IMM8);
428 cgen_one(X86_PUSH_IMM16);
433 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, X86_FF, OP_SIZE_4, false, X86_FF_PUSH, arg1));
/*
 * Emit a POP into the single operand found at ctx->code_position and advance
 * past it. A general-purpose register uses the one-byte form
 * X86_POP_R16 + (reg & 7); other operands go through cgen_rm_insn with opcode
 * X86_8F and extension X86_8F_POP.
 */
437 static bool attr_w cgen_pop(struct codegen_context *ctx)
439 uint8_t *arg1 = ctx->code_position;
440 ctx->code_position += arg_size(*arg1);
441 if (likely(R_IS_GPR(arg1[0]))) {
443 cgen_rex(X86_REX | X86_REX_B);
444 cgen_one(X86_POP_R16 + (arg1[0] & 7));
447 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, X86_8F, OP_SIZE_4, false, X86_8F_POP, arg1));
/*
 * Emit a move of `size` from the second operand (arg2) to the first (arg1).
 * Both operands are read from ctx->code_position, which is advanced past
 * them. The visible cases are: immediate source; short AX <-> absolute
 * address forms; XMM <-> memory; XMM <-> XMM; XMM <-> general-purpose
 * register; and general-purpose register/memory moves. Two non-register
 * operands are rejected through internal().
 */
451 static bool attr_w cgen_mov(struct codegen_context *ctx, unsigned size)
453 uint8_t *arg1 = ctx->code_position;
454 uint8_t *arg2 = arg1 + arg_size(*arg1);
455 ctx->code_position = arg2 + arg_size(*arg2);
/* immediate source */
456 if (arg2[0] == ARG_IMM) {
459 imm = get_imm(&arg2[1]);
460 if (R_IS_GPR(arg1[0])) {
/* value representable as an unsigned 32-bit number */
464 if (imm >= 0 && imm < 0x100000000LL)
469 cgen_one(X86_MOV_R16_IMM16 + (arg1[0] & 7));
/* negative value representable as a signed 32-bit number */
473 if (imm >= ~(int64_t)0x7fffffff && imm < 0) {
478 cgen_one(X86_MOV_R16_IMM16 + (arg1[0] & 7));
482 if (size < OP_SIZE_4) {
483 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_MOV_RM8_IMM8 : X86_MOV_RM16_IMM16, size, false, X86_MOV_R16_IMM16_REG, arg1));
484 if (size == OP_SIZE_1)
491 if (unlikely(!imm_is_32bit(imm)))
492 internal(file_line, "cgen_mov: immediate out of range: %"PRIxMAX"", (uintmax_t)imm);
493 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, X86_MOV_RM16_IMM16, maximum(size, OP_SIZE_4), false, X86_MOV_R16_IMM16_REG, arg1));
/* load AX from an absolute address (sizes of at least OP_SIZE_4 only) */
498 if (arg1[0] == R_AX && size >= OP_SIZE_4 && arg2[0] == ARG_ADDRESS_0) {
500 imm = get_imm(&arg2[1]);
501 if (size == OP_SIZE_8)
502 cgen_rex(X86_REX | X86_REX_W);
503 cgen_one(X86_MOV_AX_M16);
/* store AL/AX to an absolute address */
507 if (arg1[0] == ARG_ADDRESS_0 && arg2[0] == R_AX) {
508 uint8_t code = size == OP_SIZE_1 ? X86_MOV_M16_AL : X86_MOV_M16_AX;
510 imm = get_imm(&arg1[1]);
511 if (size == OP_SIZE_2)
512 cgen_one(X86_OP_SIZE);
513 if (size == OP_SIZE_8)
514 cgen_rex(X86_REX | X86_REX_W);
/* load an XMM register from memory */
519 if (R_IS_XMM(arg1[0]) && ARG_IS_ADDRESS(arg2[0])) {
520 if (size == OP_SIZE_2) {
521 g(cgen_sse_insn(ctx, SSE_PREFIX_66, PREFIX_0F, X86_0F_PINSRW_X128_RM16_IMM8, false, arg1[0], R_XMM7, arg2));
525 if (size == OP_SIZE_16) {
526 g(cgen_sse_insn(ctx, SSE_PREFIX_NONE, PREFIX_0F, X86_0F_MOVAPS_X128_M128, false, arg1[0], 0, arg2));
529 g(cgen_sse_insn(ctx, size == OP_SIZE_4 ? SSE_PREFIX_F3 : SSE_PREFIX_F2, PREFIX_0F, X86_0F_MOVSS_X128_M32, false, arg1[0], 0, arg2));
/* store an XMM register to memory */
532 if (ARG_IS_ADDRESS(arg1[0]) && R_IS_XMM(arg2[0])) {
533 if (size == OP_SIZE_2) {
534 g(cgen_sse_insn(ctx, SSE_PREFIX_66, PREFIX_0F_3A, X86_0F_3A_PEXTRW_RM16_X128_IMM8, false, arg2[0], 0, arg1));
538 if (size == OP_SIZE_16) {
539 g(cgen_sse_insn(ctx, SSE_PREFIX_NONE, PREFIX_0F, X86_0F_MOVAPS_M128_X128, false, arg2[0], 0, arg1));
542 g(cgen_sse_insn(ctx, size == OP_SIZE_4 ? SSE_PREFIX_F3 : SSE_PREFIX_F2, PREFIX_0F, X86_0F_MOVSS_M32_X128, false, arg2[0], 0, arg1));
545 if (R_IS_XMM(arg1[0]) && R_IS_XMM(arg2[0])) {
546 g(cgen_sse_insn(ctx, SSE_PREFIX_NONE, PREFIX_0F, X86_0F_MOVAPS_X128_M128, false, arg1[0], 0, arg2));
/* transfers between XMM and general-purpose registers; OP_SIZE_8 selects the wide form */
549 if (R_IS_XMM(arg1[0]) && R_IS_GPR(arg2[0])) {
550 g(cgen_sse_insn(ctx, SSE_PREFIX_66, PREFIX_0F, X86_0F_MOVD_X128_RM32, size == OP_SIZE_8, arg1[0], 0, arg2));
553 if (R_IS_GPR(arg1[0]) && R_IS_XMM(arg2[0])) {
554 g(cgen_sse_insn(ctx, SSE_PREFIX_66, PREFIX_0F, X86_0F_MOVD_RM32_X128, size == OP_SIZE_8, arg2[0], 0, arg1));
557 if (!R_IS_GPR(arg1[0]) && unlikely(!R_IS_GPR(arg2[0]))) {
558 /*debug("%s", da(ctx->fn,function)->function_name);*/
559 internal(file_line, "cgen_mov: two addresses not supported");
/* destination is not a general-purpose register: store form, source register in the reg field */
561 if (!R_IS_GPR(arg1[0])) {
562 uint8_t code = size == OP_SIZE_1 ? X86_MOV_RM8_R8 : X86_MOV_RM16_R16;
563 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, code, size, true, arg2[0], arg1));
565 } else if (size >= OP_SIZE_4) {
566 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, X86_MOV_R16_RM16, size, true, arg1[0], arg2));
/* byte and word sources are loaded with MOVZX at OP_SIZE_4 */
569 uint8_t code = size == OP_SIZE_1 ? X86_0F_MOVZX_R16_RM8 : X86_0F_MOVZX_R16_RM16;
570 g(cgen_rm_insn(ctx, -1, PREFIX_0F, code, OP_SIZE_4, false, arg1[0], arg2));
/*
 * Emit a sign-extending move of a `size`-sized source into the destination.
 * OP_SIZE_NATIVE is handed to cgen_mov. Sizes up to OP_SIZE_2 use the 0F-map
 * MOVSX opcodes; X86_MOVSXD is the remaining form. Both emit with
 * OP_SIZE_NATIVE as the operand size.
 */
575 static bool attr_w cgen_movsx(struct codegen_context *ctx, unsigned size)
577 uint8_t *arg1, *arg2;
578 if (unlikely(size == OP_SIZE_NATIVE)) {
579 g(cgen_mov(ctx, size));
582 arg1 = ctx->code_position;
583 arg2 = arg1 + arg_size(*arg1);
584 ctx->code_position = arg2 + arg_size(*arg2);
585 if (size <= OP_SIZE_2) {
586 g(cgen_rm_insn(ctx, -1, PREFIX_0F, size == OP_SIZE_1 ? X86_0F_MOVSX_R16_RM8 : X86_0F_MOVSX_R16_RM16, OP_SIZE_NATIVE, true, arg1[0], arg2));
588 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, X86_MOVSXD, OP_SIZE_NATIVE, true, arg1[0], arg2));
/*
 * Emit LEA arg1, [arg2 + arg3]. The three operands are read from
 * ctx->code_position. An address descriptor is assembled in `addr` from arg2
 * and arg3 and passed to cgen_rm_insn with arg1 as the destination register.
 * arg3 may be an immediate (displacement), a general-purpose register, or a
 * register shifted left by at most 3; other combinations are reported
 * through internal().
 */
593 static bool attr_w cgen_lea(struct codegen_context *ctx, unsigned size)
596 uint8_t *arg1, *arg2, *arg3;
598 arg1 = ctx->code_position;
599 arg2 = arg1 + arg_size(*arg1);
600 arg3 = arg2 + arg_size(*arg2);
601 ctx->code_position = arg3 + arg_size(*arg3);
603 if (arg3[0] == ARG_IMM) {
604 if (arg2[0] == ARG_SHIFTED_REGISTER) {
605 if (unlikely((arg2[1] & ARG_SHIFT_MODE) != ARG_SHIFT_LSL) ||
606 unlikely((arg2[1] & ARG_SHIFT_AMOUNT) > 3))
/* the shift amount selects the scaled single-register address kind */
608 addr[0] = ARG_ADDRESS_1 + (arg2[1] & ARG_SHIFT_AMOUNT);
/* the 8-byte immediate becomes the displacement */
610 memcpy(&addr[2], &arg3[1], 8);
612 addr[0] = ARG_ADDRESS_1;
614 memcpy(&addr[2], &arg3[1], 8);
616 } else if (R_IS_GPR(arg3[0])) {
617 addr[0] = ARG_ADDRESS_2;
/* zero displacement */
620 memset(&addr[3], 0, 8);
621 } else if (arg3[0] == ARG_SHIFTED_REGISTER) {
622 if (unlikely((arg3[1] & ARG_SHIFT_MODE) != ARG_SHIFT_LSL) ||
623 unlikely((arg3[1] & ARG_SHIFT_AMOUNT) > 3))
625 addr[0] = ARG_ADDRESS_2 + (arg3[1] & ARG_SHIFT_AMOUNT);
628 memset(&addr[3], 0, 8);
631 internal(file_line, "cgen_lea: invalid argument %02x, %02x, %02x", arg1[0], arg2[0], arg3[0]);
633 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, X86_LEA_R16_RM16, size, true, arg1[0], addr));
/*
 * Emit a binary integer ALU operation `alu` of the given size. Operands are
 * read from ctx->code_position; where three operands are decoded, the first
 * two must be identical ("three-operand mode not supported"). ALU_MUL is
 * emitted as IMUL (immediate, byte, or register/memory form). Other
 * operations use the immediate forms, the SSE logical opcodes for 16-byte
 * XMM operands, or the register/memory forms. Two non-register operands are
 * rejected through internal().
 */
637 static bool attr_w cgen_alu(struct codegen_context *ctx, unsigned size, unsigned alu)
639 uint8_t *arg1, *arg2, *arg3;
641 arg1 = ctx->code_position;
642 arg2 = arg1 + arg_size(*arg1);
643 arg3 = arg2 + arg_size(*arg2);
644 ctx->code_position = arg3 + arg_size(*arg3);
/* destination and first source must be encoded identically */
645 if (unlikely(arg_size(*arg1) != arg_size(*arg2)))
646 internal(file_line, "cgen_alu: three-operand mode not supported");
647 if (unlikely(memcmp(arg1, arg2, arg_size(*arg1))))
648 internal(file_line, "cgen_alu: three-operand mode not supported");
650 arg1 = ctx->code_position;
652 arg3 = arg2 + arg_size(*arg2);
653 ctx->code_position = arg3 + arg_size(*arg3);
656 if (unlikely(alu == ALU_MUL)) {
657 if (unlikely(arg3[0] == ARG_IMM)) {
660 imm = get_imm(&arg3[1]);
661 if (unlikely(!imm_is_32bit(imm)))
662 internal(file_line, "cgen_alu: immediate out of range: %"PRIxMAX"", (uintmax_t)imm);
/* pick the 8-bit immediate form when the value fits */
663 code = imm_is_8bit(imm) ? X86_IMUL_R16_RM16_IMM8 : X86_IMUL_R16_RM16_IMM16;
664 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, code, size, true, arg1[0], arg2));
665 if (code == X86_IMUL_R16_RM16_IMM8) {
667 } else if (size == OP_SIZE_2) {
/* byte multiply is only available with AX as the destination */
674 if (unlikely(size == OP_SIZE_1)) {
675 if (unlikely(arg1[0] != R_AX))
676 internal(file_line, "cgen_alu: imul with unsupported register");
677 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, X86_F6, size, false, X86_F6_IMUL_RM8, arg3));
680 if (unlikely(!R_IS_GPR(arg1[0])))
681 internal(file_line, "cgen_alu: invalid multiply args");
682 g(cgen_rm_insn(ctx, -1, PREFIX_0F, X86_0F_IMUL_R16_RM16, size, true, arg1[0], arg3));
/* immediate second operand */
687 if (arg3[0] == ARG_IMM) {
691 imm = get_imm(&arg3[1]);
692 if (unlikely(!imm_is_32bit(imm)))
693 internal(file_line, "cgen_alu: immediate out of range: %"PRIxMAX"", (uintmax_t)imm);
695 if (arg1[0] == R_AX) {
696 if (imm_is_8bit(imm) && size >= OP_SIZE_4)
699 code = size == OP_SIZE_1 ? X86_ALU_AL_IMM8 : X86_ALU_AX_IMM16;
700 if (size == OP_SIZE_2)
701 cgen_one(X86_OP_SIZE);
702 if (size == OP_SIZE_8)
703 cgen_rex(X86_REX | X86_REX_W);
708 bit8 = imm_is_8bit(imm);
709 code = size == OP_SIZE_1 ? X86_ALU_RM8_IMM8 : bit8 ? X86_ALU_RM16_IMM8 : X86_ALU_RM16_IMM16;
/* the operation number goes into the ModRM reg field */
710 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, code, size, false, alu, arg1));
712 if (bit8 || size == OP_SIZE_1) {
714 } else if (size == OP_SIZE_2) {
/* 16-byte logical operations on XMM registers */
722 if (R_IS_XMM(arg1[0]) && size == OP_SIZE_16) {
725 case ALU_AND: code = X86_0F_ANDPS_X128_M128; break;
726 case ALU_ANDN: code = X86_0F_ANDNPS_X128_M128; break;
727 case ALU_OR: code = X86_0F_ORPS_X128_M128; break;
728 case ALU_XOR: code = X86_0F_XORPS_X128_M128; break;
729 default: internal(file_line, "invalid sse alu: %u", alu);
731 g(cgen_sse_insn(ctx, SSE_PREFIX_NONE, PREFIX_0F, code, false, arg1[0], arg2[0], arg3));
735 if (!R_IS_GPR(arg1[0]) && unlikely(!R_IS_GPR(arg3[0])))
736 internal(file_line, "cgen_alu: two addresses not supported");
/* destination is not a general-purpose register: arg3 supplies the register field */
738 if (!R_IS_GPR(arg1[0])) {
739 uint8_t code = size == OP_SIZE_1 ? X86_ALU_RM8_R8 : X86_ALU_RM16_R16;
741 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, code, size, true, arg3[0], arg1));
744 uint8_t code = size == OP_SIZE_1 ? X86_ALU_R8_RM8 : X86_ALU_R16_RM16;
746 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, code, size, true, arg1[0], arg3));
/*
 * Emit a unary integer operation `alu` of the given size. Two operands are
 * read from ctx->code_position. NOT, NEG, INC, DEC and BSWAP require the
 * destination and source to be the same operand. When bit 1 of writes_flags
 * is set, INC and DEC are emitted through the ALU immediate opcode with
 * ALU_ADD / ALU_SUB instead of the INC/DEC opcodes. BSWAP needs a
 * general-purpose register and a size above OP_SIZE_2; the bit-scan and
 * POPCNT forms do not accept bytes. Unknown operations are reported through
 * internal().
 */
751 static bool attr_w cgen_alu1(struct codegen_context *ctx, unsigned size, unsigned alu, unsigned writes_flags)
754 uint8_t *arg1 = ctx->code_position;
755 uint8_t *arg2 = arg1 + arg_size(*arg1);
756 ctx->code_position = arg2 + arg_size(*arg2);
757 if (alu == ALU1_NOT || alu == ALU1_NEG || alu == ALU1_INC || alu == ALU1_DEC || alu == ALU1_BSWAP) {
758 if (unlikely(arg1[0] != arg2[0]))
759 internal(file_line, "cgen_alu1: arguments mismatch: %x, %x", arg1[0], arg2[0]);
/* NOT */
763 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_F6 : X86_F7, size, false, X86_F6_NOT_RM8, arg1));
/* NEG */
766 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_F6 : X86_F7, size, false, X86_F6_NEG_RM8, arg1));
/* increment */
769 if (writes_flags & 2) {
770 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_ALU_RM8_IMM8 : X86_ALU_RM16_IMM8, size, false, ALU_ADD, arg1));
/* one-byte register form */
775 if (R_IS_GPR(arg1[0]) && size >= OP_SIZE_2) {
776 if (size == OP_SIZE_2)
777 cgen_one(X86_OP_SIZE);
778 cgen_one(X86_INC_R16 + arg1[0]);
782 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_FE : X86_FF, size, false, X86_FE_INC_RM8, arg1));
/* decrement */
785 if (writes_flags & 2) {
786 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_ALU_RM8_IMM8 : X86_ALU_RM16_IMM8, size, false, ALU_SUB, arg1));
/* one-byte register form */
791 if (R_IS_GPR(arg1[0]) && size >= OP_SIZE_2) {
792 if (size == OP_SIZE_2)
793 cgen_one(X86_OP_SIZE);
794 cgen_one(X86_DEC_R16 + arg1[0]);
798 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_FE : X86_FF, size, false, X86_FE_DEC_RM8, arg1));
/* byte swap */
801 if (unlikely(size <= OP_SIZE_2))
802 internal(file_line, "cgen_alu1: bytes or words not supported with this operation");
804 if (!R_IS_GPR(arg1[0]))
805 internal(file_line, "cgen_alu1: bswap needs a register");
808 if (size == OP_SIZE_8)
813 cgen_one(X86_0F_BSWAP + (arg1[0] & 7));
/* bit scan / count operations */
819 if (unlikely(size == OP_SIZE_1))
820 internal(file_line, "cgen_alu1: bytes not supported with this operation");
821 if (alu == ALU1_POPCNT || alu == ALU1_LZCNT)
/* ALU1_LZCNT shares the BSR opcode value; ALU1_BSF uses BSF; everything else uses the POPCNT value */
823 g(cgen_rm_insn(ctx, -1, PREFIX_0F, alu == ALU1_BSF ? X86_0F_BSF_R16_RM16 : alu == ALU1_BSR || alu == ALU1_LZCNT ? X86_0F_BSR_R16_RM16 : X86_0F_POPCNT_R16_RM16, size, true, arg1[0], arg2));
826 internal(file_line, "cgen_alu1: invalid operation %u", alu);
/*
 * Emit a TEST of the two operands read from ctx->code_position.
 *
 * With an immediate second operand, AX as the first operand uses the short
 * X86_TEST_AL_IMM8 / X86_TEST_AX_IMM16 forms (with the X86_OP_SIZE prefix for
 * OP_SIZE_2 and REX.W for the widest size); other first operands use opcode
 * X86_F6 / X86_F7 with extension X86_F6_TEST_RM8_IMM8. In the widest case the
 * immediate must fit in 32 bits.
 *
 * Without an immediate, at least one operand must be a general-purpose
 * register; it is placed in the register field and the other operand in the
 * r/m field.
 *
 * Unsupported operands are reported through internal(); every diagnostic is
 * prefixed with the function name "cgen_test".
 */
831 static bool attr_w cgen_test(struct codegen_context *ctx, unsigned size)
833 uint8_t *arg1, *arg2;
834 arg1 = ctx->code_position;
835 arg2 = arg1 + arg_size(*arg1);
836 ctx->code_position = arg2 + arg_size(*arg2);
838 if (arg2[0] == ARG_IMM) {
840 imm = get_imm(&arg2[1]);
/* short accumulator forms */
841 if (arg1[0] == R_AX) {
842 if (size == OP_SIZE_1) {
843 cgen_one(X86_TEST_AL_IMM8);
844 } else if (size == OP_SIZE_2) {
845 cgen_one(X86_OP_SIZE);
846 cgen_one(X86_TEST_AX_IMM16);
847 } else if (size == OP_SIZE_4) {
848 cgen_one(X86_TEST_AX_IMM16);
850 if (unlikely(!imm_is_32bit(imm)))
851 internal(file_line, "cgen_test: immediate out of range: %"PRIxMAX"", (uintmax_t)imm);
852 cgen_rex(X86_REX | X86_REX_W);
853 cgen_one(X86_TEST_AX_IMM16);
/* general r/m, immediate form */
856 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_F6 : X86_F7, size, false, X86_F6_TEST_RM8_IMM8, arg1));
858 if (size == OP_SIZE_1) {
860 } else if (size == OP_SIZE_2) {
862 } else if (size == OP_SIZE_4) {
865 if (unlikely(!imm_is_32bit(imm)))
866 internal(file_line, "cgen_test: immediate out of range: %"PRIxMAX"", (uintmax_t)imm);
872 if (!R_IS_GPR(arg1[0]) && unlikely(!R_IS_GPR(arg2[0])))
873 internal(file_line, "cgen_test: two addresses not supported");
/* the same opcode serves both operand orders; only the register/r-m roles swap */
875 if (!R_IS_GPR(arg1[0])) {
876 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_TEST_RM8_R8 : X86_TEST_RM16_R16, size, true, arg2[0], arg1));
878 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_TEST_RM8_R8 : X86_TEST_RM16_R16, size, true, arg1[0], arg2));
/*
 * Emit a LEA with base register, scaled index register and displacement.
 * Four operands are read from ctx->code_position: arg1, arg2 and arg3 must be
 * general-purpose registers and arg4 must be an immediate. `shift` is added
 * to ARG_ADDRESS_2 to select the scale, and the 8-byte immediate of arg4 is
 * copied in as the displacement.
 */
883 static bool attr_w cgen_lea3(struct codegen_context *ctx, unsigned size, unsigned shift)
886 uint8_t *arg1, *arg2, *arg3, *arg4;
888 arg1 = ctx->code_position;
889 arg2 = arg1 + arg_size(*arg1);
890 arg3 = arg2 + arg_size(*arg2);
891 arg4 = arg3 + arg_size(*arg3);
892 ctx->code_position = arg4 + arg_size(*arg4);
894 if (unlikely(!R_IS_GPR(arg1[0])) || unlikely(!R_IS_GPR(arg2[0])) || unlikely(!R_IS_GPR(arg3[0])) || unlikely(arg4[0] != ARG_IMM))
895 internal(file_line, "cgen_lea3: invalid arguments");
897 addr[0] = ARG_ADDRESS_2 + shift;
900 memcpy(&addr[3], &arg4[1], 8);
902 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, X86_LEA_R16_RM16, size, true, arg1[0], addr));
/*
 * Emit a shift or rotate `rot` of the given size. Three operands are read
 * from ctx->code_position (destination, source, count).
 *
 * When CPU_FEATURE_bmi2 is available, the size is at least OP_SIZE_4, the
 * count is not an immediate, flags are not needed and the operation is SHL,
 * SHR or SAR, the VEX-encoded X86_0F_38_ROTX opcode is used with force_vex;
 * this form takes arg3 as the second register and arg2 as the r/m operand.
 *
 * Otherwise the destination must equal the source, and the count must be
 * R_CX (CL forms) or an immediate (the _1 and _IMM8 forms).
 */
907 static bool attr_w cgen_rot(struct codegen_context *ctx, unsigned size, uint8_t rot, unsigned writes_flags)
909 uint8_t *arg1 = ctx->code_position;
910 uint8_t *arg2 = arg1 + arg_size(*arg1);
911 uint8_t *arg3 = arg2 + arg_size(*arg2);
912 ctx->code_position = arg3 + arg_size(*arg3);
914 if (cpu_test_feature(CPU_FEATURE_bmi2) && size >= OP_SIZE_4 && arg3[0] != ARG_IMM && !writes_flags && (rot == ROT_SHL || rot == ROT_SHR || rot == ROT_SAR)) {
/* the prefix selector distinguishes the three operations sharing one opcode */
917 case ROT_SHL: sse_prefix = SSE_PREFIX_66; break;
918 case ROT_SAR: sse_prefix = SSE_PREFIX_F3; break;
919 case ROT_SHR: sse_prefix = SSE_PREFIX_F2; break;
920 default: internal(file_line, "cgen_rot: invalid rotation %x", rot);
922 g(cgen_sse_insn(ctx, sse_prefix + force_vex, PREFIX_0F_38, X86_0F_38_ROTX, size == OP_SIZE_8, arg1[0], arg3[0], arg2));
926 if (arg1[0] != arg2[0])
927 internal(file_line, "cgen_rot: invalid arguments: %x, %02x, %02x, %02x", rot, arg1[0], arg2[0], arg3[0]);
/* count in CL; `rot` is passed as the ModRM reg-field extension */
929 if (arg3[0] == R_CX) {
930 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_ROT_RM8_CL : X86_ROT_RM16_CL, size, false, rot, arg1));
931 } else if (likely(arg3[0] == ARG_IMM)) {
933 imm = get_imm(&arg3[1]);
935 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_ROT_RM8_1 : X86_ROT_RM16_1, size, false, rot, arg1));
937 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_ROT_RM8_IMM8 : X86_ROT_RM16_IMM8, size, false, rot, arg1));
941 internal(file_line, "cgen_rot: invalid argument %02x", arg3[0]);
/*
 * Shared emitter for the bit-test family. `bt` selects the operation. With an
 * immediate bit index (arg2) the X86_0F_BTX_RM16_IMM8 opcode is used with
 * extension X86_0F_BTX_BT_RM16_IMM8 + bt; otherwise the opcode is
 * X86_0F_BT_RM16_R16 + bt * 8 with arg2 as the register. arg1 is the r/m
 * operand in both cases.
 */
946 static bool attr_w cgen_btxt(struct codegen_context *ctx, unsigned size, uint8_t bt, uint8_t *arg1, uint8_t *arg2)
948 if (arg2[0] == ARG_IMM) {
949 g(cgen_rm_insn(ctx, -1, PREFIX_0F, X86_0F_BTX_RM16_IMM8, size, false, X86_0F_BTX_BT_RM16_IMM8 + bt, arg1));
952 g(cgen_rm_insn(ctx, -1, PREFIX_0F, X86_0F_BT_RM16_R16 + bt * 8, size, true, arg2[0], arg1));
/*
 * Emit a plain bit test: reads two operands from ctx->code_position, advances
 * past them and delegates to cgen_btxt with BTX_BT.
 */
957 static bool attr_w cgen_bt(struct codegen_context *ctx, unsigned size)
959 uint8_t *arg1 = ctx->code_position;
960 uint8_t *arg2 = arg1 + arg_size(*arg1);
961 ctx->code_position = arg2 + arg_size(*arg2);
963 return cgen_btxt(ctx, size, BTX_BT, arg1, arg2);
/*
 * Emit a bit test-and-modify operation selected by `bt`. Three operands are
 * read from ctx->code_position; the first two must be the same operand
 * (otherwise an internal error is raised), and the third is the bit index
 * handed to cgen_btxt.
 */
966 static bool attr_w cgen_btx(struct codegen_context *ctx, unsigned size, uint8_t bt)
968 uint8_t *arg1 = ctx->code_position;
969 uint8_t *arg2 = arg1 + arg_size(*arg1);
970 uint8_t *arg3 = arg2 + arg_size(*arg2);
971 ctx->code_position = arg3 + arg_size(*arg3);
973 if (arg1[0] != arg2[0])
974 internal(file_line, "cgen_btx: invalid arguments");
976 return cgen_btxt(ctx, size, bt, arg1, arg3);
/*
 * Emit a widening multiply (MUL, or IMUL when `sgn` is set). Four operands
 * are read from ctx->code_position. The register assignment is fixed: arg1
 * and arg3 must be R_AX, arg2 must be the upper-half register (R_AX for
 * OP_SIZE_1, R_DX otherwise) and arg4 must not be an immediate. arg4 is the
 * r/m operand of the emitted X86_F6 / X86_F7 instruction.
 */
979 static bool attr_w cgen_mul_l(struct codegen_context *ctx, unsigned size, bool sgn)
981 uint8_t *arg1, *arg2, *arg3, *arg4;
983 arg1 = ctx->code_position;
984 arg2 = arg1 + arg_size(*arg1);
985 arg3 = arg2 + arg_size(*arg2);
986 arg4 = arg3 + arg_size(*arg3);
987 ctx->code_position = arg4 + arg_size(*arg4);
988 reg_up = size == OP_SIZE_1 ? R_AX : R_DX;
989 if (unlikely(arg1[0] != R_AX) || unlikely(arg2[0] != reg_up) || unlikely(arg3[0] != R_AX) || unlikely(arg4[0] == ARG_IMM))
990 internal(file_line, "cgen_mul_l: invalid mul arguments");
992 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_F6 : X86_F7, size, false, !sgn ? X86_F6_MUL_RM8 : X86_F6_IMUL_RM8, arg4));
/*
 * Emit a widening divide (DIV, or IDIV when `sgn` is set). Five operands are
 * read from ctx->code_position. The register assignment is fixed: arg1 and
 * arg3 must be R_AX, arg2 and arg4 must be the upper-half register (R_AX for
 * OP_SIZE_1, R_DX otherwise) and arg5 must not be an immediate. arg5 is the
 * r/m operand of the emitted X86_F6 / X86_F7 instruction.
 */
996 static bool attr_w cgen_div_l(struct codegen_context *ctx, unsigned size, bool sgn)
998 uint8_t *arg1, *arg2, *arg3, *arg4, *arg5;
1000 arg1 = ctx->code_position;
1001 arg2 = arg1 + arg_size(*arg1);
1002 arg3 = arg2 + arg_size(*arg2);
1003 arg4 = arg3 + arg_size(*arg3);
1004 arg5 = arg4 + arg_size(*arg4);
1005 ctx->code_position = arg5 + arg_size(*arg5);
1006 reg_up = size == OP_SIZE_1 ? R_AX : R_DX;
1007 if (unlikely(arg1[0] != R_AX) || unlikely(arg2[0] != reg_up) || unlikely(arg3[0] != R_AX) || unlikely(arg4[0] != reg_up) || unlikely(arg5[0] == ARG_IMM))
1008 internal(file_line, "cgen_div_l: invalid div arguments");
1010 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, size == OP_SIZE_1 ? X86_F6 : X86_F7, size, false, !sgn ? X86_F6_DIV_RM8 : X86_F6_IDIV_RM8, arg5));
/*
 * Emit a conditional move. Three operands are read from ctx->code_position;
 * the first two must be the same register, which becomes the destination,
 * and the third is the r/m source. `cond` is added to the
 * X86_0F_CMOVCC_R16_RM16 base opcode.
 */
1014 static bool attr_w cgen_cmov(struct codegen_context *ctx, unsigned size, unsigned cond)
1016 uint8_t *arg1 = ctx->code_position;
1017 uint8_t *arg2 = arg1 + arg_size(*arg1);
1018 uint8_t *arg3 = arg2 + arg_size(*arg2);
1019 ctx->code_position = arg3 + arg_size(*arg3);
1020 if (unlikely(arg1[0] != arg2[0]))
1021 internal(file_line, "cgen_cmov: invalid arguments");
1022 g(cgen_rm_insn(ctx, -1, PREFIX_0F, X86_0F_CMOVCC_R16_RM16 + cond, size, true, arg1[0], arg3));
/*
 * Emit a string copy built on X86_MOVSB. Three operands are read from
 * ctx->code_position and checked against the fixed register convention:
 * destination and source must be ARG_ADDRESS_1_POST_I addresses based on R_DI
 * and R_SI with zero displacement, and the count must be R_CX. Invalid
 * operands are reported with the "cgen_memcpy: invalid arguments" diagnostic.
 */
1026 static bool attr_w cgen_memcpy(struct codegen_context *ctx)
1028 int64_t disp_dest, disp_src;
1029 uint8_t *arg1, *arg2, *arg3;
1030 arg1 = ctx->code_position;
1031 arg2 = arg1 + arg_size(*arg1);
1032 arg3 = arg2 + arg_size(*arg2);
1033 ctx->code_position = arg3 + arg_size(*arg3);
1034 if (unlikely(arg1[0] != ARG_ADDRESS_1_POST_I) || unlikely(arg2[0] != ARG_ADDRESS_1_POST_I) || unlikely(arg3[0] != R_CX))
1036 if (unlikely(arg1[1] != R_DI) || unlikely(arg2[1] != R_SI))
1038 disp_dest = get_imm(&arg1[2]);
1039 disp_src = get_imm(&arg2[2]);
1040 if (unlikely(disp_dest != 0) || unlikely(disp_src != 0))
1044 cgen_one(X86_MOVSB);
1048 internal(file_line, "cgen_memcpy: invalid arguments %02x, %02x, %02x", *arg1, *arg2, *arg3);
/*
 * Emit a string fill built on X86_STOSB. Three operands are read from
 * ctx->code_position and checked against the fixed register convention: the
 * destination must be an ARG_ADDRESS_1_POST_I address based on R_DI with zero
 * displacement, the second operand must be R_CX and the third R_AX. Invalid
 * operands are reported with the "cgen_memset: invalid arguments" diagnostic.
 */
1052 static bool attr_w cgen_memset(struct codegen_context *ctx)
1055 uint8_t *arg1, *arg2, *arg3;
1056 arg1 = ctx->code_position;
1057 arg2 = arg1 + arg_size(*arg1);
1058 arg3 = arg2 + arg_size(*arg2);
1059 ctx->code_position = arg3 + arg_size(*arg3);
1060 if (unlikely(arg1[0] != ARG_ADDRESS_1_POST_I) || unlikely(arg2[0] != R_CX) || unlikely(arg3[0] != R_AX))
1062 if (unlikely(arg1[1] != R_DI))
1064 disp_dest = get_imm(&arg1[2]);
1065 if (unlikely(disp_dest != 0))
1069 cgen_one(X86_STOSB);
1073 internal(file_line, "cgen_memset: invalid arguments %02x, %02x, %02x", *arg1, *arg2, *arg3);
/*
 * Emit a scalar floating-point compare with the X86_0F_UCOMISS_X128_RM32
 * opcode. OP_SIZE_4 uses no prefix selector; every other size uses
 * SSE_PREFIX_66. arg1 is the register operand and arg2 the r/m operand.
 */
1077 static bool attr_w cgen_sse_cmp(struct codegen_context *ctx, unsigned size)
1079 uint8_t *arg1 = ctx->code_position;
1080 uint8_t *arg2 = arg1 + arg_size(*arg1);
1081 ctx->code_position = arg2 + arg_size(*arg2);
1082 g(cgen_sse_insn(ctx, size == OP_SIZE_4 ? SSE_PREFIX_NONE: SSE_PREFIX_66, PREFIX_0F, X86_0F_UCOMISS_X128_RM32, false, arg1[0], 0, arg2));
/*
 * Emit a scalar floating-point add, subtract, multiply or divide. Three
 * operands are read from ctx->code_position: arg1 is the destination
 * register, arg2 the second register passed to cgen_sse_insn, and arg3 the
 * r/m operand. OP_SIZE_4 selects SSE_PREFIX_F3, other sizes SSE_PREFIX_F2.
 * An unknown `alu` is reported through internal().
 */
1086 static bool attr_w cgen_sse_alu(struct codegen_context *ctx, unsigned size, unsigned alu)
1089 uint8_t *arg1 = ctx->code_position;
1090 uint8_t *arg2 = arg1 + arg_size(*arg1);
1091 uint8_t *arg3 = arg2 + arg_size(*arg2);
1092 ctx->code_position = arg3 + arg_size(*arg3);
1094 case FP_ALU_ADD: opcode = X86_0F_ADDPS_X128_M32; break;
1095 case FP_ALU_SUB: opcode = X86_0F_SUBPS_X128_M32; break;
1096 case FP_ALU_MUL: opcode = X86_0F_MULPS_X128_M32; break;
1097 case FP_ALU_DIV: opcode = X86_0F_DIVPS_X128_M32; break;
1098 default: internal(file_line, "cgen_sse_alu: invalid alu %u", alu);
1100 g(cgen_sse_insn(ctx, size == OP_SIZE_4 ? SSE_PREFIX_F3 : SSE_PREFIX_F2, PREFIX_0F, opcode, false, arg1[0], arg2[0], arg3));
/*
 * Emit a unary scalar floating-point operation. Two operands are read from
 * ctx->code_position. Square root uses the 0F-map opcode with SSE_PREFIX_F3
 * for OP_SIZE_4 and SSE_PREFIX_F2 for OP_SIZE_8. The rounding operations use
 * SSE_PREFIX_66 with the 0F 3A-map ROUNDSS (OP_SIZE_4) or ROUNDSD (OP_SIZE_8)
 * opcode, followed by one byte holding alu - FP_ALU1_ROUND. The destination
 * register is also passed as the second register. Unsupported combinations
 * are reported through internal().
 */
1104 static bool attr_w cgen_sse_alu1(struct codegen_context *ctx, unsigned size, unsigned alu)
1107 unsigned sse_pfx, sse_op_map;
1108 uint8_t *arg1 = ctx->code_position;
1109 uint8_t *arg2 = arg1 + arg_size(*arg1);
1110 ctx->code_position = arg2 + arg_size(*arg2);
1112 case FP_ALU1_SQRT: if (size == OP_SIZE_4) {
1113 sse_pfx = SSE_PREFIX_F3;
1114 } else if (size == OP_SIZE_8) {
1115 sse_pfx = SSE_PREFIX_F2;
1119 sse_op_map = PREFIX_0F;
1120 opcode = X86_0F_SQRTPS_X128_M32;
1125 case FP_ALU1_TRUNC: sse_pfx = SSE_PREFIX_66;
1126 sse_op_map = PREFIX_0F_3A;
1127 if (size == OP_SIZE_4) {
1128 opcode = X86_0F_3A_ROUNDSS_X128_M32;
1129 } else if (size == OP_SIZE_8) {
1130 opcode = X86_0F_3A_ROUNDSD_X128_M64;
1136 default: internal(file_line, "cgen_sse_alu1: invalid alu %u, %u", alu, size);
1138 g(cgen_sse_insn(ctx, sse_pfx, sse_op_map, opcode, false, arg1[0], arg1[0], arg2));
/* trailing byte of the rounding instructions */
1139 if (OP_IS_ROUND(alu))
1140 cgen_one(alu - FP_ALU1_ROUND);
/*
 * Emit an integer to floating-point conversion with the
 * X86_0F_CVTSI2SS_X128_RM32 opcode. fp_op_size picks the prefix selector
 * (SSE_PREFIX_F3 for OP_SIZE_4, SSE_PREFIX_F2 otherwise); an int_op_size of
 * OP_SIZE_8 selects the wide form. arg1 is the destination register, arg2
 * the r/m source, and R_XMM7 is passed as the second register.
 */
1144 static bool attr_w cgen_sse_from_int(struct codegen_context *ctx, unsigned int_op_size, unsigned fp_op_size)
1146 uint8_t *arg1 = ctx->code_position;
1147 uint8_t *arg2 = arg1 + arg_size(*arg1);
1148 ctx->code_position = arg2 + arg_size(*arg2);
1149 g(cgen_sse_insn(ctx, fp_op_size == OP_SIZE_4 ? SSE_PREFIX_F3 : SSE_PREFIX_F2, PREFIX_0F, X86_0F_CVTSI2SS_X128_RM32, int_op_size == OP_SIZE_8, arg1[0], R_XMM7, arg2));
/*
 * Emit a floating-point to integer conversion with the
 * X86_0F_CVTTSS2SI_X128_RM32 opcode. fp_op_size picks the prefix selector
 * (SSE_PREFIX_F3 for OP_SIZE_4, SSE_PREFIX_F2 otherwise); an int_op_size of
 * OP_SIZE_8 selects the wide form. arg1 is the destination register and arg2
 * the r/m source.
 */
1153 static bool attr_w cgen_sse_to_int(struct codegen_context *ctx, unsigned int_op_size, unsigned fp_op_size)
1155 uint8_t *arg1 = ctx->code_position;
1156 uint8_t *arg2 = arg1 + arg_size(*arg1);
1157 ctx->code_position = arg2 + arg_size(*arg2);
1158 g(cgen_sse_insn(ctx, fp_op_size == OP_SIZE_4 ? SSE_PREFIX_F3 : SSE_PREFIX_F2, PREFIX_0F, X86_0F_CVTTSS2SI_X128_RM32, int_op_size == OP_SIZE_8, arg1[0], 0, arg2));
/*
 * cgen_sse_cvt - emit a conversion between floating-point sizes.
 *
 * Only two combinations are handled: OP_SIZE_2 -> OP_SIZE_4 and
 * OP_SIZE_4 -> OP_SIZE_2.  Any other pair is reported through internal().
 * Two operand descriptors are consumed from ctx->code_position.
 */
1162 static bool attr_w cgen_sse_cvt(struct codegen_context *ctx, unsigned from_op_size, unsigned to_op_size)
1164 uint8_t *arg1 = ctx->code_position;
1165 uint8_t *arg2 = arg1 + arg_size(*arg1);
1166 ctx->code_position = arg2 + arg_size(*arg2);
1167 if (from_op_size == OP_SIZE_2 && to_op_size == OP_SIZE_4) {
1168 g(cgen_sse_insn(ctx, SSE_PREFIX_66, PREFIX_0F_38, X86_0F_38_CVTPH2PS_X128_RM64, false, arg1[0], 0, arg2));
1170 } else if (from_op_size == OP_SIZE_4 && to_op_size == OP_SIZE_2) {
/*
 * The operand roles are swapped here: arg2[0] goes in the register slot and
 * arg1 is the other operand.  NOTE(review): presumably because this opcode
 * writes its r/m operand, as the _RM64_X128 suffix suggests - confirm.
 */
1171 g(cgen_sse_insn(ctx, SSE_PREFIX_66, PREFIX_0F_3A, X86_0F_3A_CVTPS2PH_RM64_X128, false, arg2[0], 0, arg1));
1175 internal(file_line, "cgen_sse_cvt: unsupported arguments %u, %u", from_op_size, to_op_size);
/*
 * cgen_x87_fld - emit an x87 load (FLD) of the given size.
 *
 * One operand descriptor is consumed from ctx->code_position.  A pair of
 * bytes (c1, c2) is selected and handed to cgen_rm_insn() together with the
 * operand.  An unsupported size is reported through internal().
 * NOTE(review): the constant names suggest 32-, 64- and 80-bit forms chosen
 * by `size` - confirm against the case labels.
 */
1179 static bool attr_w cgen_x87_fld(struct codegen_context *ctx, unsigned size)
1182 uint8_t *arg1 = ctx->code_position;
1183 ctx->code_position = arg1 + arg_size(*arg1);
/* Operands in the R_ST0..R_ST7 register range are singled out by this test. */
1184 if (arg1[0] >= R_ST0 && arg1[0] <= R_ST7)
1188 c1 = X87_FLD_RM32; c2 = X87_FLD_RM32_X; break;
1190 c1 = X87_FLD_M64; c2 = X87_FLD_M64_X; break;
1192 c1 = X87_FLD_M80; c2 = X87_FLD_M80_X; break;
1194 internal(file_line, "cgen_x87_fld: invalid size %u", size);
1196 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, c1, OP_SIZE_4, false, c2, arg1));
/*
 * cgen_x87_fild - emit an x87 integer load (FILD) of the given size.
 *
 * One operand descriptor is consumed from ctx->code_position.  A pair of
 * bytes (c1, c2) is selected and handed to cgen_rm_insn() together with the
 * operand.  An unsupported size is reported through internal().
 * NOTE(review): the constant names suggest 16-, 32- and 64-bit memory forms
 * chosen by `size` - confirm against the case labels.
 */
1200 static bool attr_w cgen_x87_fild(struct codegen_context *ctx, unsigned size)
1203 uint8_t *arg1 = ctx->code_position;
1204 ctx->code_position = arg1 + arg_size(*arg1);
1207 c1 = X87_FILD_M16; c2 = X87_FILD_M16_X; break;
1209 c1 = X87_FILD_M32; c2 = X87_FILD_M32_X; break;
1211 c1 = X87_FILD_M64; c2 = X87_FILD_M64_X; break;
1213 internal(file_line, "cgen_x87_fild: invalid size %u", size);
1215 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, c1, OP_SIZE_4, false, c2, arg1));
/*
 * cgen_x87_fstp - emit an x87 store-and-pop (FSTP) of the given size.
 *
 * One operand descriptor is consumed from ctx->code_position.  A pair of
 * bytes (c1, c2) is selected and handed to cgen_rm_insn() together with the
 * operand.  An unsupported size is reported through internal().
 * NOTE(review): the constant names suggest 32-, 64- and 80-bit forms chosen
 * by `size` - confirm against the case labels.
 */
1219 static bool attr_w cgen_x87_fstp(struct codegen_context *ctx, unsigned size)
1222 uint8_t *arg1 = ctx->code_position;
1223 ctx->code_position = arg1 + arg_size(*arg1);
/* Operands in the R_ST0..R_ST7 register range are singled out by this test. */
1224 if (arg1[0] >= R_ST0 && arg1[0] <= R_ST7)
1228 c1 = X87_FSTP_M32; c2 = X87_FSTP_M32_X; break;
1230 c1 = X87_FSTP_RM64; c2 = X87_FSTP_RM64_X; break;
1232 c1 = X87_FSTP_M80; c2 = X87_FSTP_M80_X; break;
1234 internal(file_line, "cgen_x87_fstp: invalid size %u", size);
1236 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, c1, OP_SIZE_4, false, c2, arg1));
/*
 * cgen_x87_fistp - emit an x87 integer store-and-pop (FISTP) of the given
 * size.
 *
 * One operand descriptor is consumed from ctx->code_position.  A pair of
 * bytes (c1, c2) is selected and handed to cgen_rm_insn() together with the
 * operand.  An unsupported size is reported through internal().
 * NOTE(review): the constant names suggest 16-, 32- and 64-bit memory forms
 * chosen by `size` - confirm against the case labels.
 */
1240 static bool attr_w cgen_x87_fistp(struct codegen_context *ctx, unsigned size)
1243 uint8_t *arg1 = ctx->code_position;
1244 ctx->code_position = arg1 + arg_size(*arg1);
1247 c1 = X87_FISTP_M16; c2 = X87_FISTP_M16_X; break;
1249 c1 = X87_FISTP_M32; c2 = X87_FISTP_M32_X; break;
1251 c1 = X87_FISTP_M64; c2 = X87_FISTP_M64_X; break;
1253 internal(file_line, "cgen_x87_fistp: invalid size %u", size);
1255 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, c1, OP_SIZE_4, false, c2, arg1));
/*
 * cgen_x87_fisttp - emit an x87 FISTTP instruction of the given size.
 *
 * One operand descriptor is consumed from ctx->code_position.  A pair of
 * bytes (c1, c2) is selected and handed to cgen_rm_insn() together with the
 * operand.  An unsupported size is reported through internal().
 * NOTE(review): the constant names suggest 16-, 32- and 64-bit memory forms
 * chosen by `size` - confirm against the case labels.
 */
1259 static bool attr_w cgen_x87_fisttp(struct codegen_context *ctx, unsigned size)
1262 uint8_t *arg1 = ctx->code_position;
1263 ctx->code_position = arg1 + arg_size(*arg1);
1266 c1 = X87_FISTTP_M16; c2 = X87_FISTTP_M16_X; break;
1268 c1 = X87_FISTTP_M32; c2 = X87_FISTTP_M32_X; break;
1270 c1 = X87_FISTTP_M64; c2 = X87_FISTTP_M64_X; break;
1272 internal(file_line, "cgen_x87_fisttp: invalid size %u", size);
1274 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, c1, OP_SIZE_4, false, c2, arg1));
/*
 * cgen_x87_fcomp - emit an x87 compare-and-pop (FCOMP) against one operand.
 *
 * One operand descriptor is consumed from ctx->code_position.  The byte c1
 * is X87_FALU_ST_RM32 when the operand's first byte is below ARG_REGS_MAX;
 * otherwise it is chosen by `size`.  An unsupported size is reported
 * through internal().
 * NOTE(review): no assignment to c2 is visible before it is passed to
 * cgen_rm_insn() - verify that it is set on every path.
 */
1278 static bool attr_w cgen_x87_fcomp(struct codegen_context *ctx, unsigned size)
1281 uint8_t *arg1 = ctx->code_position;
1282 ctx->code_position = arg1 + arg_size(*arg1);
1283 if (arg1[0] < ARG_REGS_MAX) {
1284 c1 = X87_FALU_ST_RM32;
1285 } else switch (size) {
1287 c1 = X87_FALU_ST_RM32; break;
1289 c1 = X87_FALU_ST_M64; break;
1291 internal(file_line, "cgen_x87_fcomp: invalid size %u", size);
1294 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, c1, OP_SIZE_4, false, c2, arg1));
/*
 * cgen_x87_alu - emit an x87 arithmetic instruction on one operand.
 *
 * One operand descriptor is consumed from ctx->code_position.  The byte c1
 * is X87_FALU_ST_RM32 when the operand's first byte is below ARG_REGS_MAX;
 * otherwise it is chosen by `size`.  The byte c2 is one of X87_ALU_ADD,
 * X87_ALU_SUB, X87_ALU_MUL or X87_ALU_DIV, chosen by `aux`.  Both are handed
 * to cgen_rm_insn() together with the operand.
 *
 * An unsupported size or operation is reported through internal(); both
 * diagnostics name this function so a failure can be traced to the right
 * place.
 */
1298 static bool attr_w cgen_x87_alu(struct codegen_context *ctx, unsigned size, unsigned aux)
1301 uint8_t *arg1 = ctx->code_position;
1302 ctx->code_position = arg1 + arg_size(*arg1);
1303 if (arg1[0] < ARG_REGS_MAX) {
1304 c1 = X87_FALU_ST_RM32;
1305 } else switch (size) {
1307 c1 = X87_FALU_ST_RM32; break;
1309 c1 = X87_FALU_ST_M64; break;
1311 internal(file_line, "cgen_x87_alu: invalid size %u", size);
1315 c2 = X87_ALU_ADD; break;
1317 c2 = X87_ALU_SUB; break;
1319 c2 = X87_ALU_MUL; break;
1321 c2 = X87_ALU_DIV; break;
1323 internal(file_line, "cgen_x87_alu: invalid operation %u", aux);
1325 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, c1, OP_SIZE_4, false, c2, arg1));
/*
 * cgen_x87_alup - emit an x87 arithmetic instruction using the
 * X87_FALUP_STi_ST0 opcode.
 *
 * One operand descriptor is consumed from ctx->code_position.  The byte c2
 * is one of X87_ALU_ADD, X87_ALU_SUB, X87_ALU_MUL or X87_ALU_DIV, chosen by
 * `aux`, and is handed to cgen_rm_insn() together with the operand.
 *
 * An unsupported operation is reported through internal(); the diagnostic
 * names this function so a failure can be traced to the right place.
 */
1329 static bool attr_w cgen_x87_alup(struct codegen_context *ctx, unsigned aux)
1332 uint8_t *arg1 = ctx->code_position;
1333 ctx->code_position = arg1 + arg_size(*arg1);
1336 c2 = X87_ALU_ADD; break;
1338 c2 = X87_ALU_SUB; break;
1340 c2 = X87_ALU_MUL; break;
1342 c2 = X87_ALU_DIV; break;
1344 internal(file_line, "cgen_x87_alup: invalid operation %u", aux);
1346 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, X87_FALUP_STi_ST0, OP_SIZE_4, false, c2, arg1));
/*
 * resolve_relocation - patch a jump displacement into the generated code.
 *
 * The displacement is the label's position minus the position just past the
 * displacement field: reloc->position plus 1 byte for JMP_SHORT, plus 4
 * bytes otherwise.  The value is range-checked and copied into ctx->mcode at
 * reloc->position.  An unknown relocation length is reported through
 * internal().
 * NOTE(review): the result of a failed range check is not visible here;
 * presumably the function reports failure so the caller can retry with a
 * longer jump - confirm.
 */
1350 static bool attr_w resolve_relocation(struct codegen_context *ctx, struct relocation *reloc)
1352 int64_t offs = (int64_t)ctx->label_to_pos[reloc->label_id] - (int64_t)(reloc->position + (reloc->length == JMP_SHORT ? 1 : 4));
1353 switch (reloc->length) {
/* 8-bit displacement: must pass imm_is_8bit(); one byte is written. */
1356 if (!imm_is_8bit(offs))
1359 memcpy(ctx->mcode + reloc->position, &i8, 1);
/* 32-bit displacement: must pass imm_is_32bit(); four bytes are written. */
1364 if (!imm_is_32bit(offs))
1367 memcpy(ctx->mcode + reloc->position, &i32, 4);
1371 internal(file_line, "resolve_relocation: invalid relocation length %u", reloc->length);
/*
 * cgen_insn - emit machine code for one intermediate instruction word.
 *
 * Dispatches on insn_opcode(insn).  Operand size, auxiliary value,
 * flag-writing and jump size are read from the word with insn_op_size(),
 * insn_aux(), insn_writes_flags() and insn_jump_size().  Operand descriptors
 * are read from ctx->code_position, which is advanced either by the cgen_*
 * helper called or explicitly with arg_size().  Most cases validate the
 * operand size before emitting anything.  An unknown opcode is reported
 * through internal().
 */
1377 static bool attr_w cgen_insn(struct codegen_context *ctx, uint32_t insn)
1381 /*debug("insn: %08x", insn);*/
1382 switch (insn_opcode(insn)) {
/* Return with immediate: a two-byte value is read with cget_two() before the opcode is emitted. */
1393 imm16 = cget_two(ctx);
1394 cgen_one(X86_RET_IMM16);
/* Indirect call: X86_FF opcode with X86_FF_CALL_INDIRECT, then skip the operand descriptor. */
1403 case INSN_CALL_INDIRECT:
1404 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, X86_FF, OP_SIZE_4, false, X86_FF_CALL_INDIRECT, ctx->code_position));
1405 ctx->code_position += arg_size(*ctx->code_position);
1408 g(cgen_mov(ctx, insn_op_size(insn)));
1411 g(cgen_movsx(ctx, insn_op_size(insn)));
1414 g(cgen_alu(ctx, insn_op_size(insn), 7));
1417 g(cgen_test(ctx, insn_op_size(insn)));
/*
 * ALU forms: sizes below OP_SIZE_4 are singled out by the check.  When the
 * instruction does not write flags and the size is at most OP_SIZE_8, only
 * ALU_ADD passes the check and it is emitted through cgen_lea(); otherwise
 * cgen_alu() is used.
 */
1420 case INSN_ALU_FLAGS:
1421 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1423 if (!insn_writes_flags(insn) && insn_op_size(insn) <= OP_SIZE_8) {
1424 if (unlikely(insn_aux(insn) != ALU_ADD))
1426 g(cgen_lea(ctx, insn_op_size(insn)));
1429 g(cgen_alu(ctx, insn_op_size(insn), insn_aux(insn)));
/* The *_PARTIAL variants check the opposite condition: sizes of OP_SIZE_4 and above are singled out. */
1431 case INSN_ALU_PARTIAL:
1432 case INSN_ALU_FLAGS_PARTIAL:
1433 if (unlikely(insn_op_size(insn) >= OP_SIZE_4))
1435 g(cgen_alu(ctx, insn_op_size(insn), insn_aux(insn)));
1438 case INSN_ALU1_FLAGS:
1439 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1441 g(cgen_alu1(ctx, insn_op_size(insn), insn_aux(insn), insn_writes_flags(insn)));
1443 case INSN_ALU1_PARTIAL:
1444 case INSN_ALU1_FLAGS_PARTIAL:
1445 if (unlikely(insn_op_size(insn) >= OP_SIZE_4))
1447 g(cgen_alu1(ctx, insn_op_size(insn), insn_aux(insn), insn_writes_flags(insn)));
1450 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1452 g(cgen_lea3(ctx, insn_op_size(insn), insn_aux(insn)));
1455 if (unlikely(insn_op_size(insn) < OP_SIZE_4))
1457 g(cgen_rot(ctx, insn_op_size(insn), insn_aux(insn), insn_writes_flags(insn)));
1459 case INSN_ROT_PARTIAL:
1460 if (unlikely(insn_op_size(insn) >= OP_SIZE_4))
1462 g(cgen_rot(ctx, insn_op_size(insn), insn_aux(insn), insn_writes_flags(insn)));
/* Bit-test forms: OP_SIZE_1 and instructions that do not write flags are singled out by the check. */
1465 if (unlikely(insn_op_size(insn) == OP_SIZE_1) || unlikely(!insn_writes_flags(insn)))
1467 g(cgen_bt(ctx, insn_op_size(insn)));
1470 if (unlikely(insn_op_size(insn) == OP_SIZE_1) || unlikely(!insn_writes_flags(insn)))
1472 g(cgen_btx(ctx, insn_op_size(insn), insn_aux(insn)));
1475 g(cgen_mul_l(ctx, insn_op_size(insn), insn_aux(insn)));
1478 g(cgen_div_l(ctx, insn_op_size(insn), insn_aux(insn)));
/*
 * Sign-extension within the accumulator: OP_SIZE_8 gets a REX prefix with
 * the W bit, and both operand bytes are compared against R_AX.
 */
1481 if (unlikely(insn_op_size(insn) <= OP_SIZE_2))
1483 if (insn_op_size(insn) == OP_SIZE_8)
1484 cgen_rex(X86_REX | X86_REX_W);
1485 if (unlikely(cget_one(ctx) != R_AX))
1487 if (unlikely(cget_one(ctx) != R_AX))
/* The 16-bit variant emits the operand-size prefix byte (X86_OP_SIZE). */
1491 case INSN_CBW_PARTIAL:
1492 if (unlikely(insn_op_size(insn) != OP_SIZE_2))
1494 if (unlikely(cget_one(ctx) != R_AX))
1496 if (unlikely(cget_one(ctx) != R_AX))
1498 cgen_one(X86_OP_SIZE);
/* Sign-extension into DX: the operand bytes are compared against R_DX, then R_AX. */
1502 if (unlikely(insn_op_size(insn) <= OP_SIZE_2))
1504 if (unlikely(cget_one(ctx) != R_DX))
1506 if (unlikely(cget_one(ctx) != R_AX))
1508 if (insn_op_size(insn) == OP_SIZE_8)
1509 cgen_rex(X86_REX | X86_REX_W);
/* The 16-bit variant reads three operand bytes (R_DX, R_AX, R_DX) and emits the operand-size prefix. */
1512 case INSN_CWD_PARTIAL:
1513 if (unlikely(insn_op_size(insn) != OP_SIZE_2))
1515 if (unlikely(cget_one(ctx) != R_DX))
1517 if (unlikely(cget_one(ctx) != R_AX))
1519 if (unlikely(cget_one(ctx) != R_DX))
1521 cgen_one(X86_OP_SIZE);
/* SETcc: the condition is the low four bits of insn_aux() added to X86_0F_SETCC_RM8; one operand is skipped. */
1525 if (unlikely(insn_op_size(insn) != OP_SIZE_1))
1527 g(cgen_rm_insn(ctx, -1, PREFIX_0F, X86_0F_SETCC_RM8 + (insn_aux(insn) & 0xf), OP_SIZE_1, false, 0, ctx->code_position));
1528 ctx->code_position += arg_size(*ctx->code_position);
/* Same encoding as above, but two operand descriptors are skipped afterwards. */
1530 case INSN_SET_COND_PARTIAL:
1531 if (unlikely(insn_op_size(insn) != OP_SIZE_1))
1533 g(cgen_rm_insn(ctx, -1, PREFIX_0F, X86_0F_SETCC_RM8 + (insn_aux(insn) & 0xf), OP_SIZE_1, false, 0, ctx->code_position));
1534 ctx->code_position += arg_size(*ctx->code_position);
1535 ctx->code_position += arg_size(*ctx->code_position);
1539 if (unlikely(insn_op_size(insn) == OP_SIZE_1))
1541 g(cgen_cmov(ctx, insn_op_size(insn), insn_aux(insn)));
1544 g(cgen_memcpy(ctx));
1547 g(cgen_memset(ctx));
/* SSE floating-point operations are delegated to the cgen_sse_* helpers. */
1550 g(cgen_sse_cmp(ctx, insn_op_size(insn)));
1553 g(cgen_sse_alu(ctx, insn_op_size(insn), insn_aux(insn)));
1556 g(cgen_sse_alu1(ctx, insn_op_size(insn), insn_aux(insn)));
/* The integer size comes from the opcode (INT32 -> OP_SIZE_4, otherwise OP_SIZE_8); the FP size is insn_op_size(). */
1558 case INSN_FP_FROM_INT32:
1559 case INSN_FP_FROM_INT64:
1560 g(cgen_sse_from_int(ctx, insn_opcode(insn) == INSN_FP_FROM_INT32 ? OP_SIZE_4 : OP_SIZE_8, insn_op_size(insn)));
1562 case INSN_FP_TO_INT32:
1563 case INSN_FP_TO_INT64:
1564 g(cgen_sse_to_int(ctx, insn_opcode(insn) == INSN_FP_TO_INT32 ? OP_SIZE_4 : OP_SIZE_8, insn_op_size(insn)));
/* For size conversion, insn_op_size() is the source size and insn_aux() the destination size. */
1567 g(cgen_sse_cvt(ctx, insn_op_size(insn), insn_aux(insn)));
/* x87 operations: delegated to the cgen_x87_* helpers or emitted directly as fixed byte pairs. */
1570 g(cgen_x87_fld(ctx, insn_op_size(insn)));
1573 g(cgen_x87_fild(ctx, insn_op_size(insn)));
1576 g(cgen_x87_fstp(ctx, insn_op_size(insn)));
1578 case INSN_X87_FISTP:
1579 g(cgen_x87_fistp(ctx, insn_op_size(insn)));
1581 case INSN_X87_FISTTP:
1582 g(cgen_x87_fisttp(ctx, insn_op_size(insn)));
1584 case INSN_X87_FCOMP:
1585 g(cgen_x87_fcomp(ctx, insn_op_size(insn)));
1587 case INSN_X87_FCOMPP:
1588 cgen_one(X87_FCOMPP);
1589 cgen_one(X87_FCOMPP_2);
/* FCOMIP: the low three bits of the operand byte are added to the second opcode byte. */
1591 case INSN_X87_FCOMIP:
1592 imm8 = cget_one(ctx);
1593 cgen_one(X87_FCOMIP);
1594 cgen_one(X87_FCOMIP_2 + (imm8 & 7));
1597 g(cgen_x87_alu(ctx, insn_op_size(insn), insn_aux(insn)));
1600 g(cgen_x87_alup(ctx, insn_aux(insn)));
1604 cgen_one(X87_FCHS_2);
1606 case INSN_X87_FSQRT:
1607 cgen_one(X87_FSQRT);
1608 cgen_one(X87_FSQRT_2);
1610 case INSN_X87_FRNDINT:
1611 cgen_one(X87_FRNDINT);
1612 cgen_one(X87_FRNDINT_2);
/* FNSTSW: both operand bytes are compared against R_AX before the two opcode bytes are emitted. */
1614 case INSN_X87_FNSTSW:
1615 if (unlikely(cget_one(ctx) != R_AX))
1617 if (unlikely(cget_one(ctx) != R_AX))
1619 cgen_one(X87_FNSTSW);
1620 cgen_one(X87_FNSTSW_2);
1622 case INSN_X87_FLDCW:
1623 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, X87_FLDCW, OP_SIZE_4, false, X87_FLDCW_X, ctx->code_position));
1624 ctx->code_position += arg_size(*ctx->code_position);
/*
 * Unconditional jump: JMP_SHORT and JMP_SHORTEST both use X86_JMP_8 with a
 * JMP_SHORT relocation; JMP_LONG uses X86_JMP_16 with a JMP_LONG relocation.
 */
1627 if (insn_jump_size(insn) == JMP_SHORT || insn_jump_size(insn) == JMP_SHORTEST) {
1628 cgen_one(X86_JMP_8);
1629 g(add_relocation(ctx, JMP_SHORT, 0, NULL));
1631 } else if (likely(insn_jump_size(insn) == JMP_LONG)) {
1632 cgen_one(X86_JMP_16);
1633 g(add_relocation(ctx, JMP_LONG, 0, NULL));
/* Conditional jump: the low four bits of insn_aux() are added to the base opcode. */
1640 if (insn_jump_size(insn) == JMP_SHORT || insn_jump_size(insn) == JMP_SHORTEST) {
1641 cgen_one(X86_JCC_8 + (insn_aux(insn) & 0xf));
1642 g(add_relocation(ctx, JMP_SHORT, 0, NULL));
1644 } else if (likely(insn_jump_size(insn) == JMP_LONG)) {
1646 cgen_one(X86_0F_JCC_16 + (insn_aux(insn) & 0xf));
1647 g(add_relocation(ctx, JMP_LONG, 0, NULL));
/* Indirect jump: X86_FF opcode with X86_FF_JMP_INDIRECT, then skip the operand descriptor. */
1653 case INSN_JMP_INDIRECT:
1654 g(cgen_rm_insn(ctx, -1, PREFIX_NONE, X86_FF, OP_SIZE_4, false, X86_FF_JMP_INDIRECT, ctx->code_position));
1655 ctx->code_position += arg_size(*ctx->code_position);
1659 internal(file_line, "cgen_insn: invalid insn %08lx", (unsigned long)insn);