1 /**************************************************************************
3 * Copyright (C) 1999-2005 Brian Paul All Rights Reserved.
5 * Permission is hereby granted, free of charge, to any person obtaining a
6 * copy of this software and associated documentation files (the "Software"),
7 * to deal in the Software without restriction, including without limitation
8 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
9 * and/or sell copies of the Software, and to permit persons to whom the
10 * Software is furnished to do so, subject to the following conditions:
12 * The above copyright notice and this permission notice shall be included
13 * in all copies or substantial portions of the Software.
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
16 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
19 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 **************************************************************************/
24 #ifndef _RTASM_X86SSE_H_
25 #define _RTASM_X86SSE_H_
27 #include "pipe/p_compiler.h"
28 #include "pipe/p_config.h"
30 #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64)
32 /* It is up to the caller to ensure that instructions issued are
33 * suitable for the host cpu. There are no checks made in this module
34 * for mmx/sse/sse2 support on the cpu.
39 unsigned mod
:2; /* mod_REG if this is just a register */
40 int disp
:24; /* only +/- 23bits of offset - should be enough... */
48 #define X86_SSE4_1 0x20
56 unsigned stack_offset
:16;
60 unsigned char error_overflow
[4];
70 /* Values for mod field of modr/m byte
101 cc_NO
, /* not overflow */
102 cc_NAE
, /* not above or equal / carry */
103 cc_AE
, /* above or equal / not carry */
104 cc_E
, /* equal / zero */
105 cc_NE
/* not equal / not zero */
/**
 * Generic function-pointer type: a function taking no arguments and
 * returning nothing.  Used as the return type of x86_get_func().
 */
typedef void (*x86_func)(void);
127 /* Begin/end/retrieve function creation:
137 /* make this read a member of x86_function if target != host is desired */
138 static INLINE
enum x86_target
x86_target( struct x86_function
* p
)
142 #elif defined(_WIN64)
143 return X86_64_WIN64_ABI
;
144 #elif defined(PIPE_ARCH_X86_64)
145 return X86_64_STD_ABI
;
149 static INLINE
unsigned x86_target_caps( struct x86_function
* p
)
154 void x86_init_func( struct x86_function
*p
);
155 void x86_init_func_size( struct x86_function
*p
, unsigned code_size
);
156 void x86_release_func( struct x86_function
*p
);
157 x86_func
x86_get_func( struct x86_function
*p
);
/*
 * Debug helper taking a register descriptor by value.
 * NOTE(review): presumably prints `reg` in human-readable form; the output
 * destination is not visible from this header.
 */
void x86_print_reg(struct x86_reg reg);
164 /* Create and manipulate registers and regmem values:
/*
 * Build a register descriptor from a register file and a register name
 * within that file.  The descriptor is returned by value.
 */
struct x86_reg x86_make_reg(enum x86_reg_file file, enum x86_reg_name idx);
169 struct x86_reg
x86_make_disp( struct x86_reg reg
,
/*
 * Derive one register/regmem descriptor from another.
 *
 * NOTE(review): inferred from the names -- x86_deref presumably yields a
 * memory operand addressed by `reg`, and x86_get_base_reg presumably yields
 * the plain register underlying a regmem value.  Verify in the
 * implementation.
 */
struct x86_reg x86_deref(struct x86_reg reg);
struct x86_reg x86_get_base_reg(struct x86_reg reg);
177 /* Labels, jumps and fixup:
/*
 * x86_get_label returns an int label for `p`; x86_jmp() accepts such a
 * label.  NOTE(review): presumably the label is the current emit position
 * -- confirm in the implementation.
 */
int x86_get_label(struct x86_function *p);

/*
 * NOTE(review): by its name this presumably emits a REX.W prefix for 64-bit
 * operand size -- confirm in the implementation.
 */
void x64_rexw(struct x86_function *p);
183 void x86_jcc( struct x86_function
*p
,
187 int x86_jcc_forward( struct x86_function
*p
,
/*
 * Forward jump / call whose target is not yet known.  Each returns an int.
 * NOTE(review): presumably that int is a fixup handle to be resolved later
 * by x86_fixup_fwd_jump() -- confirm in the implementation.
 */
int x86_jmp_forward(struct x86_function *p);
int x86_call_forward(struct x86_function *p);
194 void x86_fixup_fwd_jump( struct x86_function
*p
,
/* Jump to a previously obtained int label (see x86_get_label()). */
void x86_jmp(struct x86_function *p, int label);

/* void x86_call( struct x86_function *p, void (*label)() ); */

/*
 * Call through a register operand.  The function-pointer variant above is
 * left commented out, as in the original header.
 */
void x86_call(struct x86_function *p, struct x86_reg reg);
/*
 * Integer operations taking a destination operand and an int immediate.
 * NOTE(review): presumably each emits the like-named x86 instruction
 * (mov/add/or/and/sub/xor/cmp) with `imm` as the immediate operand --
 * confirm encodings in the implementation.
 */
void x86_mov_reg_imm(struct x86_function *p, struct x86_reg dst, int imm);
void x86_add_imm(struct x86_function *p, struct x86_reg dst, int imm);
void x86_or_imm(struct x86_function *p, struct x86_reg dst, int imm);
void x86_and_imm(struct x86_function *p, struct x86_reg dst, int imm);
void x86_sub_imm(struct x86_function *p, struct x86_reg dst, int imm);
void x86_xor_imm(struct x86_function *p, struct x86_reg dst, int imm);
void x86_cmp_imm(struct x86_function *p, struct x86_reg dst, int imm);
/*
 * Shuffle-immediate helpers for sse_shufps() and sse2_pshufd().
 *
 * SHUF(x, y, z, w) packs four 2-bit selectors into one byte-sized value:
 * x in bits 0-1, y in bits 2-3, z in bits 4-5 and w in bits 6-7.
 *
 * SHUF_NOOP is the selector pattern (0,1,2,3), i.e. each lane selects
 * itself.  It previously expanded to RSW(...), a macro that this header
 * never defines, so any use failed to compile; it now goes through SHUF.
 *
 * GET_SHUF(swz, idx) extracts the 2-bit selector number `idx` (0..3) from a
 * value built by SHUF.
 */
#define SHUF(_x,_y,_z,_w)   (((_x)<<0) | ((_y)<<2) | ((_z)<<4) | ((_w)<<6))
#define SHUF_NOOP           SHUF(0,1,2,3)
#define GET_SHUF(swz, idx)  (((swz) >> ((idx)*2)) & 0x3)
/*
 * MMX operations.
 * NOTE(review): presumably each emits the MMX instruction named after the
 * `mmx_` prefix into `p`; no CPU-capability check is made by this module.
 */
void mmx_emms(struct x86_function *p);
void mmx_movd(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void mmx_movq(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void mmx_packssdw(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void mmx_packuswb(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
/*
 * SSE2 data-movement operations (dst <- src).
 * NOTE(review): presumably each emits the instruction named after the
 * `sse2_` prefix; alignment requirements of the aligned forms are the
 * caller's responsibility -- confirm in the implementation.
 */
void sse2_movd(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_movq(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_movdqu(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_movdqa(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_movsd(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_movupd(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_movapd(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
/*
 * SSE2 conversion operations (dst <- convert(src)).
 * NOTE(review): presumably each emits the conversion instruction named
 * after the `sse2_` prefix -- confirm in the implementation.
 */
void sse2_cvtps2dq(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_cvttps2dq(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_cvtdq2ps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_cvtsd2ss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_cvtpd2ps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
/*
 * SSE2 pack, shuffle and reciprocal operations.
 *
 * sse2_movd is declared once, with the other SSE2 moves earlier in this
 * header; the redundant second prototype that used to open this group has
 * been removed.
 *
 * For the pshuf* operations `shuf` is the 8-bit shuffle immediate; it can
 * be built with the SHUF() macro.
 *
 * NOTE(review): presumably each function emits the instruction named after
 * the `sse2_` prefix -- confirm in the implementation.
 */
void sse2_packssdw(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_packsswb(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_packuswb(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_pshufd(struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
                 unsigned char shuf);
void sse2_pshuflw(struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
                  unsigned char shuf);
void sse2_pshufhw(struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
                  unsigned char shuf);
void sse2_rcpps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_rcpss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
/*
 * SSE2 "unpack low" operations at byte, word, doubleword and quadword
 * granularity.
 * NOTE(review): presumably each emits the like-named PUNPCKL* instruction
 * -- confirm in the implementation.
 */
void sse2_punpcklbw(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_punpcklwd(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_punpckldq(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse2_punpcklqdq(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
/*
 * SSE2 shifts of `dst` by an immediate count.
 * NOTE(review): by their names these are presumably logical-left (psll*),
 * logical-right (psrl*) and arithmetic-right (psra*) shifts on word (w),
 * doubleword (d) and quadword (q) lanes; the valid range of `imm` is not
 * visible from this header -- confirm in the implementation.
 */
void sse2_psllw_imm(struct x86_function *p, struct x86_reg dst, unsigned imm);
void sse2_pslld_imm(struct x86_function *p, struct x86_reg dst, unsigned imm);
void sse2_psllq_imm(struct x86_function *p, struct x86_reg dst, unsigned imm);

void sse2_psrlw_imm(struct x86_function *p, struct x86_reg dst, unsigned imm);
void sse2_psrld_imm(struct x86_function *p, struct x86_reg dst, unsigned imm);
void sse2_psrlq_imm(struct x86_function *p, struct x86_reg dst, unsigned imm);

void sse2_psraw_imm(struct x86_function *p, struct x86_reg dst, unsigned imm);
void sse2_psrad_imm(struct x86_function *p, struct x86_reg dst, unsigned imm);
/*
 * SSE2 bitwise OR (dst, src).
 * NOTE(review): presumably emits POR -- confirm in the implementation.
 *
 * sse2_pshuflw, sse2_pshufhw and sse2_pshufd are declared once, earlier in
 * this header, taking an `unsigned char shuf` immediate.  The second set of
 * prototypes that used to follow here (same functions, different parameter
 * names, `uint8_t imm`) was redundant and has been removed so that each
 * function has a single declaration.
 */
void sse2_por(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
/*
 * SSE prefetch operations taking the address operand `ptr`.
 * NOTE(review): presumably these emit PREFETCHNTA / PREFETCHT0 / PREFETCHT1
 * -- confirm in the implementation.
 */
void sse_prefetchnta(struct x86_function *p, struct x86_reg ptr);
void sse_prefetch0(struct x86_function *p, struct x86_reg ptr);
void sse_prefetch1(struct x86_function *p, struct x86_reg ptr);

/*
 * NOTE(review): presumably emits MOVNTPS (non-temporal packed-single store)
 * -- confirm in the implementation.
 */
void sse_movntps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
/*
 * SSE arithmetic / logical operations (dst, src).
 * NOTE(review): presumably each emits the instruction named after the
 * `sse_` prefix -- confirm in the implementation.
 */
void sse_addps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_addss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_cvtps2pi(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_divss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_andnps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_andps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
284 void sse_cmpps( struct x86_function
*p
, struct x86_reg dst
, struct x86_reg src
,
/*
 * SSE min/max, move, multiply, logical, subtract and reciprocal-sqrt
 * operations (dst, src).
 * NOTE(review): presumably each emits the instruction named after the
 * `sse_` prefix -- confirm in the implementation.
 */
void sse_maxps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_maxss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_minps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movaps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movhlps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movhps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movlhps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movlps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_movups(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_mulps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_mulss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_orps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_xorps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_subps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_rsqrtps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_rsqrtss(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
/*
 * SSE shuffle, unpack and mask-extraction operations.
 * For sse_shufps, `shuf` is the 8-bit shuffle immediate; it can be built
 * with the SHUF() macro.
 * NOTE(review): presumably each emits the instruction named after the
 * `sse_` prefix -- confirm in the implementation.
 */
void sse_shufps(struct x86_function *p, struct x86_reg dest, struct x86_reg arg0,
                unsigned char shuf);
void sse_unpckhps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_unpcklps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void sse_pmovmskb(struct x86_function *p, struct x86_reg dest, struct x86_reg src);
void sse_movmskps(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
/*
 * General-purpose integer operations: arithmetic, compare, conditional
 * move, inc/dec, lea and the mov family.
 *
 * NOTE(review): presumably each emits the instruction named after the
 * `x86_` / `x64_` prefix.  By their names the 8/16 suffixes presumably
 * select the operand width and x64_mov64 a 64-bit move -- confirm in the
 * implementation.
 */
void x86_add(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_and(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_cmovcc(struct x86_function *p, struct x86_reg dst, struct x86_reg src,
                enum x86_cc cc);
void x86_cmp(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_dec(struct x86_function *p, struct x86_reg reg);
void x86_inc(struct x86_function *p, struct x86_reg reg);
void x86_lea(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_mov(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x64_mov64(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_mov8(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_mov16(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_movzx8(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_movzx16(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_mov_imm(struct x86_function *p, struct x86_reg dst, int imm);
void x86_mov8_imm(struct x86_function *p, struct x86_reg dst, uint8_t imm);
void x86_mov16_imm(struct x86_function *p, struct x86_reg dst, uint16_t imm);
/*
 * General-purpose integer operations: multiply/divide, logical, stack,
 * return, test, flag, byte-swap and shift-by-immediate.
 *
 * NOTE(review): presumably each emits the instruction named after the
 * `x86_` prefix; the valid range of the shift `imm` and the meaning of the
 * x86_retw `imm` are not visible from this header -- confirm in the
 * implementation.
 */
void x86_mul(struct x86_function *p, struct x86_reg src);
void x86_imul(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_or(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_pop(struct x86_function *p, struct x86_reg reg);
void x86_push(struct x86_function *p, struct x86_reg reg);
void x86_push_imm32(struct x86_function *p, int imm);
void x86_ret(struct x86_function *p);
void x86_retw(struct x86_function *p, unsigned short imm);
void x86_sub(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_test(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_xor(struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_sahf(struct x86_function *p);
void x86_div(struct x86_function *p, struct x86_reg src);
void x86_bswap(struct x86_function *p, struct x86_reg src);
void x86_shr_imm(struct x86_function *p, struct x86_reg reg, unsigned imm);
void x86_sar_imm(struct x86_function *p, struct x86_reg reg, unsigned imm);
void x86_shl_imm(struct x86_function *p, struct x86_reg reg, unsigned imm);
/*
 * Matching push/pop pair for registers around a cdecl call.
 * NOTE(review): presumably these save and restore the caller-saved
 * registers of the cdecl convention; which registers are involved is not
 * visible from this header -- confirm in the implementation.
 */
void x86_cdecl_caller_push_regs(struct x86_function *p);
void x86_cdecl_caller_pop_regs(struct x86_function *p);
/*
 * NOTE(review): presumably checks this module's tracked x87 stack depth for
 * `p`; what happens on failure is not visible here -- confirm in the
 * implementation.
 */
void x87_assert_stack_empty(struct x86_function *p);

/*
 * x87 FPU operations, part 1: f2xm1 .. fcomp.
 * NOTE(review): presumably each emits the x87 instruction named after the
 * `x87_` prefix -- confirm in the implementation.
 */
void x87_f2xm1(struct x86_function *p);
void x87_fabs(struct x86_function *p);
void x87_fadd(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_faddp(struct x86_function *p, struct x86_reg dst);
void x87_fchs(struct x86_function *p);
void x87_fclex(struct x86_function *p);
void x87_fcmovb(struct x86_function *p, struct x86_reg src);
void x87_fcmovbe(struct x86_function *p, struct x86_reg src);
void x87_fcmove(struct x86_function *p, struct x86_reg src);
void x87_fcmovnb(struct x86_function *p, struct x86_reg src);
void x87_fcmovnbe(struct x86_function *p, struct x86_reg src);
void x87_fcmovne(struct x86_function *p, struct x86_reg src);
void x87_fcom(struct x86_function *p, struct x86_reg dst);
void x87_fcomi(struct x86_function *p, struct x86_reg dst);
void x87_fcomip(struct x86_function *p, struct x86_reg dst);
void x87_fcomp(struct x86_function *p, struct x86_reg dst);
/*
 * x87 FPU operations, part 2: fcos .. fpop.
 * NOTE(review): presumably each emits the x87 instruction named after the
 * `x87_` prefix.  `x87_fprndint` and `x87_fpop` have no identically named
 * x87 mnemonic; check the implementation for what they emit.
 */
void x87_fcos(struct x86_function *p);
void x87_fdiv(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_fdivp(struct x86_function *p, struct x86_reg dst);
void x87_fdivr(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_fdivrp(struct x86_function *p, struct x86_reg dst);
void x87_fild(struct x86_function *p, struct x86_reg arg);
void x87_fist(struct x86_function *p, struct x86_reg dst);
void x87_fistp(struct x86_function *p, struct x86_reg dst);
void x87_fld(struct x86_function *p, struct x86_reg arg);
void x87_fld1(struct x86_function *p);
void x87_fldcw(struct x86_function *p, struct x86_reg arg);
void x87_fldl2e(struct x86_function *p);
void x87_fldln2(struct x86_function *p);
void x87_fldz(struct x86_function *p);
void x87_fmul(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_fmulp(struct x86_function *p, struct x86_reg dst);
void x87_fnclex(struct x86_function *p);
void x87_fprndint(struct x86_function *p);
void x87_fpop(struct x86_function *p);
/*
 * x87 FPU operations, part 3: fscale .. fucom.
 * NOTE(review): presumably each emits the x87 instruction named after the
 * `x87_` prefix -- confirm in the implementation.
 */
void x87_fscale(struct x86_function *p);
void x87_fsin(struct x86_function *p);
void x87_fsincos(struct x86_function *p);
void x87_fsqrt(struct x86_function *p);
void x87_fst(struct x86_function *p, struct x86_reg dst);
void x87_fstp(struct x86_function *p, struct x86_reg dst);
void x87_fsub(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_fsubp(struct x86_function *p, struct x86_reg dst);
void x87_fsubr(struct x86_function *p, struct x86_reg dst, struct x86_reg arg);
void x87_fsubrp(struct x86_function *p, struct x86_reg dst);
void x87_ftst(struct x86_function *p);
void x87_fxch(struct x86_function *p, struct x86_reg dst);
void x87_fxtract(struct x86_function *p);
void x87_fyl2x(struct x86_function *p);
void x87_fyl2xp1(struct x86_function *p);
void x87_fwait(struct x86_function *p);
void x87_fnstcw(struct x86_function *p, struct x86_reg dst);
void x87_fnstsw(struct x86_function *p, struct x86_reg dst);
void x87_fucompp(struct x86_function *p);
void x87_fucomp(struct x86_function *p, struct x86_reg arg);
void x87_fucom(struct x86_function *p, struct x86_reg arg);
/*
 * Return an operand referring to argument number `arg` of the function
 * being generated.  Push/pop activity is taken into account when the
 * reference is computed.
 *
 * Caveat: explicit manipulation of ESP by other instructions is not
 * tracked, so it is not reflected in the reference returned here.
 */
struct x86_reg x86_fn_arg(struct x86_function *p, unsigned arg);