1 /**************************************************************************
3 * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
6 * Permission is hereby granted, free of charge, to any person obtaining a
7 * copy of this software and associated documentation files (the
8 * "Software"), to deal in the Software without restriction, including
9 * without limitation the rights to use, copy, modify, merge, publish,
10 * distribute, sub license, and/or sell copies of the Software, and to
11 * permit persons to whom the Software is furnished to do so, subject to
12 * the following conditions:
14 * The above copyright notice and this permission notice (including the
15 * next paragraph) shall be included in all copies or substantial portions
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21 * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
22 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
26 **************************************************************************/
28 /* Authors: Keith Whitwell <keith@tungstengraphics.com> */
34 #include "pipe/p_config.h"
35 #include "tgsi/tgsi_exec.h"
43 #include "pipe/p_state.h"
44 #include "rtasm/rtasm_x86sse.h"
/* Capacities of the per-vertex register arrays declared further down in
 * this header: input[MAX_INPUTS][4], output[MAX_OUTPUTS][4],
 * temp[MAX_TEMPS][4] and internal[MAX_INTERNALS][4].
 * MAX_CONSTANTS and MAX_IMMEDIATES do not size any array visible here.
 */
55 #define MAX_INPUTS PIPE_MAX_ATTRIBS
56 #define MAX_OUTPUTS PIPE_MAX_SHADER_OUTPUTS
57 #define MAX_TEMPS TGSI_EXEC_NUM_TEMPS
58 #define MAX_CONSTANTS 1024 /** only used for sanity checking */
59 #define MAX_IMMEDIATES 1024 /** only used for sanity checking */
60 #define MAX_INTERNALS 8 /** see IMM_x values below */
/* Defined as TGSI_FILE_COUNT.
 * NOTE(review): presumably an extra "internal" register-file id numbered
 * just past the TGSI-defined files -- confirm against the users of this macro.
 */
62 #define AOS_FILE_INTERNAL TGSI_FILE_COUNT
/* One of the FPU_* values that the `fpucntl` member declared below may hold
 * (that member is commented "one of FPU_* above").
 * NOTE(review): any other FPU_* values are not visible in this view.
 */
65 #define FPU_RND_NEAREST 2
68 typedef void (PIPE_CDECL
*lit_func
)( struct aos_machine
*,
73 void PIPE_CDECL
aos_do_lit( struct aos_machine
*machine
,
86 struct shine_tab
*shine_tab
;
/* Lengths of the shine_tab[] and lit_info[] arrays declared below. */
89 #define MAX_SHINE_TAB 4
90 #define MAX_LIT_INFO 16
95 void *ptr
; /* updated per vertex */
101 /* This is the temporary storage used by all the aos_sse vs variants.
102 * Could one be created per context and reused by passing a pointer in
103 * at vs_variant creation? */
106 float input
[MAX_INPUTS
][4];
107 float output
[MAX_OUTPUTS
][4];
108 float temp
[MAX_TEMPS
][4];
109 float internal
[MAX_INTERNALS
][4];
111 float scale
[4]; /* viewport */
112 float translate
[4]; /* viewport */
114 float tmp
[2][4]; /* scratch space for LIT */
116 struct shine_tab shine_tab
[MAX_SHINE_TAB
];
117 struct lit_info lit_info
[MAX_LIT_INFO
];
121 ushort fpu_rnd_nearest
;
122 ushort fpu_rnd_neg_inf
;
124 ushort fpucntl
; /* one of FPU_* above */
126 const float (*immediates
)[4]; /* points to shader data */
127 const void *constants
[PIPE_MAX_CONSTANT_BUFFERS
]; /* points to draw data */
129 const struct aos_buffer
*buffer
; /* points to ? */
135 struct aos_compilation
{
136 struct x86_function
*func
;
137 struct draw_vs_variant_aos_sse
*vaos
;
139 unsigned insn_counter
;
140 unsigned num_immediates
;
151 unsigned x86_reg
[2]; /* one of X86_* */
153 boolean input_fetched
[PIPE_MAX_ATTRIBS
];
154 unsigned output_last_write
[PIPE_MAX_ATTRIBS
];
160 /* These are actually known values, but putting them in a struct
161 * like this helps to keep them in sync across the file. */
163 struct x86_reg tmp_EAX
;
164 struct x86_reg idx_EBX
; /* either start+i or &elt[i] */
165 struct x86_reg outbuf_ECX
;
166 struct x86_reg machine_EDX
;
167 struct x86_reg count_ESI
; /* decrements to zero */
168 struct x86_reg temp_EBP
;
169 struct x86_reg stack_ESP
;
/**
 * Returns a struct x86_reg for the compilation context \p cp.
 *
 * NOTE(review): going by the name, this presumably hands out an available
 * XMM register; the definition is not in this header -- confirm there.
 */
172 struct x86_reg
aos_get_xmm_reg( struct aos_compilation
*cp
);
/**
 * Takes the compilation context \p cp and an unsigned index \p idx;
 * returns nothing.
 *
 * NOTE(review): presumably the counterpart of aos_get_xmm_reg(), with
 * \p idx naming the XMM register being given back -- confirm against the
 * definition.
 */
173 void aos_release_xmm_reg( struct aos_compilation
*cp
, unsigned idx
);
175 void aos_adopt_xmm_reg( struct aos_compilation
*cp
,
/**
 * Operates on the compilation context \p cp; returns nothing.
 *
 * NOTE(review): the name suggests it spills every live XMM register back
 * to memory, but the definition is not visible here -- confirm.
 */
181 void aos_spill_all( struct aos_compilation
*cp
);
183 struct x86_reg
aos_get_shader_reg( struct aos_compilation
*cp
,
/**
 * Input-setup step for the compilation context \p cp.
 *
 * \param cp      compilation context.
 * \param linear  NOTE(review): presumably selects the linear run function
 *                over the elt-indexed one (cf. vaos_run_linear_func /
 *                vaos_run_elts_func) -- confirm against the definition.
 * \return boolean; presumably FALSE on failure -- TODO confirm.
 */
187 boolean
aos_init_inputs( struct aos_compilation
*cp
, boolean linear
);
/**
 * Input-fetch step for the compilation context \p cp.
 *
 * \param cp      compilation context.
 * \param linear  NOTE(review): presumably selects the linear run function
 *                over the elt-indexed one -- confirm against the definition.
 * \return boolean; presumably FALSE on failure -- TODO confirm.
 */
188 boolean
aos_fetch_inputs( struct aos_compilation
*cp
, boolean linear
);
/**
 * Input-increment step for the compilation context \p cp.
 *
 * \param cp      compilation context.
 * \param linear  NOTE(review): presumably selects the linear run function
 *                over the elt-indexed one -- confirm against the definition.
 * \return boolean; presumably FALSE on failure -- TODO confirm.
 */
189 boolean
aos_incr_inputs( struct aos_compilation
*cp
, boolean linear
);
/**
 * Output-emission step for the compilation context \p cp.
 *
 * \return boolean; presumably FALSE on failure -- TODO confirm against the
 *         definition, which is not in this header.
 */
191 boolean
aos_emit_outputs( struct aos_compilation
*cp
);
/* The IMM_x values referred to by the MAX_INTERNALS comment: eight values,
 * 0..7, matching MAX_INTERNALS == 8.  The trailing comment on each line
 * gives the four-component contents of that entry.
 * NOTE(review): presumably these index the internal[MAX_INTERNALS][4]
 * array and are what aos_get_internal() takes -- confirm.
 */
194 #define IMM_ONES 0 /* 1, 1,1,1 */
195 #define IMM_SWZ 1 /* 1,-1,0, 0xffffffff */
196 #define IMM_IDENTITY 2 /* 0, 0,0,1 */
197 #define IMM_INV_255 3 /* 1/255, 1/255, 1/255, 1/255 */
198 #define IMM_255 4 /* 255, 255, 255, 255 */
199 #define IMM_NEGS 5 /* -1,-1,-1,-1 */
200 #define IMM_RSQ 6 /* -.5,1.5,_,_ */
201 #define IMM_PSIZE 7 /* not really an immediate - updated each run */
203 struct x86_reg
aos_get_internal( struct aos_compilation
*cp
,
205 struct x86_reg
aos_get_internal_xmm( struct aos_compilation
*cp
,
209 #define AOS_ERROR(cp, msg) \
211 if (0) debug_printf("%s: x86 translation failed: %s\n", __FUNCTION__, msg); \
/* Values stored in the `unsigned x86_reg[2]` member declared earlier in
 * this header (that member is commented "one of X86_*").
 * NOTE(review): presumably each names the pointer currently held in an
 * x86 register (immediates, constants or buffers) -- confirm at the
 * users of aos_get_x86().
 */
217 #define X86_IMMEDIATES 1
218 #define X86_CONSTANTS 2
219 #define X86_BUFFERS 3
221 struct x86_reg
aos_get_x86( struct aos_compilation
*cp
,
226 typedef void (PIPE_CDECL
*vaos_run_elts_func
)( struct aos_machine
*,
227 const unsigned *elts
,
229 void *output_buffer
);
231 typedef void (PIPE_CDECL
*vaos_run_linear_func
)( struct aos_machine
*,
234 void *output_buffer
);
237 struct draw_vs_variant_aos_sse
{
238 struct draw_vs_variant base
;
239 struct draw_context
*draw
;
241 struct aos_buffer
*buffer
;
244 vaos_run_linear_func gen_run_linear
;
245 vaos_run_elts_func gen_run_elts
;
248 struct x86_function func
[2];