Hint added.
[AROS.git] / workbench / hidds / nouveau / xf86-video-nouveau / nv30_shaders.c
blob02ab9a1838d6651e38b846d9f834b717555ff34d
/*
 * Copyright 2007 Nouveau Project
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF
 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
23 #include "nv30_shaders.h"
24 #include "nv04_pushbuf.h"
26 void NV30_UploadFragProg(NVPtr pNv, nv_shader_t *shader, int *hw_offset)
28 uint32_t data, i;
29 uint32_t *map;
31 shader->hw_id = *hw_offset;
33 nouveau_bo_map(pNv->shader_mem, NOUVEAU_BO_WR);
34 map = pNv->shader_mem->map + *hw_offset;
35 for (i = 0; i < shader->size; i++) {
36 data = shader->data[i];
37 #if (X_BYTE_ORDER != X_LITTLE_ENDIAN)
38 data = ((data >> 16) | ((data & 0xffff) << 16));
39 #endif
40 map[i] = data;
42 nouveau_bo_unmap(pNv->shader_mem);
44 *hw_offset += (shader->size * sizeof(uint32_t));
45 *hw_offset = (*hw_offset + 63) & ~63;
48 void NV40_UploadVtxProg(NVPtr pNv, nv_shader_t *shader, int *hw_id)
50 struct nouveau_channel *chan = pNv->chan;
51 struct nouveau_grobj *curie = pNv->Nv3D;
52 int i;
54 shader->hw_id = *hw_id;
56 BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_FROM_ID, 1);
57 OUT_RING (chan, (shader->hw_id));
58 for (i=0; i<shader->size; i+=4) {
59 BEGIN_RING(chan, curie, NV40TCL_VP_UPLOAD_INST(0), 4);
60 OUT_RING (chan, shader->data[i + 0]);
61 OUT_RING (chan, shader->data[i + 1]);
62 OUT_RING (chan, shader->data[i + 2]);
63 OUT_RING (chan, shader->data[i + 3]);
64 (*hw_id)++;
68 Bool
69 NV30_LoadFragProg(ScrnInfoPtr pScrn, nv_shader_t *shader)
71 NVPtr pNv = NVPTR(pScrn);
72 struct nouveau_channel *chan = pNv->chan;
73 struct nouveau_grobj *rankine = pNv->Nv3D;
75 BEGIN_RING(chan, rankine, NV34TCL_FP_ACTIVE_PROGRAM, 1);
76 if (OUT_RELOC(chan, pNv->shader_mem, shader->hw_id, NOUVEAU_BO_VRAM |
77 NOUVEAU_BO_RD | NOUVEAU_BO_LOW | NOUVEAU_BO_OR,
78 NV34TCL_FP_ACTIVE_PROGRAM_DMA0,
79 NV34TCL_FP_ACTIVE_PROGRAM_DMA1))
80 return FALSE;
81 BEGIN_RING(chan, rankine, NV34TCL_FP_REG_CONTROL, 1);
82 OUT_RING (chan, (1 << 16)| 0xf);
83 BEGIN_RING(chan, rankine, NV34TCL_MULTISAMPLE_CONTROL, 1);
84 OUT_RING (chan, 0xffff0000);
86 BEGIN_RING(chan, rankine, NV34TCL_FP_CONTROL,1);
87 OUT_RING (chan, (shader->card_priv.NV30FP.num_regs-1)/2);
89 return TRUE;
92 void
93 NV40_LoadVtxProg(ScrnInfoPtr pScrn, nv_shader_t *shader)
95 NVPtr pNv = NVPTR(pScrn);
96 struct nouveau_channel *chan = pNv->chan;
97 struct nouveau_grobj *curie = pNv->Nv3D;
99 BEGIN_RING(chan, curie, NV40TCL_VP_START_FROM_ID, 1);
100 OUT_RING (chan, (shader->hw_id));
102 BEGIN_RING(chan, curie, NV40TCL_VP_ATTRIB_EN, 2);
103 OUT_RING (chan, shader->card_priv.NV30VP.vp_in_reg);
104 OUT_RING (chan, shader->card_priv.NV30VP.vp_out_reg);
107 Bool
108 NV40_LoadFragProg(ScrnInfoPtr pScrn, nv_shader_t *shader)
110 NVPtr pNv = NVPTR(pScrn);
111 struct nouveau_channel *chan = pNv->chan;
112 struct nouveau_grobj *curie = pNv->Nv3D;
114 BEGIN_RING(chan, curie, NV40TCL_FP_ADDRESS, 1);
115 if (OUT_RELOC(chan, pNv->shader_mem, shader->hw_id, NOUVEAU_BO_VRAM |
116 NOUVEAU_BO_GART | NOUVEAU_BO_RD | NOUVEAU_BO_LOW |
117 NOUVEAU_BO_OR,
118 NV40TCL_FP_ADDRESS_DMA0, NV40TCL_FP_ADDRESS_DMA1))
119 return FALSE;
120 BEGIN_RING(chan, curie, NV40TCL_FP_CONTROL, 1);
121 OUT_RING (chan, shader->card_priv.NV30FP.num_regs <<
122 NV40TCL_FP_CONTROL_TEMP_COUNT_SHIFT);
124 return TRUE;
/*******************************************************************************
 * NV40/G70 vertex shaders
 ******************************************************************************/
131 nv_shader_t nv40_vp_exa_render = {
132 .card_priv.NV30VP.vp_in_reg = 0x00000309,
133 .card_priv.NV30VP.vp_out_reg = 0x0000c001,
134 .size = (3*4),
135 .data = {
136 /* MOV result.position, vertex.position */
137 0x40041c6c, 0x0040000d, 0x8106c083, 0x6041ff80,
138 /* MOV result.texcoord[0], vertex.texcoord[0] */
139 0x401f9c6c, 0x0040080d, 0x8106c083, 0x6041ff9c,
140 /* MOV result.texcoord[1], vertex.texcoord[1] */
141 0x401f9c6c, 0x0040090d, 0x8106c083, 0x6041ffa1,
/*******************************************************************************
 * NV30/NV40/G70 fragment shaders
 ******************************************************************************/
149 nv_shader_t nv30_fp_pass_col0 = {
150 .card_priv.NV30FP.num_regs = 2,
151 .size = (1*4),
152 .data = {
153 /* MOV R0, fragment.color */
154 0x01403e81, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
158 nv_shader_t nv30_fp_pass_tex0 = {
159 .card_priv.NV30FP.num_regs = 2,
160 .size = (2*4),
161 .data = {
162 /* TEX R0, fragment.texcoord[0], texture[0], 2D */
163 0x17009e00, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
164 /* MOV R0, R0 */
165 0x01401e81, 0x1c9dc800, 0x0001c800, 0x0001c800,
169 nv_shader_t nv30_fp_composite_mask = {
170 .card_priv.NV30FP.num_regs = 2,
171 .size = (3*4),
172 .data = {
173 /* TEXC0 R1.w , fragment.texcoord[1], texture[1], 2D */
174 0x1702b102, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
175 /* TEX R0 (NE0.wwww), fragment.texcoord[0], texture[0], 2D */
176 0x17009e00, 0x1ff5c801, 0x0001c800, 0x3fe1c800,
177 /* MUL R0 , R0, R1.w */
178 0x02001e81, 0x1c9dc800, 0x0001fe04, 0x0001c800,
182 nv_shader_t nv30_fp_composite_mask_sa_ca = {
183 .card_priv.NV30FP.num_regs = 2,
184 .size = (3*4),
185 .data = {
186 /* TEXC0 R1.w , fragment.texcoord[0], texture[0], 2D */
187 0x17009102, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
188 /* TEX R0 (NE0.wwww), fragment.texcoord[1], texture[1], 2D */
189 0x1702be00, 0x1ff5c801, 0x0001c800, 0x3fe1c800,
190 /* MUL R0 , R1,wwww, R0 */
191 0x02001e81, 0x1c9dfe04, 0x0001c800, 0x0001c800,
195 nv_shader_t nv30_fp_composite_mask_ca = {
196 .card_priv.NV30FP.num_regs = 2,
197 .size = (3*4),
198 .data = {
199 /* TEXC0 R0 , fragment.texcoord[0], texture[0], 2D */
200 0x17009f00, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
201 /* TEX R1 (NE0.xyzw), fragment.texcoord[1], texture[1], 2D */
202 0x1702be02, 0x1c95c801, 0x0001c800, 0x3fe1c800,
203 /* MUL R0 , R0, R1 */
204 0x02001e81, 0x1c9dc800, 0x0001c804, 0x0001c800,
208 nv_shader_t nv40_vp_video = {
209 .card_priv.NV30VP.vp_in_reg = 0x00000309,
210 .card_priv.NV30VP.vp_out_reg = 0x0000c001,
211 .size = (3*4),
212 .data = {
213 /* MOV result.position, vertex.position */
214 0x40041c6c, 0x0040000d, 0x8106c083, 0x6041ff80,
215 /* MOV result.texcoord[0], vertex.texcoord[0] */
216 0x401f9c6c, 0x0040080d, 0x8106c083, 0x6041ff9c,
217 /* MOV result.texcoord[1], vertex.texcoord[1] */
218 0x401f9c6c, 0x0040090d, 0x8106c083, 0x6041ffa1,
222 nv_shader_t nv40_fp_yv12_bicubic = {
223 .card_priv.NV30FP.num_regs = 4,
224 .size = (29*4),
225 .data = {
226 /* INST 0: MOVR R0.xy (TR0.xyzw), attrib.texcoord[0] */
227 0x01008600, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
228 /* INST 1: ADDR R0.z (TR0.xyzw), R0.yyyy, { 0.50, 0.00, 0.00, 0.00 }.xxxx */
229 0x03000800, 0x1c9caa00, 0x00000002, 0x0001c800,
230 0x3f000000, 0x00000000, 0x00000000, 0x00000000,
231 /* INST 2: ADDR R1.x (TR0.xyzw), R0, { 0.50, 0.00, 0.00, 0.00 }.xxxx */
232 0x03000202, 0x1c9dc800, 0x00000002, 0x0001c800,
233 0x3f000000, 0x00000000, 0x00000000, 0x00000000,
234 /* INST 3: TEXRC0 R1.xyz (TR0.xyzw), R0.zzzz, texture[0] */
235 0x17000f82, 0x1c9d5400, 0x0001c800, 0x0001c800,
236 /* INST 4: MULR R2.yw (TR0.xyzw), R1.xxyy, { -1.00, 1.00, 0.00, 0.00 }.xxyy */
237 0x02001404, 0x1c9ca104, 0x0000a002, 0x0001c800,
238 0xbf800000, 0x3f800000, 0x00000000, 0x00000000,
239 /* INST 5: TEXR R3.xyz (TR0.xyzw), R1, texture[0] */
240 0x17000e86, 0x1c9dc804, 0x0001c800, 0x0001c800,
241 /* INST 6: MULR R2.xz (TR0.xyzw), R3.xxyy, { -1.00, 1.00, 0.00, 0.00 }.xxyy */
242 0x02000a04, 0x1c9ca10c, 0x0000a002, 0x0001c800,
243 0xbf800000, 0x3f800000, 0x00000000, 0x00000000,
244 /* INST 7: ADDR R2 (TR0.xyzw), R0.xyxy, R2 */
245 0x03001e04, 0x1c9c8800, 0x0001c808, 0x0001c800,
246 /* INST 8: TEXR R1.y (TR0.xyzw), R2.zwzz, -texture[1] */
247 0x17020402, 0x1c9d5c08, 0x0001c800, 0x0001c800,
248 /* INST 9: MADH R1.x (TR0.xyzw), -R1.zzzz, R1.yyyy, R1.yyyy */
249 0x04400282, 0x1c9f5504, 0x0000aa04, 0x0000aa04,
250 /* INST 10: TEXR R0.y (TR0.xyzw), R2.xwxw, -texture[1] */
251 0x17020400, 0x1c9d9808, 0x0001c800, 0x0001c800,
252 /* INST 11: MADH R0.w (TR0.xyzw), -R1.zzzz, R0.yyyy, R0.yyyy */
253 0x04401080, 0x1c9f5504, 0x0000aa00, 0x0000aa00,
254 /* INST 12: TEXR R0.x (TR0.xyzw), R2.zyxy, texture[1] */
255 0x17020200, 0x1c9c8c08, 0x0001c800, 0x0001c800,
256 /* INST 13: MADH R1.x (TR0.xyzw), R1.zzzz, R0, R1 */
257 0x04400282, 0x1c9d5504, 0x0001c800, 0x0001c904,
258 /* INST 14: TEXR R0.x (NE0.zzzz), R2, texture[1] */
259 0x17020200, 0x1555c808, 0x0001c800, 0x0001c800,
260 /* INST 15: MADH R0.x (TR0.xyzw), R1.zzzz, R0, R0.wwww */
261 0x04400280, 0x1c9d5504, 0x0001c800, 0x0001ff00,
262 /* INST 16: MADH R0.w (TR0.xyzw), -R3.zzzz, R1.xxxx, R1.xxxx */
263 0x04401080, 0x1c9f550c, 0x00000104, 0x00000104,
264 /* INST 17: TEXR R0.yz (TR0.xyzw), attrib.texcoord[1], abs(texture[2]) */
265 0x1704ac80, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
266 /* INST 18: MADH R0.x (TR0.xyzw), R3.zzzz, R0, R0.wwww */
267 0x04400280, 0x1c9d550c, 0x0001c900, 0x0001ff00,
268 /* INST 19: MADH R1.xyz (TR0.xyzw), R0.xxxx, { 1.16, -0.87, 0.53, -1.08 }.xxxx, { 1.16, -0.87, 0.53, -1.08 }.yzww */
269 0x04400e82, 0x1c9c0100, 0x00000002, 0x0001f202,
270 0x3f9507c8, 0xbf5ee393, 0x3f078fef, 0xbf8a6762,
271 /* INST 20: MADH R1.xyz (TR0.xyzw), R0.yyyy, { 0.00, -0.39, 2.02, 0.00 }, R1 */
272 0x04400e82, 0x1c9cab00, 0x0001c802, 0x0001c904,
273 0x00000000, 0xbec890d6, 0x40011687, 0x00000000,
274 /* INST 21: MADH R0.xyz (TR0.xyzw), R0.zzzz, { 1.60, -0.81, 0.00, 0.00 }, R1 + END */
275 0x04400e81, 0x1c9d5500, 0x0001c802, 0x0001c904,
276 0x3fcc432d, 0xbf501a37, 0x00000000, 0x00000000,
280 nv_shader_t nv30_fp_yv12_bicubic = {
281 .card_priv.NV30FP.num_regs = 4,
282 .size = (24*4),
283 .data = {
284 /* INST 0: MOVR R2.xy (TR0.xyzw), attrib.texcoord[0] */
285 0x01008604, 0x1c9dc801, 0x0001c800, 0x0001c800,
286 /* INST 1: ADDR R0.xy (TR0.xyzw), R2, { 0.50, 0.00, 0.00, 0.00 }.xxxx */
287 0x03000600, 0x1c9dc808, 0x00000002, 0x0001c800,
288 0x3f000000, 0x00000000, 0x00000000, 0x00000000,
289 /* INST 2: TEXR R3.xyz (TR0.xyzw), R0, texture[0] */
290 0x17000e06, 0x1c9dc800, 0x0001c800, 0x0001c800,
291 /* INST 3: TEXR R0.xyz (TR0.xyzw), R0.yyyy, texture[0] */
292 0x17000e00, 0x1c9caa00, 0x0001c800, 0x0001c800,
293 /* INST 4: MULR R1.xz (TR0.xyzw), R3.xxyy, { -1.00, 1.00, 0.00, 0.00 }.xxyy */
294 0x02000a02, 0x1c9ca00c, 0x0000a002, 0x0001c800,
295 0xbf800000, 0x3f800000, 0x00000000, 0x00000000,
296 /* INST 5: MULR R1.yw (TR0.xyzw), R0.xxyy, { -1.00, 1.00, 0.00, 0.00 }.xxyy */
297 0x02001402, 0x1c9ca000, 0x0000a002, 0x0001c800,
298 0xbf800000, 0x3f800000, 0x00000000, 0x00000000,
299 /* INST 6: ADDR R2 (TR0.xyzw), R2.xyxy, R1 */
300 0x03001e04, 0x1c9c8808, 0x0001c804, 0x0001c800,
301 /* INST 7: TEXR R0.x (TR0.xyzw), R2, texture[1] */
302 0x17020200, 0x1c9dc808, 0x0001c800, 0x0001c800,
303 /* INST 8: TEXR R1.y (TR0.xyzw), R2.xwxw, texture[1] */
304 0x17020402, 0x1c9d9808, 0x0001c800, 0x0001c800,
305 /* INST 9: TEXR R1.x (TR0.xyzw), R2.zyxy, texture[1] */
306 0x17020202, 0x1c9c8c08, 0x0001c800, 0x0001c800,
307 /* INST 10: LRPH R0.x (TR0.xyzw), R0.zzzz, R0, R1.yyyy */
308 0x1f400280, 0x1c9d5400, 0x0001c800, 0x0000aa04,
309 /* INST 11: TEXR R0.y (TR0.xyzw), R2.zwzz, texture[1] */
310 0x17020400, 0x1c9d5c08, 0x0001c800, 0x0001c800,
311 /* INST 12: LRPH R0.y (TR0.xyzw), R0.zzzz, R1.xxxx, R0 */
312 0x1f400480, 0x1c9d5400, 0x00000004, 0x0001c800,
313 /* INST 13: LRPH R0.x (TR0.xyzw), R3.zzzz, R0, R0.yyyy */
314 0x1f400280, 0x1c9d540c, 0x0001c900, 0x0000ab00,
315 /* INST 14: MADH R0.xyz (TR0.xyzw), R0.xxxx, { 1.16, -0.87, 0.53, -1.08 }.xxxx, { 1.16, -0.87, 0.53, -1.08 }.yzww */
316 0x04400e80, 0x1c9c0100, 0x00000002, 0x0001f202,
317 0x3f9507c8, 0xbf5ee393, 0x3f078fef, 0xbf8a6762,
318 /* INST 15: TEXR R1.yz (TR0.xyzw), attrib.texcoord[1], abs(texture[2]) */
319 0x1704ac02, 0x1c9dc801, 0x0001c800, 0x0001c800,
320 /* INST 16: MADH R0.xyz (TR0.xyzw), R1.yyyy, { 0.00, -0.39, 2.02, 0.00 }, R0 */
321 0x04400e80, 0x1c9caa04, 0x0001c802, 0x0001c900,
322 0x00000000, 0xbec890d6, 0x40011687, 0x00000000,
323 /* INST 17: MADH R0.xyz (TR0.xyzw), R1.zzzz, { 1.60, -0.81, 0.00, 0.00 }, R0 + END */
324 0x04400e81, 0x1c9d5404, 0x0001c802, 0x0001c900,
325 0x3fcc432d, 0xbf501a37, 0x00000000, 0x00000000,
329 nv_shader_t nv30_fp_yv12_bilinear = {
330 .card_priv.NV30FP.num_regs = 2,
331 .size = (8*4),
332 .data = {
333 /* INST 0: TEXR R0.x (TR0.xyzw), attrib.texcoord[0], abs(texture[1]) */
334 0x17028200, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
335 /* INST 1: MADR R1.xyz (TR0.xyzw), R0.xxxx, { 1.16, -0.87, 0.53, -1.08 }.xxxx, { 1.16, -0.87, 0.53, -1.08 }.yzww */
336 0x04000e02, 0x1c9c0000, 0x00000002, 0x0001f202,
337 0x3f9507c8, 0xbf5ee393, 0x3f078fef, 0xbf8a6762,
338 /* INST 2: TEXR R0.yz (TR0.xyzw), attrib.texcoord[1], abs(texture[2]) */
339 0x1704ac80, 0x1c9dc801, 0x0001c800, 0x3fe1c800,
340 /* INST 3: MADR R1.xyz (TR0.xyzw), R0.yyyy, { 0.00, -0.39, 2.02, 0.00 }, R1 */
341 0x04000e02, 0x1c9cab00, 0x0001c802, 0x0001c804,
342 0x00000000, 0xbec890d6, 0x40011687, 0x00000000,
343 /* INST 4: MADR R0.xyz (TR0.xyzw), R0.zzzz, { 1.60, -0.81, 0.00, 0.00 }, R1 + END */
344 0x04000e81, 0x1c9d5500, 0x0001c802, 0x0001c804,
345 0x3fcc432d, 0xbf501a37, 0x00000000, 0x00000000,