/* radeon_state.c -- State support for Radeon -*- linux-c -*- */
/*
 * Copyright 2000 VA Linux Systems, Inc., Fremont, California.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Gareth Hughes <gareth@valinux.com>
 *    Kevin E. Martin <martin@valinux.com>
 */

#include "drmP.h"
#include "drm.h"
#include "drm_sarea.h"
#include "radeon_drm.h"
#include "radeon_drv.h"

/* ================================================================
 * Helper functions for client state checking and fixup
 */
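/* Note: the cmdbuf ioctls feed raw, client-supplied command streams to the
 * CP, so every memory offset embedded in them has to be range-checked (and,
 * for legacy clients, rewritten) before it reaches the hardware. The helpers
 * below implement that validation.
 */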
static __inline__ int radeon_check_and_fixup_offset(drm_radeon_private_t *
						    dev_priv,
						    struct drm_file *file_priv,
						    u32 *offset)
{
	u64 off = *offset;
	u32 fb_end = dev_priv->fb_location + dev_priv->fb_size - 1;
	struct drm_radeon_driver_file_fields *radeon_priv;

	/* Hrm ... the story of the offset ... So this function converts
	 * the various ideas of what userland clients might have for an
	 * offset in the card address space into an offset into the card
	 * address space :) So with a sane client, it should just keep
	 * the value intact and just do some boundary checking. However,
	 * not all clients are sane. Some older clients pass us 0 based
	 * offsets relative to the start of the framebuffer and some may
	 * assume the AGP aperture is appended to the framebuffer, so we
	 * try to detect those cases and fix them up.
	 *
	 * Note: It might be a good idea here to make sure the offset lands
	 * in some "allowed" area to protect things like the PCIE GART...
	 */

	/* First, the best case, the offset already lands in either the
	 * framebuffer or the GART mapped space
	 */
	if (radeon_check_offset(dev_priv, off))
		return 0;

	/* Ok, that didn't happen... now check if we have a zero based
	 * offset that fits in the framebuffer + gart space, apply the
	 * magic offset we get from SETPARAM or calculated from fb_location
	 */
	if (off < (dev_priv->fb_size + dev_priv->gart_size)) {
		radeon_priv = file_priv->driver_priv;
		off += radeon_priv->radeon_fb_delta;
	}

	/* Finally, assume we aimed at a GART offset if beyond the fb */
	if (off > fb_end)
		off = off - fb_end - 1 + dev_priv->gart_vm_start;

	/* Now recheck and fail if out of bounds */
	if (radeon_check_offset(dev_priv, off)) {
		DRM_DEBUG("offset fixed up to 0x%x\n", (unsigned int)off);
		*offset = off;
		return 0;
	}

	return -EINVAL;
}
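/* A sketch of the fixup arithmetic above, with made-up numbers: if the
 * framebuffer lives at fb_location 0x10000000 with fb_size 0x04000000 and a
 * legacy client hands us the zero-based offset 0x00200000, the first check
 * fails, radeon_fb_delta (0x10000000 in this hypothetical) moves it to
 * 0x10200000, and the recheck then passes. An offset past fb_end is instead
 * rebased into the GART aperture at gart_vm_start.
 */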
static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
						     dev_priv,
						     struct drm_file *file_priv,
						     int id, u32 *data)
{
	switch (id) {

	case RADEON_EMIT_PP_MISC:
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
		    &data[(RADEON_RB3D_DEPTHOFFSET - RADEON_PP_MISC) / 4])) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return -EINVAL;
		}
		dev_priv->have_z_offset = 1;
		break;

	case RADEON_EMIT_PP_CNTL:
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
		    &data[(RADEON_RB3D_COLOROFFSET - RADEON_PP_CNTL) / 4])) {
			DRM_ERROR("Invalid colour buffer offset\n");
			return -EINVAL;
		}
		break;

	case R200_EMIT_PP_TXOFFSET_0:
	case R200_EMIT_PP_TXOFFSET_1:
	case R200_EMIT_PP_TXOFFSET_2:
	case R200_EMIT_PP_TXOFFSET_3:
	case R200_EMIT_PP_TXOFFSET_4:
	case R200_EMIT_PP_TXOFFSET_5:
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  &data[0])) {
			DRM_ERROR("Invalid R200 texture offset\n");
			return -EINVAL;
		}
		break;

	case RADEON_EMIT_PP_TXFILTER_0:
	case RADEON_EMIT_PP_TXFILTER_1:
	case RADEON_EMIT_PP_TXFILTER_2:
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
		    &data[(RADEON_PP_TXOFFSET_0 - RADEON_PP_TXFILTER_0) / 4])) {
			DRM_ERROR("Invalid R100 texture offset\n");
			return -EINVAL;
		}
		break;

	case R200_EMIT_PP_CUBIC_OFFSETS_0:
	case R200_EMIT_PP_CUBIC_OFFSETS_1:
	case R200_EMIT_PP_CUBIC_OFFSETS_2:
	case R200_EMIT_PP_CUBIC_OFFSETS_3:
	case R200_EMIT_PP_CUBIC_OFFSETS_4:
	case R200_EMIT_PP_CUBIC_OFFSETS_5:{
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  file_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R200 cubic texture offset\n");
					return -EINVAL;
				}
			}
			break;
		}

	case RADEON_EMIT_PP_CUBIC_OFFSETS_T0:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T1:
	case RADEON_EMIT_PP_CUBIC_OFFSETS_T2:{
			int i;
			for (i = 0; i < 5; i++) {
				if (radeon_check_and_fixup_offset(dev_priv,
								  file_priv,
								  &data[i])) {
					DRM_ERROR
					    ("Invalid R100 cubic texture offset\n");
					return -EINVAL;
				}
			}
			break;
		}

	case R200_EMIT_VAP_CTL:{
			RING_LOCALS;
			BEGIN_RING(2);
			OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
			ADVANCE_RING();
		}
		break;

	case RADEON_EMIT_RB3D_COLORPITCH:
	case RADEON_EMIT_RE_LINE_PATTERN:
	case RADEON_EMIT_SE_LINE_WIDTH:
	case RADEON_EMIT_PP_LUM_MATRIX:
	case RADEON_EMIT_PP_ROT_MATRIX_0:
	case RADEON_EMIT_RB3D_STENCILREFMASK:
	case RADEON_EMIT_SE_VPORT_XSCALE:
	case RADEON_EMIT_SE_CNTL:
	case RADEON_EMIT_SE_CNTL_STATUS:
	case RADEON_EMIT_RE_MISC:
	case RADEON_EMIT_PP_BORDER_COLOR_0:
	case RADEON_EMIT_PP_BORDER_COLOR_1:
	case RADEON_EMIT_PP_BORDER_COLOR_2:
	case RADEON_EMIT_SE_ZBIAS_FACTOR:
	case RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT:
	case RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED:
	case R200_EMIT_PP_TXCBLEND_0:
	case R200_EMIT_PP_TXCBLEND_1:
	case R200_EMIT_PP_TXCBLEND_2:
	case R200_EMIT_PP_TXCBLEND_3:
	case R200_EMIT_PP_TXCBLEND_4:
	case R200_EMIT_PP_TXCBLEND_5:
	case R200_EMIT_PP_TXCBLEND_6:
	case R200_EMIT_PP_TXCBLEND_7:
	case R200_EMIT_TCL_LIGHT_MODEL_CTL_0:
	case R200_EMIT_TFACTOR_0:
	case R200_EMIT_VTX_FMT_0:
	case R200_EMIT_MATRIX_SELECT_0:
	case R200_EMIT_TEX_PROC_CTL_2:
	case R200_EMIT_TCL_UCP_VERT_BLEND_CTL:
	case R200_EMIT_PP_TXFILTER_0:
	case R200_EMIT_PP_TXFILTER_1:
	case R200_EMIT_PP_TXFILTER_2:
	case R200_EMIT_PP_TXFILTER_3:
	case R200_EMIT_PP_TXFILTER_4:
	case R200_EMIT_PP_TXFILTER_5:
	case R200_EMIT_VTE_CNTL:
	case R200_EMIT_OUTPUT_VTX_COMP_SEL:
	case R200_EMIT_PP_TAM_DEBUG3:
	case R200_EMIT_PP_CNTL_X:
	case R200_EMIT_RB3D_DEPTHXY_OFFSET:
	case R200_EMIT_RE_AUX_SCISSOR_CNTL:
	case R200_EMIT_RE_SCISSOR_TL_0:
	case R200_EMIT_RE_SCISSOR_TL_1:
	case R200_EMIT_RE_SCISSOR_TL_2:
	case R200_EMIT_SE_VAP_CNTL_STATUS:
	case R200_EMIT_SE_VTX_STATE_CNTL:
	case R200_EMIT_RE_POINTSIZE:
	case R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0:
	case R200_EMIT_PP_CUBIC_FACES_0:
	case R200_EMIT_PP_CUBIC_FACES_1:
	case R200_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_CUBIC_FACES_3:
	case R200_EMIT_PP_CUBIC_FACES_4:
	case R200_EMIT_PP_CUBIC_FACES_5:
	case RADEON_EMIT_PP_TEX_SIZE_0:
	case RADEON_EMIT_PP_TEX_SIZE_1:
	case RADEON_EMIT_PP_TEX_SIZE_2:
	case R200_EMIT_RB3D_BLENDCOLOR:
	case R200_EMIT_TCL_POINT_SPRITE_CNTL:
	case RADEON_EMIT_PP_CUBIC_FACES_0:
	case RADEON_EMIT_PP_CUBIC_FACES_1:
	case RADEON_EMIT_PP_CUBIC_FACES_2:
	case R200_EMIT_PP_TRI_PERF_CNTL:
	case R200_EMIT_PP_AFS_0:
	case R200_EMIT_PP_AFS_1:
	case R200_EMIT_ATF_TFACTOR:
	case R200_EMIT_PP_TXCTLALL_0:
	case R200_EMIT_PP_TXCTLALL_1:
	case R200_EMIT_PP_TXCTLALL_2:
	case R200_EMIT_PP_TXCTLALL_3:
	case R200_EMIT_PP_TXCTLALL_4:
	case R200_EMIT_PP_TXCTLALL_5:
	case R200_EMIT_VAP_PVS_CNTL:
		/* These packets don't contain memory offsets */
		break;

	default:
		DRM_ERROR("Unknown state packet ID %d\n", id);
		return -EINVAL;
	}

	return 0;
}
static __inline__ int radeon_check_and_fixup_packet3(drm_radeon_private_t *
						     dev_priv,
						     struct drm_file *file_priv,
						     drm_radeon_kcmd_buffer_t *
						     cmdbuf,
						     unsigned int *cmdsz)
{
	u32 *cmd = (u32 *) cmdbuf->buf;
	u32 offset, narrays;
	int count, i, k;

	*cmdsz = 2 + ((cmd[0] & RADEON_CP_PACKET_COUNT_MASK) >> 16);

	if ((cmd[0] & 0xc0000000) != RADEON_CP_PACKET3) {
		DRM_ERROR("Not a type 3 packet\n");
		return -EINVAL;
	}

	if (4 * *cmdsz > cmdbuf->bufsz) {
		DRM_ERROR("Packet size larger than size of data provided\n");
		return -EINVAL;
	}
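
	/* As the masks above imply, a type-3 header keeps the packet type in
	 * bits 30-31, the opcode in bits 8-15 and a count in bits 16-29 that
	 * holds the number of payload dwords minus one -- hence the total
	 * size of 2 + count dwords computed for *cmdsz.
	 */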
	switch (cmd[0] & 0xff00) {
	/* XXX Are there old drivers needing other packets? */

	case RADEON_3D_DRAW_IMMD:
	case RADEON_3D_DRAW_VBUF:
	case RADEON_3D_DRAW_INDX:
	case RADEON_WAIT_FOR_IDLE:
	case RADEON_CP_NOP:
	case RADEON_3D_CLEAR_ZMASK:
/*	case RADEON_CP_NEXT_CHAR:
	case RADEON_CP_PLY_NEXTSCAN:
	case RADEON_CP_SET_SCISSORS: */	/* probably safe but will never need them? */
		/* these packets are safe */
		break;

	case RADEON_CP_3D_DRAW_IMMD_2:
	case RADEON_CP_3D_DRAW_VBUF_2:
	case RADEON_CP_3D_DRAW_INDX_2:
	case RADEON_3D_CLEAR_HIZ:
		/* safe but r200 only */
		if (dev_priv->microcode_version != UCODE_R200) {
			DRM_ERROR("Invalid 3d packet for r100-class chip\n");
			return -EINVAL;
		}
		break;

	case RADEON_3D_LOAD_VBPNTR:
		count = (cmd[0] >> 16) & 0x3fff;

		if (count > 18) {	/* 12 arrays max */
			DRM_ERROR("Too large payload in 3D_LOAD_VBPNTR (count=%d)\n",
				  count);
			return -EINVAL;
		}

		/* carefully check packet contents */
		narrays = cmd[1] & ~0xc000;
		k = 0;
		i = 2;
		while ((k < narrays) && (i < (count + 2))) {
			i++;	/* skip attribute field */
			if (radeon_check_and_fixup_offset(dev_priv, file_priv,
							  &cmd[i])) {
				DRM_ERROR
				    ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
				     k, i);
				return -EINVAL;
			}
			k++;
			i++;
			if (k == narrays)
				break;
			/* have one more to process, they come in pairs */
			if (radeon_check_and_fixup_offset(dev_priv,
							  file_priv,
							  &cmd[i])) {
				DRM_ERROR
				    ("Invalid offset (k=%d i=%d) in 3D_LOAD_VBPNTR packet.\n",
				     k, i);
				return -EINVAL;
			}
			k++;
			i++;
		}
		/* do the counts match what we expect ? */
		if ((k != narrays) || (i != (count + 2))) {
			DRM_ERROR
			    ("Malformed 3D_LOAD_VBPNTR packet (k=%d i=%d narrays=%d count+1=%d).\n",
			     k, i, narrays, count + 1);
			return -EINVAL;
		}
		break;

	case RADEON_3D_RNDR_GEN_INDX_PRIM:
		if (dev_priv->microcode_version != UCODE_R100) {
			DRM_ERROR("Invalid 3d packet for r200-class chip\n");
			return -EINVAL;
		}
		if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[1])) {
			DRM_ERROR("Invalid rndr_gen_indx offset\n");
			return -EINVAL;
		}
		break;

	case RADEON_CP_INDX_BUFFER:
		if (dev_priv->microcode_version != UCODE_R200) {
			DRM_ERROR("Invalid 3d packet for r100-class chip\n");
			return -EINVAL;
		}
		if ((cmd[1] & 0x8000ffff) != 0x80000810) {
			DRM_ERROR("Invalid indx_buffer reg address %08X\n", cmd[1]);
			return -EINVAL;
		}
		if (radeon_check_and_fixup_offset(dev_priv, file_priv, &cmd[2])) {
			DRM_ERROR("Invalid indx_buffer offset is %08X\n", cmd[2]);
			return -EINVAL;
		}
		break;
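
	/* For the blit packets below, the pitch/offset dwords keep the pitch
	 * in the top 10 bits and the offset in the low 22 bits, in 1KB
	 * units -- hence the << 10 / >> 10 dance around the fixup call.
	 */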
	case RADEON_CNTL_HOSTDATA_BLT:
	case RADEON_CNTL_PAINT_MULTI:
	case RADEON_CNTL_BITBLT_MULTI:
		/* MSB of opcode: next DWORD GUI_CNTL */
		if (cmd[1] & (RADEON_GMC_SRC_PITCH_OFFSET_CNTL
			      | RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[2] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, file_priv, &offset)) {
				DRM_ERROR("Invalid first packet offset\n");
				return -EINVAL;
			}
			cmd[2] = (cmd[2] & 0xffc00000) | offset >> 10;
		}

		if ((cmd[1] & RADEON_GMC_SRC_PITCH_OFFSET_CNTL) &&
		    (cmd[1] & RADEON_GMC_DST_PITCH_OFFSET_CNTL)) {
			offset = cmd[3] << 10;
			if (radeon_check_and_fixup_offset
			    (dev_priv, file_priv, &offset)) {
				DRM_ERROR("Invalid second packet offset\n");
				return -EINVAL;
			}
			cmd[3] = (cmd[3] & 0xffc00000) | offset >> 10;
		}
		break;

	default:
		DRM_ERROR("Invalid packet type %x\n", cmd[0] & 0xff00);
		return -EINVAL;
	}

	return 0;
}

/* ================================================================
 * CP hardware state programming functions
 */
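/* Note: RE_WIDTH_HEIGHT appears to take the bottom-right corner inclusively,
 * which is why the emit below subtracts one from x2/y2 while RE_TOP_LEFT
 * gets x1/y1 unchanged.
 */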
static __inline__ void radeon_emit_clip_rect(drm_radeon_private_t *dev_priv,
					     struct drm_clip_rect *box)
{
	RING_LOCALS;

	DRM_DEBUG("   box:  x1=%d y1=%d  x2=%d y2=%d\n",
		  box->x1, box->y1, box->x2, box->y2);

	BEGIN_RING(4);
	OUT_RING(CP_PACKET0(RADEON_RE_TOP_LEFT, 0));
	OUT_RING((box->y1 << 16) | box->x1);
	OUT_RING(CP_PACKET0(RADEON_RE_WIDTH_HEIGHT, 0));
	OUT_RING(((box->y2 - 1) << 16) | (box->x2 - 1));
	ADVANCE_RING();
}

/* Emit 1.1 state
 */
static int radeon_emit_state(drm_radeon_private_t *dev_priv,
			     struct drm_file *file_priv,
			     drm_radeon_context_regs_t *ctx,
			     drm_radeon_texture_regs_t *tex,
			     unsigned int dirty)
{
	RING_LOCALS;
	DRM_DEBUG("dirty=0x%08x\n", dirty);

	if (dirty & RADEON_UPLOAD_CONTEXT) {
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  &ctx->rb3d_depthoffset)) {
			DRM_ERROR("Invalid depth buffer offset\n");
			return -EINVAL;
		}

		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  &ctx->rb3d_coloroffset)) {
			DRM_ERROR("Invalid colour buffer offset\n");
			return -EINVAL;
		}

		BEGIN_RING(14);
		OUT_RING(CP_PACKET0(RADEON_PP_MISC, 6));
		OUT_RING(ctx->pp_misc);
		OUT_RING(ctx->pp_fog_color);
		OUT_RING(ctx->re_solid_color);
		OUT_RING(ctx->rb3d_blendcntl);
		OUT_RING(ctx->rb3d_depthoffset);
		OUT_RING(ctx->rb3d_depthpitch);
		OUT_RING(ctx->rb3d_zstencilcntl);
		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 2));
		OUT_RING(ctx->pp_cntl);
		OUT_RING(ctx->rb3d_cntl);
		OUT_RING(ctx->rb3d_coloroffset);
		OUT_RING(CP_PACKET0(RADEON_RB3D_COLORPITCH, 0));
		OUT_RING(ctx->rb3d_colorpitch);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_VERTFMT) {
		BEGIN_RING(2);
		OUT_RING(CP_PACKET0(RADEON_SE_COORD_FMT, 0));
		OUT_RING(ctx->se_coord_fmt);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_LINE) {
		BEGIN_RING(5);
		OUT_RING(CP_PACKET0(RADEON_RE_LINE_PATTERN, 1));
		OUT_RING(ctx->re_line_pattern);
		OUT_RING(ctx->re_line_state);
		OUT_RING(CP_PACKET0(RADEON_SE_LINE_WIDTH, 0));
		OUT_RING(ctx->se_line_width);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_BUMPMAP) {
		BEGIN_RING(5);
		OUT_RING(CP_PACKET0(RADEON_PP_LUM_MATRIX, 0));
		OUT_RING(ctx->pp_lum_matrix);
		OUT_RING(CP_PACKET0(RADEON_PP_ROT_MATRIX_0, 1));
		OUT_RING(ctx->pp_rot_matrix_0);
		OUT_RING(ctx->pp_rot_matrix_1);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_MASKS) {
		BEGIN_RING(4);
		OUT_RING(CP_PACKET0(RADEON_RB3D_STENCILREFMASK, 2));
		OUT_RING(ctx->rb3d_stencilrefmask);
		OUT_RING(ctx->rb3d_ropcntl);
		OUT_RING(ctx->rb3d_planemask);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_VIEWPORT) {
		BEGIN_RING(7);
		OUT_RING(CP_PACKET0(RADEON_SE_VPORT_XSCALE, 5));
		OUT_RING(ctx->se_vport_xscale);
		OUT_RING(ctx->se_vport_xoffset);
		OUT_RING(ctx->se_vport_yscale);
		OUT_RING(ctx->se_vport_yoffset);
		OUT_RING(ctx->se_vport_zscale);
		OUT_RING(ctx->se_vport_zoffset);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_SETUP) {
		BEGIN_RING(4);
		OUT_RING(CP_PACKET0(RADEON_SE_CNTL, 0));
		OUT_RING(ctx->se_cntl);
		OUT_RING(CP_PACKET0(RADEON_SE_CNTL_STATUS, 0));
		OUT_RING(ctx->se_cntl_status);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_MISC) {
		BEGIN_RING(2);
		OUT_RING(CP_PACKET0(RADEON_RE_MISC, 0));
		OUT_RING(ctx->re_misc);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX0) {
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  &tex[0].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 0\n");
			return -EINVAL;
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_0, 5));
		OUT_RING(tex[0].pp_txfilter);
		OUT_RING(tex[0].pp_txformat);
		OUT_RING(tex[0].pp_txoffset);
		OUT_RING(tex[0].pp_txcblend);
		OUT_RING(tex[0].pp_txablend);
		OUT_RING(tex[0].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_0, 0));
		OUT_RING(tex[0].pp_border_color);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX1) {
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  &tex[1].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 1\n");
			return -EINVAL;
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_1, 5));
		OUT_RING(tex[1].pp_txfilter);
		OUT_RING(tex[1].pp_txformat);
		OUT_RING(tex[1].pp_txoffset);
		OUT_RING(tex[1].pp_txcblend);
		OUT_RING(tex[1].pp_txablend);
		OUT_RING(tex[1].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_1, 0));
		OUT_RING(tex[1].pp_border_color);
		ADVANCE_RING();
	}

	if (dirty & RADEON_UPLOAD_TEX2) {
		if (radeon_check_and_fixup_offset(dev_priv, file_priv,
						  &tex[2].pp_txoffset)) {
			DRM_ERROR("Invalid texture offset for unit 2\n");
			return -EINVAL;
		}

		BEGIN_RING(9);
		OUT_RING(CP_PACKET0(RADEON_PP_TXFILTER_2, 5));
		OUT_RING(tex[2].pp_txfilter);
		OUT_RING(tex[2].pp_txformat);
		OUT_RING(tex[2].pp_txoffset);
		OUT_RING(tex[2].pp_txcblend);
		OUT_RING(tex[2].pp_txablend);
		OUT_RING(tex[2].pp_tfactor);
		OUT_RING(CP_PACKET0(RADEON_PP_BORDER_COLOR_2, 0));
		OUT_RING(tex[2].pp_border_color);
		ADVANCE_RING();
	}

	return 0;
}
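/* Note how each RADEON_UPLOAD_* block above maps onto CP_PACKET0 bursts:
 * CP_PACKET0(reg, n) writes n + 1 consecutive registers, so e.g. the context
 * block's 7 + 3 + 1 register writes plus 3 packet headers account for the
 * BEGIN_RING(14) reservation.
 */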

/* Emit 1.2 state
 */
static int radeon_emit_state2(drm_radeon_private_t *dev_priv,
			      struct drm_file *file_priv,
			      drm_radeon_state_t *state)
{
	RING_LOCALS;

	if (state->dirty & RADEON_UPLOAD_ZBIAS) {
		BEGIN_RING(3);
		OUT_RING(CP_PACKET0(RADEON_SE_ZBIAS_FACTOR, 1));
		OUT_RING(state->context2.se_zbias_factor);
		OUT_RING(state->context2.se_zbias_constant);
		ADVANCE_RING();
	}

	return radeon_emit_state(dev_priv, file_priv, &state->context,
				 state->tex, state->dirty);
}

/* New (1.3) state mechanism.  3 commands (packet, scalar, vector) in
 * 1.3 cmdbuffers allow all previous state to be updated as well as
 * the tcl scalar and vector areas.
 */
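/* Each entry below gives the first register of a state packet, the number
 * of dwords the packet carries, and a name used only for error reporting.
 * The table is indexed by the RADEON_EMIT_* / R200_EMIT_* ids checked in
 * radeon_check_and_fixup_packets() above.
 */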
static struct {
	int start;
	int len;
	const char *name;
} packet[RADEON_MAX_STATE_PACKETS] = {
	{RADEON_PP_MISC, 7, "RADEON_PP_MISC"},
	{RADEON_PP_CNTL, 3, "RADEON_PP_CNTL"},
	{RADEON_RB3D_COLORPITCH, 1, "RADEON_RB3D_COLORPITCH"},
	{RADEON_RE_LINE_PATTERN, 2, "RADEON_RE_LINE_PATTERN"},
	{RADEON_SE_LINE_WIDTH, 1, "RADEON_SE_LINE_WIDTH"},
	{RADEON_PP_LUM_MATRIX, 1, "RADEON_PP_LUM_MATRIX"},
	{RADEON_PP_ROT_MATRIX_0, 2, "RADEON_PP_ROT_MATRIX_0"},
	{RADEON_RB3D_STENCILREFMASK, 3, "RADEON_RB3D_STENCILREFMASK"},
	{RADEON_SE_VPORT_XSCALE, 6, "RADEON_SE_VPORT_XSCALE"},
	{RADEON_SE_CNTL, 2, "RADEON_SE_CNTL"},
	{RADEON_SE_CNTL_STATUS, 1, "RADEON_SE_CNTL_STATUS"},
	{RADEON_RE_MISC, 1, "RADEON_RE_MISC"},
	{RADEON_PP_TXFILTER_0, 6, "RADEON_PP_TXFILTER_0"},
	{RADEON_PP_BORDER_COLOR_0, 1, "RADEON_PP_BORDER_COLOR_0"},
	{RADEON_PP_TXFILTER_1, 6, "RADEON_PP_TXFILTER_1"},
	{RADEON_PP_BORDER_COLOR_1, 1, "RADEON_PP_BORDER_COLOR_1"},
	{RADEON_PP_TXFILTER_2, 6, "RADEON_PP_TXFILTER_2"},
	{RADEON_PP_BORDER_COLOR_2, 1, "RADEON_PP_BORDER_COLOR_2"},
	{RADEON_SE_ZBIAS_FACTOR, 2, "RADEON_SE_ZBIAS_FACTOR"},
	{RADEON_SE_TCL_OUTPUT_VTX_FMT, 11, "RADEON_SE_TCL_OUTPUT_VTX_FMT"},
	{RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED, 17,
	 "RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED"},
	{R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0"},
	{R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1"},
	{R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2"},
	{R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3"},
	{R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4"},
	{R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5"},
	{R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6"},
	{R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7"},
	{R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0"},
	{R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0"},
	{R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0"},
	{R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL"},
	{R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0"},
	{R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2"},
	{R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL"},
	{R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0"},
	{R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1"},
	{R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2"},
	{R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3"},
	{R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4"},
	{R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5"},
	{R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0"},
	{R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1"},
	{R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2"},
	{R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3"},
	{R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4"},
	{R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5"},
	{R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL"},
	{R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1,
	 "R200_SE_TCL_OUTPUT_VTX_COMP_SEL"},
	{R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3"},
	{R200_PP_CNTL_X, 1, "R200_PP_CNTL_X"},
	{R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET"},
	{R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL"},
	{R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0"},
	{R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1"},
	{R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2"},
	{R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS"},
	{R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL"},
	{R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE"},
	{R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4,
	 "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0"},
	{R200_PP_CUBIC_FACES_0, 1, "R200_PP_CUBIC_FACES_0"},	/* 61 */
	{R200_PP_CUBIC_OFFSET_F1_0, 5, "R200_PP_CUBIC_OFFSET_F1_0"},	/* 62 */
	{R200_PP_CUBIC_FACES_1, 1, "R200_PP_CUBIC_FACES_1"},
	{R200_PP_CUBIC_OFFSET_F1_1, 5, "R200_PP_CUBIC_OFFSET_F1_1"},
	{R200_PP_CUBIC_FACES_2, 1, "R200_PP_CUBIC_FACES_2"},
	{R200_PP_CUBIC_OFFSET_F1_2, 5, "R200_PP_CUBIC_OFFSET_F1_2"},
	{R200_PP_CUBIC_FACES_3, 1, "R200_PP_CUBIC_FACES_3"},
	{R200_PP_CUBIC_OFFSET_F1_3, 5, "R200_PP_CUBIC_OFFSET_F1_3"},
	{R200_PP_CUBIC_FACES_4, 1, "R200_PP_CUBIC_FACES_4"},
	{R200_PP_CUBIC_OFFSET_F1_4, 5, "R200_PP_CUBIC_OFFSET_F1_4"},
	{R200_PP_CUBIC_FACES_5, 1, "R200_PP_CUBIC_FACES_5"},
	{R200_PP_CUBIC_OFFSET_F1_5, 5, "R200_PP_CUBIC_OFFSET_F1_5"},
	{RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"},
	{RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"},
	{RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"},
	{R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},
	{R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
	{RADEON_PP_CUBIC_FACES_0, 1, "RADEON_PP_CUBIC_FACES_0"},
	{RADEON_PP_CUBIC_OFFSET_T0_0, 5, "RADEON_PP_CUBIC_OFFSET_T0_0"},
	{RADEON_PP_CUBIC_FACES_1, 1, "RADEON_PP_CUBIC_FACES_1"},
	{RADEON_PP_CUBIC_OFFSET_T1_0, 5, "RADEON_PP_CUBIC_OFFSET_T1_0"},
	{RADEON_PP_CUBIC_FACES_2, 1, "RADEON_PP_CUBIC_FACES_2"},
	{RADEON_PP_CUBIC_OFFSET_T2_0, 5, "RADEON_PP_CUBIC_OFFSET_T2_0"},
	{R200_PP_TRI_PERF, 2, "R200_PP_TRI_PERF"},
	{R200_PP_AFS_0, 32, "R200_PP_AFS_0"},	/* 85 */
	{R200_PP_AFS_1, 32, "R200_PP_AFS_1"},
	{R200_PP_TFACTOR_0, 8, "R200_ATF_TFACTOR"},
	{R200_PP_TXFILTER_0, 8, "R200_PP_TXCTLALL_0"},
	{R200_PP_TXFILTER_1, 8, "R200_PP_TXCTLALL_1"},
	{R200_PP_TXFILTER_2, 8, "R200_PP_TXCTLALL_2"},
	{R200_PP_TXFILTER_3, 8, "R200_PP_TXCTLALL_3"},
	{R200_PP_TXFILTER_4, 8, "R200_PP_TXCTLALL_4"},
	{R200_PP_TXFILTER_5, 8, "R200_PP_TXCTLALL_5"},
	{R200_VAP_PVS_CNTL_1, 2, "R200_VAP_PVS_CNTL"},
};

/* ================================================================
 * Performance monitoring functions
 */
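/* Draw a small solid-colour rectangle into the current front or back buffer,
 * positioned relative to the first cliprect -- used below to paint the
 * on-screen performance-monitoring boxes.
 */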
static void radeon_clear_box(drm_radeon_private_t *dev_priv,
			     struct drm_radeon_master_private *master_priv,
			     int x, int y, int w, int h, int r, int g, int b)
{
	u32 color;
	RING_LOCALS;

	x += master_priv->sarea_priv->boxes[0].x1;
	y += master_priv->sarea_priv->boxes[0].y1;

	switch (dev_priv->color_fmt) {
	case RADEON_COLOR_FORMAT_RGB565:
		color = (((r & 0xf8) << 8) |
			 ((g & 0xfc) << 3) | ((b & 0xf8) >> 3));
		break;
	case RADEON_COLOR_FORMAT_ARGB8888:
	default:
		color = (((0xff) << 24) | (r << 16) | (g << 8) | b);
		break;
	}

	BEGIN_RING(4);
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
	OUT_RING(0xffffffff);
	ADVANCE_RING();

	BEGIN_RING(6);

	OUT_RING(CP_PACKET3(RADEON_CNTL_PAINT_MULTI, 4));
	OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
		 RADEON_GMC_BRUSH_SOLID_COLOR |
		 (dev_priv->color_fmt << 8) |
		 RADEON_GMC_SRC_DATATYPE_COLOR |
		 RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS);

	if (master_priv->sarea_priv->pfCurrentPage == 1) {
		OUT_RING(dev_priv->front_pitch_offset);
	} else {
		OUT_RING(dev_priv->back_pitch_offset);
	}

	OUT_RING(color);

	OUT_RING((x << 16) | y);
	OUT_RING((w << 16) | h);

	ADVANCE_RING();
}

static void radeon_cp_performance_boxes(drm_radeon_private_t *dev_priv,
					struct drm_radeon_master_private *master_priv)
{
	/* Collapse various things into a wait flag -- trying to
	 * guess if userspace slept -- better just to have them tell us.
	 */
	if (dev_priv->stats.last_frame_reads > 1 ||
	    dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	if (dev_priv->stats.freelist_loops) {
		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
	}

	/* Purple box for page flipping
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_FLIP)
		radeon_clear_box(dev_priv, master_priv, 4, 4, 8, 8, 255, 0, 255);

	/* Red box if we have to wait for idle at any point
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE)
		radeon_clear_box(dev_priv, master_priv, 16, 4, 8, 8, 255, 0, 0);

	/* Blue box: lost context?
	 */

	/* Yellow box for texture swaps
	 */
	if (dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD)
		radeon_clear_box(dev_priv, master_priv, 40, 4, 8, 8, 255, 255, 0);

	/* Green box if hardware never idles (as far as we can tell)
	 */
	if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE))
		radeon_clear_box(dev_priv, master_priv, 64, 4, 8, 8, 0, 255, 0);

	/* Draw bars indicating number of buffers allocated
	 * (not a great measure, easily confused)
	 */
	if (dev_priv->stats.requested_bufs) {
		if (dev_priv->stats.requested_bufs > 100)
			dev_priv->stats.requested_bufs = 100;

		radeon_clear_box(dev_priv, master_priv, 4, 16,
				 dev_priv->stats.requested_bufs, 4,
				 196, 128, 128);
	}

	memset(&dev_priv->stats, 0, sizeof(dev_priv->stats));
}

/* ================================================================
 * CP command dispatch functions
 */
static void radeon_cp_dispatch_clear(struct drm_device *dev,
				     struct drm_master *master,
				     drm_radeon_clear_t *clear,
				     drm_radeon_clear_rect_t *depth_boxes)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = master->driver_priv;
	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
	drm_radeon_depth_clear_t *depth_clear = &dev_priv->depth_clear;
	int nbox = sarea_priv->nbox;
	struct drm_clip_rect *pbox = sarea_priv->boxes;
	unsigned int flags = clear->flags;
	u32 rb3d_cntl = 0, rb3d_stencilrefmask = 0;
	int i;
	RING_LOCALS;
	DRM_DEBUG("flags = 0x%x\n", flags);

	dev_priv->stats.clears++;

	if (sarea_priv->pfCurrentPage == 1) {
		unsigned int tmp = flags;

		flags &= ~(RADEON_FRONT | RADEON_BACK);
		if (tmp & RADEON_FRONT)
			flags |= RADEON_BACK;
		if (tmp & RADEON_BACK)
			flags |= RADEON_FRONT;
	}
	if (flags & (RADEON_DEPTH | RADEON_STENCIL)) {
		if (!dev_priv->have_z_offset) {
			printk_once(KERN_ERR "radeon: illegal depth clear request. Buggy mesa detected - please update.\n");
			flags &= ~(RADEON_DEPTH | RADEON_STENCIL);
		}
	}

	if (flags & (RADEON_FRONT | RADEON_BACK)) {

		BEGIN_RING(4);

		/* Ensure the 3D stream is idle before doing a
		 * 2D fill to clear the front or back buffer.
		 */
		RADEON_WAIT_UNTIL_3D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_DP_WRITE_MASK, 0));
		OUT_RING(clear->color_mask);

		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {
			int x = pbox[i].x1;
			int y = pbox[i].y1;
			int w = pbox[i].x2 - x;
			int h = pbox[i].y2 - y;

			DRM_DEBUG("%d,%d-%d,%d flags 0x%x\n",
				  x, y, w, h, flags);

			if (flags & RADEON_FRONT) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->front_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}

			if (flags & RADEON_BACK) {
				BEGIN_RING(6);

				OUT_RING(CP_PACKET3
					 (RADEON_CNTL_PAINT_MULTI, 4));
				OUT_RING(RADEON_GMC_DST_PITCH_OFFSET_CNTL |
					 RADEON_GMC_BRUSH_SOLID_COLOR |
					 (dev_priv->color_fmt << 8) |
					 RADEON_GMC_SRC_DATATYPE_COLOR |
					 RADEON_ROP3_P |
					 RADEON_GMC_CLR_CMP_CNTL_DIS);

				OUT_RING(dev_priv->back_pitch_offset);
				OUT_RING(clear->clear_color);

				OUT_RING((x << 16) | y);
				OUT_RING((w << 16) | h);

				ADVANCE_RING();
			}
		}
	}

	/* hyper z clear */
	/* no docs available, based on reverse engineering by Stephane Marchesin */
	if ((flags & (RADEON_DEPTH | RADEON_STENCIL))
	    && (flags & RADEON_CLEAR_FASTZ)) {

		int i;
		int depthpixperline =
		    dev_priv->depth_fmt ==
		    RADEON_DEPTH_FORMAT_16BIT_INT_Z ? (dev_priv->depth_pitch / 2)
						    : (dev_priv->depth_pitch / 4);

		u32 clearmask;

		u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
		    ((clear->depth_mask & 0xff) << 24);

		/* Make sure we restore the 3D state next time.
		 * we haven't touched any "normal" state - still need this?
		 */
		sarea_priv->ctx_owner = 0;

		if ((dev_priv->flags & RADEON_HAS_HIERZ)
		    && (flags & RADEON_USE_HIERZ)) {
			/* FIXME : reverse engineer that for Rx00 cards */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
			/* pattern seems to work for r100, though get slight
			   rendering errors with glxgears. If hierz is not enabled for r100,
			   only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
			   other ones are ignored, and the same clear mask can be used. That's
			   very different behaviour than R200 which needs different clear mask
			   and different number of tiles to clear if hierz is enabled or not !?!
			 */
			clearmask = (0xff << 22) | (0xff << 6) | 0x003f003f;
		} else {
			/* clear mask : chooses the clearing pattern.
			   rv250: could be used to clear only parts of macrotiles
			   (but that would get really complicated...)?
			   bit 0 and 1 (either or both of them ?!?!) are used to
			   not clear tile (or maybe one of the bits indicates if the tile is
			   compressed or not), bit 2 and 3 to not clear tile 1,...,.
			   Pattern is as follows:
			   | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
			   bits -------------------------------------------------
			   | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
			   rv100: clearmask covers 2x8 4x1 tiles, but one clear still
			   covers 256 pixels ?!?
			 */
			clearmask = 0x0;
		}

		BEGIN_RING(8);
		RADEON_WAIT_UNTIL_2D_IDLE();
		OUT_RING_REG(RADEON_RB3D_DEPTHCLEARVALUE,
			     tempRB3D_DEPTHCLEARVALUE);
		/* what offset is this exactly ? */
		OUT_RING_REG(RADEON_RB3D_ZMASKOFFSET, 0);
		/* need ctlstat, otherwise get some strange black flickering */
		OUT_RING_REG(RADEON_RB3D_ZCACHE_CTLSTAT,
			     RADEON_RB3D_ZC_FLUSH_ALL);
		ADVANCE_RING();

		for (i = 0; i < nbox; i++) {
			int tileoffset, nrtilesx, nrtilesy, j;
			/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
			if ((dev_priv->flags & RADEON_HAS_HIERZ)
			    && !(dev_priv->microcode_version == UCODE_R200)) {
				/* FIXME : figure this out for r200 (when hierz is enabled). Or
				   maybe r200 actually doesn't need to put the low-res z value into
				   the tile cache like r100, but just needs to clear the hi-level z-buffer?
				   Works for R100, both with hierz and without.
				   R100 seems to operate on 2x1 8x8 tiles, but...
				   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
				   problematic with resolutions which are not 64 pix aligned? */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					OUT_RING(tileoffset * 8);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			} else if (dev_priv->microcode_version == UCODE_R200) {
				/* works for rv250. */
				/* find first macro tile (8x2 4x4 z-pixels on rv250) */
				tileoffset =
				    ((pbox[i].y1 >> 3) * depthpixperline +
				     pbox[i].x1) >> 5;
				nrtilesx =
				    (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
				nrtilesy =
				    (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					/* first tile */
					/* judging by the first tile offset needed, could possibly
					   directly address/clear 4x4 tiles instead of 8x2 * 4x4
					   macro tiles, though would still need clear mask for
					   right/bottom if truly 4x4 granularity is desired ? */
					OUT_RING(tileoffset * 16);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 1);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 5;
				}
			} else {	/* rv 100 */
				/* rv100 might not need 64 pix alignment, who knows */
				/* offsets are, hmm, weird */
				tileoffset =
				    ((pbox[i].y1 >> 4) * depthpixperline +
				     pbox[i].x1) >> 6;
				nrtilesx =
				    ((pbox[i].x2 & ~63) -
				     (pbox[i].x1 & ~63)) >> 4;
				nrtilesy =
				    (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
				for (j = 0; j <= nrtilesy; j++) {
					BEGIN_RING(4);
					OUT_RING(CP_PACKET3
						 (RADEON_3D_CLEAR_ZMASK, 2));
					OUT_RING(tileoffset * 128);
					/* the number of tiles to clear */
					OUT_RING(nrtilesx + 4);
					/* clear mask : chooses the clearing pattern. */
					OUT_RING(clearmask);
					ADVANCE_RING();
					tileoffset += depthpixperline >> 6;
				}
			}
		}

		/* TODO don't always clear all hi-level z tiles */
		if ((dev_priv->flags & RADEON_HAS_HIERZ)
		    && (dev_priv->microcode_version == UCODE_R200)
		    && (flags & RADEON_USE_HIERZ))
			/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
			/* FIXME : the mask supposedly contains low-res z values. So can't set
			   just to the max (0xff? or actually 0x3fff?), need to take z clear
			   value into account? */
		{
			BEGIN_RING(4);
			OUT_RING(CP_PACKET3(RADEON_3D_CLEAR_HIZ, 2));
			OUT_RING(0x0);	/* First tile */
			OUT_RING(0x3cc0);
			OUT_RING((0xff << 22) | (0xff << 6) | 0x003f003f);
			ADVANCE_RING();
		}
	}

	/* We have to clear the depth and/or stencil buffers by
	 * rendering a quad into just those buffers.  Thus, we have to
	 * make sure the 3D engine is configured correctly.
	 */
	else if ((dev_priv->microcode_version == UCODE_R200) &&
		 (flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		int tempPP_CNTL;
		int tempRE_CNTL;
		int tempRB3D_CNTL;
		int tempRB3D_ZSTENCILCNTL;
		int tempRB3D_STENCILREFMASK;
		int tempRB3D_PLANEMASK;
		int tempSE_CNTL;
		int tempSE_VTE_CNTL;
		int tempSE_VTX_FMT_0;
		int tempSE_VTX_FMT_1;
		int tempSE_VAP_CNTL;
		int tempRE_AUX_SCISSOR_CNTL;

		tempPP_CNTL = 0;
		tempRE_CNTL = 0;

		tempRB3D_CNTL = depth_clear->rb3d_cntl;

		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
		tempRB3D_STENCILREFMASK = 0x0;

		tempSE_CNTL = depth_clear->se_cntl;

		/* Disable TCL */

		tempSE_VAP_CNTL = (	/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK | */
					  (0x9 <<
					   SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));

		tempRB3D_PLANEMASK = 0x0;

		tempRE_AUX_SCISSOR_CNTL = 0x0;

		tempSE_VTE_CNTL =
		    SE_VTE_CNTL__VTX_XY_FMT_MASK | SE_VTE_CNTL__VTX_Z_FMT_MASK;

		/* Vertex format (X, Y, Z, W) */
		tempSE_VTX_FMT_0 =
		    SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
		    SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
		tempSE_VTX_FMT_1 = 0x0;

		/*
		 * Depth buffer specific enables
		 */
		if (flags & RADEON_DEPTH) {
			/* Enable depth buffer */
			tempRB3D_CNTL |= RADEON_Z_ENABLE;
		} else {
			/* Disable depth buffer */
			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
		}

		/*
		 * Stencil buffer specific enables
		 */
		if (flags & RADEON_STENCIL) {
			tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = clear->depth_mask;
		} else {
			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
			tempRB3D_STENCILREFMASK = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(26);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING_REG(RADEON_PP_CNTL, tempPP_CNTL);
		OUT_RING_REG(R200_RE_CNTL, tempRE_CNTL);
		OUT_RING_REG(RADEON_RB3D_CNTL, tempRB3D_CNTL);
		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK,
			     tempRB3D_STENCILREFMASK);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK);
		OUT_RING_REG(RADEON_SE_CNTL, tempSE_CNTL);
		OUT_RING_REG(R200_SE_VTE_CNTL, tempSE_VTE_CNTL);
		OUT_RING_REG(R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0);
		OUT_RING_REG(R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1);
		OUT_RING_REG(R200_SE_VAP_CNTL, tempSE_VAP_CNTL);
		OUT_RING_REG(R200_RE_AUX_SCISSOR_CNTL, tempRE_AUX_SCISSOR_CNTL);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(14);
			OUT_RING(CP_PACKET3(R200_3D_DRAW_IMMD_2, 12));
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x3f800000);
			ADVANCE_RING();
		}
	} else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {

		int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;

		rb3d_cntl = depth_clear->rb3d_cntl;

		if (flags & RADEON_DEPTH) {
			rb3d_cntl |= RADEON_Z_ENABLE;
		} else {
			rb3d_cntl &= ~RADEON_Z_ENABLE;
		}

		if (flags & RADEON_STENCIL) {
			rb3d_cntl |= RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = clear->depth_mask;	/* misnamed field */
		} else {
			rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
			rb3d_stencilrefmask = 0x00000000;
		}

		if (flags & RADEON_USE_COMP_ZBUF) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
			    RADEON_Z_DECOMPRESSION_ENABLE;
		}
		if (flags & RADEON_USE_HIERZ) {
			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
		}

		BEGIN_RING(13);
		RADEON_WAIT_UNTIL_2D_IDLE();

		OUT_RING(CP_PACKET0(RADEON_PP_CNTL, 1));
		OUT_RING(0x00000000);
		OUT_RING(rb3d_cntl);

		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
		OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
		OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);
		ADVANCE_RING();

		/* Make sure we restore the 3D state next time.
		 */
		sarea_priv->ctx_owner = 0;

		for (i = 0; i < nbox; i++) {

			/* Funny that this should be required --
			 *  sets top-left?
			 */
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

			BEGIN_RING(15);

			OUT_RING(CP_PACKET3(RADEON_3D_DRAW_IMMD, 13));
			OUT_RING(RADEON_VTX_Z_PRESENT |
				 RADEON_VTX_PKCOLOR_PRESENT);
			OUT_RING((RADEON_PRIM_TYPE_RECT_LIST |
				  RADEON_PRIM_WALK_RING |
				  RADEON_MAOS_ENABLE |
				  RADEON_VTX_FMT_RADEON_MODE |
				  (3 << RADEON_NUM_VERTICES_SHIFT)));

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X1]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			OUT_RING(depth_boxes[i].ui[CLEAR_X2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_Y2]);
			OUT_RING(depth_boxes[i].ui[CLEAR_DEPTH]);
			OUT_RING(0x0);

			ADVANCE_RING();
		}
	}

	/* Increment the clear counter.  The client-side 3D driver must
	 * wait on this value before performing the clear ioctl.  We
	 * need this because the card's so damned fast...
	 */
	sarea_priv->last_clear++;

	BEGIN_RING(4);

	RADEON_CLEAR_AGE(sarea_priv->last_clear);
	RADEON_WAIT_UNTIL_IDLE();

	ADVANCE_RING();
}

static void radeon_cp_dispatch_swap(struct drm_device *dev,
				    struct drm_master *master)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = master->driver_priv;
	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
	int nbox = sarea_priv->nbox;
	struct drm_clip_rect *pbox = sarea_priv->boxes;
	int i;
	RING_LOCALS;
	DRM_DEBUG("\n");

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes)
		radeon_cp_performance_boxes(dev_priv, master_priv);

	/* Wait for the 3D stream to idle before dispatching the bitblt.
	 * This will prevent data corruption between the two streams.
	 */
	BEGIN_RING(2);

	RADEON_WAIT_UNTIL_3D_IDLE();

	ADVANCE_RING();

	for (i = 0; i < nbox; i++) {
		int x = pbox[i].x1;
		int y = pbox[i].y1;
		int w = pbox[i].x2 - x;
		int h = pbox[i].y2 - y;

		DRM_DEBUG("%d,%d-%d,%d\n", x, y, w, h);

		BEGIN_RING(9);

		OUT_RING(CP_PACKET0(RADEON_DP_GUI_MASTER_CNTL, 0));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (dev_priv->color_fmt << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);

		/* Make this work even if front & back are flipped:
		 */
		OUT_RING(CP_PACKET0(RADEON_SRC_PITCH_OFFSET, 1));
		if (sarea_priv->pfCurrentPage == 0) {
			OUT_RING(dev_priv->back_pitch_offset);
			OUT_RING(dev_priv->front_pitch_offset);
		} else {
			OUT_RING(dev_priv->front_pitch_offset);
			OUT_RING(dev_priv->back_pitch_offset);
		}

		OUT_RING(CP_PACKET0(RADEON_SRC_X_Y, 2));
		OUT_RING((x << 16) | y);
		OUT_RING((x << 16) | y);
		OUT_RING((w << 16) | h);

		ADVANCE_RING();
	}

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	sarea_priv->last_frame++;

	BEGIN_RING(4);

	RADEON_FRAME_AGE(sarea_priv->last_frame);
	RADEON_WAIT_UNTIL_2D_IDLE();

	ADVANCE_RING();
}

void radeon_cp_dispatch_flip(struct drm_device *dev, struct drm_master *master)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = master->driver_priv;
	struct drm_sarea *sarea = (struct drm_sarea *)master_priv->sarea->handle;
	int offset = (master_priv->sarea_priv->pfCurrentPage == 1)
	    ? dev_priv->front_offset : dev_priv->back_offset;
	RING_LOCALS;
	DRM_DEBUG("pfCurrentPage=%d\n",
		  master_priv->sarea_priv->pfCurrentPage);

	/* Do some trivial performance monitoring...
	 */
	if (dev_priv->do_boxes) {
		dev_priv->stats.boxes |= RADEON_BOX_FLIP;
		radeon_cp_performance_boxes(dev_priv, master_priv);
	}

	/* Update the frame offsets for both CRTCs
	 */
	BEGIN_RING(6);

	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING_REG(RADEON_CRTC_OFFSET,
		     ((sarea->frame.y * dev_priv->front_pitch +
		       sarea->frame.x * (dev_priv->color_fmt - 2)) & ~7)
		     + offset);
	OUT_RING_REG(RADEON_CRTC2_OFFSET, master_priv->sarea_priv->crtc2_base
		     + offset);

	ADVANCE_RING();

	/* Increment the frame counter.  The client-side 3D driver must
	 * throttle the framerate by waiting for this value before
	 * performing the swapbuffer ioctl.
	 */
	master_priv->sarea_priv->last_frame++;
	master_priv->sarea_priv->pfCurrentPage =
	    1 - master_priv->sarea_priv->pfCurrentPage;

	BEGIN_RING(2);

	RADEON_FRAME_AGE(master_priv->sarea_priv->last_frame);

	ADVANCE_RING();
}

static int bad_prim_vertex_nr(int primitive, int nr)
{
	switch (primitive & RADEON_PRIM_TYPE_MASK) {
	case RADEON_PRIM_TYPE_NONE:
	case RADEON_PRIM_TYPE_POINT:
		return nr < 1;
	case RADEON_PRIM_TYPE_LINE:
		return (nr & 1) || nr == 0;
	case RADEON_PRIM_TYPE_LINE_STRIP:
		return nr < 2;
	case RADEON_PRIM_TYPE_TRI_LIST:
	case RADEON_PRIM_TYPE_3VRT_POINT_LIST:
	case RADEON_PRIM_TYPE_3VRT_LINE_LIST:
	case RADEON_PRIM_TYPE_RECT_LIST:
		return nr % 3 || nr == 0;
	case RADEON_PRIM_TYPE_TRI_FAN:
	case RADEON_PRIM_TYPE_TRI_STRIP:
		return nr < 3;
	default:
		return 1;
	}
}
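
/* A primitive description as consumed by the vertex/index dispatchers below:
 * [start, finish) delimits the byte range of the buffer being drawn, prim
 * packs the RADEON_PRIM_TYPE_* and walk mode, and offset locates the index
 * data relative to the start of the GART buffer area.
 */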
typedef struct {
	unsigned int start;
	unsigned int finish;
	unsigned int prim;
	unsigned int numverts;
	unsigned int offset;
	unsigned int vc_format;
} drm_radeon_tcl_prim_t;

static void radeon_cp_dispatch_vertex(struct drm_device *dev,
				      struct drm_file *file_priv,
				      struct drm_buf *buf,
				      drm_radeon_tcl_prim_t *prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
	int offset = dev_priv->gart_buffers_offset + buf->offset + prim->start;
	int numverts = (int)prim->numverts;
	int nbox = sarea_priv->nbox;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d %d verts\n",
		  prim->prim,
		  prim->vc_format, prim->start, prim->finish, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, prim->numverts)) {
		DRM_ERROR("bad prim %x numverts %d\n",
			  prim->prim, prim->numverts);
		return;
	}

	do {
		/* Emit the next cliprect */
		if (i < nbox) {
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);
		}

		/* Emit the vertex buffer rendering commands */
		BEGIN_RING(5);

		OUT_RING(CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, 3));
		OUT_RING(offset);
		OUT_RING(numverts);
		OUT_RING(prim->vc_format);
		OUT_RING(prim->prim | RADEON_PRIM_WALK_LIST |
			 RADEON_COLOR_ORDER_RGBA |
			 RADEON_VTX_FMT_RADEON_MODE |
			 (numverts << RADEON_NUM_VERTICES_SHIFT));

		ADVANCE_RING();

		i++;
	} while (i < nbox);
}

void radeon_cp_discard_buffer(struct drm_device *dev,
			      struct drm_master *master, struct drm_buf *buf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = master->driver_priv;
	drm_radeon_buf_priv_t *buf_priv = buf->dev_private;
	RING_LOCALS;

	buf_priv->age = ++master_priv->sarea_priv->last_dispatch;

	/* Emit the vertex buffer age */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600) {
		BEGIN_RING(3);
		R600_DISPATCH_AGE(buf_priv->age);
		ADVANCE_RING();
	} else {
		BEGIN_RING(2);
		RADEON_DISPATCH_AGE(buf_priv->age);
		ADVANCE_RING();
	}

	buf->pending = 1;
	buf->used = 0;
}
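
/* The age emitted above lands in a scratch register once the CP has passed
 * this point in the stream, which is how the freelist can later tell that a
 * pending buffer has actually been consumed and is safe to hand out again.
 */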
static void radeon_cp_dispatch_indirect(struct drm_device *dev,
					struct drm_buf *buf, int start, int end)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;
	DRM_DEBUG("buf=%d s=0x%x e=0x%x\n", buf->idx, start, end);

	if (start != end) {
		int offset = (dev_priv->gart_buffers_offset
			      + buf->offset + start);
		int dwords = (end - start + 3) / sizeof(u32);

		/* Indirect buffer data must be an even number of
		 * dwords, so if we've been given an odd number we must
		 * pad the data with a Type-2 CP packet.
		 */
		if (dwords & 1) {
			u32 *data = (u32 *)
			    ((char *)dev->agp_buffer_map->handle
			     + buf->offset + start);
			data[dwords++] = RADEON_CP_PACKET2;
		}

		/* Fire off the indirect buffer */
		BEGIN_RING(3);

		OUT_RING(CP_PACKET0(RADEON_CP_IB_BASE, 1));
		OUT_RING(offset);
		OUT_RING(dwords);

		ADVANCE_RING();
	}
}

static void radeon_cp_dispatch_indices(struct drm_device *dev,
				       struct drm_master *master,
				       struct drm_buf *elt_buf,
				       drm_radeon_tcl_prim_t *prim)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = master->driver_priv;
	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
	int offset = dev_priv->gart_buffers_offset + prim->offset;
	u32 *data;
	int dwords;
	int i = 0;
	int start = prim->start + RADEON_INDEX_PRIM_OFFSET;
	int count = (prim->finish - start) / sizeof(u16);
	int nbox = sarea_priv->nbox;

	DRM_DEBUG("hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n",
		  prim->prim,
		  prim->vc_format,
		  prim->start, prim->finish, prim->offset, prim->numverts);

	if (bad_prim_vertex_nr(prim->prim, count)) {
		DRM_ERROR("bad prim %x count %d\n", prim->prim, count);
		return;
	}

	if (start >= prim->finish || (prim->start & 0x7)) {
		DRM_ERROR("buffer prim %d\n", prim->prim);
		return;
	}

	dwords = (prim->finish - prim->start + 3) / sizeof(u32);

	data = (u32 *) ((char *)dev->agp_buffer_map->handle +
			elt_buf->offset + prim->start);

	data[0] = CP_PACKET3(RADEON_3D_RNDR_GEN_INDX_PRIM, dwords - 2);
	data[1] = offset;
	data[2] = prim->numverts;
	data[3] = prim->vc_format;
	data[4] = (prim->prim |
		   RADEON_PRIM_WALK_IND |
		   RADEON_COLOR_ORDER_RGBA |
		   RADEON_VTX_FMT_RADEON_MODE |
		   (count << RADEON_NUM_VERTICES_SHIFT));

	do {
		if (i < nbox)
			radeon_emit_clip_rect(dev_priv, &sarea_priv->boxes[i]);

		radeon_cp_dispatch_indirect(dev, elt_buf,
					    prim->start, prim->finish);

		i++;
	} while (i < nbox);
}
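
/* Note: rather than copying indices onto the ring, the code above builds a
 * RNDR_GEN_INDX_PRIM header in place in front of the index data and then
 * replays that region as an indirect buffer once per cliprect.
 */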
1672 #define RADEON_MAX_TEXTURE_SIZE RADEON_BUFFER_SIZE
1674 static int radeon_cp_dispatch_texture(struct drm_device * dev,
1675 struct drm_file *file_priv,
1676 drm_radeon_texture_t * tex,
1677 drm_radeon_tex_image_t * image)
1679 drm_radeon_private_t *dev_priv = dev->dev_private;
1680 struct drm_buf *buf;
1681 u32 format;
1682 u32 *buffer;
1683 const u8 __user *data;
1684 int size, dwords, tex_width, blit_width, spitch;
1685 u32 height;
1686 int i;
1687 u32 texpitch, microtile;
1688 u32 offset, byte_offset;
1689 RING_LOCALS;
1691 if (radeon_check_and_fixup_offset(dev_priv, file_priv, &tex->offset)) {
1692 DRM_ERROR("Invalid destination offset\n");
1693 return -EINVAL;
1696 dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
1698 /* Flush the pixel cache. This ensures no pixel data gets mixed
1699 * up with the texture data from the host data blit, otherwise
1700 * part of the texture image may be corrupted.
1702 BEGIN_RING(4);
1703 RADEON_FLUSH_CACHE();
1704 RADEON_WAIT_UNTIL_IDLE();
1705 ADVANCE_RING();
1707 /* The compiler won't optimize away a division by a variable,
1708 * even if the only legal values are powers of two. Thus, we'll
1709 * use a shift instead.
1711 switch (tex->format) {
1712 case RADEON_TXFORMAT_ARGB8888:
1713 case RADEON_TXFORMAT_RGBA8888:
1714 format = RADEON_COLOR_FORMAT_ARGB8888;
1715 tex_width = tex->width * 4;
1716 blit_width = image->width * 4;
1717 break;
1718 case RADEON_TXFORMAT_AI88:
1719 case RADEON_TXFORMAT_ARGB1555:
1720 case RADEON_TXFORMAT_RGB565:
1721 case RADEON_TXFORMAT_ARGB4444:
1722 case RADEON_TXFORMAT_VYUY422:
1723 case RADEON_TXFORMAT_YVYU422:
1724 format = RADEON_COLOR_FORMAT_RGB565;
1725 tex_width = tex->width * 2;
1726 blit_width = image->width * 2;
1727 break;
1728 case RADEON_TXFORMAT_I8:
1729 case RADEON_TXFORMAT_RGB332:
1730 format = RADEON_COLOR_FORMAT_CI8;
1731 tex_width = tex->width * 1;
1732 blit_width = image->width * 1;
1733 break;
1734 default:
1735 DRM_ERROR("invalid texture format %d\n", tex->format);
1736 return -EINVAL;
1738 spitch = blit_width >> 6;
1739 if (spitch == 0 && image->height > 1)
1740 return -EINVAL;
1742 texpitch = tex->pitch;
1743 if ((texpitch << 22) & RADEON_DST_TILE_MICRO) {
1744 microtile = 1;
1745 if (tex_width < 64) {
1746 texpitch &= ~(RADEON_DST_TILE_MICRO >> 22);
1747 /* we got tiled coordinates, untile them */
1748 image->x *= 2;
1750 } else
1751 microtile = 0;
1753 /* this might fail for zero-sized uploads - are those illegal? */
1754 if (!radeon_check_offset(dev_priv, tex->offset + image->height *
1755 blit_width - 1)) {
1756 DRM_ERROR("Invalid final destination offset\n");
1757 return -EINVAL;
1760 DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width);
1762 do {
1763 DRM_DEBUG("tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
1764 tex->offset >> 10, tex->pitch, tex->format,
1765 image->x, image->y, image->width, image->height);
1767 /* Make a copy of some parameters in case we have to
1768 * update them for a multi-pass texture blit.
1770 height = image->height;
1771 data = (const u8 __user *)image->data;
1773 size = height * blit_width;
1775 if (size > RADEON_MAX_TEXTURE_SIZE) {
1776 height = RADEON_MAX_TEXTURE_SIZE / blit_width;
1777 size = height * blit_width;
1778 } else if (size < 4 && size > 0) {
1779 size = 4;
1780 } else if (size == 0) {
1781 return 0;
1784 buf = radeon_freelist_get(dev);
1785 if (0 && !buf) {
1786 radeon_do_cp_idle(dev_priv);
1787 buf = radeon_freelist_get(dev);
1789 if (!buf) {
1790 DRM_DEBUG("EAGAIN\n");
1791 if (DRM_COPY_TO_USER(tex->image, image, sizeof(*image)))
1792 return -EFAULT;
1793 return -EAGAIN;
1796 /* Dispatch the indirect buffer.
1798 buffer =
1799 (u32 *) ((char *)dev->agp_buffer_map->handle + buf->offset);
1800 dwords = size / 4;
1802 #define RADEON_COPY_MT(_buf, _data, _width) \
1803 do { \
1804 if (DRM_COPY_FROM_USER(_buf, _data, (_width))) {\
1805 DRM_ERROR("EFAULT on pad, %d bytes\n", (_width)); \
1806 return -EFAULT; \
1808 } while(0)
		if (microtile) {
			/* texture micro tiling in use, minimum texture width is thus 16 bytes.
			   however, we cannot use blitter directly for texture width < 64 bytes,
			   since minimum tex pitch is 64 bytes and we need this to match
			   the texture width, otherwise the blitter will tile it wrong.
			   Thus, tiling manually in this case. Additionally, need to special
			   case tex height = 1, since our actual image will have height 2
			   and we need to ensure we don't read beyond the texture size
			   from user space. */
			if (tex->height == 1) {
				if (tex_width >= 64 || tex_width <= 16) {
					RADEON_COPY_MT(buffer, data,
						       (int)(tex_width * sizeof(u32)));
				} else if (tex_width == 32) {
					RADEON_COPY_MT(buffer, data, 16);
					RADEON_COPY_MT(buffer + 8,
						       data + 16, 16);
				}
			} else if (tex_width >= 64 || tex_width == 16) {
				RADEON_COPY_MT(buffer, data,
					       (int)(dwords * sizeof(u32)));
			} else if (tex_width < 16) {
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					buffer += 4;
					data += tex_width;
				}
			} else if (tex_width == 32) {
				/* TODO: make sure this works when not fitting in one buffer
				   (i.e. 32bytes x 2048...) */
				for (i = 0; i < tex->height; i += 2) {
					RADEON_COPY_MT(buffer, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 8, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 4, data, 16);
					data += 16;
					RADEON_COPY_MT(buffer + 12, data, 16);
					data += 16;
					buffer += 16;
				}
			}
		} else {
			if (tex_width >= 32) {
				/* Texture image width is larger than the minimum, so we
				 * can upload it directly.
				 */
				RADEON_COPY_MT(buffer, data,
					       (int)(dwords * sizeof(u32)));
			} else {
				/* Texture image width is less than the minimum, so we
				 * need to pad out each image scanline to the minimum
				 * width.
				 */
				for (i = 0; i < tex->height; i++) {
					RADEON_COPY_MT(buffer, data, tex_width);
					buffer += 8;
					data += tex_width;
				}
			}
		}

#undef RADEON_COPY_MT
		byte_offset = (image->y & ~2047) * blit_width;
		buf->file_priv = file_priv;
		buf->used = size;
		offset = dev_priv->gart_buffers_offset + buf->offset;
		BEGIN_RING(9);
		OUT_RING(CP_PACKET3(RADEON_CNTL_BITBLT_MULTI, 5));
		OUT_RING(RADEON_GMC_SRC_PITCH_OFFSET_CNTL |
			 RADEON_GMC_DST_PITCH_OFFSET_CNTL |
			 RADEON_GMC_BRUSH_NONE |
			 (format << 8) |
			 RADEON_GMC_SRC_DATATYPE_COLOR |
			 RADEON_ROP3_S |
			 RADEON_DP_SRC_SOURCE_MEMORY |
			 RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS);
		OUT_RING((spitch << 22) | (offset >> 10));
		OUT_RING((texpitch << 22) | ((tex->offset >> 10) + (byte_offset >> 10)));
		OUT_RING(0);
		OUT_RING((image->x << 16) | (image->y % 2048));
		OUT_RING((image->width << 16) | height);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();
		COMMIT_RING();

		radeon_cp_discard_buffer(dev, file_priv->master, buf);

		/* Update the input parameters for next time */
		image->y += height;
		image->height -= height;
		image->data = (const u8 __user *)image->data + size;
	} while (image->height > 0);

	/* Flush the pixel cache after the blit completes.  This ensures
	 * the texture data is written out to memory before rendering
	 * continues.
	 */
	BEGIN_RING(4);
	RADEON_FLUSH_CACHE();
	RADEON_WAIT_UNTIL_2D_IDLE();
	ADVANCE_RING();
	COMMIT_RING();

	return 0;
}

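/* Load a 32x32 polygon stipple pattern: reset RADEON_RE_STIPPLE_ADDR to 0,
 * then stream the 32 pattern dwords through RADEON_RE_STIPPLE_DATA.
 */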
static void radeon_cp_dispatch_stipple(struct drm_device * dev, u32 * stipple)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	int i;
	RING_LOCALS;
	DRM_DEBUG("\n");

	BEGIN_RING(35);

	OUT_RING(CP_PACKET0(RADEON_RE_STIPPLE_ADDR, 0));
	OUT_RING(0x00000000);

	OUT_RING(CP_PACKET0_TABLE(RADEON_RE_STIPPLE_DATA, 31));
	for (i = 0; i < 32; i++) {
		OUT_RING(stipple[i]);
	}

	ADVANCE_RING();
}

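/* Write one surface's flags and address bounds to its SURFACE0-relative
 * register bank (16 bytes of register space per surface).  The CP is
 * idled first, presumably so in-flight rendering never sees a
 * half-updated surface.
 */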
static void radeon_apply_surface_regs(int surf_index,
				      drm_radeon_private_t *dev_priv)
{
	if (!dev_priv->mmio)
		return;

	radeon_do_cp_idle(dev_priv);

	RADEON_WRITE(RADEON_SURFACE0_INFO + 16 * surf_index,
		     dev_priv->surfaces[surf_index].flags);
	RADEON_WRITE(RADEON_SURFACE0_LOWER_BOUND + 16 * surf_index,
		     dev_priv->surfaces[surf_index].lower);
	RADEON_WRITE(RADEON_SURFACE0_UPPER_BOUND + 16 * surf_index,
		     dev_priv->surfaces[surf_index].upper);
}

/* Allocates a virtual surface
 * doesn't always allocate a real surface, will stretch an existing
 * surface when possible.
 *
 * Note that refcount can be at most 2, since during a free refcount=3
 * might mean we have to allocate a new surface which might not always
 * be available.
 * For example: we allocate three contiguous surfaces ABC. If B is
 * freed, we suddenly need two surfaces to store A and C, which might
 * not always be available.
 */
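/* Illustrative sketch (not an additional code path): with the "extend
 * after" rule below, an existing surface A = [0x0000,0x0fff] followed by
 * an allocation B = [0x1000,0x1fff] with identical flags stretches A's
 * real surface to [0x0000,0x1fff] with refcount 2, instead of consuming
 * a second hardware surface slot.
 */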
static int alloc_surface(drm_radeon_surface_alloc_t *new,
			 drm_radeon_private_t *dev_priv,
			 struct drm_file *file_priv)
{
	struct radeon_virt_surface *s;
	int i;
	int virt_surface_index;
	uint32_t new_upper, new_lower;

	new_lower = new->address;
	new_upper = new_lower + new->size - 1;

	/* sanity check */
	if ((new_lower >= new_upper) || (new->flags == 0) || (new->size == 0) ||
	    ((new_upper & RADEON_SURF_ADDRESS_FIXED_MASK) !=
	     RADEON_SURF_ADDRESS_FIXED_MASK)
	    || ((new_lower & RADEON_SURF_ADDRESS_FIXED_MASK) != 0))
		return -1;

	/* make sure there is no overlap with existing surfaces */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		if ((dev_priv->surfaces[i].refcount != 0) &&
		    (((new_lower >= dev_priv->surfaces[i].lower) &&
		      (new_lower < dev_priv->surfaces[i].upper)) ||
		     ((new_lower < dev_priv->surfaces[i].lower) &&
		      (new_upper > dev_priv->surfaces[i].lower)))) {
			return -1;
		}
	}

	/* find a virtual surface */
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++)
		if (dev_priv->virt_surfaces[i].file_priv == NULL)
			break;
	if (i == 2 * RADEON_MAX_SURFACES) {
		return -1;
	}
	virt_surface_index = i;

	/* try to reuse an existing surface */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		/* extend before */
		if ((dev_priv->surfaces[i].refcount == 1) &&
		    (new->flags == dev_priv->surfaces[i].flags) &&
		    (new_upper + 1 == dev_priv->surfaces[i].lower)) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->file_priv = file_priv;
			dev_priv->surfaces[i].refcount++;
			dev_priv->surfaces[i].lower = s->lower;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}

		/* extend after */
		if ((dev_priv->surfaces[i].refcount == 1) &&
		    (new->flags == dev_priv->surfaces[i].flags) &&
		    (new_lower == dev_priv->surfaces[i].upper + 1)) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->file_priv = file_priv;
			dev_priv->surfaces[i].refcount++;
			dev_priv->surfaces[i].upper = s->upper;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}
	}

	/* okay, we need a new one */
	for (i = 0; i < RADEON_MAX_SURFACES; i++) {
		if (dev_priv->surfaces[i].refcount == 0) {
			s = &(dev_priv->virt_surfaces[virt_surface_index]);
			s->surface_index = i;
			s->lower = new_lower;
			s->upper = new_upper;
			s->flags = new->flags;
			s->file_priv = file_priv;
			dev_priv->surfaces[i].refcount = 1;
			dev_priv->surfaces[i].lower = s->lower;
			dev_priv->surfaces[i].upper = s->upper;
			dev_priv->surfaces[i].flags = s->flags;
			radeon_apply_surface_regs(s->surface_index, dev_priv);
			return virt_surface_index;
		}
	}

	/* we didn't find anything */
	return -1;
}

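/* Release the virtual surface owned by file_priv that starts at 'lower':
 * the real surface is shrunk back by the freed range, its refcount
 * dropped, and its flags cleared once the last reference is gone.
 * Returns 0 on success, 1 if no matching virtual surface exists.
 */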
static int free_surface(struct drm_file *file_priv,
			drm_radeon_private_t * dev_priv,
			int lower)
{
	struct radeon_virt_surface *s;
	int i;
	/* find the virtual surface */
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
		s = &(dev_priv->virt_surfaces[i]);
		if (s->file_priv) {
			if ((lower == s->lower) && (file_priv == s->file_priv))
			{
				if (dev_priv->surfaces[s->surface_index].
				    lower == s->lower)
					dev_priv->surfaces[s->surface_index].
					    lower = s->upper;

				if (dev_priv->surfaces[s->surface_index].
				    upper == s->upper)
					dev_priv->surfaces[s->surface_index].
					    upper = s->lower;

				dev_priv->surfaces[s->surface_index].refcount--;
				if (dev_priv->surfaces[s->surface_index].
				    refcount == 0)
					dev_priv->surfaces[s->surface_index].
					    flags = 0;
				s->file_priv = NULL;
				radeon_apply_surface_regs(s->surface_index,
							  dev_priv);
				return 0;
			}
		}
	}
	return 1;
}

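/* Free every virtual surface still owned by file_priv; called on
 * per-client teardown and, with PCIGART_FILE_PRIV, on last close.
 */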
static void radeon_surfaces_release(struct drm_file *file_priv,
				    drm_radeon_private_t * dev_priv)
{
	int i;
	for (i = 0; i < 2 * RADEON_MAX_SURFACES; i++) {
		if (dev_priv->virt_surfaces[i].file_priv == file_priv)
			free_surface(file_priv, dev_priv,
				     dev_priv->virt_surfaces[i].lower);
	}
}

/* ================================================================
 * IOCTL functions
 */
static int radeon_surface_alloc(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_surface_alloc_t *alloc = data;

	if (alloc_surface(alloc, dev_priv, file_priv) == -1)
		return -EINVAL;
	else
		return 0;
}

static int radeon_surface_free(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_surface_free_t *memfree = data;

	if (free_surface(file_priv, dev_priv, memfree->address))
		return -EINVAL;
	else
		return 0;
}

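/* DRM_RADEON_CLEAR ioctl: copy the caller's depth clear rectangles in
 * from user space (capped at RADEON_NR_SAREA_CLIPRECTS) and dispatch
 * the clear through the ring.
 */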
static int radeon_cp_clear(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;
	drm_radeon_clear_t *clear = data;
	drm_radeon_clear_rect_t depth_boxes[RADEON_NR_SAREA_CLIPRECTS];
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	if (DRM_COPY_FROM_USER(&depth_boxes, clear->depth_boxes,
			       sarea_priv->nbox * sizeof(depth_boxes[0])))
		return -EFAULT;

	radeon_cp_dispatch_clear(dev, file_priv->master, clear, depth_boxes);

	COMMIT_RING();
	return 0;
}

/* Not sure why this isn't set all the time:
 */
static int radeon_do_init_pageflip(struct drm_device *dev, struct drm_master *master)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = master->driver_priv;
	RING_LOCALS;

	DRM_DEBUG("\n");

	BEGIN_RING(6);
	RADEON_WAIT_UNTIL_3D_IDLE();
	OUT_RING(CP_PACKET0(RADEON_CRTC_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	OUT_RING(CP_PACKET0(RADEON_CRTC2_OFFSET_CNTL, 0));
	OUT_RING(RADEON_READ(RADEON_CRTC2_OFFSET_CNTL) |
		 RADEON_CRTC_OFFSET_FLIP_CNTL);
	ADVANCE_RING();

	dev_priv->page_flipping = 1;

	if (master_priv->sarea_priv->pfCurrentPage != 1)
		master_priv->sarea_priv->pfCurrentPage = 0;

	return 0;
}

/* Swapping and flipping are different operations, need different ioctls.
 * They can & should be intermixed to support multiple 3d windows.
 */
static int radeon_cp_flip(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (!dev_priv->page_flipping)
		radeon_do_init_pageflip(dev, file_priv->master);

	radeon_cp_dispatch_flip(dev, file_priv->master);

	COMMIT_RING();
	return 0;
}

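/* DRM_RADEON_SWAP ioctl: blit the back buffer to the front for each
 * SAREA cliprect; R600 and newer chips take their own dispatch path.
 */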
static int radeon_cp_swap(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	drm_radeon_sarea_t *sarea_priv = master_priv->sarea_priv;

	DRM_DEBUG("\n");

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS;

	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
		r600_cp_dispatch_swap(dev, file_priv);
	else
		radeon_cp_dispatch_swap(dev, file_priv->master);
	sarea_priv->ctx_owner = 0;

	COMMIT_RING();
	return 0;
}

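/* DRM_RADEON_VERTEX ioctl: validate the client's vertex buffer and
 * primitive type, flush any dirty SAREA state, then dispatch a single
 * primitive drawn from the buffer.
 */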
static int radeon_cp_vertex(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	drm_radeon_sarea_t *sarea_priv;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf;
	drm_radeon_vertex_t *vertex = data;
	drm_radeon_tcl_prim_t prim;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	sarea_priv = master_priv->sarea_priv;

	DRM_DEBUG("pid=%d index=%d count=%d discard=%d\n",
		  DRM_CURRENTPID, vertex->idx, vertex->count, vertex->discard);

	if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex->idx, dma->buf_count - 1);
		return -EINVAL;
	}

	if (vertex->prim < 0 || vertex->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
		DRM_ERROR("buffer prim %d\n", vertex->prim);
		return -EINVAL;
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex->idx];

	if (buf->file_priv != file_priv) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->file_priv);
		return -EINVAL;
	}

	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex->idx);
		return -EINVAL;
	}

	/* Build up a prim_t record:
	 */
	if (vertex->count) {
		buf->used = vertex->count;	/* not used? */

		if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
			if (radeon_emit_state(dev_priv, file_priv,
					      &sarea_priv->context_state,
					      sarea_priv->tex_state,
					      sarea_priv->dirty)) {
				DRM_ERROR("radeon_emit_state failed\n");
				return -EINVAL;
			}

			sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
					       RADEON_UPLOAD_TEX1IMAGES |
					       RADEON_UPLOAD_TEX2IMAGES |
					       RADEON_REQUIRE_QUIESCENCE);
		}

		prim.start = 0;
		prim.finish = vertex->count;	/* unused */
		prim.prim = vertex->prim;
		prim.numverts = vertex->count;
		prim.vc_format = sarea_priv->vc_format;

		radeon_cp_dispatch_vertex(dev, file_priv, buf, &prim);
	}

	if (vertex->discard) {
		radeon_cp_discard_buffer(dev, file_priv->master, buf);
	}

	COMMIT_RING();
	return 0;
}

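/* DRM_RADEON_INDICES ioctl: like radeon_cp_vertex, but the buffer holds
 * an indexed primitive; start/end are byte offsets into the buffer, and
 * start must leave room for the index-prim header
 * (RADEON_INDEX_PRIM_OFFSET).
 */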
static int radeon_cp_indices(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	drm_radeon_sarea_t *sarea_priv;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf;
	drm_radeon_indices_t *elts = data;
	drm_radeon_tcl_prim_t prim;
	int count;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	sarea_priv = master_priv->sarea_priv;

	DRM_DEBUG("pid=%d index=%d start=%d end=%d discard=%d\n",
		  DRM_CURRENTPID, elts->idx, elts->start, elts->end,
		  elts->discard);

	if (elts->idx < 0 || elts->idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  elts->idx, dma->buf_count - 1);
		return -EINVAL;
	}

	if (elts->prim < 0 || elts->prim > RADEON_PRIM_TYPE_3VRT_LINE_LIST) {
		DRM_ERROR("buffer prim %d\n", elts->prim);
		return -EINVAL;
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[elts->idx];

	if (buf->file_priv != file_priv) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->file_priv);
		return -EINVAL;
	}

	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", elts->idx);
		return -EINVAL;
	}

	count = (elts->end - elts->start) / sizeof(u16);
	elts->start -= RADEON_INDEX_PRIM_OFFSET;

	if (elts->start & 0x7) {
		DRM_ERROR("misaligned buffer 0x%x\n", elts->start);
		return -EINVAL;
	}

	if (elts->start < buf->used) {
		DRM_ERROR("no header 0x%x - 0x%x\n", elts->start, buf->used);
		return -EINVAL;
	}

	buf->used = elts->end;

	if (sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS) {
		if (radeon_emit_state(dev_priv, file_priv,
				      &sarea_priv->context_state,
				      sarea_priv->tex_state,
				      sarea_priv->dirty)) {
			DRM_ERROR("radeon_emit_state failed\n");
			return -EINVAL;
		}

		sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES |
				       RADEON_UPLOAD_TEX1IMAGES |
				       RADEON_UPLOAD_TEX2IMAGES |
				       RADEON_REQUIRE_QUIESCENCE);
	}

	/* Build up a prim_t record:
	 */
	prim.start = elts->start;
	prim.finish = elts->end;
	prim.prim = elts->prim;
	prim.offset = 0;	/* offset from start of dma buffers */
	prim.numverts = RADEON_MAX_VB_VERTS;	/* duh */
	prim.vc_format = sarea_priv->vc_format;

	radeon_cp_dispatch_indices(dev, file_priv->master, buf, &prim);
	if (elts->discard) {
		radeon_cp_discard_buffer(dev, file_priv->master, buf);
	}

	COMMIT_RING();
	return 0;
}

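/* DRM_RADEON_TEXTURE ioctl: copy the image descriptor in from user space
 * and hand it to the generation-specific texture upload routine.
 */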
static int radeon_cp_texture(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_texture_t *tex = data;
	drm_radeon_tex_image_t image;
	int ret;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (tex->image == NULL) {
		DRM_ERROR("null texture image!\n");
		return -EINVAL;
	}

	if (DRM_COPY_FROM_USER(&image,
			       (drm_radeon_tex_image_t __user *) tex->image,
			       sizeof(image)))
		return -EFAULT;

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
		ret = r600_cp_dispatch_texture(dev, file_priv, tex, &image);
	else
		ret = radeon_cp_dispatch_texture(dev, file_priv, tex, &image);

	return ret;
}

static int radeon_cp_stipple(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_stipple_t *stipple = data;
	u32 mask[32];

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	if (DRM_COPY_FROM_USER(&mask, stipple->mask, 32 * sizeof(u32)))
		return -EFAULT;

	RING_SPACE_TEST_WITH_RETURN(dev_priv);

	radeon_cp_dispatch_stipple(dev, mask);

	COMMIT_RING();
	return 0;
}

static int radeon_cp_indirect(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf;
	drm_radeon_indirect_t *indirect = data;
	RING_LOCALS;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	DRM_DEBUG("idx=%d s=%d e=%d d=%d\n",
		  indirect->idx, indirect->start, indirect->end,
		  indirect->discard);

	if (indirect->idx < 0 || indirect->idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  indirect->idx, dma->buf_count - 1);
		return -EINVAL;
	}

	buf = dma->buflist[indirect->idx];

	if (buf->file_priv != file_priv) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->file_priv);
		return -EINVAL;
	}

	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", indirect->idx);
		return -EINVAL;
	}

	if (indirect->start < buf->used) {
		DRM_ERROR("reusing indirect: start=0x%x actual=0x%x\n",
			  indirect->start, buf->used);
		return -EINVAL;
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf->used = indirect->end;

	/* Dispatch the indirect buffer full of commands from the
	 * X server.  This is insecure and is thus only available to
	 * privileged clients.
	 */
	if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
		r600_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
	else {
		/* Wait for the 3D stream to idle before the indirect buffer
		 * containing 2D acceleration commands is processed.
		 */
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_3D_IDLE();
		ADVANCE_RING();
		radeon_cp_dispatch_indirect(dev, buf, indirect->start, indirect->end);
	}

	if (indirect->discard) {
		radeon_cp_discard_buffer(dev, file_priv->master, buf);
	}

	COMMIT_RING();
	return 0;
}

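/* DRM_RADEON_VERTEX2 ioctl: multi-primitive variant of radeon_cp_vertex;
 * each prim record may name a state block, which is re-emitted only when
 * its index differs from the previous prim's (tracked in laststate).
 */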
static int radeon_cp_vertex2(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	drm_radeon_sarea_t *sarea_priv;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf;
	drm_radeon_vertex2_t *vertex = data;
	int i;
	unsigned char laststate;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	sarea_priv = master_priv->sarea_priv;

	DRM_DEBUG("pid=%d index=%d discard=%d\n",
		  DRM_CURRENTPID, vertex->idx, vertex->discard);

	if (vertex->idx < 0 || vertex->idx >= dma->buf_count) {
		DRM_ERROR("buffer index %d (of %d max)\n",
			  vertex->idx, dma->buf_count - 1);
		return -EINVAL;
	}

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	buf = dma->buflist[vertex->idx];

	if (buf->file_priv != file_priv) {
		DRM_ERROR("process %d using buffer owned by %p\n",
			  DRM_CURRENTPID, buf->file_priv);
		return -EINVAL;
	}

	if (buf->pending) {
		DRM_ERROR("sending pending buffer %d\n", vertex->idx);
		return -EINVAL;
	}

	if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS)
		return -EINVAL;

	for (laststate = 0xff, i = 0; i < vertex->nr_prims; i++) {
		drm_radeon_prim_t prim;
		drm_radeon_tcl_prim_t tclprim;

		if (DRM_COPY_FROM_USER(&prim, &vertex->prim[i], sizeof(prim)))
			return -EFAULT;

		if (prim.stateidx != laststate) {
			drm_radeon_state_t state;

			if (DRM_COPY_FROM_USER(&state,
					       &vertex->state[prim.stateidx],
					       sizeof(state)))
				return -EFAULT;

			if (radeon_emit_state2(dev_priv, file_priv, &state)) {
				DRM_ERROR("radeon_emit_state2 failed\n");
				return -EINVAL;
			}

			laststate = prim.stateidx;
		}

		tclprim.start = prim.start;
		tclprim.finish = prim.finish;
		tclprim.prim = prim.prim;
		tclprim.vc_format = prim.vc_format;

		if (prim.prim & RADEON_PRIM_WALK_IND) {
			tclprim.offset = prim.numverts * 64;
			tclprim.numverts = RADEON_MAX_VB_VERTS;	/* duh */

			radeon_cp_dispatch_indices(dev, file_priv->master, buf, &tclprim);
		} else {
			tclprim.numverts = prim.numverts;
			tclprim.offset = 0;	/* not used */

			radeon_cp_dispatch_vertex(dev, file_priv, buf, &tclprim);
		}

		if (sarea_priv->nbox == 1)
			sarea_priv->nbox = 0;
	}

	if (vertex->discard) {
		radeon_cp_discard_buffer(dev, file_priv->master, buf);
	}

	COMMIT_RING();
	return 0;
}

static int radeon_emit_packets(drm_radeon_private_t * dev_priv,
			       struct drm_file *file_priv,
			       drm_radeon_cmd_header_t header,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int id = (int)header.packet.packet_id;
	int sz, reg;
	int *data = (int *)cmdbuf->buf;
	RING_LOCALS;

	if (id >= RADEON_MAX_STATE_PACKETS)
		return -EINVAL;

	sz = packet[id].len;
	reg = packet[id].start;

	if (sz * sizeof(int) > cmdbuf->bufsz) {
		DRM_ERROR("Packet size provided larger than data provided\n");
		return -EINVAL;
	}

	if (radeon_check_and_fixup_packets(dev_priv, file_priv, id, data)) {
		DRM_ERROR("Packet verification failed\n");
		return -EINVAL;
	}

	BEGIN_RING(sz + 1);
	OUT_RING(CP_PACKET0(reg, (sz - 1)));
	OUT_RING_TABLE(data, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

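/* Emit a run of TCL scalar registers: program the index register with the
 * start offset and stride, then stream the payload through the
 * auto-incrementing data register.  radeon_emit_scalars2 below performs
 * the same operation with the start offset biased by 0x100.
 */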
static __inline__ int radeon_emit_scalars(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.scalars.count;
	int start = header.scalars.offset;
	int stride = header.scalars.stride;
	RING_LOCALS;

	BEGIN_RING(3 + sz);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();
	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

/* God this is ugly
 */
static __inline__ int radeon_emit_scalars2(drm_radeon_private_t *dev_priv,
					   drm_radeon_cmd_header_t header,
					   drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.scalars.count;
	int start = ((unsigned int)header.scalars.offset) + 0x100;
	int stride = header.scalars.stride;
	RING_LOCALS;

	BEGIN_RING(3 + sz);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_SCALAR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_SCALAR_DATA_REG, sz - 1));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();
	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

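/* Emit a run of TCL vector registers, flushing TCL state first; the
 * stride here counts octwords rather than dwords.
 */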
static __inline__ int radeon_emit_vectors(drm_radeon_private_t *dev_priv,
					  drm_radeon_cmd_header_t header,
					  drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.vectors.count;
	int start = header.vectors.offset;
	int stride = header.vectors.stride;
	RING_LOCALS;

	BEGIN_RING(5 + sz);
	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
	OUT_RING(start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

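/* Like radeon_emit_vectors, but writes a linear range (fixed stride of
 * one octword) whose start address arrives split across the addr_lo and
 * addr_hi fields of the command header.
 */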
static __inline__ int radeon_emit_veclinear(drm_radeon_private_t *dev_priv,
					    drm_radeon_cmd_header_t header,
					    drm_radeon_kcmd_buffer_t *cmdbuf)
{
	int sz = header.veclinear.count * 4;
	int start = header.veclinear.addr_lo | (header.veclinear.addr_hi << 8);
	RING_LOCALS;

	if (!sz)
		return 0;
	if (sz * 4 > cmdbuf->bufsz)
		return -EINVAL;

	BEGIN_RING(5 + sz);
	OUT_RING_REG(RADEON_SE_TCL_STATE_FLUSH, 0);
	OUT_RING(CP_PACKET0(RADEON_SE_TCL_VECTOR_INDX_REG, 0));
	OUT_RING(start | (1 << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT));
	OUT_RING(CP_PACKET0_TABLE(RADEON_SE_TCL_VECTOR_DATA_REG, (sz - 1)));
	OUT_RING_TABLE(cmdbuf->buf, sz);
	ADVANCE_RING();

	cmdbuf->buf += sz * sizeof(int);
	cmdbuf->bufsz -= sz * sizeof(int);
	return 0;
}

static int radeon_emit_packet3(struct drm_device * dev,
			       struct drm_file *file_priv,
			       drm_radeon_kcmd_buffer_t *cmdbuf)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	unsigned int cmdsz;
	int ret;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	BEGIN_RING(cmdsz);
	OUT_RING_TABLE(cmdbuf->buf, cmdsz);
	ADVANCE_RING();

	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}

static int radeon_emit_packet3_cliprect(struct drm_device *dev,
					struct drm_file *file_priv,
					drm_radeon_kcmd_buffer_t *cmdbuf,
					int orig_nbox)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_clip_rect box;
	unsigned int cmdsz;
	int ret;
	struct drm_clip_rect __user *boxes = cmdbuf->boxes;
	int i = 0;
	RING_LOCALS;

	DRM_DEBUG("\n");

	if ((ret = radeon_check_and_fixup_packet3(dev_priv, file_priv,
						  cmdbuf, &cmdsz))) {
		DRM_ERROR("Packet verification failed\n");
		return ret;
	}

	if (!orig_nbox)
		goto out;

	do {
		if (i < cmdbuf->nbox) {
			if (DRM_COPY_FROM_USER(&box, &boxes[i], sizeof(box)))
				return -EFAULT;
			/* FIXME The second and subsequent times round
			 * this loop, send a WAIT_UNTIL_3D_IDLE before
			 * calling emit_clip_rect(). This fixes a
			 * lockup on fast machines when sending
			 * several cliprects with a cmdbuf, as when
			 * waving a 2D window over a 3D
			 * window. Something in the commands from user
			 * space seems to hang the card when they're
			 * sent several times in a row. That would be
			 * the correct place to fix it but this works
			 * around it until I can figure that out - Tim
			 * Smith */
			if (i) {
				BEGIN_RING(2);
				RADEON_WAIT_UNTIL_3D_IDLE();
				ADVANCE_RING();
			}
			radeon_emit_clip_rect(dev_priv, &box);
		}

		BEGIN_RING(cmdsz);
		OUT_RING_TABLE(cmdbuf->buf, cmdsz);
		ADVANCE_RING();

	} while (++i < cmdbuf->nbox);
	if (cmdbuf->nbox == 1)
		cmdbuf->nbox = 0;

      out:
	cmdbuf->buf += cmdsz * 4;
	cmdbuf->bufsz -= cmdsz * 4;
	return 0;
}

static int radeon_emit_wait(struct drm_device * dev, int flags)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	RING_LOCALS;

	DRM_DEBUG("%x\n", flags);
	switch (flags) {
	case RADEON_WAIT_2D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_2D_IDLE();
		ADVANCE_RING();
		break;
	case RADEON_WAIT_3D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_3D_IDLE();
		ADVANCE_RING();
		break;
	case RADEON_WAIT_2D | RADEON_WAIT_3D:
		BEGIN_RING(2);
		RADEON_WAIT_UNTIL_IDLE();
		ADVANCE_RING();
		break;
	default:
		return -EINVAL;
	}

	return 0;
}

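/* DRM_RADEON_CMDBUF ioctl: copy the whole command stream into a kernel
 * buffer (avoiding check-then-use races against user space), then decode
 * it one header at a time, dispatching to the emit helpers above.
 * R300-class microcode hands the entire buffer to r300_do_cp_cmdbuf().
 */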
static int radeon_cp_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_device_dma *dma = dev->dma;
	struct drm_buf *buf = NULL;
	int idx;
	drm_radeon_kcmd_buffer_t *cmdbuf = data;
	drm_radeon_cmd_header_t header;
	int orig_nbox, orig_bufsz;
	char *kbuf = NULL;

	LOCK_TEST_WITH_RETURN(dev, file_priv);

	RING_SPACE_TEST_WITH_RETURN(dev_priv);
	VB_AGE_TEST_WITH_RETURN(dev_priv);

	if (cmdbuf->bufsz > 64 * 1024 || cmdbuf->bufsz < 0) {
		return -EINVAL;
	}

	/* Allocate an in-kernel area and copy in the cmdbuf.  Do this to avoid
	 * races between checking values and using those values in other code,
	 * and simply to avoid a lot of function calls to copy in data.
	 */
	orig_bufsz = cmdbuf->bufsz;
	if (orig_bufsz != 0) {
		kbuf = kmalloc(cmdbuf->bufsz, GFP_KERNEL);
		if (kbuf == NULL)
			return -ENOMEM;
		if (DRM_COPY_FROM_USER(kbuf, (void __user *)cmdbuf->buf,
				       cmdbuf->bufsz)) {
			kfree(kbuf);
			return -EFAULT;
		}
		cmdbuf->buf = kbuf;
	}

	orig_nbox = cmdbuf->nbox;

	if (dev_priv->microcode_version == UCODE_R300) {
		int temp;
		temp = r300_do_cp_cmdbuf(dev, file_priv, cmdbuf);

		if (orig_bufsz != 0)
			kfree(kbuf);

		return temp;
	}

	/* microcode_version != r300 */
	while (cmdbuf->bufsz >= sizeof(header)) {

		header.i = *(int *)cmdbuf->buf;
		cmdbuf->buf += sizeof(header);
		cmdbuf->bufsz -= sizeof(header);

		switch (header.header.cmd_type) {
		case RADEON_CMD_PACKET:
			DRM_DEBUG("RADEON_CMD_PACKET\n");
			if (radeon_emit_packets
			    (dev_priv, file_priv, header, cmdbuf)) {
				DRM_ERROR("radeon_emit_packets failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS:
			DRM_DEBUG("RADEON_CMD_SCALARS\n");
			if (radeon_emit_scalars(dev_priv, header, cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_VECTORS:
			DRM_DEBUG("RADEON_CMD_VECTORS\n");
			if (radeon_emit_vectors(dev_priv, header, cmdbuf)) {
				DRM_ERROR("radeon_emit_vectors failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_DMA_DISCARD:
			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
			idx = header.dma.buf_idx;
			if (idx < 0 || idx >= dma->buf_count) {
				DRM_ERROR("buffer index %d (of %d max)\n",
					  idx, dma->buf_count - 1);
				goto err;
			}

			buf = dma->buflist[idx];
			if (buf->file_priv != file_priv || buf->pending) {
				DRM_ERROR("bad buffer %p %p %d\n",
					  buf->file_priv, file_priv,
					  buf->pending);
				goto err;
			}

			radeon_cp_discard_buffer(dev, file_priv->master, buf);
			break;

		case RADEON_CMD_PACKET3:
			DRM_DEBUG("RADEON_CMD_PACKET3\n");
			if (radeon_emit_packet3(dev, file_priv, cmdbuf)) {
				DRM_ERROR("radeon_emit_packet3 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_PACKET3_CLIP:
			DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
			if (radeon_emit_packet3_cliprect
			    (dev, file_priv, cmdbuf, orig_nbox)) {
				DRM_ERROR("radeon_emit_packet3_clip failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_SCALARS2:
			DRM_DEBUG("RADEON_CMD_SCALARS2\n");
			if (radeon_emit_scalars2(dev_priv, header, cmdbuf)) {
				DRM_ERROR("radeon_emit_scalars2 failed\n");
				goto err;
			}
			break;

		case RADEON_CMD_WAIT:
			DRM_DEBUG("RADEON_CMD_WAIT\n");
			if (radeon_emit_wait(dev, header.wait.flags)) {
				DRM_ERROR("radeon_emit_wait failed\n");
				goto err;
			}
			break;
		case RADEON_CMD_VECLINEAR:
			DRM_DEBUG("RADEON_CMD_VECLINEAR\n");
			if (radeon_emit_veclinear(dev_priv, header, cmdbuf)) {
				DRM_ERROR("radeon_emit_veclinear failed\n");
				goto err;
			}
			break;

		default:
			DRM_ERROR("bad cmd_type %d at %p\n",
				  header.header.cmd_type,
				  cmdbuf->buf - sizeof(header));
			goto err;
		}
	}

	if (orig_bufsz != 0)
		kfree(kbuf);

	DRM_DEBUG("DONE\n");
	COMMIT_RING();
	return 0;

      err:
	if (orig_bufsz != 0)
		kfree(kbuf);
	return -EINVAL;
}

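/* DRM_RADEON_GETPARAM ioctl: read back one driver parameter into a
 * user-supplied int.  A minimal user-space sketch, assuming libdrm's
 * drmCommandWriteRead() and an already-open DRM fd:
 *
 *	drm_radeon_getparam_t gp = { .param = RADEON_PARAM_GART_BASE };
 *	int value = 0;
 *	gp.value = &value;
 *	drmCommandWriteRead(fd, DRM_RADEON_GETPARAM, &gp, sizeof(gp));
 */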
static int radeon_cp_getparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	drm_radeon_getparam_t *param = data;
	int value;

	DRM_DEBUG("pid=%d\n", DRM_CURRENTPID);

	switch (param->param) {
	case RADEON_PARAM_GART_BUFFER_OFFSET:
		value = dev_priv->gart_buffers_offset;
		break;
	case RADEON_PARAM_LAST_FRAME:
		dev_priv->stats.last_frame_reads++;
		value = GET_SCRATCH(dev_priv, 0);
		break;
	case RADEON_PARAM_LAST_DISPATCH:
		value = GET_SCRATCH(dev_priv, 1);
		break;
	case RADEON_PARAM_LAST_CLEAR:
		dev_priv->stats.last_clear_reads++;
		value = GET_SCRATCH(dev_priv, 2);
		break;
	case RADEON_PARAM_IRQ_NR:
		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
			value = 0;
		else
			value = drm_dev_to_irq(dev);
		break;
	case RADEON_PARAM_GART_BASE:
		value = dev_priv->gart_vm_start;
		break;
	case RADEON_PARAM_REGISTER_HANDLE:
		value = dev_priv->mmio->offset;
		break;
	case RADEON_PARAM_STATUS_HANDLE:
		value = dev_priv->ring_rptr_offset;
		break;
#if BITS_PER_LONG == 32
	/*
	 * This ioctl() doesn't work on 64-bit platforms because hw_lock is a
	 * pointer which can't fit into an int-sized variable.  According to
	 * Michel Dänzer, the ioctl() is only used on embedded platforms, so
	 * not supporting it shouldn't be a problem.  If the same functionality
	 * is needed on 64-bit platforms, a new ioctl() would have to be added,
	 * so backwards-compatibility for the embedded platforms can be
	 * maintained.  --davidm 4-Feb-2004.
	 */
	case RADEON_PARAM_SAREA_HANDLE:
		/* The lock is the first dword in the sarea. */
		/* no users of this parameter */
		break;
#endif
	case RADEON_PARAM_GART_TEX_HANDLE:
		value = dev_priv->gart_textures_offset;
		break;
	case RADEON_PARAM_SCRATCH_OFFSET:
		if (!dev_priv->writeback_works)
			return -EINVAL;
		if ((dev_priv->flags & RADEON_FAMILY_MASK) >= CHIP_R600)
			value = R600_SCRATCH_REG_OFFSET;
		else
			value = RADEON_SCRATCH_REG_OFFSET;
		break;
	case RADEON_PARAM_CARD_TYPE:
		if (dev_priv->flags & RADEON_IS_PCIE)
			value = RADEON_CARD_PCIE;
		else if (dev_priv->flags & RADEON_IS_AGP)
			value = RADEON_CARD_AGP;
		else
			value = RADEON_CARD_PCI;
		break;
	case RADEON_PARAM_VBLANK_CRTC:
		value = radeon_vblank_crtc_get(dev);
		break;
	case RADEON_PARAM_FB_LOCATION:
		value = radeon_read_fb_location(dev_priv);
		break;
	case RADEON_PARAM_NUM_GB_PIPES:
		value = dev_priv->num_gb_pipes;
		break;
	case RADEON_PARAM_NUM_Z_PIPES:
		value = dev_priv->num_z_pipes;
		break;
	default:
		DRM_DEBUG("Invalid parameter %d\n", param->param);
		return -EINVAL;
	}

	if (DRM_COPY_TO_USER(param->value, &value, sizeof(int))) {
		DRM_ERROR("copy_to_user\n");
		return -EFAULT;
	}

	return 0;
}

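/* DRM_RADEON_SETPARAM ioctl: lets the client (typically the X server)
 * adjust driver state such as the framebuffer offset delta, color
 * tiling, and PCI GART placement.
 */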
static int radeon_cp_setparam(struct drm_device *dev, void *data, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_master_private *master_priv = file_priv->master->driver_priv;
	drm_radeon_setparam_t *sp = data;
	struct drm_radeon_driver_file_fields *radeon_priv;

	switch (sp->param) {
	case RADEON_SETPARAM_FB_LOCATION:
		radeon_priv = file_priv->driver_priv;
		radeon_priv->radeon_fb_delta = dev_priv->fb_location -
		    sp->value;
		break;
	case RADEON_SETPARAM_SWITCH_TILING:
		if (sp->value == 0) {
			DRM_DEBUG("color tiling disabled\n");
			dev_priv->front_pitch_offset &= ~RADEON_DST_TILE_MACRO;
			dev_priv->back_pitch_offset &= ~RADEON_DST_TILE_MACRO;
			if (master_priv->sarea_priv)
				master_priv->sarea_priv->tiling_enabled = 0;
		} else if (sp->value == 1) {
			DRM_DEBUG("color tiling enabled\n");
			dev_priv->front_pitch_offset |= RADEON_DST_TILE_MACRO;
			dev_priv->back_pitch_offset |= RADEON_DST_TILE_MACRO;
			if (master_priv->sarea_priv)
				master_priv->sarea_priv->tiling_enabled = 1;
		}
		break;
	case RADEON_SETPARAM_PCIGART_LOCATION:
		dev_priv->pcigart_offset = sp->value;
		dev_priv->pcigart_offset_set = 1;
		break;
	case RADEON_SETPARAM_NEW_MEMMAP:
		dev_priv->new_memmap = sp->value;
		break;
	case RADEON_SETPARAM_PCIGART_TABLE_SIZE:
		dev_priv->gart_info.table_size = sp->value;
		if (dev_priv->gart_info.table_size < RADEON_PCIGART_TABLE_SIZE)
			dev_priv->gart_info.table_size = RADEON_PCIGART_TABLE_SIZE;
		break;
	case RADEON_SETPARAM_VBLANK_CRTC:
		return radeon_vblank_crtc_set(dev, sp->value);
		break;
	default:
		DRM_DEBUG("Invalid parameter %d\n", sp->param);
		return -EINVAL;
	}

	return 0;
}

/* When a client dies:
 *    - Check for and clean up flipped page state
 *    - Free any alloced GART memory.
 *    - Free any alloced radeon surfaces.
 *
 * DRM infrastructure takes care of reclaiming dma buffers.
 */
void radeon_driver_preclose(struct drm_device *dev, struct drm_file *file_priv)
{
	if (dev->dev_private) {
		drm_radeon_private_t *dev_priv = dev->dev_private;
		dev_priv->page_flipping = 0;
		radeon_mem_release(file_priv, dev_priv->gart_heap);
		radeon_mem_release(file_priv, dev_priv->fb_heap);
		radeon_surfaces_release(file_priv, dev_priv);
	}
}

void radeon_driver_lastclose(struct drm_device *dev)
{
	radeon_surfaces_release(PCIGART_FILE_PRIV, dev->dev_private);
	radeon_do_release(dev);
}

int radeon_driver_open(struct drm_device *dev, struct drm_file *file_priv)
{
	drm_radeon_private_t *dev_priv = dev->dev_private;
	struct drm_radeon_driver_file_fields *radeon_priv;

	DRM_DEBUG("\n");
	radeon_priv = kmalloc(sizeof(*radeon_priv), GFP_KERNEL);

	if (!radeon_priv)
		return -ENOMEM;

	file_priv->driver_priv = radeon_priv;

	if (dev_priv)
		radeon_priv->radeon_fb_delta = dev_priv->fb_location;
	else
		radeon_priv->radeon_fb_delta = 0;
	return 0;
}

void radeon_driver_postclose(struct drm_device *dev, struct drm_file *file_priv)
{
	struct drm_radeon_driver_file_fields *radeon_priv =
	    file_priv->driver_priv;

	kfree(radeon_priv);
}

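/* Ioctl dispatch table.  Entries flagged DRM_MASTER|DRM_ROOT_ONLY are
 * the CP setup, heap init, and indirect-buffer paths reserved for the
 * privileged server; the rest require only DRM_AUTH authentication.
 */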
struct drm_ioctl_desc radeon_ioctls[] = {
	DRM_IOCTL_DEF(DRM_RADEON_CP_INIT, radeon_cp_init, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_CP_START, radeon_cp_start, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_CP_STOP, radeon_cp_stop, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_CP_RESET, radeon_cp_reset, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_CP_IDLE, radeon_cp_idle, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_CP_RESUME, radeon_cp_resume, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_RESET, radeon_engine_reset, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_FULLSCREEN, radeon_fullscreen, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_SWAP, radeon_cp_swap, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_CLEAR, radeon_cp_clear, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_VERTEX, radeon_cp_vertex, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_INDICES, radeon_cp_indices, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_TEXTURE, radeon_cp_texture, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_STIPPLE, radeon_cp_stipple, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_INDIRECT, radeon_cp_indirect, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_VERTEX2, radeon_cp_vertex2, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_CMDBUF, radeon_cp_cmdbuf, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_GETPARAM, radeon_cp_getparam, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_FLIP, radeon_cp_flip, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_ALLOC, radeon_mem_alloc, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_FREE, radeon_mem_free, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_INIT_HEAP, radeon_mem_init_heap, DRM_AUTH|DRM_MASTER|DRM_ROOT_ONLY),
	DRM_IOCTL_DEF(DRM_RADEON_IRQ_EMIT, radeon_irq_emit, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_IRQ_WAIT, radeon_irq_wait, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_SETPARAM, radeon_cp_setparam, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_SURF_ALLOC, radeon_surface_alloc, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_SURF_FREE, radeon_surface_free, DRM_AUTH),
	DRM_IOCTL_DEF(DRM_RADEON_CS, r600_cs_legacy_ioctl, DRM_AUTH)
};

int radeon_max_ioctl = DRM_ARRAY_SIZE(radeon_ioctls);