/*
 * WIP FPC-III support
 * linux/fpc-iii.git: drivers/gpu/drm/savage/savage_state.c
 * blob e0d40ae67d54240e90e7a6499f2cb595bf70662f
 */
/* savage_state.c -- State and drawing support for Savage
 *
 * Copyright 2004 Felix Kuehling
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL FELIX KUEHLING BE LIABLE FOR
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF
 * CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 */
26 #include <linux/slab.h>
27 #include <linux/uaccess.h>
29 #include <drm/drm_device.h>
30 #include <drm/drm_file.h>
31 #include <drm/drm_print.h>
32 #include <drm/savage_drm.h>
34 #include "savage_drv.h"
36 void savage_emit_clip_rect_s3d(drm_savage_private_t * dev_priv,
37 const struct drm_clip_rect * pbox)
39 uint32_t scstart = dev_priv->state.s3d.new_scstart;
40 uint32_t scend = dev_priv->state.s3d.new_scend;
41 scstart = (scstart & ~SAVAGE_SCISSOR_MASK_S3D) |
42 ((uint32_t) pbox->x1 & 0x000007ff) |
43 (((uint32_t) pbox->y1 << 16) & 0x07ff0000);
44 scend = (scend & ~SAVAGE_SCISSOR_MASK_S3D) |
45 (((uint32_t) pbox->x2 - 1) & 0x000007ff) |
46 ((((uint32_t) pbox->y2 - 1) << 16) & 0x07ff0000);
47 if (scstart != dev_priv->state.s3d.scstart ||
48 scend != dev_priv->state.s3d.scend) {
49 DMA_LOCALS;
50 BEGIN_DMA(4);
51 DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
52 DMA_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2);
53 DMA_WRITE(scstart);
54 DMA_WRITE(scend);
55 dev_priv->state.s3d.scstart = scstart;
56 dev_priv->state.s3d.scend = scend;
57 dev_priv->waiting = 1;
58 DMA_COMMIT();
62 void savage_emit_clip_rect_s4(drm_savage_private_t * dev_priv,
63 const struct drm_clip_rect * pbox)
65 uint32_t drawctrl0 = dev_priv->state.s4.new_drawctrl0;
66 uint32_t drawctrl1 = dev_priv->state.s4.new_drawctrl1;
67 drawctrl0 = (drawctrl0 & ~SAVAGE_SCISSOR_MASK_S4) |
68 ((uint32_t) pbox->x1 & 0x000007ff) |
69 (((uint32_t) pbox->y1 << 12) & 0x00fff000);
70 drawctrl1 = (drawctrl1 & ~SAVAGE_SCISSOR_MASK_S4) |
71 (((uint32_t) pbox->x2 - 1) & 0x000007ff) |
72 ((((uint32_t) pbox->y2 - 1) << 12) & 0x00fff000);
73 if (drawctrl0 != dev_priv->state.s4.drawctrl0 ||
74 drawctrl1 != dev_priv->state.s4.drawctrl1) {
75 DMA_LOCALS;
76 BEGIN_DMA(4);
77 DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
78 DMA_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2);
79 DMA_WRITE(drawctrl0);
80 DMA_WRITE(drawctrl1);
81 dev_priv->state.s4.drawctrl0 = drawctrl0;
82 dev_priv->state.s4.drawctrl1 = drawctrl1;
83 dev_priv->waiting = 1;
84 DMA_COMMIT();
88 static int savage_verify_texaddr(drm_savage_private_t * dev_priv, int unit,
89 uint32_t addr)
91 if ((addr & 6) != 2) { /* reserved bits */
92 DRM_ERROR("bad texAddr%d %08x (reserved bits)\n", unit, addr);
93 return -EINVAL;
95 if (!(addr & 1)) { /* local */
96 addr &= ~7;
97 if (addr < dev_priv->texture_offset ||
98 addr >= dev_priv->texture_offset + dev_priv->texture_size) {
99 DRM_ERROR
100 ("bad texAddr%d %08x (local addr out of range)\n",
101 unit, addr);
102 return -EINVAL;
104 } else { /* AGP */
105 if (!dev_priv->agp_textures) {
106 DRM_ERROR("bad texAddr%d %08x (AGP not available)\n",
107 unit, addr);
108 return -EINVAL;
110 addr &= ~7;
111 if (addr < dev_priv->agp_textures->offset ||
112 addr >= (dev_priv->agp_textures->offset +
113 dev_priv->agp_textures->size)) {
114 DRM_ERROR
115 ("bad texAddr%d %08x (AGP addr out of range)\n",
116 unit, addr);
117 return -EINVAL;
120 return 0;
/*
 * Helpers for the savage_verify_state_*() functions.  Both expect `start`,
 * `count`, `regs` and `dev_priv` to be in scope: `regs` holds the values
 * for registers [start, start + count).
 *
 * SAVE_STATE copies the value for register `reg` into
 * dev_priv->state.<where> if `reg` is part of the range.
 * SAVE_STATE_MASK does the same but only replaces the bits set in `mask`.
 *
 * Both are wrapped in do { } while (0) so they behave as one statement,
 * e.g. inside an unbraced if/else.
 */
#define SAVE_STATE(reg,where)					\
do {								\
	if (start <= (reg) && start + count > (reg))		\
		dev_priv->state.where = regs[(reg) - start];	\
} while (0)
#define SAVE_STATE_MASK(reg,where,mask) do {			\
	if (start <= (reg) && start + count > (reg)) {		\
		uint32_t tmp;					\
		tmp = regs[(reg) - start];			\
		dev_priv->state.where = (tmp & (mask)) |	\
			(dev_priv->state.where & ~(mask));	\
	}							\
} while (0)
135 static int savage_verify_state_s3d(drm_savage_private_t * dev_priv,
136 unsigned int start, unsigned int count,
137 const uint32_t *regs)
139 if (start < SAVAGE_TEXPALADDR_S3D ||
140 start + count - 1 > SAVAGE_DESTTEXRWWATERMARK_S3D) {
141 DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
142 start, start + count - 1);
143 return -EINVAL;
146 SAVE_STATE_MASK(SAVAGE_SCSTART_S3D, s3d.new_scstart,
147 ~SAVAGE_SCISSOR_MASK_S3D);
148 SAVE_STATE_MASK(SAVAGE_SCEND_S3D, s3d.new_scend,
149 ~SAVAGE_SCISSOR_MASK_S3D);
151 /* if any texture regs were changed ... */
152 if (start <= SAVAGE_TEXCTRL_S3D &&
153 start + count > SAVAGE_TEXPALADDR_S3D) {
154 /* ... check texture state */
155 SAVE_STATE(SAVAGE_TEXCTRL_S3D, s3d.texctrl);
156 SAVE_STATE(SAVAGE_TEXADDR_S3D, s3d.texaddr);
157 if (dev_priv->state.s3d.texctrl & SAVAGE_TEXCTRL_TEXEN_MASK)
158 return savage_verify_texaddr(dev_priv, 0,
159 dev_priv->state.s3d.texaddr);
162 return 0;
165 static int savage_verify_state_s4(drm_savage_private_t * dev_priv,
166 unsigned int start, unsigned int count,
167 const uint32_t *regs)
169 int ret = 0;
171 if (start < SAVAGE_DRAWLOCALCTRL_S4 ||
172 start + count - 1 > SAVAGE_TEXBLENDCOLOR_S4) {
173 DRM_ERROR("invalid register range (0x%04x-0x%04x)\n",
174 start, start + count - 1);
175 return -EINVAL;
178 SAVE_STATE_MASK(SAVAGE_DRAWCTRL0_S4, s4.new_drawctrl0,
179 ~SAVAGE_SCISSOR_MASK_S4);
180 SAVE_STATE_MASK(SAVAGE_DRAWCTRL1_S4, s4.new_drawctrl1,
181 ~SAVAGE_SCISSOR_MASK_S4);
183 /* if any texture regs were changed ... */
184 if (start <= SAVAGE_TEXDESCR_S4 &&
185 start + count > SAVAGE_TEXPALADDR_S4) {
186 /* ... check texture state */
187 SAVE_STATE(SAVAGE_TEXDESCR_S4, s4.texdescr);
188 SAVE_STATE(SAVAGE_TEXADDR0_S4, s4.texaddr0);
189 SAVE_STATE(SAVAGE_TEXADDR1_S4, s4.texaddr1);
190 if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX0EN_MASK)
191 ret |= savage_verify_texaddr(dev_priv, 0,
192 dev_priv->state.s4.texaddr0);
193 if (dev_priv->state.s4.texdescr & SAVAGE_TEXDESCR_TEX1EN_MASK)
194 ret |= savage_verify_texaddr(dev_priv, 1,
195 dev_priv->state.s4.texaddr1);
198 return ret;
201 #undef SAVE_STATE
202 #undef SAVE_STATE_MASK
204 static int savage_dispatch_state(drm_savage_private_t * dev_priv,
205 const drm_savage_cmd_header_t * cmd_header,
206 const uint32_t *regs)
208 unsigned int count = cmd_header->state.count;
209 unsigned int start = cmd_header->state.start;
210 unsigned int count2 = 0;
211 unsigned int bci_size;
212 int ret;
213 DMA_LOCALS;
215 if (!count)
216 return 0;
218 if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
219 ret = savage_verify_state_s3d(dev_priv, start, count, regs);
220 if (ret != 0)
221 return ret;
222 /* scissor regs are emitted in savage_dispatch_draw */
223 if (start < SAVAGE_SCSTART_S3D) {
224 if (start + count > SAVAGE_SCEND_S3D + 1)
225 count2 = count - (SAVAGE_SCEND_S3D + 1 - start);
226 if (start + count > SAVAGE_SCSTART_S3D)
227 count = SAVAGE_SCSTART_S3D - start;
228 } else if (start <= SAVAGE_SCEND_S3D) {
229 if (start + count > SAVAGE_SCEND_S3D + 1) {
230 count -= SAVAGE_SCEND_S3D + 1 - start;
231 start = SAVAGE_SCEND_S3D + 1;
232 } else
233 return 0;
235 } else {
236 ret = savage_verify_state_s4(dev_priv, start, count, regs);
237 if (ret != 0)
238 return ret;
239 /* scissor regs are emitted in savage_dispatch_draw */
240 if (start < SAVAGE_DRAWCTRL0_S4) {
241 if (start + count > SAVAGE_DRAWCTRL1_S4 + 1)
242 count2 = count -
243 (SAVAGE_DRAWCTRL1_S4 + 1 - start);
244 if (start + count > SAVAGE_DRAWCTRL0_S4)
245 count = SAVAGE_DRAWCTRL0_S4 - start;
246 } else if (start <= SAVAGE_DRAWCTRL1_S4) {
247 if (start + count > SAVAGE_DRAWCTRL1_S4 + 1) {
248 count -= SAVAGE_DRAWCTRL1_S4 + 1 - start;
249 start = SAVAGE_DRAWCTRL1_S4 + 1;
250 } else
251 return 0;
255 bci_size = count + (count + 254) / 255 + count2 + (count2 + 254) / 255;
257 if (cmd_header->state.global) {
258 BEGIN_DMA(bci_size + 1);
259 DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D);
260 dev_priv->waiting = 1;
261 } else {
262 BEGIN_DMA(bci_size);
265 do {
266 while (count > 0) {
267 unsigned int n = count < 255 ? count : 255;
268 DMA_SET_REGISTERS(start, n);
269 DMA_COPY(regs, n);
270 count -= n;
271 start += n;
272 regs += n;
274 start += 2;
275 regs += 2;
276 count = count2;
277 count2 = 0;
278 } while (count);
280 DMA_COMMIT();
282 return 0;
285 static int savage_dispatch_dma_prim(drm_savage_private_t * dev_priv,
286 const drm_savage_cmd_header_t * cmd_header,
287 const struct drm_buf * dmabuf)
289 unsigned char reorder = 0;
290 unsigned int prim = cmd_header->prim.prim;
291 unsigned int skip = cmd_header->prim.skip;
292 unsigned int n = cmd_header->prim.count;
293 unsigned int start = cmd_header->prim.start;
294 unsigned int i;
295 BCI_LOCALS;
297 if (!dmabuf) {
298 DRM_ERROR("called without dma buffers!\n");
299 return -EINVAL;
302 if (!n)
303 return 0;
305 switch (prim) {
306 case SAVAGE_PRIM_TRILIST_201:
307 reorder = 1;
308 prim = SAVAGE_PRIM_TRILIST;
309 fallthrough;
310 case SAVAGE_PRIM_TRILIST:
311 if (n % 3 != 0) {
312 DRM_ERROR("wrong number of vertices %u in TRILIST\n",
314 return -EINVAL;
316 break;
317 case SAVAGE_PRIM_TRISTRIP:
318 case SAVAGE_PRIM_TRIFAN:
319 if (n < 3) {
320 DRM_ERROR
321 ("wrong number of vertices %u in TRIFAN/STRIP\n",
323 return -EINVAL;
325 break;
326 default:
327 DRM_ERROR("invalid primitive type %u\n", prim);
328 return -EINVAL;
331 if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
332 if (skip != 0) {
333 DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
334 return -EINVAL;
336 } else {
337 unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
338 (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
339 (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
340 if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
341 DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
342 return -EINVAL;
344 if (reorder) {
345 DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
346 return -EINVAL;
350 if (start + n > dmabuf->total / 32) {
351 DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
352 start, start + n - 1, dmabuf->total / 32);
353 return -EINVAL;
356 /* Vertex DMA doesn't work with command DMA at the same time,
357 * so we use BCI_... to submit commands here. Flush buffered
358 * faked DMA first. */
359 DMA_FLUSH();
361 if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
362 BEGIN_BCI(2);
363 BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
364 BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
365 dev_priv->state.common.vbaddr = dmabuf->bus_address;
367 if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
368 /* Workaround for what looks like a hardware bug. If a
369 * WAIT_3D_IDLE was emitted some time before the
370 * indexed drawing command then the engine will lock
371 * up. There are two known workarounds:
372 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
373 BEGIN_BCI(63);
374 for (i = 0; i < 63; ++i)
375 BCI_WRITE(BCI_CMD_WAIT);
376 dev_priv->waiting = 0;
379 prim <<= 25;
380 while (n != 0) {
381 /* Can emit up to 255 indices (85 triangles) at once. */
382 unsigned int count = n > 255 ? 255 : n;
383 if (reorder) {
384 /* Need to reorder indices for correct flat
385 * shading while preserving the clock sense
386 * for correct culling. Only on Savage3D. */
387 int reorder[3] = { -1, -1, -1 };
388 reorder[start % 3] = 2;
390 BEGIN_BCI((count + 1 + 1) / 2);
391 BCI_DRAW_INDICES_S3D(count, prim, start + 2);
393 for (i = start + 1; i + 1 < start + count; i += 2)
394 BCI_WRITE((i + reorder[i % 3]) |
395 ((i + 1 +
396 reorder[(i + 1) % 3]) << 16));
397 if (i < start + count)
398 BCI_WRITE(i + reorder[i % 3]);
399 } else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
400 BEGIN_BCI((count + 1 + 1) / 2);
401 BCI_DRAW_INDICES_S3D(count, prim, start);
403 for (i = start + 1; i + 1 < start + count; i += 2)
404 BCI_WRITE(i | ((i + 1) << 16));
405 if (i < start + count)
406 BCI_WRITE(i);
407 } else {
408 BEGIN_BCI((count + 2 + 1) / 2);
409 BCI_DRAW_INDICES_S4(count, prim, skip);
411 for (i = start; i + 1 < start + count; i += 2)
412 BCI_WRITE(i | ((i + 1) << 16));
413 if (i < start + count)
414 BCI_WRITE(i);
417 start += count;
418 n -= count;
420 prim |= BCI_CMD_DRAW_CONT;
423 return 0;
426 static int savage_dispatch_vb_prim(drm_savage_private_t * dev_priv,
427 const drm_savage_cmd_header_t * cmd_header,
428 const uint32_t *vtxbuf, unsigned int vb_size,
429 unsigned int vb_stride)
431 unsigned char reorder = 0;
432 unsigned int prim = cmd_header->prim.prim;
433 unsigned int skip = cmd_header->prim.skip;
434 unsigned int n = cmd_header->prim.count;
435 unsigned int start = cmd_header->prim.start;
436 unsigned int vtx_size;
437 unsigned int i;
438 DMA_LOCALS;
440 if (!n)
441 return 0;
443 switch (prim) {
444 case SAVAGE_PRIM_TRILIST_201:
445 reorder = 1;
446 prim = SAVAGE_PRIM_TRILIST;
447 fallthrough;
448 case SAVAGE_PRIM_TRILIST:
449 if (n % 3 != 0) {
450 DRM_ERROR("wrong number of vertices %u in TRILIST\n",
452 return -EINVAL;
454 break;
455 case SAVAGE_PRIM_TRISTRIP:
456 case SAVAGE_PRIM_TRIFAN:
457 if (n < 3) {
458 DRM_ERROR
459 ("wrong number of vertices %u in TRIFAN/STRIP\n",
461 return -EINVAL;
463 break;
464 default:
465 DRM_ERROR("invalid primitive type %u\n", prim);
466 return -EINVAL;
469 if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
470 if (skip > SAVAGE_SKIP_ALL_S3D) {
471 DRM_ERROR("invalid skip flags 0x%04x\n", skip);
472 return -EINVAL;
474 vtx_size = 8; /* full vertex */
475 } else {
476 if (skip > SAVAGE_SKIP_ALL_S4) {
477 DRM_ERROR("invalid skip flags 0x%04x\n", skip);
478 return -EINVAL;
480 vtx_size = 10; /* full vertex */
483 vtx_size -= (skip & 1) + (skip >> 1 & 1) +
484 (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
485 (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);
487 if (vtx_size > vb_stride) {
488 DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
489 vtx_size, vb_stride);
490 return -EINVAL;
493 if (start + n > vb_size / (vb_stride * 4)) {
494 DRM_ERROR("vertex indices (%u-%u) out of range (0-%u)\n",
495 start, start + n - 1, vb_size / (vb_stride * 4));
496 return -EINVAL;
499 prim <<= 25;
500 while (n != 0) {
501 /* Can emit up to 255 vertices (85 triangles) at once. */
502 unsigned int count = n > 255 ? 255 : n;
503 if (reorder) {
504 /* Need to reorder vertices for correct flat
505 * shading while preserving the clock sense
506 * for correct culling. Only on Savage3D. */
507 int reorder[3] = { -1, -1, -1 };
508 reorder[start % 3] = 2;
510 BEGIN_DMA(count * vtx_size + 1);
511 DMA_DRAW_PRIMITIVE(count, prim, skip);
513 for (i = start; i < start + count; ++i) {
514 unsigned int j = i + reorder[i % 3];
515 DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
518 DMA_COMMIT();
519 } else {
520 BEGIN_DMA(count * vtx_size + 1);
521 DMA_DRAW_PRIMITIVE(count, prim, skip);
523 if (vb_stride == vtx_size) {
524 DMA_COPY(&vtxbuf[vb_stride * start],
525 vtx_size * count);
526 } else {
527 for (i = start; i < start + count; ++i) {
528 DMA_COPY(&vtxbuf [vb_stride * i],
529 vtx_size);
533 DMA_COMMIT();
536 start += count;
537 n -= count;
539 prim |= BCI_CMD_DRAW_CONT;
542 return 0;
545 static int savage_dispatch_dma_idx(drm_savage_private_t * dev_priv,
546 const drm_savage_cmd_header_t * cmd_header,
547 const uint16_t *idx,
548 const struct drm_buf * dmabuf)
550 unsigned char reorder = 0;
551 unsigned int prim = cmd_header->idx.prim;
552 unsigned int skip = cmd_header->idx.skip;
553 unsigned int n = cmd_header->idx.count;
554 unsigned int i;
555 BCI_LOCALS;
557 if (!dmabuf) {
558 DRM_ERROR("called without dma buffers!\n");
559 return -EINVAL;
562 if (!n)
563 return 0;
565 switch (prim) {
566 case SAVAGE_PRIM_TRILIST_201:
567 reorder = 1;
568 prim = SAVAGE_PRIM_TRILIST;
569 fallthrough;
570 case SAVAGE_PRIM_TRILIST:
571 if (n % 3 != 0) {
572 DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
573 return -EINVAL;
575 break;
576 case SAVAGE_PRIM_TRISTRIP:
577 case SAVAGE_PRIM_TRIFAN:
578 if (n < 3) {
579 DRM_ERROR
580 ("wrong number of indices %u in TRIFAN/STRIP\n", n);
581 return -EINVAL;
583 break;
584 default:
585 DRM_ERROR("invalid primitive type %u\n", prim);
586 return -EINVAL;
589 if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
590 if (skip != 0) {
591 DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
592 return -EINVAL;
594 } else {
595 unsigned int size = 10 - (skip & 1) - (skip >> 1 & 1) -
596 (skip >> 2 & 1) - (skip >> 3 & 1) - (skip >> 4 & 1) -
597 (skip >> 5 & 1) - (skip >> 6 & 1) - (skip >> 7 & 1);
598 if (skip > SAVAGE_SKIP_ALL_S4 || size != 8) {
599 DRM_ERROR("invalid skip flags 0x%04x for DMA\n", skip);
600 return -EINVAL;
602 if (reorder) {
603 DRM_ERROR("TRILIST_201 used on Savage4 hardware\n");
604 return -EINVAL;
608 /* Vertex DMA doesn't work with command DMA at the same time,
609 * so we use BCI_... to submit commands here. Flush buffered
610 * faked DMA first. */
611 DMA_FLUSH();
613 if (dmabuf->bus_address != dev_priv->state.common.vbaddr) {
614 BEGIN_BCI(2);
615 BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1);
616 BCI_WRITE(dmabuf->bus_address | dev_priv->dma_type);
617 dev_priv->state.common.vbaddr = dmabuf->bus_address;
619 if (S3_SAVAGE3D_SERIES(dev_priv->chipset) && dev_priv->waiting) {
620 /* Workaround for what looks like a hardware bug. If a
621 * WAIT_3D_IDLE was emitted some time before the
622 * indexed drawing command then the engine will lock
623 * up. There are two known workarounds:
624 * WAIT_IDLE_EMPTY or emit at least 63 NOPs. */
625 BEGIN_BCI(63);
626 for (i = 0; i < 63; ++i)
627 BCI_WRITE(BCI_CMD_WAIT);
628 dev_priv->waiting = 0;
631 prim <<= 25;
632 while (n != 0) {
633 /* Can emit up to 255 indices (85 triangles) at once. */
634 unsigned int count = n > 255 ? 255 : n;
636 /* check indices */
637 for (i = 0; i < count; ++i) {
638 if (idx[i] > dmabuf->total / 32) {
639 DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
640 i, idx[i], dmabuf->total / 32);
641 return -EINVAL;
645 if (reorder) {
646 /* Need to reorder indices for correct flat
647 * shading while preserving the clock sense
648 * for correct culling. Only on Savage3D. */
649 int reorder[3] = { 2, -1, -1 };
651 BEGIN_BCI((count + 1 + 1) / 2);
652 BCI_DRAW_INDICES_S3D(count, prim, idx[2]);
654 for (i = 1; i + 1 < count; i += 2)
655 BCI_WRITE(idx[i + reorder[i % 3]] |
656 (idx[i + 1 +
657 reorder[(i + 1) % 3]] << 16));
658 if (i < count)
659 BCI_WRITE(idx[i + reorder[i % 3]]);
660 } else if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
661 BEGIN_BCI((count + 1 + 1) / 2);
662 BCI_DRAW_INDICES_S3D(count, prim, idx[0]);
664 for (i = 1; i + 1 < count; i += 2)
665 BCI_WRITE(idx[i] | (idx[i + 1] << 16));
666 if (i < count)
667 BCI_WRITE(idx[i]);
668 } else {
669 BEGIN_BCI((count + 2 + 1) / 2);
670 BCI_DRAW_INDICES_S4(count, prim, skip);
672 for (i = 0; i + 1 < count; i += 2)
673 BCI_WRITE(idx[i] | (idx[i + 1] << 16));
674 if (i < count)
675 BCI_WRITE(idx[i]);
678 idx += count;
679 n -= count;
681 prim |= BCI_CMD_DRAW_CONT;
684 return 0;
687 static int savage_dispatch_vb_idx(drm_savage_private_t * dev_priv,
688 const drm_savage_cmd_header_t * cmd_header,
689 const uint16_t *idx,
690 const uint32_t *vtxbuf,
691 unsigned int vb_size, unsigned int vb_stride)
693 unsigned char reorder = 0;
694 unsigned int prim = cmd_header->idx.prim;
695 unsigned int skip = cmd_header->idx.skip;
696 unsigned int n = cmd_header->idx.count;
697 unsigned int vtx_size;
698 unsigned int i;
699 DMA_LOCALS;
701 if (!n)
702 return 0;
704 switch (prim) {
705 case SAVAGE_PRIM_TRILIST_201:
706 reorder = 1;
707 prim = SAVAGE_PRIM_TRILIST;
708 fallthrough;
709 case SAVAGE_PRIM_TRILIST:
710 if (n % 3 != 0) {
711 DRM_ERROR("wrong number of indices %u in TRILIST\n", n);
712 return -EINVAL;
714 break;
715 case SAVAGE_PRIM_TRISTRIP:
716 case SAVAGE_PRIM_TRIFAN:
717 if (n < 3) {
718 DRM_ERROR
719 ("wrong number of indices %u in TRIFAN/STRIP\n", n);
720 return -EINVAL;
722 break;
723 default:
724 DRM_ERROR("invalid primitive type %u\n", prim);
725 return -EINVAL;
728 if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) {
729 if (skip > SAVAGE_SKIP_ALL_S3D) {
730 DRM_ERROR("invalid skip flags 0x%04x\n", skip);
731 return -EINVAL;
733 vtx_size = 8; /* full vertex */
734 } else {
735 if (skip > SAVAGE_SKIP_ALL_S4) {
736 DRM_ERROR("invalid skip flags 0x%04x\n", skip);
737 return -EINVAL;
739 vtx_size = 10; /* full vertex */
742 vtx_size -= (skip & 1) + (skip >> 1 & 1) +
743 (skip >> 2 & 1) + (skip >> 3 & 1) + (skip >> 4 & 1) +
744 (skip >> 5 & 1) + (skip >> 6 & 1) + (skip >> 7 & 1);
746 if (vtx_size > vb_stride) {
747 DRM_ERROR("vertex size greater than vb stride (%u > %u)\n",
748 vtx_size, vb_stride);
749 return -EINVAL;
752 prim <<= 25;
753 while (n != 0) {
754 /* Can emit up to 255 vertices (85 triangles) at once. */
755 unsigned int count = n > 255 ? 255 : n;
757 /* Check indices */
758 for (i = 0; i < count; ++i) {
759 if (idx[i] > vb_size / (vb_stride * 4)) {
760 DRM_ERROR("idx[%u]=%u out of range (0-%u)\n",
761 i, idx[i], vb_size / (vb_stride * 4));
762 return -EINVAL;
766 if (reorder) {
767 /* Need to reorder vertices for correct flat
768 * shading while preserving the clock sense
769 * for correct culling. Only on Savage3D. */
770 int reorder[3] = { 2, -1, -1 };
772 BEGIN_DMA(count * vtx_size + 1);
773 DMA_DRAW_PRIMITIVE(count, prim, skip);
775 for (i = 0; i < count; ++i) {
776 unsigned int j = idx[i + reorder[i % 3]];
777 DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
780 DMA_COMMIT();
781 } else {
782 BEGIN_DMA(count * vtx_size + 1);
783 DMA_DRAW_PRIMITIVE(count, prim, skip);
785 for (i = 0; i < count; ++i) {
786 unsigned int j = idx[i];
787 DMA_COPY(&vtxbuf[vb_stride * j], vtx_size);
790 DMA_COMMIT();
793 idx += count;
794 n -= count;
796 prim |= BCI_CMD_DRAW_CONT;
799 return 0;
802 static int savage_dispatch_clear(drm_savage_private_t * dev_priv,
803 const drm_savage_cmd_header_t * cmd_header,
804 const drm_savage_cmd_header_t *data,
805 unsigned int nbox,
806 const struct drm_clip_rect *boxes)
808 unsigned int flags = cmd_header->clear0.flags;
809 unsigned int clear_cmd;
810 unsigned int i, nbufs;
811 DMA_LOCALS;
813 if (nbox == 0)
814 return 0;
816 clear_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
817 BCI_CMD_SEND_COLOR | BCI_CMD_DEST_PBD_NEW;
818 BCI_CMD_SET_ROP(clear_cmd, 0xCC);
820 nbufs = ((flags & SAVAGE_FRONT) ? 1 : 0) +
821 ((flags & SAVAGE_BACK) ? 1 : 0) + ((flags & SAVAGE_DEPTH) ? 1 : 0);
822 if (nbufs == 0)
823 return 0;
825 if (data->clear1.mask != 0xffffffff) {
826 /* set mask */
827 BEGIN_DMA(2);
828 DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
829 DMA_WRITE(data->clear1.mask);
830 DMA_COMMIT();
832 for (i = 0; i < nbox; ++i) {
833 unsigned int x, y, w, h;
834 unsigned int buf;
835 x = boxes[i].x1, y = boxes[i].y1;
836 w = boxes[i].x2 - boxes[i].x1;
837 h = boxes[i].y2 - boxes[i].y1;
838 BEGIN_DMA(nbufs * 6);
839 for (buf = SAVAGE_FRONT; buf <= SAVAGE_DEPTH; buf <<= 1) {
840 if (!(flags & buf))
841 continue;
842 DMA_WRITE(clear_cmd);
843 switch (buf) {
844 case SAVAGE_FRONT:
845 DMA_WRITE(dev_priv->front_offset);
846 DMA_WRITE(dev_priv->front_bd);
847 break;
848 case SAVAGE_BACK:
849 DMA_WRITE(dev_priv->back_offset);
850 DMA_WRITE(dev_priv->back_bd);
851 break;
852 case SAVAGE_DEPTH:
853 DMA_WRITE(dev_priv->depth_offset);
854 DMA_WRITE(dev_priv->depth_bd);
855 break;
857 DMA_WRITE(data->clear1.value);
858 DMA_WRITE(BCI_X_Y(x, y));
859 DMA_WRITE(BCI_W_H(w, h));
861 DMA_COMMIT();
863 if (data->clear1.mask != 0xffffffff) {
864 /* reset mask */
865 BEGIN_DMA(2);
866 DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1);
867 DMA_WRITE(0xffffffff);
868 DMA_COMMIT();
871 return 0;
874 static int savage_dispatch_swap(drm_savage_private_t * dev_priv,
875 unsigned int nbox, const struct drm_clip_rect *boxes)
877 unsigned int swap_cmd;
878 unsigned int i;
879 DMA_LOCALS;
881 if (nbox == 0)
882 return 0;
884 swap_cmd = BCI_CMD_RECT | BCI_CMD_RECT_XP | BCI_CMD_RECT_YP |
885 BCI_CMD_SRC_PBD_COLOR_NEW | BCI_CMD_DEST_GBD;
886 BCI_CMD_SET_ROP(swap_cmd, 0xCC);
888 for (i = 0; i < nbox; ++i) {
889 BEGIN_DMA(6);
890 DMA_WRITE(swap_cmd);
891 DMA_WRITE(dev_priv->back_offset);
892 DMA_WRITE(dev_priv->back_bd);
893 DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));
894 DMA_WRITE(BCI_X_Y(boxes[i].x1, boxes[i].y1));
895 DMA_WRITE(BCI_W_H(boxes[i].x2 - boxes[i].x1,
896 boxes[i].y2 - boxes[i].y1));
897 DMA_COMMIT();
900 return 0;
903 static int savage_dispatch_draw(drm_savage_private_t * dev_priv,
904 const drm_savage_cmd_header_t *start,
905 const drm_savage_cmd_header_t *end,
906 const struct drm_buf * dmabuf,
907 const unsigned int *vtxbuf,
908 unsigned int vb_size, unsigned int vb_stride,
909 unsigned int nbox,
910 const struct drm_clip_rect *boxes)
912 unsigned int i, j;
913 int ret;
915 for (i = 0; i < nbox; ++i) {
916 const drm_savage_cmd_header_t *cmdbuf;
917 dev_priv->emit_clip_rect(dev_priv, &boxes[i]);
919 cmdbuf = start;
920 while (cmdbuf < end) {
921 drm_savage_cmd_header_t cmd_header;
922 cmd_header = *cmdbuf;
923 cmdbuf++;
924 switch (cmd_header.cmd.cmd) {
925 case SAVAGE_CMD_DMA_PRIM:
926 ret = savage_dispatch_dma_prim(
927 dev_priv, &cmd_header, dmabuf);
928 break;
929 case SAVAGE_CMD_VB_PRIM:
930 ret = savage_dispatch_vb_prim(
931 dev_priv, &cmd_header,
932 vtxbuf, vb_size, vb_stride);
933 break;
934 case SAVAGE_CMD_DMA_IDX:
935 j = (cmd_header.idx.count + 3) / 4;
936 /* j was check in savage_bci_cmdbuf */
937 ret = savage_dispatch_dma_idx(dev_priv,
938 &cmd_header, (const uint16_t *)cmdbuf,
939 dmabuf);
940 cmdbuf += j;
941 break;
942 case SAVAGE_CMD_VB_IDX:
943 j = (cmd_header.idx.count + 3) / 4;
944 /* j was check in savage_bci_cmdbuf */
945 ret = savage_dispatch_vb_idx(dev_priv,
946 &cmd_header, (const uint16_t *)cmdbuf,
947 (const uint32_t *)vtxbuf, vb_size,
948 vb_stride);
949 cmdbuf += j;
950 break;
951 default:
952 /* What's the best return code? EFAULT? */
953 DRM_ERROR("IMPLEMENTATION ERROR: "
954 "non-drawing-command %d\n",
955 cmd_header.cmd.cmd);
956 return -EINVAL;
959 if (ret != 0)
960 return ret;
964 return 0;
967 int savage_bci_cmdbuf(struct drm_device *dev, void *data, struct drm_file *file_priv)
969 drm_savage_private_t *dev_priv = dev->dev_private;
970 struct drm_device_dma *dma = dev->dma;
971 struct drm_buf *dmabuf;
972 drm_savage_cmdbuf_t *cmdbuf = data;
973 drm_savage_cmd_header_t *kcmd_addr = NULL;
974 drm_savage_cmd_header_t *first_draw_cmd;
975 unsigned int *kvb_addr = NULL;
976 struct drm_clip_rect *kbox_addr = NULL;
977 unsigned int i, j;
978 int ret = 0;
980 DRM_DEBUG("\n");
982 LOCK_TEST_WITH_RETURN(dev, file_priv);
984 if (dma && dma->buflist) {
985 if (cmdbuf->dma_idx >= dma->buf_count) {
986 DRM_ERROR
987 ("vertex buffer index %u out of range (0-%u)\n",
988 cmdbuf->dma_idx, dma->buf_count - 1);
989 return -EINVAL;
991 dmabuf = dma->buflist[cmdbuf->dma_idx];
992 } else {
993 dmabuf = NULL;
996 /* Copy the user buffers into kernel temporary areas. This hasn't been
997 * a performance loss compared to VERIFYAREA_READ/
998 * COPY_FROM_USER_UNCHECKED when done in other drivers, and is correct
999 * for locking on FreeBSD.
1001 if (cmdbuf->size) {
1002 kcmd_addr = kmalloc_array(cmdbuf->size, 8, GFP_KERNEL);
1003 if (kcmd_addr == NULL)
1004 return -ENOMEM;
1006 if (copy_from_user(kcmd_addr, cmdbuf->cmd_addr,
1007 cmdbuf->size * 8))
1009 kfree(kcmd_addr);
1010 return -EFAULT;
1012 cmdbuf->cmd_addr = kcmd_addr;
1014 if (cmdbuf->vb_size) {
1015 kvb_addr = memdup_user(cmdbuf->vb_addr, cmdbuf->vb_size);
1016 if (IS_ERR(kvb_addr)) {
1017 ret = PTR_ERR(kvb_addr);
1018 kvb_addr = NULL;
1019 goto done;
1021 cmdbuf->vb_addr = kvb_addr;
1023 if (cmdbuf->nbox) {
1024 kbox_addr = kmalloc_array(cmdbuf->nbox, sizeof(struct drm_clip_rect),
1025 GFP_KERNEL);
1026 if (kbox_addr == NULL) {
1027 ret = -ENOMEM;
1028 goto done;
1031 if (copy_from_user(kbox_addr, cmdbuf->box_addr,
1032 cmdbuf->nbox * sizeof(struct drm_clip_rect))) {
1033 ret = -EFAULT;
1034 goto done;
1036 cmdbuf->box_addr = kbox_addr;
1039 /* Make sure writes to DMA buffers are finished before sending
1040 * DMA commands to the graphics hardware. */
1041 mb();
1043 /* Coming from user space. Don't know if the Xserver has
1044 * emitted wait commands. Assuming the worst. */
1045 dev_priv->waiting = 1;
1047 i = 0;
1048 first_draw_cmd = NULL;
1049 while (i < cmdbuf->size) {
1050 drm_savage_cmd_header_t cmd_header;
1051 cmd_header = *(drm_savage_cmd_header_t *)cmdbuf->cmd_addr;
1052 cmdbuf->cmd_addr++;
1053 i++;
1055 /* Group drawing commands with same state to minimize
1056 * iterations over clip rects. */
1057 j = 0;
1058 switch (cmd_header.cmd.cmd) {
1059 case SAVAGE_CMD_DMA_IDX:
1060 case SAVAGE_CMD_VB_IDX:
1061 j = (cmd_header.idx.count + 3) / 4;
1062 if (i + j > cmdbuf->size) {
1063 DRM_ERROR("indexed drawing command extends "
1064 "beyond end of command buffer\n");
1065 DMA_FLUSH();
1066 ret = -EINVAL;
1067 goto done;
1069 fallthrough;
1070 case SAVAGE_CMD_DMA_PRIM:
1071 case SAVAGE_CMD_VB_PRIM:
1072 if (!first_draw_cmd)
1073 first_draw_cmd = cmdbuf->cmd_addr - 1;
1074 cmdbuf->cmd_addr += j;
1075 i += j;
1076 break;
1077 default:
1078 if (first_draw_cmd) {
1079 ret = savage_dispatch_draw(
1080 dev_priv, first_draw_cmd,
1081 cmdbuf->cmd_addr - 1,
1082 dmabuf, cmdbuf->vb_addr, cmdbuf->vb_size,
1083 cmdbuf->vb_stride,
1084 cmdbuf->nbox, cmdbuf->box_addr);
1085 if (ret != 0)
1086 goto done;
1087 first_draw_cmd = NULL;
1090 if (first_draw_cmd)
1091 continue;
1093 switch (cmd_header.cmd.cmd) {
1094 case SAVAGE_CMD_STATE:
1095 j = (cmd_header.state.count + 1) / 2;
1096 if (i + j > cmdbuf->size) {
1097 DRM_ERROR("command SAVAGE_CMD_STATE extends "
1098 "beyond end of command buffer\n");
1099 DMA_FLUSH();
1100 ret = -EINVAL;
1101 goto done;
1103 ret = savage_dispatch_state(dev_priv, &cmd_header,
1104 (const uint32_t *)cmdbuf->cmd_addr);
1105 cmdbuf->cmd_addr += j;
1106 i += j;
1107 break;
1108 case SAVAGE_CMD_CLEAR:
1109 if (i + 1 > cmdbuf->size) {
1110 DRM_ERROR("command SAVAGE_CMD_CLEAR extends "
1111 "beyond end of command buffer\n");
1112 DMA_FLUSH();
1113 ret = -EINVAL;
1114 goto done;
1116 ret = savage_dispatch_clear(dev_priv, &cmd_header,
1117 cmdbuf->cmd_addr,
1118 cmdbuf->nbox,
1119 cmdbuf->box_addr);
1120 cmdbuf->cmd_addr++;
1121 i++;
1122 break;
1123 case SAVAGE_CMD_SWAP:
1124 ret = savage_dispatch_swap(dev_priv, cmdbuf->nbox,
1125 cmdbuf->box_addr);
1126 break;
1127 default:
1128 DRM_ERROR("invalid command 0x%x\n",
1129 cmd_header.cmd.cmd);
1130 DMA_FLUSH();
1131 ret = -EINVAL;
1132 goto done;
1135 if (ret != 0) {
1136 DMA_FLUSH();
1137 goto done;
1141 if (first_draw_cmd) {
1142 ret = savage_dispatch_draw (
1143 dev_priv, first_draw_cmd, cmdbuf->cmd_addr, dmabuf,
1144 cmdbuf->vb_addr, cmdbuf->vb_size, cmdbuf->vb_stride,
1145 cmdbuf->nbox, cmdbuf->box_addr);
1146 if (ret != 0) {
1147 DMA_FLUSH();
1148 goto done;
1152 DMA_FLUSH();
1154 if (dmabuf && cmdbuf->discard) {
1155 drm_savage_buf_priv_t *buf_priv = dmabuf->dev_private;
1156 uint16_t event;
1157 event = savage_bci_emit_event(dev_priv, SAVAGE_WAIT_3D);
1158 SET_AGE(&buf_priv->age, event, dev_priv->event_wrap);
1159 savage_freelist_put(dev, dmabuf);
1162 done:
1163 /* If we didn't need to allocate them, these'll be NULL */
1164 kfree(kcmd_addr);
1165 kfree(kvb_addr);
1166 kfree(kbox_addr);
1168 return ret;