/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 * Copyright 2008 Red Hat Inc.
 * Copyright 2009 Jerome Glisse.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Dave Airlie
 */
#include "evergreend.h"
#include "evergreen_reg_safe.h"
#include "cayman_reg_safe.h"

#define MAX(a,b) (((a)>(b))?(a):(b))
#define MIN(a,b) (((a)<(b))?(a):(b))
static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
					  struct radeon_cs_reloc **cs_reloc);
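/*
 * State gathered from the command stream while SET_CONTEXT_REG packets are
 * parsed.  Color buffer, depth/stencil, streamout and htile programming is
 * cached here and only validated when a draw or dispatch packet is seen;
 * the *_dirty flags avoid redundant re-checks.  Fields marked unused are
 * recorded but not currently validated.
 */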
struct evergreen_cs_track {
	u32			group_size;
	u32			nbanks;
	u32			npipes;
	u32			nsamples;		/* unused */
	struct radeon_bo	*cb_color_bo[12];
	u32			cb_color_bo_offset[12];
	struct radeon_bo	*cb_color_fmask_bo[8];	/* unused */
	struct radeon_bo	*cb_color_cmask_bo[8];	/* unused */
	u32			cb_color_info[12];
	u32			cb_color_view[12];
	u32			cb_color_pitch[12];
	u32			cb_color_slice[12];
	u32			cb_color_slice_idx[12];
	u32			cb_color_attrib[12];
	u32			cb_color_cmask_slice[8];/* unused */
	u32			cb_color_fmask_slice[8];/* unused */
	u32			cb_target_mask;
	u32			cb_shader_mask;		/* unused */
	u32			vgt_strmout_config;
	u32			vgt_strmout_buffer_config;
	struct radeon_bo	*vgt_strmout_bo[4];
	u32			vgt_strmout_bo_offset[4];
	u32			vgt_strmout_size[4];
	u32			db_depth_control;
	u32			db_depth_view;
	u32			db_depth_slice;
	u32			db_depth_size;
	u32			db_z_info;
	u32			db_z_read_offset;
	u32			db_z_write_offset;
	struct radeon_bo	*db_z_read_bo;
	struct radeon_bo	*db_z_write_bo;
	u32			db_s_info;
	u32			db_s_read_offset;
	u32			db_s_write_offset;
	struct radeon_bo	*db_s_read_bo;
	struct radeon_bo	*db_s_write_bo;
	bool			sx_misc_kill_all_prims;
	bool			cb_dirty;
	bool			db_dirty;
	bool			streamout_dirty;
	struct radeon_bo	*htile_bo;
	u32			htile_offset;
	u32			htile_surface;
};
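/*
 * Translate the buffer-object tiling flags coming from userspace into the
 * hardware ARRAY_MODE encoding: macro tiling wins over micro tiling, and
 * everything else falls back to general linear.
 */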
static u32 evergreen_cs_get_aray_mode(u32 tiling_flags)
{
	if (tiling_flags & RADEON_TILING_MACRO)
		return ARRAY_2D_TILED_THIN1;
	else if (tiling_flags & RADEON_TILING_MICRO)
		return ARRAY_1D_TILED_THIN1;
	else
		return ARRAY_LINEAR_GENERAL;
}
static u32 evergreen_cs_get_num_banks(u32 nbanks)
{
	switch (nbanks) {
	case 2:
		return ADDR_SURF_2_BANK;
	case 4:
		return ADDR_SURF_4_BANK;
	case 8:
	default:
		return ADDR_SURF_8_BANK;
	case 16:
		return ADDR_SURF_16_BANK;
	}
}
static void evergreen_cs_track_init(struct evergreen_cs_track *track)
{
	int i;

	for (i = 0; i < 8; i++) {
		track->cb_color_fmask_bo[i] = NULL;
		track->cb_color_cmask_bo[i] = NULL;
		track->cb_color_cmask_slice[i] = 0;
		track->cb_color_fmask_slice[i] = 0;
	}

	for (i = 0; i < 12; i++) {
		track->cb_color_bo[i] = NULL;
		track->cb_color_bo_offset[i] = 0xFFFFFFFF;
		track->cb_color_info[i] = 0;
		track->cb_color_view[i] = 0xFFFFFFFF;
		track->cb_color_pitch[i] = 0;
		track->cb_color_slice[i] = 0xfffffff;
		track->cb_color_slice_idx[i] = 0;
	}
	track->cb_target_mask = 0xFFFFFFFF;
	track->cb_shader_mask = 0xFFFFFFFF;
	track->cb_dirty = true;

	track->db_depth_slice = 0xffffffff;
	track->db_depth_view = 0xFFFFC000;
	track->db_depth_size = 0xFFFFFFFF;
	track->db_depth_control = 0xFFFFFFFF;
	track->db_z_info = 0xFFFFFFFF;
	track->db_z_read_offset = 0xFFFFFFFF;
	track->db_z_write_offset = 0xFFFFFFFF;
	track->db_z_read_bo = NULL;
	track->db_z_write_bo = NULL;
	track->db_s_info = 0xFFFFFFFF;
	track->db_s_read_offset = 0xFFFFFFFF;
	track->db_s_write_offset = 0xFFFFFFFF;
	track->db_s_read_bo = NULL;
	track->db_s_write_bo = NULL;
	track->db_dirty = true;
	track->htile_bo = NULL;
	track->htile_offset = 0xFFFFFFFF;
	track->htile_surface = 0;

	for (i = 0; i < 4; i++) {
		track->vgt_strmout_size[i] = 0;
		track->vgt_strmout_bo[i] = NULL;
		track->vgt_strmout_bo_offset[i] = 0xFFFFFFFF;
	}
	track->streamout_dirty = true;
	track->sx_misc_kill_all_prims = false;
}
struct eg_surface {
	/* value gathered from cs */
	unsigned	nbx;
	unsigned	nby;
	unsigned	format;
	unsigned	mode;
	unsigned	nbanks;
	unsigned	bankw;
	unsigned	bankh;
	unsigned	tsplit;
	unsigned	mtilea;
	unsigned	nsamples;
	/* value computed by the surface check functions */
	unsigned	bpe;
	unsigned	layer_size;
	unsigned	palign;
	unsigned	halign;
	unsigned long	base_align;
};
static int evergreen_surface_check_linear(struct radeon_cs_parser *p,
					  struct eg_surface *surf,
					  const char *prefix)
{
	surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
	surf->base_align = surf->bpe;
	surf->palign = 1;
	surf->halign = 1;
	return 0;
}
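/*
 * For linear-aligned surfaces the pitch must cover at least one full memory
 * group per row: palign = max(64, group_size / bpe) pixels.  For example,
 * with a 256-byte group and 4 bytes per element that is max(64, 64) = 64.
 */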
static int evergreen_surface_check_linear_aligned(struct radeon_cs_parser *p,
						  struct eg_surface *surf,
						  const char *prefix)
{
	struct evergreen_cs_track *track = p->track;
	unsigned palign;

	palign = MAX(64, track->group_size / surf->bpe);
	surf->layer_size = surf->nbx * surf->nby * surf->bpe * surf->nsamples;
	surf->base_align = track->group_size;
	surf->palign = palign;
	surf->halign = 1;
	if (surf->nbx & (palign - 1)) {
		if (prefix) {
			dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
				 __func__, __LINE__, prefix, surf->nbx, palign);
		}
		return -EINVAL;
	}
	return 0;
}
static int evergreen_surface_check_1d(struct radeon_cs_parser *p,
				      struct eg_surface *surf,
				      const char *prefix)
{
	struct evergreen_cs_track *track = p->track;
	unsigned palign;

	palign = track->group_size / (8 * surf->bpe * surf->nsamples);
	palign = MAX(8, palign);
	surf->layer_size = surf->nbx * surf->nby * surf->bpe;
	surf->base_align = track->group_size;
	surf->palign = palign;
	surf->halign = 8;
	if ((surf->nbx & (palign - 1))) {
		if (prefix) {
			dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d (%d %d %d)\n",
				 __func__, __LINE__, prefix, surf->nbx, palign,
				 track->group_size, surf->bpe, surf->nsamples);
		}
		return -EINVAL;
	}
	if ((surf->nby & (8 - 1))) {
		if (prefix) {
			dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with 8\n",
				 __func__, __LINE__, prefix, surf->nby);
		}
		return -EINVAL;
	}
	return 0;
}
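/*
 * 2D (macro) tiled layout: a micro tile is 8x8 elements (64 * bpe *
 * nsamples bytes), split across slices when it exceeds the tile split
 * value.  A macro tile is (8 * bankw * npipes * mtilea) pixels wide and
 * (8 * bankh * nbanks / mtilea) pixels tall; both the pitch and the height
 * must be aligned to these macro tile dimensions.
 */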
static int evergreen_surface_check_2d(struct radeon_cs_parser *p,
				      struct eg_surface *surf,
				      const char *prefix)
{
	struct evergreen_cs_track *track = p->track;
	unsigned palign, halign, tileb, slice_pt;
	unsigned mtile_pr, mtile_ps, mtileb;

	tileb = 64 * surf->bpe * surf->nsamples;
	slice_pt = 1;
	if (tileb > surf->tsplit) {
		slice_pt = tileb / surf->tsplit;
	}
	tileb = tileb / slice_pt;
	/* macro tile width & height */
	palign = (8 * surf->bankw * track->npipes) * surf->mtilea;
	halign = (8 * surf->bankh * surf->nbanks) / surf->mtilea;
	/* macro tile bytes, macro tiles per row and per slice */
	mtileb = (palign / 8) * (halign / 8) * tileb;
	mtile_pr = surf->nbx / palign;
	mtile_ps = (mtile_pr * surf->nby) / halign;
	surf->layer_size = mtile_ps * mtileb * slice_pt;
	surf->base_align = (palign / 8) * (halign / 8) * tileb;
	surf->palign = palign;
	surf->halign = halign;

	if ((surf->nbx & (palign - 1))) {
		if (prefix) {
			dev_warn(p->dev, "%s:%d %s pitch %d invalid must be aligned with %d\n",
				 __func__, __LINE__, prefix, surf->nbx, palign);
		}
		return -EINVAL;
	}
	if ((surf->nby & (halign - 1))) {
		if (prefix) {
			dev_warn(p->dev, "%s:%d %s height %d invalid must be aligned with %d\n",
				 __func__, __LINE__, prefix, surf->nby, halign);
		}
		return -EINVAL;
	}

	return 0;
}
static int evergreen_surface_check(struct radeon_cs_parser *p,
				   struct eg_surface *surf,
				   const char *prefix)
{
	/* some common value computed here */
	surf->bpe = r600_fmt_get_blocksize(surf->format);

	switch (surf->mode) {
	case ARRAY_LINEAR_GENERAL:
		return evergreen_surface_check_linear(p, surf, prefix);
	case ARRAY_LINEAR_ALIGNED:
		return evergreen_surface_check_linear_aligned(p, surf, prefix);
	case ARRAY_1D_TILED_THIN1:
		return evergreen_surface_check_1d(p, surf, prefix);
	case ARRAY_2D_TILED_THIN1:
		return evergreen_surface_check_2d(p, surf, prefix);
	default:
		dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
			 __func__, __LINE__, prefix, surf->mode);
		return -EINVAL;
	}
}
static int evergreen_surface_value_conv_check(struct radeon_cs_parser *p,
					      struct eg_surface *surf,
					      const char *prefix)
{
	/* convert the register field encodings to actual values */
	switch (surf->mode) {
	case ARRAY_2D_TILED_THIN1:
		break;
	case ARRAY_LINEAR_GENERAL:
	case ARRAY_LINEAR_ALIGNED:
	case ARRAY_1D_TILED_THIN1:
		return 0;
	default:
		dev_warn(p->dev, "%s:%d %s invalid array mode %d\n",
			 __func__, __LINE__, prefix, surf->mode);
		return -EINVAL;
	}

	switch (surf->nbanks) {
	case 0: surf->nbanks = 2; break;
	case 1: surf->nbanks = 4; break;
	case 2: surf->nbanks = 8; break;
	case 3: surf->nbanks = 16; break;
	default:
		dev_warn(p->dev, "%s:%d %s invalid number of banks %d\n",
			 __func__, __LINE__, prefix, surf->nbanks);
		return -EINVAL;
	}
	switch (surf->bankw) {
	case 0: surf->bankw = 1; break;
	case 1: surf->bankw = 2; break;
	case 2: surf->bankw = 4; break;
	case 3: surf->bankw = 8; break;
	default:
		dev_warn(p->dev, "%s:%d %s invalid bankw %d\n",
			 __func__, __LINE__, prefix, surf->bankw);
		return -EINVAL;
	}
	switch (surf->bankh) {
	case 0: surf->bankh = 1; break;
	case 1: surf->bankh = 2; break;
	case 2: surf->bankh = 4; break;
	case 3: surf->bankh = 8; break;
	default:
		dev_warn(p->dev, "%s:%d %s invalid bankh %d\n",
			 __func__, __LINE__, prefix, surf->bankh);
		return -EINVAL;
	}
	switch (surf->mtilea) {
	case 0: surf->mtilea = 1; break;
	case 1: surf->mtilea = 2; break;
	case 2: surf->mtilea = 4; break;
	case 3: surf->mtilea = 8; break;
	default:
		dev_warn(p->dev, "%s:%d %s invalid macro tile aspect %d\n",
			 __func__, __LINE__, prefix, surf->mtilea);
		return -EINVAL;
	}
	switch (surf->tsplit) {
	case 0: surf->tsplit = 64; break;
	case 1: surf->tsplit = 128; break;
	case 2: surf->tsplit = 256; break;
	case 3: surf->tsplit = 512; break;
	case 4: surf->tsplit = 1024; break;
	case 5: surf->tsplit = 2048; break;
	case 6: surf->tsplit = 4096; break;
	default:
		dev_warn(p->dev, "%s:%d %s invalid tile split %d\n",
			 __func__, __LINE__, prefix, surf->tsplit);
		return -EINVAL;
	}
	return 0;
}
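/*
 * CB_COLORn_PITCH holds (pitch / 8) - 1 and CB_COLORn_SLICE holds
 * (pitch * height / 64) - 1, so the pixel pitch is recovered below as
 * (pitch + 1) * 8 and the height as ((slice + 1) * 64) / nbx.
 */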
static int evergreen_cs_track_validate_cb(struct radeon_cs_parser *p, unsigned id)
{
	struct evergreen_cs_track *track = p->track;
	struct eg_surface surf;
	unsigned pitch, slice, mslice;
	unsigned long offset;
	int r;

	mslice = G_028C6C_SLICE_MAX(track->cb_color_view[id]) + 1;
	pitch = track->cb_color_pitch[id];
	slice = track->cb_color_slice[id];
	surf.nbx = (pitch + 1) * 8;
	surf.nby = ((slice + 1) * 64) / surf.nbx;
	surf.mode = G_028C70_ARRAY_MODE(track->cb_color_info[id]);
	surf.format = G_028C70_FORMAT(track->cb_color_info[id]);
	surf.tsplit = G_028C74_TILE_SPLIT(track->cb_color_attrib[id]);
	surf.nbanks = G_028C74_NUM_BANKS(track->cb_color_attrib[id]);
	surf.bankw = G_028C74_BANK_WIDTH(track->cb_color_attrib[id]);
	surf.bankh = G_028C74_BANK_HEIGHT(track->cb_color_attrib[id]);
	surf.mtilea = G_028C74_MACRO_TILE_ASPECT(track->cb_color_attrib[id]);
	surf.nsamples = 1;

	if (!r600_fmt_is_valid_color(surf.format)) {
		dev_warn(p->dev, "%s:%d cb invalid format %d for %d (0x%08x)\n",
			 __func__, __LINE__, surf.format,
			 id, track->cb_color_info[id]);
		return -EINVAL;
	}

	r = evergreen_surface_value_conv_check(p, &surf, "cb");
	if (r) {
		return r;
	}

	r = evergreen_surface_check(p, &surf, "cb");
	if (r) {
		dev_warn(p->dev, "%s:%d cb[%d] invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
			 __func__, __LINE__, id, track->cb_color_pitch[id],
			 track->cb_color_slice[id], track->cb_color_attrib[id],
			 track->cb_color_info[id]);
		return r;
	}

	offset = track->cb_color_bo_offset[id] << 8;
	if (offset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d cb[%d] bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, id, offset, surf.base_align);
		return -EINVAL;
	}

	offset += surf.layer_size * mslice;
	if (offset > radeon_bo_size(track->cb_color_bo[id])) {
		/* old ddx are broken they allocate bo with w*h*bpp but
		 * program slice with ALIGN(h, 8), catch this and patch
		 * the command stream to use the computed slice value
		 */
		volatile u32 *ib = p->ib.ptr;
		unsigned long tmp, nby, bsize, size, min = 0;

		/* find the height the ddx wants */
		if (surf.nby > 8) {
			min = surf.nby - 8;
		}
		bsize = radeon_bo_size(track->cb_color_bo[id]);
		tmp = track->cb_color_bo_offset[id] << 8;
		for (nby = surf.nby; nby > min; nby--) {
			size = nby * surf.nbx * surf.bpe * surf.nsamples;
			if ((tmp + size * mslice) <= bsize) {
				break;
			}
		}
		if (nby > min) {
			surf.nby = nby;
			slice = ((nby * surf.nbx) / 64) - 1;
			if (!evergreen_surface_check(p, &surf, "cb")) {
				/* check if this one works */
				tmp += surf.layer_size * mslice;
				if (tmp <= bsize) {
					ib[track->cb_color_slice_idx[id]] = slice;
					goto old_ddx_ok;
				}
			}
		}
		dev_warn(p->dev, "%s:%d cb[%d] bo too small (layer size %d, "
			 "offset %d, max layer %d, bo size %ld, slice %d)\n",
			 __func__, __LINE__, id, surf.layer_size,
			 track->cb_color_bo_offset[id] << 8, mslice,
			 radeon_bo_size(track->cb_color_bo[id]), slice);
		dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
			 __func__, __LINE__, surf.nbx, surf.nby,
			 surf.mode, surf.bpe, surf.nsamples,
			 surf.bankw, surf.bankh,
			 surf.tsplit, surf.mtilea);
		return -EINVAL;
	}
old_ddx_ok:

	return 0;
}
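/*
 * An htile buffer stores 4 bytes per 8x8 pixel tile.  The surface is first
 * padded to the hardware's htile pitch/height alignment, which depends on
 * the pipe count and on whether the htile layout is linear, before the
 * required buffer size is computed.
 */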
static int evergreen_cs_track_validate_htile(struct radeon_cs_parser *p,
					     unsigned nbx, unsigned nby)
{
	struct evergreen_cs_track *track = p->track;
	unsigned long size;

	if (track->htile_bo == NULL) {
		dev_warn(p->dev, "%s:%d htile enabled without htile surface 0x%08x\n",
			 __func__, __LINE__, track->db_z_info);
		return -EINVAL;
	}

	if (G_028ABC_LINEAR(track->htile_surface)) {
		/* pitch must be 16 htiles aligned == 16 * 8 pixel aligned */
		nbx = round_up(nbx, 16 * 8);
		/* height is npipes htiles aligned == npipes * 8 pixel aligned */
		nby = round_up(nby, track->npipes * 8);
	} else {
		switch (track->npipes) {
		case 8:
			nbx = round_up(nbx, 64 * 8);
			nby = round_up(nby, 64 * 8);
			break;
		case 4:
			nbx = round_up(nbx, 64 * 8);
			nby = round_up(nby, 32 * 8);
			break;
		case 2:
			nbx = round_up(nbx, 32 * 8);
			nby = round_up(nby, 32 * 8);
			break;
		case 1:
			nbx = round_up(nbx, 32 * 8);
			nby = round_up(nby, 16 * 8);
			break;
		default:
			dev_warn(p->dev, "%s:%d invalid num pipes %d\n",
				 __func__, __LINE__, track->npipes);
			return -EINVAL;
		}
	}
	/* compute number of htile */
	nbx = nbx / 8;
	nby = nby / 8;
	size = nbx * nby * 4;
	size += track->htile_offset;

	if (size > radeon_bo_size(track->htile_bo)) {
		dev_warn(p->dev, "%s:%d htile surface too small %ld for %ld (%d %d)\n",
			 __func__, __LINE__, radeon_bo_size(track->htile_bo),
			 size, nbx, nby);
		return -EINVAL;
	}
	return 0;
}
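/*
 * Stencil is always an 8-bit per element surface.  It shares its geometry
 * and tiling parameters with the depth buffer registers, so it is checked
 * by mapping it onto an equivalent 8-bit color surface and reusing the
 * common surface checking code.
 */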
static int evergreen_cs_track_validate_stencil(struct radeon_cs_parser *p)
{
	struct evergreen_cs_track *track = p->track;
	struct eg_surface surf;
	unsigned pitch, slice, mslice;
	unsigned long offset;
	int r;

	mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
	pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
	slice = track->db_depth_slice;
	surf.nbx = (pitch + 1) * 8;
	surf.nby = ((slice + 1) * 64) / surf.nbx;
	surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
	surf.format = G_028044_FORMAT(track->db_s_info);
	surf.tsplit = G_028044_TILE_SPLIT(track->db_s_info);
	surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
	surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
	surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
	surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
	surf.nsamples = 1;

	if (surf.format != 1) {
		dev_warn(p->dev, "%s:%d stencil invalid format %d\n",
			 __func__, __LINE__, surf.format);
		return -EINVAL;
	}
	/* replace by color format so we can use same code */
	surf.format = V_028C70_COLOR_8;

	r = evergreen_surface_value_conv_check(p, &surf, "stencil");
	if (r) {
		return r;
	}

	r = evergreen_surface_check(p, &surf, NULL);
	if (r) {
		/* old userspace doesn't compute proper depth/stencil alignment
		 * check that alignment against a bigger byte per elements and
		 * only report if that alignment is wrong too.
		 */
		surf.format = V_028C70_COLOR_8_8_8_8;
		r = evergreen_surface_check(p, &surf, "stencil");
		if (r) {
			dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
				 __func__, __LINE__, track->db_depth_size,
				 track->db_depth_slice, track->db_s_info, track->db_z_info);
			return r;
		}
	}

	offset = track->db_s_read_offset << 8;
	if (offset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d stencil read bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, offset, surf.base_align);
		return -EINVAL;
	}
	offset += surf.layer_size * mslice;
	if (offset > radeon_bo_size(track->db_s_read_bo)) {
		dev_warn(p->dev, "%s:%d stencil read bo too small (layer size %d, "
			 "offset %ld, max layer %d, bo size %ld)\n",
			 __func__, __LINE__, surf.layer_size,
			 (unsigned long)track->db_s_read_offset << 8, mslice,
			 radeon_bo_size(track->db_s_read_bo));
		dev_warn(p->dev, "%s:%d stencil invalid (0x%08x 0x%08x 0x%08x 0x%08x)\n",
			 __func__, __LINE__, track->db_depth_size,
			 track->db_depth_slice, track->db_s_info, track->db_z_info);
		return -EINVAL;
	}

	offset = track->db_s_write_offset << 8;
	if (offset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d stencil write bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, offset, surf.base_align);
		return -EINVAL;
	}
	offset += surf.layer_size * mslice;
	if (offset > radeon_bo_size(track->db_s_write_bo)) {
		dev_warn(p->dev, "%s:%d stencil write bo too small (layer size %d, "
			 "offset %ld, max layer %d, bo size %ld)\n",
			 __func__, __LINE__, surf.layer_size,
			 (unsigned long)track->db_s_write_offset << 8, mslice,
			 radeon_bo_size(track->db_s_write_bo));
		return -EINVAL;
	}

	/* hyperz */
	if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
		r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
		if (r) {
			return r;
		}
	}

	return 0;
}
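/*
 * Depth formats are likewise re-expressed as color formats of the same
 * element size (16-bit Z as COLOR_16, 32-bit Z as COLOR_8_8_8_8) so the
 * common surface checking code can be reused.
 */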
static int evergreen_cs_track_validate_depth(struct radeon_cs_parser *p)
{
	struct evergreen_cs_track *track = p->track;
	struct eg_surface surf;
	unsigned pitch, slice, mslice;
	unsigned long offset;
	int r;

	mslice = G_028008_SLICE_MAX(track->db_depth_view) + 1;
	pitch = G_028058_PITCH_TILE_MAX(track->db_depth_size);
	slice = track->db_depth_slice;
	surf.nbx = (pitch + 1) * 8;
	surf.nby = ((slice + 1) * 64) / surf.nbx;
	surf.mode = G_028040_ARRAY_MODE(track->db_z_info);
	surf.format = G_028040_FORMAT(track->db_z_info);
	surf.tsplit = G_028040_TILE_SPLIT(track->db_z_info);
	surf.nbanks = G_028040_NUM_BANKS(track->db_z_info);
	surf.bankw = G_028040_BANK_WIDTH(track->db_z_info);
	surf.bankh = G_028040_BANK_HEIGHT(track->db_z_info);
	surf.mtilea = G_028040_MACRO_TILE_ASPECT(track->db_z_info);
	surf.nsamples = 1;

	switch (surf.format) {
	case V_028040_Z_16:
		surf.format = V_028C70_COLOR_16;
		break;
	case V_028040_Z_24:
	case V_028040_Z_32_FLOAT:
		surf.format = V_028C70_COLOR_8_8_8_8;
		break;
	default:
		dev_warn(p->dev, "%s:%d depth invalid format %d\n",
			 __func__, __LINE__, surf.format);
		return -EINVAL;
	}

	r = evergreen_surface_value_conv_check(p, &surf, "depth");
	if (r) {
		dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
			 __func__, __LINE__, track->db_depth_size,
			 track->db_depth_slice, track->db_z_info);
		return r;
	}

	r = evergreen_surface_check(p, &surf, "depth");
	if (r) {
		dev_warn(p->dev, "%s:%d depth invalid (0x%08x 0x%08x 0x%08x)\n",
			 __func__, __LINE__, track->db_depth_size,
			 track->db_depth_slice, track->db_z_info);
		return r;
	}

	offset = track->db_z_read_offset << 8;
	if (offset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d depth read bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, offset, surf.base_align);
		return -EINVAL;
	}
	offset += surf.layer_size * mslice;
	if (offset > radeon_bo_size(track->db_z_read_bo)) {
		dev_warn(p->dev, "%s:%d depth read bo too small (layer size %d, "
			 "offset %ld, max layer %d, bo size %ld)\n",
			 __func__, __LINE__, surf.layer_size,
			 (unsigned long)track->db_z_read_offset << 8, mslice,
			 radeon_bo_size(track->db_z_read_bo));
		return -EINVAL;
	}

	offset = track->db_z_write_offset << 8;
	if (offset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d depth write bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, offset, surf.base_align);
		return -EINVAL;
	}
	offset += surf.layer_size * mslice;
	if (offset > radeon_bo_size(track->db_z_write_bo)) {
		dev_warn(p->dev, "%s:%d depth write bo too small (layer size %d, "
			 "offset %ld, max layer %d, bo size %ld)\n",
			 __func__, __LINE__, surf.layer_size,
			 (unsigned long)track->db_z_write_offset << 8, mslice,
			 radeon_bo_size(track->db_z_write_bo));
		return -EINVAL;
	}

	/* hyperz */
	if (G_028040_TILE_SURFACE_ENABLE(track->db_z_info)) {
		r = evergreen_cs_track_validate_htile(p, surf.nbx, surf.nby);
		if (r) {
			return r;
		}
	}

	return 0;
}
static int evergreen_cs_track_validate_texture(struct radeon_cs_parser *p,
					       struct radeon_bo *texture,
					       struct radeon_bo *mipmap,
					       unsigned idx)
{
	struct eg_surface surf;
	unsigned long toffset, moffset;
	unsigned dim, llevel, mslice, width, height, depth, i;
	u32 texdw[8];
	int r;

	texdw[0] = radeon_get_ib_value(p, idx + 0);
	texdw[1] = radeon_get_ib_value(p, idx + 1);
	texdw[2] = radeon_get_ib_value(p, idx + 2);
	texdw[3] = radeon_get_ib_value(p, idx + 3);
	texdw[4] = radeon_get_ib_value(p, idx + 4);
	texdw[5] = radeon_get_ib_value(p, idx + 5);
	texdw[6] = radeon_get_ib_value(p, idx + 6);
	texdw[7] = radeon_get_ib_value(p, idx + 7);
	dim = G_030000_DIM(texdw[0]);
	llevel = G_030014_LAST_LEVEL(texdw[5]);
	mslice = G_030014_LAST_ARRAY(texdw[5]) + 1;
	width = G_030000_TEX_WIDTH(texdw[0]) + 1;
	height = G_030004_TEX_HEIGHT(texdw[1]) + 1;
	depth = G_030004_TEX_DEPTH(texdw[1]) + 1;
	surf.format = G_03001C_DATA_FORMAT(texdw[7]);
	surf.nbx = (G_030000_PITCH(texdw[0]) + 1) * 8;
	surf.nbx = r600_fmt_get_nblocksx(surf.format, surf.nbx);
	surf.nby = r600_fmt_get_nblocksy(surf.format, height);
	surf.mode = G_030004_ARRAY_MODE(texdw[1]);
	surf.tsplit = G_030018_TILE_SPLIT(texdw[6]);
	surf.nbanks = G_03001C_NUM_BANKS(texdw[7]);
	surf.bankw = G_03001C_BANK_WIDTH(texdw[7]);
	surf.bankh = G_03001C_BANK_HEIGHT(texdw[7]);
	surf.mtilea = G_03001C_MACRO_TILE_ASPECT(texdw[7]);
	surf.nsamples = 1;
	toffset = texdw[2] << 8;
	moffset = texdw[3] << 8;

	if (!r600_fmt_is_valid_texture(surf.format, p->family)) {
		dev_warn(p->dev, "%s:%d texture invalid format %d\n",
			 __func__, __LINE__, surf.format);
		return -EINVAL;
	}
	switch (dim) {
	case V_030000_SQ_TEX_DIM_1D:
	case V_030000_SQ_TEX_DIM_2D:
	case V_030000_SQ_TEX_DIM_CUBEMAP:
	case V_030000_SQ_TEX_DIM_1D_ARRAY:
	case V_030000_SQ_TEX_DIM_2D_ARRAY:
		depth = 1;
		break;
	case V_030000_SQ_TEX_DIM_2D_MSAA:
	case V_030000_SQ_TEX_DIM_2D_ARRAY_MSAA:
		surf.nsamples = 1 << llevel;
		llevel = 0;
		depth = 1;
		break;
	case V_030000_SQ_TEX_DIM_3D:
		break;
	default:
		dev_warn(p->dev, "%s:%d texture invalid dimension %d\n",
			 __func__, __LINE__, dim);
		return -EINVAL;
	}

	r = evergreen_surface_value_conv_check(p, &surf, "texture");
	if (r) {
		return r;
	}

	/* align height to hardware requirement */
	evergreen_surface_check(p, &surf, NULL);
	surf.nby = ALIGN(surf.nby, surf.halign);

	r = evergreen_surface_check(p, &surf, "texture");
	if (r) {
		dev_warn(p->dev, "%s:%d texture invalid 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x 0x%08x\n",
			 __func__, __LINE__, texdw[0], texdw[1], texdw[4],
			 texdw[5], texdw[6], texdw[7]);
		return r;
	}

	/* check texture size */
	if (toffset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d texture bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, toffset, surf.base_align);
		return -EINVAL;
	}
	if (moffset & (surf.base_align - 1)) {
		dev_warn(p->dev, "%s:%d mipmap bo base %ld not aligned with %ld\n",
			 __func__, __LINE__, moffset, surf.base_align);
		return -EINVAL;
	}
	if (dim == SQ_TEX_DIM_3D) {
		toffset += surf.layer_size * depth;
	} else {
		toffset += surf.layer_size * mslice;
	}
	if (toffset > radeon_bo_size(texture)) {
		dev_warn(p->dev, "%s:%d texture bo too small (layer size %d, "
			 "offset %ld, max layer %d, depth %d, bo size %ld) (%d %d)\n",
			 __func__, __LINE__, surf.layer_size,
			 (unsigned long)texdw[2] << 8, mslice,
			 depth, radeon_bo_size(texture),
			 surf.nbx, surf.nby);
		return -EINVAL;
	}

	if (!mipmap) {
		if (llevel) {
			dev_warn(p->dev, "%s:%i got NULL MIP_ADDRESS relocation\n",
				 __func__, __LINE__);
			return -EINVAL;
		} else {
			return 0; /* everything's ok */
		}
	}

	/* check mipmap size */
	for (i = 1; i <= llevel; i++) {
		unsigned w, h, d;

		w = r600_mip_minify(width, i);
		h = r600_mip_minify(height, i);
		d = r600_mip_minify(depth, i);
		surf.nbx = r600_fmt_get_nblocksx(surf.format, w);
		surf.nby = r600_fmt_get_nblocksy(surf.format, h);

		switch (surf.mode) {
		case ARRAY_2D_TILED_THIN1:
			if (surf.nbx < surf.palign || surf.nby < surf.halign) {
				surf.mode = ARRAY_1D_TILED_THIN1;
				/* recompute alignment */
				evergreen_surface_check(p, &surf, NULL);
			}
			break;
		case ARRAY_LINEAR_GENERAL:
		case ARRAY_LINEAR_ALIGNED:
		case ARRAY_1D_TILED_THIN1:
			break;
		default:
			dev_warn(p->dev, "%s:%d invalid array mode %d\n",
				 __func__, __LINE__, surf.mode);
			return -EINVAL;
		}
		surf.nbx = ALIGN(surf.nbx, surf.palign);
		surf.nby = ALIGN(surf.nby, surf.halign);

		r = evergreen_surface_check(p, &surf, "mipmap");
		if (r) {
			return r;
		}

		if (dim == SQ_TEX_DIM_3D) {
			moffset += surf.layer_size * d;
		} else {
			moffset += surf.layer_size * mslice;
		}
		if (moffset > radeon_bo_size(mipmap)) {
			dev_warn(p->dev, "%s:%d mipmap [%d] bo too small (layer size %d, "
					"offset %ld, coffset %ld, max layer %d, depth %d, "
					"bo size %ld) level0 (%d %d %d)\n",
					__func__, __LINE__, i, surf.layer_size,
					(unsigned long)texdw[3] << 8, moffset, mslice,
					d, radeon_bo_size(mipmap),
					width, height, depth);
			dev_warn(p->dev, "%s:%d problematic surf: (%d %d) (%d %d %d %d %d %d %d)\n",
				 __func__, __LINE__, surf.nbx, surf.nby,
				 surf.mode, surf.bpe, surf.nsamples,
				 surf.bankw, surf.bankh,
				 surf.tsplit, surf.mtilea);
			return -EINVAL;
		}
	}

	return 0;
}
static int evergreen_cs_track_check(struct radeon_cs_parser *p)
{
	struct evergreen_cs_track *track = p->track;
	unsigned tmp, i;
	int r;
	unsigned buffer_mask = 0;

	/* check streamout */
	if (track->streamout_dirty && track->vgt_strmout_config) {
		for (i = 0; i < 4; i++) {
			if (track->vgt_strmout_config & (1 << i)) {
				buffer_mask |= (track->vgt_strmout_buffer_config >> (i * 4)) & 0xf;
			}
		}

		for (i = 0; i < 4; i++) {
			if (buffer_mask & (1 << i)) {
				if (track->vgt_strmout_bo[i]) {
					u64 offset = (u64)track->vgt_strmout_bo_offset[i] +
							(u64)track->vgt_strmout_size[i];
					if (offset > radeon_bo_size(track->vgt_strmout_bo[i])) {
						DRM_ERROR("streamout %d bo too small: 0x%llx, 0x%lx\n",
							  i, offset,
							  radeon_bo_size(track->vgt_strmout_bo[i]));
						return -EINVAL;
					}
				} else {
					dev_warn(p->dev, "No buffer for streamout %d\n", i);
					return -EINVAL;
				}
			}
		}
		track->streamout_dirty = false;
	}

	if (track->sx_misc_kill_all_prims)
		return 0;

	/* check that we have a cb for each enabled target
	 */
	if (track->cb_dirty) {
		tmp = track->cb_target_mask;
		for (i = 0; i < 8; i++) {
			if ((tmp >> (i * 4)) & 0xF) {
				/* at least one component is enabled */
				if (track->cb_color_bo[i] == NULL) {
					dev_warn(p->dev, "%s:%d mask 0x%08X | 0x%08X no cb for %d\n",
						 __func__, __LINE__, track->cb_target_mask, track->cb_shader_mask, i);
					return -EINVAL;
				}
				/* check cb */
				r = evergreen_cs_track_validate_cb(p, i);
				if (r) {
					return r;
				}
			}
		}
		track->cb_dirty = false;
	}

	if (track->db_dirty) {
		/* Check stencil buffer */
		if (G_028044_FORMAT(track->db_s_info) != V_028044_STENCIL_INVALID &&
		    G_028800_STENCIL_ENABLE(track->db_depth_control)) {
			r = evergreen_cs_track_validate_stencil(p);
			if (r)
				return r;
		}
		/* Check depth buffer */
		if (G_028040_FORMAT(track->db_z_info) != V_028040_Z_INVALID &&
		    G_028800_Z_ENABLE(track->db_depth_control)) {
			r = evergreen_cs_track_validate_depth(p);
			if (r)
				return r;
		}
		track->db_dirty = false;
	}

	return 0;
}
/**
 * evergreen_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @p:		parser structure holding parsing context.
 * @pkt:	where to store packet information.
 * @idx:	index into the ib at which the packet starts.
 *
 * Assume that chunk_ib_index is properly set. Will return -EINVAL
 * if the packet is bigger than the remaining ib size, or if the packet
 * type is unknown.
 */
int evergreen_cs_packet_parse(struct radeon_cs_parser *p,
			      struct radeon_cs_packet *pkt,
			      unsigned idx)
{
	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
	uint32_t header;

	if (idx >= ib_chunk->length_dw) {
		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
			  idx, ib_chunk->length_dw);
		return -EINVAL;
	}
	header = radeon_get_ib_value(p, idx);
	pkt->idx = idx;
	pkt->type = CP_PACKET_GET_TYPE(header);
	pkt->count = CP_PACKET_GET_COUNT(header);
	pkt->one_reg_wr = 0;
	switch (pkt->type) {
	case PACKET_TYPE0:
		pkt->reg = CP_PACKET0_GET_REG(header);
		break;
	case PACKET_TYPE3:
		pkt->opcode = CP_PACKET3_GET_OPCODE(header);
		break;
	case PACKET_TYPE2:
		pkt->count = -1;
		break;
	default:
		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
		return -EINVAL;
	}
	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
		return -EINVAL;
	}
	return 0;
}
/**
 * evergreen_cs_packet_next_reloc() - parse next packet which should be reloc packet3
 * @p:		parser structure holding parsing context.
 * @cs_reloc:	where to store the relocation information.
 *
 * Check that the next packet is a relocation packet3 and fetch the
 * relocation entry it references.
 */
static int evergreen_cs_packet_next_reloc(struct radeon_cs_parser *p,
					  struct radeon_cs_reloc **cs_reloc)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_packet p3reloc;
	unsigned idx;
	int r;

	if (p->chunk_relocs_idx == -1) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}
	*cs_reloc = NULL;
	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
	r = evergreen_cs_packet_parse(p, &p3reloc, p->idx);
	if (r) {
		return r;
	}
	p->idx += p3reloc.count + 2;
	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
			  p3reloc.idx);
		return -EINVAL;
	}
	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		return -EINVAL;
	}
	/* FIXME: we assume reloc size is 4 dwords */
	*cs_reloc = p->relocs_ptr[(idx / 4)];
	return 0;
}
/**
 * evergreen_cs_packet_next_is_pkt3_nop() - test if the next packet is NOP
 * @p:		structure holding the parser context.
 *
 * Check if the next packet is a relocation packet3.
 */
static bool evergreen_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet p3reloc;
	int r;

	r = evergreen_cs_packet_parse(p, &p3reloc, p->idx);
	if (r) {
		return false;
	}
	if (p3reloc.type != PACKET_TYPE3 || p3reloc.opcode != PACKET3_NOP) {
		return false;
	}
	return true;
}
/**
 * evergreen_cs_packet_parse_vline() - parse userspace VLINE packet
 * @p:		parser structure holding parsing context.
 *
 * Userspace sends a special sequence for VLINE waits.
 * PACKET0 - VLINE_START_END + value
 * PACKET3 - WAIT_REG_MEM poll vline status reg
 * RELOC (P3) - crtc_id in reloc.
 *
 * This function parses this and relocates the VLINE START END
 * and WAIT_REG_MEM packets to the correct crtc.
 * It also detects a switched off crtc and nulls out the
 * wait in that case.
 */
static int evergreen_cs_packet_parse_vline(struct radeon_cs_parser *p)
{
	struct drm_mode_object *obj;
	struct drm_crtc *crtc;
	struct radeon_crtc *radeon_crtc;
	struct radeon_cs_packet p3reloc, wait_reg_mem;
	int crtc_id;
	int r;
	uint32_t header, h_idx, reg, wait_reg_mem_info;
	volatile uint32_t *ib;

	ib = p->ib.ptr;

	/* parse the WAIT_REG_MEM */
	r = evergreen_cs_packet_parse(p, &wait_reg_mem, p->idx);
	if (r)
		return r;

	/* check it's a WAIT_REG_MEM */
	if (wait_reg_mem.type != PACKET_TYPE3 ||
	    wait_reg_mem.opcode != PACKET3_WAIT_REG_MEM) {
		DRM_ERROR("vline wait missing WAIT_REG_MEM segment\n");
		return -EINVAL;
	}

	wait_reg_mem_info = radeon_get_ib_value(p, wait_reg_mem.idx + 1);
	/* bit 4 is reg (0) or mem (1) */
	if (wait_reg_mem_info & 0x10) {
		DRM_ERROR("vline WAIT_REG_MEM waiting on MEM rather than REG\n");
		return -EINVAL;
	}
	/* waiting for value to be equal */
	if ((wait_reg_mem_info & 0x7) != 0x3) {
		DRM_ERROR("vline WAIT_REG_MEM function not equal\n");
		return -EINVAL;
	}
	if ((radeon_get_ib_value(p, wait_reg_mem.idx + 2) << 2) != EVERGREEN_VLINE_STATUS) {
		DRM_ERROR("vline WAIT_REG_MEM bad reg\n");
		return -EINVAL;
	}

	if (radeon_get_ib_value(p, wait_reg_mem.idx + 5) != EVERGREEN_VLINE_STAT) {
		DRM_ERROR("vline WAIT_REG_MEM bad bit mask\n");
		return -EINVAL;
	}

	/* jump over the NOP */
	r = evergreen_cs_packet_parse(p, &p3reloc, p->idx + wait_reg_mem.count + 2);
	if (r)
		return r;

	h_idx = p->idx - 2;
	p->idx += wait_reg_mem.count + 2;
	p->idx += p3reloc.count + 2;

	header = radeon_get_ib_value(p, h_idx);
	crtc_id = radeon_get_ib_value(p, h_idx + 2 + 7 + 1);
	reg = CP_PACKET0_GET_REG(header);
	obj = drm_mode_object_find(p->rdev->ddev, crtc_id, DRM_MODE_OBJECT_CRTC);
	if (!obj) {
		DRM_ERROR("cannot find crtc %d\n", crtc_id);
		return -EINVAL;
	}
	crtc = obj_to_crtc(obj);
	radeon_crtc = to_radeon_crtc(crtc);
	crtc_id = radeon_crtc->crtc_id;

	if (!crtc->enabled) {
		/* if the CRTC isn't enabled - we need to nop out the WAIT_REG_MEM */
		ib[h_idx + 2] = PACKET2(0);
		ib[h_idx + 3] = PACKET2(0);
		ib[h_idx + 4] = PACKET2(0);
		ib[h_idx + 5] = PACKET2(0);
		ib[h_idx + 6] = PACKET2(0);
		ib[h_idx + 7] = PACKET2(0);
		ib[h_idx + 8] = PACKET2(0);
	} else {
		switch (reg) {
		case EVERGREEN_VLINE_START_END:
			header &= ~R600_CP_PACKET0_REG_MASK;
			header |= (EVERGREEN_VLINE_START_END + radeon_crtc->crtc_offset) >> 2;
			ib[h_idx] = header;
			ib[h_idx + 4] = (EVERGREEN_VLINE_STATUS + radeon_crtc->crtc_offset) >> 2;
			break;
		default:
			DRM_ERROR("unknown crtc reloc\n");
			return -EINVAL;
		}
	}
	return 0;
}
static int evergreen_packet0_check(struct radeon_cs_parser *p,
				   struct radeon_cs_packet *pkt,
				   unsigned idx, unsigned reg)
{
	int r;

	switch (reg) {
	case EVERGREEN_VLINE_START_END:
		r = evergreen_cs_packet_parse_vline(p);
		if (r) {
			DRM_ERROR("No reloc for ib[%d]=0x%04X\n",
				  idx, reg);
			return r;
		}
		break;
	default:
		printk(KERN_ERR "Forbidden register 0x%04X in cs at %d\n",
		       reg, idx);
		return -EINVAL;
	}
	return 0;
}

static int evergreen_cs_parse_packet0(struct radeon_cs_parser *p,
				      struct radeon_cs_packet *pkt)
{
	unsigned reg, i;
	unsigned idx;
	int r;

	idx = pkt->idx + 1;
	reg = pkt->reg;
	for (i = 0; i <= pkt->count; i++, idx++, reg += 4) {
		r = evergreen_packet0_check(p, pkt, idx, reg);
		if (r) {
			return r;
		}
	}
	return 0;
}
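/*
 * Register writes are screened against a bitmap with one bit per register
 * dword: reg >> 7 selects the 32-bit bitmap word (128 bytes of register
 * space per word) and (reg >> 2) & 31 the bit within it.  For example,
 * register 0x28C60 maps to word 0x518, bit 24.  A clear bit means the
 * register is always allowed; a set bit sends it to the special-case
 * handling below.
 */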
/**
 * evergreen_cs_check_reg() - check if register is authorized or not
 * @p: parser structure holding parsing context
 * @reg: register we are testing
 * @idx: index into the cs buffer
 *
 * This function will test against evergreen_reg_safe_bm and return 0
 * if the register is safe. If the register is not flagged as safe this
 * function will test it against a list of registers needing special
 * handling.
 */
static int evergreen_cs_check_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
{
	struct evergreen_cs_track *track = (struct evergreen_cs_track *)p->track;
	struct radeon_cs_reloc *reloc;
	u32 last_reg;
	u32 m, i, tmp, *ib;
	int r;

	if (p->rdev->family >= CHIP_CAYMAN)
		last_reg = ARRAY_SIZE(cayman_reg_safe_bm);
	else
		last_reg = ARRAY_SIZE(evergreen_reg_safe_bm);

	i = (reg >> 7);
	if (i >= last_reg) {
		dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
		return -EINVAL;
	}
	m = 1 << ((reg >> 2) & 31);
	if (p->rdev->family >= CHIP_CAYMAN) {
		if (!(cayman_reg_safe_bm[i] & m))
			return 0;
	} else {
		if (!(evergreen_reg_safe_bm[i] & m))
			return 0;
	}
	ib = p->ib.ptr;
	switch (reg) {
	/* force following reg to 0 in an attempt to disable out buffer
	 * which will need us to better understand how it works to perform
	 * security check on it (Jerome)
	 */
	case SQ_ESGS_RING_SIZE:
	case SQ_GSVS_RING_SIZE:
	case SQ_ESTMP_RING_SIZE:
	case SQ_GSTMP_RING_SIZE:
	case SQ_HSTMP_RING_SIZE:
	case SQ_LSTMP_RING_SIZE:
	case SQ_PSTMP_RING_SIZE:
	case SQ_VSTMP_RING_SIZE:
	case SQ_ESGS_RING_ITEMSIZE:
	case SQ_ESTMP_RING_ITEMSIZE:
	case SQ_GSTMP_RING_ITEMSIZE:
	case SQ_GSVS_RING_ITEMSIZE:
	case SQ_GS_VERT_ITEMSIZE:
	case SQ_GS_VERT_ITEMSIZE_1:
	case SQ_GS_VERT_ITEMSIZE_2:
	case SQ_GS_VERT_ITEMSIZE_3:
	case SQ_GSVS_RING_OFFSET_1:
	case SQ_GSVS_RING_OFFSET_2:
	case SQ_GSVS_RING_OFFSET_3:
	case SQ_HSTMP_RING_ITEMSIZE:
	case SQ_LSTMP_RING_ITEMSIZE:
	case SQ_PSTMP_RING_ITEMSIZE:
	case SQ_VSTMP_RING_ITEMSIZE:
	case VGT_TF_RING_SIZE:
		/* get value to populate the IB don't remove */
		/*tmp =radeon_get_ib_value(p, idx);
		  ib[idx] = 0;*/
		break;
	case SQ_ESGS_RING_BASE:
	case SQ_GSVS_RING_BASE:
	case SQ_ESTMP_RING_BASE:
	case SQ_GSTMP_RING_BASE:
	case SQ_HSTMP_RING_BASE:
	case SQ_LSTMP_RING_BASE:
	case SQ_PSTMP_RING_BASE:
	case SQ_VSTMP_RING_BASE:
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		break;
	case DB_DEPTH_CONTROL:
		track->db_depth_control = radeon_get_ib_value(p, idx);
		track->db_dirty = true;
		break;
	case CAYMAN_DB_EQAA:
		if (p->rdev->family < CHIP_CAYMAN) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
				 "0x%04X\n", reg);
			return -EINVAL;
		}
		break;
	case CAYMAN_DB_DEPTH_INFO:
		if (p->rdev->family < CHIP_CAYMAN) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
				 "0x%04X\n", reg);
			return -EINVAL;
		}
		break;
	case DB_Z_INFO:
		track->db_z_info = radeon_get_ib_value(p, idx);
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
			r = evergreen_cs_packet_next_reloc(p, &reloc);
			if (r) {
				dev_warn(p->dev, "bad SET_CONTEXT_REG "
						"0x%04X\n", reg);
				return -EINVAL;
			}
			ib[idx] &= ~Z_ARRAY_MODE(0xf);
			track->db_z_info &= ~Z_ARRAY_MODE(0xf);
			ib[idx] |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
			track->db_z_info |= Z_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
				unsigned bankw, bankh, mtaspect, tile_split;

				evergreen_tiling_fields(reloc->lobj.tiling_flags,
							&bankw, &bankh, &mtaspect,
							&tile_split);
				ib[idx] |= DB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
				ib[idx] |= DB_TILE_SPLIT(tile_split) |
						DB_BANK_WIDTH(bankw) |
						DB_BANK_HEIGHT(bankh) |
						DB_MACRO_TILE_ASPECT(mtaspect);
			}
		}
		track->db_dirty = true;
		break;
	case DB_STENCIL_INFO:
		track->db_s_info = radeon_get_ib_value(p, idx);
		track->db_dirty = true;
		break;
	case DB_DEPTH_VIEW:
		track->db_depth_view = radeon_get_ib_value(p, idx);
		track->db_dirty = true;
		break;
	case DB_DEPTH_SIZE:
		track->db_depth_size = radeon_get_ib_value(p, idx);
		track->db_dirty = true;
		break;
	case R_02805C_DB_DEPTH_SLICE:
		track->db_depth_slice = radeon_get_ib_value(p, idx);
		track->db_dirty = true;
		break;
	case DB_Z_READ_BASE:
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		track->db_z_read_offset = radeon_get_ib_value(p, idx);
		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		track->db_z_read_bo = reloc->robj;
		track->db_dirty = true;
		break;
	case DB_Z_WRITE_BASE:
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		track->db_z_write_offset = radeon_get_ib_value(p, idx);
		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		track->db_z_write_bo = reloc->robj;
		track->db_dirty = true;
		break;
	case DB_STENCIL_READ_BASE:
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		track->db_s_read_offset = radeon_get_ib_value(p, idx);
		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		track->db_s_read_bo = reloc->robj;
		track->db_dirty = true;
		break;
	case DB_STENCIL_WRITE_BASE:
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		track->db_s_write_offset = radeon_get_ib_value(p, idx);
		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		track->db_s_write_bo = reloc->robj;
		track->db_dirty = true;
		break;
	case VGT_STRMOUT_CONFIG:
		track->vgt_strmout_config = radeon_get_ib_value(p, idx);
		track->streamout_dirty = true;
		break;
	case VGT_STRMOUT_BUFFER_CONFIG:
		track->vgt_strmout_buffer_config = radeon_get_ib_value(p, idx);
		track->streamout_dirty = true;
		break;
	case VGT_STRMOUT_BUFFER_BASE_0:
	case VGT_STRMOUT_BUFFER_BASE_1:
	case VGT_STRMOUT_BUFFER_BASE_2:
	case VGT_STRMOUT_BUFFER_BASE_3:
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		tmp = (reg - VGT_STRMOUT_BUFFER_BASE_0) / 16;
		track->vgt_strmout_bo_offset[tmp] = radeon_get_ib_value(p, idx) << 8;
		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		track->vgt_strmout_bo[tmp] = reloc->robj;
		track->streamout_dirty = true;
		break;
	case VGT_STRMOUT_BUFFER_SIZE_0:
	case VGT_STRMOUT_BUFFER_SIZE_1:
	case VGT_STRMOUT_BUFFER_SIZE_2:
	case VGT_STRMOUT_BUFFER_SIZE_3:
		tmp = (reg - VGT_STRMOUT_BUFFER_SIZE_0) / 16;
		/* size in register is DWs, convert to bytes */
		track->vgt_strmout_size[tmp] = radeon_get_ib_value(p, idx) * 4;
		track->streamout_dirty = true;
		break;
	case CP_COHER_BASE:
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_warn(p->dev, "missing reloc for CP_COHER_BASE "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
	case CB_TARGET_MASK:
		track->cb_target_mask = radeon_get_ib_value(p, idx);
		track->cb_dirty = true;
		break;
	case CB_SHADER_MASK:
		track->cb_shader_mask = radeon_get_ib_value(p, idx);
		track->cb_dirty = true;
		break;
	case PA_SC_AA_CONFIG:
		if (p->rdev->family >= CHIP_CAYMAN) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
				 "0x%04X\n", reg);
			return -EINVAL;
		}
		tmp = radeon_get_ib_value(p, idx) & MSAA_NUM_SAMPLES_MASK;
		track->nsamples = 1 << tmp;
		break;
	case CAYMAN_PA_SC_AA_CONFIG:
		if (p->rdev->family < CHIP_CAYMAN) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
				 "0x%04X\n", reg);
			return -EINVAL;
		}
		tmp = radeon_get_ib_value(p, idx) & CAYMAN_MSAA_NUM_SAMPLES_MASK;
		track->nsamples = 1 << tmp;
		break;
	case CB_COLOR0_VIEW:
	case CB_COLOR1_VIEW:
	case CB_COLOR2_VIEW:
	case CB_COLOR3_VIEW:
	case CB_COLOR4_VIEW:
	case CB_COLOR5_VIEW:
	case CB_COLOR6_VIEW:
	case CB_COLOR7_VIEW:
		tmp = (reg - CB_COLOR0_VIEW) / 0x3c;
		track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
		track->cb_dirty = true;
		break;
	case CB_COLOR8_VIEW:
	case CB_COLOR9_VIEW:
	case CB_COLOR10_VIEW:
	case CB_COLOR11_VIEW:
		tmp = ((reg - CB_COLOR8_VIEW) / 0x1c) + 8;
		track->cb_color_view[tmp] = radeon_get_ib_value(p, idx);
		track->cb_dirty = true;
		break;
	case CB_COLOR0_INFO:
	case CB_COLOR1_INFO:
	case CB_COLOR2_INFO:
	case CB_COLOR3_INFO:
	case CB_COLOR4_INFO:
	case CB_COLOR5_INFO:
	case CB_COLOR6_INFO:
	case CB_COLOR7_INFO:
		tmp = (reg - CB_COLOR0_INFO) / 0x3c;
		track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
			r = evergreen_cs_packet_next_reloc(p, &reloc);
			if (r) {
				dev_warn(p->dev, "bad SET_CONTEXT_REG "
						"0x%04X\n", reg);
				return -EINVAL;
			}
			ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
			track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
		}
		track->cb_dirty = true;
		break;
	case CB_COLOR8_INFO:
	case CB_COLOR9_INFO:
	case CB_COLOR10_INFO:
	case CB_COLOR11_INFO:
		tmp = ((reg - CB_COLOR8_INFO) / 0x1c) + 8;
		track->cb_color_info[tmp] = radeon_get_ib_value(p, idx);
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
			r = evergreen_cs_packet_next_reloc(p, &reloc);
			if (r) {
				dev_warn(p->dev, "bad SET_CONTEXT_REG "
						"0x%04X\n", reg);
				return -EINVAL;
			}
			ib[idx] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
			track->cb_color_info[tmp] |= CB_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
		}
		track->cb_dirty = true;
		break;
	case CB_COLOR0_PITCH:
	case CB_COLOR1_PITCH:
	case CB_COLOR2_PITCH:
	case CB_COLOR3_PITCH:
	case CB_COLOR4_PITCH:
	case CB_COLOR5_PITCH:
	case CB_COLOR6_PITCH:
	case CB_COLOR7_PITCH:
		tmp = (reg - CB_COLOR0_PITCH) / 0x3c;
		track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
		track->cb_dirty = true;
		break;
	case CB_COLOR8_PITCH:
	case CB_COLOR9_PITCH:
	case CB_COLOR10_PITCH:
	case CB_COLOR11_PITCH:
		tmp = ((reg - CB_COLOR8_PITCH) / 0x1c) + 8;
		track->cb_color_pitch[tmp] = radeon_get_ib_value(p, idx);
		track->cb_dirty = true;
		break;
	case CB_COLOR0_SLICE:
	case CB_COLOR1_SLICE:
	case CB_COLOR2_SLICE:
	case CB_COLOR3_SLICE:
	case CB_COLOR4_SLICE:
	case CB_COLOR5_SLICE:
	case CB_COLOR6_SLICE:
	case CB_COLOR7_SLICE:
		tmp = (reg - CB_COLOR0_SLICE) / 0x3c;
		track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
		track->cb_color_slice_idx[tmp] = idx;
		track->cb_dirty = true;
		break;
	case CB_COLOR8_SLICE:
	case CB_COLOR9_SLICE:
	case CB_COLOR10_SLICE:
	case CB_COLOR11_SLICE:
		tmp = ((reg - CB_COLOR8_SLICE) / 0x1c) + 8;
		track->cb_color_slice[tmp] = radeon_get_ib_value(p, idx);
		track->cb_color_slice_idx[tmp] = idx;
		track->cb_dirty = true;
		break;
	case CB_COLOR0_ATTRIB:
	case CB_COLOR1_ATTRIB:
	case CB_COLOR2_ATTRIB:
	case CB_COLOR3_ATTRIB:
	case CB_COLOR4_ATTRIB:
	case CB_COLOR5_ATTRIB:
	case CB_COLOR6_ATTRIB:
	case CB_COLOR7_ATTRIB:
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
				unsigned bankw, bankh, mtaspect, tile_split;

				evergreen_tiling_fields(reloc->lobj.tiling_flags,
							&bankw, &bankh, &mtaspect,
							&tile_split);
				ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
				ib[idx] |= CB_TILE_SPLIT(tile_split) |
					   CB_BANK_WIDTH(bankw) |
					   CB_BANK_HEIGHT(bankh) |
					   CB_MACRO_TILE_ASPECT(mtaspect);
			}
		}
		tmp = ((reg - CB_COLOR0_ATTRIB) / 0x3c);
		track->cb_color_attrib[tmp] = ib[idx];
		track->cb_dirty = true;
		break;
	case CB_COLOR8_ATTRIB:
	case CB_COLOR9_ATTRIB:
	case CB_COLOR10_ATTRIB:
	case CB_COLOR11_ATTRIB:
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
			if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
				unsigned bankw, bankh, mtaspect, tile_split;

				evergreen_tiling_fields(reloc->lobj.tiling_flags,
							&bankw, &bankh, &mtaspect,
							&tile_split);
				ib[idx] |= CB_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
				ib[idx] |= CB_TILE_SPLIT(tile_split) |
					   CB_BANK_WIDTH(bankw) |
					   CB_BANK_HEIGHT(bankh) |
					   CB_MACRO_TILE_ASPECT(mtaspect);
			}
		}
		tmp = ((reg - CB_COLOR8_ATTRIB) / 0x1c) + 8;
		track->cb_color_attrib[tmp] = ib[idx];
		track->cb_dirty = true;
		break;
	case CB_COLOR0_FMASK:
	case CB_COLOR1_FMASK:
	case CB_COLOR2_FMASK:
	case CB_COLOR3_FMASK:
	case CB_COLOR4_FMASK:
	case CB_COLOR5_FMASK:
	case CB_COLOR6_FMASK:
	case CB_COLOR7_FMASK:
		tmp = (reg - CB_COLOR0_FMASK) / 0x3c;
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
			return -EINVAL;
		}
		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		track->cb_color_fmask_bo[tmp] = reloc->robj;
		break;
	case CB_COLOR0_CMASK:
	case CB_COLOR1_CMASK:
	case CB_COLOR2_CMASK:
	case CB_COLOR3_CMASK:
	case CB_COLOR4_CMASK:
	case CB_COLOR5_CMASK:
	case CB_COLOR6_CMASK:
	case CB_COLOR7_CMASK:
		tmp = (reg - CB_COLOR0_CMASK) / 0x3c;
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_err(p->dev, "bad SET_CONTEXT_REG 0x%04X\n", reg);
			return -EINVAL;
		}
		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		track->cb_color_cmask_bo[tmp] = reloc->robj;
		break;
	case CB_COLOR0_FMASK_SLICE:
	case CB_COLOR1_FMASK_SLICE:
	case CB_COLOR2_FMASK_SLICE:
	case CB_COLOR3_FMASK_SLICE:
	case CB_COLOR4_FMASK_SLICE:
	case CB_COLOR5_FMASK_SLICE:
	case CB_COLOR6_FMASK_SLICE:
	case CB_COLOR7_FMASK_SLICE:
		tmp = (reg - CB_COLOR0_FMASK_SLICE) / 0x3c;
		track->cb_color_fmask_slice[tmp] = radeon_get_ib_value(p, idx);
		break;
	case CB_COLOR0_CMASK_SLICE:
	case CB_COLOR1_CMASK_SLICE:
	case CB_COLOR2_CMASK_SLICE:
	case CB_COLOR3_CMASK_SLICE:
	case CB_COLOR4_CMASK_SLICE:
	case CB_COLOR5_CMASK_SLICE:
	case CB_COLOR6_CMASK_SLICE:
	case CB_COLOR7_CMASK_SLICE:
		tmp = (reg - CB_COLOR0_CMASK_SLICE) / 0x3c;
		track->cb_color_cmask_slice[tmp] = radeon_get_ib_value(p, idx);
		break;
	case CB_COLOR0_BASE:
	case CB_COLOR1_BASE:
	case CB_COLOR2_BASE:
	case CB_COLOR3_BASE:
	case CB_COLOR4_BASE:
	case CB_COLOR5_BASE:
	case CB_COLOR6_BASE:
	case CB_COLOR7_BASE:
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		tmp = (reg - CB_COLOR0_BASE) / 0x3c;
		track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		track->cb_color_bo[tmp] = reloc->robj;
		track->cb_dirty = true;
		break;
	case CB_COLOR8_BASE:
	case CB_COLOR9_BASE:
	case CB_COLOR10_BASE:
	case CB_COLOR11_BASE:
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		tmp = ((reg - CB_COLOR8_BASE) / 0x1c) + 8;
		track->cb_color_bo_offset[tmp] = radeon_get_ib_value(p, idx);
		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		track->cb_color_bo[tmp] = reloc->robj;
		track->cb_dirty = true;
		break;
	case DB_HTILE_DATA_BASE:
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		track->htile_offset = radeon_get_ib_value(p, idx);
		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		track->htile_bo = reloc->robj;
		track->db_dirty = true;
		break;
	case DB_HTILE_SURFACE:
		track->htile_surface = radeon_get_ib_value(p, idx);
		track->db_dirty = true;
		break;
	case CB_IMMED0_BASE:
	case CB_IMMED1_BASE:
	case CB_IMMED2_BASE:
	case CB_IMMED3_BASE:
	case CB_IMMED4_BASE:
	case CB_IMMED5_BASE:
	case CB_IMMED6_BASE:
	case CB_IMMED7_BASE:
	case CB_IMMED8_BASE:
	case CB_IMMED9_BASE:
	case CB_IMMED10_BASE:
	case CB_IMMED11_BASE:
	case SQ_PGM_START_FS:
	case SQ_PGM_START_ES:
	case SQ_PGM_START_VS:
	case SQ_PGM_START_GS:
	case SQ_PGM_START_PS:
	case SQ_PGM_START_HS:
	case SQ_PGM_START_LS:
	case SQ_CONST_MEM_BASE:
	case SQ_ALU_CONST_CACHE_GS_0:
	case SQ_ALU_CONST_CACHE_GS_1:
	case SQ_ALU_CONST_CACHE_GS_2:
	case SQ_ALU_CONST_CACHE_GS_3:
	case SQ_ALU_CONST_CACHE_GS_4:
	case SQ_ALU_CONST_CACHE_GS_5:
	case SQ_ALU_CONST_CACHE_GS_6:
	case SQ_ALU_CONST_CACHE_GS_7:
	case SQ_ALU_CONST_CACHE_GS_8:
	case SQ_ALU_CONST_CACHE_GS_9:
	case SQ_ALU_CONST_CACHE_GS_10:
	case SQ_ALU_CONST_CACHE_GS_11:
	case SQ_ALU_CONST_CACHE_GS_12:
	case SQ_ALU_CONST_CACHE_GS_13:
	case SQ_ALU_CONST_CACHE_GS_14:
	case SQ_ALU_CONST_CACHE_GS_15:
	case SQ_ALU_CONST_CACHE_PS_0:
	case SQ_ALU_CONST_CACHE_PS_1:
	case SQ_ALU_CONST_CACHE_PS_2:
	case SQ_ALU_CONST_CACHE_PS_3:
	case SQ_ALU_CONST_CACHE_PS_4:
	case SQ_ALU_CONST_CACHE_PS_5:
	case SQ_ALU_CONST_CACHE_PS_6:
	case SQ_ALU_CONST_CACHE_PS_7:
	case SQ_ALU_CONST_CACHE_PS_8:
	case SQ_ALU_CONST_CACHE_PS_9:
	case SQ_ALU_CONST_CACHE_PS_10:
	case SQ_ALU_CONST_CACHE_PS_11:
	case SQ_ALU_CONST_CACHE_PS_12:
	case SQ_ALU_CONST_CACHE_PS_13:
	case SQ_ALU_CONST_CACHE_PS_14:
	case SQ_ALU_CONST_CACHE_PS_15:
	case SQ_ALU_CONST_CACHE_VS_0:
	case SQ_ALU_CONST_CACHE_VS_1:
	case SQ_ALU_CONST_CACHE_VS_2:
	case SQ_ALU_CONST_CACHE_VS_3:
	case SQ_ALU_CONST_CACHE_VS_4:
	case SQ_ALU_CONST_CACHE_VS_5:
	case SQ_ALU_CONST_CACHE_VS_6:
	case SQ_ALU_CONST_CACHE_VS_7:
	case SQ_ALU_CONST_CACHE_VS_8:
	case SQ_ALU_CONST_CACHE_VS_9:
	case SQ_ALU_CONST_CACHE_VS_10:
	case SQ_ALU_CONST_CACHE_VS_11:
	case SQ_ALU_CONST_CACHE_VS_12:
	case SQ_ALU_CONST_CACHE_VS_13:
	case SQ_ALU_CONST_CACHE_VS_14:
	case SQ_ALU_CONST_CACHE_VS_15:
	case SQ_ALU_CONST_CACHE_HS_0:
	case SQ_ALU_CONST_CACHE_HS_1:
	case SQ_ALU_CONST_CACHE_HS_2:
	case SQ_ALU_CONST_CACHE_HS_3:
	case SQ_ALU_CONST_CACHE_HS_4:
	case SQ_ALU_CONST_CACHE_HS_5:
	case SQ_ALU_CONST_CACHE_HS_6:
	case SQ_ALU_CONST_CACHE_HS_7:
	case SQ_ALU_CONST_CACHE_HS_8:
	case SQ_ALU_CONST_CACHE_HS_9:
	case SQ_ALU_CONST_CACHE_HS_10:
	case SQ_ALU_CONST_CACHE_HS_11:
	case SQ_ALU_CONST_CACHE_HS_12:
	case SQ_ALU_CONST_CACHE_HS_13:
	case SQ_ALU_CONST_CACHE_HS_14:
	case SQ_ALU_CONST_CACHE_HS_15:
	case SQ_ALU_CONST_CACHE_LS_0:
	case SQ_ALU_CONST_CACHE_LS_1:
	case SQ_ALU_CONST_CACHE_LS_2:
	case SQ_ALU_CONST_CACHE_LS_3:
	case SQ_ALU_CONST_CACHE_LS_4:
	case SQ_ALU_CONST_CACHE_LS_5:
	case SQ_ALU_CONST_CACHE_LS_6:
	case SQ_ALU_CONST_CACHE_LS_7:
	case SQ_ALU_CONST_CACHE_LS_8:
	case SQ_ALU_CONST_CACHE_LS_9:
	case SQ_ALU_CONST_CACHE_LS_10:
	case SQ_ALU_CONST_CACHE_LS_11:
	case SQ_ALU_CONST_CACHE_LS_12:
	case SQ_ALU_CONST_CACHE_LS_13:
	case SQ_ALU_CONST_CACHE_LS_14:
	case SQ_ALU_CONST_CACHE_LS_15:
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
					"0x%04X\n", reg);
			return -EINVAL;
		}
		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		break;
	case SX_MEMORY_EXPORT_BASE:
		if (p->rdev->family >= CHIP_CAYMAN) {
			dev_warn(p->dev, "bad SET_CONFIG_REG "
				 "0x%04X\n", reg);
			return -EINVAL;
		}
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_warn(p->dev, "bad SET_CONFIG_REG "
				 "0x%04X\n", reg);
			return -EINVAL;
		}
		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		break;
	case CAYMAN_SX_SCATTER_EXPORT_BASE:
		if (p->rdev->family < CHIP_CAYMAN) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
				 "0x%04X\n", reg);
			return -EINVAL;
		}
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			dev_warn(p->dev, "bad SET_CONTEXT_REG "
				 "0x%04X\n", reg);
			return -EINVAL;
		}
		ib[idx] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		break;
	case SX_MISC:
		track->sx_misc_kill_all_prims = (radeon_get_ib_value(p, idx) & 0x1) != 0;
		break;
	default:
		dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
		return -EINVAL;
	}
	return 0;
}
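/*
 * Boolean variant of the bitmap test in evergreen_cs_check_reg(), intended
 * for callers that only need an allowed/forbidden answer and do no value
 * tracking of their own.
 */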
static bool evergreen_is_safe_reg(struct radeon_cs_parser *p, u32 reg, u32 idx)
{
	u32 last_reg, m, i;

	if (p->rdev->family >= CHIP_CAYMAN)
		last_reg = ARRAY_SIZE(cayman_reg_safe_bm);
	else
		last_reg = ARRAY_SIZE(evergreen_reg_safe_bm);

	i = (reg >> 7);
	if (i >= last_reg) {
		dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
		return false;
	}
	m = 1 << ((reg >> 2) & 31);
	if (p->rdev->family >= CHIP_CAYMAN) {
		if (!(cayman_reg_safe_bm[i] & m))
			return true;
	} else {
		if (!(evergreen_reg_safe_bm[i] & m))
			return true;
	}
	dev_warn(p->dev, "forbidden register 0x%08x at %d\n", reg, idx);
	return false;
}
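/*
 * PACKET3 handling: pkt->idx points at the packet header, so idx (header
 * plus one) is the first payload dword and idx_value its contents.  Each
 * opcode below first sanity checks pkt->count (payload length minus one)
 * before patching relocations into the ib.
 */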
static int evergreen_packet3_check(struct radeon_cs_parser *p,
				   struct radeon_cs_packet *pkt)
{
	struct radeon_cs_reloc *reloc;
	struct evergreen_cs_track *track;
	volatile u32 *ib;
	unsigned idx;
	unsigned start_reg, end_reg, reg;
	int r;
	u32 idx_value;

	track = (struct evergreen_cs_track *)p->track;
	ib = p->ib.ptr;
	idx = pkt->idx + 1;
	idx_value = radeon_get_ib_value(p, idx);

	switch (pkt->opcode) {
	case PACKET3_SET_PREDICATION:
	{
		int pred_op;
		int tmp;
		uint64_t offset;

		if (pkt->count != 1) {
			DRM_ERROR("bad SET PREDICATION\n");
			return -EINVAL;
		}

		tmp = radeon_get_ib_value(p, idx + 1);
		pred_op = (tmp >> 16) & 0x7;

		/* for the clear predicate operation */
		if (pred_op == 0)
			return 0;

		if (pred_op > 2) {
			DRM_ERROR("bad SET PREDICATION operation %d\n", pred_op);
			return -EINVAL;
		}

		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("bad SET PREDICATION\n");
			return -EINVAL;
		}

		offset = reloc->lobj.gpu_offset +
			 (idx_value & 0xfffffff0) +
			 ((u64)(tmp & 0xff) << 32);

		ib[idx + 0] = offset;
		ib[idx + 1] = (tmp & 0xffffff00) | (upper_32_bits(offset) & 0xff);
	}
	break;
	case PACKET3_CONTEXT_CONTROL:
		if (pkt->count != 1) {
			DRM_ERROR("bad CONTEXT_CONTROL\n");
			return -EINVAL;
		}
		break;
	case PACKET3_INDEX_TYPE:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_CLEAR_STATE:
		if (pkt->count) {
			DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
			return -EINVAL;
		}
		break;
	case CAYMAN_PACKET3_DEALLOC_STATE:
		if (p->rdev->family < CHIP_CAYMAN) {
			DRM_ERROR("bad PACKET3_DEALLOC_STATE\n");
			return -EINVAL;
		}
		if (pkt->count) {
			DRM_ERROR("bad INDEX_TYPE/NUM_INSTANCES/CLEAR_STATE\n");
			return -EINVAL;
		}
		break;
	case PACKET3_INDEX_BASE:
	{
		u64 offset;

		if (pkt->count != 1) {
			DRM_ERROR("bad INDEX_BASE\n");
			return -EINVAL;
		}
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("bad INDEX_BASE\n");
			return -EINVAL;
		}

		offset = reloc->lobj.gpu_offset +
			 idx_value +
			 ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);

		ib[idx+0] = offset;
		ib[idx+1] = upper_32_bits(offset) & 0xff;

		r = evergreen_cs_track_check(p);
		if (r) {
			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
			return r;
		}
		break;
	}
	case PACKET3_DRAW_INDEX:
	{
		u64 offset;

		if (pkt->count != 3) {
			DRM_ERROR("bad DRAW_INDEX\n");
			return -EINVAL;
		}
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("bad DRAW_INDEX\n");
			return -EINVAL;
		}

		offset = reloc->lobj.gpu_offset +
			 idx_value +
			 ((u64)(radeon_get_ib_value(p, idx+1) & 0xff) << 32);

		ib[idx+0] = offset;
		ib[idx+1] = upper_32_bits(offset) & 0xff;

		r = evergreen_cs_track_check(p);
		if (r) {
			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
			return r;
		}
		break;
	}
	case PACKET3_DRAW_INDEX_2:
	{
		u64 offset;

		if (pkt->count != 4) {
			DRM_ERROR("bad DRAW_INDEX_2\n");
			return -EINVAL;
		}
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("bad DRAW_INDEX_2\n");
			return -EINVAL;
		}

		offset = reloc->lobj.gpu_offset +
			 radeon_get_ib_value(p, idx+1) +
			 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);

		ib[idx+1] = offset;
		ib[idx+2] = upper_32_bits(offset) & 0xff;

		r = evergreen_cs_track_check(p);
		if (r) {
			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
			return r;
		}
		break;
	}
	case PACKET3_DRAW_INDEX_AUTO:
		if (pkt->count != 1) {
			DRM_ERROR("bad DRAW_INDEX_AUTO\n");
			return -EINVAL;
		}
		r = evergreen_cs_track_check(p);
		if (r) {
			dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
			return r;
		}
		break;
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
		if (pkt->count != 2) {
			DRM_ERROR("bad DRAW_INDEX_MULTI_AUTO\n");
			return -EINVAL;
		}
		r = evergreen_cs_track_check(p);
		if (r) {
			dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
			return r;
		}
		break;
	case PACKET3_DRAW_INDEX_IMMD:
		if (pkt->count < 2) {
			DRM_ERROR("bad DRAW_INDEX_IMMD\n");
			return -EINVAL;
		}
		r = evergreen_cs_track_check(p);
		if (r) {
			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
			return r;
		}
		break;
	case PACKET3_DRAW_INDEX_OFFSET:
		if (pkt->count != 2) {
			DRM_ERROR("bad DRAW_INDEX_OFFSET\n");
			return -EINVAL;
		}
		r = evergreen_cs_track_check(p);
		if (r) {
			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
			return r;
		}
		break;
	case PACKET3_DRAW_INDEX_OFFSET_2:
		if (pkt->count != 3) {
			DRM_ERROR("bad DRAW_INDEX_OFFSET_2\n");
			return -EINVAL;
		}
		r = evergreen_cs_track_check(p);
		if (r) {
			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
			return r;
		}
		break;
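	/* Every draw and dispatch variant funnels through
	 * evergreen_cs_track_check() before it may enter the ring, so the
	 * color/depth-buffer and streamout state accumulated from earlier
	 * SET_CONTEXT_REG writes is validated once per draw.
	 */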
	case PACKET3_DISPATCH_DIRECT:
		if (pkt->count != 3) {
			DRM_ERROR("bad DISPATCH_DIRECT\n");
			return -EINVAL;
		}
		r = evergreen_cs_track_check(p);
		if (r) {
			dev_warn(p->dev, "%s:%d invalid cmd stream %d\n", __func__, __LINE__, idx);
			return r;
		}
		break;
	case PACKET3_DISPATCH_INDIRECT:
		if (pkt->count != 1) {
			DRM_ERROR("bad DISPATCH_INDIRECT\n");
			return -EINVAL;
		}
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("bad DISPATCH_INDIRECT\n");
			return -EINVAL;
		}
		ib[idx+0] = idx_value + (u32)(reloc->lobj.gpu_offset & 0xffffffff);
		r = evergreen_cs_track_check(p);
		if (r) {
			dev_warn(p->dev, "%s:%d invalid cmd stream\n", __func__, __LINE__);
			return r;
		}
		break;
	case PACKET3_WAIT_REG_MEM:
		if (pkt->count != 5) {
			DRM_ERROR("bad WAIT_REG_MEM\n");
			return -EINVAL;
		}
		/* bit 4 is reg (0) or mem (1) */
		if (idx_value & 0x10) {
			u64 offset;

			r = evergreen_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("bad WAIT_REG_MEM\n");
				return -EINVAL;
			}

			offset = reloc->lobj.gpu_offset +
				 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
				 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);

			ib[idx+1] = (ib[idx+1] & 0x3) | (offset & 0xfffffffc);
			ib[idx+2] = upper_32_bits(offset) & 0xff;
		}
		break;
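	/* Note how WAIT_REG_MEM keeps the low two control bits of the address
	 * dword intact ((ib[idx+1] & 0x3) above) while splicing in the
	 * relocated, 4-byte-aligned poll address.
	 */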
	case PACKET3_SURFACE_SYNC:
		if (pkt->count != 3) {
			DRM_ERROR("bad SURFACE_SYNC\n");
			return -EINVAL;
		}
		/* 0xffffffff/0x0 is flush all cache flag */
		if (radeon_get_ib_value(p, idx + 1) != 0xffffffff ||
		    radeon_get_ib_value(p, idx + 2) != 0) {
			r = evergreen_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("bad SURFACE_SYNC\n");
				return -EINVAL;
			}
			ib[idx+2] += (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
		}
		break;
	case PACKET3_EVENT_WRITE:
		if (pkt->count != 2 && pkt->count != 0) {
			DRM_ERROR("bad EVENT_WRITE\n");
			return -EINVAL;
		}
		if (pkt->count) {
			u64 offset;

			r = evergreen_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("bad EVENT_WRITE\n");
				return -EINVAL;
			}
			offset = reloc->lobj.gpu_offset +
				 (radeon_get_ib_value(p, idx+1) & 0xfffffff8) +
				 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);

			ib[idx+1] = offset & 0xfffffff8;
			ib[idx+2] = upper_32_bits(offset) & 0xff;
		}
		break;
	case PACKET3_EVENT_WRITE_EOP:
	{
		u64 offset;

		if (pkt->count != 4) {
			DRM_ERROR("bad EVENT_WRITE_EOP\n");
			return -EINVAL;
		}
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("bad EVENT_WRITE_EOP\n");
			return -EINVAL;
		}

		offset = reloc->lobj.gpu_offset +
			 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
			 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);

		ib[idx+1] = offset & 0xfffffffc;
		ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
		break;
	}
	case PACKET3_EVENT_WRITE_EOS:
	{
		u64 offset;

		if (pkt->count != 3) {
			DRM_ERROR("bad EVENT_WRITE_EOS\n");
			return -EINVAL;
		}
		r = evergreen_cs_packet_next_reloc(p, &reloc);
		if (r) {
			DRM_ERROR("bad EVENT_WRITE_EOS\n");
			return -EINVAL;
		}

		offset = reloc->lobj.gpu_offset +
			 (radeon_get_ib_value(p, idx+1) & 0xfffffffc) +
			 ((u64)(radeon_get_ib_value(p, idx+2) & 0xff) << 32);

		ib[idx+1] = offset & 0xfffffffc;
		ib[idx+2] = (ib[idx+2] & 0xffffff00) | (upper_32_bits(offset) & 0xff);
		break;
	}
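	/* The differing masks above encode alignment rules: EVENT_WRITE
	 * destinations must be 8-byte aligned (0xfffffff8), while the EOP/EOS
	 * fence addresses only need 4-byte alignment (0xfffffffc).
	 */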
	case PACKET3_SET_CONFIG_REG:
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			r = evergreen_cs_check_reg(p, reg, idx+1+i);
			if (r)
				return r;
		}
		break;
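	/* idx_value is a dword offset from the start of the register
	 * aperture, so (idx_value << 2) converts it to a byte offset. The
	 * three-way range check rejects offsets that underflow, start past
	 * the aperture, or whose count runs off its end; the same pattern is
	 * used by all of the SET_*_REG/CONST handlers below.
	 */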
	case PACKET3_SET_CONTEXT_REG:
		start_reg = (idx_value << 2) + PACKET3_SET_CONTEXT_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONTEXT_REG_START) ||
		    (start_reg >= PACKET3_SET_CONTEXT_REG_END) ||
		    (end_reg >= PACKET3_SET_CONTEXT_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONTEXT_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			r = evergreen_cs_check_reg(p, reg, idx+1+i);
			if (r)
				return r;
		}
		break;
	case PACKET3_SET_RESOURCE:
		if (pkt->count % 8) {
			DRM_ERROR("bad SET_RESOURCE\n");
			return -EINVAL;
		}
		start_reg = (idx_value << 2) + PACKET3_SET_RESOURCE_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_RESOURCE_START) ||
		    (start_reg >= PACKET3_SET_RESOURCE_END) ||
		    (end_reg >= PACKET3_SET_RESOURCE_END)) {
			DRM_ERROR("bad SET_RESOURCE\n");
			return -EINVAL;
		}
		for (i = 0; i < (pkt->count / 8); i++) {
			struct radeon_bo *texture, *mipmap;
			u32 toffset, moffset;
			u32 size, offset, mip_address, tex_dim;

			switch (G__SQ_CONSTANT_TYPE(radeon_get_ib_value(p, idx+1+(i*8)+7))) {
			case SQ_TEX_VTX_VALID_TEXTURE:
				/* tex base */
				r = evergreen_cs_packet_next_reloc(p, &reloc);
				if (r) {
					DRM_ERROR("bad SET_RESOURCE (tex)\n");
					return -EINVAL;
				}
				if (!(p->cs_flags & RADEON_CS_KEEP_TILING_FLAGS)) {
					ib[idx+1+(i*8)+1] |=
						TEX_ARRAY_MODE(evergreen_cs_get_aray_mode(reloc->lobj.tiling_flags));
					if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO) {
						unsigned bankw, bankh, mtaspect, tile_split;

						evergreen_tiling_fields(reloc->lobj.tiling_flags,
									&bankw, &bankh, &mtaspect,
									&tile_split);
						ib[idx+1+(i*8)+6] |= TEX_TILE_SPLIT(tile_split);
						ib[idx+1+(i*8)+7] |=
							TEX_BANK_WIDTH(bankw) |
							TEX_BANK_HEIGHT(bankh) |
							MACRO_TILE_ASPECT(mtaspect) |
							TEX_NUM_BANKS(evergreen_cs_get_num_banks(track->nbanks));
					}
				}
				texture = reloc->robj;
				toffset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);

				/* tex mip base */
				tex_dim = ib[idx+1+(i*8)+0] & 0x7;
				mip_address = ib[idx+1+(i*8)+3];

				if ((tex_dim == SQ_TEX_DIM_2D_MSAA || tex_dim == SQ_TEX_DIM_2D_ARRAY_MSAA) &&
				    !mip_address &&
				    !evergreen_cs_packet_next_is_pkt3_nop(p)) {
					/* MIP_ADDRESS should point to FMASK for an MSAA texture.
					 * It should be 0 if FMASK is disabled. */
					moffset = 0;
					mipmap = NULL;
				} else {
					r = evergreen_cs_packet_next_reloc(p, &reloc);
					if (r) {
						DRM_ERROR("bad SET_RESOURCE (tex)\n");
						return -EINVAL;
					}
					moffset = (u32)((reloc->lobj.gpu_offset >> 8) & 0xffffffff);
					mipmap = reloc->robj;
				}

				r = evergreen_cs_track_validate_texture(p, texture, mipmap, idx+1+(i*8));
				if (r)
					return r;
				ib[idx+1+(i*8)+2] += toffset;
				ib[idx+1+(i*8)+3] += moffset;
				break;
			case SQ_TEX_VTX_VALID_BUFFER:
			{
				u64 offset64;

				/* vtx base */
				r = evergreen_cs_packet_next_reloc(p, &reloc);
				if (r) {
					DRM_ERROR("bad SET_RESOURCE (vtx)\n");
					return -EINVAL;
				}
				offset = radeon_get_ib_value(p, idx+1+(i*8)+0);
				size = radeon_get_ib_value(p, idx+1+(i*8)+1);
				if (p->rdev && (size + offset) > radeon_bo_size(reloc->robj)) {
					/* force size to size of the buffer */
					dev_warn(p->dev, "vbo resource seems too big for the bo\n");
					ib[idx+1+(i*8)+1] = radeon_bo_size(reloc->robj) - offset;
				}

				offset64 = reloc->lobj.gpu_offset + offset;
				ib[idx+1+(i*8)+0] = offset64;
				ib[idx+1+(i*8)+2] = (ib[idx+1+(i*8)+2] & 0xffffff00) |
						    (upper_32_bits(offset64) & 0xff);
				break;
			}
			case SQ_TEX_VTX_INVALID_TEXTURE:
			case SQ_TEX_VTX_INVALID_BUFFER:
			default:
				DRM_ERROR("bad SET_RESOURCE\n");
				return -EINVAL;
			}
		}
		break;
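	/* Each SET_RESOURCE entry is an 8-dword descriptor, which is why the
	 * packet count must be a multiple of 8 and the loop walks in strides
	 * of 8; dword 7 carries the SQ constant type that selects the
	 * texture or vertex-buffer path above.
	 */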
	case PACKET3_SET_ALU_CONST:
		/* XXX fix me ALU const buffers only */
		break;
	case PACKET3_SET_BOOL_CONST:
		start_reg = (idx_value << 2) + PACKET3_SET_BOOL_CONST_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_BOOL_CONST_START) ||
		    (start_reg >= PACKET3_SET_BOOL_CONST_END) ||
		    (end_reg >= PACKET3_SET_BOOL_CONST_END)) {
			DRM_ERROR("bad SET_BOOL_CONST\n");
			return -EINVAL;
		}
		break;
	case PACKET3_SET_LOOP_CONST:
		start_reg = (idx_value << 2) + PACKET3_SET_LOOP_CONST_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_LOOP_CONST_START) ||
		    (start_reg >= PACKET3_SET_LOOP_CONST_END) ||
		    (end_reg >= PACKET3_SET_LOOP_CONST_END)) {
			DRM_ERROR("bad SET_LOOP_CONST\n");
			return -EINVAL;
		}
		break;
	case PACKET3_SET_CTL_CONST:
		start_reg = (idx_value << 2) + PACKET3_SET_CTL_CONST_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CTL_CONST_START) ||
		    (start_reg >= PACKET3_SET_CTL_CONST_END) ||
		    (end_reg >= PACKET3_SET_CTL_CONST_END)) {
			DRM_ERROR("bad SET_CTL_CONST\n");
			return -EINVAL;
		}
		break;
	case PACKET3_SET_SAMPLER:
		if (pkt->count % 3) {
			DRM_ERROR("bad SET_SAMPLER\n");
			return -EINVAL;
		}
		start_reg = (idx_value << 2) + PACKET3_SET_SAMPLER_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_SAMPLER_START) ||
		    (start_reg >= PACKET3_SET_SAMPLER_END) ||
		    (end_reg >= PACKET3_SET_SAMPLER_END)) {
			DRM_ERROR("bad SET_SAMPLER\n");
			return -EINVAL;
		}
		break;
	case PACKET3_STRMOUT_BUFFER_UPDATE:
		if (pkt->count != 4) {
			DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (invalid count)\n");
			return -EINVAL;
		}
		/* Updating memory at DST_ADDRESS. */
		if (idx_value & 0x1) {
			u64 offset;

			r = evergreen_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing dst reloc)\n");
				return -EINVAL;
			}
			offset = radeon_get_ib_value(p, idx+1);
			offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE dst bo too small: 0x%llx, 0x%lx\n",
					  offset + 4, radeon_bo_size(reloc->robj));
				return -EINVAL;
			}
			offset += reloc->lobj.gpu_offset;
			ib[idx+1] = offset;
			ib[idx+2] = upper_32_bits(offset) & 0xff;
		}
		/* Reading data from SRC_ADDRESS. */
		if (((idx_value >> 1) & 0x3) == 2) {
			u64 offset;

			r = evergreen_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE (missing src reloc)\n");
				return -EINVAL;
			}
			offset = radeon_get_ib_value(p, idx+3);
			offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
				DRM_ERROR("bad STRMOUT_BUFFER_UPDATE src bo too small: 0x%llx, 0x%lx\n",
					  offset + 4, radeon_bo_size(reloc->robj));
				return -EINVAL;
			}
			offset += reloc->lobj.gpu_offset;
			ib[idx+3] = offset;
			ib[idx+4] = upper_32_bits(offset) & 0xff;
		}
		break;
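	/* Both legs bounds-check the 4-byte access against the backing BO
	 * before the GPU offset is folded in, so a hostile offset cannot
	 * reach past the end of the buffer object.
	 */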
	case PACKET3_COPY_DW:
		if (pkt->count != 4) {
			DRM_ERROR("bad COPY_DW (invalid count)\n");
			return -EINVAL;
		}
		if (idx_value & 0x1) {
			u64 offset;

			/* SRC is memory. */
			r = evergreen_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("bad COPY_DW (missing src reloc)\n");
				return -EINVAL;
			}
			offset = radeon_get_ib_value(p, idx+1);
			offset += ((u64)(radeon_get_ib_value(p, idx+2) & 0xff)) << 32;
			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
				DRM_ERROR("bad COPY_DW src bo too small: 0x%llx, 0x%lx\n",
					  offset + 4, radeon_bo_size(reloc->robj));
				return -EINVAL;
			}
			offset += reloc->lobj.gpu_offset;
			ib[idx+1] = offset;
			ib[idx+2] = upper_32_bits(offset) & 0xff;
		} else {
			/* SRC is a reg. */
			reg = radeon_get_ib_value(p, idx+1) << 2;
			if (!evergreen_is_safe_reg(p, reg, idx+1))
				return -EINVAL;
		}
		if (idx_value & 0x2) {
			u64 offset;

			/* DST is memory. */
			r = evergreen_cs_packet_next_reloc(p, &reloc);
			if (r) {
				DRM_ERROR("bad COPY_DW (missing dst reloc)\n");
				return -EINVAL;
			}
			offset = radeon_get_ib_value(p, idx+3);
			offset += ((u64)(radeon_get_ib_value(p, idx+4) & 0xff)) << 32;
			if ((offset + 4) > radeon_bo_size(reloc->robj)) {
				DRM_ERROR("bad COPY_DW dst bo too small: 0x%llx, 0x%lx\n",
					  offset + 4, radeon_bo_size(reloc->robj));
				return -EINVAL;
			}
			offset += reloc->lobj.gpu_offset;
			ib[idx+3] = offset;
			ib[idx+4] = upper_32_bits(offset) & 0xff;
		} else {
			/* DST is a reg. */
			reg = radeon_get_ib_value(p, idx+3) << 2;
			if (!evergreen_is_safe_reg(p, reg, idx+3))
				return -EINVAL;
		}
		break;
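	/* COPY_DW moves a single dword between any combination of register
	 * and memory: memory endpoints are relocated and bounds-checked
	 * above, while register endpoints must pass the
	 * evergreen_is_safe_reg() bitmap.
	 */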
	case PACKET3_NOP:
		break;
	default:
		DRM_ERROR("Packet3 opcode %x not supported\n", pkt->opcode);
		return -EINVAL;
	}
	return 0;
}

int evergreen_cs_parse(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet pkt;
	struct evergreen_cs_track *track;
	u32 tmp;
	int r;

	if (p->track == NULL) {
		/* initialize tracker, we are in kms */
		track = kzalloc(sizeof(*track), GFP_KERNEL);
		if (track == NULL)
			return -ENOMEM;
		evergreen_cs_track_init(track);
		if (p->rdev->family >= CHIP_CAYMAN)
			tmp = p->rdev->config.cayman.tile_config;
		else
			tmp = p->rdev->config.evergreen.tile_config;

		switch (tmp & 0xf) {
		case 0:
			track->npipes = 1;
			break;
		case 1:
		default:
			track->npipes = 2;
			break;
		case 2:
			track->npipes = 4;
			break;
		case 3:
			track->npipes = 8;
			break;
		}

		switch ((tmp & 0xf0) >> 4) {
		case 0:
			track->nbanks = 4;
			break;
		case 1:
		default:
			track->nbanks = 8;
			break;
		case 2:
			track->nbanks = 16;
			break;
		}

		switch ((tmp & 0xf00) >> 8) {
		case 0:
			track->group_size = 256;
			break;
		case 1:
		default:
			track->group_size = 512;
			break;
		}

		switch ((tmp & 0xf000) >> 12) {
		case 0:
			track->row_size = 1;
			break;
		case 1:
		default:
			track->row_size = 2;
			break;
		case 2:
			track->row_size = 4;
			break;
		}

		p->track = track;
	}
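	/* The tile_config word decoded above is assembled at ASIC init; its
	 * nibbles give the pipe count, bank count, cache group size and row
	 * size that the tracker's tiled-surface checks rely on.
	 */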
	do {
		r = evergreen_cs_packet_parse(p, &pkt, p->idx);
		if (r) {
			kfree(p->track);
			p->track = NULL;
			return r;
		}
		p->idx += pkt.count + 2;
		switch (pkt.type) {
		case PACKET_TYPE0:
			r = evergreen_cs_parse_packet0(p, &pkt);
			break;
		case PACKET_TYPE2:
			break;
		case PACKET_TYPE3:
			r = evergreen_packet3_check(p, &pkt);
			break;
		default:
			DRM_ERROR("Unknown packet type %d !\n", pkt.type);
			kfree(p->track);
			p->track = NULL;
			return -EINVAL;
		}
		if (r) {
			kfree(p->track);
			p->track = NULL;
			return r;
		}
	} while (p->idx < p->chunks[p->chunk_ib_idx].length_dw);
#if 0
	for (r = 0; r < p->ib.length_dw; r++) {
		printk(KERN_INFO "%05d 0x%08X\n", r, p->ib.ptr[r]);
		mdelay(1);
	}
#endif
	return 0;
}

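/* From Cayman on, IBs may also execute inside a GPU virtual-memory context.
 * Addresses there are per-VM and need no relocation, so the VM variants
 * below only have to police which registers a packet may touch.
 */
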
static bool evergreen_vm_reg_valid(u32 reg)
{
	/* context regs are fine */
	if (reg >= 0x28000)
		return true;

	/* check config regs */
	switch (reg) {
	case GRBM_GFX_INDEX:
	case CP_STRMOUT_CNTL:
	case VGT_VTX_VECT_EJECT_REG:
	case VGT_CACHE_INVALIDATION:
	case VGT_GS_VERTEX_REUSE:
	case VGT_PRIMITIVE_TYPE:
	case VGT_INDEX_TYPE:
	case VGT_NUM_INDICES:
	case VGT_NUM_INSTANCES:
	case VGT_COMPUTE_DIM_X:
	case VGT_COMPUTE_DIM_Y:
	case VGT_COMPUTE_DIM_Z:
	case VGT_COMPUTE_START_X:
	case VGT_COMPUTE_START_Y:
	case VGT_COMPUTE_START_Z:
	case VGT_COMPUTE_INDEX:
	case VGT_COMPUTE_THREAD_GROUP_SIZE:
	case VGT_HS_OFFCHIP_PARAM:
	case PA_SU_LINE_STIPPLE_VALUE:
	case PA_SC_LINE_STIPPLE_STATE:
	case SQ_DYN_GPR_CNTL_PS_FLUSH_REQ:
	case SQ_DYN_GPR_SIMD_LOCK_EN:
	case SQ_GPR_RESOURCE_MGMT_1:
	case SQ_GLOBAL_GPR_RESOURCE_MGMT_1:
	case SQ_GLOBAL_GPR_RESOURCE_MGMT_2:
	case SQ_CONST_MEM_BASE:
	case SQ_STATIC_THREAD_MGMT_1:
	case SQ_STATIC_THREAD_MGMT_2:
	case SQ_STATIC_THREAD_MGMT_3:
	case SPI_CONFIG_CNTL:
	case SPI_CONFIG_CNTL_1:
	case TD_PS_BORDER_COLOR_INDEX:
	case TD_PS_BORDER_COLOR_RED:
	case TD_PS_BORDER_COLOR_GREEN:
	case TD_PS_BORDER_COLOR_BLUE:
	case TD_PS_BORDER_COLOR_ALPHA:
	case TD_VS_BORDER_COLOR_INDEX:
	case TD_VS_BORDER_COLOR_RED:
	case TD_VS_BORDER_COLOR_GREEN:
	case TD_VS_BORDER_COLOR_BLUE:
	case TD_VS_BORDER_COLOR_ALPHA:
	case TD_GS_BORDER_COLOR_INDEX:
	case TD_GS_BORDER_COLOR_RED:
	case TD_GS_BORDER_COLOR_GREEN:
	case TD_GS_BORDER_COLOR_BLUE:
	case TD_GS_BORDER_COLOR_ALPHA:
	case TD_HS_BORDER_COLOR_INDEX:
	case TD_HS_BORDER_COLOR_RED:
	case TD_HS_BORDER_COLOR_GREEN:
	case TD_HS_BORDER_COLOR_BLUE:
	case TD_HS_BORDER_COLOR_ALPHA:
	case TD_LS_BORDER_COLOR_INDEX:
	case TD_LS_BORDER_COLOR_RED:
	case TD_LS_BORDER_COLOR_GREEN:
	case TD_LS_BORDER_COLOR_BLUE:
	case TD_LS_BORDER_COLOR_ALPHA:
	case TD_CS_BORDER_COLOR_INDEX:
	case TD_CS_BORDER_COLOR_RED:
	case TD_CS_BORDER_COLOR_GREEN:
	case TD_CS_BORDER_COLOR_BLUE:
	case TD_CS_BORDER_COLOR_ALPHA:
	case SQ_ESGS_RING_SIZE:
	case SQ_GSVS_RING_SIZE:
	case SQ_ESTMP_RING_SIZE:
	case SQ_GSTMP_RING_SIZE:
	case SQ_HSTMP_RING_SIZE:
	case SQ_LSTMP_RING_SIZE:
	case SQ_PSTMP_RING_SIZE:
	case SQ_VSTMP_RING_SIZE:
	case SQ_ESGS_RING_ITEMSIZE:
	case SQ_ESTMP_RING_ITEMSIZE:
	case SQ_GSTMP_RING_ITEMSIZE:
	case SQ_GSVS_RING_ITEMSIZE:
	case SQ_GS_VERT_ITEMSIZE:
	case SQ_GS_VERT_ITEMSIZE_1:
	case SQ_GS_VERT_ITEMSIZE_2:
	case SQ_GS_VERT_ITEMSIZE_3:
	case SQ_GSVS_RING_OFFSET_1:
	case SQ_GSVS_RING_OFFSET_2:
	case SQ_GSVS_RING_OFFSET_3:
	case SQ_HSTMP_RING_ITEMSIZE:
	case SQ_LSTMP_RING_ITEMSIZE:
	case SQ_PSTMP_RING_ITEMSIZE:
	case SQ_VSTMP_RING_ITEMSIZE:
	case VGT_TF_RING_SIZE:
	case SQ_ESGS_RING_BASE:
	case SQ_GSVS_RING_BASE:
	case SQ_ESTMP_RING_BASE:
	case SQ_GSTMP_RING_BASE:
	case SQ_HSTMP_RING_BASE:
	case SQ_LSTMP_RING_BASE:
	case SQ_PSTMP_RING_BASE:
	case SQ_VSTMP_RING_BASE:
	case CAYMAN_VGT_OFFCHIP_LDS_BASE:
	case CAYMAN_SQ_EX_ALLOC_TABLE_SLOTS:
		return true;
	default:
		DRM_ERROR("Invalid register 0x%x in CS\n", reg);
		return false;
	}
}

static int evergreen_vm_packet3_check(struct radeon_device *rdev,
				      u32 *ib, struct radeon_cs_packet *pkt)
{
	u32 idx = pkt->idx + 1;
	u32 idx_value = ib[idx];
	u32 start_reg, end_reg, reg, i;

	switch (pkt->opcode) {
	case PACKET3_NOP:
	case PACKET3_SET_BASE:
	case PACKET3_CLEAR_STATE:
	case PACKET3_INDEX_BUFFER_SIZE:
	case PACKET3_DISPATCH_DIRECT:
	case PACKET3_DISPATCH_INDIRECT:
	case PACKET3_MODE_CONTROL:
	case PACKET3_SET_PREDICATION:
	case PACKET3_COND_EXEC:
	case PACKET3_PRED_EXEC:
	case PACKET3_DRAW_INDIRECT:
	case PACKET3_DRAW_INDEX_INDIRECT:
	case PACKET3_INDEX_BASE:
	case PACKET3_DRAW_INDEX_2:
	case PACKET3_CONTEXT_CONTROL:
	case PACKET3_DRAW_INDEX_OFFSET:
	case PACKET3_INDEX_TYPE:
	case PACKET3_DRAW_INDEX:
	case PACKET3_DRAW_INDEX_AUTO:
	case PACKET3_DRAW_INDEX_IMMD:
	case PACKET3_NUM_INSTANCES:
	case PACKET3_DRAW_INDEX_MULTI_AUTO:
	case PACKET3_STRMOUT_BUFFER_UPDATE:
	case PACKET3_DRAW_INDEX_OFFSET_2:
	case PACKET3_DRAW_INDEX_MULTI_ELEMENT:
	case PACKET3_MPEG_INDEX:
	case PACKET3_WAIT_REG_MEM:
	case PACKET3_MEM_WRITE:
	case PACKET3_SURFACE_SYNC:
	case PACKET3_EVENT_WRITE:
	case PACKET3_EVENT_WRITE_EOP:
	case PACKET3_EVENT_WRITE_EOS:
	case PACKET3_SET_CONTEXT_REG:
	case PACKET3_SET_BOOL_CONST:
	case PACKET3_SET_LOOP_CONST:
	case PACKET3_SET_RESOURCE:
	case PACKET3_SET_SAMPLER:
	case PACKET3_SET_CTL_CONST:
	case PACKET3_SET_RESOURCE_OFFSET:
	case PACKET3_SET_CONTEXT_REG_INDIRECT:
	case PACKET3_SET_RESOURCE_INDIRECT:
	case CAYMAN_PACKET3_DEALLOC_STATE:
		break;
	case PACKET3_COND_WRITE:
		if (idx_value & 0x100) {
			reg = ib[idx + 5] * 4;
			if (!evergreen_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_COPY_DW:
		if (idx_value & 0x2) {
			reg = ib[idx + 3] * 4;
			if (!evergreen_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	case PACKET3_SET_CONFIG_REG:
		start_reg = (idx_value << 2) + PACKET3_SET_CONFIG_REG_START;
		end_reg = 4 * pkt->count + start_reg - 4;
		if ((start_reg < PACKET3_SET_CONFIG_REG_START) ||
		    (start_reg >= PACKET3_SET_CONFIG_REG_END) ||
		    (end_reg >= PACKET3_SET_CONFIG_REG_END)) {
			DRM_ERROR("bad PACKET3_SET_CONFIG_REG\n");
			return -EINVAL;
		}
		for (i = 0; i < pkt->count; i++) {
			reg = start_reg + (4 * i);
			if (!evergreen_vm_reg_valid(reg))
				return -EINVAL;
		}
		break;
	default:
		return -EINVAL;
	}
	return 0;
}

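/* evergreen_ib_parse() below walks the raw IB dword stream: type-0 register
 * writes are rejected outright in the VM path, type-2 packets are one-dword
 * fillers so the index only advances by 1, and type-3 packets span
 * pkt.count + 2 dwords (header plus count+1 payload dwords).
 */
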
int evergreen_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib)
{
	int ret = 0;
	u32 idx = 0;
	struct radeon_cs_packet pkt;

	do {
		pkt.idx = idx;
		pkt.type = CP_PACKET_GET_TYPE(ib->ptr[idx]);
		pkt.count = CP_PACKET_GET_COUNT(ib->ptr[idx]);
		switch (pkt.type) {
		case PACKET_TYPE0:
			dev_err(rdev->dev, "Packet0 not allowed!\n");
			ret = -EINVAL;
			break;
		case PACKET_TYPE2:
			idx += 1;
			break;
		case PACKET_TYPE3:
			pkt.opcode = CP_PACKET3_GET_OPCODE(ib->ptr[idx]);
			ret = evergreen_vm_packet3_check(rdev, ib->ptr, &pkt);
			idx += pkt.count + 2;
			break;
		default:
			dev_err(rdev->dev, "Unknown packet type %d !\n", pkt.type);
			ret = -EINVAL;
			break;
		}
		if (ret)
			break;
	} while (idx < ib->length_dw);

	return ret;
}