/*
 * Copyright 2010 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *     Alex Deucher <alexander.deucher@amd.com>
 */
#include "drmP.h"
#include "radeon.h"
#include "radeon_drm.h"

#include "evergreend.h"
#include "evergreen_blit_shaders.h"

#define DI_PT_RECTLIST        0x11
#define DI_INDEX_SIZE_16_BIT  0x0
#define DI_SRC_SEL_AUTO_INDEX 0x2

#define FMT_8                 0x1
#define FMT_8_8_8_8           0x1a
#define COLOR_8               0x1
#define COLOR_5_6_5           0x8
#define COLOR_8_8_8_8         0x1a
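
/*
 * Emit the CB_COLOR0 render target state for the blit destination: the
 * 256-byte-aligned base address, the encoded pitch/slice, the surface
 * format and the width/height.
 */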
static void
set_render_target(struct radeon_device *rdev, int format,
                  int w, int h, u64 gpu_addr)
{
        u32 cb_color_info;
        int pitch, slice;

        cb_color_info = ((format << 2) | (1 << 24) | (1 << 8));
        pitch = (w / 8) - 1;
        slice = ((w * h) / 64) - 1;

        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 15));
        radeon_ring_write(rdev, (CB_COLOR0_BASE - PACKET3_SET_CONTEXT_REG_START) >> 2);
        radeon_ring_write(rdev, gpu_addr >> 8);
        radeon_ring_write(rdev, pitch);
        radeon_ring_write(rdev, slice);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, cb_color_info);
        radeon_ring_write(rdev, (1 << 4));
        radeon_ring_write(rdev, (w - 1) | ((h - 1) << 16));
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, 0);
}
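
/*
 * Emit a SURFACE_SYNC packet: flush/invalidate the caches selected by
 * sync_type over the given memory range.  The coherency size is in
 * 256-byte units, with 0xffffffff meaning "everything".
 */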
static void
cp_set_surface_sync(struct radeon_device *rdev,
                    u32 sync_type, u32 size,
                    u64 mc_addr)
{
        u32 cp_coher_size;

        if (size == 0xffffffff)
                cp_coher_size = 0xffffffff;
        else
                cp_coher_size = ((size + 255) >> 8);

        radeon_ring_write(rdev, PACKET3(PACKET3_SURFACE_SYNC, 3));
        radeon_ring_write(rdev, sync_type);
        radeon_ring_write(rdev, cp_coher_size);
        radeon_ring_write(rdev, mc_addr >> 8);
        radeon_ring_write(rdev, 10); /* poll interval */
}
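
/*
 * Point SQ_PGM_START_VS and SQ_PGM_START_PS at the copy shaders that were
 * uploaded into the blit buffer object, then flush the shader cache over
 * that range.
 */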
/* emits 11dw + 1 surface sync = 16dw */
static void
set_shaders(struct radeon_device *rdev)
{
        u64 gpu_addr;

        gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 3));
        radeon_ring_write(rdev, (SQ_PGM_START_VS - PACKET3_SET_CONTEXT_REG_START) >> 2);
        radeon_ring_write(rdev, gpu_addr >> 8);
        radeon_ring_write(rdev, 2);
        radeon_ring_write(rdev, 0);

        gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.ps_offset;
        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 4));
        radeon_ring_write(rdev, (SQ_PGM_START_PS - PACKET3_SET_CONTEXT_REG_START) >> 2);
        radeon_ring_write(rdev, gpu_addr >> 8);
        radeon_ring_write(rdev, 1);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, 2);

        gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.vs_offset;
        cp_set_surface_sync(rdev, PACKET3_SH_ACTION_ENA, 512, gpu_addr);
}
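
/*
 * Emit the SQ vertex fetch constant for the 48-byte vertex buffer (three
 * vertices, 16-byte stride) and sync it; Cedar/Palm/Caicos flush the
 * texture cache here instead of the vertex cache, as those parts appear
 * to fetch vertex data through the TC.
 */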
/* emits 10 + 1 sync (5) = 15 */
static void
set_vtx_resource(struct radeon_device *rdev, u64 gpu_addr)
{
        u32 sq_vtx_constant_word2, sq_vtx_constant_word3;

        /* high addr, stride */
        sq_vtx_constant_word2 = ((upper_32_bits(gpu_addr) & 0xff) | (16 << 8));
#ifdef __BIG_ENDIAN
        sq_vtx_constant_word2 |= (2 << 30);
#endif
        sq_vtx_constant_word3 = (0 << 3) | (1 << 6) | (2 << 9) | (3 << 12);

        radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8));
        radeon_ring_write(rdev, 0x580);
        radeon_ring_write(rdev, gpu_addr & 0xffffffff);
        radeon_ring_write(rdev, 48 - 1); /* size */
        radeon_ring_write(rdev, sq_vtx_constant_word2);
        radeon_ring_write(rdev, sq_vtx_constant_word3);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, SQ_TEX_VTX_VALID_BUFFER << 30);

        if ((rdev->family == CHIP_CEDAR) ||
            (rdev->family == CHIP_PALM) ||
            (rdev->family == CHIP_CAICOS))
                cp_set_surface_sync(rdev,
                                    PACKET3_TC_ACTION_ENA, 48, gpu_addr);
        else
                cp_set_surface_sync(rdev,
                                    PACKET3_VC_ACTION_ENA, 48, gpu_addr);
}
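
/*
 * Emit the SQ texture resource constant describing the blit source as a
 * 2D texture: pitch (in 8-texel units), width, height and format.
 */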
static void
set_tex_resource(struct radeon_device *rdev,
                 int format, int w, int h, int pitch,
                 u64 gpu_addr)
{
        u32 sq_tex_resource_word0, sq_tex_resource_word1;
        u32 sq_tex_resource_word4, sq_tex_resource_word7;

        sq_tex_resource_word0 = (1 << 0); /* 2D */
        sq_tex_resource_word0 |= ((((pitch >> 3) - 1) << 6) |
                                  ((w - 1) << 18));
        sq_tex_resource_word1 = ((h - 1) << 0) | (1 << 28);
        sq_tex_resource_word4 = (0 << 16) | (1 << 19) | (2 << 22) | (3 << 25);

        sq_tex_resource_word7 = format | (SQ_TEX_VTX_VALID_TEXTURE << 30);

        radeon_ring_write(rdev, PACKET3(PACKET3_SET_RESOURCE, 8));
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, sq_tex_resource_word0);
        radeon_ring_write(rdev, sq_tex_resource_word1);
        radeon_ring_write(rdev, gpu_addr >> 8);
        radeon_ring_write(rdev, gpu_addr >> 8);
        radeon_ring_write(rdev, sq_tex_resource_word4);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, sq_tex_resource_word7);
}
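
/*
 * Program the screen, generic and window scissor rectangles so the draw
 * is clipped to the destination rectangle.
 */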
static void
set_scissors(struct radeon_device *rdev, int x1, int y1,
             int x2, int y2)
{
        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        radeon_ring_write(rdev, (PA_SC_SCREEN_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
        radeon_ring_write(rdev, (x1 << 0) | (y1 << 16));
        radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));

        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        radeon_ring_write(rdev, (PA_SC_GENERIC_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
        radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31));
        radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));

        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONTEXT_REG, 2));
        radeon_ring_write(rdev, (PA_SC_WINDOW_SCISSOR_TL - PACKET3_SET_CONTEXT_REG_START) >> 2);
        radeon_ring_write(rdev, (x1 << 0) | (y1 << 16) | (1 << 31));
        radeon_ring_write(rdev, (x2 << 0) | (y2 << 16));
}
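
/*
 * Kick off the copy: select the RECTLIST primitive type and issue an
 * auto-indexed draw of the three vertices set up in the vertex buffer.
 */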
static void
draw_auto(struct radeon_device *rdev)
{
        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
        radeon_ring_write(rdev, (VGT_PRIMITIVE_TYPE - PACKET3_SET_CONFIG_REG_START) >> 2);
        radeon_ring_write(rdev, DI_PT_RECTLIST);

        radeon_ring_write(rdev, PACKET3(PACKET3_INDEX_TYPE, 0));
        radeon_ring_write(rdev,
#ifdef __BIG_ENDIAN
                          (2 << 2) |
#endif
                          DI_INDEX_SIZE_16_BIT);

        radeon_ring_write(rdev, PACKET3(PACKET3_NUM_INSTANCES, 0));
        radeon_ring_write(rdev, 1);

        radeon_ring_write(rdev, PACKET3(PACKET3_DRAW_INDEX_AUTO, 1));
        radeon_ring_write(rdev, 3);
        radeon_ring_write(rdev, DI_SRC_SEL_AUTO_INDEX);
}
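
/*
 * Emit the baseline GPU state for the blit: CLEAR_STATE, the per-family SQ
 * GPR/thread/stack partitioning, and an indirect buffer that points at the
 * pre-built evergreen_default_state command stream stored in the blit bo.
 */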
static void
set_default_state(struct radeon_device *rdev)
{
        u32 sq_config, sq_gpr_resource_mgmt_1, sq_gpr_resource_mgmt_2, sq_gpr_resource_mgmt_3;
        u32 sq_thread_resource_mgmt, sq_thread_resource_mgmt_2;
        u32 sq_stack_resource_mgmt_1, sq_stack_resource_mgmt_2, sq_stack_resource_mgmt_3;
        int num_ps_gprs, num_vs_gprs, num_temp_gprs;
        int num_gs_gprs, num_es_gprs, num_hs_gprs, num_ls_gprs;
        int num_ps_threads, num_vs_threads, num_gs_threads, num_es_threads;
        int num_hs_threads, num_ls_threads;
        int num_ps_stack_entries, num_vs_stack_entries, num_gs_stack_entries, num_es_stack_entries;
        int num_hs_stack_entries, num_ls_stack_entries;
        u64 gpu_addr;
        int dwords;

        switch (rdev->family) {
                num_ps_stack_entries = 42;
                num_vs_stack_entries = 42;
                num_gs_stack_entries = 42;
                num_es_stack_entries = 42;
                num_hs_stack_entries = 42;
                num_ls_stack_entries = 42;

                num_ps_threads = 128;
                num_ps_stack_entries = 42;
                num_vs_stack_entries = 42;
                num_gs_stack_entries = 42;
                num_es_stack_entries = 42;
                num_hs_stack_entries = 42;
                num_ls_stack_entries = 42;

                num_ps_threads = 128;
                num_ps_stack_entries = 85;
                num_vs_stack_entries = 85;
                num_gs_stack_entries = 85;
                num_es_stack_entries = 85;
                num_hs_stack_entries = 85;
                num_ls_stack_entries = 85;

                num_ps_threads = 128;
                num_ps_stack_entries = 85;
                num_vs_stack_entries = 85;
                num_gs_stack_entries = 85;
                num_es_stack_entries = 85;
                num_hs_stack_entries = 85;
                num_ls_stack_entries = 85;

                num_ps_stack_entries = 42;
                num_vs_stack_entries = 42;
                num_gs_stack_entries = 42;
                num_es_stack_entries = 42;
                num_hs_stack_entries = 42;
                num_ls_stack_entries = 42;

                num_ps_threads = 128;
                num_ps_stack_entries = 85;
                num_vs_stack_entries = 85;
                num_gs_stack_entries = 85;
                num_es_stack_entries = 85;
                num_hs_stack_entries = 85;
                num_ls_stack_entries = 85;

                num_ps_threads = 128;
                num_ps_stack_entries = 42;
                num_vs_stack_entries = 42;
                num_gs_stack_entries = 42;
                num_es_stack_entries = 42;
                num_hs_stack_entries = 42;
                num_ls_stack_entries = 42;

                num_ps_threads = 128;
                num_ps_stack_entries = 42;
                num_vs_stack_entries = 42;
                num_gs_stack_entries = 42;
                num_es_stack_entries = 42;
                num_hs_stack_entries = 42;
                num_ls_stack_entries = 42;
        }

        if ((rdev->family == CHIP_CEDAR) ||
            (rdev->family == CHIP_PALM) ||
            (rdev->family == CHIP_CAICOS))
                sq_config = 0;
        else
                sq_config = VC_ENABLE;

        sq_config |= (EXPORT_SRC_C |
                      CS_PRIO(0) |
                      LS_PRIO(0) |
                      HS_PRIO(0) |
                      PS_PRIO(0) |
                      VS_PRIO(1) |
                      GS_PRIO(2) |
                      ES_PRIO(3));

        sq_gpr_resource_mgmt_1 = (NUM_PS_GPRS(num_ps_gprs) |
                                  NUM_VS_GPRS(num_vs_gprs) |
                                  NUM_CLAUSE_TEMP_GPRS(num_temp_gprs));
        sq_gpr_resource_mgmt_2 = (NUM_GS_GPRS(num_gs_gprs) |
                                  NUM_ES_GPRS(num_es_gprs));
        sq_gpr_resource_mgmt_3 = (NUM_HS_GPRS(num_hs_gprs) |
                                  NUM_LS_GPRS(num_ls_gprs));
        sq_thread_resource_mgmt = (NUM_PS_THREADS(num_ps_threads) |
                                   NUM_VS_THREADS(num_vs_threads) |
                                   NUM_GS_THREADS(num_gs_threads) |
                                   NUM_ES_THREADS(num_es_threads));
        sq_thread_resource_mgmt_2 = (NUM_HS_THREADS(num_hs_threads) |
                                     NUM_LS_THREADS(num_ls_threads));
        sq_stack_resource_mgmt_1 = (NUM_PS_STACK_ENTRIES(num_ps_stack_entries) |
                                    NUM_VS_STACK_ENTRIES(num_vs_stack_entries));
        sq_stack_resource_mgmt_2 = (NUM_GS_STACK_ENTRIES(num_gs_stack_entries) |
                                    NUM_ES_STACK_ENTRIES(num_es_stack_entries));
        sq_stack_resource_mgmt_3 = (NUM_HS_STACK_ENTRIES(num_hs_stack_entries) |
                                    NUM_LS_STACK_ENTRIES(num_ls_stack_entries));

        /* set clear context state */
        radeon_ring_write(rdev, PACKET3(PACKET3_CLEAR_STATE, 0));
        radeon_ring_write(rdev, 0);

        /* disable dyn gprs */
        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 1));
        radeon_ring_write(rdev, (SQ_DYN_GPR_CNTL_PS_FLUSH_REQ - PACKET3_SET_CONFIG_REG_START) >> 2);
        radeon_ring_write(rdev, 0);

        radeon_ring_write(rdev, PACKET3(PACKET3_SET_CONFIG_REG, 11));
        radeon_ring_write(rdev, (SQ_CONFIG - PACKET3_SET_CONFIG_REG_START) >> 2);
        radeon_ring_write(rdev, sq_config);
        radeon_ring_write(rdev, sq_gpr_resource_mgmt_1);
        radeon_ring_write(rdev, sq_gpr_resource_mgmt_2);
        radeon_ring_write(rdev, sq_gpr_resource_mgmt_3);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, 0);
        radeon_ring_write(rdev, sq_thread_resource_mgmt);
        radeon_ring_write(rdev, sq_thread_resource_mgmt_2);
        radeon_ring_write(rdev, sq_stack_resource_mgmt_1);
        radeon_ring_write(rdev, sq_stack_resource_mgmt_2);
        radeon_ring_write(rdev, sq_stack_resource_mgmt_3);

        /* CONTEXT_CONTROL */
        radeon_ring_write(rdev, 0xc0012800);
        radeon_ring_write(rdev, 0x80000000);
        radeon_ring_write(rdev, 0x80000000);

        /* SQ_VTX_BASE_VTX_LOC */
        radeon_ring_write(rdev, 0xc0026f00);
        radeon_ring_write(rdev, 0x00000000);
        radeon_ring_write(rdev, 0x00000000);
        radeon_ring_write(rdev, 0x00000000);

        /* SET_SAMPLER */
        radeon_ring_write(rdev, 0xc0036e00);
        radeon_ring_write(rdev, 0x00000000);
        radeon_ring_write(rdev, 0x00000012);
        radeon_ring_write(rdev, 0x00000000);
        radeon_ring_write(rdev, 0x00000000);

        /* set to DX10/11 mode */
        radeon_ring_write(rdev, PACKET3(PACKET3_MODE_CONTROL, 0));
        radeon_ring_write(rdev, 1);

        /* emit an IB pointing at default state */
        dwords = ALIGN(rdev->r600_blit.state_len, 0x10);
        gpu_addr = rdev->r600_blit.shader_gpu_addr + rdev->r600_blit.state_offset;
        radeon_ring_write(rdev, PACKET3(PACKET3_INDIRECT_BUFFER, 2));
        radeon_ring_write(rdev, gpu_addr & 0xFFFFFFFC);
        radeon_ring_write(rdev, upper_32_bits(gpu_addr) & 0xFF);
        radeon_ring_write(rdev, dwords);
}
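
/*
 * Convert a small unsigned integer to its IEEE 754 single-precision bit
 * pattern; the rectangle coordinates are fed to the hardware as floats.
 * Worked example: i2f(16) starts from 16 << 10 = 0x4000 and shifts left
 * nine times until bit 23 is set, so the exponent becomes 140 - 9 = 131
 * and the stored mantissa is 0, giving 0x41800000 == 16.0f.
 */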
static inline uint32_t i2f(uint32_t input)
{
        u32 result, i, exponent, fraction;

        if ((input & 0x3fff) == 0)
                result = 0; /* 0 is a special case */
        else {
                exponent = 140; /* exponent biased by 127; */
                fraction = (input & 0x3fff) << 10; /* cheat and only handle numbers below 2^^15 */
                for (i = 0; i < 14; i++) {
                        if (fraction & 0x800000)
                                break;

                        fraction = fraction << 1; /* keep shifting left until top bit = 1 */
                        exponent = exponent - 1;
                }
                result = exponent << 23 | (fraction & 0x7fffff); /* mask off top bit; assumed 1 */
        }
        return result;
}
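
/*
 * Allocate the blit buffer object in VRAM, upload the default state stream
 * and the copy vertex/pixel shaders into it, and pin it so its GPU address
 * stays valid.
 */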
int evergreen_blit_init(struct radeon_device *rdev)
{
        u32 obj_size;
        int i, r, dwords;
        void *ptr;
        u32 packet2s[16];
        int num_packet2s = 0;

        /* pin copy shader into vram if already initialized */
        if (rdev->r600_blit.shader_obj)
                goto done;

        mutex_init(&rdev->r600_blit.mutex);
        rdev->r600_blit.state_offset = 0;

        rdev->r600_blit.state_len = evergreen_default_size;

        dwords = rdev->r600_blit.state_len;
        while (dwords & 0xf) {
                packet2s[num_packet2s++] = cpu_to_le32(PACKET2(0));
                dwords++;
        }

        obj_size = dwords * 4;
        obj_size = ALIGN(obj_size, 256);

        rdev->r600_blit.vs_offset = obj_size;
        obj_size += evergreen_vs_size * 4;
        obj_size = ALIGN(obj_size, 256);

        rdev->r600_blit.ps_offset = obj_size;
        obj_size += evergreen_ps_size * 4;
        obj_size = ALIGN(obj_size, 256);

        r = radeon_bo_create(rdev, obj_size, PAGE_SIZE, true, RADEON_GEM_DOMAIN_VRAM,
                             &rdev->r600_blit.shader_obj);
        if (r) {
                DRM_ERROR("evergreen failed to allocate shader\n");
                return r;
        }

        DRM_DEBUG("evergreen blit allocated bo %08x vs %08x ps %08x\n",
                  obj_size,
                  rdev->r600_blit.vs_offset, rdev->r600_blit.ps_offset);

        r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
        if (unlikely(r != 0))
                return r;
        r = radeon_bo_kmap(rdev->r600_blit.shader_obj, &ptr);
        if (r) {
                DRM_ERROR("failed to map blit object %d\n", r);
                return r;
        }

        memcpy_toio(ptr + rdev->r600_blit.state_offset,
                    evergreen_default_state, rdev->r600_blit.state_len * 4);

        if (num_packet2s)
                memcpy_toio(ptr + rdev->r600_blit.state_offset + (rdev->r600_blit.state_len * 4),
                            packet2s, num_packet2s * 4);
        for (i = 0; i < evergreen_vs_size; i++)
                *(u32 *)((unsigned long)ptr + rdev->r600_blit.vs_offset + i * 4) = cpu_to_le32(evergreen_vs[i]);
        for (i = 0; i < evergreen_ps_size; i++)
                *(u32 *)((unsigned long)ptr + rdev->r600_blit.ps_offset + i * 4) = cpu_to_le32(evergreen_ps[i]);
        radeon_bo_kunmap(rdev->r600_blit.shader_obj);
        radeon_bo_unreserve(rdev->r600_blit.shader_obj);

done:
        r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
        if (unlikely(r != 0))
                return r;
        r = radeon_bo_pin(rdev->r600_blit.shader_obj, RADEON_GEM_DOMAIN_VRAM,
                          &rdev->r600_blit.shader_gpu_addr);
        radeon_bo_unreserve(rdev->r600_blit.shader_obj);
        if (r) {
                dev_err(rdev->dev, "(%d) pin blit object failed\n", r);
                return r;
        }

        radeon_ttm_set_active_vram_size(rdev, rdev->mc.real_vram_size);
        return 0;
}
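
/*
 * Tear down the blit state: restore the active VRAM size to the visible
 * aperture and release the pinned shader buffer object.
 */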
void evergreen_blit_fini(struct radeon_device *rdev)
{
        int r;

        radeon_ttm_set_active_vram_size(rdev, rdev->mc.visible_vram_size);
        if (rdev->r600_blit.shader_obj == NULL)
                return;
        /* If we can't reserve the bo, unref should be enough to destroy
         * it when it becomes idle.
         */
        r = radeon_bo_reserve(rdev->r600_blit.shader_obj, false);
        if (!r) {
                radeon_bo_unpin(rdev->r600_blit.shader_obj);
                radeon_bo_unreserve(rdev->r600_blit.shader_obj);
        }
        radeon_bo_unref(&rdev->r600_blit.shader_obj);
}
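
/*
 * Helpers for the temporary vertex-buffer IB: allocate an indirect buffer
 * to hold the per-rectangle vertex data, and fence and free it once the
 * copy has been submitted.
 */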
static int evergreen_vb_ib_get(struct radeon_device *rdev)
{
        int r;

        r = radeon_ib_get(rdev, &rdev->r600_blit.vb_ib);
        if (r) {
                DRM_ERROR("failed to get IB for vertex buffer\n");
                return r;
        }

        rdev->r600_blit.vb_total = 64*1024;
        rdev->r600_blit.vb_used = 0;
        return 0;
}

static void evergreen_vb_ib_put(struct radeon_device *rdev)
{
        radeon_fence_emit(rdev, rdev->r600_blit.vb_ib->fence);
        radeon_ib_free(rdev, &rdev->r600_blit.vb_ib);
}
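
/*
 * Reserve ring space for the whole copy (a worst-case number of 74-dword
 * rectangle loops plus the default state, shaders and fences) and emit
 * the state that is shared by every loop.
 */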
int evergreen_blit_prepare_copy(struct radeon_device *rdev, int size_bytes)
{
        int r;
        int ring_size, line_size;
        int max_size;
        /* loops of emits + fence emit possible */
        int dwords_per_loop = 74, num_loops;

        r = evergreen_vb_ib_get(rdev);
        if (r)
                return r;

        /* 8 bpp vs 32 bpp for xfer unit */
        if (size_bytes & 3)
                line_size = 8192;
        else
                line_size = 8192 * 4;

        max_size = 8192 * line_size;

        /* major loops cover the max size transfer */
        num_loops = ((size_bytes + max_size) / max_size);
        /* minor loops cover the extra non aligned bits */
        num_loops += ((size_bytes % line_size) ? 1 : 0);
        /* calculate number of loops correctly */
        ring_size = num_loops * dwords_per_loop;
        /* set default + shaders */
        ring_size += 52; /* shaders + def state */
        ring_size += 10; /* fence emit for VB IB */
        ring_size += 5; /* done copy */
        ring_size += 10; /* fence emit for done copy */
        r = radeon_ring_lock(rdev, ring_size);
        if (r)
                return r;

        set_default_state(rdev); /* 36 */
        set_shaders(rdev); /* 16 */
        return 0;
}
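
/*
 * Finish a copy: fence and release the vertex-buffer IB, emit the caller's
 * fence and commit the ring.
 */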
void evergreen_blit_done_copy(struct radeon_device *rdev, struct radeon_fence *fence)
{
        int r;

        if (rdev->r600_blit.vb_ib)
                evergreen_vb_ib_put(rdev);

        if (fence)
                r = radeon_fence_emit(rdev, fence);

        radeon_ring_unlock_commit(rdev);
}
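
/*
 * Perform the copy by drawing textured rectangles.  Transfers that are not
 * dword aligned go through an 8-bit (FMT_8/COLOR_8) path, dword-aligned
 * ones through a 32-bit (FMT_8_8_8_8/COLOR_8_8_8_8) path; each loop copies
 * a rectangle of up to 8192 texels per line and advances the source and
 * destination addresses by the bytes covered.
 */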
void evergreen_kms_blit_copy(struct radeon_device *rdev,
                             u64 src_gpu_addr, u64 dst_gpu_addr,
                             int size_bytes)
{
        int max_bytes;
        u64 vb_gpu_addr;
        u32 *vb;

        DRM_DEBUG("emitting copy %16llx %16llx %d %d\n", src_gpu_addr, dst_gpu_addr,
                  size_bytes, rdev->r600_blit.vb_used);
        vb = (u32 *)(rdev->r600_blit.vb_ib->ptr + rdev->r600_blit.vb_used);
        if ((size_bytes & 3) || (src_gpu_addr & 3) || (dst_gpu_addr & 3)) {
                max_bytes = 8192;

                while (size_bytes) {
                        int cur_size = size_bytes;
                        int src_x = src_gpu_addr & 255;
                        int dst_x = dst_gpu_addr & 255;
                        int h = 1;

                        src_gpu_addr = src_gpu_addr & ~255ULL;
                        dst_gpu_addr = dst_gpu_addr & ~255ULL;

                        if (!src_x && !dst_x) {
                                h = (cur_size / max_bytes);
                                if (h > 8192)
                                        h = 8192;
                                if (h == 0)
                                        h = 1;
                                else
                                        cur_size = max_bytes;
                        } else {
                                if (cur_size > max_bytes)
                                        cur_size = max_bytes;
                                if (cur_size > (max_bytes - dst_x))
                                        cur_size = (max_bytes - dst_x);
                                if (cur_size > (max_bytes - src_x))
                                        cur_size = (max_bytes - src_x);
                        }

                        if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
                                WARN_ON(1);
                        }

                        vb[0] = i2f(dst_x);
                        vb[1] = 0;
                        vb[2] = i2f(src_x);
                        vb[3] = 0;

                        vb[4] = i2f(dst_x);
                        vb[5] = i2f(h);
                        vb[6] = i2f(src_x);
                        vb[7] = i2f(h);

                        vb[8] = i2f(dst_x + cur_size);
                        vb[9] = i2f(h);
                        vb[10] = i2f(src_x + cur_size);
                        vb[11] = i2f(h);

                        /* src 10 */
                        set_tex_resource(rdev, FMT_8,
                                         src_x + cur_size, h, src_x + cur_size,
                                         src_gpu_addr);

                        /* 5 */
                        cp_set_surface_sync(rdev,
                                            PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);

                        /* dst 17 */
                        set_render_target(rdev, COLOR_8,
                                          dst_x + cur_size, h,
                                          dst_gpu_addr);

                        /* scissors 12 */
                        set_scissors(rdev, dst_x, 0, dst_x + cur_size, h);

                        /* Vertex buffer setup 15 */
                        vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
                        set_vtx_resource(rdev, vb_gpu_addr);

                        /* draw 10 */
                        draw_auto(rdev);

                        /* 5 */
                        cp_set_surface_sync(rdev,
                                            PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
                                            cur_size * h, dst_gpu_addr);

                        vb += 12;
                        rdev->r600_blit.vb_used += 12 * 4;

                        src_gpu_addr += cur_size * h;
                        dst_gpu_addr += cur_size * h;
                        size_bytes -= cur_size * h;
                }
        } else {
                max_bytes = 8192 * 4;

                while (size_bytes) {
                        int cur_size = size_bytes;
                        int src_x = (src_gpu_addr & 255);
                        int dst_x = (dst_gpu_addr & 255);
                        int h = 1;

                        src_gpu_addr = src_gpu_addr & ~255ULL;
                        dst_gpu_addr = dst_gpu_addr & ~255ULL;

                        if (!src_x && !dst_x) {
                                h = (cur_size / max_bytes);
                                if (h > 8192)
                                        h = 8192;
                                if (h == 0)
                                        h = 1;
                                else
                                        cur_size = max_bytes;
                        } else {
                                if (cur_size > max_bytes)
                                        cur_size = max_bytes;
                                if (cur_size > (max_bytes - dst_x))
                                        cur_size = (max_bytes - dst_x);
                                if (cur_size > (max_bytes - src_x))
                                        cur_size = (max_bytes - src_x);
                        }

                        if ((rdev->r600_blit.vb_used + 48) > rdev->r600_blit.vb_total) {
                                WARN_ON(1);
                        }

                        vb[0] = i2f(dst_x / 4);
                        vb[1] = 0;
                        vb[2] = i2f(src_x / 4);
                        vb[3] = 0;

                        vb[4] = i2f(dst_x / 4);
                        vb[5] = i2f(h);
                        vb[6] = i2f(src_x / 4);
                        vb[7] = i2f(h);

                        vb[8] = i2f((dst_x + cur_size) / 4);
                        vb[9] = i2f(h);
                        vb[10] = i2f((src_x + cur_size) / 4);
                        vb[11] = i2f(h);

                        /* src 10 */
                        set_tex_resource(rdev, FMT_8_8_8_8,
                                         (src_x + cur_size) / 4,
                                         h, (src_x + cur_size) / 4,
                                         src_gpu_addr);

                        /* 5 */
                        cp_set_surface_sync(rdev,
                                            PACKET3_TC_ACTION_ENA, (src_x + cur_size * h), src_gpu_addr);

                        /* dst 17 */
                        set_render_target(rdev, COLOR_8_8_8_8,
                                          (dst_x + cur_size) / 4, h,
                                          dst_gpu_addr);

                        /* scissors 12 */
                        set_scissors(rdev, (dst_x / 4), 0, (dst_x + cur_size / 4), h);

                        /* Vertex buffer setup 15 */
                        vb_gpu_addr = rdev->r600_blit.vb_ib->gpu_addr + rdev->r600_blit.vb_used;
                        set_vtx_resource(rdev, vb_gpu_addr);

                        /* draw 10 */
                        draw_auto(rdev);

                        /* 5 */
                        cp_set_surface_sync(rdev,
                                            PACKET3_CB_ACTION_ENA | PACKET3_CB0_DEST_BASE_ENA,
                                            cur_size * h, dst_gpu_addr);

                        /* 74 ring dwords per loop */
                        vb += 12;
                        rdev->r600_blit.vb_used += 12 * 4;

                        src_gpu_addr += cur_size * h;
                        dst_gpu_addr += cur_size * h;
                        size_bytes -= cur_size * h;
                }
        }
}