/*
 * Copyright © 2008 Jérôme Glisse
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Aapo Tahkola <aet@rasterburn.org>
 *      Nicolai Haehnle <prefect_@gmx.net>
 *      Jérôme Glisse <glisse@freedesktop.org>
 */
37 #include <sys/ioctl.h>
38 #include "radeon_cs.h"
39 #include "radeon_cs_int.h"
40 #include "radeon_bo_int.h"
41 #include "radeon_cs_gem.h"
42 #include "radeon_bo_gem.h"
44 #include "libdrm_macros.h"
46 #include "xf86atomic.h"
47 #include "radeon_drm.h"
/* Add LIBDRM_RADEON_BOF_FILES to libdrm_radeon_la_SOURCES when building with BOF_DUMP */
55 struct radeon_cs_manager_gem
{
56 struct radeon_cs_manager base
;
65 uint32_t write_domain
;
70 #define RELOC_SIZE (sizeof(struct cs_reloc_gem) / sizeof(uint32_t))
73 struct radeon_cs_int base
;
74 struct drm_radeon_cs cs
;
75 struct drm_radeon_cs_chunk chunks
[2];
78 struct radeon_bo_int
**relocs_bo
;
/*
 * Command-stream id allocator.
 *
 * Each command stream owns a single bit of a 32-bit mask.  cs_id_source
 * holds the bits currently handed out; id_mutex serialises every access
 * to it.
 */
static pthread_mutex_t id_mutex = PTHREAD_MUTEX_INITIALIZER;
static uint32_t cs_id_source = 0;

/**
 * Return a mask with only the lowest clear bit of n set.
 *
 * result is undefined if called with ~0
 */
static uint32_t get_first_zero(const uint32_t n)
{
    /* __builtin_ctz returns number of trailing zeros. */
    /* Shift an unsigned 1: when bit 31 is the first free bit, shifting a
     * signed int into the sign bit would be undefined behaviour. */
    return UINT32_C(1) << __builtin_ctz(~n);
}

/**
 * Returns a free id for cs.
 * If there is no free id we return zero
 */
static uint32_t generate_id(void)
{
    uint32_t r = 0;

    pthread_mutex_lock(&id_mutex);
    /* check for free ids */
    if (cs_id_source != ~r) {
        /* find first zero bit */
        r = get_first_zero(cs_id_source);

        /* set id as reserved */
        cs_id_source |= r;
    }
    pthread_mutex_unlock(&id_mutex);

    return r;
}

/**
 * Free the id for later reuse
 *
 * Passing 0 (the "no id" value returned by generate_id) changes nothing.
 */
static void free_id(uint32_t id)
{
    pthread_mutex_lock(&id_mutex);

    cs_id_source &= ~id;

    pthread_mutex_unlock(&id_mutex);
}
125 static struct radeon_cs_int
*cs_gem_create(struct radeon_cs_manager
*csm
,
130 /* max cmd buffer size is 64Kb */
131 if (ndw
> (64 * 1024 / 4)) {
134 csg
= (struct cs_gem
*)calloc(1, sizeof(struct cs_gem
));
139 csg
->base
.ndw
= 64 * 1024 / 4;
140 csg
->base
.packets
= (uint32_t*)calloc(1, 64 * 1024);
141 if (csg
->base
.packets
== NULL
) {
145 csg
->base
.relocs_total_size
= 0;
146 csg
->base
.crelocs
= 0;
147 csg
->base
.id
= generate_id();
148 csg
->nrelocs
= 4096 / (4 * 4) ;
149 csg
->relocs_bo
= (struct radeon_bo_int
**)calloc(1,
150 csg
->nrelocs
*sizeof(void*));
151 if (csg
->relocs_bo
== NULL
) {
152 free(csg
->base
.packets
);
156 csg
->base
.relocs
= csg
->relocs
= (uint32_t*)calloc(1, 4096);
157 if (csg
->relocs
== NULL
) {
158 free(csg
->relocs_bo
);
159 free(csg
->base
.packets
);
163 csg
->chunks
[0].chunk_id
= RADEON_CHUNK_ID_IB
;
164 csg
->chunks
[0].length_dw
= 0;
165 csg
->chunks
[0].chunk_data
= (uint64_t)(uintptr_t)csg
->base
.packets
;
166 csg
->chunks
[1].chunk_id
= RADEON_CHUNK_ID_RELOCS
;
167 csg
->chunks
[1].length_dw
= 0;
168 csg
->chunks
[1].chunk_data
= (uint64_t)(uintptr_t)csg
->relocs
;
169 return (struct radeon_cs_int
*)csg
;
172 static int cs_gem_write_reloc(struct radeon_cs_int
*cs
,
173 struct radeon_bo
*bo
,
174 uint32_t read_domain
,
175 uint32_t write_domain
,
178 struct radeon_bo_int
*boi
= (struct radeon_bo_int
*)bo
;
179 struct cs_gem
*csg
= (struct cs_gem
*)cs
;
180 struct cs_reloc_gem
*reloc
;
184 assert(boi
->space_accounted
);
187 if ((read_domain
&& write_domain
) || (!read_domain
&& !write_domain
)) {
188 /* in one CS a bo can only be in read or write domain but not
189 * in read & write domain at the same time
193 if (read_domain
== RADEON_GEM_DOMAIN_CPU
) {
196 if (write_domain
== RADEON_GEM_DOMAIN_CPU
) {
199 /* use bit field hash function to determine
200 if this bo is for sure not in this cs.*/
201 if ((atomic_read((atomic_t
*)radeon_gem_get_reloc_in_cs(bo
)) & cs
->id
)) {
202 /* check if bo is already referenced.
203 * Scanning from end to begin reduces cycles with mesa because
204 * it often relocates same shared dma bo again. */
205 for(i
= cs
->crelocs
; i
!= 0;) {
207 idx
= i
* RELOC_SIZE
;
208 reloc
= (struct cs_reloc_gem
*)&csg
->relocs
[idx
];
209 if (reloc
->handle
== bo
->handle
) {
210 /* Check domains must be in read or write. As we check already
211 * checked that in argument one of the read or write domain was
212 * set we only need to check that if previous reloc as the read
213 * domain set then the read_domain should also be set for this
216 /* the DDX expects to read and write from same pixmap */
217 if (write_domain
&& (reloc
->read_domain
& write_domain
)) {
218 reloc
->read_domain
= 0;
219 reloc
->write_domain
= write_domain
;
220 } else if (read_domain
& reloc
->write_domain
) {
221 reloc
->read_domain
= 0;
223 if (write_domain
!= reloc
->write_domain
)
225 if (read_domain
!= reloc
->read_domain
)
229 reloc
->read_domain
|= read_domain
;
230 reloc
->write_domain
|= write_domain
;
232 reloc
->flags
|= (flags
& reloc
->flags
);
233 /* write relocation packet */
234 radeon_cs_write_dword((struct radeon_cs
*)cs
, 0xc0001000);
235 radeon_cs_write_dword((struct radeon_cs
*)cs
, idx
);
241 if (csg
->base
.crelocs
>= csg
->nrelocs
) {
242 /* allocate more memory (TODO: should use a slab allocator maybe) */
244 size
= ((csg
->nrelocs
+ 1) * sizeof(struct radeon_bo
*));
245 tmp
= (uint32_t*)realloc(csg
->relocs_bo
, size
);
249 csg
->relocs_bo
= (struct radeon_bo_int
**)tmp
;
250 size
= ((csg
->nrelocs
+ 1) * RELOC_SIZE
* 4);
251 tmp
= (uint32_t*)realloc(csg
->relocs
, size
);
255 cs
->relocs
= csg
->relocs
= tmp
;
257 csg
->chunks
[1].chunk_data
= (uint64_t)(uintptr_t)csg
->relocs
;
259 csg
->relocs_bo
[csg
->base
.crelocs
] = boi
;
260 idx
= (csg
->base
.crelocs
++) * RELOC_SIZE
;
261 reloc
= (struct cs_reloc_gem
*)&csg
->relocs
[idx
];
262 reloc
->handle
= bo
->handle
;
263 reloc
->read_domain
= read_domain
;
264 reloc
->write_domain
= write_domain
;
265 reloc
->flags
= flags
;
266 csg
->chunks
[1].length_dw
+= RELOC_SIZE
;
268 /* bo might be referenced from another context so have to use atomic operations */
269 atomic_add((atomic_t
*)radeon_gem_get_reloc_in_cs(bo
), cs
->id
);
270 cs
->relocs_total_size
+= boi
->size
;
271 radeon_cs_write_dword((struct radeon_cs
*)cs
, 0xc0001000);
272 radeon_cs_write_dword((struct radeon_cs
*)cs
, idx
);
276 static int cs_gem_begin(struct radeon_cs_int
*cs
,
283 if (cs
->section_ndw
) {
284 fprintf(stderr
, "CS already in a section(%s,%s,%d)\n",
285 cs
->section_file
, cs
->section_func
, cs
->section_line
);
286 fprintf(stderr
, "CS can't start section(%s,%s,%d)\n",
290 cs
->section_ndw
= ndw
;
292 cs
->section_file
= file
;
293 cs
->section_func
= func
;
294 cs
->section_line
= line
;
296 if (cs
->cdw
+ ndw
> cs
->ndw
) {
299 /* round up the required size to a multiple of 1024 */
300 tmp
= (cs
->cdw
+ ndw
+ 0x3FF) & (~0x3FF);
301 ptr
= (uint32_t*)realloc(cs
->packets
, 4 * tmp
);
311 static int cs_gem_end(struct radeon_cs_int
*cs
,
317 if (!cs
->section_ndw
) {
318 fprintf(stderr
, "CS no section to end at (%s,%s,%d)\n",
322 if (cs
->section_ndw
!= cs
->section_cdw
) {
323 fprintf(stderr
, "CS section size mismatch start at (%s,%s,%d) %d vs %d\n",
324 cs
->section_file
, cs
->section_func
, cs
->section_line
, cs
->section_ndw
, cs
->section_cdw
);
325 fprintf(stderr
, "CS section end at (%s,%s,%d)\n",
328 /* We must reset the section even when there is error. */
337 static void cs_gem_dump_bof(struct radeon_cs_int
*cs
)
339 struct cs_gem
*csg
= (struct cs_gem
*)cs
;
340 struct radeon_cs_manager_gem
*csm
;
341 bof_t
*bcs
, *blob
, *array
, *bo
, *size
, *handle
, *device_id
, *root
;
345 csm
= (struct radeon_cs_manager_gem
*)cs
->csm
;
346 root
= device_id
= bcs
= blob
= array
= bo
= size
= handle
= NULL
;
350 device_id
= bof_int32(csm
->device_id
);
351 if (device_id
== NULL
)
353 if (bof_object_set(root
, "device_id", device_id
))
355 bof_decref(device_id
);
358 blob
= bof_blob(csg
->nrelocs
* 16, csg
->relocs
);
361 if (bof_object_set(root
, "reloc", blob
))
366 blob
= bof_blob(cs
->cdw
* 4, cs
->packets
);
369 if (bof_object_set(root
, "pm4", blob
))
377 for (i
= 0; i
< csg
->base
.crelocs
; i
++) {
381 size
= bof_int32(csg
->relocs_bo
[i
]->size
);
384 if (bof_object_set(bo
, "size", size
))
388 handle
= bof_int32(csg
->relocs_bo
[i
]->handle
);
391 if (bof_object_set(bo
, "handle", handle
))
395 radeon_bo_map((struct radeon_bo
*)csg
->relocs_bo
[i
], 0);
396 blob
= bof_blob(csg
->relocs_bo
[i
]->size
, csg
->relocs_bo
[i
]->ptr
);
397 radeon_bo_unmap((struct radeon_bo
*)csg
->relocs_bo
[i
]);
400 if (bof_object_set(bo
, "data", blob
))
404 if (bof_array_append(array
, bo
))
409 if (bof_object_set(root
, "bo", array
))
411 sprintf(tmp
, "d-0x%04X-%08d.bof", csm
->device_id
, csm
->nbof
++);
412 bof_dump_file(root
, tmp
);
419 bof_decref(device_id
);
424 static int cs_gem_emit(struct radeon_cs_int
*cs
)
426 struct cs_gem
*csg
= (struct cs_gem
*)cs
;
427 uint64_t chunk_array
[2];
432 radeon_cs_write_dword((struct radeon_cs
*)cs
, 0x80000000);
437 csg
->chunks
[0].length_dw
= cs
->cdw
;
439 chunk_array
[0] = (uint64_t)(uintptr_t)&csg
->chunks
[0];
440 chunk_array
[1] = (uint64_t)(uintptr_t)&csg
->chunks
[1];
442 csg
->cs
.num_chunks
= 2;
443 csg
->cs
.chunks
= (uint64_t)(uintptr_t)chunk_array
;
445 r
= drmCommandWriteRead(cs
->csm
->fd
, DRM_RADEON_CS
,
446 &csg
->cs
, sizeof(struct drm_radeon_cs
));
447 for (i
= 0; i
< csg
->base
.crelocs
; i
++) {
448 csg
->relocs_bo
[i
]->space_accounted
= 0;
449 /* bo might be referenced from another context so have to use atomic operations */
450 atomic_dec((atomic_t
*)radeon_gem_get_reloc_in_cs((struct radeon_bo
*)csg
->relocs_bo
[i
]), cs
->id
);
451 radeon_bo_unref((struct radeon_bo
*)csg
->relocs_bo
[i
]);
452 csg
->relocs_bo
[i
] = NULL
;
455 cs
->csm
->read_used
= 0;
456 cs
->csm
->vram_write_used
= 0;
457 cs
->csm
->gart_write_used
= 0;
461 static int cs_gem_destroy(struct radeon_cs_int
*cs
)
463 struct cs_gem
*csg
= (struct cs_gem
*)cs
;
466 free(csg
->relocs_bo
);
473 static int cs_gem_erase(struct radeon_cs_int
*cs
)
475 struct cs_gem
*csg
= (struct cs_gem
*)cs
;
478 if (csg
->relocs_bo
) {
479 for (i
= 0; i
< csg
->base
.crelocs
; i
++) {
480 if (csg
->relocs_bo
[i
]) {
481 /* bo might be referenced from another context so have to use atomic operations */
482 atomic_dec((atomic_t
*)radeon_gem_get_reloc_in_cs((struct radeon_bo
*)csg
->relocs_bo
[i
]), cs
->id
);
483 radeon_bo_unref((struct radeon_bo
*)csg
->relocs_bo
[i
]);
484 csg
->relocs_bo
[i
] = NULL
;
488 cs
->relocs_total_size
= 0;
492 csg
->chunks
[0].length_dw
= 0;
493 csg
->chunks
[1].length_dw
= 0;
/**
 * Report whether the command stream should be flushed.
 *
 * Always returns 0.  A size-based heuristic
 * (cs->relocs_total_size > 32*1024*1024) existed here but is disabled.
 */
static int cs_gem_need_flush(struct radeon_cs_int *cs)
{
    (void)cs;
    return 0;
}
502 static void cs_gem_print(struct radeon_cs_int
*cs
, FILE *file
)
504 struct radeon_cs_manager_gem
*csm
;
507 csm
= (struct radeon_cs_manager_gem
*)cs
->csm
;
508 fprintf(file
, "VENDORID:DEVICEID 0x%04X:0x%04X\n", 0x1002, csm
->device_id
);
509 for (i
= 0; i
< cs
->cdw
; i
++) {
510 fprintf(file
, "0x%08X\n", cs
->packets
[i
]);
514 static const struct radeon_cs_funcs radeon_cs_gem_funcs
= {
515 .cs_create
= cs_gem_create
,
516 .cs_write_reloc
= cs_gem_write_reloc
,
517 .cs_begin
= cs_gem_begin
,
518 .cs_end
= cs_gem_end
,
519 .cs_emit
= cs_gem_emit
,
520 .cs_destroy
= cs_gem_destroy
,
521 .cs_erase
= cs_gem_erase
,
522 .cs_need_flush
= cs_gem_need_flush
,
523 .cs_print
= cs_gem_print
,
526 static int radeon_get_device_id(int fd
, uint32_t *device_id
)
528 struct drm_radeon_info info
= {};
532 info
.request
= RADEON_INFO_DEVICE_ID
;
533 info
.value
= (uintptr_t)device_id
;
534 r
= drmCommandWriteRead(fd
, DRM_RADEON_INFO
, &info
,
535 sizeof(struct drm_radeon_info
));
539 drm_public
struct radeon_cs_manager
*radeon_cs_manager_gem_ctor(int fd
)
541 struct radeon_cs_manager_gem
*csm
;
543 csm
= calloc(1, sizeof(struct radeon_cs_manager_gem
));
547 csm
->base
.funcs
= &radeon_cs_gem_funcs
;
549 radeon_get_device_id(fd
, &csm
->device_id
);
553 drm_public
void radeon_cs_manager_gem_dtor(struct radeon_cs_manager
*csm
)