// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_gt.h"
#include "gt/intel_gt_buffer_pool.h"
#include "gt/intel_ring.h"
#include "i915_gem_clflush.h"
#include "i915_gem_object_blt.h"
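/*
 * Build a batch buffer that fills @vma with @value using the blitter's
 * XY_COLOR_BLT command, one block at a time so the request stays
 * preemptible. The returned batch holds an engine wakeref and a buffer
 * pool node (stashed in batch->private); drop both with
 * intel_emit_vma_release() once the request has been built.
 */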
struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
					 struct i915_vma *vma,
					 struct i915_gem_ww_ctx *ww,
					 u32 value)
{
	struct drm_i915_private *i915 = ce->vm->i915;
	const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
	struct intel_gt_buffer_pool_node *pool;
	struct i915_vma *batch;
	u64 offset;
	u64 count;
	u64 rem;
	u32 size;
	u32 *cmd;
	int err;

	GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
	intel_engine_pm_get(ce->engine);

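	/*
	 * Worst case of 8 dwords per block: a 7 dword XY_COLOR_BLT plus one
	 * MI_ARB_CHECK, with a single MI_BATCH_BUFFER_END terminating the
	 * batch.
	 */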
	count = div_u64(round_up(vma->size, block_size), block_size);
	size = (1 + 8 * count) * sizeof(u32);
	size = round_up(size, PAGE_SIZE);
	pool = intel_gt_get_buffer_pool(ce->engine->gt, size);
	if (IS_ERR(pool)) {
		err = PTR_ERR(pool);
		goto out_pm;
	}

	err = i915_gem_object_lock(pool->obj, ww);
	if (err)
		goto out_put;

	batch = i915_vma_instance(pool->obj, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_put;
	}

	err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_put;

	cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_unpin;
	}

	rem = vma->size;
	offset = vma->node.start;

	do {
		u32 size = min_t(u64, rem, block_size);

		GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

		if (INTEL_GEN(i915) >= 8) {
			*cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(offset);
			*cmd++ = upper_32_bits(offset);
			*cmd++ = value;
		} else {
			*cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = offset;
			*cmd++ = value;
		}

		/* Allow ourselves to be preempted in between blocks. */
		*cmd++ = MI_ARB_CHECK;

		offset += size;
		rem -= size;
	} while (rem);

	*cmd = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(pool->obj);
	i915_gem_object_unpin_map(pool->obj);

	intel_gt_chipset_flush(ce->vm->gt);

	batch->private = pool;
	return batch;

out_unpin:
	i915_vma_unpin(batch);
out_put:
	intel_gt_buffer_pool_put(pool);
out_pm:
	intel_engine_pm_put(ce->engine);
	return ERR_PTR(err);
}

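/*
 * Queue the batch onto @rq: wait for any prior users of the batch object,
 * add it to the request's active tracking and mark its buffer pool node
 * busy so that it is not recycled before the request completes.
 */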
int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq)
{
	int err;

	err = i915_request_await_object(rq, vma->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, 0);
	if (unlikely(err))
		return err;

	return intel_gt_buffer_pool_mark_active(vma->private, rq);
}

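/*
 * Undo intel_emit_vma_fill_blt()/intel_emit_vma_copy_blt(): unpin the batch,
 * return its node to the buffer pool and release the engine wakeref.
 */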
void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma)
{
	i915_vma_unpin(vma);
	intel_gt_buffer_pool_put(vma->private);
	intel_engine_pm_put(ce->engine);
}

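/*
 * Prepare @obj for use by the blitter: flush any dirty, non-coherent CPU
 * cachelines and order @rq after the object's existing fences.
 */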
static int
move_obj_to_gpu(struct drm_i915_gem_object *obj,
		struct i915_request *rq,
		bool write)
{
	if (obj->cache_dirty & ~obj->cache_coherent)
		i915_gem_clflush_object(obj, 0);

	return i915_request_await_object(rq, obj, write);
}

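/*
 * Fill the whole of @obj with @value using the blitter on @ce: bind the
 * object into the context's address space, emit the fill batch and submit
 * it as a single request, backing off and retrying on ww-mutex contention.
 */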
int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
			     struct intel_context *ce,
			     u32 value)
{
	struct i915_gem_ww_ctx ww;
	struct i915_request *rq;
	struct i915_vma *batch;
	struct i915_vma *vma;
	int err;

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	i915_gem_ww_ctx_init(&ww, true);
	intel_engine_pm_get(ce->engine);
retry:
	err = i915_gem_object_lock(obj, &ww);
	if (err)
		goto out;

	err = intel_context_pin_ww(ce, &ww);
	if (err)
		goto out;

	err = i915_vma_pin_ww(vma, &ww, 0, 0, PIN_USER);
	if (err)
		goto out_ctx;

	batch = intel_emit_vma_fill_blt(ce, vma, &ww, value);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_vma;
	}

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_batch;
	}

	err = intel_emit_vma_mark_active(batch, rq);
	if (unlikely(err))
		goto out_request;

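	/*
	 * The blitter writes @obj: flush stale CPU cachelines and order this
	 * request after all prior users of the object.
	 */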
	err = move_obj_to_gpu(vma->obj, rq, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	if (unlikely(err))
		goto out_request;

	if (ce->engine->emit_init_breadcrumb)
		err = ce->engine->emit_init_breadcrumb(rq);

	if (likely(!err))
		err = ce->engine->emit_bb_start(rq,
						batch->node.start, batch->node.size,
						0);

out_request:
	if (unlikely(err))
		i915_request_set_error_once(rq, err);

	i915_request_add(rq);
out_batch:
	intel_emit_vma_release(ce, batch);
out_vma:
	i915_vma_unpin(vma);
out_ctx:
	intel_context_unpin(ce);
out:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	intel_engine_pm_put(ce->engine);
	return err;
}

/* Wa_1209644611:icl,ehl */
static bool wa_1209644611_applies(struct drm_i915_private *i915, u32 size)
{
	u32 height = size >> PAGE_SHIFT;

	if (!IS_GEN(i915, 11))
		return false;

	return height % 4 == 3 && height <= 8;
}

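/*
 * Build a batch buffer that copies @src to @dst with the blitter, using
 * the gen9+ XY_FAST_COPY_BLT where the Wa_1209644611 erratum does not
 * apply. As with the fill batch, the copy is chunked with MI_ARB_CHECK so
 * it stays preemptible, and the returned batch carries its buffer pool
 * node in ->private for intel_emit_vma_release().
 */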
struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
					 struct i915_gem_ww_ctx *ww,
					 struct i915_vma *src,
					 struct i915_vma *dst)
{
	struct drm_i915_private *i915 = ce->vm->i915;
	const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
	struct intel_gt_buffer_pool_node *pool;
	struct i915_vma *batch;
	u64 src_offset, dst_offset;
	u64 count, rem;
	u32 size, *cmd;
	int err;

	GEM_BUG_ON(src->size != dst->size);

	GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
	intel_engine_pm_get(ce->engine);

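	/*
	 * Worst case of 11 dwords per block: a 10 dword copy blit plus one
	 * MI_ARB_CHECK, with a single MI_BATCH_BUFFER_END terminating the
	 * batch.
	 */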
	count = div_u64(round_up(dst->size, block_size), block_size);
	size = (1 + 11 * count) * sizeof(u32);
	size = round_up(size, PAGE_SIZE);
	pool = intel_gt_get_buffer_pool(ce->engine->gt, size);
	if (IS_ERR(pool)) {
		err = PTR_ERR(pool);
		goto out_pm;
	}

	err = i915_gem_object_lock(pool->obj, ww);
	if (err)
		goto out_put;

	batch = i915_vma_instance(pool->obj, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_put;
	}

	err = i915_vma_pin_ww(batch, ww, 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_put;

	cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_unpin;
	}

	rem = src->size;
	src_offset = src->node.start;
	dst_offset = dst->node.start;

	do {
		size = min_t(u64, rem, block_size);
		GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

		if (INTEL_GEN(i915) >= 9 &&
		    !wa_1209644611_applies(i915, size)) {
			*cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2);
			*cmd++ = BLT_DEPTH_32 | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(dst_offset);
			*cmd++ = upper_32_bits(dst_offset);
			*cmd++ = 0;
			*cmd++ = PAGE_SIZE;
			*cmd++ = lower_32_bits(src_offset);
			*cmd++ = upper_32_bits(src_offset);
		} else if (INTEL_GEN(i915) >= 8) {
			*cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(dst_offset);
			*cmd++ = upper_32_bits(dst_offset);
			*cmd++ = 0;
			*cmd++ = PAGE_SIZE;
			*cmd++ = lower_32_bits(src_offset);
			*cmd++ = upper_32_bits(src_offset);
		} else {
			*cmd++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE;
			*cmd++ = dst_offset;
			*cmd++ = PAGE_SIZE;
			*cmd++ = src_offset;
		}

		/* Allow ourselves to be preempted in between blocks. */
		*cmd++ = MI_ARB_CHECK;

		src_offset += size;
		dst_offset += size;
		rem -= size;
	} while (rem);

	*cmd = MI_BATCH_BUFFER_END;

	i915_gem_object_flush_map(pool->obj);
	i915_gem_object_unpin_map(pool->obj);

	intel_gt_chipset_flush(ce->vm->gt);

	batch->private = pool;
	return batch;

out_unpin:
	i915_vma_unpin(batch);
out_put:
	intel_gt_buffer_pool_put(pool);
out_pm:
	intel_engine_pm_put(ce->engine);
	return ERR_PTR(err);
}

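/*
 * Copy the contents of @src into @dst using the blitter on @ce: bind both
 * objects into the context's address space, emit the copy batch and submit
 * it as a single request, backing off and retrying on ww-mutex contention.
 */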
int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
			     struct drm_i915_gem_object *dst,
			     struct intel_context *ce)
{
	struct i915_address_space *vm = ce->vm;
	struct i915_vma *vma[2], *batch;
	struct i915_gem_ww_ctx ww;
	struct i915_request *rq;
	int err, i;

	vma[0] = i915_vma_instance(src, vm, NULL);
	if (IS_ERR(vma[0]))
		return PTR_ERR(vma[0]);

	vma[1] = i915_vma_instance(dst, vm, NULL);
	if (IS_ERR(vma[1]))
		return PTR_ERR(vma[1]);

	i915_gem_ww_ctx_init(&ww, true);
	intel_engine_pm_get(ce->engine);
retry:
	err = i915_gem_object_lock(src, &ww);
	if (!err)
		err = i915_gem_object_lock(dst, &ww);
	if (!err)
		err = intel_context_pin_ww(ce, &ww);
	if (err)
		goto out;

	err = i915_vma_pin_ww(vma[0], &ww, 0, 0, PIN_USER);
	if (err)
		goto out_ctx;

	err = i915_vma_pin_ww(vma[1], &ww, 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_unpin_src;

	batch = intel_emit_vma_copy_blt(ce, &ww, vma[0], vma[1]);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unpin_dst;
	}

	rq = i915_request_create(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_batch;
	}

	err = intel_emit_vma_mark_active(batch, rq);
	if (unlikely(err))
		goto out_request;

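	/* vma[0] is the source (read only), vma[1] the destination (written). */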
	for (i = 0; i < ARRAY_SIZE(vma); i++) {
		err = move_obj_to_gpu(vma[i]->obj, rq, i);
		if (unlikely(err))
			goto out_request;
	}

	for (i = 0; i < ARRAY_SIZE(vma); i++) {
		unsigned int flags = i ? EXEC_OBJECT_WRITE : 0;

		err = i915_vma_move_to_active(vma[i], rq, flags);
		if (unlikely(err))
			goto out_request;
	}

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (unlikely(err))
			goto out_request;
	}

	err = rq->engine->emit_bb_start(rq,
					batch->node.start, batch->node.size,
					0);

out_request:
	if (unlikely(err))
		i915_request_set_error_once(rq, err);

	i915_request_add(rq);
out_batch:
	intel_emit_vma_release(ce, batch);
out_unpin_dst:
	i915_vma_unpin(vma[1]);
out_unpin_src:
	i915_vma_unpin(vma[0]);
out_ctx:
	intel_context_unpin(ce);
out:
	if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
	}
	i915_gem_ww_ctx_fini(&ww);
	intel_engine_pm_put(ce->engine);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_gem_object_blt.c"
#endif