// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "i915_drv.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_pool.h"
#include "gt/intel_gt.h"
#include "gt/intel_ring.h"
#include "i915_gem_clflush.h"
#include "i915_gem_object_blt.h"
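
/*
 * Build a pooled batch buffer that fills @vma with @value using the blitter.
 * The fill is split into block_size chunks, with an MI_ARB_CHECK emitted
 * between chunks so the request stays preemptible while it runs.
 */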
struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
					 struct i915_vma *vma,
					 u32 value)
{
	struct drm_i915_private *i915 = ce->vm->i915;
	const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
	struct intel_engine_pool_node *pool;
	struct i915_vma *batch;
	u64 offset;
	u64 count;
	u64 rem;
	u32 size;
	u32 *cmd;
	int err;

	GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
	intel_engine_pm_get(ce->engine);

	/* 8 dwords per block (blit + MI_ARB_CHECK) plus the terminator */
	count = div_u64(round_up(vma->size, block_size), block_size);
	size = (1 + 8 * count) * sizeof(u32);
	size = round_up(size, PAGE_SIZE);
	pool = intel_engine_get_pool(ce->engine, size);
	if (IS_ERR(pool)) {
		err = PTR_ERR(pool);
		goto out_pm;
	}

	cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_put;
	}

	rem = vma->size;
	offset = vma->node.start;

	do {
		u32 size = min_t(u64, rem, block_size);

		/* height (in page-sized rows) must fit a signed 16-bit field */
		GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

		if (INTEL_GEN(i915) >= 8) {
			*cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(offset);
			*cmd++ = upper_32_bits(offset);
			*cmd++ = value;
		} else {
			*cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = offset;
			*cmd++ = value;
		}

		/* Allow ourselves to be preempted in between blocks. */
		*cmd++ = MI_ARB_CHECK;

		offset += size;
		rem -= size;
	} while (rem);

	*cmd = MI_BATCH_BUFFER_END;
	intel_gt_chipset_flush(ce->vm->gt);

	i915_gem_object_unpin_map(pool->obj);

	batch = i915_vma_instance(pool->obj, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_put;
	}

	err = i915_vma_pin(batch, 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_put;

	batch->private = pool;
	return batch;

out_put:
	intel_engine_pool_put(pool);
out_pm:
	intel_engine_pm_put(ce->engine);
	return ERR_PTR(err);
}
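
/*
 * Keep @vma (the pooled batch) and its engine-pool node busy until @rq
 * is retired, so neither can be reused while the blit is in flight.
 */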
int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq)
{
	int err;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, 0);
	i915_vma_unlock(vma);
	if (unlikely(err))
		return err;

	return intel_engine_pool_mark_active(vma->private, rq);
}
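
/*
 * Undo intel_emit_vma_*_blt(): unpin the batch, return its node to the
 * engine pool and drop the engine wakeref.
 */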
void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma)
{
	i915_vma_unpin(vma);
	intel_engine_pool_put(vma->private);
	intel_engine_pm_put(ce->engine);
}
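
/*
 * Fill the whole of @obj with @value on the GPU: pin the object into the
 * context's address space, emit the fill batch and submit it as a new
 * request on @ce.
 */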
int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
			     struct intel_context *ce,
			     u32 value)
{
	struct i915_request *rq;
	struct i915_vma *batch;
	struct i915_vma *vma;
	int err;

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (unlikely(err))
		return err;

	if (obj->cache_dirty & ~obj->cache_coherent) {
		i915_gem_object_lock(obj);
		i915_gem_clflush_object(obj, 0);
		i915_gem_object_unlock(obj);
	}

	batch = intel_emit_vma_fill_blt(ce, vma, value);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unpin;
	}

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_batch;
	}

	err = intel_emit_vma_mark_active(batch, rq);
	if (unlikely(err))
		goto out_request;

	err = i915_request_await_object(rq, obj, true);
	if (unlikely(err))
		goto out_request;

	if (ce->engine->emit_init_breadcrumb) {
		err = ce->engine->emit_init_breadcrumb(rq);
		if (unlikely(err))
			goto out_request;
	}

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	if (unlikely(err))
		goto out_request;

	err = ce->engine->emit_bb_start(rq,
					batch->node.start, batch->node.size,
					0);
out_request:
	if (unlikely(err))
		i915_request_skip(rq, err);

	i915_request_add(rq);
out_batch:
	intel_emit_vma_release(ce, batch);
out_unpin:
	i915_vma_unpin(vma);
	return err;
}
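
/*
 * Build a pooled batch buffer that copies @src to @dst with the blitter,
 * again in block_size chunks separated by MI_ARB_CHECK so the copy can be
 * preempted between blocks.
 */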
struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
					 struct i915_vma *src,
					 struct i915_vma *dst)
{
	struct drm_i915_private *i915 = ce->vm->i915;
	const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
	struct intel_engine_pool_node *pool;
	struct i915_vma *batch;
	u64 src_offset, dst_offset;
	u64 count, rem;
	u32 size, *cmd;
	int err;

	GEM_BUG_ON(src->size != dst->size);

	GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
	intel_engine_pm_get(ce->engine);

	/* 11 dwords per block (blit + MI_ARB_CHECK) plus the terminator */
	count = div_u64(round_up(dst->size, block_size), block_size);
	size = (1 + 11 * count) * sizeof(u32);
	size = round_up(size, PAGE_SIZE);
	pool = intel_engine_get_pool(ce->engine, size);
	if (IS_ERR(pool)) {
		err = PTR_ERR(pool);
		goto out_pm;
	}

	cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_put;
	}

	rem = src->size;
	src_offset = src->node.start;
	dst_offset = dst->node.start;

	do {
		size = min_t(u64, rem, block_size);
		GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

		if (INTEL_GEN(i915) >= 9) {
			*cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2);
			*cmd++ = BLT_DEPTH_32 | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(dst_offset);
			*cmd++ = upper_32_bits(dst_offset);
			*cmd++ = 0;
			*cmd++ = PAGE_SIZE;
			*cmd++ = lower_32_bits(src_offset);
			*cmd++ = upper_32_bits(src_offset);
		} else if (INTEL_GEN(i915) >= 8) {
			*cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(dst_offset);
			*cmd++ = upper_32_bits(dst_offset);
			*cmd++ = 0;
			*cmd++ = PAGE_SIZE;
			*cmd++ = lower_32_bits(src_offset);
			*cmd++ = upper_32_bits(src_offset);
		} else {
			*cmd++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE;
			*cmd++ = dst_offset;
			*cmd++ = PAGE_SIZE;
			*cmd++ = src_offset;
		}

		/* Allow ourselves to be preempted in between blocks. */
		*cmd++ = MI_ARB_CHECK;

		src_offset += size;
		dst_offset += size;
		rem -= size;
	} while (rem);

	*cmd = MI_BATCH_BUFFER_END;
	intel_gt_chipset_flush(ce->vm->gt);

	i915_gem_object_unpin_map(pool->obj);

	batch = i915_vma_instance(pool->obj, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_put;
	}

	err = i915_vma_pin(batch, 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_put;

	batch->private = pool;
	return batch;

out_put:
	intel_engine_pool_put(pool);
out_pm:
	intel_engine_pm_put(ce->engine);
	return ERR_PTR(err);
}
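
/*
 * Flush stale CPU cachelines for @vma's object if needed, then serialise
 * this request against the object's previous users.
 */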
static int move_to_gpu(struct i915_vma *vma, struct i915_request *rq, bool write)
{
	struct drm_i915_gem_object *obj = vma->obj;

	if (obj->cache_dirty & ~obj->cache_coherent)
		i915_gem_clflush_object(obj, 0);

	return i915_request_await_object(rq, obj, write);
}
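
/*
 * Copy the contents of @src into @dst on the GPU: pin both objects into the
 * context's address space, emit the copy batch and submit it as a new
 * request on @ce, holding both reservation locks across the update.
 */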
int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
			     struct drm_i915_gem_object *dst,
			     struct intel_context *ce)
{
	struct drm_gem_object *objs[] = { &src->base, &dst->base };
	struct i915_address_space *vm = ce->vm;
	struct i915_vma *vma[2], *batch;
	struct ww_acquire_ctx acquire;
	struct i915_request *rq;
	int err, i;

	vma[0] = i915_vma_instance(src, vm, NULL);
	if (IS_ERR(vma[0]))
		return PTR_ERR(vma[0]);

	err = i915_vma_pin(vma[0], 0, 0, PIN_USER);
	if (unlikely(err))
		return err;

	vma[1] = i915_vma_instance(dst, vm, NULL);
	if (IS_ERR(vma[1])) {
		err = PTR_ERR(vma[1]);
		goto out_unpin_src;
	}

	err = i915_vma_pin(vma[1], 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_unpin_src;

	batch = intel_emit_vma_copy_blt(ce, vma[0], vma[1]);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unpin_dst;
	}

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_batch;
	}

	err = intel_emit_vma_mark_active(batch, rq);
	if (unlikely(err))
		goto out_request;

	err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire);
	if (unlikely(err))
		goto out_request;

	for (i = 0; i < ARRAY_SIZE(vma); i++) {
		err = move_to_gpu(vma[i], rq, i);
		if (unlikely(err))
			goto out_unlock;
	}

	for (i = 0; i < ARRAY_SIZE(vma); i++) {
		unsigned int flags = i ? EXEC_OBJECT_WRITE : 0;

		err = i915_vma_move_to_active(vma[i], rq, flags);
		if (unlikely(err))
			goto out_unlock;
	}

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (unlikely(err))
			goto out_unlock;
	}

	err = rq->engine->emit_bb_start(rq,
					batch->node.start, batch->node.size,
					0);
out_unlock:
	drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire);
out_request:
	if (unlikely(err))
		i915_request_skip(rq, err);

	i915_request_add(rq);
out_batch:
	intel_emit_vma_release(ce, batch);
out_unpin_dst:
	i915_vma_unpin(vma[1]);
out_unpin_src:
	i915_vma_unpin(vma[0]);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_gem_object_blt.c"
#endif