// SPDX-License-Identifier: MIT
/*
 * Copyright © 2019 Intel Corporation
 */

#include "i915_drv.h"
#include "gt/intel_context.h"
#include "gt/intel_engine_pm.h"
#include "gt/intel_engine_pool.h"
#include "gt/intel_gt.h"
#include "gt/intel_ring.h"
#include "i915_gem_clflush.h"
#include "i915_gem_object_blt.h"

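/*
 * intel_emit_vma_fill_blt - build a batch that fills @vma with @value
 *
 * Emits XY_COLOR_BLT commands in block_size chunks, with an MI_ARB_CHECK
 * between blocks so the request can be preempted mid-fill. Returns a pinned
 * batch vma (with the backing pool node stashed in batch->private) or an
 * ERR_PTR. The engine pm reference taken here is dropped on the error paths
 * below, or later by intel_emit_vma_release().
 */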
struct i915_vma *intel_emit_vma_fill_blt(struct intel_context *ce,
					 struct i915_vma *vma,
					 u32 value)
{
	struct drm_i915_private *i915 = ce->vm->i915;
	const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
	struct intel_engine_pool_node *pool;
	struct i915_vma *batch;
	u64 offset;
	u64 count;
	u64 rem;
	u32 size;
	u32 *cmd;
	int err;

	GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
	intel_engine_pm_get(ce->engine);

	count = div_u64(round_up(vma->size, block_size), block_size);
	size = (1 + 8 * count) * sizeof(u32);
	size = round_up(size, PAGE_SIZE);
	pool = intel_engine_get_pool(ce->engine, size);
	if (IS_ERR(pool)) {
		err = PTR_ERR(pool);
		goto out_pm;
	}

	cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_put;
	}

	rem = vma->size;
	offset = vma->node.start;

	do {
		u32 size = min_t(u64, rem, block_size);

		GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

		if (INTEL_GEN(i915) >= 8) {
			*cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (7 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(offset);
			*cmd++ = upper_32_bits(offset);
			*cmd++ = value;
		} else {
			*cmd++ = XY_COLOR_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = offset;
			*cmd++ = value;
		}

		/* Allow ourselves to be preempted in between blocks. */
		*cmd++ = MI_ARB_CHECK;

		offset += size;
		rem -= size;
	} while (rem);

	*cmd = MI_BATCH_BUFFER_END;
	intel_gt_chipset_flush(ce->vm->gt);

	i915_gem_object_unpin_map(pool->obj);

	batch = i915_vma_instance(pool->obj, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_put;
	}

	err = i915_vma_pin(batch, 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_put;

	batch->private = pool;
	return batch;

out_put:
	intel_engine_pool_put(pool);
out_pm:
	intel_engine_pm_put(ce->engine);
	return ERR_PTR(err);
}

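/*
 * intel_emit_vma_mark_active - track the batch vma and its pool node on @rq
 *
 * Orders @rq after the batch object, moves the vma onto the request and keeps
 * the backing engine pool node alive until the request is retired.
 */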
int intel_emit_vma_mark_active(struct i915_vma *vma, struct i915_request *rq)
{
	int err;

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, false);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, 0);
	i915_vma_unlock(vma);
	if (unlikely(err))
		return err;

	return intel_engine_pool_mark_active(vma->private, rq);
}

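/*
 * intel_emit_vma_release - undo intel_emit_vma_fill_blt()/copy_blt()
 *
 * Unpins the batch vma, returns its node to the engine pool and drops the
 * engine pm reference taken when the batch was emitted.
 */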
void intel_emit_vma_release(struct intel_context *ce, struct i915_vma *vma)
{
	i915_vma_unpin(vma);
	intel_engine_pool_put(vma->private);
	intel_engine_pm_put(ce->engine);
}

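/*
 * i915_gem_object_fill_blt - fill @obj with @value using blitter commands on @ce
 *
 * Binds the object into @ce->vm, flushes the CPU cache if needed, then submits
 * a request running the batch built by intel_emit_vma_fill_blt(). Returns 0 on
 * successful submission; the fill itself completes asynchronously when the
 * request retires.
 */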
int i915_gem_object_fill_blt(struct drm_i915_gem_object *obj,
			     struct intel_context *ce,
			     u32 value)
{
	struct i915_request *rq;
	struct i915_vma *batch;
	struct i915_vma *vma;
	int err;

	vma = i915_vma_instance(obj, ce->vm, NULL);
	if (IS_ERR(vma))
		return PTR_ERR(vma);

	err = i915_vma_pin(vma, 0, 0, PIN_USER);
	if (unlikely(err))
		return err;

	if (obj->cache_dirty & ~obj->cache_coherent) {
		i915_gem_object_lock(obj);
		i915_gem_clflush_object(obj, 0);
		i915_gem_object_unlock(obj);
	}

	batch = intel_emit_vma_fill_blt(ce, vma, value);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unpin;
	}

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_batch;
	}

	err = intel_emit_vma_mark_active(batch, rq);
	if (unlikely(err))
		goto out_request;

	err = i915_request_await_object(rq, obj, true);
	if (unlikely(err))
		goto out_request;

	if (ce->engine->emit_init_breadcrumb) {
		err = ce->engine->emit_init_breadcrumb(rq);
		if (unlikely(err))
			goto out_request;
	}

	i915_vma_lock(vma);
	err = i915_request_await_object(rq, vma->obj, true);
	if (err == 0)
		err = i915_vma_move_to_active(vma, rq, EXEC_OBJECT_WRITE);
	i915_vma_unlock(vma);
	if (unlikely(err))
		goto out_request;

	err = ce->engine->emit_bb_start(rq,
					batch->node.start, batch->node.size,
					0);
out_request:
	if (unlikely(err))
		i915_request_skip(rq, err);

	i915_request_add(rq);
out_batch:
	intel_emit_vma_release(ce, batch);
out_unpin:
	i915_vma_unpin(vma);
	return err;
}

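/*
 * intel_emit_vma_copy_blt - build a batch that copies @src to @dst
 *
 * Emits the per-gen flavour of the copy blit (GEN9+ fast copy, GEN8
 * XY_SRC_COPY, or legacy SRC_COPY) in block_size chunks, again with an
 * MI_ARB_CHECK between blocks. Same return and ownership rules as
 * intel_emit_vma_fill_blt().
 */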
struct i915_vma *intel_emit_vma_copy_blt(struct intel_context *ce,
					 struct i915_vma *src,
					 struct i915_vma *dst)
{
	struct drm_i915_private *i915 = ce->vm->i915;
	const u32 block_size = SZ_8M; /* ~1ms at 8GiB/s preemption delay */
	struct intel_engine_pool_node *pool;
	struct i915_vma *batch;
	u64 src_offset, dst_offset;
	u64 count, rem;
	u32 size, *cmd;
	int err;

	GEM_BUG_ON(src->size != dst->size);

	GEM_BUG_ON(intel_engine_is_virtual(ce->engine));
	intel_engine_pm_get(ce->engine);

	count = div_u64(round_up(dst->size, block_size), block_size);
	size = (1 + 11 * count) * sizeof(u32);
	size = round_up(size, PAGE_SIZE);
	pool = intel_engine_get_pool(ce->engine, size);
	if (IS_ERR(pool)) {
		err = PTR_ERR(pool);
		goto out_pm;
	}

	cmd = i915_gem_object_pin_map(pool->obj, I915_MAP_WC);
	if (IS_ERR(cmd)) {
		err = PTR_ERR(cmd);
		goto out_put;
	}

	rem = src->size;
	src_offset = src->node.start;
	dst_offset = dst->node.start;

	do {
		size = min_t(u64, rem, block_size);
		GEM_BUG_ON(size >> PAGE_SHIFT > S16_MAX);

		if (INTEL_GEN(i915) >= 9) {
			*cmd++ = GEN9_XY_FAST_COPY_BLT_CMD | (10 - 2);
			*cmd++ = BLT_DEPTH_32 | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(dst_offset);
			*cmd++ = upper_32_bits(dst_offset);
			*cmd++ = 0;
			*cmd++ = PAGE_SIZE;
			*cmd++ = lower_32_bits(src_offset);
			*cmd++ = upper_32_bits(src_offset);
		} else if (INTEL_GEN(i915) >= 8) {
			*cmd++ = XY_SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (10 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
			*cmd++ = 0;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE / 4;
			*cmd++ = lower_32_bits(dst_offset);
			*cmd++ = upper_32_bits(dst_offset);
			*cmd++ = 0;
			*cmd++ = PAGE_SIZE;
			*cmd++ = lower_32_bits(src_offset);
			*cmd++ = upper_32_bits(src_offset);
		} else {
			*cmd++ = SRC_COPY_BLT_CMD | BLT_WRITE_RGBA | (6 - 2);
			*cmd++ = BLT_DEPTH_32 | BLT_ROP_SRC_COPY | PAGE_SIZE;
			*cmd++ = size >> PAGE_SHIFT << 16 | PAGE_SIZE;
			*cmd++ = dst_offset;
			*cmd++ = PAGE_SIZE;
			*cmd++ = src_offset;
		}

		/* Allow ourselves to be preempted in between blocks. */
		*cmd++ = MI_ARB_CHECK;

		src_offset += size;
		dst_offset += size;
		rem -= size;
	} while (rem);

	*cmd = MI_BATCH_BUFFER_END;
	intel_gt_chipset_flush(ce->vm->gt);

	i915_gem_object_unpin_map(pool->obj);

	batch = i915_vma_instance(pool->obj, ce->vm, NULL);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_put;
	}

	err = i915_vma_pin(batch, 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_put;

	batch->private = pool;
	return batch;

out_put:
	intel_engine_pool_put(pool);
out_pm:
	intel_engine_pm_put(ce->engine);
	return ERR_PTR(err);
}

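/*
 * move_to_gpu - flush CPU caches if needed and order @rq after @vma's object
 */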
static int move_to_gpu(struct i915_vma *vma, struct i915_request *rq, bool write)
{
	struct drm_i915_gem_object *obj = vma->obj;

	if (obj->cache_dirty & ~obj->cache_coherent)
		i915_gem_clflush_object(obj, 0);

	return i915_request_await_object(rq, obj, write);
}

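/*
 * i915_gem_object_copy_blt - copy the contents of @src into @dst on @ce
 *
 * Both objects are bound into @ce->vm and locked together via
 * drm_gem_lock_reservations() before the batch from intel_emit_vma_copy_blt()
 * is submitted. As with the fill, the copy completes asynchronously with the
 * request.
 */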
int i915_gem_object_copy_blt(struct drm_i915_gem_object *src,
			     struct drm_i915_gem_object *dst,
			     struct intel_context *ce)
{
	struct drm_gem_object *objs[] = { &src->base, &dst->base };
	struct i915_address_space *vm = ce->vm;
	struct i915_vma *vma[2], *batch;
	struct ww_acquire_ctx acquire;
	struct i915_request *rq;
	int err, i;

	vma[0] = i915_vma_instance(src, vm, NULL);
	if (IS_ERR(vma[0]))
		return PTR_ERR(vma[0]);

	err = i915_vma_pin(vma[0], 0, 0, PIN_USER);
	if (unlikely(err))
		return err;

	vma[1] = i915_vma_instance(dst, vm, NULL);
	if (IS_ERR(vma[1])) {
		err = PTR_ERR(vma[1]);
		goto out_unpin_src;
	}

	err = i915_vma_pin(vma[1], 0, 0, PIN_USER);
	if (unlikely(err))
		goto out_unpin_src;

	batch = intel_emit_vma_copy_blt(ce, vma[0], vma[1]);
	if (IS_ERR(batch)) {
		err = PTR_ERR(batch);
		goto out_unpin_dst;
	}

	rq = intel_context_create_request(ce);
	if (IS_ERR(rq)) {
		err = PTR_ERR(rq);
		goto out_batch;
	}

	err = intel_emit_vma_mark_active(batch, rq);
	if (unlikely(err))
		goto out_request;

	err = drm_gem_lock_reservations(objs, ARRAY_SIZE(objs), &acquire);
	if (unlikely(err))
		goto out_request;

	for (i = 0; i < ARRAY_SIZE(vma); i++) {
		err = move_to_gpu(vma[i], rq, i);
		if (unlikely(err))
			goto out_unlock;
	}

	for (i = 0; i < ARRAY_SIZE(vma); i++) {
		unsigned int flags = i ? EXEC_OBJECT_WRITE : 0;

		err = i915_vma_move_to_active(vma[i], rq, flags);
		if (unlikely(err))
			goto out_unlock;
	}

	if (rq->engine->emit_init_breadcrumb) {
		err = rq->engine->emit_init_breadcrumb(rq);
		if (unlikely(err))
			goto out_unlock;
	}

	err = rq->engine->emit_bb_start(rq,
					batch->node.start, batch->node.size,
					0);
out_unlock:
	drm_gem_unlock_reservations(objs, ARRAY_SIZE(objs), &acquire);
out_request:
	if (unlikely(err))
		i915_request_skip(rq, err);

	i915_request_add(rq);
out_batch:
	intel_emit_vma_release(ce, batch);
out_unpin_dst:
	i915_vma_unpin(vma[1]);
out_unpin_src:
	i915_vma_unpin(vma[0]);
	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_gem_object_blt.c"
#endif