/*
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Zhiyuan Lv <zhiyuan.lv@intel.com>
 *    Zhi Wang <zhi.a.wang@intel.com>
 *
 * Contributors:
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *    Ping Gao <ping.a.gao@intel.com>
 *    Tina Zhang <tina.zhang@intel.com>
 *
 */

#include "i915_drv.h"
#include "gvt.h"
#define _EL_OFFSET_STATUS	0x234
#define _EL_OFFSET_STATUS_BUF	0x370
#define _EL_OFFSET_STATUS_PTR	0x3A0

#define execlist_ring_mmio(gvt, ring_id, offset) \
	(gvt->dev_priv->engine[ring_id]->mmio_base + (offset))

#define valid_context(ctx) ((ctx)->valid)
#define same_context(a, b) (((a)->context_id == (b)->context_id) && \
		((a)->lrca == (b)->lrca))
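
/*
 * Per-ring table of the virtual interrupt event injected into the guest
 * whenever a context-switch status entry is emulated for that ring
 * (see emulate_csb_update()).
 */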
static int context_switch_events[] = {
	[RCS] = RCS_AS_CONTEXT_SWITCH,
	[BCS] = BCS_AS_CONTEXT_SWITCH,
	[VCS] = VCS_AS_CONTEXT_SWITCH,
	[VCS2] = VCS2_AS_CONTEXT_SWITCH,
	[VECS] = VECS_AS_CONTEXT_SWITCH,
};

static int ring_id_to_context_switch_event(int ring_id)
{
	if (WARN_ON(ring_id < RCS ||
		    ring_id >= ARRAY_SIZE(context_switch_events)))
		return -EINVAL;

	return context_switch_events[ring_id];
}
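
/*
 * Promote the pending virtual execlist slot to be the running slot and
 * clear the pending slot; running_context is re-pointed at element 0 of
 * the new running slot.
 */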
static void switch_virtual_execlist_slot(struct intel_vgpu_execlist *execlist)
{
	gvt_dbg_el("[before] running slot %d/context %x pending slot %d\n",
			execlist->running_slot ?
			execlist->running_slot->index : -1,
			execlist->running_context ?
			execlist->running_context->context_id : 0,
			execlist->pending_slot ?
			execlist->pending_slot->index : -1);

	execlist->running_slot = execlist->pending_slot;
	execlist->pending_slot = NULL;
	execlist->running_context = execlist->running_context ?
		&execlist->running_slot->ctx[0] : NULL;

	gvt_dbg_el("[after] running slot %d/context %x pending slot %d\n",
			execlist->running_slot ?
			execlist->running_slot->index : -1,
			execlist->running_context ?
			execlist->running_context->context_id : 0,
			execlist->pending_slot ?
			execlist->pending_slot->index : -1);
}
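
/*
 * Rebuild the virtual EXECLIST_STATUS register for this ring from the
 * current running/pending slot state and write it back to the vGPU's
 * vregs so the guest observes a consistent status.
 */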
static void emulate_execlist_status(struct intel_vgpu_execlist *execlist)
{
	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
	struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
	struct execlist_ctx_descriptor_format *desc = execlist->running_context;
	struct intel_vgpu *vgpu = execlist->vgpu;
	struct execlist_status_format status;
	int ring_id = execlist->ring_id;
	u32 status_reg = execlist_ring_mmio(vgpu->gvt,
			ring_id, _EL_OFFSET_STATUS);

	status.ldw = vgpu_vreg(vgpu, status_reg);
	status.udw = vgpu_vreg(vgpu, status_reg + 4);

	if (running) {
		status.current_execlist_pointer = !!running->index;
		status.execlist_write_pointer = !!!running->index;
		status.execlist_0_active = status.execlist_0_valid =
			!!!(running->index);
		status.execlist_1_active = status.execlist_1_valid =
			!!(running->index);
	} else {
		status.context_id = 0;
		status.execlist_0_active = status.execlist_0_valid = 0;
		status.execlist_1_active = status.execlist_1_valid = 0;
	}

	status.context_id = desc ? desc->context_id : 0;
	status.execlist_queue_full = !!(pending);

	vgpu_vreg(vgpu, status_reg) = status.ldw;
	vgpu_vreg(vgpu, status_reg + 4) = status.udw;

	gvt_dbg_el("vgpu%d: status reg offset %x ldw %x udw %x\n",
		vgpu->id, status_reg, status.ldw, status.udw);
}
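
/*
 * Emit one context status buffer (CSB) entry for this ring: advance the
 * virtual write pointer, store the entry in the CSB vregs, mirror it into
 * the guest's hardware status page when one is mapped, and inject the
 * ring's context-switch event unless trigger_interrupt_later is set.
 */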
static void emulate_csb_update(struct intel_vgpu_execlist *execlist,
		struct execlist_context_status_format *status,
		bool trigger_interrupt_later)
{
	struct intel_vgpu *vgpu = execlist->vgpu;
	int ring_id = execlist->ring_id;
	struct execlist_context_status_pointer_format ctx_status_ptr;
	u32 write_pointer;
	u32 ctx_status_ptr_reg, ctx_status_buf_reg, offset;
	unsigned long hwsp_gpa;
	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;

	ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS_PTR);
	ctx_status_buf_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS_BUF);

	ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);

	write_pointer = ctx_status_ptr.write_ptr;

	if (write_pointer == 0x7)
		write_pointer = 0;
	else {
		++write_pointer;
		write_pointer %= 0x6;
	}

	offset = ctx_status_buf_reg + write_pointer * 8;

	vgpu_vreg(vgpu, offset) = status->ldw;
	vgpu_vreg(vgpu, offset + 4) = status->udw;

	ctx_status_ptr.write_ptr = write_pointer;
	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;

	/* Update the CSB and CSB write pointer in HWSP */
	hwsp_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
					 vgpu->hws_pga[ring_id]);
	if (hwsp_gpa != INTEL_GVT_INVALID_ADDR) {
		intel_gvt_hypervisor_write_gpa(vgpu,
			hwsp_gpa + I915_HWS_CSB_BUF0_INDEX * 4 +
			write_pointer * 8,
			status, 8);
		intel_gvt_hypervisor_write_gpa(vgpu,
			hwsp_gpa +
			intel_hws_csb_write_index(dev_priv) * 4,
			&write_pointer, 4);
	}

	gvt_dbg_el("vgpu%d: w pointer %u reg %x csb l %x csb h %x\n",
		vgpu->id, write_pointer, offset, status->ldw, status->udw);

	if (trigger_interrupt_later)
		return;

	intel_vgpu_trigger_virtual_event(vgpu,
			ring_id_to_context_switch_event(execlist->ring_id));
}
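
/*
 * Emulate scheduling a context out of the running execlist slot. Depending
 * on which element completed and whether a pending slot exists, this emits
 * context-complete/element-switch or context-complete/active-to-idle status
 * entries, plus an idle-to-active entry when a pending execlist takes over.
 */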
static int emulate_execlist_ctx_schedule_out(
		struct intel_vgpu_execlist *execlist,
		struct execlist_ctx_descriptor_format *ctx)
{
	struct intel_vgpu *vgpu = execlist->vgpu;
	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
	struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
	struct execlist_ctx_descriptor_format *ctx0 = &running->ctx[0];
	struct execlist_ctx_descriptor_format *ctx1 = &running->ctx[1];
	struct execlist_context_status_format status;

	memset(&status, 0, sizeof(status));

	gvt_dbg_el("schedule out context id %x\n", ctx->context_id);

	if (WARN_ON(!same_context(ctx, execlist->running_context))) {
		gvt_vgpu_err("schedule out context is not running context,"
				"ctx id %x running ctx id %x\n",
				ctx->context_id,
				execlist->running_context->context_id);
		return -EINVAL;
	}

	/* ctx1 is valid, ctx0/ctx is scheduled-out -> element switch */
	if (valid_context(ctx1) && same_context(ctx0, ctx)) {
		gvt_dbg_el("ctx 1 valid, ctx/ctx 0 is scheduled-out\n");

		execlist->running_context = ctx1;

		emulate_execlist_status(execlist);

		status.context_complete = status.element_switch = 1;
		status.context_id = ctx->context_id;

		emulate_csb_update(execlist, &status, false);
		/*
		 * ctx1 is not valid, ctx == ctx0
		 * ctx1 is valid, ctx1 == ctx
		 *	--> last element is finished
		 * emulate:
		 *	active-to-idle if there is *no* pending execlist
		 *	context-complete if there *is* pending execlist
		 */
	} else if ((!valid_context(ctx1) && same_context(ctx0, ctx))
			|| (valid_context(ctx1) && same_context(ctx1, ctx))) {
		gvt_dbg_el("need to switch virtual execlist slot\n");

		switch_virtual_execlist_slot(execlist);

		emulate_execlist_status(execlist);

		status.context_complete = status.active_to_idle = 1;
		status.context_id = ctx->context_id;

		if (!pending) {
			emulate_csb_update(execlist, &status, false);
		} else {
			emulate_csb_update(execlist, &status, true);

			memset(&status, 0, sizeof(status));

			status.idle_to_active = 1;
			status.context_id = 0;

			emulate_csb_update(execlist, &status, false);
		}
	} else {
		WARN_ON(1);
		return -EINVAL;
	}

	return 0;
}
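
/*
 * Return the virtual execlist slot addressed by the write pointer of the
 * virtual EXECLIST_STATUS register, or NULL when the queue is reported
 * full.
 */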
static struct intel_vgpu_execlist_slot *get_next_execlist_slot(
		struct intel_vgpu_execlist *execlist)
{
	struct intel_vgpu *vgpu = execlist->vgpu;
	int ring_id = execlist->ring_id;
	u32 status_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS);
	struct execlist_status_format status;

	status.ldw = vgpu_vreg(vgpu, status_reg);
	status.udw = vgpu_vreg(vgpu, status_reg + 4);

	if (status.execlist_queue_full) {
		gvt_vgpu_err("virtual execlist slots are full\n");
		return NULL;
	}

	return &execlist->slot[status.execlist_write_pointer];
}
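
/*
 * Emulate the guest's ELSP submission of a two-element execlist: place the
 * descriptors into a free virtual slot and, depending on the current
 * running slot, emit an idle-to-active, lite-restore/preempted, or plain
 * pending-slot status update.
 */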
static int emulate_execlist_schedule_in(struct intel_vgpu_execlist *execlist,
		struct execlist_ctx_descriptor_format ctx[2])
{
	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
	struct intel_vgpu_execlist_slot *slot =
		get_next_execlist_slot(execlist);

	struct execlist_ctx_descriptor_format *ctx0, *ctx1;
	struct execlist_context_status_format status;
	struct intel_vgpu *vgpu = execlist->vgpu;

	gvt_dbg_el("emulate schedule-in\n");

	if (!slot) {
		gvt_vgpu_err("no available execlist slot\n");
		return -EINVAL;
	}

	memset(&status, 0, sizeof(status));
	memset(slot->ctx, 0, sizeof(slot->ctx));

	slot->ctx[0] = ctx[0];
	slot->ctx[1] = ctx[1];

	gvt_dbg_el("alloc slot index %d ctx 0 %x ctx 1 %x\n",
			slot->index, ctx[0].context_id,
			ctx[1].context_id);

	/*
	 * no running execlist, make this submitted bundle the running execlist
	 * -> idle-to-active
	 */
	if (!running) {
		gvt_dbg_el("no current running execlist\n");

		execlist->running_slot = slot;
		execlist->pending_slot = NULL;
		execlist->running_context = &slot->ctx[0];

		gvt_dbg_el("running slot index %d running context %x\n",
				execlist->running_slot->index,
				execlist->running_context->context_id);

		emulate_execlist_status(execlist);

		status.idle_to_active = 1;
		status.context_id = 0;

		emulate_csb_update(execlist, &status, false);
		return 0;
	}

	ctx0 = &running->ctx[0];
	ctx1 = &running->ctx[1];

	gvt_dbg_el("current running slot index %d ctx 0 %x ctx 1 %x\n",
		running->index, ctx0->context_id, ctx1->context_id);

	/*
	 * already has a running execlist
	 *	a. running ctx1 is valid,
	 *	   ctx0 is finished, and running ctx1 == new execlist ctx[0]
	 *	b. running ctx1 is not valid,
	 *	   ctx0 == new execlist ctx[0]
	 * ----> lite-restore + preempted
	 */
	if ((valid_context(ctx1) && same_context(ctx1, &slot->ctx[0]) &&
		/* condition a */
		(!same_context(ctx0, execlist->running_context))) ||
			(!valid_context(ctx1) &&
			 same_context(ctx0, &slot->ctx[0]))) { /* condition b */
		gvt_dbg_el("need to switch virtual execlist slot\n");

		execlist->pending_slot = slot;
		switch_virtual_execlist_slot(execlist);

		emulate_execlist_status(execlist);

		status.lite_restore = status.preempted = 1;
		status.context_id = ctx[0].context_id;

		emulate_csb_update(execlist, &status, false);
	} else {
		gvt_dbg_el("emulate as pending slot\n");
		/*
		 * otherwise
		 * --> emulate the pending-execlist-exists, no-preemption case
		 */
		execlist->pending_slot = slot;
		emulate_execlist_status(execlist);
	}

	return 0;
}
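
/* Each context descriptor occupies two consecutive dwords of the ELSP data. */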
#define get_desc_from_elsp_dwords(ed, i) \
	((struct execlist_ctx_descriptor_format *)&((ed)->data[i * 2]))
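
/*
 * prepare() callback of an execlist workload: when the workload carries the
 * guest's ELSP write (emulate_schedule_in), replay that schedule-in on the
 * virtual execlist before the workload is dispatched.
 */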
static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct execlist_ctx_descriptor_format ctx[2];
	int ring_id = workload->ring_id;
	int ret;

	if (!workload->emulate_schedule_in)
		return 0;

	ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0);
	ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1);

	ret = emulate_execlist_schedule_in(&s->execlist[ring_id], ctx);
	if (ret) {
		gvt_vgpu_err("fail to emulate execlist schedule in\n");
		return ret;
	}

	return 0;
}
static int complete_execlist_workload(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	int ring_id = workload->ring_id;
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_execlist *execlist = &s->execlist[ring_id];
	struct intel_vgpu_workload *next_workload;
	struct list_head *next = workload_q_head(vgpu, ring_id)->next;
	bool lite_restore = false;
	int ret = 0;

	gvt_dbg_el("complete workload %p status %d\n", workload,
			workload->status);

	if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id)))
		goto out;

	if (!list_empty(workload_q_head(vgpu, ring_id))) {
		struct execlist_ctx_descriptor_format *this_desc, *next_desc;

		next_workload = container_of(next,
				struct intel_vgpu_workload, list);
		this_desc = &workload->ctx_desc;
		next_desc = &next_workload->ctx_desc;

		lite_restore = same_context(this_desc, next_desc);
	}

	if (lite_restore) {
		gvt_dbg_el("next context == current - no schedule-out\n");
		goto out;
	}

	ret = emulate_execlist_ctx_schedule_out(execlist, &workload->ctx_desc);
out:
	intel_vgpu_unpin_mm(workload->shadow_mm);
	intel_vgpu_destroy_workload(workload);
	return ret;
}
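
/*
 * Build a vGPU workload for one context descriptor and queue it to the GVT
 * scheduler; only the head of an ELSP submission keeps a copy of the ELSP
 * dwords needed to emulate schedule-in later.
 */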
static int submit_context(struct intel_vgpu *vgpu, int ring_id,
		struct execlist_ctx_descriptor_format *desc,
		bool emulate_schedule_in)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_workload *workload = NULL;

	workload = intel_vgpu_create_workload(vgpu, ring_id, desc);
	if (IS_ERR(workload))
		return PTR_ERR(workload);

	workload->prepare = prepare_execlist_workload;
	workload->complete = complete_execlist_workload;
	workload->emulate_schedule_in = emulate_schedule_in;

	if (emulate_schedule_in)
		workload->elsp_dwords = s->execlist[ring_id].elsp_dwords;

	gvt_dbg_el("workload %p emulate schedule_in %d\n", workload,
			emulate_schedule_in);

	intel_vgpu_queue_workload(workload);
	return 0;
}
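
/*
 * Entry point for a trapped guest ELSP write on @ring_id: validate both
 * context descriptors and submit a workload for each valid one, with
 * schedule-in emulation attached to the first.
 */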
int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_execlist *execlist = &s->execlist[ring_id];
	struct execlist_ctx_descriptor_format *desc[2];
	int i, ret;

	desc[0] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0);
	desc[1] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1);

	if (!desc[0]->valid) {
		gvt_vgpu_err("invalid elsp submission, desc0 is invalid\n");
		goto inv_desc;
	}

	for (i = 0; i < ARRAY_SIZE(desc); i++) {
		if (!desc[i]->valid)
			continue;
		if (!desc[i]->privilege_access) {
			gvt_vgpu_err("unexpected GGTT elsp submission\n");
			goto inv_desc;
		}
	}

	/* submit workload */
	for (i = 0; i < ARRAY_SIZE(desc); i++) {
		if (!desc[i]->valid)
			continue;
		ret = submit_context(vgpu, ring_id, desc[i], i == 0);
		if (ret) {
			gvt_vgpu_err("failed to submit desc %d\n", i);
			return ret;
		}
	}

	return 0;

inv_desc:
	gvt_vgpu_err("descriptors content: desc0 %08x %08x desc1 %08x %08x\n",
		     desc[0]->udw, desc[0]->ldw, desc[1]->udw, desc[1]->ldw);
	return -EINVAL;
}
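
/*
 * Reset the per-ring virtual execlist state and re-initialize the virtual
 * context status pointer register (read pointer 0, write pointer 0x7).
 */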
static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_execlist *execlist = &s->execlist[ring_id];
	struct execlist_context_status_pointer_format ctx_status_ptr;
	u32 ctx_status_ptr_reg;

	memset(execlist, 0, sizeof(*execlist));

	execlist->vgpu = vgpu;
	execlist->ring_id = ring_id;
	execlist->slot[0].index = 0;
	execlist->slot[1].index = 1;

	ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS_PTR);
	ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);
	ctx_status_ptr.read_ptr = 0;
	ctx_status_ptr.write_ptr = 0x7;
	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;
}
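
/*
 * Release the per-engine ring scan buffers kept in the vGPU submission
 * state for the engines in @engine_mask.
 */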
static void clean_execlist(struct intel_vgpu *vgpu, unsigned long engine_mask)
{
	unsigned int tmp;
	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
	struct intel_engine_cs *engine;
	struct intel_vgpu_submission *s = &vgpu->submission;

	for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
		kfree(s->ring_scan_buffer[engine->id]);
		s->ring_scan_buffer[engine->id] = NULL;
		s->ring_scan_buffer_size[engine->id] = 0;
	}
}
static void reset_execlist(struct intel_vgpu *vgpu,
		unsigned long engine_mask)
{
	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
	struct intel_engine_cs *engine;
	unsigned int tmp;

	for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
		init_vgpu_execlist(vgpu, engine->id);
}
static int init_execlist(struct intel_vgpu *vgpu,
		unsigned long engine_mask)
{
	reset_execlist(vgpu, engine_mask);
	return 0;
}
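
/* Execlist-mode virtual submission backend operations. */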
const struct intel_vgpu_submission_ops intel_vgpu_execlist_submission_ops = {
	.name = "execlist",
	.init = init_execlist,
	.reset = reset_execlist,
	.clean = clean_execlist,
};