/*
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Zhiyuan Lv <zhiyuan.lv@intel.com>
 *    Zhi Wang <zhi.a.wang@intel.com>
 *
 * Contributors:
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *    Ping Gao <ping.a.gao@intel.com>
 *    Tina Zhang <tina.zhang@intel.com>
 *
 */

#include "i915_drv.h"
#include "gvt.h"

#define _EL_OFFSET_STATUS       0x234
#define _EL_OFFSET_STATUS_BUF   0x370
#define _EL_OFFSET_STATUS_PTR   0x3A0

#define execlist_ring_mmio(gvt, ring_id, offset) \
        (gvt->dev_priv->engine[ring_id]->mmio_base + (offset))

#define valid_context(ctx) ((ctx)->valid)
#define same_context(a, b) (((a)->context_id == (b)->context_id) && \
                ((a)->lrca == (b)->lrca))

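/*
 * Per-engine mapping from ring id to the virtual interrupt event that is
 * injected into the guest whenever a context status buffer update is
 * emulated for that engine.
 */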
static int context_switch_events[] = {
        [RCS0] = RCS_AS_CONTEXT_SWITCH,
        [BCS0] = BCS_AS_CONTEXT_SWITCH,
        [VCS0] = VCS_AS_CONTEXT_SWITCH,
        [VCS1] = VCS2_AS_CONTEXT_SWITCH,
        [VECS0] = VECS_AS_CONTEXT_SWITCH,
};

static int ring_id_to_context_switch_event(unsigned int ring_id)
{
        if (WARN_ON(ring_id >= ARRAY_SIZE(context_switch_events)))
                return -EINVAL;

        return context_switch_events[ring_id];
}

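/*
 * Promote the pending virtual execlist slot to the running slot: the pending
 * slot becomes the running one, the pending pointer is cleared, and the
 * running context is re-pointed at element 0 of the newly running slot (or
 * left NULL when no context was in flight).
 */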
static void switch_virtual_execlist_slot(struct intel_vgpu_execlist *execlist)
{
        gvt_dbg_el("[before] running slot %d/context %x pending slot %d\n",
                        execlist->running_slot ?
                        execlist->running_slot->index : -1,
                        execlist->running_context ?
                        execlist->running_context->context_id : 0,
                        execlist->pending_slot ?
                        execlist->pending_slot->index : -1);

        execlist->running_slot = execlist->pending_slot;
        execlist->pending_slot = NULL;
        execlist->running_context = execlist->running_context ?
                &execlist->running_slot->ctx[0] : NULL;

        gvt_dbg_el("[after] running slot %d/context %x pending slot %d\n",
                        execlist->running_slot ?
                        execlist->running_slot->index : -1,
                        execlist->running_context ?
                        execlist->running_context->context_id : 0,
                        execlist->pending_slot ?
                        execlist->pending_slot->index : -1);
}

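/*
 * Rebuild the virtual EXECLIST_STATUS register for this ring from the
 * current slot state: which element is executing, whether the submit queue
 * is full, and the context id of the running context.
 */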
static void emulate_execlist_status(struct intel_vgpu_execlist *execlist)
{
        struct intel_vgpu_execlist_slot *running = execlist->running_slot;
        struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
        struct execlist_ctx_descriptor_format *desc = execlist->running_context;
        struct intel_vgpu *vgpu = execlist->vgpu;
        struct execlist_status_format status;
        int ring_id = execlist->ring_id;
        u32 status_reg = execlist_ring_mmio(vgpu->gvt,
                        ring_id, _EL_OFFSET_STATUS);

        status.ldw = vgpu_vreg(vgpu, status_reg);
        status.udw = vgpu_vreg(vgpu, status_reg + 4);

        if (running) {
                status.current_execlist_pointer = !!running->index;
                status.execlist_write_pointer = !!!running->index;
                status.execlist_0_active = status.execlist_0_valid =
                        !!!(running->index);
                status.execlist_1_active = status.execlist_1_valid =
                        !!(running->index);
        } else {
                status.context_id = 0;
                status.execlist_0_active = status.execlist_0_valid = 0;
                status.execlist_1_active = status.execlist_1_valid = 0;
        }

        status.context_id = desc ? desc->context_id : 0;
        status.execlist_queue_full = !!(pending);

        vgpu_vreg(vgpu, status_reg) = status.ldw;
        vgpu_vreg(vgpu, status_reg + 4) = status.udw;

        gvt_dbg_el("vgpu%d: status reg offset %x ldw %x udw %x\n",
                vgpu->id, status_reg, status.ldw, status.udw);
}

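/*
 * Append one entry to the virtual context status buffer (CSB): advance the
 * write pointer, store the status dword pair into the CSB registers, mirror
 * both into the guest's hardware status page, and inject the context-switch
 * interrupt unless the caller asked to defer it.
 */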
static void emulate_csb_update(struct intel_vgpu_execlist *execlist,
                struct execlist_context_status_format *status,
                bool trigger_interrupt_later)
{
        struct intel_vgpu *vgpu = execlist->vgpu;
        int ring_id = execlist->ring_id;
        struct execlist_context_status_pointer_format ctx_status_ptr;
        u32 write_pointer;
        u32 ctx_status_ptr_reg, ctx_status_buf_reg, offset;
        unsigned long hwsp_gpa;
        struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;

        ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
                        _EL_OFFSET_STATUS_PTR);
        ctx_status_buf_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
                        _EL_OFFSET_STATUS_BUF);

        ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);

        write_pointer = ctx_status_ptr.write_ptr;

        if (write_pointer == 0x7)
                write_pointer = 0;
        else {
                ++write_pointer;
                write_pointer %= 0x6;
        }

        offset = ctx_status_buf_reg + write_pointer * 8;

        vgpu_vreg(vgpu, offset) = status->ldw;
        vgpu_vreg(vgpu, offset + 4) = status->udw;

        ctx_status_ptr.write_ptr = write_pointer;
        vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;

        /* Update the CSB and CSB write pointer in HWSP */
        hwsp_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
                        vgpu->hws_pga[ring_id]);
        if (hwsp_gpa != INTEL_GVT_INVALID_ADDR) {
                intel_gvt_hypervisor_write_gpa(vgpu,
                        hwsp_gpa + I915_HWS_CSB_BUF0_INDEX * 4 +
                        write_pointer * 8,
                        status, 8);
                intel_gvt_hypervisor_write_gpa(vgpu,
                        hwsp_gpa +
                        intel_hws_csb_write_index(dev_priv) * 4,
                        &write_pointer, 4);
        }

        gvt_dbg_el("vgpu%d: w pointer %u reg %x csb l %x csb h %x\n",
                vgpu->id, write_pointer, offset, status->ldw, status->udw);

        if (trigger_interrupt_later)
                return;

        intel_vgpu_trigger_virtual_event(vgpu,
                        ring_id_to_context_switch_event(execlist->ring_id));
}

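/*
 * Emulate the hardware's reaction to a context completing: depending on
 * whether the second element of the running slot is still valid, either
 * report an element switch, or switch to the pending slot and report
 * context-complete / active-to-idle (plus idle-to-active when a pending
 * execlist takes over).
 */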
static int emulate_execlist_ctx_schedule_out(
                struct intel_vgpu_execlist *execlist,
                struct execlist_ctx_descriptor_format *ctx)
{
        struct intel_vgpu *vgpu = execlist->vgpu;
        struct intel_vgpu_execlist_slot *running = execlist->running_slot;
        struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
        struct execlist_ctx_descriptor_format *ctx0 = &running->ctx[0];
        struct execlist_ctx_descriptor_format *ctx1 = &running->ctx[1];
        struct execlist_context_status_format status;

        memset(&status, 0, sizeof(status));

        gvt_dbg_el("schedule out context id %x\n", ctx->context_id);

        if (WARN_ON(!same_context(ctx, execlist->running_context))) {
                gvt_vgpu_err("schedule out context is not running context,"
                                " ctx id %x running ctx id %x\n",
                                ctx->context_id,
                                execlist->running_context->context_id);
                return -EINVAL;
        }

        /* ctx1 is valid, ctx0/ctx is scheduled-out -> element switch */
        if (valid_context(ctx1) && same_context(ctx0, ctx)) {
                gvt_dbg_el("ctx 1 valid, ctx/ctx 0 is scheduled-out\n");

                execlist->running_context = ctx1;

                emulate_execlist_status(execlist);

                status.context_complete = status.element_switch = 1;
                status.context_id = ctx->context_id;

                emulate_csb_update(execlist, &status, false);
        /*
         * ctx1 is not valid, ctx == ctx0
         * ctx1 is valid, ctx1 == ctx
         *      --> last element is finished
         * emulate:
         *      active-to-idle if there is *no* pending execlist
         *      context-complete if there *is* pending execlist
         */
        } else if ((!valid_context(ctx1) && same_context(ctx0, ctx))
                        || (valid_context(ctx1) && same_context(ctx1, ctx))) {
                gvt_dbg_el("need to switch virtual execlist slot\n");

                switch_virtual_execlist_slot(execlist);

                emulate_execlist_status(execlist);

                status.context_complete = status.active_to_idle = 1;
                status.context_id = ctx->context_id;

                if (!pending) {
                        emulate_csb_update(execlist, &status, false);
                } else {
                        emulate_csb_update(execlist, &status, true);

                        memset(&status, 0, sizeof(status));

                        status.idle_to_active = 1;
                        status.context_id = 0;

                        emulate_csb_update(execlist, &status, false);
                }
        } else {
                WARN_ON(1);
                return -EINVAL;
        }

        return 0;
}

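/*
 * Pick the slot that the guest's next ELSP submission should land in, based
 * on the execlist write pointer in the virtual status register; returns NULL
 * when both virtual slots are already occupied.
 */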
static struct intel_vgpu_execlist_slot *get_next_execlist_slot(
                struct intel_vgpu_execlist *execlist)
{
        struct intel_vgpu *vgpu = execlist->vgpu;
        int ring_id = execlist->ring_id;
        u32 status_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
                        _EL_OFFSET_STATUS);
        struct execlist_status_format status;

        status.ldw = vgpu_vreg(vgpu, status_reg);
        status.udw = vgpu_vreg(vgpu, status_reg + 4);

        if (status.execlist_queue_full) {
                gvt_vgpu_err("virtual execlist slots are full\n");
                return NULL;
        }

        return &execlist->slot[status.execlist_write_pointer];
}

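/*
 * Emulate an ELSP submission of two context descriptors: copy them into the
 * next free virtual slot, then report idle-to-active, lite-restore +
 * preempted, or a plain pending-slot update, matching what the hardware
 * would put into the status register and CSB in each situation.
 */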
static int emulate_execlist_schedule_in(struct intel_vgpu_execlist *execlist,
                struct execlist_ctx_descriptor_format ctx[2])
{
        struct intel_vgpu_execlist_slot *running = execlist->running_slot;
        struct intel_vgpu_execlist_slot *slot =
                get_next_execlist_slot(execlist);

        struct execlist_ctx_descriptor_format *ctx0, *ctx1;
        struct execlist_context_status_format status;
        struct intel_vgpu *vgpu = execlist->vgpu;

        gvt_dbg_el("emulate schedule-in\n");

        if (!slot) {
                gvt_vgpu_err("no available execlist slot\n");
                return -EINVAL;
        }

        memset(&status, 0, sizeof(status));
        memset(slot->ctx, 0, sizeof(slot->ctx));

        slot->ctx[0] = ctx[0];
        slot->ctx[1] = ctx[1];

        gvt_dbg_el("alloc slot index %d ctx 0 %x ctx 1 %x\n",
                        slot->index, ctx[0].context_id,
                        ctx[1].context_id);

        /*
         * no running execlist, make this write bundle the running execlist
         * -> idle-to-active
         */
        if (!running) {
                gvt_dbg_el("no current running execlist\n");

                execlist->running_slot = slot;
                execlist->pending_slot = NULL;
                execlist->running_context = &slot->ctx[0];

                gvt_dbg_el("running slot index %d running context %x\n",
                                execlist->running_slot->index,
                                execlist->running_context->context_id);

                emulate_execlist_status(execlist);

                status.idle_to_active = 1;
                status.context_id = 0;

                emulate_csb_update(execlist, &status, false);
                return 0;
        }

        ctx0 = &running->ctx[0];
        ctx1 = &running->ctx[1];

        gvt_dbg_el("current running slot index %d ctx 0 %x ctx 1 %x\n",
                running->index, ctx0->context_id, ctx1->context_id);

        /*
         * already has a running execlist
         *      a. running ctx1 is valid,
         *         ctx0 is finished, and running ctx1 == new execlist ctx[0]
         *      b. running ctx1 is not valid,
         *         ctx0 == new execlist ctx[0]
         * ----> lite-restore + preempted
         */
        if ((valid_context(ctx1) && same_context(ctx1, &slot->ctx[0]) &&
                /* condition a */
                (!same_context(ctx0, execlist->running_context))) ||
                        (!valid_context(ctx1) &&
                         same_context(ctx0, &slot->ctx[0]))) { /* condition b */
                gvt_dbg_el("need to switch virtual execlist slot\n");

                execlist->pending_slot = slot;
                switch_virtual_execlist_slot(execlist);

                emulate_execlist_status(execlist);

                status.lite_restore = status.preempted = 1;
                status.context_id = ctx[0].context_id;

                emulate_csb_update(execlist, &status, false);
        } else {
                gvt_dbg_el("emulate as pending slot\n");
                /*
                 * otherwise
                 * --> emulate the pending-execlist-exists, no-preemption case
                 */
                execlist->pending_slot = slot;
                emulate_execlist_status(execlist);
        }
        return 0;
}

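/*
 * Each context descriptor is two dwords; descriptor i of the captured ELSP
 * write starts at data[i * 2].
 */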
#define get_desc_from_elsp_dwords(ed, i) \
        ((struct execlist_ctx_descriptor_format *)&((ed)->data[i * 2]))

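/*
 * Workload "prepare" hook: when this workload carries an ELSP write that has
 * not been emulated yet, replay it through emulate_execlist_schedule_in()
 * before the shadow workload runs.
 */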
static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
{
        struct intel_vgpu *vgpu = workload->vgpu;
        struct intel_vgpu_submission *s = &vgpu->submission;
        struct execlist_ctx_descriptor_format ctx[2];
        int ring_id = workload->ring_id;
        int ret;

        if (!workload->emulate_schedule_in)
                return 0;

        ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0);
        ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1);

        ret = emulate_execlist_schedule_in(&s->execlist[ring_id], ctx);
        if (ret) {
                gvt_vgpu_err("fail to emulate execlist schedule in\n");
                return ret;
        }
        return 0;
}

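/*
 * Workload "complete" hook: unless the next queued workload is a
 * lite-restore of the same context, or the workload failed, or the engine is
 * being reset, emulate the schedule-out of this workload's context before
 * tearing the workload down.
 */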
static int complete_execlist_workload(struct intel_vgpu_workload *workload)
{
        struct intel_vgpu *vgpu = workload->vgpu;
        int ring_id = workload->ring_id;
        struct intel_vgpu_submission *s = &vgpu->submission;
        struct intel_vgpu_execlist *execlist = &s->execlist[ring_id];
        struct intel_vgpu_workload *next_workload;
        struct list_head *next = workload_q_head(vgpu, ring_id)->next;
        bool lite_restore = false;
        int ret = 0;

        gvt_dbg_el("complete workload %p status %d\n", workload,
                        workload->status);

        if (workload->status || (vgpu->resetting_eng & BIT(ring_id)))
                goto out;

        if (!list_empty(workload_q_head(vgpu, ring_id))) {
                struct execlist_ctx_descriptor_format *this_desc, *next_desc;

                next_workload = container_of(next,
                                struct intel_vgpu_workload, list);
                this_desc = &workload->ctx_desc;
                next_desc = &next_workload->ctx_desc;

                lite_restore = same_context(this_desc, next_desc);
        }

        if (lite_restore) {
                gvt_dbg_el("next context == current - no schedule-out\n");
                goto out;
        }

        ret = emulate_execlist_ctx_schedule_out(execlist, &workload->ctx_desc);
out:
        intel_vgpu_unpin_mm(workload->shadow_mm);
        intel_vgpu_destroy_workload(workload);
        return ret;
}

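/*
 * Wrap one context descriptor into a vGPU workload, wire up the execlist
 * prepare/complete callbacks, and queue it to the GVT scheduler. Only the
 * workload created for the first descriptor of an ELSP write carries the
 * schedule-in emulation.
 */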
static int submit_context(struct intel_vgpu *vgpu, int ring_id,
                struct execlist_ctx_descriptor_format *desc,
                bool emulate_schedule_in)
{
        struct intel_vgpu_submission *s = &vgpu->submission;
        struct intel_vgpu_workload *workload = NULL;

        workload = intel_vgpu_create_workload(vgpu, ring_id, desc);
        if (IS_ERR(workload))
                return PTR_ERR(workload);

        workload->prepare = prepare_execlist_workload;
        workload->complete = complete_execlist_workload;
        workload->emulate_schedule_in = emulate_schedule_in;

        if (emulate_schedule_in)
                workload->elsp_dwords = s->execlist[ring_id].elsp_dwords;

        gvt_dbg_el("workload %p emulate schedule_in %d\n", workload,
                        emulate_schedule_in);

        intel_vgpu_queue_workload(workload);
        return 0;
}

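/*
 * Entry point for a trapped ELSP write: validate the two captured
 * descriptors (descriptor 0 must be valid, and descriptors without privilege
 * access, i.e. GGTT-based submissions, are rejected) and turn each valid one
 * into a workload via submit_context().
 */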
int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id)
{
        struct intel_vgpu_submission *s = &vgpu->submission;
        struct intel_vgpu_execlist *execlist = &s->execlist[ring_id];
        struct execlist_ctx_descriptor_format *desc[2];
        int i, ret;

        desc[0] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0);
        desc[1] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1);

        if (!desc[0]->valid) {
                gvt_vgpu_err("invalid elsp submission, desc0 is invalid\n");
                goto inv_desc;
        }

        for (i = 0; i < ARRAY_SIZE(desc); i++) {
                if (!desc[i]->valid)
                        continue;
                if (!desc[i]->privilege_access) {
                        gvt_vgpu_err("unexpected GGTT elsp submission\n");
                        goto inv_desc;
                }
        }

        /* submit workload */
        for (i = 0; i < ARRAY_SIZE(desc); i++) {
                if (!desc[i]->valid)
                        continue;
                ret = submit_context(vgpu, ring_id, desc[i], i == 0);
                if (ret) {
                        gvt_vgpu_err("failed to submit desc %d\n", i);
                        return ret;
                }
        }

        return 0;

inv_desc:
        gvt_vgpu_err("descriptors content: desc0 %08x %08x desc1 %08x %08x\n",
                     desc[0]->udw, desc[0]->ldw, desc[1]->udw, desc[1]->ldw);
        return -EINVAL;
}

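/*
 * Reset the virtual execlist state for one ring. The CSB write pointer is
 * initialized to 0x7, which emulate_csb_update() treats as "buffer empty",
 * so the first status entry written after reset lands at index 0.
 */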
static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id)
{
        struct intel_vgpu_submission *s = &vgpu->submission;
        struct intel_vgpu_execlist *execlist = &s->execlist[ring_id];
        struct execlist_context_status_pointer_format ctx_status_ptr;
        u32 ctx_status_ptr_reg;

        memset(execlist, 0, sizeof(*execlist));

        execlist->vgpu = vgpu;
        execlist->ring_id = ring_id;
        execlist->slot[0].index = 0;
        execlist->slot[1].index = 1;

        ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
                        _EL_OFFSET_STATUS_PTR);
        ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);
        ctx_status_ptr.read_ptr = 0;
        ctx_status_ptr.write_ptr = 0x7;
        vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;
}

static void clean_execlist(struct intel_vgpu *vgpu,
                intel_engine_mask_t engine_mask)
{
        struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
        struct intel_engine_cs *engine;
        struct intel_vgpu_submission *s = &vgpu->submission;
        intel_engine_mask_t tmp;

        for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp) {
                kfree(s->ring_scan_buffer[engine->id]);
                s->ring_scan_buffer[engine->id] = NULL;
                s->ring_scan_buffer_size[engine->id] = 0;
        }
}

static void reset_execlist(struct intel_vgpu *vgpu,
                intel_engine_mask_t engine_mask)
{
        struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
        struct intel_engine_cs *engine;
        intel_engine_mask_t tmp;

        for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp)
                init_vgpu_execlist(vgpu, engine->id);
}

static int init_execlist(struct intel_vgpu *vgpu,
                intel_engine_mask_t engine_mask)
{
        reset_execlist(vgpu, engine_mask);
        return 0;
}

const struct intel_vgpu_submission_ops intel_vgpu_execlist_submission_ops = {
        .name = "execlist",
        .init = init_execlist,
        .reset = reset_execlist,
        .clean = clean_execlist,
};