/*
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Zhiyuan Lv <zhiyuan.lv@intel.com>
 *    Zhi Wang <zhi.a.wang@intel.com>
 *
 * Contributors:
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *    Ping Gao <ping.a.gao@intel.com>
 *    Tina Zhang <tina.zhang@intel.com>
 *
 */

#include "i915_drv.h"
#include "gvt.h"
#define _EL_OFFSET_STATUS	0x234
#define _EL_OFFSET_STATUS_BUF	0x370
#define _EL_OFFSET_STATUS_PTR	0x3A0

#define execlist_ring_mmio(gvt, ring_id, offset) \
	(gvt->dev_priv->engine[ring_id]->mmio_base + (offset))

#define valid_context(ctx) ((ctx)->valid)
#define same_context(a, b) (((a)->context_id == (b)->context_id) && \
		((a)->lrca == (b)->lrca))
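
/*
 * Per-ring table of the virtual interrupt event injected into the guest
 * whenever a context-switch status entry is emulated for that ring
 * (see emulate_csb_update()).
 */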
static int context_switch_events[] = {
	[RCS] = RCS_AS_CONTEXT_SWITCH,
	[BCS] = BCS_AS_CONTEXT_SWITCH,
	[VCS] = VCS_AS_CONTEXT_SWITCH,
	[VCS2] = VCS2_AS_CONTEXT_SWITCH,
	[VECS] = VECS_AS_CONTEXT_SWITCH,
};

static int ring_id_to_context_switch_event(int ring_id)
{
	if (WARN_ON(ring_id < RCS ||
		    ring_id >= ARRAY_SIZE(context_switch_events)))
		return -EINVAL;

	return context_switch_events[ring_id];
}
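
/*
 * Promote the pending virtual execlist slot to be the running slot and
 * clear the pending slot; running_context is re-pointed at element 0 of
 * the new running slot.
 */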
static void switch_virtual_execlist_slot(struct intel_vgpu_execlist *execlist)
{
	gvt_dbg_el("[before] running slot %d/context %x pending slot %d\n",
			execlist->running_slot ?
			execlist->running_slot->index : -1,
			execlist->running_context ?
			execlist->running_context->context_id : 0,
			execlist->pending_slot ?
			execlist->pending_slot->index : -1);

	execlist->running_slot = execlist->pending_slot;
	execlist->pending_slot = NULL;
	execlist->running_context = execlist->running_context ?
		&execlist->running_slot->ctx[0] : NULL;

	gvt_dbg_el("[after] running slot %d/context %x pending slot %d\n",
			execlist->running_slot ?
			execlist->running_slot->index : -1,
			execlist->running_context ?
			execlist->running_context->context_id : 0,
			execlist->pending_slot ?
			execlist->pending_slot->index : -1);
}
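
/*
 * Rebuild the virtual EXECLIST_STATUS register for this ring from the
 * current running/pending slot state and write it back to the vGPU's
 * vregs so the guest observes a consistent status.
 */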
static void emulate_execlist_status(struct intel_vgpu_execlist *execlist)
{
	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
	struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
	struct execlist_ctx_descriptor_format *desc = execlist->running_context;
	struct intel_vgpu *vgpu = execlist->vgpu;
	struct execlist_status_format status;
	int ring_id = execlist->ring_id;
	u32 status_reg = execlist_ring_mmio(vgpu->gvt,
			ring_id, _EL_OFFSET_STATUS);

	status.ldw = vgpu_vreg(vgpu, status_reg);
	status.udw = vgpu_vreg(vgpu, status_reg + 4);

	if (running) {
		status.current_execlist_pointer = !!running->index;
		status.execlist_write_pointer = !!!running->index;
		status.execlist_0_active = status.execlist_0_valid =
			!!!(running->index);
		status.execlist_1_active = status.execlist_1_valid =
			!!(running->index);
	} else {
		status.context_id = 0;
		status.execlist_0_active = status.execlist_0_valid = 0;
		status.execlist_1_active = status.execlist_1_valid = 0;
	}

	status.context_id = desc ? desc->context_id : 0;
	status.execlist_queue_full = !!(pending);

	vgpu_vreg(vgpu, status_reg) = status.ldw;
	vgpu_vreg(vgpu, status_reg + 4) = status.udw;

	gvt_dbg_el("vgpu%d: status reg offset %x ldw %x udw %x\n",
		vgpu->id, status_reg, status.ldw, status.udw);
}
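
/*
 * Emit one context status buffer (CSB) entry for this ring: advance the
 * virtual write pointer, store the entry in the CSB vregs, mirror it into
 * the guest's hardware status page when one is mapped, and inject the
 * ring's context-switch event unless trigger_interrupt_later is set.
 */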
static void emulate_csb_update(struct intel_vgpu_execlist *execlist,
		struct execlist_context_status_format *status,
		bool trigger_interrupt_later)
{
	struct intel_vgpu *vgpu = execlist->vgpu;
	int ring_id = execlist->ring_id;
	struct execlist_context_status_pointer_format ctx_status_ptr;
	u32 write_pointer;
	u32 ctx_status_ptr_reg, ctx_status_buf_reg, offset;
	unsigned long hwsp_gpa;
	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;

	ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS_PTR);
	ctx_status_buf_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS_BUF);

	ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);

	write_pointer = ctx_status_ptr.write_ptr;

	if (write_pointer == 0x7)
		write_pointer = 0;
	else {
		++write_pointer;
		write_pointer %= 0x6;
	}

	offset = ctx_status_buf_reg + write_pointer * 8;

	vgpu_vreg(vgpu, offset) = status->ldw;
	vgpu_vreg(vgpu, offset + 4) = status->udw;

	ctx_status_ptr.write_ptr = write_pointer;
	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;

	/* Update the CSB and CSB write pointer in HWSP */
	hwsp_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
					 vgpu->hws_pga[ring_id]);
	if (hwsp_gpa != INTEL_GVT_INVALID_ADDR) {
		intel_gvt_hypervisor_write_gpa(vgpu,
			hwsp_gpa + I915_HWS_CSB_BUF0_INDEX * 4 +
			write_pointer * 8,
			status, 8);
		intel_gvt_hypervisor_write_gpa(vgpu,
			hwsp_gpa +
			intel_hws_csb_write_index(dev_priv) * 4,
			&write_pointer, 4);
	}

	gvt_dbg_el("vgpu%d: w pointer %u reg %x csb l %x csb h %x\n",
		vgpu->id, write_pointer, offset, status->ldw, status->udw);

	if (trigger_interrupt_later)
		return;

	intel_vgpu_trigger_virtual_event(vgpu,
			ring_id_to_context_switch_event(execlist->ring_id));
}
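
/*
 * Emulate scheduling a context out of the running execlist slot. Depending
 * on which element completed and whether a pending slot exists, this emits
 * context-complete/element-switch or context-complete/active-to-idle status
 * entries, plus an idle-to-active entry when a pending execlist takes over.
 */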
static int emulate_execlist_ctx_schedule_out(
		struct intel_vgpu_execlist *execlist,
		struct execlist_ctx_descriptor_format *ctx)
{
	struct intel_vgpu *vgpu = execlist->vgpu;
	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
	struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
	struct execlist_ctx_descriptor_format *ctx0 = &running->ctx[0];
	struct execlist_ctx_descriptor_format *ctx1 = &running->ctx[1];
	struct execlist_context_status_format status;

	memset(&status, 0, sizeof(status));

	gvt_dbg_el("schedule out context id %x\n", ctx->context_id);

	if (WARN_ON(!same_context(ctx, execlist->running_context))) {
		gvt_vgpu_err("schedule out context is not running context,"
				"ctx id %x running ctx id %x\n",
				ctx->context_id,
				execlist->running_context->context_id);
		return -EINVAL;
	}

	/* ctx1 is valid, ctx0/ctx is scheduled-out -> element switch */
	if (valid_context(ctx1) && same_context(ctx0, ctx)) {
		gvt_dbg_el("ctx 1 valid, ctx/ctx 0 is scheduled-out\n");

		execlist->running_context = ctx1;

		emulate_execlist_status(execlist);

		status.context_complete = status.element_switch = 1;
		status.context_id = ctx->context_id;

		emulate_csb_update(execlist, &status, false);
		/*
		 * ctx1 is not valid, ctx == ctx0
		 * ctx1 is valid, ctx1 == ctx
		 *	--> last element is finished
		 * emulate:
		 *	active-to-idle if there is *no* pending execlist
		 *	context-complete if there *is* pending execlist
		 */
	} else if ((!valid_context(ctx1) && same_context(ctx0, ctx))
			|| (valid_context(ctx1) && same_context(ctx1, ctx))) {
		gvt_dbg_el("need to switch virtual execlist slot\n");

		switch_virtual_execlist_slot(execlist);

		emulate_execlist_status(execlist);

		status.context_complete = status.active_to_idle = 1;
		status.context_id = ctx->context_id;

		if (!pending) {
			emulate_csb_update(execlist, &status, false);
		} else {
			emulate_csb_update(execlist, &status, true);

			memset(&status, 0, sizeof(status));

			status.idle_to_active = 1;
			status.context_id = 0;

			emulate_csb_update(execlist, &status, false);
		}
	} else {
		WARN_ON(1);
		return -EINVAL;
	}

	return 0;
}
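
/*
 * Return the virtual execlist slot addressed by the write pointer of the
 * virtual EXECLIST_STATUS register, or NULL when the queue is reported
 * full.
 */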
static struct intel_vgpu_execlist_slot *get_next_execlist_slot(
		struct intel_vgpu_execlist *execlist)
{
	struct intel_vgpu *vgpu = execlist->vgpu;
	int ring_id = execlist->ring_id;
	u32 status_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS);
	struct execlist_status_format status;

	status.ldw = vgpu_vreg(vgpu, status_reg);
	status.udw = vgpu_vreg(vgpu, status_reg + 4);

	if (status.execlist_queue_full) {
		gvt_vgpu_err("virtual execlist slots are full\n");
		return NULL;
	}

	return &execlist->slot[status.execlist_write_pointer];
}
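
/*
 * Emulate the guest's ELSP submission of a two-element execlist: place the
 * descriptors into a free virtual slot and, depending on the current
 * running slot, emit an idle-to-active, lite-restore/preempted, or plain
 * pending-slot status update.
 */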
static int emulate_execlist_schedule_in(struct intel_vgpu_execlist *execlist,
		struct execlist_ctx_descriptor_format ctx[2])
{
	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
	struct intel_vgpu_execlist_slot *slot =
		get_next_execlist_slot(execlist);

	struct execlist_ctx_descriptor_format *ctx0, *ctx1;
	struct execlist_context_status_format status;
	struct intel_vgpu *vgpu = execlist->vgpu;

	gvt_dbg_el("emulate schedule-in\n");

	if (!slot) {
		gvt_vgpu_err("no available execlist slot\n");
		return -EINVAL;
	}

	memset(&status, 0, sizeof(status));
	memset(slot->ctx, 0, sizeof(slot->ctx));

	slot->ctx[0] = ctx[0];
	slot->ctx[1] = ctx[1];

	gvt_dbg_el("alloc slot index %d ctx 0 %x ctx 1 %x\n",
			slot->index, ctx[0].context_id,
			ctx[1].context_id);

	/*
	 * no running execlist, make this submitted bundle the running execlist
	 * -> idle-to-active
	 */
	if (!running) {
		gvt_dbg_el("no current running execlist\n");

		execlist->running_slot = slot;
		execlist->pending_slot = NULL;
		execlist->running_context = &slot->ctx[0];

		gvt_dbg_el("running slot index %d running context %x\n",
				execlist->running_slot->index,
				execlist->running_context->context_id);

		emulate_execlist_status(execlist);

		status.idle_to_active = 1;
		status.context_id = 0;

		emulate_csb_update(execlist, &status, false);
		return 0;
	}

	ctx0 = &running->ctx[0];
	ctx1 = &running->ctx[1];

	gvt_dbg_el("current running slot index %d ctx 0 %x ctx 1 %x\n",
		running->index, ctx0->context_id, ctx1->context_id);

	/*
	 * already has a running execlist
	 *	a. running ctx1 is valid,
	 *	   ctx0 is finished, and running ctx1 == new execlist ctx[0]
	 *	b. running ctx1 is not valid,
	 *	   ctx0 == new execlist ctx[0]
	 * ----> lite-restore + preempted
	 */
	if ((valid_context(ctx1) && same_context(ctx1, &slot->ctx[0]) &&
		/* condition a */
		(!same_context(ctx0, execlist->running_context))) ||
			(!valid_context(ctx1) &&
			 same_context(ctx0, &slot->ctx[0]))) { /* condition b */
		gvt_dbg_el("need to switch virtual execlist slot\n");

		execlist->pending_slot = slot;
		switch_virtual_execlist_slot(execlist);

		emulate_execlist_status(execlist);

		status.lite_restore = status.preempted = 1;
		status.context_id = ctx[0].context_id;

		emulate_csb_update(execlist, &status, false);
	} else {
		gvt_dbg_el("emulate as pending slot\n");
		/*
		 * otherwise
		 * --> emulate the pending-execlist-exists, no-preemption case
		 */
		execlist->pending_slot = slot;
		emulate_execlist_status(execlist);
	}

	return 0;
}
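
/* Each context descriptor occupies two consecutive dwords of the ELSP data. */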
#define get_desc_from_elsp_dwords(ed, i) \
	((struct execlist_ctx_descriptor_format *)&((ed)->data[i * 2]))
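
/*
 * prepare() callback of an execlist workload: when the workload carries the
 * guest's ELSP write (emulate_schedule_in), replay that schedule-in on the
 * virtual execlist before the workload is dispatched.
 */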
static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct execlist_ctx_descriptor_format ctx[2];
	int ring_id = workload->ring_id;
	int ret;

	if (!workload->emulate_schedule_in)
		return 0;

	ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0);
	ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1);

	ret = emulate_execlist_schedule_in(&s->execlist[ring_id], ctx);
	if (ret) {
		gvt_vgpu_err("fail to emulate execlist schedule in\n");
		return ret;
	}

	return 0;
}
static int complete_execlist_workload(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	int ring_id = workload->ring_id;
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_execlist *execlist = &s->execlist[ring_id];
	struct intel_vgpu_workload *next_workload;
	struct list_head *next = workload_q_head(vgpu, ring_id)->next;
	bool lite_restore = false;
	int ret = 0;

	gvt_dbg_el("complete workload %p status %d\n", workload,
			workload->status);

	if (workload->status || (vgpu->resetting_eng & ENGINE_MASK(ring_id)))
		goto out;

	if (!list_empty(workload_q_head(vgpu, ring_id))) {
		struct execlist_ctx_descriptor_format *this_desc, *next_desc;

		next_workload = container_of(next,
				struct intel_vgpu_workload, list);
		this_desc = &workload->ctx_desc;
		next_desc = &next_workload->ctx_desc;

		lite_restore = same_context(this_desc, next_desc);
	}

	if (lite_restore) {
		gvt_dbg_el("next context == current - no schedule-out\n");
		goto out;
	}

	ret = emulate_execlist_ctx_schedule_out(execlist, &workload->ctx_desc);
out:
	intel_vgpu_unpin_mm(workload->shadow_mm);
	intel_vgpu_destroy_workload(workload);
	return ret;
}
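
/*
 * Build a vGPU workload for one context descriptor and queue it to the GVT
 * scheduler; only the head of an ELSP submission keeps a copy of the ELSP
 * dwords needed to emulate schedule-in later.
 */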
static int submit_context(struct intel_vgpu *vgpu, int ring_id,
		struct execlist_ctx_descriptor_format *desc,
		bool emulate_schedule_in)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_workload *workload = NULL;

	workload = intel_vgpu_create_workload(vgpu, ring_id, desc);
	if (IS_ERR(workload))
		return PTR_ERR(workload);

	workload->prepare = prepare_execlist_workload;
	workload->complete = complete_execlist_workload;
	workload->emulate_schedule_in = emulate_schedule_in;

	if (emulate_schedule_in)
		workload->elsp_dwords = s->execlist[ring_id].elsp_dwords;

	gvt_dbg_el("workload %p emulate schedule_in %d\n", workload,
			emulate_schedule_in);

	intel_vgpu_queue_workload(workload);
	return 0;
}
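
/*
 * Entry point for a trapped guest ELSP write on @ring_id: validate both
 * context descriptors and submit a workload for each valid one, with
 * schedule-in emulation attached to the first.
 */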
int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu, int ring_id)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_execlist *execlist = &s->execlist[ring_id];
	struct execlist_ctx_descriptor_format *desc[2];
	int i, ret;

	desc[0] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0);
	desc[1] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1);

	if (!desc[0]->valid) {
		gvt_vgpu_err("invalid elsp submission, desc0 is invalid\n");
		goto inv_desc;
	}

	for (i = 0; i < ARRAY_SIZE(desc); i++) {
		if (!desc[i]->valid)
			continue;
		if (!desc[i]->privilege_access) {
			gvt_vgpu_err("unexpected GGTT elsp submission\n");
			goto inv_desc;
		}
	}

	/* submit workload */
	for (i = 0; i < ARRAY_SIZE(desc); i++) {
		if (!desc[i]->valid)
			continue;
		ret = submit_context(vgpu, ring_id, desc[i], i == 0);
		if (ret) {
			gvt_vgpu_err("failed to submit desc %d\n", i);
			return ret;
		}
	}

	return 0;

inv_desc:
	gvt_vgpu_err("descriptors content: desc0 %08x %08x desc1 %08x %08x\n",
		     desc[0]->udw, desc[0]->ldw, desc[1]->udw, desc[1]->ldw);
	return -EINVAL;
}
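
/*
 * Reset the per-ring virtual execlist state and re-initialize the virtual
 * context status pointer register (read pointer 0, write pointer 0x7).
 */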
static void init_vgpu_execlist(struct intel_vgpu *vgpu, int ring_id)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_execlist *execlist = &s->execlist[ring_id];
	struct execlist_context_status_pointer_format ctx_status_ptr;
	u32 ctx_status_ptr_reg;

	memset(execlist, 0, sizeof(*execlist));

	execlist->vgpu = vgpu;
	execlist->ring_id = ring_id;
	execlist->slot[0].index = 0;
	execlist->slot[1].index = 1;

	ctx_status_ptr_reg = execlist_ring_mmio(vgpu->gvt, ring_id,
			_EL_OFFSET_STATUS_PTR);
	ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);
	ctx_status_ptr.read_ptr = 0;
	ctx_status_ptr.write_ptr = 0x7;
	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;
}
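
/*
 * Release the per-engine ring scan buffers kept in the vGPU submission
 * state for the engines in @engine_mask.
 */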
static void clean_execlist(struct intel_vgpu *vgpu, unsigned long engine_mask)
{
	unsigned int tmp;
	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
	struct intel_engine_cs *engine;
	struct intel_vgpu_submission *s = &vgpu->submission;

	for_each_engine_masked(engine, dev_priv, engine_mask, tmp) {
		kfree(s->ring_scan_buffer[engine->id]);
		s->ring_scan_buffer[engine->id] = NULL;
		s->ring_scan_buffer_size[engine->id] = 0;
	}
}
static void reset_execlist(struct intel_vgpu *vgpu,
		unsigned long engine_mask)
{
	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
	struct intel_engine_cs *engine;
	unsigned int tmp;

	for_each_engine_masked(engine, dev_priv, engine_mask, tmp)
		init_vgpu_execlist(vgpu, engine->id);
}
static int init_execlist(struct intel_vgpu *vgpu,
		unsigned long engine_mask)
{
	reset_execlist(vgpu, engine_mask);
	return 0;
}
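
/* Execlist-mode virtual submission backend operations. */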
const struct intel_vgpu_submission_ops intel_vgpu_execlist_submission_ops = {
	.name = "execlist",
	.init = init_execlist,
	.reset = reset_execlist,
	.clean = clean_execlist,
};