/*
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Zhiyuan Lv <zhiyuan.lv@intel.com>
 *    Zhi Wang <zhi.a.wang@intel.com>
 *
 * Contributors:
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *    Ping Gao <ping.a.gao@intel.com>
 *    Tina Zhang <tina.zhang@intel.com>
 *
 */

#include "i915_drv.h"
#include "gvt.h"
#define _EL_OFFSET_STATUS	0x234
#define _EL_OFFSET_STATUS_BUF	0x370
#define _EL_OFFSET_STATUS_PTR	0x3A0

#define execlist_ring_mmio(e, offset) ((e)->mmio_base + (offset))

#define valid_context(ctx) ((ctx)->valid)
#define same_context(a, b) (((a)->context_id == (b)->context_id) && \
		((a)->lrca == (b)->lrca))
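
/*
 * Per-engine context switch events injected into the guest once a CSB
 * update has been emulated; indexed by engine id.
 */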
static int context_switch_events[] = {
	[RCS0]  = RCS_AS_CONTEXT_SWITCH,
	[BCS0]  = BCS_AS_CONTEXT_SWITCH,
	[VCS0]  = VCS_AS_CONTEXT_SWITCH,
	[VCS1]  = VCS2_AS_CONTEXT_SWITCH,
	[VECS0] = VECS_AS_CONTEXT_SWITCH,
};
static int to_context_switch_event(const struct intel_engine_cs *engine)
{
	if (WARN_ON(engine->id >= ARRAY_SIZE(context_switch_events)))
		return -EINVAL;

	return context_switch_events[engine->id];
}
static void switch_virtual_execlist_slot(struct intel_vgpu_execlist *execlist)
{
	gvt_dbg_el("[before] running slot %d/context %x pending slot %d\n",
		   execlist->running_slot ?
		   execlist->running_slot->index : -1,
		   execlist->running_context ?
		   execlist->running_context->context_id : 0,
		   execlist->pending_slot ?
		   execlist->pending_slot->index : -1);

	execlist->running_slot = execlist->pending_slot;
	execlist->pending_slot = NULL;
	execlist->running_context = execlist->running_context ?
		&execlist->running_slot->ctx[0] : NULL;

	gvt_dbg_el("[after] running slot %d/context %x pending slot %d\n",
		   execlist->running_slot ?
		   execlist->running_slot->index : -1,
		   execlist->running_context ?
		   execlist->running_context->context_id : 0,
		   execlist->pending_slot ?
		   execlist->pending_slot->index : -1);
}
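
/*
 * Recompute the virtual EXECLIST_STATUS register from the current
 * running/pending slot state, so the guest's next MMIO read sees a view
 * that is consistent with the emulated schedule-in/out.
 */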
static void emulate_execlist_status(struct intel_vgpu_execlist *execlist)
{
	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
	struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
	struct execlist_ctx_descriptor_format *desc = execlist->running_context;
	struct intel_vgpu *vgpu = execlist->vgpu;
	struct execlist_status_format status;
	u32 status_reg =
		execlist_ring_mmio(execlist->engine, _EL_OFFSET_STATUS);

	status.ldw = vgpu_vreg(vgpu, status_reg);
	status.udw = vgpu_vreg(vgpu, status_reg + 4);

	if (running) {
		status.current_execlist_pointer = !!running->index;
		status.execlist_write_pointer = !!!running->index;
		status.execlist_0_active = status.execlist_0_valid =
			!!!(running->index);
		status.execlist_1_active = status.execlist_1_valid =
			!!(running->index);
	} else {
		status.context_id = 0;
		status.execlist_0_active = status.execlist_0_valid = 0;
		status.execlist_1_active = status.execlist_1_valid = 0;
	}

	status.context_id = desc ? desc->context_id : 0;
	status.execlist_queue_full = !!(pending);

	vgpu_vreg(vgpu, status_reg) = status.ldw;
	vgpu_vreg(vgpu, status_reg + 4) = status.udw;

	gvt_dbg_el("vgpu%d: status reg offset %x ldw %x udw %x\n",
		   vgpu->id, status_reg, status.ldw, status.udw);
}
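
/*
 * Append one context status event to the virtual CSB.  The buffer holds
 * six entries; a write pointer of 0x7 is the after-reset value, so the
 * first event lands in entry 0.  The event is also mirrored into the
 * guest's HWSP copy of the CSB, and the context switch interrupt is
 * raised unless the caller asks to defer it (trigger_interrupt_later).
 */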
static void emulate_csb_update(struct intel_vgpu_execlist *execlist,
			       struct execlist_context_status_format *status,
			       bool trigger_interrupt_later)
{
	struct intel_vgpu *vgpu = execlist->vgpu;
	struct execlist_context_status_pointer_format ctx_status_ptr;
	u32 write_pointer;
	u32 ctx_status_ptr_reg, ctx_status_buf_reg, offset;
	unsigned long hwsp_gpa;

	ctx_status_ptr_reg =
		execlist_ring_mmio(execlist->engine, _EL_OFFSET_STATUS_PTR);
	ctx_status_buf_reg =
		execlist_ring_mmio(execlist->engine, _EL_OFFSET_STATUS_BUF);

	ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);

	write_pointer = ctx_status_ptr.write_ptr;

	if (write_pointer == 0x7)
		write_pointer = 0;
	else {
		++write_pointer;
		write_pointer %= 0x6;
	}

	offset = ctx_status_buf_reg + write_pointer * 8;

	vgpu_vreg(vgpu, offset) = status->ldw;
	vgpu_vreg(vgpu, offset + 4) = status->udw;

	ctx_status_ptr.write_ptr = write_pointer;
	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;

	/* Update the CSB and CSB write pointer in HWSP */
	hwsp_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
					 vgpu->hws_pga[execlist->engine->id]);
	if (hwsp_gpa != INTEL_GVT_INVALID_ADDR) {
		intel_gvt_hypervisor_write_gpa(vgpu,
			hwsp_gpa + I915_HWS_CSB_BUF0_INDEX * 4 + write_pointer * 8,
			status, 8);
		intel_gvt_hypervisor_write_gpa(vgpu,
			hwsp_gpa + intel_hws_csb_write_index(execlist->engine->i915) * 4,
			&write_pointer, 4);
	}

	gvt_dbg_el("vgpu%d: w pointer %u reg %x csb l %x csb h %x\n",
		   vgpu->id, write_pointer, offset, status->ldw, status->udw);

	if (trigger_interrupt_later)
		return;

	intel_vgpu_trigger_virtual_event(vgpu,
			to_context_switch_event(execlist->engine));
}
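
/*
 * Emulate a context scheduling out of the running slot.  Three cases are
 * handled: an element switch when the second context of the slot is still
 * valid, last-element completion with no pending slot (active-to-idle),
 * and last-element completion with a pending slot, which additionally
 * emits an idle-to-active event for the promoted slot.
 */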
static int emulate_execlist_ctx_schedule_out(
		struct intel_vgpu_execlist *execlist,
		struct execlist_ctx_descriptor_format *ctx)
{
	struct intel_vgpu *vgpu = execlist->vgpu;
	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
	struct intel_vgpu_execlist_slot *pending = execlist->pending_slot;
	struct execlist_ctx_descriptor_format *ctx0 = &running->ctx[0];
	struct execlist_ctx_descriptor_format *ctx1 = &running->ctx[1];
	struct execlist_context_status_format status;

	memset(&status, 0, sizeof(status));

	gvt_dbg_el("schedule out context id %x\n", ctx->context_id);

	if (WARN_ON(!same_context(ctx, execlist->running_context))) {
		gvt_vgpu_err("schedule out context is not running context,"
			     "ctx id %x running ctx id %x\n",
			     ctx->context_id,
			     execlist->running_context->context_id);
		return -EINVAL;
	}

	/* ctx1 is valid, ctx0/ctx is scheduled-out -> element switch */
	if (valid_context(ctx1) && same_context(ctx0, ctx)) {
		gvt_dbg_el("ctx 1 valid, ctx/ctx 0 is scheduled-out\n");

		execlist->running_context = ctx1;

		emulate_execlist_status(execlist);

		status.context_complete = status.element_switch = 1;
		status.context_id = ctx->context_id;

		emulate_csb_update(execlist, &status, false);
		/*
		 * ctx1 is not valid, ctx == ctx0
		 * ctx1 is valid, ctx1 == ctx
		 *	--> last element is finished
		 * emulate:
		 *	active-to-idle if there is *no* pending execlist
		 *	context-complete if there *is* pending execlist
		 */
	} else if ((!valid_context(ctx1) && same_context(ctx0, ctx))
		   || (valid_context(ctx1) && same_context(ctx1, ctx))) {
		gvt_dbg_el("need to switch virtual execlist slot\n");

		switch_virtual_execlist_slot(execlist);

		emulate_execlist_status(execlist);

		status.context_complete = status.active_to_idle = 1;
		status.context_id = ctx->context_id;

		if (!pending) {
			emulate_csb_update(execlist, &status, false);
		} else {
			emulate_csb_update(execlist, &status, true);

			memset(&status, 0, sizeof(status));

			status.idle_to_active = 1;
			status.context_id = 0;

			emulate_csb_update(execlist, &status, false);
		}
	} else {
		WARN_ON(1);
		return -EINVAL;
	}

	return 0;
}
static struct intel_vgpu_execlist_slot *get_next_execlist_slot(
		struct intel_vgpu_execlist *execlist)
{
	struct intel_vgpu *vgpu = execlist->vgpu;
	u32 status_reg =
		execlist_ring_mmio(execlist->engine, _EL_OFFSET_STATUS);
	struct execlist_status_format status;

	status.ldw = vgpu_vreg(vgpu, status_reg);
	status.udw = vgpu_vreg(vgpu, status_reg + 4);

	if (status.execlist_queue_full) {
		gvt_vgpu_err("virtual execlist slots are full\n");
		return NULL;
	}

	return &execlist->slot[status.execlist_write_pointer];
}
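
/*
 * Emulate a guest ELSP submission.  With no running execlist the new slot
 * becomes the running slot (idle-to-active); if the first descriptor of
 * the new submission matches the running slot's still-active context, a
 * lite-restore + preempted event is emitted; otherwise the slot is parked
 * as the pending slot.
 */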
static int emulate_execlist_schedule_in(struct intel_vgpu_execlist *execlist,
		struct execlist_ctx_descriptor_format ctx[2])
{
	struct intel_vgpu_execlist_slot *running = execlist->running_slot;
	struct intel_vgpu_execlist_slot *slot =
		get_next_execlist_slot(execlist);

	struct execlist_ctx_descriptor_format *ctx0, *ctx1;
	struct execlist_context_status_format status;
	struct intel_vgpu *vgpu = execlist->vgpu;

	gvt_dbg_el("emulate schedule-in\n");

	if (!slot) {
		gvt_vgpu_err("no available execlist slot\n");
		return -EINVAL;
	}

	memset(&status, 0, sizeof(status));
	memset(slot->ctx, 0, sizeof(slot->ctx));

	slot->ctx[0] = ctx[0];
	slot->ctx[1] = ctx[1];

	gvt_dbg_el("alloc slot index %d ctx 0 %x ctx 1 %x\n",
		   slot->index, ctx[0].context_id,
		   ctx[1].context_id);

	/*
	 * no running execlist, make this write bundle as running execlist
	 * -> idle-to-active
	 */
	if (!running) {
		gvt_dbg_el("no current running execlist\n");

		execlist->running_slot = slot;
		execlist->pending_slot = NULL;
		execlist->running_context = &slot->ctx[0];

		gvt_dbg_el("running slot index %d running context %x\n",
			   execlist->running_slot->index,
			   execlist->running_context->context_id);

		emulate_execlist_status(execlist);

		status.idle_to_active = 1;
		status.context_id = 0;

		emulate_csb_update(execlist, &status, false);
		return 0;
	}

	ctx0 = &running->ctx[0];
	ctx1 = &running->ctx[1];

	gvt_dbg_el("current running slot index %d ctx 0 %x ctx 1 %x\n",
		   running->index, ctx0->context_id, ctx1->context_id);

	/*
	 * already has an running execlist
	 *	a. running ctx1 is valid,
	 *	   ctx0 is finished, and running ctx1 == new execlist ctx[0]
	 *	b. running ctx1 is not valid,
	 *	   ctx0 == new execlist ctx[0]
	 * ----> lite-restore + preempted
	 */
	if ((valid_context(ctx1) && same_context(ctx1, &slot->ctx[0]) &&
	     /* condition a */
	     (!same_context(ctx0, execlist->running_context))) ||
	    (!valid_context(ctx1) &&
	     same_context(ctx0, &slot->ctx[0]))) { /* condition b */
		gvt_dbg_el("need to switch virtual execlist slot\n");

		execlist->pending_slot = slot;
		switch_virtual_execlist_slot(execlist);

		emulate_execlist_status(execlist);

		status.lite_restore = status.preempted = 1;
		status.context_id = ctx[0].context_id;

		emulate_csb_update(execlist, &status, false);
	} else {
		gvt_dbg_el("emulate as pending slot\n");
		/*
		 * --> emulate pending execlist exist + but no preemption case
		 */
		execlist->pending_slot = slot;
		emulate_execlist_status(execlist);
	}
	return 0;
}
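
/*
 * A guest ELSP write supplies two 64-bit context descriptors; descriptor i
 * occupies dwords [i * 2, i * 2 + 1] of the captured elsp_dwords.
 */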
#define get_desc_from_elsp_dwords(ed, i) \
	((struct execlist_ctx_descriptor_format *)&((ed)->data[i * 2]))
static int prepare_execlist_workload(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct execlist_ctx_descriptor_format ctx[2];
	int ret;

	if (!workload->emulate_schedule_in)
		return 0;

	ctx[0] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 0);
	ctx[1] = *get_desc_from_elsp_dwords(&workload->elsp_dwords, 1);

	ret = emulate_execlist_schedule_in(&s->execlist[workload->engine->id],
					   ctx);
	if (ret) {
		gvt_vgpu_err("fail to emulate execlist schedule in\n");
		return ret;
	}
	return 0;
}
static int complete_execlist_workload(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_execlist *execlist =
		&s->execlist[workload->engine->id];
	struct intel_vgpu_workload *next_workload;
	struct list_head *next = workload_q_head(vgpu, workload->engine)->next;
	bool lite_restore = false;
	int ret = 0;

	gvt_dbg_el("complete workload %p status %d\n",
		   workload, workload->status);

	if (workload->status || vgpu->resetting_eng & workload->engine->mask)
		goto out;

	if (!list_empty(workload_q_head(vgpu, workload->engine))) {
		struct execlist_ctx_descriptor_format *this_desc, *next_desc;

		next_workload = container_of(next,
				struct intel_vgpu_workload, list);
		this_desc = &workload->ctx_desc;
		next_desc = &next_workload->ctx_desc;

		lite_restore = same_context(this_desc, next_desc);
	}

	if (lite_restore) {
		gvt_dbg_el("next context == current - no schedule-out\n");
		goto out;
	}

	ret = emulate_execlist_ctx_schedule_out(execlist, &workload->ctx_desc);
out:
	return ret;
}
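
/*
 * Wrap one context descriptor into a vGPU workload.  Only the workload
 * created from the first descriptor of an ELSP write carries
 * emulate_schedule_in, so the schedule-in emulation runs once per
 * submission.
 */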
static int submit_context(struct intel_vgpu *vgpu,
			  const struct intel_engine_cs *engine,
			  struct execlist_ctx_descriptor_format *desc,
			  bool emulate_schedule_in)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_workload *workload = NULL;

	workload = intel_vgpu_create_workload(vgpu, engine, desc);
	if (IS_ERR(workload))
		return PTR_ERR(workload);

	workload->prepare = prepare_execlist_workload;
	workload->complete = complete_execlist_workload;
	workload->emulate_schedule_in = emulate_schedule_in;

	if (emulate_schedule_in)
		workload->elsp_dwords = s->execlist[engine->id].elsp_dwords;

	gvt_dbg_el("workload %p emulate schedule_in %d\n", workload,
		   emulate_schedule_in);

	intel_vgpu_queue_workload(workload);
	return 0;
}
int intel_vgpu_submit_execlist(struct intel_vgpu *vgpu,
			       const struct intel_engine_cs *engine)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_execlist *execlist = &s->execlist[engine->id];
	struct execlist_ctx_descriptor_format *desc[2];
	int i, ret;

	desc[0] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 0);
	desc[1] = get_desc_from_elsp_dwords(&execlist->elsp_dwords, 1);

	if (!desc[0]->valid) {
		gvt_vgpu_err("invalid elsp submission, desc0 is invalid\n");
		goto inv_desc;
	}

	for (i = 0; i < ARRAY_SIZE(desc); i++) {
		if (!desc[i]->valid)
			continue;
		if (!desc[i]->privilege_access) {
			gvt_vgpu_err("unexpected GGTT elsp submission\n");
			goto inv_desc;
		}
	}

	/* submit workload */
	for (i = 0; i < ARRAY_SIZE(desc); i++) {
		if (!desc[i]->valid)
			continue;
		ret = submit_context(vgpu, engine, desc[i], i == 0);
		if (ret) {
			gvt_vgpu_err("failed to submit desc %d\n", i);
			return ret;
		}
	}

	return 0;

inv_desc:
	gvt_vgpu_err("descriptors content: desc0 %08x %08x desc1 %08x %08x\n",
		     desc[0]->udw, desc[0]->ldw, desc[1]->udw, desc[1]->ldw);
	return -EINVAL;
}
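
/*
 * Reset the per-engine virtual execlist state.  The CSB write pointer is
 * set to 0x7, the after-reset value, so the first event written by
 * emulate_csb_update() goes to entry 0.
 */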
static void init_vgpu_execlist(struct intel_vgpu *vgpu,
			       const struct intel_engine_cs *engine)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_vgpu_execlist *execlist = &s->execlist[engine->id];
	struct execlist_context_status_pointer_format ctx_status_ptr;
	u32 ctx_status_ptr_reg;

	memset(execlist, 0, sizeof(*execlist));

	execlist->vgpu = vgpu;
	execlist->engine = engine;
	execlist->slot[0].index = 0;
	execlist->slot[1].index = 1;

	ctx_status_ptr_reg = execlist_ring_mmio(engine, _EL_OFFSET_STATUS_PTR);
	ctx_status_ptr.dw = vgpu_vreg(vgpu, ctx_status_ptr_reg);
	ctx_status_ptr.read_ptr = 0;
	ctx_status_ptr.write_ptr = 0x7;
	vgpu_vreg(vgpu, ctx_status_ptr_reg) = ctx_status_ptr.dw;
}
static void clean_execlist(struct intel_vgpu *vgpu,
			   intel_engine_mask_t engine_mask)
{
	struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
	struct intel_engine_cs *engine;
	struct intel_vgpu_submission *s = &vgpu->submission;
	intel_engine_mask_t tmp;

	for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp) {
		kfree(s->ring_scan_buffer[engine->id]);
		s->ring_scan_buffer[engine->id] = NULL;
		s->ring_scan_buffer_size[engine->id] = 0;
	}
}
static void reset_execlist(struct intel_vgpu *vgpu,
			   intel_engine_mask_t engine_mask)
{
	struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
	struct intel_engine_cs *engine;
	intel_engine_mask_t tmp;

	for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp)
		init_vgpu_execlist(vgpu, engine);
}
static int init_execlist(struct intel_vgpu *vgpu,
			 intel_engine_mask_t engine_mask)
{
	reset_execlist(vgpu, engine_mask);
	return 0;
}
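
/*
 * Execlist submission back-end exposed to the rest of GVT: init/reset
 * re-initialise the per-engine virtual execlist state, and clean releases
 * the per-engine ring scan buffers.
 */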
const struct intel_vgpu_submission_ops intel_vgpu_execlist_submission_ops = {
	.name = "execlist",
	.init = init_execlist,
	.reset = reset_execlist,
	.clean = clean_execlist,
};