// SPDX-License-Identifier: GPL-2.0

/*
 * Copyright 2016-2019 HabanaLabs, Ltd.
 * All Rights Reserved.
 */

#include <uapi/misc/habanalabs.h>
#include "habanalabs.h"

#include <linux/uaccess.h>
#include <linux/slab.h>

static void job_wq_completion(struct work_struct *work);
static long _hl_cs_wait_ioctl(struct hl_device *hdev,
		struct hl_ctx *ctx, u64 timeout_us, u64 seq);
static void cs_do_release(struct kref *ref);
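
/*
 * dma_fence callbacks for the fence that backs each command submission (CS).
 * The fence is signaled from cs_do_release() once all jobs that generate
 * completions have finished.
 */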
static const char *hl_fence_get_driver_name(struct dma_fence *fence)
{
	return "HabanaLabs";
}

static const char *hl_fence_get_timeline_name(struct dma_fence *fence)
{
	struct hl_dma_fence *hl_fence =
		container_of(fence, struct hl_dma_fence, base_fence);

	return dev_name(hl_fence->hdev->dev);
}

static bool hl_fence_enable_signaling(struct dma_fence *fence)
{
	return true;
}

static void hl_fence_release(struct dma_fence *fence)
{
	struct hl_dma_fence *hl_fence =
		container_of(fence, struct hl_dma_fence, base_fence);

	kfree_rcu(hl_fence, base_fence.rcu);
}

static const struct dma_fence_ops hl_fence_ops = {
	.get_driver_name = hl_fence_get_driver_name,
	.get_timeline_name = hl_fence_get_timeline_name,
	.enable_signaling = hl_fence_enable_signaling,
	.wait = dma_fence_default_wait,
	.release = hl_fence_release
};
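
/* Reference counting helpers for the CS object; the last put runs cs_do_release() */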
static void cs_get(struct hl_cs *cs)
{
	kref_get(&cs->refcount);
}

static int cs_get_unless_zero(struct hl_cs *cs)
{
	return kref_get_unless_zero(&cs->refcount);
}

static void cs_put(struct hl_cs *cs)
{
	kref_put(&cs->refcount, cs_do_release);
}

static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
{
	/*
	 * Patched CB is created for external queues jobs, and for H/W queues
	 * jobs if the user CB was allocated by driver and MMU is disabled.
	 */
	return (job->queue_type == QUEUE_TYPE_EXT ||
			(job->queue_type == QUEUE_TYPE_HW &&
				job->is_kernel_allocated_cb &&
				!hdev->mmu_enable));
}

/*
 * cs_parser - parse the user command submission
 *
 * @hpriv	: pointer to the private data of the fd
 * @job	: pointer to the job that holds the command submission info
 *
 * The function parses the command submission of the user. It calls the
 * ASIC specific parser, which returns a list of memory blocks to send
 * to the device as different command buffers
 *
 */
static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_parser parser;
	int rc;

	parser.ctx_id = job->cs->ctx->asid;
	parser.cs_sequence = job->cs->sequence;
	parser.job_id = job->id;

	parser.hw_queue_id = job->hw_queue_id;
	parser.job_userptr_list = &job->userptr_list;
	parser.patched_cb = NULL;
	parser.user_cb = job->user_cb;
	parser.user_cb_size = job->user_cb_size;
	parser.queue_type = job->queue_type;
	parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
	job->patched_cb = NULL;

	rc = hdev->asic_funcs->cs_parser(hdev, &parser);

	if (is_cb_patched(hdev, job)) {
		if (!rc) {
			job->patched_cb = parser.patched_cb;
			job->job_cb_size = parser.patched_cb_size;

			spin_lock(&job->patched_cb->lock);
			job->patched_cb->cs_cnt++;
			spin_unlock(&job->patched_cb->lock);
		}

		/*
		 * Whether the parsing worked or not, we don't need the
		 * original CB anymore because it was already parsed and
		 * won't be accessed again for this CS
		 */
		spin_lock(&job->user_cb->lock);
		job->user_cb->cs_cnt--;
		spin_unlock(&job->user_cb->lock);
		hl_cb_put(job->user_cb);
		job->user_cb = NULL;
	}

	return rc;
}
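
/* Release a job's CBs and userptr list, remove it from the CS and free it */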
static void free_job(struct hl_device *hdev, struct hl_cs_job *job)
{
	struct hl_cs *cs = job->cs;

	if (is_cb_patched(hdev, job)) {
		hl_userptr_delete_list(hdev, &job->userptr_list);

		/*
		 * We might arrive here from rollback and patched CB wasn't
		 * created, so we need to check it's not NULL
		 */
		if (job->patched_cb) {
			spin_lock(&job->patched_cb->lock);
			job->patched_cb->cs_cnt--;
			spin_unlock(&job->patched_cb->lock);

			hl_cb_put(job->patched_cb);
		}
	}

	/* For H/W queue jobs, if a user CB was allocated by driver and MMU is
	 * enabled, the user CB isn't released in cs_parser() and thus should be
	 * released here.
	 */
	if (job->queue_type == QUEUE_TYPE_HW &&
			job->is_kernel_allocated_cb && hdev->mmu_enable) {
		spin_lock(&job->user_cb->lock);
		job->user_cb->cs_cnt--;
		spin_unlock(&job->user_cb->lock);

		hl_cb_put(job->user_cb);
	}

	/*
	 * This is the only place where there can be multiple threads
	 * modifying the list at the same time
	 */
	spin_lock(&cs->job_lock);
	list_del(&job->cs_node);
	spin_unlock(&cs->job_lock);

	hl_debugfs_remove_job(hdev, job);

	if (job->queue_type == QUEUE_TYPE_EXT ||
			job->queue_type == QUEUE_TYPE_HW)
		cs_put(cs);

	kfree(job);
}
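
/*
 * cs_do_release - kref release callback of a CS. Frees the remaining jobs,
 * updates the CI of internal queues, hands the TDR over to the next CS on
 * the mirror list and finally signals and puts the CS fence.
 */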
static void cs_do_release(struct kref *ref)
{
	struct hl_cs *cs = container_of(ref, struct hl_cs,
						refcount);
	struct hl_device *hdev = cs->ctx->hdev;
	struct hl_cs_job *job, *tmp;

	cs->completed = true;

	/*
	 * Although if we reached here it means that all external jobs have
	 * finished, because each one of them took refcnt to CS, we still
	 * need to go over the internal jobs and free them. Otherwise, we
	 * will have leaked memory and what's worse, the CS object (and
	 * potentially the CTX object) could be released, while the JOB
	 * still holds a pointer to them (but no reference).
	 */
	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		free_job(hdev, job);

	/* We also need to update CI for internal queues */
	if (cs->submitted) {
		hdev->asic_funcs->hw_queues_lock(hdev);

		hdev->cs_active_cnt--;
		if (!hdev->cs_active_cnt) {
			struct hl_device_idle_busy_ts *ts;

			ts = &hdev->idle_busy_ts_arr[hdev->idle_busy_ts_idx++];
			ts->busy_to_idle_ts = ktime_get();

			if (hdev->idle_busy_ts_idx == HL_IDLE_BUSY_TS_ARR_SIZE)
				hdev->idle_busy_ts_idx = 0;
		} else if (hdev->cs_active_cnt < 0) {
			dev_crit(hdev->dev, "CS active cnt %d is negative\n",
				hdev->cs_active_cnt);
		}

		hdev->asic_funcs->hw_queues_unlock(hdev);

		hl_int_hw_queue_update_ci(cs);

		spin_lock(&hdev->hw_queues_mirror_lock);
		/* remove CS from hw_queues mirror list */
		list_del_init(&cs->mirror_node);
		spin_unlock(&hdev->hw_queues_mirror_lock);

		/*
		 * Don't cancel TDR in case this CS was timedout because we
		 * might be running from the TDR context
		 */
		if ((!cs->timedout) &&
			(hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT)) {
			struct hl_cs *next;

			if (cs->tdr_active)
				cancel_delayed_work_sync(&cs->work_tdr);

			spin_lock(&hdev->hw_queues_mirror_lock);

			/* queue TDR for next CS */
			next = list_first_entry_or_null(
					&hdev->hw_queues_mirror_list,
					struct hl_cs, mirror_node);

			if ((next) && (!next->tdr_active)) {
				next->tdr_active = true;
				schedule_delayed_work(&next->work_tdr,
							hdev->timeout_jiffies);
			}

			spin_unlock(&hdev->hw_queues_mirror_lock);
		}
	}

	/*
	 * Must be called before hl_ctx_put because inside we use ctx to get
	 * the device
	 */
	hl_debugfs_remove_cs(cs);

	hl_ctx_put(cs->ctx);

	if (cs->timedout)
		dma_fence_set_error(cs->fence, -ETIMEDOUT);
	else if (cs->aborted)
		dma_fence_set_error(cs->fence, -EIO);

	dma_fence_signal(cs->fence);
	dma_fence_put(cs->fence);

	kfree(cs);
}
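
/*
 * cs_timedout - TDR work handler, invoked when a CS doesn't complete within
 * the configured timeout. Marks the CS as timed out and optionally resets
 * the device.
 */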
static void cs_timedout(struct work_struct *work)
{
	struct hl_device *hdev;
	int ctx_asid, rc;
	struct hl_cs *cs = container_of(work, struct hl_cs,
						work_tdr.work);
	rc = cs_get_unless_zero(cs);
	if (!rc)
		return;

	if ((!cs->submitted) || (cs->completed)) {
		cs_put(cs);
		return;
	}

	/* Mark the CS is timed out so we won't try to cancel its TDR */
	cs->timedout = true;

	hdev = cs->ctx->hdev;
	ctx_asid = cs->ctx->asid;

	/* TODO: add information about last signaled seq and last emitted seq */
	dev_err(hdev->dev, "User %d command submission %llu got stuck!\n",
		ctx_asid, cs->sequence);

	cs_put(cs);

	if (hdev->reset_on_lockup)
		hl_device_reset(hdev, false, false);
}
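
/*
 * allocate_cs - allocate and initialize a CS object together with its
 * completion fence, and register it in the context's pending CS array.
 */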
static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
			struct hl_cs **cs_new)
{
	struct hl_dma_fence *fence;
	struct dma_fence *other = NULL;
	struct hl_cs *cs;
	int rc;

	cs = kzalloc(sizeof(*cs), GFP_ATOMIC);
	if (!cs)
		return -ENOMEM;

	cs->ctx = ctx;
	cs->submitted = false;
	cs->completed = false;
	INIT_LIST_HEAD(&cs->job_list);
	INIT_DELAYED_WORK(&cs->work_tdr, cs_timedout);
	kref_init(&cs->refcount);
	spin_lock_init(&cs->job_lock);

	fence = kmalloc(sizeof(*fence), GFP_ATOMIC);
	if (!fence) {
		rc = -ENOMEM;
		goto free_cs;
	}

	fence->hdev = hdev;
	spin_lock_init(&fence->lock);
	cs->fence = &fence->base_fence;

	spin_lock(&ctx->cs_lock);

	fence->cs_seq = ctx->cs_sequence;
	other = ctx->cs_pending[fence->cs_seq & (HL_MAX_PENDING_CS - 1)];
	if ((other) && (!dma_fence_is_signaled(other))) {
		spin_unlock(&ctx->cs_lock);
		dev_dbg(hdev->dev,
			"Rejecting CS because of too many in-flights CS\n");
		rc = -EAGAIN;
		goto free_fence;
	}

	dma_fence_init(&fence->base_fence, &hl_fence_ops, &fence->lock,
			ctx->asid, ctx->cs_sequence);

	cs->sequence = fence->cs_seq;

	ctx->cs_pending[fence->cs_seq & (HL_MAX_PENDING_CS - 1)] =
							&fence->base_fence;
	ctx->cs_sequence++;

	dma_fence_get(&fence->base_fence);

	dma_fence_put(other);

	spin_unlock(&ctx->cs_lock);

	*cs_new = cs;

	return 0;

free_fence:
	kfree(fence);
free_cs:
	kfree(cs);
	return rc;
}

static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
{
	struct hl_cs_job *job, *tmp;

	list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
		free_job(hdev, job);
}

void hl_cs_rollback_all(struct hl_device *hdev)
{
	struct hl_cs *cs, *tmp;

	/* flush all completions */
	flush_workqueue(hdev->cq_wq);

	/* Make sure we don't have leftovers in the H/W queues mirror list */
	list_for_each_entry_safe(cs, tmp, &hdev->hw_queues_mirror_list,
				mirror_node) {
		cs_get(cs);
		cs->aborted = true;
		dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
					cs->ctx->asid, cs->sequence);
		cs_rollback(hdev, cs);
		cs_put(cs);
	}
}
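
/* Work function that frees a job of an external queue after its completion */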
static void job_wq_completion(struct work_struct *work)
{
	struct hl_cs_job *job = container_of(work, struct hl_cs_job,
						finish_work);
	struct hl_cs *cs = job->cs;
	struct hl_device *hdev = cs->ctx->hdev;

	/* job is no longer needed */
	free_job(hdev, job);
}

static int validate_queue_index(struct hl_device *hdev,
				struct hl_cs_chunk *chunk,
				enum hl_queue_type *queue_type,
				bool *is_kernel_allocated_cb)
{
	struct asic_fixed_properties *asic = &hdev->asic_prop;
	struct hw_queue_properties *hw_queue_prop;

	hw_queue_prop = &asic->hw_queues_props[chunk->queue_index];

	if ((chunk->queue_index >= HL_MAX_QUEUES) ||
			(hw_queue_prop->type == QUEUE_TYPE_NA)) {
		dev_err(hdev->dev, "Queue index %d is invalid\n",
			chunk->queue_index);
		return -EINVAL;
	}

	if (hw_queue_prop->driver_only) {
		dev_err(hdev->dev,
			"Queue index %d is restricted for the kernel driver\n",
			chunk->queue_index);
		return -EINVAL;
	}

	*queue_type = hw_queue_prop->type;
	*is_kernel_allocated_cb = !!hw_queue_prop->requires_kernel_cb;

	return 0;
}

static struct hl_cb *get_cb_from_cs_chunk(struct hl_device *hdev,
					struct hl_cb_mgr *cb_mgr,
					struct hl_cs_chunk *chunk)
{
	struct hl_cb *cb;
	u32 cb_handle;

	cb_handle = (u32) (chunk->cb_handle >> PAGE_SHIFT);

	cb = hl_cb_get(hdev, cb_mgr, cb_handle);
	if (!cb) {
		dev_err(hdev->dev, "CB handle 0x%x invalid\n", cb_handle);
		return NULL;
	}

	if ((chunk->cb_size < 8) || (chunk->cb_size > cb->size)) {
		dev_err(hdev->dev, "CB size %u invalid\n", chunk->cb_size);
		goto release_cb;
	}

	spin_lock(&cb->lock);
	cb->cs_cnt++;
	spin_unlock(&cb->lock);

	return cb;

release_cb:
	hl_cb_put(cb);
	return NULL;
}

struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
		enum hl_queue_type queue_type, bool is_kernel_allocated_cb)
{
	struct hl_cs_job *job;

	job = kzalloc(sizeof(*job), GFP_ATOMIC);
	if (!job)
		return NULL;

	job->queue_type = queue_type;
	job->is_kernel_allocated_cb = is_kernel_allocated_cb;

	if (is_cb_patched(hdev, job))
		INIT_LIST_HEAD(&job->userptr_list);

	if (job->queue_type == QUEUE_TYPE_EXT)
		INIT_WORK(&job->finish_work, job_wq_completion);

	return job;
}
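
/*
 * _hl_cs_ioctl - copy the chunk array from the user, allocate a CS object,
 * build and parse a job per chunk and schedule the CS on the H/W queues.
 */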
static int _hl_cs_ioctl(struct hl_fpriv *hpriv, void __user *chunks,
			u32 num_chunks, u64 *cs_seq)
{
	struct hl_device *hdev = hpriv->hdev;
	struct hl_cs_chunk *cs_chunk_array;
	struct hl_cs_job *job;
	struct hl_cs *cs;
	struct hl_cb *cb;
	bool int_queues_only = true;
	u32 size_to_copy;
	int rc, i, parse_cnt;

	*cs_seq = ULLONG_MAX;

	if (num_chunks > HL_MAX_JOBS_PER_CS) {
		dev_err(hdev->dev,
			"Number of chunks can NOT be larger than %d\n",
			HL_MAX_JOBS_PER_CS);
		rc = -EINVAL;
		goto out;
	}

	cs_chunk_array = kmalloc_array(num_chunks, sizeof(*cs_chunk_array),
					GFP_ATOMIC);
	if (!cs_chunk_array) {
		rc = -ENOMEM;
		goto out;
	}

	size_to_copy = num_chunks * sizeof(struct hl_cs_chunk);
	if (copy_from_user(cs_chunk_array, chunks, size_to_copy)) {
		dev_err(hdev->dev, "Failed to copy cs chunk array from user\n");
		rc = -EFAULT;
		goto free_cs_chunk_array;
	}

	/* increment refcnt for context */
	hl_ctx_get(hdev, hpriv->ctx);

	rc = allocate_cs(hdev, hpriv->ctx, &cs);
	if (rc) {
		hl_ctx_put(hpriv->ctx);
		goto free_cs_chunk_array;
	}

	*cs_seq = cs->sequence;

	hl_debugfs_add_cs(cs);

	/* Validate ALL the CS chunks before submitting the CS */
	for (i = 0, parse_cnt = 0 ; i < num_chunks ; i++, parse_cnt++) {
		struct hl_cs_chunk *chunk = &cs_chunk_array[i];
		enum hl_queue_type queue_type;
		bool is_kernel_allocated_cb;

		rc = validate_queue_index(hdev, chunk, &queue_type,
				&is_kernel_allocated_cb);
		if (rc)
			goto free_cs_object;

		if (is_kernel_allocated_cb) {
			cb = get_cb_from_cs_chunk(hdev, &hpriv->cb_mgr, chunk);
			if (!cb) {
				rc = -EINVAL;
				goto free_cs_object;
			}
		} else {
			cb = (struct hl_cb *) (uintptr_t) chunk->cb_handle;
		}

		if (queue_type == QUEUE_TYPE_EXT || queue_type == QUEUE_TYPE_HW)
			int_queues_only = false;

		job = hl_cs_allocate_job(hdev, queue_type,
						is_kernel_allocated_cb);
		if (!job) {
			dev_err(hdev->dev, "Failed to allocate a new job\n");
			rc = -ENOMEM;
			if (is_kernel_allocated_cb)
				goto release_cb;
			else
				goto free_cs_object;
		}

		job->id = i + 1;
		job->cs = cs;
		job->user_cb = cb;
		job->user_cb_size = chunk->cb_size;
		if (is_kernel_allocated_cb)
			job->job_cb_size = cb->size;
		else
			job->job_cb_size = chunk->cb_size;
		job->hw_queue_id = chunk->queue_index;

		cs->jobs_in_queue_cnt[job->hw_queue_id]++;

		list_add_tail(&job->cs_node, &cs->job_list);

		/*
		 * Increment CS reference. When CS reference is 0, CS is
		 * done and can be signaled to user and free all its resources
		 * Only increment for JOB on external or H/W queues, because
		 * only for those JOBs we get completion
		 */
		if (job->queue_type == QUEUE_TYPE_EXT ||
				job->queue_type == QUEUE_TYPE_HW)
			cs_get(cs);

		hl_debugfs_add_job(hdev, job);

		rc = cs_parser(hpriv, job);
		if (rc) {
			dev_err(hdev->dev,
				"Failed to parse JOB %d.%llu.%d, err %d, rejecting the CS\n",
				cs->ctx->asid, cs->sequence, job->id, rc);
			goto free_cs_object;
		}
	}

	if (int_queues_only) {
		dev_err(hdev->dev,
			"Reject CS %d.%llu because only internal queues jobs are present\n",
			cs->ctx->asid, cs->sequence);
		rc = -EINVAL;
		goto free_cs_object;
	}

	rc = hl_hw_queue_schedule_cs(cs);
	if (rc) {
		if (rc != -EAGAIN)
			dev_err(hdev->dev,
				"Failed to submit CS %d.%llu to H/W queues, error %d\n",
				cs->ctx->asid, cs->sequence, rc);
		goto free_cs_object;
	}

	rc = HL_CS_STATUS_SUCCESS;
	goto put_cs;

release_cb:
	spin_lock(&cb->lock);
	cb->cs_cnt--;
	spin_unlock(&cb->lock);
	hl_cb_put(cb);
free_cs_object:
	cs_rollback(hdev, cs);
	*cs_seq = ULLONG_MAX;
	/* The path below is both for good and erroneous exits */
put_cs:
	/* We finished with the CS in this function, so put the ref */
	cs_put(cs);
free_cs_chunk_array:
	kfree(cs_chunk_array);
out:
	return rc;
}
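
/*
 * hl_cs_ioctl - entry point of the CS IOCTL. Handles the context-switch /
 * restore phase when needed and then submits the execute CS.
 */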
int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
{
	struct hl_device *hdev = hpriv->hdev;
	union hl_cs_args *args = data;
	struct hl_ctx *ctx = hpriv->ctx;
	void __user *chunks;
	u32 num_chunks;
	u64 cs_seq = ULONG_MAX;
	int rc, do_ctx_switch;
	bool need_soft_reset = false;

	if (hl_device_disabled_or_in_reset(hdev)) {
		dev_warn_ratelimited(hdev->dev,
			"Device is %s. Can't submit new CS\n",
			atomic_read(&hdev->in_reset) ? "in_reset" : "disabled");
		rc = -EBUSY;
		goto out;
	}

	do_ctx_switch = atomic_cmpxchg(&ctx->thread_ctx_switch_token, 1, 0);

	if (do_ctx_switch || (args->in.cs_flags & HL_CS_FLAGS_FORCE_RESTORE)) {
		long ret;

		chunks = (void __user *)(uintptr_t)args->in.chunks_restore;
		num_chunks = args->in.num_chunks_restore;

		mutex_lock(&hpriv->restore_phase_mutex);

		if (do_ctx_switch) {
			rc = hdev->asic_funcs->context_switch(hdev, ctx->asid);
			if (rc) {
				dev_err_ratelimited(hdev->dev,
					"Failed to switch to context %d, rejecting CS! %d\n",
					ctx->asid, rc);
				/*
				 * If we timedout, or if the device is not IDLE
				 * while we want to do context-switch (-EBUSY),
				 * we need to soft-reset because QMAN is
				 * probably stuck. However, we can't call to
				 * reset here directly because of deadlock, so
				 * need to do it at the very end of this
				 * function
				 */
				if ((rc == -ETIMEDOUT) || (rc == -EBUSY))
					need_soft_reset = true;
				mutex_unlock(&hpriv->restore_phase_mutex);
				goto out;
			}
		}

		hdev->asic_funcs->restore_phase_topology(hdev);

		if (num_chunks == 0) {
			dev_dbg(hdev->dev,
				"Need to run restore phase but restore CS is empty\n");
			rc = 0;
		} else {
			rc = _hl_cs_ioctl(hpriv, chunks, num_chunks,
						&cs_seq);
		}

		mutex_unlock(&hpriv->restore_phase_mutex);

		if (rc) {
			dev_err(hdev->dev,
				"Failed to submit restore CS for context %d (%d)\n",
				ctx->asid, rc);
			goto out;
		}

		/* Need to wait for restore completion before execution phase */
		if (num_chunks > 0) {
			ret = _hl_cs_wait_ioctl(hdev, ctx,
					jiffies_to_usecs(hdev->timeout_jiffies),
					cs_seq);
			if (ret <= 0) {
				dev_err(hdev->dev,
					"Restore CS for context %d failed to complete %ld\n",
					ctx->asid, ret);
				rc = -ENOEXEC;
				goto out;
			}
		}

		ctx->thread_ctx_switch_wait_token = 1;
	} else if (!ctx->thread_ctx_switch_wait_token) {
		u32 tmp;

		rc = hl_poll_timeout_memory(hdev,
			&ctx->thread_ctx_switch_wait_token, tmp, (tmp == 1),
			100, jiffies_to_usecs(hdev->timeout_jiffies), false);

		if (rc == -ETIMEDOUT) {
			dev_err(hdev->dev,
				"context switch phase timeout (%d)\n", tmp);
			goto out;
		}
	}

	chunks = (void __user *)(uintptr_t)args->in.chunks_execute;
	num_chunks = args->in.num_chunks_execute;

	if (num_chunks == 0) {
		dev_err(hdev->dev,
			"Got execute CS with 0 chunks, context %d\n",
			ctx->asid);
		rc = -EINVAL;
		goto out;
	}

	rc = _hl_cs_ioctl(hpriv, chunks, num_chunks, &cs_seq);

out:
	if (rc != -EAGAIN) {
		memset(args, 0, sizeof(*args));
		args->out.status = rc;
		args->out.seq = cs_seq;
	}

	if (((rc == -ETIMEDOUT) || (rc == -EBUSY)) && (need_soft_reset))
		hl_device_reset(hdev, false, false);

	return rc;
}
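
/*
 * _hl_cs_wait_ioctl - wait on the fence of a single CS, identified by its
 * sequence number, with the given timeout in microseconds.
 */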
static long _hl_cs_wait_ioctl(struct hl_device *hdev,
		struct hl_ctx *ctx, u64 timeout_us, u64 seq)
{
	struct dma_fence *fence;
	unsigned long timeout;
	long rc;

	if (timeout_us == MAX_SCHEDULE_TIMEOUT)
		timeout = timeout_us;
	else
		timeout = usecs_to_jiffies(timeout_us);

	hl_ctx_get(hdev, ctx);

	fence = hl_ctx_get_fence(ctx, seq);
	if (IS_ERR(fence)) {
		rc = PTR_ERR(fence);
	} else if (fence) {
		rc = dma_fence_wait_timeout(fence, true, timeout);
		if (fence->error == -ETIMEDOUT)
			rc = -ETIMEDOUT;
		else if (fence->error == -EIO)
			rc = -EIO;
		dma_fence_put(fence);
	} else {
		rc = 1;
	}

	hl_ctx_put(ctx);

	return rc;
}
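
/* IOCTL handler that waits for a CS to complete and reports its status to the user */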
int hl_cs_wait_ioctl(struct hl_fpriv *hpriv, void *data)
{
	struct hl_device *hdev = hpriv->hdev;
	union hl_wait_cs_args *args = data;
	u64 seq = args->in.seq;
	long rc;

	rc = _hl_cs_wait_ioctl(hdev, hpriv->ctx, args->in.timeout_us, seq);

	memset(args, 0, sizeof(*args));

	if (rc < 0) {
		dev_err_ratelimited(hdev->dev,
			"Error %ld on waiting for CS handle %llu\n",
			rc, seq);
		if (rc == -ERESTARTSYS) {
			args->out.status = HL_WAIT_CS_STATUS_INTERRUPTED;
			rc = -EINTR;
		} else if (rc == -ETIMEDOUT) {
			args->out.status = HL_WAIT_CS_STATUS_TIMEDOUT;
		} else if (rc == -EIO) {
			args->out.status = HL_WAIT_CS_STATUS_ABORTED;
		}
		return rc;
	}

	if (rc == 0)
		args->out.status = HL_WAIT_CS_STATUS_BUSY;
	else
		args->out.status = HL_WAIT_CS_STATUS_COMPLETED;

	return 0;
}