1 // SPDX-License-Identifier: GPL-2.0
/*
 * NVMe over Fabrics Persist Reservation.
 * Copyright (c) 2024 Guixin Liu, Alibaba Group.
 * All rights reserved.
 */
7 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
8 #include <linux/unaligned.h>
11 #define NVMET_PR_NOTIFI_MASK_ALL \
12 (1 << NVME_PR_NOTIFY_BIT_REG_PREEMPTED | \
13 1 << NVME_PR_NOTIFY_BIT_RESV_RELEASED | \
14 1 << NVME_PR_NOTIFY_BIT_RESV_PREEMPTED)
16 static inline bool nvmet_pr_parse_ignore_key(u32 cdw10
)
18 /* Ignore existing key, bit 03. */
19 return (cdw10
>> 3) & 1;
22 static inline struct nvmet_ns
*nvmet_pr_to_ns(struct nvmet_pr
*pr
)
24 return container_of(pr
, struct nvmet_ns
, pr
);
27 static struct nvmet_pr_registrant
*
28 nvmet_pr_find_registrant(struct nvmet_pr
*pr
, uuid_t
*hostid
)
30 struct nvmet_pr_registrant
*reg
;
32 list_for_each_entry_rcu(reg
, &pr
->registrant_list
, entry
) {
33 if (uuid_equal(®
->hostid
, hostid
))
39 u16
nvmet_set_feat_resv_notif_mask(struct nvmet_req
*req
, u32 mask
)
41 u32 nsid
= le32_to_cpu(req
->cmd
->common
.nsid
);
42 struct nvmet_ctrl
*ctrl
= req
->sq
->ctrl
;
47 if (mask
& ~(NVMET_PR_NOTIFI_MASK_ALL
)) {
48 req
->error_loc
= offsetof(struct nvme_common_command
, cdw11
);
49 return NVME_SC_INVALID_FIELD
| NVME_STATUS_DNR
;
52 if (nsid
!= U32_MAX
) {
53 status
= nvmet_req_find_ns(req
);
56 if (!req
->ns
->pr
.enable
)
57 return NVME_SC_INVALID_FIELD
| NVME_STATUS_DNR
;
59 WRITE_ONCE(req
->ns
->pr
.notify_mask
, mask
);
63 xa_for_each(&ctrl
->subsys
->namespaces
, idx
, ns
) {
65 WRITE_ONCE(ns
->pr
.notify_mask
, mask
);
69 nvmet_set_result(req
, mask
);
70 return NVME_SC_SUCCESS
;
73 u16
nvmet_get_feat_resv_notif_mask(struct nvmet_req
*req
)
77 status
= nvmet_req_find_ns(req
);
81 if (!req
->ns
->pr
.enable
)
82 return NVME_SC_INVALID_FIELD
| NVME_STATUS_DNR
;
84 nvmet_set_result(req
, READ_ONCE(req
->ns
->pr
.notify_mask
));
88 void nvmet_execute_get_log_page_resv(struct nvmet_req
*req
)
90 struct nvmet_pr_log_mgr
*log_mgr
= &req
->sq
->ctrl
->pr_log_mgr
;
91 struct nvme_pr_log next_log
= {0};
92 struct nvme_pr_log log
= {0};
93 u16 status
= NVME_SC_SUCCESS
;
98 mutex_lock(&log_mgr
->lock
);
99 if (!kfifo_get(&log_mgr
->log_queue
, &log
))
103 * We can't get the last in kfifo.
104 * Utilize the current count and the count from the next log to
105 * calculate the number of lost logs, while also addressing cases
106 * of overflow. If there is no subsequent log, the number of lost
107 * logs is equal to the lost_count within the nvmet_pr_log_mgr.
109 cur_count
= le64_to_cpu(log
.count
);
110 if (kfifo_peek(&log_mgr
->log_queue
, &next_log
)) {
111 next_count
= le64_to_cpu(next_log
.count
);
112 if (next_count
> cur_count
)
113 lost_count
= next_count
- cur_count
- 1;
115 lost_count
= U64_MAX
- cur_count
+ next_count
- 1;
117 lost_count
= log_mgr
->lost_count
;
120 log
.count
= cpu_to_le64((cur_count
+ lost_count
) == 0 ?
121 1 : (cur_count
+ lost_count
));
122 log_mgr
->lost_count
-= lost_count
;
124 log
.nr_pages
= kfifo_len(&log_mgr
->log_queue
);
127 status
= nvmet_copy_to_sgl(req
, 0, &log
, sizeof(log
));
128 mutex_unlock(&log_mgr
->lock
);
129 nvmet_req_complete(req
, status
);
132 static void nvmet_pr_add_resv_log(struct nvmet_ctrl
*ctrl
, u8 log_type
,
135 struct nvmet_pr_log_mgr
*log_mgr
= &ctrl
->pr_log_mgr
;
136 struct nvme_pr_log log
= {0};
138 mutex_lock(&log_mgr
->lock
);
140 if (log_mgr
->counter
== 0)
141 log_mgr
->counter
= 1;
143 log
.count
= cpu_to_le64(log_mgr
->counter
);
145 log
.nsid
= cpu_to_le32(nsid
);
147 if (!kfifo_put(&log_mgr
->log_queue
, log
)) {
148 pr_info("a reservation log lost, cntlid:%d, log_type:%d, nsid:%d\n",
149 ctrl
->cntlid
, log_type
, nsid
);
150 log_mgr
->lost_count
++;
153 mutex_unlock(&log_mgr
->lock
);
156 static void nvmet_pr_resv_released(struct nvmet_pr
*pr
, uuid_t
*hostid
)
158 struct nvmet_ns
*ns
= nvmet_pr_to_ns(pr
);
159 struct nvmet_subsys
*subsys
= ns
->subsys
;
160 struct nvmet_ctrl
*ctrl
;
162 if (test_bit(NVME_PR_NOTIFY_BIT_RESV_RELEASED
, &pr
->notify_mask
))
165 mutex_lock(&subsys
->lock
);
166 list_for_each_entry(ctrl
, &subsys
->ctrls
, subsys_entry
) {
167 if (!uuid_equal(&ctrl
->hostid
, hostid
) &&
168 nvmet_pr_find_registrant(pr
, &ctrl
->hostid
)) {
169 nvmet_pr_add_resv_log(ctrl
,
170 NVME_PR_LOG_RESERVATION_RELEASED
, ns
->nsid
);
171 nvmet_add_async_event(ctrl
, NVME_AER_CSS
,
172 NVME_AEN_RESV_LOG_PAGE_AVALIABLE
,
173 NVME_LOG_RESERVATION
);
176 mutex_unlock(&subsys
->lock
);
179 static void nvmet_pr_send_event_to_host(struct nvmet_pr
*pr
, uuid_t
*hostid
,
182 struct nvmet_ns
*ns
= nvmet_pr_to_ns(pr
);
183 struct nvmet_subsys
*subsys
= ns
->subsys
;
184 struct nvmet_ctrl
*ctrl
;
186 mutex_lock(&subsys
->lock
);
187 list_for_each_entry(ctrl
, &subsys
->ctrls
, subsys_entry
) {
188 if (uuid_equal(hostid
, &ctrl
->hostid
)) {
189 nvmet_pr_add_resv_log(ctrl
, log_type
, ns
->nsid
);
190 nvmet_add_async_event(ctrl
, NVME_AER_CSS
,
191 NVME_AEN_RESV_LOG_PAGE_AVALIABLE
,
192 NVME_LOG_RESERVATION
);
195 mutex_unlock(&subsys
->lock
);
198 static void nvmet_pr_resv_preempted(struct nvmet_pr
*pr
, uuid_t
*hostid
)
200 if (test_bit(NVME_PR_NOTIFY_BIT_RESV_PREEMPTED
, &pr
->notify_mask
))
203 nvmet_pr_send_event_to_host(pr
, hostid
,
204 NVME_PR_LOG_RESERVATOIN_PREEMPTED
);
207 static void nvmet_pr_registration_preempted(struct nvmet_pr
*pr
,
210 if (test_bit(NVME_PR_NOTIFY_BIT_REG_PREEMPTED
, &pr
->notify_mask
))
213 nvmet_pr_send_event_to_host(pr
, hostid
,
214 NVME_PR_LOG_REGISTRATION_PREEMPTED
);
217 static inline void nvmet_pr_set_new_holder(struct nvmet_pr
*pr
, u8 new_rtype
,
218 struct nvmet_pr_registrant
*reg
)
220 reg
->rtype
= new_rtype
;
221 rcu_assign_pointer(pr
->holder
, reg
);
224 static u16
nvmet_pr_register(struct nvmet_req
*req
,
225 struct nvmet_pr_register_data
*d
)
227 struct nvmet_ctrl
*ctrl
= req
->sq
->ctrl
;
228 struct nvmet_pr_registrant
*new, *reg
;
229 struct nvmet_pr
*pr
= &req
->ns
->pr
;
230 u16 status
= NVME_SC_SUCCESS
;
231 u64 nrkey
= le64_to_cpu(d
->nrkey
);
233 new = kmalloc(sizeof(*new), GFP_KERNEL
);
235 return NVME_SC_INTERNAL
;
238 reg
= nvmet_pr_find_registrant(pr
, &ctrl
->hostid
);
240 if (reg
->rkey
!= nrkey
)
241 status
= NVME_SC_RESERVATION_CONFLICT
| NVME_STATUS_DNR
;
246 memset(new, 0, sizeof(*new));
247 INIT_LIST_HEAD(&new->entry
);
249 uuid_copy(&new->hostid
, &ctrl
->hostid
);
250 list_add_tail_rcu(&new->entry
, &pr
->registrant_list
);
257 static void nvmet_pr_unregister_one(struct nvmet_pr
*pr
,
258 struct nvmet_pr_registrant
*reg
)
260 struct nvmet_pr_registrant
*first_reg
;
261 struct nvmet_pr_registrant
*holder
;
264 list_del_rcu(®
->entry
);
266 holder
= rcu_dereference_protected(pr
->holder
, 1);
270 original_rtype
= holder
->rtype
;
271 if (original_rtype
== NVME_PR_WRITE_EXCLUSIVE_ALL_REGS
||
272 original_rtype
== NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS
) {
273 first_reg
= list_first_or_null_rcu(&pr
->registrant_list
,
274 struct nvmet_pr_registrant
, entry
);
276 first_reg
->rtype
= original_rtype
;
277 rcu_assign_pointer(pr
->holder
, first_reg
);
279 rcu_assign_pointer(pr
->holder
, NULL
);
281 if (original_rtype
== NVME_PR_WRITE_EXCLUSIVE_REG_ONLY
||
282 original_rtype
== NVME_PR_EXCLUSIVE_ACCESS_REG_ONLY
)
283 nvmet_pr_resv_released(pr
, ®
->hostid
);
289 static u16
nvmet_pr_unregister(struct nvmet_req
*req
,
290 struct nvmet_pr_register_data
*d
,
293 u16 status
= NVME_SC_RESERVATION_CONFLICT
| NVME_STATUS_DNR
;
294 struct nvmet_ctrl
*ctrl
= req
->sq
->ctrl
;
295 struct nvmet_pr
*pr
= &req
->ns
->pr
;
296 struct nvmet_pr_registrant
*reg
;
299 list_for_each_entry_rcu(reg
, &pr
->registrant_list
, entry
) {
300 if (uuid_equal(®
->hostid
, &ctrl
->hostid
)) {
301 if (ignore_key
|| reg
->rkey
== le64_to_cpu(d
->crkey
)) {
302 status
= NVME_SC_SUCCESS
;
303 nvmet_pr_unregister_one(pr
, reg
);
313 static void nvmet_pr_update_reg_rkey(struct nvmet_pr_registrant
*reg
,
316 reg
->rkey
= *(u64
*)attr
;
319 static u16
nvmet_pr_update_reg_attr(struct nvmet_pr
*pr
,
320 struct nvmet_pr_registrant
*reg
,
321 void (*change_attr
)(struct nvmet_pr_registrant
*reg
,
325 struct nvmet_pr_registrant
*holder
;
326 struct nvmet_pr_registrant
*new;
328 holder
= rcu_dereference_protected(pr
->holder
, 1);
330 change_attr(reg
, attr
);
331 return NVME_SC_SUCCESS
;
334 new = kmalloc(sizeof(*new), GFP_ATOMIC
);
336 return NVME_SC_INTERNAL
;
338 new->rkey
= holder
->rkey
;
339 new->rtype
= holder
->rtype
;
340 uuid_copy(&new->hostid
, &holder
->hostid
);
341 INIT_LIST_HEAD(&new->entry
);
343 change_attr(new, attr
);
344 list_replace_rcu(&holder
->entry
, &new->entry
);
345 rcu_assign_pointer(pr
->holder
, new);
346 kfree_rcu(holder
, rcu
);
348 return NVME_SC_SUCCESS
;
351 static u16
nvmet_pr_replace(struct nvmet_req
*req
,
352 struct nvmet_pr_register_data
*d
,
355 u16 status
= NVME_SC_RESERVATION_CONFLICT
| NVME_STATUS_DNR
;
356 struct nvmet_ctrl
*ctrl
= req
->sq
->ctrl
;
357 struct nvmet_pr
*pr
= &req
->ns
->pr
;
358 struct nvmet_pr_registrant
*reg
;
359 u64 nrkey
= le64_to_cpu(d
->nrkey
);
362 list_for_each_entry_rcu(reg
, &pr
->registrant_list
, entry
) {
363 if (uuid_equal(®
->hostid
, &ctrl
->hostid
)) {
364 if (ignore_key
|| reg
->rkey
== le64_to_cpu(d
->crkey
))
365 status
= nvmet_pr_update_reg_attr(pr
, reg
,
366 nvmet_pr_update_reg_rkey
,
375 static void nvmet_execute_pr_register(struct nvmet_req
*req
)
377 u32 cdw10
= le32_to_cpu(req
->cmd
->common
.cdw10
);
378 bool ignore_key
= nvmet_pr_parse_ignore_key(cdw10
);
379 struct nvmet_pr_register_data
*d
;
380 u8 reg_act
= cdw10
& 0x07; /* Reservation Register Action, bit 02:00 */
383 d
= kmalloc(sizeof(*d
), GFP_KERNEL
);
385 status
= NVME_SC_INTERNAL
;
389 status
= nvmet_copy_from_sgl(req
, 0, d
, sizeof(*d
));
394 case NVME_PR_REGISTER_ACT_REG
:
395 status
= nvmet_pr_register(req
, d
);
397 case NVME_PR_REGISTER_ACT_UNREG
:
398 status
= nvmet_pr_unregister(req
, d
, ignore_key
);
400 case NVME_PR_REGISTER_ACT_REPLACE
:
401 status
= nvmet_pr_replace(req
, d
, ignore_key
);
404 req
->error_loc
= offsetof(struct nvme_common_command
, cdw10
);
405 status
= NVME_SC_INVALID_OPCODE
| NVME_STATUS_DNR
;
412 atomic_inc(&req
->ns
->pr
.generation
);
413 nvmet_req_complete(req
, status
);
416 static u16
nvmet_pr_acquire(struct nvmet_req
*req
,
417 struct nvmet_pr_registrant
*reg
,
420 struct nvmet_pr
*pr
= &req
->ns
->pr
;
421 struct nvmet_pr_registrant
*holder
;
423 holder
= rcu_dereference_protected(pr
->holder
, 1);
424 if (holder
&& reg
!= holder
)
425 return NVME_SC_RESERVATION_CONFLICT
| NVME_STATUS_DNR
;
426 if (holder
&& reg
== holder
) {
427 if (holder
->rtype
== rtype
)
428 return NVME_SC_SUCCESS
;
429 return NVME_SC_RESERVATION_CONFLICT
| NVME_STATUS_DNR
;
432 nvmet_pr_set_new_holder(pr
, rtype
, reg
);
433 return NVME_SC_SUCCESS
;
436 static void nvmet_pr_confirm_ns_pc_ref(struct percpu_ref
*ref
)
438 struct nvmet_pr_per_ctrl_ref
*pc_ref
=
439 container_of(ref
, struct nvmet_pr_per_ctrl_ref
, ref
);
441 complete(&pc_ref
->confirm_done
);
444 static void nvmet_pr_set_ctrl_to_abort(struct nvmet_req
*req
, uuid_t
*hostid
)
446 struct nvmet_pr_per_ctrl_ref
*pc_ref
;
447 struct nvmet_ns
*ns
= req
->ns
;
450 xa_for_each(&ns
->pr_per_ctrl_refs
, idx
, pc_ref
) {
451 if (uuid_equal(&pc_ref
->hostid
, hostid
)) {
452 percpu_ref_kill_and_confirm(&pc_ref
->ref
,
453 nvmet_pr_confirm_ns_pc_ref
);
454 wait_for_completion(&pc_ref
->confirm_done
);
459 static u16
nvmet_pr_unreg_all_host_by_prkey(struct nvmet_req
*req
, u64 prkey
,
463 u16 status
= NVME_SC_RESERVATION_CONFLICT
| NVME_STATUS_DNR
;
464 struct nvmet_pr_registrant
*reg
, *tmp
;
465 struct nvmet_pr
*pr
= &req
->ns
->pr
;
468 list_for_each_entry_safe(reg
, tmp
, &pr
->registrant_list
, entry
) {
469 if (reg
->rkey
== prkey
) {
470 status
= NVME_SC_SUCCESS
;
471 uuid_copy(&hostid
, ®
->hostid
);
473 nvmet_pr_set_ctrl_to_abort(req
, &hostid
);
474 nvmet_pr_unregister_one(pr
, reg
);
475 if (!uuid_equal(&hostid
, send_hostid
))
476 nvmet_pr_registration_preempted(pr
, &hostid
);
482 static void nvmet_pr_unreg_all_others_by_prkey(struct nvmet_req
*req
,
487 struct nvmet_pr_registrant
*reg
, *tmp
;
488 struct nvmet_pr
*pr
= &req
->ns
->pr
;
491 list_for_each_entry_safe(reg
, tmp
, &pr
->registrant_list
, entry
) {
492 if (reg
->rkey
== prkey
&&
493 !uuid_equal(®
->hostid
, send_hostid
)) {
494 uuid_copy(&hostid
, ®
->hostid
);
496 nvmet_pr_set_ctrl_to_abort(req
, &hostid
);
497 nvmet_pr_unregister_one(pr
, reg
);
498 nvmet_pr_registration_preempted(pr
, &hostid
);
503 static void nvmet_pr_unreg_all_others(struct nvmet_req
*req
,
507 struct nvmet_pr_registrant
*reg
, *tmp
;
508 struct nvmet_pr
*pr
= &req
->ns
->pr
;
511 list_for_each_entry_safe(reg
, tmp
, &pr
->registrant_list
, entry
) {
512 if (!uuid_equal(®
->hostid
, send_hostid
)) {
513 uuid_copy(&hostid
, ®
->hostid
);
515 nvmet_pr_set_ctrl_to_abort(req
, &hostid
);
516 nvmet_pr_unregister_one(pr
, reg
);
517 nvmet_pr_registration_preempted(pr
, &hostid
);
522 static void nvmet_pr_update_holder_rtype(struct nvmet_pr_registrant
*reg
,
525 u8 new_rtype
= *(u8
*)attr
;
527 reg
->rtype
= new_rtype
;
530 static u16
nvmet_pr_preempt(struct nvmet_req
*req
,
531 struct nvmet_pr_registrant
*reg
,
533 struct nvmet_pr_acquire_data
*d
,
536 struct nvmet_ctrl
*ctrl
= req
->sq
->ctrl
;
537 struct nvmet_pr
*pr
= &req
->ns
->pr
;
538 struct nvmet_pr_registrant
*holder
;
539 enum nvme_pr_type original_rtype
;
540 u64 prkey
= le64_to_cpu(d
->prkey
);
543 holder
= rcu_dereference_protected(pr
->holder
, 1);
545 return nvmet_pr_unreg_all_host_by_prkey(req
, prkey
,
546 &ctrl
->hostid
, abort
);
548 original_rtype
= holder
->rtype
;
549 if (original_rtype
== NVME_PR_WRITE_EXCLUSIVE_ALL_REGS
||
550 original_rtype
== NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS
) {
553 * To prevent possible access from other hosts, and
554 * avoid terminate the holder, set the new holder
555 * first before unregistering.
557 nvmet_pr_set_new_holder(pr
, rtype
, reg
);
558 nvmet_pr_unreg_all_others(req
, &ctrl
->hostid
, abort
);
559 return NVME_SC_SUCCESS
;
561 return nvmet_pr_unreg_all_host_by_prkey(req
, prkey
,
562 &ctrl
->hostid
, abort
);
566 status
= nvmet_pr_update_reg_attr(pr
, holder
,
567 nvmet_pr_update_holder_rtype
, &rtype
);
568 if (!status
&& original_rtype
!= rtype
)
569 nvmet_pr_resv_released(pr
, ®
->hostid
);
573 if (prkey
== holder
->rkey
) {
575 * Same as before, set the new holder first.
577 nvmet_pr_set_new_holder(pr
, rtype
, reg
);
578 nvmet_pr_unreg_all_others_by_prkey(req
, prkey
, &ctrl
->hostid
,
580 if (original_rtype
!= rtype
)
581 nvmet_pr_resv_released(pr
, ®
->hostid
);
582 return NVME_SC_SUCCESS
;
586 return nvmet_pr_unreg_all_host_by_prkey(req
, prkey
,
587 &ctrl
->hostid
, abort
);
588 return NVME_SC_INVALID_FIELD
| NVME_STATUS_DNR
;
591 static void nvmet_pr_do_abort(struct work_struct
*w
)
593 struct nvmet_req
*req
= container_of(w
, struct nvmet_req
, r
.abort_work
);
594 struct nvmet_pr_per_ctrl_ref
*pc_ref
;
595 struct nvmet_ns
*ns
= req
->ns
;
599 * The target does not support abort, just wait per-controller ref to 0.
601 xa_for_each(&ns
->pr_per_ctrl_refs
, idx
, pc_ref
) {
602 if (percpu_ref_is_dying(&pc_ref
->ref
)) {
603 wait_for_completion(&pc_ref
->free_done
);
604 reinit_completion(&pc_ref
->confirm_done
);
605 reinit_completion(&pc_ref
->free_done
);
606 percpu_ref_resurrect(&pc_ref
->ref
);
611 nvmet_req_complete(req
, NVME_SC_SUCCESS
);
614 static u16
__nvmet_execute_pr_acquire(struct nvmet_req
*req
,
615 struct nvmet_pr_registrant
*reg
,
618 struct nvmet_pr_acquire_data
*d
)
622 switch (acquire_act
) {
623 case NVME_PR_ACQUIRE_ACT_ACQUIRE
:
624 status
= nvmet_pr_acquire(req
, reg
, rtype
);
626 case NVME_PR_ACQUIRE_ACT_PREEMPT
:
627 status
= nvmet_pr_preempt(req
, reg
, rtype
, d
, false);
629 case NVME_PR_ACQUIRE_ACT_PREEMPT_AND_ABORT
:
630 status
= nvmet_pr_preempt(req
, reg
, rtype
, d
, true);
633 req
->error_loc
= offsetof(struct nvme_common_command
, cdw10
);
634 status
= NVME_SC_INVALID_OPCODE
| NVME_STATUS_DNR
;
639 atomic_inc(&req
->ns
->pr
.generation
);
644 static void nvmet_execute_pr_acquire(struct nvmet_req
*req
)
646 u32 cdw10
= le32_to_cpu(req
->cmd
->common
.cdw10
);
647 bool ignore_key
= nvmet_pr_parse_ignore_key(cdw10
);
648 /* Reservation type, bit 15:08 */
649 u8 rtype
= (u8
)((cdw10
>> 8) & 0xff);
650 /* Reservation acquire action, bit 02:00 */
651 u8 acquire_act
= cdw10
& 0x07;
652 struct nvmet_ctrl
*ctrl
= req
->sq
->ctrl
;
653 struct nvmet_pr_acquire_data
*d
= NULL
;
654 struct nvmet_pr
*pr
= &req
->ns
->pr
;
655 struct nvmet_pr_registrant
*reg
;
656 u16 status
= NVME_SC_SUCCESS
;
659 rtype
< NVME_PR_WRITE_EXCLUSIVE
||
660 rtype
> NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS
) {
661 status
= NVME_SC_INVALID_FIELD
| NVME_STATUS_DNR
;
665 d
= kmalloc(sizeof(*d
), GFP_KERNEL
);
667 status
= NVME_SC_INTERNAL
;
671 status
= nvmet_copy_from_sgl(req
, 0, d
, sizeof(*d
));
675 status
= NVME_SC_RESERVATION_CONFLICT
| NVME_STATUS_DNR
;
677 list_for_each_entry_rcu(reg
, &pr
->registrant_list
, entry
) {
678 if (uuid_equal(®
->hostid
, &ctrl
->hostid
) &&
679 reg
->rkey
== le64_to_cpu(d
->crkey
)) {
680 status
= __nvmet_execute_pr_acquire(req
, reg
,
681 acquire_act
, rtype
, d
);
686 if (!status
&& acquire_act
== NVME_PR_ACQUIRE_ACT_PREEMPT_AND_ABORT
) {
688 INIT_WORK(&req
->r
.abort_work
, nvmet_pr_do_abort
);
689 queue_work(nvmet_wq
, &req
->r
.abort_work
);
698 nvmet_req_complete(req
, status
);
701 static u16
nvmet_pr_release(struct nvmet_req
*req
,
702 struct nvmet_pr_registrant
*reg
,
705 struct nvmet_pr
*pr
= &req
->ns
->pr
;
706 struct nvmet_pr_registrant
*holder
;
709 holder
= rcu_dereference_protected(pr
->holder
, 1);
710 if (!holder
|| reg
!= holder
)
711 return NVME_SC_SUCCESS
;
713 original_rtype
= holder
->rtype
;
714 if (original_rtype
!= rtype
)
715 return NVME_SC_RESERVATION_CONFLICT
| NVME_STATUS_DNR
;
717 rcu_assign_pointer(pr
->holder
, NULL
);
719 if (original_rtype
!= NVME_PR_WRITE_EXCLUSIVE
&&
720 original_rtype
!= NVME_PR_EXCLUSIVE_ACCESS
)
721 nvmet_pr_resv_released(pr
, ®
->hostid
);
723 return NVME_SC_SUCCESS
;
726 static void nvmet_pr_clear(struct nvmet_req
*req
)
728 struct nvmet_pr_registrant
*reg
, *tmp
;
729 struct nvmet_pr
*pr
= &req
->ns
->pr
;
731 rcu_assign_pointer(pr
->holder
, NULL
);
733 list_for_each_entry_safe(reg
, tmp
, &pr
->registrant_list
, entry
) {
734 list_del_rcu(®
->entry
);
735 if (!uuid_equal(&req
->sq
->ctrl
->hostid
, ®
->hostid
))
736 nvmet_pr_resv_preempted(pr
, ®
->hostid
);
740 atomic_inc(&pr
->generation
);
743 static u16
__nvmet_execute_pr_release(struct nvmet_req
*req
,
744 struct nvmet_pr_registrant
*reg
,
745 u8 release_act
, u8 rtype
)
747 switch (release_act
) {
748 case NVME_PR_RELEASE_ACT_RELEASE
:
749 return nvmet_pr_release(req
, reg
, rtype
);
750 case NVME_PR_RELEASE_ACT_CLEAR
:
752 return NVME_SC_SUCCESS
;
754 req
->error_loc
= offsetof(struct nvme_common_command
, cdw10
);
755 return NVME_SC_INVALID_OPCODE
| NVME_STATUS_DNR
;
759 static void nvmet_execute_pr_release(struct nvmet_req
*req
)
761 u32 cdw10
= le32_to_cpu(req
->cmd
->common
.cdw10
);
762 bool ignore_key
= nvmet_pr_parse_ignore_key(cdw10
);
763 u8 rtype
= (u8
)((cdw10
>> 8) & 0xff); /* Reservation type, bit 15:08 */
764 u8 release_act
= cdw10
& 0x07; /* Reservation release action, bit 02:00 */
765 struct nvmet_ctrl
*ctrl
= req
->sq
->ctrl
;
766 struct nvmet_pr
*pr
= &req
->ns
->pr
;
767 struct nvmet_pr_release_data
*d
;
768 struct nvmet_pr_registrant
*reg
;
772 status
= NVME_SC_INVALID_FIELD
| NVME_STATUS_DNR
;
776 d
= kmalloc(sizeof(*d
), GFP_KERNEL
);
778 status
= NVME_SC_INTERNAL
;
782 status
= nvmet_copy_from_sgl(req
, 0, d
, sizeof(*d
));
786 status
= NVME_SC_RESERVATION_CONFLICT
| NVME_STATUS_DNR
;
788 list_for_each_entry_rcu(reg
, &pr
->registrant_list
, entry
) {
789 if (uuid_equal(®
->hostid
, &ctrl
->hostid
) &&
790 reg
->rkey
== le64_to_cpu(d
->crkey
)) {
791 status
= __nvmet_execute_pr_release(req
, reg
,
800 nvmet_req_complete(req
, status
);
803 static void nvmet_execute_pr_report(struct nvmet_req
*req
)
805 u32 cdw11
= le32_to_cpu(req
->cmd
->common
.cdw11
);
806 u32 cdw10
= le32_to_cpu(req
->cmd
->common
.cdw10
);
807 u32 num_bytes
= 4 * (cdw10
+ 1); /* cdw10 is number of dwords */
808 u8 eds
= cdw11
& 1; /* Extended data structure, bit 00 */
809 struct nvme_registered_ctrl_ext
*ctrl_eds
;
810 struct nvme_reservation_status_ext
*data
;
811 struct nvmet_pr
*pr
= &req
->ns
->pr
;
812 struct nvmet_pr_registrant
*holder
;
813 struct nvmet_pr_registrant
*reg
;
818 /* nvmet hostid(uuid_t) is 128 bit. */
820 req
->error_loc
= offsetof(struct nvme_common_command
, cdw11
);
821 status
= NVME_SC_HOST_ID_INCONSIST
| NVME_STATUS_DNR
;
825 if (num_bytes
< sizeof(struct nvme_reservation_status_ext
)) {
826 req
->error_loc
= offsetof(struct nvme_common_command
, cdw10
);
827 status
= NVME_SC_INVALID_FIELD
| NVME_STATUS_DNR
;
831 data
= kzalloc(num_bytes
, GFP_KERNEL
);
833 status
= NVME_SC_INTERNAL
;
836 data
->gen
= cpu_to_le32(atomic_read(&pr
->generation
));
838 ctrl_eds
= data
->regctl_eds
;
841 holder
= rcu_dereference(pr
->holder
);
842 rtype
= holder
? holder
->rtype
: 0;
845 list_for_each_entry_rcu(reg
, &pr
->registrant_list
, entry
) {
848 * continue to get the number of all registrans.
850 if (((void *)ctrl_eds
+ sizeof(*ctrl_eds
)) >
851 ((void *)data
+ num_bytes
))
854 * Dynamic controller, set cntlid to 0xffff.
856 ctrl_eds
->cntlid
= cpu_to_le16(NVME_CNTLID_DYNAMIC
);
857 if (rtype
== NVME_PR_WRITE_EXCLUSIVE_ALL_REGS
||
858 rtype
== NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS
)
862 uuid_copy((uuid_t
*)&ctrl_eds
->hostid
, ®
->hostid
);
863 ctrl_eds
->rkey
= cpu_to_le64(reg
->rkey
);
868 put_unaligned_le16(num_ctrls
, data
->regctl
);
869 status
= nvmet_copy_to_sgl(req
, 0, data
, num_bytes
);
872 nvmet_req_complete(req
, status
);
875 u16
nvmet_parse_pr_cmd(struct nvmet_req
*req
)
877 struct nvme_command
*cmd
= req
->cmd
;
879 switch (cmd
->common
.opcode
) {
880 case nvme_cmd_resv_register
:
881 req
->execute
= nvmet_execute_pr_register
;
883 case nvme_cmd_resv_acquire
:
884 req
->execute
= nvmet_execute_pr_acquire
;
886 case nvme_cmd_resv_release
:
887 req
->execute
= nvmet_execute_pr_release
;
889 case nvme_cmd_resv_report
:
890 req
->execute
= nvmet_execute_pr_report
;
895 return NVME_SC_SUCCESS
;
898 static bool nvmet_is_req_write_cmd_group(struct nvmet_req
*req
)
900 u8 opcode
= req
->cmd
->common
.opcode
;
906 case nvme_cmd_write_zeroes
:
908 case nvme_cmd_zone_append
:
909 case nvme_cmd_zone_mgmt_send
:
918 static bool nvmet_is_req_read_cmd_group(struct nvmet_req
*req
)
920 u8 opcode
= req
->cmd
->common
.opcode
;
925 case nvme_cmd_zone_mgmt_recv
:
934 u16
nvmet_pr_check_cmd_access(struct nvmet_req
*req
)
936 struct nvmet_ctrl
*ctrl
= req
->sq
->ctrl
;
937 struct nvmet_pr_registrant
*holder
;
938 struct nvmet_ns
*ns
= req
->ns
;
939 struct nvmet_pr
*pr
= &ns
->pr
;
940 u16 status
= NVME_SC_SUCCESS
;
943 holder
= rcu_dereference(pr
->holder
);
946 if (uuid_equal(&ctrl
->hostid
, &holder
->hostid
))
950 * The Reservation command group is checked in executing,
953 switch (holder
->rtype
) {
954 case NVME_PR_WRITE_EXCLUSIVE
:
955 if (nvmet_is_req_write_cmd_group(req
))
956 status
= NVME_SC_RESERVATION_CONFLICT
| NVME_STATUS_DNR
;
958 case NVME_PR_EXCLUSIVE_ACCESS
:
959 if (nvmet_is_req_read_cmd_group(req
) ||
960 nvmet_is_req_write_cmd_group(req
))
961 status
= NVME_SC_RESERVATION_CONFLICT
| NVME_STATUS_DNR
;
963 case NVME_PR_WRITE_EXCLUSIVE_REG_ONLY
:
964 case NVME_PR_WRITE_EXCLUSIVE_ALL_REGS
:
965 if ((nvmet_is_req_write_cmd_group(req
)) &&
966 !nvmet_pr_find_registrant(pr
, &ctrl
->hostid
))
967 status
= NVME_SC_RESERVATION_CONFLICT
| NVME_STATUS_DNR
;
969 case NVME_PR_EXCLUSIVE_ACCESS_REG_ONLY
:
970 case NVME_PR_EXCLUSIVE_ACCESS_ALL_REGS
:
971 if ((nvmet_is_req_read_cmd_group(req
) ||
972 nvmet_is_req_write_cmd_group(req
)) &&
973 !nvmet_pr_find_registrant(pr
, &ctrl
->hostid
))
974 status
= NVME_SC_RESERVATION_CONFLICT
| NVME_STATUS_DNR
;
977 pr_warn("the reservation type is set wrong, type:%d\n",
985 req
->error_loc
= offsetof(struct nvme_common_command
, opcode
);
989 u16
nvmet_pr_get_ns_pc_ref(struct nvmet_req
*req
)
991 struct nvmet_pr_per_ctrl_ref
*pc_ref
;
993 pc_ref
= xa_load(&req
->ns
->pr_per_ctrl_refs
,
994 req
->sq
->ctrl
->cntlid
);
995 if (unlikely(!percpu_ref_tryget_live(&pc_ref
->ref
)))
996 return NVME_SC_INTERNAL
;
997 req
->pc_ref
= pc_ref
;
998 return NVME_SC_SUCCESS
;
1001 static void nvmet_pr_ctrl_ns_all_cmds_done(struct percpu_ref
*ref
)
1003 struct nvmet_pr_per_ctrl_ref
*pc_ref
=
1004 container_of(ref
, struct nvmet_pr_per_ctrl_ref
, ref
);
1006 complete(&pc_ref
->free_done
);
1009 static int nvmet_pr_alloc_and_insert_pc_ref(struct nvmet_ns
*ns
,
1013 struct nvmet_pr_per_ctrl_ref
*pc_ref
;
1016 pc_ref
= kmalloc(sizeof(*pc_ref
), GFP_ATOMIC
);
1020 ret
= percpu_ref_init(&pc_ref
->ref
, nvmet_pr_ctrl_ns_all_cmds_done
,
1021 PERCPU_REF_ALLOW_REINIT
, GFP_KERNEL
);
1025 init_completion(&pc_ref
->free_done
);
1026 init_completion(&pc_ref
->confirm_done
);
1027 uuid_copy(&pc_ref
->hostid
, hostid
);
1029 ret
= xa_insert(&ns
->pr_per_ctrl_refs
, idx
, pc_ref
, GFP_KERNEL
);
1034 percpu_ref_exit(&pc_ref
->ref
);
1040 int nvmet_ctrl_init_pr(struct nvmet_ctrl
*ctrl
)
1042 struct nvmet_subsys
*subsys
= ctrl
->subsys
;
1043 struct nvmet_pr_per_ctrl_ref
*pc_ref
;
1044 struct nvmet_ns
*ns
= NULL
;
1048 ctrl
->pr_log_mgr
.counter
= 0;
1049 ctrl
->pr_log_mgr
.lost_count
= 0;
1050 mutex_init(&ctrl
->pr_log_mgr
.lock
);
1051 INIT_KFIFO(ctrl
->pr_log_mgr
.log_queue
);
1054 * Here we are under subsys lock, if an ns not in subsys->namespaces,
1055 * we can make sure that ns is not enabled, and not call
1056 * nvmet_pr_init_ns(), see more details in nvmet_ns_enable().
1057 * So just check ns->pr.enable.
1059 xa_for_each(&subsys
->namespaces
, idx
, ns
) {
1060 if (ns
->pr
.enable
) {
1061 ret
= nvmet_pr_alloc_and_insert_pc_ref(ns
, ctrl
->cntlid
,
1064 goto free_per_ctrl_refs
;
1070 xa_for_each(&subsys
->namespaces
, idx
, ns
) {
1071 if (ns
->pr
.enable
) {
1072 pc_ref
= xa_erase(&ns
->pr_per_ctrl_refs
, ctrl
->cntlid
);
1074 percpu_ref_exit(&pc_ref
->ref
);
1081 void nvmet_ctrl_destroy_pr(struct nvmet_ctrl
*ctrl
)
1083 struct nvmet_pr_per_ctrl_ref
*pc_ref
;
1084 struct nvmet_ns
*ns
;
1087 kfifo_free(&ctrl
->pr_log_mgr
.log_queue
);
1088 mutex_destroy(&ctrl
->pr_log_mgr
.lock
);
1090 xa_for_each(&ctrl
->subsys
->namespaces
, idx
, ns
) {
1091 if (ns
->pr
.enable
) {
1092 pc_ref
= xa_erase(&ns
->pr_per_ctrl_refs
, ctrl
->cntlid
);
1094 percpu_ref_exit(&pc_ref
->ref
);
1100 int nvmet_pr_init_ns(struct nvmet_ns
*ns
)
1102 struct nvmet_subsys
*subsys
= ns
->subsys
;
1103 struct nvmet_pr_per_ctrl_ref
*pc_ref
;
1104 struct nvmet_ctrl
*ctrl
= NULL
;
1108 ns
->pr
.holder
= NULL
;
1109 atomic_set(&ns
->pr
.generation
, 0);
1110 sema_init(&ns
->pr
.pr_sem
, 1);
1111 INIT_LIST_HEAD(&ns
->pr
.registrant_list
);
1112 ns
->pr
.notify_mask
= 0;
1114 xa_init(&ns
->pr_per_ctrl_refs
);
1116 list_for_each_entry(ctrl
, &subsys
->ctrls
, subsys_entry
) {
1117 ret
= nvmet_pr_alloc_and_insert_pc_ref(ns
, ctrl
->cntlid
,
1120 goto free_per_ctrl_refs
;
1125 xa_for_each(&ns
->pr_per_ctrl_refs
, idx
, pc_ref
) {
1126 xa_erase(&ns
->pr_per_ctrl_refs
, idx
);
1127 percpu_ref_exit(&pc_ref
->ref
);
1133 void nvmet_pr_exit_ns(struct nvmet_ns
*ns
)
1135 struct nvmet_pr_registrant
*reg
, *tmp
;
1136 struct nvmet_pr_per_ctrl_ref
*pc_ref
;
1137 struct nvmet_pr
*pr
= &ns
->pr
;
1140 list_for_each_entry_safe(reg
, tmp
, &pr
->registrant_list
, entry
) {
1141 list_del(®
->entry
);
1145 xa_for_each(&ns
->pr_per_ctrl_refs
, idx
, pc_ref
) {
1147 * No command on ns here, we can safely free pc_ref.
1149 pc_ref
= xa_erase(&ns
->pr_per_ctrl_refs
, idx
);
1150 percpu_ref_exit(&pc_ref
->ref
);
1154 xa_destroy(&ns
->pr_per_ctrl_refs
);