/*
 * CXL Flash Device Driver
 *
 * Written by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>, IBM Corporation
 *             Matthew R. Ochs <mrochs@linux.vnet.ibm.com>, IBM Corporation
 *
 * Copyright (C) 2015 IBM Corporation
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version
 * 2 of the License, or (at your option) any later version.
 */
#include <linux/interrupt.h>
#include <linux/pci.h>
#include <linux/syscalls.h>
#include <asm/unaligned.h>
#include <asm/bitsperlong.h>

#include <scsi/scsi_cmnd.h>
#include <scsi/scsi_host.h>
#include <uapi/scsi/cxlflash_ioctl.h>

#include "sislite.h"
#include "common.h"
#include "vlun.h"
#include "superpipe.h"
/**
 * marshal_virt_to_resize() - translate uvirtual to resize structure
 * @virt:	Source structure from which to translate/copy.
 * @resize:	Destination structure for the translate/copy.
 */
static void marshal_virt_to_resize(struct dk_cxlflash_uvirtual *virt,
				   struct dk_cxlflash_resize *resize)
{
	resize->hdr = virt->hdr;
	resize->context_id = virt->context_id;
	resize->rsrc_handle = virt->rsrc_handle;
	resize->req_size = virt->lun_size;
	resize->last_lba = virt->last_lba;
}
/**
 * marshal_clone_to_rele() - translate clone to release structure
 * @clone:	Source structure from which to translate/copy.
 * @release:	Destination structure for the translate/copy.
 */
static void marshal_clone_to_rele(struct dk_cxlflash_clone *clone,
				  struct dk_cxlflash_release *release)
{
	release->hdr = clone->hdr;
	release->context_id = clone->context_id_dst;
}
/**
 * ba_init() - initializes a block allocator
 * @ba_lun:	Block allocator to initialize.
 *
 * Return: 0 on success, -errno on failure
 */
static int ba_init(struct ba_lun *ba_lun)
{
	struct ba_lun_info *bali = NULL;
	int lun_size_au = 0, i = 0;
	int last_word_underflow = 0;
	u64 *lam;

	pr_debug("%s: Initializing LUN: lun_id=%016llx "
		 "ba_lun->lsize=%lx ba_lun->au_size=%lX\n",
		 __func__, ba_lun->lun_id, ba_lun->lsize, ba_lun->au_size);

	/* Calculate bit map size */
	lun_size_au = ba_lun->lsize / ba_lun->au_size;
	if (lun_size_au == 0) {
		pr_debug("%s: Requested LUN size of 0!\n", __func__);
		return -EINVAL;
	}

	/* Allocate lun information container */
	bali = kzalloc(sizeof(struct ba_lun_info), GFP_KERNEL);
	if (unlikely(!bali)) {
		pr_err("%s: Failed to allocate lun_info lun_id=%016llx\n",
		       __func__, ba_lun->lun_id);
		return -ENOMEM;
	}

	bali->total_aus = lun_size_au;
	bali->lun_bmap_size = lun_size_au / BITS_PER_LONG;

	if (lun_size_au % BITS_PER_LONG)
		bali->lun_bmap_size++;

	/* Allocate bitmap space */
	bali->lun_alloc_map = kzalloc((bali->lun_bmap_size * sizeof(u64)),
				      GFP_KERNEL);
	if (unlikely(!bali->lun_alloc_map)) {
		pr_err("%s: Failed to allocate lun allocation map: "
		       "lun_id=%016llx\n", __func__, ba_lun->lun_id);
		kfree(bali);
		return -ENOMEM;
	}

	/* Initialize the free count and set all bits to '1' (free) */
	bali->free_aun_cnt = lun_size_au;

	for (i = 0; i < bali->lun_bmap_size; i++)
		bali->lun_alloc_map[i] = 0xFFFFFFFFFFFFFFFFULL;

	/* If the last word is not fully utilized, mark extra bits as allocated */
	last_word_underflow = (bali->lun_bmap_size * BITS_PER_LONG);
	last_word_underflow -= bali->free_aun_cnt;
	if (last_word_underflow > 0) {
		lam = &bali->lun_alloc_map[bali->lun_bmap_size - 1];
		for (i = (HIBIT - last_word_underflow + 1);
		     i < BITS_PER_LONG;
		     i++)
			clear_bit(i, (ulong *)lam);
	}

	/* Initialize high elevator index, low/curr already at 0 from kzalloc */
	bali->free_high_idx = bali->lun_bmap_size;

	/* Allocate clone map */
	bali->aun_clone_map = kzalloc((bali->total_aus * sizeof(u8)),
				      GFP_KERNEL);
	if (unlikely(!bali->aun_clone_map)) {
		pr_err("%s: Failed to allocate clone map: lun_id=%016llx\n",
		       __func__, ba_lun->lun_id);
		kfree(bali->lun_alloc_map);
		kfree(bali);
		return -ENOMEM;
	}

	/* Pass the allocated LUN info as a handle to the user */
	ba_lun->ba_lun_handle = bali;

	pr_debug("%s: Successfully initialized the LUN: "
		 "lun_id=%016llx bitmap size=%x, free_aun_cnt=%llx\n",
		 __func__, ba_lun->lun_id, bali->lun_bmap_size,
		 bali->free_aun_cnt);
	return 0;
}
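/*
 * Worked example of the sizing above (illustrative only, not part of the
 * driver): with BITS_PER_LONG = 64, a LUN whose lsize/au_size quotient is
 * 100 AUs yields lun_bmap_size = 2 (100 / 64 = 1, plus 1 for the
 * remainder). The map then spans 128 bits, so last_word_underflow =
 * 128 - 100 = 28 and bits (HIBIT - 27)..HIBIT of the final word, i.e.
 * bits 36..63, are cleared so those nonexistent AUs can never be
 * handed out.
 */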
/**
 * find_free_range() - locates a free bit within the block allocator
 * @low:	First word in block allocator to start search.
 * @high:	Last word in block allocator to search.
 * @bali:	LUN information structure owning the block allocator to search.
 * @bit_word:	Passes back the word in the block allocator owning the free bit.
 *
 * Return: The bit position within the passed back word, -1 on failure
 */
static int find_free_range(u32 low,
			   u32 high,
			   struct ba_lun_info *bali, int *bit_word)
{
	int i;
	u64 bit_pos = -1;
	ulong *lam, num_bits;

	for (i = low; i < high; i++)
		if (bali->lun_alloc_map[i] != 0) {
			lam = (ulong *)&bali->lun_alloc_map[i];
			num_bits = (sizeof(*lam) * BITS_PER_BYTE);
			bit_pos = find_first_bit(lam, num_bits);

			pr_devel("%s: Found free bit %llu in LUN "
				 "map entry %016llx at bitmap index = %d\n",
				 __func__, bit_pos, bali->lun_alloc_map[i], i);

			*bit_word = i;
			bali->free_aun_cnt--;
			clear_bit(bit_pos, lam);
			break;
		}

	return bit_pos;
}
/**
 * ba_alloc() - allocates a block from the block allocator
 * @ba_lun:	Block allocator from which to allocate a block.
 *
 * Return: The allocated block, -1 on failure
 */
static u64 ba_alloc(struct ba_lun *ba_lun)
{
	u64 bit_pos = -1;
	int bit_word = 0;
	struct ba_lun_info *bali = NULL;

	bali = ba_lun->ba_lun_handle;

	pr_debug("%s: Received block allocation request: "
		 "lun_id=%016llx free_aun_cnt=%llx\n",
		 __func__, ba_lun->lun_id, bali->free_aun_cnt);

	if (bali->free_aun_cnt == 0) {
		pr_debug("%s: No space left on LUN: lun_id=%016llx\n",
			 __func__, ba_lun->lun_id);
		return -1ULL;
	}

	/* Search to find a free entry, curr->high then low->curr */
	bit_pos = find_free_range(bali->free_curr_idx,
				  bali->free_high_idx, bali, &bit_word);
	if (bit_pos == -1) {
		bit_pos = find_free_range(bali->free_low_idx,
					  bali->free_curr_idx,
					  bali, &bit_word);
		if (bit_pos == -1) {
			pr_debug("%s: Could not find an allocation unit on LUN:"
				 " lun_id=%016llx\n", __func__, ba_lun->lun_id);
			return -1ULL;
		}
	}

	/* Update the free_curr_idx */
	if (bit_pos == HIBIT)
		bali->free_curr_idx = bit_word + 1;
	else
		bali->free_curr_idx = bit_word;

	pr_debug("%s: Allocating AU number=%llx lun_id=%016llx "
		 "free_aun_cnt=%llx\n", __func__,
		 ((bit_word * BITS_PER_LONG) + bit_pos), ba_lun->lun_id,
		 bali->free_aun_cnt);

	return (u64) ((bit_word * BITS_PER_LONG) + bit_pos);
}
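/*
 * Illustrative sketch of the elevator search above (example values are
 * assumptions, not driver state): with free_curr_idx = 3 and
 * free_high_idx = 8, the first find_free_range() call scans words 3..7;
 * only if that fails does the second call wrap around and scan words
 * free_low_idx..free_curr_idx. A hit at bit 5 of word 3 returns AU
 * number (3 * BITS_PER_LONG) + 5, and free_curr_idx stays at word 3
 * unless the word's highest bit (HIBIT) was consumed, in which case the
 * cursor advances to word 4.
 */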
/**
 * validate_alloc() - validates the specified block has been allocated
 * @bali:	LUN info owning the block allocator.
 * @aun:	Block to validate.
 *
 * Return: 0 on success, -1 on failure
 */
static int validate_alloc(struct ba_lun_info *bali, u64 aun)
{
	int idx = 0, bit_pos = 0;

	idx = aun / BITS_PER_LONG;
	bit_pos = aun % BITS_PER_LONG;

	/* A set bit means the AU is free, i.e. was never allocated */
	if (test_bit(bit_pos, (ulong *)&bali->lun_alloc_map[idx]))
		return -1;

	return 0;
}
/**
 * ba_free() - frees a block from the block allocator
 * @ba_lun:	Block allocator from which to free a block.
 * @to_free:	Block to free.
 *
 * Return: 0 on success, -1 on failure
 */
static int ba_free(struct ba_lun *ba_lun, u64 to_free)
{
	int idx = 0, bit_pos = 0;
	struct ba_lun_info *bali = NULL;

	bali = ba_lun->ba_lun_handle;

	if (validate_alloc(bali, to_free)) {
		pr_debug("%s: AUN %llx is not allocated on lun_id=%016llx\n",
			 __func__, to_free, ba_lun->lun_id);
		return -1;
	}

	pr_debug("%s: Received a request to free AU=%llx lun_id=%016llx "
		 "free_aun_cnt=%llx\n", __func__, to_free, ba_lun->lun_id,
		 bali->free_aun_cnt);

	if (bali->aun_clone_map[to_free] > 0) {
		pr_debug("%s: AUN %llx lun_id=%016llx cloned. Clone count=%x\n",
			 __func__, to_free, ba_lun->lun_id,
			 bali->aun_clone_map[to_free]);
		bali->aun_clone_map[to_free]--;
		return 0;
	}

	idx = to_free / BITS_PER_LONG;
	bit_pos = to_free % BITS_PER_LONG;

	set_bit(bit_pos, (ulong *)&bali->lun_alloc_map[idx]);
	bali->free_aun_cnt++;

	if (idx < bali->free_low_idx)
		bali->free_low_idx = idx;
	else if (idx > bali->free_high_idx)
		bali->free_high_idx = idx;

	pr_debug("%s: Successfully freed AU bit_pos=%x bit map index=%x "
		 "lun_id=%016llx free_aun_cnt=%llx\n", __func__, bit_pos, idx,
		 ba_lun->lun_id, bali->free_aun_cnt);

	return 0;
}
/**
 * ba_clone() - Clone a chunk of the block allocation table
 * @ba_lun:	Block allocator owning the block to clone.
 * @to_clone:	Block to clone.
 *
 * Return: 0 on success, -1 on failure
 */
static int ba_clone(struct ba_lun *ba_lun, u64 to_clone)
{
	struct ba_lun_info *bali = ba_lun->ba_lun_handle;

	if (validate_alloc(bali, to_clone)) {
		pr_debug("%s: AUN=%llx not allocated on lun_id=%016llx\n",
			 __func__, to_clone, ba_lun->lun_id);
		return -1;
	}

	pr_debug("%s: Received a request to clone AUN %llx on lun_id=%016llx\n",
		 __func__, to_clone, ba_lun->lun_id);

	if (bali->aun_clone_map[to_clone] == MAX_AUN_CLONE_CNT) {
		pr_debug("%s: AUN %llx on lun_id=%016llx hit max clones already\n",
			 __func__, to_clone, ba_lun->lun_id);
		return -1;
	}

	bali->aun_clone_map[to_clone]++;

	return 0;
}
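/*
 * Example of how the clone count interacts with ba_free() (illustrative
 * only, AU number 7 is an arbitrary example): after ba_clone(ba_lun, 7)
 * the AU remains allocated and aun_clone_map[7] == 1. A subsequent
 * ba_free(ba_lun, 7) merely drops the clone count back to 0 and returns;
 * a second ba_free() is required before the bit is set in lun_alloc_map
 * and free_aun_cnt grows. Each sharer thus frees independently and the
 * AU is reclaimed only on the last release.
 */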
/**
 * ba_space() - returns the amount of free space left in the block allocator
 * @ba_lun:	Block allocator.
 *
 * Return: Amount of free space in block allocator
 */
static u64 ba_space(struct ba_lun *ba_lun)
{
	struct ba_lun_info *bali = ba_lun->ba_lun_handle;

	return bali->free_aun_cnt;
}
/**
 * cxlflash_ba_terminate() - frees resources associated with the block allocator
 * @ba_lun:	Block allocator.
 *
 * Safe to call in a partially allocated state.
 */
void cxlflash_ba_terminate(struct ba_lun *ba_lun)
{
	struct ba_lun_info *bali = ba_lun->ba_lun_handle;

	if (bali) {
		kfree(bali->aun_clone_map);
		kfree(bali->lun_alloc_map);
		kfree(bali);
		ba_lun->ba_lun_handle = NULL;
	}
}
/**
 * init_vlun() - initializes a LUN for virtual use
 * @lli:	LUN information structure that owns the block allocator.
 *
 * Return: 0 on success, -errno on failure
 */
static int init_vlun(struct llun_info *lli)
{
	int rc = 0;
	struct glun_info *gli = lli->parent;
	struct blka *blka = &gli->blka;

	memset(blka, 0, sizeof(*blka));
	mutex_init(&blka->mutex);

	/* LUN IDs are unique per port, save the index instead */
	blka->ba_lun.lun_id = lli->lun_index;
	blka->ba_lun.lsize = gli->max_lba + 1;
	blka->ba_lun.lba_size = gli->blk_len;

	blka->ba_lun.au_size = MC_CHUNK_SIZE;
	blka->nchunk = blka->ba_lun.lsize / MC_CHUNK_SIZE;

	rc = ba_init(&blka->ba_lun);
	if (unlikely(rc))
		pr_debug("%s: cannot init block_alloc, rc=%d\n", __func__, rc);

	pr_debug("%s: returning rc=%d lli=%p\n", __func__, rc, lli);
	return rc;
}
/**
 * write_same16() - sends a SCSI WRITE_SAME16 (0) command to specified LUN
 * @sdev:	SCSI device associated with LUN.
 * @lba:	Logical block address to start write same.
 * @nblks:	Number of logical blocks to write same.
 *
 * The SCSI WRITE_SAME16 can take quite a while to complete. Should an EEH occur
 * while in scsi_execute(), the EEH handler will attempt to recover. As part of
 * the recovery, the handler drains all currently running ioctls, waiting until
 * they have completed before proceeding with a reset. As this routine is used
 * on the ioctl path, this can create a condition where the EEH handler becomes
 * stuck, infinitely waiting for this ioctl thread. To avoid this behavior,
 * temporarily unmark this thread as an ioctl thread by releasing the ioctl read
 * semaphore. This will allow the EEH handler to proceed with a recovery while
 * this thread is still running. Once the scsi_execute() returns, reacquire the
 * ioctl read semaphore and check the adapter state in case it changed while
 * inside of scsi_execute(). The state check will wait if the adapter is still
 * being recovered or return a failure if the recovery failed. In the event that
 * the adapter reset failed, simply return the failure as the ioctl would be
 * unable to continue.
 *
 * Note that the above puts a requirement on this routine to only be called on
 * an ioctl thread.
 *
 * Return: 0 on success, -errno on failure
 */
static int write_same16(struct scsi_device *sdev,
			u64 lba,
			u32 nblks)
{
	int rc = 0;
	int result = 0;
	u8 *cmd_buf = NULL;
	u8 *scsi_cmd = NULL;
	u8 *sense_buf = NULL;
	int left = nblks;
	u64 offset = lba;
	struct cxlflash_cfg *cfg = shost_priv(sdev->host);
	struct device *dev = &cfg->dev->dev;
	const u32 s = ilog2(sdev->sector_size) - 9;
	const u32 to = sdev->request_queue->rq_timeout;
	const u32 ws_limit = blk_queue_get_max_sectors(sdev->request_queue,
						       REQ_OP_WRITE_SAME) >> s;

	cmd_buf = kzalloc(CMD_BUFSIZE, GFP_KERNEL);
	scsi_cmd = kzalloc(MAX_COMMAND_SIZE, GFP_KERNEL);
	sense_buf = kzalloc(SCSI_SENSE_BUFFERSIZE, GFP_KERNEL);
	if (unlikely(!cmd_buf || !scsi_cmd || !sense_buf)) {
		rc = -ENOMEM;
		goto out;
	}

	while (left > 0) {

		scsi_cmd[0] = WRITE_SAME_16;
		scsi_cmd[1] = cfg->ws_unmap ? 0x8 : 0;
		put_unaligned_be64(offset, &scsi_cmd[2]);
		put_unaligned_be32(ws_limit < left ? ws_limit : left,
				   &scsi_cmd[10]);

		/* Drop the ioctl read semaphore across lengthy call */
		up_read(&cfg->ioctl_rwsem);
		result = scsi_execute(sdev, scsi_cmd, DMA_TO_DEVICE, cmd_buf,
				      CMD_BUFSIZE, sense_buf, NULL, to,
				      CMD_RETRIES, 0, 0, NULL);
		down_read(&cfg->ioctl_rwsem);
		rc = check_state(cfg);
		if (rc) {
			dev_err(dev, "%s: Failed state result=%08x\n",
				__func__, result);
			rc = -ENODEV;
			goto out;
		}

		if (result) {
			dev_err_ratelimited(dev, "%s: command failed for "
					    "offset=%lld result=%08x\n",
					    __func__, offset, result);
			rc = -EIO;
			goto out;
		}
		left -= ws_limit;
		offset += ws_limit;
	}

out:
	kfree(cmd_buf);
	kfree(scsi_cmd);
	kfree(sense_buf);
	dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc);
	return rc;
}
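/*
 * The semaphore dance above generalizes to any lengthy blocking call made
 * from the ioctl path. A minimal sketch of the pattern, assuming a
 * hypothetical lengthy_blocking_call() stand-in for scsi_execute():
 *
 *	up_read(&cfg->ioctl_rwsem);     // stop posing as an ioctl thread
 *	result = lengthy_blocking_call();
 *	down_read(&cfg->ioctl_rwsem);   // rejoin the ioctl population
 *	rc = check_state(cfg);          // adapter may have reset meanwhile
 *	if (rc)
 *		return rc;              // recovery failed or still pending
 */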
/**
 * grow_lxt() - expands the translation table associated with the specified RHTE
 * @afu:	AFU associated with the host.
 * @sdev:	SCSI device associated with LUN.
 * @ctxid:	Context ID of context owning the RHTE.
 * @rhndl:	Resource handle associated with the RHTE.
 * @rhte:	Resource handle entry (RHTE).
 * @new_size:	Number of translation entries associated with RHTE.
 *
 * By design, this routine employs a 'best attempt' allocation and will
 * truncate the requested size down if there is not sufficient space in
 * the block allocator to satisfy the request but there does exist some
 * amount of space. The user is made aware of this by returning the size
 * allocated.
 *
 * Return: 0 on success, -errno on failure
 */
static int grow_lxt(struct afu *afu,
		    struct scsi_device *sdev,
		    ctx_hndl_t ctxid,
		    res_hndl_t rhndl,
		    struct sisl_rht_entry *rhte,
		    u64 *new_size)
{
	struct cxlflash_cfg *cfg = shost_priv(sdev->host);
	struct device *dev = &cfg->dev->dev;
	struct sisl_lxt_entry *lxt = NULL, *lxt_old = NULL;
	struct llun_info *lli = sdev->hostdata;
	struct glun_info *gli = lli->parent;
	struct blka *blka = &gli->blka;
	u32 av_size;
	u32 ngrps, ngrps_old;
	u64 aun;		/* chunk# allocated by block allocator */
	u64 delta = *new_size - rhte->lxt_cnt;
	u64 my_new_size;
	int i, rc = 0;

	/*
	 * Check what is available in the block allocator before re-allocating
	 * LXT array. This is done up front under the mutex which must not be
	 * released until after allocation is complete.
	 */
	mutex_lock(&blka->mutex);
	av_size = ba_space(&blka->ba_lun);
	if (unlikely(av_size <= 0)) {
		dev_dbg(dev, "%s: ba_space error av_size=%d\n",
			__func__, av_size);
		mutex_unlock(&blka->mutex);
		rc = -ENOSPC;
		goto out;
	}

	if (av_size < delta)
		delta = av_size;

	lxt_old = rhte->lxt_start;
	ngrps_old = LXT_NUM_GROUPS(rhte->lxt_cnt);
	ngrps = LXT_NUM_GROUPS(rhte->lxt_cnt + delta);

	if (ngrps != ngrps_old) {
		/* reallocate to fit new size */
		lxt = kzalloc((sizeof(*lxt) * LXT_GROUP_SIZE * ngrps),
			      GFP_KERNEL);
		if (unlikely(!lxt)) {
			mutex_unlock(&blka->mutex);
			rc = -ENOMEM;
			goto out;
		}

		/* copy over all old entries */
		memcpy(lxt, lxt_old, (sizeof(*lxt) * rhte->lxt_cnt));
	} else
		lxt = lxt_old;

	/* nothing can fail from now on */
	my_new_size = rhte->lxt_cnt + delta;

	/* add new entries to the end */
	for (i = rhte->lxt_cnt; i < my_new_size; i++) {
		/*
		 * Due to the earlier check of available space, ba_alloc
		 * cannot fail here. If it did due to internal error,
		 * leave a rlba_base of -1u which will likely be an
		 * invalid LUN (too large).
		 */
		aun = ba_alloc(&blka->ba_lun);
		if ((aun == -1ULL) || (aun >= blka->nchunk))
			dev_dbg(dev, "%s: ba_alloc error allocated chunk=%llu "
				"max=%llu\n", __func__, aun, blka->nchunk - 1);

		/* select both ports, use r/w perms from RHT */
		lxt[i].rlba_base = ((aun << MC_CHUNK_SHIFT) |
				    (lli->lun_index << LXT_LUNIDX_SHIFT) |
				    (RHT_PERM_RW << LXT_PERM_SHIFT |
				     lli->port_sel));
	}

	mutex_unlock(&blka->mutex);

	/*
	 * The following sequence is prescribed in the SISlite spec
	 * for syncing up with the AFU when adding LXT entries.
	 */
	dma_wmb(); /* Make LXT updates visible */

	rhte->lxt_start = lxt;
	dma_wmb(); /* Make RHT entry's LXT table update visible */

	rhte->lxt_cnt = my_new_size;
	dma_wmb(); /* Make RHT entry's LXT table size update visible */

	rc = cxlflash_afu_sync(afu, ctxid, rhndl, AFU_LW_SYNC);
	if (unlikely(rc))
		rc = -EAGAIN;

	/* free old lxt if reallocated */
	if (lxt != lxt_old)
		kfree(lxt_old);
	*new_size = my_new_size;
out:
	dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc);
	return rc;
}
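/*
 * Illustrative layout of an LXT entry built above (field positions come
 * from the SISlite shift/mask constants; the concrete values here are
 * examples, not driver state): for aun = 2, lun_index = 1, both port
 * bits set and RHT_PERM_RW, the single u64 packs
 *
 *	rlba_base = (2 << MC_CHUNK_SHIFT)	// chunk-aligned RLBA
 *		  | (1 << LXT_LUNIDX_SHIFT)	// LUN table slot
 *		  | (RHT_PERM_RW << LXT_PERM_SHIFT | lli->port_sel);
 *
 * so one word carries the translation, the backing LUN table slot and
 * the permission/port-select bits consumed by the AFU.
 */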
/**
 * shrink_lxt() - reduces translation table associated with the specified RHTE
 * @afu:	AFU associated with the host.
 * @sdev:	SCSI device associated with LUN.
 * @rhndl:	Resource handle associated with the RHTE.
 * @rhte:	Resource handle entry (RHTE).
 * @ctxi:	Context owning resources.
 * @new_size:	Number of translation entries associated with RHTE.
 *
 * Return: 0 on success, -errno on failure
 */
static int shrink_lxt(struct afu *afu,
		      struct scsi_device *sdev,
		      res_hndl_t rhndl,
		      struct sisl_rht_entry *rhte,
		      struct ctx_info *ctxi,
		      u64 *new_size)
{
	struct cxlflash_cfg *cfg = shost_priv(sdev->host);
	struct device *dev = &cfg->dev->dev;
	struct sisl_lxt_entry *lxt, *lxt_old;
	struct llun_info *lli = sdev->hostdata;
	struct glun_info *gli = lli->parent;
	struct blka *blka = &gli->blka;
	ctx_hndl_t ctxid = DECODE_CTXID(ctxi->ctxid);
	bool needs_ws = ctxi->rht_needs_ws[rhndl];
	bool needs_sync = !ctxi->err_recovery_active;
	u32 ngrps, ngrps_old;
	u64 aun;		/* chunk# allocated by block allocator */
	u64 delta = rhte->lxt_cnt - *new_size;
	u64 my_new_size;
	int i, rc = 0;

	lxt_old = rhte->lxt_start;
	ngrps_old = LXT_NUM_GROUPS(rhte->lxt_cnt);
	ngrps = LXT_NUM_GROUPS(rhte->lxt_cnt - delta);

	if (ngrps != ngrps_old) {
		/* Reallocate to fit new size unless new size is 0 */
		if (ngrps) {
			lxt = kzalloc((sizeof(*lxt) * LXT_GROUP_SIZE * ngrps),
				      GFP_KERNEL);
			if (unlikely(!lxt)) {
				rc = -ENOMEM;
				goto out;
			}

			/* Copy over old entries that will remain */
			memcpy(lxt, lxt_old,
			       (sizeof(*lxt) * (rhte->lxt_cnt - delta)));
		} else
			lxt = NULL;
	} else
		lxt = lxt_old;

	/* Nothing can fail from now on */
	my_new_size = rhte->lxt_cnt - delta;

	/*
	 * The following sequence is prescribed in the SISlite spec
	 * for syncing up with the AFU when removing LXT entries.
	 */
	rhte->lxt_cnt = my_new_size;
	dma_wmb(); /* Make RHT entry's LXT table size update visible */

	rhte->lxt_start = lxt;
	dma_wmb(); /* Make RHT entry's LXT table update visible */

	if (needs_sync) {
		rc = cxlflash_afu_sync(afu, ctxid, rhndl, AFU_HW_SYNC);
		if (unlikely(rc))
			rc = -EAGAIN;
	}

	if (needs_ws) {
		/*
		 * Mark the context as unavailable, so that we can release
		 * the mutex safely.
		 */
		ctxi->unavail = true;
		mutex_unlock(&ctxi->mutex);
	}

	/* Free LBAs allocated to freed chunks */
	mutex_lock(&blka->mutex);
	for (i = delta - 1; i >= 0; i--) {
		aun = lxt_old[my_new_size + i].rlba_base >> MC_CHUNK_SHIFT;
		if (needs_ws)
			write_same16(sdev, aun, MC_CHUNK_SIZE);
		ba_free(&blka->ba_lun, aun);
	}
	mutex_unlock(&blka->mutex);

	if (needs_ws) {
		/* Make the context visible again */
		mutex_lock(&ctxi->mutex);
		ctxi->unavail = false;
	}

	/* Free old lxt if reallocated */
	if (lxt != lxt_old)
		kfree(lxt_old);
	*new_size = my_new_size;
out:
	dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc);
	return rc;
}
/**
 * _cxlflash_vlun_resize() - changes the size of a virtual LUN
 * @sdev:	SCSI device associated with LUN owning virtual LUN.
 * @ctxi:	Context owning resources.
 * @resize:	Resize ioctl data structure.
 *
 * On successful return, the user is informed of the new size (in blocks)
 * of the virtual LUN in last LBA format. When the size of the virtual
 * LUN is zero, the last LBA is reflected as -1. See comment in the
 * prologue for _cxlflash_disk_release() regarding AFU syncs and contexts
 * on the error recovery list.
 *
 * Return: 0 on success, -errno on failure
 */
int _cxlflash_vlun_resize(struct scsi_device *sdev,
			  struct ctx_info *ctxi,
			  struct dk_cxlflash_resize *resize)
{
	struct cxlflash_cfg *cfg = shost_priv(sdev->host);
	struct device *dev = &cfg->dev->dev;
	struct llun_info *lli = sdev->hostdata;
	struct glun_info *gli = lli->parent;
	struct afu *afu = cfg->afu;
	bool put_ctx = false;

	res_hndl_t rhndl = resize->rsrc_handle;
	u64 new_size;
	u64 nsectors;
	u64 ctxid = DECODE_CTXID(resize->context_id),
	    rctxid = resize->context_id;

	struct sisl_rht_entry *rhte;

	int rc = 0;

	/*
	 * The requested size (req_size) is always assumed to be in 4k blocks,
	 * so we have to convert it here from 4k to chunk size.
	 */
	nsectors = (resize->req_size * CXLFLASH_BLOCK_SIZE) / gli->blk_len;
	new_size = DIV_ROUND_UP(nsectors, MC_CHUNK_SIZE);

	dev_dbg(dev, "%s: ctxid=%llu rhndl=%llu req_size=%llu new_size=%llu\n",
		__func__, ctxid, resize->rsrc_handle, resize->req_size,
		new_size);

	if (unlikely(gli->mode != MODE_VIRTUAL)) {
		dev_dbg(dev, "%s: LUN mode does not support resize mode=%d\n",
			__func__, gli->mode);
		rc = -EINVAL;
		goto out;
	}

	if (!ctxi) {
		ctxi = get_context(cfg, rctxid, lli, CTX_CTRL_ERR_FALLBACK);
		if (unlikely(!ctxi)) {
			dev_dbg(dev, "%s: Bad context ctxid=%llu\n",
				__func__, ctxid);
			rc = -EINVAL;
			goto out;
		}

		put_ctx = true;
	}

	rhte = get_rhte(ctxi, rhndl, lli);
	if (unlikely(!rhte)) {
		dev_dbg(dev, "%s: Bad resource handle rhndl=%u\n",
			__func__, rhndl);
		rc = -EINVAL;
		goto out;
	}

	if (new_size > rhte->lxt_cnt)
		rc = grow_lxt(afu, sdev, ctxid, rhndl, rhte, &new_size);
	else if (new_size < rhte->lxt_cnt)
		rc = shrink_lxt(afu, sdev, rhndl, rhte, ctxi, &new_size);
	else {
		/*
		 * Rare case where there is already sufficient space, just
		 * need to perform a translation sync with the AFU. This
		 * scenario likely follows a previous sync failure during
		 * a resize operation. Accordingly, perform the heavyweight
		 * form of translation sync as it is unknown which type of
		 * resize failed previously.
		 */
		rc = cxlflash_afu_sync(afu, ctxid, rhndl, AFU_HW_SYNC);
		if (unlikely(rc)) {
			rc = -EAGAIN;
			goto out;
		}
	}

	resize->hdr.return_flags = 0;
	resize->last_lba = (new_size * MC_CHUNK_SIZE * gli->blk_len);
	resize->last_lba /= CXLFLASH_BLOCK_SIZE;
	resize->last_lba--;

out:
	if (put_ctx)
		put_context(ctxi);
	dev_dbg(dev, "%s: resized to %llu returning rc=%d\n",
		__func__, resize->last_lba, rc);
	return rc;
}

int cxlflash_vlun_resize(struct scsi_device *sdev,
			 struct dk_cxlflash_resize *resize)
{
	return _cxlflash_vlun_resize(sdev, NULL, resize);
}
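/*
 * Worked size conversion (illustrative numbers, with MC_CHUNK_SIZE = 256
 * assumed purely for the arithmetic): a req_size of 1024 4K blocks on a
 * 512-byte-sector LUN gives nsectors = (1024 * 4096) / 512 = 8192, so
 * new_size = DIV_ROUND_UP(8192, 256) = 32 chunks. The reported last_lba
 * then maps back as (32 * 256 * 512) / 4096 - 1 = 1023 in 4K-block
 * terms, and a zero-sized resize yields the documented last_lba of -1.
 */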
/**
 * cxlflash_restore_luntable() - Restore LUN table to prior state
 * @cfg:	Internal structure associated with the host.
 */
void cxlflash_restore_luntable(struct cxlflash_cfg *cfg)
{
	struct llun_info *lli, *temp;
	u32 lind;
	int k;
	struct device *dev = &cfg->dev->dev;
	__be64 __iomem *fc_port_luns;

	mutex_lock(&global.mutex);

	list_for_each_entry_safe(lli, temp, &cfg->lluns, list) {
		if (!lli->in_table)
			continue;

		lind = lli->lun_index;
		dev_dbg(dev, "%s: Virtual LUNs on slot %d:\n", __func__, lind);

		for (k = 0; k < cfg->num_fc_ports; k++)
			if (lli->port_sel & (1 << k)) {
				fc_port_luns = get_fc_port_luns(cfg, k);
				writeq_be(lli->lun_id[k], &fc_port_luns[lind]);
				dev_dbg(dev, "\t%d=%llx\n", k, lli->lun_id[k]);
			}
	}

	mutex_unlock(&global.mutex);
}
/**
 * get_num_ports() - compute number of ports from port selection mask
 * @psm:	Port selection mask.
 *
 * Return: Population count of port selection mask
 */
static inline u8 get_num_ports(u32 psm)
{
	static const u8 bits[16] = { 0, 1, 1, 2, 1, 2, 2, 3,
				     1, 2, 2, 3, 2, 3, 3, 4 };

	return bits[psm & 0xf];
}
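/*
 * Example (illustrative): psm = 0x5 selects ports 0 and 2, indexes
 * bits[5] and returns 2; psm = 0xf returns 4. Only the low nibble is
 * significant, matching the maximum of four FC ports the table covers.
 */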
/**
 * init_luntable() - write an entry in the LUN table
 * @cfg:	Internal structure associated with the host.
 * @lli:	Per adapter LUN information structure.
 *
 * On successful return, a LUN table entry is created:
 *	- at the top for LUNs visible on multiple ports.
 *	- at the bottom for LUNs visible only on one port.
 *
 * Return: 0 on success, -errno on failure
 */
static int init_luntable(struct cxlflash_cfg *cfg, struct llun_info *lli)
{
	u32 chan;
	u32 lind;
	u32 nports;
	int rc = 0;
	int k;
	struct device *dev = &cfg->dev->dev;
	__be64 __iomem *fc_port_luns;

	mutex_lock(&global.mutex);

	if (lli->in_table)
		goto out;

	nports = get_num_ports(lli->port_sel);
	if (nports == 0 || nports > cfg->num_fc_ports) {
		WARN(1, "Unsupported port configuration nports=%u", nports);
		rc = -EIO;
		goto out;
	}

	if (nports > 1) {
		/*
		 * When LUN is visible from multiple ports, we will put
		 * it in the top half of the LUN table.
		 */
		for (k = 0; k < cfg->num_fc_ports; k++) {
			if (!(lli->port_sel & (1 << k)))
				continue;

			if (cfg->promote_lun_index == cfg->last_lun_index[k]) {
				rc = -ENOSPC;
				goto out;
			}
		}

		lind = lli->lun_index = cfg->promote_lun_index;
		dev_dbg(dev, "%s: Virtual LUNs on slot %d:\n", __func__, lind);

		for (k = 0; k < cfg->num_fc_ports; k++) {
			if (!(lli->port_sel & (1 << k)))
				continue;

			fc_port_luns = get_fc_port_luns(cfg, k);
			writeq_be(lli->lun_id[k], &fc_port_luns[lind]);
			dev_dbg(dev, "\t%d=%llx\n", k, lli->lun_id[k]);
		}

		cfg->promote_lun_index++;
	} else {
		/*
		 * When LUN is visible only from one port, we will put
		 * it in the bottom half of the LUN table.
		 */
		chan = PORTMASK2CHAN(lli->port_sel);
		if (cfg->promote_lun_index == cfg->last_lun_index[chan]) {
			rc = -ENOSPC;
			goto out;
		}

		lind = lli->lun_index = cfg->last_lun_index[chan];
		fc_port_luns = get_fc_port_luns(cfg, chan);
		writeq_be(lli->lun_id[chan], &fc_port_luns[lind]);
		cfg->last_lun_index[chan]--;
		dev_dbg(dev, "%s: Virtual LUNs on slot %d:\n\t%d=%llx\n",
			__func__, lind, chan, lli->lun_id[chan]);
	}

	lli->in_table = true;
out:
	mutex_unlock(&global.mutex);
	dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc);
	return rc;
}
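/*
 * Sketch of the resulting table shape (illustrative, slot numbers are
 * examples): multi-port LUNs consume ascending slots from
 * promote_lun_index at the top while single-port LUNs consume descending
 * slots from last_lun_index[chan] at the bottom, e.g.
 *
 *	slot 0    multi-port LUN A    (promote_lun_index moves down)
 *	slot 1    multi-port LUN B
 *	 ...
 *	slot N-2  port-1-only LUN D
 *	slot N-1  port-0-only LUN C   (last_lun_index moves up)
 *
 * The two cursors meeting is precisely the -ENOSPC condition checked
 * above.
 */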
/**
 * cxlflash_disk_virtual_open() - open a virtual disk of specified size
 * @sdev:	SCSI device associated with LUN owning virtual LUN.
 * @arg:	UVirtual ioctl data structure.
 *
 * On successful return, the user is informed of the resource handle
 * to be used to identify the virtual LUN and the size (in blocks) of
 * the virtual LUN in last LBA format. When the size of the virtual LUN
 * is zero, the last LBA is reflected as -1.
 *
 * Return: 0 on success, -errno on failure
 */
int cxlflash_disk_virtual_open(struct scsi_device *sdev, void *arg)
{
	struct cxlflash_cfg *cfg = shost_priv(sdev->host);
	struct device *dev = &cfg->dev->dev;
	struct llun_info *lli = sdev->hostdata;
	struct glun_info *gli = lli->parent;

	struct dk_cxlflash_uvirtual *virt = (struct dk_cxlflash_uvirtual *)arg;
	struct dk_cxlflash_resize resize;

	u64 ctxid = DECODE_CTXID(virt->context_id),
	    rctxid = virt->context_id;
	u64 lun_size = virt->lun_size;
	u64 last_lba = 0;
	u64 rsrc_handle = -1;

	int rc = 0;

	struct ctx_info *ctxi = NULL;
	struct sisl_rht_entry *rhte = NULL;

	dev_dbg(dev, "%s: ctxid=%llu ls=%llu\n", __func__, ctxid, lun_size);

	/* Setup the LUNs block allocator on first call */
	mutex_lock(&gli->mutex);
	if (gli->mode == MODE_NONE) {
		rc = init_vlun(lli);
		if (rc) {
			dev_err(dev, "%s: init_vlun failed rc=%d\n",
				__func__, rc);
			rc = -ENOMEM;
			goto err0;
		}
	}

	rc = cxlflash_lun_attach(gli, MODE_VIRTUAL, true);
	if (unlikely(rc)) {
		dev_err(dev, "%s: Failed attach to LUN (VIRTUAL)\n", __func__);
		goto err0;
	}
	mutex_unlock(&gli->mutex);

	rc = init_luntable(cfg, lli);
	if (rc) {
		dev_err(dev, "%s: init_luntable failed rc=%d\n", __func__, rc);
		goto err1;
	}

	ctxi = get_context(cfg, rctxid, lli, 0);
	if (unlikely(!ctxi)) {
		dev_err(dev, "%s: Bad context ctxid=%llu\n", __func__, ctxid);
		rc = -EINVAL;
		goto err1;
	}

	rhte = rhte_checkout(ctxi, lli);
	if (unlikely(!rhte)) {
		dev_err(dev, "%s: too many opens ctxid=%llu\n",
			__func__, ctxid);
		rc = -EMFILE;	/* too many opens */
		goto err1;
	}

	rsrc_handle = (rhte - ctxi->rht_start);

	/* Populate RHT format 0 */
	rhte->nmask = MC_RHT_NMASK;
	rhte->fp = SISL_RHT_FP(0U, ctxi->rht_perms);

	/* Resize even if requested size is 0 */
	marshal_virt_to_resize(virt, &resize);
	resize.rsrc_handle = rsrc_handle;
	rc = _cxlflash_vlun_resize(sdev, ctxi, &resize);
	if (rc) {
		dev_err(dev, "%s: resize failed rc=%d\n", __func__, rc);
		goto err2;
	}
	last_lba = resize.last_lba;

	if (virt->hdr.flags & DK_CXLFLASH_UVIRTUAL_NEED_WRITE_SAME)
		ctxi->rht_needs_ws[rsrc_handle] = true;

	virt->hdr.return_flags = 0;
	virt->last_lba = last_lba;
	virt->rsrc_handle = rsrc_handle;

	if (get_num_ports(lli->port_sel) > 1)
		virt->hdr.return_flags |= DK_CXLFLASH_ALL_PORTS_ACTIVE;
out:
	if (likely(ctxi))
		put_context(ctxi);
	dev_dbg(dev, "%s: returning handle=%llu rc=%d llba=%llu\n",
		__func__, rsrc_handle, rc, last_lba);
	return rc;

err2:
	rhte_checkin(ctxi, rhte);
err1:
	cxlflash_lun_detach(gli);
	goto out;
err0:
	/* Special common cleanup prior to successful LUN attach */
	cxlflash_ba_terminate(&gli->blka.ba_lun);
	mutex_unlock(&gli->mutex);
	goto out;
}
/**
 * clone_lxt() - copies translation tables from source to destination RHTE
 * @afu:	AFU associated with the host.
 * @blka:	Block allocator associated with LUN.
 * @ctxid:	Context ID of context owning the RHTE.
 * @rhndl:	Resource handle associated with the RHTE.
 * @rhte:	Destination resource handle entry (RHTE).
 * @rhte_src:	Source resource handle entry (RHTE).
 *
 * Return: 0 on success, -errno on failure
 */
static int clone_lxt(struct afu *afu,
		     struct blka *blka,
		     ctx_hndl_t ctxid,
		     res_hndl_t rhndl,
		     struct sisl_rht_entry *rhte,
		     struct sisl_rht_entry *rhte_src)
{
	struct cxlflash_cfg *cfg = afu->parent;
	struct device *dev = &cfg->dev->dev;
	struct sisl_lxt_entry *lxt = NULL;
	bool locked = false;
	u32 ngrps;
	u64 aun;		/* chunk# allocated by block allocator */
	int j;
	int i = 0;
	int rc = 0;

	ngrps = LXT_NUM_GROUPS(rhte_src->lxt_cnt);

	if (ngrps) {
		/* allocate new LXTs for clone */
		lxt = kzalloc((sizeof(*lxt) * LXT_GROUP_SIZE * ngrps),
			      GFP_KERNEL);
		if (unlikely(!lxt)) {
			rc = -ENOMEM;
			goto out;
		}

		/* copy over */
		memcpy(lxt, rhte_src->lxt_start,
		       (sizeof(*lxt) * rhte_src->lxt_cnt));

		/*
		 * Clone the LBAs in block allocator via ref_cnt. Note that the
		 * block allocator mutex must be held until it is established
		 * that this routine will complete without the need for a
		 * cleanup.
		 */
		mutex_lock(&blka->mutex);
		locked = true;
		for (i = 0; i < rhte_src->lxt_cnt; i++) {
			aun = (lxt[i].rlba_base >> MC_CHUNK_SHIFT);
			if (ba_clone(&blka->ba_lun, aun) == -1ULL) {
				rc = -EIO;
				goto err;
			}
		}
	}

	/*
	 * The following sequence is prescribed in the SISlite spec
	 * for syncing up with the AFU when adding LXT entries.
	 */
	dma_wmb(); /* Make LXT updates visible */

	rhte->lxt_start = lxt;
	dma_wmb(); /* Make RHT entry's LXT table update visible */

	rhte->lxt_cnt = rhte_src->lxt_cnt;
	dma_wmb(); /* Make RHT entry's LXT table size update visible */

	rc = cxlflash_afu_sync(afu, ctxid, rhndl, AFU_LW_SYNC);
	if (unlikely(rc)) {
		rc = -EAGAIN;
		goto err2;
	}

out:
	if (locked)
		mutex_unlock(&blka->mutex);
	dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc);
	return rc;
err2:
	/* Reset the RHTE */
	rhte->lxt_cnt = 0;
	dma_wmb();
	rhte->lxt_start = NULL;
	dma_wmb();
err:
	/* free the clones already made */
	for (j = 0; j < i; j++) {
		aun = (lxt[j].rlba_base >> MC_CHUNK_SHIFT);
		ba_free(&blka->ba_lun, aun);
	}
	kfree(lxt);
	goto out;
}
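/*
 * Data-sharing consequence of a successful clone (illustrative note):
 * both RHTEs end up referencing LXT entries that resolve to the same
 * physical AUs, with ba_clone() having bumped each AU's reference count.
 * A later shrink or release by either context funnels through ba_free(),
 * which decrements that count first, so the storage is truly reclaimed
 * only when the last user lets go.
 */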
/**
 * cxlflash_disk_clone() - clone a context by making a snapshot of another
 * @sdev:	SCSI device associated with LUN owning virtual LUN.
 * @clone:	Clone ioctl data structure.
 *
 * This routine effectively performs cxlflash_disk_open operation for each
 * in-use virtual resource in the source context. Note that the destination
 * context must be in pristine state and cannot have any resource handles
 * open at the time of the clone.
 *
 * Return: 0 on success, -errno on failure
 */
int cxlflash_disk_clone(struct scsi_device *sdev,
			struct dk_cxlflash_clone *clone)
{
	struct cxlflash_cfg *cfg = shost_priv(sdev->host);
	struct device *dev = &cfg->dev->dev;
	struct llun_info *lli = sdev->hostdata;
	struct glun_info *gli = lli->parent;
	struct blka *blka = &gli->blka;
	struct afu *afu = cfg->afu;
	struct dk_cxlflash_release release = { { 0 }, 0 };

	struct ctx_info *ctxi_src = NULL,
			*ctxi_dst = NULL;
	struct lun_access *lun_access_src, *lun_access_dst;
	u32 perms;
	u64 ctxid_src = DECODE_CTXID(clone->context_id_src),
	    ctxid_dst = DECODE_CTXID(clone->context_id_dst),
	    rctxid_src = clone->context_id_src,
	    rctxid_dst = clone->context_id_dst;
	int i, j;
	int rc = 0;
	bool found;
	LIST_HEAD(sidecar);

	dev_dbg(dev, "%s: ctxid_src=%llu ctxid_dst=%llu\n",
		__func__, ctxid_src, ctxid_dst);

	/* Do not clone yourself */
	if (unlikely(rctxid_src == rctxid_dst)) {
		rc = -EINVAL;
		goto out;
	}

	if (unlikely(gli->mode != MODE_VIRTUAL)) {
		rc = -EINVAL;
		dev_dbg(dev, "%s: Only supported on virtual LUNs mode=%u\n",
			__func__, gli->mode);
		goto out;
	}

	ctxi_src = get_context(cfg, rctxid_src, lli, CTX_CTRL_CLONE);
	ctxi_dst = get_context(cfg, rctxid_dst, lli, 0);
	if (unlikely(!ctxi_src || !ctxi_dst)) {
		dev_dbg(dev, "%s: Bad context ctxid_src=%llu ctxid_dst=%llu\n",
			__func__, ctxid_src, ctxid_dst);
		rc = -EINVAL;
		goto out;
	}

	/* Verify there is no open resource handle in the destination context */
	for (i = 0; i < MAX_RHT_PER_CONTEXT; i++)
		if (ctxi_dst->rht_start[i].nmask != 0) {
			rc = -EINVAL;
			goto out;
		}

	/* Clone LUN access list */
	list_for_each_entry(lun_access_src, &ctxi_src->luns, list) {
		found = false;
		list_for_each_entry(lun_access_dst, &ctxi_dst->luns, list)
			if (lun_access_dst->sdev == lun_access_src->sdev) {
				found = true;
				break;
			}

		if (!found) {
			lun_access_dst = kzalloc(sizeof(*lun_access_dst),
						 GFP_KERNEL);
			if (unlikely(!lun_access_dst)) {
				dev_err(dev, "%s: lun_access allocation fail\n",
					__func__);
				rc = -ENOMEM;
				goto out;
			}

			*lun_access_dst = *lun_access_src;
			list_add(&lun_access_dst->list, &sidecar);
		}
	}

	if (unlikely(!ctxi_src->rht_out)) {
		dev_dbg(dev, "%s: Nothing to clone\n", __func__);
		goto out_success;
	}

	/* User specified permission on attach */
	perms = ctxi_dst->rht_perms;

	/*
	 * Copy over checked-out RHT (and their associated LXT) entries by
	 * hand, stopping after we've copied all outstanding entries and
	 * cleaning up if the clone fails.
	 *
	 * Note: This loop is equivalent to performing cxlflash_disk_open and
	 * cxlflash_vlun_resize. As such, LUN accounting needs to be taken into
	 * account by attaching after each successful RHT entry clone. In the
	 * event that a clone failure is experienced, the LUN detach is handled
	 * via the cleanup performed by _cxlflash_disk_release.
	 */
	for (i = 0; i < MAX_RHT_PER_CONTEXT; i++) {
		if (ctxi_src->rht_out == ctxi_dst->rht_out)
			break;
		if (ctxi_src->rht_start[i].nmask == 0)
			continue;

		/* Consume a destination RHT entry */
		ctxi_dst->rht_out++;
		ctxi_dst->rht_start[i].nmask = ctxi_src->rht_start[i].nmask;
		ctxi_dst->rht_start[i].fp =
		    SISL_RHT_FP_CLONE(ctxi_src->rht_start[i].fp, perms);
		ctxi_dst->rht_lun[i] = ctxi_src->rht_lun[i];

		rc = clone_lxt(afu, blka, ctxid_dst, i,
			       &ctxi_dst->rht_start[i],
			       &ctxi_src->rht_start[i]);
		if (rc) {
			marshal_clone_to_rele(clone, &release);
			for (j = 0; j < i; j++) {
				release.rsrc_handle = j;
				_cxlflash_disk_release(sdev, ctxi_dst,
						       &release);
			}

			/* Put back the one we failed on */
			rhte_checkin(ctxi_dst, &ctxi_dst->rht_start[i]);
			goto err;
		}

		cxlflash_lun_attach(gli, gli->mode, false);
	}

out_success:
	list_splice(&sidecar, &ctxi_dst->luns);

	/* fall through */
out:
	if (ctxi_src)
		put_context(ctxi_src);
	if (ctxi_dst)
		put_context(ctxi_dst);
	dev_dbg(dev, "%s: returning rc=%d\n", __func__, rc);
	return rc;

err:
	list_for_each_entry_safe(lun_access_src, lun_access_dst, &sidecar, list)
		kfree(lun_access_src);
	goto out;
}