2 * CXL Flash Device Driver
4 * Written by: Manoj N. Kumar <manoj@linux.vnet.ibm.com>, IBM Corporation
5 * Matthew R. Ochs <mrochs@linux.vnet.ibm.com>, IBM Corporation
7 * Copyright (C) 2015 IBM Corporation
9 * This program is free software; you can redistribute it and/or
10 * modify it under the terms of the GNU General Public License
11 * as published by the Free Software Foundation; either version
12 * 2 of the License, or (at your option) any later version.
15 #include <linux/interrupt.h>
16 #include <linux/pci.h>
17 #include <linux/syscalls.h>
18 #include <asm/unaligned.h>
19 #include <asm/bitsperlong.h>
21 #include <scsi/scsi_cmnd.h>
22 #include <scsi/scsi_host.h>
23 #include <uapi/scsi/cxlflash_ioctl.h>
28 #include "superpipe.h"
31 * marshal_virt_to_resize() - translate uvirtual to resize structure
32 * @virt: Source structure from which to translate/copy.
33 * @resize: Destination structure for the translate/copy.
35 static void marshal_virt_to_resize(struct dk_cxlflash_uvirtual
*virt
,
36 struct dk_cxlflash_resize
*resize
)
38 resize
->hdr
= virt
->hdr
;
39 resize
->context_id
= virt
->context_id
;
40 resize
->rsrc_handle
= virt
->rsrc_handle
;
41 resize
->req_size
= virt
->lun_size
;
42 resize
->last_lba
= virt
->last_lba
;
46 * marshal_clone_to_rele() - translate clone to release structure
47 * @clone: Source structure from which to translate/copy.
48 * @rele: Destination structure for the translate/copy.
50 static void marshal_clone_to_rele(struct dk_cxlflash_clone
*clone
,
51 struct dk_cxlflash_release
*release
)
53 release
->hdr
= clone
->hdr
;
54 release
->context_id
= clone
->context_id_dst
;
58 * ba_init() - initializes a block allocator
59 * @ba_lun: Block allocator to initialize.
61 * Return: 0 on success, -errno on failure
/*
 * ba_init: derives the number of allocation units from lsize / au_size,
 * allocates the ba_lun_info container, the allocation bitmap and the per-AU
 * clone map, marks every AU free (bit set), clears the unused bits of the
 * last bitmap word and publishes the container via ba_lun->ba_lun_handle.
 *
 * NOTE(review): the embedded listing numbers skip lines (e.g. 77 -> 81,
 * 85 -> 89, 96 -> 98, 116 -> 119), so the error returns, closing braces, the
 * declaration of lam and the tails of some calls are not visible in this
 * copy - confirm against the original file.
 */
63 static int ba_init(struct ba_lun
*ba_lun
)
65 struct ba_lun_info
*bali
= NULL
;
66 int lun_size_au
= 0, i
= 0;
67 int last_word_underflow
= 0;
70 pr_debug("%s: Initializing LUN: lun_id=%016llx "
71 "ba_lun->lsize=%lx ba_lun->au_size=%lX\n",
72 __func__
, ba_lun
->lun_id
, ba_lun
->lsize
, ba_lun
->au_size
);
74 /* Calculate bit map size */
75 lun_size_au
= ba_lun
->lsize
/ ba_lun
->au_size
;
76 if (lun_size_au
== 0) {
77 pr_debug("%s: Requested LUN size of 0!\n", __func__
);
81 /* Allocate lun information container */
82 bali
= kzalloc(sizeof(struct ba_lun_info
), GFP_KERNEL
);
83 if (unlikely(!bali
)) {
84 pr_err("%s: Failed to allocate lun_info lun_id=%016llx\n",
85 __func__
, ba_lun
->lun_id
);
89 bali
->total_aus
= lun_size_au
;
/* Number of bitmap words: one per BITS_PER_LONG AUs, rounded up below */
90 bali
->lun_bmap_size
= lun_size_au
/ BITS_PER_LONG
;
92 if (lun_size_au
% BITS_PER_LONG
)
93 bali
->lun_bmap_size
++;
95 /* Allocate bitmap space */
/*
 * NOTE(review): words are sized with sizeof(u64) while the word count is
 * derived from BITS_PER_LONG - these agree only where long is 64 bits; confirm.
 */
96 bali
->lun_alloc_map
= kzalloc((bali
->lun_bmap_size
* sizeof(u64
)),
98 if (unlikely(!bali
->lun_alloc_map
)) {
99 pr_err("%s: Failed to allocate lun allocation map: "
100 "lun_id=%016llx\n", __func__
, ba_lun
->lun_id
);
105 /* Initialize the bit map size and set all bits to '1' */
106 bali
->free_aun_cnt
= lun_size_au
;
108 for (i
= 0; i
< bali
->lun_bmap_size
; i
++)
109 bali
->lun_alloc_map
[i
] = 0xFFFFFFFFFFFFFFFFULL
;
111 /* If the last word not fully utilized, mark extra bits as allocated */
112 last_word_underflow
= (bali
->lun_bmap_size
* BITS_PER_LONG
);
113 last_word_underflow
-= bali
->free_aun_cnt
;
114 if (last_word_underflow
> 0) {
115 lam
= &bali
->lun_alloc_map
[bali
->lun_bmap_size
- 1];
116 for (i
= (HIBIT
- last_word_underflow
+ 1);
119 clear_bit(i
, (ulong
*)lam
);
122 /* Initialize high elevator index, low/curr already at 0 from kzalloc */
123 bali
->free_high_idx
= bali
->lun_bmap_size
;
125 /* Allocate clone map */
126 bali
->aun_clone_map
= kzalloc((bali
->total_aus
* sizeof(u8
)),
128 if (unlikely(!bali
->aun_clone_map
)) {
129 pr_err("%s: Failed to allocate clone map: lun_id=%016llx\n",
130 __func__
, ba_lun
->lun_id
);
/* The bitmap allocated earlier is released on this failure path */
131 kfree(bali
->lun_alloc_map
);
136 /* Pass the allocated LUN info as a handle to the user */
137 ba_lun
->ba_lun_handle
= bali
;
139 pr_debug("%s: Successfully initialized the LUN: "
140 "lun_id=%016llx bitmap size=%x, free_aun_cnt=%llx\n",
141 __func__
, ba_lun
->lun_id
, bali
->lun_bmap_size
,
147 * find_free_range() - locates a free bit within the block allocator
148 * @low: First word in block allocator to start search.
149 * @high: Last word in block allocator to search.
150 * @bali: LUN information structure owning the block allocator to search.
151 * @bit_word: Passes back the word in the block allocator owning the free bit.
153 * Return: The bit position within the passed back word, -1 on failure
/*
 * find_free_range: walks lun_alloc_map words from index low up to (not
 * including) high; on the first non-zero word it locates the first set bit
 * with find_first_bit(), decrements free_aun_cnt and clears that bit.
 *
 * NOTE(review): lines are missing from this copy (numbering skips 156,
 * 158-160, 162, 168, 172-173 and everything after 175), including part of
 * the parameter list, the declarations of i and bit_pos, and the return
 * paths - confirm against the original file.
 */
155 static int find_free_range(u32 low
,
157 struct ba_lun_info
*bali
, int *bit_word
)
161 ulong
*lam
, num_bits
;
163 for (i
= low
; i
< high
; i
++)
/* A non-zero word has at least one set (free) bit */
164 if (bali
->lun_alloc_map
[i
] != 0) {
165 lam
= (ulong
*)&bali
->lun_alloc_map
[i
];
166 num_bits
= (sizeof(*lam
) * BITS_PER_BYTE
);
167 bit_pos
= find_first_bit(lam
, num_bits
);
169 pr_devel("%s: Found free bit %llu in LUN "
170 "map entry %016llx at bitmap index = %d\n",
171 __func__
, bit_pos
, bali
->lun_alloc_map
[i
], i
);
/* Claim the AU: one fewer free unit, bit cleared in the map */
174 bali
->free_aun_cnt
--;
175 clear_bit(bit_pos
, lam
);
183 * ba_alloc() - allocates a block from the block allocator
184 * @ba_lun: Block allocator from which to allocate a block.
186 * Return: The allocated block, -1 on failure
/*
 * ba_alloc: hands out one allocation unit. It logs the request, treats
 * free_aun_cnt == 0 as "no space left", searches for a free bit starting
 * from free_curr_idx, updates free_curr_idx from the hit (bit_word + 1 when
 * the hit was at HIBIT, bit_word in the other visible assignment) and
 * returns bit_word * BITS_PER_LONG + bit_pos as the AU number.
 *
 * NOTE(review): several lines are absent from this copy (e.g. 189-191,
 * 203-205, 209, 211-213, 216-219, 223), so the local declarations, the
 * condition guarding the second search, the else keyword and the failure
 * returns cannot be seen here - confirm against the original file.
 */
188 static u64
ba_alloc(struct ba_lun
*ba_lun
)
192 struct ba_lun_info
*bali
= NULL
;
194 bali
= ba_lun
->ba_lun_handle
;
196 pr_debug("%s: Received block allocation request: "
197 "lun_id=%016llx free_aun_cnt=%llx\n",
198 __func__
, ba_lun
->lun_id
, bali
->free_aun_cnt
);
200 if (bali
->free_aun_cnt
== 0) {
201 pr_debug("%s: No space left on LUN: lun_id=%016llx\n",
202 __func__
, ba_lun
->lun_id
);
206 /* Search to find a free entry, curr->high then low->curr */
207 bit_pos
= find_free_range(bali
->free_curr_idx
,
208 bali
->free_high_idx
, bali
, &bit_word
);
210 bit_pos
= find_free_range(bali
->free_low_idx
,
214 pr_debug("%s: Could not find an allocation unit on LUN:"
215 " lun_id=%016llx\n", __func__
, ba_lun
->lun_id
);
220 /* Update the free_curr_idx */
221 if (bit_pos
== HIBIT
)
222 bali
->free_curr_idx
= bit_word
+ 1;
224 bali
->free_curr_idx
= bit_word
;
226 pr_debug("%s: Allocating AU number=%llx lun_id=%016llx "
227 "free_aun_cnt=%llx\n", __func__
,
228 ((bit_word
* BITS_PER_LONG
) + bit_pos
), ba_lun
->lun_id
,
/* AU number = word index * bits per word + bit position within the word */
231 return (u64
) ((bit_word
* BITS_PER_LONG
) + bit_pos
);
235 * validate_alloc() - validates the specified block has been allocated
236 * @ba_lun_info: LUN info owning the block allocator.
237 * @aun: Block to validate.
239 * Return: 0 on success, -1 on failure
241 static int validate_alloc(struct ba_lun_info
*bali
, u64 aun
)
243 int idx
= 0, bit_pos
= 0;
245 idx
= aun
/ BITS_PER_LONG
;
246 bit_pos
= aun
% BITS_PER_LONG
;
248 if (test_bit(bit_pos
, (ulong
*)&bali
->lun_alloc_map
[idx
]))
255 * ba_free() - frees a block from the block allocator
256 * @ba_lun: Block allocator from which to free a block.
257 * @to_free: Block to free.
259 * Return: 0 on success, -1 on failure
/*
 * ba_free: releases allocation unit to_free. A non-zero result from
 * validate_alloc() is logged as "not allocated". When the AU's clone count
 * is above zero it is logged and decremented. The remaining visible code
 * sets the AU's bit in lun_alloc_map, increments free_aun_cnt and widens
 * free_low_idx / free_high_idx so the word is covered by later searches.
 *
 * NOTE(review): lines 271-273, 276-277, 283-285 and everything after 299
 * are absent from this copy, so the return statements (presumably an early
 * return after the clone-count decrement - verify) are not visible.
 */
261 static int ba_free(struct ba_lun
*ba_lun
, u64 to_free
)
263 int idx
= 0, bit_pos
= 0;
264 struct ba_lun_info
*bali
= NULL
;
266 bali
= ba_lun
->ba_lun_handle
;
268 if (validate_alloc(bali
, to_free
)) {
269 pr_debug("%s: AUN %llx is not allocated on lun_id=%016llx\n",
270 __func__
, to_free
, ba_lun
->lun_id
);
274 pr_debug("%s: Received a request to free AU=%llx lun_id=%016llx "
275 "free_aun_cnt=%llx\n", __func__
, to_free
, ba_lun
->lun_id
,
/* A cloned AU only drops one reference here */
278 if (bali
->aun_clone_map
[to_free
] > 0) {
279 pr_debug("%s: AUN %llx lun_id=%016llx cloned. Clone count=%x\n",
280 __func__
, to_free
, ba_lun
->lun_id
,
281 bali
->aun_clone_map
[to_free
]);
282 bali
->aun_clone_map
[to_free
]--;
286 idx
= to_free
/ BITS_PER_LONG
;
287 bit_pos
= to_free
% BITS_PER_LONG
;
/* Setting the bit marks the AU free again */
289 set_bit(bit_pos
, (ulong
*)&bali
->lun_alloc_map
[idx
]);
290 bali
->free_aun_cnt
++;
292 if (idx
< bali
->free_low_idx
)
293 bali
->free_low_idx
= idx
;
294 else if (idx
> bali
->free_high_idx
)
295 bali
->free_high_idx
= idx
;
297 pr_debug("%s: Successfully freed AU bit_pos=%x bit map index=%x "
298 "lun_id=%016llx free_aun_cnt=%llx\n", __func__
, bit_pos
, idx
,
299 ba_lun
->lun_id
, bali
->free_aun_cnt
);
305 * ba_clone() - Clone a chunk of the block allocation table
306 * @ba_lun: Block allocator owning the block to clone.
307 * @to_clone: Block to clone.
309 * Return: 0 on success, -1 on failure
/*
 * ba_clone: adds one clone reference to allocation unit to_clone by
 * incrementing its entry in aun_clone_map. A non-zero validate_alloc()
 * result is logged as "not allocated", and an entry already equal to
 * MAX_AUN_CLONE_CNT is logged as having hit the maximum.
 *
 * NOTE(review): lines 318-320, 327-329 and everything after 330 are absent
 * from this copy, so the return statements are not visible - confirm
 * against the original file.
 */
311 static int ba_clone(struct ba_lun
*ba_lun
, u64 to_clone
)
313 struct ba_lun_info
*bali
= ba_lun
->ba_lun_handle
;
315 if (validate_alloc(bali
, to_clone
)) {
316 pr_debug("%s: AUN=%llx not allocated on lun_id=%016llx\n",
317 __func__
, to_clone
, ba_lun
->lun_id
);
321 pr_debug("%s: Received a request to clone AUN %llx on lun_id=%016llx\n",
322 __func__
, to_clone
, ba_lun
->lun_id
);
324 if (bali
->aun_clone_map
[to_clone
] == MAX_AUN_CLONE_CNT
) {
325 pr_debug("%s: AUN %llx on lun_id=%016llx hit max clones already\n",
326 __func__
, to_clone
, ba_lun
->lun_id
);
330 bali
->aun_clone_map
[to_clone
]++;
336 * ba_space() - returns the amount of free space left in the block allocator
337 * @ba_lun: Block allocator.
339 * Return: Amount of free space in block allocator
341 static u64
ba_space(struct ba_lun
*ba_lun
)
343 struct ba_lun_info
*bali
= ba_lun
->ba_lun_handle
;
345 return bali
->free_aun_cnt
;
349 * cxlflash_ba_terminate() - frees resources associated with the block allocator
350 * @ba_lun: Block allocator.
352 * Safe to call in a partially allocated state.
/*
 * cxlflash_ba_terminate: frees the clone map and the allocation bitmap held
 * by the block allocator's ba_lun_info and clears ba_lun->ba_lun_handle.
 *
 * NOTE(review): lines 357-358 and 361 are absent from this copy; the
 * prologue's "safe in a partially allocated state" suggests a NULL guard on
 * bali and a free of the container itself, but neither is visible here -
 * confirm against the original file.
 */
354 void cxlflash_ba_terminate(struct ba_lun
*ba_lun
)
356 struct ba_lun_info
*bali
= ba_lun
->ba_lun_handle
;
359 kfree(bali
->aun_clone_map
);
360 kfree(bali
->lun_alloc_map
);
362 ba_lun
->ba_lun_handle
= NULL
;
367 * init_vlun() - initializes a LUN for virtual use
368 * @lli: LUN information structure that owns the block allocator.
370 * Return: 0 on success, -errno on failure
/*
 * init_vlun: zeroes the parent LUN's blka, initializes its mutex, fills in
 * the block allocator description (lun_id from lun_index, lsize from
 * max_lba + 1, lba_size from blk_len, au_size = MC_CHUNK_SIZE), computes
 * nchunk and then calls ba_init() on it.
 *
 * NOTE(review): lines 373-374, 390, 392 and everything after 393 are absent
 * from this copy, so the declaration of rc, the condition around the failure
 * message and the return are not visible - confirm against the original.
 */
372 static int init_vlun(struct llun_info
*lli
)
375 struct glun_info
*gli
= lli
->parent
;
376 struct blka
*blka
= &gli
->blka
;
378 memset(blka
, 0, sizeof(*blka
));
379 mutex_init(&blka
->mutex
);
381 /* LUN IDs are unique per port, save the index instead */
382 blka
->ba_lun
.lun_id
= lli
->lun_index
;
383 blka
->ba_lun
.lsize
= gli
->max_lba
+ 1;
384 blka
->ba_lun
.lba_size
= gli
->blk_len
;
386 blka
->ba_lun
.au_size
= MC_CHUNK_SIZE
;
387 blka
->nchunk
= blka
->ba_lun
.lsize
/ MC_CHUNK_SIZE
;
389 rc
= ba_init(&blka
->ba_lun
);
391 pr_debug("%s: cannot init block_alloc, rc=%d\n", __func__
, rc
);
393 pr_debug("%s: returning rc=%d lli=%p\n", __func__
, rc
, lli
);
398 * write_same16() - sends a SCSI WRITE_SAME16 (0) command to specified LUN
399 * @sdev: SCSI device associated with LUN.
400 * @lba: Logical block address to start write same.
401 * @nblks: Number of logical blocks to write same.
403 * The SCSI WRITE_SAME16 can take quite a while to complete. Should an EEH occur
404 * while in scsi_execute(), the EEH handler will attempt to recover. As part of
405 * the recovery, the handler drains all currently running ioctls, waiting until
406 * they have completed before proceeding with a reset. As this routine is used
407 * on the ioctl path, this can create a condition where the EEH handler becomes
408 * stuck, infinitely waiting for this ioctl thread. To avoid this behavior,
409 * temporarily unmark this thread as an ioctl thread by releasing the ioctl read
410 * semaphore. This will allow the EEH handler to proceed with a recovery while
411 * this thread is still running. Once the scsi_execute() returns, reacquire the
412 * ioctl read semaphore and check the adapter state in case it changed while
413 * inside of scsi_execute(). The state check will wait if the adapter is still
414 * being recovered or return a failure if the recovery failed. In the event that
415 * the adapter reset failed, simply return the failure as the ioctl would be
416 * unable to continue.
418 * Note that the above puts a requirement on this routine to only be called on an ioctl thread.
421 * Return: 0 on success, -errno on failure
/*
 * write_same16: builds a WRITE_SAME_16 CDB (byte 1 is 0x8 when
 * cfg->ws_unmap is set, big-endian offset at byte 2, block count capped at
 * ws_limit) and issues it with scsi_execute(). The ioctl read semaphore is
 * dropped around scsi_execute() and re-taken before check_state() is
 * consulted. ws_limit is the queue's REQ_OP_WRITE_SAME sector limit shifted
 * right by (log2(sector_size) - 9).
 *
 * NOTE(review): lines 424-432, 439, 443-448, 453-454, 462, 464-469, 473-482
 * and everything after 483 are absent from this copy, so the remaining
 * parameters, the locals (cmd_buf, scsi_cmd, rc, result, offset, left), any
 * loop around the command, the error codes and the buffer cleanup are not
 * visible - confirm against the original file.
 */
423 static int write_same16(struct scsi_device
*sdev
,
433 struct cxlflash_cfg
*cfg
= shost_priv(sdev
->host
);
434 struct device
*dev
= &cfg
->dev
->dev
;
435 const u32 s
= ilog2(sdev
->sector_size
) - 9;
436 const u32 to
= sdev
->request_queue
->rq_timeout
;
437 const u32 ws_limit
= blk_queue_get_max_sectors(sdev
->request_queue
,
438 REQ_OP_WRITE_SAME
) >> s
;
440 cmd_buf
= kzalloc(CMD_BUFSIZE
, GFP_KERNEL
);
441 scsi_cmd
= kzalloc(MAX_COMMAND_SIZE
, GFP_KERNEL
);
/* Both allocations are checked together */
442 if (unlikely(!cmd_buf
|| !scsi_cmd
)) {
449 scsi_cmd
[0] = WRITE_SAME_16
;
450 scsi_cmd
[1] = cfg
->ws_unmap
? 0x8 : 0;
451 put_unaligned_be64(offset
, &scsi_cmd
[2]);
452 put_unaligned_be32(ws_limit
< left
? ws_limit
: left
,
455 /* Drop the ioctl read semaphore across lengthy call */
456 up_read(&cfg
->ioctl_rwsem
);
457 result
= scsi_execute(sdev
, scsi_cmd
, DMA_TO_DEVICE
, cmd_buf
,
458 CMD_BUFSIZE
, NULL
, NULL
, to
,
459 CMD_RETRIES
, 0, 0, NULL
);
460 down_read(&cfg
->ioctl_rwsem
);
/* Adapter state may have changed while the semaphore was released */
461 rc
= check_state(cfg
);
463 dev_err(dev
, "%s: Failed state result=%08x\n",
470 dev_err_ratelimited(dev
, "%s: command failed for "
471 "offset=%lld result=%08x\n",
472 __func__
, offset
, result
);
483 dev_dbg(dev
, "%s: returning rc=%d\n", __func__
, rc
);
488 * grow_lxt() - expands the translation table associated with the specified RHTE
489 * @afu: AFU associated with the host.
490 * @sdev: SCSI device associated with LUN.
491 * @ctxid: Context ID of context owning the RHTE.
492 * @rhndl: Resource handle associated with the RHTE.
493 * @rhte: Resource handle entry (RHTE).
494 * @new_size: Number of translation entries associated with RHTE.
496 * By design, this routine employs a 'best attempt' allocation and will
497 * truncate the requested size down if there is not sufficient space in
498 * the block allocator to satisfy the request but there does exist some
499 * amount of space. The user is made aware of this by returning the size allocated.
502 * Return: 0 on success, -errno on failure
/*
 * grow_lxt: under blka->mutex, reads the free space with ba_space(), sizes
 * the LXT in groups with LXT_NUM_GROUPS(), allocates a new zeroed table and
 * copies the old entries when the group count changes, then fills the
 * entries from the old count up to my_new_size with chunks obtained from
 * ba_alloc(). After dropping the mutex it stores lxt_start and then lxt_cnt
 * into the RHTE with a dma_wmb() after each store, calls
 * cxlflash_afu_sync() with AFU_LW_SYNC and reports the resulting size
 * through *new_size.
 *
 * NOTE(review): many lines are absent from this copy (e.g. 506-507, 509-510,
 * 517, 521-524, 533, 535-541, 549, 552-555, 558-560, 581-583, 599-601,
 * 603-604), so part of the parameter list, several locals, the clamping of
 * delta to the available space, the error codes and the freeing of the old
 * table are not visible - confirm against the original file.
 */
504 static int grow_lxt(struct afu
*afu
,
505 struct scsi_device
*sdev
,
508 struct sisl_rht_entry
*rhte
,
511 struct cxlflash_cfg
*cfg
= shost_priv(sdev
->host
);
512 struct device
*dev
= &cfg
->dev
->dev
;
513 struct sisl_lxt_entry
*lxt
= NULL
, *lxt_old
= NULL
;
514 struct llun_info
*lli
= sdev
->hostdata
;
515 struct glun_info
*gli
= lli
->parent
;
516 struct blka
*blka
= &gli
->blka
;
518 u32 ngrps
, ngrps_old
;
519 u64 aun
; /* chunk# allocated by block allocator */
520 u64 delta
= *new_size
- rhte
->lxt_cnt
;
525 * Check what is available in the block allocator before re-allocating
526 * LXT array. This is done up front under the mutex which must not be
527 * released until after allocation is complete.
529 mutex_lock(&blka
->mutex
);
530 av_size
= ba_space(&blka
->ba_lun
);
531 if (unlikely(av_size
<= 0)) {
532 dev_dbg(dev
, "%s: ba_space error av_size=%d\n",
534 mutex_unlock(&blka
->mutex
);
542 lxt_old
= rhte
->lxt_start
;
543 ngrps_old
= LXT_NUM_GROUPS(rhte
->lxt_cnt
);
544 ngrps
= LXT_NUM_GROUPS(rhte
->lxt_cnt
+ delta
);
546 if (ngrps
!= ngrps_old
) {
547 /* reallocate to fit new size */
548 lxt
= kzalloc((sizeof(*lxt
) * LXT_GROUP_SIZE
* ngrps
),
550 if (unlikely(!lxt
)) {
551 mutex_unlock(&blka
->mutex
);
556 /* copy over all old entries */
557 memcpy(lxt
, lxt_old
, (sizeof(*lxt
) * rhte
->lxt_cnt
));
561 /* nothing can fail from now on */
562 my_new_size
= rhte
->lxt_cnt
+ delta
;
564 /* add new entries to the end */
565 for (i
= rhte
->lxt_cnt
; i
< my_new_size
; i
++) {
567 * Due to the earlier check of available space, ba_alloc
568 * cannot fail here. If it did due to internal error,
569 * leave a rlba_base of -1u which will likely be a
570 * invalid LUN (too large).
572 aun
= ba_alloc(&blka
->ba_lun
);
573 if ((aun
== -1ULL) || (aun
>= blka
->nchunk
))
574 dev_dbg(dev
, "%s: ba_alloc error allocated chunk=%llu "
575 "max=%llu\n", __func__
, aun
, blka
->nchunk
- 1);
577 /* select both ports, use r/w perms from RHT */
578 lxt
[i
].rlba_base
= ((aun
<< MC_CHUNK_SHIFT
) |
579 (lli
->lun_index
<< LXT_LUNIDX_SHIFT
) |
580 (RHT_PERM_RW
<< LXT_PERM_SHIFT
|
584 mutex_unlock(&blka
->mutex
);
587 * The following sequence is prescribed in the SISlite spec
588 * for syncing up with the AFU when adding LXT entries.
590 dma_wmb(); /* Make LXT updates visible */
592 rhte
->lxt_start
= lxt
;
593 dma_wmb(); /* Make RHT entry's LXT table update visible */
595 rhte
->lxt_cnt
= my_new_size
;
596 dma_wmb(); /* Make RHT entry's LXT table size update visible */
598 rc
= cxlflash_afu_sync(afu
, ctxid
, rhndl
, AFU_LW_SYNC
);
602 /* free old lxt if reallocated */
/* The caller learns the size actually reached through *new_size */
605 *new_size
= my_new_size
;
607 dev_dbg(dev
, "%s: returning rc=%d\n", __func__
, rc
);
612 * shrink_lxt() - reduces translation table associated with the specified RHTE
613 * @afu: AFU associated with the host.
614 * @sdev: SCSI device associated with LUN.
615 * @rhndl: Resource handle associated with the RHTE.
616 * @rhte: Resource handle entry (RHTE).
617 * @ctxi: Context owning resources.
618 * @new_size: Number of translation entries associated with RHTE.
620 * Return: 0 on success, -errno on failure
/*
 * shrink_lxt: computes delta = lxt_cnt - *new_size, allocates a smaller
 * zeroed table when the group count changes, then stores the reduced
 * lxt_cnt followed by lxt_start into the RHTE with a dma_wmb() after each
 * store and calls cxlflash_afu_sync() with AFU_HW_SYNC. With the context
 * flagged unavail and its mutex released, the trimmed chunks are walked
 * from the end under blka->mutex, calling write_same16() and ba_free() on
 * each; the context mutex is then re-taken and unavail cleared. The final
 * size is reported through *new_size.
 *
 * NOTE(review): many lines are absent from this copy (e.g. 624, 627-628,
 * 641-643, 650, 652, 654-657, 659, 661-665, 678-679, 681-686, 688-689,
 * 692-693, 698, 701, 703-704, 708-709, 711-712), so the conditions that use
 * needs_ws / needs_sync, the memcpy call head, the error codes and the
 * freeing of the old table are not visible - confirm against the original.
 */
622 static int shrink_lxt(struct afu
*afu
,
623 struct scsi_device
*sdev
,
625 struct sisl_rht_entry
*rhte
,
626 struct ctx_info
*ctxi
,
629 struct cxlflash_cfg
*cfg
= shost_priv(sdev
->host
);
630 struct device
*dev
= &cfg
->dev
->dev
;
631 struct sisl_lxt_entry
*lxt
, *lxt_old
;
632 struct llun_info
*lli
= sdev
->hostdata
;
633 struct glun_info
*gli
= lli
->parent
;
634 struct blka
*blka
= &gli
->blka
;
635 ctx_hndl_t ctxid
= DECODE_CTXID(ctxi
->ctxid
);
636 bool needs_ws
= ctxi
->rht_needs_ws
[rhndl
];
637 bool needs_sync
= !ctxi
->err_recovery_active
;
638 u32 ngrps
, ngrps_old
;
639 u64 aun
; /* chunk# allocated by block allocator */
640 u64 delta
= rhte
->lxt_cnt
- *new_size
;
644 lxt_old
= rhte
->lxt_start
;
645 ngrps_old
= LXT_NUM_GROUPS(rhte
->lxt_cnt
);
646 ngrps
= LXT_NUM_GROUPS(rhte
->lxt_cnt
- delta
);
648 if (ngrps
!= ngrps_old
) {
649 /* Reallocate to fit new size unless new size is 0 */
651 lxt
= kzalloc((sizeof(*lxt
) * LXT_GROUP_SIZE
* ngrps
),
653 if (unlikely(!lxt
)) {
658 /* Copy over old entries that will remain */
660 (sizeof(*lxt
) * (rhte
->lxt_cnt
- delta
)));
666 /* Nothing can fail from now on */
667 my_new_size
= rhte
->lxt_cnt
- delta
;
670 * The following sequence is prescribed in the SISlite spec
671 * for syncing up with the AFU when removing LXT entries.
673 rhte
->lxt_cnt
= my_new_size
;
674 dma_wmb(); /* Make RHT entry's LXT table size update visible */
676 rhte
->lxt_start
= lxt
;
677 dma_wmb(); /* Make RHT entry's LXT table update visible */
680 rc
= cxlflash_afu_sync(afu
, ctxid
, rhndl
, AFU_HW_SYNC
);
687 * Mark the context as unavailable, so that we can release
690 ctxi
->unavail
= true;
691 mutex_unlock(&ctxi
->mutex
);
694 /* Free LBAs allocated to freed chunks */
695 mutex_lock(&blka
->mutex
);
696 for (i
= delta
- 1; i
>= 0; i
--) {
697 aun
= lxt_old
[my_new_size
+ i
].rlba_base
>> MC_CHUNK_SHIFT
;
/* NOTE(review): the result of write_same16() is not checked on this line */
699 write_same16(sdev
, aun
, MC_CHUNK_SIZE
);
700 ba_free(&blka
->ba_lun
, aun
);
702 mutex_unlock(&blka
->mutex
);
705 /* Make the context visible again */
706 mutex_lock(&ctxi
->mutex
);
707 ctxi
->unavail
= false;
710 /* Free old lxt if reallocated */
713 *new_size
= my_new_size
;
715 dev_dbg(dev
, "%s: returning rc=%d\n", __func__
, rc
);
720 * _cxlflash_vlun_resize() - changes the size of a virtual LUN
721 * @sdev: SCSI device associated with LUN owning virtual LUN.
722 * @ctxi: Context owning resources.
723 * @resize: Resize ioctl data structure.
725 * On successful return, the user is informed of the new size (in blocks)
726 * of the virtual LUN in last LBA format. When the size of the virtual
727 * LUN is zero, the last LBA is reflected as -1. See comment in the
728 * prologue for _cxlflash_disk_release() regarding AFU syncs and contexts
729 * on the error recovery list.
731 * Return: 0 on success, -errno on failure
/*
 * _cxlflash_vlun_resize: converts resize->req_size from
 * CXLFLASH_BLOCK_SIZE units into sectors of gli->blk_len and rounds that up
 * to whole chunks (new_size). LUNs not in MODE_VIRTUAL are logged as
 * unsupported. A context is obtained with get_context() using
 * CTX_CTRL_ERR_FALLBACK and the RHTE is looked up with get_rhte(). Depending
 * on how new_size compares with rhte->lxt_cnt the table is grown, shrunk,
 * or only re-synced with AFU_HW_SYNC. On completion return_flags is cleared
 * and last_lba is computed from new_size * MC_CHUNK_SIZE * blk_len divided
 * by CXLFLASH_BLOCK_SIZE.
 *
 * NOTE(review): many lines are absent from this copy (e.g. 736, 743,
 * 745-746, 749, 751-754, 763-764, 768-773, 777-784, 788-792, 797-798,
 * 807-812, 816-820, 823-825), so the condition under which get_context() is
 * called, the error codes, the "minus one" last-LBA adjustment mentioned in
 * the prologue and the context release are not visible - confirm against
 * the original file.
 */
733 int _cxlflash_vlun_resize(struct scsi_device
*sdev
,
734 struct ctx_info
*ctxi
,
735 struct dk_cxlflash_resize
*resize
)
737 struct cxlflash_cfg
*cfg
= shost_priv(sdev
->host
);
738 struct device
*dev
= &cfg
->dev
->dev
;
739 struct llun_info
*lli
= sdev
->hostdata
;
740 struct glun_info
*gli
= lli
->parent
;
741 struct afu
*afu
= cfg
->afu
;
742 bool put_ctx
= false;
744 res_hndl_t rhndl
= resize
->rsrc_handle
;
747 u64 ctxid
= DECODE_CTXID(resize
->context_id
),
748 rctxid
= resize
->context_id
;
750 struct sisl_rht_entry
*rhte
;
755 * The requested size (req_size) is always assumed to be in 4k blocks,
756 * so we have to convert it here from 4k to chunk size.
758 nsectors
= (resize
->req_size
* CXLFLASH_BLOCK_SIZE
) / gli
->blk_len
;
759 new_size
= DIV_ROUND_UP(nsectors
, MC_CHUNK_SIZE
);
761 dev_dbg(dev
, "%s: ctxid=%llu rhndl=%llu req_size=%llu new_size=%llu\n",
762 __func__
, ctxid
, resize
->rsrc_handle
, resize
->req_size
,
765 if (unlikely(gli
->mode
!= MODE_VIRTUAL
)) {
766 dev_dbg(dev
, "%s: LUN mode does not support resize mode=%d\n",
767 __func__
, gli
->mode
);
774 ctxi
= get_context(cfg
, rctxid
, lli
, CTX_CTRL_ERR_FALLBACK
);
775 if (unlikely(!ctxi
)) {
776 dev_dbg(dev
, "%s: Bad context ctxid=%llu\n",
785 rhte
= get_rhte(ctxi
, rhndl
, lli
);
786 if (unlikely(!rhte
)) {
787 dev_dbg(dev
, "%s: Bad resource handle rhndl=%u\n",
/* new_size is passed by address: grow/shrink write the final size back */
793 if (new_size
> rhte
->lxt_cnt
)
794 rc
= grow_lxt(afu
, sdev
, ctxid
, rhndl
, rhte
, &new_size
);
795 else if (new_size
< rhte
->lxt_cnt
)
796 rc
= shrink_lxt(afu
, sdev
, rhndl
, rhte
, ctxi
, &new_size
);
799 * Rare case where there is already sufficient space, just
800 * need to perform a translation sync with the AFU. This
801 * scenario likely follows a previous sync failure during
802 * a resize operation. Accordingly, perform the heavyweight
803 * form of translation sync as it is unknown which type of
804 * resize failed previously.
806 rc
= cxlflash_afu_sync(afu
, ctxid
, rhndl
, AFU_HW_SYNC
);
813 resize
->hdr
.return_flags
= 0;
814 resize
->last_lba
= (new_size
* MC_CHUNK_SIZE
* gli
->blk_len
);
815 resize
->last_lba
/= CXLFLASH_BLOCK_SIZE
;
821 dev_dbg(dev
, "%s: resized to %llu returning rc=%d\n",
822 __func__
, resize
->last_lba
, rc
);
826 int cxlflash_vlun_resize(struct scsi_device
*sdev
,
827 struct dk_cxlflash_resize
*resize
)
829 return _cxlflash_vlun_resize(sdev
, NULL
, resize
);
833 * cxlflash_restore_luntable() - Restore LUN table to prior state
834 * @cfg: Internal structure associated with the host.
/*
 * cxlflash_restore_luntable: while holding global.mutex, walks the
 * cfg->lluns list and, for every port k selected in lli->port_sel, writes
 * lli->lun_id[k] back into slot lli->lun_index of that port's LUN table
 * (obtained with get_fc_port_luns()).
 *
 * NOTE(review): lines 837, 839-840, 843, 845, 847-849, 852 and 858-860 are
 * absent from this copy, so the declarations of lind and k and any filter
 * applied to list entries before they are restored are not visible -
 * confirm against the original file.
 */
836 void cxlflash_restore_luntable(struct cxlflash_cfg
*cfg
)
838 struct llun_info
*lli
, *temp
;
841 struct device
*dev
= &cfg
->dev
->dev
;
842 __be64 __iomem
*fc_port_luns
;
844 mutex_lock(&global
.mutex
);
846 list_for_each_entry_safe(lli
, temp
, &cfg
->lluns
, list
) {
850 lind
= lli
->lun_index
;
851 dev_dbg(dev
, "%s: Virtual LUNs on slot %d:\n", __func__
, lind
);
853 for (k
= 0; k
< cfg
->num_fc_ports
; k
++)
/* Bit k of port_sel selects FC port k */
854 if (lli
->port_sel
& (1 << k
)) {
855 fc_port_luns
= get_fc_port_luns(cfg
, k
);
856 writeq_be(lli
->lun_id
[k
], &fc_port_luns
[lind
]);
857 dev_dbg(dev
, "\t%d=%llx\n", k
, lli
->lun_id
[k
]);
861 mutex_unlock(&global
.mutex
);
865 * get_num_ports() - compute number of ports from port selection mask
866 * @psm: Port selection mask.
868 * Return: Population count of port selection mask
870 static inline u8
get_num_ports(u32 psm
)
872 static const u8 bits
[16] = { 0, 1, 1, 2, 1, 2, 2, 3,
873 1, 2, 2, 3, 2, 3, 3, 4 };
875 return bits
[psm
& 0xf];
879 * init_luntable() - write an entry in the LUN table
880 * @cfg: Internal structure associated with the host.
881 * @lli: Per adapter LUN information structure.
883 * On successful return, a LUN table entry is created:
884 * - at the top for LUNs visible on multiple ports.
885 * - at the bottom for LUNs visible only on one port.
887 * Return: 0 on success, -errno on failure
/*
 * init_luntable: under global.mutex, assigns the LUN a slot in the per-port
 * LUN tables. The port count comes from get_num_ports(lli->port_sel); zero
 * or more than cfg->num_fc_ports triggers a WARN. One visible path uses
 * cfg->promote_lun_index as the slot, writes lli->lun_id[k] for every
 * selected port and then increments promote_lun_index. The other path uses
 * the channel from PORTMASK2CHAN(), takes cfg->last_lun_index[chan] as the
 * slot, writes that port's entry and decrements last_lun_index[chan]. Both
 * paths compare promote_lun_index with last_lun_index beforehand. Finally
 * lli->in_table is set.
 *
 * NOTE(review): many lines are absent from this copy (e.g. 890-895, 898,
 * 900-903, 907-912, 915, 918-919, 921-925, 931-932, 936-937, 939-940, 943,
 * 946-949, 956-957, 959), so the locals, the condition that selects between
 * the two paths, the handling when the indices meet, the error codes and
 * the return are not visible - confirm against the original file.
 */
889 static int init_luntable(struct cxlflash_cfg
*cfg
, struct llun_info
*lli
)
896 struct device
*dev
= &cfg
->dev
->dev
;
897 __be64 __iomem
*fc_port_luns
;
899 mutex_lock(&global
.mutex
);
904 nports
= get_num_ports(lli
->port_sel
);
905 if (nports
== 0 || nports
> cfg
->num_fc_ports
) {
906 WARN(1, "Unsupported port configuration nports=%u", nports
);
913 * When LUN is visible from multiple ports, we will put
914 * it in the top half of the LUN table.
916 for (k
= 0; k
< cfg
->num_fc_ports
; k
++) {
917 if (!(lli
->port_sel
& (1 << k
)))
920 if (cfg
->promote_lun_index
== cfg
->last_lun_index
[k
]) {
926 lind
= lli
->lun_index
= cfg
->promote_lun_index
;
927 dev_dbg(dev
, "%s: Virtual LUNs on slot %d:\n", __func__
, lind
);
929 for (k
= 0; k
< cfg
->num_fc_ports
; k
++) {
930 if (!(lli
->port_sel
& (1 << k
)))
933 fc_port_luns
= get_fc_port_luns(cfg
, k
);
934 writeq_be(lli
->lun_id
[k
], &fc_port_luns
[lind
]);
935 dev_dbg(dev
, "\t%d=%llx\n", k
, lli
->lun_id
[k
]);
938 cfg
->promote_lun_index
++;
941 * When LUN is visible only from one port, we will put
942 * it in the bottom half of the LUN table.
944 chan
= PORTMASK2CHAN(lli
->port_sel
);
945 if (cfg
->promote_lun_index
== cfg
->last_lun_index
[chan
]) {
950 lind
= lli
->lun_index
= cfg
->last_lun_index
[chan
];
951 fc_port_luns
= get_fc_port_luns(cfg
, chan
);
952 writeq_be(lli
->lun_id
[chan
], &fc_port_luns
[lind
]);
953 cfg
->last_lun_index
[chan
]--;
954 dev_dbg(dev
, "%s: Virtual LUNs on slot %d:\n\t%d=%llx\n",
955 __func__
, lind
, chan
, lli
->lun_id
[chan
]);
958 lli
->in_table
= true;
960 mutex_unlock(&global
.mutex
);
961 dev_dbg(dev
, "%s: returning rc=%d\n", __func__
, rc
);
966 * cxlflash_disk_virtual_open() - open a virtual disk of specified size
967 * @sdev: SCSI device associated with LUN owning virtual LUN.
968 * @arg: UVirtual ioctl data structure.
970 * On successful return, the user is informed of the resource handle
971 * to be used to identify the virtual LUN and the size (in blocks) of
972 * the virtual LUN in last LBA format. When the size of the virtual LUN
973 * is zero, the last LBA is reflected as -1.
975 * Return: 0 on success, -errno on failure
/*
 * cxlflash_disk_virtual_open: opens a virtual LUN for the context named in
 * the uvirtual ioctl structure passed as arg. Under gli->mutex, a LUN still
 * in MODE_NONE has its block allocator set up with init_vlun(); the LUN is
 * then attached in MODE_VIRTUAL. After init_luntable(), the context is
 * fetched with get_context(), an RHT entry is checked out, its nmask and fp
 * fields are populated, and the requested size is applied through
 * _cxlflash_vlun_resize() using a resize structure marshalled from the
 * uvirtual one. On the visible success path rht_needs_ws is set when
 * DK_CXLFLASH_UVIRTUAL_NEED_WRITE_SAME was requested, last_lba and
 * rsrc_handle are written back, and DK_CXLFLASH_ALL_PORTS_ACTIVE is
 * reported when more than one port is selected.
 *
 * NOTE(review): many lines are absent from this copy (e.g. 978, 983, 986,
 * 990, 992-994, 997, 999, 1004, 1006-1011, 1013, 1015-1016, 1018, 1020,
 * 1022-1024, 1028-1031, 1035, 1037-1039, 1050, 1052-1053, 1065-1067,
 * 1070-1072, 1074, 1076-1077, 1081-1083), so the failure conditions, the
 * goto targets and labels that order the cleanup calls near the end
 * (rhte_checkin, cxlflash_lun_detach, cxlflash_ba_terminate) and the return
 * are not visible - confirm against the original file.
 */
977 int cxlflash_disk_virtual_open(struct scsi_device
*sdev
, void *arg
)
979 struct cxlflash_cfg
*cfg
= shost_priv(sdev
->host
);
980 struct device
*dev
= &cfg
->dev
->dev
;
981 struct llun_info
*lli
= sdev
->hostdata
;
982 struct glun_info
*gli
= lli
->parent
;
984 struct dk_cxlflash_uvirtual
*virt
= (struct dk_cxlflash_uvirtual
*)arg
;
985 struct dk_cxlflash_resize resize
;
987 u64 ctxid
= DECODE_CTXID(virt
->context_id
),
988 rctxid
= virt
->context_id
;
989 u64 lun_size
= virt
->lun_size
;
991 u64 rsrc_handle
= -1;
995 struct ctx_info
*ctxi
= NULL
;
996 struct sisl_rht_entry
*rhte
= NULL
;
998 dev_dbg(dev
, "%s: ctxid=%llu ls=%llu\n", __func__
, ctxid
, lun_size
);
1000 /* Setup the LUNs block allocator on first call */
1001 mutex_lock(&gli
->mutex
);
1002 if (gli
->mode
== MODE_NONE
) {
1003 rc
= init_vlun(lli
);
1005 dev_err(dev
, "%s: init_vlun failed rc=%d\n",
1012 rc
= cxlflash_lun_attach(gli
, MODE_VIRTUAL
, true);
1014 dev_err(dev
, "%s: Failed attach to LUN (VIRTUAL)\n", __func__
);
1017 mutex_unlock(&gli
->mutex
);
1019 rc
= init_luntable(cfg
, lli
);
1021 dev_err(dev
, "%s: init_luntable failed rc=%d\n", __func__
, rc
);
1025 ctxi
= get_context(cfg
, rctxid
, lli
, 0);
1026 if (unlikely(!ctxi
)) {
1027 dev_err(dev
, "%s: Bad context ctxid=%llu\n", __func__
, ctxid
);
1032 rhte
= rhte_checkout(ctxi
, lli
);
1033 if (unlikely(!rhte
)) {
1034 dev_err(dev
, "%s: too many opens ctxid=%llu\n",
1036 rc
= -EMFILE
; /* too many opens */
/* The resource handle is the entry's index within the context's RHT */
1040 rsrc_handle
= (rhte
- ctxi
->rht_start
);
1042 /* Populate RHT format 0 */
1043 rhte
->nmask
= MC_RHT_NMASK
;
1044 rhte
->fp
= SISL_RHT_FP(0U, ctxi
->rht_perms
);
1046 /* Resize even if requested size is 0 */
1047 marshal_virt_to_resize(virt
, &resize
);
1048 resize
.rsrc_handle
= rsrc_handle
;
1049 rc
= _cxlflash_vlun_resize(sdev
, ctxi
, &resize
);
1051 dev_err(dev
, "%s: resize failed rc=%d\n", __func__
, rc
);
1054 last_lba
= resize
.last_lba
;
1056 if (virt
->hdr
.flags
& DK_CXLFLASH_UVIRTUAL_NEED_WRITE_SAME
)
1057 ctxi
->rht_needs_ws
[rsrc_handle
] = true;
1059 virt
->hdr
.return_flags
= 0;
1060 virt
->last_lba
= last_lba
;
1061 virt
->rsrc_handle
= rsrc_handle
;
1063 if (get_num_ports(lli
->port_sel
) > 1)
1064 virt
->hdr
.return_flags
|= DK_CXLFLASH_ALL_PORTS_ACTIVE
;
1068 dev_dbg(dev
, "%s: returning handle=%llu rc=%d llba=%llu\n",
1069 __func__
, rsrc_handle
, rc
, last_lba
);
1073 rhte_checkin(ctxi
, rhte
);
1075 cxlflash_lun_detach(gli
);
1078 /* Special common cleanup prior to successful LUN attach */
1079 cxlflash_ba_terminate(&gli
->blka
.ba_lun
);
1080 mutex_unlock(&gli
->mutex
);
1085 * clone_lxt() - copies translation tables from source to destination RHTE
1086 * @afu: AFU associated with the host.
1087 * @blka: Block allocator associated with LUN.
1088 * @ctxid: Context ID of context owning the RHTE.
1089 * @rhndl: Resource handle associated with the RHTE.
1090 * @rhte: Destination resource handle entry (RHTE).
1091 * @rhte_src: Source resource handle entry (RHTE).
1093 * Return: 0 on success, -errno on failure
/*
 * clone_lxt: sizes a new LXT from the source RHTE's lxt_cnt, allocates it
 * zeroed and copies the source entries into it. Under blka->mutex each
 * copied entry's chunk number (rlba_base >> MC_CHUNK_SHIFT) is passed to
 * ba_clone(). The destination RHTE then receives lxt_start followed by
 * lxt_cnt, with a dma_wmb() after each store, and cxlflash_afu_sync() is
 * called with AFU_LW_SYNC. The trailing visible code resets
 * rhte->lxt_start to NULL and calls ba_free() on the chunks cloned so far.
 *
 * NOTE(review): ba_clone() is declared to return int but its result is
 * compared with -1ULL; the comparison still matches -1 after conversion,
 * but the mix of types looks unintended - confirm.
 *
 * NOTE(review): the comment opened at "clone the LBAs in block allocator"
 * has lost its closing line in this copy, so as written a compiler would
 * treat everything up to the next comment terminator as comment text. Many
 * other lines are absent as well (e.g. 1096-1098, 1101, 1106, 1108-1111,
 * 1113-1114, 1117, 1119-1123, 1130-1131, 1133, 1137-1143, 1156-1162,
 * 1165-1166, 1168-1169, 1171-1172, 1177 onward), so part of the parameter
 * list, the locals, the error codes and the labels separating the success
 * and failure paths are not visible - confirm against the original file.
 */
1095 static int clone_lxt(struct afu
*afu
,
1099 struct sisl_rht_entry
*rhte
,
1100 struct sisl_rht_entry
*rhte_src
)
1102 struct cxlflash_cfg
*cfg
= afu
->parent
;
1103 struct device
*dev
= &cfg
->dev
->dev
;
1104 struct sisl_lxt_entry
*lxt
= NULL
;
1105 bool locked
= false;
1107 u64 aun
; /* chunk# allocated by block allocator */
1112 ngrps
= LXT_NUM_GROUPS(rhte_src
->lxt_cnt
);
1115 /* allocate new LXTs for clone */
1116 lxt
= kzalloc((sizeof(*lxt
) * LXT_GROUP_SIZE
* ngrps
),
1118 if (unlikely(!lxt
)) {
1124 memcpy(lxt
, rhte_src
->lxt_start
,
1125 (sizeof(*lxt
) * rhte_src
->lxt_cnt
));
1127 /* clone the LBAs in block allocator via ref_cnt, note that the
1128 * block allocator mutex must be held until it is established
1129 * that this routine will complete without the need for a
1132 mutex_lock(&blka
->mutex
);
1134 for (i
= 0; i
< rhte_src
->lxt_cnt
; i
++) {
1135 aun
= (lxt
[i
].rlba_base
>> MC_CHUNK_SHIFT
);
1136 if (ba_clone(&blka
->ba_lun
, aun
) == -1ULL) {
1144 * The following sequence is prescribed in the SISlite spec
1145 * for syncing up with the AFU when adding LXT entries.
1147 dma_wmb(); /* Make LXT updates are visible */
1149 rhte
->lxt_start
= lxt
;
1150 dma_wmb(); /* Make RHT entry's LXT table update visible */
1152 rhte
->lxt_cnt
= rhte_src
->lxt_cnt
;
1153 dma_wmb(); /* Make RHT entry's LXT table size update visible */
1155 rc
= cxlflash_afu_sync(afu
, ctxid
, rhndl
, AFU_LW_SYNC
);
1163 mutex_unlock(&blka
->mutex
);
1164 dev_dbg(dev
, "%s: returning rc=%d\n", __func__
, rc
);
1167 /* Reset the RHTE */
1170 rhte
->lxt_start
= NULL
;
1173 /* free the clones already made */
1174 for (j
= 0; j
< i
; j
++) {
1175 aun
= (lxt
[j
].rlba_base
>> MC_CHUNK_SHIFT
);
1176 ba_free(&blka
->ba_lun
, aun
);
1183 * cxlflash_disk_clone() - clone a context by making snapshot of another
1184 * @sdev: SCSI device associated with LUN owning virtual LUN.
1185 * @clone: Clone ioctl data structure.
1187 * This routine effectively performs cxlflash_disk_open operation for each
1188 * in-use virtual resource in the source context. Note that the destination
1189 * context must be in pristine state and cannot have any resource handles
1190 * open at the time of the clone.
1192 * Return: 0 on success, -errno on failure
/*
 * cxlflash_disk_clone: clones the virtual resources of a source context
 * into a destination context. Cloning a context onto itself and LUNs not in
 * MODE_VIRTUAL are rejected. Both contexts are looked up with
 * get_context() (the source with CTX_CTRL_CLONE). The destination's RHT
 * entries are checked for a non-zero nmask, i.e. an open handle. Source
 * LUN-access entries are compared against the destination's list by sdev,
 * and newly allocated copies are collected on a local "sidecar" list. For
 * each source RHT entry with a non-zero nmask, the destination entry at the
 * same index receives nmask, fp (through SISL_RHT_FP_CLONE with the
 * destination's permissions) and rht_lun, and clone_lxt() copies the
 * translation table. The visible failure handling releases the handles
 * below the failing index with _cxlflash_disk_release() and checks the
 * failing entry back in. cxlflash_lun_attach() is called after an entry is
 * cloned, the sidecar list is spliced onto the destination's LUN list, and
 * both contexts are released with put_context().
 *
 * NOTE(review): many lines are absent from this copy (e.g. 1196, 1204,
 * 1206, 1208, 1213-1217, 1223-1226, 1228, 1231-1233, 1239-1242, 1246-1249,
 * 1252, 1255-1259, 1261, 1264-1268, 1271-1273, 1276-1278, 1295, 1297-1298,
 * 1309, 1314-1316, 1319-1321, 1323-1325, 1327-1330, 1332, 1335-1337 and
 * anything after 1339), so the declarations of sidecar, perms, i, j and rc,
 * the error codes, the loop exits and the labels ordering the cleanup are
 * not visible; the definition may also continue past the last visible line
 * - confirm against the original file.
 */
1194 int cxlflash_disk_clone(struct scsi_device
*sdev
,
1195 struct dk_cxlflash_clone
*clone
)
1197 struct cxlflash_cfg
*cfg
= shost_priv(sdev
->host
);
1198 struct device
*dev
= &cfg
->dev
->dev
;
1199 struct llun_info
*lli
= sdev
->hostdata
;
1200 struct glun_info
*gli
= lli
->parent
;
1201 struct blka
*blka
= &gli
->blka
;
1202 struct afu
*afu
= cfg
->afu
;
1203 struct dk_cxlflash_release release
= { { 0 }, 0 };
1205 struct ctx_info
*ctxi_src
= NULL
,
1207 struct lun_access
*lun_access_src
, *lun_access_dst
;
1209 u64 ctxid_src
= DECODE_CTXID(clone
->context_id_src
),
1210 ctxid_dst
= DECODE_CTXID(clone
->context_id_dst
),
1211 rctxid_src
= clone
->context_id_src
,
1212 rctxid_dst
= clone
->context_id_dst
;
1218 dev_dbg(dev
, "%s: ctxid_src=%llu ctxid_dst=%llu\n",
1219 __func__
, ctxid_src
, ctxid_dst
);
1221 /* Do not clone yourself */
1222 if (unlikely(rctxid_src
== rctxid_dst
)) {
1227 if (unlikely(gli
->mode
!= MODE_VIRTUAL
)) {
1229 dev_dbg(dev
, "%s: Only supported on virtual LUNs mode=%u\n",
1230 __func__
, gli
->mode
);
1234 ctxi_src
= get_context(cfg
, rctxid_src
, lli
, CTX_CTRL_CLONE
);
1235 ctxi_dst
= get_context(cfg
, rctxid_dst
, lli
, 0);
1236 if (unlikely(!ctxi_src
|| !ctxi_dst
)) {
1237 dev_dbg(dev
, "%s: Bad context ctxid_src=%llu ctxid_dst=%llu\n",
1238 __func__
, ctxid_src
, ctxid_dst
);
1243 /* Verify there is no open resource handle in the destination context */
1244 for (i
= 0; i
< MAX_RHT_PER_CONTEXT
; i
++)
1245 if (ctxi_dst
->rht_start
[i
].nmask
!= 0) {
1250 /* Clone LUN access list */
1251 list_for_each_entry(lun_access_src
, &ctxi_src
->luns
, list
) {
1253 list_for_each_entry(lun_access_dst
, &ctxi_dst
->luns
, list
)
1254 if (lun_access_dst
->sdev
== lun_access_src
->sdev
) {
1260 lun_access_dst
= kzalloc(sizeof(*lun_access_dst
),
1262 if (unlikely(!lun_access_dst
)) {
1263 dev_err(dev
, "%s: lun_access allocation fail\n",
/* New entries are staged on the sidecar list, not on the destination yet */
1269 *lun_access_dst
= *lun_access_src
;
1270 list_add(&lun_access_dst
->list
, &sidecar
);
1274 if (unlikely(!ctxi_src
->rht_out
)) {
1275 dev_dbg(dev
, "%s: Nothing to clone\n", __func__
);
1279 /* User specified permission on attach */
1280 perms
= ctxi_dst
->rht_perms
;
1283 * Copy over checked-out RHT (and their associated LXT) entries by
1284 * hand, stopping after we've copied all outstanding entries and
1285 * cleaning up if the clone fails.
1287 * Note: This loop is equivalent to performing cxlflash_disk_open and
1288 * cxlflash_vlun_resize. As such, LUN accounting needs to be taken into
1289 * account by attaching after each successful RHT entry clone. In the
1290 * event that a clone failure is experienced, the LUN detach is handled
1291 * via the cleanup performed by _cxlflash_disk_release.
1293 for (i
= 0; i
< MAX_RHT_PER_CONTEXT
; i
++) {
1294 if (ctxi_src
->rht_out
== ctxi_dst
->rht_out
)
1296 if (ctxi_src
->rht_start
[i
].nmask
== 0)
1299 /* Consume a destination RHT entry */
1300 ctxi_dst
->rht_out
++;
1301 ctxi_dst
->rht_start
[i
].nmask
= ctxi_src
->rht_start
[i
].nmask
;
1302 ctxi_dst
->rht_start
[i
].fp
=
1303 SISL_RHT_FP_CLONE(ctxi_src
->rht_start
[i
].fp
, perms
);
1304 ctxi_dst
->rht_lun
[i
] = ctxi_src
->rht_lun
[i
];
1306 rc
= clone_lxt(afu
, blka
, ctxid_dst
, i
,
1307 &ctxi_dst
->rht_start
[i
],
1308 &ctxi_src
->rht_start
[i
]);
1310 marshal_clone_to_rele(clone
, &release
);
1311 for (j
= 0; j
< i
; j
++) {
1312 release
.rsrc_handle
= j
;
1313 _cxlflash_disk_release(sdev
, ctxi_dst
,
1317 /* Put back the one we failed on */
1318 rhte_checkin(ctxi_dst
, &ctxi_dst
->rht_start
[i
]);
1322 cxlflash_lun_attach(gli
, gli
->mode
, false);
1326 list_splice(&sidecar
, &ctxi_dst
->luns
);
1331 put_context(ctxi_src
);
1333 put_context(ctxi_dst
);
1334 dev_dbg(dev
, "%s: returning rc=%d\n", __func__
, rc
);
/* Staged LUN-access copies that were never spliced are freed here */
1338 list_for_each_entry_safe(lun_access_src
, lun_access_dst
, &sidecar
, list
)
1339 kfree(lun_access_src
);