1 // SPDX-License-Identifier: GPL-2.0-only
3 * SCSI Zoned Block commands
5 * Copyright (C) 2014-2015 SUSE Linux GmbH
6 * Written by: Hannes Reinecke <hare@suse.de>
7 * Modified by: Damien Le Moal <damien.lemoal@hgst.com>
8 * Modified by: Shaun Tancheff <shaun.tancheff@seagate.com>
11 #include <linux/blkdev.h>
12 #include <linux/vmalloc.h>
13 #include <linux/sched/mm.h>
14 #include <linux/mutex.h>
16 #include <linux/unaligned.h>
18 #include <scsi/scsi.h>
19 #include <scsi/scsi_cmnd.h>
23 #define CREATE_TRACE_POINTS
26 /* Whether or not a SCSI zone descriptor describes a gap zone. */
27 static bool sd_zbc_is_gap_zone(const u8 buf
[64])
29 return (buf
[0] & 0xf) == ZBC_ZONE_TYPE_GAP
;
33 * sd_zbc_parse_report - Parse a SCSI zone descriptor
34 * @sdkp: SCSI disk pointer.
35 * @buf: SCSI zone descriptor.
36 * @idx: Index of the zone relative to the first zone reported by the current
37 * sd_zbc_report_zones() call.
38 * @cb: Callback function pointer.
39 * @data: Second argument passed to @cb.
41 * Return: Value returned by @cb.
43 * Convert a SCSI zone descriptor into struct blk_zone format. Additionally,
44 * call @cb(blk_zone, @data).
46 static int sd_zbc_parse_report(struct scsi_disk
*sdkp
, const u8 buf
[64],
47 unsigned int idx
, report_zones_cb cb
, void *data
)
49 struct scsi_device
*sdp
= sdkp
->device
;
50 struct blk_zone zone
= { 0 };
51 sector_t start_lba
, gran
;
/*
 * Gap zone descriptors are not expected here: seeing one trips a one-time
 * warning. NOTE(review): the statement guarded by this check is not
 * visible in this extract.
 */
54 if (WARN_ON_ONCE(sd_zbc_is_gap_zone(buf
)))
/* Zone type: low nibble of byte 0. Zone condition: high nibble of byte 1. */
57 zone
.type
= buf
[0] & 0x0f;
58 zone
.cond
= (buf
[1] >> 4) & 0xf;
/*
 * Bytes 16..23 hold the big-endian zone start LBA and bytes 8..15 the value
 * used as zone capacity; both are converted with logical_to_sectors().
 */
64 start_lba
= get_unaligned_be64(&buf
[16]);
65 zone
.start
= logical_to_sectors(sdp
, start_lba
);
66 zone
.capacity
= logical_to_sectors(sdp
, get_unaligned_be64(&buf
[8]));
/* The zone length defaults to the zone capacity. */
67 zone
.len
= zone
.capacity
;
/*
 * When a zone starting LBA granularity is set, a zone longer than that
 * granularity is reported as invalid.
 * NOTE(review): the format string has four %llu conversions but only three
 * arguments are visible here; a line seems to be missing from this extract.
 */
68 if (sdkp
->zone_starting_lba_gran
) {
69 gran
= logical_to_sectors(sdp
, sdkp
->zone_starting_lba_gran
);
70 if (zone
.len
> gran
) {
71 sd_printk(KERN_ERR
, sdkp
,
72 "Invalid zone at LBA %llu with capacity %llu and length %llu; granularity = %llu\n",
74 sectors_to_logical(sdp
, zone
.capacity
),
75 sectors_to_logical(sdp
, zone
.len
),
76 sectors_to_logical(sdp
, gran
));
80 * Use the starting LBA granularity instead of the zone length
81 * obtained from the REPORT ZONES command.
/* A FULL zone has its write pointer placed at the end of the zone. */
85 if (zone
.cond
== ZBC_ZONE_COND_FULL
)
86 zone
.wp
= zone
.start
+ zone
.len
;
/*
 * Write pointer taken from bytes 24..31 of the descriptor.
 * NOTE(review): presumably the non-FULL branch; the keyword separating it
 * from the assignment above is not visible in this extract.
 */
88 zone
.wp
= logical_to_sectors(sdp
, get_unaligned_be64(&buf
[24]));
/* Hand the converted zone to the caller-supplied callback. */
90 ret
= cb(&zone
, idx
, data
);
98 * sd_zbc_do_report_zones - Issue a REPORT ZONES scsi command.
99 * @sdkp: The target disk
100 * @buf: vmalloc-ed buffer to use for the reply
101 * @buflen: the buffer size
102 * @lba: Start LBA of the report
103 * @partial: Do partial report
105 * For internal use during device validation.
106 * Using partial=true can significantly speed up execution of a report zones
107 * command because the disk does not have to count all possible report matching
108 * zones and will only report the count of zones fitting in the command reply
111 static int sd_zbc_do_report_zones(struct scsi_disk
*sdkp
, unsigned char *buf
,
112 unsigned int buflen
, sector_t lba
,
115 struct scsi_device
*sdp
= sdkp
->device
;
/* The command uses the request queue's configured timeout. */
116 const int timeout
= sdp
->request_queue
->rq_timeout
;
117 struct scsi_sense_hdr sshdr
;
118 const struct scsi_exec_args exec_args
= {
121 unsigned char cmd
[16];
122 unsigned int rep_len
;
/*
 * Fill the 16-byte CDB: ZI_REPORT_ZONES in byte 1, the start LBA
 * (big-endian) in bytes 2..9 and the reply buffer length (big-endian) in
 * bytes 10..13.
 */
127 cmd
[1] = ZI_REPORT_ZONES
;
128 put_unaligned_be64(lba
, &cmd
[2]);
129 put_unaligned_be32(buflen
, &cmd
[10]);
/*
 * Byte 14 carries the partial-report flag.
 * NOTE(review): presumably set only when a partial report is requested; the
 * guarding condition is not visible in this extract.
 */
131 cmd
[14] = ZBC_REPORT_ZONE_PARTIAL
;
/* Execute the command as a REQ_OP_DRV_IN request filling @buf. */
133 result
= scsi_execute_cmd(sdp
, cmd
, REQ_OP_DRV_IN
, buf
, buflen
,
134 timeout
, SD_MAX_RETRIES
, &exec_args
);
/*
 * Failure reporting: log the start LBA and the result, plus the sense
 * header when the result is positive and the sense data is valid.
 */
136 sd_printk(KERN_ERR
, sdkp
,
137 "REPORT ZONES start lba %llu failed\n", lba
);
138 sd_print_result(sdkp
, "REPORT ZONES", result
);
139 if (result
> 0 && scsi_sense_valid(&sshdr
))
140 sd_print_sense_hdr(sdkp
, &sshdr
);
/* The first four bytes of the reply are read as a big-endian length. */
144 rep_len
= get_unaligned_be32(&buf
[0]);
146 sd_printk(KERN_ERR
, sdkp
,
147 "REPORT ZONES report invalid length %u\n",
156 * sd_zbc_alloc_report_buffer() - Allocate a buffer for report zones reply.
157 * @sdkp: The target disk
158 * @nr_zones: Maximum number of zones to report
159 * @buflen: Size of the buffer allocated
161 * Try to allocate a reply buffer for the number of requested zones.
162 * The size of the buffer allocated may be smaller than requested to
163 * satisfy the device constraint (max_hw_sectors, max_segments, etc).
165 * Return the address of the allocated buffer and update @buflen with
166 * the size of the allocated buffer.
168 static void *sd_zbc_alloc_report_buffer(struct scsi_disk
*sdkp
,
169 unsigned int nr_zones
, size_t *buflen
)
171 struct request_queue
*q
= sdkp
->disk
->queue
;
176 * Report zone buffer size should be at most 64B times the number of
177 * zones requested plus the 64B reply header, but should be aligned
178 * to SECTOR_SIZE for ATA devices.
179 * Make sure that this size does not exceed the hardware capabilities.
180 * Furthermore, since the report zone command cannot be split, make
181 * sure that the allocated buffer can always be mapped by limiting the
182 * number of pages allocated to the HBA max segments limit.
/* Never size the buffer for more zones than recorded in zone_info. */
184 nr_zones
= min(nr_zones
, sdkp
->zone_info
.nr_zones
);
/* One 64-byte slot per zone plus one for the header, rounded up to a sector. */
185 bufsize
= roundup((nr_zones
+ 1) * 64, SECTOR_SIZE
);
/* Clamp to the queue's max_hw_sectors (in bytes) ... */
186 bufsize
= min_t(size_t, bufsize
,
187 queue_max_hw_sectors(q
) << SECTOR_SHIFT
);
/* ... and to one page per segment the queue allows. */
188 bufsize
= min_t(size_t, bufsize
, queue_max_segments(q
) << PAGE_SHIFT
);
/*
 * Try a zeroed allocation without retrying; the size is halved (rounded
 * down to a sector multiple) inside the loop until it drops below one
 * sector.
 * NOTE(review): the success path (updating *buflen and returning the
 * buffer) is not visible in this extract.
 */
190 while (bufsize
>= SECTOR_SIZE
) {
191 buf
= kvzalloc(bufsize
, GFP_KERNEL
| __GFP_NORETRY
);
196 bufsize
= rounddown(bufsize
>> 1, SECTOR_SIZE
);
203 * sd_zbc_zone_sectors - Get the device zone size in number of 512B sectors.
204 * @sdkp: The target disk
206 static inline sector_t
sd_zbc_zone_sectors(struct scsi_disk
*sdkp
)
208 return logical_to_sectors(sdkp
->device
, sdkp
->zone_info
.zone_blocks
);
212 * sd_zbc_report_zones - SCSI .report_zones() callback.
213 * @disk: Disk to report zones for.
214 * @sector: Start sector.
215 * @nr_zones: Maximum number of zones to report.
216 * @cb: Callback function called to report zone information.
217 * @data: Second argument passed to @cb.
219 * Called by the block layer to iterate over zone information. See also the
220 * disk->fops->report_zones() calls in block/blk-zoned.c.
222 int sd_zbc_report_zones(struct gendisk
*disk
, sector_t sector
,
223 unsigned int nr_zones
, report_zones_cb cb
, void *data
)
225 struct scsi_disk
*sdkp
= scsi_disk(disk
);
/* Convert the start sector into a logical block address of the device. */
226 sector_t lba
= sectors_to_logical(sdkp
->device
, sector
);
229 u64 zone_length
, start_lba
;
230 size_t offset
, buflen
= 0;
234 if (sdkp
->device
->type
!= TYPE_ZBC
)
235 /* Not a zoned device */
239 /* Device gone or invalid */
/* Reply buffer for up to @nr_zones descriptors; buflen receives its size. */
242 buf
= sd_zbc_alloc_report_buffer(sdkp
, nr_zones
, &buflen
);
/*
 * Issue partial REPORT ZONES commands until @nr_zones zones were handled
 * or the LBA reaches the disk capacity.
 */
246 while (zone_idx
< nr_zones
&& lba
< sdkp
->capacity
) {
247 ret
= sd_zbc_do_report_zones(sdkp
, buf
, buflen
, lba
, true);
/*
 * The big-endian length in the first four reply bytes divided by the
 * 64-byte descriptor size bounds the number of descriptors to process.
 */
252 nr
= min(nr_zones
, get_unaligned_be32(&buf
[0]) / 64);
256 for (i
= 0; i
< nr
&& zone_idx
< nr_zones
; i
++) {
/* Descriptor layout: start LBA at offset 16, zone length at offset 8. */
258 start_lba
= get_unaligned_be64(&buf
[offset
+ 16]);
259 zone_length
= get_unaligned_be64(&buf
[offset
+ 8]);
/*
 * Reject a descriptor that does not line up with the expected LBA, or whose
 * start + length wraps around.
 * NOTE(review): part of the first-zone condition is not visible in this
 * extract.
 */
260 if ((zone_idx
== 0 &&
262 lba
>= start_lba
+ zone_length
)) ||
263 (zone_idx
> 0 && start_lba
!= lba
) ||
264 start_lba
+ zone_length
< start_lba
) {
265 sd_printk(KERN_ERR
, sdkp
,
266 "Zone %d at LBA %llu is invalid: %llu + %llu\n",
267 zone_idx
, lba
, start_lba
, zone_length
);
/* The next zone is expected to start right after this one. */
271 lba
= start_lba
+ zone_length
;
/*
 * Gap zone handling: an error is logged for a gap zone "without constant
 * LBA offsets".
 * NOTE(review): the statement guarded by the zone_starting_lba_gran test is
 * not visible in this extract.
 */
272 if (sd_zbc_is_gap_zone(&buf
[offset
])) {
273 if (sdkp
->zone_starting_lba_gran
)
275 sd_printk(KERN_ERR
, sdkp
,
276 "Gap zone without constant LBA offsets\n");
/*
 * Convert the descriptor and pass it on.
 * NOTE(review): the rest of this call and of the function is not visible in
 * this extract.
 */
281 ret
= sd_zbc_parse_report(sdkp
, buf
+ offset
, zone_idx
,
296 static blk_status_t
sd_zbc_cmnd_checks(struct scsi_cmnd
*cmd
)
298 struct request
*rq
= scsi_cmd_to_rq(cmd
);
299 struct scsi_disk
*sdkp
= scsi_disk(rq
->q
->disk
);
300 sector_t sector
= blk_rq_pos(rq
);
302 if (sdkp
->device
->type
!= TYPE_ZBC
)
303 /* Not a zoned device */
304 return BLK_STS_IOERR
;
306 if (sdkp
->device
->changed
)
307 return BLK_STS_IOERR
;
309 if (sector
& (sd_zbc_zone_sectors(sdkp
) - 1))
310 /* Unaligned request */
311 return BLK_STS_IOERR
;
317 * sd_zbc_setup_zone_mgmt_cmnd - Prepare a zone ZBC_OUT command. The operations
318 * can be RESET WRITE POINTER, OPEN, CLOSE or FINISH.
319 * @cmd: the command to setup
320 * @op: Operation to be performed
321 * @all: All zones control
323 * Called from sd_init_command() for REQ_OP_ZONE_RESET, REQ_OP_ZONE_RESET_ALL,
324 * REQ_OP_ZONE_OPEN, REQ_OP_ZONE_CLOSE or REQ_OP_ZONE_FINISH requests.
326 blk_status_t
sd_zbc_setup_zone_mgmt_cmnd(struct scsi_cmnd
*cmd
,
327 unsigned char op
, bool all
)
329 struct request
*rq
= scsi_cmd_to_rq(cmd
);
330 sector_t sector
= blk_rq_pos(rq
);
331 struct scsi_disk
*sdkp
= scsi_disk(rq
->q
->disk
);
/* Request start position expressed in logical blocks of the device. */
332 sector_t block
= sectors_to_logical(sdkp
->device
, sector
);
/*
 * Validate the request first (zoned device, not changed, zone aligned).
 * NOTE(review): the statement executed on failure is not visible in this
 * extract.
 */
335 ret
= sd_zbc_cmnd_checks(cmd
);
336 if (ret
!= BLK_STS_OK
)
/* Start from an all-zero CDB with the ZBC_OUT opcode in byte 0. */
340 memset(cmd
->cmnd
, 0, cmd
->cmd_len
);
341 cmd
->cmnd
[0] = ZBC_OUT
;
/*
 * Zone start LBA, big-endian, in CDB bytes 2..9.
 * NOTE(review): how @op and @all are encoded into the CDB is not visible in
 * this extract.
 */
346 put_unaligned_be64(block
, &cmd
->cmnd
[2]);
/* The command transfers no data. */
348 rq
->timeout
= SD_TIMEOUT
;
349 cmd
->sc_data_direction
= DMA_NONE
;
350 cmd
->transfersize
= 0;
357 * sd_zbc_complete - ZBC command post processing.
358 * @cmd: Completed command
359 * @good_bytes: Command reply bytes
360 * @sshdr: command sense header
362 * Called from sd_done() to handle zone commands errors and updates to the
363 * device queue zone write pointer offset cache.
365 unsigned int sd_zbc_complete(struct scsi_cmnd
*cmd
, unsigned int good_bytes
,
366 struct scsi_sense_hdr
*sshdr
)
/* NOTE(review): `result` is read here but its use is not visible in this extract. */
368 int result
= cmd
->result
;
369 struct request
*rq
= scsi_cmd_to_rq(cmd
);
/*
 * Match zone management requests whose sense data is ILLEGAL REQUEST with
 * additional sense code 0x24.
 */
371 if (op_is_zone_mgmt(req_op(rq
)) &&
373 sshdr
->sense_key
== ILLEGAL_REQUEST
&&
374 sshdr
->asc
== 0x24) {
376 * INVALID FIELD IN CDB error: a zone management command was
377 * attempted on a conventional zone. Nothing to worry about,
378 * so be quiet about the error.
/* Flag the request as quiet. */
380 rq
->rq_flags
|= RQF_QUIET
;
387 * sd_zbc_check_zoned_characteristics - Check zoned block device characteristics
389 * @buf: Buffer where to store the VPD page data
391 * Read VPD page B6, get information and check that reads are unconstrained.
393 static int sd_zbc_check_zoned_characteristics(struct scsi_disk
*sdkp
,
396 u64 zone_starting_lba_gran
;
/* Fetch the first 64 bytes of VPD page 0xB6 into buf. */
398 if (scsi_get_vpd_page(sdkp
->device
, 0xb6, buf
, 64)) {
399 sd_printk(KERN_NOTICE
, sdkp
,
400 "Read zoned characteristics VPD page failed\n");
/*
 * Devices that are not TYPE_ZBC: record the optimal open and optimal
 * non-sequential zone counts from bytes 8..11 and 12..15, and no maximum
 * number of open zones.
 */
404 if (sdkp
->device
->type
!= TYPE_ZBC
) {
407 sdkp
->zones_optimal_open
= get_unaligned_be32(&buf
[8]);
408 sdkp
->zones_optimal_nonseq
= get_unaligned_be32(&buf
[12]);
409 sdkp
->zones_max_open
= 0;
/*
 * NOTE(review): the assignments below appear to be the TYPE_ZBC branch; the
 * keyword separating the two branches is not visible in this extract.
 * Here the optimal counts are cleared and the maximum number of open zones
 * is read from bytes 16..19; urswrz is bit 0 of byte 4.
 */
414 sdkp
->urswrz
= buf
[4] & 1;
415 sdkp
->zones_optimal_open
= 0;
416 sdkp
->zones_optimal_nonseq
= 0;
417 sdkp
->zones_max_open
= get_unaligned_be32(&buf
[16]);
418 /* Check zone alignment method */
/* The alignment method is the low nibble of byte 23. */
419 switch (buf
[23] & 0xf) {
421 case ZBC_CONSTANT_ZONE_LENGTH
:
422 /* Use zone length */
424 case ZBC_CONSTANT_ZONE_START_OFFSET
:
/*
 * The granularity (bytes 24..31, big-endian) must be non-zero and a power
 * of two.
 * NOTE(review): the upper bound it is compared against is not visible in
 * this extract.
 */
425 zone_starting_lba_gran
= get_unaligned_be64(&buf
[24]);
426 if (zone_starting_lba_gran
== 0 ||
427 !is_power_of_2(zone_starting_lba_gran
) ||
428 logical_to_sectors(sdkp
->device
, zone_starting_lba_gran
) >
430 sd_printk(KERN_ERR
, sdkp
,
431 "Invalid zone starting LBA granularity %llu\n",
432 zone_starting_lba_gran
);
/* Remember the validated granularity for the zone report code. */
435 sdkp
->zone_starting_lba_gran
= zone_starting_lba_gran
;
438 sd_printk(KERN_ERR
, sdkp
, "Invalid zone alignment method\n");
443 * Check for unconstrained reads: host-managed devices with
444 * constrained reads (drives failing read after write pointer)
/* The notice is only printed on the first scan of the device. */
448 if (sdkp
->first_scan
)
449 sd_printk(KERN_NOTICE
, sdkp
,
450 "constrained reads devices are not supported\n");
458 * sd_zbc_check_capacity - Check the device capacity
460 * @buf: command buffer
461 * @zblocks: zone size in logical blocks
463 * Get the device zone size and check that the device capacity as reported
464 * by READ CAPACITY matches the max_lba value (plus one) of the report zones
465 * command reply for devices with RC_BASIS == 0.
467 * Returns 0 upon success or an error code upon failure.
469 static int sd_zbc_check_capacity(struct scsi_disk
*sdkp
, unsigned char *buf
,
477 /* Do a report zone to get max_lba and the size of the first zone */
/* Full (non-partial) report starting at LBA 0, using SD_BUF_SIZE bytes of buf. */
478 ret
= sd_zbc_do_report_zones(sdkp
, buf
, SD_BUF_SIZE
, 0, false);
482 if (sdkp
->rc_basis
== 0) {
483 /* The max_lba field is the capacity of this device */
/* max_lba is the big-endian value in reply bytes 8..15. */
484 max_lba
= get_unaligned_be64(&buf
[8]);
/* Align the recorded capacity on max_lba + 1; warn on the first scan only. */
485 if (sdkp
->capacity
!= max_lba
+ 1) {
486 if (sdkp
->first_scan
)
487 sd_printk(KERN_WARNING
, sdkp
,
488 "Changing capacity from %llu to max LBA+1 %llu\n",
489 (unsigned long long)sdkp
->capacity
,
490 (unsigned long long)max_lba
+ 1);
491 sdkp
->capacity
= max_lba
+ 1;
/*
 * Without a zone starting LBA granularity, the zone size comes from the
 * first reported zone descriptor.
 * NOTE(review): the assignment of `rec` is not visible in this extract.
 */
495 if (sdkp
->zone_starting_lba_gran
== 0) {
496 /* Get the size of the first reported zone */
498 zone_blocks
= get_unaligned_be64(&rec
[8]);
/* The zone size in sectors must not exceed UINT_MAX. */
499 if (logical_to_sectors(sdkp
->device
, zone_blocks
) > UINT_MAX
) {
500 if (sdkp
->first_scan
)
501 sd_printk(KERN_NOTICE
, sdkp
,
502 "Zone size too large\n");
/*
 * NOTE(review): presumably the alternative branch (granularity set); the
 * keyword introducing it is not visible in this extract.
 */
506 zone_blocks
= sdkp
->zone_starting_lba_gran
;
/*
 * The zone size must be a power of two: other code in this file masks with
 * (zone size - 1) to test alignment.
 */
509 if (!is_power_of_2(zone_blocks
)) {
510 sd_printk(KERN_ERR
, sdkp
,
511 "Zone size %llu is not a power of two.\n",
/* Hand the zone size, in logical blocks, back to the caller. */
516 *zblocks
= zone_blocks
;
521 static void sd_zbc_print_zones(struct scsi_disk
*sdkp
)
523 if (sdkp
->device
->type
!= TYPE_ZBC
|| !sdkp
->capacity
)
526 if (sdkp
->capacity
& (sdkp
->zone_info
.zone_blocks
- 1))
527 sd_printk(KERN_NOTICE
, sdkp
,
528 "%u zones of %u logical blocks + 1 runt zone\n",
529 sdkp
->zone_info
.nr_zones
- 1,
530 sdkp
->zone_info
.zone_blocks
);
532 sd_printk(KERN_NOTICE
, sdkp
,
533 "%u zones of %u logical blocks\n",
534 sdkp
->zone_info
.nr_zones
,
535 sdkp
->zone_info
.zone_blocks
);
539 * Call blk_revalidate_disk_zones() if any of the zoned disk properties have
540 * changed that make it necessary to call that function. Called by
541 * sd_revalidate_disk() after the gendisk capacity has been set.
543 int sd_zbc_revalidate_zones(struct scsi_disk
*sdkp
)
545 struct gendisk
*disk
= sdkp
->disk
;
546 struct request_queue
*q
= disk
->queue
;
/* Candidate values come from early_zone_info. */
547 u32 zone_blocks
= sdkp
->early_zone_info
.zone_blocks
;
548 unsigned int nr_zones
= sdkp
->early_zone_info
.nr_zones
;
553 * There is nothing to do for regular disks, including host-aware disks
554 * that have partitions.
556 if (!blk_queue_is_zoned(q
))
/*
 * Compare the candidate values against the current zone_info and the
 * gendisk zone count.
 * NOTE(review): the statement guarded by this comparison is not visible in
 * this extract.
 */
559 if (sdkp
->zone_info
.zone_blocks
== zone_blocks
&&
560 sdkp
->zone_info
.nr_zones
== nr_zones
&&
561 disk
->nr_zones
== nr_zones
)
/* Commit the candidate values as the current zone information. */
564 sdkp
->zone_info
.zone_blocks
= zone_blocks
;
565 sdkp
->zone_info
.nr_zones
= nr_zones
;
/* Run the block layer revalidation inside a memalloc_noio scope. */
567 flags
= memalloc_noio_save();
568 ret
= blk_revalidate_disk_zones(disk
);
569 memalloc_noio_restore(flags
);
/*
 * Reset the current zone information.
 * NOTE(review): presumably only when the revalidation failed; the guarding
 * condition is not visible in this extract.
 */
571 sdkp
->zone_info
= (struct zoned_disk_info
){ };
/* Log the resulting zone layout. */
576 sd_zbc_print_zones(sdkp
);
582 * sd_zbc_read_zones - Read zone information and update the request queue
583 * @sdkp: SCSI disk pointer.
584 * @lim: queue limits to read into
585 * @buf: 512 byte buffer used for storing SCSI command output.
587 * Read zone information and update the request queue zone characteristics and
588 * also the zoned device information in *sdkp. Called by sd_revalidate_disk()
589 * before the gendisk capacity has been set.
591 int sd_zbc_read_zones(struct scsi_disk
*sdkp
, struct queue_limits
*lim
,
594 unsigned int nr_zones
;
598 if (sdkp
->device
->type
!= TYPE_ZBC
)
601 lim
->features
|= BLK_FEAT_ZONED
;
604 * Per ZBC and ZAC specifications, writes in sequential write required
605 * zones of host-managed devices must be aligned to the device physical
608 lim
->zone_write_granularity
= sdkp
->physical_block_size
;
610 /* READ16/WRITE16/SYNC16 is mandatory for ZBC devices */
611 sdkp
->device
->use_16_for_rw
= 1;
612 sdkp
->device
->use_10_for_rw
= 0;
613 sdkp
->device
->use_16_for_sync
= 1;
615 /* Check zoned block device characteristics (unconstrained reads) */
616 ret
= sd_zbc_check_zoned_characteristics(sdkp
, buf
);
620 /* Check the device capacity reported by report zones */
621 ret
= sd_zbc_check_capacity(sdkp
, buf
, &zone_blocks
);
625 nr_zones
= round_up(sdkp
->capacity
, zone_blocks
) >> ilog2(zone_blocks
);
626 sdkp
->early_zone_info
.nr_zones
= nr_zones
;
627 sdkp
->early_zone_info
.zone_blocks
= zone_blocks
;
629 /* The drive satisfies the kernel restrictions: set it up */
630 if (sdkp
->zones_max_open
== U32_MAX
)
631 lim
->max_open_zones
= 0;
633 lim
->max_open_zones
= sdkp
->zones_max_open
;
634 lim
->max_active_zones
= 0;
635 lim
->chunk_sectors
= logical_to_sectors(sdkp
->device
, zone_blocks
);