1 /* Advanced Host Controller Interface (AHCI) driver, by D.C. van Moolenbroek
2 * - Multithreading support by Arne Welzel
3 * - Native Command Queuing support by Raja Appuswamy
6 * This driver is based on the following specifications:
7 * - Serial ATA Advanced Host Controller Interface (AHCI) 1.3
8 * - Serial ATA Revision 2.6
9 * - AT Attachment with Packet Interface 7 (ATA/ATAPI-7)
10 * - ATAPI Removable Rewritable Media Devices 1.3 (SFF-8070)
12 * The driver supports device hot-plug, active device status tracking,
13 * nonremovable ATA and removable ATAPI devices, custom logical sector sizes,
14 * sector-unaligned reads, native command queuing and parallel requests to
17 * It does not implement transparent failure recovery, power management, or
18 * port multiplier support.
21 * An AHCI controller exposes a number of ports (up to 32), each of which may
22 * or may not have one device attached (port multipliers are not supported).
23 * Each port is maintained independently.
25 * The following figure depicts the possible transitions between port states.
26 * The NO_PORT state is not included; no transitions can be made from or to it.
28 * +----------+ +----------+
29 * | SPIN_UP | ------+ +-----> | BAD_DEV | ------------------+
30 * +----------+ | | +----------+ |
33 * +----------+ +----------+ +----------+ +----------+ |
34 * | NO_DEV | --> | WAIT_DEV | --> | WAIT_ID | --> | GOOD_DEV | |
35 * +----------+ +----------+ +----------+ +----------+ |
37 * +----------------+----------------+----------------+--------+
39 * At driver startup, all physically present ports are put in SPIN_UP state.
40 * This state differs from NO_DEV in that BDEV_OPEN calls will be deferred
41 * until either the spin-up timer expires, or a device has been identified on
42 * that port. This prevents early BDEV_OPEN calls from failing erroneously at
43 * startup time if the device has not yet been able to announce its presence.
45 * If a device is detected, either at startup time or after hot-plug, its
46 * signature is checked and it is identified, after which it may be determined
47 * to be a usable ("good") device, which means that the device is considered to
48 * be in a working state. If these steps fail, the device is marked as unusable
49 * ("bad"). At any point in time, the device may be disconnected; the port is
50 * then put back into NO_DEV state.
52 * A device in working state (GOOD_DEV) may or may not have a medium. All ATA
53 * devices are assumed to be fixed; all ATAPI devices are assumed to have
54 * removable media. To prevent erroneous access to switched devices and media,
55 * the driver makes devices inaccessible until they are fully closed (the open
56 * count is zero) when a device (hot-plug) or medium change is detected.
57 * For hot-plug changes, access is prevented by setting the BARRIER flag until
58 * the device is fully closed and then reopened. For medium changes, access is
59 * prevented by not acknowledging the medium change until the device is fully
60 * closed and reopened. Removable media are not locked in the drive while
61 * opened, because the driver author is uncomfortable with that concept.
63 * Ports may leave the group of states where a device is connected (that is,
64 * WAIT_ID, GOOD_DEV, and BAD_DEV) in two ways: either due to a hot-unplug
65 * event, or due to a hard reset after a serious failure. For simplicity, we
66 * perform a hard reset after a hot-unplug event as well, so that the link
67 * to the device is broken. Thus, in both cases, a transition to NO_DEV is
68 * made, after which the link to the device may or may not be reestablished.
69 * In both cases, ongoing requests are cancelled and the BARRIER flag is set.
71 * The following table lists for each state, whether the port is started
72 * (PxCMD.ST is set), whether a timer is running, what the PxIE mask is to be
73 * set to, and what BDEV_OPEN calls on this port should return.
75 * State Started Timer PxIE BDEV_OPEN
76 * --------- --------- --------- --------- ---------
77 * NO_PORT no no (none) ENXIO
78 * SPIN_UP no yes PCE (wait)
79 * NO_DEV no no PCE ENXIO
80 * WAIT_DEV no yes PCE (wait)
81 * BAD_DEV no no PRCE ENXIO
82 * WAIT_ID yes yes PRCE+ (wait)
83 * GOOD_DEV yes per-command PRCE+ OK
85 * In order to continue deferred BDEV_OPEN calls, the BUSY flag must be unset
86 * when changing from SPIN_UP to any state but WAIT_DEV, and when changing from
87 * WAIT_DEV to any state but WAIT_ID, and when changing from WAIT_ID to any
91 * The maximum byte size of a single transfer (MAX_TRANSFER) is currently set
92 * to 4MB. This limit has been chosen for a number of reasons:
93 * - The size that can be specified in a Physical Region Descriptor (PRD) is
94 * limited to 4MB for AHCI. Limiting the total transfer size to at most this
95 * size implies that no I/O vector element needs to be split up across PRDs.
96 * This means that the maximum number of needed PRDs can be predetermined.
97 * - The limit is below what can be transferred in a single ATA request, namely
98 * 64k sectors (i.e., at least 32MB). This means that transfer requests need
99 * never be split up into smaller chunks, reducing implementation complexity.
100 * - A single, static timeout can be used for transfers. Very large transfers
101 * can legitimately take up to several minutes -- well beyond the appropriate
102 * timeout range for small transfers. The limit obviates the need for a
103 * timeout scheme that takes into account the transfer size.
104 * - Similarly, the transfer limit reduces the opportunity for buggy/malicious
105 * clients to keep the driver busy for a long time with a single request.
106 * - The limit is high enough for all practical purposes. The transfer setup
107 * overhead is already relatively negligible at this size, and even larger
108 * requests will not help maximize throughput. As NR_IOREQS is currently set
109 * to 64, the limit still allows file systems to perform I/O requests with
110 * vectors completely filled with 64KB-blocks.
112 #include <minix/drivers.h>
113 #include <minix/blockdriver_mt.h>
114 #include <minix/drvlib.h>
115 #include <machine/pci.h>
116 #include <sys/ioc_disk.h>
117 #include <sys/mman.h>
122 /* Host Bus Adapter (HBA) state. */
124 volatile u32_t
*base
; /* base address of memory-mapped registers */
125 size_t size
; /* size of memory-mapped register area */
127 int nr_ports
; /* addressable number of ports (1..NR_PORTS) */
128 int nr_cmds
; /* maximum number of commands per port */
129 int has_ncq
; /* NCQ support flag */
130 int has_clo
; /* CLO support flag */
132 int irq
; /* IRQ number */
133 int hook_id
; /* IRQ hook ID */
136 #define hba_read(r) (hba_state.base[r])
137 #define hba_write(r, v) (hba_state.base[r] = (v))
140 static struct port_state
{
141 int state
; /* port state */
142 unsigned int flags
; /* port flags */
144 volatile u32_t
*reg
; /* memory-mapped port registers */
146 u8_t
*mem_base
; /* primary memory buffer virtual address */
147 phys_bytes mem_phys
; /* primary memory buffer physical address */
148 vir_bytes mem_size
; /* primary memory buffer size */
150 /* the FIS, CL, CT[0] and TMP buffers are all in the primary buffer */
151 u32_t
*fis_base
; /* FIS receive buffer virtual address */
152 phys_bytes fis_phys
; /* FIS receive buffer physical address */
153 u32_t
*cl_base
; /* command list buffer virtual address */
154 phys_bytes cl_phys
; /* command list buffer physical address */
155 u8_t
*ct_base
[NR_CMDS
]; /* command table virtual address */
156 phys_bytes ct_phys
[NR_CMDS
]; /* command table physical address */
157 u8_t
*tmp_base
; /* temporary storage buffer virtual address */
158 phys_bytes tmp_phys
; /* temporary storage buffer physical address */
160 u8_t
*pad_base
; /* sector padding buffer virtual address */
161 phys_bytes pad_phys
; /* sector padding buffer physical address */
162 vir_bytes pad_size
; /* sector padding buffer size */
164 u64_t lba_count
; /* number of valid Logical Block Addresses */
165 u32_t sector_size
; /* medium sector size in bytes */
167 int open_count
; /* number of times this port is opened */
169 int device
; /* associated device number, or NO_DEVICE */
170 struct device part
[DEV_PER_DRIVE
]; /* partition bases and sizes */
171 struct device subpart
[SUB_PER_DRIVE
]; /* same for subpartitions */
173 minix_timer_t timer
; /* port-specific timeout timer */
174 int left
; /* number of tries left before giving up */
175 /* (only used for signature probing) */
177 int queue_depth
; /* NCQ queue depth */
178 u32_t pend_mask
; /* commands not yet complete */
180 thread_id_t tid
;/* ID of the worker thread */
181 minix_timer_t timer
; /* timer associated with each request */
182 int result
; /* success/failure result of the commands */
184 } port_state
[NR_PORTS
];
186 #define port_read(ps, r) ((ps)->reg[r])
187 #define port_write(ps, r, v) ((ps)->reg[r] = (v))
189 static int ahci_instance
; /* driver instance number */
191 static int ahci_verbose
; /* verbosity level (0..4) */
193 /* Timeout-related values. */
194 static clock_t ahci_spinup_timeout
;
195 static clock_t ahci_device_timeout
;
196 static clock_t ahci_device_delay
;
197 static unsigned int ahci_device_checks
;
198 static clock_t ahci_command_timeout
;
199 static clock_t ahci_transfer_timeout
;
200 static clock_t ahci_flush_timeout
;
202 /* Timeout environment variable names and default values. */
204 char *name
; /* environment variable name */
205 u32_t default_ms
; /* default in milliseconds */
206 clock_t *ptr
; /* clock ticks value pointer */
208 { "ahci_init_timeout", SPINUP_TIMEOUT
, &ahci_spinup_timeout
},
209 { "ahci_device_timeout", DEVICE_TIMEOUT
, &ahci_device_timeout
},
210 { "ahci_cmd_timeout", COMMAND_TIMEOUT
, &ahci_command_timeout
},
211 { "ahci_io_timeout", TRANSFER_TIMEOUT
, &ahci_transfer_timeout
},
212 { "ahci_flush_timeout", FLUSH_TIMEOUT
, &ahci_flush_timeout
}
215 static int ahci_map
[MAX_DRIVES
]; /* device-to-port mapping */
217 static int ahci_exiting
= FALSE
; /* exit after last close? */
219 #define BUILD_ARG(port, tag) (((port) << 8) | (tag))
220 #define GET_PORT(arg) ((arg) >> 8)
221 #define GET_TAG(arg) ((arg) & 0xFF)
223 #define dprintf(v,s) do { \
224 if (ahci_verbose >= (v)) \
228 /* Convert milliseconds to clock ticks. Round up. */
229 #define millis_to_hz(ms) (((ms) * sys_hz() + 999) / 1000)
231 static void port_set_cmd(struct port_state
*ps
, int cmd
, cmd_fis_t
*fis
,
232 u8_t packet
[ATAPI_PACKET_SIZE
], prd_t
*prdt
, int nr_prds
, int write
);
233 static void port_issue(struct port_state
*ps
, int cmd
, clock_t timeout
);
234 static int port_exec(struct port_state
*ps
, int cmd
, clock_t timeout
);
235 static void port_timeout(int arg
);
236 static void port_disconnect(struct port_state
*ps
);
238 static char *ahci_portname(struct port_state
*ps
);
239 static int ahci_open(devminor_t minor
, int access
);
240 static int ahci_close(devminor_t minor
);
241 static ssize_t
ahci_transfer(devminor_t minor
, int do_write
, u64_t position
,
242 endpoint_t endpt
, iovec_t
*iovec
, unsigned int count
, int flags
);
243 static struct device
*ahci_part(devminor_t minor
);
244 static void ahci_alarm(clock_t stamp
);
245 static int ahci_ioctl(devminor_t minor
, unsigned long request
,
246 endpoint_t endpt
, cp_grant_id_t grant
, endpoint_t user_endpt
);
247 static void ahci_intr(unsigned int mask
);
248 static int ahci_device(devminor_t minor
, device_id_t
*id
);
249 static struct port_state
*ahci_get_port(devminor_t minor
);
251 /* AHCI driver table. */
252 static struct blockdriver ahci_dtab
= {
253 .bdr_type
= BLOCKDRIVER_TYPE_DISK
,
254 .bdr_open
= ahci_open
,
255 .bdr_close
= ahci_close
,
256 .bdr_transfer
= ahci_transfer
,
257 .bdr_ioctl
= ahci_ioctl
,
258 .bdr_part
= ahci_part
,
259 .bdr_intr
= ahci_intr
,
260 .bdr_alarm
= ahci_alarm
,
261 .bdr_device
= ahci_device
264 /*===========================================================================*
266 *===========================================================================*/
267 static int atapi_exec(struct port_state
*ps
, int cmd
,
268 u8_t packet
[ATAPI_PACKET_SIZE
], size_t size
, int write
)
270 /* Execute an ATAPI command. Return OK or error.
276 assert(size
<= AHCI_TMP_SIZE
);
278 /* Fill in the command table with a FIS, a packet, and if a data
279 * transfer is requested, also a PRD.
281 memset(&fis
, 0, sizeof(fis
));
282 fis
.cf_cmd
= ATA_CMD_PACKET
;
285 fis
.cf_feat
= ATA_FEAT_PACKET_DMA
;
286 if (!write
&& (ps
->flags
& FLAG_USE_DMADIR
))
287 fis
.cf_feat
|= ATA_FEAT_PACKET_DMADIR
;
289 prd
[0].vp_addr
= ps
->tmp_phys
;
290 prd
[0].vp_size
= size
;
294 /* Start the command, and wait for it to complete or fail. */
295 port_set_cmd(ps
, cmd
, &fis
, packet
, prd
, nr_prds
, write
);
297 return port_exec(ps
, cmd
, ahci_command_timeout
);
300 /*===========================================================================*
302 *===========================================================================*/
303 static int atapi_test_unit(struct port_state
*ps
, int cmd
)
305 /* Test whether the ATAPI device and medium are ready.
307 u8_t packet
[ATAPI_PACKET_SIZE
];
309 memset(packet
, 0, sizeof(packet
));
310 packet
[0] = ATAPI_CMD_TEST_UNIT
;
312 return atapi_exec(ps
, cmd
, packet
, 0, FALSE
);
315 /*===========================================================================*
316 * atapi_request_sense *
317 *===========================================================================*/
318 static int atapi_request_sense(struct port_state
*ps
, int cmd
, int *sense
)
320 /* Request error (sense) information from an ATAPI device, and return
321 * the sense key. The additional sense codes are not used at this time.
323 u8_t packet
[ATAPI_PACKET_SIZE
];
326 memset(packet
, 0, sizeof(packet
));
327 packet
[0] = ATAPI_CMD_REQUEST_SENSE
;
328 packet
[4] = ATAPI_REQUEST_SENSE_LEN
;
330 r
= atapi_exec(ps
, cmd
, packet
, ATAPI_REQUEST_SENSE_LEN
, FALSE
);
335 dprintf(V_REQ
, ("%s: ATAPI SENSE: sense %x ASC %x ASCQ %x\n",
336 ahci_portname(ps
), ps
->tmp_base
[2] & 0xF, ps
->tmp_base
[12],
339 *sense
= ps
->tmp_base
[2] & 0xF;
344 /*===========================================================================*
346 *===========================================================================*/
347 static int atapi_load_eject(struct port_state
*ps
, int cmd
, int load
)
349 /* Load or eject a medium in an ATAPI device.
351 u8_t packet
[ATAPI_PACKET_SIZE
];
353 memset(packet
, 0, sizeof(packet
));
354 packet
[0] = ATAPI_CMD_START_STOP
;
355 packet
[4] = load
? ATAPI_START_STOP_LOAD
: ATAPI_START_STOP_EJECT
;
357 return atapi_exec(ps
, cmd
, packet
, 0, FALSE
);
360 /*===========================================================================*
361 * atapi_read_capacity *
362 *===========================================================================*/
363 static int atapi_read_capacity(struct port_state
*ps
, int cmd
)
365 /* Retrieve the LBA count and sector size of an ATAPI medium.
367 u8_t packet
[ATAPI_PACKET_SIZE
], *buf
;
370 memset(packet
, 0, sizeof(packet
));
371 packet
[0] = ATAPI_CMD_READ_CAPACITY
;
373 r
= atapi_exec(ps
, cmd
, packet
, ATAPI_READ_CAPACITY_LEN
, FALSE
);
377 /* Store the number of LBA blocks and sector size. */
379 ps
->lba_count
= (u64_t
) ((buf
[0] << 24) | (buf
[1] << 16) |
380 (buf
[2] << 8) | buf
[3]) + 1;
382 (buf
[4] << 24) | (buf
[5] << 16) | (buf
[6] << 8) | buf
[7];
384 if (ps
->sector_size
== 0 || (ps
->sector_size
& 1)) {
385 dprintf(V_ERR
, ("%s: invalid medium sector size %u\n",
386 ahci_portname(ps
), ps
->sector_size
));
392 ("%s: medium detected (%u byte sectors, %llu MB size)\n",
393 ahci_portname(ps
), ps
->sector_size
,
394 ps
->lba_count
* ps
->sector_size
/ (1024*1024)));
399 /*===========================================================================*
400 * atapi_check_medium *
401 *===========================================================================*/
402 static int atapi_check_medium(struct port_state
*ps
, int cmd
)
404 /* Check whether a medium is present in a removable-media ATAPI device.
405 * If a new medium is detected, get its total and sector size. Return
406 * OK only if a usable medium is present, and an error otherwise.
410 /* Perform a readiness check. */
411 if (atapi_test_unit(ps
, cmd
) != OK
) {
412 ps
->flags
&= ~FLAG_HAS_MEDIUM
;
414 /* If the check failed due to a unit attention condition, retry
415 * reading the medium capacity. Otherwise, assume that there is
416 * no medium available.
418 if (atapi_request_sense(ps
, cmd
, &sense
) != OK
||
419 sense
!= ATAPI_SENSE_UNIT_ATT
)
423 /* If a medium is newly detected, try reading its capacity now. */
424 if (!(ps
->flags
& FLAG_HAS_MEDIUM
)) {
425 if (atapi_read_capacity(ps
, cmd
) != OK
)
428 ps
->flags
|= FLAG_HAS_MEDIUM
;
434 /*===========================================================================*
436 *===========================================================================*/
437 static int atapi_id_check(struct port_state
*ps
, u16_t
*buf
)
439 /* Determine whether we support this ATAPI device based on the
440 * identification data it returned, and store some of its properties.
443 /* The device must be an ATAPI device; it must have removable media;
444 * it must support DMA without DMADIR, or DMADIR for DMA.
446 if ((buf
[ATA_ID_GCAP
] & (ATA_ID_GCAP_ATAPI_MASK
|
447 ATA_ID_GCAP_REMOVABLE
| ATA_ID_GCAP_INCOMPLETE
)) !=
448 (ATA_ID_GCAP_ATAPI
| ATA_ID_GCAP_REMOVABLE
) ||
449 ((buf
[ATA_ID_CAP
] & ATA_ID_CAP_DMA
) != ATA_ID_CAP_DMA
&&
450 (buf
[ATA_ID_DMADIR
] & (ATA_ID_DMADIR_DMADIR
|
451 ATA_ID_DMADIR_DMA
)) != (ATA_ID_DMADIR_DMADIR
|
452 ATA_ID_DMADIR_DMA
))) {
454 dprintf(V_ERR
, ("%s: unsupported ATAPI device\n",
457 dprintf(V_DEV
, ("%s: GCAP %04x CAP %04x DMADIR %04x\n",
458 ahci_portname(ps
), buf
[ATA_ID_GCAP
], buf
[ATA_ID_CAP
],
459 buf
[ATA_ID_DMADIR
]));
464 /* Remember whether to use the DMADIR flag when appropriate. */
465 if (buf
[ATA_ID_DMADIR
] & ATA_ID_DMADIR_DMADIR
)
466 ps
->flags
|= FLAG_USE_DMADIR
;
468 /* ATAPI CD-ROM devices are considered read-only. */
469 if (((buf
[ATA_ID_GCAP
] & ATA_ID_GCAP_TYPE_MASK
) >>
470 ATA_ID_GCAP_TYPE_SHIFT
) == ATAPI_TYPE_CDROM
)
471 ps
->flags
|= FLAG_READONLY
;
473 if ((buf
[ATA_ID_SUP1
] & ATA_ID_SUP1_VALID_MASK
) == ATA_ID_SUP1_VALID
&&
474 !(ps
->flags
& FLAG_READONLY
)) {
475 /* Save write cache related capabilities of the device. It is
476 * possible, although unlikely, that a device has support for
477 * either of these but not both.
479 if (buf
[ATA_ID_SUP0
] & ATA_ID_SUP0_WCACHE
)
480 ps
->flags
|= FLAG_HAS_WCACHE
;
482 if (buf
[ATA_ID_SUP1
] & ATA_ID_SUP1_FLUSH
)
483 ps
->flags
|= FLAG_HAS_FLUSH
;
489 /*===========================================================================*
491 *===========================================================================*/
492 static int atapi_transfer(struct port_state
*ps
, int cmd
, u64_t start_lba
,
493 unsigned int count
, int write
, prd_t
*prdt
, int nr_prds
)
495 /* Perform data transfer from or to an ATAPI device.
498 u8_t packet
[ATAPI_PACKET_SIZE
];
500 /* Fill in a Register Host to Device FIS. */
501 memset(&fis
, 0, sizeof(fis
));
502 fis
.cf_cmd
= ATA_CMD_PACKET
;
503 fis
.cf_feat
= ATA_FEAT_PACKET_DMA
;
504 if (!write
&& (ps
->flags
& FLAG_USE_DMADIR
))
505 fis
.cf_feat
|= ATA_FEAT_PACKET_DMADIR
;
507 /* Fill in a packet. */
508 memset(packet
, 0, sizeof(packet
));
509 packet
[0] = write
? ATAPI_CMD_WRITE
: ATAPI_CMD_READ
;
510 packet
[2] = (start_lba
>> 24) & 0xFF;
511 packet
[3] = (start_lba
>> 16) & 0xFF;
512 packet
[4] = (start_lba
>> 8) & 0xFF;
513 packet
[5] = start_lba
& 0xFF;
514 packet
[6] = (count
>> 24) & 0xFF;
515 packet
[7] = (count
>> 16) & 0xFF;
516 packet
[8] = (count
>> 8) & 0xFF;
517 packet
[9] = count
& 0xFF;
519 /* Start the command, and wait for it to complete or fail. */
520 port_set_cmd(ps
, cmd
, &fis
, packet
, prdt
, nr_prds
, write
);
522 return port_exec(ps
, cmd
, ahci_transfer_timeout
);
525 /*===========================================================================*
527 *===========================================================================*/
528 static int ata_id_check(struct port_state
*ps
, u16_t
*buf
)
530 /* Determine whether we support this ATA device based on the
531 * identification data it returned, and store some of its properties.
534 /* This must be an ATA device; it must not have removable media;
535 * it must support LBA and DMA; it must support the FLUSH CACHE
536 * command; it must support 48-bit addressing.
538 if ((buf
[ATA_ID_GCAP
] & (ATA_ID_GCAP_ATA_MASK
| ATA_ID_GCAP_REMOVABLE
|
539 ATA_ID_GCAP_INCOMPLETE
)) != ATA_ID_GCAP_ATA
||
540 (buf
[ATA_ID_CAP
] & (ATA_ID_CAP_LBA
| ATA_ID_CAP_DMA
)) !=
541 (ATA_ID_CAP_LBA
| ATA_ID_CAP_DMA
) ||
542 (buf
[ATA_ID_SUP1
] & (ATA_ID_SUP1_VALID_MASK
|
543 ATA_ID_SUP1_FLUSH
| ATA_ID_SUP1_LBA48
)) !=
544 (ATA_ID_SUP1_VALID
| ATA_ID_SUP1_FLUSH
| ATA_ID_SUP1_LBA48
)) {
546 dprintf(V_ERR
, ("%s: unsupported ATA device\n",
549 dprintf(V_DEV
, ("%s: GCAP %04x CAP %04x SUP1 %04x\n",
550 ahci_portname(ps
), buf
[ATA_ID_GCAP
], buf
[ATA_ID_CAP
],
556 /* Get number of LBA blocks, and sector size. */
557 ps
->lba_count
= ((u64_t
) buf
[ATA_ID_LBA3
] << 48) |
558 ((u64_t
) buf
[ATA_ID_LBA2
] << 32) |
559 ((u64_t
) buf
[ATA_ID_LBA1
] << 16) |
560 (u64_t
) buf
[ATA_ID_LBA0
];
562 /* Determine the queue depth of the device. */
563 if (hba_state
.has_ncq
&&
564 (buf
[ATA_ID_SATA_CAP
] & ATA_ID_SATA_CAP_NCQ
)) {
565 ps
->flags
|= FLAG_HAS_NCQ
;
567 (buf
[ATA_ID_QDEPTH
] & ATA_ID_QDEPTH_MASK
) + 1;
568 if (ps
->queue_depth
> hba_state
.nr_cmds
)
569 ps
->queue_depth
= hba_state
.nr_cmds
;
572 /* For now, we only support long logical sectors. Long physical sector
573 * support may be added later. Note that the given value is in words.
575 if ((buf
[ATA_ID_PLSS
] & (ATA_ID_PLSS_VALID_MASK
| ATA_ID_PLSS_LLS
)) ==
576 (ATA_ID_PLSS_VALID
| ATA_ID_PLSS_LLS
))
578 ((buf
[ATA_ID_LSS1
] << 16) | buf
[ATA_ID_LSS0
]) << 1;
580 ps
->sector_size
= ATA_SECTOR_SIZE
;
582 if (ps
->sector_size
< ATA_SECTOR_SIZE
) {
583 dprintf(V_ERR
, ("%s: invalid sector size %u\n",
584 ahci_portname(ps
), ps
->sector_size
));
589 ps
->flags
|= FLAG_HAS_MEDIUM
| FLAG_HAS_FLUSH
;
591 /* FLUSH CACHE is mandatory for ATA devices; write caches are not. */
592 if (buf
[ATA_ID_SUP0
] & ATA_ID_SUP0_WCACHE
)
593 ps
->flags
|= FLAG_HAS_WCACHE
;
595 /* Check Force Unit Access capability of the device. */
596 if ((buf
[ATA_ID_ENA2
] & (ATA_ID_ENA2_VALID_MASK
| ATA_ID_ENA2_FUA
)) ==
597 (ATA_ID_ENA2_VALID
| ATA_ID_ENA2_FUA
))
598 ps
->flags
|= FLAG_HAS_FUA
;
603 /*===========================================================================*
605 *===========================================================================*/
606 static int ata_transfer(struct port_state
*ps
, int cmd
, u64_t start_lba
,
607 unsigned int count
, int write
, int force
, prd_t
*prdt
, int nr_prds
)
609 /* Perform data transfer from or to an ATA device.
613 assert(count
<= ATA_MAX_SECTORS
);
615 /* Special case for sector counts: 65536 is specified as 0. */
616 if (count
== ATA_MAX_SECTORS
)
619 memset(&fis
, 0, sizeof(fis
));
620 fis
.cf_dev
= ATA_DEV_LBA
;
621 if (ps
->flags
& FLAG_HAS_NCQ
) {
623 if (force
&& (ps
->flags
& FLAG_HAS_FUA
))
624 fis
.cf_dev
|= ATA_DEV_FUA
;
626 fis
.cf_cmd
= ATA_CMD_WRITE_FPDMA_QUEUED
;
628 fis
.cf_cmd
= ATA_CMD_READ_FPDMA_QUEUED
;
633 if (force
&& (ps
->flags
& FLAG_HAS_FUA
))
634 fis
.cf_cmd
= ATA_CMD_WRITE_DMA_FUA_EXT
;
636 fis
.cf_cmd
= ATA_CMD_WRITE_DMA_EXT
;
639 fis
.cf_cmd
= ATA_CMD_READ_DMA_EXT
;
642 fis
.cf_lba
= start_lba
& 0x00FFFFFFUL
;
643 fis
.cf_lba_exp
= (start_lba
>> 24) & 0x00FFFFFFUL
;
644 fis
.cf_sec
= count
& 0xFF;
645 fis
.cf_sec_exp
= (count
>> 8) & 0xFF;
647 /* Start the command, and wait for it to complete or fail. */
648 port_set_cmd(ps
, cmd
, &fis
, NULL
/*packet*/, prdt
, nr_prds
, write
);
650 return port_exec(ps
, cmd
, ahci_transfer_timeout
);
653 /*===========================================================================*
655 *===========================================================================*/
656 static int gen_identify(struct port_state
*ps
, int blocking
)
658 /* Identify an ATA or ATAPI device. If the blocking flag is set, block
659 * until the command has completed; otherwise return immediately.
664 /* Set up a command, and a single PRD for the result. */
665 memset(&fis
, 0, sizeof(fis
));
667 if (ps
->flags
& FLAG_ATAPI
)
668 fis
.cf_cmd
= ATA_CMD_IDENTIFY_PACKET
;
670 fis
.cf_cmd
= ATA_CMD_IDENTIFY
;
672 prd
.vp_addr
= ps
->tmp_phys
;
673 prd
.vp_size
= ATA_ID_SIZE
;
675 /* Start the command, and possibly wait for the result. */
676 port_set_cmd(ps
, 0, &fis
, NULL
/*packet*/, &prd
, 1, FALSE
/*write*/);
679 return port_exec(ps
, 0, ahci_command_timeout
);
681 port_issue(ps
, 0, ahci_command_timeout
);
686 /*===========================================================================*
688 *===========================================================================*/
689 static int gen_flush_wcache(struct port_state
*ps
)
691 /* Flush the device's write cache.
695 /* The FLUSH CACHE command may not be supported by all (writable ATAPI)
698 if (!(ps
->flags
& FLAG_HAS_FLUSH
))
701 /* Use the FLUSH CACHE command for both ATA and ATAPI. We are not
702 * interested in the disk location of a failure, so there is no reason
703 * to use the ATA-only FLUSH CACHE EXT command. Either way, the command
704 * may indeed fail due to a disk error, in which case it should be
705 * repeated. For now, we shift this responsibility onto the caller.
707 memset(&fis
, 0, sizeof(fis
));
708 fis
.cf_cmd
= ATA_CMD_FLUSH_CACHE
;
710 /* Start the command, and wait for it to complete or fail.
711 * The flush command may take longer than regular I/O commands.
713 port_set_cmd(ps
, 0, &fis
, NULL
/*packet*/, NULL
/*prdt*/, 0,
716 return port_exec(ps
, 0, ahci_flush_timeout
);
719 /*===========================================================================*
721 *===========================================================================*/
722 static int gen_get_wcache(struct port_state
*ps
, int *val
)
724 /* Retrieve the status of the device's write cache.
728 /* Write caches are not mandatory. */
729 if (!(ps
->flags
& FLAG_HAS_WCACHE
))
732 /* Retrieve information about the device. */
733 if ((r
= gen_identify(ps
, TRUE
/*blocking*/)) != OK
)
736 /* Return the current setting. */
737 *val
= !!(((u16_t
*) ps
->tmp_base
)[ATA_ID_ENA0
] & ATA_ID_ENA0_WCACHE
);
742 /*===========================================================================*
744 *===========================================================================*/
745 static int gen_set_wcache(struct port_state
*ps
, int enable
)
747 /* Enable or disable the device's write cache.
752 /* Write caches are not mandatory. */
753 if (!(ps
->flags
& FLAG_HAS_WCACHE
))
756 /* Disabling the write cache causes a (blocking) cache flush. Cache
757 * flushes may take much longer than regular commands.
759 timeout
= enable
? ahci_command_timeout
: ahci_flush_timeout
;
761 /* Set up a command. */
762 memset(&fis
, 0, sizeof(fis
));
763 fis
.cf_cmd
= ATA_CMD_SET_FEATURES
;
764 fis
.cf_feat
= enable
? ATA_SF_EN_WCACHE
: ATA_SF_DI_WCACHE
;
766 /* Start the command, and wait for it to complete or fail. */
767 port_set_cmd(ps
, 0, &fis
, NULL
/*packet*/, NULL
/*prdt*/, 0,
770 return port_exec(ps
, 0, timeout
);
773 /*===========================================================================*
775 *===========================================================================*/
776 static vir_bytes
ct_set_fis(u8_t
*ct
, cmd_fis_t
*fis
, unsigned int tag
)
778 /* Fill in the Frame Information Structure part of a command table,
779 * and return the resulting FIS size (in bytes). We only support the
780 * command Register - Host to Device FIS type.
783 memset(ct
, 0, ATA_H2D_SIZE
);
784 ct
[ATA_FIS_TYPE
] = ATA_FIS_TYPE_H2D
;
785 ct
[ATA_H2D_FLAGS
] = ATA_H2D_FLAGS_C
;
786 ct
[ATA_H2D_CMD
] = fis
->cf_cmd
;
787 ct
[ATA_H2D_LBA_LOW
] = fis
->cf_lba
& 0xFF;
788 ct
[ATA_H2D_LBA_MID
] = (fis
->cf_lba
>> 8) & 0xFF;
789 ct
[ATA_H2D_LBA_HIGH
] = (fis
->cf_lba
>> 16) & 0xFF;
790 ct
[ATA_H2D_DEV
] = fis
->cf_dev
;
791 ct
[ATA_H2D_LBA_LOW_EXP
] = fis
->cf_lba_exp
& 0xFF;
792 ct
[ATA_H2D_LBA_MID_EXP
] = (fis
->cf_lba_exp
>> 8) & 0xFF;
793 ct
[ATA_H2D_LBA_HIGH_EXP
] = (fis
->cf_lba_exp
>> 16) & 0xFF;
794 ct
[ATA_H2D_CTL
] = fis
->cf_ctl
;
796 if (ATA_IS_FPDMA_CMD(fis
->cf_cmd
)) {
797 ct
[ATA_H2D_FEAT
] = fis
->cf_sec
;
798 ct
[ATA_H2D_FEAT_EXP
] = fis
->cf_sec_exp
;
799 ct
[ATA_H2D_SEC
] = tag
<< ATA_SEC_TAG_SHIFT
;
800 ct
[ATA_H2D_SEC_EXP
] = 0;
802 ct
[ATA_H2D_FEAT
] = fis
->cf_feat
;
803 ct
[ATA_H2D_FEAT_EXP
] = fis
->cf_feat_exp
;
804 ct
[ATA_H2D_SEC
] = fis
->cf_sec
;
805 ct
[ATA_H2D_SEC_EXP
] = fis
->cf_sec_exp
;
811 /*===========================================================================*
813 *===========================================================================*/
814 static void ct_set_packet(u8_t
*ct
, u8_t packet
[ATAPI_PACKET_SIZE
])
816 /* Fill in the packet part of a command table.
819 memcpy(&ct
[AHCI_CT_PACKET_OFF
], packet
, ATAPI_PACKET_SIZE
);
822 /*===========================================================================*
824 *===========================================================================*/
825 static void ct_set_prdt(u8_t
*ct
, prd_t
*prdt
, int nr_prds
)
827 /* Fill in the PRDT part of a command table.
832 p
= (u32_t
*) &ct
[AHCI_CT_PRDT_OFF
];
834 for (i
= 0; i
< nr_prds
; i
++, prdt
++) {
835 *p
++ = prdt
->vp_addr
;
838 *p
++ = prdt
->vp_size
- 1;
842 /*===========================================================================*
844 *===========================================================================*/
845 static void port_set_cmd(struct port_state
*ps
, int cmd
, cmd_fis_t
*fis
,
846 u8_t packet
[ATAPI_PACKET_SIZE
], prd_t
*prdt
, int nr_prds
, int write
)
848 /* Prepare the given command for execution, by constructing a command
849 * table and setting up a command list entry pointing to the table.
855 /* Set a port-specific flag that tells us if the command being
856 * processed is a NCQ command or not.
858 if (ATA_IS_FPDMA_CMD(fis
->cf_cmd
)) {
859 ps
->flags
|= FLAG_NCQ_MODE
;
861 assert(!ps
->pend_mask
);
862 ps
->flags
&= ~FLAG_NCQ_MODE
;
865 /* Construct a command table, consisting of a command FIS, optionally
866 * a packet, and optionally a number of PRDs (making up the actual PRD
869 ct
= ps
->ct_base
[cmd
];
872 assert(nr_prds
<= NR_PRDS
);
874 size
= ct_set_fis(ct
, fis
, cmd
);
877 ct_set_packet(ct
, packet
);
879 ct_set_prdt(ct
, prdt
, nr_prds
);
881 /* Construct a command list entry, pointing to the command's table.
882 * Current assumptions: callers always provide a Register - Host to
883 * Device type FIS, and all non-NCQ commands are prefetchable.
885 cl
= &ps
->cl_base
[cmd
* AHCI_CL_ENTRY_DWORDS
];
887 memset(cl
, 0, AHCI_CL_ENTRY_SIZE
);
888 cl
[0] = (nr_prds
<< AHCI_CL_PRDTL_SHIFT
) |
889 ((!ATA_IS_FPDMA_CMD(fis
->cf_cmd
) &&
890 (nr_prds
> 0 || packet
!= NULL
)) ? AHCI_CL_PREFETCHABLE
: 0) |
891 (write
? AHCI_CL_WRITE
: 0) |
892 ((packet
!= NULL
) ? AHCI_CL_ATAPI
: 0) |
893 ((size
/ sizeof(u32_t
)) << AHCI_CL_CFL_SHIFT
);
894 cl
[2] = ps
->ct_phys
[cmd
];
897 /*===========================================================================*
899 *===========================================================================*/
900 static void port_finish_cmd(struct port_state
*ps
, int cmd
, int result
)
902 /* Finish a command that has either succeeded or failed.
905 assert(cmd
< ps
->queue_depth
);
907 dprintf(V_REQ
, ("%s: command %d %s\n", ahci_portname(ps
),
908 cmd
, (result
== RESULT_SUCCESS
) ? "succeeded" : "failed"));
910 /* Update the command result, and clear it from the pending list. */
911 ps
->cmd_info
[cmd
].result
= result
;
913 assert(ps
->pend_mask
& (1 << cmd
));
914 ps
->pend_mask
&= ~(1 << cmd
);
916 /* Wake up the thread, unless it is the main thread. This can happen
917 * during initialization, as the gen_identify function is called by the
918 * main thread itself.
920 if (ps
->state
!= STATE_WAIT_ID
)
921 blockdriver_mt_wakeup(ps
->cmd_info
[cmd
].tid
);
924 /*===========================================================================*
926 *===========================================================================*/
927 static void port_fail_cmds(struct port_state
*ps
)
929 /* Fail all ongoing commands for a device.
933 for (i
= 0; ps
->pend_mask
!= 0 && i
< ps
->queue_depth
; i
++)
934 if (ps
->pend_mask
& (1 << i
))
935 port_finish_cmd(ps
, i
, RESULT_FAILURE
);
938 /*===========================================================================*
940 *===========================================================================*/
941 static void port_check_cmds(struct port_state
*ps
)
943 /* Check what commands have completed, and finish them.
948 /* See which commands have completed. */
949 if (ps
->flags
& FLAG_NCQ_MODE
)
950 mask
= port_read(ps
, AHCI_PORT_SACT
);
952 mask
= port_read(ps
, AHCI_PORT_CI
);
954 /* Wake up threads corresponding to completed commands. */
955 done
= ps
->pend_mask
& ~mask
;
957 for (i
= 0; i
< ps
->queue_depth
; i
++)
959 port_finish_cmd(ps
, i
, RESULT_SUCCESS
);
962 /*===========================================================================*
964 *===========================================================================*/
965 static int port_find_cmd(struct port_state
*ps
)
967 /* Find a free command tag to queue the current request.
971 for (i
= 0; i
< ps
->queue_depth
; i
++)
972 if (!(ps
->pend_mask
& (1 << i
)))
975 /* We should always be able to find a free slot, since a thread runs
976 * only when it is free, and thus, only because a slot is available.
978 assert(i
< ps
->queue_depth
);
983 /*===========================================================================*
985 *===========================================================================*/
986 static int port_get_padbuf(struct port_state
*ps
, size_t size
)
988 /* Make available a temporary buffer for use by this port. Enlarge the
989 * previous buffer if applicable and necessary, potentially changing
990 * its physical address.
993 if (ps
->pad_base
!= NULL
&& ps
->pad_size
>= size
)
996 if (ps
->pad_base
!= NULL
)
997 free_contig(ps
->pad_base
, ps
->pad_size
);
1000 ps
->pad_base
= alloc_contig(ps
->pad_size
, 0, &ps
->pad_phys
);
1002 if (ps
->pad_base
== NULL
) {
1003 dprintf(V_ERR
, ("%s: unable to allocate a padding buffer of "
1004 "size %lu\n", ahci_portname(ps
),
1005 (unsigned long) size
));
1010 dprintf(V_INFO
, ("%s: allocated padding buffer of size %lu\n",
1011 ahci_portname(ps
), (unsigned long) size
));
1016 /*===========================================================================*
1018 *===========================================================================*/
1019 static int sum_iovec(struct port_state
*ps
, endpoint_t endpt
,
1020 iovec_s_t
*iovec
, int nr_req
, vir_bytes
*total
)
1022 /* Retrieve the total size of the given I/O vector. Check for alignment
1023 * requirements along the way. Return OK (and the total request size)
1026 vir_bytes size
, bytes
;
1031 for (i
= 0; i
< nr_req
; i
++) {
1032 size
= iovec
[i
].iov_size
;
1034 if (size
== 0 || (size
& 1) || size
> LONG_MAX
) {
1035 dprintf(V_ERR
, ("%s: bad size %lu in iovec from %d\n",
1036 ahci_portname(ps
), size
, endpt
));
1042 if (bytes
> LONG_MAX
) {
1043 dprintf(V_ERR
, ("%s: iovec size overflow from %d\n",
1044 ahci_portname(ps
), endpt
));
1053 /*===========================================================================*
1055 *===========================================================================*/
1056 static int setup_prdt(struct port_state
*ps
, endpoint_t endpt
,
1057 iovec_s_t
*iovec
, int nr_req
, vir_bytes size
, vir_bytes lead
,
1058 int write
, prd_t
*prdt
)
1060 /* Convert (the first part of) an I/O vector to a Physical Region
1061 * Descriptor Table describing array that can later be used to set the
1062 * command's real PRDT. The resulting table as a whole should be
1063 * sector-aligned; leading and trailing local buffers may have to be
1064 * used for padding as appropriate. Return the number of PRD entries,
1065 * or a negative error code.
1067 struct vumap_vir vvec
[NR_PRDS
];
1068 size_t bytes
, trail
;
1069 int i
, r
, pcount
, nr_prds
= 0;
1072 /* Allocate a buffer for the data we don't want. */
1073 if ((r
= port_get_padbuf(ps
, ps
->sector_size
)) != OK
)
1076 prdt
[nr_prds
].vp_addr
= ps
->pad_phys
;
1077 prdt
[nr_prds
].vp_size
= lead
;
1081 /* The sum of lead, size, trail has to be sector-aligned. */
1082 trail
= (ps
->sector_size
- (lead
+ size
)) % ps
->sector_size
;
1084 /* Get the physical addresses of the given buffers. */
1085 for (i
= 0; i
< nr_req
&& size
> 0; i
++) {
1086 bytes
= MIN(iovec
[i
].iov_size
, size
);
1089 vvec
[i
].vv_addr
= (vir_bytes
) iovec
[i
].iov_grant
;
1091 vvec
[i
].vv_grant
= iovec
[i
].iov_grant
;
1093 vvec
[i
].vv_size
= bytes
;
1100 if ((r
= sys_vumap(endpt
, vvec
, i
, 0, write
? VUA_READ
: VUA_WRITE
,
1101 &prdt
[nr_prds
], &pcount
)) != OK
) {
1102 dprintf(V_ERR
, ("%s: unable to map memory from %d (%d)\n",
1103 ahci_portname(ps
), endpt
, r
));
1107 assert(pcount
> 0 && pcount
<= i
);
1109 /* Make sure all buffers are physically contiguous and word-aligned. */
1110 for (i
= 0; i
< pcount
; i
++) {
1111 if (vvec
[i
].vv_size
!= prdt
[nr_prds
].vp_size
) {
1112 dprintf(V_ERR
, ("%s: non-contiguous memory from %d\n",
1113 ahci_portname(ps
), endpt
));
1117 if (prdt
[nr_prds
].vp_addr
& 1) {
1118 dprintf(V_ERR
, ("%s: bad physical address from %d\n",
1119 ahci_portname(ps
), endpt
));
1127 assert(nr_prds
< NR_PRDS
);
1128 prdt
[nr_prds
].vp_addr
= ps
->pad_phys
+ lead
;
1129 prdt
[nr_prds
].vp_size
= trail
;
1136 /*===========================================================================*
1138 *===========================================================================*/
1139 static ssize_t
port_transfer(struct port_state
*ps
, u64_t pos
, u64_t eof
,
1140 endpoint_t endpt
, iovec_s_t
*iovec
, int nr_req
, int write
, int flags
)
1142 /* Perform an I/O transfer on a port.
1144 prd_t prdt
[NR_PRDS
];
1145 vir_bytes size
, lead
;
1146 unsigned int count
, nr_prds
;
1150 /* Get the total request size from the I/O vector. */
1151 if ((r
= sum_iovec(ps
, endpt
, iovec
, nr_req
, &size
)) != OK
)
1154 dprintf(V_REQ
, ("%s: %s for %lu bytes at pos %llx\n",
1155 ahci_portname(ps
), write
? "write" : "read", size
, pos
));
1157 assert(ps
->state
== STATE_GOOD_DEV
);
1158 assert(ps
->flags
& FLAG_HAS_MEDIUM
);
1159 assert(ps
->sector_size
> 0);
1161 /* Limit the maximum size of a single transfer.
1162 * See the comments at the top of this file for details.
1164 if (size
> MAX_TRANSFER
)
1165 size
= MAX_TRANSFER
;
1167 /* If necessary, reduce the request size so that the request does not
1168 * extend beyond the end of the partition. The caller already
1169 * guarantees that the starting position lies within the partition.
1171 if (pos
+ size
> eof
)
1172 size
= (vir_bytes
) (eof
- pos
);
1174 start_lba
= pos
/ ps
->sector_size
;
1175 lead
= (vir_bytes
) (pos
% ps
->sector_size
);
1176 count
= (lead
+ size
+ ps
->sector_size
- 1) / ps
->sector_size
;
1178 /* Position must be word-aligned for read requests, and sector-aligned
1179 * for write requests. We do not support read-modify-write for writes.
1181 if ((lead
& 1) || (write
&& lead
!= 0)) {
1182 dprintf(V_ERR
, ("%s: unaligned position from %d\n",
1183 ahci_portname(ps
), endpt
));
1187 /* Write requests must be sector-aligned. Word alignment of the size is
1188 * already guaranteed by sum_iovec().
1190 if (write
&& (size
% ps
->sector_size
) != 0) {
1191 dprintf(V_ERR
, ("%s: unaligned size %lu from %d\n",
1192 ahci_portname(ps
), size
, endpt
));
1196 /* Create a vector of physical addresses and sizes for the transfer. */
1197 nr_prds
= r
= setup_prdt(ps
, endpt
, iovec
, nr_req
, size
, lead
, write
,
1200 if (r
< 0) return r
;
1202 /* Perform the actual transfer. */
1203 cmd
= port_find_cmd(ps
);
1205 if (ps
->flags
& FLAG_ATAPI
)
1206 r
= atapi_transfer(ps
, cmd
, start_lba
, count
, write
, prdt
,
1209 r
= ata_transfer(ps
, cmd
, start_lba
, count
, write
,
1210 !!(flags
& BDEV_FORCEWRITE
), prdt
, nr_prds
);
1212 if (r
!= OK
) return r
;
1217 /*===========================================================================*
1219 *===========================================================================*/
1220 static void port_hardreset(struct port_state
*ps
)
1222 /* Perform a port-level (hard) reset on the given port.
1225 port_write(ps
, AHCI_PORT_SCTL
, AHCI_PORT_SCTL_DET_INIT
);
1227 micro_delay(COMRESET_DELAY
* 1000); /* COMRESET_DELAY is in ms */
1229 port_write(ps
, AHCI_PORT_SCTL
, AHCI_PORT_SCTL_DET_NONE
);
1232 /*===========================================================================*
1234 *===========================================================================*/
1235 static void port_override(struct port_state
*ps
)
1237 /* Override the port's BSY and/or DRQ flags. This may only be done
1238 * prior to starting the port.
1242 cmd
= port_read(ps
, AHCI_PORT_CMD
);
1243 port_write(ps
, AHCI_PORT_CMD
, cmd
| AHCI_PORT_CMD_CLO
);
1245 SPIN_UNTIL(!(port_read(ps
, AHCI_PORT_CMD
) & AHCI_PORT_CMD_CLO
),
1248 dprintf(V_INFO
, ("%s: overridden\n", ahci_portname(ps
)));
1251 /*===========================================================================*
1253 *===========================================================================*/
1254 static void port_start(struct port_state
*ps
)
1256 /* Start the given port, allowing for the execution of commands and the
1257 * transfer of data on that port.
1261 /* Reset status registers. */
1262 port_write(ps
, AHCI_PORT_SERR
, ~0);
1263 port_write(ps
, AHCI_PORT_IS
, ~0);
1265 /* Start the port. */
1266 cmd
= port_read(ps
, AHCI_PORT_CMD
);
1267 port_write(ps
, AHCI_PORT_CMD
, cmd
| AHCI_PORT_CMD_ST
);
1269 dprintf(V_INFO
, ("%s: started\n", ahci_portname(ps
)));
1272 /*===========================================================================*
1274 *===========================================================================*/
1275 static void port_stop(struct port_state
*ps
)
1277 /* Stop the given port, if not already stopped.
1281 cmd
= port_read(ps
, AHCI_PORT_CMD
);
1283 if (cmd
& (AHCI_PORT_CMD_CR
| AHCI_PORT_CMD_ST
)) {
1284 port_write(ps
, AHCI_PORT_CMD
, cmd
& ~AHCI_PORT_CMD_ST
);
1286 SPIN_UNTIL(!(port_read(ps
, AHCI_PORT_CMD
) & AHCI_PORT_CMD_CR
),
1289 dprintf(V_INFO
, ("%s: stopped\n", ahci_portname(ps
)));
1293 /*===========================================================================*
1295 *===========================================================================*/
1296 static void port_restart(struct port_state
*ps
)
1298 /* Restart a port after a fatal error has occurred.
1301 /* Fail all outstanding commands. */
1304 /* Stop the port. */
1307 /* If the BSY and/or DRQ flags are set, reset the port. */
1308 if (port_read(ps
, AHCI_PORT_TFD
) &
1309 (AHCI_PORT_TFD_STS_BSY
| AHCI_PORT_TFD_STS_DRQ
)) {
1311 dprintf(V_ERR
, ("%s: port reset\n", ahci_portname(ps
)));
1313 /* To keep this driver simple, we do not transparently recover
1314 * ongoing requests. Instead, we mark the failing device as
1315 * disconnected, and reset it. If the reset succeeds, the
1316 * device (or, perhaps, eventually, another device) will come
1317 * back up. Any current and future requests to this port will
1318 * be failed until the port is fully closed and reopened.
1320 port_disconnect(ps
);
1322 /* Trigger a port reset. */
1328 /* Start the port. */
1332 /*===========================================================================*
1334 *===========================================================================*/
1335 static void print_string(u16_t
*buf
, int start
, int end
)
1337 /* Print a string that is stored as little-endian words and padded with
1342 while (end
>= start
&& buf
[end
] == 0x2020) end
--;
1344 if (end
>= start
&& (buf
[end
] & 0xFF) == 0x20) end
--, last
++;
1346 for (i
= start
; i
<= end
; i
++)
1347 printf("%c%c", buf
[i
] >> 8, buf
[i
] & 0xFF);
1350 printf("%c", buf
[i
] >> 8);
1353 /*===========================================================================*
1355 *===========================================================================*/
1356 static void port_id_check(struct port_state
*ps
, int success
)
1358 /* The device identification command has either completed or timed out.
1359 * Decide whether this device is usable or not, and store some of its
1364 assert(ps
->state
== STATE_WAIT_ID
);
1366 ps
->flags
&= ~FLAG_BUSY
;
1367 cancel_timer(&ps
->cmd_info
[0].timer
);
1370 if (!(ps
->flags
& FLAG_ATAPI
) &&
1371 port_read(ps
, AHCI_PORT_SIG
) != ATA_SIG_ATA
) {
1372 dprintf(V_INFO
, ("%s: may not be ATA, trying ATAPI\n",
1373 ahci_portname(ps
)));
1375 ps
->flags
|= FLAG_ATAPI
;
1377 (void) gen_identify(ps
, FALSE
/*blocking*/);
1382 ("%s: unable to identify\n", ahci_portname(ps
)));
1385 /* If the identify command itself succeeded, check the results and
1386 * store some properties.
1389 buf
= (u16_t
*) ps
->tmp_base
;
1391 if (ps
->flags
& FLAG_ATAPI
)
1392 success
= atapi_id_check(ps
, buf
);
1394 success
= ata_id_check(ps
, buf
);
1397 /* If the device has not been identified successfully, mark it as an
1403 ps
->state
= STATE_BAD_DEV
;
1404 port_write(ps
, AHCI_PORT_IE
, AHCI_PORT_IE_PRCE
);
1409 /* The device has been identified successfully, and hence usable. */
1410 ps
->state
= STATE_GOOD_DEV
;
1412 /* Print some information about the device. */
1413 if (ahci_verbose
>= V_INFO
) {
1414 printf("%s: ATA%s, ", ahci_portname(ps
),
1415 (ps
->flags
& FLAG_ATAPI
) ? "PI" : "");
1416 print_string(buf
, 27, 46);
1417 if (ahci_verbose
>= V_DEV
) {
1419 print_string(buf
, 10, 19);
1421 print_string(buf
, 23, 26);
1425 if (ps
->flags
& FLAG_HAS_MEDIUM
)
1426 printf(", %u byte sectors, %llu MB size",
1428 ps
->lba_count
* ps
->sector_size
/ (1024*1024));
1434 /*===========================================================================*
1436 *===========================================================================*/
1437 static void port_connect(struct port_state
*ps
)
1439 /* A device has been found to be attached to this port. Start the port,
1440 * and do timed polling for its signature to become available.
1444 dprintf(V_INFO
, ("%s: device connected\n", ahci_portname(ps
)));
1448 /* The next check covers a purely hypothetical race condition, where
1449 * the device would disappear right before we try to start it. This is
1450 * possible because we have to clear PxSERR, and with that, the DIAG.N
1451 * bit. Double-check the port status, and if it is not as we expect,
1452 * infer a disconnection.
1454 status
= port_read(ps
, AHCI_PORT_SSTS
) & AHCI_PORT_SSTS_DET_MASK
;
1456 if (status
!= AHCI_PORT_SSTS_DET_PHY
) {
1457 dprintf(V_ERR
, ("%s: device vanished!\n", ahci_portname(ps
)));
1461 ps
->state
= STATE_NO_DEV
;
1462 ps
->flags
&= ~FLAG_BUSY
;
1467 /* Clear all state flags except the busy flag, which may be relevant if
1468 * a BDEV_OPEN call is waiting for the device to become ready; the
1469 * barrier flag, which prevents access to the device until it is
1470 * completely closed and (re)opened; and, the thread suspension flag.
1472 ps
->flags
&= (FLAG_BUSY
| FLAG_BARRIER
| FLAG_SUSPENDED
);
1474 /* Check the port's signature. We only use the signature to speed up
1475 * identification; we will try both ATA and ATAPI if the signature is
1476 * neither ATA nor ATAPI.
1478 sig
= port_read(ps
, AHCI_PORT_SIG
);
1480 if (sig
== ATA_SIG_ATAPI
)
1481 ps
->flags
|= FLAG_ATAPI
;
1483 /* Attempt to identify the device. Do this using continuation, because
1484 * we may already be called from port_wait() here, and could end up
1485 * confusing the timer expiration procedure.
1487 ps
->state
= STATE_WAIT_ID
;
1488 port_write(ps
, AHCI_PORT_IE
, AHCI_PORT_IE_MASK
);
1490 (void) gen_identify(ps
, FALSE
/*blocking*/);
1493 /*===========================================================================*
1495 *===========================================================================*/
1496 static void port_disconnect(struct port_state
*ps
)
1498 /* The device has detached from this port. It has already been stopped.
1501 dprintf(V_INFO
, ("%s: device disconnected\n", ahci_portname(ps
)));
1503 ps
->state
= STATE_NO_DEV
;
1504 port_write(ps
, AHCI_PORT_IE
, AHCI_PORT_IE_PCE
);
1505 ps
->flags
&= ~FLAG_BUSY
;
1507 /* Fail any ongoing request. The caller may already have done this. */
1510 /* Block any further access until the device is completely closed and
1511 * reopened. This prevents arbitrary I/O to a newly plugged-in device
1512 * without upper layers noticing.
1514 ps
->flags
|= FLAG_BARRIER
;
1516 /* Inform the blockdriver library to reduce the number of threads. */
1517 blockdriver_mt_set_workers(ps
->device
, 1);
1520 /*===========================================================================*
1522 *===========================================================================*/
1523 static void port_dev_check(struct port_state
*ps
)
1525 /* Perform device detection by means of polling.
1529 assert(ps
->state
== STATE_WAIT_DEV
);
1531 status
= port_read(ps
, AHCI_PORT_SSTS
) & AHCI_PORT_SSTS_DET_MASK
;
1533 dprintf(V_DEV
, ("%s: polled status %u\n", ahci_portname(ps
), status
));
1536 case AHCI_PORT_SSTS_DET_PHY
:
1537 tfd
= port_read(ps
, AHCI_PORT_TFD
);
1539 /* If a Phy connection has been established, and the BSY and
1540 * DRQ flags are cleared, the device is ready.
1542 if (!(tfd
& (AHCI_PORT_TFD_STS_BSY
| AHCI_PORT_TFD_STS_DRQ
))) {
1549 case AHCI_PORT_SSTS_DET_DET
:
1550 /* A device has been detected, but it is not ready yet. Try for
1551 * a while before giving up. This may take seconds.
1555 set_timer(&ps
->cmd_info
[0].timer
, ahci_device_delay
,
1556 port_timeout
, BUILD_ARG(ps
- port_state
, 0));
1561 dprintf(V_INFO
, ("%s: device not ready\n", ahci_portname(ps
)));
1563 /* We get here on timeout, and if the HBA reports that there is no
1564 * device present at all. In all cases, we change to another state.
1566 if (status
== AHCI_PORT_SSTS_DET_PHY
) {
1567 /* Some devices may not correctly clear BSY/DRQ. Upon timeout,
1568 * if we can override these flags, do so and start the
1569 * identification process anyway.
1571 if (hba_state
.has_clo
) {
1579 /* A device is present and initialized, but not ready. */
1580 ps
->state
= STATE_BAD_DEV
;
1581 port_write(ps
, AHCI_PORT_IE
, AHCI_PORT_IE_PRCE
);
1583 /* A device may or may not be present, but it does not appear
1584 * to be ready in any case. Ignore it until the next device
1585 * initialization event.
1587 ps
->state
= STATE_NO_DEV
;
1588 ps
->flags
&= ~FLAG_BUSY
;
1592 /*===========================================================================*
1594 *===========================================================================*/
1595 static void port_intr(struct port_state
*ps
)
1597 /* Process an interrupt on this port.
1602 if (ps
->state
== STATE_NO_PORT
) {
1603 dprintf(V_ERR
, ("%s: interrupt for invalid port!\n",
1604 ahci_portname(ps
)));
1609 smask
= port_read(ps
, AHCI_PORT_IS
);
1610 emask
= smask
& port_read(ps
, AHCI_PORT_IE
);
1612 /* Clear the interrupt flags that we saw were set. */
1613 port_write(ps
, AHCI_PORT_IS
, smask
);
1615 dprintf(V_REQ
, ("%s: interrupt (%08x)\n", ahci_portname(ps
), smask
));
1617 /* Check if any commands have completed. */
1618 port_check_cmds(ps
);
1620 if (emask
& AHCI_PORT_IS_PCS
) {
1621 /* Clear the X diagnostics bit to clear this interrupt. */
1622 port_write(ps
, AHCI_PORT_SERR
, AHCI_PORT_SERR_DIAG_X
);
1624 dprintf(V_DEV
, ("%s: device attached\n", ahci_portname(ps
)));
1626 switch (ps
->state
) {
1629 /* Reportedly, a device has shown up. Start polling its
1630 * status until it has become ready.
1633 if (ps
->state
== STATE_SPIN_UP
)
1634 cancel_timer(&ps
->cmd_info
[0].timer
);
1636 ps
->state
= STATE_WAIT_DEV
;
1637 ps
->left
= ahci_device_checks
;
1643 case STATE_WAIT_DEV
:
1644 /* Nothing else to do. */
1651 } else if (emask
& AHCI_PORT_IS_PRCS
) {
1652 /* Clear the N diagnostics bit to clear this interrupt. */
1653 port_write(ps
, AHCI_PORT_SERR
, AHCI_PORT_SERR_DIAG_N
);
1655 dprintf(V_DEV
, ("%s: device detached\n", ahci_portname(ps
)));
1657 switch (ps
->state
) {
1659 case STATE_GOOD_DEV
:
1660 /* The device is no longer ready. Stop the port, cancel
1661 * ongoing requests, and disconnect the device.
1667 port_disconnect(ps
);
1669 /* The device has become unusable to us at this point.
1670 * Reset the port to make sure that once the device (or
1671 * another device) becomes usable again, we will get a
1672 * PCS interrupt as well.
1682 } else if (smask
& AHCI_PORT_IS_MASK
) {
1683 /* We assume that any other interrupt indicates command
1684 * completion or (command or device) failure. Unfortunately, if
1685 * an NCQ command failed, we cannot easily determine which one
1686 * it was. For that reason, after completing all successfully
1687 * finished commands (above), we fail all other outstanding
1688 * commands and restart the port. This can possibly be improved
1689 * later by obtaining per-command status results from the HBA.
1692 success
= !(port_read(ps
, AHCI_PORT_TFD
) &
1693 (AHCI_PORT_TFD_STS_ERR
| AHCI_PORT_TFD_STS_DF
));
1695 /* Check now for failure. There are fatal failures, and there
1696 * are failures that set the TFD.STS.ERR field using a D2H
1697 * FIS. In both cases, we just restart the port, failing all
1698 * commands in the process.
1700 if ((port_read(ps
, AHCI_PORT_TFD
) &
1701 (AHCI_PORT_TFD_STS_ERR
| AHCI_PORT_TFD_STS_DF
)) ||
1702 (smask
& AHCI_PORT_IS_RESTART
)) {
1706 /* If we were waiting for ID verification, check now. */
1707 if (ps
->state
== STATE_WAIT_ID
)
1708 port_id_check(ps
, success
);
1712 /*===========================================================================*
1714 *===========================================================================*/
1715 static void port_timeout(int arg
)
1717 /* A timeout has occurred on this port. Figure out what the timeout is
1718 * for, and take appropriate action.
1720 struct port_state
*ps
;
1723 port
= GET_PORT(arg
);
1726 assert(port
>= 0 && port
< hba_state
.nr_ports
);
1728 ps
= &port_state
[port
];
1730 /* Regardless of the outcome of this timeout, wake up the thread if it
1731 * is suspended. This applies only during the initialization.
1733 if (ps
->flags
& FLAG_SUSPENDED
) {
1735 blockdriver_mt_wakeup(ps
->cmd_info
[0].tid
);
1738 /* If detection of a device after startup timed out, give up on initial
1739 * detection and only look for hot plug events from now on.
1741 if (ps
->state
== STATE_SPIN_UP
) {
1742 /* One exception: if the PCS interrupt bit is set here, then we
1743 * are probably running on VirtualBox, which is currently not
1744 * always raising interrupts when setting interrupt bits (!).
1746 if (port_read(ps
, AHCI_PORT_IS
) & AHCI_PORT_IS_PCS
) {
1747 dprintf(V_INFO
, ("%s: bad controller, no interrupt\n",
1748 ahci_portname(ps
)));
1750 ps
->state
= STATE_WAIT_DEV
;
1751 ps
->left
= ahci_device_checks
;
1757 dprintf(V_INFO
, ("%s: spin-up timeout\n",
1758 ahci_portname(ps
)));
1760 /* If the busy flag is set, a BDEV_OPEN request is
1761 * waiting for the detection to finish; clear the busy
1762 * flag to return an error to the caller.
1764 ps
->state
= STATE_NO_DEV
;
1765 ps
->flags
&= ~FLAG_BUSY
;
1771 /* If we are waiting for a device to become connected and initialized,
1774 if (ps
->state
== STATE_WAIT_DEV
) {
1780 dprintf(V_ERR
, ("%s: timeout\n", ahci_portname(ps
)));
1782 /* Restart the port, failing all current commands. */
1785 /* Finish up the identify operation. */
1786 if (ps
->state
== STATE_WAIT_ID
)
1787 port_id_check(ps
, FALSE
);
1790 /*===========================================================================*
1792 *===========================================================================*/
1793 static void port_wait(struct port_state
*ps
)
1795 /* Suspend the current thread until the given port is no longer busy,
1796 * due to either command completion or timeout.
1799 ps
->flags
|= FLAG_SUSPENDED
;
1801 while (ps
->flags
& FLAG_BUSY
)
1802 blockdriver_mt_sleep();
1804 ps
->flags
&= ~FLAG_SUSPENDED
;
1807 /*===========================================================================*
1809 *===========================================================================*/
1810 static void port_issue(struct port_state
*ps
, int cmd
, clock_t timeout
)
1812 /* Issue a command to the port, and set a timer to trigger a timeout
1813 * if the command takes too long to complete.
1816 /* Set the corresponding NCQ command bit, if applicable. */
1817 if (ps
->flags
& FLAG_HAS_NCQ
)
1818 port_write(ps
, AHCI_PORT_SACT
, 1 << cmd
);
1820 /* Make sure that the compiler does not delay any previous write
1821 * operations until after the write to the command issue register.
1825 /* Tell the controller that a new command is ready. */
1826 port_write(ps
, AHCI_PORT_CI
, 1 << cmd
);
1828 /* Update pending commands. */
1829 ps
->pend_mask
|= 1 << cmd
;
1831 /* Set a timer in case the command does not complete at all. */
1832 set_timer(&ps
->cmd_info
[cmd
].timer
, timeout
, port_timeout
,
1833 BUILD_ARG(ps
- port_state
, cmd
));
1836 /*===========================================================================*
1838 *===========================================================================*/
1839 static int port_exec(struct port_state
*ps
, int cmd
, clock_t timeout
)
1841 /* Execute a command on a port, wait for the command to complete or for
1842 * a timeout, and return whether the command succeeded or not.
1845 port_issue(ps
, cmd
, timeout
);
1847 /* Put the thread to sleep until a timeout or a command completion
1848 * happens. Earlier, we used to call port_wait which set the suspended
1849 * flag. We now abandon it since the flag has to work on a per-thread,
1850 * and hence per-tag basis and not on a per-port basis. Instead, we
1851 * retain that call only to defer open calls during device/driver
1852 * initialization. Instead, we call sleep here directly. Before
1853 * sleeping, we register the thread.
1855 ps
->cmd_info
[cmd
].tid
= blockdriver_mt_get_tid();
1857 blockdriver_mt_sleep();
1859 /* Cancelling a timer that just triggered, does no harm. */
1860 cancel_timer(&ps
->cmd_info
[cmd
].timer
);
1862 assert(!(ps
->flags
& FLAG_BUSY
));
1864 dprintf(V_REQ
, ("%s: end of command -- %s\n", ahci_portname(ps
),
1865 (ps
->cmd_info
[cmd
].result
== RESULT_FAILURE
) ?
1866 "failure" : "success"));
1868 if (ps
->cmd_info
[cmd
].result
== RESULT_FAILURE
)
1874 /*===========================================================================*
1876 *===========================================================================*/
1877 static void port_alloc(struct port_state
*ps
)
1879 /* Allocate memory for the given port, and enable FIS receipt. We try
1880 * to cram everything into one 4K-page in order to limit memory usage
1881 * as much as possible. More memory may be allocated on demand later,
1882 * but allocation failure should be fatal only here. Note that we do
1883 * not allocate memory for sector padding here, because we do not know
1884 * the device's sector size yet.
1886 size_t fis_off
, tmp_off
, ct_off
; int i
;
1887 size_t ct_offs
[NR_CMDS
];
1890 fis_off
= AHCI_CL_SIZE
+ AHCI_FIS_SIZE
- 1;
1891 fis_off
-= fis_off
% AHCI_FIS_SIZE
;
1893 tmp_off
= fis_off
+ AHCI_FIS_SIZE
+ AHCI_TMP_ALIGN
- 1;
1894 tmp_off
-= tmp_off
% AHCI_TMP_ALIGN
;
1896 /* Allocate memory for all the commands. */
1897 ct_off
= tmp_off
+ AHCI_TMP_SIZE
;
1898 for (i
= 0; i
< NR_CMDS
; i
++) {
1899 ct_off
+= AHCI_CT_ALIGN
- 1;
1900 ct_off
-= ct_off
% AHCI_CT_ALIGN
;
1901 ct_offs
[i
] = ct_off
;
1902 ps
->mem_size
= ct_off
+ AHCI_CT_SIZE
;
1903 ct_off
= ps
->mem_size
;
1906 ps
->mem_base
= alloc_contig(ps
->mem_size
, AC_ALIGN4K
, &ps
->mem_phys
);
1907 if (ps
->mem_base
== NULL
)
1908 panic("unable to allocate port memory");
1909 memset(ps
->mem_base
, 0, ps
->mem_size
);
1911 ps
->cl_base
= (u32_t
*) ps
->mem_base
;
1912 ps
->cl_phys
= ps
->mem_phys
;
1913 assert(ps
->cl_phys
% AHCI_CL_SIZE
== 0);
1915 ps
->fis_base
= (u32_t
*) (ps
->mem_base
+ fis_off
);
1916 ps
->fis_phys
= ps
->mem_phys
+ fis_off
;
1917 assert(ps
->fis_phys
% AHCI_FIS_SIZE
== 0);
1919 ps
->tmp_base
= (u8_t
*) (ps
->mem_base
+ tmp_off
);
1920 ps
->tmp_phys
= ps
->mem_phys
+ tmp_off
;
1921 assert(ps
->tmp_phys
% AHCI_TMP_ALIGN
== 0);
1923 for (i
= 0; i
< NR_CMDS
; i
++) {
1924 ps
->ct_base
[i
] = ps
->mem_base
+ ct_offs
[i
];
1925 ps
->ct_phys
[i
] = ps
->mem_phys
+ ct_offs
[i
];
1926 assert(ps
->ct_phys
[i
] % AHCI_CT_ALIGN
== 0);
1929 /* Tell the controller about some of the physical addresses. */
1930 port_write(ps
, AHCI_PORT_FBU
, 0);
1931 port_write(ps
, AHCI_PORT_FB
, ps
->fis_phys
);
1933 port_write(ps
, AHCI_PORT_CLBU
, 0);
1934 port_write(ps
, AHCI_PORT_CLB
, ps
->cl_phys
);
1936 /* Enable FIS receive. */
1937 cmd
= port_read(ps
, AHCI_PORT_CMD
);
1938 port_write(ps
, AHCI_PORT_CMD
, cmd
| AHCI_PORT_CMD_FRE
);
1940 ps
->pad_base
= NULL
;
1944 /*===========================================================================*
1946 *===========================================================================*/
1947 static void port_free(struct port_state
*ps
)
1949 /* Disable FIS receipt for the given port, and free previously
1954 /* Disable FIS receive. */
1955 cmd
= port_read(ps
, AHCI_PORT_CMD
);
1957 if (cmd
& (AHCI_PORT_CMD_FR
| AHCI_PORT_CMD_FRE
)) {
1958 port_write(ps
, AHCI_PORT_CMD
, cmd
& ~AHCI_PORT_CMD_FRE
);
1960 SPIN_UNTIL(!(port_read(ps
, AHCI_PORT_CMD
) & AHCI_PORT_CMD_FR
),
1964 if (ps
->pad_base
!= NULL
)
1965 free_contig(ps
->pad_base
, ps
->pad_size
);
1967 free_contig(ps
->mem_base
, ps
->mem_size
);
1970 /*===========================================================================*
1972 *===========================================================================*/
1973 static void port_init(struct port_state
*ps
)
1975 /* Initialize the given port.
1980 /* Initialize the port state structure. */
1981 ps
->queue_depth
= 1;
1982 ps
->state
= STATE_SPIN_UP
;
1983 ps
->flags
= FLAG_BUSY
;
1984 ps
->sector_size
= 0;
1987 for (i
= 0; i
< NR_CMDS
; i
++)
1988 init_timer(&ps
->cmd_info
[i
].timer
);
1990 ps
->reg
= (u32_t
*) ((u8_t
*) hba_state
.base
+
1991 AHCI_MEM_BASE_SIZE
+ AHCI_MEM_PORT_SIZE
* (ps
- port_state
));
1993 /* Allocate memory for the port. */
1996 /* Just listen for device connection events for now. */
1997 port_write(ps
, AHCI_PORT_IE
, AHCI_PORT_IE_PCE
);
1999 /* Enable device spin-up for HBAs that support staggered spin-up.
2000 * This is a no-op for HBAs that do not support it.
2002 cmd
= port_read(ps
, AHCI_PORT_CMD
);
2003 port_write(ps
, AHCI_PORT_CMD
, cmd
| AHCI_PORT_CMD_SUD
);
2005 /* Trigger a port reset. */
2008 set_timer(&ps
->cmd_info
[0].timer
, ahci_spinup_timeout
,
2009 port_timeout
, BUILD_ARG(ps
- port_state
, 0));
2012 /*===========================================================================*
2014 *===========================================================================*/
2015 static int ahci_probe(int skip
)
2017 /* Find a matching PCI device.
2024 r
= pci_first_dev(&devind
, &vid
, &did
);
2029 r
= pci_next_dev(&devind
, &vid
, &did
);
2034 pci_reserve(devind
);
2039 /*===========================================================================*
2041 *===========================================================================*/
2042 static void ahci_reset(void)
2044 /* Reset the HBA. Do not enable AHCI mode afterwards.
2048 ghc
= hba_read(AHCI_HBA_GHC
);
2050 hba_write(AHCI_HBA_GHC
, ghc
| AHCI_HBA_GHC_AE
);
2052 hba_write(AHCI_HBA_GHC
, ghc
| AHCI_HBA_GHC_AE
| AHCI_HBA_GHC_HR
);
2054 SPIN_UNTIL(!(hba_read(AHCI_HBA_GHC
) & AHCI_HBA_GHC_HR
), RESET_DELAY
);
2056 if (hba_read(AHCI_HBA_GHC
) & AHCI_HBA_GHC_HR
)
2057 panic("unable to reset HBA");
2060 /*===========================================================================*
2062 *===========================================================================*/
2063 static void ahci_init(int devind
)
2065 /* Initialize the device.
2067 u32_t base
, size
, cap
, ghc
, mask
;
2068 int r
, port
, ioflag
;
2070 if ((r
= pci_get_bar(devind
, PCI_BAR_6
, &base
, &size
, &ioflag
)) != OK
)
2071 panic("unable to retrieve BAR: %d", r
);
2074 panic("invalid BAR type");
2076 /* There must be at least one port, and at most NR_PORTS ports. Limit
2077 * the actual total number of ports to the size of the exposed area.
2079 if (size
< AHCI_MEM_BASE_SIZE
+ AHCI_MEM_PORT_SIZE
)
2080 panic("HBA memory size too small: %u", size
);
2082 size
= MIN(size
, AHCI_MEM_BASE_SIZE
+ AHCI_MEM_PORT_SIZE
* NR_PORTS
);
2084 hba_state
.nr_ports
= (size
- AHCI_MEM_BASE_SIZE
) / AHCI_MEM_PORT_SIZE
;
2086 /* Map the register area into local memory. */
2087 hba_state
.base
= (u32_t
*) vm_map_phys(SELF
, (void *) base
, size
);
2088 hba_state
.size
= size
;
2089 if (hba_state
.base
== MAP_FAILED
)
2090 panic("unable to map HBA memory");
2092 /* Retrieve, allocate and enable the controller's IRQ. */
2093 hba_state
.irq
= pci_attr_r8(devind
, PCI_ILR
);
2094 hba_state
.hook_id
= 0;
2096 if ((r
= sys_irqsetpolicy(hba_state
.irq
, 0, &hba_state
.hook_id
)) != OK
)
2097 panic("unable to register IRQ: %d", r
);
2099 if ((r
= sys_irqenable(&hba_state
.hook_id
)) != OK
)
2100 panic("unable to enable IRQ: %d", r
);
2102 /* Reset the HBA. */
2105 /* Enable AHCI and interrupts. */
2106 ghc
= hba_read(AHCI_HBA_GHC
);
2107 hba_write(AHCI_HBA_GHC
, ghc
| AHCI_HBA_GHC_AE
| AHCI_HBA_GHC_IE
);
2109 /* Limit the maximum number of commands to the controller's value. */
2110 /* Note that we currently use only one command anyway. */
2111 cap
= hba_read(AHCI_HBA_CAP
);
2112 hba_state
.has_ncq
= !!(cap
& AHCI_HBA_CAP_SNCQ
);
2113 hba_state
.has_clo
= !!(cap
& AHCI_HBA_CAP_SCLO
);
2114 hba_state
.nr_cmds
= MIN(NR_CMDS
,
2115 ((cap
>> AHCI_HBA_CAP_NCS_SHIFT
) & AHCI_HBA_CAP_NCS_MASK
) + 1);
2117 dprintf(V_INFO
, ("AHCI%u: HBA v%d.%d%d, %ld ports, %ld commands, "
2118 "%s queuing, IRQ %d\n",
2120 (int) (hba_read(AHCI_HBA_VS
) >> 16),
2121 (int) ((hba_read(AHCI_HBA_VS
) >> 8) & 0xFF),
2122 (int) (hba_read(AHCI_HBA_VS
) & 0xFF),
2123 ((cap
>> AHCI_HBA_CAP_NP_SHIFT
) & AHCI_HBA_CAP_NP_MASK
) + 1,
2124 ((cap
>> AHCI_HBA_CAP_NCS_SHIFT
) & AHCI_HBA_CAP_NCS_MASK
) + 1,
2125 hba_state
.has_ncq
? "supports" : "no", hba_state
.irq
));
2127 dprintf(V_INFO
, ("AHCI%u: CAP %08x, CAP2 %08x, PI %08x\n",
2128 ahci_instance
, cap
, hba_read(AHCI_HBA_CAP2
),
2129 hba_read(AHCI_HBA_PI
)));
2131 /* Initialize each of the implemented ports. We ignore CAP.NP. */
2132 mask
= hba_read(AHCI_HBA_PI
);
2134 for (port
= 0; port
< hba_state
.nr_ports
; port
++) {
2135 port_state
[port
].device
= NO_DEVICE
;
2136 port_state
[port
].state
= STATE_NO_PORT
;
2138 if (mask
& (1 << port
))
2139 port_init(&port_state
[port
]);
2143 /*===========================================================================*
2145 *===========================================================================*/
2146 static void ahci_stop(void)
2148 /* Disable AHCI, and clean up resources to the extent possible.
2150 struct port_state
*ps
;
2153 for (port
= 0; port
< hba_state
.nr_ports
; port
++) {
2154 ps
= &port_state
[port
];
2156 if (ps
->state
!= STATE_NO_PORT
) {
2165 if ((r
= vm_unmap_phys(SELF
, (void *) hba_state
.base
,
2166 hba_state
.size
)) != OK
)
2167 panic("unable to unmap HBA memory: %d", r
);
2169 if ((r
= sys_irqrmpolicy(&hba_state
.hook_id
)) != OK
)
2170 panic("unable to deregister IRQ: %d", r
);
2173 /*===========================================================================*
2175 *===========================================================================*/
2176 static void ahci_alarm(clock_t stamp
)
2178 /* Process an alarm.
2181 /* Call the port-specific handler for each port that timed out. */
2182 expire_timers(stamp
);
2185 /*===========================================================================*
2187 *===========================================================================*/
2188 static void ahci_intr(unsigned int UNUSED(mask
))
2190 /* Process an interrupt.
2192 struct port_state
*ps
;
2196 /* Handle an interrupt for each port that has the interrupt bit set. */
2197 mask
= hba_read(AHCI_HBA_IS
);
2199 for (port
= 0; port
< hba_state
.nr_ports
; port
++) {
2200 if (mask
& (1 << port
)) {
2201 ps
= &port_state
[port
];
2205 /* After processing an interrupt, wake up the device
2206 * thread if it is suspended and now no longer busy.
2208 if ((ps
->flags
& (FLAG_SUSPENDED
| FLAG_BUSY
)) ==
2210 blockdriver_mt_wakeup(ps
->cmd_info
[0].tid
);
2214 /* Clear the bits that we processed. */
2215 hba_write(AHCI_HBA_IS
, mask
);
2217 /* Reenable the interrupt. */
2218 if ((r
= sys_irqenable(&hba_state
.hook_id
)) != OK
)
2219 panic("unable to enable IRQ: %d", r
);
2222 /*===========================================================================*
2224 *===========================================================================*/
2225 static void ahci_get_params(void)
2227 /* Retrieve and parse parameters passed to this driver, except the
2228 * device-to-port mapping, which has to be parsed later.
2233 /* Find out which driver instance we are. */
2235 (void) env_parse("instance", "d", 0, &v
, 0, 255);
2236 ahci_instance
= (int) v
;
2238 /* Initialize the verbosity level. */
2240 (void) env_parse("ahci_verbose", "d", 0, &v
, V_NONE
, V_REQ
);
2241 ahci_verbose
= (int) v
;
2243 /* Initialize timeout-related values. */
2244 for (i
= 0; i
< sizeof(ahci_timevar
) / sizeof(ahci_timevar
[0]); i
++) {
2245 v
= ahci_timevar
[i
].default_ms
;
2247 (void) env_parse(ahci_timevar
[i
].name
, "d", 0, &v
, 1,
2250 *ahci_timevar
[i
].ptr
= millis_to_hz(v
);
2253 ahci_device_delay
= millis_to_hz(DEVICE_DELAY
);
2254 ahci_device_checks
= (ahci_device_timeout
+ ahci_device_delay
- 1) /
2258 /*===========================================================================*
2259 * ahci_set_mapping *
2260 *===========================================================================*/
2261 static void ahci_set_mapping(void)
2263 /* Construct a mapping from device nodes to port numbers.
2265 char key
[16], val
[32], *p
;
2269 /* Start off with a mapping that includes implemented ports only, in
2270 * order. We choose this mapping over an identity mapping to maximize
2271 * the chance that the user will be able to access the first MAX_DRIVES
2272 * devices. Note that we can only do this after initializing the HBA.
2274 for (i
= j
= 0; i
< NR_PORTS
&& j
< MAX_DRIVES
; i
++)
2275 if (port_state
[i
].state
!= STATE_NO_PORT
)
2278 for ( ; j
< MAX_DRIVES
; j
++)
2279 ahci_map
[j
] = NO_PORT
;
2281 /* See if the user specified a custom mapping. Unlike all other
2282 * configuration options, this is a per-instance setting.
2284 strlcpy(key
, "ahci0_map", sizeof(key
));
2285 key
[4] += ahci_instance
;
2287 if (env_get_param(key
, val
, sizeof(val
)) == OK
) {
2288 /* Parse the mapping, which is assumed to be a comma-separated
2289 * list of zero-based port numbers.
2293 for (i
= 0; i
< MAX_DRIVES
; i
++) {
2295 port
= (unsigned int) strtoul(p
, &p
, 0);
2299 ahci_map
[i
] = port
% NR_PORTS
;
2301 else ahci_map
[i
] = NO_PORT
;
2305 /* Create a reverse mapping. */
2306 for (i
= 0; i
< MAX_DRIVES
; i
++)
2307 if ((j
= ahci_map
[i
]) != NO_PORT
)
2308 port_state
[j
].device
= i
;
2311 /*===========================================================================*
2312 * sef_cb_init_fresh *
2313 *===========================================================================*/
2314 static int sef_cb_init_fresh(int type
, sef_init_info_t
*UNUSED(info
))
2316 /* Initialize the driver.
2320 /* Get command line parameters. */
2323 /* Probe for recognized devices, skipping matches as appropriate. */
2324 devind
= ahci_probe(ahci_instance
);
2327 panic("no matching device found");
2329 /* Initialize the device we found. */
2332 /* Create a mapping from device nodes to port numbers. */
2335 /* Announce that we are up. */
2336 blockdriver_announce(type
);
2341 /*===========================================================================*
2342 * sef_cb_signal_handler *
2343 *===========================================================================*/
2344 static void sef_cb_signal_handler(int signo
)
2346 /* In case of a termination signal, shut down this driver.
2350 if (signo
!= SIGTERM
) return;
2352 /* If any ports are still opened, assume that the system is being shut
2353 * down, and stay up until the last device has been closed.
2355 ahci_exiting
= TRUE
;
2357 for (port
= 0; port
< hba_state
.nr_ports
; port
++)
2358 if (port_state
[port
].open_count
> 0)
2361 /* If not, stop the driver and exit immediately. */
2367 /*===========================================================================*
2368 * sef_local_startup *
2369 *===========================================================================*/
2370 static void sef_local_startup(void)
2372 /* Set callbacks and initialize the System Event Framework (SEF).
2375 /* Register init callbacks. */
2376 sef_setcb_init_fresh(sef_cb_init_fresh
);
2378 /* Register signal callbacks. */
2379 sef_setcb_signal_handler(sef_cb_signal_handler
);
2381 /* Enable support for live update. */
2382 blockdriver_mt_support_lu();
2384 /* Let SEF perform startup. */
2388 /*===========================================================================*
2390 *===========================================================================*/
2391 static char *ahci_portname(struct port_state
*ps
)
2393 /* Return a printable name for the given port. Whenever we can, print a
2394 * "Dx" device number rather than a "Pxx" port number, because the user
2395 * may not be aware of the mapping currently in use.
2397 static char name
[] = "AHCI0-P00";
2399 name
[4] = '0' + ahci_instance
;
2401 if (ps
->device
== NO_DEVICE
) {
2403 name
[7] = '0' + (ps
- port_state
) / 10;
2404 name
[8] = '0' + (ps
- port_state
) % 10;
2408 name
[7] = '0' + ps
->device
;
2415 /*===========================================================================*
2417 *===========================================================================*/
2418 static struct port_state
*ahci_map_minor(devminor_t minor
, struct device
**dvp
)
2420 /* Map a minor device number to a port and a pointer to the partition's
2421 * device structure. Return NULL if this minor device number does not
2422 * identify an actual device.
2424 struct port_state
*ps
;
2429 if (minor
>= 0 && minor
< NR_MINORS
) {
2430 port
= ahci_map
[minor
/ DEV_PER_DRIVE
];
2432 if (port
== NO_PORT
)
2435 ps
= &port_state
[port
];
2436 *dvp
= &ps
->part
[minor
% DEV_PER_DRIVE
];
2438 else if ((unsigned) (minor
-= MINOR_d0p0s0
) < NR_SUBDEVS
) {
2439 port
= ahci_map
[minor
/ SUB_PER_DRIVE
];
2441 if (port
== NO_PORT
)
2444 ps
= &port_state
[port
];
2445 *dvp
= &ps
->subpart
[minor
% SUB_PER_DRIVE
];
2451 /*===========================================================================*
2453 *===========================================================================*/
2454 static struct device
*ahci_part(devminor_t minor
)
2456 /* Return a pointer to the partition information structure of the given
2461 if (ahci_map_minor(minor
, &dv
) == NULL
)
2467 /*===========================================================================*
2469 *===========================================================================*/
2470 static int ahci_open(devminor_t minor
, int access
)
2474 struct port_state
*ps
;
2477 ps
= ahci_get_port(minor
);
2479 /* Only one open request can be processed at a time, due to the fact
2480 * that it is an exclusive operation. The thread that handles this call
2481 * can therefore freely register itself at slot zero.
2483 ps
->cmd_info
[0].tid
= blockdriver_mt_get_tid();
2485 /* If we are still in the process of initializing this port or device,
2486 * wait for completion of that phase first.
2488 if (ps
->flags
& FLAG_BUSY
)
2491 /* The device may only be opened if it is now properly functioning. */
2492 if (ps
->state
!= STATE_GOOD_DEV
)
2495 /* Some devices may only be opened in read-only mode. */
2496 if ((ps
->flags
& FLAG_READONLY
) && (access
& BDEV_W_BIT
))
2499 if (ps
->open_count
== 0) {
2500 /* The first open request. Clear the barrier flag, if set. */
2501 ps
->flags
&= ~FLAG_BARRIER
;
2503 /* Recheck media only when nobody is using the device. */
2504 if ((ps
->flags
& FLAG_ATAPI
) &&
2505 (r
= atapi_check_medium(ps
, 0)) != OK
)
2508 /* After rechecking the media, the partition table must always
2509 * be read. This is also a convenient time to do it for
2510 * nonremovable devices. Start by resetting the partition
2511 * tables and setting the working size of the entire device.
2513 memset(ps
->part
, 0, sizeof(ps
->part
));
2514 memset(ps
->subpart
, 0, sizeof(ps
->subpart
));
2516 ps
->part
[0].dv_size
= ps
->lba_count
* ps
->sector_size
;
2518 partition(&ahci_dtab
, ps
->device
* DEV_PER_DRIVE
, P_PRIMARY
,
2519 !!(ps
->flags
& FLAG_ATAPI
));
2521 blockdriver_mt_set_workers(ps
->device
, ps
->queue_depth
);
2524 /* If the barrier flag is set, deny new open requests until the
2525 * device is fully closed first.
2527 if (ps
->flags
& FLAG_BARRIER
)
2536 /*===========================================================================*
2538 *===========================================================================*/
2539 static int ahci_close(devminor_t minor
)
2543 struct port_state
*ps
;
2546 ps
= ahci_get_port(minor
);
2548 /* Decrease the open count. */
2549 if (ps
->open_count
<= 0) {
2550 dprintf(V_ERR
, ("%s: closing already-closed port\n",
2551 ahci_portname(ps
)));
2558 if (ps
->open_count
> 0)
2561 /* The device is now fully closed. That also means that the threads for
2562 * this device are not needed anymore, so we reduce the count to one.
2564 blockdriver_mt_set_workers(ps
->device
, 1);
2566 if (ps
->state
== STATE_GOOD_DEV
&& !(ps
->flags
& FLAG_BARRIER
)) {
2567 dprintf(V_INFO
, ("%s: flushing write cache\n",
2568 ahci_portname(ps
)));
2570 (void) gen_flush_wcache(ps
);
2573 /* If the entire driver has been told to terminate, check whether all
2574 * devices are now closed. If so, tell libblockdriver to quit after
2575 * replying to the close request.
2578 for (port
= 0; port
< hba_state
.nr_ports
; port
++)
2579 if (port_state
[port
].open_count
> 0)
2582 if (port
== hba_state
.nr_ports
) {
2585 blockdriver_mt_terminate();
2592 /*===========================================================================*
2594 *===========================================================================*/
2595 static ssize_t
ahci_transfer(devminor_t minor
, int do_write
, u64_t position
,
2596 endpoint_t endpt
, iovec_t
*iovec
, unsigned int count
, int flags
)
2598 /* Perform data transfer on the selected device.
2600 struct port_state
*ps
;
2604 ps
= ahci_get_port(minor
);
2605 dv
= ahci_part(minor
);
2607 if (ps
->state
!= STATE_GOOD_DEV
|| (ps
->flags
& FLAG_BARRIER
))
2610 if (count
> NR_IOREQS
)
2613 /* Check for basic end-of-partition condition: if the start position of
2614 * the request is outside the partition, return success immediately.
2615 * The size of the request is obtained, and possibly reduced, later.
2617 if (position
>= dv
->dv_size
)
2620 pos
= dv
->dv_base
+ position
;
2621 eof
= dv
->dv_base
+ dv
->dv_size
;
2623 return port_transfer(ps
, pos
, eof
, endpt
, (iovec_s_t
*) iovec
, count
,
2627 /*===========================================================================*
2629 *===========================================================================*/
2630 static int ahci_ioctl(devminor_t minor
, unsigned long request
,
2631 endpoint_t endpt
, cp_grant_id_t grant
, endpoint_t
UNUSED(user_endpt
))
2633 /* Process I/O control requests.
2635 struct port_state
*ps
;
2638 ps
= ahci_get_port(minor
);
2642 if (ps
->state
!= STATE_GOOD_DEV
|| (ps
->flags
& FLAG_BARRIER
))
2645 if (!(ps
->flags
& FLAG_ATAPI
))
2648 return atapi_load_eject(ps
, 0, FALSE
/*load*/);
2651 return sys_safecopyto(endpt
, grant
, 0,
2652 (vir_bytes
) &ps
->open_count
, sizeof(ps
->open_count
));
2655 if (ps
->state
!= STATE_GOOD_DEV
|| (ps
->flags
& FLAG_BARRIER
))
2658 return gen_flush_wcache(ps
);
2661 if (ps
->state
!= STATE_GOOD_DEV
|| (ps
->flags
& FLAG_BARRIER
))
2664 if ((r
= sys_safecopyfrom(endpt
, grant
, 0, (vir_bytes
) &val
,
2665 sizeof(val
))) != OK
)
2668 return gen_set_wcache(ps
, val
);
2671 if (ps
->state
!= STATE_GOOD_DEV
|| (ps
->flags
& FLAG_BARRIER
))
2674 if ((r
= gen_get_wcache(ps
, &val
)) != OK
)
2677 return sys_safecopyto(endpt
, grant
, 0, (vir_bytes
) &val
,
2684 /*===========================================================================*
2686 *===========================================================================*/
2687 static int ahci_device(devminor_t minor
, device_id_t
*id
)
2689 /* Map a minor device number to a device ID.
2691 struct port_state
*ps
;
2694 if ((ps
= ahci_map_minor(minor
, &dv
)) == NULL
)
2702 /*===========================================================================*
2704 *===========================================================================*/
2705 static struct port_state
*ahci_get_port(devminor_t minor
)
2707 /* Get the port structure associated with the given minor device.
2708 * Called only from worker threads, so the minor device is already
2709 * guaranteed to map to a port.
2711 struct port_state
*ps
;
2714 if ((ps
= ahci_map_minor(minor
, &dv
)) == NULL
)
2715 panic("device mapping for minor %d disappeared", minor
);
2720 /*===========================================================================*
2722 *===========================================================================*/
2723 int main(int argc
, char **argv
)
2728 env_setargs(argc
, argv
);
2729 sef_local_startup();
2731 blockdriver_mt_task(&ahci_dtab
);