etc/services - sync with NetBSD-8
[minix.git] / minix / drivers / storage / ahci / ahci.c
blobe4d2938a174ec28544a402e707e9259b39920a3a
1 /* Advanced Host Controller Interface (AHCI) driver, by D.C. van Moolenbroek
2 * - Multithreading support by Arne Welzel
3 * - Native Command Queuing support by Raja Appuswamy
4 */
5 /*
6 * This driver is based on the following specifications:
7 * - Serial ATA Advanced Host Controller Interface (AHCI) 1.3
8 * - Serial ATA Revision 2.6
9 * - AT Attachment with Packet Interface 7 (ATA/ATAPI-7)
10 * - ATAPI Removable Rewritable Media Devices 1.3 (SFF-8070)
12 * The driver supports device hot-plug, active device status tracking,
13 * nonremovable ATA and removable ATAPI devices, custom logical sector sizes,
14 * sector-unaligned reads, native command queuing and parallel requests to
15 * different devices.
17 * It does not implement transparent failure recovery, power management, or
18 * port multiplier support.
21 * An AHCI controller exposes a number of ports (up to 32), each of which may
22 * or may not have one device attached (port multipliers are not supported).
23 * Each port is maintained independently.
25 * The following figure depicts the possible transitions between port states.
26 * The NO_PORT state is not included; no transitions can be made from or to it.
28 *   +----------+                      +----------+
29 *   | SPIN_UP  | ------+      +-----> | BAD_DEV  | ------------------+
30 *   +----------+       |      |       +----------+                   |
31 *        |             |      |            ^                         |
32 *        v             v      |            |                         |
33 *   +----------+     +----------+     +----------+     +----------+  |
34 *   |  NO_DEV  | --> | WAIT_DEV | --> | WAIT_ID  | --> | GOOD_DEV |  |
35 *   +----------+     +----------+     +----------+     +----------+  |
36 *        ^                |                |                |        |
37 *        +----------------+----------------+----------------+--------+
39 * At driver startup, all physically present ports are put in SPIN_UP state.
40 * This state differs from NO_DEV in that BDEV_OPEN calls will be deferred
41 * until either the spin-up timer expires, or a device has been identified on
42 * that port. This prevents early BDEV_OPEN calls from failing erroneously at
43 * startup time if the device has not yet been able to announce its presence.
45 * If a device is detected, either at startup time or after hot-plug, its
46 * signature is checked and it is identified, after which it may be determined
47 * to be a usable ("good") device, which means that the device is considered to
48 * be in a working state. If these steps fail, the device is marked as unusable
49 * ("bad"). At any point in time, the device may be disconnected; the port is
50 * then put back into NO_DEV state.
52 * A device in working state (GOOD_DEV) may or may not have a medium. All ATA
53 * devices are assumed to be fixed; all ATAPI devices are assumed to have
54 * removable media. To prevent erroneous access to switched devices and media,
55 * the driver makes devices inaccessible until they are fully closed (the open
56 * count is zero) when a device (hot-plug) or medium change is detected.
57 * For hot-plug changes, access is prevented by setting the BARRIER flag until
58 * the device is fully closed and then reopened. For medium changes, access is
59 * prevented by not acknowledging the medium change until the device is fully
60 * closed and reopened. Removable media are not locked in the drive while
61 * opened, because the driver author is uncomfortable with that concept.
63 * Ports may leave the group of states where a device is connected (that is,
64 * WAIT_ID, GOOD_DEV, and BAD_DEV) in two ways: either due to a hot-unplug
65 * event, or due to a hard reset after a serious failure. For simplicity, we
66 * perform a hard reset after a hot-unplug event as well, so that the link
67 * to the device is broken. Thus, in both cases, a transition to NO_DEV is
68 * made, after which the link to the device may or may not be reestablished.
69 * In both cases, ongoing requests are cancelled and the BARRIER flag is set.
71 * The following table lists for each state, whether the port is started
72 * (PxCMD.ST is set), whether a timer is running, what the PxIE mask is to be
73 * set to, and what BDEV_OPEN calls on this port should return.
75 * State Started Timer PxIE BDEV_OPEN
76 * --------- --------- --------- --------- ---------
77 * NO_PORT no no (none) ENXIO
78 * SPIN_UP no yes PCE (wait)
79 * NO_DEV no no PCE ENXIO
80 * WAIT_DEV no yes PCE (wait)
81 * BAD_DEV no no PRCE ENXIO
82 * WAIT_ID yes yes PRCE+ (wait)
83 * GOOD_DEV yes per-command PRCE+ OK
85 * In order to continue deferred BDEV_OPEN calls, the BUSY flag must be unset
86 * when changing from SPIN_UP to any state but WAIT_DEV, and when changing from
87 * WAIT_DEV to any state but WAIT_ID, and when changing from WAIT_ID to any
88 * other state.
91 * The maximum byte size of a single transfer (MAX_TRANSFER) is currently set
92 * to 4MB. This limit has been chosen for a number of reasons:
93 * - The size that can be specified in a Physical Region Descriptor (PRD) is
94 * limited to 4MB for AHCI. Limiting the total transfer size to at most this
95 * size implies that no I/O vector element needs to be split up across PRDs.
96 * This means that the maximum number of needed PRDs can be predetermined.
97 * - The limit is below what can be transferred in a single ATA request, namely
98 * 64k sectors (i.e., at least 32MB). This means that transfer requests need
99 * never be split up into smaller chunks, reducing implementation complexity.
100 * - A single, static timeout can be used for transfers. Very large transfers
101 * can legitimately take up to several minutes -- well beyond the appropriate
102 * timeout range for small transfers. The limit obviates the need for a
103 * timeout scheme that takes into account the transfer size.
104 * - Similarly, the transfer limit reduces the opportunity for buggy/malicious
105 * clients to keep the driver busy for a long time with a single request.
106 * - The limit is high enough for all practical purposes. The transfer setup
107 * overhead is already relatively negligible at this size, and even larger
108 * requests will not help maximize throughput. As NR_IOREQS is currently set
109 * to 64, the limit still allows file systems to perform I/O requests with
110 * vectors completely filled with 64KB-blocks.
112 #include <minix/drivers.h>
113 #include <minix/blockdriver_mt.h>
114 #include <minix/drvlib.h>
115 #include <machine/pci.h>
116 #include <sys/ioc_disk.h>
117 #include <sys/mman.h>
118 #include <assert.h>
120 #include "ahci.h"
122 /* Host Bus Adapter (HBA) state. */
123 static struct {
124 volatile u32_t *base; /* base address of memory-mapped registers */
125 size_t size; /* size of memory-mapped register area */
127 int nr_ports; /* addressable number of ports (1..NR_PORTS) */
128 int nr_cmds; /* maximum number of commands per port */
129 int has_ncq; /* NCQ support flag */
130 int has_clo; /* CLO support flag */
132 int irq; /* IRQ number */
133 int hook_id; /* IRQ hook ID */
134 } hba_state;
136 #define hba_read(r) (hba_state.base[r])
137 #define hba_write(r, v) (hba_state.base[r] = (v))
139 /* Port state. */
140 static struct port_state {
141 int state; /* port state */
142 unsigned int flags; /* port flags */
144 volatile u32_t *reg; /* memory-mapped port registers */
146 u8_t *mem_base; /* primary memory buffer virtual address */
147 phys_bytes mem_phys; /* primary memory buffer physical address */
148 vir_bytes mem_size; /* primary memory buffer size */
150 /* the FIS, CL, CT[0] and TMP buffers are all in the primary buffer */
151 u32_t *fis_base; /* FIS receive buffer virtual address */
152 phys_bytes fis_phys; /* FIS receive buffer physical address */
153 u32_t *cl_base; /* command list buffer virtual address */
154 phys_bytes cl_phys; /* command list buffer physical address */
155 u8_t *ct_base[NR_CMDS]; /* command table virtual address */
156 phys_bytes ct_phys[NR_CMDS]; /* command table physical address */
157 u8_t *tmp_base; /* temporary storage buffer virtual address */
158 phys_bytes tmp_phys; /* temporary storage buffer physical address */
160 u8_t *pad_base; /* sector padding buffer virtual address */
161 phys_bytes pad_phys; /* sector padding buffer physical address */
162 vir_bytes pad_size; /* sector padding buffer size */
164 u64_t lba_count; /* number of valid Logical Block Addresses */
165 u32_t sector_size; /* medium sector size in bytes */
167 int open_count; /* number of times this port is opened */
169 int device; /* associated device number, or NO_DEVICE */
170 struct device part[DEV_PER_DRIVE]; /* partition bases and sizes */
171 struct device subpart[SUB_PER_DRIVE]; /* same for subpartitions */
173 minix_timer_t timer; /* port-specific timeout timer */
174 int left; /* number of tries left before giving up */
175 /* (only used for signature probing) */
177 int queue_depth; /* NCQ queue depth */
178 u32_t pend_mask; /* commands not yet complete */
179 struct {
180 thread_id_t tid;/* ID of the worker thread */
181 minix_timer_t timer; /* timer associated with each request */
182 int result; /* success/failure result of the commands */
183 } cmd_info[NR_CMDS];
184 } port_state[NR_PORTS];
186 #define port_read(ps, r) ((ps)->reg[r])
187 #define port_write(ps, r, v) ((ps)->reg[r] = (v))
189 static int ahci_instance; /* driver instance number */
191 static int ahci_verbose; /* verbosity level (0..4) */
193 /* Timeout-related values. */
194 static clock_t ahci_spinup_timeout;
195 static clock_t ahci_device_timeout;
196 static clock_t ahci_device_delay;
197 static unsigned int ahci_device_checks;
198 static clock_t ahci_command_timeout;
199 static clock_t ahci_transfer_timeout;
200 static clock_t ahci_flush_timeout;
202 /* Timeout environment variable names and default values. */
203 static struct {
204 char *name; /* environment variable name */
205 u32_t default_ms; /* default in milliseconds */
206 clock_t *ptr; /* clock ticks value pointer */
207 } ahci_timevar[] = {
208 { "ahci_init_timeout", SPINUP_TIMEOUT, &ahci_spinup_timeout },
209 { "ahci_device_timeout", DEVICE_TIMEOUT, &ahci_device_timeout },
210 { "ahci_cmd_timeout", COMMAND_TIMEOUT, &ahci_command_timeout },
211 { "ahci_io_timeout", TRANSFER_TIMEOUT, &ahci_transfer_timeout },
212 { "ahci_flush_timeout", FLUSH_TIMEOUT, &ahci_flush_timeout }
215 static int ahci_map[MAX_DRIVES]; /* device-to-port mapping */
217 static int ahci_exiting = FALSE; /* exit after last close? */
/* Pack a port number and a tag into a single integer, and unpack them again.
 * The tag occupies the low eight bits; the port number sits above it.
 */
#define BUILD_ARG(port, tag)	(((port) << 8) | (tag))
#define GET_PORT(arg)		((arg) >> 8)
#define GET_TAG(arg)		((arg) & 0xFF)
/* Print a message if the verbosity level is at least 'v'. The second
 * argument 's' is a complete, parenthesized printf argument list.
 */
#define dprintf(v,s) do {						\
	if (ahci_verbose >= (v)) {					\
		printf s;						\
	}								\
} while (0)

/* Convert milliseconds to clock ticks. Round up. */
#define millis_to_hz(ms)	(((ms) * sys_hz() + 999) / 1000)
231 static void port_set_cmd(struct port_state *ps, int cmd, cmd_fis_t *fis,
232 u8_t packet[ATAPI_PACKET_SIZE], prd_t *prdt, int nr_prds, int write);
233 static void port_issue(struct port_state *ps, int cmd, clock_t timeout);
234 static int port_exec(struct port_state *ps, int cmd, clock_t timeout);
235 static void port_timeout(int arg);
236 static void port_disconnect(struct port_state *ps);
238 static char *ahci_portname(struct port_state *ps);
239 static int ahci_open(devminor_t minor, int access);
240 static int ahci_close(devminor_t minor);
241 static ssize_t ahci_transfer(devminor_t minor, int do_write, u64_t position,
242 endpoint_t endpt, iovec_t *iovec, unsigned int count, int flags);
243 static struct device *ahci_part(devminor_t minor);
244 static void ahci_alarm(clock_t stamp);
245 static int ahci_ioctl(devminor_t minor, unsigned long request,
246 endpoint_t endpt, cp_grant_id_t grant, endpoint_t user_endpt);
247 static void ahci_intr(unsigned int mask);
248 static int ahci_device(devminor_t minor, device_id_t *id);
249 static struct port_state *ahci_get_port(devminor_t minor);
251 /* AHCI driver table. */
252 static struct blockdriver ahci_dtab = {
253 .bdr_type = BLOCKDRIVER_TYPE_DISK,
254 .bdr_open = ahci_open,
255 .bdr_close = ahci_close,
256 .bdr_transfer = ahci_transfer,
257 .bdr_ioctl = ahci_ioctl,
258 .bdr_part = ahci_part,
259 .bdr_intr = ahci_intr,
260 .bdr_alarm = ahci_alarm,
261 .bdr_device = ahci_device
264 /*===========================================================================*
265 * atapi_exec *
266 *===========================================================================*/
267 static int atapi_exec(struct port_state *ps, int cmd,
268 u8_t packet[ATAPI_PACKET_SIZE], size_t size, int write)
270 /* Execute an ATAPI command. Return OK or error.
272 cmd_fis_t fis;
273 prd_t prd[1];
274 int nr_prds = 0;
276 assert(size <= AHCI_TMP_SIZE);
278 /* Fill in the command table with a FIS, a packet, and if a data
279 * transfer is requested, also a PRD.
281 memset(&fis, 0, sizeof(fis));
282 fis.cf_cmd = ATA_CMD_PACKET;
284 if (size > 0) {
285 fis.cf_feat = ATA_FEAT_PACKET_DMA;
286 if (!write && (ps->flags & FLAG_USE_DMADIR))
287 fis.cf_feat |= ATA_FEAT_PACKET_DMADIR;
289 prd[0].vp_addr = ps->tmp_phys;
290 prd[0].vp_size = size;
291 nr_prds++;
294 /* Start the command, and wait for it to complete or fail. */
295 port_set_cmd(ps, cmd, &fis, packet, prd, nr_prds, write);
297 return port_exec(ps, cmd, ahci_command_timeout);
300 /*===========================================================================*
301 * atapi_test_unit *
302 *===========================================================================*/
303 static int atapi_test_unit(struct port_state *ps, int cmd)
305 /* Test whether the ATAPI device and medium are ready.
307 u8_t packet[ATAPI_PACKET_SIZE];
309 memset(packet, 0, sizeof(packet));
310 packet[0] = ATAPI_CMD_TEST_UNIT;
312 return atapi_exec(ps, cmd, packet, 0, FALSE);
315 /*===========================================================================*
316 * atapi_request_sense *
317 *===========================================================================*/
318 static int atapi_request_sense(struct port_state *ps, int cmd, int *sense)
320 /* Request error (sense) information from an ATAPI device, and return
321 * the sense key. The additional sense codes are not used at this time.
323 u8_t packet[ATAPI_PACKET_SIZE];
324 int r;
326 memset(packet, 0, sizeof(packet));
327 packet[0] = ATAPI_CMD_REQUEST_SENSE;
328 packet[4] = ATAPI_REQUEST_SENSE_LEN;
330 r = atapi_exec(ps, cmd, packet, ATAPI_REQUEST_SENSE_LEN, FALSE);
332 if (r != OK)
333 return r;
335 dprintf(V_REQ, ("%s: ATAPI SENSE: sense %x ASC %x ASCQ %x\n",
336 ahci_portname(ps), ps->tmp_base[2] & 0xF, ps->tmp_base[12],
337 ps->tmp_base[13]));
339 *sense = ps->tmp_base[2] & 0xF;
341 return OK;
344 /*===========================================================================*
345 * atapi_load_eject *
346 *===========================================================================*/
347 static int atapi_load_eject(struct port_state *ps, int cmd, int load)
349 /* Load or eject a medium in an ATAPI device.
351 u8_t packet[ATAPI_PACKET_SIZE];
353 memset(packet, 0, sizeof(packet));
354 packet[0] = ATAPI_CMD_START_STOP;
355 packet[4] = load ? ATAPI_START_STOP_LOAD : ATAPI_START_STOP_EJECT;
357 return atapi_exec(ps, cmd, packet, 0, FALSE);
360 /*===========================================================================*
361 * atapi_read_capacity *
362 *===========================================================================*/
363 static int atapi_read_capacity(struct port_state *ps, int cmd)
365 /* Retrieve the LBA count and sector size of an ATAPI medium.
367 u8_t packet[ATAPI_PACKET_SIZE], *buf;
368 int r;
370 memset(packet, 0, sizeof(packet));
371 packet[0] = ATAPI_CMD_READ_CAPACITY;
373 r = atapi_exec(ps, cmd, packet, ATAPI_READ_CAPACITY_LEN, FALSE);
374 if (r != OK)
375 return r;
377 /* Store the number of LBA blocks and sector size. */
378 buf = ps->tmp_base;
379 ps->lba_count = (u64_t) ((buf[0] << 24) | (buf[1] << 16) |
380 (buf[2] << 8) | buf[3]) + 1;
381 ps->sector_size =
382 (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7];
384 if (ps->sector_size == 0 || (ps->sector_size & 1)) {
385 dprintf(V_ERR, ("%s: invalid medium sector size %u\n",
386 ahci_portname(ps), ps->sector_size));
388 return EINVAL;
391 dprintf(V_INFO,
392 ("%s: medium detected (%u byte sectors, %llu MB size)\n",
393 ahci_portname(ps), ps->sector_size,
394 ps->lba_count * ps->sector_size / (1024*1024)));
396 return OK;
399 /*===========================================================================*
400 * atapi_check_medium *
401 *===========================================================================*/
402 static int atapi_check_medium(struct port_state *ps, int cmd)
404 /* Check whether a medium is present in a removable-media ATAPI device.
405 * If a new medium is detected, get its total and sector size. Return
406 * OK only if a usable medium is present, and an error otherwise.
408 int sense;
410 /* Perform a readiness check. */
411 if (atapi_test_unit(ps, cmd) != OK) {
412 ps->flags &= ~FLAG_HAS_MEDIUM;
414 /* If the check failed due to a unit attention condition, retry
415 * reading the medium capacity. Otherwise, assume that there is
416 * no medium available.
418 if (atapi_request_sense(ps, cmd, &sense) != OK ||
419 sense != ATAPI_SENSE_UNIT_ATT)
420 return ENXIO;
423 /* If a medium is newly detected, try reading its capacity now. */
424 if (!(ps->flags & FLAG_HAS_MEDIUM)) {
425 if (atapi_read_capacity(ps, cmd) != OK)
426 return EIO;
428 ps->flags |= FLAG_HAS_MEDIUM;
431 return OK;
434 /*===========================================================================*
435 * atapi_id_check *
436 *===========================================================================*/
437 static int atapi_id_check(struct port_state *ps, u16_t *buf)
439 /* Determine whether we support this ATAPI device based on the
440 * identification data it returned, and store some of its properties.
443 /* The device must be an ATAPI device; it must have removable media;
444 * it must support DMA without DMADIR, or DMADIR for DMA.
446 if ((buf[ATA_ID_GCAP] & (ATA_ID_GCAP_ATAPI_MASK |
447 ATA_ID_GCAP_REMOVABLE | ATA_ID_GCAP_INCOMPLETE)) !=
448 (ATA_ID_GCAP_ATAPI | ATA_ID_GCAP_REMOVABLE) ||
449 ((buf[ATA_ID_CAP] & ATA_ID_CAP_DMA) != ATA_ID_CAP_DMA &&
450 (buf[ATA_ID_DMADIR] & (ATA_ID_DMADIR_DMADIR |
451 ATA_ID_DMADIR_DMA)) != (ATA_ID_DMADIR_DMADIR |
452 ATA_ID_DMADIR_DMA))) {
454 dprintf(V_ERR, ("%s: unsupported ATAPI device\n",
455 ahci_portname(ps)));
457 dprintf(V_DEV, ("%s: GCAP %04x CAP %04x DMADIR %04x\n",
458 ahci_portname(ps), buf[ATA_ID_GCAP], buf[ATA_ID_CAP],
459 buf[ATA_ID_DMADIR]));
461 return FALSE;
464 /* Remember whether to use the DMADIR flag when appropriate. */
465 if (buf[ATA_ID_DMADIR] & ATA_ID_DMADIR_DMADIR)
466 ps->flags |= FLAG_USE_DMADIR;
468 /* ATAPI CD-ROM devices are considered read-only. */
469 if (((buf[ATA_ID_GCAP] & ATA_ID_GCAP_TYPE_MASK) >>
470 ATA_ID_GCAP_TYPE_SHIFT) == ATAPI_TYPE_CDROM)
471 ps->flags |= FLAG_READONLY;
473 if ((buf[ATA_ID_SUP1] & ATA_ID_SUP1_VALID_MASK) == ATA_ID_SUP1_VALID &&
474 !(ps->flags & FLAG_READONLY)) {
475 /* Save write cache related capabilities of the device. It is
476 * possible, although unlikely, that a device has support for
477 * either of these but not both.
479 if (buf[ATA_ID_SUP0] & ATA_ID_SUP0_WCACHE)
480 ps->flags |= FLAG_HAS_WCACHE;
482 if (buf[ATA_ID_SUP1] & ATA_ID_SUP1_FLUSH)
483 ps->flags |= FLAG_HAS_FLUSH;
486 return TRUE;
489 /*===========================================================================*
490 * atapi_transfer *
491 *===========================================================================*/
492 static int atapi_transfer(struct port_state *ps, int cmd, u64_t start_lba,
493 unsigned int count, int write, prd_t *prdt, int nr_prds)
495 /* Perform data transfer from or to an ATAPI device.
497 cmd_fis_t fis;
498 u8_t packet[ATAPI_PACKET_SIZE];
500 /* Fill in a Register Host to Device FIS. */
501 memset(&fis, 0, sizeof(fis));
502 fis.cf_cmd = ATA_CMD_PACKET;
503 fis.cf_feat = ATA_FEAT_PACKET_DMA;
504 if (!write && (ps->flags & FLAG_USE_DMADIR))
505 fis.cf_feat |= ATA_FEAT_PACKET_DMADIR;
507 /* Fill in a packet. */
508 memset(packet, 0, sizeof(packet));
509 packet[0] = write ? ATAPI_CMD_WRITE : ATAPI_CMD_READ;
510 packet[2] = (start_lba >> 24) & 0xFF;
511 packet[3] = (start_lba >> 16) & 0xFF;
512 packet[4] = (start_lba >> 8) & 0xFF;
513 packet[5] = start_lba & 0xFF;
514 packet[6] = (count >> 24) & 0xFF;
515 packet[7] = (count >> 16) & 0xFF;
516 packet[8] = (count >> 8) & 0xFF;
517 packet[9] = count & 0xFF;
519 /* Start the command, and wait for it to complete or fail. */
520 port_set_cmd(ps, cmd, &fis, packet, prdt, nr_prds, write);
522 return port_exec(ps, cmd, ahci_transfer_timeout);
525 /*===========================================================================*
526 * ata_id_check *
527 *===========================================================================*/
528 static int ata_id_check(struct port_state *ps, u16_t *buf)
530 /* Determine whether we support this ATA device based on the
531 * identification data it returned, and store some of its properties.
534 /* This must be an ATA device; it must not have removable media;
535 * it must support LBA and DMA; it must support the FLUSH CACHE
536 * command; it must support 48-bit addressing.
538 if ((buf[ATA_ID_GCAP] & (ATA_ID_GCAP_ATA_MASK | ATA_ID_GCAP_REMOVABLE |
539 ATA_ID_GCAP_INCOMPLETE)) != ATA_ID_GCAP_ATA ||
540 (buf[ATA_ID_CAP] & (ATA_ID_CAP_LBA | ATA_ID_CAP_DMA)) !=
541 (ATA_ID_CAP_LBA | ATA_ID_CAP_DMA) ||
542 (buf[ATA_ID_SUP1] & (ATA_ID_SUP1_VALID_MASK |
543 ATA_ID_SUP1_FLUSH | ATA_ID_SUP1_LBA48)) !=
544 (ATA_ID_SUP1_VALID | ATA_ID_SUP1_FLUSH | ATA_ID_SUP1_LBA48)) {
546 dprintf(V_ERR, ("%s: unsupported ATA device\n",
547 ahci_portname(ps)));
549 dprintf(V_DEV, ("%s: GCAP %04x CAP %04x SUP1 %04x\n",
550 ahci_portname(ps), buf[ATA_ID_GCAP], buf[ATA_ID_CAP],
551 buf[ATA_ID_SUP1]));
553 return FALSE;
556 /* Get number of LBA blocks, and sector size. */
557 ps->lba_count = ((u64_t) buf[ATA_ID_LBA3] << 48) |
558 ((u64_t) buf[ATA_ID_LBA2] << 32) |
559 ((u64_t) buf[ATA_ID_LBA1] << 16) |
560 (u64_t) buf[ATA_ID_LBA0];
562 /* Determine the queue depth of the device. */
563 if (hba_state.has_ncq &&
564 (buf[ATA_ID_SATA_CAP] & ATA_ID_SATA_CAP_NCQ)) {
565 ps->flags |= FLAG_HAS_NCQ;
566 ps->queue_depth =
567 (buf[ATA_ID_QDEPTH] & ATA_ID_QDEPTH_MASK) + 1;
568 if (ps->queue_depth > hba_state.nr_cmds)
569 ps->queue_depth = hba_state.nr_cmds;
572 /* For now, we only support long logical sectors. Long physical sector
573 * support may be added later. Note that the given value is in words.
575 if ((buf[ATA_ID_PLSS] & (ATA_ID_PLSS_VALID_MASK | ATA_ID_PLSS_LLS)) ==
576 (ATA_ID_PLSS_VALID | ATA_ID_PLSS_LLS))
577 ps->sector_size =
578 ((buf[ATA_ID_LSS1] << 16) | buf[ATA_ID_LSS0]) << 1;
579 else
580 ps->sector_size = ATA_SECTOR_SIZE;
582 if (ps->sector_size < ATA_SECTOR_SIZE) {
583 dprintf(V_ERR, ("%s: invalid sector size %u\n",
584 ahci_portname(ps), ps->sector_size));
586 return FALSE;
589 ps->flags |= FLAG_HAS_MEDIUM | FLAG_HAS_FLUSH;
591 /* FLUSH CACHE is mandatory for ATA devices; write caches are not. */
592 if (buf[ATA_ID_SUP0] & ATA_ID_SUP0_WCACHE)
593 ps->flags |= FLAG_HAS_WCACHE;
595 /* Check Force Unit Access capability of the device. */
596 if ((buf[ATA_ID_ENA2] & (ATA_ID_ENA2_VALID_MASK | ATA_ID_ENA2_FUA)) ==
597 (ATA_ID_ENA2_VALID | ATA_ID_ENA2_FUA))
598 ps->flags |= FLAG_HAS_FUA;
600 return TRUE;
603 /*===========================================================================*
604 * ata_transfer *
605 *===========================================================================*/
606 static int ata_transfer(struct port_state *ps, int cmd, u64_t start_lba,
607 unsigned int count, int write, int force, prd_t *prdt, int nr_prds)
609 /* Perform data transfer from or to an ATA device.
611 cmd_fis_t fis;
613 assert(count <= ATA_MAX_SECTORS);
615 /* Special case for sector counts: 65536 is specified as 0. */
616 if (count == ATA_MAX_SECTORS)
617 count = 0;
619 memset(&fis, 0, sizeof(fis));
620 fis.cf_dev = ATA_DEV_LBA;
621 if (ps->flags & FLAG_HAS_NCQ) {
622 if (write) {
623 if (force && (ps->flags & FLAG_HAS_FUA))
624 fis.cf_dev |= ATA_DEV_FUA;
626 fis.cf_cmd = ATA_CMD_WRITE_FPDMA_QUEUED;
627 } else {
628 fis.cf_cmd = ATA_CMD_READ_FPDMA_QUEUED;
631 else {
632 if (write) {
633 if (force && (ps->flags & FLAG_HAS_FUA))
634 fis.cf_cmd = ATA_CMD_WRITE_DMA_FUA_EXT;
635 else
636 fis.cf_cmd = ATA_CMD_WRITE_DMA_EXT;
638 else {
639 fis.cf_cmd = ATA_CMD_READ_DMA_EXT;
642 fis.cf_lba = start_lba & 0x00FFFFFFUL;
643 fis.cf_lba_exp = (start_lba >> 24) & 0x00FFFFFFUL;
644 fis.cf_sec = count & 0xFF;
645 fis.cf_sec_exp = (count >> 8) & 0xFF;
647 /* Start the command, and wait for it to complete or fail. */
648 port_set_cmd(ps, cmd, &fis, NULL /*packet*/, prdt, nr_prds, write);
650 return port_exec(ps, cmd, ahci_transfer_timeout);
653 /*===========================================================================*
654 * gen_identify *
655 *===========================================================================*/
656 static int gen_identify(struct port_state *ps, int blocking)
658 /* Identify an ATA or ATAPI device. If the blocking flag is set, block
659 * until the command has completed; otherwise return immediately.
661 cmd_fis_t fis;
662 prd_t prd;
664 /* Set up a command, and a single PRD for the result. */
665 memset(&fis, 0, sizeof(fis));
667 if (ps->flags & FLAG_ATAPI)
668 fis.cf_cmd = ATA_CMD_IDENTIFY_PACKET;
669 else
670 fis.cf_cmd = ATA_CMD_IDENTIFY;
672 prd.vp_addr = ps->tmp_phys;
673 prd.vp_size = ATA_ID_SIZE;
675 /* Start the command, and possibly wait for the result. */
676 port_set_cmd(ps, 0, &fis, NULL /*packet*/, &prd, 1, FALSE /*write*/);
678 if (blocking)
679 return port_exec(ps, 0, ahci_command_timeout);
681 port_issue(ps, 0, ahci_command_timeout);
683 return OK;
686 /*===========================================================================*
687 * gen_flush_wcache *
688 *===========================================================================*/
689 static int gen_flush_wcache(struct port_state *ps)
691 /* Flush the device's write cache.
693 cmd_fis_t fis;
695 /* The FLUSH CACHE command may not be supported by all (writable ATAPI)
696 * devices.
698 if (!(ps->flags & FLAG_HAS_FLUSH))
699 return EINVAL;
701 /* Use the FLUSH CACHE command for both ATA and ATAPI. We are not
702 * interested in the disk location of a failure, so there is no reason
703 * to use the ATA-only FLUSH CACHE EXT command. Either way, the command
704 * may indeed fail due to a disk error, in which case it should be
705 * repeated. For now, we shift this responsibility onto the caller.
707 memset(&fis, 0, sizeof(fis));
708 fis.cf_cmd = ATA_CMD_FLUSH_CACHE;
710 /* Start the command, and wait for it to complete or fail.
711 * The flush command may take longer than regular I/O commands.
713 port_set_cmd(ps, 0, &fis, NULL /*packet*/, NULL /*prdt*/, 0,
714 FALSE /*write*/);
716 return port_exec(ps, 0, ahci_flush_timeout);
719 /*===========================================================================*
720 * gen_get_wcache *
721 *===========================================================================*/
722 static int gen_get_wcache(struct port_state *ps, int *val)
724 /* Retrieve the status of the device's write cache.
726 int r;
728 /* Write caches are not mandatory. */
729 if (!(ps->flags & FLAG_HAS_WCACHE))
730 return EINVAL;
732 /* Retrieve information about the device. */
733 if ((r = gen_identify(ps, TRUE /*blocking*/)) != OK)
734 return r;
736 /* Return the current setting. */
737 *val = !!(((u16_t *) ps->tmp_base)[ATA_ID_ENA0] & ATA_ID_ENA0_WCACHE);
739 return OK;
742 /*===========================================================================*
743 * gen_set_wcache *
744 *===========================================================================*/
745 static int gen_set_wcache(struct port_state *ps, int enable)
747 /* Enable or disable the device's write cache.
749 cmd_fis_t fis;
750 clock_t timeout;
752 /* Write caches are not mandatory. */
753 if (!(ps->flags & FLAG_HAS_WCACHE))
754 return EINVAL;
756 /* Disabling the write cache causes a (blocking) cache flush. Cache
757 * flushes may take much longer than regular commands.
759 timeout = enable ? ahci_command_timeout : ahci_flush_timeout;
761 /* Set up a command. */
762 memset(&fis, 0, sizeof(fis));
763 fis.cf_cmd = ATA_CMD_SET_FEATURES;
764 fis.cf_feat = enable ? ATA_SF_EN_WCACHE : ATA_SF_DI_WCACHE;
766 /* Start the command, and wait for it to complete or fail. */
767 port_set_cmd(ps, 0, &fis, NULL /*packet*/, NULL /*prdt*/, 0,
768 FALSE /*write*/);
770 return port_exec(ps, 0, timeout);
773 /*===========================================================================*
774 * ct_set_fis *
775 *===========================================================================*/
776 static vir_bytes ct_set_fis(u8_t *ct, cmd_fis_t *fis, unsigned int tag)
778 /* Fill in the Frame Information Structure part of a command table,
779 * and return the resulting FIS size (in bytes). We only support the
780 * command Register - Host to Device FIS type.
783 memset(ct, 0, ATA_H2D_SIZE);
784 ct[ATA_FIS_TYPE] = ATA_FIS_TYPE_H2D;
785 ct[ATA_H2D_FLAGS] = ATA_H2D_FLAGS_C;
786 ct[ATA_H2D_CMD] = fis->cf_cmd;
787 ct[ATA_H2D_LBA_LOW] = fis->cf_lba & 0xFF;
788 ct[ATA_H2D_LBA_MID] = (fis->cf_lba >> 8) & 0xFF;
789 ct[ATA_H2D_LBA_HIGH] = (fis->cf_lba >> 16) & 0xFF;
790 ct[ATA_H2D_DEV] = fis->cf_dev;
791 ct[ATA_H2D_LBA_LOW_EXP] = fis->cf_lba_exp & 0xFF;
792 ct[ATA_H2D_LBA_MID_EXP] = (fis->cf_lba_exp >> 8) & 0xFF;
793 ct[ATA_H2D_LBA_HIGH_EXP] = (fis->cf_lba_exp >> 16) & 0xFF;
794 ct[ATA_H2D_CTL] = fis->cf_ctl;
796 if (ATA_IS_FPDMA_CMD(fis->cf_cmd)) {
797 ct[ATA_H2D_FEAT] = fis->cf_sec;
798 ct[ATA_H2D_FEAT_EXP] = fis->cf_sec_exp;
799 ct[ATA_H2D_SEC] = tag << ATA_SEC_TAG_SHIFT;
800 ct[ATA_H2D_SEC_EXP] = 0;
801 } else {
802 ct[ATA_H2D_FEAT] = fis->cf_feat;
803 ct[ATA_H2D_FEAT_EXP] = fis->cf_feat_exp;
804 ct[ATA_H2D_SEC] = fis->cf_sec;
805 ct[ATA_H2D_SEC_EXP] = fis->cf_sec_exp;
808 return ATA_H2D_SIZE;
811 /*===========================================================================*
812 * ct_set_packet *
813 *===========================================================================*/
814 static void ct_set_packet(u8_t *ct, u8_t packet[ATAPI_PACKET_SIZE])
816 /* Fill in the packet part of a command table.
819 memcpy(&ct[AHCI_CT_PACKET_OFF], packet, ATAPI_PACKET_SIZE);
822 /*===========================================================================*
823 * ct_set_prdt *
824 *===========================================================================*/
825 static void ct_set_prdt(u8_t *ct, prd_t *prdt, int nr_prds)
827 /* Fill in the PRDT part of a command table.
829 u32_t *p;
830 int i;
832 p = (u32_t *) &ct[AHCI_CT_PRDT_OFF];
834 for (i = 0; i < nr_prds; i++, prdt++) {
835 *p++ = prdt->vp_addr;
836 *p++ = 0;
837 *p++ = 0;
838 *p++ = prdt->vp_size - 1;
842 /*===========================================================================*
843 * port_set_cmd *
844 *===========================================================================*/
845 static void port_set_cmd(struct port_state *ps, int cmd, cmd_fis_t *fis,
846 u8_t packet[ATAPI_PACKET_SIZE], prd_t *prdt, int nr_prds, int write)
848 /* Prepare the given command for execution, by constructing a command
849 * table and setting up a command list entry pointing to the table.
851 u8_t *ct;
852 u32_t *cl;
853 vir_bytes size;
855 /* Set a port-specific flag that tells us if the command being
856 * processed is a NCQ command or not.
858 if (ATA_IS_FPDMA_CMD(fis->cf_cmd)) {
859 ps->flags |= FLAG_NCQ_MODE;
860 } else {
861 assert(!ps->pend_mask);
862 ps->flags &= ~FLAG_NCQ_MODE;
865 /* Construct a command table, consisting of a command FIS, optionally
866 * a packet, and optionally a number of PRDs (making up the actual PRD
867 * table).
869 ct = ps->ct_base[cmd];
871 assert(ct != NULL);
872 assert(nr_prds <= NR_PRDS);
874 size = ct_set_fis(ct, fis, cmd);
876 if (packet != NULL)
877 ct_set_packet(ct, packet);
879 ct_set_prdt(ct, prdt, nr_prds);
881 /* Construct a command list entry, pointing to the command's table.
882 * Current assumptions: callers always provide a Register - Host to
883 * Device type FIS, and all non-NCQ commands are prefetchable.
885 cl = &ps->cl_base[cmd * AHCI_CL_ENTRY_DWORDS];
887 memset(cl, 0, AHCI_CL_ENTRY_SIZE);
888 cl[0] = (nr_prds << AHCI_CL_PRDTL_SHIFT) |
889 ((!ATA_IS_FPDMA_CMD(fis->cf_cmd) &&
890 (nr_prds > 0 || packet != NULL)) ? AHCI_CL_PREFETCHABLE : 0) |
891 (write ? AHCI_CL_WRITE : 0) |
892 ((packet != NULL) ? AHCI_CL_ATAPI : 0) |
893 ((size / sizeof(u32_t)) << AHCI_CL_CFL_SHIFT);
894 cl[2] = ps->ct_phys[cmd];
897 /*===========================================================================*
898 * port_finish_cmd *
899 *===========================================================================*/
900 static void port_finish_cmd(struct port_state *ps, int cmd, int result)
902 /* Finish a command that has either succeeded or failed.
905 assert(cmd < ps->queue_depth);
907 dprintf(V_REQ, ("%s: command %d %s\n", ahci_portname(ps),
908 cmd, (result == RESULT_SUCCESS) ? "succeeded" : "failed"));
910 /* Update the command result, and clear it from the pending list. */
911 ps->cmd_info[cmd].result = result;
913 assert(ps->pend_mask & (1 << cmd));
914 ps->pend_mask &= ~(1 << cmd);
916 /* Wake up the thread, unless it is the main thread. This can happen
917 * during initialization, as the gen_identify function is called by the
918 * main thread itself.
920 if (ps->state != STATE_WAIT_ID)
921 blockdriver_mt_wakeup(ps->cmd_info[cmd].tid);
924 /*===========================================================================*
925 * port_fail_cmds *
926 *===========================================================================*/
927 static void port_fail_cmds(struct port_state *ps)
929 /* Fail all ongoing commands for a device.
931 int i;
933 for (i = 0; ps->pend_mask != 0 && i < ps->queue_depth; i++)
934 if (ps->pend_mask & (1 << i))
935 port_finish_cmd(ps, i, RESULT_FAILURE);
938 /*===========================================================================*
939 * port_check_cmds *
940 *===========================================================================*/
941 static void port_check_cmds(struct port_state *ps)
943 /* Check what commands have completed, and finish them.
945 u32_t mask, done;
946 int i;
948 /* See which commands have completed. */
949 if (ps->flags & FLAG_NCQ_MODE)
950 mask = port_read(ps, AHCI_PORT_SACT);
951 else
952 mask = port_read(ps, AHCI_PORT_CI);
954 /* Wake up threads corresponding to completed commands. */
955 done = ps->pend_mask & ~mask;
957 for (i = 0; i < ps->queue_depth; i++)
958 if (done & (1 << i))
959 port_finish_cmd(ps, i, RESULT_SUCCESS);
962 /*===========================================================================*
963 * port_find_cmd *
964 *===========================================================================*/
965 static int port_find_cmd(struct port_state *ps)
967 /* Find a free command tag to queue the current request.
969 int i;
971 for (i = 0; i < ps->queue_depth; i++)
972 if (!(ps->pend_mask & (1 << i)))
973 break;
975 /* We should always be able to find a free slot, since a thread runs
976 * only when it is free, and thus, only because a slot is available.
978 assert(i < ps->queue_depth);
980 return i;
983 /*===========================================================================*
984 * port_get_padbuf *
985 *===========================================================================*/
986 static int port_get_padbuf(struct port_state *ps, size_t size)
988 /* Make available a temporary buffer for use by this port. Enlarge the
989 * previous buffer if applicable and necessary, potentially changing
990 * its physical address.
993 if (ps->pad_base != NULL && ps->pad_size >= size)
994 return OK;
996 if (ps->pad_base != NULL)
997 free_contig(ps->pad_base, ps->pad_size);
999 ps->pad_size = size;
1000 ps->pad_base = alloc_contig(ps->pad_size, 0, &ps->pad_phys);
1002 if (ps->pad_base == NULL) {
1003 dprintf(V_ERR, ("%s: unable to allocate a padding buffer of "
1004 "size %lu\n", ahci_portname(ps),
1005 (unsigned long) size));
1007 return ENOMEM;
1010 dprintf(V_INFO, ("%s: allocated padding buffer of size %lu\n",
1011 ahci_portname(ps), (unsigned long) size));
1013 return OK;
1016 /*===========================================================================*
1017 * sum_iovec *
1018 *===========================================================================*/
1019 static int sum_iovec(struct port_state *ps, endpoint_t endpt,
1020 iovec_s_t *iovec, int nr_req, vir_bytes *total)
1022 /* Retrieve the total size of the given I/O vector. Check for alignment
1023 * requirements along the way. Return OK (and the total request size)
1024 * or an error.
1026 vir_bytes size, bytes;
1027 int i;
1029 bytes = 0;
1031 for (i = 0; i < nr_req; i++) {
1032 size = iovec[i].iov_size;
1034 if (size == 0 || (size & 1) || size > LONG_MAX) {
1035 dprintf(V_ERR, ("%s: bad size %lu in iovec from %d\n",
1036 ahci_portname(ps), size, endpt));
1037 return EINVAL;
1040 bytes += size;
1042 if (bytes > LONG_MAX) {
1043 dprintf(V_ERR, ("%s: iovec size overflow from %d\n",
1044 ahci_portname(ps), endpt));
1045 return EINVAL;
1049 *total = bytes;
1050 return OK;
1053 /*===========================================================================*
1054 * setup_prdt *
1055 *===========================================================================*/
1056 static int setup_prdt(struct port_state *ps, endpoint_t endpt,
1057 iovec_s_t *iovec, int nr_req, vir_bytes size, vir_bytes lead,
1058 int write, prd_t *prdt)
1060 /* Convert (the first part of) an I/O vector to a Physical Region
1061 * Descriptor Table describing array that can later be used to set the
1062 * command's real PRDT. The resulting table as a whole should be
1063 * sector-aligned; leading and trailing local buffers may have to be
1064 * used for padding as appropriate. Return the number of PRD entries,
1065 * or a negative error code.
1067 struct vumap_vir vvec[NR_PRDS];
1068 size_t bytes, trail;
1069 int i, r, pcount, nr_prds = 0;
1071 if (lead > 0) {
1072 /* Allocate a buffer for the data we don't want. */
1073 if ((r = port_get_padbuf(ps, ps->sector_size)) != OK)
1074 return r;
1076 prdt[nr_prds].vp_addr = ps->pad_phys;
1077 prdt[nr_prds].vp_size = lead;
1078 nr_prds++;
1081 /* The sum of lead, size, trail has to be sector-aligned. */
1082 trail = (ps->sector_size - (lead + size)) % ps->sector_size;
1084 /* Get the physical addresses of the given buffers. */
1085 for (i = 0; i < nr_req && size > 0; i++) {
1086 bytes = MIN(iovec[i].iov_size, size);
1088 if (endpt == SELF)
1089 vvec[i].vv_addr = (vir_bytes) iovec[i].iov_grant;
1090 else
1091 vvec[i].vv_grant = iovec[i].iov_grant;
1093 vvec[i].vv_size = bytes;
1095 size -= bytes;
1098 pcount = i;
1100 if ((r = sys_vumap(endpt, vvec, i, 0, write ? VUA_READ : VUA_WRITE,
1101 &prdt[nr_prds], &pcount)) != OK) {
1102 dprintf(V_ERR, ("%s: unable to map memory from %d (%d)\n",
1103 ahci_portname(ps), endpt, r));
1104 return r;
1107 assert(pcount > 0 && pcount <= i);
1109 /* Make sure all buffers are physically contiguous and word-aligned. */
1110 for (i = 0; i < pcount; i++) {
1111 if (vvec[i].vv_size != prdt[nr_prds].vp_size) {
1112 dprintf(V_ERR, ("%s: non-contiguous memory from %d\n",
1113 ahci_portname(ps), endpt));
1114 return EINVAL;
1117 if (prdt[nr_prds].vp_addr & 1) {
1118 dprintf(V_ERR, ("%s: bad physical address from %d\n",
1119 ahci_portname(ps), endpt));
1120 return EINVAL;
1123 nr_prds++;
1126 if (trail > 0) {
1127 assert(nr_prds < NR_PRDS);
1128 prdt[nr_prds].vp_addr = ps->pad_phys + lead;
1129 prdt[nr_prds].vp_size = trail;
1130 nr_prds++;
1133 return nr_prds;
1136 /*===========================================================================*
1137 * port_transfer *
1138 *===========================================================================*/
1139 static ssize_t port_transfer(struct port_state *ps, u64_t pos, u64_t eof,
1140 endpoint_t endpt, iovec_s_t *iovec, int nr_req, int write, int flags)
1142 /* Perform an I/O transfer on a port.
1144 prd_t prdt[NR_PRDS];
1145 vir_bytes size, lead;
1146 unsigned int count, nr_prds;
1147 u64_t start_lba;
1148 int r, cmd;
1150 /* Get the total request size from the I/O vector. */
1151 if ((r = sum_iovec(ps, endpt, iovec, nr_req, &size)) != OK)
1152 return r;
1154 dprintf(V_REQ, ("%s: %s for %lu bytes at pos %llx\n",
1155 ahci_portname(ps), write ? "write" : "read", size, pos));
1157 assert(ps->state == STATE_GOOD_DEV);
1158 assert(ps->flags & FLAG_HAS_MEDIUM);
1159 assert(ps->sector_size > 0);
1161 /* Limit the maximum size of a single transfer.
1162 * See the comments at the top of this file for details.
1164 if (size > MAX_TRANSFER)
1165 size = MAX_TRANSFER;
1167 /* If necessary, reduce the request size so that the request does not
1168 * extend beyond the end of the partition. The caller already
1169 * guarantees that the starting position lies within the partition.
1171 if (pos + size > eof)
1172 size = (vir_bytes) (eof - pos);
1174 start_lba = pos / ps->sector_size;
1175 lead = (vir_bytes) (pos % ps->sector_size);
1176 count = (lead + size + ps->sector_size - 1) / ps->sector_size;
1178 /* Position must be word-aligned for read requests, and sector-aligned
1179 * for write requests. We do not support read-modify-write for writes.
1181 if ((lead & 1) || (write && lead != 0)) {
1182 dprintf(V_ERR, ("%s: unaligned position from %d\n",
1183 ahci_portname(ps), endpt));
1184 return EINVAL;
1187 /* Write requests must be sector-aligned. Word alignment of the size is
1188 * already guaranteed by sum_iovec().
1190 if (write && (size % ps->sector_size) != 0) {
1191 dprintf(V_ERR, ("%s: unaligned size %lu from %d\n",
1192 ahci_portname(ps), size, endpt));
1193 return EINVAL;
1196 /* Create a vector of physical addresses and sizes for the transfer. */
1197 nr_prds = r = setup_prdt(ps, endpt, iovec, nr_req, size, lead, write,
1198 prdt);
1200 if (r < 0) return r;
1202 /* Perform the actual transfer. */
1203 cmd = port_find_cmd(ps);
1205 if (ps->flags & FLAG_ATAPI)
1206 r = atapi_transfer(ps, cmd, start_lba, count, write, prdt,
1207 nr_prds);
1208 else
1209 r = ata_transfer(ps, cmd, start_lba, count, write,
1210 !!(flags & BDEV_FORCEWRITE), prdt, nr_prds);
1212 if (r != OK) return r;
1214 return size;
1217 /*===========================================================================*
1218 * port_hardreset *
1219 *===========================================================================*/
1220 static void port_hardreset(struct port_state *ps)
1222 /* Perform a port-level (hard) reset on the given port.
1225 port_write(ps, AHCI_PORT_SCTL, AHCI_PORT_SCTL_DET_INIT);
1227 micro_delay(COMRESET_DELAY * 1000); /* COMRESET_DELAY is in ms */
1229 port_write(ps, AHCI_PORT_SCTL, AHCI_PORT_SCTL_DET_NONE);
1232 /*===========================================================================*
1233 * port_override *
1234 *===========================================================================*/
1235 static void port_override(struct port_state *ps)
1237 /* Override the port's BSY and/or DRQ flags. This may only be done
1238 * prior to starting the port.
1240 u32_t cmd;
1242 cmd = port_read(ps, AHCI_PORT_CMD);
1243 port_write(ps, AHCI_PORT_CMD, cmd | AHCI_PORT_CMD_CLO);
1245 SPIN_UNTIL(!(port_read(ps, AHCI_PORT_CMD) & AHCI_PORT_CMD_CLO),
1246 PORTREG_DELAY);
1248 dprintf(V_INFO, ("%s: overridden\n", ahci_portname(ps)));
1251 /*===========================================================================*
1252 * port_start *
1253 *===========================================================================*/
1254 static void port_start(struct port_state *ps)
1256 /* Start the given port, allowing for the execution of commands and the
1257 * transfer of data on that port.
1259 u32_t cmd;
1261 /* Reset status registers. */
1262 port_write(ps, AHCI_PORT_SERR, ~0);
1263 port_write(ps, AHCI_PORT_IS, ~0);
1265 /* Start the port. */
1266 cmd = port_read(ps, AHCI_PORT_CMD);
1267 port_write(ps, AHCI_PORT_CMD, cmd | AHCI_PORT_CMD_ST);
1269 dprintf(V_INFO, ("%s: started\n", ahci_portname(ps)));
1272 /*===========================================================================*
1273 * port_stop *
1274 *===========================================================================*/
1275 static void port_stop(struct port_state *ps)
1277 /* Stop the given port, if not already stopped.
1279 u32_t cmd;
1281 cmd = port_read(ps, AHCI_PORT_CMD);
1283 if (cmd & (AHCI_PORT_CMD_CR | AHCI_PORT_CMD_ST)) {
1284 port_write(ps, AHCI_PORT_CMD, cmd & ~AHCI_PORT_CMD_ST);
1286 SPIN_UNTIL(!(port_read(ps, AHCI_PORT_CMD) & AHCI_PORT_CMD_CR),
1287 PORTREG_DELAY);
1289 dprintf(V_INFO, ("%s: stopped\n", ahci_portname(ps)));
1293 /*===========================================================================*
1294 * port_restart *
1295 *===========================================================================*/
1296 static void port_restart(struct port_state *ps)
1298 /* Restart a port after a fatal error has occurred.
1301 /* Fail all outstanding commands. */
1302 port_fail_cmds(ps);
1304 /* Stop the port. */
1305 port_stop(ps);
1307 /* If the BSY and/or DRQ flags are set, reset the port. */
1308 if (port_read(ps, AHCI_PORT_TFD) &
1309 (AHCI_PORT_TFD_STS_BSY | AHCI_PORT_TFD_STS_DRQ)) {
1311 dprintf(V_ERR, ("%s: port reset\n", ahci_portname(ps)));
1313 /* To keep this driver simple, we do not transparently recover
1314 * ongoing requests. Instead, we mark the failing device as
1315 * disconnected, and reset it. If the reset succeeds, the
1316 * device (or, perhaps, eventually, another device) will come
1317 * back up. Any current and future requests to this port will
1318 * be failed until the port is fully closed and reopened.
1320 port_disconnect(ps);
1322 /* Trigger a port reset. */
1323 port_hardreset(ps);
1325 return;
1328 /* Start the port. */
1329 port_start(ps);
1332 /*===========================================================================*
1333 * print_string *
1334 *===========================================================================*/
1335 static void print_string(u16_t *buf, int start, int end)
1337 /* Print a string that is stored as little-endian words and padded with
1338 * trailing spaces.
1340 int i, last = 0;
1342 while (end >= start && buf[end] == 0x2020) end--;
1344 if (end >= start && (buf[end] & 0xFF) == 0x20) end--, last++;
1346 for (i = start; i <= end; i++)
1347 printf("%c%c", buf[i] >> 8, buf[i] & 0xFF);
1349 if (last)
1350 printf("%c", buf[i] >> 8);
1353 /*===========================================================================*
1354 * port_id_check *
1355 *===========================================================================*/
1356 static void port_id_check(struct port_state *ps, int success)
1358 /* The device identification command has either completed or timed out.
1359 * Decide whether this device is usable or not, and store some of its
1360 * properties.
1362 u16_t *buf;
1364 assert(ps->state == STATE_WAIT_ID);
1366 ps->flags &= ~FLAG_BUSY;
1367 cancel_timer(&ps->cmd_info[0].timer);
1369 if (!success) {
1370 if (!(ps->flags & FLAG_ATAPI) &&
1371 port_read(ps, AHCI_PORT_SIG) != ATA_SIG_ATA) {
1372 dprintf(V_INFO, ("%s: may not be ATA, trying ATAPI\n",
1373 ahci_portname(ps)));
1375 ps->flags |= FLAG_ATAPI;
1377 (void) gen_identify(ps, FALSE /*blocking*/);
1378 return;
1381 dprintf(V_ERR,
1382 ("%s: unable to identify\n", ahci_portname(ps)));
1385 /* If the identify command itself succeeded, check the results and
1386 * store some properties.
1388 if (success) {
1389 buf = (u16_t *) ps->tmp_base;
1391 if (ps->flags & FLAG_ATAPI)
1392 success = atapi_id_check(ps, buf);
1393 else
1394 success = ata_id_check(ps, buf);
1397 /* If the device has not been identified successfully, mark it as an
1398 * unusable device.
1400 if (!success) {
1401 port_stop(ps);
1403 ps->state = STATE_BAD_DEV;
1404 port_write(ps, AHCI_PORT_IE, AHCI_PORT_IE_PRCE);
1406 return;
1409 /* The device has been identified successfully, and hence usable. */
1410 ps->state = STATE_GOOD_DEV;
1412 /* Print some information about the device. */
1413 if (ahci_verbose >= V_INFO) {
1414 printf("%s: ATA%s, ", ahci_portname(ps),
1415 (ps->flags & FLAG_ATAPI) ? "PI" : "");
1416 print_string(buf, 27, 46);
1417 if (ahci_verbose >= V_DEV) {
1418 printf(" (");
1419 print_string(buf, 10, 19);
1420 printf(", ");
1421 print_string(buf, 23, 26);
1422 printf(")");
1425 if (ps->flags & FLAG_HAS_MEDIUM)
1426 printf(", %u byte sectors, %llu MB size",
1427 ps->sector_size,
1428 ps->lba_count * ps->sector_size / (1024*1024));
1430 printf("\n");
1434 /*===========================================================================*
1435 * port_connect *
1436 *===========================================================================*/
1437 static void port_connect(struct port_state *ps)
1439 /* A device has been found to be attached to this port. Start the port,
1440 * and do timed polling for its signature to become available.
1442 u32_t status, sig;
1444 dprintf(V_INFO, ("%s: device connected\n", ahci_portname(ps)));
1446 port_start(ps);
1448 /* The next check covers a purely hypothetical race condition, where
1449 * the device would disappear right before we try to start it. This is
1450 * possible because we have to clear PxSERR, and with that, the DIAG.N
1451 * bit. Double-check the port status, and if it is not as we expect,
1452 * infer a disconnection.
1454 status = port_read(ps, AHCI_PORT_SSTS) & AHCI_PORT_SSTS_DET_MASK;
1456 if (status != AHCI_PORT_SSTS_DET_PHY) {
1457 dprintf(V_ERR, ("%s: device vanished!\n", ahci_portname(ps)));
1459 port_stop(ps);
1461 ps->state = STATE_NO_DEV;
1462 ps->flags &= ~FLAG_BUSY;
1464 return;
1467 /* Clear all state flags except the busy flag, which may be relevant if
1468 * a BDEV_OPEN call is waiting for the device to become ready; the
1469 * barrier flag, which prevents access to the device until it is
1470 * completely closed and (re)opened; and, the thread suspension flag.
1472 ps->flags &= (FLAG_BUSY | FLAG_BARRIER | FLAG_SUSPENDED);
1474 /* Check the port's signature. We only use the signature to speed up
1475 * identification; we will try both ATA and ATAPI if the signature is
1476 * neither ATA nor ATAPI.
1478 sig = port_read(ps, AHCI_PORT_SIG);
1480 if (sig == ATA_SIG_ATAPI)
1481 ps->flags |= FLAG_ATAPI;
1483 /* Attempt to identify the device. Do this using continuation, because
1484 * we may already be called from port_wait() here, and could end up
1485 * confusing the timer expiration procedure.
1487 ps->state = STATE_WAIT_ID;
1488 port_write(ps, AHCI_PORT_IE, AHCI_PORT_IE_MASK);
1490 (void) gen_identify(ps, FALSE /*blocking*/);
1493 /*===========================================================================*
1494 * port_disconnect *
1495 *===========================================================================*/
1496 static void port_disconnect(struct port_state *ps)
1498 /* The device has detached from this port. It has already been stopped.
1501 dprintf(V_INFO, ("%s: device disconnected\n", ahci_portname(ps)));
1503 ps->state = STATE_NO_DEV;
1504 port_write(ps, AHCI_PORT_IE, AHCI_PORT_IE_PCE);
1505 ps->flags &= ~FLAG_BUSY;
1507 /* Fail any ongoing request. The caller may already have done this. */
1508 port_fail_cmds(ps);
1510 /* Block any further access until the device is completely closed and
1511 * reopened. This prevents arbitrary I/O to a newly plugged-in device
1512 * without upper layers noticing.
1514 ps->flags |= FLAG_BARRIER;
1516 /* Inform the blockdriver library to reduce the number of threads. */
1517 blockdriver_mt_set_workers(ps->device, 1);
1520 /*===========================================================================*
1521 * port_dev_check *
1522 *===========================================================================*/
1523 static void port_dev_check(struct port_state *ps)
1525 /* Perform device detection by means of polling.
1527 u32_t status, tfd;
1529 assert(ps->state == STATE_WAIT_DEV);
1531 status = port_read(ps, AHCI_PORT_SSTS) & AHCI_PORT_SSTS_DET_MASK;
1533 dprintf(V_DEV, ("%s: polled status %u\n", ahci_portname(ps), status));
1535 switch (status) {
1536 case AHCI_PORT_SSTS_DET_PHY:
1537 tfd = port_read(ps, AHCI_PORT_TFD);
1539 /* If a Phy connection has been established, and the BSY and
1540 * DRQ flags are cleared, the device is ready.
1542 if (!(tfd & (AHCI_PORT_TFD_STS_BSY | AHCI_PORT_TFD_STS_DRQ))) {
1543 port_connect(ps);
1545 return;
1548 /* fall-through */
1549 case AHCI_PORT_SSTS_DET_DET:
1550 /* A device has been detected, but it is not ready yet. Try for
1551 * a while before giving up. This may take seconds.
1553 if (ps->left > 0) {
1554 ps->left--;
1555 set_timer(&ps->cmd_info[0].timer, ahci_device_delay,
1556 port_timeout, BUILD_ARG(ps - port_state, 0));
1557 return;
1561 dprintf(V_INFO, ("%s: device not ready\n", ahci_portname(ps)));
1563 /* We get here on timeout, and if the HBA reports that there is no
1564 * device present at all. In all cases, we change to another state.
1566 if (status == AHCI_PORT_SSTS_DET_PHY) {
1567 /* Some devices may not correctly clear BSY/DRQ. Upon timeout,
1568 * if we can override these flags, do so and start the
1569 * identification process anyway.
1571 if (hba_state.has_clo) {
1572 port_override(ps);
1574 port_connect(ps);
1576 return;
1579 /* A device is present and initialized, but not ready. */
1580 ps->state = STATE_BAD_DEV;
1581 port_write(ps, AHCI_PORT_IE, AHCI_PORT_IE_PRCE);
1582 } else {
1583 /* A device may or may not be present, but it does not appear
1584 * to be ready in any case. Ignore it until the next device
1585 * initialization event.
1587 ps->state = STATE_NO_DEV;
1588 ps->flags &= ~FLAG_BUSY;
1592 /*===========================================================================*
1593 * port_intr *
1594 *===========================================================================*/
1595 static void port_intr(struct port_state *ps)
1597 /* Process an interrupt on this port.
1599 u32_t smask, emask;
1600 int success;
1602 if (ps->state == STATE_NO_PORT) {
1603 dprintf(V_ERR, ("%s: interrupt for invalid port!\n",
1604 ahci_portname(ps)));
1606 return;
1609 smask = port_read(ps, AHCI_PORT_IS);
1610 emask = smask & port_read(ps, AHCI_PORT_IE);
1612 /* Clear the interrupt flags that we saw were set. */
1613 port_write(ps, AHCI_PORT_IS, smask);
1615 dprintf(V_REQ, ("%s: interrupt (%08x)\n", ahci_portname(ps), smask));
1617 /* Check if any commands have completed. */
1618 port_check_cmds(ps);
1620 if (emask & AHCI_PORT_IS_PCS) {
1621 /* Clear the X diagnostics bit to clear this interrupt. */
1622 port_write(ps, AHCI_PORT_SERR, AHCI_PORT_SERR_DIAG_X);
1624 dprintf(V_DEV, ("%s: device attached\n", ahci_portname(ps)));
1626 switch (ps->state) {
1627 case STATE_SPIN_UP:
1628 case STATE_NO_DEV:
1629 /* Reportedly, a device has shown up. Start polling its
1630 * status until it has become ready.
1633 if (ps->state == STATE_SPIN_UP)
1634 cancel_timer(&ps->cmd_info[0].timer);
1636 ps->state = STATE_WAIT_DEV;
1637 ps->left = ahci_device_checks;
1639 port_dev_check(ps);
1641 break;
1643 case STATE_WAIT_DEV:
1644 /* Nothing else to do. */
1645 break;
1647 default:
1648 /* Impossible. */
1649 assert(0);
1651 } else if (emask & AHCI_PORT_IS_PRCS) {
1652 /* Clear the N diagnostics bit to clear this interrupt. */
1653 port_write(ps, AHCI_PORT_SERR, AHCI_PORT_SERR_DIAG_N);
1655 dprintf(V_DEV, ("%s: device detached\n", ahci_portname(ps)));
1657 switch (ps->state) {
1658 case STATE_WAIT_ID:
1659 case STATE_GOOD_DEV:
1660 /* The device is no longer ready. Stop the port, cancel
1661 * ongoing requests, and disconnect the device.
1663 port_stop(ps);
1665 /* fall-through */
1666 case STATE_BAD_DEV:
1667 port_disconnect(ps);
1669 /* The device has become unusable to us at this point.
1670 * Reset the port to make sure that once the device (or
1671 * another device) becomes usable again, we will get a
1672 * PCS interrupt as well.
1674 port_hardreset(ps);
1676 break;
1678 default:
1679 /* Impossible. */
1680 assert(0);
1682 } else if (smask & AHCI_PORT_IS_MASK) {
1683 /* We assume that any other interrupt indicates command
1684 * completion or (command or device) failure. Unfortunately, if
1685 * an NCQ command failed, we cannot easily determine which one
1686 * it was. For that reason, after completing all successfully
1687 * finished commands (above), we fail all other outstanding
1688 * commands and restart the port. This can possibly be improved
1689 * later by obtaining per-command status results from the HBA.
1692 success = !(port_read(ps, AHCI_PORT_TFD) &
1693 (AHCI_PORT_TFD_STS_ERR | AHCI_PORT_TFD_STS_DF));
1695 /* Check now for failure. There are fatal failures, and there
1696 * are failures that set the TFD.STS.ERR field using a D2H
1697 * FIS. In both cases, we just restart the port, failing all
1698 * commands in the process.
1700 if ((port_read(ps, AHCI_PORT_TFD) &
1701 (AHCI_PORT_TFD_STS_ERR | AHCI_PORT_TFD_STS_DF)) ||
1702 (smask & AHCI_PORT_IS_RESTART)) {
1703 port_restart(ps);
1706 /* If we were waiting for ID verification, check now. */
1707 if (ps->state == STATE_WAIT_ID)
1708 port_id_check(ps, success);
1712 /*===========================================================================*
1713 * port_timeout *
1714 *===========================================================================*/
1715 static void port_timeout(int arg)
1717 /* A timeout has occurred on this port. Figure out what the timeout is
1718 * for, and take appropriate action.
1720 struct port_state *ps;
1721 int port, cmd;
1723 port = GET_PORT(arg);
1724 cmd = GET_TAG(arg);
1726 assert(port >= 0 && port < hba_state.nr_ports);
1728 ps = &port_state[port];
1730 /* Regardless of the outcome of this timeout, wake up the thread if it
1731 * is suspended. This applies only during the initialization.
1733 if (ps->flags & FLAG_SUSPENDED) {
1734 assert(cmd == 0);
1735 blockdriver_mt_wakeup(ps->cmd_info[0].tid);
1738 /* If detection of a device after startup timed out, give up on initial
1739 * detection and only look for hot plug events from now on.
1741 if (ps->state == STATE_SPIN_UP) {
1742 /* One exception: if the PCS interrupt bit is set here, then we
1743 * are probably running on VirtualBox, which is currently not
1744 * always raising interrupts when setting interrupt bits (!).
1746 if (port_read(ps, AHCI_PORT_IS) & AHCI_PORT_IS_PCS) {
1747 dprintf(V_INFO, ("%s: bad controller, no interrupt\n",
1748 ahci_portname(ps)));
1750 ps->state = STATE_WAIT_DEV;
1751 ps->left = ahci_device_checks;
1753 port_dev_check(ps);
1755 return;
1756 } else {
1757 dprintf(V_INFO, ("%s: spin-up timeout\n",
1758 ahci_portname(ps)));
1760 /* If the busy flag is set, a BDEV_OPEN request is
1761 * waiting for the detection to finish; clear the busy
1762 * flag to return an error to the caller.
1764 ps->state = STATE_NO_DEV;
1765 ps->flags &= ~FLAG_BUSY;
1768 return;
1771 /* If we are waiting for a device to become connected and initialized,
1772 * check now.
1774 if (ps->state == STATE_WAIT_DEV) {
1775 port_dev_check(ps);
1777 return;
1780 dprintf(V_ERR, ("%s: timeout\n", ahci_portname(ps)));
1782 /* Restart the port, failing all current commands. */
1783 port_restart(ps);
1785 /* Finish up the identify operation. */
1786 if (ps->state == STATE_WAIT_ID)
1787 port_id_check(ps, FALSE);
1790 /*===========================================================================*
1791 * port_wait *
1792 *===========================================================================*/
1793 static void port_wait(struct port_state *ps)
1795 /* Suspend the current thread until the given port is no longer busy,
1796 * due to either command completion or timeout.
1799 ps->flags |= FLAG_SUSPENDED;
1801 while (ps->flags & FLAG_BUSY)
1802 blockdriver_mt_sleep();
1804 ps->flags &= ~FLAG_SUSPENDED;
1807 /*===========================================================================*
1808 * port_issue *
1809 *===========================================================================*/
1810 static void port_issue(struct port_state *ps, int cmd, clock_t timeout)
1812 /* Issue a command to the port, and set a timer to trigger a timeout
1813 * if the command takes too long to complete.
1816 /* Set the corresponding NCQ command bit, if applicable. */
1817 if (ps->flags & FLAG_HAS_NCQ)
1818 port_write(ps, AHCI_PORT_SACT, 1 << cmd);
1820 /* Make sure that the compiler does not delay any previous write
1821 * operations until after the write to the command issue register.
1823 __insn_barrier();
1825 /* Tell the controller that a new command is ready. */
1826 port_write(ps, AHCI_PORT_CI, 1 << cmd);
1828 /* Update pending commands. */
1829 ps->pend_mask |= 1 << cmd;
1831 /* Set a timer in case the command does not complete at all. */
1832 set_timer(&ps->cmd_info[cmd].timer, timeout, port_timeout,
1833 BUILD_ARG(ps - port_state, cmd));
1836 /*===========================================================================*
1837 * port_exec *
1838 *===========================================================================*/
1839 static int port_exec(struct port_state *ps, int cmd, clock_t timeout)
1841 /* Execute a command on a port, wait for the command to complete or for
1842 * a timeout, and return whether the command succeeded or not.
1845 port_issue(ps, cmd, timeout);
1847 /* Put the thread to sleep until a timeout or a command completion
1848 * happens. Earlier, we used to call port_wait which set the suspended
1849 * flag. We now abandon it since the flag has to work on a per-thread,
1850 * and hence per-tag basis and not on a per-port basis. Instead, we
1851 * retain that call only to defer open calls during device/driver
1852 * initialization. Instead, we call sleep here directly. Before
1853 * sleeping, we register the thread.
1855 ps->cmd_info[cmd].tid = blockdriver_mt_get_tid();
1857 blockdriver_mt_sleep();
1859 /* Cancelling a timer that just triggered, does no harm. */
1860 cancel_timer(&ps->cmd_info[cmd].timer);
1862 assert(!(ps->flags & FLAG_BUSY));
1864 dprintf(V_REQ, ("%s: end of command -- %s\n", ahci_portname(ps),
1865 (ps->cmd_info[cmd].result == RESULT_FAILURE) ?
1866 "failure" : "success"));
1868 if (ps->cmd_info[cmd].result == RESULT_FAILURE)
1869 return EIO;
1871 return OK;
1874 /*===========================================================================*
1875 * port_alloc *
1876 *===========================================================================*/
1877 static void port_alloc(struct port_state *ps)
1879 /* Allocate memory for the given port, and enable FIS receipt. We try
1880 * to cram everything into one 4K-page in order to limit memory usage
1881 * as much as possible. More memory may be allocated on demand later,
1882 * but allocation failure should be fatal only here. Note that we do
1883 * not allocate memory for sector padding here, because we do not know
1884 * the device's sector size yet.
1886 size_t fis_off, tmp_off, ct_off; int i;
1887 size_t ct_offs[NR_CMDS];
1888 u32_t cmd;
1890 fis_off = AHCI_CL_SIZE + AHCI_FIS_SIZE - 1;
1891 fis_off -= fis_off % AHCI_FIS_SIZE;
1893 tmp_off = fis_off + AHCI_FIS_SIZE + AHCI_TMP_ALIGN - 1;
1894 tmp_off -= tmp_off % AHCI_TMP_ALIGN;
1896 /* Allocate memory for all the commands. */
1897 ct_off = tmp_off + AHCI_TMP_SIZE;
1898 for (i = 0; i < NR_CMDS; i++) {
1899 ct_off += AHCI_CT_ALIGN - 1;
1900 ct_off -= ct_off % AHCI_CT_ALIGN;
1901 ct_offs[i] = ct_off;
1902 ps->mem_size = ct_off + AHCI_CT_SIZE;
1903 ct_off = ps->mem_size;
1906 ps->mem_base = alloc_contig(ps->mem_size, AC_ALIGN4K, &ps->mem_phys);
1907 if (ps->mem_base == NULL)
1908 panic("unable to allocate port memory");
1909 memset(ps->mem_base, 0, ps->mem_size);
1911 ps->cl_base = (u32_t *) ps->mem_base;
1912 ps->cl_phys = ps->mem_phys;
1913 assert(ps->cl_phys % AHCI_CL_SIZE == 0);
1915 ps->fis_base = (u32_t *) (ps->mem_base + fis_off);
1916 ps->fis_phys = ps->mem_phys + fis_off;
1917 assert(ps->fis_phys % AHCI_FIS_SIZE == 0);
1919 ps->tmp_base = (u8_t *) (ps->mem_base + tmp_off);
1920 ps->tmp_phys = ps->mem_phys + tmp_off;
1921 assert(ps->tmp_phys % AHCI_TMP_ALIGN == 0);
1923 for (i = 0; i < NR_CMDS; i++) {
1924 ps->ct_base[i] = ps->mem_base + ct_offs[i];
1925 ps->ct_phys[i] = ps->mem_phys + ct_offs[i];
1926 assert(ps->ct_phys[i] % AHCI_CT_ALIGN == 0);
1929 /* Tell the controller about some of the physical addresses. */
1930 port_write(ps, AHCI_PORT_FBU, 0);
1931 port_write(ps, AHCI_PORT_FB, ps->fis_phys);
1933 port_write(ps, AHCI_PORT_CLBU, 0);
1934 port_write(ps, AHCI_PORT_CLB, ps->cl_phys);
1936 /* Enable FIS receive. */
1937 cmd = port_read(ps, AHCI_PORT_CMD);
1938 port_write(ps, AHCI_PORT_CMD, cmd | AHCI_PORT_CMD_FRE);
1940 ps->pad_base = NULL;
1941 ps->pad_size = 0;
1944 /*===========================================================================*
1945 * port_free *
1946 *===========================================================================*/
1947 static void port_free(struct port_state *ps)
1949 /* Disable FIS receipt for the given port, and free previously
1950 * allocated memory.
1952 u32_t cmd;
1954 /* Disable FIS receive. */
1955 cmd = port_read(ps, AHCI_PORT_CMD);
1957 if (cmd & (AHCI_PORT_CMD_FR | AHCI_PORT_CMD_FRE)) {
1958 port_write(ps, AHCI_PORT_CMD, cmd & ~AHCI_PORT_CMD_FRE);
1960 SPIN_UNTIL(!(port_read(ps, AHCI_PORT_CMD) & AHCI_PORT_CMD_FR),
1961 PORTREG_DELAY);
1964 if (ps->pad_base != NULL)
1965 free_contig(ps->pad_base, ps->pad_size);
1967 free_contig(ps->mem_base, ps->mem_size);
1970 /*===========================================================================*
1971 * port_init *
1972 *===========================================================================*/
1973 static void port_init(struct port_state *ps)
1975 /* Initialize the given port.
1977 u32_t cmd;
1978 int i;
1980 /* Initialize the port state structure. */
1981 ps->queue_depth = 1;
1982 ps->state = STATE_SPIN_UP;
1983 ps->flags = FLAG_BUSY;
1984 ps->sector_size = 0;
1985 ps->open_count = 0;
1986 ps->pend_mask = 0;
1987 for (i = 0; i < NR_CMDS; i++)
1988 init_timer(&ps->cmd_info[i].timer);
1990 ps->reg = (u32_t *) ((u8_t *) hba_state.base +
1991 AHCI_MEM_BASE_SIZE + AHCI_MEM_PORT_SIZE * (ps - port_state));
1993 /* Allocate memory for the port. */
1994 port_alloc(ps);
1996 /* Just listen for device connection events for now. */
1997 port_write(ps, AHCI_PORT_IE, AHCI_PORT_IE_PCE);
1999 /* Enable device spin-up for HBAs that support staggered spin-up.
2000 * This is a no-op for HBAs that do not support it.
2002 cmd = port_read(ps, AHCI_PORT_CMD);
2003 port_write(ps, AHCI_PORT_CMD, cmd | AHCI_PORT_CMD_SUD);
2005 /* Trigger a port reset. */
2006 port_hardreset(ps);
2008 set_timer(&ps->cmd_info[0].timer, ahci_spinup_timeout,
2009 port_timeout, BUILD_ARG(ps - port_state, 0));
2012 /*===========================================================================*
2013 * ahci_probe *
2014 *===========================================================================*/
2015 static int ahci_probe(int skip)
2017 /* Find a matching PCI device.
2019 int r, devind;
2020 u16_t vid, did;
2022 pci_init();
2024 r = pci_first_dev(&devind, &vid, &did);
2025 if (r <= 0)
2026 return -1;
2028 while (skip--) {
2029 r = pci_next_dev(&devind, &vid, &did);
2030 if (r <= 0)
2031 return -1;
2034 pci_reserve(devind);
2036 return devind;
2039 /*===========================================================================*
2040 * ahci_reset *
2041 *===========================================================================*/
2042 static void ahci_reset(void)
2044 /* Reset the HBA. Do not enable AHCI mode afterwards.
2046 u32_t ghc;
2048 ghc = hba_read(AHCI_HBA_GHC);
2050 hba_write(AHCI_HBA_GHC, ghc | AHCI_HBA_GHC_AE);
2052 hba_write(AHCI_HBA_GHC, ghc | AHCI_HBA_GHC_AE | AHCI_HBA_GHC_HR);
2054 SPIN_UNTIL(!(hba_read(AHCI_HBA_GHC) & AHCI_HBA_GHC_HR), RESET_DELAY);
2056 if (hba_read(AHCI_HBA_GHC) & AHCI_HBA_GHC_HR)
2057 panic("unable to reset HBA");
2060 /*===========================================================================*
2061 * ahci_init *
2062 *===========================================================================*/
2063 static void ahci_init(int devind)
2065 /* Initialize the device.
2067 u32_t base, size, cap, ghc, mask;
2068 int r, port, ioflag;
2070 if ((r = pci_get_bar(devind, PCI_BAR_6, &base, &size, &ioflag)) != OK)
2071 panic("unable to retrieve BAR: %d", r);
2073 if (ioflag)
2074 panic("invalid BAR type");
2076 /* There must be at least one port, and at most NR_PORTS ports. Limit
2077 * the actual total number of ports to the size of the exposed area.
2079 if (size < AHCI_MEM_BASE_SIZE + AHCI_MEM_PORT_SIZE)
2080 panic("HBA memory size too small: %u", size);
2082 size = MIN(size, AHCI_MEM_BASE_SIZE + AHCI_MEM_PORT_SIZE * NR_PORTS);
2084 hba_state.nr_ports = (size - AHCI_MEM_BASE_SIZE) / AHCI_MEM_PORT_SIZE;
2086 /* Map the register area into local memory. */
2087 hba_state.base = (u32_t *) vm_map_phys(SELF, (void *) base, size);
2088 hba_state.size = size;
2089 if (hba_state.base == MAP_FAILED)
2090 panic("unable to map HBA memory");
2092 /* Retrieve, allocate and enable the controller's IRQ. */
2093 hba_state.irq = pci_attr_r8(devind, PCI_ILR);
2094 hba_state.hook_id = 0;
2096 if ((r = sys_irqsetpolicy(hba_state.irq, 0, &hba_state.hook_id)) != OK)
2097 panic("unable to register IRQ: %d", r);
2099 if ((r = sys_irqenable(&hba_state.hook_id)) != OK)
2100 panic("unable to enable IRQ: %d", r);
2102 /* Reset the HBA. */
2103 ahci_reset();
2105 /* Enable AHCI and interrupts. */
2106 ghc = hba_read(AHCI_HBA_GHC);
2107 hba_write(AHCI_HBA_GHC, ghc | AHCI_HBA_GHC_AE | AHCI_HBA_GHC_IE);
2109 /* Limit the maximum number of commands to the controller's value. */
2110 /* Note that we currently use only one command anyway. */
2111 cap = hba_read(AHCI_HBA_CAP);
2112 hba_state.has_ncq = !!(cap & AHCI_HBA_CAP_SNCQ);
2113 hba_state.has_clo = !!(cap & AHCI_HBA_CAP_SCLO);
2114 hba_state.nr_cmds = MIN(NR_CMDS,
2115 ((cap >> AHCI_HBA_CAP_NCS_SHIFT) & AHCI_HBA_CAP_NCS_MASK) + 1);
2117 dprintf(V_INFO, ("AHCI%u: HBA v%d.%d%d, %ld ports, %ld commands, "
2118 "%s queuing, IRQ %d\n",
2119 ahci_instance,
2120 (int) (hba_read(AHCI_HBA_VS) >> 16),
2121 (int) ((hba_read(AHCI_HBA_VS) >> 8) & 0xFF),
2122 (int) (hba_read(AHCI_HBA_VS) & 0xFF),
2123 ((cap >> AHCI_HBA_CAP_NP_SHIFT) & AHCI_HBA_CAP_NP_MASK) + 1,
2124 ((cap >> AHCI_HBA_CAP_NCS_SHIFT) & AHCI_HBA_CAP_NCS_MASK) + 1,
2125 hba_state.has_ncq ? "supports" : "no", hba_state.irq));
2127 dprintf(V_INFO, ("AHCI%u: CAP %08x, CAP2 %08x, PI %08x\n",
2128 ahci_instance, cap, hba_read(AHCI_HBA_CAP2),
2129 hba_read(AHCI_HBA_PI)));
2131 /* Initialize each of the implemented ports. We ignore CAP.NP. */
2132 mask = hba_read(AHCI_HBA_PI);
2134 for (port = 0; port < hba_state.nr_ports; port++) {
2135 port_state[port].device = NO_DEVICE;
2136 port_state[port].state = STATE_NO_PORT;
2138 if (mask & (1 << port))
2139 port_init(&port_state[port]);
2143 /*===========================================================================*
2144 * ahci_stop *
2145 *===========================================================================*/
2146 static void ahci_stop(void)
2148 /* Disable AHCI, and clean up resources to the extent possible.
2150 struct port_state *ps;
2151 int r, port;
2153 for (port = 0; port < hba_state.nr_ports; port++) {
2154 ps = &port_state[port];
2156 if (ps->state != STATE_NO_PORT) {
2157 port_stop(ps);
2159 port_free(ps);
2163 ahci_reset();
2165 if ((r = vm_unmap_phys(SELF, (void *) hba_state.base,
2166 hba_state.size)) != OK)
2167 panic("unable to unmap HBA memory: %d", r);
2169 if ((r = sys_irqrmpolicy(&hba_state.hook_id)) != OK)
2170 panic("unable to deregister IRQ: %d", r);
2173 /*===========================================================================*
2174 * ahci_alarm *
2175 *===========================================================================*/
2176 static void ahci_alarm(clock_t stamp)
2178 /* Process an alarm.
2181 /* Call the port-specific handler for each port that timed out. */
2182 expire_timers(stamp);
2185 /*===========================================================================*
2186 * ahci_intr *
2187 *===========================================================================*/
2188 static void ahci_intr(unsigned int UNUSED(mask))
2190 /* Process an interrupt.
2192 struct port_state *ps;
2193 u32_t mask;
2194 int r, port;
2196 /* Handle an interrupt for each port that has the interrupt bit set. */
2197 mask = hba_read(AHCI_HBA_IS);
2199 for (port = 0; port < hba_state.nr_ports; port++) {
2200 if (mask & (1 << port)) {
2201 ps = &port_state[port];
2203 port_intr(ps);
2205 /* After processing an interrupt, wake up the device
2206 * thread if it is suspended and now no longer busy.
2208 if ((ps->flags & (FLAG_SUSPENDED | FLAG_BUSY)) ==
2209 FLAG_SUSPENDED)
2210 blockdriver_mt_wakeup(ps->cmd_info[0].tid);
2214 /* Clear the bits that we processed. */
2215 hba_write(AHCI_HBA_IS, mask);
2217 /* Reenable the interrupt. */
2218 if ((r = sys_irqenable(&hba_state.hook_id)) != OK)
2219 panic("unable to enable IRQ: %d", r);
2222 /*===========================================================================*
2223 * ahci_get_params *
2224 *===========================================================================*/
2225 static void ahci_get_params(void)
2227 /* Retrieve and parse parameters passed to this driver, except the
2228 * device-to-port mapping, which has to be parsed later.
2230 long v;
2231 unsigned int i;
2233 /* Find out which driver instance we are. */
2234 v = 0;
2235 (void) env_parse("instance", "d", 0, &v, 0, 255);
2236 ahci_instance = (int) v;
2238 /* Initialize the verbosity level. */
2239 v = V_ERR;
2240 (void) env_parse("ahci_verbose", "d", 0, &v, V_NONE, V_REQ);
2241 ahci_verbose = (int) v;
2243 /* Initialize timeout-related values. */
2244 for (i = 0; i < sizeof(ahci_timevar) / sizeof(ahci_timevar[0]); i++) {
2245 v = ahci_timevar[i].default_ms;
2247 (void) env_parse(ahci_timevar[i].name, "d", 0, &v, 1,
2248 LONG_MAX);
2250 *ahci_timevar[i].ptr = millis_to_hz(v);
2253 ahci_device_delay = millis_to_hz(DEVICE_DELAY);
2254 ahci_device_checks = (ahci_device_timeout + ahci_device_delay - 1) /
2255 ahci_device_delay;
2258 /*===========================================================================*
2259 * ahci_set_mapping *
2260 *===========================================================================*/
2261 static void ahci_set_mapping(void)
2263 /* Construct a mapping from device nodes to port numbers.
2265 char key[16], val[32], *p;
2266 unsigned int port;
2267 int i, j;
2269 /* Start off with a mapping that includes implemented ports only, in
2270 * order. We choose this mapping over an identity mapping to maximize
2271 * the chance that the user will be able to access the first MAX_DRIVES
2272 * devices. Note that we can only do this after initializing the HBA.
2274 for (i = j = 0; i < NR_PORTS && j < MAX_DRIVES; i++)
2275 if (port_state[i].state != STATE_NO_PORT)
2276 ahci_map[j++] = i;
2278 for ( ; j < MAX_DRIVES; j++)
2279 ahci_map[j] = NO_PORT;
2281 /* See if the user specified a custom mapping. Unlike all other
2282 * configuration options, this is a per-instance setting.
2284 strlcpy(key, "ahci0_map", sizeof(key));
2285 key[4] += ahci_instance;
2287 if (env_get_param(key, val, sizeof(val)) == OK) {
2288 /* Parse the mapping, which is assumed to be a comma-separated
2289 * list of zero-based port numbers.
2291 p = val;
2293 for (i = 0; i < MAX_DRIVES; i++) {
2294 if (*p) {
2295 port = (unsigned int) strtoul(p, &p, 0);
2297 if (*p) p++;
2299 ahci_map[i] = port % NR_PORTS;
2301 else ahci_map[i] = NO_PORT;
2305 /* Create a reverse mapping. */
2306 for (i = 0; i < MAX_DRIVES; i++)
2307 if ((j = ahci_map[i]) != NO_PORT)
2308 port_state[j].device = i;
2311 /*===========================================================================*
2312 * sef_cb_init_fresh *
2313 *===========================================================================*/
2314 static int sef_cb_init_fresh(int type, sef_init_info_t *UNUSED(info))
2316 /* Initialize the driver.
2318 int devind;
2320 /* Get command line parameters. */
2321 ahci_get_params();
2323 /* Probe for recognized devices, skipping matches as appropriate. */
2324 devind = ahci_probe(ahci_instance);
2326 if (devind < 0)
2327 panic("no matching device found");
2329 /* Initialize the device we found. */
2330 ahci_init(devind);
2332 /* Create a mapping from device nodes to port numbers. */
2333 ahci_set_mapping();
2335 /* Announce that we are up. */
2336 blockdriver_announce(type);
2338 return OK;
2341 /*===========================================================================*
2342 * sef_cb_signal_handler *
2343 *===========================================================================*/
2344 static void sef_cb_signal_handler(int signo)
2346 /* In case of a termination signal, shut down this driver.
2348 int port;
2350 if (signo != SIGTERM) return;
2352 /* If any ports are still opened, assume that the system is being shut
2353 * down, and stay up until the last device has been closed.
2355 ahci_exiting = TRUE;
2357 for (port = 0; port < hba_state.nr_ports; port++)
2358 if (port_state[port].open_count > 0)
2359 return;
2361 /* If not, stop the driver and exit immediately. */
2362 ahci_stop();
2364 exit(0);
2367 /*===========================================================================*
2368 * sef_local_startup *
2369 *===========================================================================*/
2370 static void sef_local_startup(void)
2372 /* Set callbacks and initialize the System Event Framework (SEF).
2375 /* Register init callbacks. */
2376 sef_setcb_init_fresh(sef_cb_init_fresh);
2378 /* Register signal callbacks. */
2379 sef_setcb_signal_handler(sef_cb_signal_handler);
2381 /* Enable support for live update. */
2382 blockdriver_mt_support_lu();
2384 /* Let SEF perform startup. */
2385 sef_startup();
2388 /*===========================================================================*
2389 * ahci_portname *
2390 *===========================================================================*/
2391 static char *ahci_portname(struct port_state *ps)
2393 /* Return a printable name for the given port. Whenever we can, print a
2394 * "Dx" device number rather than a "Pxx" port number, because the user
2395 * may not be aware of the mapping currently in use.
2397 static char name[] = "AHCI0-P00";
2399 name[4] = '0' + ahci_instance;
2401 if (ps->device == NO_DEVICE) {
2402 name[6] = 'P';
2403 name[7] = '0' + (ps - port_state) / 10;
2404 name[8] = '0' + (ps - port_state) % 10;
2406 else {
2407 name[6] = 'D';
2408 name[7] = '0' + ps->device;
2409 name[8] = 0;
2412 return name;
2415 /*===========================================================================*
2416 * ahci_map_minor *
2417 *===========================================================================*/
2418 static struct port_state *ahci_map_minor(devminor_t minor, struct device **dvp)
2420 /* Map a minor device number to a port and a pointer to the partition's
2421 * device structure. Return NULL if this minor device number does not
2422 * identify an actual device.
2424 struct port_state *ps;
2425 int port;
2427 ps = NULL;
2429 if (minor >= 0 && minor < NR_MINORS) {
2430 port = ahci_map[minor / DEV_PER_DRIVE];
2432 if (port == NO_PORT)
2433 return NULL;
2435 ps = &port_state[port];
2436 *dvp = &ps->part[minor % DEV_PER_DRIVE];
2438 else if ((unsigned) (minor -= MINOR_d0p0s0) < NR_SUBDEVS) {
2439 port = ahci_map[minor / SUB_PER_DRIVE];
2441 if (port == NO_PORT)
2442 return NULL;
2444 ps = &port_state[port];
2445 *dvp = &ps->subpart[minor % SUB_PER_DRIVE];
2448 return ps;
2451 /*===========================================================================*
2452 * ahci_part *
2453 *===========================================================================*/
2454 static struct device *ahci_part(devminor_t minor)
2456 /* Return a pointer to the partition information structure of the given
2457 * minor device.
2459 struct device *dv;
2461 if (ahci_map_minor(minor, &dv) == NULL)
2462 return NULL;
2464 return dv;
2467 /*===========================================================================*
2468 * ahci_open *
2469 *===========================================================================*/
2470 static int ahci_open(devminor_t minor, int access)
2472 /* Open a device.
2474 struct port_state *ps;
2475 int r;
2477 ps = ahci_get_port(minor);
2479 /* Only one open request can be processed at a time, due to the fact
2480 * that it is an exclusive operation. The thread that handles this call
2481 * can therefore freely register itself at slot zero.
2483 ps->cmd_info[0].tid = blockdriver_mt_get_tid();
2485 /* If we are still in the process of initializing this port or device,
2486 * wait for completion of that phase first.
2488 if (ps->flags & FLAG_BUSY)
2489 port_wait(ps);
2491 /* The device may only be opened if it is now properly functioning. */
2492 if (ps->state != STATE_GOOD_DEV)
2493 return ENXIO;
2495 /* Some devices may only be opened in read-only mode. */
2496 if ((ps->flags & FLAG_READONLY) && (access & BDEV_W_BIT))
2497 return EACCES;
2499 if (ps->open_count == 0) {
2500 /* The first open request. Clear the barrier flag, if set. */
2501 ps->flags &= ~FLAG_BARRIER;
2503 /* Recheck media only when nobody is using the device. */
2504 if ((ps->flags & FLAG_ATAPI) &&
2505 (r = atapi_check_medium(ps, 0)) != OK)
2506 return r;
2508 /* After rechecking the media, the partition table must always
2509 * be read. This is also a convenient time to do it for
2510 * nonremovable devices. Start by resetting the partition
2511 * tables and setting the working size of the entire device.
2513 memset(ps->part, 0, sizeof(ps->part));
2514 memset(ps->subpart, 0, sizeof(ps->subpart));
2516 ps->part[0].dv_size = ps->lba_count * ps->sector_size;
2518 partition(&ahci_dtab, ps->device * DEV_PER_DRIVE, P_PRIMARY,
2519 !!(ps->flags & FLAG_ATAPI));
2521 blockdriver_mt_set_workers(ps->device, ps->queue_depth);
2523 else {
2524 /* If the barrier flag is set, deny new open requests until the
2525 * device is fully closed first.
2527 if (ps->flags & FLAG_BARRIER)
2528 return ENXIO;
2531 ps->open_count++;
2533 return OK;
2536 /*===========================================================================*
2537 * ahci_close *
2538 *===========================================================================*/
2539 static int ahci_close(devminor_t minor)
2541 /* Close a device.
2543 struct port_state *ps;
2544 int port;
2546 ps = ahci_get_port(minor);
2548 /* Decrease the open count. */
2549 if (ps->open_count <= 0) {
2550 dprintf(V_ERR, ("%s: closing already-closed port\n",
2551 ahci_portname(ps)));
2553 return EINVAL;
2556 ps->open_count--;
2558 if (ps->open_count > 0)
2559 return OK;
2561 /* The device is now fully closed. That also means that the threads for
2562 * this device are not needed anymore, so we reduce the count to one.
2564 blockdriver_mt_set_workers(ps->device, 1);
2566 if (ps->state == STATE_GOOD_DEV && !(ps->flags & FLAG_BARRIER)) {
2567 dprintf(V_INFO, ("%s: flushing write cache\n",
2568 ahci_portname(ps)));
2570 (void) gen_flush_wcache(ps);
2573 /* If the entire driver has been told to terminate, check whether all
2574 * devices are now closed. If so, tell libblockdriver to quit after
2575 * replying to the close request.
2577 if (ahci_exiting) {
2578 for (port = 0; port < hba_state.nr_ports; port++)
2579 if (port_state[port].open_count > 0)
2580 break;
2582 if (port == hba_state.nr_ports) {
2583 ahci_stop();
2585 blockdriver_mt_terminate();
2589 return OK;
2592 /*===========================================================================*
2593 * ahci_transfer *
2594 *===========================================================================*/
2595 static ssize_t ahci_transfer(devminor_t minor, int do_write, u64_t position,
2596 endpoint_t endpt, iovec_t *iovec, unsigned int count, int flags)
2598 /* Perform data transfer on the selected device.
2600 struct port_state *ps;
2601 struct device *dv;
2602 u64_t pos, eof;
2604 ps = ahci_get_port(minor);
2605 dv = ahci_part(minor);
2607 if (ps->state != STATE_GOOD_DEV || (ps->flags & FLAG_BARRIER))
2608 return EIO;
2610 if (count > NR_IOREQS)
2611 return EINVAL;
2613 /* Check for basic end-of-partition condition: if the start position of
2614 * the request is outside the partition, return success immediately.
2615 * The size of the request is obtained, and possibly reduced, later.
2617 if (position >= dv->dv_size)
2618 return OK;
2620 pos = dv->dv_base + position;
2621 eof = dv->dv_base + dv->dv_size;
2623 return port_transfer(ps, pos, eof, endpt, (iovec_s_t *) iovec, count,
2624 do_write, flags);
2627 /*===========================================================================*
2628 * ahci_ioctl *
2629 *===========================================================================*/
2630 static int ahci_ioctl(devminor_t minor, unsigned long request,
2631 endpoint_t endpt, cp_grant_id_t grant, endpoint_t UNUSED(user_endpt))
2633 /* Process I/O control requests.
2635 struct port_state *ps;
2636 int r, val;
2638 ps = ahci_get_port(minor);
2640 switch (request) {
2641 case DIOCEJECT:
2642 if (ps->state != STATE_GOOD_DEV || (ps->flags & FLAG_BARRIER))
2643 return EIO;
2645 if (!(ps->flags & FLAG_ATAPI))
2646 return EINVAL;
2648 return atapi_load_eject(ps, 0, FALSE /*load*/);
2650 case DIOCOPENCT:
2651 return sys_safecopyto(endpt, grant, 0,
2652 (vir_bytes) &ps->open_count, sizeof(ps->open_count));
2654 case DIOCFLUSH:
2655 if (ps->state != STATE_GOOD_DEV || (ps->flags & FLAG_BARRIER))
2656 return EIO;
2658 return gen_flush_wcache(ps);
2660 case DIOCSETWC:
2661 if (ps->state != STATE_GOOD_DEV || (ps->flags & FLAG_BARRIER))
2662 return EIO;
2664 if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes) &val,
2665 sizeof(val))) != OK)
2666 return r;
2668 return gen_set_wcache(ps, val);
2670 case DIOCGETWC:
2671 if (ps->state != STATE_GOOD_DEV || (ps->flags & FLAG_BARRIER))
2672 return EIO;
2674 if ((r = gen_get_wcache(ps, &val)) != OK)
2675 return r;
2677 return sys_safecopyto(endpt, grant, 0, (vir_bytes) &val,
2678 sizeof(val));
2681 return ENOTTY;
2684 /*===========================================================================*
2685 * ahci_device *
2686 *===========================================================================*/
2687 static int ahci_device(devminor_t minor, device_id_t *id)
2689 /* Map a minor device number to a device ID.
2691 struct port_state *ps;
2692 struct device *dv;
2694 if ((ps = ahci_map_minor(minor, &dv)) == NULL)
2695 return ENXIO;
2697 *id = ps->device;
2699 return OK;
2702 /*===========================================================================*
2703 * ahci_get_port *
2704 *===========================================================================*/
2705 static struct port_state *ahci_get_port(devminor_t minor)
2707 /* Get the port structure associated with the given minor device.
2708 * Called only from worker threads, so the minor device is already
2709 * guaranteed to map to a port.
2711 struct port_state *ps;
2712 struct device *dv;
2714 if ((ps = ahci_map_minor(minor, &dv)) == NULL)
2715 panic("device mapping for minor %d disappeared", minor);
2717 return ps;
2720 /*===========================================================================*
2721 * main *
2722 *===========================================================================*/
2723 int main(int argc, char **argv)
2725 /* Driver task.
2728 env_setargs(argc, argv);
2729 sef_local_startup();
2731 blockdriver_mt_task(&ahci_dtab);
2733 return 0;