vm: fix region reporting bug
[minix.git] / drivers / ahci / ahci.c
blob8fdcc345052fac41cd226767eacc53203c87b168
1 /* Advanced Host Controller Interface (AHCI) driver, by D.C. van Moolenbroek
2 * - Multithreading support by Arne Welzel
3 * - Native Command Queuing support by Raja Appuswamy
4 */
5 /*
6 * This driver is based on the following specifications:
7 * - Serial ATA Advanced Host Controller Interface (AHCI) 1.3
8 * - Serial ATA Revision 2.6
9 * - AT Attachment with Packet Interface 7 (ATA/ATAPI-7)
10 * - ATAPI Removable Rewritable Media Devices 1.3 (SFF-8070)
12 * The driver supports device hot-plug, active device status tracking,
13 * nonremovable ATA and removable ATAPI devices, custom logical sector sizes,
14 * sector-unaligned reads, native command queuing and parallel requests to
15 * different devices.
17 * It does not implement transparent failure recovery, power management, or
18 * port multiplier support.
21 * An AHCI controller exposes a number of ports (up to 32), each of which may
22 * or may not have one device attached (port multipliers are not supported).
23 * Each port is maintained independently.
25 * The following figure depicts the possible transitions between port states.
26 * The NO_PORT state is not included; no transitions can be made from or to it.
28 * +----------+ +----------+
29 * | SPIN_UP | ------+ +-----> | BAD_DEV | ------------------+
30 * +----------+ | | +----------+ |
31 * | | | ^ |
32 * v v | | |
33 * +----------+ +----------+ +----------+ +----------+ |
34 * | NO_DEV | --> | WAIT_SIG | --> | WAIT_ID | --> | GOOD_DEV | |
35 * +----------+ +----------+ +----------+ +----------+ |
36 * ^ | | | |
37 * +----------------+----------------+----------------+--------+
39 * At driver startup, all physically present ports are put in SPIN_UP state.
40 * This state differs from NO_DEV in that BDEV_OPEN calls will be deferred
41 * until either the spin-up timer expires, or a device has been identified on
42 * that port. This prevents early BDEV_OPEN calls from failing erroneously at
43 * startup time if the device has not yet been able to announce its presence.
45 * If a device is detected, either at startup time or after hot-plug, its
46 * signature is checked and it is identified, after which it may be determined
47 * to be a usable ("good") device, which means that the device is considered to
48 * be in a working state. If these steps fail, the device is marked as unusable
49 * ("bad"). At any point in time, the device may be disconnected; the port is
50 * then put back into NO_DEV state.
52 * A device in working state (GOOD_DEV) may or may not have a medium. All ATA
53 * devices are assumed to be fixed; all ATAPI devices are assumed to have
54 * removable media. To prevent erroneous access to switched devices and media,
55 * the driver makes devices inaccessible until they are fully closed (the open
56 * count is zero) when a device (hot-plug) or medium change is detected.
57 * For hot-plug changes, access is prevented by setting the BARRIER flag until
58 * the device is fully closed and then reopened. For medium changes, access is
59 * prevented by not acknowledging the medium change until the device is fully
60 * closed and reopened. Removable media are not locked in the drive while
61 * opened, because the driver author is uncomfortable with that concept.
63 * The following table lists for each state, whether the port is started
64 * (PxCMD.ST is set), whether a timer is running, what the PxIE mask is to be
65 * set to, and what BDEV_OPEN calls on this port should return.
67 * State Started Timer PxIE BDEV_OPEN
68 * --------- --------- --------- --------- ---------
69 * NO_PORT no no (none) ENXIO
70 * SPIN_UP no yes PRCE (wait)
71 * NO_DEV no no PRCE ENXIO
72 * WAIT_SIG yes yes PRCE (wait)
73 * WAIT_ID yes yes (all) (wait)
74 * BAD_DEV no no PRCE ENXIO
75 * GOOD_DEV yes per-command (all) OK
77 * In order to continue deferred BDEV_OPEN calls, the BUSY flag must be unset
78 * when changing from SPIN_UP to any state but WAIT_SIG, and when changing from
79 * WAIT_SIG to any state but WAIT_ID, and when changing from WAIT_ID to any
80 * other state.
83 * The maximum byte size of a single transfer (MAX_TRANSFER) is currently set
84 * to 4MB. This limit has been chosen for a number of reasons:
85 * - The size that can be specified in a Physical Region Descriptor (PRD) is
86 * limited to 4MB for AHCI. Limiting the total transfer size to at most this
87 * size implies that no I/O vector element needs to be split up across PRDs.
88 * This means that the maximum number of needed PRDs can be predetermined.
89 * - The limit is below what can be transferred in a single ATA request, namely
90 * 64k sectors (i.e., at least 32MB). This means that transfer requests need
91 * never be split up into smaller chunks, reducing implementation complexity.
92 * - A single, static timeout can be used for transfers. Very large transfers
93 * can legitimately take up to several minutes -- well beyond the appropriate
94 * timeout range for small transfers. The limit obviates the need for a
95 * timeout scheme that takes into account the transfer size.
96 * - Similarly, the transfer limit reduces the opportunity for buggy/malicious
97 * clients to keep the driver busy for a long time with a single request.
98 * - The limit is high enough for all practical purposes. The transfer setup
99 * overhead is already relatively negligible at this size, and even larger
100 * requests will not help maximize throughput. As NR_IOREQS is currently set
101 * to 64, the limit still allows file systems to perform I/O requests with
102 * vectors completely filled with 64KB-blocks.
104 #include <minix/drivers.h>
105 #include <minix/blockdriver_mt.h>
106 #include <minix/drvlib.h>
107 #include <machine/pci.h>
108 #include <sys/ioc_disk.h>
109 #include <sys/mman.h>
110 #include <assert.h>
112 #include "ahci.h"
114 /* Host Bus Adapter (HBA) state. */
115 static struct {
116 volatile u32_t *base; /* base address of memory-mapped registers */
117 size_t size; /* size of memory-mapped register area */
119 int nr_ports; /* addressable number of ports (1..NR_PORTS) */
120 int nr_cmds; /* maximum number of commands per port */
121 int has_ncq; /* NCQ support flag */
123 int irq; /* IRQ number */
124 int hook_id; /* IRQ hook ID */
125 } hba_state;
127 /* Port state. */
128 static struct port_state {
129 int state; /* port state */
130 unsigned int flags; /* port flags */
132 volatile u32_t *reg; /* memory-mapped port registers */
134 u8_t *mem_base; /* primary memory buffer virtual address */
135 phys_bytes mem_phys; /* primary memory buffer physical address */
136 vir_bytes mem_size; /* primary memory buffer size */
138 /* the FIS, CL, CT[0] and TMP buffers are all in the primary buffer */
139 u32_t *fis_base; /* FIS receive buffer virtual address */
140 phys_bytes fis_phys; /* FIS receive buffer physical address */
141 u32_t *cl_base; /* command list buffer virtual address */
142 phys_bytes cl_phys; /* command list buffer physical address */
143 u8_t *ct_base[NR_CMDS]; /* command table virtual address */
144 phys_bytes ct_phys[NR_CMDS]; /* command table physical address */
145 u8_t *tmp_base; /* temporary storage buffer virtual address */
146 phys_bytes tmp_phys; /* temporary storage buffer physical address */
148 u8_t *pad_base; /* sector padding buffer virtual address */
149 phys_bytes pad_phys; /* sector padding buffer physical address */
150 vir_bytes pad_size; /* sector padding buffer size */
152 u64_t lba_count; /* number of valid Logical Block Addresses */
153 u32_t sector_size; /* medium sector size in bytes */
155 int open_count; /* number of times this port is opened */
157 int device; /* associated device number, or NO_DEVICE */
158 struct device part[DEV_PER_DRIVE]; /* partition bases and sizes */
159 struct device subpart[SUB_PER_DRIVE]; /* same for subpartitions */
161 timer_t timer; /* port-specific timeout timer */
162 int left; /* number of tries left before giving up */
163 /* (only used for signature probing) */
165 int queue_depth; /* NCQ queue depth */
166 u32_t pend_mask; /* commands not yet complete */
167 struct {
168 thread_id_t tid;/* ID of the worker thread */
169 timer_t timer; /* timer associated with each request */
170 int result; /* success/failure result of the commands */
171 } cmd_info[NR_CMDS];
172 } port_state[NR_PORTS];
174 static int ahci_instance; /* driver instance number */
176 static int ahci_verbose; /* verbosity level (0..4) */
178 /* Timeout values. These can be overridden with environment variables. */
179 static long ahci_spinup_timeout = SPINUP_TIMEOUT;
180 static long ahci_sig_timeout = SIG_TIMEOUT;
181 static long ahci_sig_checks = NR_SIG_CHECKS;
182 static long ahci_command_timeout = COMMAND_TIMEOUT;
183 static long ahci_transfer_timeout = TRANSFER_TIMEOUT;
184 static long ahci_flush_timeout = FLUSH_TIMEOUT;
186 static int ahci_map[MAX_DRIVES]; /* device-to-port mapping */
188 static int ahci_exiting = FALSE; /* exit after last close? */
190 #define BUILD_ARG(port, tag) (((port) << 8) | (tag))
191 #define GET_PORT(arg) ((arg) >> 8)
192 #define GET_TAG(arg) ((arg) & 0xFF)
194 #define dprintf(v,s) do { \
195 if (ahci_verbose >= (v)) \
196 printf s; \
197 } while (0)
199 static void port_set_cmd(struct port_state *ps, int cmd, cmd_fis_t *fis,
200 u8_t packet[ATAPI_PACKET_SIZE], prd_t *prdt, int nr_prds, int write);
201 static void port_issue(struct port_state *ps, int cmd, clock_t timeout);
202 static int port_exec(struct port_state *ps, int cmd, clock_t timeout);
203 static void port_timeout(struct timer *tp);
204 static void port_disconnect(struct port_state *ps);
206 static char *ahci_portname(struct port_state *ps);
207 static int ahci_open(dev_t minor, int access);
208 static int ahci_close(dev_t minor);
209 static ssize_t ahci_transfer(dev_t minor, int do_write, u64_t position,
210 endpoint_t endpt, iovec_t *iovec, unsigned int count,
211 int flags);
212 static struct device *ahci_part(dev_t minor);
213 static void ahci_alarm(clock_t stamp);
214 static int ahci_ioctl(dev_t minor, unsigned int request, endpoint_t endpt,
215 cp_grant_id_t grant);
216 static void ahci_intr(unsigned int mask);
217 static int ahci_device(dev_t minor, device_id_t *id);
218 static struct port_state *ahci_get_port(dev_t minor);
220 /* AHCI driver table. */
221 static struct blockdriver ahci_dtab = {
222 BLOCKDRIVER_TYPE_DISK,
223 ahci_open,
224 ahci_close,
225 ahci_transfer,
226 ahci_ioctl,
227 NULL, /* bdr_cleanup */
228 ahci_part,
229 NULL, /* bdr_geometry */
230 ahci_intr,
231 ahci_alarm,
232 NULL, /* bdr_other */
233 ahci_device
236 /*===========================================================================*
237 * atapi_exec *
238 *===========================================================================*/
239 static int atapi_exec(struct port_state *ps, int cmd,
240 u8_t packet[ATAPI_PACKET_SIZE], size_t size, int write)
242 /* Execute an ATAPI command. Return OK or error.
244 cmd_fis_t fis;
245 prd_t prd[1];
246 int nr_prds = 0;
248 assert(size <= AHCI_TMP_SIZE);
250 /* Fill in the command table with a FIS, a packet, and if a data
251 * transfer is requested, also a PRD.
253 memset(&fis, 0, sizeof(fis));
254 fis.cf_cmd = ATA_CMD_PACKET;
256 if (size > 0) {
257 fis.cf_feat = ATA_FEAT_PACKET_DMA;
258 if (!write && (ps->flags & FLAG_USE_DMADIR))
259 fis.cf_feat |= ATA_FEAT_PACKET_DMADIR;
261 prd[0].vp_addr = ps->tmp_phys;
262 prd[0].vp_size = size;
263 nr_prds++;
266 /* Start the command, and wait for it to complete or fail. */
267 port_set_cmd(ps, cmd, &fis, packet, prd, nr_prds, write);
269 return port_exec(ps, cmd, ahci_command_timeout);
272 /*===========================================================================*
273 * atapi_test_unit *
274 *===========================================================================*/
275 static int atapi_test_unit(struct port_state *ps, int cmd)
277 /* Test whether the ATAPI device and medium are ready.
279 u8_t packet[ATAPI_PACKET_SIZE];
281 memset(packet, 0, sizeof(packet));
282 packet[0] = ATAPI_CMD_TEST_UNIT;
284 return atapi_exec(ps, cmd, packet, 0, FALSE);
287 /*===========================================================================*
288 * atapi_request_sense *
289 *===========================================================================*/
290 static int atapi_request_sense(struct port_state *ps, int cmd, int *sense)
292 /* Request error (sense) information from an ATAPI device, and return
293 * the sense key. The additional sense codes are not used at this time.
295 u8_t packet[ATAPI_PACKET_SIZE];
296 int r;
298 memset(packet, 0, sizeof(packet));
299 packet[0] = ATAPI_CMD_REQUEST_SENSE;
300 packet[4] = ATAPI_REQUEST_SENSE_LEN;
302 r = atapi_exec(ps, cmd, packet, ATAPI_REQUEST_SENSE_LEN, FALSE);
304 if (r != OK)
305 return r;
307 dprintf(V_REQ, ("%s: ATAPI SENSE: sense %x ASC %x ASCQ %x\n",
308 ahci_portname(ps), ps->tmp_base[2] & 0xF, ps->tmp_base[12],
309 ps->tmp_base[13]));
311 *sense = ps->tmp_base[2] & 0xF;
313 return OK;
316 /*===========================================================================*
317 * atapi_load_eject *
318 *===========================================================================*/
319 static int atapi_load_eject(struct port_state *ps, int cmd, int load)
321 /* Load or eject a medium in an ATAPI device.
323 u8_t packet[ATAPI_PACKET_SIZE];
325 memset(packet, 0, sizeof(packet));
326 packet[0] = ATAPI_CMD_START_STOP;
327 packet[4] = load ? ATAPI_START_STOP_LOAD : ATAPI_START_STOP_EJECT;
329 return atapi_exec(ps, cmd, packet, 0, FALSE);
332 /*===========================================================================*
333 * atapi_read_capacity *
334 *===========================================================================*/
335 static int atapi_read_capacity(struct port_state *ps, int cmd)
337 /* Retrieve the LBA count and sector size of an ATAPI medium.
339 u8_t packet[ATAPI_PACKET_SIZE], *buf;
340 int r;
342 memset(packet, 0, sizeof(packet));
343 packet[0] = ATAPI_CMD_READ_CAPACITY;
345 r = atapi_exec(ps, cmd, packet, ATAPI_READ_CAPACITY_LEN, FALSE);
346 if (r != OK)
347 return r;
349 /* Store the number of LBA blocks and sector size. */
350 buf = ps->tmp_base;
351 ps->lba_count = add64u(cvu64((buf[0] << 24) | (buf[1] << 16) |
352 (buf[2] << 8) | buf[3]), 1);
353 ps->sector_size =
354 (buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7];
356 if (ps->sector_size == 0 || (ps->sector_size & 1)) {
357 dprintf(V_ERR, ("%s: invalid medium sector size %u\n",
358 ahci_portname(ps), ps->sector_size));
360 return EINVAL;
363 dprintf(V_INFO,
364 ("%s: medium detected (%u byte sectors, %lu MB size)\n",
365 ahci_portname(ps), ps->sector_size,
366 div64u(mul64(ps->lba_count, cvu64(ps->sector_size)),
367 1024*1024)));
369 return OK;
372 /*===========================================================================*
373 * atapi_check_medium *
374 *===========================================================================*/
375 static int atapi_check_medium(struct port_state *ps, int cmd)
377 /* Check whether a medium is present in a removable-media ATAPI device.
378 * If a new medium is detected, get its total and sector size. Return
379 * OK only if a usable medium is present, and an error otherwise.
381 int sense;
383 /* Perform a readiness check. */
384 if (atapi_test_unit(ps, cmd) != OK) {
385 ps->flags &= ~FLAG_HAS_MEDIUM;
387 /* If the check failed due to a unit attention condition, retry
388 * reading the medium capacity. Otherwise, assume that there is
389 * no medium available.
391 if (atapi_request_sense(ps, cmd, &sense) != OK ||
392 sense != ATAPI_SENSE_UNIT_ATT)
393 return ENXIO;
396 /* If a medium is newly detected, try reading its capacity now. */
397 if (!(ps->flags & FLAG_HAS_MEDIUM)) {
398 if (atapi_read_capacity(ps, cmd) != OK)
399 return EIO;
401 ps->flags |= FLAG_HAS_MEDIUM;
404 return OK;
407 /*===========================================================================*
408 * atapi_id_check *
409 *===========================================================================*/
410 static int atapi_id_check(struct port_state *ps, u16_t *buf)
412 /* Determine whether we support this ATAPI device based on the
413 * identification data it returned, and store some of its properties.
416 /* The device must be an ATAPI device; it must have removable media;
417 * it must support DMA without DMADIR, or DMADIR for DMA.
419 if ((buf[ATA_ID_GCAP] & (ATA_ID_GCAP_ATAPI_MASK |
420 ATA_ID_GCAP_REMOVABLE | ATA_ID_GCAP_INCOMPLETE)) !=
421 (ATA_ID_GCAP_ATAPI | ATA_ID_GCAP_REMOVABLE) ||
422 ((buf[ATA_ID_CAP] & ATA_ID_CAP_DMA) != ATA_ID_CAP_DMA &&
423 (buf[ATA_ID_DMADIR] & (ATA_ID_DMADIR_DMADIR |
424 ATA_ID_DMADIR_DMA)) != (ATA_ID_DMADIR_DMADIR |
425 ATA_ID_DMADIR_DMA))) {
427 dprintf(V_ERR, ("%s: unsupported ATAPI device\n",
428 ahci_portname(ps)));
430 dprintf(V_DEV, ("%s: GCAP %04x CAP %04x DMADIR %04x\n",
431 ahci_portname(ps), buf[ATA_ID_GCAP], buf[ATA_ID_CAP],
432 buf[ATA_ID_DMADIR]));
434 return FALSE;
437 /* Remember whether to use the DMADIR flag when appropriate. */
438 if (buf[ATA_ID_DMADIR] & ATA_ID_DMADIR_DMADIR)
439 ps->flags |= FLAG_USE_DMADIR;
441 /* ATAPI CD-ROM devices are considered read-only. */
442 if (((buf[ATA_ID_GCAP] & ATA_ID_GCAP_TYPE_MASK) >>
443 ATA_ID_GCAP_TYPE_SHIFT) == ATAPI_TYPE_CDROM)
444 ps->flags |= FLAG_READONLY;
446 if ((buf[ATA_ID_SUP1] & ATA_ID_SUP1_VALID_MASK) == ATA_ID_SUP1_VALID &&
447 !(ps->flags & FLAG_READONLY)) {
448 /* Save write cache related capabilities of the device. It is
449 * possible, although unlikely, that a device has support for
450 * either of these but not both.
452 if (buf[ATA_ID_SUP0] & ATA_ID_SUP0_WCACHE)
453 ps->flags |= FLAG_HAS_WCACHE;
455 if (buf[ATA_ID_SUP1] & ATA_ID_SUP1_FLUSH)
456 ps->flags |= FLAG_HAS_FLUSH;
459 return TRUE;
462 /*===========================================================================*
463 * atapi_transfer *
464 *===========================================================================*/
465 static int atapi_transfer(struct port_state *ps, int cmd, u64_t start_lba,
466 unsigned int count, int write, prd_t *prdt, int nr_prds)
468 /* Perform data transfer from or to an ATAPI device.
470 cmd_fis_t fis;
471 u8_t packet[ATAPI_PACKET_SIZE];
473 /* Fill in a Register Host to Device FIS. */
474 memset(&fis, 0, sizeof(fis));
475 fis.cf_cmd = ATA_CMD_PACKET;
476 fis.cf_feat = ATA_FEAT_PACKET_DMA;
477 if (!write && (ps->flags & FLAG_USE_DMADIR))
478 fis.cf_feat |= ATA_FEAT_PACKET_DMADIR;
480 /* Fill in a packet. */
481 memset(packet, 0, sizeof(packet));
482 packet[0] = write ? ATAPI_CMD_WRITE : ATAPI_CMD_READ;
483 packet[2] = (ex64lo(start_lba) >> 24) & 0xFF;
484 packet[3] = (ex64lo(start_lba) >> 16) & 0xFF;
485 packet[4] = (ex64lo(start_lba) >> 8) & 0xFF;
486 packet[5] = ex64lo(start_lba) & 0xFF;
487 packet[6] = (count >> 24) & 0xFF;
488 packet[7] = (count >> 16) & 0xFF;
489 packet[8] = (count >> 8) & 0xFF;
490 packet[9] = count & 0xFF;
492 /* Start the command, and wait for it to complete or fail. */
493 port_set_cmd(ps, cmd, &fis, packet, prdt, nr_prds, write);
495 return port_exec(ps, cmd, ahci_transfer_timeout);
498 /*===========================================================================*
499 * ata_id_check *
500 *===========================================================================*/
501 static int ata_id_check(struct port_state *ps, u16_t *buf)
503 /* Determine whether we support this ATA device based on the
504 * identification data it returned, and store some of its properties.
507 /* This must be an ATA device; it must not have removable media;
508 * it must support LBA and DMA; it must support the FLUSH CACHE
509 * command; it must support 48-bit addressing.
511 if ((buf[ATA_ID_GCAP] & (ATA_ID_GCAP_ATA_MASK | ATA_ID_GCAP_REMOVABLE |
512 ATA_ID_GCAP_INCOMPLETE)) != ATA_ID_GCAP_ATA ||
513 (buf[ATA_ID_CAP] & (ATA_ID_CAP_LBA | ATA_ID_CAP_DMA)) !=
514 (ATA_ID_CAP_LBA | ATA_ID_CAP_DMA) ||
515 (buf[ATA_ID_SUP1] & (ATA_ID_SUP1_VALID_MASK |
516 ATA_ID_SUP1_FLUSH | ATA_ID_SUP1_LBA48)) !=
517 (ATA_ID_SUP1_VALID | ATA_ID_SUP1_FLUSH | ATA_ID_SUP1_LBA48)) {
519 dprintf(V_ERR, ("%s: unsupported ATA device\n",
520 ahci_portname(ps)));
522 dprintf(V_DEV, ("%s: GCAP %04x CAP %04x SUP1 %04x\n",
523 ahci_portname(ps), buf[ATA_ID_GCAP], buf[ATA_ID_CAP],
524 buf[ATA_ID_SUP1]));
526 return FALSE;
529 /* Get number of LBA blocks, and sector size. */
530 ps->lba_count = make64((buf[ATA_ID_LBA1] << 16) | buf[ATA_ID_LBA0],
531 (buf[ATA_ID_LBA3] << 16) | buf[ATA_ID_LBA2]);
533 /* Determine the queue depth of the device. */
534 if (hba_state.has_ncq &&
535 (buf[ATA_ID_SATA_CAP] & ATA_ID_SATA_CAP_NCQ)) {
536 ps->flags |= FLAG_HAS_NCQ;
537 ps->queue_depth =
538 (buf[ATA_ID_QDEPTH] & ATA_ID_QDEPTH_MASK) + 1;
539 if (ps->queue_depth > hba_state.nr_cmds)
540 ps->queue_depth = hba_state.nr_cmds;
543 /* For now, we only support long logical sectors. Long physical sector
544 * support may be added later. Note that the given value is in words.
546 if ((buf[ATA_ID_PLSS] & (ATA_ID_PLSS_VALID_MASK | ATA_ID_PLSS_LLS)) ==
547 (ATA_ID_PLSS_VALID | ATA_ID_PLSS_LLS))
548 ps->sector_size =
549 ((buf[ATA_ID_LSS1] << 16) | buf[ATA_ID_LSS0]) << 1;
550 else
551 ps->sector_size = ATA_SECTOR_SIZE;
553 if (ps->sector_size < ATA_SECTOR_SIZE) {
554 dprintf(V_ERR, ("%s: invalid sector size %u\n",
555 ahci_portname(ps), ps->sector_size));
557 return FALSE;
560 ps->flags |= FLAG_HAS_MEDIUM | FLAG_HAS_FLUSH;
562 /* FLUSH CACHE is mandatory for ATA devices; write caches are not. */
563 if (buf[ATA_ID_SUP0] & ATA_ID_SUP0_WCACHE)
564 ps->flags |= FLAG_HAS_WCACHE;
566 /* Check Force Unit Access capability of the device. */
567 if ((buf[ATA_ID_ENA2] & (ATA_ID_ENA2_VALID_MASK | ATA_ID_ENA2_FUA)) ==
568 (ATA_ID_ENA2_VALID | ATA_ID_ENA2_FUA))
569 ps->flags |= FLAG_HAS_FUA;
571 return TRUE;
574 /*===========================================================================*
575 * ata_transfer *
576 *===========================================================================*/
577 static int ata_transfer(struct port_state *ps, int cmd, u64_t start_lba,
578 unsigned int count, int write, int force, prd_t *prdt, int nr_prds)
580 /* Perform data transfer from or to an ATA device.
582 cmd_fis_t fis;
584 assert(count <= ATA_MAX_SECTORS);
586 /* Special case for sector counts: 65536 is specified as 0. */
587 if (count == ATA_MAX_SECTORS)
588 count = 0;
590 memset(&fis, 0, sizeof(fis));
591 fis.cf_dev = ATA_DEV_LBA;
592 if (ps->flags & FLAG_HAS_NCQ) {
593 if (write) {
594 if (force && (ps->flags & FLAG_HAS_FUA))
595 fis.cf_dev |= ATA_DEV_FUA;
597 fis.cf_cmd = ATA_CMD_WRITE_FPDMA_QUEUED;
598 } else {
599 fis.cf_cmd = ATA_CMD_READ_FPDMA_QUEUED;
602 else {
603 if (write) {
604 if (force && (ps->flags & FLAG_HAS_FUA))
605 fis.cf_cmd = ATA_CMD_WRITE_DMA_FUA_EXT;
606 else
607 fis.cf_cmd = ATA_CMD_WRITE_DMA_EXT;
609 else {
610 fis.cf_cmd = ATA_CMD_READ_DMA_EXT;
613 fis.cf_lba = ex64lo(start_lba) & 0x00FFFFFFL;
614 fis.cf_lba_exp = ex64lo(rshift64(start_lba, 24)) & 0x00FFFFFFL;
615 fis.cf_sec = count & 0xFF;
616 fis.cf_sec_exp = (count >> 8) & 0xFF;
618 /* Start the command, and wait for it to complete or fail. */
619 port_set_cmd(ps, cmd, &fis, NULL /*packet*/, prdt, nr_prds, write);
621 return port_exec(ps, cmd, ahci_transfer_timeout);
624 /*===========================================================================*
625 * gen_identify *
626 *===========================================================================*/
627 static int gen_identify(struct port_state *ps, int blocking)
629 /* Identify an ATA or ATAPI device. If the blocking flag is set, block
630 * until the command has completed; otherwise return immediately.
632 cmd_fis_t fis;
633 prd_t prd;
635 /* Set up a command, and a single PRD for the result. */
636 memset(&fis, 0, sizeof(fis));
638 if (ps->flags & FLAG_ATAPI)
639 fis.cf_cmd = ATA_CMD_IDENTIFY_PACKET;
640 else
641 fis.cf_cmd = ATA_CMD_IDENTIFY;
643 prd.vp_addr = ps->tmp_phys;
644 prd.vp_size = ATA_ID_SIZE;
646 /* Start the command, and possibly wait for the result. */
647 port_set_cmd(ps, 0, &fis, NULL /*packet*/, &prd, 1, FALSE /*write*/);
649 if (blocking)
650 return port_exec(ps, 0, ahci_command_timeout);
652 port_issue(ps, 0, ahci_command_timeout);
654 return OK;
657 /*===========================================================================*
658 * gen_flush_wcache *
659 *===========================================================================*/
660 static int gen_flush_wcache(struct port_state *ps)
662 /* Flush the device's write cache.
664 cmd_fis_t fis;
666 /* The FLUSH CACHE command may not be supported by all (writable ATAPI)
667 * devices.
669 if (!(ps->flags & FLAG_HAS_FLUSH))
670 return EINVAL;
672 /* Use the FLUSH CACHE command for both ATA and ATAPI. We are not
673 * interested in the disk location of a failure, so there is no reason
674 * to use the ATA-only FLUSH CACHE EXT command. Either way, the command
675 * may indeed fail due to a disk error, in which case it should be
676 * repeated. For now, we shift this responsibility onto the caller.
678 memset(&fis, 0, sizeof(fis));
679 fis.cf_cmd = ATA_CMD_FLUSH_CACHE;
681 /* Start the command, and wait for it to complete or fail.
682 * The flush command may take longer than regular I/O commands.
684 port_set_cmd(ps, 0, &fis, NULL /*packet*/, NULL /*prdt*/, 0,
685 FALSE /*write*/);
687 return port_exec(ps, 0, ahci_flush_timeout);
690 /*===========================================================================*
691 * gen_get_wcache *
692 *===========================================================================*/
693 static int gen_get_wcache(struct port_state *ps, int *val)
695 /* Retrieve the status of the device's write cache.
697 int r;
699 /* Write caches are not mandatory. */
700 if (!(ps->flags & FLAG_HAS_WCACHE))
701 return EINVAL;
703 /* Retrieve information about the device. */
704 if ((r = gen_identify(ps, TRUE /*blocking*/)) != OK)
705 return r;
707 /* Return the current setting. */
708 *val = !!(((u16_t *) ps->tmp_base)[ATA_ID_ENA0] & ATA_ID_ENA0_WCACHE);
710 return OK;
713 /*===========================================================================*
714 * gen_set_wcache *
715 *===========================================================================*/
716 static int gen_set_wcache(struct port_state *ps, int enable)
718 /* Enable or disable the device's write cache.
720 cmd_fis_t fis;
721 clock_t timeout;
723 /* Write caches are not mandatory. */
724 if (!(ps->flags & FLAG_HAS_WCACHE))
725 return EINVAL;
727 /* Disabling the write cache causes a (blocking) cache flush. Cache
728 * flushes may take much longer than regular commands.
730 timeout = enable ? ahci_command_timeout : ahci_flush_timeout;
732 /* Set up a command. */
733 memset(&fis, 0, sizeof(fis));
734 fis.cf_cmd = ATA_CMD_SET_FEATURES;
735 fis.cf_feat = enable ? ATA_SF_EN_WCACHE : ATA_SF_DI_WCACHE;
737 /* Start the command, and wait for it to complete or fail. */
738 port_set_cmd(ps, 0, &fis, NULL /*packet*/, NULL /*prdt*/, 0,
739 FALSE /*write*/);
741 return port_exec(ps, 0, timeout);
744 /*===========================================================================*
745 * ct_set_fis *
746 *===========================================================================*/
747 static vir_bytes ct_set_fis(u8_t *ct, cmd_fis_t *fis, unsigned int tag)
749 /* Fill in the Frame Information Structure part of a command table,
750 * and return the resulting FIS size (in bytes). We only support the
751 * command Register - Host to Device FIS type.
754 memset(ct, 0, ATA_H2D_SIZE);
755 ct[ATA_FIS_TYPE] = ATA_FIS_TYPE_H2D;
756 ct[ATA_H2D_FLAGS] = ATA_H2D_FLAGS_C;
757 ct[ATA_H2D_CMD] = fis->cf_cmd;
758 ct[ATA_H2D_LBA_LOW] = fis->cf_lba & 0xFF;
759 ct[ATA_H2D_LBA_MID] = (fis->cf_lba >> 8) & 0xFF;
760 ct[ATA_H2D_LBA_HIGH] = (fis->cf_lba >> 16) & 0xFF;
761 ct[ATA_H2D_DEV] = fis->cf_dev;
762 ct[ATA_H2D_LBA_LOW_EXP] = fis->cf_lba_exp & 0xFF;
763 ct[ATA_H2D_LBA_MID_EXP] = (fis->cf_lba_exp >> 8) & 0xFF;
764 ct[ATA_H2D_LBA_HIGH_EXP] = (fis->cf_lba_exp >> 16) & 0xFF;
765 ct[ATA_H2D_CTL] = fis->cf_ctl;
767 if (ATA_IS_FPDMA_CMD(fis->cf_cmd)) {
768 ct[ATA_H2D_FEAT] = fis->cf_sec;
769 ct[ATA_H2D_FEAT_EXP] = fis->cf_sec_exp;
770 ct[ATA_H2D_SEC] = tag << ATA_SEC_TAG_SHIFT;
771 ct[ATA_H2D_SEC_EXP] = 0;
772 } else {
773 ct[ATA_H2D_FEAT] = fis->cf_feat;
774 ct[ATA_H2D_FEAT_EXP] = fis->cf_feat_exp;
775 ct[ATA_H2D_SEC] = fis->cf_sec;
776 ct[ATA_H2D_SEC_EXP] = fis->cf_sec_exp;
779 return ATA_H2D_SIZE;
782 /*===========================================================================*
783 * ct_set_packet *
784 *===========================================================================*/
785 static void ct_set_packet(u8_t *ct, u8_t packet[ATAPI_PACKET_SIZE])
787 /* Fill in the packet part of a command table.
790 memcpy(&ct[AHCI_CT_PACKET_OFF], packet, ATAPI_PACKET_SIZE);
793 /*===========================================================================*
794 * ct_set_prdt *
795 *===========================================================================*/
796 static void ct_set_prdt(u8_t *ct, prd_t *prdt, int nr_prds)
798 /* Fill in the PRDT part of a command table.
800 u32_t *p;
801 int i;
803 p = (u32_t *) &ct[AHCI_CT_PRDT_OFF];
805 for (i = 0; i < nr_prds; i++, prdt++) {
806 *p++ = prdt->vp_addr;
807 *p++ = 0;
808 *p++ = 0;
809 *p++ = prdt->vp_size - 1;
813 /*===========================================================================*
814 * port_set_cmd *
815 *===========================================================================*/
816 static void port_set_cmd(struct port_state *ps, int cmd, cmd_fis_t *fis,
817 u8_t packet[ATAPI_PACKET_SIZE], prd_t *prdt, int nr_prds, int write)
819 /* Prepare the given command for execution, by constructing a command
820 * table and setting up a command list entry pointing to the table.
822 u8_t *ct;
823 u32_t *cl;
824 vir_bytes size;
826 /* Set a port-specific flag that tells us if the command being
827 * processed is a NCQ command or not.
829 if (ATA_IS_FPDMA_CMD(fis->cf_cmd)) {
830 ps->flags |= FLAG_NCQ_MODE;
831 } else {
832 assert(!ps->pend_mask);
833 ps->flags &= ~FLAG_NCQ_MODE;
836 /* Construct a command table, consisting of a command FIS, optionally
837 * a packet, and optionally a number of PRDs (making up the actual PRD
838 * table).
840 ct = ps->ct_base[cmd];
842 assert(ct != NULL);
843 assert(nr_prds <= NR_PRDS);
845 size = ct_set_fis(ct, fis, cmd);
847 if (packet != NULL)
848 ct_set_packet(ct, packet);
850 ct_set_prdt(ct, prdt, nr_prds);
852 /* Construct a command list entry, pointing to the command's table.
853 * Current assumptions: callers always provide a Register - Host to
854 * Device type FIS, and all non-NCQ commands are prefetchable.
856 cl = &ps->cl_base[cmd * AHCI_CL_ENTRY_DWORDS];
858 memset(cl, 0, AHCI_CL_ENTRY_SIZE);
859 cl[0] = (nr_prds << AHCI_CL_PRDTL_SHIFT) |
860 ((!ATA_IS_FPDMA_CMD(fis->cf_cmd) &&
861 (nr_prds > 0 || packet != NULL)) ? AHCI_CL_PREFETCHABLE : 0) |
862 (write ? AHCI_CL_WRITE : 0) |
863 ((packet != NULL) ? AHCI_CL_ATAPI : 0) |
864 ((size / sizeof(u32_t)) << AHCI_CL_CFL_SHIFT);
865 cl[2] = ps->ct_phys[cmd];
868 /*===========================================================================*
869 * port_finish_cmd *
870 *===========================================================================*/
871 static void port_finish_cmd(struct port_state *ps, int cmd, int result)
873 /* Finish a command that has either succeeded or failed.
876 assert(cmd < ps->queue_depth);
878 dprintf(V_REQ, ("%s: command %d %s\n", ahci_portname(ps),
879 cmd, (result == RESULT_SUCCESS) ? "succeeded" : "failed"));
881 /* Update the command result, and clear it from the pending list. */
882 ps->cmd_info[cmd].result = result;
884 assert(ps->pend_mask & (1 << cmd));
885 ps->pend_mask &= ~(1 << cmd);
887 /* Wake up the thread, unless it is the main thread. This can happen
888 * during initialization, as the gen_identify function is called by the
889 * main thread itself.
891 if (ps->state != STATE_WAIT_ID)
892 blockdriver_mt_wakeup(ps->cmd_info[cmd].tid);
895 /*===========================================================================*
896 * port_fail_cmds *
897 *===========================================================================*/
898 static void port_fail_cmds(struct port_state *ps)
900 /* Fail all ongoing commands for a device.
902 int i;
904 for (i = 0; ps->pend_mask != 0 && i < ps->queue_depth; i++)
905 if (ps->pend_mask & (1 << i))
906 port_finish_cmd(ps, i, RESULT_FAILURE);
909 /*===========================================================================*
910 * port_check_cmds *
911 *===========================================================================*/
912 static void port_check_cmds(struct port_state *ps)
914 /* Check what commands have completed, and finish them.
916 u32_t mask, done;
917 int i;
919 /* See which commands have completed. */
920 if (ps->flags & FLAG_NCQ_MODE)
921 mask = ps->reg[AHCI_PORT_SACT];
922 else
923 mask = ps->reg[AHCI_PORT_CI];
925 /* Wake up threads corresponding to completed commands. */
926 done = ps->pend_mask & ~mask;
928 for (i = 0; i < ps->queue_depth; i++)
929 if (done & (1 << i))
930 port_finish_cmd(ps, i, RESULT_SUCCESS);
933 /*===========================================================================*
934 * port_find_cmd *
935 *===========================================================================*/
936 static int port_find_cmd(struct port_state *ps)
938 /* Find a free command tag to queue the current request.
940 int i;
942 for (i = 0; i < ps->queue_depth; i++)
943 if (!(ps->pend_mask & (1 << i)))
944 break;
946 /* We should always be able to find a free slot, since a thread runs
947 * only when it is free, and thus, only because a slot is available.
949 assert(i < ps->queue_depth);
951 return i;
954 /*===========================================================================*
955 * port_get_padbuf *
956 *===========================================================================*/
957 static int port_get_padbuf(struct port_state *ps, size_t size)
959 /* Make available a temporary buffer for use by this port. Enlarge the
960 * previous buffer if applicable and necessary, potentially changing
961 * its physical address.
964 if (ps->pad_base != NULL && ps->pad_size >= size)
965 return OK;
967 if (ps->pad_base != NULL)
968 free_contig(ps->pad_base, ps->pad_size);
970 ps->pad_size = size;
971 ps->pad_base = alloc_contig(ps->pad_size, 0, &ps->pad_phys);
973 if (ps->pad_base == NULL) {
974 dprintf(V_ERR, ("%s: unable to allocate a padding buffer of "
975 "size %lu\n", ahci_portname(ps),
976 (unsigned long) size));
978 return ENOMEM;
981 dprintf(V_INFO, ("%s: allocated padding buffer of size %lu\n",
982 ahci_portname(ps), (unsigned long) size));
984 return OK;
987 /*===========================================================================*
988 * sum_iovec *
989 *===========================================================================*/
990 static int sum_iovec(struct port_state *ps, endpoint_t endpt,
991 iovec_s_t *iovec, int nr_req, vir_bytes *total)
993 /* Retrieve the total size of the given I/O vector. Check for alignment
994 * requirements along the way. Return OK (and the total request size)
995 * or an error.
997 vir_bytes size, bytes;
998 int i;
1000 bytes = 0;
1002 for (i = 0; i < nr_req; i++) {
1003 size = iovec[i].iov_size;
1005 if (size == 0 || (size & 1) || size > LONG_MAX) {
1006 dprintf(V_ERR, ("%s: bad size %lu in iovec from %d\n",
1007 ahci_portname(ps), size, endpt));
1008 return EINVAL;
1011 bytes += size;
1013 if (bytes > LONG_MAX) {
1014 dprintf(V_ERR, ("%s: iovec size overflow from %d\n",
1015 ahci_portname(ps), endpt));
1016 return EINVAL;
1020 *total = bytes;
1021 return OK;
1024 /*===========================================================================*
1025 * setup_prdt *
1026 *===========================================================================*/
1027 static int setup_prdt(struct port_state *ps, endpoint_t endpt,
1028 iovec_s_t *iovec, int nr_req, vir_bytes size, vir_bytes lead,
1029 int write, prd_t *prdt)
1031 /* Convert (the first part of) an I/O vector to a Physical Region
1032 * Descriptor Table describing array that can later be used to set the
1033 * command's real PRDT. The resulting table as a whole should be
1034 * sector-aligned; leading and trailing local buffers may have to be
1035 * used for padding as appropriate. Return the number of PRD entries,
1036 * or a negative error code.
1038 struct vumap_vir vvec[NR_PRDS];
1039 size_t bytes, trail;
1040 int i, r, pcount, nr_prds = 0;
1042 if (lead > 0) {
1043 /* Allocate a buffer for the data we don't want. */
1044 if ((r = port_get_padbuf(ps, ps->sector_size)) != OK)
1045 return r;
1047 prdt[nr_prds].vp_addr = ps->pad_phys;
1048 prdt[nr_prds].vp_size = lead;
1049 nr_prds++;
1052 /* The sum of lead, size, trail has to be sector-aligned. */
1053 trail = (ps->sector_size - (lead + size)) % ps->sector_size;
1055 /* Get the physical addresses of the given buffers. */
1056 for (i = 0; i < nr_req && size > 0; i++) {
1057 bytes = MIN(iovec[i].iov_size, size);
1059 if (endpt == SELF)
1060 vvec[i].vv_addr = (vir_bytes) iovec[i].iov_grant;
1061 else
1062 vvec[i].vv_grant = iovec[i].iov_grant;
1064 vvec[i].vv_size = bytes;
1066 size -= bytes;
1069 pcount = i;
1071 if ((r = sys_vumap(endpt, vvec, i, 0, write ? VUA_READ : VUA_WRITE,
1072 &prdt[nr_prds], &pcount)) != OK) {
1073 dprintf(V_ERR, ("%s: unable to map memory from %d (%d)\n",
1074 ahci_portname(ps), endpt, r));
1075 return r;
1078 assert(pcount > 0 && pcount <= i);
1080 /* Make sure all buffers are physically contiguous and word-aligned. */
1081 for (i = 0; i < pcount; i++) {
1082 if (vvec[i].vv_size != prdt[nr_prds].vp_size) {
1083 dprintf(V_ERR, ("%s: non-contiguous memory from %d\n",
1084 ahci_portname(ps), endpt));
1085 return EINVAL;
1088 if (prdt[nr_prds].vp_addr & 1) {
1089 dprintf(V_ERR, ("%s: bad physical address from %d\n",
1090 ahci_portname(ps), endpt));
1091 return EINVAL;
1094 nr_prds++;
1097 if (trail > 0) {
1098 assert(nr_prds < NR_PRDS);
1099 prdt[nr_prds].vp_addr = ps->pad_phys + lead;
1100 prdt[nr_prds].vp_size = trail;
1101 nr_prds++;
1104 return nr_prds;
1107 /*===========================================================================*
1108 * port_transfer *
1109 *===========================================================================*/
1110 static ssize_t port_transfer(struct port_state *ps, u64_t pos, u64_t eof,
1111 endpoint_t endpt, iovec_s_t *iovec, int nr_req, int write, int flags)
1113 /* Perform an I/O transfer on a port.
1115 prd_t prdt[NR_PRDS];
1116 vir_bytes size, lead;
1117 unsigned int count, nr_prds;
1118 u64_t start_lba;
1119 int r, cmd;
1121 /* Get the total request size from the I/O vector. */
1122 if ((r = sum_iovec(ps, endpt, iovec, nr_req, &size)) != OK)
1123 return r;
1125 dprintf(V_REQ, ("%s: %s for %lu bytes at pos %08lx%08lx\n",
1126 ahci_portname(ps), write ? "write" : "read", size,
1127 ex64hi(pos), ex64lo(pos)));
1129 assert(ps->state == STATE_GOOD_DEV);
1130 assert(ps->flags & FLAG_HAS_MEDIUM);
1131 assert(ps->sector_size > 0);
1133 /* Limit the maximum size of a single transfer.
1134 * See the comments at the top of this file for details.
1136 if (size > MAX_TRANSFER)
1137 size = MAX_TRANSFER;
1139 /* If necessary, reduce the request size so that the request does not
1140 * extend beyond the end of the partition. The caller already
1141 * guarantees that the starting position lies within the partition.
1143 if (cmp64(add64ul(pos, size), eof) >= 0)
1144 size = (vir_bytes) diff64(eof, pos);
1146 start_lba = div64(pos, cvu64(ps->sector_size));
1147 lead = rem64u(pos, ps->sector_size);
1148 count = (lead + size + ps->sector_size - 1) / ps->sector_size;
1150 /* Position must be word-aligned for read requests, and sector-aligned
1151 * for write requests. We do not support read-modify-write for writes.
1153 if ((lead & 1) || (write && lead != 0)) {
1154 dprintf(V_ERR, ("%s: unaligned position from %d\n",
1155 ahci_portname(ps), endpt));
1156 return EINVAL;
1159 /* Write requests must be sector-aligned. Word alignment of the size is
1160 * already guaranteed by sum_iovec().
1162 if (write && (size % ps->sector_size) != 0) {
1163 dprintf(V_ERR, ("%s: unaligned size %lu from %d\n",
1164 ahci_portname(ps), size, endpt));
1165 return EINVAL;
1168 /* Create a vector of physical addresses and sizes for the transfer. */
1169 nr_prds = r = setup_prdt(ps, endpt, iovec, nr_req, size, lead, write,
1170 prdt);
1172 if (r < 0) return r;
1174 /* Perform the actual transfer. */
1175 cmd = port_find_cmd(ps);
1177 if (ps->flags & FLAG_ATAPI)
1178 r = atapi_transfer(ps, cmd, start_lba, count, write, prdt,
1179 nr_prds);
1180 else
1181 r = ata_transfer(ps, cmd, start_lba, count, write,
1182 !!(flags & BDEV_FORCEWRITE), prdt, nr_prds);
1184 if (r != OK) return r;
1186 return size;
1189 /*===========================================================================*
1190 * port_start *
1191 *===========================================================================*/
1192 static void port_start(struct port_state *ps)
1194 /* Start the given port, allowing for the execution of commands and the
1195 * transfer of data on that port.
1197 u32_t cmd;
1199 /* Enable FIS receive. */
1200 cmd = ps->reg[AHCI_PORT_CMD];
1201 ps->reg[AHCI_PORT_CMD] = cmd | AHCI_PORT_CMD_FRE;
1203 /* Reset status registers. */
1204 ps->reg[AHCI_PORT_SERR] = ~0;
1205 ps->reg[AHCI_PORT_IS] = ~0;
1207 /* Start the port. */
1208 cmd = ps->reg[AHCI_PORT_CMD];
1209 ps->reg[AHCI_PORT_CMD] = cmd | AHCI_PORT_CMD_ST;
1211 dprintf(V_INFO, ("%s: started\n", ahci_portname(ps)));
1214 /*===========================================================================*
1215 * port_restart *
1216 *===========================================================================*/
1217 static void port_restart(struct port_state *ps)
1219 /* Restart a port after a fatal error has occurred.
1221 u32_t cmd;
1223 /* Fail all outstanding commands. */
1224 port_fail_cmds(ps);
1226 /* Stop the port. */
1227 cmd = ps->reg[AHCI_PORT_CMD];
1228 ps->reg[AHCI_PORT_CMD] = cmd & ~AHCI_PORT_CMD_ST;
1230 SPIN_UNTIL(!(ps->reg[AHCI_PORT_CMD] & AHCI_PORT_CMD_CR),
1231 PORTREG_DELAY);
1233 /* Reset status registers. */
1234 ps->reg[AHCI_PORT_SERR] = ~0;
1235 ps->reg[AHCI_PORT_IS] = ~0;
1237 /* If the BSY and/or DRQ flags are set, reset the port. */
1238 if (ps->reg[AHCI_PORT_TFD] &
1239 (AHCI_PORT_TFD_STS_BSY | AHCI_PORT_TFD_STS_DRQ)) {
1241 dprintf(V_ERR, ("%s: port reset\n", ahci_portname(ps)));
1243 /* Trigger a port reset. */
1244 ps->reg[AHCI_PORT_SCTL] = AHCI_PORT_SCTL_DET_INIT;
1245 micro_delay(SPINUP_DELAY * 1000);
1246 ps->reg[AHCI_PORT_SCTL] = AHCI_PORT_SCTL_DET_NONE;
1248 /* To keep this driver simple, we do not transparently recover
1249 * ongoing requests. Instead, we mark the failing device as
1250 * disconnected, and assume that if the reset succeeds, the
1251 * device (or, perhaps, eventually, another device) will come
1252 * back up. Any current and future requests to this port will
1253 * be failed until the port is fully closed and reopened.
1255 port_disconnect(ps);
1257 return;
1260 /* Start the port. */
1261 cmd = ps->reg[AHCI_PORT_CMD];
1262 ps->reg[AHCI_PORT_CMD] = cmd | AHCI_PORT_CMD_ST;
1264 dprintf(V_INFO, ("%s: restarted\n", ahci_portname(ps)));
1267 /*===========================================================================*
1268 * port_stop *
1269 *===========================================================================*/
1270 static void port_stop(struct port_state *ps)
1272 /* Stop the given port, if not already stopped.
1274 u32_t cmd;
1276 /* Disable interrupts. */
1277 ps->reg[AHCI_PORT_IE] = AHCI_PORT_IE_NONE;
1279 /* Stop the port. */
1280 cmd = ps->reg[AHCI_PORT_CMD];
1282 if (cmd & (AHCI_PORT_CMD_CR | AHCI_PORT_CMD_ST)) {
1283 cmd &= ~(AHCI_PORT_CMD_CR | AHCI_PORT_CMD_ST);
1285 ps->reg[AHCI_PORT_CMD] = cmd;
1287 SPIN_UNTIL(!(ps->reg[AHCI_PORT_CMD] & AHCI_PORT_CMD_CR),
1288 PORTREG_DELAY);
1290 dprintf(V_INFO, ("%s: stopped\n", ahci_portname(ps)));
1292 cmd = ps->reg[AHCI_PORT_CMD];
1295 if (cmd & (AHCI_PORT_CMD_FR | AHCI_PORT_CMD_FRE)) {
1296 cmd &= ~(AHCI_PORT_CMD_FR | AHCI_PORT_CMD_FRE);
1298 ps->reg[AHCI_PORT_CMD] = cmd;
1300 SPIN_UNTIL(!(ps->reg[AHCI_PORT_CMD] & AHCI_PORT_CMD_FR),
1301 PORTREG_DELAY);
1304 /* Reset status registers. */
1305 ps->reg[AHCI_PORT_SERR] = ~0;
1306 ps->reg[AHCI_PORT_IS] = ~0;
1309 /*===========================================================================*
1310 * port_sig_check *
1311 *===========================================================================*/
1312 static void port_sig_check(struct port_state *ps)
1314 /* Check whether the device's signature has become available yet, and
1315 * if so, start identifying the device.
1317 u32_t tfd, sig;
1319 tfd = ps->reg[AHCI_PORT_TFD];
1321 /* Wait for the BSY flag to be (set and then) cleared first. Note that
1322 * clearing it only happens when PxCMD.FRE is set, which is why we
1323 * start the port before starting the signature wait cycle.
1325 if ((tfd & AHCI_PORT_TFD_STS_BSY) || tfd == AHCI_PORT_TFD_STS_INIT) {
1326 /* Try for a while before giving up. It may take seconds. */
1327 if (ps->left > 0) {
1328 ps->left--;
1329 set_timer(&ps->cmd_info[0].timer, ahci_sig_timeout,
1330 port_timeout, BUILD_ARG(ps - port_state, 0));
1331 return;
1334 /* If no device is actually attached, disable the port. This
1335 * value is also the initial value of the register, before the
1336 * BSY flag gets set, so only check this condition on timeout.
1338 if (tfd == AHCI_PORT_TFD_STS_INIT) {
1339 dprintf(V_DEV, ("%s: no device at this port\n",
1340 ahci_portname(ps)));
1342 port_stop(ps);
1344 ps->state = STATE_BAD_DEV;
1345 ps->flags &= ~FLAG_BUSY;
1347 return;
1350 port_restart(ps);
1352 dprintf(V_ERR, ("%s: timeout waiting for signature\n",
1353 ahci_portname(ps)));
1356 /* Check the port's signature. We only support the normal ATA and ATAPI
1357 * signatures. We ignore devices reporting anything else.
1359 sig = ps->reg[AHCI_PORT_SIG];
1361 if (sig != ATA_SIG_ATA && sig != ATA_SIG_ATAPI) {
1362 dprintf(V_ERR, ("%s: unsupported signature (%08x)\n",
1363 ahci_portname(ps), sig));
1365 port_stop(ps);
1367 ps->state = STATE_BAD_DEV;
1368 ps->flags &= ~FLAG_BUSY;
1370 return;
1373 /* Clear all state flags except the busy flag, which may be relevant if
1374 * a BDEV_OPEN call is waiting for the device to become ready; the
1375 * barrier flag, which prevents access to the device until it is
1376 * completely closed and (re)opened; and, the thread suspension flag.
1378 ps->flags &= (FLAG_BUSY | FLAG_BARRIER | FLAG_SUSPENDED);
1380 if (sig == ATA_SIG_ATAPI)
1381 ps->flags |= FLAG_ATAPI;
1383 /* Attempt to identify the device. Do this using continuation, because
1384 * we may already be called from port_wait() here, and could end up
1385 * confusing the timer expiration procedure.
1387 ps->state = STATE_WAIT_ID;
1388 ps->reg[AHCI_PORT_IE] = AHCI_PORT_IE_MASK;
1390 (void) gen_identify(ps, FALSE /*blocking*/);
1393 /*===========================================================================*
1394 * print_string *
1395 *===========================================================================*/
1396 static void print_string(u16_t *buf, int start, int end)
1398 /* Print a string that is stored as little-endian words and padded with
1399 * trailing spaces.
1401 int i, last = 0;
1403 while (end >= start && buf[end] == 0x2020) end--;
1405 if (end >= start && (buf[end] & 0xFF) == 0x20) end--, last++;
1407 for (i = start; i <= end; i++)
1408 printf("%c%c", buf[i] >> 8, buf[i] & 0xFF);
1410 if (last)
1411 printf("%c", buf[i] >> 8);
1414 /*===========================================================================*
1415 * port_id_check *
1416 *===========================================================================*/
1417 static void port_id_check(struct port_state *ps, int success)
1419 /* The device identification command has either completed or timed out.
1420 * Decide whether this device is usable or not, and store some of its
1421 * properties.
1423 u16_t *buf;
1425 assert(ps->state == STATE_WAIT_ID);
1426 assert(!(ps->flags & FLAG_BUSY)); /* unset by callers */
1428 cancel_timer(&ps->cmd_info[0].timer);
1430 if (!success)
1431 dprintf(V_ERR,
1432 ("%s: unable to identify\n", ahci_portname(ps)));
1434 /* If the identify command itself succeeded, check the results and
1435 * store some properties.
1437 if (success) {
1438 buf = (u16_t *) ps->tmp_base;
1440 if (ps->flags & FLAG_ATAPI)
1441 success = atapi_id_check(ps, buf);
1442 else
1443 success = ata_id_check(ps, buf);
1446 /* If the device has not been identified successfully, mark it as an
1447 * unusable device.
1449 if (!success) {
1450 port_stop(ps);
1452 ps->state = STATE_BAD_DEV;
1453 ps->reg[AHCI_PORT_IE] = AHCI_PORT_IE_PRCE;
1455 return;
1458 /* The device has been identified successfully, and hence usable. */
1459 ps->state = STATE_GOOD_DEV;
1461 /* Print some information about the device. */
1462 if (ahci_verbose >= V_INFO) {
1463 printf("%s: ATA%s, ", ahci_portname(ps),
1464 (ps->flags & FLAG_ATAPI) ? "PI" : "");
1465 print_string(buf, 27, 46);
1466 if (ahci_verbose >= V_DEV) {
1467 printf(" (");
1468 print_string(buf, 10, 19);
1469 printf(", ");
1470 print_string(buf, 23, 26);
1471 printf(")");
1474 if (ps->flags & FLAG_HAS_MEDIUM)
1475 printf(", %u byte sectors, %lu MB size",
1476 ps->sector_size, div64u(mul64(ps->lba_count,
1477 cvu64(ps->sector_size)), 1024*1024));
1479 printf("\n");
1483 /*===========================================================================*
1484 * port_connect *
1485 *===========================================================================*/
1486 static void port_connect(struct port_state *ps)
1488 /* A device has been found to be attached to this port. Start the port,
1489 * and do timed polling for its signature to become available.
1492 dprintf(V_INFO, ("%s: device connected\n", ahci_portname(ps)));
1494 if (ps->state == STATE_SPIN_UP)
1495 cancel_timer(&ps->cmd_info[0].timer);
1497 port_start(ps);
1499 ps->state = STATE_WAIT_SIG;
1500 ps->left = ahci_sig_checks;
1502 ps->reg[AHCI_PORT_IE] = AHCI_PORT_IE_PRCE;
1504 /* Do the first check immediately; who knows, we may get lucky. */
1505 port_sig_check(ps);
1508 /*===========================================================================*
1509 * port_disconnect *
1510 *===========================================================================*/
1511 static void port_disconnect(struct port_state *ps)
1513 /* The device has detached from this port. Stop the port if necessary.
1516 dprintf(V_INFO, ("%s: device disconnected\n", ahci_portname(ps)));
1518 if (ps->state != STATE_BAD_DEV)
1519 port_stop(ps);
1521 ps->state = STATE_NO_DEV;
1522 ps->reg[AHCI_PORT_IE] = AHCI_PORT_IE_PRCE;
1523 ps->flags &= ~FLAG_BUSY;
1525 /* Fail any ongoing request. The caller may already have done this. */
1526 port_fail_cmds(ps);
1528 /* Block any further access until the device is completely closed and
1529 * reopened. This prevents arbitrary I/O to a newly plugged-in device
1530 * without upper layers noticing.
1532 ps->flags |= FLAG_BARRIER;
1534 /* Inform the blockdriver library to reduce the number of threads. */
1535 blockdriver_mt_set_workers(ps->device, 1);
1538 /*===========================================================================*
1539 * port_intr *
1540 *===========================================================================*/
1541 static void port_intr(struct port_state *ps)
1543 /* Process an interrupt on this port.
1545 u32_t smask, emask;
1546 int connected;
1548 if (ps->state == STATE_NO_PORT) {
1549 dprintf(V_ERR, ("%s: interrupt for invalid port!\n",
1550 ahci_portname(ps)));
1552 return;
1555 smask = ps->reg[AHCI_PORT_IS];
1556 emask = smask & ps->reg[AHCI_PORT_IE];
1558 /* Clear the interrupt flags that we saw were set. */
1559 ps->reg[AHCI_PORT_IS] = smask;
1561 dprintf(V_REQ, ("%s: interrupt (%08x)\n", ahci_portname(ps), smask));
1563 /* Check if any commands have completed. */
1564 port_check_cmds(ps);
1566 if (emask & AHCI_PORT_IS_PRCS) {
1567 /* Clear the N diagnostics bit to clear this interrupt. */
1568 ps->reg[AHCI_PORT_SERR] = AHCI_PORT_SERR_DIAG_N;
1570 connected =
1571 (ps->reg[AHCI_PORT_SSTS] & AHCI_PORT_SSTS_DET_MASK) ==
1572 AHCI_PORT_SSTS_DET_PHY;
1574 switch (ps->state) {
1575 case STATE_BAD_DEV:
1576 case STATE_GOOD_DEV:
1577 case STATE_WAIT_SIG:
1578 case STATE_WAIT_ID:
1579 port_disconnect(ps);
1581 /* fall-through */
1582 default:
1583 if (!connected)
1584 break;
1586 port_connect(ps);
1588 } else if (smask & AHCI_PORT_IS_MASK) {
1589 /* We assume that any other interrupt indicates command
1590 * completion or (command or device) failure. Unfortunately, if
1591 * an NCQ command failed, we cannot easily determine which one
1592 * it was. For that reason, after completing all successfully
1593 * finished commands (above), we fail all other outstanding
1594 * commands and restart the port. This can possibly be improved
1595 * later by obtaining per-command status results from the HBA.
1598 /* If we were waiting for ID verification, check now. */
1599 if (ps->state == STATE_WAIT_ID) {
1600 ps->flags &= ~FLAG_BUSY;
1601 port_id_check(ps, !(ps->reg[AHCI_PORT_TFD] &
1602 (AHCI_PORT_TFD_STS_ERR |
1603 AHCI_PORT_TFD_STS_DF)));
1606 /* Check now for failure. There are fatal failures, and there
1607 * are failures that set the TFD.STS.ERR field using a D2H
1608 * FIS. In both cases, we just restart the port, failing all
1609 * commands in the process.
1611 if ((ps->reg[AHCI_PORT_TFD] &
1612 (AHCI_PORT_TFD_STS_ERR | AHCI_PORT_TFD_STS_DF)) ||
1613 (smask & AHCI_PORT_IS_RESTART)) {
1614 port_restart(ps);
1619 /*===========================================================================*
1620 * port_timeout *
1621 *===========================================================================*/
1622 static void port_timeout(struct timer *tp)
1624 /* A timeout has occurred on this port. Figure out what the timeout is
1625 * for, and take appropriate action.
1627 struct port_state *ps;
1628 int port, cmd;
1630 port = GET_PORT(tmr_arg(tp)->ta_int);
1631 cmd = GET_TAG(tmr_arg(tp)->ta_int);
1633 assert(port >= 0 && port < hba_state.nr_ports);
1635 ps = &port_state[port];
1637 /* Regardless of the outcome of this timeout, wake up the thread if it
1638 * is suspended. This applies only during the initialization.
1640 if (ps->flags & FLAG_SUSPENDED) {
1641 assert(cmd == 0);
1642 blockdriver_mt_wakeup(ps->cmd_info[0].tid);
1645 /* If detection of a device after startup timed out, give up on initial
1646 * detection and only look for hot plug events from now on.
1648 if (ps->state == STATE_SPIN_UP) {
1649 /* There is one exception: for braindead controllers that don't
1650 * generate the right interrupts (cough, VirtualBox), we do an
1651 * explicit check to see if a device is connected after all.
1652 * Later hot-(un)plug events will not be detected in this case.
1654 if ((ps->reg[AHCI_PORT_SSTS] & AHCI_PORT_SSTS_DET_MASK) ==
1655 AHCI_PORT_SSTS_DET_PHY) {
1656 dprintf(V_INFO, ("%s: no device connection event\n",
1657 ahci_portname(ps)));
1659 port_connect(ps);
1661 else {
1662 dprintf(V_INFO, ("%s: spin-up timeout\n",
1663 ahci_portname(ps)));
1665 /* If the busy flag is set, a BDEV_OPEN request is
1666 * waiting for the detection to finish; clear the busy
1667 * flag to return an error to the caller.
1669 ps->state = STATE_NO_DEV;
1670 ps->flags &= ~FLAG_BUSY;
1673 return;
1676 /* If a device has been connected and we are waiting for its signature
1677 * to become available, check now.
1679 if (ps->state == STATE_WAIT_SIG) {
1680 port_sig_check(ps);
1682 return;
1685 /* The only case where the busy flag will be set after this is for a
1686 * failed identify operation. During this operation, the port will be
1687 * in the WAIT_ID state. In that case, we clear the BUSY flag, fail the
1688 * command by setting its state, restart port and finish identify op.
1690 if (ps->flags & FLAG_BUSY) {
1691 assert(ps->state == STATE_WAIT_ID);
1692 ps->flags &= ~FLAG_BUSY;
1695 dprintf(V_ERR, ("%s: timeout\n", ahci_portname(ps)));
1697 /* Restart the port, failing all current commands. */
1698 port_restart(ps);
1700 /* Finish up the identify operation. */
1701 if (ps->state == STATE_WAIT_ID)
1702 port_id_check(ps, FALSE);
1705 /*===========================================================================*
1706 * port_wait *
1707 *===========================================================================*/
1708 static void port_wait(struct port_state *ps)
1710 /* Suspend the current thread until the given port is no longer busy,
1711 * due to either command completion or timeout.
1714 ps->flags |= FLAG_SUSPENDED;
1716 while (ps->flags & FLAG_BUSY)
1717 blockdriver_mt_sleep();
1719 ps->flags &= ~FLAG_SUSPENDED;
1722 /*===========================================================================*
1723 * port_issue *
1724 *===========================================================================*/
1725 static void port_issue(struct port_state *ps, int cmd, clock_t timeout)
1727 /* Issue a command to the port, and set a timer to trigger a timeout
1728 * if the command takes too long to complete.
1731 /* Set the corresponding NCQ command bit, if applicable. */
1732 if (ps->flags & FLAG_HAS_NCQ)
1733 ps->reg[AHCI_PORT_SACT] = (1 << cmd);
1735 /* Make sure that the compiler does not delay any previous write
1736 * operations until after the write to the command issue register.
1738 __insn_barrier();
1740 /* Tell the controller that a new command is ready. */
1741 ps->reg[AHCI_PORT_CI] = (1 << cmd);
1743 /* Update pending commands. */
1744 ps->pend_mask |= 1 << cmd;
1746 /* Set a timer in case the command does not complete at all. */
1747 set_timer(&ps->cmd_info[cmd].timer, timeout, port_timeout,
1748 BUILD_ARG(ps - port_state, cmd));
1751 /*===========================================================================*
1752 * port_exec *
1753 *===========================================================================*/
1754 static int port_exec(struct port_state *ps, int cmd, clock_t timeout)
1756 /* Execute a command on a port, wait for the command to complete or for
1757 * a timeout, and return whether the command succeeded or not.
1760 port_issue(ps, cmd, timeout);
1762 /* Put the thread to sleep until a timeout or a command completion
1763 * happens. Earlier, we used to call port_wait which set the suspended
1764 * flag. We now abandon it since the flag has to work on a per-thread,
1765 * and hence per-tag basis and not on a per-port basis. Instead, we
1766 * retain that call only to defer open calls during device/driver
1767 * initialization. Instead, we call sleep here directly. Before
1768 * sleeping, we register the thread.
1770 ps->cmd_info[cmd].tid = blockdriver_mt_get_tid();
1772 blockdriver_mt_sleep();
1774 /* Cancelling a timer that just triggered, does no harm. */
1775 cancel_timer(&ps->cmd_info[cmd].timer);
1777 assert(!(ps->flags & FLAG_BUSY));
1779 dprintf(V_REQ, ("%s: end of command -- %s\n", ahci_portname(ps),
1780 (ps->cmd_info[cmd].result == RESULT_FAILURE) ?
1781 "failure" : "success"));
1783 if (ps->cmd_info[cmd].result == RESULT_FAILURE)
1784 return EIO;
1786 return OK;
1789 /*===========================================================================*
1790 * port_alloc *
1791 *===========================================================================*/
1792 static void port_alloc(struct port_state *ps)
1794 /* Allocate memory for the given port. We try to cram everything into
1795 * one 4K-page in order to limit memory usage as much as possible.
1796 * More memory may be allocated on demand later, but allocation failure
1797 * should be fatal only here. Note that we do not allocate memory for
1798 * sector padding here, because we do not know the device's sector size
1799 * yet.
1801 size_t fis_off, tmp_off, ct_off; int i;
1802 size_t ct_offs[NR_CMDS];
1804 fis_off = AHCI_CL_SIZE + AHCI_FIS_SIZE - 1;
1805 fis_off -= fis_off % AHCI_FIS_SIZE;
1807 tmp_off = fis_off + AHCI_FIS_SIZE + AHCI_TMP_ALIGN - 1;
1808 tmp_off -= tmp_off % AHCI_TMP_ALIGN;
1810 /* Allocate memory for all the commands. */
1811 ct_off = tmp_off + AHCI_TMP_SIZE;
1812 for (i = 0; i < NR_CMDS; i++) {
1813 ct_off += AHCI_CT_ALIGN - 1;
1814 ct_off -= ct_off % AHCI_CT_ALIGN;
1815 ct_offs[i] = ct_off;
1816 ps->mem_size = ct_off + AHCI_CT_SIZE;
1817 ct_off = ps->mem_size;
1820 ps->mem_base = alloc_contig(ps->mem_size, AC_ALIGN4K, &ps->mem_phys);
1821 if (ps->mem_base == NULL)
1822 panic("unable to allocate port memory");
1823 memset(ps->mem_base, 0, ps->mem_size);
1825 ps->cl_base = (u32_t *) ps->mem_base;
1826 ps->cl_phys = ps->mem_phys;
1827 assert(ps->cl_phys % AHCI_CL_SIZE == 0);
1829 ps->fis_base = (u32_t *) (ps->mem_base + fis_off);
1830 ps->fis_phys = ps->mem_phys + fis_off;
1831 assert(ps->fis_phys % AHCI_FIS_SIZE == 0);
1833 ps->tmp_base = (u8_t *) (ps->mem_base + tmp_off);
1834 ps->tmp_phys = ps->mem_phys + tmp_off;
1835 assert(ps->tmp_phys % AHCI_TMP_ALIGN == 0);
1837 for (i = 0; i < NR_CMDS; i++) {
1838 ps->ct_base[i] = ps->mem_base + ct_offs[i];
1839 ps->ct_phys[i] = ps->mem_phys + ct_offs[i];
1840 assert(ps->ct_phys[i] % AHCI_CT_ALIGN == 0);
1843 /* Tell the controller about some of the physical addresses. */
1844 ps->reg[AHCI_PORT_FBU] = 0;
1845 ps->reg[AHCI_PORT_FB] = ps->fis_phys;
1847 ps->reg[AHCI_PORT_CLBU] = 0;
1848 ps->reg[AHCI_PORT_CLB] = ps->cl_phys;
1850 ps->pad_base = NULL;
1851 ps->pad_size = 0;
1854 /*===========================================================================*
1855 * port_free *
1856 *===========================================================================*/
1857 static void port_free(struct port_state *ps)
1859 /* Free previously allocated memory for the given port.
1861 int i;
1863 if (ps->pad_base != NULL)
1864 free_contig(ps->pad_base, ps->pad_size);
1866 /* The first command table is part of the primary memory page. */
1867 for (i = 1; i < hba_state.nr_cmds; i++)
1868 if (ps->ct_base[i] != NULL)
1869 free_contig(ps->ct_base[i], AHCI_CT_SIZE);
1871 free_contig(ps->mem_base, ps->mem_size);
1874 /*===========================================================================*
1875 * port_init *
1876 *===========================================================================*/
1877 static void port_init(struct port_state *ps)
1879 /* Initialize the given port.
1881 u32_t cmd;
1882 int i;
1884 /* Initialize the port state structure. */
1885 ps->queue_depth = 1;
1886 ps->state = STATE_SPIN_UP;
1887 ps->flags = FLAG_BUSY;
1888 ps->sector_size = 0;
1889 ps->open_count = 0;
1890 ps->pend_mask = 0;
1891 for (i = 0; i < NR_CMDS; i++)
1892 init_timer(&ps->cmd_info[i].timer);
1894 ps->reg = (u32_t *) ((u8_t *) hba_state.base +
1895 AHCI_MEM_BASE_SIZE + AHCI_MEM_PORT_SIZE * (ps - port_state));
1897 /* Make sure the port is in a known state. */
1898 port_stop(ps);
1900 /* Allocate memory for the port. */
1901 port_alloc(ps);
1903 /* Just listen for device status change events for now. */
1904 ps->reg[AHCI_PORT_IE] = AHCI_PORT_IE_PRCE;
1906 /* Perform a reset on the device. */
1907 cmd = ps->reg[AHCI_PORT_CMD];
1908 ps->reg[AHCI_PORT_CMD] = cmd | AHCI_PORT_CMD_SUD;
1910 ps->reg[AHCI_PORT_SCTL] = AHCI_PORT_SCTL_DET_INIT;
1911 micro_delay(SPINUP_DELAY * 1000); /* SPINUP_DELAY is in ms */
1912 ps->reg[AHCI_PORT_SCTL] = AHCI_PORT_SCTL_DET_NONE;
1914 set_timer(&ps->cmd_info[0].timer, ahci_spinup_timeout,
1915 port_timeout, BUILD_ARG(ps - port_state, 0));
1918 /*===========================================================================*
1919 * ahci_probe *
1920 *===========================================================================*/
1921 static int ahci_probe(int skip)
1923 /* Find a matching PCI device.
1925 int r, devind;
1926 u16_t vid, did;
1928 pci_init();
1930 r = pci_first_dev(&devind, &vid, &did);
1931 if (r <= 0)
1932 return -1;
1934 while (skip--) {
1935 r = pci_next_dev(&devind, &vid, &did);
1936 if (r <= 0)
1937 return -1;
1940 pci_reserve(devind);
1942 return devind;
/*===========================================================================*
 *				ahci_reset				     *
 *===========================================================================*/
static void ahci_reset(void)
{
	/* Reset the HBA. Do not enable AHCI mode afterwards.
	 */
	u32_t ghc;

	ghc = hba_state.base[AHCI_HBA_GHC];

	/* Set GHC.AE first: the HBA must be in AHCI-enabled mode before the
	 * reset bit may be written.
	 */
	hba_state.base[AHCI_HBA_GHC] = ghc | AHCI_HBA_GHC_AE;

	/* Setting GHC.HR starts the internal reset; the HBA clears the bit
	 * again once the reset has completed.
	 */
	hba_state.base[AHCI_HBA_GHC] = ghc | AHCI_HBA_GHC_AE | AHCI_HBA_GHC_HR;

	/* Wait for the HBA to clear HR, for at most RESET_DELAY. */
	SPIN_UNTIL(!(hba_state.base[AHCI_HBA_GHC] & AHCI_HBA_GHC_HR),
		RESET_DELAY);

	/* If HR is still set, the controller failed to reset in time. */
	if (hba_state.base[AHCI_HBA_GHC] & AHCI_HBA_GHC_HR)
		panic("unable to reset HBA");
}
/*===========================================================================*
 *				ahci_init				     *
 *===========================================================================*/
static void ahci_init(int devind)
{
	/* Initialize the device: map its register area, set up its IRQ,
	 * reset and enable the HBA, read its capabilities, and initialize
	 * every implemented port.
	 */
	u32_t base, size, cap, ghc, mask;
	int r, port, ioflag;

	/* The AHCI register area is exposed through the sixth PCI BAR
	 * (ABAR); it must be a memory-mapped BAR, not an I/O BAR.
	 */
	if ((r = pci_get_bar(devind, PCI_BAR_6, &base, &size, &ioflag)) != OK)
		panic("unable to retrieve BAR: %d", r);

	if (ioflag)
		panic("invalid BAR type");

	/* There must be at least one port, and at most NR_PORTS ports. Limit
	 * the actual total number of ports to the size of the exposed area.
	 */
	if (size < AHCI_MEM_BASE_SIZE + AHCI_MEM_PORT_SIZE)
		panic("HBA memory size too small: %lu", size);

	size = MIN(size, AHCI_MEM_BASE_SIZE + AHCI_MEM_PORT_SIZE * NR_PORTS);

	hba_state.nr_ports = (size - AHCI_MEM_BASE_SIZE) / AHCI_MEM_PORT_SIZE;

	/* Map the register area into local memory. */
	hba_state.base = (u32_t *) vm_map_phys(SELF, (void *) base, size);
	hba_state.size = size;
	if (hba_state.base == MAP_FAILED)
		panic("unable to map HBA memory");

	/* Retrieve, allocate and enable the controller's IRQ. */
	hba_state.irq = pci_attr_r8(devind, PCI_ILR);
	hba_state.hook_id = 0;

	if ((r = sys_irqsetpolicy(hba_state.irq, 0, &hba_state.hook_id)) != OK)
		panic("unable to register IRQ: %d", r);

	if ((r = sys_irqenable(&hba_state.hook_id)) != OK)
		panic("unable to enable IRQ: %d", r);

	/* Reset the HBA. */
	ahci_reset();

	/* Enable AHCI and interrupts. */
	ghc = hba_state.base[AHCI_HBA_GHC];
	hba_state.base[AHCI_HBA_GHC] = ghc | AHCI_HBA_GHC_AE | AHCI_HBA_GHC_IE;

	/* Limit the maximum number of commands to the controller's value. */
	/* Note that we currently use only one command anyway. */
	cap = hba_state.base[AHCI_HBA_CAP];
	hba_state.has_ncq = !!(cap & AHCI_HBA_CAP_SNCQ);
	hba_state.nr_cmds = MIN(NR_CMDS,
		((cap >> AHCI_HBA_CAP_NCS_SHIFT) & AHCI_HBA_CAP_NCS_MASK) + 1);

	/* Announce the HBA version, port/command counts and NCQ support. */
	dprintf(V_INFO, ("AHCI%u: HBA v%d.%d%d, %ld ports, %ld commands, "
		"%s queuing, IRQ %d\n",
		ahci_instance,
		(int) (hba_state.base[AHCI_HBA_VS] >> 16),
		(int) ((hba_state.base[AHCI_HBA_VS] >> 8) & 0xFF),
		(int) (hba_state.base[AHCI_HBA_VS] & 0xFF),
		((cap >> AHCI_HBA_CAP_NP_SHIFT) & AHCI_HBA_CAP_NP_MASK) + 1,
		((cap >> AHCI_HBA_CAP_NCS_SHIFT) & AHCI_HBA_CAP_NCS_MASK) + 1,
		hba_state.has_ncq ? "supports" : "no", hba_state.irq));

	dprintf(V_INFO, ("AHCI%u: CAP %08x, CAP2 %08x, PI %08x\n",
		ahci_instance, cap, hba_state.base[AHCI_HBA_CAP2],
		hba_state.base[AHCI_HBA_PI]));

	/* Initialize each of the implemented ports. We ignore CAP.NP. */
	mask = hba_state.base[AHCI_HBA_PI];

	for (port = 0; port < hba_state.nr_ports; port++) {
		port_state[port].device = NO_DEVICE;
		port_state[port].state = STATE_NO_PORT;

		/* Only ports marked implemented in PI are initialized; the
		 * rest stay in NO_PORT state forever.
		 */
		if (mask & (1 << port))
			port_init(&port_state[port]);
	}
}
/*===========================================================================*
 *				ahci_stop				     *
 *===========================================================================*/
static void ahci_stop(void)
{
	/* Disable AHCI, and clean up resources to the extent possible.
	 */
	struct port_state *ps;
	int r, port;

	/* Stop and free every port that was actually initialized; ports in
	 * NO_PORT state never had resources allocated.
	 */
	for (port = 0; port < hba_state.nr_ports; port++) {
		ps = &port_state[port];

		if (ps->state != STATE_NO_PORT) {
			port_stop(ps);

			port_free(ps);
		}
	}

	/* Reset the HBA, leaving AHCI mode disabled. */
	ahci_reset();

	/* Unmap the register area and release the IRQ hook. */
	if ((r = vm_unmap_phys(SELF, (void *) hba_state.base,
			hba_state.size)) != OK)
		panic("unable to unmap HBA memory: %d", r);

	if ((r = sys_irqrmpolicy(&hba_state.hook_id)) != OK)
		panic("unable to deregister IRQ: %d", r);
}
/*===========================================================================*
 *				ahci_alarm				     *
 *===========================================================================*/
static void ahci_alarm(clock_t stamp)
{
	/* Process an alarm notification from the clock.
	 */

	/* Call the port-specific handler for each port that timed out. */
	expire_timers(stamp);
}
/*===========================================================================*
 *				ahci_intr				     *
 *===========================================================================*/
static void ahci_intr(unsigned int UNUSED(mask))
{
	/* Process a hardware interrupt from the HBA.
	 */
	struct port_state *ps;
	u32_t mask;	/* deliberately shadows the unused parameter; the
			 * authoritative per-port status comes from HBA.IS */
	int r, port;

	/* Handle an interrupt for each port that has the interrupt bit set. */
	mask = hba_state.base[AHCI_HBA_IS];

	for (port = 0; port < hba_state.nr_ports; port++) {
		if (mask & (1 << port)) {
			ps = &port_state[port];

			port_intr(ps);

			/* After processing an interrupt, wake up the device
			 * thread if it is suspended and now no longer busy.
			 */
			if ((ps->flags & (FLAG_SUSPENDED | FLAG_BUSY)) ==
				FLAG_SUSPENDED)
				blockdriver_mt_wakeup(ps->cmd_info[0].tid);
		}
	}

	/* Clear the bits that we processed. */
	hba_state.base[AHCI_HBA_IS] = mask;

	/* Reenable the interrupt. */
	if ((r = sys_irqenable(&hba_state.hook_id)) != OK)
		panic("unable to enable IRQ: %d", r);
}
/*===========================================================================*
 *				ahci_get_var				     *
 *===========================================================================*/
static void ahci_get_var(char *name, long *v, int timeout)
{
	/* Retrieve an environment variable, and optionally adjust it to the
	 * scale that we are using internally.
	 *
	 * name:	environment variable to look up
	 * v:		in/out value; must hold the default on entry
	 * timeout:	nonzero if the value is a timeout in milliseconds,
	 *		to be converted to clock ticks
	 */

	/* The value is supposed to be initialized to a default already. */
	(void) env_parse(name, "d", 0, v, 1, LONG_MAX);

	/* If this is a timeout, convert from milliseconds to ticks, rounding
	 * to the nearest tick. The previous expression added 500 before the
	 * multiplication by sys_hz(), which inflated every timeout by half a
	 * second instead of rounding the division by 1000.
	 */
	if (timeout)
		*v = (*v * sys_hz() + 500) / 1000;
}
/*===========================================================================*
 *				ahci_get_params				     *
 *===========================================================================*/
static void ahci_get_params(void)
{
	/* Retrieve and parse parameters passed to this driver, except the
	 * device-to-port mapping, which has to be parsed later.
	 */
	long v;

	/* Find out which driver instance we are. */
	v = 0;
	(void) env_parse("instance", "d", 0, &v, 0, 255);
	ahci_instance = (int) v;

	/* Initialize the verbosity level. */
	v = V_ERR;
	(void) env_parse("ahci_verbose", "d", 0, &v, V_NONE, V_REQ);
	ahci_verbose = (int) v;

	/* Initialize timeout-related values; the TRUE flag makes
	 * ahci_get_var convert the given millisecond values to clock ticks.
	 */
	ahci_get_var("ahci_init_timeout", &ahci_spinup_timeout, TRUE);
	ahci_get_var("ahci_sig_timeout", &ahci_sig_timeout, TRUE);
	ahci_get_var("ahci_sig_checks", &ahci_sig_checks, FALSE);
	ahci_get_var("ahci_cmd_timeout", &ahci_command_timeout, TRUE);
	ahci_get_var("ahci_io_timeout", &ahci_transfer_timeout, TRUE);
	ahci_get_var("ahci_flush_timeout", &ahci_flush_timeout, TRUE);
}
/*===========================================================================*
 *				ahci_set_mapping			     *
 *===========================================================================*/
static void ahci_set_mapping(void)
{
	/* Construct a mapping from device nodes to port numbers.
	 */
	char key[16], val[32], *p;
	unsigned int port;
	int i, j;

	/* Start off with a mapping that includes implemented ports only, in
	 * order. We choose this mapping over an identity mapping to maximize
	 * the chance that the user will be able to access the first MAX_DRIVES
	 * devices. Note that we can only do this after initializing the HBA.
	 */
	for (i = j = 0; i < NR_PORTS && j < MAX_DRIVES; i++)
		if (port_state[i].state != STATE_NO_PORT)
			ahci_map[j++] = i;

	for ( ; j < MAX_DRIVES; j++)
		ahci_map[j] = NO_PORT;

	/* See if the user specified a custom mapping. Unlike all other
	 * configuration options, this is a per-instance setting.
	 * NOTE(review): patching the digit in "ahci0_map" yields a sensible
	 * key only for instances 0 through 9, while "instance" may be up to
	 * 255 — confirm higher instances are not expected to use this option.
	 */
	strlcpy(key, "ahci0_map", sizeof(key));
	key[4] += ahci_instance;

	if (env_get_param(key, val, sizeof(val)) == OK) {
		/* Parse the mapping, which is assumed to be a comma-separated
		 * list of zero-based port numbers.
		 */
		p = val;

		for (i = 0; i < MAX_DRIVES; i++) {
			if (*p) {
				port = (unsigned int) strtoul(p, &p, 0);

				/* Skip the separator after the number. */
				if (*p) p++;

				ahci_map[i] = port % NR_PORTS;
			}
			else ahci_map[i] = NO_PORT;
		}
	}

	/* Create a reverse mapping. */
	for (i = 0; i < MAX_DRIVES; i++)
		if ((j = ahci_map[i]) != NO_PORT)
			port_state[j].device = i;
}
/*===========================================================================*
 *				sef_cb_init_fresh			     *
 *===========================================================================*/
static int sef_cb_init_fresh(int type, sef_init_info_t *UNUSED(info))
{
	/* Initialize the driver: parse parameters, find and initialize the
	 * controller, set up the device-to-port mapping, and announce our
	 * presence. Always returns OK (failures panic instead).
	 */
	int devind;

	/* Get command line parameters. */
	ahci_get_params();

	/* Probe for recognized devices, skipping matches as appropriate. */
	devind = ahci_probe(ahci_instance);

	if (devind < 0)
		panic("no matching device found");

	/* Initialize the device we found. */
	ahci_init(devind);

	/* Create a mapping from device nodes to port numbers. */
	ahci_set_mapping();

	/* Announce that we are up. */
	blockdriver_announce(type);

	return OK;
}
2257 /*===========================================================================*
2258 * sef_cb_signal_handler *
2259 *===========================================================================*/
2260 static void sef_cb_signal_handler(int signo)
2262 /* In case of a termination signal, shut down this driver.
2264 int port;
2266 if (signo != SIGTERM) return;
2268 /* If any ports are still opened, assume that the system is being shut
2269 * down, and stay up until the last device has been closed.
2271 ahci_exiting = TRUE;
2273 for (port = 0; port < hba_state.nr_ports; port++)
2274 if (port_state[port].open_count > 0)
2275 return;
2277 /* If not, stop the driver and exit immediately. */
2278 ahci_stop();
2280 exit(0);
/*===========================================================================*
 *				sef_local_startup			     *
 *===========================================================================*/
static void sef_local_startup(void)
{
	/* Set callbacks and initialize the System Event Framework (SEF).
	 */

	/* Register init callbacks; live update reuses the fresh-init path. */
	sef_setcb_init_fresh(sef_cb_init_fresh);
	sef_setcb_init_lu(sef_cb_init_fresh);

	/* Register signal callbacks. */
	sef_setcb_signal_handler(sef_cb_signal_handler);

	/* Let SEF perform startup. */
	sef_startup();
}
2302 /*===========================================================================*
2303 * ahci_portname *
2304 *===========================================================================*/
2305 static char *ahci_portname(struct port_state *ps)
2307 /* Return a printable name for the given port. Whenever we can, print a
2308 * "Dx" device number rather than a "Pxx" port number, because the user
2309 * may not be aware of the mapping currently in use.
2311 static char name[] = "AHCI0-P00";
2313 name[4] = '0' + ahci_instance;
2315 if (ps->device == NO_DEVICE) {
2316 name[6] = 'P';
2317 name[7] = '0' + (ps - port_state) / 10;
2318 name[8] = '0' + (ps - port_state) % 10;
2320 else {
2321 name[6] = 'D';
2322 name[7] = '0' + ps->device;
2323 name[8] = 0;
2326 return name;
2329 /*===========================================================================*
2330 * ahci_map_minor *
2331 *===========================================================================*/
2332 static struct port_state *ahci_map_minor(dev_t minor, struct device **dvp)
2334 /* Map a minor device number to a port and a pointer to the partition's
2335 * device structure. Return NULL if this minor device number does not
2336 * identify an actual device.
2338 struct port_state *ps;
2339 int port;
2341 ps = NULL;
2343 if (minor < NR_MINORS) {
2344 port = ahci_map[minor / DEV_PER_DRIVE];
2346 if (port == NO_PORT)
2347 return NULL;
2349 ps = &port_state[port];
2350 *dvp = &ps->part[minor % DEV_PER_DRIVE];
2352 else if ((unsigned) (minor -= MINOR_d0p0s0) < NR_SUBDEVS) {
2353 port = ahci_map[minor / SUB_PER_DRIVE];
2355 if (port == NO_PORT)
2356 return NULL;
2358 ps = &port_state[port];
2359 *dvp = &ps->subpart[minor % SUB_PER_DRIVE];
2362 return ps;
2365 /*===========================================================================*
2366 * ahci_part *
2367 *===========================================================================*/
2368 static struct device *ahci_part(dev_t minor)
2370 /* Return a pointer to the partition information structure of the given
2371 * minor device.
2373 struct device *dv;
2375 if (ahci_map_minor(minor, &dv) == NULL)
2376 return NULL;
2378 return dv;
/*===========================================================================*
 *				ahci_open				     *
 *===========================================================================*/
static int ahci_open(dev_t minor, int access)
{
	/* Open a device. On the first open, recheck removable media, reread
	 * the partition tables, and scale the worker thread count to the
	 * device's queue depth. Returns OK, or ENXIO/EACCES/an ATAPI error.
	 */
	struct port_state *ps;
	int r;

	ps = ahci_get_port(minor);

	/* Only one open request can be processed at a time, due to the fact
	 * that it is an exclusive operation. The thread that handles this call
	 * can therefore freely register itself at slot zero.
	 */
	ps->cmd_info[0].tid = blockdriver_mt_get_tid();

	/* If we are still in the process of initializing this port or device,
	 * wait for completion of that phase first.
	 */
	if (ps->flags & FLAG_BUSY)
		port_wait(ps);

	/* The device may only be opened if it is now properly functioning. */
	if (ps->state != STATE_GOOD_DEV)
		return ENXIO;

	/* Some devices may only be opened in read-only mode. */
	if ((ps->flags & FLAG_READONLY) && (access & W_BIT))
		return EACCES;

	if (ps->open_count == 0) {
		/* The first open request. Clear the barrier flag, if set. */
		ps->flags &= ~FLAG_BARRIER;

		/* Recheck media only when nobody is using the device. */
		if ((ps->flags & FLAG_ATAPI) &&
			(r = atapi_check_medium(ps, 0)) != OK)
			return r;

		/* After rechecking the media, the partition table must always
		 * be read. This is also a convenient time to do it for
		 * nonremovable devices. Start by resetting the partition
		 * tables and setting the working size of the entire device.
		 */
		memset(ps->part, 0, sizeof(ps->part));
		memset(ps->subpart, 0, sizeof(ps->subpart));

		ps->part[0].dv_size =
			mul64(ps->lba_count, cvu64(ps->sector_size));

		/* Read (and sub-parse) the partition tables. */
		partition(&ahci_dtab, ps->device * DEV_PER_DRIVE, P_PRIMARY,
			!!(ps->flags & FLAG_ATAPI));

		/* Allow as many parallel requests as the device can queue. */
		blockdriver_mt_set_workers(ps->device, ps->queue_depth);
	}
	else {
		/* If the barrier flag is set, deny new open requests until the
		 * device is fully closed first.
		 */
		if (ps->flags & FLAG_BARRIER)
			return ENXIO;
	}

	ps->open_count++;

	return OK;
}
/*===========================================================================*
 *				ahci_close				     *
 *===========================================================================*/
static int ahci_close(dev_t minor)
{
	/* Close a device. On the last close, flush the write cache, shrink
	 * the worker thread pool, and — if the driver has been told to
	 * terminate — shut down once no device remains open.
	 */
	struct port_state *ps;
	int port;

	ps = ahci_get_port(minor);

	/* Decrease the open count. */
	if (ps->open_count <= 0) {
		dprintf(V_ERR, ("%s: closing already-closed port\n",
			ahci_portname(ps)));

		return EINVAL;
	}

	ps->open_count--;

	if (ps->open_count > 0)
		return OK;

	/* The device is now fully closed. That also means that the threads for
	 * this device are not needed anymore, so we reduce the count to one.
	 */
	blockdriver_mt_set_workers(ps->device, 1);

	/* Flush the write cache, best-effort, while the device is usable. */
	if (ps->state == STATE_GOOD_DEV && !(ps->flags & FLAG_BARRIER)) {
		dprintf(V_INFO, ("%s: flushing write cache\n",
			ahci_portname(ps)));

		(void) gen_flush_wcache(ps);
	}

	/* If the entire driver has been told to terminate, check whether all
	 * devices are now closed. If so, tell libblockdriver to quit after
	 * replying to the close request.
	 */
	if (ahci_exiting) {
		for (port = 0; port < hba_state.nr_ports; port++)
			if (port_state[port].open_count > 0)
				break;

		if (port == hba_state.nr_ports) {
			ahci_stop();

			blockdriver_mt_terminate();
		}
	}

	return OK;
}
/*===========================================================================*
 *				ahci_transfer				     *
 *===========================================================================*/
static ssize_t ahci_transfer(dev_t minor, int do_write, u64_t position,
	endpoint_t endpt, iovec_t *iovec, unsigned int count, int flags)
{
	/* Perform data transfer on the selected device. Returns the number
	 * of bytes transferred (via port_transfer), OK for a zero-length
	 * result past the partition end, or a negative error code.
	 */
	struct port_state *ps;
	struct device *dv;
	u64_t pos, eof;

	ps = ahci_get_port(minor);
	dv = ahci_part(minor);

	/* The device must be fully operational and not behind a barrier. */
	if (ps->state != STATE_GOOD_DEV || (ps->flags & FLAG_BARRIER))
		return EIO;

	if (count > NR_IOREQS)
		return EINVAL;

	/* Check for basic end-of-partition condition: if the start position of
	 * the request is outside the partition, return success immediately.
	 * The size of the request is obtained, and possibly reduced, later.
	 */
	if (cmp64(position, dv->dv_size) >= 0)
		return OK;

	/* Translate the partition-relative position into an absolute device
	 * position, and compute the absolute end of the partition.
	 */
	pos = add64(dv->dv_base, position);
	eof = add64(dv->dv_base, dv->dv_size);

	return port_transfer(ps, pos, eof, endpt, (iovec_s_t *) iovec, count,
		do_write, flags);
}
/*===========================================================================*
 *				ahci_ioctl				     *
 *===========================================================================*/
static int ahci_ioctl(dev_t minor, unsigned int request, endpoint_t endpt,
	cp_grant_id_t grant)
{
	/* Process I/O control requests. Unknown requests yield EINVAL.
	 */
	struct port_state *ps;
	int r, val;

	ps = ahci_get_port(minor);

	switch (request) {
	case DIOCEJECT:
		/* Eject the medium; meaningful for ATAPI devices only. */
		if (ps->state != STATE_GOOD_DEV || (ps->flags & FLAG_BARRIER))
			return EIO;

		if (!(ps->flags & FLAG_ATAPI))
			return EINVAL;

		return atapi_load_eject(ps, 0, FALSE /*load*/);

	case DIOCOPENCT:
		/* Report the number of open instances of this device. */
		return sys_safecopyto(endpt, grant, 0,
			(vir_bytes) &ps->open_count, sizeof(ps->open_count));

	case DIOCFLUSH:
		/* Flush the device's write cache. */
		if (ps->state != STATE_GOOD_DEV || (ps->flags & FLAG_BARRIER))
			return EIO;

		return gen_flush_wcache(ps);

	case DIOCSETWC:
		/* Enable or disable the device's write cache. */
		if (ps->state != STATE_GOOD_DEV || (ps->flags & FLAG_BARRIER))
			return EIO;

		if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes) &val,
			sizeof(val))) != OK)
			return r;

		return gen_set_wcache(ps, val);

	case DIOCGETWC:
		/* Report whether the device's write cache is enabled. */
		if (ps->state != STATE_GOOD_DEV || (ps->flags & FLAG_BARRIER))
			return EIO;

		if ((r = gen_get_wcache(ps, &val)) != OK)
			return r;

		return sys_safecopyto(endpt, grant, 0, (vir_bytes) &val,
			sizeof(val));
	}

	return EINVAL;
}
2599 /*===========================================================================*
2600 * ahci_device *
2601 *===========================================================================*/
2602 static int ahci_device(dev_t minor, device_id_t *id)
2604 /* Map a minor device number to a device ID.
2606 struct port_state *ps;
2607 struct device *dv;
2609 if ((ps = ahci_map_minor(minor, &dv)) == NULL)
2610 return ENXIO;
2612 *id = ps->device;
2614 return OK;
2617 /*===========================================================================*
2618 * ahci_get_port *
2619 *===========================================================================*/
2620 static struct port_state *ahci_get_port(dev_t minor)
2622 /* Get the port structure associated with the given minor device.
2623 * Called only from worker threads, so the minor device is already
2624 * guaranteed to map to a port.
2626 struct port_state *ps;
2627 struct device *dv;
2629 if ((ps = ahci_map_minor(minor, &dv)) == NULL)
2630 panic("device mapping for minor %d disappeared", minor);
2632 return ps;
/*===========================================================================*
 *				main					     *
 *===========================================================================*/
int main(int argc, char **argv)
{
	/* Driver task entry point: process arguments, perform SEF startup
	 * (which runs sef_cb_init_fresh), and enter libblockdriver's
	 * multithreaded message loop. The loop returns only after
	 * blockdriver_mt_terminate() has been called (see ahci_close).
	 */

	env_setargs(argc, argv);
	sef_local_startup();

	blockdriver_mt_task(&ahci_dtab);

	return 0;
}