ahci: centralize, fix port reset
[minix.git] / drivers / ahci / ahci.c
blob bbb47efddf2055dc85d5a7f4eb9d2e9b5d57a09f
/* Advanced Host Controller Interface (AHCI) driver, by D.C. van Moolenbroek
 * - Multithreading support by Arne Welzel
 * - Native Command Queuing support by Raja Appuswamy
 */
/*
 * This driver is based on the following specifications:
 * - Serial ATA Advanced Host Controller Interface (AHCI) 1.3
 * - Serial ATA Revision 2.6
 * - AT Attachment with Packet Interface 7 (ATA/ATAPI-7)
 * - ATAPI Removable Rewritable Media Devices 1.3 (SFF-8070)
 *
 * The driver supports device hot-plug, active device status tracking,
 * nonremovable ATA and removable ATAPI devices, custom logical sector sizes,
 * sector-unaligned reads, native command queuing and parallel requests to
 * different devices.
 *
 * It does not implement transparent failure recovery, power management, or
 * port multiplier support.
 */
/*
 * An AHCI controller exposes a number of ports (up to 32), each of which may
 * or may not have one device attached (port multipliers are not supported).
 * Each port is maintained independently.
 *
 * The following figure depicts the possible transitions between port states.
 * The NO_PORT state is not included; no transitions can be made from or to it.
 *
 *   +----------+                      +----------+
 *   | SPIN_UP  | ------+       +----->| BAD_DEV  | ------------------+
 *   +----------+       |       |      +----------+                   |
 *        |             |       |           ^                         |
 *        v             v       |           |                         |
 *   +----------+     +----------+     +----------+     +----------+  |
 *   |  NO_DEV  | --> | WAIT_SIG | --> | WAIT_ID  | --> | GOOD_DEV |  |
 *   +----------+     +----------+     +----------+     +----------+  |
 *        ^                |                |                |        |
 *        +----------------+----------------+----------------+--------+
 *
 * At driver startup, all physically present ports are put in SPIN_UP state.
 * This state differs from NO_DEV in that BDEV_OPEN calls will be deferred
 * until either the spin-up timer expires, or a device has been identified on
 * that port. This prevents early BDEV_OPEN calls from failing erroneously at
 * startup time if the device has not yet been able to announce its presence.
 *
 * If a device is detected, either at startup time or after hot-plug, its
 * signature is checked and it is identified, after which it may be determined
 * to be a usable ("good") device, which means that the device is considered to
 * be in a working state. If these steps fail, the device is marked as unusable
 * ("bad"). At any point in time, the device may be disconnected; the port is
 * then put back into NO_DEV state.
 *
 * A device in working state (GOOD_DEV) may or may not have a medium. All ATA
 * devices are assumed to be fixed; all ATAPI devices are assumed to have
 * removable media. To prevent erroneous access to switched devices and media,
 * the driver makes devices inaccessible until they are fully closed (the open
 * count is zero) when a device (hot-plug) or medium change is detected.
 * For hot-plug changes, access is prevented by setting the BARRIER flag until
 * the device is fully closed and then reopened. For medium changes, access is
 * prevented by not acknowledging the medium change until the device is fully
 * closed and reopened. Removable media are not locked in the drive while
 * opened, because the driver author is uncomfortable with that concept.
 *
 * The following table lists for each state, whether the port is started
 * (PxCMD.ST is set), whether a timer is running, what the PxIE mask is to be
 * set to, and what BDEV_OPEN calls on this port should return.
 *
 *   State      Started  Timer        PxIE     BDEV_OPEN
 *   ---------  -------  -----------  -------  ---------
 *   NO_PORT    no       no           (none)   ENXIO
 *   SPIN_UP    no       yes          PRCE     (wait)
 *   NO_DEV     no       no           PRCE     ENXIO
 *   WAIT_SIG   yes      yes          PRCE     (wait)
 *   WAIT_ID    yes      yes          (all)    (wait)
 *   BAD_DEV    no       no           PRCE     ENXIO
 *   GOOD_DEV   yes      per-command  (all)    OK
 *
 * In order to continue deferred BDEV_OPEN calls, the BUSY flag must be unset
 * when changing from SPIN_UP to any state but WAIT_SIG, and when changing from
 * WAIT_SIG to any state but WAIT_ID, and when changing from WAIT_ID to any
 * other state.
 */
/*
 * The maximum byte size of a single transfer (MAX_TRANSFER) is currently set
 * to 4MB. This limit has been chosen for a number of reasons:
 * - The size that can be specified in a Physical Region Descriptor (PRD) is
 *   limited to 4MB for AHCI. Limiting the total transfer size to at most this
 *   size implies that no I/O vector element needs to be split up across PRDs.
 *   This means that the maximum number of needed PRDs can be predetermined.
 * - The limit is below what can be transferred in a single ATA request, namely
 *   64k sectors (i.e., at least 32MB). This means that transfer requests need
 *   never be split up into smaller chunks, reducing implementation complexity.
 * - A single, static timeout can be used for transfers. Very large transfers
 *   can legitimately take up to several minutes -- well beyond the appropriate
 *   timeout range for small transfers. The limit obviates the need for a
 *   timeout scheme that takes into account the transfer size.
 * - Similarly, the transfer limit reduces the opportunity for buggy/malicious
 *   clients to keep the driver busy for a long time with a single request.
 * - The limit is high enough for all practical purposes. The transfer setup
 *   overhead is already relatively negligible at this size, and even larger
 *   requests will not help maximize throughput. As NR_IOREQS is currently set
 *   to 64, the limit still allows file systems to perform I/O requests with
 *   vectors completely filled with 64KB-blocks.
 */
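/* Illustrative arithmetic (added, not in the original source): with
 * MAX_TRANSFER at 4MB and NR_IOREQS at 64, a vector of 64 elements of 64KB
 * each adds up to exactly 4MB, so such a request fits in a single transfer.
 * Since no vector element is ever split across PRDs, the worst case needs one
 * PRD per element, plus at most two extra entries for lead/trail padding.
 */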
#include <minix/drivers.h>
#include <minix/blockdriver_mt.h>
#include <minix/drvlib.h>
#include <machine/pci.h>
#include <sys/ioc_disk.h>
#include <sys/mman.h>
#include <assert.h>

#include "ahci.h"
/* Host Bus Adapter (HBA) state. */
static struct {
	volatile u32_t *base;	/* base address of memory-mapped registers */
	size_t size;		/* size of memory-mapped register area */

	int nr_ports;		/* addressable number of ports (1..NR_PORTS) */
	int nr_cmds;		/* maximum number of commands per port */
	int has_ncq;		/* NCQ support flag */

	int irq;		/* IRQ number */
	int hook_id;		/* IRQ hook ID */
} hba_state;

#define hba_read(r)		(hba_state.base[r])
#define hba_write(r, v)		(hba_state.base[r] = (v))

/* Port state. */
static struct port_state {
	int state;		/* port state */
	unsigned int flags;	/* port flags */

	volatile u32_t *reg;	/* memory-mapped port registers */

	u8_t *mem_base;		/* primary memory buffer virtual address */
	phys_bytes mem_phys;	/* primary memory buffer physical address */
	vir_bytes mem_size;	/* primary memory buffer size */

	/* the FIS, CL, CT[0] and TMP buffers are all in the primary buffer */
	u32_t *fis_base;	/* FIS receive buffer virtual address */
	phys_bytes fis_phys;	/* FIS receive buffer physical address */
	u32_t *cl_base;		/* command list buffer virtual address */
	phys_bytes cl_phys;	/* command list buffer physical address */
	u8_t *ct_base[NR_CMDS];	/* command table virtual address */
	phys_bytes ct_phys[NR_CMDS];	/* command table physical address */
	u8_t *tmp_base;		/* temporary storage buffer virtual address */
	phys_bytes tmp_phys;	/* temporary storage buffer physical address */

	u8_t *pad_base;		/* sector padding buffer virtual address */
	phys_bytes pad_phys;	/* sector padding buffer physical address */
	vir_bytes pad_size;	/* sector padding buffer size */

	u64_t lba_count;	/* number of valid Logical Block Addresses */
	u32_t sector_size;	/* medium sector size in bytes */

	int open_count;		/* number of times this port is opened */

	int device;		/* associated device number, or NO_DEVICE */
	struct device part[DEV_PER_DRIVE];	/* partition bases and sizes */
	struct device subpart[SUB_PER_DRIVE];	/* same for subpartitions */

	timer_t timer;		/* port-specific timeout timer */
	int left;		/* number of tries left before giving up */
				/* (only used for signature probing) */

	int queue_depth;	/* NCQ queue depth */
	u32_t pend_mask;	/* commands not yet complete */
	struct {
		thread_id_t tid;/* ID of the worker thread */
		timer_t timer;	/* timer associated with each request */
		int result;	/* success/failure result of the commands */
	} cmd_info[NR_CMDS];
} port_state[NR_PORTS];
#define port_read(ps, r)	((ps)->reg[r])
#define port_write(ps, r, v)	((ps)->reg[r] = (v))

static int ahci_instance;		/* driver instance number */

static int ahci_verbose;		/* verbosity level (0..4) */

/* Timeout values. These can be overridden with environment variables. */
static long ahci_spinup_timeout = SPINUP_TIMEOUT;
static long ahci_sig_timeout = SIG_TIMEOUT;
static long ahci_sig_checks = NR_SIG_CHECKS;
static long ahci_command_timeout = COMMAND_TIMEOUT;
static long ahci_transfer_timeout = TRANSFER_TIMEOUT;
static long ahci_flush_timeout = FLUSH_TIMEOUT;

static int ahci_map[MAX_DRIVES];	/* device-to-port mapping */

static int ahci_exiting = FALSE;	/* exit after last close? */

#define BUILD_ARG(port, tag)	(((port) << 8) | (tag))
#define GET_PORT(arg)		((arg) >> 8)
#define GET_TAG(arg)		((arg) & 0xFF)
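/* Example (added, illustrative): BUILD_ARG(2, 5) packs port 2 and command tag
 * 5 into the single integer 0x0205; GET_PORT and GET_TAG then recover 2 and 5,
 * respectively. This lets one timer argument identify both the port and the
 * command that timed out.
 */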
#define dprintf(v,s) do {		\
	if (ahci_verbose >= (v))	\
		printf s;		\
} while (0)
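/* Usage note (added): the second argument to dprintf is a complete,
 * parenthesized printf argument list, for example:
 *	dprintf(V_INFO, ("%s: started\n", ahci_portname(ps)));
 * The extra parentheses presumably avoid the need for variadic macros.
 */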
static void port_set_cmd(struct port_state *ps, int cmd, cmd_fis_t *fis,
	u8_t packet[ATAPI_PACKET_SIZE], prd_t *prdt, int nr_prds, int write);
static void port_issue(struct port_state *ps, int cmd, clock_t timeout);
static int port_exec(struct port_state *ps, int cmd, clock_t timeout);
static void port_timeout(struct timer *tp);
static void port_disconnect(struct port_state *ps);

static char *ahci_portname(struct port_state *ps);
static int ahci_open(dev_t minor, int access);
static int ahci_close(dev_t minor);
static ssize_t ahci_transfer(dev_t minor, int do_write, u64_t position,
	endpoint_t endpt, iovec_t *iovec, unsigned int count,
	int flags);
static struct device *ahci_part(dev_t minor);
static void ahci_alarm(clock_t stamp);
static int ahci_ioctl(dev_t minor, unsigned int request, endpoint_t endpt,
	cp_grant_id_t grant);
static void ahci_intr(unsigned int mask);
static int ahci_device(dev_t minor, device_id_t *id);
static struct port_state *ahci_get_port(dev_t minor);
/* AHCI driver table. */
static struct blockdriver ahci_dtab = {
	BLOCKDRIVER_TYPE_DISK,
	ahci_open,
	ahci_close,
	ahci_transfer,
	ahci_ioctl,
	NULL,		/* bdr_cleanup */
	ahci_part,
	NULL,		/* bdr_geometry */
	ahci_intr,
	ahci_alarm,
	NULL,		/* bdr_other */
	ahci_device
};
/*===========================================================================*
 *				atapi_exec				     *
 *===========================================================================*/
static int atapi_exec(struct port_state *ps, int cmd,
	u8_t packet[ATAPI_PACKET_SIZE], size_t size, int write)
{
	/* Execute an ATAPI command. Return OK or error.
	 */
	cmd_fis_t fis;
	prd_t prd[1];
	int nr_prds = 0;

	assert(size <= AHCI_TMP_SIZE);

	/* Fill in the command table with a FIS, a packet, and if a data
	 * transfer is requested, also a PRD.
	 */
	memset(&fis, 0, sizeof(fis));
	fis.cf_cmd = ATA_CMD_PACKET;

	if (size > 0) {
		fis.cf_feat = ATA_FEAT_PACKET_DMA;
		if (!write && (ps->flags & FLAG_USE_DMADIR))
			fis.cf_feat |= ATA_FEAT_PACKET_DMADIR;

		prd[0].vp_addr = ps->tmp_phys;
		prd[0].vp_size = size;
		nr_prds++;
	}

	/* Start the command, and wait for it to complete or fail. */
	port_set_cmd(ps, cmd, &fis, packet, prd, nr_prds, write);

	return port_exec(ps, cmd, ahci_command_timeout);
}
/*===========================================================================*
 *				atapi_test_unit				     *
 *===========================================================================*/
static int atapi_test_unit(struct port_state *ps, int cmd)
{
	/* Test whether the ATAPI device and medium are ready.
	 */
	u8_t packet[ATAPI_PACKET_SIZE];

	memset(packet, 0, sizeof(packet));
	packet[0] = ATAPI_CMD_TEST_UNIT;

	return atapi_exec(ps, cmd, packet, 0, FALSE);
}
/*===========================================================================*
 *				atapi_request_sense			     *
 *===========================================================================*/
static int atapi_request_sense(struct port_state *ps, int cmd, int *sense)
{
	/* Request error (sense) information from an ATAPI device, and return
	 * the sense key. The additional sense codes are not used at this time.
	 */
	u8_t packet[ATAPI_PACKET_SIZE];
	int r;

	memset(packet, 0, sizeof(packet));
	packet[0] = ATAPI_CMD_REQUEST_SENSE;
	packet[4] = ATAPI_REQUEST_SENSE_LEN;

	r = atapi_exec(ps, cmd, packet, ATAPI_REQUEST_SENSE_LEN, FALSE);

	if (r != OK)
		return r;

	dprintf(V_REQ, ("%s: ATAPI SENSE: sense %x ASC %x ASCQ %x\n",
		ahci_portname(ps), ps->tmp_base[2] & 0xF, ps->tmp_base[12],
		ps->tmp_base[13]));

	*sense = ps->tmp_base[2] & 0xF;

	return OK;
}
/*===========================================================================*
 *				atapi_load_eject			     *
 *===========================================================================*/
static int atapi_load_eject(struct port_state *ps, int cmd, int load)
{
	/* Load or eject a medium in an ATAPI device.
	 */
	u8_t packet[ATAPI_PACKET_SIZE];

	memset(packet, 0, sizeof(packet));
	packet[0] = ATAPI_CMD_START_STOP;
	packet[4] = load ? ATAPI_START_STOP_LOAD : ATAPI_START_STOP_EJECT;

	return atapi_exec(ps, cmd, packet, 0, FALSE);
}
/*===========================================================================*
 *				atapi_read_capacity			     *
 *===========================================================================*/
static int atapi_read_capacity(struct port_state *ps, int cmd)
{
	/* Retrieve the LBA count and sector size of an ATAPI medium.
	 */
	u8_t packet[ATAPI_PACKET_SIZE], *buf;
	int r;

	memset(packet, 0, sizeof(packet));
	packet[0] = ATAPI_CMD_READ_CAPACITY;

	r = atapi_exec(ps, cmd, packet, ATAPI_READ_CAPACITY_LEN, FALSE);
	if (r != OK)
		return r;

	/* Store the number of LBA blocks and sector size. */
	buf = ps->tmp_base;
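	/* Note (added): READ CAPACITY returns the big-endian address of the
	 * last block on the medium, hence the increment by one below to
	 * obtain the total block count.
	 */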
	ps->lba_count = add64u(cvu64((buf[0] << 24) | (buf[1] << 16) |
		(buf[2] << 8) | buf[3]), 1);
	ps->sector_size =
		(buf[4] << 24) | (buf[5] << 16) | (buf[6] << 8) | buf[7];

	if (ps->sector_size == 0 || (ps->sector_size & 1)) {
		dprintf(V_ERR, ("%s: invalid medium sector size %u\n",
			ahci_portname(ps), ps->sector_size));

		return EINVAL;
	}

	dprintf(V_INFO,
		("%s: medium detected (%u byte sectors, %lu MB size)\n",
		ahci_portname(ps), ps->sector_size,
		div64u(mul64(ps->lba_count, cvu64(ps->sector_size)),
		1024*1024)));

	return OK;
}
/*===========================================================================*
 *				atapi_check_medium			     *
 *===========================================================================*/
static int atapi_check_medium(struct port_state *ps, int cmd)
{
	/* Check whether a medium is present in a removable-media ATAPI
	 * device. If a new medium is detected, get its total size and sector
	 * size. Return OK only if a usable medium is present, and an error
	 * otherwise.
	 */
	int sense;

	/* Perform a readiness check. */
	if (atapi_test_unit(ps, cmd) != OK) {
		ps->flags &= ~FLAG_HAS_MEDIUM;

		/* If the check failed due to a unit attention condition,
		 * retry reading the medium capacity. Otherwise, assume that
		 * there is no medium available.
		 */
		if (atapi_request_sense(ps, cmd, &sense) != OK ||
				sense != ATAPI_SENSE_UNIT_ATT)
			return ENXIO;
	}

	/* If a medium is newly detected, try reading its capacity now. */
	if (!(ps->flags & FLAG_HAS_MEDIUM)) {
		if (atapi_read_capacity(ps, cmd) != OK)
			return EIO;

		ps->flags |= FLAG_HAS_MEDIUM;
	}

	return OK;
}
/*===========================================================================*
 *				atapi_id_check				     *
 *===========================================================================*/
static int atapi_id_check(struct port_state *ps, u16_t *buf)
{
	/* Determine whether we support this ATAPI device based on the
	 * identification data it returned, and store some of its properties.
	 */

	/* The device must be an ATAPI device; it must have removable media;
	 * it must support DMA without DMADIR, or DMADIR for DMA.
	 */
	if ((buf[ATA_ID_GCAP] & (ATA_ID_GCAP_ATAPI_MASK |
		ATA_ID_GCAP_REMOVABLE | ATA_ID_GCAP_INCOMPLETE)) !=
		(ATA_ID_GCAP_ATAPI | ATA_ID_GCAP_REMOVABLE) ||
		((buf[ATA_ID_CAP] & ATA_ID_CAP_DMA) != ATA_ID_CAP_DMA &&
		(buf[ATA_ID_DMADIR] & (ATA_ID_DMADIR_DMADIR |
		ATA_ID_DMADIR_DMA)) != (ATA_ID_DMADIR_DMADIR |
		ATA_ID_DMADIR_DMA))) {

		dprintf(V_ERR, ("%s: unsupported ATAPI device\n",
			ahci_portname(ps)));

		dprintf(V_DEV, ("%s: GCAP %04x CAP %04x DMADIR %04x\n",
			ahci_portname(ps), buf[ATA_ID_GCAP], buf[ATA_ID_CAP],
			buf[ATA_ID_DMADIR]));

		return FALSE;
	}

	/* Remember whether to use the DMADIR flag when appropriate. */
	if (buf[ATA_ID_DMADIR] & ATA_ID_DMADIR_DMADIR)
		ps->flags |= FLAG_USE_DMADIR;

	/* ATAPI CD-ROM devices are considered read-only. */
	if (((buf[ATA_ID_GCAP] & ATA_ID_GCAP_TYPE_MASK) >>
		ATA_ID_GCAP_TYPE_SHIFT) == ATAPI_TYPE_CDROM)
		ps->flags |= FLAG_READONLY;

	if ((buf[ATA_ID_SUP1] & ATA_ID_SUP1_VALID_MASK) == ATA_ID_SUP1_VALID &&
			!(ps->flags & FLAG_READONLY)) {
		/* Save write cache related capabilities of the device. It is
		 * possible, although unlikely, that a device has support for
		 * either of these but not both.
		 */
		if (buf[ATA_ID_SUP0] & ATA_ID_SUP0_WCACHE)
			ps->flags |= FLAG_HAS_WCACHE;

		if (buf[ATA_ID_SUP1] & ATA_ID_SUP1_FLUSH)
			ps->flags |= FLAG_HAS_FLUSH;
	}

	return TRUE;
}
/*===========================================================================*
 *				atapi_transfer				     *
 *===========================================================================*/
static int atapi_transfer(struct port_state *ps, int cmd, u64_t start_lba,
	unsigned int count, int write, prd_t *prdt, int nr_prds)
{
	/* Perform data transfer from or to an ATAPI device.
	 */
	cmd_fis_t fis;
	u8_t packet[ATAPI_PACKET_SIZE];

	/* Fill in a Register Host to Device FIS. */
	memset(&fis, 0, sizeof(fis));
	fis.cf_cmd = ATA_CMD_PACKET;
	fis.cf_feat = ATA_FEAT_PACKET_DMA;
	if (!write && (ps->flags & FLAG_USE_DMADIR))
		fis.cf_feat |= ATA_FEAT_PACKET_DMADIR;

	/* Fill in a packet. */
	memset(packet, 0, sizeof(packet));
	packet[0] = write ? ATAPI_CMD_WRITE : ATAPI_CMD_READ;
	packet[2] = (ex64lo(start_lba) >> 24) & 0xFF;
	packet[3] = (ex64lo(start_lba) >> 16) & 0xFF;
	packet[4] = (ex64lo(start_lba) >> 8) & 0xFF;
	packet[5] = ex64lo(start_lba) & 0xFF;
	packet[6] = (count >> 24) & 0xFF;
	packet[7] = (count >> 16) & 0xFF;
	packet[8] = (count >> 8) & 0xFF;
	packet[9] = count & 0xFF;
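	/* Note (added): this is a READ(12)/WRITE(12)-style command packet;
	 * bytes 2..5 carry the starting LBA and bytes 6..9 the sector count,
	 * both in big-endian byte order.
	 */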
	/* Start the command, and wait for it to complete or fail. */
	port_set_cmd(ps, cmd, &fis, packet, prdt, nr_prds, write);

	return port_exec(ps, cmd, ahci_transfer_timeout);
}
/*===========================================================================*
 *				ata_id_check				     *
 *===========================================================================*/
static int ata_id_check(struct port_state *ps, u16_t *buf)
{
	/* Determine whether we support this ATA device based on the
	 * identification data it returned, and store some of its properties.
	 */

	/* This must be an ATA device; it must not have removable media;
	 * it must support LBA and DMA; it must support the FLUSH CACHE
	 * command; it must support 48-bit addressing.
	 */
	if ((buf[ATA_ID_GCAP] & (ATA_ID_GCAP_ATA_MASK | ATA_ID_GCAP_REMOVABLE |
		ATA_ID_GCAP_INCOMPLETE)) != ATA_ID_GCAP_ATA ||
		(buf[ATA_ID_CAP] & (ATA_ID_CAP_LBA | ATA_ID_CAP_DMA)) !=
		(ATA_ID_CAP_LBA | ATA_ID_CAP_DMA) ||
		(buf[ATA_ID_SUP1] & (ATA_ID_SUP1_VALID_MASK |
		ATA_ID_SUP1_FLUSH | ATA_ID_SUP1_LBA48)) !=
		(ATA_ID_SUP1_VALID | ATA_ID_SUP1_FLUSH | ATA_ID_SUP1_LBA48)) {

		dprintf(V_ERR, ("%s: unsupported ATA device\n",
			ahci_portname(ps)));

		dprintf(V_DEV, ("%s: GCAP %04x CAP %04x SUP1 %04x\n",
			ahci_portname(ps), buf[ATA_ID_GCAP], buf[ATA_ID_CAP],
			buf[ATA_ID_SUP1]));

		return FALSE;
	}

	/* Get number of LBA blocks, and sector size. */
	ps->lba_count = make64((buf[ATA_ID_LBA1] << 16) | buf[ATA_ID_LBA0],
		(buf[ATA_ID_LBA3] << 16) | buf[ATA_ID_LBA2]);

	/* Determine the queue depth of the device. */
	if (hba_state.has_ncq &&
			(buf[ATA_ID_SATA_CAP] & ATA_ID_SATA_CAP_NCQ)) {
		ps->flags |= FLAG_HAS_NCQ;
		ps->queue_depth =
			(buf[ATA_ID_QDEPTH] & ATA_ID_QDEPTH_MASK) + 1;
		if (ps->queue_depth > hba_state.nr_cmds)
			ps->queue_depth = hba_state.nr_cmds;
	}

	/* For now, we only support long logical sectors. Long physical sector
	 * support may be added later. Note that the given value is in words.
	 */
	if ((buf[ATA_ID_PLSS] & (ATA_ID_PLSS_VALID_MASK | ATA_ID_PLSS_LLS)) ==
		(ATA_ID_PLSS_VALID | ATA_ID_PLSS_LLS))
		ps->sector_size =
			((buf[ATA_ID_LSS1] << 16) | buf[ATA_ID_LSS0]) << 1;
	else
		ps->sector_size = ATA_SECTOR_SIZE;

	if (ps->sector_size < ATA_SECTOR_SIZE) {
		dprintf(V_ERR, ("%s: invalid sector size %u\n",
			ahci_portname(ps), ps->sector_size));

		return FALSE;
	}

	ps->flags |= FLAG_HAS_MEDIUM | FLAG_HAS_FLUSH;

	/* FLUSH CACHE is mandatory for ATA devices; write caches are not. */
	if (buf[ATA_ID_SUP0] & ATA_ID_SUP0_WCACHE)
		ps->flags |= FLAG_HAS_WCACHE;

	/* Check Force Unit Access capability of the device. */
	if ((buf[ATA_ID_ENA2] & (ATA_ID_ENA2_VALID_MASK | ATA_ID_ENA2_FUA)) ==
		(ATA_ID_ENA2_VALID | ATA_ID_ENA2_FUA))
		ps->flags |= FLAG_HAS_FUA;

	return TRUE;
}
/*===========================================================================*
 *				ata_transfer				     *
 *===========================================================================*/
static int ata_transfer(struct port_state *ps, int cmd, u64_t start_lba,
	unsigned int count, int write, int force, prd_t *prdt, int nr_prds)
{
	/* Perform data transfer from or to an ATA device.
	 */
	cmd_fis_t fis;

	assert(count <= ATA_MAX_SECTORS);

	/* Special case for sector counts: 65536 is specified as 0. */
	if (count == ATA_MAX_SECTORS)
		count = 0;

	memset(&fis, 0, sizeof(fis));
	fis.cf_dev = ATA_DEV_LBA;
	if (ps->flags & FLAG_HAS_NCQ) {
		if (write) {
			if (force && (ps->flags & FLAG_HAS_FUA))
				fis.cf_dev |= ATA_DEV_FUA;

			fis.cf_cmd = ATA_CMD_WRITE_FPDMA_QUEUED;
		} else {
			fis.cf_cmd = ATA_CMD_READ_FPDMA_QUEUED;
		}
	}
	else {
		if (write) {
			if (force && (ps->flags & FLAG_HAS_FUA))
				fis.cf_cmd = ATA_CMD_WRITE_DMA_FUA_EXT;
			else
				fis.cf_cmd = ATA_CMD_WRITE_DMA_EXT;
		}
		else {
			fis.cf_cmd = ATA_CMD_READ_DMA_EXT;
		}
	}

	fis.cf_lba = ex64lo(start_lba) & 0x00FFFFFFL;
	fis.cf_lba_exp = ex64lo(rshift64(start_lba, 24)) & 0x00FFFFFFL;
	fis.cf_sec = count & 0xFF;
	fis.cf_sec_exp = (count >> 8) & 0xFF;
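	/* Illustrative example (added): for start_lba = 0x123456789A, cf_lba
	 * receives the low 24 bits (0x56789A) and cf_lba_exp the next 24 bits
	 * (0x001234), together forming the 48-bit LBA.
	 */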
	/* Start the command, and wait for it to complete or fail. */
	port_set_cmd(ps, cmd, &fis, NULL /*packet*/, prdt, nr_prds, write);

	return port_exec(ps, cmd, ahci_transfer_timeout);
}
/*===========================================================================*
 *				gen_identify				     *
 *===========================================================================*/
static int gen_identify(struct port_state *ps, int blocking)
{
	/* Identify an ATA or ATAPI device. If the blocking flag is set, block
	 * until the command has completed; otherwise return immediately.
	 */
	cmd_fis_t fis;
	prd_t prd;

	/* Set up a command, and a single PRD for the result. */
	memset(&fis, 0, sizeof(fis));

	if (ps->flags & FLAG_ATAPI)
		fis.cf_cmd = ATA_CMD_IDENTIFY_PACKET;
	else
		fis.cf_cmd = ATA_CMD_IDENTIFY;

	prd.vp_addr = ps->tmp_phys;
	prd.vp_size = ATA_ID_SIZE;

	/* Start the command, and possibly wait for the result. */
	port_set_cmd(ps, 0, &fis, NULL /*packet*/, &prd, 1, FALSE /*write*/);

	if (blocking)
		return port_exec(ps, 0, ahci_command_timeout);

	port_issue(ps, 0, ahci_command_timeout);

	return OK;
}
/*===========================================================================*
 *				gen_flush_wcache			     *
 *===========================================================================*/
static int gen_flush_wcache(struct port_state *ps)
{
	/* Flush the device's write cache.
	 */
	cmd_fis_t fis;

	/* The FLUSH CACHE command may not be supported by all (writable
	 * ATAPI) devices.
	 */
	if (!(ps->flags & FLAG_HAS_FLUSH))
		return EINVAL;

	/* Use the FLUSH CACHE command for both ATA and ATAPI. We are not
	 * interested in the disk location of a failure, so there is no reason
	 * to use the ATA-only FLUSH CACHE EXT command. Either way, the command
	 * may indeed fail due to a disk error, in which case it should be
	 * repeated. For now, we shift this responsibility onto the caller.
	 */
	memset(&fis, 0, sizeof(fis));
	fis.cf_cmd = ATA_CMD_FLUSH_CACHE;

	/* Start the command, and wait for it to complete or fail.
	 * The flush command may take longer than regular I/O commands.
	 */
	port_set_cmd(ps, 0, &fis, NULL /*packet*/, NULL /*prdt*/, 0,
		FALSE /*write*/);

	return port_exec(ps, 0, ahci_flush_timeout);
}
/*===========================================================================*
 *				gen_get_wcache				     *
 *===========================================================================*/
static int gen_get_wcache(struct port_state *ps, int *val)
{
	/* Retrieve the status of the device's write cache.
	 */
	int r;

	/* Write caches are not mandatory. */
	if (!(ps->flags & FLAG_HAS_WCACHE))
		return EINVAL;

	/* Retrieve information about the device. */
	if ((r = gen_identify(ps, TRUE /*blocking*/)) != OK)
		return r;

	/* Return the current setting. */
	*val = !!(((u16_t *) ps->tmp_base)[ATA_ID_ENA0] & ATA_ID_ENA0_WCACHE);

	return OK;
}
/*===========================================================================*
 *				gen_set_wcache				     *
 *===========================================================================*/
static int gen_set_wcache(struct port_state *ps, int enable)
{
	/* Enable or disable the device's write cache.
	 */
	cmd_fis_t fis;
	clock_t timeout;

	/* Write caches are not mandatory. */
	if (!(ps->flags & FLAG_HAS_WCACHE))
		return EINVAL;

	/* Disabling the write cache causes a (blocking) cache flush. Cache
	 * flushes may take much longer than regular commands.
	 */
	timeout = enable ? ahci_command_timeout : ahci_flush_timeout;

	/* Set up a command. */
	memset(&fis, 0, sizeof(fis));
	fis.cf_cmd = ATA_CMD_SET_FEATURES;
	fis.cf_feat = enable ? ATA_SF_EN_WCACHE : ATA_SF_DI_WCACHE;

	/* Start the command, and wait for it to complete or fail. */
	port_set_cmd(ps, 0, &fis, NULL /*packet*/, NULL /*prdt*/, 0,
		FALSE /*write*/);

	return port_exec(ps, 0, timeout);
}
/*===========================================================================*
 *				ct_set_fis				     *
 *===========================================================================*/
static vir_bytes ct_set_fis(u8_t *ct, cmd_fis_t *fis, unsigned int tag)
{
	/* Fill in the Frame Information Structure part of a command table,
	 * and return the resulting FIS size (in bytes). We only support the
	 * command Register - Host to Device FIS type.
	 */

	memset(ct, 0, ATA_H2D_SIZE);
	ct[ATA_FIS_TYPE] = ATA_FIS_TYPE_H2D;
	ct[ATA_H2D_FLAGS] = ATA_H2D_FLAGS_C;
	ct[ATA_H2D_CMD] = fis->cf_cmd;
	ct[ATA_H2D_LBA_LOW] = fis->cf_lba & 0xFF;
	ct[ATA_H2D_LBA_MID] = (fis->cf_lba >> 8) & 0xFF;
	ct[ATA_H2D_LBA_HIGH] = (fis->cf_lba >> 16) & 0xFF;
	ct[ATA_H2D_DEV] = fis->cf_dev;
	ct[ATA_H2D_LBA_LOW_EXP] = fis->cf_lba_exp & 0xFF;
	ct[ATA_H2D_LBA_MID_EXP] = (fis->cf_lba_exp >> 8) & 0xFF;
	ct[ATA_H2D_LBA_HIGH_EXP] = (fis->cf_lba_exp >> 16) & 0xFF;
	ct[ATA_H2D_CTL] = fis->cf_ctl;

	if (ATA_IS_FPDMA_CMD(fis->cf_cmd)) {
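		/* Note (added): for NCQ (FPDMA) commands, the sector count
		 * travels in the Features fields, and the command tag in the
		 * upper bits of the Count field -- hence the shifted tag.
		 */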
		ct[ATA_H2D_FEAT] = fis->cf_sec;
		ct[ATA_H2D_FEAT_EXP] = fis->cf_sec_exp;
		ct[ATA_H2D_SEC] = tag << ATA_SEC_TAG_SHIFT;
		ct[ATA_H2D_SEC_EXP] = 0;
	} else {
		ct[ATA_H2D_FEAT] = fis->cf_feat;
		ct[ATA_H2D_FEAT_EXP] = fis->cf_feat_exp;
		ct[ATA_H2D_SEC] = fis->cf_sec;
		ct[ATA_H2D_SEC_EXP] = fis->cf_sec_exp;
	}

	return ATA_H2D_SIZE;
}
/*===========================================================================*
 *				ct_set_packet				     *
 *===========================================================================*/
static void ct_set_packet(u8_t *ct, u8_t packet[ATAPI_PACKET_SIZE])
{
	/* Fill in the packet part of a command table.
	 */

	memcpy(&ct[AHCI_CT_PACKET_OFF], packet, ATAPI_PACKET_SIZE);
}
/*===========================================================================*
 *				ct_set_prdt				     *
 *===========================================================================*/
static void ct_set_prdt(u8_t *ct, prd_t *prdt, int nr_prds)
{
	/* Fill in the PRDT part of a command table.
	 */
	u32_t *p;
	int i;

	p = (u32_t *) &ct[AHCI_CT_PRDT_OFF];
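	/* Note (added): each PRD entry consists of four dwords: the lower and
	 * upper halves of the buffer's physical address, a reserved dword,
	 * and the byte count minus one (with the interrupt-on-completion bit
	 * left clear).
	 */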
	for (i = 0; i < nr_prds; i++, prdt++) {
		*p++ = prdt->vp_addr;
		*p++ = 0;
		*p++ = 0;
		*p++ = prdt->vp_size - 1;
	}
}
/*===========================================================================*
 *				port_set_cmd				     *
 *===========================================================================*/
static void port_set_cmd(struct port_state *ps, int cmd, cmd_fis_t *fis,
	u8_t packet[ATAPI_PACKET_SIZE], prd_t *prdt, int nr_prds, int write)
{
	/* Prepare the given command for execution, by constructing a command
	 * table and setting up a command list entry pointing to the table.
	 */
	u8_t *ct;
	u32_t *cl;
	vir_bytes size;

	/* Set a port-specific flag that tells us if the command being
	 * processed is a NCQ command or not.
	 */
	if (ATA_IS_FPDMA_CMD(fis->cf_cmd)) {
		ps->flags |= FLAG_NCQ_MODE;
	} else {
		assert(!ps->pend_mask);
		ps->flags &= ~FLAG_NCQ_MODE;
	}

	/* Construct a command table, consisting of a command FIS, optionally
	 * a packet, and optionally a number of PRDs (making up the actual PRD
	 * table).
	 */
	ct = ps->ct_base[cmd];

	assert(ct != NULL);
	assert(nr_prds <= NR_PRDS);

	size = ct_set_fis(ct, fis, cmd);

	if (packet != NULL)
		ct_set_packet(ct, packet);

	ct_set_prdt(ct, prdt, nr_prds);

	/* Construct a command list entry, pointing to the command's table.
	 * Current assumptions: callers always provide a Register - Host to
	 * Device type FIS, and all non-NCQ commands are prefetchable.
	 */
	cl = &ps->cl_base[cmd * AHCI_CL_ENTRY_DWORDS];

	memset(cl, 0, AHCI_CL_ENTRY_SIZE);
	cl[0] = (nr_prds << AHCI_CL_PRDTL_SHIFT) |
		((!ATA_IS_FPDMA_CMD(fis->cf_cmd) &&
		(nr_prds > 0 || packet != NULL)) ? AHCI_CL_PREFETCHABLE : 0) |
		(write ? AHCI_CL_WRITE : 0) |
		((packet != NULL) ? AHCI_CL_ATAPI : 0) |
		((size / sizeof(u32_t)) << AHCI_CL_CFL_SHIFT);
	cl[2] = ps->ct_phys[cmd];
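	/* Note (added): in the command list entry, dword 1 is the PRD byte
	 * count (written back by the HBA on completion) and dword 3 the upper
	 * half of the command table address; both remain zero here from the
	 * memset above.
	 */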
}

/*===========================================================================*
 *				port_finish_cmd				     *
 *===========================================================================*/
static void port_finish_cmd(struct port_state *ps, int cmd, int result)
{
	/* Finish a command that has either succeeded or failed.
	 */

	assert(cmd < ps->queue_depth);

	dprintf(V_REQ, ("%s: command %d %s\n", ahci_portname(ps),
		cmd, (result == RESULT_SUCCESS) ? "succeeded" : "failed"));

	/* Update the command result, and clear it from the pending list. */
	ps->cmd_info[cmd].result = result;

	assert(ps->pend_mask & (1 << cmd));
	ps->pend_mask &= ~(1 << cmd);

	/* Wake up the thread, unless it is the main thread. This can happen
	 * during initialization, as the gen_identify function is called by the
	 * main thread itself.
	 */
	if (ps->state != STATE_WAIT_ID)
		blockdriver_mt_wakeup(ps->cmd_info[cmd].tid);
}
/*===========================================================================*
 *				port_fail_cmds				     *
 *===========================================================================*/
static void port_fail_cmds(struct port_state *ps)
{
	/* Fail all ongoing commands for a device.
	 */
	int i;

	for (i = 0; ps->pend_mask != 0 && i < ps->queue_depth; i++)
		if (ps->pend_mask & (1 << i))
			port_finish_cmd(ps, i, RESULT_FAILURE);
}
/*===========================================================================*
 *				port_check_cmds				     *
 *===========================================================================*/
static void port_check_cmds(struct port_state *ps)
{
	/* Check what commands have completed, and finish them.
	 */
	u32_t mask, done;
	int i;

	/* See which commands have completed. */
	if (ps->flags & FLAG_NCQ_MODE)
		mask = port_read(ps, AHCI_PORT_SACT);
	else
		mask = port_read(ps, AHCI_PORT_CI);

	/* Wake up threads corresponding to completed commands. */
	done = ps->pend_mask & ~mask;

	for (i = 0; i < ps->queue_depth; i++)
		if (done & (1 << i))
			port_finish_cmd(ps, i, RESULT_SUCCESS);
}
/*===========================================================================*
 *				port_find_cmd				     *
 *===========================================================================*/
static int port_find_cmd(struct port_state *ps)
{
	/* Find a free command tag to queue the current request.
	 */
	int i;

	for (i = 0; i < ps->queue_depth; i++)
		if (!(ps->pend_mask & (1 << i)))
			break;

	/* We should always be able to find a free slot, since a thread runs
	 * only when it is free, and thus, only because a slot is available.
	 */
	assert(i < ps->queue_depth);

	return i;
}
/*===========================================================================*
 *				port_get_padbuf				     *
 *===========================================================================*/
static int port_get_padbuf(struct port_state *ps, size_t size)
{
	/* Make available a temporary buffer for use by this port. Enlarge the
	 * previous buffer if applicable and necessary, potentially changing
	 * its physical address.
	 */

	if (ps->pad_base != NULL && ps->pad_size >= size)
		return OK;

	if (ps->pad_base != NULL)
		free_contig(ps->pad_base, ps->pad_size);

	ps->pad_size = size;
	ps->pad_base = alloc_contig(ps->pad_size, 0, &ps->pad_phys);

	if (ps->pad_base == NULL) {
		dprintf(V_ERR, ("%s: unable to allocate a padding buffer of "
			"size %lu\n", ahci_portname(ps),
			(unsigned long) size));

		return ENOMEM;
	}

	dprintf(V_INFO, ("%s: allocated padding buffer of size %lu\n",
		ahci_portname(ps), (unsigned long) size));

	return OK;
}
/*===========================================================================*
 *				sum_iovec				     *
 *===========================================================================*/
static int sum_iovec(struct port_state *ps, endpoint_t endpt,
	iovec_s_t *iovec, int nr_req, vir_bytes *total)
{
	/* Retrieve the total size of the given I/O vector. Check for alignment
	 * requirements along the way. Return OK (and the total request size)
	 * or an error.
	 */
	vir_bytes size, bytes;
	int i;

	bytes = 0;

	for (i = 0; i < nr_req; i++) {
		size = iovec[i].iov_size;

		if (size == 0 || (size & 1) || size > LONG_MAX) {
			dprintf(V_ERR, ("%s: bad size %lu in iovec from %d\n",
				ahci_portname(ps), size, endpt));
			return EINVAL;
		}

		bytes += size;

		if (bytes > LONG_MAX) {
			dprintf(V_ERR, ("%s: iovec size overflow from %d\n",
				ahci_portname(ps), endpt));
			return EINVAL;
		}
	}

	*total = bytes;
	return OK;
}
/*===========================================================================*
 *				setup_prdt				     *
 *===========================================================================*/
static int setup_prdt(struct port_state *ps, endpoint_t endpt,
	iovec_s_t *iovec, int nr_req, vir_bytes size, vir_bytes lead,
	int write, prd_t *prdt)
{
	/* Convert (the first part of) an I/O vector to an array of Physical
	 * Region Descriptors (PRDs) that can later be used to set up the
	 * command's actual PRD Table. The resulting table as a whole should
	 * be sector-aligned; leading and trailing local buffers may have to
	 * be used for padding as appropriate. Return the number of PRD
	 * entries, or a negative error code.
	 */
	struct vumap_vir vvec[NR_PRDS];
	size_t bytes, trail;
	int i, r, pcount, nr_prds = 0;

	if (lead > 0) {
		/* Allocate a buffer for the data we don't want. */
		if ((r = port_get_padbuf(ps, ps->sector_size)) != OK)
			return r;

		prdt[nr_prds].vp_addr = ps->pad_phys;
		prdt[nr_prds].vp_size = lead;
		nr_prds++;
	}

	/* The sum of lead, size, trail has to be sector-aligned. */
	trail = (ps->sector_size - (lead + size)) % ps->sector_size;
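	/* Illustrative example (added): with 512-byte sectors, lead = 488 and
	 * size = 3000 span 488 + 3000 = 3488 bytes, which reach 416 bytes
	 * into the last sector; trail = 512 - 416 = 96 padding bytes then
	 * complete that sector.
	 */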
	/* Get the physical addresses of the given buffers. */
	for (i = 0; i < nr_req && size > 0; i++) {
		bytes = MIN(iovec[i].iov_size, size);

		if (endpt == SELF)
			vvec[i].vv_addr = (vir_bytes) iovec[i].iov_grant;
		else
			vvec[i].vv_grant = iovec[i].iov_grant;

		vvec[i].vv_size = bytes;

		size -= bytes;
	}

	pcount = i;

	if ((r = sys_vumap(endpt, vvec, i, 0, write ? VUA_READ : VUA_WRITE,
			&prdt[nr_prds], &pcount)) != OK) {
		dprintf(V_ERR, ("%s: unable to map memory from %d (%d)\n",
			ahci_portname(ps), endpt, r));
		return r;
	}

	assert(pcount > 0 && pcount <= i);

	/* Make sure all buffers are physically contiguous and word-aligned. */
	for (i = 0; i < pcount; i++) {
		if (vvec[i].vv_size != prdt[nr_prds].vp_size) {
			dprintf(V_ERR, ("%s: non-contiguous memory from %d\n",
				ahci_portname(ps), endpt));
			return EINVAL;
		}

		if (prdt[nr_prds].vp_addr & 1) {
			dprintf(V_ERR, ("%s: bad physical address from %d\n",
				ahci_portname(ps), endpt));
			return EINVAL;
		}

		nr_prds++;
	}

	if (trail > 0) {
		assert(nr_prds < NR_PRDS);
		prdt[nr_prds].vp_addr = ps->pad_phys + lead;
		prdt[nr_prds].vp_size = trail;
		nr_prds++;
	}

	return nr_prds;
}
/*===========================================================================*
 *				port_transfer				     *
 *===========================================================================*/
static ssize_t port_transfer(struct port_state *ps, u64_t pos, u64_t eof,
	endpoint_t endpt, iovec_s_t *iovec, int nr_req, int write, int flags)
{
	/* Perform an I/O transfer on a port.
	 */
	prd_t prdt[NR_PRDS];
	vir_bytes size, lead;
	unsigned int count, nr_prds;
	u64_t start_lba;
	int r, cmd;

	/* Get the total request size from the I/O vector. */
	if ((r = sum_iovec(ps, endpt, iovec, nr_req, &size)) != OK)
		return r;

	dprintf(V_REQ, ("%s: %s for %lu bytes at pos %08lx%08lx\n",
		ahci_portname(ps), write ? "write" : "read", size,
		ex64hi(pos), ex64lo(pos)));

	assert(ps->state == STATE_GOOD_DEV);
	assert(ps->flags & FLAG_HAS_MEDIUM);
	assert(ps->sector_size > 0);

	/* Limit the maximum size of a single transfer.
	 * See the comments at the top of this file for details.
	 */
	if (size > MAX_TRANSFER)
		size = MAX_TRANSFER;

	/* If necessary, reduce the request size so that the request does not
	 * extend beyond the end of the partition. The caller already
	 * guarantees that the starting position lies within the partition.
	 */
	if (cmp64(add64ul(pos, size), eof) >= 0)
		size = (vir_bytes) diff64(eof, pos);

	start_lba = div64(pos, cvu64(ps->sector_size));
	lead = rem64u(pos, ps->sector_size);
	count = (lead + size + ps->sector_size - 1) / ps->sector_size;
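	/* Illustrative example (added): with 512-byte sectors, pos = 1000 and
	 * size = 3000 yield start_lba = 1, lead = 488 and count = 7: the
	 * transfer starts 488 bytes into sector 1 and covers 7 sectors.
	 */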
	/* Position must be word-aligned for read requests, and sector-aligned
	 * for write requests. We do not support read-modify-write for writes.
	 */
	if ((lead & 1) || (write && lead != 0)) {
		dprintf(V_ERR, ("%s: unaligned position from %d\n",
			ahci_portname(ps), endpt));
		return EINVAL;
	}

	/* Write requests must be sector-aligned. Word alignment of the size is
	 * already guaranteed by sum_iovec().
	 */
	if (write && (size % ps->sector_size) != 0) {
		dprintf(V_ERR, ("%s: unaligned size %lu from %d\n",
			ahci_portname(ps), size, endpt));
		return EINVAL;
	}

	/* Create a vector of physical addresses and sizes for the transfer. */
	nr_prds = r = setup_prdt(ps, endpt, iovec, nr_req, size, lead, write,
		prdt);

	if (r < 0) return r;

	/* Perform the actual transfer. */
	cmd = port_find_cmd(ps);

	if (ps->flags & FLAG_ATAPI)
		r = atapi_transfer(ps, cmd, start_lba, count, write, prdt,
			nr_prds);
	else
		r = ata_transfer(ps, cmd, start_lba, count, write,
			!!(flags & BDEV_FORCEWRITE), prdt, nr_prds);

	if (r != OK) return r;

	return size;
}
/*===========================================================================*
 *				port_hardreset				     *
 *===========================================================================*/
static void port_hardreset(struct port_state *ps)
{
	/* Perform a port-level (hard) reset on the given port.
	 */

	port_write(ps, AHCI_PORT_SCTL, AHCI_PORT_SCTL_DET_INIT);

	micro_delay(COMRESET_DELAY * 1000);	/* COMRESET_DELAY is in ms */

	port_write(ps, AHCI_PORT_SCTL, AHCI_PORT_SCTL_DET_NONE);
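	/* Note (added): per the AHCI specification, PxSCTL.DET must be held
	 * at 1h for at least 1 ms to guarantee that a COMRESET is sent on the
	 * interface; hence the delay between the two writes above.
	 */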
}

/*===========================================================================*
 *				port_start				     *
 *===========================================================================*/
static void port_start(struct port_state *ps)
{
	/* Start the given port, allowing for the execution of commands and the
	 * transfer of data on that port.
	 */
	u32_t cmd;

	/* Reset status registers. */
	port_write(ps, AHCI_PORT_SERR, ~0);
	port_write(ps, AHCI_PORT_IS, ~0);

	/* Start the port. */
	cmd = port_read(ps, AHCI_PORT_CMD);
	port_write(ps, AHCI_PORT_CMD, cmd | AHCI_PORT_CMD_ST);

	dprintf(V_INFO, ("%s: started\n", ahci_portname(ps)));
}
/*===========================================================================*
 *				port_restart				     *
 *===========================================================================*/
static void port_restart(struct port_state *ps)
{
	/* Restart a port after a fatal error has occurred.
	 */
	u32_t cmd;

	/* Fail all outstanding commands. */
	port_fail_cmds(ps);

	/* Stop the port. */
	cmd = port_read(ps, AHCI_PORT_CMD);
	port_write(ps, AHCI_PORT_CMD, cmd & ~AHCI_PORT_CMD_ST);

	SPIN_UNTIL(!(port_read(ps, AHCI_PORT_CMD) & AHCI_PORT_CMD_CR),
		PORTREG_DELAY);

	/* Reset status registers. */
	port_write(ps, AHCI_PORT_SERR, ~0);
	port_write(ps, AHCI_PORT_IS, ~0);

	/* If the BSY and/or DRQ flags are set, reset the port. */
	if (port_read(ps, AHCI_PORT_TFD) &
		(AHCI_PORT_TFD_STS_BSY | AHCI_PORT_TFD_STS_DRQ)) {

		dprintf(V_ERR, ("%s: port reset\n", ahci_portname(ps)));

		/* To keep this driver simple, we do not transparently recover
		 * ongoing requests. Instead, we mark the failing device as
		 * disconnected, and reset it. If the reset succeeds, the
		 * device (or, perhaps, eventually, another device) will come
		 * back up. Any current and future requests to this port will
		 * be failed until the port is fully closed and reopened.
		 */
		port_disconnect(ps);

		/* Trigger a port reset. */
		port_hardreset(ps);

		return;
	}

	/* Start the port. */
	cmd = port_read(ps, AHCI_PORT_CMD);
	port_write(ps, AHCI_PORT_CMD, cmd | AHCI_PORT_CMD_ST);

	dprintf(V_INFO, ("%s: restarted\n", ahci_portname(ps)));
}
1282 /*===========================================================================*
1283 * port_stop *
1284 *===========================================================================*/
1285 static void port_stop(struct port_state *ps)
1287 /* Stop the given port, if not already stopped.
1289 u32_t cmd;
1291 /* Disable interrupts. */
1292 port_write(ps, AHCI_PORT_IE, AHCI_PORT_IE_NONE);
1294 /* Stop the port. */
1295 cmd = port_read(ps, AHCI_PORT_CMD);
1297 if (cmd & (AHCI_PORT_CMD_CR | AHCI_PORT_CMD_ST)) {
1298 cmd &= ~(AHCI_PORT_CMD_CR | AHCI_PORT_CMD_ST);
1300 port_write(ps, AHCI_PORT_CMD, cmd);
1302 SPIN_UNTIL(!(port_read(ps, AHCI_PORT_CMD) & AHCI_PORT_CMD_CR),
1303 PORTREG_DELAY);
1305 dprintf(V_INFO, ("%s: stopped\n", ahci_portname(ps)));
1308 /* Reset status registers. */
1309 port_write(ps, AHCI_PORT_SERR, ~0);
1310 port_write(ps, AHCI_PORT_IS, ~0);
/*===========================================================================*
 *				port_sig_check				     *
 *===========================================================================*/
static void port_sig_check(struct port_state *ps)
{
	/* Check whether the device's signature has become available yet, and
	 * if so, start identifying the device.
	 */
	u32_t tfd, sig;

	tfd = port_read(ps, AHCI_PORT_TFD);

	/* Wait for the BSY flag to be (set and then) cleared first. Note that
	 * clearing it only happens when PxCMD.FRE is set, which is why we
	 * start the port before starting the signature wait cycle.
	 */
	if ((tfd & AHCI_PORT_TFD_STS_BSY) || tfd == AHCI_PORT_TFD_STS_INIT) {
		/* Try for a while before giving up. It may take seconds. */
		if (ps->left > 0) {
			ps->left--;
			set_timer(&ps->cmd_info[0].timer, ahci_sig_timeout,
				port_timeout, BUILD_ARG(ps - port_state, 0));
			return;
		}

		/* If no device is actually attached, disable the port. This
		 * value is also the initial value of the register, before the
		 * BSY flag gets set, so only check this condition on timeout.
		 */
		if (tfd == AHCI_PORT_TFD_STS_INIT) {
			dprintf(V_DEV, ("%s: no device at this port\n",
				ahci_portname(ps)));

			port_stop(ps);

			ps->state = STATE_BAD_DEV;
			ps->flags &= ~FLAG_BUSY;

			return;
		}

		port_restart(ps);

		dprintf(V_ERR, ("%s: timeout waiting for signature\n",
			ahci_portname(ps)));
	}

	/* Check the port's signature. We only support the normal ATA and ATAPI
	 * signatures. We ignore devices reporting anything else.
	 */
	sig = port_read(ps, AHCI_PORT_SIG);

	if (sig != ATA_SIG_ATA && sig != ATA_SIG_ATAPI) {
		dprintf(V_ERR, ("%s: unsupported signature (%08x)\n",
			ahci_portname(ps), sig));

		port_stop(ps);

		ps->state = STATE_BAD_DEV;
		ps->flags &= ~FLAG_BUSY;

		return;
	}

	/* Clear all state flags except the busy flag, which may be relevant if
	 * a BDEV_OPEN call is waiting for the device to become ready; the
	 * barrier flag, which prevents access to the device until it is
	 * completely closed and (re)opened; and, the thread suspension flag.
	 */
	ps->flags &= (FLAG_BUSY | FLAG_BARRIER | FLAG_SUSPENDED);

	if (sig == ATA_SIG_ATAPI)
		ps->flags |= FLAG_ATAPI;

	/* Attempt to identify the device. Do this using continuation, because
	 * we may already be called from port_wait() here, and could end up
	 * confusing the timer expiration procedure.
	 */
	ps->state = STATE_WAIT_ID;
	port_write(ps, AHCI_PORT_IE, AHCI_PORT_IE_MASK);

	(void) gen_identify(ps, FALSE /*blocking*/);
}
/*===========================================================================*
 *				print_string				     *
 *===========================================================================*/
static void print_string(u16_t *buf, int start, int end)
{
	/* Print a string that is stored as little-endian words and padded with
	 * trailing spaces.
	 */
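	/* Example (added): each 16-bit identify word stores two characters,
	 * with the first character in the high byte; the word 0x4142 thus
	 * prints as "AB".
	 */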
	int i, last = 0;

	while (end >= start && buf[end] == 0x2020) end--;

	if (end >= start && (buf[end] & 0xFF) == 0x20) end--, last++;

	for (i = start; i <= end; i++)
		printf("%c%c", buf[i] >> 8, buf[i] & 0xFF);

	if (last)
		printf("%c", buf[i] >> 8);
}
/*===========================================================================*
 *				port_id_check				     *
 *===========================================================================*/
static void port_id_check(struct port_state *ps, int success)
{
	/* The device identification command has either completed or timed out.
	 * Decide whether this device is usable or not, and store some of its
	 * properties.
	 */
	u16_t *buf;

	assert(ps->state == STATE_WAIT_ID);
	assert(!(ps->flags & FLAG_BUSY));	/* unset by callers */

	cancel_timer(&ps->cmd_info[0].timer);

	if (!success)
		dprintf(V_ERR,
			("%s: unable to identify\n", ahci_portname(ps)));

	/* If the identify command itself succeeded, check the results and
	 * store some properties.
	 */
	if (success) {
		buf = (u16_t *) ps->tmp_base;

		if (ps->flags & FLAG_ATAPI)
			success = atapi_id_check(ps, buf);
		else
			success = ata_id_check(ps, buf);
	}

	/* If the device has not been identified successfully, mark it as an
	 * unusable device.
	 */
	if (!success) {
		port_stop(ps);

		ps->state = STATE_BAD_DEV;
		port_write(ps, AHCI_PORT_IE, AHCI_PORT_IE_PRCE);

		return;
	}

	/* The device has been identified successfully, and hence usable. */
	ps->state = STATE_GOOD_DEV;

	/* Print some information about the device. */
	if (ahci_verbose >= V_INFO) {
		printf("%s: ATA%s, ", ahci_portname(ps),
			(ps->flags & FLAG_ATAPI) ? "PI" : "");
		print_string(buf, 27, 46);
		if (ahci_verbose >= V_DEV) {
			printf(" (");
			print_string(buf, 10, 19);
			printf(", ");
			print_string(buf, 23, 26);
			printf(")");
		}

		if (ps->flags & FLAG_HAS_MEDIUM)
			printf(", %u byte sectors, %lu MB size",
				ps->sector_size, div64u(mul64(ps->lba_count,
				cvu64(ps->sector_size)), 1024*1024));

		printf("\n");
	}
}
/*===========================================================================*
 *				port_connect				     *
 *===========================================================================*/
static void port_connect(struct port_state *ps)
{
	/* A device has been found to be attached to this port. Start the port,
	 * and do timed polling for its signature to become available.
	 */

	dprintf(V_INFO, ("%s: device connected\n", ahci_portname(ps)));

	if (ps->state == STATE_SPIN_UP)
		cancel_timer(&ps->cmd_info[0].timer);

	port_start(ps);

	ps->state = STATE_WAIT_SIG;
	ps->left = ahci_sig_checks;

	port_write(ps, AHCI_PORT_IE, AHCI_PORT_IE_PRCE);

	/* Do the first check immediately; who knows, we may get lucky. */
	port_sig_check(ps);
}
/*===========================================================================*
 *				port_disconnect				     *
 *===========================================================================*/
static void port_disconnect(struct port_state *ps)
{
	/* The device has detached from this port. Stop the port if necessary.
	 */

	dprintf(V_INFO, ("%s: device disconnected\n", ahci_portname(ps)));

	if (ps->state != STATE_BAD_DEV)
		port_stop(ps);

	ps->state = STATE_NO_DEV;
	port_write(ps, AHCI_PORT_IE, AHCI_PORT_IE_PRCE);
	ps->flags &= ~FLAG_BUSY;

	/* Fail any ongoing request. The caller may already have done this. */
	port_fail_cmds(ps);

	/* Block any further access until the device is completely closed and
	 * reopened. This prevents arbitrary I/O to a newly plugged-in device
	 * without upper layers noticing.
	 */
	ps->flags |= FLAG_BARRIER;

	/* Inform the blockdriver library to reduce the number of threads. */
	blockdriver_mt_set_workers(ps->device, 1);
}
/*===========================================================================*
 *				port_intr				     *
 *===========================================================================*/
static void port_intr(struct port_state *ps)
{
	/* Process an interrupt on this port.
	 */
	u32_t smask, emask;
	int connected;

	if (ps->state == STATE_NO_PORT) {
		dprintf(V_ERR, ("%s: interrupt for invalid port!\n",
			ahci_portname(ps)));

		return;
	}

	smask = port_read(ps, AHCI_PORT_IS);
	emask = smask & port_read(ps, AHCI_PORT_IE);

	/* Clear the interrupt flags that we saw were set. */
	port_write(ps, AHCI_PORT_IS, smask);

	dprintf(V_REQ, ("%s: interrupt (%08x)\n", ahci_portname(ps), smask));

	/* Check if any commands have completed. */
	port_check_cmds(ps);

	if (emask & AHCI_PORT_IS_PRCS) {
		/* Clear the N diagnostics bit to clear this interrupt. */
		port_write(ps, AHCI_PORT_SERR, AHCI_PORT_SERR_DIAG_N);

		connected = (port_read(ps, AHCI_PORT_SSTS) &
			AHCI_PORT_SSTS_DET_MASK) == AHCI_PORT_SSTS_DET_PHY;

		switch (ps->state) {
		case STATE_BAD_DEV:
		case STATE_GOOD_DEV:
		case STATE_WAIT_SIG:
		case STATE_WAIT_ID:
			port_disconnect(ps);

			/* fall-through */
		default:
			if (!connected)
				break;

			port_connect(ps);
		}
	} else if (smask & AHCI_PORT_IS_MASK) {
		/* We assume that any other interrupt indicates command
		 * completion or (command or device) failure. Unfortunately, if
		 * an NCQ command failed, we cannot easily determine which one
		 * it was. For that reason, after completing all successfully
		 * finished commands (above), we fail all other outstanding
		 * commands and restart the port. This can possibly be improved
		 * later by obtaining per-command status results from the HBA.
		 */

		/* If we were waiting for ID verification, check now. */
		if (ps->state == STATE_WAIT_ID) {
			ps->flags &= ~FLAG_BUSY;
			port_id_check(ps, !(port_read(ps, AHCI_PORT_TFD) &
				(AHCI_PORT_TFD_STS_ERR |
				AHCI_PORT_TFD_STS_DF)));
		}

		/* Check now for failure. There are fatal failures, and there
		 * are failures that set the TFD.STS.ERR field using a D2H
		 * FIS. In both cases, we just restart the port, failing all
		 * commands in the process.
		 */
		if ((port_read(ps, AHCI_PORT_TFD) &
			(AHCI_PORT_TFD_STS_ERR | AHCI_PORT_TFD_STS_DF)) ||
			(smask & AHCI_PORT_IS_RESTART)) {
			port_restart(ps);
		}
	}
}
/*===========================================================================*
 *				port_timeout				     *
 *===========================================================================*/
static void port_timeout(struct timer *tp)
{
	/* A timeout has occurred on this port. Figure out what the timeout is
	 * for, and take appropriate action.
	 */
	struct port_state *ps;
	int port, cmd;

	port = GET_PORT(tmr_arg(tp)->ta_int);
	cmd = GET_TAG(tmr_arg(tp)->ta_int);

	assert(port >= 0 && port < hba_state.nr_ports);

	ps = &port_state[port];

	/* Regardless of the outcome of this timeout, wake up the thread if it
	 * is suspended. This applies only during the initialization.
	 */
	if (ps->flags & FLAG_SUSPENDED) {
		assert(cmd == 0);
		blockdriver_mt_wakeup(ps->cmd_info[0].tid);
	}

	/* If detection of a device after startup timed out, give up on initial
	 * detection and only look for hot plug events from now on.
	 */
	if (ps->state == STATE_SPIN_UP) {
		/* There is one exception: for braindead controllers that don't
		 * generate the right interrupts (cough, VirtualBox), we do an
		 * explicit check to see if a device is connected after all.
		 * Later hot-(un)plug events will not be detected in this case.
		 */
		if ((port_read(ps, AHCI_PORT_SSTS) &
			AHCI_PORT_SSTS_DET_MASK) == AHCI_PORT_SSTS_DET_PHY) {
			dprintf(V_INFO, ("%s: no device connection event\n",
				ahci_portname(ps)));

			port_connect(ps);
		}
		else {
			dprintf(V_INFO, ("%s: spin-up timeout\n",
				ahci_portname(ps)));

			/* If the busy flag is set, a BDEV_OPEN request is
			 * waiting for the detection to finish; clear the busy
			 * flag to return an error to the caller.
			 */
			ps->state = STATE_NO_DEV;
			ps->flags &= ~FLAG_BUSY;
		}

		return;
	}

	/* If a device has been connected and we are waiting for its signature
	 * to become available, check now.
	 */
	if (ps->state == STATE_WAIT_SIG) {
		port_sig_check(ps);

		return;
	}

	/* The only case where the busy flag will still be set at this point is
	 * a failed identify operation, during which the port is in WAIT_ID
	 * state. In that case, clear the BUSY flag; the command is failed
	 * below by restarting the port, after which the identify operation is
	 * finished up.
	 */
	if (ps->flags & FLAG_BUSY) {
		assert(ps->state == STATE_WAIT_ID);
		ps->flags &= ~FLAG_BUSY;
	}

	dprintf(V_ERR, ("%s: timeout\n", ahci_portname(ps)));

	/* Restart the port, failing all current commands. */
	port_restart(ps);

	/* Finish up the identify operation. */
	if (ps->state == STATE_WAIT_ID)
		port_id_check(ps, FALSE);
}
/*===========================================================================*
 *				port_wait				     *
 *===========================================================================*/
static void port_wait(struct port_state *ps)
{
	/* Suspend the current thread until the given port is no longer busy,
	 * due to either command completion or timeout.
	 */

	ps->flags |= FLAG_SUSPENDED;

	while (ps->flags & FLAG_BUSY)
		blockdriver_mt_sleep();

	ps->flags &= ~FLAG_SUSPENDED;
}
1725 /*===========================================================================*
1726 * port_issue *
1727 *===========================================================================*/
1728 static void port_issue(struct port_state *ps, int cmd, clock_t timeout)
1730 /* Issue a command to the port, and set a timer to trigger a timeout
1731 * if the command takes too long to complete.
1734 /* Set the corresponding NCQ command bit, if applicable. */
1735 if (ps->flags & FLAG_HAS_NCQ)
1736 port_write(ps, AHCI_PORT_SACT, 1 << cmd);
1738 /* Make sure that the compiler does not delay any previous write
1739 * operations until after the write to the command issue register.
1741 __insn_barrier();
1743 /* Tell the controller that a new command is ready. */
1744 port_write(ps, AHCI_PORT_CI, 1 << cmd);
1746 /* Update pending commands. */
1747 ps->pend_mask |= 1 << cmd;
1749 /* Set a timer in case the command does not complete at all. */
1750 set_timer(&ps->cmd_info[cmd].timer, timeout, port_timeout,
1751 BUILD_ARG(ps - port_state, cmd));
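/*
 * For illustration: port_issue() above packs the port number and command
 * tag into a single timer argument with BUILD_ARG(), and port_timeout()
 * unpacks it again with GET_PORT() and GET_TAG(). Assuming tags fit in
 * eight bits, one plausible set of definitions would be:
 *
 *	#define BUILD_ARG(port, tag)	(((port) << 8) | (tag))
 *	#define GET_PORT(arg)		((arg) >> 8)
 *	#define GET_TAG(arg)		((arg) & 0xFF)
 *
 * Whatever the actual definitions, the required round-trip property is
 * GET_PORT(BUILD_ARG(p, t)) == p and GET_TAG(BUILD_ARG(p, t)) == t.
 */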
1754 /*===========================================================================*
1755 * port_exec *
1756 *===========================================================================*/
1757 static int port_exec(struct port_state *ps, int cmd, clock_t timeout)
1759 /* Execute a command on a port, wait for the command to complete or for
1760 * a timeout, and return whether the command succeeded or not.
1763 port_issue(ps, cmd, timeout);
1765 /* Put the thread to sleep until a timeout or a command completion
1766 * happens. We used to call port_wait() here, but it relies on the
1767 * per-port suspended flag, and suspension must now be tracked on a
1768 * per-thread, and hence per-tag, basis rather than per port. The
1769 * port_wait() call is therefore retained only to defer open calls
1770 * during device/driver initialization. Here, we register the thread
1771 * and then call sleep directly.
1773 ps->cmd_info[cmd].tid = blockdriver_mt_get_tid();
1775 blockdriver_mt_sleep();
1777 /* Cancelling a timer that just triggered does no harm. */
1778 cancel_timer(&ps->cmd_info[cmd].timer);
1780 assert(!(ps->flags & FLAG_BUSY));
1782 dprintf(V_REQ, ("%s: end of command -- %s\n", ahci_portname(ps),
1783 (ps->cmd_info[cmd].result == RESULT_FAILURE) ?
1784 "failure" : "success"));
1786 if (ps->cmd_info[cmd].result == RESULT_FAILURE)
1787 return EIO;
1789 return OK;
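/*
 * A hypothetical caller would first fill in the command list entry and
 * command table for slot cmd (not shown), and then run the command
 * synchronously, for instance:
 *
 *	return port_exec(ps, cmd, ahci_command_timeout);
 *
 * On failure, the timeout or interrupt path has already restarted the
 * port, so the caller only needs to propagate the error.
 */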
1792 /*===========================================================================*
1793 * port_alloc *
1794 *===========================================================================*/
1795 static void port_alloc(struct port_state *ps)
1797 /* Allocate memory for the given port, and enable FIS receipt. We try
1798 * to cram everything into one 4K page in order to limit memory usage
1799 * as much as possible. More memory may be allocated on demand later,
1800 * but allocation failure should be fatal only here. Note that we do
1801 * not allocate memory for sector padding here, because we do not know
1802 * the device's sector size yet.
1804 size_t fis_off, tmp_off, ct_off; int i;
1805 size_t ct_offs[NR_CMDS];
1806 u32_t cmd;
1808 fis_off = AHCI_CL_SIZE + AHCI_FIS_SIZE - 1;
1809 fis_off -= fis_off % AHCI_FIS_SIZE;
1811 tmp_off = fis_off + AHCI_FIS_SIZE + AHCI_TMP_ALIGN - 1;
1812 tmp_off -= tmp_off % AHCI_TMP_ALIGN;
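/*
 * Both computations above use the standard align-up idiom: add
 * (alignment - 1) and subtract the remainder to round an offset up to the
 * next multiple of the alignment. For example, aligning offset 1025 to a
 * 256-byte boundary gives 1025 + 255 = 1280, and 1280 - (1280 % 256) = 1280.
 */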
1814 /* Compute the offsets of all the command tables, and the total size. */
1815 ct_off = tmp_off + AHCI_TMP_SIZE;
1816 for (i = 0; i < NR_CMDS; i++) {
1817 ct_off += AHCI_CT_ALIGN - 1;
1818 ct_off -= ct_off % AHCI_CT_ALIGN;
1819 ct_offs[i] = ct_off;
1820 ps->mem_size = ct_off + AHCI_CT_SIZE;
1821 ct_off = ps->mem_size;
1824 ps->mem_base = alloc_contig(ps->mem_size, AC_ALIGN4K, &ps->mem_phys);
1825 if (ps->mem_base == NULL)
1826 panic("unable to allocate port memory");
1827 memset(ps->mem_base, 0, ps->mem_size);
1829 ps->cl_base = (u32_t *) ps->mem_base;
1830 ps->cl_phys = ps->mem_phys;
1831 assert(ps->cl_phys % AHCI_CL_SIZE == 0);
1833 ps->fis_base = (u32_t *) (ps->mem_base + fis_off);
1834 ps->fis_phys = ps->mem_phys + fis_off;
1835 assert(ps->fis_phys % AHCI_FIS_SIZE == 0);
1837 ps->tmp_base = (u8_t *) (ps->mem_base + tmp_off);
1838 ps->tmp_phys = ps->mem_phys + tmp_off;
1839 assert(ps->tmp_phys % AHCI_TMP_ALIGN == 0);
1841 for (i = 0; i < NR_CMDS; i++) {
1842 ps->ct_base[i] = ps->mem_base + ct_offs[i];
1843 ps->ct_phys[i] = ps->mem_phys + ct_offs[i];
1844 assert(ps->ct_phys[i] % AHCI_CT_ALIGN == 0);
1847 /* Tell the controller about some of the physical addresses. */
1848 port_write(ps, AHCI_PORT_FBU, 0);
1849 port_write(ps, AHCI_PORT_FB, ps->fis_phys);
1851 port_write(ps, AHCI_PORT_CLBU, 0);
1852 port_write(ps, AHCI_PORT_CLB, ps->cl_phys);
1854 /* Enable FIS receive. */
1855 cmd = port_read(ps, AHCI_PORT_CMD);
1856 port_write(ps, AHCI_PORT_CMD, cmd | AHCI_PORT_CMD_FRE);
1858 ps->pad_base = NULL;
1859 ps->pad_size = 0;
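/*
 * The resulting single-allocation layout, with alignment padding between
 * the areas omitted, looks like this:
 *
 *	+--------------+-------------+------------+------+     +--------+
 *	| command list | receive FIS | tmp buffer | CT 0 | ... | CT n-1 |
 *	+--------------+-------------+------------+------+     +--------+
 */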
1862 /*===========================================================================*
1863 * port_free *
1864 *===========================================================================*/
1865 static void port_free(struct port_state *ps)
1867 /* Disable FIS receipt for the given port, and free previously
1868 * allocated memory.
1870 u32_t cmd;
1873 /* Disable FIS receive. */
1874 cmd = port_read(ps, AHCI_PORT_CMD);
1876 if (cmd & (AHCI_PORT_CMD_FR | AHCI_PORT_CMD_FRE)) {
1877 port_write(ps, AHCI_PORT_CMD, cmd & ~AHCI_PORT_CMD_FRE);
1879 SPIN_UNTIL(!(port_read(ps, AHCI_PORT_CMD) & AHCI_PORT_CMD_FR),
1880 PORTREG_DELAY);
1883 if (ps->pad_base != NULL)
1884 free_contig(ps->pad_base, ps->pad_size);
1886 /* All command tables were carved out of the primary memory page in
1887 * port_alloc(), so they must not be freed separately; freeing that
1888 * page below releases them as well.
1891 free_contig(ps->mem_base, ps->mem_size);
1894 /*===========================================================================*
1895 * port_init *
1896 *===========================================================================*/
1897 static void port_init(struct port_state *ps)
1899 /* Initialize the given port.
1901 u32_t cmd;
1902 int i;
1904 /* Initialize the port state structure. */
1905 ps->queue_depth = 1;
1906 ps->state = STATE_SPIN_UP;
1907 ps->flags = FLAG_BUSY;
1908 ps->sector_size = 0;
1909 ps->open_count = 0;
1910 ps->pend_mask = 0;
1911 for (i = 0; i < NR_CMDS; i++)
1912 init_timer(&ps->cmd_info[i].timer);
1914 ps->reg = (u32_t *) ((u8_t *) hba_state.base +
1915 AHCI_MEM_BASE_SIZE + AHCI_MEM_PORT_SIZE * (ps - port_state));
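/*
 * This pointer arithmetic follows the AHCI 1.3 register map: the per-port
 * register banks start right after the generic host control registers and
 * are laid out back to back, so AHCI_MEM_BASE_SIZE and AHCI_MEM_PORT_SIZE
 * presumably correspond to the 0x100 and 0x80 byte sizes from the spec.
 */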
1917 /* Allocate memory for the port. */
1918 port_alloc(ps);
1920 /* Just listen for device status change events for now. */
1921 port_write(ps, AHCI_PORT_IE, AHCI_PORT_IE_PRCE);
1923 /* Enable device spin-up for HBAs that support staggered spin-up.
1924 * This is a no-op for HBAs that do not support it.
1926 cmd = port_read(ps, AHCI_PORT_CMD);
1927 port_write(ps, AHCI_PORT_CMD, cmd | AHCI_PORT_CMD_SUD);
1929 /* Trigger a port reset. */
1930 port_hardreset(ps);
1932 set_timer(&ps->cmd_info[0].timer, ahci_spinup_timeout,
1933 port_timeout, BUILD_ARG(ps - port_state, 0));
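/*
 * Note that the spin-up timeout borrows the timer of command slot zero,
 * with tag zero; this matches the assert(cmd == 0) that port_timeout()
 * performs for ports whose thread is suspended during initialization.
 */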
1936 /*===========================================================================*
1937 * ahci_probe *
1938 *===========================================================================*/
1939 static int ahci_probe(int skip)
1941 /* Find a matching PCI device.
1943 int r, devind;
1944 u16_t vid, did;
1946 pci_init();
1948 r = pci_first_dev(&devind, &vid, &did);
1949 if (r <= 0)
1950 return -1;
1952 while (skip--) {
1953 r = pci_next_dev(&devind, &vid, &did);
1954 if (r <= 0)
1955 return -1;
1958 pci_reserve(devind);
1960 return devind;
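/*
 * Example: with "instance=1" on the command line, sef_cb_init_fresh()
 * below passes skip=1, so this probe claims the second device that the
 * PCI server exposes to this driver; running one driver copy per
 * controller thus requires only a different "instance" value for each.
 */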
1963 /*===========================================================================*
1964 * ahci_reset *
1965 *===========================================================================*/
1966 static void ahci_reset(void)
1968 /* Reset the HBA. Note that this intentionally leaves AHCI mode
1969 * disabled afterwards; the caller must (re)enable it as needed.
1970 u32_t ghc;
1972 ghc = hba_read(AHCI_HBA_GHC);
1974 hba_write(AHCI_HBA_GHC, ghc | AHCI_HBA_GHC_AE);
1976 hba_write(AHCI_HBA_GHC, ghc | AHCI_HBA_GHC_AE | AHCI_HBA_GHC_HR);
1978 SPIN_UNTIL(!(hba_read(AHCI_HBA_GHC) & AHCI_HBA_GHC_HR), RESET_DELAY);
1980 if (hba_read(AHCI_HBA_GHC) & AHCI_HBA_GHC_HR)
1981 panic("unable to reset HBA");
1984 /*===========================================================================*
1985 * ahci_init *
1986 *===========================================================================*/
1987 static void ahci_init(int devind)
1989 /* Initialize the device.
1991 u32_t base, size, cap, ghc, mask;
1992 int r, port, ioflag;
1994 if ((r = pci_get_bar(devind, PCI_BAR_6, &base, &size, &ioflag)) != OK)
1995 panic("unable to retrieve BAR: %d", r);
1997 if (ioflag)
1998 panic("invalid BAR type");
2000 /* There must be at least one port, and at most NR_PORTS ports. Limit
2001 * the actual total number of ports to the size of the exposed area.
2003 if (size < AHCI_MEM_BASE_SIZE + AHCI_MEM_PORT_SIZE)
2004 panic("HBA memory size too small: %lu", size);
2006 size = MIN(size, AHCI_MEM_BASE_SIZE + AHCI_MEM_PORT_SIZE * NR_PORTS);
2008 hba_state.nr_ports = (size - AHCI_MEM_BASE_SIZE) / AHCI_MEM_PORT_SIZE;
2010 /* Map the register area into local memory. */
2011 hba_state.base = (u32_t *) vm_map_phys(SELF, (void *) base, size);
2012 hba_state.size = size;
2013 if (hba_state.base == MAP_FAILED)
2014 panic("unable to map HBA memory");
2016 /* Retrieve, allocate and enable the controller's IRQ. */
2017 hba_state.irq = pci_attr_r8(devind, PCI_ILR);
2018 hba_state.hook_id = 0;
2020 if ((r = sys_irqsetpolicy(hba_state.irq, 0, &hba_state.hook_id)) != OK)
2021 panic("unable to register IRQ: %d", r);
2023 if ((r = sys_irqenable(&hba_state.hook_id)) != OK)
2024 panic("unable to enable IRQ: %d", r);
2026 /* Reset the HBA. */
2027 ahci_reset();
2029 /* Enable AHCI and interrupts. */
2030 ghc = hba_read(AHCI_HBA_GHC);
2031 hba_write(AHCI_HBA_GHC, ghc | AHCI_HBA_GHC_AE | AHCI_HBA_GHC_IE);
2033 /* Limit the maximum number of commands to the number of command slots the controller supports. */
2035 cap = hba_read(AHCI_HBA_CAP);
2036 hba_state.has_ncq = !!(cap & AHCI_HBA_CAP_SNCQ);
2037 hba_state.nr_cmds = MIN(NR_CMDS,
2038 ((cap >> AHCI_HBA_CAP_NCS_SHIFT) & AHCI_HBA_CAP_NCS_MASK) + 1);
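/*
 * CAP.NCS is a zero-based field: a value of 31 advertises 32 command
 * slots per port. The MIN() above then caps the advertised slot count at
 * the driver's own NR_CMDS limit.
 */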
2040 dprintf(V_INFO, ("AHCI%u: HBA v%d.%d%d, %ld ports, %ld commands, "
2041 "%s queuing, IRQ %d\n",
2042 ahci_instance,
2043 (int) (hba_read(AHCI_HBA_VS) >> 16),
2044 (int) ((hba_read(AHCI_HBA_VS) >> 8) & 0xFF),
2045 (int) (hba_read(AHCI_HBA_VS) & 0xFF),
2046 ((cap >> AHCI_HBA_CAP_NP_SHIFT) & AHCI_HBA_CAP_NP_MASK) + 1,
2047 ((cap >> AHCI_HBA_CAP_NCS_SHIFT) & AHCI_HBA_CAP_NCS_MASK) + 1,
2048 hba_state.has_ncq ? "supports" : "no", hba_state.irq));
2050 dprintf(V_INFO, ("AHCI%u: CAP %08x, CAP2 %08x, PI %08x\n",
2051 ahci_instance, cap, hba_read(AHCI_HBA_CAP2),
2052 hba_read(AHCI_HBA_PI)));
2054 /* Initialize each of the implemented ports. We ignore CAP.NP. */
2055 mask = hba_read(AHCI_HBA_PI);
2057 for (port = 0; port < hba_state.nr_ports; port++) {
2058 port_state[port].device = NO_DEVICE;
2059 port_state[port].state = STATE_NO_PORT;
2061 if (mask & (1 << port))
2062 port_init(&port_state[port]);
2066 /*===========================================================================*
2067 * ahci_stop *
2068 *===========================================================================*/
2069 static void ahci_stop(void)
2071 /* Disable AHCI, and clean up resources to the extent possible.
2073 struct port_state *ps;
2074 int r, port;
2076 for (port = 0; port < hba_state.nr_ports; port++) {
2077 ps = &port_state[port];
2079 if (ps->state != STATE_NO_PORT) {
2080 port_stop(ps);
2082 port_free(ps);
2086 ahci_reset();
2088 if ((r = vm_unmap_phys(SELF, (void *) hba_state.base,
2089 hba_state.size)) != OK)
2090 panic("unable to unmap HBA memory: %d", r);
2092 if ((r = sys_irqrmpolicy(&hba_state.hook_id)) != OK)
2093 panic("unable to deregister IRQ: %d", r);
2096 /*===========================================================================*
2097 * ahci_alarm *
2098 *===========================================================================*/
2099 static void ahci_alarm(clock_t stamp)
2101 /* Process an alarm.
2104 /* Call the port-specific handler for each port that timed out. */
2105 expire_timers(stamp);
2108 /*===========================================================================*
2109 * ahci_intr *
2110 *===========================================================================*/
2111 static void ahci_intr(unsigned int UNUSED(mask))
2113 /* Process an interrupt.
2115 struct port_state *ps;
2116 u32_t mask;
2117 int r, port;
2119 /* Handle an interrupt for each port that has the interrupt bit set. */
2120 mask = hba_read(AHCI_HBA_IS);
2122 for (port = 0; port < hba_state.nr_ports; port++) {
2123 if (mask & (1 << port)) {
2124 ps = &port_state[port];
2126 port_intr(ps);
2128 /* After processing an interrupt, wake up the device
2129 * thread if it is suspended and now no longer busy.
2131 if ((ps->flags & (FLAG_SUSPENDED | FLAG_BUSY)) ==
2132 FLAG_SUSPENDED)
2133 blockdriver_mt_wakeup(ps->cmd_info[0].tid);
2137 /* Clear the bits that we processed. */
2138 hba_write(AHCI_HBA_IS, mask);
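/*
 * AHCI_HBA_IS is a write-1-to-clear register, so writing back the mask
 * read above acknowledges exactly the port interrupts that were handled,
 * without clearing bits that became set in the meantime.
 */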
2140 /* Reenable the interrupt. */
2141 if ((r = sys_irqenable(&hba_state.hook_id)) != OK)
2142 panic("unable to enable IRQ: %d", r);
2145 /*===========================================================================*
2146 * ahci_get_var *
2147 *===========================================================================*/
2148 static void ahci_get_var(char *name, long *v, int timeout)
2150 /* Retrieve an environment variable, and optionally adjust it to the
2151 * scale that we are using internally.
2154 /* The value is supposed to be initialized to a default already. */
2155 (void) env_parse(name, "d", 0, v, 1, LONG_MAX);
2157 /* If this is a timeout, convert from milliseconds to ticks. */
2158 if (timeout)
2159 *v = (*v + 500) * sys_hz() / 1000;
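/*
 * Adding 500 before scaling pads every timeout by about half a second:
 * with a 60 Hz system clock, a 100 ms timeout becomes
 * (100 + 500) * 60 / 1000 = 36 ticks. This also keeps very short timeouts
 * from truncating to zero ticks.
 */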
2162 /*===========================================================================*
2163 * ahci_get_params *
2164 *===========================================================================*/
2165 static void ahci_get_params(void)
2167 /* Retrieve and parse parameters passed to this driver, except the
2168 * device-to-port mapping, which has to be parsed later.
2170 long v;
2172 /* Find out which driver instance we are. */
2173 v = 0;
2174 (void) env_parse("instance", "d", 0, &v, 0, 255);
2175 ahci_instance = (int) v;
2177 /* Initialize the verbosity level. */
2178 v = V_ERR;
2179 (void) env_parse("ahci_verbose", "d", 0, &v, V_NONE, V_REQ);
2180 ahci_verbose = (int) v;
2182 /* Initialize timeout-related values. */
2183 ahci_get_var("ahci_init_timeout", &ahci_spinup_timeout, TRUE);
2184 ahci_get_var("ahci_sig_timeout", &ahci_sig_timeout, TRUE);
2185 ahci_get_var("ahci_sig_checks", &ahci_sig_checks, FALSE);
2186 ahci_get_var("ahci_cmd_timeout", &ahci_command_timeout, TRUE);
2187 ahci_get_var("ahci_io_timeout", &ahci_transfer_timeout, TRUE);
2188 ahci_get_var("ahci_flush_timeout", &ahci_flush_timeout, TRUE);
2191 /*===========================================================================*
2192 * ahci_set_mapping *
2193 *===========================================================================*/
2194 static void ahci_set_mapping(void)
2196 /* Construct a mapping from device nodes to port numbers.
2198 char key[16], val[32], *p;
2199 unsigned int port;
2200 int i, j;
2202 /* Start off with a mapping that includes implemented ports only, in
2203 * order. We choose this mapping over an identity mapping to maximize
2204 * the chance that the user will be able to access the first MAX_DRIVES
2205 * devices. Note that we can only do this after initializing the HBA.
2207 for (i = j = 0; i < NR_PORTS && j < MAX_DRIVES; i++)
2208 if (port_state[i].state != STATE_NO_PORT)
2209 ahci_map[j++] = i;
2211 for ( ; j < MAX_DRIVES; j++)
2212 ahci_map[j] = NO_PORT;
2214 /* See if the user specified a custom mapping. Unlike all other
2215 * configuration options, this is a per-instance setting.
2217 strlcpy(key, "ahci0_map", sizeof(key));
2218 key[4] += ahci_instance;
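/*
 * The instance digit is patched into the key in place; this assumes a
 * single-digit instance number, since instances above 9 would yield a
 * non-digit character here.
 */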
2220 if (env_get_param(key, val, sizeof(val)) == OK) {
2221 /* Parse the mapping, which is assumed to be a comma-separated
2222 * list of zero-based port numbers.
2224 p = val;
2226 for (i = 0; i < MAX_DRIVES; i++) {
2227 if (*p) {
2228 port = (unsigned int) strtoul(p, &p, 0);
2230 if (*p) p++;
2232 ahci_map[i] = port % NR_PORTS;
2234 else ahci_map[i] = NO_PORT;
2238 /* Create a reverse mapping. */
2239 for (i = 0; i < MAX_DRIVES; i++)
2240 if ((j = ahci_map[i]) != NO_PORT)
2241 port_state[j].device = i;
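/*
 * For example, "ahci0_map=5,1" maps the first drive (d0) to port 5 and
 * the second (d1) to port 1, and leaves the remaining drives unmapped;
 * port numbers are zero-based, and the digit in the key selects the
 * driver instance the mapping applies to.
 */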
2244 /*===========================================================================*
2245 * sef_cb_init_fresh *
2246 *===========================================================================*/
2247 static int sef_cb_init_fresh(int type, sef_init_info_t *UNUSED(info))
2249 /* Initialize the driver.
2251 int devind;
2253 /* Get command line parameters. */
2254 ahci_get_params();
2256 /* Probe for recognized devices, skipping matches as appropriate. */
2257 devind = ahci_probe(ahci_instance);
2259 if (devind < 0)
2260 panic("no matching device found");
2262 /* Initialize the device we found. */
2263 ahci_init(devind);
2265 /* Create a mapping from device nodes to port numbers. */
2266 ahci_set_mapping();
2268 /* Announce that we are up. */
2269 blockdriver_announce(type);
2271 return OK;
2274 /*===========================================================================*
2275 * sef_cb_signal_handler *
2276 *===========================================================================*/
2277 static void sef_cb_signal_handler(int signo)
2279 /* In case of a termination signal, shut down this driver.
2281 int port;
2283 if (signo != SIGTERM) return;
2285 /* If any ports are still open, assume that the system is being shut
2286 * down, and stay up until the last device has been closed.
2288 ahci_exiting = TRUE;
2290 for (port = 0; port < hba_state.nr_ports; port++)
2291 if (port_state[port].open_count > 0)
2292 return;
2294 /* If not, stop the driver and exit immediately. */
2295 ahci_stop();
2297 exit(0);
2300 /*===========================================================================*
2301 * sef_local_startup *
2302 *===========================================================================*/
2303 static void sef_local_startup(void)
2305 /* Set callbacks and initialize the System Event Framework (SEF).
2308 /* Register init callbacks. */
2309 sef_setcb_init_fresh(sef_cb_init_fresh);
2310 sef_setcb_init_lu(sef_cb_init_fresh);
2312 /* Register signal callbacks. */
2313 sef_setcb_signal_handler(sef_cb_signal_handler);
2315 /* Let SEF perform startup. */
2316 sef_startup();
2319 /*===========================================================================*
2320 * ahci_portname *
2321 *===========================================================================*/
2322 static char *ahci_portname(struct port_state *ps)
2324 /* Return a printable name for the given port. Whenever we can, print a
2325 * "Dx" device number rather than a "Pxx" port number, because the user
2326 * may not be aware of the mapping currently in use.
2328 static char name[] = "AHCI0-P00";
2330 name[4] = '0' + ahci_instance;
2332 if (ps->device == NO_DEVICE) {
2333 name[6] = 'P';
2334 name[7] = '0' + (ps - port_state) / 10;
2335 name[8] = '0' + (ps - port_state) % 10;
2337 else {
2338 name[6] = 'D';
2339 name[7] = '0' + ps->device;
2340 name[8] = 0;
2343 return name;
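/*
 * Example results: a port mapped to the second device of instance 0
 * yields "AHCI0-D1", while an unmapped port 3 yields "AHCI0-P03".
 */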
2346 /*===========================================================================*
2347 * ahci_map_minor *
2348 *===========================================================================*/
2349 static struct port_state *ahci_map_minor(dev_t minor, struct device **dvp)
2351 /* Map a minor device number to a port and a pointer to the partition's
2352 * device structure. Return NULL if this minor device number does not
2353 * identify an actual device.
2355 struct port_state *ps;
2356 int port;
2358 ps = NULL;
2360 if (minor < NR_MINORS) {
2361 port = ahci_map[minor / DEV_PER_DRIVE];
2363 if (port == NO_PORT)
2364 return NULL;
2366 ps = &port_state[port];
2367 *dvp = &ps->part[minor % DEV_PER_DRIVE];
2369 else if ((unsigned) (minor -= MINOR_d0p0s0) < NR_SUBDEVS) {
2370 port = ahci_map[minor / SUB_PER_DRIVE];
2372 if (port == NO_PORT)
2373 return NULL;
2375 ps = &port_state[port];
2376 *dvp = &ps->subpart[minor % SUB_PER_DRIVE];
2379 return ps;
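/*
 * The minor number layout follows the usual MINIX disk driver convention:
 * the first NR_MINORS minors consist of DEV_PER_DRIVE consecutive numbers
 * per drive (the whole drive plus its primary partitions), and minors from
 * MINOR_d0p0s0 onward select SUB_PER_DRIVE subpartitions per drive.
 * Assuming DEV_PER_DRIVE is 5, minor 7 would select drive 1, part entry 2.
 */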
2382 /*===========================================================================*
2383 * ahci_part *
2384 *===========================================================================*/
2385 static struct device *ahci_part(dev_t minor)
2387 /* Return a pointer to the partition information structure of the given
2388 * minor device.
2390 struct device *dv;
2392 if (ahci_map_minor(minor, &dv) == NULL)
2393 return NULL;
2395 return dv;
2398 /*===========================================================================*
2399 * ahci_open *
2400 *===========================================================================*/
2401 static int ahci_open(dev_t minor, int access)
2403 /* Open a device.
2405 struct port_state *ps;
2406 int r;
2408 ps = ahci_get_port(minor);
2410 /* Only one open request can be processed at a time, because opening
2411 * is an exclusive operation. The thread that handles this call can
2412 * therefore freely register itself at slot zero.
2414 ps->cmd_info[0].tid = blockdriver_mt_get_tid();
2416 /* If we are still in the process of initializing this port or device,
2417 * wait for completion of that phase first.
2419 if (ps->flags & FLAG_BUSY)
2420 port_wait(ps);
2422 /* The device may only be opened if it is now properly functioning. */
2423 if (ps->state != STATE_GOOD_DEV)
2424 return ENXIO;
2426 /* Some devices may only be opened in read-only mode. */
2427 if ((ps->flags & FLAG_READONLY) && (access & W_BIT))
2428 return EACCES;
2430 if (ps->open_count == 0) {
2431 /* The first open request. Clear the barrier flag, if set. */
2432 ps->flags &= ~FLAG_BARRIER;
2434 /* Recheck media only when nobody is using the device. */
2435 if ((ps->flags & FLAG_ATAPI) &&
2436 (r = atapi_check_medium(ps, 0)) != OK)
2437 return r;
2439 /* After rechecking the media, the partition table must always
2440 * be read. This is also a convenient time to do it for
2441 * nonremovable devices. Start by resetting the partition
2442 * tables and setting the working size of the entire device.
2444 memset(ps->part, 0, sizeof(ps->part));
2445 memset(ps->subpart, 0, sizeof(ps->subpart));
2447 ps->part[0].dv_size =
2448 mul64(ps->lba_count, cvu64(ps->sector_size));
2450 partition(&ahci_dtab, ps->device * DEV_PER_DRIVE, P_PRIMARY,
2451 !!(ps->flags & FLAG_ATAPI));
2453 blockdriver_mt_set_workers(ps->device, ps->queue_depth);
2455 else {
2456 /* If the barrier flag is set, deny new open requests until the
2457 * device is fully closed first.
2459 if (ps->flags & FLAG_BARRIER)
2460 return ENXIO;
2463 ps->open_count++;
2465 return OK;
2468 /*===========================================================================*
2469 * ahci_close *
2470 *===========================================================================*/
2471 static int ahci_close(dev_t minor)
2473 /* Close a device.
2475 struct port_state *ps;
2476 int port;
2478 ps = ahci_get_port(minor);
2480 /* Decrease the open count. */
2481 if (ps->open_count <= 0) {
2482 dprintf(V_ERR, ("%s: closing already-closed port\n",
2483 ahci_portname(ps)));
2485 return EINVAL;
2488 ps->open_count--;
2490 if (ps->open_count > 0)
2491 return OK;
2493 /* The device is now fully closed. That also means that the threads for
2494 * this device are not needed anymore, so we reduce the count to one.
2496 blockdriver_mt_set_workers(ps->device, 1);
2498 if (ps->state == STATE_GOOD_DEV && !(ps->flags & FLAG_BARRIER)) {
2499 dprintf(V_INFO, ("%s: flushing write cache\n",
2500 ahci_portname(ps)));
2502 (void) gen_flush_wcache(ps);
2505 /* If the entire driver has been told to terminate, check whether all
2506 * devices are now closed. If so, tell libblockdriver to quit after
2507 * replying to the close request.
2509 if (ahci_exiting) {
2510 for (port = 0; port < hba_state.nr_ports; port++)
2511 if (port_state[port].open_count > 0)
2512 break;
2514 if (port == hba_state.nr_ports) {
2515 ahci_stop();
2517 blockdriver_mt_terminate();
2521 return OK;
2524 /*===========================================================================*
2525 * ahci_transfer *
2526 *===========================================================================*/
2527 static ssize_t ahci_transfer(dev_t minor, int do_write, u64_t position,
2528 endpoint_t endpt, iovec_t *iovec, unsigned int count, int flags)
2530 /* Perform data transfer on the selected device.
2532 struct port_state *ps;
2533 struct device *dv;
2534 u64_t pos, eof;
2536 ps = ahci_get_port(minor);
2537 dv = ahci_part(minor);
2539 if (ps->state != STATE_GOOD_DEV || (ps->flags & FLAG_BARRIER))
2540 return EIO;
2542 if (count > NR_IOREQS)
2543 return EINVAL;
2545 /* Check for basic end-of-partition condition: if the start position of
2546 * the request is outside the partition, return success immediately.
2547 * The size of the request is obtained, and possibly reduced, later.
2549 if (cmp64(position, dv->dv_size) >= 0)
2550 return OK;
2552 pos = add64(dv->dv_base, position);
2553 eof = add64(dv->dv_base, dv->dv_size);
2555 return port_transfer(ps, pos, eof, endpt, (iovec_s_t *) iovec, count,
2556 do_write, flags);
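/*
 * For example, with dv_base at 1 GB and dv_size of 2 GB, a request at
 * position 512 MB yields pos = 1.5 GB and eof = 3 GB; port_transfer()
 * receives eof precisely so that it can limit the transfer to the end of
 * the partition, while requests starting at or past dv_size were already
 * answered with OK above.
 */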
2559 /*===========================================================================*
2560 * ahci_ioctl *
2561 *===========================================================================*/
2562 static int ahci_ioctl(dev_t minor, unsigned int request, endpoint_t endpt,
2563 cp_grant_id_t grant)
2565 /* Process I/O control requests.
2567 struct port_state *ps;
2568 int r, val;
2570 ps = ahci_get_port(minor);
2572 switch (request) {
2573 case DIOCEJECT:
2574 if (ps->state != STATE_GOOD_DEV || (ps->flags & FLAG_BARRIER))
2575 return EIO;
2577 if (!(ps->flags & FLAG_ATAPI))
2578 return EINVAL;
2580 return atapi_load_eject(ps, 0, FALSE /*load*/);
2582 case DIOCOPENCT:
2583 return sys_safecopyto(endpt, grant, 0,
2584 (vir_bytes) &ps->open_count, sizeof(ps->open_count));
2586 case DIOCFLUSH:
2587 if (ps->state != STATE_GOOD_DEV || (ps->flags & FLAG_BARRIER))
2588 return EIO;
2590 return gen_flush_wcache(ps);
2592 case DIOCSETWC:
2593 if (ps->state != STATE_GOOD_DEV || (ps->flags & FLAG_BARRIER))
2594 return EIO;
2596 if ((r = sys_safecopyfrom(endpt, grant, 0, (vir_bytes) &val,
2597 sizeof(val))) != OK)
2598 return r;
2600 return gen_set_wcache(ps, val);
2602 case DIOCGETWC:
2603 if (ps->state != STATE_GOOD_DEV || (ps->flags & FLAG_BARRIER))
2604 return EIO;
2606 if ((r = gen_get_wcache(ps, &val)) != OK)
2607 return r;
2609 return sys_safecopyto(endpt, grant, 0, (vir_bytes) &val,
2610 sizeof(val));
2613 return EINVAL;
2616 /*===========================================================================*
2617 * ahci_device *
2618 *===========================================================================*/
2619 static int ahci_device(dev_t minor, device_id_t *id)
2621 /* Map a minor device number to a device ID.
2623 struct port_state *ps;
2624 struct device *dv;
2626 if ((ps = ahci_map_minor(minor, &dv)) == NULL)
2627 return ENXIO;
2629 *id = ps->device;
2631 return OK;
2634 /*===========================================================================*
2635 * ahci_get_port *
2636 *===========================================================================*/
2637 static struct port_state *ahci_get_port(dev_t minor)
2639 /* Get the port structure associated with the given minor device.
2640 * Called only from worker threads, so the minor device is already
2641 * guaranteed to map to a port.
2643 struct port_state *ps;
2644 struct device *dv;
2646 if ((ps = ahci_map_minor(minor, &dv)) == NULL)
2647 panic("device mapping for minor %d disappeared", minor);
2649 return ps;
2652 /*===========================================================================*
2653 * main *
2654 *===========================================================================*/
2655 int main(int argc, char **argv)
2657 /* Driver task.
2660 env_setargs(argc, argv);
2661 sef_local_startup();
2663 blockdriver_mt_task(&ahci_dtab);
2665 return 0;