1 /* Advanced Host Controller Interface (AHCI) driver, by D.C. van Moolenbroek
2 * - Multithreading support by Arne Welzel
3 * - Native Command Queuing support by Raja Appuswamy
6 * This driver is based on the following specifications:
7 * - Serial ATA Advanced Host Controller Interface (AHCI) 1.3
8 * - Serial ATA Revision 2.6
9 * - AT Attachment with Packet Interface 7 (ATA/ATAPI-7)
10 * - ATAPI Removable Rewritable Media Devices 1.3 (SFF-8070)
12 * The driver supports device hot-plug, active device status tracking,
13 * nonremovable ATA and removable ATAPI devices, custom logical sector sizes,
14 * sector-unaligned reads, native command queuing and parallel requests to
17 * It does not implement transparent failure recovery, power management, or
18 * port multiplier support.
21 * An AHCI controller exposes a number of ports (up to 32), each of which may
22 * or may not have one device attached (port multipliers are not supported).
23 * Each port is maintained independently.
25 * The following figure depicts the possible transitions between port states.
26 * The NO_PORT state is not included; no transitions can be made from or to it.
28 * +----------+ +----------+
29 * | SPIN_UP | ------+ +-----> | BAD_DEV | ------------------+
30 * +----------+ | | +----------+ |
33 * +----------+ +----------+ +----------+ +----------+ |
34 * | NO_DEV | --> | WAIT_SIG | --> | WAIT_ID | --> | GOOD_DEV | |
35 * +----------+ +----------+ +----------+ +----------+ |
37 * +----------------+----------------+----------------+--------+
39 * At driver startup, all physically present ports are put in SPIN_UP state.
40 * This state differs from NO_DEV in that BDEV_OPEN calls will be deferred
41 * until either the spin-up timer expires, or a device has been identified on
42 * that port. This prevents early BDEV_OPEN calls from failing erroneously at
43 * startup time if the device has not yet been able to announce its presence.
45 * If a device is detected, either at startup time or after hot-plug, its
46 * signature is checked and it is identified, after which it may be determined
47 * to be a usable ("good") device, which means that the device is considered to
48 * be in a working state. If these steps fail, the device is marked as unusable
49 * ("bad"). At any point in time, the device may be disconnected; the port is
50 * then put back into NO_DEV state.
52 * A device in working state (GOOD_DEV) may or may not have a medium. All ATA
53 * devices are assumed to be fixed; all ATAPI devices are assumed to have
54 * removable media. To prevent erroneous access to switched devices and media,
55 * the driver makes devices inaccessible until they are fully closed (the open
56 * count is zero) when a device (hot-plug) or medium change is detected.
57 * For hot-plug changes, access is prevented by setting the BARRIER flag until
58 * the device is fully closed and then reopened. For medium changes, access is
59 * prevented by not acknowledging the medium change until the device is fully
60 * closed and reopened. Removable media are not locked in the drive while
61 * opened, because the driver author is uncomfortable with that concept.
63 * The following table lists for each state, whether the port is started
64 * (PxCMD.ST is set), whether a timer is running, what the PxIE mask is to be
65 * set to, and what BDEV_OPEN calls on this port should return.
67 * State Started Timer PxIE BDEV_OPEN
68 * --------- --------- --------- --------- ---------
69 * NO_PORT no no (none) ENXIO
70 * SPIN_UP no yes PRCE (wait)
71 * NO_DEV no no PRCE ENXIO
72 * WAIT_SIG yes yes PRCE (wait)
73 * WAIT_ID yes yes (all) (wait)
74 * BAD_DEV no no PRCE ENXIO
75 * GOOD_DEV yes per-command (all) OK
77 * In order to continue deferred BDEV_OPEN calls, the BUSY flag must be unset
78 * when changing from SPIN_UP to any state but WAIT_SIG, and when changing from
79 * WAIT_SIG to any state but WAIT_ID, and when changing from WAIT_ID to any
83 * The maximum byte size of a single transfer (MAX_TRANSFER) is currently set
84 * to 4MB. This limit has been chosen for a number of reasons:
85 * - The size that can be specified in a Physical Region Descriptor (PRD) is
86 * limited to 4MB for AHCI. Limiting the total transfer size to at most this
87 * size implies that no I/O vector element needs to be split up across PRDs.
88 * This means that the maximum number of needed PRDs can be predetermined.
89 * - The limit is below what can be transferred in a single ATA request, namely
90 * 64k sectors (i.e., at least 32MB). This means that transfer requests need
91 * never be split up into smaller chunks, reducing implementation complexity.
92 * - A single, static timeout can be used for transfers. Very large transfers
93 * can legitimately take up to several minutes -- well beyond the appropriate
94 * timeout range for small transfers. The limit obviates the need for a
95 * timeout scheme that takes into account the transfer size.
96 * - Similarly, the transfer limit reduces the opportunity for buggy/malicious
97 * clients to keep the driver busy for a long time with a single request.
98 * - The limit is high enough for all practical purposes. The transfer setup
99 * overhead is already relatively negligible at this size, and even larger
100 * requests will not help maximize throughput. As NR_IOREQS is currently set
101 * to 64, the limit still allows file systems to perform I/O requests with
102 * vectors completely filled with 64KB-blocks.
104 #include <minix/drivers.h>
105 #include <minix/blockdriver_mt.h>
106 #include <minix/drvlib.h>
107 #include <machine/pci.h>
108 #include <sys/ioc_disk.h>
109 #include <sys/mman.h>
114 /* Host Bus Adapter (HBA) state. */
116 volatile u32_t
*base
; /* base address of memory-mapped registers */
117 size_t size
; /* size of memory-mapped register area */
119 int nr_ports
; /* addressable number of ports (1..NR_PORTS) */
120 int nr_cmds
; /* maximum number of commands per port */
121 int has_ncq
; /* NCQ support flag */
123 int irq
; /* IRQ number */
124 int hook_id
; /* IRQ hook ID */
127 #define hba_read(r) (hba_state.base[r])
128 #define hba_write(r, v) (hba_state.base[r] = (v))
131 static struct port_state
{
132 int state
; /* port state */
133 unsigned int flags
; /* port flags */
135 volatile u32_t
*reg
; /* memory-mapped port registers */
137 u8_t
*mem_base
; /* primary memory buffer virtual address */
138 phys_bytes mem_phys
; /* primary memory buffer physical address */
139 vir_bytes mem_size
; /* primary memory buffer size */
141 /* the FIS, CL, CT[0] and TMP buffers are all in the primary buffer */
142 u32_t
*fis_base
; /* FIS receive buffer virtual address */
143 phys_bytes fis_phys
; /* FIS receive buffer physical address */
144 u32_t
*cl_base
; /* command list buffer virtual address */
145 phys_bytes cl_phys
; /* command list buffer physical address */
146 u8_t
*ct_base
[NR_CMDS
]; /* command table virtual address */
147 phys_bytes ct_phys
[NR_CMDS
]; /* command table physical address */
148 u8_t
*tmp_base
; /* temporary storage buffer virtual address */
149 phys_bytes tmp_phys
; /* temporary storage buffer physical address */
151 u8_t
*pad_base
; /* sector padding buffer virtual address */
152 phys_bytes pad_phys
; /* sector padding buffer physical address */
153 vir_bytes pad_size
; /* sector padding buffer size */
155 u64_t lba_count
; /* number of valid Logical Block Addresses */
156 u32_t sector_size
; /* medium sector size in bytes */
158 int open_count
; /* number of times this port is opened */
160 int device
; /* associated device number, or NO_DEVICE */
161 struct device part
[DEV_PER_DRIVE
]; /* partition bases and sizes */
162 struct device subpart
[SUB_PER_DRIVE
]; /* same for subpartitions */
164 timer_t timer
; /* port-specific timeout timer */
165 int left
; /* number of tries left before giving up */
166 /* (only used for signature probing) */
168 int queue_depth
; /* NCQ queue depth */
169 u32_t pend_mask
; /* commands not yet complete */
171 thread_id_t tid
;/* ID of the worker thread */
172 timer_t timer
; /* timer associated with each request */
173 int result
; /* success/failure result of the commands */
175 } port_state
[NR_PORTS
];
/* Accessors for the memory-mapped registers of a single port. */
#define port_read(ps, r)	((ps)->reg[r])
#define port_write(ps, r, v)	((ps)->reg[r] = (v))

static int ahci_instance;			/* driver instance number */

static int ahci_verbose;			/* verbosity level (0..4) */

/* Timeout values. These can be overridden with environment variables. */
static long ahci_spinup_timeout = SPINUP_TIMEOUT;
static long ahci_sig_timeout = SIG_TIMEOUT;
static long ahci_sig_checks = NR_SIG_CHECKS;
static long ahci_command_timeout = COMMAND_TIMEOUT;
static long ahci_transfer_timeout = TRANSFER_TIMEOUT;
static long ahci_flush_timeout = FLUSH_TIMEOUT;

static int ahci_map[MAX_DRIVES];		/* device-to-port mapping */

static int ahci_exiting = FALSE;		/* exit after last close? */

/* Pack and unpack a (port, tag) pair into/from a single integer argument,
 * as used for timers and interrupt bookkeeping. The tag occupies the low
 * eight bits; the port number occupies the bits above it.
 */
#define BUILD_ARG(port, tag)	(((port) << 8) | (tag))
#define GET_PORT(arg)		((arg) >> 8)
#define GET_TAG(arg)		((arg) & 0xFF)
200 #define dprintf(v,s) do { \
201 if (ahci_verbose >= (v)) \
/* Forward declarations: port-level command construction and execution. */
static void port_set_cmd(struct port_state *ps, int cmd, cmd_fis_t *fis,
	u8_t packet[ATAPI_PACKET_SIZE], prd_t *prdt, int nr_prds, int write);
static void port_issue(struct port_state *ps, int cmd, clock_t timeout);
static int port_exec(struct port_state *ps, int cmd, clock_t timeout);
static void port_timeout(struct timer *tp);
static void port_disconnect(struct port_state *ps);

/* Forward declarations: blockdriver callback entry points. */
static char *ahci_portname(struct port_state *ps);
static int ahci_open(dev_t minor, int access);
static int ahci_close(dev_t minor);
215 static ssize_t
ahci_transfer(dev_t minor
, int do_write
, u64_t position
,
216 endpoint_t endpt
, iovec_t
*iovec
, unsigned int count
,
/* Forward declarations: remaining blockdriver callback entry points. */
static struct device *ahci_part(dev_t minor);
static void ahci_alarm(clock_t stamp);
static int ahci_ioctl(dev_t minor, unsigned int request, endpoint_t endpt,
	cp_grant_id_t grant);
static void ahci_intr(unsigned int mask);
static int ahci_device(dev_t minor, device_id_t *id);
static struct port_state *ahci_get_port(dev_t minor);
226 /* AHCI driver table. */
227 static struct blockdriver ahci_dtab
= {
228 BLOCKDRIVER_TYPE_DISK
,
233 NULL
, /* bdr_cleanup */
235 NULL
, /* bdr_geometry */
238 NULL
, /* bdr_other */
242 /*===========================================================================*
244 *===========================================================================*/
245 static int atapi_exec(struct port_state
*ps
, int cmd
,
246 u8_t packet
[ATAPI_PACKET_SIZE
], size_t size
, int write
)
248 /* Execute an ATAPI command. Return OK or error.
254 assert(size
<= AHCI_TMP_SIZE
);
256 /* Fill in the command table with a FIS, a packet, and if a data
257 * transfer is requested, also a PRD.
259 memset(&fis
, 0, sizeof(fis
));
260 fis
.cf_cmd
= ATA_CMD_PACKET
;
263 fis
.cf_feat
= ATA_FEAT_PACKET_DMA
;
264 if (!write
&& (ps
->flags
& FLAG_USE_DMADIR
))
265 fis
.cf_feat
|= ATA_FEAT_PACKET_DMADIR
;
267 prd
[0].vp_addr
= ps
->tmp_phys
;
268 prd
[0].vp_size
= size
;
272 /* Start the command, and wait for it to complete or fail. */
273 port_set_cmd(ps
, cmd
, &fis
, packet
, prd
, nr_prds
, write
);
275 return port_exec(ps
, cmd
, ahci_command_timeout
);
278 /*===========================================================================*
280 *===========================================================================*/
281 static int atapi_test_unit(struct port_state
*ps
, int cmd
)
283 /* Test whether the ATAPI device and medium are ready.
285 u8_t packet
[ATAPI_PACKET_SIZE
];
287 memset(packet
, 0, sizeof(packet
));
288 packet
[0] = ATAPI_CMD_TEST_UNIT
;
290 return atapi_exec(ps
, cmd
, packet
, 0, FALSE
);
293 /*===========================================================================*
294 * atapi_request_sense *
295 *===========================================================================*/
296 static int atapi_request_sense(struct port_state
*ps
, int cmd
, int *sense
)
298 /* Request error (sense) information from an ATAPI device, and return
299 * the sense key. The additional sense codes are not used at this time.
301 u8_t packet
[ATAPI_PACKET_SIZE
];
304 memset(packet
, 0, sizeof(packet
));
305 packet
[0] = ATAPI_CMD_REQUEST_SENSE
;
306 packet
[4] = ATAPI_REQUEST_SENSE_LEN
;
308 r
= atapi_exec(ps
, cmd
, packet
, ATAPI_REQUEST_SENSE_LEN
, FALSE
);
313 dprintf(V_REQ
, ("%s: ATAPI SENSE: sense %x ASC %x ASCQ %x\n",
314 ahci_portname(ps
), ps
->tmp_base
[2] & 0xF, ps
->tmp_base
[12],
317 *sense
= ps
->tmp_base
[2] & 0xF;
322 /*===========================================================================*
324 *===========================================================================*/
325 static int atapi_load_eject(struct port_state
*ps
, int cmd
, int load
)
327 /* Load or eject a medium in an ATAPI device.
329 u8_t packet
[ATAPI_PACKET_SIZE
];
331 memset(packet
, 0, sizeof(packet
));
332 packet
[0] = ATAPI_CMD_START_STOP
;
333 packet
[4] = load
? ATAPI_START_STOP_LOAD
: ATAPI_START_STOP_EJECT
;
335 return atapi_exec(ps
, cmd
, packet
, 0, FALSE
);
338 /*===========================================================================*
339 * atapi_read_capacity *
340 *===========================================================================*/
341 static int atapi_read_capacity(struct port_state
*ps
, int cmd
)
343 /* Retrieve the LBA count and sector size of an ATAPI medium.
345 u8_t packet
[ATAPI_PACKET_SIZE
], *buf
;
348 memset(packet
, 0, sizeof(packet
));
349 packet
[0] = ATAPI_CMD_READ_CAPACITY
;
351 r
= atapi_exec(ps
, cmd
, packet
, ATAPI_READ_CAPACITY_LEN
, FALSE
);
355 /* Store the number of LBA blocks and sector size. */
357 ps
->lba_count
= add64u(cvu64((buf
[0] << 24) | (buf
[1] << 16) |
358 (buf
[2] << 8) | buf
[3]), 1);
360 (buf
[4] << 24) | (buf
[5] << 16) | (buf
[6] << 8) | buf
[7];
362 if (ps
->sector_size
== 0 || (ps
->sector_size
& 1)) {
363 dprintf(V_ERR
, ("%s: invalid medium sector size %u\n",
364 ahci_portname(ps
), ps
->sector_size
));
370 ("%s: medium detected (%u byte sectors, %lu MB size)\n",
371 ahci_portname(ps
), ps
->sector_size
,
372 div64u(mul64(ps
->lba_count
, cvu64(ps
->sector_size
)),
378 /*===========================================================================*
379 * atapi_check_medium *
380 *===========================================================================*/
381 static int atapi_check_medium(struct port_state
*ps
, int cmd
)
383 /* Check whether a medium is present in a removable-media ATAPI device.
384 * If a new medium is detected, get its total and sector size. Return
385 * OK only if a usable medium is present, and an error otherwise.
389 /* Perform a readiness check. */
390 if (atapi_test_unit(ps
, cmd
) != OK
) {
391 ps
->flags
&= ~FLAG_HAS_MEDIUM
;
393 /* If the check failed due to a unit attention condition, retry
394 * reading the medium capacity. Otherwise, assume that there is
395 * no medium available.
397 if (atapi_request_sense(ps
, cmd
, &sense
) != OK
||
398 sense
!= ATAPI_SENSE_UNIT_ATT
)
402 /* If a medium is newly detected, try reading its capacity now. */
403 if (!(ps
->flags
& FLAG_HAS_MEDIUM
)) {
404 if (atapi_read_capacity(ps
, cmd
) != OK
)
407 ps
->flags
|= FLAG_HAS_MEDIUM
;
413 /*===========================================================================*
415 *===========================================================================*/
416 static int atapi_id_check(struct port_state
*ps
, u16_t
*buf
)
418 /* Determine whether we support this ATAPI device based on the
419 * identification data it returned, and store some of its properties.
422 /* The device must be an ATAPI device; it must have removable media;
423 * it must support DMA without DMADIR, or DMADIR for DMA.
425 if ((buf
[ATA_ID_GCAP
] & (ATA_ID_GCAP_ATAPI_MASK
|
426 ATA_ID_GCAP_REMOVABLE
| ATA_ID_GCAP_INCOMPLETE
)) !=
427 (ATA_ID_GCAP_ATAPI
| ATA_ID_GCAP_REMOVABLE
) ||
428 ((buf
[ATA_ID_CAP
] & ATA_ID_CAP_DMA
) != ATA_ID_CAP_DMA
&&
429 (buf
[ATA_ID_DMADIR
] & (ATA_ID_DMADIR_DMADIR
|
430 ATA_ID_DMADIR_DMA
)) != (ATA_ID_DMADIR_DMADIR
|
431 ATA_ID_DMADIR_DMA
))) {
433 dprintf(V_ERR
, ("%s: unsupported ATAPI device\n",
436 dprintf(V_DEV
, ("%s: GCAP %04x CAP %04x DMADIR %04x\n",
437 ahci_portname(ps
), buf
[ATA_ID_GCAP
], buf
[ATA_ID_CAP
],
438 buf
[ATA_ID_DMADIR
]));
443 /* Remember whether to use the DMADIR flag when appropriate. */
444 if (buf
[ATA_ID_DMADIR
] & ATA_ID_DMADIR_DMADIR
)
445 ps
->flags
|= FLAG_USE_DMADIR
;
447 /* ATAPI CD-ROM devices are considered read-only. */
448 if (((buf
[ATA_ID_GCAP
] & ATA_ID_GCAP_TYPE_MASK
) >>
449 ATA_ID_GCAP_TYPE_SHIFT
) == ATAPI_TYPE_CDROM
)
450 ps
->flags
|= FLAG_READONLY
;
452 if ((buf
[ATA_ID_SUP1
] & ATA_ID_SUP1_VALID_MASK
) == ATA_ID_SUP1_VALID
&&
453 !(ps
->flags
& FLAG_READONLY
)) {
454 /* Save write cache related capabilities of the device. It is
455 * possible, although unlikely, that a device has support for
456 * either of these but not both.
458 if (buf
[ATA_ID_SUP0
] & ATA_ID_SUP0_WCACHE
)
459 ps
->flags
|= FLAG_HAS_WCACHE
;
461 if (buf
[ATA_ID_SUP1
] & ATA_ID_SUP1_FLUSH
)
462 ps
->flags
|= FLAG_HAS_FLUSH
;
468 /*===========================================================================*
470 *===========================================================================*/
471 static int atapi_transfer(struct port_state
*ps
, int cmd
, u64_t start_lba
,
472 unsigned int count
, int write
, prd_t
*prdt
, int nr_prds
)
474 /* Perform data transfer from or to an ATAPI device.
477 u8_t packet
[ATAPI_PACKET_SIZE
];
479 /* Fill in a Register Host to Device FIS. */
480 memset(&fis
, 0, sizeof(fis
));
481 fis
.cf_cmd
= ATA_CMD_PACKET
;
482 fis
.cf_feat
= ATA_FEAT_PACKET_DMA
;
483 if (!write
&& (ps
->flags
& FLAG_USE_DMADIR
))
484 fis
.cf_feat
|= ATA_FEAT_PACKET_DMADIR
;
486 /* Fill in a packet. */
487 memset(packet
, 0, sizeof(packet
));
488 packet
[0] = write
? ATAPI_CMD_WRITE
: ATAPI_CMD_READ
;
489 packet
[2] = (ex64lo(start_lba
) >> 24) & 0xFF;
490 packet
[3] = (ex64lo(start_lba
) >> 16) & 0xFF;
491 packet
[4] = (ex64lo(start_lba
) >> 8) & 0xFF;
492 packet
[5] = ex64lo(start_lba
) & 0xFF;
493 packet
[6] = (count
>> 24) & 0xFF;
494 packet
[7] = (count
>> 16) & 0xFF;
495 packet
[8] = (count
>> 8) & 0xFF;
496 packet
[9] = count
& 0xFF;
498 /* Start the command, and wait for it to complete or fail. */
499 port_set_cmd(ps
, cmd
, &fis
, packet
, prdt
, nr_prds
, write
);
501 return port_exec(ps
, cmd
, ahci_transfer_timeout
);
504 /*===========================================================================*
506 *===========================================================================*/
507 static int ata_id_check(struct port_state
*ps
, u16_t
*buf
)
509 /* Determine whether we support this ATA device based on the
510 * identification data it returned, and store some of its properties.
513 /* This must be an ATA device; it must not have removable media;
514 * it must support LBA and DMA; it must support the FLUSH CACHE
515 * command; it must support 48-bit addressing.
517 if ((buf
[ATA_ID_GCAP
] & (ATA_ID_GCAP_ATA_MASK
| ATA_ID_GCAP_REMOVABLE
|
518 ATA_ID_GCAP_INCOMPLETE
)) != ATA_ID_GCAP_ATA
||
519 (buf
[ATA_ID_CAP
] & (ATA_ID_CAP_LBA
| ATA_ID_CAP_DMA
)) !=
520 (ATA_ID_CAP_LBA
| ATA_ID_CAP_DMA
) ||
521 (buf
[ATA_ID_SUP1
] & (ATA_ID_SUP1_VALID_MASK
|
522 ATA_ID_SUP1_FLUSH
| ATA_ID_SUP1_LBA48
)) !=
523 (ATA_ID_SUP1_VALID
| ATA_ID_SUP1_FLUSH
| ATA_ID_SUP1_LBA48
)) {
525 dprintf(V_ERR
, ("%s: unsupported ATA device\n",
528 dprintf(V_DEV
, ("%s: GCAP %04x CAP %04x SUP1 %04x\n",
529 ahci_portname(ps
), buf
[ATA_ID_GCAP
], buf
[ATA_ID_CAP
],
535 /* Get number of LBA blocks, and sector size. */
536 ps
->lba_count
= make64((buf
[ATA_ID_LBA1
] << 16) | buf
[ATA_ID_LBA0
],
537 (buf
[ATA_ID_LBA3
] << 16) | buf
[ATA_ID_LBA2
]);
539 /* Determine the queue depth of the device. */
540 if (hba_state
.has_ncq
&&
541 (buf
[ATA_ID_SATA_CAP
] & ATA_ID_SATA_CAP_NCQ
)) {
542 ps
->flags
|= FLAG_HAS_NCQ
;
544 (buf
[ATA_ID_QDEPTH
] & ATA_ID_QDEPTH_MASK
) + 1;
545 if (ps
->queue_depth
> hba_state
.nr_cmds
)
546 ps
->queue_depth
= hba_state
.nr_cmds
;
549 /* For now, we only support long logical sectors. Long physical sector
550 * support may be added later. Note that the given value is in words.
552 if ((buf
[ATA_ID_PLSS
] & (ATA_ID_PLSS_VALID_MASK
| ATA_ID_PLSS_LLS
)) ==
553 (ATA_ID_PLSS_VALID
| ATA_ID_PLSS_LLS
))
555 ((buf
[ATA_ID_LSS1
] << 16) | buf
[ATA_ID_LSS0
]) << 1;
557 ps
->sector_size
= ATA_SECTOR_SIZE
;
559 if (ps
->sector_size
< ATA_SECTOR_SIZE
) {
560 dprintf(V_ERR
, ("%s: invalid sector size %u\n",
561 ahci_portname(ps
), ps
->sector_size
));
566 ps
->flags
|= FLAG_HAS_MEDIUM
| FLAG_HAS_FLUSH
;
568 /* FLUSH CACHE is mandatory for ATA devices; write caches are not. */
569 if (buf
[ATA_ID_SUP0
] & ATA_ID_SUP0_WCACHE
)
570 ps
->flags
|= FLAG_HAS_WCACHE
;
572 /* Check Force Unit Access capability of the device. */
573 if ((buf
[ATA_ID_ENA2
] & (ATA_ID_ENA2_VALID_MASK
| ATA_ID_ENA2_FUA
)) ==
574 (ATA_ID_ENA2_VALID
| ATA_ID_ENA2_FUA
))
575 ps
->flags
|= FLAG_HAS_FUA
;
580 /*===========================================================================*
582 *===========================================================================*/
583 static int ata_transfer(struct port_state
*ps
, int cmd
, u64_t start_lba
,
584 unsigned int count
, int write
, int force
, prd_t
*prdt
, int nr_prds
)
586 /* Perform data transfer from or to an ATA device.
590 assert(count
<= ATA_MAX_SECTORS
);
592 /* Special case for sector counts: 65536 is specified as 0. */
593 if (count
== ATA_MAX_SECTORS
)
596 memset(&fis
, 0, sizeof(fis
));
597 fis
.cf_dev
= ATA_DEV_LBA
;
598 if (ps
->flags
& FLAG_HAS_NCQ
) {
600 if (force
&& (ps
->flags
& FLAG_HAS_FUA
))
601 fis
.cf_dev
|= ATA_DEV_FUA
;
603 fis
.cf_cmd
= ATA_CMD_WRITE_FPDMA_QUEUED
;
605 fis
.cf_cmd
= ATA_CMD_READ_FPDMA_QUEUED
;
610 if (force
&& (ps
->flags
& FLAG_HAS_FUA
))
611 fis
.cf_cmd
= ATA_CMD_WRITE_DMA_FUA_EXT
;
613 fis
.cf_cmd
= ATA_CMD_WRITE_DMA_EXT
;
616 fis
.cf_cmd
= ATA_CMD_READ_DMA_EXT
;
619 fis
.cf_lba
= ex64lo(start_lba
) & 0x00FFFFFFL
;
620 fis
.cf_lba_exp
= ex64lo(rshift64(start_lba
, 24)) & 0x00FFFFFFL
;
621 fis
.cf_sec
= count
& 0xFF;
622 fis
.cf_sec_exp
= (count
>> 8) & 0xFF;
624 /* Start the command, and wait for it to complete or fail. */
625 port_set_cmd(ps
, cmd
, &fis
, NULL
/*packet*/, prdt
, nr_prds
, write
);
627 return port_exec(ps
, cmd
, ahci_transfer_timeout
);
630 /*===========================================================================*
632 *===========================================================================*/
633 static int gen_identify(struct port_state
*ps
, int blocking
)
635 /* Identify an ATA or ATAPI device. If the blocking flag is set, block
636 * until the command has completed; otherwise return immediately.
641 /* Set up a command, and a single PRD for the result. */
642 memset(&fis
, 0, sizeof(fis
));
644 if (ps
->flags
& FLAG_ATAPI
)
645 fis
.cf_cmd
= ATA_CMD_IDENTIFY_PACKET
;
647 fis
.cf_cmd
= ATA_CMD_IDENTIFY
;
649 prd
.vp_addr
= ps
->tmp_phys
;
650 prd
.vp_size
= ATA_ID_SIZE
;
652 /* Start the command, and possibly wait for the result. */
653 port_set_cmd(ps
, 0, &fis
, NULL
/*packet*/, &prd
, 1, FALSE
/*write*/);
656 return port_exec(ps
, 0, ahci_command_timeout
);
658 port_issue(ps
, 0, ahci_command_timeout
);
663 /*===========================================================================*
665 *===========================================================================*/
666 static int gen_flush_wcache(struct port_state
*ps
)
668 /* Flush the device's write cache.
672 /* The FLUSH CACHE command may not be supported by all (writable ATAPI)
675 if (!(ps
->flags
& FLAG_HAS_FLUSH
))
678 /* Use the FLUSH CACHE command for both ATA and ATAPI. We are not
679 * interested in the disk location of a failure, so there is no reason
680 * to use the ATA-only FLUSH CACHE EXT command. Either way, the command
681 * may indeed fail due to a disk error, in which case it should be
682 * repeated. For now, we shift this responsibility onto the caller.
684 memset(&fis
, 0, sizeof(fis
));
685 fis
.cf_cmd
= ATA_CMD_FLUSH_CACHE
;
687 /* Start the command, and wait for it to complete or fail.
688 * The flush command may take longer than regular I/O commands.
690 port_set_cmd(ps
, 0, &fis
, NULL
/*packet*/, NULL
/*prdt*/, 0,
693 return port_exec(ps
, 0, ahci_flush_timeout
);
696 /*===========================================================================*
698 *===========================================================================*/
699 static int gen_get_wcache(struct port_state
*ps
, int *val
)
701 /* Retrieve the status of the device's write cache.
705 /* Write caches are not mandatory. */
706 if (!(ps
->flags
& FLAG_HAS_WCACHE
))
709 /* Retrieve information about the device. */
710 if ((r
= gen_identify(ps
, TRUE
/*blocking*/)) != OK
)
713 /* Return the current setting. */
714 *val
= !!(((u16_t
*) ps
->tmp_base
)[ATA_ID_ENA0
] & ATA_ID_ENA0_WCACHE
);
719 /*===========================================================================*
721 *===========================================================================*/
722 static int gen_set_wcache(struct port_state
*ps
, int enable
)
724 /* Enable or disable the device's write cache.
729 /* Write caches are not mandatory. */
730 if (!(ps
->flags
& FLAG_HAS_WCACHE
))
733 /* Disabling the write cache causes a (blocking) cache flush. Cache
734 * flushes may take much longer than regular commands.
736 timeout
= enable
? ahci_command_timeout
: ahci_flush_timeout
;
738 /* Set up a command. */
739 memset(&fis
, 0, sizeof(fis
));
740 fis
.cf_cmd
= ATA_CMD_SET_FEATURES
;
741 fis
.cf_feat
= enable
? ATA_SF_EN_WCACHE
: ATA_SF_DI_WCACHE
;
743 /* Start the command, and wait for it to complete or fail. */
744 port_set_cmd(ps
, 0, &fis
, NULL
/*packet*/, NULL
/*prdt*/, 0,
747 return port_exec(ps
, 0, timeout
);
750 /*===========================================================================*
752 *===========================================================================*/
753 static vir_bytes
ct_set_fis(u8_t
*ct
, cmd_fis_t
*fis
, unsigned int tag
)
755 /* Fill in the Frame Information Structure part of a command table,
756 * and return the resulting FIS size (in bytes). We only support the
757 * command Register - Host to Device FIS type.
760 memset(ct
, 0, ATA_H2D_SIZE
);
761 ct
[ATA_FIS_TYPE
] = ATA_FIS_TYPE_H2D
;
762 ct
[ATA_H2D_FLAGS
] = ATA_H2D_FLAGS_C
;
763 ct
[ATA_H2D_CMD
] = fis
->cf_cmd
;
764 ct
[ATA_H2D_LBA_LOW
] = fis
->cf_lba
& 0xFF;
765 ct
[ATA_H2D_LBA_MID
] = (fis
->cf_lba
>> 8) & 0xFF;
766 ct
[ATA_H2D_LBA_HIGH
] = (fis
->cf_lba
>> 16) & 0xFF;
767 ct
[ATA_H2D_DEV
] = fis
->cf_dev
;
768 ct
[ATA_H2D_LBA_LOW_EXP
] = fis
->cf_lba_exp
& 0xFF;
769 ct
[ATA_H2D_LBA_MID_EXP
] = (fis
->cf_lba_exp
>> 8) & 0xFF;
770 ct
[ATA_H2D_LBA_HIGH_EXP
] = (fis
->cf_lba_exp
>> 16) & 0xFF;
771 ct
[ATA_H2D_CTL
] = fis
->cf_ctl
;
773 if (ATA_IS_FPDMA_CMD(fis
->cf_cmd
)) {
774 ct
[ATA_H2D_FEAT
] = fis
->cf_sec
;
775 ct
[ATA_H2D_FEAT_EXP
] = fis
->cf_sec_exp
;
776 ct
[ATA_H2D_SEC
] = tag
<< ATA_SEC_TAG_SHIFT
;
777 ct
[ATA_H2D_SEC_EXP
] = 0;
779 ct
[ATA_H2D_FEAT
] = fis
->cf_feat
;
780 ct
[ATA_H2D_FEAT_EXP
] = fis
->cf_feat_exp
;
781 ct
[ATA_H2D_SEC
] = fis
->cf_sec
;
782 ct
[ATA_H2D_SEC_EXP
] = fis
->cf_sec_exp
;
788 /*===========================================================================*
790 *===========================================================================*/
791 static void ct_set_packet(u8_t
*ct
, u8_t packet
[ATAPI_PACKET_SIZE
])
793 /* Fill in the packet part of a command table.
796 memcpy(&ct
[AHCI_CT_PACKET_OFF
], packet
, ATAPI_PACKET_SIZE
);
799 /*===========================================================================*
801 *===========================================================================*/
802 static void ct_set_prdt(u8_t
*ct
, prd_t
*prdt
, int nr_prds
)
804 /* Fill in the PRDT part of a command table.
809 p
= (u32_t
*) &ct
[AHCI_CT_PRDT_OFF
];
811 for (i
= 0; i
< nr_prds
; i
++, prdt
++) {
812 *p
++ = prdt
->vp_addr
;
815 *p
++ = prdt
->vp_size
- 1;
819 /*===========================================================================*
821 *===========================================================================*/
822 static void port_set_cmd(struct port_state
*ps
, int cmd
, cmd_fis_t
*fis
,
823 u8_t packet
[ATAPI_PACKET_SIZE
], prd_t
*prdt
, int nr_prds
, int write
)
825 /* Prepare the given command for execution, by constructing a command
826 * table and setting up a command list entry pointing to the table.
832 /* Set a port-specific flag that tells us if the command being
833 * processed is a NCQ command or not.
835 if (ATA_IS_FPDMA_CMD(fis
->cf_cmd
)) {
836 ps
->flags
|= FLAG_NCQ_MODE
;
838 assert(!ps
->pend_mask
);
839 ps
->flags
&= ~FLAG_NCQ_MODE
;
842 /* Construct a command table, consisting of a command FIS, optionally
843 * a packet, and optionally a number of PRDs (making up the actual PRD
846 ct
= ps
->ct_base
[cmd
];
849 assert(nr_prds
<= NR_PRDS
);
851 size
= ct_set_fis(ct
, fis
, cmd
);
854 ct_set_packet(ct
, packet
);
856 ct_set_prdt(ct
, prdt
, nr_prds
);
858 /* Construct a command list entry, pointing to the command's table.
859 * Current assumptions: callers always provide a Register - Host to
860 * Device type FIS, and all non-NCQ commands are prefetchable.
862 cl
= &ps
->cl_base
[cmd
* AHCI_CL_ENTRY_DWORDS
];
864 memset(cl
, 0, AHCI_CL_ENTRY_SIZE
);
865 cl
[0] = (nr_prds
<< AHCI_CL_PRDTL_SHIFT
) |
866 ((!ATA_IS_FPDMA_CMD(fis
->cf_cmd
) &&
867 (nr_prds
> 0 || packet
!= NULL
)) ? AHCI_CL_PREFETCHABLE
: 0) |
868 (write
? AHCI_CL_WRITE
: 0) |
869 ((packet
!= NULL
) ? AHCI_CL_ATAPI
: 0) |
870 ((size
/ sizeof(u32_t
)) << AHCI_CL_CFL_SHIFT
);
871 cl
[2] = ps
->ct_phys
[cmd
];
874 /*===========================================================================*
876 *===========================================================================*/
877 static void port_finish_cmd(struct port_state
*ps
, int cmd
, int result
)
879 /* Finish a command that has either succeeded or failed.
882 assert(cmd
< ps
->queue_depth
);
884 dprintf(V_REQ
, ("%s: command %d %s\n", ahci_portname(ps
),
885 cmd
, (result
== RESULT_SUCCESS
) ? "succeeded" : "failed"));
887 /* Update the command result, and clear it from the pending list. */
888 ps
->cmd_info
[cmd
].result
= result
;
890 assert(ps
->pend_mask
& (1 << cmd
));
891 ps
->pend_mask
&= ~(1 << cmd
);
893 /* Wake up the thread, unless it is the main thread. This can happen
894 * during initialization, as the gen_identify function is called by the
895 * main thread itself.
897 if (ps
->state
!= STATE_WAIT_ID
)
898 blockdriver_mt_wakeup(ps
->cmd_info
[cmd
].tid
);
901 /*===========================================================================*
903 *===========================================================================*/
904 static void port_fail_cmds(struct port_state
*ps
)
906 /* Fail all ongoing commands for a device.
910 for (i
= 0; ps
->pend_mask
!= 0 && i
< ps
->queue_depth
; i
++)
911 if (ps
->pend_mask
& (1 << i
))
912 port_finish_cmd(ps
, i
, RESULT_FAILURE
);
915 /*===========================================================================*
917 *===========================================================================*/
918 static void port_check_cmds(struct port_state
*ps
)
920 /* Check what commands have completed, and finish them.
925 /* See which commands have completed. */
926 if (ps
->flags
& FLAG_NCQ_MODE
)
927 mask
= port_read(ps
, AHCI_PORT_SACT
);
929 mask
= port_read(ps
, AHCI_PORT_CI
);
931 /* Wake up threads corresponding to completed commands. */
932 done
= ps
->pend_mask
& ~mask
;
934 for (i
= 0; i
< ps
->queue_depth
; i
++)
936 port_finish_cmd(ps
, i
, RESULT_SUCCESS
);
939 /*===========================================================================*
941 *===========================================================================*/
942 static int port_find_cmd(struct port_state
*ps
)
944 /* Find a free command tag to queue the current request.
948 for (i
= 0; i
< ps
->queue_depth
; i
++)
949 if (!(ps
->pend_mask
& (1 << i
)))
952 /* We should always be able to find a free slot, since a thread runs
953 * only when it is free, and thus, only because a slot is available.
955 assert(i
< ps
->queue_depth
);
960 /*===========================================================================*
962 *===========================================================================*/
963 static int port_get_padbuf(struct port_state
*ps
, size_t size
)
965 /* Make available a temporary buffer for use by this port. Enlarge the
966 * previous buffer if applicable and necessary, potentially changing
967 * its physical address.
970 if (ps
->pad_base
!= NULL
&& ps
->pad_size
>= size
)
973 if (ps
->pad_base
!= NULL
)
974 free_contig(ps
->pad_base
, ps
->pad_size
);
977 ps
->pad_base
= alloc_contig(ps
->pad_size
, 0, &ps
->pad_phys
);
979 if (ps
->pad_base
== NULL
) {
980 dprintf(V_ERR
, ("%s: unable to allocate a padding buffer of "
981 "size %lu\n", ahci_portname(ps
),
982 (unsigned long) size
));
987 dprintf(V_INFO
, ("%s: allocated padding buffer of size %lu\n",
988 ahci_portname(ps
), (unsigned long) size
));
993 /*===========================================================================*
995 *===========================================================================*/
996 static int sum_iovec(struct port_state
*ps
, endpoint_t endpt
,
997 iovec_s_t
*iovec
, int nr_req
, vir_bytes
*total
)
999 /* Retrieve the total size of the given I/O vector. Check for alignment
1000 * requirements along the way. Return OK (and the total request size)
1003 vir_bytes size
, bytes
;
1008 for (i
= 0; i
< nr_req
; i
++) {
1009 size
= iovec
[i
].iov_size
;
1011 if (size
== 0 || (size
& 1) || size
> LONG_MAX
) {
1012 dprintf(V_ERR
, ("%s: bad size %lu in iovec from %d\n",
1013 ahci_portname(ps
), size
, endpt
));
1019 if (bytes
> LONG_MAX
) {
1020 dprintf(V_ERR
, ("%s: iovec size overflow from %d\n",
1021 ahci_portname(ps
), endpt
));
1030 /*===========================================================================*
1032 *===========================================================================*/
1033 static int setup_prdt(struct port_state
*ps
, endpoint_t endpt
,
1034 iovec_s_t
*iovec
, int nr_req
, vir_bytes size
, vir_bytes lead
,
1035 int write
, prd_t
*prdt
)
1037 /* Convert (the first part of) an I/O vector to a Physical Region
1038 * Descriptor Table describing array that can later be used to set the
1039 * command's real PRDT. The resulting table as a whole should be
1040 * sector-aligned; leading and trailing local buffers may have to be
1041 * used for padding as appropriate. Return the number of PRD entries,
1042 * or a negative error code.
1044 struct vumap_vir vvec
[NR_PRDS
];
1045 size_t bytes
, trail
;
1046 int i
, r
, pcount
, nr_prds
= 0;
1049 /* Allocate a buffer for the data we don't want. */
1050 if ((r
= port_get_padbuf(ps
, ps
->sector_size
)) != OK
)
1053 prdt
[nr_prds
].vp_addr
= ps
->pad_phys
;
1054 prdt
[nr_prds
].vp_size
= lead
;
1058 /* The sum of lead, size, trail has to be sector-aligned. */
1059 trail
= (ps
->sector_size
- (lead
+ size
)) % ps
->sector_size
;
1061 /* Get the physical addresses of the given buffers. */
1062 for (i
= 0; i
< nr_req
&& size
> 0; i
++) {
1063 bytes
= MIN(iovec
[i
].iov_size
, size
);
1066 vvec
[i
].vv_addr
= (vir_bytes
) iovec
[i
].iov_grant
;
1068 vvec
[i
].vv_grant
= iovec
[i
].iov_grant
;
1070 vvec
[i
].vv_size
= bytes
;
1077 if ((r
= sys_vumap(endpt
, vvec
, i
, 0, write
? VUA_READ
: VUA_WRITE
,
1078 &prdt
[nr_prds
], &pcount
)) != OK
) {
1079 dprintf(V_ERR
, ("%s: unable to map memory from %d (%d)\n",
1080 ahci_portname(ps
), endpt
, r
));
1084 assert(pcount
> 0 && pcount
<= i
);
1086 /* Make sure all buffers are physically contiguous and word-aligned. */
1087 for (i
= 0; i
< pcount
; i
++) {
1088 if (vvec
[i
].vv_size
!= prdt
[nr_prds
].vp_size
) {
1089 dprintf(V_ERR
, ("%s: non-contiguous memory from %d\n",
1090 ahci_portname(ps
), endpt
));
1094 if (prdt
[nr_prds
].vp_addr
& 1) {
1095 dprintf(V_ERR
, ("%s: bad physical address from %d\n",
1096 ahci_portname(ps
), endpt
));
1104 assert(nr_prds
< NR_PRDS
);
1105 prdt
[nr_prds
].vp_addr
= ps
->pad_phys
+ lead
;
1106 prdt
[nr_prds
].vp_size
= trail
;
1113 /*===========================================================================*
1115 *===========================================================================*/
1116 static ssize_t
port_transfer(struct port_state
*ps
, u64_t pos
, u64_t eof
,
1117 endpoint_t endpt
, iovec_s_t
*iovec
, int nr_req
, int write
, int flags
)
1119 /* Perform an I/O transfer on a port.
1121 prd_t prdt
[NR_PRDS
];
1122 vir_bytes size
, lead
;
1123 unsigned int count
, nr_prds
;
1127 /* Get the total request size from the I/O vector. */
1128 if ((r
= sum_iovec(ps
, endpt
, iovec
, nr_req
, &size
)) != OK
)
1131 dprintf(V_REQ
, ("%s: %s for %lu bytes at pos %08lx%08lx\n",
1132 ahci_portname(ps
), write
? "write" : "read", size
,
1133 ex64hi(pos
), ex64lo(pos
)));
1135 assert(ps
->state
== STATE_GOOD_DEV
);
1136 assert(ps
->flags
& FLAG_HAS_MEDIUM
);
1137 assert(ps
->sector_size
> 0);
1139 /* Limit the maximum size of a single transfer.
1140 * See the comments at the top of this file for details.
1142 if (size
> MAX_TRANSFER
)
1143 size
= MAX_TRANSFER
;
1145 /* If necessary, reduce the request size so that the request does not
1146 * extend beyond the end of the partition. The caller already
1147 * guarantees that the starting position lies within the partition.
1149 if (cmp64(add64ul(pos
, size
), eof
) >= 0)
1150 size
= (vir_bytes
) diff64(eof
, pos
);
1152 start_lba
= div64(pos
, cvu64(ps
->sector_size
));
1153 lead
= rem64u(pos
, ps
->sector_size
);
1154 count
= (lead
+ size
+ ps
->sector_size
- 1) / ps
->sector_size
;
1156 /* Position must be word-aligned for read requests, and sector-aligned
1157 * for write requests. We do not support read-modify-write for writes.
1159 if ((lead
& 1) || (write
&& lead
!= 0)) {
1160 dprintf(V_ERR
, ("%s: unaligned position from %d\n",
1161 ahci_portname(ps
), endpt
));
1165 /* Write requests must be sector-aligned. Word alignment of the size is
1166 * already guaranteed by sum_iovec().
1168 if (write
&& (size
% ps
->sector_size
) != 0) {
1169 dprintf(V_ERR
, ("%s: unaligned size %lu from %d\n",
1170 ahci_portname(ps
), size
, endpt
));
1174 /* Create a vector of physical addresses and sizes for the transfer. */
1175 nr_prds
= r
= setup_prdt(ps
, endpt
, iovec
, nr_req
, size
, lead
, write
,
1178 if (r
< 0) return r
;
1180 /* Perform the actual transfer. */
1181 cmd
= port_find_cmd(ps
);
1183 if (ps
->flags
& FLAG_ATAPI
)
1184 r
= atapi_transfer(ps
, cmd
, start_lba
, count
, write
, prdt
,
1187 r
= ata_transfer(ps
, cmd
, start_lba
, count
, write
,
1188 !!(flags
& BDEV_FORCEWRITE
), prdt
, nr_prds
);
1190 if (r
!= OK
) return r
;
1195 /*===========================================================================*
1197 *===========================================================================*/
1198 static void port_hardreset(struct port_state
*ps
)
1200 /* Perform a port-level (hard) reset on the given port.
1203 port_write(ps
, AHCI_PORT_SCTL
, AHCI_PORT_SCTL_DET_INIT
);
1205 micro_delay(COMRESET_DELAY
* 1000); /* COMRESET_DELAY is in ms */
1207 port_write(ps
, AHCI_PORT_SCTL
, AHCI_PORT_SCTL_DET_NONE
);
1210 /*===========================================================================*
1212 *===========================================================================*/
1213 static void port_start(struct port_state
*ps
)
1215 /* Start the given port, allowing for the execution of commands and the
1216 * transfer of data on that port.
1220 /* Reset status registers. */
1221 port_write(ps
, AHCI_PORT_SERR
, ~0);
1222 port_write(ps
, AHCI_PORT_IS
, ~0);
1224 /* Start the port. */
1225 cmd
= port_read(ps
, AHCI_PORT_CMD
);
1226 port_write(ps
, AHCI_PORT_CMD
, cmd
| AHCI_PORT_CMD_ST
);
1228 dprintf(V_INFO
, ("%s: started\n", ahci_portname(ps
)));
1231 /*===========================================================================*
1233 *===========================================================================*/
1234 static void port_restart(struct port_state
*ps
)
1236 /* Restart a port after a fatal error has occurred.
1240 /* Fail all outstanding commands. */
1243 /* Stop the port. */
1244 cmd
= port_read(ps
, AHCI_PORT_CMD
);
1245 port_write(ps
, AHCI_PORT_CMD
, cmd
& ~AHCI_PORT_CMD_ST
);
1247 SPIN_UNTIL(!(port_read(ps
, AHCI_PORT_CMD
) & AHCI_PORT_CMD_CR
),
1250 /* Reset status registers. */
1251 port_write(ps
, AHCI_PORT_SERR
, ~0);
1252 port_write(ps
, AHCI_PORT_IS
, ~0);
1254 /* If the BSY and/or DRQ flags are set, reset the port. */
1255 if (port_read(ps
, AHCI_PORT_TFD
) &
1256 (AHCI_PORT_TFD_STS_BSY
| AHCI_PORT_TFD_STS_DRQ
)) {
1258 dprintf(V_ERR
, ("%s: port reset\n", ahci_portname(ps
)));
1260 /* To keep this driver simple, we do not transparently recover
1261 * ongoing requests. Instead, we mark the failing device as
1262 * disconnected, and reset it. If the reset succeeds, the
1263 * device (or, perhaps, eventually, another device) will come
1264 * back up. Any current and future requests to this port will
1265 * be failed until the port is fully closed and reopened.
1267 port_disconnect(ps
);
1269 /* Trigger a port reset. */
1275 /* Start the port. */
1276 cmd
= port_read(ps
, AHCI_PORT_CMD
);
1277 port_write(ps
, AHCI_PORT_CMD
, cmd
| AHCI_PORT_CMD_ST
);
1279 dprintf(V_INFO
, ("%s: restarted\n", ahci_portname(ps
)));
1282 /*===========================================================================*
1284 *===========================================================================*/
1285 static void port_stop(struct port_state
*ps
)
1287 /* Stop the given port, if not already stopped.
1291 /* Disable interrupts. */
1292 port_write(ps
, AHCI_PORT_IE
, AHCI_PORT_IE_NONE
);
1294 /* Stop the port. */
1295 cmd
= port_read(ps
, AHCI_PORT_CMD
);
1297 if (cmd
& (AHCI_PORT_CMD_CR
| AHCI_PORT_CMD_ST
)) {
1298 cmd
&= ~(AHCI_PORT_CMD_CR
| AHCI_PORT_CMD_ST
);
1300 port_write(ps
, AHCI_PORT_CMD
, cmd
);
1302 SPIN_UNTIL(!(port_read(ps
, AHCI_PORT_CMD
) & AHCI_PORT_CMD_CR
),
1305 dprintf(V_INFO
, ("%s: stopped\n", ahci_portname(ps
)));
1308 /* Reset status registers. */
1309 port_write(ps
, AHCI_PORT_SERR
, ~0);
1310 port_write(ps
, AHCI_PORT_IS
, ~0);
1313 /*===========================================================================*
1315 *===========================================================================*/
1316 static void port_sig_check(struct port_state
*ps
)
1318 /* Check whether the device's signature has become available yet, and
1319 * if so, start identifying the device.
1323 tfd
= port_read(ps
, AHCI_PORT_TFD
);
1325 /* Wait for the BSY flag to be (set and then) cleared first. Note that
1326 * clearing it only happens when PxCMD.FRE is set, which is why we
1327 * start the port before starting the signature wait cycle.
1329 if ((tfd
& AHCI_PORT_TFD_STS_BSY
) || tfd
== AHCI_PORT_TFD_STS_INIT
) {
1330 /* Try for a while before giving up. It may take seconds. */
1333 set_timer(&ps
->cmd_info
[0].timer
, ahci_sig_timeout
,
1334 port_timeout
, BUILD_ARG(ps
- port_state
, 0));
1338 /* If no device is actually attached, disable the port. This
1339 * value is also the initial value of the register, before the
1340 * BSY flag gets set, so only check this condition on timeout.
1342 if (tfd
== AHCI_PORT_TFD_STS_INIT
) {
1343 dprintf(V_DEV
, ("%s: no device at this port\n",
1344 ahci_portname(ps
)));
1348 ps
->state
= STATE_BAD_DEV
;
1349 ps
->flags
&= ~FLAG_BUSY
;
1356 dprintf(V_ERR
, ("%s: timeout waiting for signature\n",
1357 ahci_portname(ps
)));
1360 /* Check the port's signature. We only support the normal ATA and ATAPI
1361 * signatures. We ignore devices reporting anything else.
1363 sig
= port_read(ps
, AHCI_PORT_SIG
);
1365 if (sig
!= ATA_SIG_ATA
&& sig
!= ATA_SIG_ATAPI
) {
1366 dprintf(V_ERR
, ("%s: unsupported signature (%08x)\n",
1367 ahci_portname(ps
), sig
));
1371 ps
->state
= STATE_BAD_DEV
;
1372 ps
->flags
&= ~FLAG_BUSY
;
1377 /* Clear all state flags except the busy flag, which may be relevant if
1378 * a BDEV_OPEN call is waiting for the device to become ready; the
1379 * barrier flag, which prevents access to the device until it is
1380 * completely closed and (re)opened; and, the thread suspension flag.
1382 ps
->flags
&= (FLAG_BUSY
| FLAG_BARRIER
| FLAG_SUSPENDED
);
1384 if (sig
== ATA_SIG_ATAPI
)
1385 ps
->flags
|= FLAG_ATAPI
;
1387 /* Attempt to identify the device. Do this using continuation, because
1388 * we may already be called from port_wait() here, and could end up
1389 * confusing the timer expiration procedure.
1391 ps
->state
= STATE_WAIT_ID
;
1392 port_write(ps
, AHCI_PORT_IE
, AHCI_PORT_IE_MASK
);
1394 (void) gen_identify(ps
, FALSE
/*blocking*/);
1397 /*===========================================================================*
1399 *===========================================================================*/
1400 static void print_string(u16_t
*buf
, int start
, int end
)
1402 /* Print a string that is stored as little-endian words and padded with
1407 while (end
>= start
&& buf
[end
] == 0x2020) end
--;
1409 if (end
>= start
&& (buf
[end
] & 0xFF) == 0x20) end
--, last
++;
1411 for (i
= start
; i
<= end
; i
++)
1412 printf("%c%c", buf
[i
] >> 8, buf
[i
] & 0xFF);
1415 printf("%c", buf
[i
] >> 8);
1418 /*===========================================================================*
1420 *===========================================================================*/
1421 static void port_id_check(struct port_state
*ps
, int success
)
1423 /* The device identification command has either completed or timed out.
1424 * Decide whether this device is usable or not, and store some of its
1429 assert(ps
->state
== STATE_WAIT_ID
);
1430 assert(!(ps
->flags
& FLAG_BUSY
)); /* unset by callers */
1432 cancel_timer(&ps
->cmd_info
[0].timer
);
1436 ("%s: unable to identify\n", ahci_portname(ps
)));
1438 /* If the identify command itself succeeded, check the results and
1439 * store some properties.
1442 buf
= (u16_t
*) ps
->tmp_base
;
1444 if (ps
->flags
& FLAG_ATAPI
)
1445 success
= atapi_id_check(ps
, buf
);
1447 success
= ata_id_check(ps
, buf
);
1450 /* If the device has not been identified successfully, mark it as an
1456 ps
->state
= STATE_BAD_DEV
;
1457 port_write(ps
, AHCI_PORT_IE
, AHCI_PORT_IE_PRCE
);
1462 /* The device has been identified successfully, and hence usable. */
1463 ps
->state
= STATE_GOOD_DEV
;
1465 /* Print some information about the device. */
1466 if (ahci_verbose
>= V_INFO
) {
1467 printf("%s: ATA%s, ", ahci_portname(ps
),
1468 (ps
->flags
& FLAG_ATAPI
) ? "PI" : "");
1469 print_string(buf
, 27, 46);
1470 if (ahci_verbose
>= V_DEV
) {
1472 print_string(buf
, 10, 19);
1474 print_string(buf
, 23, 26);
1478 if (ps
->flags
& FLAG_HAS_MEDIUM
)
1479 printf(", %u byte sectors, %lu MB size",
1480 ps
->sector_size
, div64u(mul64(ps
->lba_count
,
1481 cvu64(ps
->sector_size
)), 1024*1024));
1487 /*===========================================================================*
1489 *===========================================================================*/
1490 static void port_connect(struct port_state
*ps
)
1492 /* A device has been found to be attached to this port. Start the port,
1493 * and do timed polling for its signature to become available.
1496 dprintf(V_INFO
, ("%s: device connected\n", ahci_portname(ps
)));
1498 if (ps
->state
== STATE_SPIN_UP
)
1499 cancel_timer(&ps
->cmd_info
[0].timer
);
1503 ps
->state
= STATE_WAIT_SIG
;
1504 ps
->left
= ahci_sig_checks
;
1506 port_write(ps
, AHCI_PORT_IE
, AHCI_PORT_IE_PRCE
);
1508 /* Do the first check immediately; who knows, we may get lucky. */
1512 /*===========================================================================*
1514 *===========================================================================*/
1515 static void port_disconnect(struct port_state
*ps
)
1517 /* The device has detached from this port. Stop the port if necessary.
1520 dprintf(V_INFO
, ("%s: device disconnected\n", ahci_portname(ps
)));
1522 if (ps
->state
!= STATE_BAD_DEV
)
1525 ps
->state
= STATE_NO_DEV
;
1526 port_write(ps
, AHCI_PORT_IE
, AHCI_PORT_IE_PRCE
);
1527 ps
->flags
&= ~FLAG_BUSY
;
1529 /* Fail any ongoing request. The caller may already have done this. */
1532 /* Block any further access until the device is completely closed and
1533 * reopened. This prevents arbitrary I/O to a newly plugged-in device
1534 * without upper layers noticing.
1536 ps
->flags
|= FLAG_BARRIER
;
1538 /* Inform the blockdriver library to reduce the number of threads. */
1539 blockdriver_mt_set_workers(ps
->device
, 1);
1542 /*===========================================================================*
1544 *===========================================================================*/
1545 static void port_intr(struct port_state
*ps
)
1547 /* Process an interrupt on this port.
1552 if (ps
->state
== STATE_NO_PORT
) {
1553 dprintf(V_ERR
, ("%s: interrupt for invalid port!\n",
1554 ahci_portname(ps
)));
1559 smask
= port_read(ps
, AHCI_PORT_IS
);
1560 emask
= smask
& port_read(ps
, AHCI_PORT_IE
);
1562 /* Clear the interrupt flags that we saw were set. */
1563 port_write(ps
, AHCI_PORT_IS
, smask
);
1565 dprintf(V_REQ
, ("%s: interrupt (%08x)\n", ahci_portname(ps
), smask
));
1567 /* Check if any commands have completed. */
1568 port_check_cmds(ps
);
1570 if (emask
& AHCI_PORT_IS_PRCS
) {
1571 /* Clear the N diagnostics bit to clear this interrupt. */
1572 port_write(ps
, AHCI_PORT_SERR
, AHCI_PORT_SERR_DIAG_N
);
1574 connected
= (port_read(ps
, AHCI_PORT_SSTS
) &
1575 AHCI_PORT_SSTS_DET_MASK
) == AHCI_PORT_SSTS_DET_PHY
;
1577 switch (ps
->state
) {
1579 case STATE_GOOD_DEV
:
1580 case STATE_WAIT_SIG
:
1582 port_disconnect(ps
);
1591 } else if (smask
& AHCI_PORT_IS_MASK
) {
1592 /* We assume that any other interrupt indicates command
1593 * completion or (command or device) failure. Unfortunately, if
1594 * an NCQ command failed, we cannot easily determine which one
1595 * it was. For that reason, after completing all successfully
1596 * finished commands (above), we fail all other outstanding
1597 * commands and restart the port. This can possibly be improved
1598 * later by obtaining per-command status results from the HBA.
1601 /* If we were waiting for ID verification, check now. */
1602 if (ps
->state
== STATE_WAIT_ID
) {
1603 ps
->flags
&= ~FLAG_BUSY
;
1604 port_id_check(ps
, !(port_read(ps
, AHCI_PORT_TFD
) &
1605 (AHCI_PORT_TFD_STS_ERR
|
1606 AHCI_PORT_TFD_STS_DF
)));
1609 /* Check now for failure. There are fatal failures, and there
1610 * are failures that set the TFD.STS.ERR field using a D2H
1611 * FIS. In both cases, we just restart the port, failing all
1612 * commands in the process.
1614 if ((port_read(ps
, AHCI_PORT_TFD
) &
1615 (AHCI_PORT_TFD_STS_ERR
| AHCI_PORT_TFD_STS_DF
)) ||
1616 (smask
& AHCI_PORT_IS_RESTART
)) {
1622 /*===========================================================================*
1624 *===========================================================================*/
1625 static void port_timeout(struct timer
*tp
)
1627 /* A timeout has occurred on this port. Figure out what the timeout is
1628 * for, and take appropriate action.
1630 struct port_state
*ps
;
1633 port
= GET_PORT(tmr_arg(tp
)->ta_int
);
1634 cmd
= GET_TAG(tmr_arg(tp
)->ta_int
);
1636 assert(port
>= 0 && port
< hba_state
.nr_ports
);
1638 ps
= &port_state
[port
];
1640 /* Regardless of the outcome of this timeout, wake up the thread if it
1641 * is suspended. This applies only during the initialization.
1643 if (ps
->flags
& FLAG_SUSPENDED
) {
1645 blockdriver_mt_wakeup(ps
->cmd_info
[0].tid
);
1648 /* If detection of a device after startup timed out, give up on initial
1649 * detection and only look for hot plug events from now on.
1651 if (ps
->state
== STATE_SPIN_UP
) {
1652 /* There is one exception: for braindead controllers that don't
1653 * generate the right interrupts (cough, VirtualBox), we do an
1654 * explicit check to see if a device is connected after all.
1655 * Later hot-(un)plug events will not be detected in this case.
1657 if ((port_read(ps
, AHCI_PORT_SSTS
) &
1658 AHCI_PORT_SSTS_DET_MASK
) == AHCI_PORT_SSTS_DET_PHY
) {
1659 dprintf(V_INFO
, ("%s: no device connection event\n",
1660 ahci_portname(ps
)));
1665 dprintf(V_INFO
, ("%s: spin-up timeout\n",
1666 ahci_portname(ps
)));
1668 /* If the busy flag is set, a BDEV_OPEN request is
1669 * waiting for the detection to finish; clear the busy
1670 * flag to return an error to the caller.
1672 ps
->state
= STATE_NO_DEV
;
1673 ps
->flags
&= ~FLAG_BUSY
;
1679 /* If a device has been connected and we are waiting for its signature
1680 * to become available, check now.
1682 if (ps
->state
== STATE_WAIT_SIG
) {
1688 /* The only case where the busy flag will be set after this is for a
1689 * failed identify operation. During this operation, the port will be
1690 * in the WAIT_ID state. In that case, we clear the BUSY flag, fail the
1691 * command by setting its state, restart port and finish identify op.
1693 if (ps
->flags
& FLAG_BUSY
) {
1694 assert(ps
->state
== STATE_WAIT_ID
);
1695 ps
->flags
&= ~FLAG_BUSY
;
1698 dprintf(V_ERR
, ("%s: timeout\n", ahci_portname(ps
)));
1700 /* Restart the port, failing all current commands. */
1703 /* Finish up the identify operation. */
1704 if (ps
->state
== STATE_WAIT_ID
)
1705 port_id_check(ps
, FALSE
);
1708 /*===========================================================================*
1710 *===========================================================================*/
1711 static void port_wait(struct port_state
*ps
)
1713 /* Suspend the current thread until the given port is no longer busy,
1714 * due to either command completion or timeout.
1717 ps
->flags
|= FLAG_SUSPENDED
;
1719 while (ps
->flags
& FLAG_BUSY
)
1720 blockdriver_mt_sleep();
1722 ps
->flags
&= ~FLAG_SUSPENDED
;
1725 /*===========================================================================*
1727 *===========================================================================*/
1728 static void port_issue(struct port_state
*ps
, int cmd
, clock_t timeout
)
1730 /* Issue a command to the port, and set a timer to trigger a timeout
1731 * if the command takes too long to complete.
1734 /* Set the corresponding NCQ command bit, if applicable. */
1735 if (ps
->flags
& FLAG_HAS_NCQ
)
1736 port_write(ps
, AHCI_PORT_SACT
, 1 << cmd
);
1738 /* Make sure that the compiler does not delay any previous write
1739 * operations until after the write to the command issue register.
1743 /* Tell the controller that a new command is ready. */
1744 port_write(ps
, AHCI_PORT_CI
, 1 << cmd
);
1746 /* Update pending commands. */
1747 ps
->pend_mask
|= 1 << cmd
;
1749 /* Set a timer in case the command does not complete at all. */
1750 set_timer(&ps
->cmd_info
[cmd
].timer
, timeout
, port_timeout
,
1751 BUILD_ARG(ps
- port_state
, cmd
));
1754 /*===========================================================================*
1756 *===========================================================================*/
1757 static int port_exec(struct port_state
*ps
, int cmd
, clock_t timeout
)
1759 /* Execute a command on a port, wait for the command to complete or for
1760 * a timeout, and return whether the command succeeded or not.
1763 port_issue(ps
, cmd
, timeout
);
1765 /* Put the thread to sleep until a timeout or a command completion
1766 * happens. Earlier, we used to call port_wait which set the suspended
1767 * flag. We now abandon it since the flag has to work on a per-thread,
1768 * and hence per-tag basis and not on a per-port basis. Instead, we
1769 * retain that call only to defer open calls during device/driver
1770 * initialization. Instead, we call sleep here directly. Before
1771 * sleeping, we register the thread.
1773 ps
->cmd_info
[cmd
].tid
= blockdriver_mt_get_tid();
1775 blockdriver_mt_sleep();
1777 /* Cancelling a timer that just triggered, does no harm. */
1778 cancel_timer(&ps
->cmd_info
[cmd
].timer
);
1780 assert(!(ps
->flags
& FLAG_BUSY
));
1782 dprintf(V_REQ
, ("%s: end of command -- %s\n", ahci_portname(ps
),
1783 (ps
->cmd_info
[cmd
].result
== RESULT_FAILURE
) ?
1784 "failure" : "success"));
1786 if (ps
->cmd_info
[cmd
].result
== RESULT_FAILURE
)
1792 /*===========================================================================*
1794 *===========================================================================*/
1795 static void port_alloc(struct port_state
*ps
)
1797 /* Allocate memory for the given port, and enable FIS receipt. We try
1798 * to cram everything into one 4K-page in order to limit memory usage
1799 * as much as possible. More memory may be allocated on demand later,
1800 * but allocation failure should be fatal only here. Note that we do
1801 * not allocate memory for sector padding here, because we do not know
1802 * the device's sector size yet.
1804 size_t fis_off
, tmp_off
, ct_off
; int i
;
1805 size_t ct_offs
[NR_CMDS
];
1808 fis_off
= AHCI_CL_SIZE
+ AHCI_FIS_SIZE
- 1;
1809 fis_off
-= fis_off
% AHCI_FIS_SIZE
;
1811 tmp_off
= fis_off
+ AHCI_FIS_SIZE
+ AHCI_TMP_ALIGN
- 1;
1812 tmp_off
-= tmp_off
% AHCI_TMP_ALIGN
;
1814 /* Allocate memory for all the commands. */
1815 ct_off
= tmp_off
+ AHCI_TMP_SIZE
;
1816 for (i
= 0; i
< NR_CMDS
; i
++) {
1817 ct_off
+= AHCI_CT_ALIGN
- 1;
1818 ct_off
-= ct_off
% AHCI_CT_ALIGN
;
1819 ct_offs
[i
] = ct_off
;
1820 ps
->mem_size
= ct_off
+ AHCI_CT_SIZE
;
1821 ct_off
= ps
->mem_size
;
1824 ps
->mem_base
= alloc_contig(ps
->mem_size
, AC_ALIGN4K
, &ps
->mem_phys
);
1825 if (ps
->mem_base
== NULL
)
1826 panic("unable to allocate port memory");
1827 memset(ps
->mem_base
, 0, ps
->mem_size
);
1829 ps
->cl_base
= (u32_t
*) ps
->mem_base
;
1830 ps
->cl_phys
= ps
->mem_phys
;
1831 assert(ps
->cl_phys
% AHCI_CL_SIZE
== 0);
1833 ps
->fis_base
= (u32_t
*) (ps
->mem_base
+ fis_off
);
1834 ps
->fis_phys
= ps
->mem_phys
+ fis_off
;
1835 assert(ps
->fis_phys
% AHCI_FIS_SIZE
== 0);
1837 ps
->tmp_base
= (u8_t
*) (ps
->mem_base
+ tmp_off
);
1838 ps
->tmp_phys
= ps
->mem_phys
+ tmp_off
;
1839 assert(ps
->tmp_phys
% AHCI_TMP_ALIGN
== 0);
1841 for (i
= 0; i
< NR_CMDS
; i
++) {
1842 ps
->ct_base
[i
] = ps
->mem_base
+ ct_offs
[i
];
1843 ps
->ct_phys
[i
] = ps
->mem_phys
+ ct_offs
[i
];
1844 assert(ps
->ct_phys
[i
] % AHCI_CT_ALIGN
== 0);
1847 /* Tell the controller about some of the physical addresses. */
1848 port_write(ps
, AHCI_PORT_FBU
, 0);
1849 port_write(ps
, AHCI_PORT_FB
, ps
->fis_phys
);
1851 port_write(ps
, AHCI_PORT_CLBU
, 0);
1852 port_write(ps
, AHCI_PORT_CLB
, ps
->cl_phys
);
1854 /* Enable FIS receive. */
1855 cmd
= port_read(ps
, AHCI_PORT_CMD
);
1856 port_write(ps
, AHCI_PORT_CMD
, cmd
| AHCI_PORT_CMD_FRE
);
1858 ps
->pad_base
= NULL
;
1862 /*===========================================================================*
1864 *===========================================================================*/
1865 static void port_free(struct port_state
*ps
)
1867 /* Disable FIS receipt for the given port, and free previously
1873 /* Disable FIS receive. */
1874 cmd
= port_read(ps
, AHCI_PORT_CMD
);
1876 if (cmd
& (AHCI_PORT_CMD_FR
| AHCI_PORT_CMD_FRE
)) {
1877 port_write(ps
, AHCI_PORT_CMD
, cmd
& ~AHCI_PORT_CMD_FRE
);
1879 SPIN_UNTIL(!(port_read(ps
, AHCI_PORT_CMD
) & AHCI_PORT_CMD_FR
),
1883 if (ps
->pad_base
!= NULL
)
1884 free_contig(ps
->pad_base
, ps
->pad_size
);
1886 /* The first command table is part of the primary memory page. */
1887 for (i
= 1; i
< hba_state
.nr_cmds
; i
++)
1888 if (ps
->ct_base
[i
] != NULL
)
1889 free_contig(ps
->ct_base
[i
], AHCI_CT_SIZE
);
1891 free_contig(ps
->mem_base
, ps
->mem_size
);
1894 /*===========================================================================*
1896 *===========================================================================*/
1897 static void port_init(struct port_state
*ps
)
1899 /* Initialize the given port.
1904 /* Initialize the port state structure. */
1905 ps
->queue_depth
= 1;
1906 ps
->state
= STATE_SPIN_UP
;
1907 ps
->flags
= FLAG_BUSY
;
1908 ps
->sector_size
= 0;
1911 for (i
= 0; i
< NR_CMDS
; i
++)
1912 init_timer(&ps
->cmd_info
[i
].timer
);
1914 ps
->reg
= (u32_t
*) ((u8_t
*) hba_state
.base
+
1915 AHCI_MEM_BASE_SIZE
+ AHCI_MEM_PORT_SIZE
* (ps
- port_state
));
1917 /* Allocate memory for the port. */
1920 /* Just listen for device status change events for now. */
1921 port_write(ps
, AHCI_PORT_IE
, AHCI_PORT_IE_PRCE
);
1923 /* Enable device spin-up for HBAs that support staggered spin-up.
1924 * This is a no-op for HBAs that do not support it.
1926 cmd
= port_read(ps
, AHCI_PORT_CMD
);
1927 port_write(ps
, AHCI_PORT_CMD
, cmd
| AHCI_PORT_CMD_SUD
);
1929 /* Trigger a port reset. */
1932 set_timer(&ps
->cmd_info
[0].timer
, ahci_spinup_timeout
,
1933 port_timeout
, BUILD_ARG(ps
- port_state
, 0));
1936 /*===========================================================================*
1938 *===========================================================================*/
1939 static int ahci_probe(int skip
)
1941 /* Find a matching PCI device.
1948 r
= pci_first_dev(&devind
, &vid
, &did
);
1953 r
= pci_next_dev(&devind
, &vid
, &did
);
1958 pci_reserve(devind
);
1963 /*===========================================================================*
1965 *===========================================================================*/
1966 static void ahci_reset(void)
1968 /* Reset the HBA. Do not enable AHCI mode afterwards.
1972 ghc
= hba_read(AHCI_HBA_GHC
);
1974 hba_write(AHCI_HBA_GHC
, ghc
| AHCI_HBA_GHC_AE
);
1976 hba_write(AHCI_HBA_GHC
, ghc
| AHCI_HBA_GHC_AE
| AHCI_HBA_GHC_HR
);
1978 SPIN_UNTIL(!(hba_read(AHCI_HBA_GHC
) & AHCI_HBA_GHC_HR
), RESET_DELAY
);
1980 if (hba_read(AHCI_HBA_GHC
) & AHCI_HBA_GHC_HR
)
1981 panic("unable to reset HBA");
1984 /*===========================================================================*
1986 *===========================================================================*/
1987 static void ahci_init(int devind
)
1989 /* Initialize the device.
1991 u32_t base
, size
, cap
, ghc
, mask
;
1992 int r
, port
, ioflag
;
1994 if ((r
= pci_get_bar(devind
, PCI_BAR_6
, &base
, &size
, &ioflag
)) != OK
)
1995 panic("unable to retrieve BAR: %d", r
);
1998 panic("invalid BAR type");
2000 /* There must be at least one port, and at most NR_PORTS ports. Limit
2001 * the actual total number of ports to the size of the exposed area.
2003 if (size
< AHCI_MEM_BASE_SIZE
+ AHCI_MEM_PORT_SIZE
)
2004 panic("HBA memory size too small: %lu", size
);
2006 size
= MIN(size
, AHCI_MEM_BASE_SIZE
+ AHCI_MEM_PORT_SIZE
* NR_PORTS
);
2008 hba_state
.nr_ports
= (size
- AHCI_MEM_BASE_SIZE
) / AHCI_MEM_PORT_SIZE
;
2010 /* Map the register area into local memory. */
2011 hba_state
.base
= (u32_t
*) vm_map_phys(SELF
, (void *) base
, size
);
2012 hba_state
.size
= size
;
2013 if (hba_state
.base
== MAP_FAILED
)
2014 panic("unable to map HBA memory");
2016 /* Retrieve, allocate and enable the controller's IRQ. */
2017 hba_state
.irq
= pci_attr_r8(devind
, PCI_ILR
);
2018 hba_state
.hook_id
= 0;
2020 if ((r
= sys_irqsetpolicy(hba_state
.irq
, 0, &hba_state
.hook_id
)) != OK
)
2021 panic("unable to register IRQ: %d", r
);
2023 if ((r
= sys_irqenable(&hba_state
.hook_id
)) != OK
)
2024 panic("unable to enable IRQ: %d", r
);
2026 /* Reset the HBA. */
2029 /* Enable AHCI and interrupts. */
2030 ghc
= hba_read(AHCI_HBA_GHC
);
2031 hba_write(AHCI_HBA_GHC
, ghc
| AHCI_HBA_GHC_AE
| AHCI_HBA_GHC_IE
);
2033 /* Limit the maximum number of commands to the controller's value. */
2034 /* Note that we currently use only one command anyway. */
2035 cap
= hba_read(AHCI_HBA_CAP
);
2036 hba_state
.has_ncq
= !!(cap
& AHCI_HBA_CAP_SNCQ
);
2037 hba_state
.nr_cmds
= MIN(NR_CMDS
,
2038 ((cap
>> AHCI_HBA_CAP_NCS_SHIFT
) & AHCI_HBA_CAP_NCS_MASK
) + 1);
2040 dprintf(V_INFO
, ("AHCI%u: HBA v%d.%d%d, %ld ports, %ld commands, "
2041 "%s queuing, IRQ %d\n",
2043 (int) (hba_read(AHCI_HBA_VS
) >> 16),
2044 (int) ((hba_read(AHCI_HBA_VS
) >> 8) & 0xFF),
2045 (int) (hba_read(AHCI_HBA_VS
) & 0xFF),
2046 ((cap
>> AHCI_HBA_CAP_NP_SHIFT
) & AHCI_HBA_CAP_NP_MASK
) + 1,
2047 ((cap
>> AHCI_HBA_CAP_NCS_SHIFT
) & AHCI_HBA_CAP_NCS_MASK
) + 1,
2048 hba_state
.has_ncq
? "supports" : "no", hba_state
.irq
));
2050 dprintf(V_INFO
, ("AHCI%u: CAP %08x, CAP2 %08x, PI %08x\n",
2051 ahci_instance
, cap
, hba_read(AHCI_HBA_CAP2
),
2052 hba_read(AHCI_HBA_PI
)));
2054 /* Initialize each of the implemented ports. We ignore CAP.NP. */
2055 mask
= hba_read(AHCI_HBA_PI
);
2057 for (port
= 0; port
< hba_state
.nr_ports
; port
++) {
2058 port_state
[port
].device
= NO_DEVICE
;
2059 port_state
[port
].state
= STATE_NO_PORT
;
2061 if (mask
& (1 << port
))
2062 port_init(&port_state
[port
]);
2066 /*===========================================================================*
2068 *===========================================================================*/
2069 static void ahci_stop(void)
2071 /* Disable AHCI, and clean up resources to the extent possible.
2073 struct port_state
*ps
;
2076 for (port
= 0; port
< hba_state
.nr_ports
; port
++) {
2077 ps
= &port_state
[port
];
2079 if (ps
->state
!= STATE_NO_PORT
) {
2088 if ((r
= vm_unmap_phys(SELF
, (void *) hba_state
.base
,
2089 hba_state
.size
)) != OK
)
2090 panic("unable to unmap HBA memory: %d", r
);
2092 if ((r
= sys_irqrmpolicy(&hba_state
.hook_id
)) != OK
)
2093 panic("unable to deregister IRQ: %d", r
);
2096 /*===========================================================================*
2098 *===========================================================================*/
2099 static void ahci_alarm(clock_t stamp
)
2101 /* Process an alarm.
2104 /* Call the port-specific handler for each port that timed out. */
2105 expire_timers(stamp
);
2108 /*===========================================================================*
2110 *===========================================================================*/
2111 static void ahci_intr(unsigned int UNUSED(mask
))
2113 /* Process an interrupt.
2115 struct port_state
*ps
;
2119 /* Handle an interrupt for each port that has the interrupt bit set. */
2120 mask
= hba_read(AHCI_HBA_IS
);
2122 for (port
= 0; port
< hba_state
.nr_ports
; port
++) {
2123 if (mask
& (1 << port
)) {
2124 ps
= &port_state
[port
];
2128 /* After processing an interrupt, wake up the device
2129 * thread if it is suspended and now no longer busy.
2131 if ((ps
->flags
& (FLAG_SUSPENDED
| FLAG_BUSY
)) ==
2133 blockdriver_mt_wakeup(ps
->cmd_info
[0].tid
);
2137 /* Clear the bits that we processed. */
2138 hba_write(AHCI_HBA_IS
, mask
);
2140 /* Reenable the interrupt. */
2141 if ((r
= sys_irqenable(&hba_state
.hook_id
)) != OK
)
2142 panic("unable to enable IRQ: %d", r
);
2145 /*===========================================================================*
2147 *===========================================================================*/
2148 static void ahci_get_var(char *name
, long *v
, int timeout
)
2150 /* Retrieve an environment variable, and optionall adjust it to the
2151 * scale that we are using internally.
2154 /* The value is supposed to be initialized to a default already. */
2155 (void) env_parse(name
, "d", 0, v
, 1, LONG_MAX
);
2157 /* If this is a timeout, convert from milliseconds to ticks. */
2159 *v
= (*v
+ 500) * sys_hz() / 1000;
2162 /*===========================================================================*
2164 *===========================================================================*/
2165 static void ahci_get_params(void)
2167 /* Retrieve and parse parameters passed to this driver, except the
2168 * device-to-port mapping, which has to be parsed later.
2172 /* Find out which driver instance we are. */
2174 (void) env_parse("instance", "d", 0, &v
, 0, 255);
2175 ahci_instance
= (int) v
;
2177 /* Initialize the verbosity level. */
2179 (void) env_parse("ahci_verbose", "d", 0, &v
, V_NONE
, V_REQ
);
2180 ahci_verbose
= (int) v
;
2182 /* Initialize timeout-related values. */
2183 ahci_get_var("ahci_init_timeout", &ahci_spinup_timeout
, TRUE
);
2184 ahci_get_var("ahci_sig_timeout", &ahci_sig_timeout
, TRUE
);
2185 ahci_get_var("ahci_sig_checks", &ahci_sig_checks
, FALSE
);
2186 ahci_get_var("ahci_cmd_timeout", &ahci_command_timeout
, TRUE
);
2187 ahci_get_var("ahci_io_timeout", &ahci_transfer_timeout
, TRUE
);
2188 ahci_get_var("ahci_flush_timeout", &ahci_flush_timeout
, TRUE
);
2191 /*===========================================================================*
2192 * ahci_set_mapping *
2193 *===========================================================================*/
2194 static void ahci_set_mapping(void)
2196 /* Construct a mapping from device nodes to port numbers.
2198 char key
[16], val
[32], *p
;
2202 /* Start off with a mapping that includes implemented ports only, in
2203 * order. We choose this mapping over an identity mapping to maximize
2204 * the chance that the user will be able to access the first MAX_DRIVES
2205 * devices. Note that we can only do this after initializing the HBA.
2207 for (i
= j
= 0; i
< NR_PORTS
&& j
< MAX_DRIVES
; i
++)
2208 if (port_state
[i
].state
!= STATE_NO_PORT
)
2211 for ( ; j
< MAX_DRIVES
; j
++)
2212 ahci_map
[j
] = NO_PORT
;
2214 /* See if the user specified a custom mapping. Unlike all other
2215 * configuration options, this is a per-instance setting.
2217 strlcpy(key
, "ahci0_map", sizeof(key
));
2218 key
[4] += ahci_instance
;
2220 if (env_get_param(key
, val
, sizeof(val
)) == OK
) {
2221 /* Parse the mapping, which is assumed to be a comma-separated
2222 * list of zero-based port numbers.
2226 for (i
= 0; i
< MAX_DRIVES
; i
++) {
2228 port
= (unsigned int) strtoul(p
, &p
, 0);
2232 ahci_map
[i
] = port
% NR_PORTS
;
2234 else ahci_map
[i
] = NO_PORT
;
2238 /* Create a reverse mapping. */
2239 for (i
= 0; i
< MAX_DRIVES
; i
++)
2240 if ((j
= ahci_map
[i
]) != NO_PORT
)
2241 port_state
[j
].device
= i
;
2244 /*===========================================================================*
2245 * sef_cb_init_fresh *
2246 *===========================================================================*/
2247 static int sef_cb_init_fresh(int type
, sef_init_info_t
*UNUSED(info
))
2249 /* Initialize the driver.
2253 /* Get command line parameters. */
2256 /* Probe for recognized devices, skipping matches as appropriate. */
2257 devind
= ahci_probe(ahci_instance
);
2260 panic("no matching device found");
2262 /* Initialize the device we found. */
2265 /* Create a mapping from device nodes to port numbers. */
2268 /* Announce that we are up. */
2269 blockdriver_announce(type
);
2274 /*===========================================================================*
2275 * sef_cb_signal_handler *
2276 *===========================================================================*/
2277 static void sef_cb_signal_handler(int signo
)
2279 /* In case of a termination signal, shut down this driver.
2283 if (signo
!= SIGTERM
) return;
2285 /* If any ports are still opened, assume that the system is being shut
2286 * down, and stay up until the last device has been closed.
2288 ahci_exiting
= TRUE
;
2290 for (port
= 0; port
< hba_state
.nr_ports
; port
++)
2291 if (port_state
[port
].open_count
> 0)
2294 /* If not, stop the driver and exit immediately. */
2300 /*===========================================================================*
2301 * sef_local_startup *
2302 *===========================================================================*/
2303 static void sef_local_startup(void)
2305 /* Set callbacks and initialize the System Event Framework (SEF).
2308 /* Register init callbacks. */
2309 sef_setcb_init_fresh(sef_cb_init_fresh
);
2310 sef_setcb_init_lu(sef_cb_init_fresh
);
2312 /* Register signal callbacks. */
2313 sef_setcb_signal_handler(sef_cb_signal_handler
);
2315 /* Let SEF perform startup. */
2319 /*===========================================================================*
2321 *===========================================================================*/
2322 static char *ahci_portname(struct port_state
*ps
)
2324 /* Return a printable name for the given port. Whenever we can, print a
2325 * "Dx" device number rather than a "Pxx" port number, because the user
2326 * may not be aware of the mapping currently in use.
2328 static char name
[] = "AHCI0-P00";
2330 name
[4] = '0' + ahci_instance
;
2332 if (ps
->device
== NO_DEVICE
) {
2334 name
[7] = '0' + (ps
- port_state
) / 10;
2335 name
[8] = '0' + (ps
- port_state
) % 10;
2339 name
[7] = '0' + ps
->device
;
2346 /*===========================================================================*
2348 *===========================================================================*/
2349 static struct port_state
*ahci_map_minor(dev_t minor
, struct device
**dvp
)
2351 /* Map a minor device number to a port and a pointer to the partition's
2352 * device structure. Return NULL if this minor device number does not
2353 * identify an actual device.
2355 struct port_state
*ps
;
2360 if (minor
< NR_MINORS
) {
2361 port
= ahci_map
[minor
/ DEV_PER_DRIVE
];
2363 if (port
== NO_PORT
)
2366 ps
= &port_state
[port
];
2367 *dvp
= &ps
->part
[minor
% DEV_PER_DRIVE
];
2369 else if ((unsigned) (minor
-= MINOR_d0p0s0
) < NR_SUBDEVS
) {
2370 port
= ahci_map
[minor
/ SUB_PER_DRIVE
];
2372 if (port
== NO_PORT
)
2375 ps
= &port_state
[port
];
2376 *dvp
= &ps
->subpart
[minor
% SUB_PER_DRIVE
];
2382 /*===========================================================================*
2384 *===========================================================================*/
2385 static struct device
*ahci_part(dev_t minor
)
2387 /* Return a pointer to the partition information structure of the given
2392 if (ahci_map_minor(minor
, &dv
) == NULL
)
2398 /*===========================================================================*
2400 *===========================================================================*/
2401 static int ahci_open(dev_t minor
, int access
)
2405 struct port_state
*ps
;
2408 ps
= ahci_get_port(minor
);
2410 /* Only one open request can be processed at a time, due to the fact
2411 * that it is an exclusive operation. The thread that handles this call
2412 * can therefore freely register itself at slot zero.
2414 ps
->cmd_info
[0].tid
= blockdriver_mt_get_tid();
2416 /* If we are still in the process of initializing this port or device,
2417 * wait for completion of that phase first.
2419 if (ps
->flags
& FLAG_BUSY
)
2422 /* The device may only be opened if it is now properly functioning. */
2423 if (ps
->state
!= STATE_GOOD_DEV
)
2426 /* Some devices may only be opened in read-only mode. */
2427 if ((ps
->flags
& FLAG_READONLY
) && (access
& W_BIT
))
2430 if (ps
->open_count
== 0) {
2431 /* The first open request. Clear the barrier flag, if set. */
2432 ps
->flags
&= ~FLAG_BARRIER
;
2434 /* Recheck media only when nobody is using the device. */
2435 if ((ps
->flags
& FLAG_ATAPI
) &&
2436 (r
= atapi_check_medium(ps
, 0)) != OK
)
2439 /* After rechecking the media, the partition table must always
2440 * be read. This is also a convenient time to do it for
2441 * nonremovable devices. Start by resetting the partition
2442 * tables and setting the working size of the entire device.
2444 memset(ps
->part
, 0, sizeof(ps
->part
));
2445 memset(ps
->subpart
, 0, sizeof(ps
->subpart
));
2447 ps
->part
[0].dv_size
=
2448 mul64(ps
->lba_count
, cvu64(ps
->sector_size
));
2450 partition(&ahci_dtab
, ps
->device
* DEV_PER_DRIVE
, P_PRIMARY
,
2451 !!(ps
->flags
& FLAG_ATAPI
));
2453 blockdriver_mt_set_workers(ps
->device
, ps
->queue_depth
);
2456 /* If the barrier flag is set, deny new open requests until the
2457 * device is fully closed first.
2459 if (ps
->flags
& FLAG_BARRIER
)
2468 /*===========================================================================*
2470 *===========================================================================*/
2471 static int ahci_close(dev_t minor
)
2475 struct port_state
*ps
;
2478 ps
= ahci_get_port(minor
);
2480 /* Decrease the open count. */
2481 if (ps
->open_count
<= 0) {
2482 dprintf(V_ERR
, ("%s: closing already-closed port\n",
2483 ahci_portname(ps
)));
2490 if (ps
->open_count
> 0)
2493 /* The device is now fully closed. That also means that the threads for
2494 * this device are not needed anymore, so we reduce the count to one.
2496 blockdriver_mt_set_workers(ps
->device
, 1);
2498 if (ps
->state
== STATE_GOOD_DEV
&& !(ps
->flags
& FLAG_BARRIER
)) {
2499 dprintf(V_INFO
, ("%s: flushing write cache\n",
2500 ahci_portname(ps
)));
2502 (void) gen_flush_wcache(ps
);
2505 /* If the entire driver has been told to terminate, check whether all
2506 * devices are now closed. If so, tell libblockdriver to quit after
2507 * replying to the close request.
2510 for (port
= 0; port
< hba_state
.nr_ports
; port
++)
2511 if (port_state
[port
].open_count
> 0)
2514 if (port
== hba_state
.nr_ports
) {
2517 blockdriver_mt_terminate();
2524 /*===========================================================================*
2526 *===========================================================================*/
2527 static ssize_t
ahci_transfer(dev_t minor
, int do_write
, u64_t position
,
2528 endpoint_t endpt
, iovec_t
*iovec
, unsigned int count
, int flags
)
2530 /* Perform data transfer on the selected device.
2532 struct port_state
*ps
;
2536 ps
= ahci_get_port(minor
);
2537 dv
= ahci_part(minor
);
2539 if (ps
->state
!= STATE_GOOD_DEV
|| (ps
->flags
& FLAG_BARRIER
))
2542 if (count
> NR_IOREQS
)
2545 /* Check for basic end-of-partition condition: if the start position of
2546 * the request is outside the partition, return success immediately.
2547 * The size of the request is obtained, and possibly reduced, later.
2549 if (cmp64(position
, dv
->dv_size
) >= 0)
2552 pos
= add64(dv
->dv_base
, position
);
2553 eof
= add64(dv
->dv_base
, dv
->dv_size
);
2555 return port_transfer(ps
, pos
, eof
, endpt
, (iovec_s_t
*) iovec
, count
,
2559 /*===========================================================================*
2561 *===========================================================================*/
2562 static int ahci_ioctl(dev_t minor
, unsigned int request
, endpoint_t endpt
,
2563 cp_grant_id_t grant
)
2565 /* Process I/O control requests.
2567 struct port_state
*ps
;
2570 ps
= ahci_get_port(minor
);
2574 if (ps
->state
!= STATE_GOOD_DEV
|| (ps
->flags
& FLAG_BARRIER
))
2577 if (!(ps
->flags
& FLAG_ATAPI
))
2580 return atapi_load_eject(ps
, 0, FALSE
/*load*/);
2583 return sys_safecopyto(endpt
, grant
, 0,
2584 (vir_bytes
) &ps
->open_count
, sizeof(ps
->open_count
));
2587 if (ps
->state
!= STATE_GOOD_DEV
|| (ps
->flags
& FLAG_BARRIER
))
2590 return gen_flush_wcache(ps
);
2593 if (ps
->state
!= STATE_GOOD_DEV
|| (ps
->flags
& FLAG_BARRIER
))
2596 if ((r
= sys_safecopyfrom(endpt
, grant
, 0, (vir_bytes
) &val
,
2597 sizeof(val
))) != OK
)
2600 return gen_set_wcache(ps
, val
);
2603 if (ps
->state
!= STATE_GOOD_DEV
|| (ps
->flags
& FLAG_BARRIER
))
2606 if ((r
= gen_get_wcache(ps
, &val
)) != OK
)
2609 return sys_safecopyto(endpt
, grant
, 0, (vir_bytes
) &val
,
2616 /*===========================================================================*
2618 *===========================================================================*/
2619 static int ahci_device(dev_t minor
, device_id_t
*id
)
2621 /* Map a minor device number to a device ID.
2623 struct port_state
*ps
;
2626 if ((ps
= ahci_map_minor(minor
, &dv
)) == NULL
)
2634 /*===========================================================================*
2636 *===========================================================================*/
2637 static struct port_state
*ahci_get_port(dev_t minor
)
2639 /* Get the port structure associated with the given minor device.
2640 * Called only from worker threads, so the minor device is already
2641 * guaranteed to map to a port.
2643 struct port_state
*ps
;
2646 if ((ps
= ahci_map_minor(minor
, &dv
)) == NULL
)
2647 panic("device mapping for minor %d disappeared", minor
);
2652 /*===========================================================================*
2654 *===========================================================================*/
2655 int main(int argc
, char **argv
)
2660 env_setargs(argc
, argv
);
2661 sef_local_startup();
2663 blockdriver_mt_task(&ahci_dtab
);