/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/systm.h>
#include <sys/debug.h>
#include <sys/aio_impl.h>
#include <sys/fs/snode.h>
#include <sys/siginfo.h>
#include <sys/cpuvar.h>
#include <sys/tnf_probe.h>

int aphysio(int (*)(), int (*)(), dev_t, int, void (*)(), struct aio_req *);
void aio_done(struct buf *);
void aphysio_unlock(aio_req_t *);
void aio_cleanup(int);
void aio_cleanup_exit(void);

static void aio_sigev_send(proc_t *, sigqueue_t *);
static void aio_hash_delete(aio_t *, aio_req_t *);
static void aio_lio_free(aio_t *, aio_lio_t *);
static int aio_cleanup_cleanupq(aio_t *, aio_req_t *, int);
static int aio_cleanup_notifyq(aio_t *, aio_req_t *, int);
static void aio_cleanup_pollq(aio_t *, aio_req_t *, int);
static void aio_cleanup_portq(aio_t *, aio_req_t *, int);

/*
 * async version of physio() that doesn't wait synchronously
 * for the driver's strategy routine to complete.
 */
int
aphysio(
	int (*strategy)(struct buf *),
	int (*cancel)(struct buf *),
	dev_t dev,
	int rw,
	void (*mincnt)(struct buf *),
	struct aio_req *aio)
{
	struct uio *uio = aio->aio_uio;
	aio_req_t *reqp = (aio_req_t *)aio->aio_private;
	struct buf *bp = &reqp->aio_req_buf;
	struct iovec *iov;
	struct as *as;
	char *a;
	int	error;
	size_t	c;
	struct page **pplist;
	struct dev_ops *ops = devopsp[getmajor(dev)];

	if (uio->uio_loffset < 0)
		return (EINVAL);
#ifdef	_ILP32
	/*
	 * For 32-bit kernels, check against SPEC_MAXOFFSET_T which represents
	 * the maximum size that can be supported by the IO subsystem.
	 * XXX this code assumes a D_64BIT driver.
	 */
	if (uio->uio_loffset > SPEC_MAXOFFSET_T)
		return (EINVAL);
#endif	/* _ILP32 */

	if (rw == B_READ)
		CPU_STATS_ADD_K(sys, phread, 1);
	else
		CPU_STATS_ADD_K(sys, phwrite, 1);

	iov = uio->uio_iov;
	sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);
	sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);

	bp->b_flags = B_BUSY | B_PHYS | B_ASYNC | rw;
	bp->b_edev = dev;
	bp->b_dev = cmpdev(dev);
	bp->b_lblkno = btodt(uio->uio_loffset);
	bp->b_offset = uio->uio_loffset;
	(void) ops->devo_getinfo(NULL, DDI_INFO_DEVT2DEVINFO,
	    (void *)bp->b_edev, (void **)&bp->b_dip);

	/*
	 * Clustering: Clustering can set the b_iodone, b_forw and
	 * b_proc fields to cluster-specific values.
	 */
	if (bp->b_iodone == NULL) {
		bp->b_iodone = (int (*)()) aio_done;
		/* b_forw points at an aio_req_t structure */
		bp->b_forw = (struct buf *)reqp;
		bp->b_proc = curproc;
	}

	a = bp->b_un.b_addr = iov->iov_base;
	c = bp->b_bcount = iov->iov_len;

	/* let the driver limit/validate the transfer length */
	(*mincnt)(bp);
	if (bp->b_bcount != iov->iov_len)
		return (ENOTSUP);

	as = bp->b_proc->p_as;

	error = as_pagelock(as, &pplist, a,
	    c, rw == B_READ ? S_WRITE : S_READ);
	if (error != 0) {
		bp->b_flags |= B_ERROR;
		bp->b_error = error;
		bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
		return (error);
	}
	reqp->aio_req_flags |= AIO_PAGELOCKDONE;
	bp->b_shadow = pplist;
	if (pplist != NULL) {
		bp->b_flags |= B_SHADOW;
	}

	if (cancel != anocancel)
		cmn_err(CE_PANIC,
		    "aphysio: cancellation not supported, use anocancel");

	reqp->aio_req_cancel = cancel;

	DTRACE_IO1(start, struct buf *, bp);

	return ((*strategy)(bp));
}
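
/*
 * Usage sketch (illustrative only, not part of the original source): a
 * character driver's aread(9E)/awrite(9E) entry point normally hands its
 * own strategy(9E) routine to aphysio(), with anocancel() as the cancel
 * routine and minphys() bounding the transfer size, e.g.
 *
 *	static int
 *	xxaread(dev_t dev, struct aio_req *aio, cred_t *credp)
 *	{
 *		return (aphysio(xxstrategy, anocancel, dev, B_READ,
 *		    minphys, aio));
 *	}
 *
 * xxaread and xxstrategy are hypothetical driver routines used only to
 * show the calling convention.
 */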

/* ARGSUSED */
int
anocancel(struct buf *bp)
{
	return (ENXIO);
}

/*
 * Called from biodone().
 * Notify process that a pending AIO has finished.
 */

/*
 * Clustering: This function is made non-static as it is used
 * by clustering s/w as contract private interface.
 */
void
aio_done(struct buf *bp)
{
	proc_t *p;
	struct as *as;
	aio_req_t *reqp;
	aio_lio_t *head = NULL;
	aio_t *aiop;
	sigqueue_t *sigev = NULL;
	sigqueue_t *lio_sigev = NULL;
	port_kevent_t *pkevp = NULL;
	port_kevent_t *lio_pkevp = NULL;
	int fd;
	int cleanupqflag;
	int pollqflag;
	int portevpend;
	void (*func)();
	int use_port = 0;
	int reqp_flags = 0;
	int send_signal = 0;

	p = bp->b_proc;
	as = p->p_as;
	reqp = (aio_req_t *)bp->b_forw;
	fd = reqp->aio_req_fd;

	/*
	 * mapout earlier so that more kmem is available when aio is
	 * heavily used. bug #1262082
	 */
	if (bp->b_flags & B_REMAPPED)
		bp_mapout(bp);

	/* decrement fd's ref count by one, now that aio request is done. */
	areleasef(fd, P_FINFO(p));

	aiop = p->p_aio;
	ASSERT(aiop != NULL);

	mutex_enter(&aiop->aio_portq_mutex);
	mutex_enter(&aiop->aio_mutex);
	ASSERT(aiop->aio_pending > 0);
	ASSERT(reqp->aio_req_flags & AIO_PENDING);
	aiop->aio_pending--;
	reqp->aio_req_flags &= ~AIO_PENDING;
	reqp_flags = reqp->aio_req_flags;
	if ((pkevp = reqp->aio_req_portkev) != NULL) {
		/* Event port notification is desired for this transaction */
		if (reqp->aio_req_flags & AIO_CLOSE_PORT) {
			/*
			 * The port is being closed and it is waiting for
			 * pending asynchronous I/O transactions to complete.
			 */
			portevpend = --aiop->aio_portpendcnt;
			aio_deq(&aiop->aio_portpending, reqp);
			aio_enq(&aiop->aio_portq, reqp, 0);
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_portq_mutex);
			port_send_event(pkevp);
			if (portevpend == 0)
				cv_broadcast(&aiop->aio_portcv);
			return;
		}

		if (aiop->aio_flags & AIO_CLEANUP) {
			/*
			 * aio_cleanup_thread() is waiting for completion of
			 * transactions.
			 */
			mutex_enter(&as->a_contents);
			aio_deq(&aiop->aio_portpending, reqp);
			aio_enq(&aiop->aio_portcleanupq, reqp, 0);
			cv_signal(&aiop->aio_cleanupcv);
			mutex_exit(&as->a_contents);
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_portq_mutex);
			return;
		}

		aio_deq(&aiop->aio_portpending, reqp);
		aio_enq(&aiop->aio_portq, reqp, 0);

		use_port = 1;
	} else {
		/*
		 * when the AIO_CLEANUP flag is enabled for this
		 * process, or when the AIO_POLL bit is set for
		 * this request, special handling is required.
		 * otherwise the request is put onto the doneq.
		 */
		cleanupqflag = (aiop->aio_flags & AIO_CLEANUP);
		pollqflag = (reqp->aio_req_flags & AIO_POLL);
		if (cleanupqflag | pollqflag) {

			if (cleanupqflag)
				mutex_enter(&as->a_contents);

			/*
			 * requests with their AIO_POLL bit set are put
			 * on the pollq, requests with sigevent structures
			 * or with listio heads are put on the notifyq, and
			 * the remaining requests don't require any special
			 * cleanup handling, so they're put onto the default
			 * cleanupq.
			 */
			if (pollqflag)
				aio_enq(&aiop->aio_pollq, reqp, AIO_POLLQ);
			else if (reqp->aio_req_sigqp || reqp->aio_req_lio)
				aio_enq(&aiop->aio_notifyq, reqp, AIO_NOTIFYQ);
			else
				aio_enq(&aiop->aio_cleanupq, reqp,
				    AIO_CLEANUPQ);

			if (cleanupqflag) {
				cv_signal(&aiop->aio_cleanupcv);
				mutex_exit(&as->a_contents);
				mutex_exit(&aiop->aio_mutex);
				mutex_exit(&aiop->aio_portq_mutex);
			} else {
				ASSERT(pollqflag);
				/* block aio_cleanup_exit until we're done */
				aiop->aio_flags |= AIO_DONE_ACTIVE;
				mutex_exit(&aiop->aio_mutex);
				mutex_exit(&aiop->aio_portq_mutex);
				/*
				 * let the cleanup processing happen from an
				 * AST; set an AST on all threads in this
				 * process.
				 */
				mutex_enter(&p->p_lock);
				set_proc_ast(p);
				mutex_exit(&p->p_lock);
				mutex_enter(&aiop->aio_mutex);
				/* wakeup anybody waiting in aiowait() */
				cv_broadcast(&aiop->aio_waitcv);

				/* wakeup aio_cleanup_exit if needed */
				if (aiop->aio_flags & AIO_CLEANUP)
					cv_signal(&aiop->aio_cleanupcv);
				aiop->aio_flags &= ~AIO_DONE_ACTIVE;
				mutex_exit(&aiop->aio_mutex);
			}
			return;
		}

		/*
		 * save req's sigevent pointer, and check its
		 * value after releasing aio_mutex lock.
		 */
		sigev = reqp->aio_req_sigqp;
		reqp->aio_req_sigqp = NULL;

		/* put request on done queue. */
		aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
	}

	/*
	 * when list IO notification is enabled, a notification or
	 * signal is sent only when all entries in the list are done.
	 */
	if ((head = reqp->aio_req_lio) != NULL) {
		ASSERT(head->lio_refcnt > 0);
		if (--head->lio_refcnt == 0) {
			/*
			 * save lio's sigevent pointer, and check
			 * its value after releasing aio_mutex lock.
			 */
			lio_sigev = head->lio_sigqp;
			head->lio_sigqp = NULL;
			cv_signal(&head->lio_notify);
			if (head->lio_port >= 0 &&
			    (lio_pkevp = head->lio_portkev) != NULL)
				head->lio_port = -1;
		}
	}

	/*
	 * if AIO_WAITN is set, send the signal only when we have reached
	 * the required number of finished I/Os, or when all I/Os are done.
	 */
	if (aiop->aio_flags & AIO_WAITN) {
		if (aiop->aio_waitncnt > 0)
			aiop->aio_waitncnt--;
		if (aiop->aio_pending == 0 ||
		    aiop->aio_waitncnt == 0)
			cv_broadcast(&aiop->aio_waitcv);
	} else {
		cv_broadcast(&aiop->aio_waitcv);
	}

	/*
	 * No need to set this flag for pollq, portq, lio requests.
	 * If this is an old Solaris aio request, and the process has
	 * a SIGIO signal handler enabled, then send a SIGIO signal.
	 */
	if (!sigev && !use_port && head == NULL &&
	    (reqp->aio_req_flags & AIO_SOLARIS) &&
	    (func = PTOU(p)->u_signal[SIGIO - 1]) != SIG_DFL &&
	    (func != SIG_IGN)) {
		send_signal = 1;
		reqp->aio_req_flags |= AIO_SIGNALLED;
	}

	mutex_exit(&aiop->aio_mutex);
	mutex_exit(&aiop->aio_portq_mutex);

	/*
	 * Could the cleanup thread be waiting for AIO with locked
	 * resources to finish?
	 * Ideally the cleanup thread should block on cleanupcv in that case,
	 * but there is a window in which it could miss a new aio request
	 * that sneaked in.
	 */
	mutex_enter(&as->a_contents);
	if ((reqp_flags & AIO_PAGELOCKDONE) && AS_ISUNMAPWAIT(as))
		cv_broadcast(&as->a_cv);
	mutex_exit(&as->a_contents);

	if (sigev)
		aio_sigev_send(p, sigev);
	else if (send_signal)
		psignal(p, SIGIO);

	if (pkevp)
		port_send_event(pkevp);
	if (lio_sigev)
		aio_sigev_send(p, lio_sigev);
	if (lio_pkevp)
		port_send_event(lio_pkevp);
}

/*
 * Send a queued signal to the specified process.  The sigev
 * sigqueue_t must be non-NULL; it is queued to the process as-is.
 */
static void
aio_sigev_send(proc_t *p, sigqueue_t *sigev)
{
	ASSERT(sigev != NULL);

	mutex_enter(&p->p_lock);
	sigaddqa(p, NULL, sigev);
	mutex_exit(&p->p_lock);
}

/*
 * special case handling for zero length requests. the aio request
 * short circuits the normal completion path since all that's required
 * to complete this request is to copyout a zero to the aio request's
 * return and error fields.
 */
void
aio_zerolen(aio_req_t *reqp)
{
	struct buf *bp = &reqp->aio_req_buf;

	reqp->aio_req_flags |= AIO_ZEROLEN;

	bp->b_forw = (struct buf *)reqp;
	bp->b_proc = curproc;

	bp->b_resid = 0;
	bp->b_flags = 0;

	aio_done(bp);
}

/*
 * unlock pages previously locked by as_pagelock
 */
void
aphysio_unlock(aio_req_t *reqp)
{
	struct buf *bp;
	struct iovec *iov;
	int flags;

	if (reqp->aio_req_flags & AIO_PHYSIODONE)
		return;

	reqp->aio_req_flags |= AIO_PHYSIODONE;

	if (reqp->aio_req_flags & AIO_ZEROLEN)
		return;

	bp = &reqp->aio_req_buf;
	iov = reqp->aio_req_uio.uio_iov;
	flags = (((bp->b_flags & B_READ) == B_READ) ? S_WRITE : S_READ);
	if (reqp->aio_req_flags & AIO_PAGELOCKDONE) {
		as_pageunlock(bp->b_proc->p_as,
		    bp->b_flags & B_SHADOW ? bp->b_shadow : NULL,
		    iov->iov_base, iov->iov_len, flags);
		reqp->aio_req_flags &= ~AIO_PAGELOCKDONE;
	}
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
	bp->b_flags |= B_DONE;
}

/*
 * deletes a request's id from the hash table of outstanding io.
 */
static void
aio_hash_delete(aio_t *aiop, struct aio_req_t *reqp)
{
	long index;
	aio_result_t *resultp = reqp->aio_req_resultp;
	aio_req_t *current;
	aio_req_t **nextp;

	index = AIO_HASH(resultp);
	nextp = (aiop->aio_hash + index);
	while ((current = *nextp) != NULL) {
		if (current->aio_req_resultp == resultp) {
			*nextp = current->aio_hash_next;
			return;
		}
		nextp = &current->aio_hash_next;
	}
}
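
/*
 * For illustration (a sketch based on the loop above, not original text):
 * aio_hash is an array of singly linked chains indexed by AIO_HASH(resultp);
 * the walk follows aio_hash_next and splices a match out by rewriting the
 * predecessor's link in place:
 *
 *	aio_hash[i] -> R1 -> R2 -> R3 -> NULL
 *
 * removing R2 simply sets R1->aio_hash_next = R3, so no back pointers are
 * needed for the hash chains.
 */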

/*
 * Put a list head struct onto its free list.
 */
static void
aio_lio_free(aio_t *aiop, aio_lio_t *head)
{
	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if (head->lio_sigqp != NULL)
		kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
	head->lio_next = aiop->aio_lio_free;
	aiop->aio_lio_free = head;
}

/*
 * Put a reqp onto the freelist.
 */
void
aio_req_free(aio_t *aiop, aio_req_t *reqp)
{
	aio_lio_t *liop;

	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if (reqp->aio_req_portkev) {
		port_free_event(reqp->aio_req_portkev);
		reqp->aio_req_portkev = NULL;
	}

	if ((liop = reqp->aio_req_lio) != NULL) {
		if (--liop->lio_nent == 0)
			aio_lio_free(aiop, liop);
		reqp->aio_req_lio = NULL;
	}
	if (reqp->aio_req_sigqp != NULL) {
		kmem_free(reqp->aio_req_sigqp, sizeof (sigqueue_t));
		reqp->aio_req_sigqp = NULL;
	}
	reqp->aio_req_next = aiop->aio_free;
	reqp->aio_req_prev = NULL;
	aiop->aio_free = reqp;
	aiop->aio_outstanding--;
	if (aiop->aio_outstanding == 0)
		cv_broadcast(&aiop->aio_waitcv);
	aio_hash_delete(aiop, reqp);
}

/*
 * Put a reqp onto the freelist.
 */
void
aio_req_free_port(aio_t *aiop, aio_req_t *reqp)
{
	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	reqp->aio_req_next = aiop->aio_free;
	reqp->aio_req_prev = NULL;
	aiop->aio_free = reqp;
	aiop->aio_outstanding--;
	aio_hash_delete(aiop, reqp);
}

#ifdef DEBUG
/*
 * Verify the integrity of a queue.
 */
static void
aio_verify_queue(aio_req_t *head,
	aio_req_t *entry_present, aio_req_t *entry_missing)
{
	aio_req_t *reqp;
	int found = 0;
	int present = 0;

	if ((reqp = head) != NULL) {
		do {
			ASSERT(reqp->aio_req_prev->aio_req_next == reqp);
			ASSERT(reqp->aio_req_next->aio_req_prev == reqp);
			if (entry_present == reqp)
				found++;
			if (entry_missing == reqp)
				present++;
		} while ((reqp = reqp->aio_req_next) != head);
	}
	ASSERT(entry_present == NULL || found == 1);
	ASSERT(entry_missing == NULL || present == 0);
}
#else
#define	aio_verify_queue(x, y, z)
#endif

/*
 * Put a request onto the tail of a queue.
 */
void
aio_enq(aio_req_t **qhead, aio_req_t *reqp, int qflg_new)
{
	aio_req_t *head;
	aio_req_t *prev;

	aio_verify_queue(*qhead, NULL, reqp);

	if ((head = *qhead) == NULL) {
		reqp->aio_req_next = reqp;
		reqp->aio_req_prev = reqp;
		*qhead = reqp;
	} else {
		reqp->aio_req_next = head;
		reqp->aio_req_prev = prev = head->aio_req_prev;
		prev->aio_req_next = reqp;
		head->aio_req_prev = reqp;
	}
	reqp->aio_req_flags |= qflg_new;
}

/*
 * Remove a request from its queue.
 */
void
aio_deq(aio_req_t **qhead, aio_req_t *reqp)
{
	aio_verify_queue(*qhead, reqp, NULL);

	if (reqp->aio_req_next == reqp) {
		*qhead = NULL;
	} else {
		reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
		reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
		if (*qhead == reqp)
			*qhead = reqp->aio_req_next;
	}
	reqp->aio_req_next = NULL;
	reqp->aio_req_prev = NULL;
}
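
/*
 * For illustration (a sketch, not original code): every queue handled by
 * aio_enq()/aio_deq() is a circular, doubly linked list threaded through
 * aio_req_next/aio_req_prev, with *qhead pointing at the oldest entry, so
 * after enqueueing requests A, B and C onto an empty queue:
 *
 *	*qhead -> A <-> B <-> C -> (back to A)
 *
 * aio_enq() appends at the tail in O(1) via (*qhead)->aio_req_prev, and
 * aio_deq() unlinks in O(1), advancing *qhead only when the head itself is
 * removed.  Callers serialize queue moves with the appropriate lock, for
 * example:
 *
 *	mutex_enter(&aiop->aio_mutex);
 *	aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
 *	mutex_exit(&aiop->aio_mutex);
 */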

/*
 * concatenate a specified queue with the cleanupq. the specified
 * queue is put onto the tail of the cleanupq. all elements on the
 * specified queue should have their aio_req_flags field cleared.
 */
/*ARGSUSED*/
void
aio_cleanupq_concat(aio_t *aiop, aio_req_t *q2, int qflg)
{
	aio_req_t *cleanupqhead, *q2tail;
	aio_req_t *reqp = q2;

	do {
		ASSERT(reqp->aio_req_flags & qflg);
		reqp->aio_req_flags &= ~qflg;
		reqp->aio_req_flags |= AIO_CLEANUPQ;
	} while ((reqp = reqp->aio_req_next) != q2);

	cleanupqhead = aiop->aio_cleanupq;
	if (cleanupqhead == NULL)
		aiop->aio_cleanupq = q2;
	else {
		cleanupqhead->aio_req_prev->aio_req_next = q2;
		q2tail = q2->aio_req_prev;
		q2tail->aio_req_next = cleanupqhead;
		q2->aio_req_prev = cleanupqhead->aio_req_prev;
		cleanupqhead->aio_req_prev = q2tail;
	}
}
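
/*
 * For illustration: if the existing cleanupq is (C1 ... Cn) and q2 is
 * (Q1 ... Qm), the splice above produces the single circular list
 * (C1 ... Cn Q1 ... Qm); q2 is appended whole at the tail without walking
 * either list beyond the flag-fixup pass.
 */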

/*
 * cleanup aio requests that are on the per-process poll queue.
 */
void
aio_cleanup(int flag)
{
	aio_t *aiop = curproc->p_aio;
	aio_req_t *pollqhead, *cleanupqhead, *notifyqhead;
	aio_req_t *cleanupport;
	aio_req_t *portq = NULL;
	void (*func)();
	int signalled = 0;
	int qflag = 0;
	int exitflg;

	ASSERT(aiop != NULL);

	if (flag == AIO_CLEANUP_EXIT)
		exitflg = AIO_CLEANUP_EXIT;
	else
		exitflg = 0;

	/*
	 * We need to get the aio_cleanupq_mutex because we are calling
	 * aio_cleanup_cleanupq()
	 */
	mutex_enter(&aiop->aio_cleanupq_mutex);
	/*
	 * take all the requests off the cleanupq, the notifyq,
	 * and the pollq.
	 */
	mutex_enter(&aiop->aio_mutex);
	if ((cleanupqhead = aiop->aio_cleanupq) != NULL) {
		aiop->aio_cleanupq = NULL;
		qflag++;
	}
	if ((notifyqhead = aiop->aio_notifyq) != NULL) {
		aiop->aio_notifyq = NULL;
		qflag++;
	}
	if ((pollqhead = aiop->aio_pollq) != NULL) {
		aiop->aio_pollq = NULL;
		qflag++;
	}
	if (flag) {
		if ((portq = aiop->aio_portq) != NULL)
			qflag++;

		if ((cleanupport = aiop->aio_portcleanupq) != NULL) {
			aiop->aio_portcleanupq = NULL;
			qflag++;
		}
	}
	mutex_exit(&aiop->aio_mutex);

	/*
	 * return immediately if cleanupq, pollq, and
	 * notifyq are all empty. someone else must have
	 * emptied them.
	 */
	if (!qflag) {
		mutex_exit(&aiop->aio_cleanupq_mutex);
		return;
	}

	/*
	 * do cleanup for the various queues.
	 */
	if (cleanupqhead)
		signalled = aio_cleanup_cleanupq(aiop, cleanupqhead, exitflg);
	mutex_exit(&aiop->aio_cleanupq_mutex);
	if (notifyqhead)
		signalled = aio_cleanup_notifyq(aiop, notifyqhead, exitflg);
	if (pollqhead)
		aio_cleanup_pollq(aiop, pollqhead, exitflg);
	if (flag && (cleanupport || portq))
		aio_cleanup_portq(aiop, cleanupport, exitflg);

	if (exitflg)
		return;

	/*
	 * If we have an active aio_cleanup_thread it's possible for
	 * this routine to push something on to the done queue after
	 * an aiowait/aiosuspend thread has already decided to block.
	 * This being the case, we need a cv_broadcast here to wake
	 * these threads up. It is simpler and cleaner to do this
	 * broadcast here than in the individual cleanup routines.
	 */

	mutex_enter(&aiop->aio_mutex);
	/*
	 * If there has never been an old Solaris aio request
	 * issued by this process, then do not send a SIGIO signal.
	 */
	if (!(aiop->aio_flags & AIO_SOLARIS_REQ))
		signalled = 1;
	cv_broadcast(&aiop->aio_waitcv);
	mutex_exit(&aiop->aio_mutex);

	/*
	 * Only if the process wasn't already signalled,
	 * determine if a SIGIO signal should be delivered.
	 */
	if (!signalled &&
	    (func = PTOU(curproc)->u_signal[SIGIO - 1]) != SIG_DFL &&
	    func != SIG_IGN)
		psignal(curproc, SIGIO);
}

/*
 * Do cleanup for every element of the port cleanup queue.
 */
static void
aio_cleanup_portq(aio_t *aiop, aio_req_t *cleanupq, int exitflag)
{
	aio_req_t	*reqp;
	aio_req_t	*next;
	aio_req_t	*headp;
	aio_lio_t	*liop;

	/* first check the portq */
	if (exitflag || ((aiop->aio_flags & AIO_CLEANUP_PORT) == 0)) {
		mutex_enter(&aiop->aio_mutex);
		if (aiop->aio_flags & AIO_CLEANUP)
			aiop->aio_flags |= AIO_CLEANUP_PORT;
		mutex_exit(&aiop->aio_mutex);

		/*
		 * It is not allowed to hold locks during aphysio_unlock().
		 * The aio_done() interrupt function will try to acquire
		 * aio_mutex and aio_portq_mutex.  Therefore we disconnect
		 * the portq list from the aiop for the duration of the
		 * aphysio_unlock() loop below.
		 */
		mutex_enter(&aiop->aio_portq_mutex);
		headp = aiop->aio_portq;
		aiop->aio_portq = NULL;
		mutex_exit(&aiop->aio_portq_mutex);
		if ((reqp = headp) != NULL) {
			do {
				next = reqp->aio_req_next;
				aphysio_unlock(reqp);
				if (exitflag) {
					mutex_enter(&aiop->aio_mutex);
					aio_req_free(aiop, reqp);
					mutex_exit(&aiop->aio_mutex);
				}
			} while ((reqp = next) != headp);
		}

		if (headp != NULL && exitflag == 0) {
			/* move unlocked requests back to the port queue */
			aio_req_t *newq;

			mutex_enter(&aiop->aio_portq_mutex);
			if ((newq = aiop->aio_portq) != NULL) {
				aio_req_t *headprev = headp->aio_req_prev;
				aio_req_t *newqprev = newq->aio_req_prev;

				headp->aio_req_prev = newqprev;
				newq->aio_req_prev = headprev;
				headprev->aio_req_next = newq;
				newqprev->aio_req_next = headp;
			}
			aiop->aio_portq = headp;
			cv_broadcast(&aiop->aio_portcv);
			mutex_exit(&aiop->aio_portq_mutex);
		}
	}

	/* now check the port cleanup queue */
	if ((reqp = cleanupq) == NULL)
		return;
	do {
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		if (exitflag) {
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		} else {
			mutex_enter(&aiop->aio_portq_mutex);
			aio_enq(&aiop->aio_portq, reqp, 0);
			mutex_exit(&aiop->aio_portq_mutex);
			port_send_event(reqp->aio_req_portkev);
			if ((liop = reqp->aio_req_lio) != NULL) {
				int send_event = 0;

				mutex_enter(&aiop->aio_mutex);
				ASSERT(liop->lio_refcnt > 0);
				if (--liop->lio_refcnt == 0) {
					if (liop->lio_port >= 0 &&
					    liop->lio_portkev) {
						liop->lio_port = -1;
						send_event = 1;
					}
				}
				mutex_exit(&aiop->aio_mutex);
				if (send_event)
					port_send_event(liop->lio_portkev);
			}
		}
	} while ((reqp = next) != cleanupq);
}

/*
 * Do cleanup for every element of the cleanupq.
 */
static int
aio_cleanup_cleanupq(aio_t *aiop, aio_req_t *qhead, int exitflg)
{
	aio_req_t *reqp, *next;
	int signalled = 0;

	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));

	/*
	 * Since aio_req_done() or aio_req_find() use the HASH list to find
	 * the required requests, they could potentially take away elements
	 * if they are already done (AIO_DONEQ is set).
	 * The aio_cleanupq_mutex protects the queue for the duration of the
	 * loop from aio_req_done() and aio_req_find().
	 */
	if ((reqp = qhead) == NULL)
		return (0);
	do {
		ASSERT(reqp->aio_req_flags & AIO_CLEANUPQ);
		ASSERT(reqp->aio_req_portkev == NULL);
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		mutex_enter(&aiop->aio_mutex);
		if (exitflg)
			aio_req_free(aiop, reqp);
		else
			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
		if (!exitflg) {
			if (reqp->aio_req_flags & AIO_SIGNALLED)
				signalled++;
			else
				reqp->aio_req_flags |= AIO_SIGNALLED;
		}
		mutex_exit(&aiop->aio_mutex);
	} while ((reqp = next) != qhead);
	return (signalled);
}

/*
 * do cleanup for every element of the notify queue.
 */
static int
aio_cleanup_notifyq(aio_t *aiop, aio_req_t *qhead, int exitflg)
{
	aio_req_t *reqp, *next;
	aio_lio_t *liohead;
	sigqueue_t *sigev, *lio_sigev = NULL;
	int signalled = 0;

	if ((reqp = qhead) == NULL)
		return (0);
	do {
		ASSERT(reqp->aio_req_flags & AIO_NOTIFYQ);
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		if (exitflg) {
			/* ignore all notifications */
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		} else {
			mutex_enter(&aiop->aio_mutex);
			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
			sigev = reqp->aio_req_sigqp;
			reqp->aio_req_sigqp = NULL;
			if ((liohead = reqp->aio_req_lio) != NULL) {
				ASSERT(liohead->lio_refcnt > 0);
				if (--liohead->lio_refcnt == 0) {
					cv_signal(&liohead->lio_notify);
					lio_sigev = liohead->lio_sigqp;
					liohead->lio_sigqp = NULL;
				}
			}
			mutex_exit(&aiop->aio_mutex);
			if (sigev) {
				signalled++;
				aio_sigev_send(reqp->aio_req_buf.b_proc,
				    sigev);
			}
			if (lio_sigev) {
				signalled++;
				aio_sigev_send(reqp->aio_req_buf.b_proc,
				    lio_sigev);
			}
		}
	} while ((reqp = next) != qhead);

	return (signalled);
}

/*
 * Do cleanup for every element of the poll queue.
 */
static void
aio_cleanup_pollq(aio_t *aiop, aio_req_t *qhead, int exitflg)
{
	aio_req_t *reqp, *next;

	/*
	 * As no other threads should be accessing the queue at this point,
	 * it isn't necessary to hold aio_mutex while we traverse its elements.
	 */
	if ((reqp = qhead) == NULL)
		return;
	do {
		ASSERT(reqp->aio_req_flags & AIO_POLLQ);
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		if (exitflg) {
			/* ignore all notifications */
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		} else {
			aio_copyout_result(reqp);
			mutex_enter(&aiop->aio_mutex);
			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
			mutex_exit(&aiop->aio_mutex);
		}
	} while ((reqp = next) != qhead);
}

/*
 * called by exit(). waits for all outstanding kaio to finish
 * before the kaio resources are freed.
 */
void
aio_cleanup_exit(void)
{
	proc_t *p = curproc;
	aio_t *aiop = p->p_aio;
	aio_req_t *reqp, *next, *head;
	aio_lio_t *nxtlio, *liop;

	/*
	 * wait for all outstanding kaio to complete. process
	 * is now single-threaded; no other kaio requests can
	 * happen once aio_pending is zero.
	 */
	mutex_enter(&aiop->aio_mutex);
	aiop->aio_flags |= AIO_CLEANUP;
	while ((aiop->aio_pending != 0) || (aiop->aio_flags & AIO_DONE_ACTIVE))
		cv_wait(&aiop->aio_cleanupcv, &aiop->aio_mutex);
	mutex_exit(&aiop->aio_mutex);

	/* cleanup the cleanup-thread queues. */
	aio_cleanup(AIO_CLEANUP_EXIT);

	/*
	 * Although this process is now single-threaded, we
	 * still need to protect ourselves against a race with
	 * aio_cleanup_dr_delete_memory().
	 */
	mutex_enter(&p->p_lock);

	/*
	 * free up the done queue's resources.
	 */
	if ((head = aiop->aio_doneq) != NULL) {
		aiop->aio_doneq = NULL;
		reqp = head;
		do {
			next = reqp->aio_req_next;
			aphysio_unlock(reqp);
			kmem_free(reqp, sizeof (struct aio_req_t));
		} while ((reqp = next) != head);
	}
	/*
	 * release aio request freelist.
	 */
	for (reqp = aiop->aio_free; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		kmem_free(reqp, sizeof (struct aio_req_t));
	}

	/*
	 * release io list head freelist.
	 */
	for (liop = aiop->aio_lio_free; liop != NULL; liop = nxtlio) {
		nxtlio = liop->lio_next;
		kmem_free(liop, sizeof (aio_lio_t));
	}

	if (aiop->aio_iocb)
		kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);

	mutex_destroy(&aiop->aio_mutex);
	mutex_destroy(&aiop->aio_portq_mutex);
	mutex_destroy(&aiop->aio_cleanupq_mutex);
	p->p_aio = NULL;
	mutex_exit(&p->p_lock);
	kmem_free(aiop, sizeof (struct aio));
}

/*
 * copy out aio request's result to a user-level result_t buffer.
 */
void
aio_copyout_result(aio_req_t *reqp)
{
	struct buf	*bp;
	struct iovec	*iov;
	void		*resultp;
	int		error;
	size_t		retval;

	if (reqp->aio_req_flags & AIO_COPYOUTDONE)
		return;

	reqp->aio_req_flags |= AIO_COPYOUTDONE;

	iov = reqp->aio_req_uio.uio_iov;
	bp = &reqp->aio_req_buf;
	/* "resultp" points to user-level result_t buffer */
	resultp = (void *)reqp->aio_req_resultp;
	if (bp->b_flags & B_ERROR) {
		if (bp->b_error)
			error = bp->b_error;
		else
			error = EIO;
		retval = (size_t)-1;
	} else {
		error = 0;
		retval = iov->iov_len - bp->b_resid;
	}
#ifdef	_SYSCALL32_IMPL
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		(void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
		(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
	} else {
		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
		    (int)retval);
		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
	}
#else
	(void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
	(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
#endif
}
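
/*
 * For illustration (user-level view, a sketch rather than part of this
 * file): the words stored above land in the aio_result_t that the
 * application handed to aioread()/aiowrite() in libaio, e.g.
 *
 *	aio_result_t res;
 *	int err;
 *
 *	(void) aioread(fd, buf, sizeof (buf), off, SEEK_SET, &res);
 *	(void) aiowait(NULL);
 *	if (res.aio_return == -1)
 *		err = res.aio_errno;
 *
 * fd, buf and off are assumed application variables; aio_return and
 * aio_errno are the documented result fields, see <sys/asynch.h>.
 */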

void
aio_copyout_result_port(struct iovec *iov, struct buf *bp, void *resultp)
{
	int errno;
	size_t retval;

	if (bp->b_flags & B_ERROR) {
		if (bp->b_error)
			errno = bp->b_error;
		else
			errno = EIO;
		retval = (size_t)-1;
	} else {
		errno = 0;
		retval = iov->iov_len - bp->b_resid;
	}
#ifdef	_SYSCALL32_IMPL
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		(void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
		(void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
	} else {
		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
		    (int)retval);
		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, errno);
	}
#else
	(void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
	(void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
#endif
}

/*
 * This function is used to remove a request from the done queue.
 */
void
aio_req_remove_portq(aio_t *aiop, aio_req_t *reqp)
{
	ASSERT(MUTEX_HELD(&aiop->aio_portq_mutex));
	while (aiop->aio_portq == NULL) {
		/*
		 * aio_portq is set to NULL when aio_cleanup_portq()
		 * is working with the event queue.
		 * The aio_cleanup_thread() uses aio_cleanup_portq()
		 * to unlock all AIO buffers with completed transactions.
		 * Wait here until aio_cleanup_portq() restores the
		 * list of completed transactions in aio_portq.
		 */
		cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);
	}
	aio_deq(&aiop->aio_portq, reqp);
}

/* ARGSUSED */
void
aio_close_port(void *arg, int port, pid_t pid, int lastclose)
{
	aio_t		*aiop;
	aio_req_t	*reqp;
	aio_req_t	*next;
	aio_req_t	*headp;
	int		counter;

	if (arg == NULL)
		aiop = curproc->p_aio;
	else
		aiop = (aio_t *)arg;

	/*
	 * The PORT_SOURCE_AIO source is always associated with every newly
	 * created port by default.
	 * If no asynchronous I/O transactions were associated with the port
	 * then the aiop pointer will still be set to NULL.
	 */
	if (aiop == NULL)
		return;

	/*
	 * Within a process event ports can be used to collect events other
	 * than PORT_SOURCE_AIO events. At the same time the process can submit
	 * asynchronous I/O transactions which are not associated with the
	 * current port.
	 * The current process-oriented model of AIO uses a single queue for
	 * pending events. On close the pending queue (queue of asynchronous
	 * I/O transactions using event port notification) must be scanned
	 * to detect and handle pending I/Os using the current port.
	 */
	mutex_enter(&aiop->aio_portq_mutex);
	mutex_enter(&aiop->aio_mutex);
	counter = 0;
	if ((headp = aiop->aio_portpending) != NULL) {
		reqp = headp;
		do {
			if (reqp->aio_req_portkev &&
			    reqp->aio_req_port == port) {
				reqp->aio_req_flags |= AIO_CLOSE_PORT;
				counter++;
			}
		} while ((reqp = reqp->aio_req_next) != headp);
	}
	if (counter == 0) {
		/* no AIOs pending */
		mutex_exit(&aiop->aio_mutex);
		mutex_exit(&aiop->aio_portq_mutex);
		return;
	}
	aiop->aio_portpendcnt += counter;
	mutex_exit(&aiop->aio_mutex);
	while (aiop->aio_portpendcnt)
		cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);

	/*
	 * all pending AIOs are completed.
	 */
	headp = NULL;
	if ((reqp = aiop->aio_portq) != NULL) {
		do {
			next = reqp->aio_req_next;
			if (reqp->aio_req_port == port) {
				/* dequeue request and discard event */
				aio_req_remove_portq(aiop, reqp);
				port_free_event(reqp->aio_req_portkev);
				/* put request in temporary queue */
				reqp->aio_req_next = headp;
				headp = reqp;
			}
		} while ((reqp = next) != aiop->aio_portq);
	}
	mutex_exit(&aiop->aio_portq_mutex);

	/* headp points to the list of requests to be discarded */
	for (reqp = headp; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free_port(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}

	if (aiop->aio_flags & AIO_CLEANUP)
		cv_broadcast(&aiop->aio_waitcv);
}

/*
 * aio_cleanup_dr_delete_memory is used by dr's delete_memory_thread
 * to kick start the aio_cleanup_thread for the given process to do the
 * necessary cleanup.
 * This is needed so that delete_memory_thread can obtain writer locks
 * on pages that need to be relocated during a dr memory delete operation,
 * otherwise a deadly embrace may occur.
 */
int
aio_cleanup_dr_delete_memory(proc_t *procp)
{
	struct aio *aiop = procp->p_aio;
	struct as *as = procp->p_as;
	int ret = 0;

	ASSERT(MUTEX_HELD(&procp->p_lock));

	mutex_enter(&as->a_contents);

	if (aiop != NULL) {
		aiop->aio_rqclnup = 1;
		cv_broadcast(&as->a_cv);
		ret = 1;
	}
	mutex_exit(&as->a_contents);
	return (ret);
}