/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Kernel asynchronous I/O.
 * This is only for raw devices now (as of Nov. 1993).
 */
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/fs/snode.h>
#include <sys/unistd.h>
#include <sys/cmn_err.h>
#include <vm/faultcode.h>
#include <sys/sysmacros.h>
#include <sys/procfs.h>
#include <sys/autoconf.h>
#include <sys/ddi_impldefs.h>
#include <sys/sunddi.h>
#include <sys/aio_impl.h>
#include <sys/debug.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/vmsystm.h>
#include <sys/contract/process_impl.h>
/*
 * external entry point.
 */
static int64_t kaioc(long, long, long, long, long, long);
static int kaio(ulong_t *, rval_t *);
#define	AIO_LARGEFILE	2
/*
 * implementation specific functions (private)
 */
static int alio(int, aiocb_t **, int, struct sigevent *);
static int aionotify(void);
static int aioinit(void);
static int aiostart(void);
static void alio_cleanup(aio_t *, aiocb_t **, int, int);
static int (*check_vp(struct vnode *, int))(vnode_t *, struct aio_req *,
    cred_t *);
static void lio_set_error(aio_req_t *, int portused);
static aio_t *aio_aiop_alloc();
static int aio_req_alloc(aio_req_t **, aio_result_t *);
static int aio_lio_alloc(aio_lio_t **);
static aio_req_t *aio_req_done(void *);
static aio_req_t *aio_req_remove(aio_req_t *);
static int aio_req_find(aio_result_t *, aio_req_t **);
static int aio_hash_insert(struct aio_req_t *, aio_t *);
static int aio_req_setup(aio_req_t **, aio_t *, aiocb_t *,
    aio_result_t *, vnode_t *, int);
static int aio_cleanup_thread(aio_t *);
static aio_lio_t *aio_list_get(aio_result_t *);
static void lio_set_uerror(void *, int);
extern void aio_zerolen(aio_req_t *);
static int aiowait(struct timeval *, int, long *);
static int aiowaitn(void *, uint_t, uint_t *, timespec_t *);
static int aio_unlock_requests(caddr_t iocblist, int iocb_index,
    aio_req_t *reqlist, aio_t *aiop, model_t model);
static int aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max);
static int aiosuspend(void *, int, struct timespec *, int,
    long *, int);
static int aliowait(int, void *, int, void *, int);
static int aioerror(void *, int);
static int aio_cancel(int, void *, long *, int);
static int arw(int, int, char *, int, offset_t, aio_result_t *, int);
static int aiorw(int, void *, int, int);

static int alioLF(int, void *, int, void *);
static int aio_req_setupLF(aio_req_t **, aio_t *, aiocb64_32_t *,
    aio_result_t *, vnode_t *, int);
static int alio32(int, void *, int, void *);
static int driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
static int driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);

#ifdef _SYSCALL32_IMPL
static void aiocb_LFton(aiocb64_32_t *, aiocb_t *);
void aiocb_32ton(aiocb32_t *, aiocb_t *);
#endif /* _SYSCALL32_IMPL */

/*
 * implementation specific functions (external)
 */
void aio_req_free(aio_t *, aio_req_t *);

/*
 * Event Port framework
 */
void aio_req_free_port(aio_t *, aio_req_t *);
static int aio_port_callback(void *, int *, pid_t, int, void *);
/*
 * This is the loadable module wrapper.
 */
#include <sys/modctl.h>
#include <sys/syscall.h>

#ifdef _LP64

static struct sysent kaio_sysent = {
	6,
	SE_NOUNLOAD | SE_64RVAL | SE_ARGC,
	(int (*)())kaioc
};

#ifdef _SYSCALL32_IMPL
static struct sysent kaio_sysent32 = {
	7,
	SE_NOUNLOAD | SE_64RVAL,
	kaio
};
#endif  /* _SYSCALL32_IMPL */

#else   /* _LP64 */

static struct sysent kaio_sysent = {
	7,
	SE_NOUNLOAD | SE_32RVAL1,
	kaio
};

#endif  /* _LP64 */
/*
 * Module linkage information for the kernel.
 */

static struct modlsys modlsys = {
	&mod_syscallops,
	"kernel Async I/O",
	&kaio_sysent
};

#ifdef _SYSCALL32_IMPL
static struct modlsys modlsys32 = {
	&mod_syscallops32,
	"kernel Async I/O for 32 bit compatibility",
	&kaio_sysent32
};
#endif  /* _SYSCALL32_IMPL */

static struct modlinkage modlinkage = {
	MODREV_1,
	&modlsys,
#ifdef _SYSCALL32_IMPL
	&modlsys32,
#endif
	NULL
};
int
_init(void)
{
	int retval;

	if ((retval = mod_install(&modlinkage)) != 0)
		return (retval);

	return (0);
}

int
_fini(void)
{
	int retval;

	retval = mod_remove(&modlinkage);

	return (retval);
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
230 switch ((int)a0
& ~AIO_POLL_BIT
) {
232 error
= arw((int)a0
, (int)a1
, (char *)a2
, (int)a3
,
233 (offset_t
)a4
, (aio_result_t
*)a5
, FREAD
);
236 error
= arw((int)a0
, (int)a1
, (char *)a2
, (int)a3
,
237 (offset_t
)a4
, (aio_result_t
*)a5
, FWRITE
);
240 error
= aiowait((struct timeval
*)a1
, (int)a2
, &rval
);
243 error
= aiowaitn((void *)a1
, (uint_t
)a2
, (uint_t
*)a3
,
256 error
= alio((int)a1
, (aiocb_t
**)a2
, (int)a3
,
257 (struct sigevent
*)a4
);
260 error
= aliowait((int)a1
, (void *)a2
, (int)a3
,
261 (struct sigevent
*)a4
, AIO_64
);
264 error
= aiosuspend((void *)a1
, (int)a2
, (timespec_t
*)a3
,
265 (int)a4
, &rval
, AIO_64
);
268 error
= aioerror((void *)a1
, AIO_64
);
271 error
= aiorw((int)a0
, (void *)a1
, FREAD
, AIO_64
);
274 error
= aiorw((int)a0
, (void *)a1
, FWRITE
, AIO_64
);
277 error
= aio_cancel((int)a1
, (void *)a2
, &rval
, AIO_64
);
	/*
	 * The large-file cases are valid only for the 32-bit kernel,
	 * not for the 64-bit kernel.  On a 64-bit kernel, large-file
	 * calls are converted to regular 64-bit calls.
	 */
	return ((int64_t)set_errno(error));
#if defined(_LITTLE_ENDIAN)
	off = ((uoff_t)uap[5] << 32) | (uoff_t)uap[4];
#else
	off = ((uoff_t)uap[4] << 32) | (uoff_t)uap[5];
#endif

	switch (uap[0] & ~AIO_POLL_BIT) {
	 * It must be the 32-bit system call on a 64-bit kernel.
		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
		    (int)uap[3], off, (aio_result_t *)uap[6], FREAD));
	case AIOWRITE:
		return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
		    (int)uap[3], off, (aio_result_t *)uap[6], FWRITE));
324 error
= aiowait((struct timeval
*)uap
[1], (int)uap
[2],
328 error
= aiowaitn((void *)uap
[1], (uint_t
)uap
[2],
329 (uint_t
*)uap
[3], (timespec_t
*)uap
[4]);
332 return (aionotify());
338 return (alio32((int)uap
[1], (void *)uap
[2], (int)uap
[3],
341 return (aliowait((int)uap
[1], (void *)uap
[2],
342 (int)uap
[3], (struct sigevent
*)uap
[4], AIO_32
));
344 error
= aiosuspend((void *)uap
[1], (int)uap
[2],
345 (timespec_t
*)uap
[3], (int)uap
[4],
349 return (aioerror((void *)uap
[1], AIO_32
));
351 return (aiorw((int)uap
[0], (void *)uap
[1],
354 return (aiorw((int)uap
[0], (void *)uap
[1],
357 error
= (aio_cancel((int)uap
[1], (void *)uap
[2], &rval
,
361 return (alioLF((int)uap
[1], (void *)uap
[2],
362 (int)uap
[3], (void *)uap
[4]));
364 return (aliowait(uap
[1], (void *)uap
[2],
365 (int)uap
[3], (void *)uap
[4], AIO_LARGEFILE
));
367 error
= aiosuspend((void *)uap
[1], (int)uap
[2],
368 (timespec_t
*)uap
[3], (int)uap
[4], &rval
,
372 return (aioerror((void *)uap
[1], AIO_LARGEFILE
));
374 return (aiorw((int)uap
[0], (void *)uap
[1], FREAD
,
377 return (aiorw((int)uap
[0], (void *)uap
[1], FWRITE
,
380 error
= (aio_cancel((int)uap
[1], (void *)uap
[2],
381 &rval
, AIO_LARGEFILE
));
392 * wake up LWPs in this process that are sleeping in
400 aiop
= curproc
->p_aio
;
404 mutex_enter(&aiop
->aio_mutex
);
405 aiop
->aio_notifycnt
++;
406 cv_broadcast(&aiop
->aio_waitcv
);
407 mutex_exit(&aiop
->aio_mutex
);
timeval2reltime(struct timeval *timout, timestruc_t *rqtime,
	timestruc_t **rqtp, int *blocking)
#ifdef	_SYSCALL32_IMPL
	struct timeval32 wait_time_32;
#endif
	struct timeval wait_time;
	model_t	model = get_udatamodel();
423 if (timout
== NULL
) { /* wait indefinitely */
	/*
	 * Need to correctly compare with the -1 passed in for a user
	 * address pointer, with both 32-bit and 64-bit apps.
	 */
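	/*
	 * Added note: a 32-bit application that passes (struct timeval *)-1
	 * arrives here as the zero-extended value 0x00000000ffffffff rather
	 * than a sign-extended -1, which is why the explicit (uint32_t)-1
	 * comparison is needed in the _SYSCALL32_IMPL branch below.
	 */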
432 if (model
== DATAMODEL_NATIVE
) {
433 if ((intptr_t)timout
== (intptr_t)-1) { /* don't wait */
438 if (copyin(timout
, &wait_time
, sizeof (wait_time
)))
441 #ifdef _SYSCALL32_IMPL
444 * -1 from a 32bit app. It will not get sign extended.
447 if ((intptr_t)timout
== (intptr_t)((uint32_t)-1)) {
452 if (copyin(timout
, &wait_time_32
, sizeof (wait_time_32
)))
454 TIMEVAL32_TO_TIMEVAL(&wait_time
, &wait_time_32
);
456 #endif /* _SYSCALL32_IMPL */
458 if (wait_time
.tv_sec
== 0 && wait_time
.tv_usec
== 0) { /* don't wait */
463 if (wait_time
.tv_sec
< 0 ||
464 wait_time
.tv_usec
< 0 || wait_time
.tv_usec
>= MICROSEC
)
467 rqtime
->tv_sec
= wait_time
.tv_sec
;
468 rqtime
->tv_nsec
= wait_time
.tv_usec
* 1000;
timespec2reltime(timespec_t *timout, timestruc_t *rqtime,
	timestruc_t **rqtp, int *blocking)
479 #ifdef _SYSCALL32_IMPL
480 timespec32_t wait_time_32
;
482 model_t model
= get_udatamodel();
485 if (timout
== NULL
) {
490 if (model
== DATAMODEL_NATIVE
) {
491 if (copyin(timout
, rqtime
, sizeof (*rqtime
)))
494 #ifdef _SYSCALL32_IMPL
496 if (copyin(timout
, &wait_time_32
, sizeof (wait_time_32
)))
498 TIMESPEC32_TO_TIMESPEC(rqtime
, &wait_time_32
);
500 #endif /* _SYSCALL32_IMPL */
502 if (rqtime
->tv_sec
== 0 && rqtime
->tv_nsec
== 0) {
507 if (rqtime
->tv_sec
< 0 ||
508 rqtime
->tv_nsec
< 0 || rqtime
->tv_nsec
>= NANOSEC
)
520 struct timeval
*timout
,
533 aiop
= curproc
->p_aio
;
538 * Establish the absolute future time for the timeout.
540 error
= timeval2reltime(timout
, &rqtime
, &rqtp
, &blocking
);
545 timecheck
= timechanged
;
547 timespecadd(rqtp
, &now
);
550 mutex_enter(&aiop
->aio_mutex
);
552 /* process requests on poll queue */
553 if (aiop
->aio_pollq
) {
554 mutex_exit(&aiop
->aio_mutex
);
556 mutex_enter(&aiop
->aio_mutex
);
558 if ((reqp
= aio_req_remove(NULL
)) != NULL
) {
559 *rval
= (long)reqp
->aio_req_resultp
;
562 /* user-level done queue might not be empty */
563 if (aiop
->aio_notifycnt
> 0) {
564 aiop
->aio_notifycnt
--;
568 /* don't block if no outstanding aio */
569 if (aiop
->aio_outstanding
== 0 && dontblockflg
) {
574 status
= cv_waituntil_sig(&aiop
->aio_waitcv
,
575 &aiop
->aio_mutex
, rqtp
, timecheck
);
577 if (status
> 0) /* check done queue again */
579 if (status
== 0) { /* interrupted by a signal */
582 } else { /* timer expired */
588 mutex_exit(&aiop
->aio_mutex
);
590 aphysio_unlock(reqp
);
591 aio_copyout_result(reqp
);
592 mutex_enter(&aiop
->aio_mutex
);
593 aio_req_free(aiop
, reqp
);
594 mutex_exit(&aiop
->aio_mutex
);
/*
 * aiowaitn can be used to reap completed asynchronous requests submitted
 * with lio_listio, aio_read or aio_write.
 * This function only reaps asynchronous raw I/Os.
 */
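/*
 * Illustrative userland usage (added commentary; a hedged sketch, see
 * aio_waitn(3C) for the authoritative interface):
 *
 *	aiocb_t		*done[8];
 *	uint_t		nwait = 4;
 *
 *	if (aio_waitn(done, 8, &nwait, NULL) == 0) {
 *		// on return, nwait holds the number of completed
 *		// control blocks stored in done[]
 *	}
 */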
aiowaitn(void *uiocb, uint_t nent, uint_t *nwait, timespec_t *timout)
	aio_req_t	*reqlist = NULL;
	caddr_t		iocblist = NULL;	/* array of iocb ptr's */
	uint_t		waitcnt, cnt = 0;	/* iocb cnt */
	size_t		iocbsz;			/* user's iocb size */
	size_t		riocbsz;		/* returned iocb size */
	model_t		model = get_udatamodel();
623 aiop
= curproc
->p_aio
;
624 if (aiop
== NULL
|| nent
== 0 || nent
> _AIO_LISTIO_MAX
)
627 if (aiop
->aio_outstanding
== 0)
630 if (copyin(nwait
, &waitcnt
, sizeof (uint_t
)))
633 /* set *nwait to zero, if we must return prematurely */
634 if (copyout(&cnt
, nwait
, sizeof (uint_t
)))
642 error
= timespec2reltime(timout
, &rqtime
, &rqtp
, &blocking
);
647 if (model
== DATAMODEL_NATIVE
)
648 iocbsz
= (sizeof (aiocb_t
*) * nent
);
649 #ifdef _SYSCALL32_IMPL
651 iocbsz
= (sizeof (caddr32_t
) * nent
);
652 #endif /* _SYSCALL32_IMPL */
	/*
	 * Only one aio_waitn call is allowed at a time.
	 * The active aio_waitn will collect all requests
	 * out of the "done" list and, if necessary, it will wait
	 * for some/all pending requests to fulfill the nwait
	 * parameter.
	 * Any further aio_waitn call will sleep here until the active
	 * aio_waitn finishes and leaves the kernel.
	 * If the second call does not block (poll), then return
	 * immediately with the error code EAGAIN.
	 * If the second call should block, then sleep here, but
	 * do not touch the timeout.  The timeout starts when this
	 * aio_waitn call becomes active.
	 */
669 mutex_enter(&aiop
->aio_mutex
);
671 while (aiop
->aio_flags
& AIO_WAITN
) {
673 mutex_exit(&aiop
->aio_mutex
);
677 /* block, no timeout */
678 aiop
->aio_flags
|= AIO_WAITN_PENDING
;
679 if (!cv_wait_sig(&aiop
->aio_waitncv
, &aiop
->aio_mutex
)) {
680 mutex_exit(&aiop
->aio_mutex
);
686 * Establish the absolute future time for the timeout.
690 timecheck
= timechanged
;
692 timespecadd(rqtp
, &now
);
695 if (iocbsz
> aiop
->aio_iocbsz
&& aiop
->aio_iocb
!= NULL
) {
696 kmem_free(aiop
->aio_iocb
, aiop
->aio_iocbsz
);
697 aiop
->aio_iocb
= NULL
;
700 if (aiop
->aio_iocb
== NULL
) {
701 iocblist
= kmem_zalloc(iocbsz
, KM_NOSLEEP
);
702 if (iocblist
== NULL
) {
703 mutex_exit(&aiop
->aio_mutex
);
706 aiop
->aio_iocb
= (aiocb_t
**)iocblist
;
707 aiop
->aio_iocbsz
= iocbsz
;
709 iocblist
= (char *)aiop
->aio_iocb
;
712 aiop
->aio_waitncnt
= waitcnt
;
713 aiop
->aio_flags
|= AIO_WAITN
;
716 /* push requests on poll queue to done queue */
717 if (aiop
->aio_pollq
) {
718 mutex_exit(&aiop
->aio_mutex
);
720 mutex_enter(&aiop
->aio_mutex
);
723 /* check for requests on done queue */
724 if (aiop
->aio_doneq
) {
725 cnt
+= aio_reqlist_concat(aiop
, &reqlist
, nent
- cnt
);
726 aiop
->aio_waitncnt
= waitcnt
- cnt
;
729 /* user-level done queue might not be empty */
730 if (aiop
->aio_notifycnt
> 0) {
731 aiop
->aio_notifycnt
--;
		/*
		 * If we are here a second time as a result of timer
		 * expiration, reset the error if there are enough
		 * aiocb's to satisfy the request.
		 * Also return if all requests are already done
		 * and we picked up the whole done queue.
		 */
744 if ((cnt
>= waitcnt
) || (cnt
> 0 && aiop
->aio_pending
== 0 &&
745 aiop
->aio_doneq
== NULL
)) {
750 if ((cnt
< waitcnt
) && blocking
) {
751 int rval
= cv_waituntil_sig(&aiop
->aio_waitcv
,
752 &aiop
->aio_mutex
, rqtp
, timecheck
);
765 mutex_exit(&aiop
->aio_mutex
);
769 iocb_index
= aio_unlock_requests(iocblist
, iocb_index
, reqlist
,
772 if (model
== DATAMODEL_NATIVE
)
773 riocbsz
= (sizeof (aiocb_t
*) * cnt
);
774 #ifdef _SYSCALL32_IMPL
776 riocbsz
= (sizeof (caddr32_t
) * cnt
);
777 #endif /* _SYSCALL32_IMPL */
779 if (copyout(iocblist
, uiocb
, riocbsz
) ||
780 copyout(&cnt
, nwait
, sizeof (uint_t
)))
784 /* check if there is another thread waiting for execution */
785 mutex_enter(&aiop
->aio_mutex
);
786 aiop
->aio_flags
&= ~AIO_WAITN
;
787 if (aiop
->aio_flags
& AIO_WAITN_PENDING
) {
788 aiop
->aio_flags
&= ~AIO_WAITN_PENDING
;
789 cv_signal(&aiop
->aio_waitncv
);
791 mutex_exit(&aiop
->aio_mutex
);
/*
 * aio_unlock_requests
 * copies out the result of each request as well as the return value.
 * It builds the list of completed asynchronous requests,
 * unlocks the allocated memory ranges and
 * puts the aio request structure back into the free list.
 */
	aio_req_t	*reqp, *nreqp;
814 if (model
== DATAMODEL_NATIVE
) {
815 for (reqp
= reqlist
; reqp
!= NULL
; reqp
= nreqp
) {
816 (((caddr_t
*)iocblist
)[iocb_index
++]) =
817 reqp
->aio_req_iocb
.iocb
;
818 nreqp
= reqp
->aio_req_next
;
819 aphysio_unlock(reqp
);
820 aio_copyout_result(reqp
);
821 mutex_enter(&aiop
->aio_mutex
);
822 aio_req_free(aiop
, reqp
);
823 mutex_exit(&aiop
->aio_mutex
);
826 #ifdef _SYSCALL32_IMPL
828 for (reqp
= reqlist
; reqp
!= NULL
; reqp
= nreqp
) {
829 ((caddr32_t
*)iocblist
)[iocb_index
++] =
830 reqp
->aio_req_iocb
.iocb32
;
831 nreqp
= reqp
->aio_req_next
;
832 aphysio_unlock(reqp
);
833 aio_copyout_result(reqp
);
834 mutex_enter(&aiop
->aio_mutex
);
835 aio_req_free(aiop
, reqp
);
836 mutex_exit(&aiop
->aio_mutex
);
839 #endif /* _SYSCALL32_IMPL */
/*
 * moves "max" elements from the done queue to the reqlist queue and removes
 * the AIO_DONEQ flag.
 * - reqlist queue is a simple linked list
 * - done queue is a doubly linked list
 */
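/*
 * Added note: conceptually, a leading run of elements is detached from the
 * circular, doubly linked done queue and spliced onto the singly linked
 * reqlist, until either "max" elements have moved or the done queue is
 * empty; the AIO_DONEQ flag is cleared on each element as it is walked.
 */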
aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max)
	aio_req_t *q2, *q2work, *list;
858 q2
= aiop
->aio_doneq
;
861 q2work
->aio_req_flags
&= ~AIO_DONEQ
;
862 q2work
= q2work
->aio_req_next
;
869 /* all elements revised */
870 q2
->aio_req_prev
->aio_req_next
= list
;
872 aiop
->aio_doneq
= NULL
;
875 * max < elements in the doneq
876 * detach only the required amount of elements
879 q2work
->aio_req_prev
->aio_req_next
= list
;
882 aiop
->aio_doneq
= q2work
;
883 q2work
->aio_req_prev
= q2
->aio_req_prev
;
884 q2
->aio_req_prev
->aio_req_next
= q2work
;
895 struct timespec
*timout
,
902 aio_req_t
*reqp
, *found
, *next
;
903 caddr_t cbplist
= NULL
;
904 aiocb_t
*cbp
, **ucbp
;
905 #ifdef _SYSCALL32_IMPL
908 #endif /* _SYSCALL32_IMPL */
913 model_t model
= get_udatamodel();
919 aiop
= curproc
->p_aio
;
920 if (aiop
== NULL
|| nent
<= 0 || nent
> _AIO_LISTIO_MAX
)
924 * Establish the absolute future time for the timeout.
926 error
= timespec2reltime(timout
, &rqtime
, &rqtp
, &blocking
);
931 timecheck
= timechanged
;
933 timespecadd(rqtp
, &now
);
937 * If we are not blocking and there's no IO complete
940 if (!blocking
&& (aiop
->aio_pollq
== NULL
) &&
941 (aiop
->aio_doneq
== NULL
)) {
945 if (model
== DATAMODEL_NATIVE
)
946 ssize
= (sizeof (aiocb_t
*) * nent
);
947 #ifdef _SYSCALL32_IMPL
949 ssize
= (sizeof (caddr32_t
) * nent
);
950 #endif /* _SYSCALL32_IMPL */
952 cbplist
= kmem_alloc(ssize
, KM_NOSLEEP
);
956 if (copyin(aiocb
, cbplist
, ssize
)) {
963 * we need to get the aio_cleanupq_mutex since we call
966 mutex_enter(&aiop
->aio_cleanupq_mutex
);
967 mutex_enter(&aiop
->aio_mutex
);
969 /* push requests on poll queue to done queue */
970 if (aiop
->aio_pollq
) {
971 mutex_exit(&aiop
->aio_mutex
);
972 mutex_exit(&aiop
->aio_cleanupq_mutex
);
974 mutex_enter(&aiop
->aio_cleanupq_mutex
);
975 mutex_enter(&aiop
->aio_mutex
);
977 /* check for requests on done queue */
978 if (aiop
->aio_doneq
) {
979 if (model
== DATAMODEL_NATIVE
)
980 ucbp
= (aiocb_t
**)cbplist
;
981 #ifdef _SYSCALL32_IMPL
983 ucbp32
= (caddr32_t
*)cbplist
;
984 #endif /* _SYSCALL32_IMPL */
985 for (i
= 0; i
< nent
; i
++) {
986 if (model
== DATAMODEL_NATIVE
) {
987 if ((cbp
= *ucbp
++) == NULL
)
989 if (run_mode
!= AIO_LARGEFILE
)
993 cbp64
= (aiocb64_32_t
*)cbp
;
995 &cbp64
->aio_resultp
);
998 #ifdef _SYSCALL32_IMPL
1000 if (run_mode
== AIO_32
) {
1002 (aiocb32_t
*)(uintptr_t)
1005 reqp
= aio_req_done(
1006 &cbp32
->aio_resultp
);
1007 } else if (run_mode
== AIO_LARGEFILE
) {
1009 (aiocb64_32_t
*)(uintptr_t)
1012 reqp
= aio_req_done(
1013 &cbp64
->aio_resultp
);
1017 #endif /* _SYSCALL32_IMPL */
1019 reqp
->aio_req_next
= found
;
1022 if (aiop
->aio_doneq
== NULL
)
1028 if (aiop
->aio_notifycnt
> 0) {
1030 * nothing on the kernel's queue. the user
1031 * has notified the kernel that it has items
1032 * on a user-level queue.
1034 aiop
->aio_notifycnt
--;
1039 /* don't block if nothing is outstanding */
1040 if (aiop
->aio_outstanding
== 0) {
1046 * drop the aio_cleanupq_mutex as we are
1049 mutex_exit(&aiop
->aio_cleanupq_mutex
);
1050 rv
= cv_waituntil_sig(&aiop
->aio_waitcv
,
1051 &aiop
->aio_mutex
, rqtp
, timecheck
);
1053 * we have to drop aio_mutex and
1054 * grab it in the right order.
1056 mutex_exit(&aiop
->aio_mutex
);
1057 mutex_enter(&aiop
->aio_cleanupq_mutex
);
1058 mutex_enter(&aiop
->aio_mutex
);
1059 if (rv
> 0) /* check done queue again */
1061 if (rv
== 0) /* interrupted by a signal */
1063 else /* timer expired */
1070 mutex_exit(&aiop
->aio_mutex
);
1071 mutex_exit(&aiop
->aio_cleanupq_mutex
);
1072 for (reqp
= found
; reqp
!= NULL
; reqp
= next
) {
1073 next
= reqp
->aio_req_next
;
1074 aphysio_unlock(reqp
);
1075 aio_copyout_result(reqp
);
1076 mutex_enter(&aiop
->aio_mutex
);
1077 aio_req_free(aiop
, reqp
);
1078 mutex_exit(&aiop
->aio_mutex
);
1081 kmem_free(cbplist
, ssize
);
1086 * initialize aio by allocating an aio_t struct for this
1092 proc_t
*p
= curproc
;
1094 mutex_enter(&p
->p_lock
);
1095 if ((aiop
= p
->p_aio
) == NULL
) {
1096 aiop
= aio_aiop_alloc();
1099 mutex_exit(&p
->p_lock
);
/*
 * Start a special thread that will cleanup after aio requests
 * that are preventing a segment from being unmapped.  as_unmap()
 * blocks until all physio to this segment is completed.  This
 * doesn't happen until all the pages in this segment are not
 * SOFTLOCKed.  Some pages will be SOFTLOCKed when there are aio
 * requests still outstanding.  This special thread will make sure
 * that these SOFTLOCKed pages will eventually be SOFTUNLOCKed.
 *
 * This function will return an error if the process has only
 * one LWP.  The assumption is that the caller is a separate LWP
 * that remains blocked in the kernel for the life of this process.
 */
1121 proc_t
*p
= curproc
;
1123 int first
, error
= 0;
1125 if (p
->p_lwpcnt
== 1)
1127 mutex_enter(&p
->p_lock
);
1128 if ((aiop
= p
->p_aio
) == NULL
)
1131 first
= aiop
->aio_ok
;
1132 if (aiop
->aio_ok
== 0)
1135 mutex_exit(&p
->p_lock
);
1136 if (error
== 0 && first
== 0) {
1137 return (aio_cleanup_thread(aiop
));
1138 /* should return only to exit */
/*
 * Associate an aiocb with a port.
 * This function is used by aiorw() to associate a transaction with a port.
 * Allocate an event port structure (port_alloc_event()) and store the
 * delivered user pointer (portnfy_user) in the portkev_user field of the
 * port_kevent_t structure.
 * The aio_req_portkev pointer in the aio_req_t structure was added to
 * identify the port association.
 */
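/*
 * Illustrative userland pairing (added commentary; a hedged sketch, see
 * port_create(3C) and port_get(3C) for the authoritative interfaces;
 * my_cookie is a hypothetical user pointer):
 *
 *	int port = port_create();
 *	port_notify_t pn = { port, my_cookie };
 *	cb.aio_sigevent.sigev_notify = SIGEV_PORT;
 *	cb.aio_sigevent.sigev_value.sival_ptr = &pn;
 *	(void) aio_read(&cb);
 *	(void) port_get(port, &pe, NULL);	(pe.portev_user == my_cookie)
 */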
aio_req_assoc_port_rw(port_notify_t *pntfy, aiocb_t *cbp,
	aio_req_t *reqp, int event)
1157 port_kevent_t
*pkevp
= NULL
;
1160 error
= port_alloc_event(pntfy
->portnfy_port
, PORT_ALLOC_DEFAULT
,
1161 PORT_SOURCE_AIO
, &pkevp
);
1163 if ((error
== ENOMEM
) || (error
== EAGAIN
))
1168 port_init_event(pkevp
, (uintptr_t)cbp
, pntfy
->portnfy_user
,
1169 aio_port_callback
, reqp
);
1170 pkevp
->portkev_events
= event
;
1171 reqp
->aio_req_portkev
= pkevp
;
1172 reqp
->aio_req_port
= pntfy
->portnfy_port
;
/*
 * Asynchronous list IO.  A chain of aiocb's is copied in
 * one at a time.  If the aiocb is invalid, it is skipped.
 * For each aiocb, the appropriate driver entry point is
 * called.  Optimize for the common case where the list
 * of requests is to the same file descriptor.
 *
 * One possible optimization is to define a new driver entry
 * point that supports a list of IO requests.  Whether this
 * improves performance depends somewhat on the driver's
 * locking strategy.  Processing a list could adversely impact
 * the driver's interrupt latency.
 */
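/*
 * Userland counterpart, roughly (added commentary; a hedged sketch of
 * lio_listio(3C), not code from this file):
 *
 *	aiocb_t *list[2] = { &cb_read, &cb_write };
 *
 *	if (lio_listio(LIO_WAIT, list, 2, NULL) != 0)
 *		perror("lio_listio");
 */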
1195 aiocb_t
**aiocb_arg
,
1197 struct sigevent
*sigev
)
1200 file_t
*prev_fp
= NULL
;
1208 aiocb_t
*aiocb
= &cb
;
1211 struct sigevent sigevk
;
1220 int aio_notsupported
= 0;
1224 port_kevent_t
*pkevtp
= NULL
;
1226 port_notify_t pnotify
;
1229 aiop
= curproc
->p_aio
;
1230 if (aiop
== NULL
|| nent
<= 0 || nent
> _AIO_LISTIO_MAX
)
1233 ssize
= (sizeof (aiocb_t
*) * nent
);
1234 cbplist
= kmem_alloc(ssize
, KM_SLEEP
);
1235 ucbp
= (aiocb_t
**)cbplist
;
1237 if (copyin(aiocb_arg
, cbplist
, ssize
) ||
1238 (sigev
&& copyin(sigev
, &sigevk
, sizeof (struct sigevent
)))) {
1239 kmem_free(cbplist
, ssize
);
1245 (sigevk
.sigev_notify
== SIGEV_THREAD
||
1246 sigevk
.sigev_notify
== SIGEV_PORT
)) {
1247 if (sigevk
.sigev_notify
== SIGEV_THREAD
) {
1248 pnotify
.portnfy_port
= sigevk
.sigev_signo
;
1249 pnotify
.portnfy_user
= sigevk
.sigev_value
.sival_ptr
;
1250 } else if (copyin(sigevk
.sigev_value
.sival_ptr
,
1251 &pnotify
, sizeof (pnotify
))) {
1252 kmem_free(cbplist
, ssize
);
1255 error
= port_alloc_event(pnotify
.portnfy_port
,
1256 PORT_ALLOC_DEFAULT
, PORT_SOURCE_AIO
, &pkevtp
);
1258 if (error
== ENOMEM
|| error
== EAGAIN
)
1262 kmem_free(cbplist
, ssize
);
1265 lio_head_port
= pnotify
.portnfy_port
;
1270 * a list head should be allocated if notification is
1271 * enabled for this list.
1275 if (mode_arg
== LIO_WAIT
|| sigev
) {
1276 mutex_enter(&aiop
->aio_mutex
);
1277 error
= aio_lio_alloc(&head
);
1278 mutex_exit(&aiop
->aio_mutex
);
1282 head
->lio_nent
= nent
;
1283 head
->lio_refcnt
= nent
;
1284 head
->lio_port
= -1;
1285 head
->lio_portkev
= NULL
;
1286 if (sigev
&& sigevk
.sigev_notify
== SIGEV_SIGNAL
&&
1287 sigevk
.sigev_signo
> 0 && sigevk
.sigev_signo
< NSIG
) {
1288 sqp
= kmem_zalloc(sizeof (sigqueue_t
), KM_NOSLEEP
);
1293 sqp
->sq_func
= NULL
;
1294 sqp
->sq_next
= NULL
;
1295 sqp
->sq_info
.si_code
= SI_ASYNCIO
;
1296 sqp
->sq_info
.si_pid
= curproc
->p_pid
;
1297 sqp
->sq_info
.si_ctid
= PRCTID(curproc
);
1298 sqp
->sq_info
.si_zoneid
= getzoneid();
1299 sqp
->sq_info
.si_uid
= crgetuid(curproc
->p_cred
);
1300 sqp
->sq_info
.si_signo
= sigevk
.sigev_signo
;
1301 sqp
->sq_info
.si_value
= sigevk
.sigev_value
;
1302 head
->lio_sigqp
= sqp
;
1304 head
->lio_sigqp
= NULL
;
1308 * Prepare data to send when list of aiocb's
1311 port_init_event(pkevtp
, (uintptr_t)sigev
,
1312 (void *)(uintptr_t)pnotify
.portnfy_user
,
1314 pkevtp
->portkev_events
= AIOLIO
;
1315 head
->lio_portkev
= pkevtp
;
1316 head
->lio_port
= pnotify
.portnfy_port
;
1320 for (i
= 0; i
< nent
; i
++, ucbp
++) {
1323 /* skip entry if it can't be copied. */
1324 if (cbp
== NULL
|| copyin(cbp
, aiocb
, sizeof (*aiocb
))) {
1326 mutex_enter(&aiop
->aio_mutex
);
1329 mutex_exit(&aiop
->aio_mutex
);
1334 /* skip if opcode for aiocb is LIO_NOP */
1335 mode
= aiocb
->aio_lio_opcode
;
1336 if (mode
== LIO_NOP
) {
1339 mutex_enter(&aiop
->aio_mutex
);
1342 mutex_exit(&aiop
->aio_mutex
);
1347 /* increment file descriptor's ref count. */
1348 if ((fp
= getf(aiocb
->aio_fildes
)) == NULL
) {
1349 lio_set_uerror(&cbp
->aio_resultp
, EBADF
);
1351 mutex_enter(&aiop
->aio_mutex
);
1354 mutex_exit(&aiop
->aio_mutex
);
1361 * check the permission of the partition
1363 if ((fp
->f_flag
& mode
) == 0) {
1364 releasef(aiocb
->aio_fildes
);
1365 lio_set_uerror(&cbp
->aio_resultp
, EBADF
);
1367 mutex_enter(&aiop
->aio_mutex
);
1370 mutex_exit(&aiop
->aio_mutex
);
1377 * common case where requests are to the same fd
1378 * for the same r/w operation.
1379 * for UFS, need to set EBADFD
1382 if (fp
!= prev_fp
|| mode
!= prev_mode
) {
1383 aio_func
= check_vp(vp
, mode
);
1384 if (aio_func
== NULL
) {
1386 releasef(aiocb
->aio_fildes
);
1387 lio_set_uerror(&cbp
->aio_resultp
, EBADFD
);
1390 mutex_enter(&aiop
->aio_mutex
);
1393 mutex_exit(&aiop
->aio_mutex
);
1402 error
= aio_req_setup(&reqp
, aiop
, aiocb
,
1403 &cbp
->aio_resultp
, vp
, 0);
1405 releasef(aiocb
->aio_fildes
);
1406 lio_set_uerror(&cbp
->aio_resultp
, error
);
1408 mutex_enter(&aiop
->aio_mutex
);
1411 mutex_exit(&aiop
->aio_mutex
);
1417 reqp
->aio_req_lio
= head
;
1421 * Set the errno field now before sending the request to
1422 * the driver to avoid a race condition
1424 (void) suword32(&cbp
->aio_resultp
.aio_errno
,
1427 reqp
->aio_req_iocb
.iocb
= (caddr_t
)cbp
;
1429 event
= (mode
== LIO_READ
)? AIOAREAD
: AIOAWRITE
;
1430 aio_port
= (aiocb
->aio_sigevent
.sigev_notify
== SIGEV_PORT
);
1431 aio_thread
= (aiocb
->aio_sigevent
.sigev_notify
== SIGEV_THREAD
);
1432 if (aio_port
| aio_thread
) {
1433 port_kevent_t
*lpkevp
;
1435 * Prepare data to send with each aiocb completed.
1439 aiocb
->aio_sigevent
.sigev_value
.sival_ptr
;
1440 if (copyin(paddr
, &pnotify
, sizeof (pnotify
)))
1442 } else { /* aio_thread */
1443 pnotify
.portnfy_port
=
1444 aiocb
->aio_sigevent
.sigev_signo
;
1445 pnotify
.portnfy_user
=
1446 aiocb
->aio_sigevent
.sigev_value
.sival_ptr
;
1450 else if (pkevtp
!= NULL
&&
1451 pnotify
.portnfy_port
== lio_head_port
)
1452 error
= port_dup_event(pkevtp
, &lpkevp
,
1453 PORT_ALLOC_DEFAULT
);
1455 error
= port_alloc_event(pnotify
.portnfy_port
,
1456 PORT_ALLOC_DEFAULT
, PORT_SOURCE_AIO
,
1459 port_init_event(lpkevp
, (uintptr_t)cbp
,
1460 (void *)(uintptr_t)pnotify
.portnfy_user
,
1461 aio_port_callback
, reqp
);
1462 lpkevp
->portkev_events
= event
;
1463 reqp
->aio_req_portkev
= lpkevp
;
1464 reqp
->aio_req_port
= pnotify
.portnfy_port
;
1469 * send the request to driver.
1472 if (aiocb
->aio_nbytes
== 0) {
1473 clear_active_fd(aiocb
->aio_fildes
);
1477 error
= (*aio_func
)(vp
, (aio_req_t
*)&reqp
->aio_req
,
1482 * the fd's ref count is not decremented until the IO has
1483 * completed unless there was an error.
1486 releasef(aiocb
->aio_fildes
);
1487 lio_set_uerror(&cbp
->aio_resultp
, error
);
1489 mutex_enter(&aiop
->aio_mutex
);
1492 mutex_exit(&aiop
->aio_mutex
);
1494 if (error
== ENOTSUP
)
1498 lio_set_error(reqp
, portused
);
1500 clear_active_fd(aiocb
->aio_fildes
);
1504 if (aio_notsupported
) {
1506 } else if (aio_errors
) {
1508 * return EIO if any request failed
1513 if (mode_arg
== LIO_WAIT
) {
1514 mutex_enter(&aiop
->aio_mutex
);
1515 while (head
->lio_refcnt
> 0) {
1516 if (!cv_wait_sig(&head
->lio_notify
, &aiop
->aio_mutex
)) {
1517 mutex_exit(&aiop
->aio_mutex
);
1522 mutex_exit(&aiop
->aio_mutex
);
1523 alio_cleanup(aiop
, (aiocb_t
**)cbplist
, nent
, AIO_64
);
1527 kmem_free(cbplist
, ssize
);
1529 if (head
->lio_sigqp
)
1530 kmem_free(head
->lio_sigqp
, sizeof (sigqueue_t
));
1531 if (head
->lio_portkev
)
1532 port_free_event(head
->lio_portkev
);
1533 kmem_free(head
, sizeof (aio_lio_t
));
/*
 * Asynchronous list IO.
 * If list I/O is called with LIO_WAIT it can still return
 * before all the I/Os are completed if a signal is caught
 * or if the list includes UFS I/O requests.  If this happens,
 * libaio will call aliowait() to wait for the I/Os to
 * complete.
 */
1560 aiocb_t
*cbp
, **ucbp
;
1561 #ifdef _SYSCALL32_IMPL
1564 aiocb64_32_t
*cbp64
;
1569 model_t model
= get_udatamodel();
1571 aiop
= curproc
->p_aio
;
1572 if (aiop
== NULL
|| nent
<= 0 || nent
> _AIO_LISTIO_MAX
)
1575 if (model
== DATAMODEL_NATIVE
)
1576 ssize
= (sizeof (aiocb_t
*) * nent
);
1577 #ifdef _SYSCALL32_IMPL
1579 ssize
= (sizeof (caddr32_t
) * nent
);
1580 #endif /* _SYSCALL32_IMPL */
1585 cbplist
= kmem_alloc(ssize
, KM_SLEEP
);
1587 if (model
== DATAMODEL_NATIVE
)
1588 ucbp
= (aiocb_t
**)cbplist
;
1589 #ifdef _SYSCALL32_IMPL
1591 ucbp32
= (caddr32_t
*)cbplist
;
1592 #endif /* _SYSCALL32_IMPL */
1594 if (copyin(aiocb
, cbplist
, ssize
)) {
	 * To find the list head, we go through the
	 * list of aiocb structs, find the request
	 * it's for, then get the list head that reqp
1607 for (i
= 0; i
< nent
; i
++) {
1608 if (model
== DATAMODEL_NATIVE
) {
			/*
			 * Since we are only checking for a NULL pointer,
			 * the following should work for both native data
			 * sizes as well as for the largefile aiocb.
			 */
1614 if ((cbp
= *ucbp
++) == NULL
)
1616 if (run_mode
!= AIO_LARGEFILE
)
1617 if (head
= aio_list_get(&cbp
->aio_resultp
))
			 * This is the case when a largefile call is
			 * made on a 32-bit kernel.
			 * Treat each pointer as a pointer to
1626 if (head
= aio_list_get((aio_result_t
*)
1627 &(((aiocb64_32_t
*)cbp
)->aio_resultp
)))
1631 #ifdef _SYSCALL32_IMPL
1633 if (run_mode
== AIO_LARGEFILE
) {
1634 if ((cbp64
= (aiocb64_32_t
*)
1635 (uintptr_t)*ucbp32
++) == NULL
)
1637 if (head
= aio_list_get((aio_result_t
*)
1638 &cbp64
->aio_resultp
))
1640 } else if (run_mode
== AIO_32
) {
1641 if ((cbp32
= (aiocb32_t
*)
1642 (uintptr_t)*ucbp32
++) == NULL
)
1644 if (head
= aio_list_get((aio_result_t
*)
1645 &cbp32
->aio_resultp
))
1649 #endif /* _SYSCALL32_IMPL */
1657 mutex_enter(&aiop
->aio_mutex
);
1658 while (head
->lio_refcnt
> 0) {
1659 if (!cv_wait_sig(&head
->lio_notify
, &aiop
->aio_mutex
)) {
1660 mutex_exit(&aiop
->aio_mutex
);
1665 mutex_exit(&aiop
->aio_mutex
);
1666 alio_cleanup(aiop
, (aiocb_t
**)cbplist
, nent
, run_mode
);
1668 kmem_free(cbplist
, ssize
);
aio_list_get(aio_result_t *resultp)
1675 aio_lio_t
*head
= NULL
;
1681 aiop
= curproc
->p_aio
;
1686 index
= AIO_HASH(resultp
);
1687 bucket
= &aiop
->aio_hash
[index
];
1688 for (reqp
= *bucket
; reqp
!= NULL
;
1689 reqp
= reqp
->aio_hash_next
) {
1690 if (reqp
->aio_req_resultp
== resultp
) {
1691 head
= reqp
->aio_req_lio
;
lio_set_uerror(void *resultp, int error)
1704 * the resultp field is a pointer to where the
1705 * error should be written out to the user's
1709 if (get_udatamodel() == DATAMODEL_NATIVE
) {
1710 (void) sulword(&((aio_result_t
*)resultp
)->aio_return
,
1712 (void) suword32(&((aio_result_t
*)resultp
)->aio_errno
, error
);
1714 #ifdef _SYSCALL32_IMPL
1716 (void) suword32(&((aio_result32_t
*)resultp
)->aio_return
,
1718 (void) suword32(&((aio_result32_t
*)resultp
)->aio_errno
, error
);
1720 #endif /* _SYSCALL32_IMPL */
/*
 * Do cleanup completion for all requests in the list.  Memory for
 * each request is also freed.
 */
alio_cleanup(aio_t *aiop, aiocb_t **cbp, int nent, int run_mode)
1732 aio_result_t
*resultp
;
1733 aiocb64_32_t
*aiocb_64
;
1735 for (i
= 0; i
< nent
; i
++) {
1736 if (get_udatamodel() == DATAMODEL_NATIVE
) {
1739 if (run_mode
== AIO_LARGEFILE
) {
1740 aiocb_64
= (aiocb64_32_t
*)cbp
[i
];
1741 resultp
= (aio_result_t
*)
1742 &aiocb_64
->aio_resultp
;
1744 resultp
= &cbp
[i
]->aio_resultp
;
1746 #ifdef _SYSCALL32_IMPL
1748 aiocb32_t
*aiocb_32
;
1751 cbp32
= (caddr32_t
*)cbp
;
1752 if (cbp32
[i
] == (uintptr_t)NULL
)
1754 if (run_mode
== AIO_32
) {
1755 aiocb_32
= (aiocb32_t
*)(uintptr_t)cbp32
[i
];
1756 resultp
= (aio_result_t
*)&aiocb_32
->
1758 } else if (run_mode
== AIO_LARGEFILE
) {
1759 aiocb_64
= (aiocb64_32_t
*)(uintptr_t)cbp32
[i
];
1760 resultp
= (aio_result_t
*)&aiocb_64
->
1764 #endif /* _SYSCALL32_IMPL */
1766 * we need to get the aio_cleanupq_mutex since we call
1769 mutex_enter(&aiop
->aio_cleanupq_mutex
);
1770 mutex_enter(&aiop
->aio_mutex
);
1771 reqp
= aio_req_done(resultp
);
1772 mutex_exit(&aiop
->aio_mutex
);
1773 mutex_exit(&aiop
->aio_cleanupq_mutex
);
1775 aphysio_unlock(reqp
);
1776 aio_copyout_result(reqp
);
1777 mutex_enter(&aiop
->aio_mutex
);
1778 aio_req_free(aiop
, reqp
);
1779 mutex_exit(&aiop
->aio_mutex
);
1785 * Write out the results for an aio request that is done.
aioerror(void *cb, int run_mode)
1790 aio_result_t
*resultp
;
1795 aiop
= curproc
->p_aio
;
1796 if (aiop
== NULL
|| cb
== NULL
)
1799 if (get_udatamodel() == DATAMODEL_NATIVE
) {
1800 if (run_mode
== AIO_LARGEFILE
)
1801 resultp
= (aio_result_t
*)&((aiocb64_32_t
*)cb
)->
1804 resultp
= &((aiocb_t
*)cb
)->aio_resultp
;
1806 #ifdef _SYSCALL32_IMPL
1808 if (run_mode
== AIO_LARGEFILE
)
1809 resultp
= (aio_result_t
*)&((aiocb64_32_t
*)cb
)->
1811 else if (run_mode
== AIO_32
)
1812 resultp
= (aio_result_t
*)&((aiocb32_t
*)cb
)->
1815 #endif /* _SYSCALL32_IMPL */
1817 * we need to get the aio_cleanupq_mutex since we call
1820 mutex_enter(&aiop
->aio_cleanupq_mutex
);
1821 mutex_enter(&aiop
->aio_mutex
);
1822 retval
= aio_req_find(resultp
, &reqp
);
1823 mutex_exit(&aiop
->aio_mutex
);
1824 mutex_exit(&aiop
->aio_cleanupq_mutex
);
1826 aphysio_unlock(reqp
);
1827 aio_copyout_result(reqp
);
1828 mutex_enter(&aiop
->aio_mutex
);
1829 aio_req_free(aiop
, reqp
);
1830 mutex_exit(&aiop
->aio_mutex
);
1832 } else if (retval
== 1)
1833 return (EINPROGRESS
);
1834 else if (retval
== 2)
1840 * aio_cancel - if no requests outstanding,
1841 * return AIO_ALLDONE
1843 * return AIO_NOTCANCELED
1860 * Verify valid file descriptor
1862 if ((getf(fildes
)) == NULL
) {
1867 aiop
= curproc
->p_aio
;
1871 if (aiop
->aio_outstanding
== 0) {
1872 *rval
= AIO_ALLDONE
;
1876 mutex_enter(&aiop
->aio_mutex
);
1878 if (get_udatamodel() == DATAMODEL_NATIVE
) {
1879 if (run_mode
== AIO_LARGEFILE
)
1880 resultp
= (aio_result_t
*)&((aiocb64_32_t
*)cb
)
1883 resultp
= &((aiocb_t
*)cb
)->aio_resultp
;
1885 #ifdef _SYSCALL32_IMPL
1887 if (run_mode
== AIO_LARGEFILE
)
1888 resultp
= (aio_result_t
*)&((aiocb64_32_t
*)cb
)
1890 else if (run_mode
== AIO_32
)
1891 resultp
= (aio_result_t
*)&((aiocb32_t
*)cb
)
1894 #endif /* _SYSCALL32_IMPL */
1895 index
= AIO_HASH(resultp
);
1896 bucket
= &aiop
->aio_hash
[index
];
1897 for (ent
= *bucket
; ent
!= NULL
; ent
= ent
->aio_hash_next
) {
1898 if (ent
->aio_req_resultp
== resultp
) {
1899 if ((ent
->aio_req_flags
& AIO_PENDING
) == 0) {
1900 mutex_exit(&aiop
->aio_mutex
);
1901 *rval
= AIO_ALLDONE
;
1904 mutex_exit(&aiop
->aio_mutex
);
1905 *rval
= AIO_NOTCANCELED
;
1909 mutex_exit(&aiop
->aio_mutex
);
1910 *rval
= AIO_ALLDONE
;
1914 for (index
= 0; index
< AIO_HASHSZ
; index
++) {
1915 bucket
= &aiop
->aio_hash
[index
];
1916 for (ent
= *bucket
; ent
!= NULL
; ent
= ent
->aio_hash_next
) {
1917 if (ent
->aio_req_fd
== fildes
) {
1918 if ((ent
->aio_req_flags
& AIO_PENDING
) != 0) {
1919 mutex_exit(&aiop
->aio_mutex
);
1920 *rval
= AIO_NOTCANCELED
;
1926 mutex_exit(&aiop
->aio_mutex
);
1927 *rval
= AIO_ALLDONE
;
 * Solaris version of asynchronous read and write
1941 aio_result_t
*resultp
,
1953 aiocb64_32_t aiocb64
;
1956 aiop
= curproc
->p_aio
;
1960 if ((fp
= getf(fdes
)) == NULL
) {
1965 * check the permission of the partition
1967 if ((fp
->f_flag
& mode
) == 0) {
1973 aio_func
= check_vp(vp
, mode
);
1974 if (aio_func
== NULL
) {
1979 aiocb
.aio_fildes
= fdes
;
1980 aiocb
.aio_buf
= bufp
;
1981 aiocb
.aio_nbytes
= bufsize
;
1982 aiocb
.aio_offset
= offset
;
1983 aiocb
.aio_sigevent
.sigev_notify
= 0;
1984 error
= aio_req_setup(&reqp
, aiop
, &aiocb
, resultp
, vp
, 1);
1986 aiocb64
.aio_fildes
= fdes
;
1987 aiocb64
.aio_buf
= (caddr32_t
)bufp
;
1988 aiocb64
.aio_nbytes
= bufsize
;
1989 aiocb64
.aio_offset
= offset
;
1990 aiocb64
.aio_sigevent
.sigev_notify
= 0;
1991 error
= aio_req_setupLF(&reqp
, aiop
, &aiocb64
, resultp
, vp
, 1);
1999 * enable polling on this request if the opcode has
2000 * the AIO poll bit set
2002 if (opcode
& AIO_POLL_BIT
)
2003 reqp
->aio_req_flags
|= AIO_POLL
;
2006 clear_active_fd(fdes
);
2011 * send the request to driver.
2013 error
= (*aio_func
)(vp
, (aio_req_t
*)&reqp
->aio_req
, CRED());
2015 * the fd is stored in the aio_req_t by aio_req_setup(), and
2016 * is released by the aio_cleanup_thread() when the IO has
2021 mutex_enter(&aiop
->aio_mutex
);
2022 aio_req_free(aiop
, reqp
);
2023 aiop
->aio_pending
--;
2024 if (aiop
->aio_flags
& AIO_REQ_BLOCK
)
2025 cv_signal(&aiop
->aio_cleanupcv
);
2026 mutex_exit(&aiop
->aio_mutex
);
2029 clear_active_fd(fdes
);
 * POSIX version of asynchronous read and write
2043 #ifdef _SYSCALL32_IMPL
2045 struct sigevent32
*sigev32
;
2046 port_notify32_t pntfy32
;
2048 aiocb64_32_t aiocb64
;
2057 aio_result_t
*resultp
;
2058 struct sigevent
*sigev
;
2060 int aio_use_port
= 0;
2061 port_notify_t pntfy
;
2063 model
= get_udatamodel();
2064 aiop
= curproc
->p_aio
;
2068 if (model
== DATAMODEL_NATIVE
) {
2069 if (run_mode
!= AIO_LARGEFILE
) {
2070 if (copyin(aiocb_arg
, &aiocb
, sizeof (aiocb_t
)))
2072 bufsize
= aiocb
.aio_nbytes
;
2073 resultp
= &(((aiocb_t
*)aiocb_arg
)->aio_resultp
);
2074 if ((fp
= getf(fd
= aiocb
.aio_fildes
)) == NULL
) {
2077 sigev
= &aiocb
.aio_sigevent
;
		/*
		 * We come here only when a largefile call is made
		 * on a 32-bit kernel using the 32-bit library.
		 */
2083 if (copyin(aiocb_arg
, &aiocb64
, sizeof (aiocb64_32_t
)))
2085 bufsize
= aiocb64
.aio_nbytes
;
2086 resultp
= (aio_result_t
*)&(((aiocb64_32_t
*)aiocb_arg
)
2088 if ((fp
= getf(fd
= aiocb64
.aio_fildes
)) == NULL
)
2090 sigev
= (struct sigevent
*)&aiocb64
.aio_sigevent
;
2093 if (sigev
->sigev_notify
== SIGEV_PORT
) {
2094 if (copyin((void *)sigev
->sigev_value
.sival_ptr
,
2095 &pntfy
, sizeof (port_notify_t
))) {
2100 } else if (sigev
->sigev_notify
== SIGEV_THREAD
) {
2101 pntfy
.portnfy_port
= aiocb
.aio_sigevent
.sigev_signo
;
2102 pntfy
.portnfy_user
=
2103 aiocb
.aio_sigevent
.sigev_value
.sival_ptr
;
2107 #ifdef _SYSCALL32_IMPL
2109 if (run_mode
== AIO_32
) {
2110 /* 32 bit system call is being made on 64 bit kernel */
2111 if (copyin(aiocb_arg
, &aiocb32
, sizeof (aiocb32_t
)))
2114 bufsize
= aiocb32
.aio_nbytes
;
2115 aiocb_32ton(&aiocb32
, &aiocb
);
2116 resultp
= (aio_result_t
*)&(((aiocb32_t
*)aiocb_arg
)->
2118 if ((fp
= getf(fd
= aiocb32
.aio_fildes
)) == NULL
) {
2121 sigev32
= &aiocb32
.aio_sigevent
;
2122 } else if (run_mode
== AIO_LARGEFILE
) {
		/*
		 * We come here only when a largefile call is made
		 * on a 64-bit kernel using the 32-bit library.
		 */
2127 if (copyin(aiocb_arg
, &aiocb64
, sizeof (aiocb64_32_t
)))
2129 bufsize
= aiocb64
.aio_nbytes
;
2130 aiocb_LFton(&aiocb64
, &aiocb
);
2131 resultp
= (aio_result_t
*)&(((aiocb64_32_t
*)aiocb_arg
)
2133 if ((fp
= getf(fd
= aiocb64
.aio_fildes
)) == NULL
)
2135 sigev32
= &aiocb64
.aio_sigevent
;
2138 if (sigev32
->sigev_notify
== SIGEV_PORT
) {
2140 (void *)(uintptr_t)sigev32
->sigev_value
.sival_ptr
,
2141 &pntfy32
, sizeof (port_notify32_t
))) {
2145 pntfy
.portnfy_port
= pntfy32
.portnfy_port
;
2146 pntfy
.portnfy_user
= (void *)(uintptr_t)
2147 pntfy32
.portnfy_user
;
2149 } else if (sigev32
->sigev_notify
== SIGEV_THREAD
) {
2150 pntfy
.portnfy_port
= sigev32
->sigev_signo
;
2151 pntfy
.portnfy_user
= (void *)(uintptr_t)
2152 sigev32
->sigev_value
.sival_ptr
;
2156 #endif /* _SYSCALL32_IMPL */
2159 * check the permission of the partition
2162 if ((fp
->f_flag
& mode
) == 0) {
2168 aio_func
= check_vp(vp
, mode
);
2169 if (aio_func
== NULL
) {
2173 if (run_mode
== AIO_LARGEFILE
)
2174 error
= aio_req_setupLF(&reqp
, aiop
, &aiocb64
, resultp
, vp
, 0);
2176 error
= aio_req_setup(&reqp
, aiop
, &aiocb
, resultp
, vp
, 0);
2183 * enable polling on this request if the opcode has
2184 * the AIO poll bit set
2186 if (opcode
& AIO_POLL_BIT
)
2187 reqp
->aio_req_flags
|= AIO_POLL
;
2189 if (model
== DATAMODEL_NATIVE
)
2190 reqp
->aio_req_iocb
.iocb
= aiocb_arg
;
2191 #ifdef _SYSCALL32_IMPL
2193 reqp
->aio_req_iocb
.iocb32
= (caddr32_t
)(uintptr_t)aiocb_arg
;
2197 int event
= (run_mode
== AIO_LARGEFILE
)?
2198 ((mode
== FREAD
)? AIOAREAD64
: AIOAWRITE64
) :
2199 ((mode
== FREAD
)? AIOAREAD
: AIOAWRITE
);
2200 error
= aio_req_assoc_port_rw(&pntfy
, aiocb_arg
, reqp
, event
);
2204 * send the request to driver.
2208 clear_active_fd(fd
);
2212 error
= (*aio_func
)(vp
, (aio_req_t
*)&reqp
->aio_req
, CRED());
2216 * the fd is stored in the aio_req_t by aio_req_setup(), and
2217 * is released by the aio_cleanup_thread() when the IO has
2222 mutex_enter(&aiop
->aio_mutex
);
2224 aio_deq(&aiop
->aio_portpending
, reqp
);
2225 aio_req_free(aiop
, reqp
);
2226 aiop
->aio_pending
--;
2227 if (aiop
->aio_flags
& AIO_REQ_BLOCK
)
2228 cv_signal(&aiop
->aio_cleanupcv
);
2229 mutex_exit(&aiop
->aio_mutex
);
2232 clear_active_fd(fd
);
2238 * set error for a list IO entry that failed.
lio_set_error(aio_req_t *reqp, int portused)
2243 aio_t
*aiop
= curproc
->p_aio
;
2248 mutex_enter(&aiop
->aio_mutex
);
2250 aio_deq(&aiop
->aio_portpending
, reqp
);
2251 aiop
->aio_pending
--;
	/* request failed, AIO_PHYSIODONE set to avoid physio cleanup. */
2253 reqp
->aio_req_flags
|= AIO_PHYSIODONE
;
	 * Need to free the request now as it's never
	 * going to get on the done queue.
	 *
	 * Note: aio_outstanding is decremented in
2261 aio_req_free(aiop
, reqp
);
2262 if (aiop
->aio_flags
& AIO_REQ_BLOCK
)
2263 cv_signal(&aiop
->aio_cleanupcv
);
2264 mutex_exit(&aiop
->aio_mutex
);
/*
 * Check whether a specified request is done, and remove it from
 * the done queue.  Otherwise remove any request from the done
 * queue if NULL is specified.
 */
2273 aio_req_done(void *resultp
)
2277 aio_t
*aiop
= curproc
->p_aio
;
2280 ASSERT(MUTEX_HELD(&aiop
->aio_cleanupq_mutex
));
2281 ASSERT(MUTEX_HELD(&aiop
->aio_mutex
));
2284 index
= AIO_HASH(resultp
);
2285 bucket
= &aiop
->aio_hash
[index
];
2286 for (ent
= *bucket
; ent
!= NULL
; ent
= ent
->aio_hash_next
) {
2287 if (ent
->aio_req_resultp
== (aio_result_t
*)resultp
) {
2288 if (ent
->aio_req_flags
& AIO_DONEQ
) {
2289 return (aio_req_remove(ent
));
2294 /* no match, resultp is invalid */
2297 return (aio_req_remove(NULL
));
/*
 * Determine whether a user-level resultp pointer is associated with an
 * active IO request.  Zero is returned when the request is done, and
 * the request is removed from the done queue.  The "reqp" pointer is
 * valid only when the return value is zero.  One is returned when the
 * request is in progress.  Two is returned when the request is invalid.
 */
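/*
 * Added note: aioerror() above maps these return values to what the
 * application sees -- 0 means the result has already been copied out,
 * 1 is reported as EINPROGRESS, and 2 (no matching request) is treated
 * as an error.
 */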
aio_req_find(aio_result_t *resultp, aio_req_t **reqp)
2313 aio_t
*aiop
= curproc
->p_aio
;
2316 ASSERT(MUTEX_HELD(&aiop
->aio_cleanupq_mutex
));
2317 ASSERT(MUTEX_HELD(&aiop
->aio_mutex
));
2319 index
= AIO_HASH(resultp
);
2320 bucket
= &aiop
->aio_hash
[index
];
2321 for (ent
= *bucket
; ent
!= NULL
; ent
= ent
->aio_hash_next
) {
2322 if (ent
->aio_req_resultp
== resultp
) {
2323 if (ent
->aio_req_flags
& AIO_DONEQ
) {
2324 *reqp
= aio_req_remove(ent
);
2330 /* no match, resultp is invalid */
2335 * remove a request from the done queue.
aio_req_remove(aio_req_t *reqp)
2340 aio_t
*aiop
= curproc
->p_aio
;
2342 ASSERT(MUTEX_HELD(&aiop
->aio_mutex
));
2345 ASSERT(reqp
->aio_req_flags
& AIO_DONEQ
);
2346 if (reqp
->aio_req_next
== reqp
) {
2347 /* only one request on queue */
2348 if (reqp
== aiop
->aio_doneq
) {
2349 aiop
->aio_doneq
= NULL
;
2351 ASSERT(reqp
== aiop
->aio_cleanupq
);
2352 aiop
->aio_cleanupq
= NULL
;
2355 reqp
->aio_req_next
->aio_req_prev
= reqp
->aio_req_prev
;
2356 reqp
->aio_req_prev
->aio_req_next
= reqp
->aio_req_next
;
2358 * The request can be either on the aio_doneq or the
2361 if (reqp
== aiop
->aio_doneq
)
2362 aiop
->aio_doneq
= reqp
->aio_req_next
;
2364 if (reqp
== aiop
->aio_cleanupq
)
2365 aiop
->aio_cleanupq
= reqp
->aio_req_next
;
2367 reqp
->aio_req_flags
&= ~AIO_DONEQ
;
2368 reqp
->aio_req_next
= NULL
;
2369 reqp
->aio_req_prev
= NULL
;
2370 } else if ((reqp
= aiop
->aio_doneq
) != NULL
) {
2371 ASSERT(reqp
->aio_req_flags
& AIO_DONEQ
);
2372 if (reqp
== reqp
->aio_req_next
) {
2373 /* only one request on queue */
2374 aiop
->aio_doneq
= NULL
;
2376 reqp
->aio_req_prev
->aio_req_next
= reqp
->aio_req_next
;
2377 reqp
->aio_req_next
->aio_req_prev
= reqp
->aio_req_prev
;
2378 aiop
->aio_doneq
= reqp
->aio_req_next
;
2380 reqp
->aio_req_flags
&= ~AIO_DONEQ
;
2381 reqp
->aio_req_next
= NULL
;
2382 reqp
->aio_req_prev
= NULL
;
2384 if (aiop
->aio_doneq
== NULL
&& (aiop
->aio_flags
& AIO_WAITN
))
2385 cv_broadcast(&aiop
->aio_waitcv
);
2394 aio_result_t
*resultp
,
2396 int old_solaris_req
)
2398 sigqueue_t
*sqp
= NULL
;
2401 struct sigevent
*sigev
;
2404 sigev
= &arg
->aio_sigevent
;
2405 if (sigev
->sigev_notify
== SIGEV_SIGNAL
&&
2406 sigev
->sigev_signo
> 0 && sigev
->sigev_signo
< NSIG
) {
2407 sqp
= kmem_zalloc(sizeof (sigqueue_t
), KM_NOSLEEP
);
2410 sqp
->sq_func
= NULL
;
2411 sqp
->sq_next
= NULL
;
2412 sqp
->sq_info
.si_code
= SI_ASYNCIO
;
2413 sqp
->sq_info
.si_pid
= curproc
->p_pid
;
2414 sqp
->sq_info
.si_ctid
= PRCTID(curproc
);
2415 sqp
->sq_info
.si_zoneid
= getzoneid();
2416 sqp
->sq_info
.si_uid
= crgetuid(curproc
->p_cred
);
2417 sqp
->sq_info
.si_signo
= sigev
->sigev_signo
;
2418 sqp
->sq_info
.si_value
= sigev
->sigev_value
;
2421 mutex_enter(&aiop
->aio_mutex
);
2423 if (aiop
->aio_flags
& AIO_REQ_BLOCK
) {
2424 mutex_exit(&aiop
->aio_mutex
);
2426 kmem_free(sqp
, sizeof (sigqueue_t
));
2430 * get an aio_reqp from the free list or allocate one
2431 * from dynamic memory.
2433 if (error
= aio_req_alloc(&reqp
, resultp
)) {
2434 mutex_exit(&aiop
->aio_mutex
);
2436 kmem_free(sqp
, sizeof (sigqueue_t
));
2439 aiop
->aio_pending
++;
2440 aiop
->aio_outstanding
++;
2441 reqp
->aio_req_flags
= AIO_PENDING
;
2442 if (old_solaris_req
) {
2443 /* this is an old solaris aio request */
2444 reqp
->aio_req_flags
|= AIO_SOLARIS
;
2445 aiop
->aio_flags
|= AIO_SOLARIS_REQ
;
2447 if (sigev
->sigev_notify
== SIGEV_THREAD
||
2448 sigev
->sigev_notify
== SIGEV_PORT
)
2449 aio_enq(&aiop
->aio_portpending
, reqp
, 0);
2450 mutex_exit(&aiop
->aio_mutex
);
2452 * initialize aio request.
2454 reqp
->aio_req_fd
= arg
->aio_fildes
;
2455 reqp
->aio_req_sigqp
= sqp
;
2456 reqp
->aio_req_iocb
.iocb
= NULL
;
2457 reqp
->aio_req_lio
= NULL
;
2458 reqp
->aio_req_buf
.b_file
= vp
;
2459 uio
= reqp
->aio_req
.aio_uio
;
2460 uio
->uio_iovcnt
= 1;
2461 uio
->uio_iov
->iov_base
= (caddr_t
)arg
->aio_buf
;
2462 uio
->uio_iov
->iov_len
= arg
->aio_nbytes
;
2463 uio
->uio_loffset
= arg
->aio_offset
;
2469 * Allocate p_aio struct.
2472 aio_aiop_alloc(void)
2476 ASSERT(MUTEX_HELD(&curproc
->p_lock
));
2478 aiop
= kmem_zalloc(sizeof (struct aio
), KM_NOSLEEP
);
2480 mutex_init(&aiop
->aio_mutex
, NULL
, MUTEX_DEFAULT
, NULL
);
2481 mutex_init(&aiop
->aio_cleanupq_mutex
, NULL
, MUTEX_DEFAULT
,
2483 mutex_init(&aiop
->aio_portq_mutex
, NULL
, MUTEX_DEFAULT
, NULL
);
2489 * Allocate an aio_req struct.
aio_req_alloc(aio_req_t **nreqp, aio_result_t *resultp)
2495 aio_t
*aiop
= curproc
->p_aio
;
2497 ASSERT(MUTEX_HELD(&aiop
->aio_mutex
));
2499 if ((reqp
= aiop
->aio_free
) != NULL
) {
2500 aiop
->aio_free
= reqp
->aio_req_next
;
2501 bzero(reqp
, sizeof (*reqp
));
2504 * Check whether memory is getting tight.
2505 * This is a temporary mechanism to avoid memory
2506 * exhaustion by a single process until we come up
2507 * with a per process solution such as setrlimit().
2509 if (freemem
< desfree
)
2511 reqp
= kmem_zalloc(sizeof (struct aio_req_t
), KM_NOSLEEP
);
2515 reqp
->aio_req
.aio_uio
= &reqp
->aio_req_uio
;
2516 reqp
->aio_req
.aio_uio
->uio_iov
= &reqp
->aio_req_iov
;
2517 reqp
->aio_req
.aio_private
= reqp
;
2518 reqp
->aio_req_buf
.b_offset
= -1;
2519 reqp
->aio_req_resultp
= resultp
;
2520 if (aio_hash_insert(reqp
, aiop
)) {
2521 reqp
->aio_req_next
= aiop
->aio_free
;
2522 aiop
->aio_free
= reqp
;
2530 * Allocate an aio_lio_t struct.
aio_lio_alloc(aio_lio_t **head)
2536 aio_t
*aiop
= curproc
->p_aio
;
2538 ASSERT(MUTEX_HELD(&aiop
->aio_mutex
));
2540 if ((liop
= aiop
->aio_lio_free
) != NULL
) {
2541 aiop
->aio_lio_free
= liop
->lio_next
;
2544 * Check whether memory is getting tight.
2545 * This is a temporary mechanism to avoid memory
2546 * exhaustion by a single process until we come up
2547 * with a per process solution such as setrlimit().
2549 if (freemem
< desfree
)
2552 liop
= kmem_zalloc(sizeof (aio_lio_t
), KM_NOSLEEP
);
/*
 * This is a special per-process thread that is only activated if
 * the process is unmapping a segment with outstanding aio.  Normally,
 * the process will have completed the aio before unmapping the
 * segment.  If the process does unmap a segment with outstanding aio,
 * this special thread will guarantee that the locked pages due to
 * aphysio() are released, thereby permitting the segment to be
 * unmapped.  In addition to this, the cleanup thread is woken up
 * during DR operations to release the locked pages.
 */
aio_cleanup_thread(aio_t *aiop)
2574 proc_t
*p
= curproc
;
2575 struct as
*as
= p
->p_as
;
2581 sigfillset(&curthread
->t_hold
);
2582 sigdiffset(&curthread
->t_hold
, &cantmask
);
2585 * if a segment is being unmapped, and the current
2586 * process's done queue is not empty, then every request
2587 * on the doneq with locked resources should be forced
2588 * to release their locks. By moving the doneq request
2589 * to the cleanupq, aio_cleanup() will process the cleanupq,
2590 * and place requests back onto the doneq. All requests
2591 * processed by aio_cleanup() will have their physical
2592 * resources unlocked.
2594 mutex_enter(&aiop
->aio_mutex
);
2595 if ((aiop
->aio_flags
& AIO_CLEANUP
) == 0) {
2596 aiop
->aio_flags
|= AIO_CLEANUP
;
2597 mutex_enter(&as
->a_contents
);
2598 if (aiop
->aio_rqclnup
) {
2599 aiop
->aio_rqclnup
= 0;
2602 mutex_exit(&as
->a_contents
);
2603 if (aiop
->aio_doneq
) {
2604 aio_req_t
*doneqhead
= aiop
->aio_doneq
;
2605 aiop
->aio_doneq
= NULL
;
2606 aio_cleanupq_concat(aiop
, doneqhead
, AIO_DONEQ
);
2609 mutex_exit(&aiop
->aio_mutex
);
2610 aio_cleanup(AIO_CLEANUP_THREAD
);
2612 * thread should block on the cleanupcv while
2613 * AIO_CLEANUP is set.
2615 cvp
= &aiop
->aio_cleanupcv
;
2616 mutex_enter(&aiop
->aio_mutex
);
2618 if (aiop
->aio_pollq
!= NULL
|| aiop
->aio_cleanupq
!= NULL
||
2619 aiop
->aio_notifyq
!= NULL
||
2620 aiop
->aio_portcleanupq
!= NULL
) {
2621 mutex_exit(&aiop
->aio_mutex
);
2624 mutex_enter(&as
->a_contents
);
2627 * AIO_CLEANUP determines when the cleanup thread
2628 * should be active. This flag is set when
2629 * the cleanup thread is awakened by as_unmap() or
2630 * due to DR operations.
2631 * The flag is cleared when the blocking as_unmap()
2632 * that originally awakened us is allowed to
2633 * complete. as_unmap() blocks when trying to
2634 * unmap a segment that has SOFTLOCKed pages. when
2635 * the segment's pages are all SOFTUNLOCKed,
2636 * as->a_flags & AS_UNMAPWAIT should be zero.
2638 * In case of cleanup request by DR, the flag is cleared
2639 * once all the pending aio requests have been processed.
2641 * The flag shouldn't be cleared right away if the
2642 * cleanup thread was interrupted because the process
2643 * is doing forkall(). This happens when cv_wait_sig()
2644 * returns zero, because it was awakened by a pokelwps().
2645 * If the process is not exiting, it must be doing forkall().
2648 ((!rqclnup
&& (AS_ISUNMAPWAIT(as
) == 0)) ||
2649 (aiop
->aio_pending
== 0))) {
2650 aiop
->aio_flags
&= ~(AIO_CLEANUP
| AIO_CLEANUP_PORT
);
2654 mutex_exit(&aiop
->aio_mutex
);
2657 * If the process is exiting/killed, don't return
2658 * immediately without waiting for pending I/O's
2659 * and releasing the page locks.
2661 if (p
->p_flag
& (SEXITLWPS
|SKILLED
)) {
2663 * If exit_flag is set, then it is
2664 * safe to exit because we have released
2665 * page locks of completed I/O's.
2670 mutex_exit(&as
->a_contents
);
2673 * Wait for all the pending aio to complete.
2675 mutex_enter(&aiop
->aio_mutex
);
2676 aiop
->aio_flags
|= AIO_REQ_BLOCK
;
2677 while (aiop
->aio_pending
!= 0)
2678 cv_wait(&aiop
->aio_cleanupcv
,
2680 mutex_exit(&aiop
->aio_mutex
);
2683 } else if (p
->p_flag
&
2684 (SHOLDFORK
|SHOLDFORK1
|SHOLDWATCH
)) {
2689 mutex_exit(&as
->a_contents
);
2690 mutex_enter(&p
->p_lock
);
2691 stop(PR_SUSPENDED
, SUSPEND_NORMAL
);
2692 mutex_exit(&p
->p_lock
);
2698 * When started this thread will sleep on as->a_cv.
2699 * as_unmap will awake this thread if the
2700 * segment has SOFTLOCKed pages (poked = 0).
2701 * 1. pokelwps() awakes this thread =>
2702 * break the loop to check SEXITLWPS, SHOLDFORK, etc
2703 * 2. as_unmap awakes this thread =>
2704 * to break the loop it is necessary that
2705 * - AS_UNMAPWAIT is set (as_unmap is waiting for
2706 * memory to be unlocked)
2707 * - AIO_CLEANUP is not set
2708 * (if AIO_CLEANUP is set we have to wait for
2709 * pending requests. aio_done will send a signal
2710 * for every request which completes to continue
2711 * unmapping the corresponding address range)
2712 * 3. A cleanup request will wake this thread up, ex.
2713 * by the DR operations. The aio_rqclnup flag will
2716 while (poked
== 0) {
			/*
			 * Cleanup requests that came in after we had
			 * just cleaned up cannot be what is blocking
			 * the unmap thread, since the unmap event
			 * happened first.
			 * Let aio_done() wake us up if it sees a need.
			 */
2724 if (aiop
->aio_rqclnup
&&
2725 (aiop
->aio_flags
& AIO_CLEANUP
) == 0)
2727 poked
= !cv_wait_sig(cvp
, &as
->a_contents
);
2728 if (AS_ISUNMAPWAIT(as
) == 0)
2730 if (aiop
->aio_outstanding
!= 0)
2734 mutex_exit(&as
->a_contents
);
2737 mutex_exit(&as
->a_contents
);
2738 ASSERT((curproc
->p_flag
& (SEXITLWPS
|SKILLED
)));
2739 aston(curthread
); /* make thread do post_syscall */
2744 * save a reference to a user's outstanding aio in a hash list.
2748 aio_req_t
*aio_reqp
,
2752 aio_result_t
*resultp
= aio_reqp
->aio_req_resultp
;
2756 index
= AIO_HASH(resultp
);
2757 nextp
= &aiop
->aio_hash
[index
];
2758 while ((current
= *nextp
) != NULL
) {
2759 if (current
->aio_req_resultp
== resultp
)
2761 nextp
= ¤t
->aio_hash_next
;
2764 aio_reqp
->aio_hash_next
= NULL
;
2769 (*check_vp(struct vnode
*vp
, int mode
))(vnode_t
*, struct aio_req
*,
2779 major
= getmajor(dev
);
2782 * return NULL for requests to files and STREAMs so
2783 * that libaio takes care of them.
2785 if (vp
->v_type
== VCHR
) {
2786 /* no stream device for kaio */
2787 if (STREAMSTAB(major
)) {
2795 * Check old drivers which do not have async I/O entry points.
2797 if (devopsp
[major
]->devo_rev
< 3)
2800 cb
= devopsp
[major
]->devo_cb_ops
;
2806 * Check whether this device is a block device.
2807 * Kaio is not supported for devices like tty.
2809 if (cb
->cb_strategy
== nodev
|| cb
->cb_strategy
== NULL
)
2813 aio_func
= (cb
->cb_aread
== nodev
) ? NULL
: driver_aio_read
;
2815 aio_func
= (cb
->cb_awrite
== nodev
) ? NULL
: driver_aio_write
;
2819 * nodev returns ENXIO anyway.
2821 if (aio_func
== nodev
)
/*
 * Clustering: We want check_vp to return a function prototyped
 * correctly that will be common to both PXFS and regular case.
 * We define this intermediate function that will do the right
 * thing for driver cases.
 */
static int
driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
{
	dev_t dev;
	struct cb_ops *cb;

	ASSERT(vp->v_type == VCHR);
	dev = VTOS(vp)->s_dev;
	ASSERT(STREAMSTAB(getmajor(dev)) == NULL);

	cb = devopsp[getmajor(dev)]->devo_cb_ops;

	ASSERT(cb->cb_awrite != nodev);
	return ((*cb->cb_awrite)(dev, aio, cred_p));
}

/*
 * Clustering: We want check_vp to return a function prototyped
 * correctly that will be common to both PXFS and regular case.
 * We define this intermediate function that will do the right
 * thing for driver cases.
 */
static int
driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
{
	dev_t dev;
	struct cb_ops *cb;

	ASSERT(vp->v_type == VCHR);
	dev = VTOS(vp)->s_dev;
	ASSERT(!STREAMSTAB(getmajor(dev)));

	cb = devopsp[getmajor(dev)]->devo_cb_ops;

	ASSERT(cb->cb_aread != nodev);
	return ((*cb->cb_aread)(dev, aio, cred_p));
}
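
/*
 * Added illustrative sketch (not part of the original kaio code): the
 * cb_aread/cb_awrite entry points that check_vp() probes for are ordinary
 * aread(9E)/awrite(9E) driver routines.  A character driver that wants to
 * support kaio typically hands the request to aphysio(9F), as in the
 * hypothetical "xx" driver below (xxstrategy is an assumed routine).
 */
#if 0
static int
xxaread(dev_t dev, struct aio_req *aio, cred_t *cred_p)
{
	/* queue the async read through the driver's strategy routine */
	return (aphysio(xxstrategy, anocancel, dev, B_READ, minphys, aio));
}

static int
xxawrite(dev_t dev, struct aio_req *aio, cred_t *cred_p)
{
	/* same shape as xxaread, but for writes */
	return (aphysio(xxstrategy, anocancel, dev, B_WRITE, minphys, aio));
}
#endif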
/*
 * This routine is called when a largefile call is made by a 32bit
 * process on a ILP32 or LP64 kernel. All 64bit processes are large
 * file by definition and will call alio() instead.
 */
static int
alioLF(
	int		mode_arg,
	void		*aiocb_arg,
	int		nent,
	void		*sigev)
{
	file_t		*fp;
	file_t		*prev_fp = NULL;
	int		prev_mode = -1;
	struct vnode	*vp;
	aio_lio_t	*head;
	aio_req_t	*reqp;
	aio_t		*aiop;
	caddr_t		cbplist;
	aiocb64_32_t	cb64;
	aiocb64_32_t	*aiocb = &cb64;
	aiocb64_32_t	*cbp;
	caddr32_t	*ucbp;
	aiocb_t		aiocb_n;
	struct sigevent32	sigevk;
	sigqueue_t	*sqp;
	int		(*aio_func)();
	int		mode;
	int		error = 0;
	int		aio_errors = 0;
	int		i;
	size_t		ssize;
	int		deadhead = 0;
	int		aio_notsupported = 0;
	int		lio_head_port;
	int		aio_port;
	int		aio_thread;
	port_kevent_t	*pkevtp = NULL;
	int		portused = 0;
	port_notify32_t	pnotify;
	int		event;

	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
		return (EINVAL);

	ASSERT(get_udatamodel() == DATAMODEL_ILP32);

	ssize = (sizeof (caddr32_t) * nent);
	cbplist = kmem_alloc(ssize, KM_SLEEP);
	ucbp = (caddr32_t *)cbplist;

	if (copyin(aiocb_arg, cbplist, ssize) ||
	    (sigev && copyin(sigev, &sigevk, sizeof (sigevk)))) {
		kmem_free(cbplist, ssize);
		return (EFAULT);
	}

	/* Event Ports  */
	if (sigev &&
	    (sigevk.sigev_notify == SIGEV_THREAD ||
	    sigevk.sigev_notify == SIGEV_PORT)) {
		if (sigevk.sigev_notify == SIGEV_THREAD) {
			pnotify.portnfy_port = sigevk.sigev_signo;
			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
		} else if (copyin(
		    (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
		    &pnotify, sizeof (pnotify))) {
			kmem_free(cbplist, ssize);
			return (EFAULT);
		}
		error = port_alloc_event(pnotify.portnfy_port,
		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
		if (error) {
			if (error == ENOMEM || error == EAGAIN)
				error = EAGAIN;
			else
				error = EINVAL;
			kmem_free(cbplist, ssize);
			return (error);
		}
		lio_head_port = pnotify.portnfy_port;
		portused = 1;
	}

	/*
	 * a list head should be allocated if notification is
	 * enabled for this list.
	 */
	head = NULL;

	if (mode_arg == LIO_WAIT || sigev) {
		mutex_enter(&aiop->aio_mutex);
		error = aio_lio_alloc(&head);
		mutex_exit(&aiop->aio_mutex);
		if (error)
			goto done;
		deadhead = 1;
		head->lio_nent = nent;
		head->lio_refcnt = nent;
		head->lio_port = -1;
		head->lio_portkev = NULL;
		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
			if (sqp == NULL) {
				error = EAGAIN;
				goto done;
			}
			sqp->sq_func = NULL;
			sqp->sq_next = NULL;
			sqp->sq_info.si_code = SI_ASYNCIO;
			sqp->sq_info.si_pid = curproc->p_pid;
			sqp->sq_info.si_ctid = PRCTID(curproc);
			sqp->sq_info.si_zoneid = getzoneid();
			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
			sqp->sq_info.si_signo = sigevk.sigev_signo;
			sqp->sq_info.si_value.sival_int =
			    sigevk.sigev_value.sival_int;
			head->lio_sigqp = sqp;
		} else {
			head->lio_sigqp = NULL;
		}
		if (pkevtp) {
			/*
			 * Prepare data to send when list of aiocb's
			 * has completed.
			 */
			port_init_event(pkevtp, (uintptr_t)sigev,
			    (void *)(uintptr_t)pnotify.portnfy_user,
			    NULL, head);
			pkevtp->portkev_events = AIOLIO64;
			head->lio_portkev = pkevtp;
			head->lio_port = pnotify.portnfy_port;
		}
	}

	for (i = 0; i < nent; i++, ucbp++) {

		cbp = (aiocb64_32_t *)(uintptr_t)*ucbp;
		/* skip entry if it can't be copied. */
		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* skip if opcode for aiocb is LIO_NOP */
		mode = aiocb->aio_lio_opcode;
		if (mode == LIO_NOP) {
			cbp = NULL;
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* increment file descriptor's ref count. */
		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * check the permission of the partition
		 */
		if ((fp->f_flag & mode) == 0) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * common case where requests are to the same fd
		 * for the same r/w operation
		 * for UFS, need to set EBADFD
		 */
		vp = fp->f_vnode;
		if (fp != prev_fp || mode != prev_mode) {
			aio_func = check_vp(vp, mode);
			if (aio_func == NULL) {
				prev_fp = NULL;
				releasef(aiocb->aio_fildes);
				lio_set_uerror(&cbp->aio_resultp, EBADFD);
				aio_notsupported++;
				if (head) {
					mutex_enter(&aiop->aio_mutex);
					head->lio_nent--;
					head->lio_refcnt--;
					mutex_exit(&aiop->aio_mutex);
				}
				continue;
			} else {
				prev_fp = fp;
				prev_mode = mode;
			}
		}

#ifdef	_LP64
		aiocb_LFton(aiocb, &aiocb_n);
		error = aio_req_setup(&reqp, aiop, &aiocb_n,
		    (aio_result_t *)&cbp->aio_resultp, vp, 0);
#else
		error = aio_req_setupLF(&reqp, aiop, aiocb,
		    (aio_result_t *)&cbp->aio_resultp, vp, 0);
#endif  /* _LP64 */
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		reqp->aio_req_lio = head;
		deadhead = 0;

		/*
		 * Set the errno field now before sending the request to
		 * the driver to avoid a race condition
		 */
		(void) suword32(&cbp->aio_resultp.aio_errno,
		    EINPROGRESS);

		reqp->aio_req_iocb.iocb32 = *ucbp;

		event = (mode == LIO_READ)? AIOAREAD64 : AIOAWRITE64;
		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
		if (aio_port | aio_thread) {
			port_kevent_t *lpkevp;
			/*
			 * Prepare data to send with each aiocb completed.
			 */
			if (aio_port) {
				void *paddr = (void *)(uintptr_t)
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
				if (copyin(paddr, &pnotify, sizeof (pnotify)))
					error = EFAULT;
			} else {	/* aio_thread */
				pnotify.portnfy_port =
				    aiocb->aio_sigevent.sigev_signo;
				pnotify.portnfy_user =
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
			}
			if (error)
				/* EMPTY */;
			else if (pkevtp != NULL &&
			    pnotify.portnfy_port == lio_head_port)
				error = port_dup_event(pkevtp, &lpkevp,
				    PORT_ALLOC_DEFAULT);
			else
				error = port_alloc_event(pnotify.portnfy_port,
				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
				    &lpkevp);
			if (error == 0) {
				port_init_event(lpkevp, (uintptr_t)*ucbp,
				    (void *)(uintptr_t)pnotify.portnfy_user,
				    aio_port_callback, reqp);
				lpkevp->portkev_events = event;
				reqp->aio_req_portkev = lpkevp;
				reqp->aio_req_port = pnotify.portnfy_port;
			}
		}

		/*
		 * send the request to driver.
		 */
		if (error == 0) {
			if (aiocb->aio_nbytes == 0) {
				clear_active_fd(aiocb->aio_fildes);
				aio_zerolen(reqp);
				continue;
			}
			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
			    CRED());
		}

		/*
		 * the fd's ref count is not decremented until the IO has
		 * completed unless there was an error.
		 */
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			if (error == ENOTSUP)
				aio_notsupported++;
			else
				aio_errors++;
			lio_set_error(reqp, portused);
		} else {
			clear_active_fd(aiocb->aio_fildes);
		}
	}

	if (aio_notsupported) {
		error = ENOTSUP;
	} else if (aio_errors) {
		/*
		 * return EIO if any request failed
		 */
		error = EIO;
	}

	if (mode_arg == LIO_WAIT) {
		mutex_enter(&aiop->aio_mutex);
		while (head->lio_refcnt > 0) {
			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
				mutex_exit(&aiop->aio_mutex);
				error = EINTR;
				goto done;
			}
		}
		mutex_exit(&aiop->aio_mutex);
		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_LARGEFILE);
	}

done:
	kmem_free(cbplist, ssize);
	if (deadhead) {
		if (head->lio_sigqp)
			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
		if (head->lio_portkev)
			port_free_event(head->lio_portkev);
		kmem_free(head, sizeof (aio_lio_t));
	}
	return (error);
}
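
/*
 * Added illustrative user-level sketch (not kernel code): the kind of
 * 32-bit largefile submission that alioLF() ends up servicing, using
 * event-port notification.  The device fd, buffer size, single-entry
 * list, and largefile compile-time setup are assumptions for
 * illustration only.
 */
#if 0
#include <aio.h>
#include <port.h>
#include <fcntl.h>
#include <signal.h>
#include <string.h>

static char iobuf[8192];

static int
submit_largefile_list(int rawfd, int port)
{
	struct aiocb64 cb;
	struct aiocb64 *list[1] = { &cb };
	struct sigevent sev;
	port_notify_t pn;

	(void) memset(&cb, 0, sizeof (cb));
	cb.aio_fildes = rawfd;		/* e.g. a raw device fd */
	cb.aio_buf = iobuf;
	cb.aio_nbytes = sizeof (iobuf);
	cb.aio_offset = 0;
	cb.aio_lio_opcode = LIO_READ;

	pn.portnfy_port = port;		/* from port_create() */
	pn.portnfy_user = list;		/* handed back in portev_user */

	(void) memset(&sev, 0, sizeof (sev));
	sev.sigev_notify = SIGEV_PORT;
	sev.sigev_value.sival_ptr = &pn;

	return (lio_listio64(LIO_NOWAIT, list, 1, &sev));
}
#endif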
#ifdef _SYSCALL32_IMPL
void
aiocb_LFton(aiocb64_32_t *src, aiocb_t *dest)
{
	dest->aio_fildes = src->aio_fildes;
	dest->aio_buf = (void *)(uintptr_t)src->aio_buf;
	dest->aio_nbytes = (size_t)src->aio_nbytes;
	dest->aio_offset = (off_t)src->aio_offset;
	dest->aio_reqprio = src->aio_reqprio;
	dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
	dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;

	/*
	 * See comment in sigqueue32() on handling of 32-bit
	 * sigvals in a 64-bit kernel.
	 */
	dest->aio_sigevent.sigev_value.sival_int =
	    (int)src->aio_sigevent.sigev_value.sival_int;
	dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
	    (uintptr_t)src->aio_sigevent.sigev_notify_function;
	dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
	    (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
	dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
	dest->aio_lio_opcode = src->aio_lio_opcode;
	dest->aio_state = src->aio_state;
	dest->aio__pad[0] = src->aio__pad[0];
}
#endif /* _SYSCALL32_IMPL */
/*
 * This function is used only for largefile calls made by
 * 32 bit applications.
 */
static int
aio_req_setupLF(
	aio_req_t	**reqpp,
	aio_t		*aiop,
	aiocb64_32_t	*arg,
	aio_result_t	*resultp,
	vnode_t		*vp,
	int		old_solaris_req)
{
	sigqueue_t	*sqp = NULL;
	aio_req_t	*reqp;
	struct uio	*uio;
	struct sigevent32 *sigev;
	int		error;

	sigev = &arg->aio_sigevent;
	if (sigev->sigev_notify == SIGEV_SIGNAL &&
	    sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
		if (sqp == NULL)
			return (EAGAIN);
		sqp->sq_func = NULL;
		sqp->sq_next = NULL;
		sqp->sq_info.si_code = SI_ASYNCIO;
		sqp->sq_info.si_pid = curproc->p_pid;
		sqp->sq_info.si_ctid = PRCTID(curproc);
		sqp->sq_info.si_zoneid = getzoneid();
		sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
		sqp->sq_info.si_signo = sigev->sigev_signo;
		sqp->sq_info.si_value.sival_int = sigev->sigev_value.sival_int;
	}

	mutex_enter(&aiop->aio_mutex);

	if (aiop->aio_flags & AIO_REQ_BLOCK) {
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (EIO);
	}
	/*
	 * get an aio_reqp from the free list or allocate one
	 * from dynamic memory.
	 */
	if (error = aio_req_alloc(&reqp, resultp)) {
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (error);
	}
	aiop->aio_pending++;
	aiop->aio_outstanding++;
	reqp->aio_req_flags = AIO_PENDING;
	if (old_solaris_req) {
		/* this is an old solaris aio request */
		reqp->aio_req_flags |= AIO_SOLARIS;
		aiop->aio_flags |= AIO_SOLARIS_REQ;
	}
	if (sigev->sigev_notify == SIGEV_THREAD ||
	    sigev->sigev_notify == SIGEV_PORT)
		aio_enq(&aiop->aio_portpending, reqp, 0);
	mutex_exit(&aiop->aio_mutex);
	/*
	 * initialize aio request.
	 */
	reqp->aio_req_fd = arg->aio_fildes;
	reqp->aio_req_sigqp = sqp;
	reqp->aio_req_iocb.iocb = NULL;
	reqp->aio_req_lio = NULL;
	reqp->aio_req_buf.b_file = vp;
	uio = reqp->aio_req.aio_uio;
	uio->uio_iovcnt = 1;
	uio->uio_iov->iov_base = (caddr_t)(uintptr_t)arg->aio_buf;
	uio->uio_iov->iov_len = arg->aio_nbytes;
	uio->uio_loffset = arg->aio_offset;
	*reqpp = reqp;
	return (0);
}
/*
 * This routine is called when a non largefile call is made by a 32bit
 * process on a ILP32 or LP64 kernel.
 */
static int
alio32(
	int		mode_arg,
	void		*aiocb_arg,
	int		nent,
	void		*sigev)
{
	file_t		*fp;
	file_t		*prev_fp = NULL;
	int		prev_mode = -1;
	struct vnode	*vp;
	aio_lio_t	*head;
	aio_req_t	*reqp;
	aio_t		*aiop;
	caddr_t		cbplist;
	aiocb_t		cb;
	aiocb_t		*aiocb = &cb;
#ifdef	_SYSCALL32_IMPL
	aiocb32_t	*cbp;
	caddr32_t	*ucbp;
	aiocb32_t	cb32;
	aiocb32_t	*aiocb32 = &cb32;
	struct sigevent32	sigevk;
#else
	aiocb_t		*cbp, **ucbp;
	struct sigevent	sigevk;
#endif
	sigqueue_t	*sqp;
	int		(*aio_func)();
	int		mode;
	int		error = 0;
	int		aio_errors = 0;
	int		i;
	size_t		ssize;
	int		deadhead = 0;
	int		aio_notsupported = 0;
	int		lio_head_port;
	int		aio_port;
	int		aio_thread;
	port_kevent_t	*pkevtp = NULL;
	int		portused = 0;
#ifdef	_SYSCALL32_IMPL
	port_notify32_t	pnotify;
#else
	port_notify_t	pnotify;
#endif
	int		event;

	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
		return (EINVAL);

#ifdef	_SYSCALL32_IMPL
	ssize = (sizeof (caddr32_t) * nent);
#else
	ssize = (sizeof (aiocb_t *) * nent);
#endif
	cbplist = kmem_alloc(ssize, KM_SLEEP);
	ucbp = (void *)cbplist;

	if (copyin(aiocb_arg, cbplist, ssize) ||
	    (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent32)))) {
		kmem_free(cbplist, ssize);
		return (EFAULT);
	}

	/* Event Ports  */
	if (sigev &&
	    (sigevk.sigev_notify == SIGEV_THREAD ||
	    sigevk.sigev_notify == SIGEV_PORT)) {
		if (sigevk.sigev_notify == SIGEV_THREAD) {
			pnotify.portnfy_port = sigevk.sigev_signo;
			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
		} else if (copyin(
		    (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
		    &pnotify, sizeof (pnotify))) {
			kmem_free(cbplist, ssize);
			return (EFAULT);
		}
		error = port_alloc_event(pnotify.portnfy_port,
		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
		if (error) {
			if (error == ENOMEM || error == EAGAIN)
				error = EAGAIN;
			else
				error = EINVAL;
			kmem_free(cbplist, ssize);
			return (error);
		}
		lio_head_port = pnotify.portnfy_port;
		portused = 1;
	}

	/*
	 * a list head should be allocated if notification is
	 * enabled for this list.
	 */
	head = NULL;

	if (mode_arg == LIO_WAIT || sigev) {
		mutex_enter(&aiop->aio_mutex);
		error = aio_lio_alloc(&head);
		mutex_exit(&aiop->aio_mutex);
		if (error)
			goto done;
		deadhead = 1;
		head->lio_nent = nent;
		head->lio_refcnt = nent;
		head->lio_port = -1;
		head->lio_portkev = NULL;
		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
			if (sqp == NULL) {
				error = EAGAIN;
				goto done;
			}
			sqp->sq_func = NULL;
			sqp->sq_next = NULL;
			sqp->sq_info.si_code = SI_ASYNCIO;
			sqp->sq_info.si_pid = curproc->p_pid;
			sqp->sq_info.si_ctid = PRCTID(curproc);
			sqp->sq_info.si_zoneid = getzoneid();
			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
			sqp->sq_info.si_signo = sigevk.sigev_signo;
			sqp->sq_info.si_value.sival_int =
			    sigevk.sigev_value.sival_int;
			head->lio_sigqp = sqp;
		} else {
			head->lio_sigqp = NULL;
		}
		if (pkevtp) {
			/*
			 * Prepare data to send when list of aiocb's has
			 * completed.
			 */
			port_init_event(pkevtp, (uintptr_t)sigev,
			    (void *)(uintptr_t)pnotify.portnfy_user,
			    NULL, head);
			pkevtp->portkev_events = AIOLIO;
			head->lio_portkev = pkevtp;
			head->lio_port = pnotify.portnfy_port;
		}
	}

	for (i = 0; i < nent; i++, ucbp++) {

		/* skip entry if it can't be copied. */
#ifdef	_SYSCALL32_IMPL
		cbp = (aiocb32_t *)(uintptr_t)*ucbp;
		if (cbp == NULL || copyin(cbp, aiocb32, sizeof (*aiocb32)))
#else
		cbp = (aiocb_t *)*ucbp;
		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb)))
#endif
		{
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}
#ifdef	_SYSCALL32_IMPL
		/*
		 * copy 32 bit structure into 64 bit structure
		 */
		aiocb_32ton(aiocb32, aiocb);
#endif /* _SYSCALL32_IMPL */

		/* skip if opcode for aiocb is LIO_NOP */
		mode = aiocb->aio_lio_opcode;
		if (mode == LIO_NOP) {
			cbp = NULL;
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* increment file descriptor's ref count. */
		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * check the permission of the partition
		 */
		if ((fp->f_flag & mode) == 0) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * common case where requests are to the same fd
		 * for the same r/w operation
		 * for UFS, need to set EBADFD
		 */
		vp = fp->f_vnode;
		if (fp != prev_fp || mode != prev_mode) {
			aio_func = check_vp(vp, mode);
			if (aio_func == NULL) {
				prev_fp = NULL;
				releasef(aiocb->aio_fildes);
				lio_set_uerror(&cbp->aio_resultp, EBADFD);
				aio_notsupported++;
				if (head) {
					mutex_enter(&aiop->aio_mutex);
					head->lio_nent--;
					head->lio_refcnt--;
					mutex_exit(&aiop->aio_mutex);
				}
				continue;
			} else {
				prev_fp = fp;
				prev_mode = mode;
			}
		}

		error = aio_req_setup(&reqp, aiop, aiocb,
		    (aio_result_t *)&cbp->aio_resultp, vp, 0);
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		reqp->aio_req_lio = head;
		deadhead = 0;

		/*
		 * Set the errno field now before sending the request to
		 * the driver to avoid a race condition
		 */
		(void) suword32(&cbp->aio_resultp.aio_errno,
		    EINPROGRESS);

		reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)cbp;

		event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
		if (aio_port | aio_thread) {
			port_kevent_t *lpkevp;
			/*
			 * Prepare data to send with each aiocb completed.
			 */
#ifdef	_SYSCALL32_IMPL
			if (aio_port) {
				void *paddr = (void *)(uintptr_t)
				    aiocb32->aio_sigevent.sigev_value.sival_ptr;
				if (copyin(paddr, &pnotify, sizeof (pnotify)))
					error = EFAULT;
			} else {	/* aio_thread */
				pnotify.portnfy_port =
				    aiocb32->aio_sigevent.sigev_signo;
				pnotify.portnfy_user =
				    aiocb32->aio_sigevent.sigev_value.sival_ptr;
			}
#else
			if (aio_port) {
				void *paddr =
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
				if (copyin(paddr, &pnotify, sizeof (pnotify)))
					error = EFAULT;
			} else {	/* aio_thread */
				pnotify.portnfy_port =
				    aiocb->aio_sigevent.sigev_signo;
				pnotify.portnfy_user =
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
			}
#endif	/* _SYSCALL32_IMPL */
			if (error)
				/* EMPTY */;
			else if (pkevtp != NULL &&
			    pnotify.portnfy_port == lio_head_port)
				error = port_dup_event(pkevtp, &lpkevp,
				    PORT_ALLOC_DEFAULT);
			else
				error = port_alloc_event(pnotify.portnfy_port,
				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
				    &lpkevp);
			if (error == 0) {
				port_init_event(lpkevp, (uintptr_t)cbp,
				    (void *)(uintptr_t)pnotify.portnfy_user,
				    aio_port_callback, reqp);
				lpkevp->portkev_events = event;
				reqp->aio_req_portkev = lpkevp;
				reqp->aio_req_port = pnotify.portnfy_port;
			}
		}

		/*
		 * send the request to driver.
		 */
		if (error == 0) {
			if (aiocb->aio_nbytes == 0) {
				clear_active_fd(aiocb->aio_fildes);
				aio_zerolen(reqp);
				continue;
			}
			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
			    CRED());
		}

		/*
		 * the fd's ref count is not decremented until the IO has
		 * completed unless there was an error.
		 */
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			if (error == ENOTSUP)
				aio_notsupported++;
			else
				aio_errors++;
			lio_set_error(reqp, portused);
		} else {
			clear_active_fd(aiocb->aio_fildes);
		}
	}

	if (aio_notsupported) {
		error = ENOTSUP;
	} else if (aio_errors) {
		/*
		 * return EIO if any request failed
		 */
		error = EIO;
	}

	if (mode_arg == LIO_WAIT) {
		mutex_enter(&aiop->aio_mutex);
		while (head->lio_refcnt > 0) {
			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
				mutex_exit(&aiop->aio_mutex);
				error = EINTR;
				goto done;
			}
		}
		mutex_exit(&aiop->aio_mutex);
		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_32);
	}

done:
	kmem_free(cbplist, ssize);
	if (deadhead) {
		if (head->lio_sigqp)
			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
		if (head->lio_portkev)
			port_free_event(head->lio_portkev);
		kmem_free(head, sizeof (aio_lio_t));
	}
	return (error);
}
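
/*
 * Added illustrative user-level sketch (not kernel code): a plain 32-bit
 * (non-largefile) list submission of the sort alio32() handles, using the
 * blocking LIO_WAIT mode.  Buffer sizes and offsets are placeholders.
 */
#if 0
#include <sys/types.h>
#include <aio.h>
#include <string.h>

static int
read_two_blocking(int fd, char *b0, char *b1, size_t len)
{
	struct aiocb cb0, cb1;
	struct aiocb *list[2] = { &cb0, &cb1 };

	(void) memset(&cb0, 0, sizeof (cb0));
	cb0.aio_fildes = fd;
	cb0.aio_buf = b0;
	cb0.aio_nbytes = len;
	cb0.aio_offset = 0;
	cb0.aio_lio_opcode = LIO_READ;

	cb1 = cb0;
	cb1.aio_buf = b1;
	cb1.aio_offset = (off_t)len;

	/* returns only after both requests are done (the LIO_WAIT path) */
	return (lio_listio(LIO_WAIT, list, 2, NULL));
}
#endif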
#ifdef _SYSCALL32_IMPL
void
aiocb_32ton(aiocb32_t *src, aiocb_t *dest)
{
	dest->aio_fildes = src->aio_fildes;
	dest->aio_buf = (caddr_t)(uintptr_t)src->aio_buf;
	dest->aio_nbytes = (size_t)src->aio_nbytes;
	dest->aio_offset = (off_t)src->aio_offset;
	dest->aio_reqprio = src->aio_reqprio;
	dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
	dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;

	/*
	 * See comment in sigqueue32() on handling of 32-bit
	 * sigvals in a 64-bit kernel.
	 */
	dest->aio_sigevent.sigev_value.sival_int =
	    (int)src->aio_sigevent.sigev_value.sival_int;
	dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
	    (uintptr_t)src->aio_sigevent.sigev_notify_function;
	dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
	    (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
	dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
	dest->aio_lio_opcode = src->aio_lio_opcode;
	dest->aio_state = src->aio_state;
	dest->aio__pad[0] = src->aio__pad[0];
}
#endif /* _SYSCALL32_IMPL */
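
/*
 * Added illustrative user-level sketch (not kernel code): retrieving an
 * AIO completion from an event port.  It is this port_get() retrieval
 * that triggers the kernel-side aio_port_callback() defined below.  The
 * per-request events carry the aiocb address in portev_object.
 */
#if 0
#include <port.h>
#include <aio.h>
#include <stdint.h>

static int
drain_one_completion(int port)
{
	port_event_t pe;

	if (port_get(port, &pe, NULL) != 0)
		return (-1);
	if (pe.portev_source == PORT_SOURCE_AIO) {
		struct aiocb *cb = (struct aiocb *)(uintptr_t)pe.portev_object;

		/* 0 means the request completed without error */
		return (aio_error(cb));
	}
	return (0);
}
#endif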
/*
 * aio_port_callback() is called just before the event is retrieved from the
 * port. The task of this callback function is to finish the work of the
 * transaction for the application, it means :
 * - copyout transaction data to the application
 *	(this thread is running in the right process context)
 * - keep trace of the transaction (update of counters).
 * - free allocated buffers
 * The aiocb pointer is the object element of the port_kevent_t structure.
 *
 * flag :
 *	PORT_CALLBACK_DEFAULT : do copyout and free resources
 *	PORT_CALLBACK_CLOSE   : don't do copyout, free resources
 */
/*ARGSUSED*/
int
aio_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp)
{
	aio_t		*aiop = curproc->p_aio;
	aio_req_t	*reqp = arg;
	struct iovec	*iov;
	struct buf	*bp;
	void		*resultp;

	if (pid != curproc->p_pid) {
		/* wrong proc !!, can not deliver data here ... */
		return (EACCES);
	}

	mutex_enter(&aiop->aio_portq_mutex);
	reqp->aio_req_portkev = NULL;
	aio_req_remove_portq(aiop, reqp); /* remove request from portq */
	mutex_exit(&aiop->aio_portq_mutex);
	aphysio_unlock(reqp);		/* unlock used pages */
	mutex_enter(&aiop->aio_mutex);
	if (reqp->aio_req_flags & AIO_COPYOUTDONE) {
		aio_req_free_port(aiop, reqp);	/* back to free list */
		mutex_exit(&aiop->aio_mutex);
		return (0);
	}

	iov = reqp->aio_req_uio.uio_iov;
	bp = &reqp->aio_req_buf;
	resultp = (void *)reqp->aio_req_resultp;
	if (flag == PORT_CALLBACK_DEFAULT)
		aio_copyout_result_port(iov, bp, resultp);
	aio_req_free_port(aiop, reqp);	/* request struct back to free list */
	mutex_exit(&aiop->aio_mutex);