/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*
 * Kernel asynchronous I/O.
 * This is only for raw devices now (as of Nov. 1993).
 */
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/fs/snode.h>
#include <sys/unistd.h>
#include <sys/cmn_err.h>
#include <vm/faultcode.h>
#include <sys/sysmacros.h>
#include <sys/procfs.h>
#include <sys/autoconf.h>
#include <sys/ddi_impldefs.h>
#include <sys/sunddi.h>
#include <sys/aio_impl.h>
#include <sys/debug.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/vmsystm.h>
#include <sys/fs/pxfs_ki.h>
#include <sys/contract/process_impl.h>
/*
 * external entry point.
 */
static int64_t kaioc(long, long, long, long, long, long);
static int kaio(ulong_t *, rval_t *);

#define	AIO_LARGEFILE	2
/*
 * implementation specific functions (private)
 */
static int alio(int, aiocb_t **, int, struct sigevent *);
static int aionotify(void);
static int aioinit(void);
static int aiostart(void);
static void alio_cleanup(aio_t *, aiocb_t **, int, int);
static int (*check_vp(struct vnode *, int))(vnode_t *, struct aio_req *,
    cred_t *);
static void lio_set_error(aio_req_t *, int portused);
static aio_t *aio_aiop_alloc();
static int aio_req_alloc(aio_req_t **, aio_result_t *);
static int aio_lio_alloc(aio_lio_t **);
static aio_req_t *aio_req_done(void *);
static aio_req_t *aio_req_remove(aio_req_t *);
static int aio_req_find(aio_result_t *, aio_req_t **);
static int aio_hash_insert(struct aio_req_t *, aio_t *);
static int aio_req_setup(aio_req_t **, aio_t *, aiocb_t *,
    aio_result_t *, vnode_t *, int);
static int aio_cleanup_thread(aio_t *);
static aio_lio_t *aio_list_get(aio_result_t *);
static void lio_set_uerror(void *, int);
extern void aio_zerolen(aio_req_t *);
static int aiowait(struct timeval *, int, long *);
static int aiowaitn(void *, uint_t, uint_t *, timespec_t *);
static int aio_unlock_requests(caddr_t iocblist, int iocb_index,
    aio_req_t *reqlist, aio_t *aiop, model_t model);
static int aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max);
static int aiosuspend(void *, int, struct timespec *, int,
    long *, int);
static int aliowait(int, void *, int, void *, int);
static int aioerror(void *, int);
static int aio_cancel(int, void *, long *, int);
static int arw(int, int, char *, int, offset_t, aio_result_t *, int);
static int aiorw(int, void *, int, int);

static int alioLF(int, void *, int, void *);
static int aio_req_setupLF(aio_req_t **, aio_t *, aiocb64_32_t *,
    aio_result_t *, vnode_t *, int);
static int alio32(int, void *, int, void *);
static int driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);
static int driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p);

#ifdef _SYSCALL32_IMPL
static void aiocb_LFton(aiocb64_32_t *, aiocb_t *);
void aiocb_32ton(aiocb32_t *, aiocb_t *);
#endif /* _SYSCALL32_IMPL */
/*
 * implementation specific functions (external)
 */
void aio_req_free(aio_t *, aio_req_t *);

/*
 * Event Port framework
 */
void aio_req_free_port(aio_t *, aio_req_t *);
static int aio_port_callback(void *, int *, pid_t, int, void *);
/*
 * This is the loadable module wrapper.
 */
#include <sys/modctl.h>
#include <sys/syscall.h>

#ifdef _LP64

static struct sysent kaio_sysent = {
    6,
    SE_NOUNLOAD | SE_64RVAL | SE_ARGC,
    (int (*)())kaioc
};

#ifdef _SYSCALL32_IMPL
static struct sysent kaio_sysent32 = {
    7,
    SE_NOUNLOAD | SE_64RVAL,
    kaio
};
#endif /* _SYSCALL32_IMPL */

#else /* _LP64 */

static struct sysent kaio_sysent = {
    7,
    SE_NOUNLOAD | SE_32RVAL1,
    kaio
};

#endif /* _LP64 */
/*
 * Module linkage information for the kernel.
 */
static struct modlsys modlsys = {
    &mod_syscallops,
    "kernel Async I/O",
    &kaio_sysent
};

#ifdef _SYSCALL32_IMPL
static struct modlsys modlsys32 = {
    &mod_syscallops32,
    "kernel Async I/O for 32 bit compatibility",
    &kaio_sysent32
};
#endif /* _SYSCALL32_IMPL */

static struct modlinkage modlinkage = {
    MODREV_1,
    &modlsys,
#ifdef _SYSCALL32_IMPL
    &modlsys32,
#endif
    NULL
};

int
_init(void)
{
    int retval;

    if ((retval = mod_install(&modlinkage)) != 0)
        return (retval);

    return (0);
}

int
_fini(void)
{
    int retval;

    retval = mod_remove(&modlinkage);

    return (retval);
}

int
_info(struct modinfo *modinfop)
{
    return (mod_info(&modlinkage, modinfop));
}
    switch ((int)a0 & ~AIO_POLL_BIT) {
    case AIOREAD:
        error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
            (offset_t)a4, (aio_result_t *)a5, FREAD);
        break;
    case AIOWRITE:
        error = arw((int)a0, (int)a1, (char *)a2, (int)a3,
            (offset_t)a4, (aio_result_t *)a5, FWRITE);
        break;
    case AIOWAIT:
        error = aiowait((struct timeval *)a1, (int)a2, &rval);
        break;
    case AIOWAITN:
        error = aiowaitn((void *)a1, (uint_t)a2, (uint_t *)a3,
            (timespec_t *)a4);
        break;
    case AIOLIO:
        error = alio((int)a1, (aiocb_t **)a2, (int)a3,
            (struct sigevent *)a4);
        break;
    case AIOLIOWAIT:
        error = aliowait((int)a1, (void *)a2, (int)a3,
            (struct sigevent *)a4, AIO_64);
        break;
    case AIOSUSPEND:
        error = aiosuspend((void *)a1, (int)a2, (timespec_t *)a3,
            (int)a4, &rval, AIO_64);
        break;
    case AIOERROR:
        error = aioerror((void *)a1, AIO_64);
        break;
    case AIOAREAD:
        error = aiorw((int)a0, (void *)a1, FREAD, AIO_64);
        break;
    case AIOAWRITE:
        error = aiorw((int)a0, (void *)a1, FWRITE, AIO_64);
        break;
    case AIOCANCEL:
        error = aio_cancel((int)a1, (void *)a2, &rval, AIO_64);
        break;

    /*
     * The large file related stuff is valid only for
     * 32 bit kernel and not for 64 bit kernel
     * On 64 bit kernel we convert large file calls
     * to regular 64bit calls.
     */
    }

    if (error)
        return ((int64_t)set_errno(error));
#if defined(_LITTLE_ENDIAN)
    off = ((u_offset_t)uap[5] << 32) | (u_offset_t)uap[4];
#else
    off = ((u_offset_t)uap[4] << 32) | (u_offset_t)uap[5];
#endif

    switch (uap[0] & ~AIO_POLL_BIT) {
    case AIOREAD:
        /*
         * It must be the 32 bit system call on 64 bit kernel
         */
        return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
            (int)uap[3], off, (aio_result_t *)uap[6], FREAD));
    case AIOWRITE:
        return (arw((int)uap[0], (int)uap[1], (char *)uap[2],
            (int)uap[3], off, (aio_result_t *)uap[6], FWRITE));
    case AIOWAIT:
        error = aiowait((struct timeval *)uap[1], (int)uap[2],
            &rval);
        break;
    case AIOWAITN:
        error = aiowaitn((void *)uap[1], (uint_t)uap[2],
            (uint_t *)uap[3], (timespec_t *)uap[4]);
        break;
    case AIONOTIFY:
        return (aionotify());
    case AIOLIO:
        return (alio32((int)uap[1], (void *)uap[2], (int)uap[3],
            (void *)uap[4]));
    case AIOLIOWAIT:
        return (aliowait((int)uap[1], (void *)uap[2],
            (int)uap[3], (struct sigevent *)uap[4], AIO_32));
    case AIOSUSPEND:
        error = aiosuspend((void *)uap[1], (int)uap[2],
            (timespec_t *)uap[3], (int)uap[4],
            &rval, AIO_32);
        break;
    case AIOERROR:
        return (aioerror((void *)uap[1], AIO_32));
    case AIOAREAD:
        return (aiorw((int)uap[0], (void *)uap[1],
            FREAD, AIO_32));
    case AIOAWRITE:
        return (aiorw((int)uap[0], (void *)uap[1],
            FWRITE, AIO_32));
    case AIOCANCEL:
        error = (aio_cancel((int)uap[1], (void *)uap[2], &rval,
            AIO_32));
        break;
    case AIOLIO64:
        return (alioLF((int)uap[1], (void *)uap[2],
            (int)uap[3], (void *)uap[4]));
    case AIOLIOWAIT64:
        return (aliowait(uap[1], (void *)uap[2],
            (int)uap[3], (void *)uap[4], AIO_LARGEFILE));
    case AIOSUSPEND64:
        error = aiosuspend((void *)uap[1], (int)uap[2],
            (timespec_t *)uap[3], (int)uap[4], &rval,
            AIO_LARGEFILE);
        break;
    case AIOERROR64:
        return (aioerror((void *)uap[1], AIO_LARGEFILE));
    case AIOAREAD64:
        return (aiorw((int)uap[0], (void *)uap[1], FREAD,
            AIO_LARGEFILE));
    case AIOAWRITE64:
        return (aiorw((int)uap[0], (void *)uap[1], FWRITE,
            AIO_LARGEFILE));
    case AIOCANCEL64:
        error = (aio_cancel((int)uap[1], (void *)uap[2],
            &rval, AIO_LARGEFILE));
        break;
    }
/*
 * wake up LWPs in this process that are sleeping in
 * aiowait().
 */
    aiop = curproc->p_aio;

    mutex_enter(&aiop->aio_mutex);
    aiop->aio_notifycnt++;
    cv_broadcast(&aiop->aio_waitcv);
    mutex_exit(&aiop->aio_mutex);
static int
timeval2reltime(struct timeval *timout, timestruc_t *rqtime,
    timestruc_t **rqtp, int *blocking)
{
#ifdef _SYSCALL32_IMPL
    struct timeval32 wait_time_32;
#endif
    struct timeval wait_time;
    model_t model = get_udatamodel();

    if (timout == NULL) {		/* wait indefinitely */

    /*
     * Need to correctly compare with the -1 passed in for a user
     * address pointer, with both 32 bit and 64 bit apps.
     */
    if (model == DATAMODEL_NATIVE) {
        if ((intptr_t)timout == (intptr_t)-1) {	/* don't wait */

        if (copyin(timout, &wait_time, sizeof (wait_time)))

#ifdef _SYSCALL32_IMPL
        /*
         * -1 from a 32bit app. It will not get sign extended.
         */
        if ((intptr_t)timout == (intptr_t)((uint32_t)-1)) {

        if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))

        TIMEVAL32_TO_TIMEVAL(&wait_time, &wait_time_32);
#endif /* _SYSCALL32_IMPL */

    if (wait_time.tv_sec == 0 && wait_time.tv_usec == 0) {	/* don't wait */

    if (wait_time.tv_sec < 0 ||
        wait_time.tv_usec < 0 || wait_time.tv_usec >= MICROSEC)

    rqtime->tv_sec = wait_time.tv_sec;
    rqtime->tv_nsec = wait_time.tv_usec * 1000;
static int
timespec2reltime(timespec_t *timout, timestruc_t *rqtime,
    timestruc_t **rqtp, int *blocking)
{
#ifdef _SYSCALL32_IMPL
    timespec32_t wait_time_32;
#endif
    model_t model = get_udatamodel();

    if (timout == NULL) {

    if (model == DATAMODEL_NATIVE) {
        if (copyin(timout, rqtime, sizeof (*rqtime)))

#ifdef _SYSCALL32_IMPL
        if (copyin(timout, &wait_time_32, sizeof (wait_time_32)))

        TIMESPEC32_TO_TIMESPEC(rqtime, &wait_time_32);
#endif /* _SYSCALL32_IMPL */

    if (rqtime->tv_sec == 0 && rqtime->tv_nsec == 0) {

    if (rqtime->tv_sec < 0 ||
        rqtime->tv_nsec < 0 || rqtime->tv_nsec >= NANOSEC)
static int
aiowait(
    struct timeval	*timout,
    int	dontblockflg,
    long	*rval)
{
    aiop = curproc->p_aio;

    /*
     * Establish the absolute future time for the timeout.
     */
    error = timeval2reltime(timout, &rqtime, &rqtp, &blocking);

    timecheck = timechanged;
    timespecadd(rqtp, &now);

    mutex_enter(&aiop->aio_mutex);

    /* process requests on poll queue */
    if (aiop->aio_pollq) {
        mutex_exit(&aiop->aio_mutex);
        mutex_enter(&aiop->aio_mutex);

    if ((reqp = aio_req_remove(NULL)) != NULL) {
        *rval = (long)reqp->aio_req_resultp;

    /* user-level done queue might not be empty */
    if (aiop->aio_notifycnt > 0) {
        aiop->aio_notifycnt--;

    /* don't block if no outstanding aio */
    if (aiop->aio_outstanding == 0 && dontblockflg) {

    status = cv_waituntil_sig(&aiop->aio_waitcv,
        &aiop->aio_mutex, rqtp, timecheck);

    if (status > 0)		/* check done queue again */
    if (status == 0) {	/* interrupted by a signal */
    } else {		/* timer expired */

    mutex_exit(&aiop->aio_mutex);

    aphysio_unlock(reqp);
    aio_copyout_result(reqp);
    mutex_enter(&aiop->aio_mutex);
    aio_req_free(aiop, reqp);
    mutex_exit(&aiop->aio_mutex);
/*
 * aiowaitn can be used to reap completed asynchronous requests submitted with
 * lio_listio, aio_read or aio_write.
 * This function only reaps asynchronous raw I/Os.
 */
static int
aiowaitn(void *uiocb, uint_t nent, uint_t *nwait, timespec_t *timout)
{
    aio_req_t	*reqlist = NULL;
    caddr_t	iocblist = NULL;	/* array of iocb ptr's */
    uint_t	waitcnt, cnt = 0;	/* iocb cnt */
    size_t	iocbsz;			/* users iocb size */
    size_t	riocbsz;		/* returned iocb size */
    model_t	model = get_udatamodel();

    aiop = curproc->p_aio;
    if (aiop == NULL || nent == 0 || nent > _AIO_LISTIO_MAX)

    if (aiop->aio_outstanding == 0)

    if (copyin(nwait, &waitcnt, sizeof (uint_t)))

    /* set *nwait to zero, if we must return prematurely */
    if (copyout(&cnt, nwait, sizeof (uint_t)))

    error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);

    if (model == DATAMODEL_NATIVE)
        iocbsz = (sizeof (aiocb_t *) * nent);
#ifdef _SYSCALL32_IMPL
    else
        iocbsz = (sizeof (caddr32_t) * nent);
#endif /* _SYSCALL32_IMPL */

    /*
     * Only one aio_waitn call is allowed at a time.
     * The active aio_waitn will collect all requests
     * out of the "done" list and if necessary it will wait
     * for some/all pending requests to fulfill the nwait
     * parameter.
     * A second or further aio_waitn calls will sleep here
     * until the active aio_waitn finishes and leaves the kernel.
     * If the second call does not block (poll), then return
     * immediately with the error code : EAGAIN.
     * If the second call should block, then sleep here, but
     * do not touch the timeout. The timeout starts when this
     * aio_waitn-call becomes active.
     */

    mutex_enter(&aiop->aio_mutex);

    while (aiop->aio_flags & AIO_WAITN) {
        mutex_exit(&aiop->aio_mutex);

        /* block, no timeout */
        aiop->aio_flags |= AIO_WAITN_PENDING;
        if (!cv_wait_sig(&aiop->aio_waitncv, &aiop->aio_mutex)) {
            mutex_exit(&aiop->aio_mutex);

    /*
     * Establish the absolute future time for the timeout.
     */
    timecheck = timechanged;
    timespecadd(rqtp, &now);

    if (iocbsz > aiop->aio_iocbsz && aiop->aio_iocb != NULL) {
        kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);
        aiop->aio_iocb = NULL;

    if (aiop->aio_iocb == NULL) {
        iocblist = kmem_zalloc(iocbsz, KM_NOSLEEP);
        if (iocblist == NULL) {
            mutex_exit(&aiop->aio_mutex);

        aiop->aio_iocb = (aiocb_t **)iocblist;
        aiop->aio_iocbsz = iocbsz;

        iocblist = (char *)aiop->aio_iocb;

    aiop->aio_waitncnt = waitcnt;
    aiop->aio_flags |= AIO_WAITN;

    /* push requests on poll queue to done queue */
    if (aiop->aio_pollq) {
        mutex_exit(&aiop->aio_mutex);
        mutex_enter(&aiop->aio_mutex);

    /* check for requests on done queue */
    if (aiop->aio_doneq) {
        cnt += aio_reqlist_concat(aiop, &reqlist, nent - cnt);
        aiop->aio_waitncnt = waitcnt - cnt;

    /* user-level done queue might not be empty */
    if (aiop->aio_notifycnt > 0) {
        aiop->aio_notifycnt--;

    /*
     * if we are here second time as a result of timer
     * expiration, we reset error if there are enough
     * aiocb's to satisfy request.
     * We return also if all requests are already done
     * and we picked up the whole done queue.
     */
    if ((cnt >= waitcnt) || (cnt > 0 && aiop->aio_pending == 0 &&
        aiop->aio_doneq == NULL)) {

    if ((cnt < waitcnt) && blocking) {
        int rval = cv_waituntil_sig(&aiop->aio_waitcv,
            &aiop->aio_mutex, rqtp, timecheck);

    mutex_exit(&aiop->aio_mutex);

    iocb_index = aio_unlock_requests(iocblist, iocb_index, reqlist,
        aiop, model);

    if (model == DATAMODEL_NATIVE)
        riocbsz = (sizeof (aiocb_t *) * cnt);
#ifdef _SYSCALL32_IMPL
    else
        riocbsz = (sizeof (caddr32_t) * cnt);
#endif /* _SYSCALL32_IMPL */

    if (copyout(iocblist, uiocb, riocbsz) ||
        copyout(&cnt, nwait, sizeof (uint_t)))

    /* check if there is another thread waiting for execution */
    mutex_enter(&aiop->aio_mutex);
    aiop->aio_flags &= ~AIO_WAITN;
    if (aiop->aio_flags & AIO_WAITN_PENDING) {
        aiop->aio_flags &= ~AIO_WAITN_PENDING;
        cv_signal(&aiop->aio_waitncv);
    }
    mutex_exit(&aiop->aio_mutex);
/*
 * aio_unlock_requests
 * copies out the result of the request as well as the return value.
 * It builds the list of completed asynchronous requests,
 * unlocks the allocated memory ranges and
 * puts the aio request structure back into the free list.
 */
    aio_req_t	*reqp, *nreqp;

    if (model == DATAMODEL_NATIVE) {
        for (reqp = reqlist; reqp != NULL; reqp = nreqp) {
            (((caddr_t *)iocblist)[iocb_index++]) =
                reqp->aio_req_iocb.iocb;
            nreqp = reqp->aio_req_next;
            aphysio_unlock(reqp);
            aio_copyout_result(reqp);
            mutex_enter(&aiop->aio_mutex);
            aio_req_free(aiop, reqp);
            mutex_exit(&aiop->aio_mutex);
        }
    }
#ifdef _SYSCALL32_IMPL
    else {
        for (reqp = reqlist; reqp != NULL; reqp = nreqp) {
            ((caddr32_t *)iocblist)[iocb_index++] =
                reqp->aio_req_iocb.iocb32;
            nreqp = reqp->aio_req_next;
            aphysio_unlock(reqp);
            aio_copyout_result(reqp);
            mutex_enter(&aiop->aio_mutex);
            aio_req_free(aiop, reqp);
            mutex_exit(&aiop->aio_mutex);
        }
    }
#endif /* _SYSCALL32_IMPL */
/*
 * aio_reqlist_concat
 * moves "max" elements from the done queue to the reqlist queue and removes
 * the AIO_DONEQ flag.
 * - reqlist queue is a simple linked list
 * - done queue is a double linked list
 */
static int
aio_reqlist_concat(aio_t *aiop, aio_req_t **reqlist, int max)
{
    aio_req_t *q2, *q2work, *list;

    q2 = aiop->aio_doneq;

        q2work->aio_req_flags &= ~AIO_DONEQ;
        q2work = q2work->aio_req_next;

        /* all elements revised */
        q2->aio_req_prev->aio_req_next = list;

        aiop->aio_doneq = NULL;

        /*
         * max < elements in the doneq
         * detach only the required amount of elements
         */
        q2work->aio_req_prev->aio_req_next = list;

        aiop->aio_doneq = q2work;
        q2work->aio_req_prev = q2->aio_req_prev;
        q2->aio_req_prev->aio_req_next = q2work;
static int
aiosuspend(void *aiocb, int nent, struct timespec *timout, int flag,
    long *rval, int run_mode)
{
    aio_req_t	*reqp, *found, *next;
    caddr_t	cbplist = NULL;
    aiocb_t	*cbp, **ucbp;
#ifdef _SYSCALL32_IMPL
#endif /* _SYSCALL32_IMPL */
    model_t	model = get_udatamodel();

    aiop = curproc->p_aio;
    if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)

    /*
     * Establish the absolute future time for the timeout.
     */
    error = timespec2reltime(timout, &rqtime, &rqtp, &blocking);

    timecheck = timechanged;
    timespecadd(rqtp, &now);

    /*
     * If we are not blocking and there's no IO complete
     */
    if (!blocking && (aiop->aio_pollq == NULL) &&
        (aiop->aio_doneq == NULL)) {

    if (model == DATAMODEL_NATIVE)
        ssize = (sizeof (aiocb_t *) * nent);
#ifdef _SYSCALL32_IMPL
    else
        ssize = (sizeof (caddr32_t) * nent);
#endif /* _SYSCALL32_IMPL */

    cbplist = kmem_alloc(ssize, KM_NOSLEEP);

    if (copyin(aiocb, cbplist, ssize)) {

    /*
     * we need to get the aio_cleanupq_mutex since we call
     * aio_req_done().
     */
    mutex_enter(&aiop->aio_cleanupq_mutex);
    mutex_enter(&aiop->aio_mutex);

    /* push requests on poll queue to done queue */
    if (aiop->aio_pollq) {
        mutex_exit(&aiop->aio_mutex);
        mutex_exit(&aiop->aio_cleanupq_mutex);
        mutex_enter(&aiop->aio_cleanupq_mutex);
        mutex_enter(&aiop->aio_mutex);

    /* check for requests on done queue */
    if (aiop->aio_doneq) {
        if (model == DATAMODEL_NATIVE)
            ucbp = (aiocb_t **)cbplist;
#ifdef _SYSCALL32_IMPL
        else
            ucbp32 = (caddr32_t *)cbplist;
#endif /* _SYSCALL32_IMPL */
        for (i = 0; i < nent; i++) {
            if (model == DATAMODEL_NATIVE) {
                if ((cbp = *ucbp++) == NULL)
                if (run_mode != AIO_LARGEFILE)

                    cbp64 = (aiocb64_32_t *)cbp;
                        &cbp64->aio_resultp);
            }
#ifdef _SYSCALL32_IMPL
            else {
                if (run_mode == AIO_32) {
                        (aiocb32_t *)(uintptr_t)
                    reqp = aio_req_done(
                        &cbp32->aio_resultp);
                } else if (run_mode == AIO_LARGEFILE) {
                        (aiocb64_32_t *)(uintptr_t)
                    reqp = aio_req_done(
                        &cbp64->aio_resultp);
                }
            }
#endif /* _SYSCALL32_IMPL */
            reqp->aio_req_next = found;

            if (aiop->aio_doneq == NULL)
        }

    if (aiop->aio_notifycnt > 0) {
        /*
         * nothing on the kernel's queue. the user
         * has notified the kernel that it has items
         * on a user-level queue.
         */
        aiop->aio_notifycnt--;

    /* don't block if nothing is outstanding */
    if (aiop->aio_outstanding == 0) {

    /*
     * drop the aio_cleanupq_mutex as we are
     */
    mutex_exit(&aiop->aio_cleanupq_mutex);
    rv = cv_waituntil_sig(&aiop->aio_waitcv,
        &aiop->aio_mutex, rqtp, timecheck);
    /*
     * we have to drop aio_mutex and
     * grab it in the right order.
     */
    mutex_exit(&aiop->aio_mutex);
    mutex_enter(&aiop->aio_cleanupq_mutex);
    mutex_enter(&aiop->aio_mutex);

    if (rv > 0)		/* check done queue again */
    if (rv == 0)	/* interrupted by a signal */
    else		/* timer expired */

    mutex_exit(&aiop->aio_mutex);
    mutex_exit(&aiop->aio_cleanupq_mutex);
    for (reqp = found; reqp != NULL; reqp = next) {
        next = reqp->aio_req_next;
        aphysio_unlock(reqp);
        aio_copyout_result(reqp);
        mutex_enter(&aiop->aio_mutex);
        aio_req_free(aiop, reqp);
        mutex_exit(&aiop->aio_mutex);
    }
    kmem_free(cbplist, ssize);
/*
 * initialize aio by allocating an aio_t struct for this
 * process.
 */
    proc_t *p = curproc;

    mutex_enter(&p->p_lock);
    if ((aiop = p->p_aio) == NULL) {
        aiop = aio_aiop_alloc();

    mutex_exit(&p->p_lock);
/*
 * start a special thread that will cleanup after aio requests
 * that are preventing a segment from being unmapped. as_unmap()
 * blocks until all physio to this segment is completed. this
 * doesn't happen until all the pages in this segment are not
 * SOFTLOCKed. Some pages will be SOFTLOCKed when there are aio
 * requests still outstanding. this special thread will make sure
 * that these SOFTLOCKed pages will eventually be SOFTUNLOCKed.
 *
 * this function will return an error if the process has only
 * one LWP. the assumption is that the caller is a separate LWP
 * that remains blocked in the kernel for the life of this process.
 */
    proc_t *p = curproc;
    int first, error = 0;

    if (p->p_lwpcnt == 1)

    mutex_enter(&p->p_lock);
    if ((aiop = p->p_aio) == NULL)

    first = aiop->aio_ok;
    if (aiop->aio_ok == 0)

    mutex_exit(&p->p_lock);
    if (error == 0 && first == 0) {
        return (aio_cleanup_thread(aiop));
        /* should return only to exit */
/*
 * Associate an aiocb with a port.
 * This function is used by aiorw() to associate a transaction with a port.
 * Allocate an event port structure (port_alloc_event()) and store the
 * delivered user pointer (portnfy_user) in the portkev_user field of the
 * port_kevent_t structure.
 * The aio_req_portkev pointer in the aio_req_t structure was added to identify
 * the port association.
 */
static int
aio_req_assoc_port_rw(port_notify_t *pntfy, aiocb_t *cbp,
    aio_req_t *reqp, int event)
{
    port_kevent_t *pkevp = NULL;

    error = port_alloc_event(pntfy->portnfy_port, PORT_ALLOC_DEFAULT,
        PORT_SOURCE_AIO, &pkevp);

    if ((error == ENOMEM) || (error == EAGAIN))

    port_init_event(pkevp, (uintptr_t)cbp, pntfy->portnfy_user,
        aio_port_callback, reqp);
    pkevp->portkev_events = event;
    reqp->aio_req_portkev = pkevp;
    reqp->aio_req_port = pntfy->portnfy_port;
/*
 * Asynchronous list IO. A chain of aiocb's is copied in
 * one at a time. If the aiocb is invalid, it is skipped.
 * For each aiocb, the appropriate driver entry point is
 * called. Optimize for the common case where the list
 * of requests is to the same file descriptor.
 *
 * One possible optimization is to define a new driver entry
 * point that supports a list of IO requests. Whether this
 * improves performance depends somewhat on the driver's
 * locking strategy. Processing a list could adversely impact
 * the driver's interrupt latency.
 */
static int
alio(
    int		mode_arg,
    aiocb_t	**aiocb_arg,
    int		nent,
    struct sigevent	*sigev)
{
    file_t	*prev_fp = NULL;
    aiocb_t	*aiocb = &cb;
    struct sigevent	sigevk;
    int		aio_notsupported = 0;
    port_kevent_t	*pkevtp = NULL;
    port_notify_t	pnotify;

    aiop = curproc->p_aio;
    if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)

    ssize = (sizeof (aiocb_t *) * nent);
    cbplist = kmem_alloc(ssize, KM_SLEEP);
    ucbp = (aiocb_t **)cbplist;

    if (copyin(aiocb_arg, cbplist, ssize) ||
        (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent)))) {
        kmem_free(cbplist, ssize);

    if (sigev &&
        (sigevk.sigev_notify == SIGEV_THREAD ||
        sigevk.sigev_notify == SIGEV_PORT)) {
        if (sigevk.sigev_notify == SIGEV_THREAD) {
            pnotify.portnfy_port = sigevk.sigev_signo;
            pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
        } else if (copyin(sigevk.sigev_value.sival_ptr,
            &pnotify, sizeof (pnotify))) {
            kmem_free(cbplist, ssize);

        error = port_alloc_event(pnotify.portnfy_port,
            PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);

        if (error == ENOMEM || error == EAGAIN)

            kmem_free(cbplist, ssize);

        lio_head_port = pnotify.portnfy_port;

    /*
     * a list head should be allocated if notification is
     * enabled for this list.
     */
    if (mode_arg == LIO_WAIT || sigev) {
        mutex_enter(&aiop->aio_mutex);
        error = aio_lio_alloc(&head);
        mutex_exit(&aiop->aio_mutex);

        head->lio_nent = nent;
        head->lio_refcnt = nent;
        head->lio_port = -1;
        head->lio_portkev = NULL;
        if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
            sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
            sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);

            sqp->sq_func = NULL;
            sqp->sq_next = NULL;
            sqp->sq_info.si_code = SI_ASYNCIO;
            sqp->sq_info.si_pid = curproc->p_pid;
            sqp->sq_info.si_ctid = PRCTID(curproc);
            sqp->sq_info.si_zoneid = getzoneid();
            sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
            sqp->sq_info.si_signo = sigevk.sigev_signo;
            sqp->sq_info.si_value = sigevk.sigev_value;
            head->lio_sigqp = sqp;
        } else {
            head->lio_sigqp = NULL;
        }

        /*
         * Prepare data to send when list of aiocb's
         */
            port_init_event(pkevtp, (uintptr_t)sigev,
                (void *)(uintptr_t)pnotify.portnfy_user,

            pkevtp->portkev_events = AIOLIO;
            head->lio_portkev = pkevtp;
            head->lio_port = pnotify.portnfy_port;

    for (i = 0; i < nent; i++, ucbp++) {

        /* skip entry if it can't be copied. */
        if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
            mutex_enter(&aiop->aio_mutex);
            mutex_exit(&aiop->aio_mutex);

        /* skip if opcode for aiocb is LIO_NOP */
        mode = aiocb->aio_lio_opcode;
        if (mode == LIO_NOP) {
            mutex_enter(&aiop->aio_mutex);
            mutex_exit(&aiop->aio_mutex);

        /* increment file descriptor's ref count. */
        if ((fp = getf(aiocb->aio_fildes)) == NULL) {
            lio_set_uerror(&cbp->aio_resultp, EBADF);
            mutex_enter(&aiop->aio_mutex);
            mutex_exit(&aiop->aio_mutex);

        /*
         * check the permission of the partition
         */
        if ((fp->f_flag & mode) == 0) {
            releasef(aiocb->aio_fildes);
            lio_set_uerror(&cbp->aio_resultp, EBADF);
            mutex_enter(&aiop->aio_mutex);
            mutex_exit(&aiop->aio_mutex);

        /*
         * common case where requests are to the same fd
         * for the same r/w operation.
         * for UFS, need to set EBADFD
         */
        if (fp != prev_fp || mode != prev_mode) {
            aio_func = check_vp(vp, mode);
            if (aio_func == NULL) {
                releasef(aiocb->aio_fildes);
                lio_set_uerror(&cbp->aio_resultp, EBADFD);
                mutex_enter(&aiop->aio_mutex);
                mutex_exit(&aiop->aio_mutex);

        error = aio_req_setup(&reqp, aiop, aiocb,
            &cbp->aio_resultp, vp, 0);

            releasef(aiocb->aio_fildes);
            lio_set_uerror(&cbp->aio_resultp, error);
            mutex_enter(&aiop->aio_mutex);
            mutex_exit(&aiop->aio_mutex);

        reqp->aio_req_lio = head;

        /*
         * Set the errno field now before sending the request to
         * the driver to avoid a race condition
         */
        (void) suword32(&cbp->aio_resultp.aio_errno,

        reqp->aio_req_iocb.iocb = (caddr_t)cbp;

        event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
        aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
        aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
        if (aio_port | aio_thread) {
            port_kevent_t *lpkevp;
            /*
             * Prepare data to send with each aiocb completed.
             */
                    aiocb->aio_sigevent.sigev_value.sival_ptr;
                if (copyin(paddr, &pnotify, sizeof (pnotify)))
            } else {	/* aio_thread */
                pnotify.portnfy_port =
                    aiocb->aio_sigevent.sigev_signo;
                pnotify.portnfy_user =
                    aiocb->aio_sigevent.sigev_value.sival_ptr;
            }

            else if (pkevtp != NULL &&
                pnotify.portnfy_port == lio_head_port)
                error = port_dup_event(pkevtp, &lpkevp,
                    PORT_ALLOC_DEFAULT);
            else
                error = port_alloc_event(pnotify.portnfy_port,
                    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,

                port_init_event(lpkevp, (uintptr_t)cbp,
                    (void *)(uintptr_t)pnotify.portnfy_user,
                    aio_port_callback, reqp);
                lpkevp->portkev_events = event;
                reqp->aio_req_portkev = lpkevp;
                reqp->aio_req_port = pnotify.portnfy_port;
        }

        /*
         * send the request to driver.
         */
            if (aiocb->aio_nbytes == 0) {
                clear_active_fd(aiocb->aio_fildes);

            error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,

        /*
         * the fd's ref count is not decremented until the IO has
         * completed unless there was an error.
         */
            releasef(aiocb->aio_fildes);
            lio_set_uerror(&cbp->aio_resultp, error);
            mutex_enter(&aiop->aio_mutex);
            mutex_exit(&aiop->aio_mutex);
            if (error == ENOTSUP)

            lio_set_error(reqp, portused);

            clear_active_fd(aiocb->aio_fildes);
    }

    if (aio_notsupported) {
    } else if (aio_errors) {
        /*
         * return EIO if any request failed
         */

    if (mode_arg == LIO_WAIT) {
        mutex_enter(&aiop->aio_mutex);
        while (head->lio_refcnt > 0) {
            if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
                mutex_exit(&aiop->aio_mutex);
        }
        mutex_exit(&aiop->aio_mutex);
        alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_64);
    }

    kmem_free(cbplist, ssize);

    if (head->lio_sigqp)
        kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
    if (head->lio_portkev)
        port_free_event(head->lio_portkev);
    kmem_free(head, sizeof (aio_lio_t));
/*
 * Asynchronous list IO.
 * If list I/O is called with LIO_WAIT it can still return
 * before all the I/O's are completed if a signal is caught
 * or if the list includes UFS I/O requests. If this happens,
 * libaio will call aliowait() to wait for the I/O's to
 * complete
 */
    aiocb_t	*cbp, **ucbp;
#ifdef _SYSCALL32_IMPL
    aiocb64_32_t	*cbp64;
#endif
    model_t	model = get_udatamodel();

    aiop = curproc->p_aio;
    if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)

    if (model == DATAMODEL_NATIVE)
        ssize = (sizeof (aiocb_t *) * nent);
#ifdef _SYSCALL32_IMPL
    else
        ssize = (sizeof (caddr32_t) * nent);
#endif /* _SYSCALL32_IMPL */

    cbplist = kmem_alloc(ssize, KM_SLEEP);

    if (model == DATAMODEL_NATIVE)
        ucbp = (aiocb_t **)cbplist;
#ifdef _SYSCALL32_IMPL
    else
        ucbp32 = (caddr32_t *)cbplist;
#endif /* _SYSCALL32_IMPL */

    if (copyin(aiocb, cbplist, ssize)) {

    /*
     * To find the list head, we go through the
     * list of aiocb structs, find the request
     * it is for, then get the list head that reqp
     * points to.
     */
    for (i = 0; i < nent; i++) {
        if (model == DATAMODEL_NATIVE) {
            /*
             * Since we are only checking for a NULL pointer
             * Following should work on both native data sizes
             * as well as for largefile aiocb.
             */
            if ((cbp = *ucbp++) == NULL)
            if (run_mode != AIO_LARGEFILE)
                if (head = aio_list_get(&cbp->aio_resultp))

                /*
                 * This is a case when largefile call is
                 * made on 32 bit kernel.
                 * Treat each pointer as pointer to
                 */
                if (head = aio_list_get((aio_result_t *)
                    &(((aiocb64_32_t *)cbp)->aio_resultp)))
        }
#ifdef _SYSCALL32_IMPL
        else {
            if (run_mode == AIO_LARGEFILE) {
                if ((cbp64 = (aiocb64_32_t *)
                    (uintptr_t)*ucbp32++) == NULL)
                if (head = aio_list_get((aio_result_t *)
                    &cbp64->aio_resultp))
            } else if (run_mode == AIO_32) {
                if ((cbp32 = (aiocb32_t *)
                    (uintptr_t)*ucbp32++) == NULL)
                if (head = aio_list_get((aio_result_t *)
                    &cbp32->aio_resultp))
            }
        }
#endif /* _SYSCALL32_IMPL */
    }

    mutex_enter(&aiop->aio_mutex);
    while (head->lio_refcnt > 0) {
        if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
            mutex_exit(&aiop->aio_mutex);
    }
    mutex_exit(&aiop->aio_mutex);
    alio_cleanup(aiop, (aiocb_t **)cbplist, nent, run_mode);

    kmem_free(cbplist, ssize);
static aio_lio_t *
aio_list_get(aio_result_t *resultp)
{
    aio_lio_t	*head = NULL;

    aiop = curproc->p_aio;

    index = AIO_HASH(resultp);
    bucket = &aiop->aio_hash[index];
    for (reqp = *bucket; reqp != NULL;
        reqp = reqp->aio_hash_next) {
        if (reqp->aio_req_resultp == resultp) {
            head = reqp->aio_req_lio;
static void
lio_set_uerror(void *resultp, int error)
{
    /*
     * the resultp field is a pointer to where the
     * error should be written out to the user's
     * program.
     */
    if (get_udatamodel() == DATAMODEL_NATIVE) {
        (void) sulword(&((aio_result_t *)resultp)->aio_return,
            (ssize_t)-1);
        (void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
#ifdef _SYSCALL32_IMPL
    } else {
        (void) suword32(&((aio_result32_t *)resultp)->aio_return,
            (uint_t)-1);
        (void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
#endif /* _SYSCALL32_IMPL */
    }
/*
 * do cleanup completion for all requests in list. memory for
 * each request is also freed.
 */
static void
alio_cleanup(aio_t *aiop, aiocb_t **cbp, int nent, int run_mode)
{
    aio_result_t *resultp;
    aiocb64_32_t *aiocb_64;

    for (i = 0; i < nent; i++) {
        if (get_udatamodel() == DATAMODEL_NATIVE) {
            if (run_mode == AIO_LARGEFILE) {
                aiocb_64 = (aiocb64_32_t *)cbp[i];
                resultp = (aio_result_t *)
                    &aiocb_64->aio_resultp;
            } else
                resultp = &cbp[i]->aio_resultp;
        }
#ifdef _SYSCALL32_IMPL
        else {
            aiocb32_t *aiocb_32;

            cbp32 = (caddr32_t *)cbp;
            if (cbp32[i] == NULL)
            if (run_mode == AIO_32) {
                aiocb_32 = (aiocb32_t *)(uintptr_t)cbp32[i];
                resultp = (aio_result_t *)&aiocb_32->
                    aio_resultp;
            } else if (run_mode == AIO_LARGEFILE) {
                aiocb_64 = (aiocb64_32_t *)(uintptr_t)cbp32[i];
                resultp = (aio_result_t *)&aiocb_64->
                    aio_resultp;
            }
        }
#endif /* _SYSCALL32_IMPL */
        /*
         * we need to get the aio_cleanupq_mutex since we call
         * aio_req_done().
         */
        mutex_enter(&aiop->aio_cleanupq_mutex);
        mutex_enter(&aiop->aio_mutex);
        reqp = aio_req_done(resultp);
        mutex_exit(&aiop->aio_mutex);
        mutex_exit(&aiop->aio_cleanupq_mutex);

        aphysio_unlock(reqp);
        aio_copyout_result(reqp);
        mutex_enter(&aiop->aio_mutex);
        aio_req_free(aiop, reqp);
        mutex_exit(&aiop->aio_mutex);
    }
/*
 * Write out the results for an aio request that is done.
 */
static int
aioerror(void *cb, int run_mode)
{
    aio_result_t *resultp;

    aiop = curproc->p_aio;
    if (aiop == NULL || cb == NULL)

    if (get_udatamodel() == DATAMODEL_NATIVE) {
        if (run_mode == AIO_LARGEFILE)
            resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
                aio_resultp;
        else
            resultp = &((aiocb_t *)cb)->aio_resultp;
    }
#ifdef _SYSCALL32_IMPL
    else {
        if (run_mode == AIO_LARGEFILE)
            resultp = (aio_result_t *)&((aiocb64_32_t *)cb)->
                aio_resultp;
        else if (run_mode == AIO_32)
            resultp = (aio_result_t *)&((aiocb32_t *)cb)->
                aio_resultp;
    }
#endif /* _SYSCALL32_IMPL */

    /*
     * we need to get the aio_cleanupq_mutex since we call
     * aio_req_find().
     */
    mutex_enter(&aiop->aio_cleanupq_mutex);
    mutex_enter(&aiop->aio_mutex);
    retval = aio_req_find(resultp, &reqp);
    mutex_exit(&aiop->aio_mutex);
    mutex_exit(&aiop->aio_cleanupq_mutex);

        aphysio_unlock(reqp);
        aio_copyout_result(reqp);
        mutex_enter(&aiop->aio_mutex);
        aio_req_free(aiop, reqp);
        mutex_exit(&aiop->aio_mutex);

    } else if (retval == 1)
        return (EINPROGRESS);
    else if (retval == 2)
/*
 * aio_cancel - if no requests outstanding,
 *		return AIO_ALLDONE
 *		else
 *		return AIO_NOTCANCELED
 */
    /*
     * Verify valid file descriptor
     */
    if ((getf(fildes)) == NULL) {

    aiop = curproc->p_aio;

    if (aiop->aio_outstanding == 0) {
        *rval = AIO_ALLDONE;

    mutex_enter(&aiop->aio_mutex);

    if (get_udatamodel() == DATAMODEL_NATIVE) {
        if (run_mode == AIO_LARGEFILE)
            resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
                ->aio_resultp;
        else
            resultp = &((aiocb_t *)cb)->aio_resultp;
    }
#ifdef _SYSCALL32_IMPL
    else {
        if (run_mode == AIO_LARGEFILE)
            resultp = (aio_result_t *)&((aiocb64_32_t *)cb)
                ->aio_resultp;
        else if (run_mode == AIO_32)
            resultp = (aio_result_t *)&((aiocb32_t *)cb)
                ->aio_resultp;
    }
#endif /* _SYSCALL32_IMPL */
    index = AIO_HASH(resultp);
    bucket = &aiop->aio_hash[index];
    for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
        if (ent->aio_req_resultp == resultp) {
            if ((ent->aio_req_flags & AIO_PENDING) == 0) {
                mutex_exit(&aiop->aio_mutex);
                *rval = AIO_ALLDONE;

            mutex_exit(&aiop->aio_mutex);
            *rval = AIO_NOTCANCELED;

    mutex_exit(&aiop->aio_mutex);
    *rval = AIO_ALLDONE;

    for (index = 0; index < AIO_HASHSZ; index++) {
        bucket = &aiop->aio_hash[index];
        for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
            if (ent->aio_req_fd == fildes) {
                if ((ent->aio_req_flags & AIO_PENDING) != 0) {
                    mutex_exit(&aiop->aio_mutex);
                    *rval = AIO_NOTCANCELED;

    mutex_exit(&aiop->aio_mutex);
    *rval = AIO_ALLDONE;
/*
 * solaris version of asynchronous read and write
 */
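/*
 * Illustrative sketch (assumption, not kernel code): arw() backs the old
 * Solaris aioread(3)/aiowrite(3) interface, roughly:
 *
 *	aio_result_t res;
 *
 *	if (aioread(fd, buf, sizeof (buf), off, SEEK_SET, &res) == 0)
 *		(void) aiowait(NULL);	// res.aio_return / aio_errno filled in
 *
 * "fd", "buf" and "off" are hypothetical names used only for the example.
 */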
static int
arw(
    int	opcode,
    int	fdes,
    char	*bufp,
    int	bufsize,
    offset_t	offset,
    aio_result_t	*resultp,
    int	mode)
{
    aiocb64_32_t aiocb64;

    aiop = curproc->p_aio;

    if ((fp = getf(fdes)) == NULL) {

    /*
     * check the permission of the partition
     */
    if ((fp->f_flag & mode) == 0) {

    aio_func = check_vp(vp, mode);
    if (aio_func == NULL) {

    aiocb.aio_fildes = fdes;
    aiocb.aio_buf = bufp;
    aiocb.aio_nbytes = bufsize;
    aiocb.aio_offset = offset;
    aiocb.aio_sigevent.sigev_notify = 0;
    error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp, 1);

    aiocb64.aio_fildes = fdes;
    aiocb64.aio_buf = (caddr32_t)bufp;
    aiocb64.aio_nbytes = bufsize;
    aiocb64.aio_offset = offset;
    aiocb64.aio_sigevent.sigev_notify = 0;
    error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp, 1);

    /*
     * enable polling on this request if the opcode has
     * the AIO poll bit set
     */
    if (opcode & AIO_POLL_BIT)
        reqp->aio_req_flags |= AIO_POLL;

    clear_active_fd(fdes);

    /*
     * send the request to driver.
     */
    error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());

    /*
     * the fd is stored in the aio_req_t by aio_req_setup(), and
     * is released by the aio_cleanup_thread() when the IO has
     * completed.
     */
    mutex_enter(&aiop->aio_mutex);
    aio_req_free(aiop, reqp);
    aiop->aio_pending--;
    if (aiop->aio_flags & AIO_REQ_BLOCK)
        cv_signal(&aiop->aio_cleanupcv);
    mutex_exit(&aiop->aio_mutex);

    clear_active_fd(fdes);
/*
 * posix version of asynchronous read and write
 */
#ifdef _SYSCALL32_IMPL
    struct sigevent32	*sigev32;
    port_notify32_t	pntfy32;
#endif
    aiocb64_32_t	aiocb64;
    aio_result_t	*resultp;
    struct sigevent	*sigev;
    int		aio_use_port = 0;
    port_notify_t	pntfy;

    model = get_udatamodel();
    aiop = curproc->p_aio;

    if (model == DATAMODEL_NATIVE) {
        if (run_mode != AIO_LARGEFILE) {
            if (copyin(aiocb_arg, &aiocb, sizeof (aiocb_t)))
            bufsize = aiocb.aio_nbytes;
            resultp = &(((aiocb_t *)aiocb_arg)->aio_resultp);
            if ((fp = getf(fd = aiocb.aio_fildes)) == NULL) {
            sigev = &aiocb.aio_sigevent;
        } else {
            /*
             * We come here only when we make largefile
             * call on 32 bit kernel using 32 bit library.
             */
            if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
            bufsize = aiocb64.aio_nbytes;
            resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
                ->aio_resultp);
            if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
            sigev = (struct sigevent *)&aiocb64.aio_sigevent;
        }

        if (sigev->sigev_notify == SIGEV_PORT) {
            if (copyin((void *)sigev->sigev_value.sival_ptr,
                &pntfy, sizeof (port_notify_t))) {
        } else if (sigev->sigev_notify == SIGEV_THREAD) {
            pntfy.portnfy_port = aiocb.aio_sigevent.sigev_signo;
            pntfy.portnfy_user =
                aiocb.aio_sigevent.sigev_value.sival_ptr;
        }
    }
#ifdef _SYSCALL32_IMPL
    else {
        if (run_mode == AIO_32) {
            /* 32 bit system call is being made on 64 bit kernel */
            if (copyin(aiocb_arg, &aiocb32, sizeof (aiocb32_t)))
            bufsize = aiocb32.aio_nbytes;
            aiocb_32ton(&aiocb32, &aiocb);
            resultp = (aio_result_t *)&(((aiocb32_t *)aiocb_arg)->
                aio_resultp);
            if ((fp = getf(fd = aiocb32.aio_fildes)) == NULL) {
            sigev32 = &aiocb32.aio_sigevent;
        } else if (run_mode == AIO_LARGEFILE) {
            /*
             * We come here only when we make largefile
             * call on 64 bit kernel using 32 bit library.
             */
            if (copyin(aiocb_arg, &aiocb64, sizeof (aiocb64_32_t)))
            bufsize = aiocb64.aio_nbytes;
            aiocb_LFton(&aiocb64, &aiocb);
            resultp = (aio_result_t *)&(((aiocb64_32_t *)aiocb_arg)
                ->aio_resultp);
            if ((fp = getf(fd = aiocb64.aio_fildes)) == NULL)
            sigev32 = &aiocb64.aio_sigevent;
        }

        if (sigev32->sigev_notify == SIGEV_PORT) {
            if (copyin(
                (void *)(uintptr_t)sigev32->sigev_value.sival_ptr,
                &pntfy32, sizeof (port_notify32_t))) {
            pntfy.portnfy_port = pntfy32.portnfy_port;
            pntfy.portnfy_user = (void *)(uintptr_t)
                pntfy32.portnfy_user;
        } else if (sigev32->sigev_notify == SIGEV_THREAD) {
            pntfy.portnfy_port = sigev32->sigev_signo;
            pntfy.portnfy_user = (void *)(uintptr_t)
                sigev32->sigev_value.sival_ptr;
        }
    }
#endif /* _SYSCALL32_IMPL */

    /*
     * check the permission of the partition
     */
    if ((fp->f_flag & mode) == 0) {

    aio_func = check_vp(vp, mode);
    if (aio_func == NULL) {

    if (run_mode == AIO_LARGEFILE)
        error = aio_req_setupLF(&reqp, aiop, &aiocb64, resultp, vp, 0);
    else
        error = aio_req_setup(&reqp, aiop, &aiocb, resultp, vp, 0);

    /*
     * enable polling on this request if the opcode has
     * the AIO poll bit set
     */
    if (opcode & AIO_POLL_BIT)
        reqp->aio_req_flags |= AIO_POLL;

    if (model == DATAMODEL_NATIVE)
        reqp->aio_req_iocb.iocb = aiocb_arg;
#ifdef _SYSCALL32_IMPL
    else
        reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)aiocb_arg;
#endif

        int event = (run_mode == AIO_LARGEFILE)?
            ((mode == FREAD)? AIOAREAD64 : AIOAWRITE64) :
            ((mode == FREAD)? AIOAREAD : AIOAWRITE);
        error = aio_req_assoc_port_rw(&pntfy, aiocb_arg, reqp, event);

    /*
     * send the request to driver.
     */
        clear_active_fd(fd);

    error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req, CRED());

    /*
     * the fd is stored in the aio_req_t by aio_req_setup(), and
     * is released by the aio_cleanup_thread() when the IO has
     * completed.
     */
    mutex_enter(&aiop->aio_mutex);
    aio_deq(&aiop->aio_portpending, reqp);
    aio_req_free(aiop, reqp);
    aiop->aio_pending--;
    if (aiop->aio_flags & AIO_REQ_BLOCK)
        cv_signal(&aiop->aio_cleanupcv);
    mutex_exit(&aiop->aio_mutex);

    clear_active_fd(fd);
/*
 * set error for a list IO entry that failed.
 */
static void
lio_set_error(aio_req_t *reqp, int portused)
{
    aio_t *aiop = curproc->p_aio;

    mutex_enter(&aiop->aio_mutex);

    aio_deq(&aiop->aio_portpending, reqp);
    aiop->aio_pending--;
    /* request failed, AIO_PHYSIODONE set to avoid physio cleanup. */
    reqp->aio_req_flags |= AIO_PHYSIODONE;
    /*
     * Need to free the request now as it's never
     * going to get on the done queue
     *
     * Note: aio_outstanding is decremented in
     *	 aio_req_free()
     */
    aio_req_free(aiop, reqp);
    if (aiop->aio_flags & AIO_REQ_BLOCK)
        cv_signal(&aiop->aio_cleanupcv);
    mutex_exit(&aiop->aio_mutex);
/*
 * check if a specified request is done, and remove it from
 * the done queue. otherwise remove anybody from the done queue
 * if NULL is specified.
 */
static aio_req_t *
aio_req_done(void *resultp)
{
    aio_t *aiop = curproc->p_aio;

    ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
    ASSERT(MUTEX_HELD(&aiop->aio_mutex));

    if (resultp) {
        index = AIO_HASH(resultp);
        bucket = &aiop->aio_hash[index];
        for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
            if (ent->aio_req_resultp == (aio_result_t *)resultp) {
                if (ent->aio_req_flags & AIO_DONEQ) {
                    return (aio_req_remove(ent));
                }
            }
        }
        /* no match, resultp is invalid */
        return (NULL);
    }
    return (aio_req_remove(NULL));
/*
 * determine if a user-level resultp pointer is associated with an
 * active IO request. Zero is returned when the request is done,
 * and the request is removed from the done queue. Only when the
 * return value is zero, is the "reqp" pointer valid. One is returned
 * when the request is inprogress. Two is returned when the request
 * is invalid.
 */
static int
aio_req_find(aio_result_t *resultp, aio_req_t **reqp)
{
    aio_t *aiop = curproc->p_aio;

    ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));
    ASSERT(MUTEX_HELD(&aiop->aio_mutex));

    index = AIO_HASH(resultp);
    bucket = &aiop->aio_hash[index];
    for (ent = *bucket; ent != NULL; ent = ent->aio_hash_next) {
        if (ent->aio_req_resultp == resultp) {
            if (ent->aio_req_flags & AIO_DONEQ) {
                *reqp = aio_req_remove(ent);
            }
        }
    }
    /* no match, resultp is invalid */
/*
 * remove a request from the done queue.
 */
static aio_req_t *
aio_req_remove(aio_req_t *reqp)
{
    aio_t *aiop = curproc->p_aio;

    ASSERT(MUTEX_HELD(&aiop->aio_mutex));

        ASSERT(reqp->aio_req_flags & AIO_DONEQ);
        if (reqp->aio_req_next == reqp) {
            /* only one request on queue */
            if (reqp == aiop->aio_doneq) {
                aiop->aio_doneq = NULL;
            } else {
                ASSERT(reqp == aiop->aio_cleanupq);
                aiop->aio_cleanupq = NULL;
            }
        } else {
            reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
            reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
            /*
             * The request can be either on the aio_doneq or the
             * aio_cleanupq
             */
            if (reqp == aiop->aio_doneq)
                aiop->aio_doneq = reqp->aio_req_next;

            if (reqp == aiop->aio_cleanupq)
                aiop->aio_cleanupq = reqp->aio_req_next;
        }
        reqp->aio_req_flags &= ~AIO_DONEQ;
        reqp->aio_req_next = NULL;
        reqp->aio_req_prev = NULL;
    } else if ((reqp = aiop->aio_doneq) != NULL) {
        ASSERT(reqp->aio_req_flags & AIO_DONEQ);
        if (reqp == reqp->aio_req_next) {
            /* only one request on queue */
            aiop->aio_doneq = NULL;
        } else {
            reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
            reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
            aiop->aio_doneq = reqp->aio_req_next;
        }
        reqp->aio_req_flags &= ~AIO_DONEQ;
        reqp->aio_req_next = NULL;
        reqp->aio_req_prev = NULL;
    }
    if (aiop->aio_doneq == NULL && (aiop->aio_flags & AIO_WAITN))
        cv_broadcast(&aiop->aio_waitcv);
static int
aio_req_setup(aio_req_t **reqpp, aio_t *aiop, aiocb_t *arg,
    aio_result_t *resultp, vnode_t *vp,
    int old_solaris_req)
{
    sigqueue_t	*sqp = NULL;
    struct sigevent	*sigev;

    sigev = &arg->aio_sigevent;
    if (sigev->sigev_notify == SIGEV_SIGNAL &&
        sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
        sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);

        sqp->sq_func = NULL;
        sqp->sq_next = NULL;
        sqp->sq_info.si_code = SI_ASYNCIO;
        sqp->sq_info.si_pid = curproc->p_pid;
        sqp->sq_info.si_ctid = PRCTID(curproc);
        sqp->sq_info.si_zoneid = getzoneid();
        sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
        sqp->sq_info.si_signo = sigev->sigev_signo;
        sqp->sq_info.si_value = sigev->sigev_value;
    }

    mutex_enter(&aiop->aio_mutex);

    if (aiop->aio_flags & AIO_REQ_BLOCK) {
        mutex_exit(&aiop->aio_mutex);
        kmem_free(sqp, sizeof (sigqueue_t));

    /*
     * get an aio_reqp from the free list or allocate one
     * from dynamic memory.
     */
    if (error = aio_req_alloc(&reqp, resultp)) {
        mutex_exit(&aiop->aio_mutex);
        kmem_free(sqp, sizeof (sigqueue_t));

    aiop->aio_pending++;
    aiop->aio_outstanding++;
    reqp->aio_req_flags = AIO_PENDING;
    if (old_solaris_req) {
        /* this is an old solaris aio request */
        reqp->aio_req_flags |= AIO_SOLARIS;
        aiop->aio_flags |= AIO_SOLARIS_REQ;
    }
    if (sigev->sigev_notify == SIGEV_THREAD ||
        sigev->sigev_notify == SIGEV_PORT)
        aio_enq(&aiop->aio_portpending, reqp, 0);
    mutex_exit(&aiop->aio_mutex);

    /*
     * initialize aio request.
     */
    reqp->aio_req_fd = arg->aio_fildes;
    reqp->aio_req_sigqp = sqp;
    reqp->aio_req_iocb.iocb = NULL;
    reqp->aio_req_lio = NULL;
    reqp->aio_req_buf.b_file = vp;
    uio = reqp->aio_req.aio_uio;
    uio->uio_iovcnt = 1;
    uio->uio_iov->iov_base = (caddr_t)arg->aio_buf;
    uio->uio_iov->iov_len = arg->aio_nbytes;
    uio->uio_loffset = arg->aio_offset;
/*
 * Allocate p_aio struct.
 */
static aio_t *
aio_aiop_alloc(void)
{
    ASSERT(MUTEX_HELD(&curproc->p_lock));

    aiop = kmem_zalloc(sizeof (struct aio), KM_NOSLEEP);

    mutex_init(&aiop->aio_mutex, NULL, MUTEX_DEFAULT, NULL);
    mutex_init(&aiop->aio_cleanupq_mutex, NULL, MUTEX_DEFAULT,
        NULL);
    mutex_init(&aiop->aio_portq_mutex, NULL, MUTEX_DEFAULT, NULL);
/*
 * Allocate an aio_req struct.
 */
static int
aio_req_alloc(aio_req_t **nreqp, aio_result_t *resultp)
{
    aio_t *aiop = curproc->p_aio;

    ASSERT(MUTEX_HELD(&aiop->aio_mutex));

    if ((reqp = aiop->aio_free) != NULL) {
        aiop->aio_free = reqp->aio_req_next;
        bzero(reqp, sizeof (*reqp));

        /*
         * Check whether memory is getting tight.
         * This is a temporary mechanism to avoid memory
         * exhaustion by a single process until we come up
         * with a per process solution such as setrlimit().
         */
        if (freemem < desfree)

        reqp = kmem_zalloc(sizeof (struct aio_req_t), KM_NOSLEEP);

    reqp->aio_req.aio_uio = &reqp->aio_req_uio;
    reqp->aio_req.aio_uio->uio_iov = &reqp->aio_req_iov;
    reqp->aio_req.aio_private = reqp;
    reqp->aio_req_buf.b_offset = -1;
    reqp->aio_req_resultp = resultp;
    if (aio_hash_insert(reqp, aiop)) {
        reqp->aio_req_next = aiop->aio_free;
        aiop->aio_free = reqp;
/*
 * Allocate an aio_lio_t struct.
 */
static int
aio_lio_alloc(aio_lio_t **head)
{
    aio_t *aiop = curproc->p_aio;

    ASSERT(MUTEX_HELD(&aiop->aio_mutex));

    if ((liop = aiop->aio_lio_free) != NULL) {
        aiop->aio_lio_free = liop->lio_next;

        /*
         * Check whether memory is getting tight.
         * This is a temporary mechanism to avoid memory
         * exhaustion by a single process until we come up
         * with a per process solution such as setrlimit().
         */
        if (freemem < desfree)

        liop = kmem_zalloc(sizeof (aio_lio_t), KM_NOSLEEP);
/*
 * this is a special per-process thread that is only activated if
 * the process is unmapping a segment with outstanding aio. normally,
 * the process will have completed the aio before unmapping the
 * segment. If the process does unmap a segment with outstanding aio,
 * this special thread will guarantee that the locked pages due to
 * aphysio() are released, thereby permitting the segment to be
 * unmapped. In addition to this, the cleanup thread is woken up
 * during DR operations to release the locked pages.
 */
static int
aio_cleanup_thread(aio_t *aiop)
{
    proc_t *p = curproc;
    struct as *as = p->p_as;

    sigfillset(&curthread->t_hold);
    sigdiffset(&curthread->t_hold, &cantmask);

        /*
         * if a segment is being unmapped, and the current
         * process's done queue is not empty, then every request
         * on the doneq with locked resources should be forced
         * to release their locks. By moving the doneq request
         * to the cleanupq, aio_cleanup() will process the cleanupq,
         * and place requests back onto the doneq. All requests
         * processed by aio_cleanup() will have their physical
         * resources unlocked.
         */
        mutex_enter(&aiop->aio_mutex);
        if ((aiop->aio_flags & AIO_CLEANUP) == 0) {
            aiop->aio_flags |= AIO_CLEANUP;
            mutex_enter(&as->a_contents);
            if (aiop->aio_rqclnup) {
                aiop->aio_rqclnup = 0;
            }
            mutex_exit(&as->a_contents);
            if (aiop->aio_doneq) {
                aio_req_t *doneqhead = aiop->aio_doneq;
                aiop->aio_doneq = NULL;
                aio_cleanupq_concat(aiop, doneqhead, AIO_DONEQ);
            }
        }
        mutex_exit(&aiop->aio_mutex);
        aio_cleanup(AIO_CLEANUP_THREAD);
        /*
         * thread should block on the cleanupcv while
         * AIO_CLEANUP is set.
         */
        cvp = &aiop->aio_cleanupcv;
        mutex_enter(&aiop->aio_mutex);

        if (aiop->aio_pollq != NULL || aiop->aio_cleanupq != NULL ||
            aiop->aio_notifyq != NULL ||
            aiop->aio_portcleanupq != NULL) {
            mutex_exit(&aiop->aio_mutex);
        }
        mutex_enter(&as->a_contents);

        /*
         * AIO_CLEANUP determines when the cleanup thread
         * should be active. This flag is set when
         * the cleanup thread is awakened by as_unmap() or
         * due to DR operations.
         * The flag is cleared when the blocking as_unmap()
         * that originally awakened us is allowed to
         * complete. as_unmap() blocks when trying to
         * unmap a segment that has SOFTLOCKed pages. when
         * the segment's pages are all SOFTUNLOCKed,
         * as->a_flags & AS_UNMAPWAIT should be zero.
         *
         * In case of cleanup request by DR, the flag is cleared
         * once all the pending aio requests have been processed.
         *
         * The flag shouldn't be cleared right away if the
         * cleanup thread was interrupted because the process
         * is doing forkall(). This happens when cv_wait_sig()
         * returns zero, because it was awakened by a pokelwps().
         * If the process is not exiting, it must be doing forkall().
         */
            ((!rqclnup && (AS_ISUNMAPWAIT(as) == 0)) ||
            (aiop->aio_pending == 0))) {
            aiop->aio_flags &= ~(AIO_CLEANUP | AIO_CLEANUP_PORT);
        }
        mutex_exit(&aiop->aio_mutex);

            /*
             * If the process is exiting/killed, don't return
             * immediately without waiting for pending I/O's
             * and releasing the page locks.
             */
            if (p->p_flag & (SEXITLWPS|SKILLED)) {
                /*
                 * If exit_flag is set, then it is
                 * safe to exit because we have released
                 * page locks of completed I/O's.
                 */
                mutex_exit(&as->a_contents);

                /*
                 * Wait for all the pending aio to complete.
                 */
                mutex_enter(&aiop->aio_mutex);
                aiop->aio_flags |= AIO_REQ_BLOCK;
                while (aiop->aio_pending != 0)
                    cv_wait(&aiop->aio_cleanupcv,
                        &aiop->aio_mutex);
                mutex_exit(&aiop->aio_mutex);

            } else if (p->p_flag &
                (SHOLDFORK|SHOLDFORK1|SHOLDWATCH)) {
                mutex_exit(&as->a_contents);
                mutex_enter(&p->p_lock);
                stop(PR_SUSPENDED, SUSPEND_NORMAL);
                mutex_exit(&p->p_lock);
            }

        /*
         * When started this thread will sleep on as->a_cv.
         * as_unmap will awake this thread if the
         * segment has SOFTLOCKed pages (poked = 0).
         * 1. pokelwps() awakes this thread =>
         *    break the loop to check SEXITLWPS, SHOLDFORK, etc
         * 2. as_unmap awakes this thread =>
         *    to break the loop it is necessary that
         *    - AS_UNMAPWAIT is set (as_unmap is waiting for
         *	memory to be unlocked)
         *    - AIO_CLEANUP is not set
         *	(if AIO_CLEANUP is set we have to wait for
         *	pending requests. aio_done will send a signal
         *	for every request which completes to continue
         *	unmapping the corresponding address range)
         * 3. A cleanup request will wake this thread up, ex.
         *    by the DR operations. The aio_rqclnup flag will
         *    be set.
         */
        while (poked == 0) {
            /*
             * The clean up requests that came in
             * after we had just cleaned up, couldn't
             * be causing the unmap thread to block - as
             * unmap event happened first.
             * Let aio_done() wake us up if it sees a need.
             */
            if (aiop->aio_rqclnup &&
                (aiop->aio_flags & AIO_CLEANUP) == 0)

            poked = !cv_wait_sig(cvp, &as->a_contents);
            if (AS_ISUNMAPWAIT(as) == 0)

            if (aiop->aio_outstanding != 0)
        }
        mutex_exit(&as->a_contents);

    mutex_exit(&as->a_contents);
    ASSERT((curproc->p_flag & (SEXITLWPS|SKILLED)));
    aston(curthread);	/* make thread do post_syscall */
/*
 * save a reference to a user's outstanding aio in a hash list.
 */
static int
aio_hash_insert(
    aio_req_t *aio_reqp,
    aio_t *aiop)
{
    aio_result_t *resultp = aio_reqp->aio_req_resultp;

    index = AIO_HASH(resultp);
    nextp = &aiop->aio_hash[index];
    while ((current = *nextp) != NULL) {
        if (current->aio_req_resultp == resultp)
        nextp = &current->aio_hash_next;
    }
    aio_reqp->aio_hash_next = NULL;
static int
(*check_vp(struct vnode *vp, int mode))(vnode_t *, struct aio_req *,
    cred_t *)
{
	dev_t		dev;
	struct cb_ops	*cb;
	major_t		major;
	int		(*aio_func)();

	dev = vp->v_rdev;
	major = getmajor(dev);

	/*
	 * return NULL for requests to files and STREAMs so
	 * that libaio takes care of them.
	 */
	if (vp->v_type == VCHR) {
		/* no stream device for kaio */
		if (STREAMSTAB(major)) {
			return (NULL);
		}
	} else {
		return (NULL);
	}

	/*
	 * Check old drivers which do not have async I/O entry points.
	 */
	if (devopsp[major]->devo_rev < 3)
		return (NULL);

	cb = devopsp[major]->devo_cb_ops;

	/*
	 * Check whether this device is a block device.
	 * Kaio is not supported for devices like tty.
	 */
	if (cb->cb_strategy == nodev || cb->cb_strategy == NULL)
		return (NULL);

	/*
	 * Clustering: If vnode is a PXFS vnode, then the device may be remote.
	 * We cannot call the driver directly. Instead return the
	 * PXFS entry points.
	 */
	if (IS_PXFSVP(vp)) {
		if (mode & FREAD)
			return (clpxfs_aio_read);
		else
			return (clpxfs_aio_write);
	}
	if (mode & FREAD)
		aio_func = (cb->cb_aread == nodev) ? NULL : driver_aio_read;
	else
		aio_func = (cb->cb_awrite == nodev) ? NULL : driver_aio_write;

	/*
	 * nodev returns ENXIO anyway.
	 */
	if (aio_func == nodev)
		return (NULL);

	return (aio_func);
}
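/*
 * Illustrative userland sketch (excluded from the build): check_vp() above
 * only hands back a driver entry point for character special devices whose
 * driver supplies async entry points; regular files and STREAMS devices get
 * NULL and are serviced by the library implementation instead.  The device
 * path below is an assumption for the example; aio_read(), aio_suspend(),
 * aio_error() and aio_return() are the standard POSIX interfaces.
 */
#if 0
#include <aio.h>
#include <fcntl.h>
#include <string.h>

static char buf[8192];

int
example_raw_read(void)
{
	struct aiocb cb;
	const struct aiocb *list[1] = { &cb };
	int fd = open("/dev/rdsk/c0t0d0s2", O_RDONLY);	/* hypothetical path */

	if (fd == -1)
		return (-1);
	(void) memset(&cb, 0, sizeof (cb));
	cb.aio_fildes = fd;
	cb.aio_buf = buf;
	cb.aio_nbytes = sizeof (buf);
	cb.aio_offset = 0;
	if (aio_read(&cb) != 0)
		return (-1);
	(void) aio_suspend(list, 1, NULL);	/* wait for completion */
	if (aio_error(&cb) != 0)
		return (-1);
	return ((int)aio_return(&cb));
}
#endif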
/*
 * Clustering: We want check_vp to return a function prototyped
 * correctly that will be common to both PXFS and regular case.
 * We define this intermediate function that will do the right
 * thing for driver cases.
 */
static int
driver_aio_write(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
{
	dev_t dev;
	struct cb_ops *cb;

	ASSERT(vp->v_type == VCHR);
	ASSERT(!IS_PXFSVP(vp));
	dev = VTOS(vp)->s_dev;
	ASSERT(STREAMSTAB(getmajor(dev)) == NULL);

	cb = devopsp[getmajor(dev)]->devo_cb_ops;

	ASSERT(cb->cb_awrite != nodev);
	return ((*cb->cb_awrite)(dev, aio, cred_p));
}

/*
 * Clustering: We want check_vp to return a function prototyped
 * correctly that will be common to both PXFS and regular case.
 * We define this intermediate function that will do the right
 * thing for driver cases.
 */
static int
driver_aio_read(vnode_t *vp, struct aio_req *aio, cred_t *cred_p)
{
	dev_t dev;
	struct cb_ops *cb;

	ASSERT(vp->v_type == VCHR);
	ASSERT(!IS_PXFSVP(vp));
	dev = VTOS(vp)->s_dev;
	ASSERT(!STREAMSTAB(getmajor(dev)));

	cb = devopsp[getmajor(dev)]->devo_cb_ops;

	ASSERT(cb->cb_aread != nodev);
	return ((*cb->cb_aread)(dev, aio, cred_p));
}
/*
 * This routine is called when a largefile call is made by a 32bit
 * process on a ILP32 or LP64 kernel. All 64bit processes are large
 * file by definition and will call alio() instead.
 */
static int
alioLF(
	int		mode_arg,
	void		*aiocb_arg,
	int		nent,
	void		*sigev)
{
	file_t		*fp;
	file_t		*prev_fp = NULL;
	int		prev_mode = -1;
	struct vnode	*vp;
	aio_lio_t	*head = NULL;
	aio_req_t	*reqp;
	aio_t		*aiop;
	caddr_t		cbplist;
	aiocb64_32_t	cb64;
	aiocb64_32_t	*aiocb = &cb64;
	aiocb64_32_t	*cbp;
	caddr32_t	*ucbp;
#ifdef	_LP64
	aiocb_t		aiocb_n;
#endif
	struct sigevent32	sigevk;
	sigqueue_t	*sqp;
	int		(*aio_func)();
	int		mode;
	int		error = 0;
	int		aio_errors = 0;
	int		i;
	size_t		ssize;
	int		deadhead = 0;
	int		aio_notsupported = 0;
	int		lio_head_port;
	int		aio_port;
	int		aio_thread;
	port_kevent_t	*pkevtp = NULL;
	int		portused = 0;
	port_notify32_t	pnotify;
	int		event;

	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
		return (EINVAL);

	ASSERT(get_udatamodel() == DATAMODEL_ILP32);

	ssize = (sizeof (caddr32_t) * nent);
	cbplist = kmem_alloc(ssize, KM_SLEEP);
	ucbp = (caddr32_t *)cbplist;

	if (copyin(aiocb_arg, cbplist, ssize) ||
	    (sigev && copyin(sigev, &sigevk, sizeof (sigevk)))) {
		kmem_free(cbplist, ssize);
		return (EFAULT);
	}

	/* Event Ports  */
	if (sigev &&
	    (sigevk.sigev_notify == SIGEV_THREAD ||
	    sigevk.sigev_notify == SIGEV_PORT)) {
		if (sigevk.sigev_notify == SIGEV_THREAD) {
			pnotify.portnfy_port = sigevk.sigev_signo;
			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
		} else if (copyin(
		    (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
		    &pnotify, sizeof (pnotify))) {
			kmem_free(cbplist, ssize);
			return (EFAULT);
		}
		error = port_alloc_event(pnotify.portnfy_port,
		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
		if (error) {
			if (error == ENOMEM || error == EAGAIN)
				error = EAGAIN;
			else
				error = EINVAL;
			kmem_free(cbplist, ssize);
			return (error);
		}
		lio_head_port = pnotify.portnfy_port;
		portused = 1;
	}

	/*
	 * a list head should be allocated if notification is
	 * enabled for this list.
	 */
	if (mode_arg == LIO_WAIT || sigev) {
		mutex_enter(&aiop->aio_mutex);
		error = aio_lio_alloc(&head);
		mutex_exit(&aiop->aio_mutex);
		if (error)
			goto done;
		deadhead = 1;
		head->lio_nent = nent;
		head->lio_refcnt = nent;
		head->lio_port = -1;
		head->lio_portkev = NULL;
		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
			if (sqp == NULL) {
				error = EAGAIN;
				goto done;
			}
			sqp->sq_func = NULL;
			sqp->sq_next = NULL;
			sqp->sq_info.si_code = SI_ASYNCIO;
			sqp->sq_info.si_pid = curproc->p_pid;
			sqp->sq_info.si_ctid = PRCTID(curproc);
			sqp->sq_info.si_zoneid = getzoneid();
			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
			sqp->sq_info.si_signo = sigevk.sigev_signo;
			sqp->sq_info.si_value.sival_int =
			    sigevk.sigev_value.sival_int;
			head->lio_sigqp = sqp;
		} else {
			head->lio_sigqp = NULL;
		}
		if (pkevtp) {
			/*
			 * Prepare data to send when list of aiocb's
			 * has completed.
			 */
			port_init_event(pkevtp, (uintptr_t)sigev,
			    (void *)(uintptr_t)pnotify.portnfy_user,
			    NULL, head);
			pkevtp->portkev_events = AIOLIO64;
			head->lio_portkev = pkevtp;
			head->lio_port = pnotify.portnfy_port;
		}
	}

	for (i = 0; i < nent; i++, ucbp++) {

		cbp = (aiocb64_32_t *)(uintptr_t)*ucbp;
		/* skip entry if it can't be copied. */
		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb))) {
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* skip if opcode for aiocb is LIO_NOP */
		mode = aiocb->aio_lio_opcode;
		if (mode == LIO_NOP) {
			cbp = NULL;
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* increment file descriptor's ref count. */
		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * check the permission of the partition
		 */
		if ((fp->f_flag & mode) == 0) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * common case where requests are to the same fd
		 * for the same r/w operation
		 * for UFS, need to set EBADFD
		 */
		vp = fp->f_vnode;
		if (fp != prev_fp || mode != prev_mode) {
			aio_func = check_vp(vp, mode);
			if (aio_func == NULL) {
				prev_fp = NULL;
				releasef(aiocb->aio_fildes);
				lio_set_uerror(&cbp->aio_resultp, EBADFD);
				aio_notsupported++;
				if (head) {
					mutex_enter(&aiop->aio_mutex);
					head->lio_nent--;
					head->lio_refcnt--;
					mutex_exit(&aiop->aio_mutex);
				}
				continue;
			} else {
				prev_fp = fp;
				prev_mode = mode;
			}
		}

#ifdef	_LP64
		aiocb_LFton(aiocb, &aiocb_n);
		error = aio_req_setup(&reqp, aiop, &aiocb_n,
		    (aio_result_t *)&cbp->aio_resultp, vp, 0);
#else
		error = aio_req_setupLF(&reqp, aiop, aiocb,
		    (aio_result_t *)&cbp->aio_resultp, vp, 0);
#endif  /* _LP64 */
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		reqp->aio_req_lio = head;
		deadhead = 0;

		/*
		 * Set the errno field now before sending the request to
		 * the driver to avoid a race condition
		 */
		(void) suword32(&cbp->aio_resultp.aio_errno,
		    EINPROGRESS);

		reqp->aio_req_iocb.iocb32 = *ucbp;

		event = (mode == LIO_READ)? AIOAREAD64 : AIOAWRITE64;
		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
		if (aio_port | aio_thread) {
			port_kevent_t *lpkevp;
			/*
			 * Prepare data to send with each aiocb completed.
			 */
			if (aio_port) {
				void *paddr = (void *)(uintptr_t)
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
				if (copyin(paddr, &pnotify, sizeof (pnotify)))
					error = EFAULT;
			} else {	/* aio_thread */
				pnotify.portnfy_port =
				    aiocb->aio_sigevent.sigev_signo;
				pnotify.portnfy_user =
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
			}
			if (error)
				/* EMPTY */;
			else if (pkevtp != NULL &&
			    pnotify.portnfy_port == lio_head_port)
				error = port_dup_event(pkevtp, &lpkevp,
				    PORT_ALLOC_DEFAULT);
			else
				error = port_alloc_event(pnotify.portnfy_port,
				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
				    &lpkevp);
			if (error == 0) {
				port_init_event(lpkevp, (uintptr_t)*ucbp,
				    (void *)(uintptr_t)pnotify.portnfy_user,
				    aio_port_callback, reqp);
				lpkevp->portkev_events = event;
				reqp->aio_req_portkev = lpkevp;
				reqp->aio_req_port = pnotify.portnfy_port;
			}
		}

		/*
		 * send the request to driver.
		 */
		if (error == 0) {
			if (aiocb->aio_nbytes == 0) {
				clear_active_fd(aiocb->aio_fildes);
				aio_zerolen(reqp);
				continue;
			}
			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
			    CRED());
		}

		/*
		 * the fd's ref count is not decremented until the IO has
		 * completed unless there was an error.
		 */
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			if (error == ENOTSUP)
				aio_notsupported++;
			else
				aio_errors++;
			lio_set_error(reqp, portused);
		} else {
			clear_active_fd(aiocb->aio_fildes);
		}
	}

	if (aio_notsupported) {
		error = ENOTSUP;
	} else if (aio_errors) {
		/*
		 * return EIO if any request failed
		 */
		error = EIO;
	}

	if (mode_arg == LIO_WAIT) {
		mutex_enter(&aiop->aio_mutex);
		while (head->lio_refcnt > 0) {
			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
				mutex_exit(&aiop->aio_mutex);
				error = EINTR;
				goto done;
			}
		}
		mutex_exit(&aiop->aio_mutex);
		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_LARGEFILE);
	}

done:
	kmem_free(cbplist, ssize);
	if (deadhead) {
		if (head->lio_sigqp)
			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
		if (head->lio_portkev)
			port_free_event(head->lio_portkev);
		kmem_free(head, sizeof (aio_lio_t));
	}
	return (error);
}
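/*
 * Illustrative userland sketch (excluded from the build): a 32-bit
 * application built in the largefile compilation environment (for example
 * with the flags reported by "getconf LFS_CFLAGS", typically
 * -D_FILE_OFFSET_BITS=64) submits list I/O through lio_listio(); that is
 * the kind of call which ends up in alioLF() above.  The buffer sizes and
 * offsets are arbitrary choices for the example.
 */
#if 0
#include <aio.h>
#include <string.h>

static char buf0[4096], buf1[4096];

int
example_lio(int fd)
{
	struct aiocb cb0, cb1;
	struct aiocb *list[2] = { &cb0, &cb1 };

	(void) memset(&cb0, 0, sizeof (cb0));
	(void) memset(&cb1, 0, sizeof (cb1));

	cb0.aio_fildes = fd;
	cb0.aio_buf = buf0;
	cb0.aio_nbytes = sizeof (buf0);
	cb0.aio_offset = 0;
	cb0.aio_lio_opcode = LIO_READ;

	cb1.aio_fildes = fd;
	cb1.aio_buf = buf1;
	cb1.aio_nbytes = sizeof (buf1);
	cb1.aio_offset = sizeof (buf0);
	cb1.aio_lio_opcode = LIO_READ;

	/* LIO_WAIT blocks until every entry in the list has completed */
	return (lio_listio(LIO_WAIT, list, 2, NULL));
}
#endif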
#ifdef _SYSCALL32_IMPL
void
aiocb_LFton(aiocb64_32_t *src, aiocb_t *dest)
{
	dest->aio_fildes = src->aio_fildes;
	dest->aio_buf = (void *)(uintptr_t)src->aio_buf;
	dest->aio_nbytes = (size_t)src->aio_nbytes;
	dest->aio_offset = (off_t)src->aio_offset;
	dest->aio_reqprio = src->aio_reqprio;
	dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
	dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;

	/*
	 * See comment in sigqueue32() on handling of 32-bit
	 * sigvals in a 64-bit kernel.
	 */
	dest->aio_sigevent.sigev_value.sival_int =
	    (int)src->aio_sigevent.sigev_value.sival_int;
	dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
	    (uintptr_t)src->aio_sigevent.sigev_notify_function;
	dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
	    (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
	dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
	dest->aio_lio_opcode = src->aio_lio_opcode;
	dest->aio_state = src->aio_state;
	dest->aio__pad[0] = src->aio__pad[0];
}
#endif /* _SYSCALL32_IMPL */
/*
 * This function is used only for largefile calls made by
 * 32 bit applications.
 */
static int
aio_req_setupLF(
	aio_req_t	**reqpp,
	aio_t		*aiop,
	aiocb64_32_t	*arg,
	aio_result_t	*resultp,
	vnode_t		*vp,
	int		old_solaris_req)
{
	sigqueue_t	*sqp = NULL;
	aio_req_t	*reqp;
	struct uio	*uio;
	struct sigevent32 *sigev;
	int		error;

	sigev = &arg->aio_sigevent;
	if (sigev->sigev_notify == SIGEV_SIGNAL &&
	    sigev->sigev_signo > 0 && sigev->sigev_signo < NSIG) {
		sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
		if (sqp == NULL)
			return (EAGAIN);
		sqp->sq_func = NULL;
		sqp->sq_next = NULL;
		sqp->sq_info.si_code = SI_ASYNCIO;
		sqp->sq_info.si_pid = curproc->p_pid;
		sqp->sq_info.si_ctid = PRCTID(curproc);
		sqp->sq_info.si_zoneid = getzoneid();
		sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
		sqp->sq_info.si_signo = sigev->sigev_signo;
		sqp->sq_info.si_value.sival_int = sigev->sigev_value.sival_int;
	}

	mutex_enter(&aiop->aio_mutex);

	if (aiop->aio_flags & AIO_REQ_BLOCK) {
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (EIO);
	}
	/*
	 * get an aio_reqp from the free list or allocate one
	 * from dynamic memory.
	 */
	if (error = aio_req_alloc(&reqp, resultp)) {
		mutex_exit(&aiop->aio_mutex);
		if (sqp)
			kmem_free(sqp, sizeof (sigqueue_t));
		return (error);
	}
	aiop->aio_pending++;
	aiop->aio_outstanding++;
	reqp->aio_req_flags = AIO_PENDING;
	if (old_solaris_req) {
		/* this is an old solaris aio request */
		reqp->aio_req_flags |= AIO_SOLARIS;
		aiop->aio_flags |= AIO_SOLARIS_REQ;
	}
	if (sigev->sigev_notify == SIGEV_THREAD ||
	    sigev->sigev_notify == SIGEV_PORT)
		aio_enq(&aiop->aio_portpending, reqp, 0);
	mutex_exit(&aiop->aio_mutex);
	/*
	 * initialize aio request.
	 */
	reqp->aio_req_fd = arg->aio_fildes;
	reqp->aio_req_sigqp = sqp;
	reqp->aio_req_iocb.iocb = NULL;
	reqp->aio_req_lio = NULL;
	reqp->aio_req_buf.b_file = vp;
	uio = reqp->aio_req.aio_uio;
	uio->uio_iovcnt = 1;
	uio->uio_iov->iov_base = (caddr_t)(uintptr_t)arg->aio_buf;
	uio->uio_iov->iov_len = arg->aio_nbytes;
	uio->uio_loffset = arg->aio_offset;
	*reqpp = reqp;
	return (0);
}
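/*
 * Illustrative userland sketch (excluded from the build): the sigqueue_t
 * built above corresponds to a request that asked for SIGEV_SIGNAL
 * completion notification, which an application sets up in the aiocb
 * roughly as shown.  SIGRTMIN and the queued value are arbitrary choices
 * for the example.
 */
#if 0
#include <aio.h>
#include <signal.h>
#include <string.h>

static char buf[4096];

int
example_signal_notify(int fd)
{
	struct aiocb cb;

	(void) memset(&cb, 0, sizeof (cb));
	cb.aio_fildes = fd;
	cb.aio_buf = buf;
	cb.aio_nbytes = sizeof (buf);
	cb.aio_offset = 0;

	/* deliver SIGRTMIN with a queued value when the I/O completes */
	cb.aio_sigevent.sigev_notify = SIGEV_SIGNAL;
	cb.aio_sigevent.sigev_signo = SIGRTMIN;
	cb.aio_sigevent.sigev_value.sival_int = 42;

	return (aio_read(&cb));
}
#endif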
/*
 * This routine is called when a non largefile call is made by a 32bit
 * process on a ILP32 or LP64 kernel.
 */
static int
alio32(
	int		mode_arg,
	void		*aiocb_arg,
	int		nent,
	void		*sigev)
{
	file_t		*fp;
	file_t		*prev_fp = NULL;
	int		prev_mode = -1;
	struct vnode	*vp;
	aio_lio_t	*head = NULL;
	aio_req_t	*reqp;
	aio_t		*aiop;
	caddr_t		cbplist;
	aiocb_t		cb;
	aiocb_t		*aiocb = &cb;
#ifdef	_SYSCALL32_IMPL
	aiocb32_t	*cbp;
	caddr32_t	*ucbp;
	aiocb32_t	cb32;
	aiocb32_t	*aiocb32 = &cb32;
	struct sigevent32	sigevk;
#else
	aiocb_t		*cbp, **ucbp;
	struct sigevent	sigevk;
#endif
	sigqueue_t	*sqp;
	int		(*aio_func)();
	int		mode;
	int		error = 0;
	int		aio_errors = 0;
	int		i;
	size_t		ssize;
	int		deadhead = 0;
	int		aio_notsupported = 0;
	int		lio_head_port;
	int		aio_port;
	int		aio_thread;
	port_kevent_t	*pkevtp = NULL;
	int		portused = 0;
#ifdef	_SYSCALL32_IMPL
	port_notify32_t	pnotify;
#else
	port_notify_t	pnotify;
#endif
	int		event;

	aiop = curproc->p_aio;
	if (aiop == NULL || nent <= 0 || nent > _AIO_LISTIO_MAX)
		return (EINVAL);

#ifdef	_SYSCALL32_IMPL
	ssize = (sizeof (caddr32_t) * nent);
#else
	ssize = (sizeof (aiocb_t *) * nent);
#endif
	cbplist = kmem_alloc(ssize, KM_SLEEP);
	ucbp = (void *)cbplist;

	if (copyin(aiocb_arg, cbplist, ssize) ||
	    (sigev && copyin(sigev, &sigevk, sizeof (struct sigevent32)))) {
		kmem_free(cbplist, ssize);
		return (EFAULT);
	}

	/* Event Ports  */
	if (sigev &&
	    (sigevk.sigev_notify == SIGEV_THREAD ||
	    sigevk.sigev_notify == SIGEV_PORT)) {
		if (sigevk.sigev_notify == SIGEV_THREAD) {
			pnotify.portnfy_port = sigevk.sigev_signo;
			pnotify.portnfy_user = sigevk.sigev_value.sival_ptr;
		} else if (copyin(
		    (void *)(uintptr_t)sigevk.sigev_value.sival_ptr,
		    &pnotify, sizeof (pnotify))) {
			kmem_free(cbplist, ssize);
			return (EFAULT);
		}
		error = port_alloc_event(pnotify.portnfy_port,
		    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO, &pkevtp);
		if (error) {
			if (error == ENOMEM || error == EAGAIN)
				error = EAGAIN;
			else
				error = EINVAL;
			kmem_free(cbplist, ssize);
			return (error);
		}
		lio_head_port = pnotify.portnfy_port;
		portused = 1;
	}

	/*
	 * a list head should be allocated if notification is
	 * enabled for this list.
	 */
	if (mode_arg == LIO_WAIT || sigev) {
		mutex_enter(&aiop->aio_mutex);
		error = aio_lio_alloc(&head);
		mutex_exit(&aiop->aio_mutex);
		if (error)
			goto done;
		deadhead = 1;
		head->lio_nent = nent;
		head->lio_refcnt = nent;
		head->lio_port = -1;
		head->lio_portkev = NULL;
		if (sigev && sigevk.sigev_notify == SIGEV_SIGNAL &&
		    sigevk.sigev_signo > 0 && sigevk.sigev_signo < NSIG) {
			sqp = kmem_zalloc(sizeof (sigqueue_t), KM_NOSLEEP);
			if (sqp == NULL) {
				error = EAGAIN;
				goto done;
			}
			sqp->sq_func = NULL;
			sqp->sq_next = NULL;
			sqp->sq_info.si_code = SI_ASYNCIO;
			sqp->sq_info.si_pid = curproc->p_pid;
			sqp->sq_info.si_ctid = PRCTID(curproc);
			sqp->sq_info.si_zoneid = getzoneid();
			sqp->sq_info.si_uid = crgetuid(curproc->p_cred);
			sqp->sq_info.si_signo = sigevk.sigev_signo;
			sqp->sq_info.si_value.sival_int =
			    sigevk.sigev_value.sival_int;
			head->lio_sigqp = sqp;
		} else {
			head->lio_sigqp = NULL;
		}
		if (pkevtp) {
			/*
			 * Prepare data to send when list of aiocb's has
			 * completed.
			 */
			port_init_event(pkevtp, (uintptr_t)sigev,
			    (void *)(uintptr_t)pnotify.portnfy_user,
			    NULL, head);
			pkevtp->portkev_events = AIOLIO;
			head->lio_portkev = pkevtp;
			head->lio_port = pnotify.portnfy_port;
		}
	}

	for (i = 0; i < nent; i++, ucbp++) {

		/* skip entry if it can't be copied. */
#ifdef	_SYSCALL32_IMPL
		cbp = (aiocb32_t *)(uintptr_t)*ucbp;
		if (cbp == NULL || copyin(cbp, aiocb32, sizeof (*aiocb32)))
#else
		cbp = (aiocb_t *)*ucbp;
		if (cbp == NULL || copyin(cbp, aiocb, sizeof (*aiocb)))
#endif
		{
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}
#ifdef	_SYSCALL32_IMPL
		/*
		 * copy 32 bit structure into 64 bit structure
		 */
		aiocb_32ton(aiocb32, aiocb);
#endif /* _SYSCALL32_IMPL */

		/* skip if opcode for aiocb is LIO_NOP */
		mode = aiocb->aio_lio_opcode;
		if (mode == LIO_NOP) {
			cbp = NULL;
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			continue;
		}

		/* increment file descriptor's ref count. */
		if ((fp = getf(aiocb->aio_fildes)) == NULL) {
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * check the permission of the partition
		 */
		if ((fp->f_flag & mode) == 0) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, EBADF);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		/*
		 * common case where requests are to the same fd
		 * for the same r/w operation
		 * for UFS, need to set EBADFD
		 */
		vp = fp->f_vnode;
		if (fp != prev_fp || mode != prev_mode) {
			aio_func = check_vp(vp, mode);
			if (aio_func == NULL) {
				prev_fp = NULL;
				releasef(aiocb->aio_fildes);
				lio_set_uerror(&cbp->aio_resultp, EBADFD);
				aio_notsupported++;
				if (head) {
					mutex_enter(&aiop->aio_mutex);
					head->lio_nent--;
					head->lio_refcnt--;
					mutex_exit(&aiop->aio_mutex);
				}
				continue;
			} else {
				prev_fp = fp;
				prev_mode = mode;
			}
		}

		error = aio_req_setup(&reqp, aiop, aiocb,
		    (aio_result_t *)&cbp->aio_resultp, vp, 0);
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			aio_errors++;
			continue;
		}

		reqp->aio_req_lio = head;
		deadhead = 0;

		/*
		 * Set the errno field now before sending the request to
		 * the driver to avoid a race condition
		 */
		(void) suword32(&cbp->aio_resultp.aio_errno,
		    EINPROGRESS);

		reqp->aio_req_iocb.iocb32 = (caddr32_t)(uintptr_t)cbp;

		event = (mode == LIO_READ)? AIOAREAD : AIOAWRITE;
		aio_port = (aiocb->aio_sigevent.sigev_notify == SIGEV_PORT);
		aio_thread = (aiocb->aio_sigevent.sigev_notify == SIGEV_THREAD);
		if (aio_port | aio_thread) {
			port_kevent_t *lpkevp;
			/*
			 * Prepare data to send with each aiocb completed.
			 */
#ifdef	_SYSCALL32_IMPL
			if (aio_port) {
				void *paddr = (void *)(uintptr_t)
				    aiocb32->aio_sigevent.sigev_value.sival_ptr;
				if (copyin(paddr, &pnotify, sizeof (pnotify)))
					error = EFAULT;
			} else {	/* aio_thread */
				pnotify.portnfy_port =
				    aiocb32->aio_sigevent.sigev_signo;
				pnotify.portnfy_user =
				    aiocb32->aio_sigevent.sigev_value.sival_ptr;
			}
#else
			if (aio_port) {
				void *paddr =
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
				if (copyin(paddr, &pnotify, sizeof (pnotify)))
					error = EFAULT;
			} else {	/* aio_thread */
				pnotify.portnfy_port =
				    aiocb->aio_sigevent.sigev_signo;
				pnotify.portnfy_user =
				    aiocb->aio_sigevent.sigev_value.sival_ptr;
			}
#endif	/* _SYSCALL32_IMPL */
			if (error)
				/* EMPTY */;
			else if (pkevtp != NULL &&
			    pnotify.portnfy_port == lio_head_port)
				error = port_dup_event(pkevtp, &lpkevp,
				    PORT_ALLOC_DEFAULT);
			else
				error = port_alloc_event(pnotify.portnfy_port,
				    PORT_ALLOC_DEFAULT, PORT_SOURCE_AIO,
				    &lpkevp);
			if (error == 0) {
				port_init_event(lpkevp, (uintptr_t)cbp,
				    (void *)(uintptr_t)pnotify.portnfy_user,
				    aio_port_callback, reqp);
				lpkevp->portkev_events = event;
				reqp->aio_req_portkev = lpkevp;
				reqp->aio_req_port = pnotify.portnfy_port;
			}
		}

		/*
		 * send the request to driver.
		 */
		if (error == 0) {
			if (aiocb->aio_nbytes == 0) {
				clear_active_fd(aiocb->aio_fildes);
				aio_zerolen(reqp);
				continue;
			}
			error = (*aio_func)(vp, (aio_req_t *)&reqp->aio_req,
			    CRED());
		}

		/*
		 * the fd's ref count is not decremented until the IO has
		 * completed unless there was an error.
		 */
		if (error) {
			releasef(aiocb->aio_fildes);
			lio_set_uerror(&cbp->aio_resultp, error);
			if (head) {
				mutex_enter(&aiop->aio_mutex);
				head->lio_nent--;
				head->lio_refcnt--;
				mutex_exit(&aiop->aio_mutex);
			}
			if (error == ENOTSUP)
				aio_notsupported++;
			else
				aio_errors++;
			lio_set_error(reqp, portused);
		} else {
			clear_active_fd(aiocb->aio_fildes);
		}
	}

	if (aio_notsupported) {
		error = ENOTSUP;
	} else if (aio_errors) {
		/*
		 * return EIO if any request failed
		 */
		error = EIO;
	}

	if (mode_arg == LIO_WAIT) {
		mutex_enter(&aiop->aio_mutex);
		while (head->lio_refcnt > 0) {
			if (!cv_wait_sig(&head->lio_notify, &aiop->aio_mutex)) {
				mutex_exit(&aiop->aio_mutex);
				error = EINTR;
				goto done;
			}
		}
		mutex_exit(&aiop->aio_mutex);
		alio_cleanup(aiop, (aiocb_t **)cbplist, nent, AIO_32);
	}

done:
	kmem_free(cbplist, ssize);
	if (deadhead) {
		if (head->lio_sigqp)
			kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
		if (head->lio_portkev)
			port_free_event(head->lio_portkev);
		kmem_free(head, sizeof (aio_lio_t));
	}
	return (error);
}
#ifdef _SYSCALL32_IMPL
void
aiocb_32ton(aiocb32_t *src, aiocb_t *dest)
{
	dest->aio_fildes = src->aio_fildes;
	dest->aio_buf = (caddr_t)(uintptr_t)src->aio_buf;
	dest->aio_nbytes = (size_t)src->aio_nbytes;
	dest->aio_offset = (off_t)src->aio_offset;
	dest->aio_reqprio = src->aio_reqprio;
	dest->aio_sigevent.sigev_notify = src->aio_sigevent.sigev_notify;
	dest->aio_sigevent.sigev_signo = src->aio_sigevent.sigev_signo;

	/*
	 * See comment in sigqueue32() on handling of 32-bit
	 * sigvals in a 64-bit kernel.
	 */
	dest->aio_sigevent.sigev_value.sival_int =
	    (int)src->aio_sigevent.sigev_value.sival_int;
	dest->aio_sigevent.sigev_notify_function = (void (*)(union sigval))
	    (uintptr_t)src->aio_sigevent.sigev_notify_function;
	dest->aio_sigevent.sigev_notify_attributes = (pthread_attr_t *)
	    (uintptr_t)src->aio_sigevent.sigev_notify_attributes;
	dest->aio_sigevent.__sigev_pad2 = src->aio_sigevent.__sigev_pad2;
	dest->aio_lio_opcode = src->aio_lio_opcode;
	dest->aio_state = src->aio_state;
	dest->aio__pad[0] = src->aio__pad[0];
}
#endif /* _SYSCALL32_IMPL */
/*
 * aio_port_callback() is called just before the event is retrieved from the
 * port. The task of this callback function is to finish the work of the
 * transaction for the application, it means :
 * - copyout transaction data to the application
 *	(this thread is running in the right process context)
 * - keep trace of the transaction (update of counters).
 * - free allocated buffers
 * The aiocb pointer is the object element of the port_kevent_t structure.
 *
 * flag values:
 *	PORT_CALLBACK_DEFAULT : do copyout and free resources
 *	PORT_CALLBACK_CLOSE   : don't do copyout, free resources
 */
/*ARGSUSED*/
int
aio_port_callback(void *arg, int *events, pid_t pid, int flag, void *evp)
{
	aio_t		*aiop = curproc->p_aio;
	aio_req_t	*reqp = arg;
	struct	iovec	*iov;
	struct	buf	*bp;
	void		*resultp;

	if (pid != curproc->p_pid) {
		/* wrong proc !!, can not deliver data here ... */
		return (EACCES);
	}

	mutex_enter(&aiop->aio_portq_mutex);
	reqp->aio_req_portkev = NULL;
	aio_req_remove_portq(aiop, reqp); /* remove request from portq */
	mutex_exit(&aiop->aio_portq_mutex);
	aphysio_unlock(reqp);		/* unlock used pages */
	mutex_enter(&aiop->aio_mutex);
	if (reqp->aio_req_flags & AIO_COPYOUTDONE) {
		aio_req_free_port(aiop, reqp);	/* back to free list */
		mutex_exit(&aiop->aio_mutex);
		return (0);
	}

	iov = reqp->aio_req_uio.uio_iov;
	bp = &reqp->aio_req_buf;
	resultp = (void *)reqp->aio_req_resultp;
	if (flag == PORT_CALLBACK_DEFAULT)
		aio_copyout_result_port(iov, bp, resultp);
	aio_req_free_port(aiop, reqp);	/* request struct back to free list */
	mutex_exit(&aiop->aio_mutex);
	return (0);
}
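/*
 * Illustrative userland sketch (excluded from the build): the callback
 * above runs when a SIGEV_PORT completion event is pulled out of an event
 * port.  On the application side that looks roughly like the code below;
 * port_notify_t, PORT_SOURCE_AIO and port_get() are the documented event
 * port interfaces, and the aiocb address travels in the event object as
 * set up by port_init_event() in the submission paths above.
 */
#if 0
#include <aio.h>
#include <port.h>
#include <string.h>

static char buf[4096];

int
example_port_notify(int fd)
{
	struct aiocb	cb;
	port_notify_t	pn;
	port_event_t	pe;
	int		port = port_create();

	if (port == -1)
		return (-1);

	(void) memset(&cb, 0, sizeof (cb));
	cb.aio_fildes = fd;
	cb.aio_buf = buf;
	cb.aio_nbytes = sizeof (buf);
	cb.aio_offset = 0;

	/* ask for completion delivery through the event port */
	pn.portnfy_port = port;
	pn.portnfy_user = &cb;
	cb.aio_sigevent.sigev_notify = SIGEV_PORT;
	cb.aio_sigevent.sigev_value.sival_ptr = &pn;

	if (aio_read(&cb) != 0)
		return (-1);

	/* blocks until the AIO completion event is retrieved */
	if (port_get(port, &pe, NULL) != 0)
		return (-1);

	/* pe.portev_source is PORT_SOURCE_AIO; pe.portev_user is &cb here */
	return ((int)aio_return(&cb));
}
#endif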