/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
#include <sys/types.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/sysmacros.h>
#include <sys/cmn_err.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/debug.h>
#include <sys/aio_impl.h>
#include <sys/fs/snode.h>
#include <sys/siginfo.h>
#include <sys/cpuvar.h>
#include <sys/tnf_probe.h>
#include <sys/conf.h>
#include <sys/sdt.h>
int aphysio(int (*)(), int (*)(), dev_t, int, void (*)(), struct aio_req *);
void aio_done(struct buf *);
void aphysio_unlock(aio_req_t *);
void aio_cleanup(int);
void aio_cleanup_exit(void);

/*
 * private functions.
 */
static void aio_sigev_send(proc_t *, sigqueue_t *);
static void aio_hash_delete(aio_t *, aio_req_t *);
static void aio_lio_free(aio_t *, aio_lio_t *);
static int aio_cleanup_cleanupq(aio_t *, aio_req_t *, int);
static int aio_cleanup_notifyq(aio_t *, aio_req_t *, int);
static void aio_cleanup_pollq(aio_t *, aio_req_t *, int);
static void aio_cleanup_portq(aio_t *, aio_req_t *, int);
/*
 * async version of physio() that doesn't wait synchronously
 * for the driver's strategy routine to complete.
 */
int
aphysio(
	int (*strategy)(struct buf *),
	int (*cancel)(struct buf *),
	dev_t dev,
	int rw,
	void (*mincnt)(struct buf *),
	struct aio_req *aio)
{
	struct uio *uio = aio->aio_uio;
	aio_req_t *reqp = (aio_req_t *)aio->aio_private;
	struct buf *bp = &reqp->aio_req_buf;
	struct iovec *iov;
	struct as *as;
	char *a;
	int	error;
	size_t	c;
	struct page **pplist;
	struct dev_ops *ops = devopsp[getmajor(dev)];

	if (uio->uio_loffset < 0)
		return (EINVAL);
#ifdef	_ILP32
	/*
	 * For 32-bit kernels, check against SPEC_MAXOFFSET_T which represents
	 * the maximum size that can be supported by the IO subsystem.
	 * XXX this code assumes a D_64BIT driver.
	 */
	if (uio->uio_loffset > SPEC_MAXOFFSET_T)
		return (EINVAL);
#endif	/* _ILP32 */

	TNF_PROBE_5(aphysio_start, "kaio", /* CSTYLED */,
	    tnf_opaque, bp, bp,
	    tnf_device, device, dev,
	    tnf_offset, blkno, btodt(uio->uio_loffset),
	    tnf_size, size, uio->uio_iov->iov_len,
	    tnf_bioflags, rw, rw);

	if (rw == B_READ) {
		CPU_STATS_ADD_K(sys, phread, 1);
	} else {
		CPU_STATS_ADD_K(sys, phwrite, 1);
	}

	iov = uio->uio_iov;
	sema_init(&bp->b_sem, 0, NULL, SEMA_DEFAULT, NULL);
	sema_init(&bp->b_io, 0, NULL, SEMA_DEFAULT, NULL);

	bp->b_error = 0;
	bp->b_flags = B_BUSY | B_PHYS | B_ASYNC | rw;
	bp->b_edev = dev;
	bp->b_dev = cmpdev(dev);
	bp->b_lblkno = btodt(uio->uio_loffset);
	bp->b_offset = uio->uio_loffset;
	(void) ops->devo_getinfo(NULL, DDI_INFO_DEVT2DEVINFO,
	    (void *)bp->b_edev, (void **)&bp->b_dip);

	/*
	 * Clustering: Clustering can set the b_iodone, b_forw and
	 * b_proc fields to cluster-specific values.
	 */
	if (bp->b_iodone == NULL) {
		bp->b_iodone = (int (*)()) aio_done;
		/* b_forw points at an aio_req_t structure */
		bp->b_forw = (struct buf *)reqp;
		bp->b_proc = curproc;
	}

	a = bp->b_un.b_addr = iov->iov_base;
	c = bp->b_bcount = iov->iov_len;

	(*mincnt)(bp);
	if (bp->b_bcount != iov->iov_len)
		return (ENOTSUP);	/* no multi-segment aio support */

	as = bp->b_proc->p_as;

	error = as_pagelock(as, &pplist, a,
	    c, rw == B_READ ? S_WRITE : S_READ);
	if (error != 0) {
		bp->b_flags |= B_ERROR;
		bp->b_error = error;
		bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
		return (error);
	}
	reqp->aio_req_flags |= AIO_PAGELOCKDONE;
	bp->b_shadow = pplist;
	if (pplist != NULL) {
		bp->b_flags |= B_SHADOW;
	}

	if (cancel != anocancel)
		cmn_err(CE_PANIC,
		    "aphysio: cancellation not supported, use anocancel");

	reqp->aio_req_cancel = cancel;

	DTRACE_IO1(start, struct buf *, bp);

	return ((*strategy)(bp));
}
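/*
 * Rough sketch of the life cycle of a request that goes through
 * aphysio(), summarizing the code in this file (the submission syscalls
 * and driver internals live elsewhere):
 *
 *	kaio submission --> aphysio() locks the user pages (as_pagelock)
 *	    --> (*strategy)(bp) hands the buf to the driver
 *	    --> device interrupt --> biodone(bp)
 *	    --> bp->b_iodone, i.e. aio_done(), queues the request and
 *	        posts the signal/port/poll notification
 *
 * The pages stay locked until aphysio_unlock() runs from one of the
 * cleanup paths further down in this file.
 */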
/* ARGSUSED */
int
anocancel(struct buf *bp)
{
	return (ENXIO);
}
/*
 * Called from biodone().
 * Notify process that a pending AIO has finished.
 */

/*
 * Clustering: This function is made non-static as it is used
 * by clustering s/w as contract private interface.
 */
void
aio_done(struct buf *bp)
{
	proc_t *p;
	struct as *as;
	aio_req_t *reqp;
	aio_lio_t *head = NULL;
	aio_t *aiop;
	sigqueue_t *sigev = NULL;
	sigqueue_t *lio_sigev = NULL;
	port_kevent_t *pkevp = NULL;
	port_kevent_t *lio_pkevp = NULL;
	int fd;
	int cleanupqflag;
	int pollqflag;
	int portevpend;
	void (*func)();
	int use_port = 0;
	int reqp_flags = 0;
	int send_signal = 0;

	p = bp->b_proc;
	as = p->p_as;
	reqp = (aio_req_t *)bp->b_forw;
	fd = reqp->aio_req_fd;

	TNF_PROBE_5(aphysio_end, "kaio", /* CSTYLED */,
	    tnf_opaque, bp, bp,
	    tnf_device, device, bp->b_edev,
	    tnf_offset, blkno, btodt(reqp->aio_req_uio.uio_loffset),
	    tnf_size, size, reqp->aio_req_uio.uio_iov->iov_len,
	    tnf_bioflags, rw, (bp->b_flags & (B_READ|B_WRITE)));

	/*
	 * mapout earlier so that more kmem is available when aio is
	 * heavily used. bug #1262082
	 */
	if (bp->b_flags & B_REMAPPED)
		bp_mapout(bp);

	/* decrement fd's ref count by one, now that aio request is done. */
	areleasef(fd, P_FINFO(p));

	aiop = p->p_aio;
	ASSERT(aiop != NULL);

	mutex_enter(&aiop->aio_portq_mutex);
	mutex_enter(&aiop->aio_mutex);
	ASSERT(aiop->aio_pending > 0);
	ASSERT(reqp->aio_req_flags & AIO_PENDING);
	aiop->aio_pending--;
	reqp->aio_req_flags &= ~AIO_PENDING;
	reqp_flags = reqp->aio_req_flags;
	if ((pkevp = reqp->aio_req_portkev) != NULL) {
		/* Event port notification is desired for this transaction */
		if (reqp->aio_req_flags & AIO_CLOSE_PORT) {
			/*
			 * The port is being closed and it is waiting for
			 * pending asynchronous I/O transactions to complete.
			 */
			portevpend = --aiop->aio_portpendcnt;
			aio_deq(&aiop->aio_portpending, reqp);
			aio_enq(&aiop->aio_portq, reqp, 0);
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_portq_mutex);
			port_send_event(pkevp);
			if (portevpend == 0)
				cv_broadcast(&aiop->aio_portcv);
			return;
		}

		if (aiop->aio_flags & AIO_CLEANUP) {
			/*
			 * aio_cleanup_thread() is waiting for completion of
			 * transactions.
			 */
			mutex_enter(&as->a_contents);
			aio_deq(&aiop->aio_portpending, reqp);
			aio_enq(&aiop->aio_portcleanupq, reqp, 0);
			cv_signal(&aiop->aio_cleanupcv);
			mutex_exit(&as->a_contents);
			mutex_exit(&aiop->aio_mutex);
			mutex_exit(&aiop->aio_portq_mutex);
			return;
		}

		aio_deq(&aiop->aio_portpending, reqp);
		aio_enq(&aiop->aio_portq, reqp, 0);

		use_port = 1;
	} else {
		/*
		 * when the AIO_CLEANUP flag is enabled for this
		 * process, or when the AIO_POLL bit is set for
		 * this request, special handling is required.
		 * otherwise the request is put onto the doneq.
		 */
		cleanupqflag = (aiop->aio_flags & AIO_CLEANUP);
		pollqflag = (reqp->aio_req_flags & AIO_POLL);
		if (cleanupqflag | pollqflag) {

			if (cleanupqflag)
				mutex_enter(&as->a_contents);

			/*
			 * requests with their AIO_POLL bit set are put
			 * on the pollq, requests with sigevent structures
			 * or with listio heads are put on the notifyq, and
			 * the remaining requests don't require any special
			 * cleanup handling, so they're put onto the default
			 * cleanupq.
			 */
			if (pollqflag)
				aio_enq(&aiop->aio_pollq, reqp, AIO_POLLQ);
			else if (reqp->aio_req_sigqp || reqp->aio_req_lio)
				aio_enq(&aiop->aio_notifyq, reqp, AIO_NOTIFYQ);
			else
				aio_enq(&aiop->aio_cleanupq, reqp,
				    AIO_CLEANUPQ);

			if (cleanupqflag) {
				cv_signal(&aiop->aio_cleanupcv);
				mutex_exit(&as->a_contents);
				mutex_exit(&aiop->aio_mutex);
				mutex_exit(&aiop->aio_portq_mutex);
			} else {
				ASSERT(pollqflag);
				/* block aio_cleanup_exit until we're done */
				aiop->aio_flags |= AIO_DONE_ACTIVE;
				mutex_exit(&aiop->aio_mutex);
				mutex_exit(&aiop->aio_portq_mutex);
				/*
				 * let the cleanup processing happen from an AST
				 * set an AST on all threads in this process
				 */
				mutex_enter(&p->p_lock);
				set_proc_ast(p);
				mutex_exit(&p->p_lock);
				mutex_enter(&aiop->aio_mutex);
				/* wakeup anybody waiting in aiowait() */
				cv_broadcast(&aiop->aio_waitcv);

				/* wakeup aio_cleanup_exit if needed */
				if (aiop->aio_flags & AIO_CLEANUP)
					cv_signal(&aiop->aio_cleanupcv);
				aiop->aio_flags &= ~AIO_DONE_ACTIVE;
				mutex_exit(&aiop->aio_mutex);
			}
			return;
		}

		/*
		 * save req's sigevent pointer, and check its
		 * value after releasing aio_mutex lock.
		 */
		sigev = reqp->aio_req_sigqp;
		reqp->aio_req_sigqp = NULL;

		/* put request on done queue. */
		aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
	} /* portkevent */

	/*
	 * when list IO notification is enabled, a notification or
	 * signal is sent only when all entries in the list are done.
	 */
	if ((head = reqp->aio_req_lio) != NULL) {
		ASSERT(head->lio_refcnt > 0);
		if (--head->lio_refcnt == 0) {
			/*
			 * save lio's sigevent pointer, and check
			 * its value after releasing aio_mutex lock.
			 */
			lio_sigev = head->lio_sigqp;
			head->lio_sigqp = NULL;
			cv_signal(&head->lio_notify);
			if (head->lio_port >= 0 &&
			    (lio_pkevp = head->lio_portkev) != NULL)
				head->lio_port = -1;
		}
	}

	/*
	 * if AIO_WAITN set then
	 * send signal only when we reached the
	 * required amount of IO's finished
	 * or when all IO's are done
	 */
	if (aiop->aio_flags & AIO_WAITN) {
		if (aiop->aio_waitncnt > 0)
			aiop->aio_waitncnt--;
		if (aiop->aio_pending == 0 ||
		    aiop->aio_waitncnt == 0)
			cv_broadcast(&aiop->aio_waitcv);
	} else {
		cv_broadcast(&aiop->aio_waitcv);
	}

	/*
	 * No need to set this flag for pollq, portq, lio requests.
	 * If this is an old Solaris aio request, and the process has
	 * a SIGIO signal handler enabled, then send a SIGIO signal.
	 */
	if (!sigev && !use_port && head == NULL &&
	    (reqp->aio_req_flags & AIO_SOLARIS) &&
	    (func = PTOU(p)->u_signal[SIGIO - 1]) != SIG_DFL &&
	    (func != SIG_IGN)) {
		send_signal = 1;
		reqp->aio_req_flags |= AIO_SIGNALLED;
	}

	mutex_exit(&aiop->aio_mutex);
	mutex_exit(&aiop->aio_portq_mutex);

	/*
	 * Could the cleanup thread be waiting for AIO with locked
	 * resources to finish?
	 * Ideally in that case cleanup thread should block on cleanupcv,
	 * but there is a window, where it could miss to see a new aio
	 * request that sneaked in.
	 */
	mutex_enter(&as->a_contents);
	if ((reqp_flags & AIO_PAGELOCKDONE) && AS_ISUNMAPWAIT(as))
		cv_broadcast(&as->a_cv);
	mutex_exit(&as->a_contents);

	if (sigev)
		aio_sigev_send(p, sigev);
	else if (send_signal)
		psignal(p, SIGIO);

	if (pkevp)
		port_send_event(pkevp);
	if (lio_sigev)
		aio_sigev_send(p, lio_sigev);
	if (lio_pkevp)
		port_send_event(lio_pkevp);
}
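/*
 * For reference, the routing decisions made by aio_done() above can be
 * summarized as:
 *
 *	event port notification requested   -> aio_portq (or, while
 *	                                       AIO_CLEANUP is active,
 *	                                       aio_portcleanupq)
 *	otherwise, AIO_POLL or AIO_CLEANUP:
 *	    AIO_POLL set                    -> aio_pollq
 *	    sigevent or listio head present -> aio_notifyq
 *	    neither                         -> aio_cleanupq
 *	plain completed request             -> aio_doneq
 *
 * The poll, notify and cleanup queues are drained by aio_cleanup() and
 * its helpers below; the doneq is consumed by the aiowait()/aio_req_done()
 * paths in the kaio system call code (not in this file).
 */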
/*
 * Send a queued signal to the specified process.  The sigev argument
 * must be non-NULL; callers only call in when a sigevent was set up
 * for the request.
 */
static void
aio_sigev_send(proc_t *p, sigqueue_t *sigev)
{
	ASSERT(sigev != NULL);

	mutex_enter(&p->p_lock);
	sigaddqa(p, NULL, sigev);
	mutex_exit(&p->p_lock);
}
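/*
 * Note that sigaddqa() queues the caller-supplied sigqueue_t directly
 * rather than allocating a copy, so the sigev passed in here is the one
 * set up at submission time and is consumed by signal delivery; the
 * request-free paths below only kmem_free() a sigqueue that was never
 * handed off this way.
 */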
/*
 * special case handling for zero length requests. the aio request
 * short circuits the normal completion path since all that's required
 * to complete this request is to copyout a zero to the aio request's
 * return value.
 */
void
aio_zerolen(aio_req_t *reqp)
{
	struct buf *bp = &reqp->aio_req_buf;

	reqp->aio_req_flags |= AIO_ZEROLEN;

	/* b_forw points at an aio_req_t structure */
	bp->b_forw = (struct buf *)reqp;
	bp->b_proc = curproc;

	bp->b_resid = 0;
	bp->b_flags = 0;

	aio_done(bp);
}
/*
 * unlock pages previously locked by as_pagelock
 */
void
aphysio_unlock(aio_req_t *reqp)
{
	struct buf *bp;
	struct iovec *iov;
	int flags;

	if (reqp->aio_req_flags & AIO_PHYSIODONE)
		return;

	reqp->aio_req_flags |= AIO_PHYSIODONE;

	if (reqp->aio_req_flags & AIO_ZEROLEN)
		return;

	bp = &reqp->aio_req_buf;
	iov = reqp->aio_req_uio.uio_iov;
	flags = (((bp->b_flags & B_READ) == B_READ) ? S_WRITE : S_READ);
	if (reqp->aio_req_flags & AIO_PAGELOCKDONE) {
		as_pageunlock(bp->b_proc->p_as,
		    bp->b_flags & B_SHADOW ? bp->b_shadow : NULL,
		    iov->iov_base, iov->iov_len, flags);
		reqp->aio_req_flags &= ~AIO_PAGELOCKDONE;
	}
	bp->b_flags &= ~(B_BUSY|B_WANTED|B_PHYS|B_SHADOW);
	bp->b_flags |= B_DONE;
}
/*
 * deletes a request's id from the hash table of outstanding io.
 */
static void
aio_hash_delete(aio_t *aiop, struct aio_req_t *reqp)
{
	long index;
	aio_result_t *resultp = reqp->aio_req_resultp;
	aio_req_t *current;
	aio_req_t **nextp;

	index = AIO_HASH(resultp);
	nextp = (aiop->aio_hash + index);
	while ((current = *nextp) != NULL) {
		if (current->aio_req_resultp == resultp) {
			*nextp = current->aio_hash_next;
			return;
		}
		nextp = &current->aio_hash_next;
	}
}
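/*
 * The hash table indexed above is keyed by the user-level aio_result_t
 * address (AIO_HASH(resultp)), which is how aio_req_done()/aio_req_find()
 * in the kaio syscall code map a result pointer back to its request;
 * deletion simply unlinks the entry from the singly linked
 * aio_hash_next chain.
 */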
/*
 * Put a list head struct onto its free list.
 */
static void
aio_lio_free(aio_t *aiop, aio_lio_t *head)
{
	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if (head->lio_sigqp != NULL)
		kmem_free(head->lio_sigqp, sizeof (sigqueue_t));
	head->lio_next = aiop->aio_lio_free;
	aiop->aio_lio_free = head;
}
/*
 * Put a reqp onto the freelist.
 */
void
aio_req_free(aio_t *aiop, aio_req_t *reqp)
{
	aio_lio_t *liop;

	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	if (reqp->aio_req_portkev) {
		port_free_event(reqp->aio_req_portkev);
		reqp->aio_req_portkev = NULL;
	}

	if ((liop = reqp->aio_req_lio) != NULL) {
		if (--liop->lio_nent == 0)
			aio_lio_free(aiop, liop);
		reqp->aio_req_lio = NULL;
	}
	if (reqp->aio_req_sigqp != NULL) {
		kmem_free(reqp->aio_req_sigqp, sizeof (sigqueue_t));
		reqp->aio_req_sigqp = NULL;
	}
	reqp->aio_req_next = aiop->aio_free;
	reqp->aio_req_prev = NULL;
	aiop->aio_free = reqp;
	aiop->aio_outstanding--;
	if (aiop->aio_outstanding == 0)
		cv_broadcast(&aiop->aio_waitcv);
	aio_hash_delete(aiop, reqp);
}
/*
 * Put a reqp onto the freelist.
 */
void
aio_req_free_port(aio_t *aiop, aio_req_t *reqp)
{
	ASSERT(MUTEX_HELD(&aiop->aio_mutex));

	reqp->aio_req_next = aiop->aio_free;
	reqp->aio_req_prev = NULL;
	aiop->aio_free = reqp;
	aiop->aio_outstanding--;
	aio_hash_delete(aiop, reqp);
}
#ifdef DEBUG
/*
 * Verify the integrity of a queue.
 */
static void
aio_verify_queue(aio_req_t *head,
	aio_req_t *entry_present, aio_req_t *entry_missing)
{
	aio_req_t *reqp;
	int found = 0;
	int present = 0;

	if ((reqp = head) != NULL) {
		do {
			ASSERT(reqp->aio_req_prev->aio_req_next == reqp);
			ASSERT(reqp->aio_req_next->aio_req_prev == reqp);
			if (entry_present == reqp)
				found++;
			if (entry_missing == reqp)
				present++;
		} while ((reqp = reqp->aio_req_next) != head);
	}

	ASSERT(entry_present == NULL || found == 1);
	ASSERT(entry_missing == NULL || present == 0);
}
#else
#define	aio_verify_queue(x, y, z)
#endif
/*
 * Put a request onto the tail of a queue.
 */
void
aio_enq(aio_req_t **qhead, aio_req_t *reqp, int qflg_new)
{
	aio_req_t *head;
	aio_req_t *prev;

	aio_verify_queue(*qhead, NULL, reqp);

	if ((head = *qhead) == NULL) {
		reqp->aio_req_next = reqp;
		reqp->aio_req_prev = reqp;
		*qhead = reqp;
	} else {
		reqp->aio_req_next = head;
		reqp->aio_req_prev = prev = head->aio_req_prev;
		prev->aio_req_next = reqp;
		head->aio_req_prev = reqp;
	}
	reqp->aio_req_flags |= qflg_new;
}
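/*
 * All of the request queues handled by aio_enq()/aio_deq() are circular,
 * doubly linked lists threaded through aio_req_next/aio_req_prev, with
 * *qhead pointing at the oldest entry.  After enqueueing r1, r2 and r3
 * onto an empty queue the links look like:
 *
 *	*qhead --> r1 <-> r2 <-> r3 <-> (back to r1)
 *
 * so aio_enq() inserts immediately in front of *qhead, i.e. at the tail,
 * and both insertion and removal are O(1).
 */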
/*
 * Remove a request from its queue.
 */
void
aio_deq(aio_req_t **qhead, aio_req_t *reqp)
{
	aio_verify_queue(*qhead, reqp, NULL);

	if (reqp->aio_req_next == reqp) {
		*qhead = NULL;
	} else {
		reqp->aio_req_prev->aio_req_next = reqp->aio_req_next;
		reqp->aio_req_next->aio_req_prev = reqp->aio_req_prev;
		if (*qhead == reqp)
			*qhead = reqp->aio_req_next;
	}
	reqp->aio_req_next = NULL;
	reqp->aio_req_prev = NULL;
}
/*
 * concatenate a specified queue with the cleanupq. the specified
 * queue is put onto the tail of the cleanupq. all elements on the
 * specified queue should have their aio_req_flags field cleared.
 */
/*ARGSUSED*/
void
aio_cleanupq_concat(aio_t *aiop, aio_req_t *q2, int qflg)
{
	aio_req_t *cleanupqhead, *q2tail;
	aio_req_t *reqp = q2;

	do {
		ASSERT(reqp->aio_req_flags & qflg);
		reqp->aio_req_flags &= ~qflg;
		reqp->aio_req_flags |= AIO_CLEANUPQ;
	} while ((reqp = reqp->aio_req_next) != q2);

	cleanupqhead = aiop->aio_cleanupq;
	if (cleanupqhead == NULL)
		aiop->aio_cleanupq = q2;
	else {
		cleanupqhead->aio_req_prev->aio_req_next = q2;
		q2tail = q2->aio_req_prev;
		q2tail->aio_req_next = cleanupqhead;
		q2->aio_req_prev = cleanupqhead->aio_req_prev;
		cleanupqhead->aio_req_prev = q2tail;
	}
}
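/*
 * The splice performed above is constant time: the old cleanupq tail is
 * pointed at the head of q2 and the tail of q2 back at the cleanupq
 * head, so the two circular lists become one without walking either of
 * them; the do/while loop at the top only retags the q2 elements from
 * qflg to AIO_CLEANUPQ.
 */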
/*
 * cleanup aio requests that are on the per-process poll queue.
 */
void
aio_cleanup(int flag)
{
	aio_t *aiop = curproc->p_aio;
	aio_req_t *pollqhead, *cleanupqhead, *notifyqhead;
	aio_req_t *cleanupport;
	aio_req_t *portq = NULL;
	void (*func)();
	int signalled = 0;
	int qflag = 0;
	int exitflg;

	ASSERT(aiop != NULL);

	if (flag == AIO_CLEANUP_EXIT)
		exitflg = AIO_CLEANUP_EXIT;
	else
		exitflg = 0;

	/*
	 * We need to get the aio_cleanupq_mutex because we are calling
	 * aio_cleanup_cleanupq()
	 */
	mutex_enter(&aiop->aio_cleanupq_mutex);
	/*
	 * take all the requests off the cleanupq, the notifyq,
	 * and the pollq.
	 */
	mutex_enter(&aiop->aio_mutex);
	if ((cleanupqhead = aiop->aio_cleanupq) != NULL) {
		aiop->aio_cleanupq = NULL;
		qflag++;
	}
	if ((notifyqhead = aiop->aio_notifyq) != NULL) {
		aiop->aio_notifyq = NULL;
		qflag++;
	}
	if ((pollqhead = aiop->aio_pollq) != NULL) {
		aiop->aio_pollq = NULL;
		qflag++;
	}
	if (flag) {
		if ((portq = aiop->aio_portq) != NULL)
			qflag++;

		if ((cleanupport = aiop->aio_portcleanupq) != NULL) {
			aiop->aio_portcleanupq = NULL;
			qflag++;
		}
	}
	mutex_exit(&aiop->aio_mutex);

	/*
	 * return immediately if cleanupq, pollq, and
	 * notifyq are all empty. someone else must have
	 * emptied them.
	 */
	if (!qflag) {
		mutex_exit(&aiop->aio_cleanupq_mutex);
		return;
	}

	/*
	 * do cleanup for the various queues.
	 */
	if (cleanupqhead)
		signalled = aio_cleanup_cleanupq(aiop, cleanupqhead, exitflg);
	mutex_exit(&aiop->aio_cleanupq_mutex);
	if (notifyqhead)
		signalled = aio_cleanup_notifyq(aiop, notifyqhead, exitflg);
	if (pollqhead)
		aio_cleanup_pollq(aiop, pollqhead, exitflg);
	if (flag && (cleanupport || portq))
		aio_cleanup_portq(aiop, cleanupport, exitflg);

	if (exitflg)
		return;

	/*
	 * If we have an active aio_cleanup_thread it's possible for
	 * this routine to push something on to the done queue after
	 * an aiowait/aiosuspend thread has already decided to block.
	 * This being the case, we need a cv_broadcast here to wake
	 * these threads up. It is simpler and cleaner to do this
	 * broadcast here than in the individual cleanup routines.
	 */

	mutex_enter(&aiop->aio_mutex);
	/*
	 * If there has never been an old solaris aio request
	 * issued by this process, then do not send a SIGIO signal.
	 */
	if (!(aiop->aio_flags & AIO_SOLARIS_REQ))
		signalled = 1;
	cv_broadcast(&aiop->aio_waitcv);
	mutex_exit(&aiop->aio_mutex);

	/*
	 * Only if the process wasn't already signalled,
	 * determine if a SIGIO signal should be delivered.
	 */
	if (!signalled &&
	    (func = PTOU(curproc)->u_signal[SIGIO - 1]) != SIG_DFL &&
	    func != SIG_IGN)
		psignal(curproc, SIGIO);
}
/*
 * Do cleanup for every element of the port cleanup queue.
 */
static void
aio_cleanup_portq(aio_t *aiop, aio_req_t *cleanupq, int exitflag)
{
	aio_req_t	*reqp;
	aio_req_t	*next;
	aio_req_t	*headp;
	aio_lio_t	*liop;

	/* first check the portq */
	if (exitflag || ((aiop->aio_flags & AIO_CLEANUP_PORT) == 0)) {
		mutex_enter(&aiop->aio_mutex);
		if (aiop->aio_flags & AIO_CLEANUP)
			aiop->aio_flags |= AIO_CLEANUP_PORT;
		mutex_exit(&aiop->aio_mutex);

		/*
		 * It is not allowed to hold locks during aphysio_unlock().
		 * The aio_done() interrupt function will try to acquire
		 * aio_mutex and aio_portq_mutex.  Therefore we disconnect
		 * the portq list from the aiop for the duration of the
		 * aphysio_unlock() loop below.
		 */
		mutex_enter(&aiop->aio_portq_mutex);
		headp = aiop->aio_portq;
		aiop->aio_portq = NULL;
		mutex_exit(&aiop->aio_portq_mutex);
		if ((reqp = headp) != NULL) {
			do {
				next = reqp->aio_req_next;
				aphysio_unlock(reqp);
				if (exitflag) {
					mutex_enter(&aiop->aio_mutex);
					aio_req_free(aiop, reqp);
					mutex_exit(&aiop->aio_mutex);
				}
			} while ((reqp = next) != headp);
		}

		if (headp != NULL && exitflag == 0) {
			/* move unlocked requests back to the port queue */
			aio_req_t *newq;

			mutex_enter(&aiop->aio_portq_mutex);
			if ((newq = aiop->aio_portq) != NULL) {
				aio_req_t *headprev = headp->aio_req_prev;
				aio_req_t *newqprev = newq->aio_req_prev;

				headp->aio_req_prev = newqprev;
				newq->aio_req_prev = headprev;
				headprev->aio_req_next = newq;
				newqprev->aio_req_next = headp;
			}
			aiop->aio_portq = headp;
			cv_broadcast(&aiop->aio_portcv);
			mutex_exit(&aiop->aio_portq_mutex);
		}
	}

	/* now check the port cleanup queue */
	if ((reqp = cleanupq) == NULL)
		return;
	do {
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		if (exitflag) {
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		} else {
			mutex_enter(&aiop->aio_portq_mutex);
			aio_enq(&aiop->aio_portq, reqp, 0);
			mutex_exit(&aiop->aio_portq_mutex);
			port_send_event(reqp->aio_req_portkev);
			if ((liop = reqp->aio_req_lio) != NULL) {
				int send_event = 0;

				mutex_enter(&aiop->aio_mutex);
				ASSERT(liop->lio_refcnt > 0);
				if (--liop->lio_refcnt == 0) {
					if (liop->lio_port >= 0 &&
					    liop->lio_portkev) {
						liop->lio_port = -1;
						send_event = 1;
					}
				}
				mutex_exit(&aiop->aio_mutex);
				if (send_event)
					port_send_event(liop->lio_portkev);
			}
		}
	} while ((reqp = next) != cleanupq);
}
/*
 * Do cleanup for every element of the cleanupq.
 */
static int
aio_cleanup_cleanupq(aio_t *aiop, aio_req_t *qhead, int exitflg)
{
	aio_req_t *reqp, *next;
	int signalled = 0;

	ASSERT(MUTEX_HELD(&aiop->aio_cleanupq_mutex));

	/*
	 * Since aio_req_done() or aio_req_find() use the HASH list to find
	 * the required requests, they could potentially take away elements
	 * if they are already done (AIO_DONEQ is set).
	 * The aio_cleanupq_mutex protects the queue for the duration of the
	 * loop from aio_req_done() and aio_req_find().
	 */
	if ((reqp = qhead) == NULL)
		return (0);
	do {
		ASSERT(reqp->aio_req_flags & AIO_CLEANUPQ);
		ASSERT(reqp->aio_req_portkev == NULL);
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		mutex_enter(&aiop->aio_mutex);
		if (exitflg)
			aio_req_free(aiop, reqp);
		else
			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
		if (!exitflg) {
			if (reqp->aio_req_flags & AIO_SIGNALLED)
				signalled++;
			else
				reqp->aio_req_flags |= AIO_SIGNALLED;
		}
		mutex_exit(&aiop->aio_mutex);
	} while ((reqp = next) != qhead);
	return (signalled);
}
/*
 * do cleanup for every element of the notify queue.
 */
static int
aio_cleanup_notifyq(aio_t *aiop, aio_req_t *qhead, int exitflg)
{
	aio_req_t *reqp, *next;
	aio_lio_t *liohead;
	sigqueue_t *sigev, *lio_sigev = NULL;
	int signalled = 0;

	if ((reqp = qhead) == NULL)
		return (0);
	do {
		ASSERT(reqp->aio_req_flags & AIO_NOTIFYQ);
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		if (exitflg) {
			/* ignore any completion notification */
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		} else {
			mutex_enter(&aiop->aio_mutex);
			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
			sigev = reqp->aio_req_sigqp;
			reqp->aio_req_sigqp = NULL;
			if ((liohead = reqp->aio_req_lio) != NULL) {
				ASSERT(liohead->lio_refcnt > 0);
				if (--liohead->lio_refcnt == 0) {
					cv_signal(&liohead->lio_notify);
					lio_sigev = liohead->lio_sigqp;
					liohead->lio_sigqp = NULL;
				}
			}
			mutex_exit(&aiop->aio_mutex);
			if (sigev) {
				signalled++;
				aio_sigev_send(reqp->aio_req_buf.b_proc,
				    sigev);
			}
			if (lio_sigev) {
				signalled++;
				aio_sigev_send(reqp->aio_req_buf.b_proc,
				    lio_sigev);
			}
		}
	} while ((reqp = next) != qhead);

	return (signalled);
}
/*
 * Do cleanup for every element of the poll queue.
 */
static void
aio_cleanup_pollq(aio_t *aiop, aio_req_t *qhead, int exitflg)
{
	aio_req_t *reqp, *next;

	/*
	 * As no other threads should be accessing the queue at this point,
	 * it isn't necessary to hold aio_mutex while we traverse its elements.
	 */
	if ((reqp = qhead) == NULL)
		return;
	do {
		ASSERT(reqp->aio_req_flags & AIO_POLLQ);
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		if (exitflg) {
			mutex_enter(&aiop->aio_mutex);
			aio_req_free(aiop, reqp);
			mutex_exit(&aiop->aio_mutex);
		} else {
			aio_copyout_result(reqp);
			mutex_enter(&aiop->aio_mutex);
			aio_enq(&aiop->aio_doneq, reqp, AIO_DONEQ);
			mutex_exit(&aiop->aio_mutex);
		}
	} while ((reqp = next) != qhead);
}
/*
 * called by exit(). waits for all outstanding kaio to finish
 * before the kaio resources are freed.
 */
void
aio_cleanup_exit(void)
{
	proc_t *p = curproc;
	aio_t *aiop = p->p_aio;
	aio_req_t *reqp, *next, *head;
	aio_lio_t *nxtlio, *liop;

	/*
	 * wait for all outstanding kaio to complete. process
	 * is now single-threaded; no other kaio requests can
	 * happen once aio_pending is zero.
	 */
	mutex_enter(&aiop->aio_mutex);
	aiop->aio_flags |= AIO_CLEANUP;
	while ((aiop->aio_pending != 0) || (aiop->aio_flags & AIO_DONE_ACTIVE))
		cv_wait(&aiop->aio_cleanupcv, &aiop->aio_mutex);
	mutex_exit(&aiop->aio_mutex);

	/* cleanup the cleanup-thread queues. */
	aio_cleanup(AIO_CLEANUP_EXIT);

	/*
	 * Although this process is now single-threaded, we
	 * still need to protect ourselves against a race with
	 * aio_cleanup_dr_delete_memory().
	 */
	mutex_enter(&p->p_lock);

	/*
	 * free up the done queue's resources.
	 */
	if ((head = aiop->aio_doneq) != NULL) {
		aiop->aio_doneq = NULL;
		reqp = head;
		do {
			next = reqp->aio_req_next;
			aphysio_unlock(reqp);
			kmem_free(reqp, sizeof (struct aio_req_t));
		} while ((reqp = next) != head);
	}
	/*
	 * release aio request freelist.
	 */
	for (reqp = aiop->aio_free; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		kmem_free(reqp, sizeof (struct aio_req_t));
	}

	/*
	 * release io list head freelist.
	 */
	for (liop = aiop->aio_lio_free; liop != NULL; liop = nxtlio) {
		nxtlio = liop->lio_next;
		kmem_free(liop, sizeof (aio_lio_t));
	}

	if (aiop->aio_iocb)
		kmem_free(aiop->aio_iocb, aiop->aio_iocbsz);

	mutex_destroy(&aiop->aio_mutex);
	mutex_destroy(&aiop->aio_portq_mutex);
	mutex_destroy(&aiop->aio_cleanupq_mutex);
	p->p_aio = NULL;
	mutex_exit(&p->p_lock);
	kmem_free(aiop, sizeof (struct aio));
}
/*
 * copy out aio request's result to a user-level result_t buffer.
 */
void
aio_copyout_result(aio_req_t *reqp)
{
	struct buf	*bp;
	struct iovec	*iov;
	void		*resultp;
	int		error;
	size_t		retval;

	if (reqp->aio_req_flags & AIO_COPYOUTDONE)
		return;

	reqp->aio_req_flags |= AIO_COPYOUTDONE;

	iov = reqp->aio_req_uio.uio_iov;
	bp = &reqp->aio_req_buf;
	/* "resultp" points to user-level result_t buffer */
	resultp = (void *)reqp->aio_req_resultp;
	if (bp->b_flags & B_ERROR) {
		if (bp->b_error)
			error = bp->b_error;
		else
			error = EIO;
		retval = (size_t)-1;
	} else {
		error = 0;
		retval = iov->iov_len - bp->b_resid;
	}
#ifdef	_SYSCALL32_IMPL
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		(void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
		(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
	} else {
		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
		    (int)retval);
		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, error);
	}
#else
	(void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
	(void) suword32(&((aio_result_t *)resultp)->aio_errno, error);
#endif
}
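/*
 * A note on the #ifdef _SYSCALL32_IMPL block above: a 64-bit kernel can
 * be completing a request on behalf of either a 64-bit or a 32-bit
 * process, so get_udatamodel() selects between the native aio_result_t
 * (stored with sulword()/suword32()) and the ILP32 aio_result32_t
 * layout; a kernel built without _SYSCALL32_IMPL only ever sees the
 * native layout.  aio_copyout_result_port() below follows the same
 * pattern.
 */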
void
aio_copyout_result_port(struct iovec *iov, struct buf *bp, void *resultp)
{
	int errno;
	size_t retval;

	if (bp->b_flags & B_ERROR) {
		if (bp->b_error)
			errno = bp->b_error;
		else
			errno = EIO;
		retval = (size_t)-1;
	} else {
		errno = 0;
		retval = iov->iov_len - bp->b_resid;
	}
#ifdef	_SYSCALL32_IMPL
	if (get_udatamodel() == DATAMODEL_NATIVE) {
		(void) sulword(&((aio_result_t *)resultp)->aio_return, retval);
		(void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
	} else {
		(void) suword32(&((aio_result32_t *)resultp)->aio_return,
		    (int)retval);
		(void) suword32(&((aio_result32_t *)resultp)->aio_errno, errno);
	}
#else
	(void) suword32(&((aio_result_t *)resultp)->aio_return, retval);
	(void) suword32(&((aio_result_t *)resultp)->aio_errno, errno);
#endif
}
/*
 * This function is used to remove a request from the done queue.
 */
void
aio_req_remove_portq(aio_t *aiop, aio_req_t *reqp)
{
	ASSERT(MUTEX_HELD(&aiop->aio_portq_mutex));
	while (aiop->aio_portq == NULL) {
		/*
		 * aio_portq is set to NULL when aio_cleanup_portq()
		 * is working with the event queue.
		 * The aio_cleanup_thread() uses aio_cleanup_portq()
		 * to unlock all AIO buffers with completed transactions.
		 * Wait here until aio_cleanup_portq() restores the
		 * list of completed transactions in aio_portq.
		 */
		cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);
	}
	aio_deq(&aiop->aio_portq, reqp);
}
/* ARGSUSED */
void
aio_close_port(void *arg, int port, pid_t pid, int lastclose)
{
	aio_t		*aiop;
	aio_req_t	*reqp;
	aio_req_t	*next;
	aio_req_t	*headp;
	int		counter;

	if (arg == NULL)
		aiop = curproc->p_aio;
	else
		aiop = (aio_t *)arg;

	/*
	 * The PORT_SOURCE_AIO source is always associated with every new
	 * created port by default.
	 * If no asynchronous I/O transactions were associated with the port
	 * then the aiop pointer will still be set to NULL.
	 */
	if (aiop == NULL)
		return;

	/*
	 * Within a process event ports can be used to collect events other
	 * than PORT_SOURCE_AIO events. At the same time the process can submit
	 * asynchronous I/O transactions which are not associated with the
	 * current port.
	 * The current process-oriented model of AIO uses a single queue for
	 * pending events. On close the pending queue (queue of asynchronous
	 * I/O transactions using event port notification) must be scanned
	 * to detect and handle pending I/Os using the current port.
	 */
	mutex_enter(&aiop->aio_portq_mutex);
	mutex_enter(&aiop->aio_mutex);
	counter = 0;
	if ((headp = aiop->aio_portpending) != NULL) {
		reqp = headp;
		do {
			if (reqp->aio_req_portkev &&
			    reqp->aio_req_port == port) {
				reqp->aio_req_flags |= AIO_CLOSE_PORT;
				counter++;
			}
		} while ((reqp = reqp->aio_req_next) != headp);
	}
	if (counter == 0) {
		/* no AIOs pending */
		mutex_exit(&aiop->aio_mutex);
		mutex_exit(&aiop->aio_portq_mutex);
		return;
	}
	aiop->aio_portpendcnt += counter;
	mutex_exit(&aiop->aio_mutex);
	while (aiop->aio_portpendcnt)
		cv_wait(&aiop->aio_portcv, &aiop->aio_portq_mutex);

	/*
	 * all pending AIOs are completed.
	 * check port doneq
	 */
	headp = NULL;
	if ((reqp = aiop->aio_portq) != NULL) {
		do {
			next = reqp->aio_req_next;
			if (reqp->aio_req_port == port) {
				/* dequeue request and discard event */
				aio_req_remove_portq(aiop, reqp);
				port_free_event(reqp->aio_req_portkev);
				/* put request in temporary queue */
				reqp->aio_req_next = headp;
				headp = reqp;
			}
		} while ((reqp = next) != aiop->aio_portq);
	}
	mutex_exit(&aiop->aio_portq_mutex);

	/* headp points to the list of requests to be discarded */
	for (reqp = headp; reqp != NULL; reqp = next) {
		next = reqp->aio_req_next;
		aphysio_unlock(reqp);
		mutex_enter(&aiop->aio_mutex);
		aio_req_free_port(aiop, reqp);
		mutex_exit(&aiop->aio_mutex);
	}

	if (aiop->aio_flags & AIO_CLEANUP)
		cv_broadcast(&aiop->aio_waitcv);
}
/*
 * aio_cleanup_dr_delete_memory is used by dr's delete_memory_thread
 * to kick start the aio_cleanup_thread for the given process to do the
 * necessary cleanup.
 * This is needed so that delete_memory_thread can obtain writer locks
 * on pages that need to be relocated during a dr memory delete operation,
 * otherwise a deadly embrace may occur.
 */
int
aio_cleanup_dr_delete_memory(proc_t *procp)
{
	struct aio *aiop = procp->p_aio;
	struct as *as = procp->p_as;
	int ret = 0;

	ASSERT(MUTEX_HELD(&procp->p_lock));

	mutex_enter(&as->a_contents);

	if (aiop != NULL) {
		aiop->aio_rqclnup = 1;
		cv_broadcast(&as->a_cv);
		ret = 1;
	}
	mutex_exit(&as->a_contents);
	return (ret);
}