4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
26 #include <sys/types.h>
27 #include <sys/param.h>
28 #include <sys/cmn_err.h>
30 #include <sys/stropts.h>
31 #include <sys/strsun.h>
32 #include <sys/systm.h>
33 #include <sys/socketvar.h>
37 * In support of on-board asynchronous DMA hardware (e.g. Intel I/OAT)
38 * we use a consolidation private KAPI to allow the protocol to start
39 * an asynchronous copyout to a user-land receive-side buffer (uioa)
40 * when a blocking socket read (e.g. read, recv, ...) is pending.
42 * In some broad strokes, this is what happens. When recv is called,
43 * we first determine whether it would be beneficial to use uioa, and
44 * if so set up the required state (all done by sod_rcv_init()).
45 * The protocol can only initiate asynchronous copyout if the receive
46 * queue is empty, so the first thing we do is drain any previously
47 * queued data (using sod_uioa_so_init()). Once the copyouts (if any)
48 * have been scheduled we wait for the receive to be satisfied. During
49 * that time any new mblks that are enqueued will be scheduled to be
50 * copied out asynchronously (sod_uioa_mblk_init()). When the receive
51 * has been satisfied we wait for all scheduled copyout operations to
52 * complete before we return to the user (sod_rcv_done())
/*
 * kmem cache that backs every sodirect_t handled in this file: objects
 * are taken from it with kmem_cache_alloc() in sod_sock_init() and are
 * handed back with kmem_cache_free() in sod_sock_fini().
 */
55 static struct kmem_cache
*sock_sod_cache
;
58 * This function is called at the beginning of recvmsg().
60 * If I/OAT is enabled on this sonode, initialize the uioa state machine
61 * with state UIOA_ALLOC.
/*
 * sod_rcv_init: decide whether this receive may use asynchronous
 * copyout (uioa) and, if so, switch the caller over to the uioa_t that
 * is a member of the socket sodirect_t.
 *
 * so    - socket; so->so_direct supplies the sodirect_t and so->so_lock
 *         is held around the eligibility test and the uioainit() call.
 * flags - receive flags; a set MSG_PEEK bit rules uioa out.
 * uiopp - in/out; after a successful uioainit() the pointer it refers
 *         to is overwritten with &sodp->sod_uioa cast to uio_t *.
 *
 * NOTE(review): this listing omits the return type, the declaration and
 * assignment of uiop (presumably the incoming uio taken from uiopp) and
 * the return statement -- confirm those against the complete source.
 */
64 sod_rcv_init(struct sonode
*so
, int flags
, struct uio
**uiopp
)
68 sodirect_t
*sodp
= so
->so_direct
;
/* so_lock covers every test below as well as the uioainit() call. */
76 mutex_enter(&so
->so_lock
);
/*
 * uioa is attempted only when every clause holds: uio_resid is at least
 * uioasync.mincnt, a sodirect_t exists and is enabled, uioasync.enabled
 * is set, MSG_PEEK is clear, sopp_loopback is clear, so_filter_active
 * is zero and SS_CANTRCVMORE is not set in so_state.
 */
77 if (uiop
->uio_resid
>= uioasync
.mincnt
&&
78 sodp
!= NULL
&& sodp
->sod_enabled
&&
79 uioasync
.enabled
&& !(flags
& MSG_PEEK
) &&
80 !so
->so_proto_props
.sopp_loopback
&& so
->so_filter_active
== 0 &&
81 !(so
->so_state
& SS_CANTRCVMORE
)) {
83 * Big enough I/O for uioa min setup and an sodirect socket
84 * and sodirect enabled and uioa enabled and I/O will be done
85 * and not EOF so initialize the sodirect_t uioa_t with "uiop".
/* A zero return from uioainit() is the success case handled here. */
87 if (!uioainit(uiop
, &sodp
->sod_uioa
)) {
89 * Successful uioainit() so the uio_t part of the
90 * uioa_t will be used for all uio_t work to follow,
91 * we return the original "uiop" in "suiop".
94 *uiopp
= (uio_t
*)&sodp
->sod_uioa
;
96 * Before returning to the caller the passed in uio_t
97 * "uiop" will be updated via a call to uioafini()
100 * Note, the uioa.uioa_state isn't set to UIOA_ENABLED
101 * here as first we have to uioamove() any currently
102 * queued M_DATA mblk_t(s) so it will be done later.
106 mutex_exit(&so
->so_lock
);
112 * This function is called at the end of recvmsg(); it finalizes all the I/OAT
113 * operations and resets the uioa state to UIOA_ALLOC.
/*
 * sod_rcv_done: wrap up uioa processing for one receive.
 *
 * so    - socket; so->so_lock must already be held (asserted).
 * suiop - uio handed to uioafini() as its first argument; the comments
 *         in sod_rcv_init() describe it as the original caller uio.
 * uiop  - must be the uio_t view of sodp->sod_uioa (asserted); it is
 *         cast back to uioa_t * for the uioafini() call.
 *
 * The uioafini() result is stored in error.  If the pending free list
 * head sod_uioafh is non-NULL it is captured in mp and both sod_uioafh
 * and sod_uioaft are cleared.
 *
 * NOTE(review): the return type, the declarations of error and mp, what
 * is done with the detached chain mp, and the return statement are not
 * part of this listing -- confirm against the complete source.
 */
116 sod_rcv_done(struct sonode
*so
, struct uio
*suiop
, struct uio
*uiop
)
119 sodirect_t
*sodp
= so
->so_direct
;
126 ASSERT(MUTEX_HELD(&so
->so_lock
));
127 /* Finish any sodirect and uioa processing */
129 /* Finish any uioa_t processing */
131 ASSERT(uiop
== (uio_t
*)&sodp
->sod_uioa
);
132 error
= uioafini(suiop
, (uioa_t
*)uiop
);
/* Detach the whole pending free chain by clearing head and tail. */
133 if ((mp
= sodp
->sod_uioafh
) != NULL
) {
134 sodp
->sod_uioafh
= NULL
;
135 sodp
->sod_uioaft
= NULL
;
139 ASSERT(sodp
->sod_uioafh
== NULL
);
145 * Schedule a uioamove() on a mblk. This is done as mblks are enqueued
146 * by the protocol on the socket's rcv queue.
148 * Caller must be holding so_lock.
/*
 * sod_uioa_mblk_init: try to schedule uioamove() for an M_DATA chain.
 *
 * sodp     - sodirect_t whose sod_uioa member is the copy target.
 * mp       - head of the chain; DB_TYPE(mp) must be M_DATA (asserted).
 * msg_size - must equal msgdsize(mp) (asserted).
 *
 * Nothing is scheduled unless UIOA_ENABLED is set in uioa_state.  When
 * msg_size exceeds uio_resid the state is masked with UIOA_CLR and
 * UIOA_FINI is set.  The do/while loop follows b_cont links and calls
 * uioamove() with MBLKL() bytes starting at b_rptr; a zero return tags
 * the dblk with DBLK_UIOA, while the error path applies the same
 * UIOA_CLR/UIOA_FINI update.  After the loop, a non-NULL mp1 or a
 * uio_resid of zero leads into the chain-breaking code.
 *
 * NOTE(review): mp1 and lmp are declared and initialised on lines that
 * are missing from this listing, as are the else keywords, the loop
 * exit taken on error and the body of the chain split -- confirm before
 * relying on the summary above.
 */
151 sod_uioa_mblk_init(struct sodirect_s
*sodp
, mblk_t
*mp
, size_t msg_size
)
153 uioa_t
*uioap
= &sodp
->sod_uioa
;
/* Both asserts validate the caller-supplied mp and msg_size. */
157 ASSERT(DB_TYPE(mp
) == M_DATA
);
158 ASSERT(msg_size
== msgdsize(mp
));
160 if (uioap
->uioa_state
& UIOA_ENABLED
) {
161 /* Uioa is enabled */
163 if (msg_size
> uioap
->uio_resid
) {
165 * There isn't enough uio space for the mblk_t chain
166 * so disable uioa such that this and any additional
167 * mblk_t data is handled by the socket and schedule
168 * the socket for wakeup to finish this uioa.
170 uioap
->uioa_state
&= UIOA_CLR
;
171 uioap
->uioa_state
|= UIOA_FINI
;
175 uint32_t len
= MBLKL(mp1
);
177 if (!uioamove(mp1
->b_rptr
, len
, UIO_READ
, uioap
)) {
178 /* Scheduled, mark dblk_t as such */
179 DB_FLAGS(mp1
) |= DBLK_UIOA
;
181 /* Error, turn off async processing */
182 uioap
->uioa_state
&= UIOA_CLR
;
183 uioap
->uioa_state
|= UIOA_FINI
;
187 } while ((mp1
= mp1
->b_cont
) != NULL
);
189 if (mp1
!= NULL
|| uioap
->uio_resid
== 0) {
190 /* Break the mblk chain if neccessary. */
191 if (mp1
!= NULL
&& lmp
!= NULL
) {
200 * This function is called on a mblk that has been successfully uioamoved().
/*
 * sod_uioa_mblk_done: put an already uioamove()d chain on the sodirect
 * pending free list.
 *
 * sodp - sodirect_t owning the list (sod_uioafh head, sod_uioaft tail).
 * bp   - chain head; nothing is done when bp is NULL or when its dblk
 *        does not carry DBLK_UIOA.
 *
 * For a flagged chain each visited mblk is asserted to carry DBLK_UIOA
 * and has b_rptr set to b_wptr; the b_cont == NULL test presumably ends
 * the walk, after which that mblk is stored as sod_uioaft.  If
 * UIOA_ENABLED is set the state is then masked with UIOA_CLR and
 * UIOA_FINI is set.
 *
 * NOTE(review): the return type, the loop construct and the branch that
 * links bp behind an existing tail are not visible in this listing.
 */
203 sod_uioa_mblk_done(sodirect_t
*sodp
, mblk_t
*bp
)
205 if (bp
!= NULL
&& (bp
->b_datap
->db_flags
& DBLK_UIOA
)) {
207 * A uioa flaged mblk_t chain, already uio processed,
208 * add it to the sodirect uioa pending free list.
210 * Note, a b_cont chain headed by a DBLK_UIOA enable
211 * mblk_t must have all mblk_t(s) DBLK_UIOA enabled.
213 mblk_t
*bpt
= sodp
->sod_uioaft
;
/*
 * NOTE(review): sodp has already been dereferenced by the initialiser
 * of bpt just above, so this ASSERT cannot catch a NULL sodp; consider
 * moving the check ahead of the first use.
 */
215 ASSERT(sodp
!= NULL
);
218 * Add first mblk_t of "bp" chain to current sodirect uioa
219 * free list tail mblk_t, if any, else empty list so new head.
222 sodp
->sod_uioafh
= bp
;
227 * Walk mblk_t "bp" chain to find tail and adjust rptr of
228 * each to reflect that uioamove() has consumed all data.
232 ASSERT(bpt
->b_datap
->db_flags
& DBLK_UIOA
);
/* Setting b_rptr to b_wptr leaves the mblk with no unread bytes. */
234 bpt
->b_rptr
= bpt
->b_wptr
;
235 if (bpt
->b_cont
== NULL
)
239 /* New sodirect uioa free list tail */
240 sodp
->sod_uioaft
= bpt
;
242 /* Only dequeue once with data returned per uioa_t */
243 if (sodp
->sod_uioa
.uioa_state
& UIOA_ENABLED
) {
244 sodp
->sod_uioa
.uioa_state
&= UIOA_CLR
;
245 sodp
->sod_uioa
.uioa_state
|= UIOA_FINI
;
251 * When transitioning from the UIOA_INIT state to the UIOA_ENABLED state in
252 * recvmsg(), call this function on a non-STREAMS socket to schedule uioamove()
253 * on the data that is already queued on this socket.
/*
 * sod_uioa_so_init: schedule uioamove() for data that is already
 * queued on the socket.
 *
 * so   - socket; so->so_lock must be held (asserted).
 * sodp - sodirect_t; &sodp->sod_uioa must equal uiop (asserted).
 * uiop - the uioa_t in its uio_t guise; cast back to uioa_t * as uioap.
 *
 * The walk starts at so->so_rcv_q_head with in_rcv_q set to B_TRUE and
 * can later move on to so->so_rcv_head once bp is NULL or has no
 * b_next.  The visible stop conditions for an mblk are: db_type is not
 * M_DATA, its length (b_wptr - b_rptr) exceeds uio_resid, or so_oobmark
 * is positive and len + uioa_mbytes is at or beyond it.  Otherwise the
 * mblk is asserted not to carry DBLK_UIOA yet, uioamove() is called
 * and, per the existing comment, the dblk is tagged DBLK_UIOA once
 * scheduled.  The tail of the function masks the state with UIOA_CLR,
 * sets UIOA_FINI, and links a remainder chain wbp after bp through
 * b_next, storing wbp in so_rcv_q_last_head or so_rcv_last_head when
 * there is no following entry.
 *
 * NOTE(review): large parts of this function (the declarations of bp,
 * wbp, len and error, the loop headers, most else branches, the
 * conditions guarding the state change and the closing braces) are
 * absent from this listing; treat the summary as a guide to the
 * visible statements only.
 */
256 sod_uioa_so_init(struct sonode
*so
, struct sodirect_s
*sodp
, struct uio
*uiop
)
258 uioa_t
*uioap
= (uioa_t
*)uiop
;
264 boolean_t in_rcv_q
= B_TRUE
;
266 ASSERT(MUTEX_HELD(&so
->so_lock
));
267 ASSERT(&sodp
->sod_uioa
== uioap
);
270 * Walk first b_cont chain in sod_q
271 * and schedule any M_DATA mblk_t's for uio asynchronous move.
273 bp
= so
->so_rcv_q_head
;
284 if (wbp
->b_datap
->db_type
!= M_DATA
) {
285 /* Not M_DATA, no more uioa */
288 if ((len
= wbp
->b_wptr
- wbp
->b_rptr
) > 0) {
289 /* Have a M_DATA mblk_t with data */
290 if (len
> uioap
->uio_resid
|| (so
->so_oobmark
> 0 &&
291 len
+ uioap
->uioa_mbytes
>= so
->so_oobmark
)) {
292 /* Not enough uio sapce, or beyond oobmark */
295 ASSERT(!(wbp
->b_datap
->db_flags
& DBLK_UIOA
));
296 error
= uioamove(wbp
->b_rptr
, len
,
299 /* Scheduled, mark dblk_t as such */
300 wbp
->b_datap
->db_flags
|= DBLK_UIOA
;
302 /* Break the mblk chain */
306 /* Save last wbp processed */
308 } while ((wbp
= wbp
->b_cont
) != NULL
);
310 if (in_rcv_q
&& (bp
== NULL
|| bp
->b_next
== NULL
)) {
312 * We get here only once to process the sonode dump area
313 * if so_rcv_q_head is NULL or all the mblks have been
314 * successfully uioamoved()ed.
318 /* move to dump area */
319 bp
= so
->so_rcv_head
;
327 uioap
->uioa_state
&= UIOA_CLR
;
328 uioap
->uioa_state
|= UIOA_FINI
;
331 * If we processed 1 or more mblk_t(s) then we need to split the
332 * current mblk_t chain in 2 so that all the uioamove()ed mblk_t(s)
333 * are in the current chain and the rest are in the following new
337 /* New end of current chain */
340 /* Insert new chain wbp after bp */
341 if ((wbp
->b_next
= bp
->b_next
) == NULL
) {
343 so
->so_rcv_q_last_head
= wbp
;
345 so
->so_rcv_last_head
= wbp
;
348 bp
->b_next
->b_prev
= bp
->b_prev
;
354 * Initialize sodirect data structures on a socket.
/*
 * sod_sock_init: attach a fresh sodirect_t to a socket.
 *
 * so - socket that must not yet have so_direct set (asserted).
 *
 * Sets SS_SODIRECT in so_state, allocates a sodirect_t from
 * sock_sod_cache, marks it enabled, empties the pending free list
 * (head and tail NULL), starts the uioa state at UIOA_ALLOC and finally
 * publishes the structure through so->so_direct.
 *
 * NOTE(review): the return type and the declaration of sodp are not
 * shown in this listing.
 */
357 sod_sock_init(struct sonode
*so
)
361 ASSERT(so
->so_direct
== NULL
);
363 so
->so_state
|= SS_SODIRECT
;
/*
 * The result is used without a NULL check; kmem_cache_alloc(9F)
 * documents KM_SLEEP as never returning NULL -- confirm.
 */
365 sodp
= kmem_cache_alloc(sock_sod_cache
, KM_SLEEP
);
366 sodp
->sod_enabled
= B_TRUE
;
367 sodp
->sod_uioafh
= NULL
;
368 sodp
->sod_uioaft
= NULL
;
370 * Remainder of the sod_uioa members are left uninitialized
371 * but will be initialized later by uioainit() before uioa
374 sodp
->sod_uioa
.uioa_state
= UIOA_ALLOC
;
/* Publish last, once every field set above has been written. */
375 so
->so_direct
= sodp
;
/*
 * sod_sock_fini: detach and free the sodirect_t of a socket.
 *
 * so - socket whose so_direct is released.  The pending free list head
 *      sod_uioafh must already be NULL (asserted).
 *
 * Clears so->so_direct and returns the structure to sock_sod_cache.
 *
 * NOTE(review): sodp is dereferenced by the ASSERT without a NULL test,
 * so callers presumably only invoke this on sockets that went through
 * sod_sock_init() -- confirm against the call sites.
 */
379 sod_sock_fini(struct sonode
*so
)
381 sodirect_t
*sodp
= so
->so_direct
;
383 ASSERT(sodp
->sod_uioafh
== NULL
);
/* Unhook from the socket before handing the object back to the cache. */
385 so
->so_direct
= NULL
;
386 kmem_cache_free(sock_sod_cache
, sodp
);
390 * Init the sodirect kmem cache while sockfs is loading.
395 /* Allocate sodirect_t kmem_cache */
/*
 * The cache is named "sock_sod_cache" and each object is
 * sizeof (sodirect_t) bytes.  The third argument is 0, the next five
 * are NULL and the last is 0; per kmem_cache_create(9F) that presumably
 * means default alignment, no constructor, destructor or reclaim
 * callback, no private data, the default arena and no flags -- confirm.
 * NOTE(review): the header of the enclosing function is not visible in
 * this listing.
 */
396 sock_sod_cache
= kmem_cache_create("sock_sod_cache",
397 sizeof (sodirect_t
), 0, NULL
, NULL
, NULL
, NULL
, NULL
, 0);
/*
 * sod_uioa_mblk: retire uioa-processed mblks from the socket receive
 * queues and return the uioa_mbytes counter.
 *
 * so - socket.  Asserted preconditions: so->so_direct is non-NULL and
 *      enabled, so->so_lock is held, uioa_state is not equal to
 *      UIOA_ALLOC|UIOA_INIT and has UIOA_ENABLED or UIOA_FINI set.
 * mp - mblk to retire; when it is NULL and so_rcv_q_head is non-NULL
 *      the first entry of so_rcv_q_head is dequeued and used instead.
 *
 * Returns sodp->sod_uioa.uioa_mbytes.
 *
 * NOTE(review): the return type, several closing braces and any
 * conditional-compilation guard around the panic checks are not visible
 * in this listing.
 */
403 sod_uioa_mblk(struct sonode
*so
, mblk_t
*mp
)
405 sodirect_t
*sodp
= so
->so_direct
;
407 ASSERT(sodp
!= NULL
);
408 ASSERT(MUTEX_HELD(&so
->so_lock
));
410 ASSERT(sodp
->sod_enabled
);
411 ASSERT(sodp
->sod_uioa
.uioa_state
!= (UIOA_ALLOC
|UIOA_INIT
));
413 ASSERT(sodp
->sod_uioa
.uioa_state
& (UIOA_ENABLED
|UIOA_FINI
));
/* No mblk supplied: take the first entry off so_rcv_q_head. */
415 if (mp
== NULL
&& so
->so_rcv_q_head
!= NULL
) {
416 mp
= so
->so_rcv_q_head
;
417 ASSERT(mp
->b_prev
!= NULL
);
419 so
->so_rcv_q_head
= mp
->b_next
;
/* Queue drained: the last-head pointer is cleared as well. */
420 if (so
->so_rcv_q_head
== NULL
) {
421 so
->so_rcv_q_last_head
= NULL
;
/* sod_uioa_mblk_done() ignores a NULL or non-DBLK_UIOA mblk. */
426 sod_uioa_mblk_done(sodp
, mp
);
/*
 * With so_rcv_q_head empty, also retire the head of so_rcv_head when
 * it is an M_DATA mblk carrying DBLK_UIOA.
 */
428 if (so
->so_rcv_q_head
== NULL
&& so
->so_rcv_head
!= NULL
&&
429 DB_TYPE(so
->so_rcv_head
) == M_DATA
&&
430 (DB_FLAGS(so
->so_rcv_head
) & DBLK_UIOA
)) {
432 ASSERT(so
->so_rcv_q_head
== NULL
);
433 mp
= so
->so_rcv_head
;
434 so
->so_rcv_head
= mp
->b_next
;
435 if (so
->so_rcv_head
== NULL
)
436 so
->so_rcv_last_head
= NULL
;
/* Fully unlink the dequeued mblk before passing it on. */
437 mp
->b_prev
= mp
->b_next
= NULL
;
438 sod_uioa_mblk_done(sodp
, mp
);
/*
 * Sanity checks: a DBLK_UIOA mblk still sitting at the head of either
 * queue at this point triggers a CE_PANIC.
 */
442 if (so
->so_rcv_q_head
!= NULL
) {
443 mblk_t
*m
= so
->so_rcv_q_head
;
445 if (DB_FLAGS(m
) & DBLK_UIOA
) {
446 cmn_err(CE_PANIC
, "Unexpected I/OAT mblk %p"
447 " in so_rcv_q_head.\n", (void *)m
);
452 if (so
->so_rcv_head
!= NULL
) {
453 mblk_t
*m
= so
->so_rcv_head
;
455 if (DB_FLAGS(m
) & DBLK_UIOA
) {
456 cmn_err(CE_PANIC
, "Unexpected I/OAT mblk %p"
457 " in so_rcv_head.\n", (void *)m
);
463 return (sodp
->sod_uioa
.uioa_mbytes
);