1 /* $NetBSD: kttcp.c,v 1.27 2008/03/27 19:06:51 ad Exp $ */
4 * Copyright (c) 2002 Wasabi Systems, Inc.
7 * Written by Frank van der Linden and Jason R. Thorpe for
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed for the NetBSD Project by
21 * Wasabi Systems, Inc.
22 * 4. The name of Wasabi Systems, Inc. may not be used to endorse
23 * or promote products derived from this software without specific prior
26 * THIS SOFTWARE IS PROVIDED BY WASABI SYSTEMS, INC. ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
28 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
29 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL WASABI SYSTEMS, INC
30 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
31 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
32 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
33 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
34 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
35 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
36 * POSSIBILITY OF SUCH DAMAGE.
40 * kttcp.c -- provides kernel support for network testing,
44 #include <sys/cdefs.h>
45 __KERNEL_RCSID(0, "$NetBSD: kttcp.c,v 1.27 2008/03/27 19:06:51 ad Exp $");
47 #include <sys/param.h>
48 #include <sys/types.h>
49 #include <sys/ioctl.h>
51 #include <sys/filedesc.h>
53 #include <sys/systm.h>
54 #include <sys/protosw.h>
56 #include <sys/resourcevar.h>
57 #include <sys/signal.h>
58 #include <sys/socketvar.h>
59 #include <sys/socket.h>
61 #include <sys/mount.h>
62 #include <sys/syscallargs.h>
64 #include <dev/kttcpio.h>
/*
 * Forward declarations for the file-local routines: the two ioctl back
 * ends (kttcp_send, kttcp_recv) and the socket-level helpers they call
 * (kttcp_sosend, kttcp_soreceive).
 */
66 static int kttcp_send(struct lwp
*l
, struct kttcp_io_args
*);
67 static int kttcp_recv(struct lwp
*l
, struct kttcp_io_args
*);
/*
 * Helper arguments, in order: socket, requested byte count, out-pointer
 * that receives the byte count actually handled, calling lwp, and the
 * message flags (by value for send, by pointer for receive).
 */
68 static int kttcp_sosend(struct socket
*, unsigned long long,
69 unsigned long long *, struct lwp
*, int);
70 static int kttcp_soreceive(struct socket
*, unsigned long long,
71 unsigned long long *, struct lwp
*, int *);
/* Attach entry point; takes a single int. */
73 void kttcpattach(int);
/* presumably dev_type_ioctl() expands to the prototype of the ioctl handler -- TODO confirm */
75 dev_type_ioctl(kttcpioctl
);
/*
 * Character device switch for kttcp.  kttcpioctl is the only entry that
 * refers to a routine from this file; every other entry is one of the
 * null*/no* placeholders, and the last initializer is D_OTHER.
 * NOTE(review): the initializers are positional, so which slot each one
 * fills depends on the struct cdevsw layout -- confirm against its
 * declaration.
 */
77 const struct cdevsw kttcp_cdevsw
= {
78 nullopen
, nullclose
, noread
, nowrite
, kttcpioctl
,
79 nostop
, notty
, nopoll
, nommap
, nokqfilter
, D_OTHER
/*
 * kttcpattach: attach entry point, declared earlier in this file as
 * "void kttcpattach(int)".  count is its only parameter.
 */
83 kttcpattach(int count
)
/*
 * kttcpioctl: ioctl handler installed in kttcp_cdevsw.
 *
 * Tests FWRITE in flag, and hands data (cast to struct kttcp_io_args *)
 * to kttcp_send() or kttcp_recv(), storing the result in error.
 * NOTE(review): which cmd value selects which back end, and what is
 * returned when FWRITE is clear, should be confirmed against the full
 * function.
 */
89 kttcpioctl(dev_t dev
, u_long cmd
, void *data
, int flag
,
/* Check whether the descriptor was opened for writing. */
94 if ((flag
& FWRITE
) == 0)
/* Send back end. */
99 error
= kttcp_send(l
, (struct kttcp_io_args
*) data
);
/* Receive back end. */
103 error
= kttcp_recv(l
, (struct kttcp_io_args
*) data
);
/*
 * kttcp_send: back end for the send request.
 *
 * Compares kio->kio_totalsize against KTTCP_MAX_XMIT, resolves
 * kio->kio_socket to a socket with fd_getsock(), then calls
 * kttcp_sosend() in a loop for as long as it reports no error and len
 * is still positive.  Afterwards the descriptor is released with
 * fd_putfile(), t1 - t0 is stored in kio->kio_elapsed, and
 * kio_totalsize - len is stored in kio->kio_bytesdone.
 *
 * NOTE(review): the limit test here is ">=" while kttcp_recv() uses
 * ">" -- confirm whether a request of exactly KTTCP_MAX_XMIT bytes is
 * meant to be accepted.
 */
114 kttcp_send(struct lwp
*l
, struct kttcp_io_args
*kio
)
/* t0/t1 bracket the transfer; their difference becomes kio_elapsed. */
118 struct timeval t0
, t1
;
/* len: bytes still to send; done: bytes handled by one kttcp_sosend() call. */
119 unsigned long long len
, done
;
/* Size limit check. */
121 if (kio
->kio_totalsize
>= KTTCP_MAX_XMIT
)
/* Look up the socket behind the caller-supplied descriptor. */
124 if ((error
= fd_getsock(kio
->kio_socket
, &so
)) != 0)
127 len
= kio
->kio_totalsize
;
/* Flags argument is 0 for every send. */
130 error
= kttcp_sosend(so
, len
, &done
, l
, 0);
132 } while (error
== 0 && len
> 0);
/* Drop the reference taken by fd_getsock(). */
134 fd_putfile(kio
->kio_socket
);
/* kio_elapsed = t1 - t0 */
139 timersub(&t1
, &t0
, &kio
->kio_elapsed
);
/* Report how much of the request was completed. */
141 kio
->kio_bytesdone
= kio
->kio_totalsize
- len
;
/*
 * kttcp_recv: back end for the receive request.
 *
 * Compares kio->kio_totalsize against KTTCP_MAX_XMIT, resolves
 * kio->kio_socket to a socket with fd_getsock(), then calls
 * kttcp_soreceive() in a loop that continues only while there is no
 * error, len is still positive, and the previous call made progress
 * (done > 0).  Afterwards the descriptor is released with fd_putfile(),
 * t1 - t0 is stored in kio->kio_elapsed, and kio_totalsize - len is
 * stored in kio->kio_bytesdone.
 *
 * NOTE(review): the limit test here is ">" while kttcp_send() uses
 * ">=" -- confirm which boundary is intended.
 */
147 kttcp_recv(struct lwp
*l
, struct kttcp_io_args
*kio
)
/* t0/t1 bracket the transfer; their difference becomes kio_elapsed. */
151 struct timeval t0
, t1
;
/* len: bytes still wanted; done: bytes handled by one kttcp_soreceive() call. */
152 unsigned long long len
, done
;
154 done
= 0; /* XXX gcc */
/* Size limit check. */
156 if (kio
->kio_totalsize
> KTTCP_MAX_XMIT
)
/* Look up the socket behind the caller-supplied descriptor. */
159 if ((error
= fd_getsock(kio
->kio_socket
, &so
)) != 0)
161 len
= kio
->kio_totalsize
;
/* The flags pointer passed to kttcp_soreceive() is NULL. */
164 error
= kttcp_soreceive(so
, len
, &done
, l
, NULL
);
/* A call that handled zero bytes also ends the loop. */
166 } while (error
== 0 && len
> 0 && done
> 0);
/* Drop the reference taken by fd_getsock(). */
168 fd_putfile(kio
->kio_socket
);
/* kio_elapsed = t1 - t0 */
175 timersub(&t1
, &t0
, &kio
->kio_elapsed
);
/* Report how much of the request was completed. */
177 kio
->kio_bytesdone
= kio
->kio_totalsize
- len
;
/*
 * SBLOCKWAIT(f): yields M_NOWAIT when MSG_DONTWAIT is set in f, and
 * M_WAITOK otherwise.  Used in this file as the second argument to
 * sblock().
 */
182 #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? M_NOWAIT : M_WAITOK)
185 * Slightly changed version of sosend()
/*
 * kttcp_sosend: push up to slen bytes into so's send buffer.
 *
 * The routine allocates mbufs itself (m_gethdr/m_get) rather than
 * taking caller data, hands them to the protocol through pr_usrreq
 * with PRU_SEND or PRU_SENDOOB, and on the way out stores slen - resid
 * in *done.
 *
 * so     socket to send on
 * slen   number of bytes requested
 * done   out: number of bytes accounted for (slen - resid)
 * l      calling lwp
 * flags  MSG_* flags (MSG_DONTWAIT, MSG_DONTROUTE and MSG_OOB are
 *        examined here)
 */
188 kttcp_sosend(struct socket
*so
, unsigned long long slen
,
189 unsigned long long *done
, struct lwp
*l
, int flags
)
191 struct mbuf
**mp
, *m
, *top
;
192 long space
, len
, mlen
;
193 int error
, dontroute
, atomic
;
/* Cache sosendallatonce(so); it gates the wait and header-room decisions below. */
196 atomic
= sosendallatonce(so
);
200 * In theory resid should be unsigned.
201 * However, space must be signed, as it might be less than 0
202 * if we over-committed, and we must use a signed comparison
203 * of space and resid. On the other hand, a negative resid
204 * causes us to loop sending 0-length segments to the protocol.
/*
 * Condition built from: MSG_DONTROUTE requested, SO_DONTROUTE not
 * already set on the socket, and a PR_ATOMIC protocol.
 * presumably this is the value assigned to dontroute -- TODO confirm
 */
211 (flags
& MSG_DONTROUTE
) && (so
->so_options
& SO_DONTROUTE
) == 0 &&
212 (so
->so_proto
->pr_flags
& PR_ATOMIC
);
/* snderr(e): record e in error and jump to the release label. */
214 #define snderr(errno) { error = errno; goto release; }
/* Take the send-buffer lock; SBLOCKWAIT picks the wait mode from flags. */
217 if ((error
= sblock(&so
->so_snd
, SBLOCKWAIT(flags
))) != 0)
/* Check whether the socket can no longer send. */
220 if (so
->so_state
& SS_CANTSENDMORE
)
/* Pick up any error pending on the socket. */
223 error
= so
->so_error
;
/* Not connected: examine whether the protocol requires a connection. */
227 if ((so
->so_state
& SS_ISCONNECTED
) == 0) {
228 if (so
->so_proto
->pr_flags
& PR_CONNREQUIRED
) {
229 if ((so
->so_state
& SS_ISCONFIRMING
) == 0)
232 snderr(EDESTADDRREQ
);
/* Free space currently available in the send buffer. */
234 space
= sbspace(&so
->so_snd
);
/* An all-at-once request larger than the high-water mark. */
237 if ((atomic
&& resid
> so
->so_snd
.sb_hiwat
))
/*
 * Not enough room: drop the lock and sleep in sbwait() on the send
 * buffer.
 * NOTE(review): the two consistency checks below name so_rcv and carry
 * a "kttcp_soreceive" label even though this is the send path -- looks
 * copied from the receive side; confirm intent.
 */
239 if (space
< resid
&& (atomic
|| space
< so
->so_snd
.sb_lowat
)) {
242 SBLASTRECORDCHK(&so
->so_rcv
,
243 "kttcp_soreceive sbwait 1");
244 SBLASTMBUFCHK(&so
->so_rcv
,
245 "kttcp_soreceive sbwait 1");
246 sbunlock(&so
->so_snd
);
247 error
= sbwait(&so
->so_snd
);
/* Packet-header mbuf; its receive interface is set to NULL. */
257 m
= m_gethdr(M_WAIT
, MT_DATA
);
260 m
->m_pkthdr
.rcvif
= NULL
;
/* Plain data mbuf. */
262 m
= m_get(M_WAIT
, MT_DATA
);
/* Enough data and enough room for a cluster. */
265 if (resid
>= MINCLSIZE
&& space
>= MCLBYTES
) {
/* M_EXT clear: test whether external storage is attached to m. */
267 if ((m
->m_flags
& M_EXT
) == 0)
271 len
= lmin(MCLBYTES
, resid
);
/* First mbuf of an all-at-once send: reserve max_hdr bytes in front. */
273 if (atomic
&& top
== 0) {
274 len
= lmin(MCLBYTES
- max_hdr
,
276 m
->m_data
+= max_hdr
;
278 len
= lmin(MCLBYTES
, resid
);
/* Non-cluster case: bounded by mlen, resid and space. */
283 len
= lmin(lmin(mlen
, resid
), space
);
286 * For datagram protocols, leave room
287 * for protocol headers in first mbuf.
289 if (atomic
&& top
== 0 && len
< mlen
)
/* Account for this mbuf in the chain's packet-header length. */
295 top
->m_pkthdr
.len
+= len
;
301 top
->m_flags
|= M_EOR
;
/* Keep building the chain only for all-at-once sends with room left. */
304 } while (space
> 0 && atomic
);
/* Re-check the send-side state before handing the chain over. */
307 if (so
->so_state
& SS_CANTSENDMORE
)
/* SO_DONTROUTE is set here and cleared again after the protocol call. */
310 so
->so_options
|= SO_DONTROUTE
;
/* SS_MORETOCOME is likewise set around the protocol call. */
312 so
->so_state
|= SS_MORETOCOME
;
/* Hand the chain to the protocol; MSG_OOB selects PRU_SENDOOB. */
313 error
= (*so
->so_proto
->pr_usrreq
)(so
,
314 (flags
& MSG_OOB
) ? PRU_SENDOOB
: PRU_SEND
,
317 so
->so_options
&= ~SO_DONTROUTE
;
319 so
->so_state
&= ~SS_MORETOCOME
;
324 } while (resid
&& space
> 0);
/* Release the send-buffer lock. */
328 sbunlock(&so
->so_snd
);
/* Report the number of bytes accounted for. */
333 *done
= slen
- resid
;
/* Diagnostic print of error, slen and resid. */
335 printf("sosend: error %d slen %llu resid %lld\n", error
, slen
, resid
);
/*
 * kttcp_soreceive: consume up to slen bytes from so's receive buffer.
 *
 * Walks the receive buffer record by record, removing mbufs with
 * sbfree()/MFREE unless MSG_PEEK is set, notifies the protocol with
 * PRU_RCVD when it has PR_WANTRCVD, and on the way out stores
 * slen - resid in *done.
 *
 * so      socket to receive from
 * slen    number of bytes requested
 * done    out: number of bytes accounted for (slen - resid)
 * l       calling lwp
 * flagsp  pointer to MSG_* flags
 *
 * NOTE(review): *flagsp is dereferenced below, and kttcp_recv() in this
 * file passes NULL for it -- confirm the dereference is guarded.
 */
341 kttcp_soreceive(struct socket
*so
, unsigned long long slen
,
342 unsigned long long *done
, struct lwp
*l
, int *flagsp
)
344 struct mbuf
*m
, **mp
;
345 int flags
, len
, error
, offset
, moff
, type
;
346 long long orig_resid
, resid
;
347 const struct protosw
*pr
;
348 struct mbuf
*nextrecord
;
/* resid counts down from slen; orig_resid remembers the starting value. */
353 resid
= orig_resid
= slen
;
/* Work on a copy of the caller's flags with MSG_EOR cleared. */
355 flags
= *flagsp
&~ MSG_EOR
;
/* Out-of-band request: fetch the data through PRU_RCVOOB. */
358 if (flags
& MSG_OOB
) {
359 m
= m_get(M_WAIT
, MT_DATA
);
361 error
= (*pr
->pr_usrreq
)(so
, PRU_RCVOOB
, m
,
362 (struct mbuf
*)(long)(flags
& MSG_PEEK
), NULL
, NULL
);
/* NOTE(review): resid is long long; confirm min() handles that width. */
367 resid
-= min(resid
, m
->m_len
);
369 } while (resid
&& error
== 0 && m
);
/* Socket still confirming and data wanted: notify the protocol. */
378 if (so
->so_state
& SS_ISCONFIRMING
&& resid
)
379 (*pr
->pr_usrreq
)(so
, PRU_RCVD
, NULL
, NULL
, NULL
, NULL
);
/* Take the receive-buffer lock; SBLOCKWAIT picks the wait mode from flags. */
381 if ((error
= sblock(&so
->so_rcv
, SBLOCKWAIT(flags
))) != 0)
/* Start at the head of the receive buffer. */
383 m
= so
->so_rcv
.sb_mb
;
385 * If we have less data than requested, block awaiting more
386 * (subject to any timeout) if:
387 * 1. the current count is less than the low water mark,
388 * 2. MSG_WAITALL is set, and it is possible to do the entire
389 * receive operation at once if we block (resid <= hiwat), or
390 * 3. MSG_DONTWAIT is not set.
391 * If MSG_WAITALL is set but resid is larger than the receive buffer,
392 * we have to do the receive in sections, and thus risk returning
393 * a short count if a timeout or signal occurs after we start.
395 if (m
== NULL
|| (((flags
& MSG_DONTWAIT
) == 0 &&
396 so
->so_rcv
.sb_cc
< resid
) &&
397 (so
->so_rcv
.sb_cc
< so
->so_rcv
.sb_lowat
||
398 ((flags
& MSG_WAITALL
) && resid
<= so
->so_rcv
.sb_hiwat
)) &&
399 m
->m_nextpkt
== NULL
&& (pr
->pr_flags
& PR_ATOMIC
) == 0)) {
/* No mbufs queued although the byte count is nonzero. */
401 if (m
== NULL
&& so
->so_rcv
.sb_cc
)
/* Pick up any error pending on the socket. */
407 error
= so
->so_error
;
408 if ((flags
& MSG_PEEK
) == 0)
/* Check whether the socket can no longer receive. */
412 if (so
->so_state
& SS_CANTRCVMORE
) {
/* Scan the chain for OOB data or an end-of-record mark. */
418 for (; m
; m
= m
->m_next
)
419 if (m
->m_type
== MT_OOBDATA
|| (m
->m_flags
& M_EOR
)) {
420 m
= so
->so_rcv
.sb_mb
;
/* Neither connected nor connecting on a connection-requiring protocol. */
423 if ((so
->so_state
& (SS_ISCONNECTED
|SS_ISCONNECTING
)) == 0 &&
424 (so
->so_proto
->pr_flags
& PR_CONNREQUIRED
)) {
/* Non-blocking socket or MSG_DONTWAIT request. */
430 if (so
->so_nbio
|| (flags
& MSG_DONTWAIT
)) {
/* Drop the lock and sleep in sbwait() on the receive buffer. */
434 sbunlock(&so
->so_rcv
);
435 error
= sbwait(&so
->so_rcv
);
444 * On entry here, m points to the first record of the socket buffer.
445 * While we process the initial mbufs containing address and control
446 * info, we save a copy of m->m_nextpkt into nextrecord.
448 #ifdef notyet /* XXXX */
450 uio
->uio_lwp
->l_ru
.ru_msgrcv
++;
452 KASSERT(m
== so
->so_rcv
.sb_mb
);
453 SBLASTRECORDCHK(&so
->so_rcv
, "kttcp_soreceive 1");
454 SBLASTMBUFCHK(&so
->so_rcv
, "kttcp_soreceive 1");
455 nextrecord
= m
->m_nextpkt
;
/* PR_ADDR protocols: the record is checked for a leading MT_SONAME mbuf. */
456 if (pr
->pr_flags
& PR_ADDR
) {
458 if (m
->m_type
!= MT_SONAME
)
462 if (flags
& MSG_PEEK
) {
/* Remove the mbuf from the buffer and free it. */
465 sbfree(&so
->so_rcv
, m
);
466 MFREE(m
, so
->so_rcv
.sb_mb
);
467 m
= so
->so_rcv
.sb_mb
;
/* Step over control mbufs the same way. */
470 while (m
&& m
->m_type
== MT_CONTROL
&& error
== 0) {
471 if (flags
& MSG_PEEK
) {
474 sbfree(&so
->so_rcv
, m
);
475 MFREE(m
, so
->so_rcv
.sb_mb
);
476 m
= so
->so_rcv
.sb_mb
;
481 * If m is non-NULL, we have some data to read. From now on,
482 * make sure to keep sb_lastrecord consistent when working on
483 * the last packet on the chain (nextrecord == NULL) and we
484 * change m->m_nextpkt.
487 if ((flags
& MSG_PEEK
) == 0) {
488 m
->m_nextpkt
= nextrecord
;
490 * If nextrecord == NULL (this is a single chain),
491 * then sb_lastrecord may not be valid here if m
492 * was changed earlier.
494 if (nextrecord
== NULL
) {
495 KASSERT(so
->so_rcv
.sb_mb
== m
);
496 so
->so_rcv
.sb_lastrecord
= m
;
500 if (type
== MT_OOBDATA
)
/* Not peeking: advance the buffer head to the next record. */
503 if ((flags
& MSG_PEEK
) == 0) {
504 KASSERT(so
->so_rcv
.sb_mb
== m
);
505 so
->so_rcv
.sb_mb
= nextrecord
;
506 SB_EMPTY_FIXUP(&so
->so_rcv
);
509 SBLASTRECORDCHK(&so
->so_rcv
, "kttcp_soreceive 2");
510 SBLASTMBUFCHK(&so
->so_rcv
, "kttcp_soreceive 2");
/* Main loop: consume data mbufs until resid is satisfied or an error occurs. */
514 while (m
&& resid
> 0 && error
== 0) {
/* The mbuf's type is compared against the type recorded for this record. */
515 if (m
->m_type
== MT_OOBDATA
) {
516 if (type
!= MT_OOBDATA
)
518 } else if (type
== MT_OOBDATA
)
521 else if (m
->m_type
!= MT_DATA
&& m
->m_type
!= MT_HEADER
)
524 so
->so_state
&= ~SS_RCVATMARK
;
/* Clamp len at the out-of-band mark. */
526 if (so
->so_oobmark
&& len
> so
->so_oobmark
- offset
)
527 len
= so
->so_oobmark
- offset
;
/* Clamp len to what remains in this mbuf. */
528 if (len
> m
->m_len
- moff
)
529 len
= m
->m_len
- moff
;
531 * If mp is set, just pass back the mbufs.
532 * Otherwise copy them out via the uio, then free.
533 * Sockbuf must be consistent here (points to current mbuf,
534 * it points to next record) when we drop priority;
535 * we must note any additions to the sockbuf when we
536 * block interrupts again.
/* len covers the rest of this mbuf. */
539 if (len
== m
->m_len
- moff
) {
540 if (m
->m_flags
& M_EOR
)
542 if (flags
& MSG_PEEK
) {
546 nextrecord
= m
->m_nextpkt
;
547 sbfree(&so
->so_rcv
, m
);
551 so
->so_rcv
.sb_mb
= m
= m
->m_next
;
554 MFREE(m
, so
->so_rcv
.sb_mb
);
555 m
= so
->so_rcv
.sb_mb
;
558 * If m != NULL, we also know that
559 * so->so_rcv.sb_mb != NULL.
561 KASSERT(so
->so_rcv
.sb_mb
== m
);
/* Relink the new head to the following record; fix sb_lastrecord. */
563 m
->m_nextpkt
= nextrecord
;
564 if (nextrecord
== NULL
)
565 so
->so_rcv
.sb_lastrecord
= m
;
567 so
->so_rcv
.sb_mb
= nextrecord
;
568 SB_EMPTY_FIXUP(&so
->so_rcv
);
570 SBLASTRECORDCHK(&so
->so_rcv
,
571 "kttcp_soreceive 3");
572 SBLASTMBUFCHK(&so
->so_rcv
,
573 "kttcp_soreceive 3");
576 if (flags
& MSG_PEEK
)
/* Copy len bytes of m into *mp. */
581 *mp
= m_copym(m
, 0, len
, M_WAIT
);
/* Reduce the buffer's byte count by what was consumed. */
586 so
->so_rcv
.sb_cc
-= len
;
/* Out-of-band mark bookkeeping. */
589 if (so
->so_oobmark
) {
590 if ((flags
& MSG_PEEK
) == 0) {
591 so
->so_oobmark
-= len
;
592 if (so
->so_oobmark
== 0) {
593 so
->so_state
|= SS_RCVATMARK
;
598 if (offset
== so
->so_oobmark
)
605 * If the MSG_WAITALL flag is set (for non-atomic socket),
606 * we must not quit until "uio->uio_resid == 0" or an error
607 * termination. If a signal/timeout occurs, return
608 * with a short count but without error.
609 * Keep sockbuf locked against other readers.
611 while (flags
& MSG_WAITALL
&& m
== NULL
&& resid
> 0 &&
612 !sosendallatonce(so
) && !nextrecord
) {
613 if (so
->so_error
|| so
->so_state
& SS_CANTRCVMORE
)
616 * If we are peeking and the socket receive buffer is
617 * full, stop since we can't get more data to peek at.
619 if ((flags
& MSG_PEEK
) && sbspace(&so
->so_rcv
) <= 0)
622 * If we've drained the socket buffer, tell the
623 * protocol in case it needs to do something to
624 * get it filled again.
626 if ((pr
->pr_flags
& PR_WANTRCVD
) && so
->so_pcb
)
627 (*pr
->pr_usrreq
)(so
, PRU_RCVD
, NULL
,
628 (struct mbuf
*)(long)flags
, NULL
, NULL
);
629 SBLASTRECORDCHK(&so
->so_rcv
,
630 "kttcp_soreceive sbwait 2");
631 SBLASTMBUFCHK(&so
->so_rcv
,
632 "kttcp_soreceive sbwait 2");
/* Sleep until the receive buffer changes. */
633 error
= sbwait(&so
->so_rcv
);
635 sbunlock(&so
->so_rcv
);
/* Reload the head and following record from the buffer after waiting. */
639 if ((m
= so
->so_rcv
.sb_mb
) != NULL
)
640 nextrecord
= m
->m_nextpkt
;
/* PR_ATOMIC protocol with data left over: drop the record unless peeking. */
644 if (m
&& pr
->pr_flags
& PR_ATOMIC
) {
646 if ((flags
& MSG_PEEK
) == 0)
647 (void) sbdroprecord(&so
->so_rcv
);
649 if ((flags
& MSG_PEEK
) == 0) {
652 * First part is an SB_EMPTY_FIXUP(). Second part
653 * makes sure sb_lastrecord is up-to-date if
654 * there is still data in the socket buffer.
656 so
->so_rcv
.sb_mb
= nextrecord
;
657 if (so
->so_rcv
.sb_mb
== NULL
) {
658 so
->so_rcv
.sb_mbtail
= NULL
;
659 so
->so_rcv
.sb_lastrecord
= NULL
;
660 } else if (nextrecord
->m_nextpkt
== NULL
)
661 so
->so_rcv
.sb_lastrecord
= nextrecord
;
663 SBLASTRECORDCHK(&so
->so_rcv
, "kttcp_soreceive 4");
664 SBLASTMBUFCHK(&so
->so_rcv
, "kttcp_soreceive 4");
/* Tell the protocol that data was consumed. */
665 if (pr
->pr_flags
& PR_WANTRCVD
&& so
->so_pcb
)
666 (*pr
->pr_usrreq
)(so
, PRU_RCVD
, NULL
,
667 (struct mbuf
*)(long)flags
, NULL
, NULL
);
/*
 * Nothing was consumed, something was requested, no end-of-record was
 * seen and the socket can still receive: the lock is released here.
 */
669 if (orig_resid
== resid
&& orig_resid
&&
670 (flags
& MSG_EOR
) == 0 && (so
->so_state
& SS_CANTRCVMORE
) == 0) {
671 sbunlock(&so
->so_rcv
);
/* Release the receive-buffer lock. */
678 sbunlock(&so
->so_rcv
);
/* Report the number of bytes accounted for. */
680 *done
= slen
- resid
;
/* Diagnostic print of error, slen and resid. */
682 printf("soreceive: error %d slen %llu resid %lld\n", error
, slen
, resid
);