arch/cpu.resource: remove dead code
[AROS.git] / workbench / network / stacks / AROSTCP / bsdsocket / netinet / tcp_usrreq.c
blobd0d93b2c209ebeb8d3d9d48c24cc51c42ca7eabf
1 /*
2 * Copyright (c) 1982, 1986, 1988, 1993
3 * The Regents of the University of California. All rights reserved.
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
33 * From: @(#)tcp_usrreq.c 8.2 (Berkeley) 1/3/94
34 * $Id$
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/malloc.h>
41 #include <sys/mbuf.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/protosw.h>
45 #include <sys/errno.h>
46 #include <sys/stat.h>
47 #include <sys/queue.h>
48 #include <sys/synch.h>
49 #ifdef ENABLE_SYSCTL
50 #include <sys/sysctl.h>
51 #endif
53 #include <net/route.h>
54 #include <net/if.h>
56 #include <netinet/in.h>
57 #include <netinet/in_systm.h>
58 #include <netinet/ip.h>
59 #include <netinet/in_pcb.h>
60 #include <netinet/in_var.h>
61 #include <netinet/ip_var.h>
62 #include <netinet/tcp.h>
63 #include <netinet/tcp_fsm.h>
64 #include <netinet/tcp_seq.h>
65 #include <netinet/tcp_timer.h>
66 #include <netinet/tcp_var.h>
67 #include <netinet/tcpip.h>
68 #ifdef TCPDEBUG
69 #include <netinet/tcp_debug.h>
70 #endif
73 * TCP protocol interface to socket abstraction.
75 extern char *tcpstates[];
78 * Process a TCP user request for TCP tb. If this is a send request
79 * then m is the mbuf chain of send data. If this is a timer expiration
80 * (called from the software clock routine), then timertype tells which timer.
82 /*ARGSUSED*/
83 int
84 tcp_usrreq(so, req, m, nam, control)
85 struct socket *so;
86 int req;
87 struct mbuf *m, *nam, *control;
89 register struct inpcb *inp;
90 register struct tcpcb *tp = 0;
91 struct sockaddr_in *sinp;
92 int s;
93 int error = 0;
94 #ifdef TCPDEBUG
95 int ostate;
96 #endif
98 if (req == PRU_CONTROL)
99 return (in_control(so, (long)m, (caddr_t)nam,
100 (struct ifnet *)control));
101 if (control && control->m_len) {
102 m_freem(control);
103 if (m)
104 m_freem(m);
105 return (EINVAL);
108 s = splnet();
109 inp = sotoinpcb(so);
111 * When a TCP is attached to a socket, then there will be
112 * a (struct inpcb) pointed at by the socket, and this
113 * structure will point at a subsidary (struct tcpcb).
115 if (inp == 0 && req != PRU_ATTACH) {
116 splx(s);
117 return (EINVAL); /* XXX */
119 if (inp) {
120 tp = intotcpcb(inp);
121 /* WHAT IF TP IS 0? */
122 #ifdef KPROF
123 tcp_acounts[tp->t_state][req]++;
124 #endif
125 #ifdef TCPDEBUG
126 ostate = tp->t_state;
127 } else
128 ostate = 0;
129 #else /* TCPDEBUG */
131 #endif /* TCPDEBUG */
133 switch (req) {
136 * TCP attaches to socket via PRU_ATTACH, reserving space,
137 * and an internet control block.
139 case PRU_ATTACH:
140 if (inp) {
141 error = EISCONN;
142 break;
144 error = tcp_attach(so);
145 if (error)
146 break;
147 if ((so->so_options & SO_LINGER) && so->so_linger.tv_sec == 0)
148 so->so_linger.tv_sec = TCP_LINGERTIME;
149 tp = sototcpcb(so);
150 break;
153 * PRU_DETACH detaches the TCP protocol from the socket.
154 * If the protocol state is non-embryonic, then can't
155 * do this directly: have to initiate a PRU_DISCONNECT,
156 * which may finish later; embryonic TCB's can just
157 * be discarded here.
159 case PRU_DETACH:
160 if (tp->t_state > TCPS_LISTEN)
161 tp = tcp_disconnect(tp);
162 else
163 tp = tcp_close(tp);
164 break;
167 * Give the socket an address.
169 case PRU_BIND:
171 * Must check for multicast addresses and disallow binding
172 * to them.
174 sinp = mtod(nam, struct sockaddr_in *);
175 if (sinp->sin_family == AF_INET &&
176 IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
177 error = EAFNOSUPPORT;
178 break;
180 error = in_pcbbind(inp, nam);
181 if (error)
182 break;
183 break;
186 * Prepare to accept connections.
188 case PRU_LISTEN:
189 if (inp->inp_lport == 0)
190 error = in_pcbbind(inp, NULL);
191 if (error == 0)
192 tp->t_state = TCPS_LISTEN;
193 break;
196 * Initiate connection to peer.
197 * Create a template for use in transmissions on this connection.
198 * Enter SYN_SENT state, and mark socket as connecting.
199 * Start keep-alive timer, and seed output sequence space.
200 * Send initial segment on connection.
202 case PRU_CONNECT:
204 * Must disallow TCP ``connections'' to multicast addresses.
206 sinp = mtod(nam, struct sockaddr_in *);
207 if (sinp->sin_family == AF_INET
208 && IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
209 error = EAFNOSUPPORT;
210 break;
213 if ((error = tcp_connect(tp, nam)) != 0)
214 break;
215 error = tcp_output(tp);
216 break;
219 * Create a TCP connection between two sockets.
221 case PRU_CONNECT2:
222 error = EOPNOTSUPP;
223 break;
226 * Initiate disconnect from peer.
227 * If connection never passed embryonic stage, just drop;
228 * else if don't need to let data drain, then can just drop anyways,
229 * else have to begin TCP shutdown process: mark socket disconnecting,
230 * drain unread data, state switch to reflect user close, and
231 * send segment (e.g. FIN) to peer. Socket will be really disconnected
232 * when peer sends FIN and acks ours.
234 * SHOULD IMPLEMENT LATER PRU_CONNECT VIA REALLOC TCPCB.
236 case PRU_DISCONNECT:
237 tp = tcp_disconnect(tp);
238 break;
241 * Accept a connection. Essentially all the work is
242 * done at higher levels; just return the address
243 * of the peer, storing through addr.
245 case PRU_ACCEPT:
246 in_setpeeraddr(inp, nam);
247 break;
250 * Mark the connection as being incapable of further output.
252 case PRU_SHUTDOWN:
253 socantsendmore(so);
254 tp = tcp_usrclosed(tp);
255 if (tp)
256 error = tcp_output(tp);
257 break;
260 * After a receive, possibly send window update to peer.
262 case PRU_RCVD:
263 (void) tcp_output(tp);
264 break;
267 * Do a send by putting data in output queue and updating urgent
268 * marker if URG set. Possibly send more data.
270 case PRU_SEND_EOF:
271 case PRU_SEND:
272 sbappend(&so->so_snd, m);
273 if (nam && tp->t_state < TCPS_SYN_SENT) {
275 * Do implied connect if not yet connected,
276 * initialize window to default value, and
277 * initialize maxseg/maxopd using peer's cached
278 * MSS.
280 error = tcp_connect(tp, nam);
281 if (error)
282 break;
283 tp->snd_wnd = TTCP_CLIENT_SND_WND;
284 tcp_mss(tp, -1);
287 if (req == PRU_SEND_EOF) {
289 * Close the send side of the connection after
290 * the data is sent.
292 socantsendmore(so);
293 tp = tcp_usrclosed(tp);
295 if (tp != NULL)
296 error = tcp_output(tp);
297 break;
300 * Abort the TCP.
302 case PRU_ABORT:
303 tp = tcp_drop(tp, ECONNABORTED);
304 break;
306 case PRU_SENSE:
307 ((struct stat *) m)->st_blksize = so->so_snd.sb_hiwat;
308 (void) splx(s);
309 return (0);
311 case PRU_RCVOOB:
312 if ((so->so_oobmark == 0 &&
313 (so->so_state & SS_RCVATMARK) == 0) ||
314 so->so_options & SO_OOBINLINE ||
315 tp->t_oobflags & TCPOOB_HADDATA) {
316 error = EINVAL;
317 break;
319 if ((tp->t_oobflags & TCPOOB_HAVEDATA) == 0) {
320 error = EWOULDBLOCK;
321 break;
323 m->m_len = 1;
324 *mtod(m, caddr_t) = tp->t_iobc;
325 if (((long)nam & MSG_PEEK) == 0)
326 tp->t_oobflags ^= (TCPOOB_HAVEDATA | TCPOOB_HADDATA);
327 break;
329 case PRU_SENDOOB:
330 if (sbspace(&so->so_snd) < -512) {
331 m_freem(m);
332 error = ENOBUFS;
333 break;
336 * According to RFC961 (Assigned Protocols),
337 * the urgent pointer points to the last octet
338 * of urgent data. We continue, however,
339 * to consider it to indicate the first octet
340 * of data past the urgent section.
341 * Otherwise, snd_up should be one lower.
343 sbappend(&so->so_snd, m);
344 tp->snd_up = tp->snd_una + so->so_snd.sb_cc;
345 tp->t_force = 1;
346 error = tcp_output(tp);
347 tp->t_force = 0;
348 break;
350 case PRU_SOCKADDR:
351 in_setsockaddr(inp, nam);
352 break;
354 case PRU_PEERADDR:
355 in_setpeeraddr(inp, nam);
356 break;
359 * TCP slow timer went off; going through this
360 * routine for tracing's sake.
362 case PRU_SLOWTIMO:
363 tp = tcp_timers(tp, (long)nam);
364 #ifdef TCPDEBUG
365 req |= (int)nam << 8; /* for debug's sake */
366 #endif
367 break;
369 default:
370 panic("tcp_usrreq");
372 #ifdef TCPDEBUG
373 if (tp && (so->so_options & SO_DEBUG))
374 tcp_trace(TA_USER, ostate, tp, (struct tcpiphdr *)0, req);
375 #endif
376 splx(s);
377 return (error);
381 * Common subroutine to open a TCP connection to remote host specified
382 * by struct sockaddr_in in mbuf *nam. Call in_pcbbind to assign a local
383 * port number if needed. Call in_pcbladdr to do the routing and to choose
384 * a local host address (interface). If there is an existing incarnation
385 * of the same connection in TIME-WAIT state and if the remote host was
386 * sending CC options and if the connection duration was < MSL, then
387 * truncate the previous TIME-WAIT state and proceed.
388 * Initialize connection parameters and enter SYN-SENT state.
391 tcp_connect(tp, nam)
392 register struct tcpcb *tp;
393 struct mbuf *nam;
395 struct inpcb *inp = tp->t_inpcb, *oinp;
396 struct socket *so = inp->inp_socket;
397 struct tcpcb *otp;
398 struct sockaddr_in *sin = mtod(nam, struct sockaddr_in *);
399 struct sockaddr_in *ifaddr;
400 int error;
402 if (inp->inp_lport == 0) {
403 error = in_pcbbind(inp, NULL);
404 if (error)
405 return error;
409 * Cannot simply call in_pcbconnect, because there might be an
410 * earlier incarnation of this same connection still in
411 * TIME_WAIT state, creating an ADDRINUSE error.
413 error = in_pcbladdr(inp, nam, &ifaddr);
414 if (error)
415 return error;
416 oinp = in_pcblookup(inp->inp_pcbinfo->listhead,
417 sin->sin_addr, sin->sin_port,
418 inp->inp_laddr.s_addr != INADDR_ANY ? inp->inp_laddr
419 : ifaddr->sin_addr,
420 inp->inp_lport, 0);
421 if (oinp) {
422 if (oinp != inp && (otp = intotcpcb(oinp)) != NULL &&
423 otp->t_state == TCPS_TIME_WAIT &&
424 otp->t_duration < TCPTV_MSL &&
425 (otp->t_flags & TF_RCVD_CC))
426 otp = tcp_close(otp);
427 else
428 return EADDRINUSE;
430 if (inp->inp_laddr.s_addr == INADDR_ANY)
431 inp->inp_laddr = ifaddr->sin_addr;
432 inp->inp_faddr = sin->sin_addr;
433 inp->inp_fport = sin->sin_port;
434 in_pcbrehash(inp);
436 tp->t_template = tcp_template(tp);
437 if (tp->t_template == 0) {
438 in_pcbdisconnect(inp);
439 return ENOBUFS;
442 /* Compute window scaling to request. */
443 while (tp->request_r_scale < TCP_MAX_WINSHIFT &&
444 (TCP_MAXWIN << tp->request_r_scale) < so->so_rcv.sb_hiwat)
445 tp->request_r_scale++;
447 soisconnecting(so);
448 tcpstat.tcps_connattempt++;
449 tp->t_state = TCPS_SYN_SENT;
450 tp->t_timer[TCPT_KEEP] = TCPTV_KEEP_INIT;
451 tp->iss = tcp_iss; tcp_iss += TCP_ISSINCR/2;
452 tcp_sendseqinit(tp);
453 tp->cc_send = CC_INC(tcp_ccgen);
455 return 0;
459 tcp_ctloutput(op, so, level, optname, mp)
460 int op;
461 struct socket *so;
462 int level, optname;
463 struct mbuf **mp;
465 int error = 0, s;
466 struct inpcb *inp;
467 register struct tcpcb *tp;
468 register struct mbuf *m;
469 register int i;
471 s = splnet();
472 inp = sotoinpcb(so);
473 if (inp == NULL) {
474 splx(s);
475 if (op == PRCO_SETOPT && *mp)
476 (void) m_free(*mp);
477 return (ECONNRESET);
479 if (level != IPPROTO_TCP) {
480 error = ip_ctloutput(op, so, level, optname, mp);
481 splx(s);
482 return (error);
484 tp = intotcpcb(inp);
486 switch (op) {
488 case PRCO_SETOPT:
489 m = *mp;
490 switch (optname) {
492 case TCP_NODELAY:
493 if (m == NULL || m->m_len < sizeof (int))
494 error = EINVAL;
495 else if (*mtod(m, int *))
496 tp->t_flags |= TF_NODELAY;
497 else
498 tp->t_flags &= ~TF_NODELAY;
499 break;
501 case TCP_MAXSEG:
502 if (m && (i = *mtod(m, int *)) > 0 && i <= tp->t_maxseg)
503 tp->t_maxseg = i;
504 else
505 error = EINVAL;
506 break;
508 case TCP_NOOPT:
509 if (m == NULL || m->m_len < sizeof (int))
510 error = EINVAL;
511 else if (*mtod(m, int *))
512 tp->t_flags |= TF_NOOPT;
513 else
514 tp->t_flags &= ~TF_NOOPT;
515 break;
517 case TCP_NOPUSH:
518 if (m == NULL || m->m_len < sizeof (int))
519 error = EINVAL;
520 else if (*mtod(m, int *))
521 tp->t_flags |= TF_NOPUSH;
522 else
523 tp->t_flags &= ~TF_NOPUSH;
524 break;
526 default:
527 error = ENOPROTOOPT;
528 break;
530 if (m)
531 (void) m_free(m);
532 break;
534 case PRCO_GETOPT:
535 *mp = m = m_get(M_WAIT, MT_SOOPTS);
536 m->m_len = sizeof(int);
538 switch (optname) {
539 case TCP_NODELAY:
540 *mtod(m, int *) = tp->t_flags & TF_NODELAY;
541 break;
542 case TCP_MAXSEG:
543 *mtod(m, int *) = tp->t_maxseg;
544 break;
545 case TCP_NOOPT:
546 *mtod(m, int *) = tp->t_flags & TF_NOOPT;
547 break;
548 case TCP_NOPUSH:
549 *mtod(m, int *) = tp->t_flags & TF_NOPUSH;
550 break;
551 default:
552 error = ENOPROTOOPT;
553 break;
555 break;
557 splx(s);
558 return (error);
562 * tcp_sendspace and tcp_recvspace are the default send and receive window
563 * sizes, respectively. These are obsolescent (this information should
564 * be set by the route).
566 u_long tcp_sendspace = 1024*16;
567 u_long tcp_recvspace = 1024*16;
570 * Attach TCP protocol to socket, allocating
571 * internet protocol control block, tcp control block,
572 * bufer space, and entering LISTEN state if to accept connections.
575 tcp_attach(so)
576 struct socket *so;
578 register struct tcpcb *tp;
579 struct inpcb *inp;
580 int error;
582 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
583 error = soreserve(so, tcp_sendspace, tcp_recvspace);
584 if (error)
585 return (error);
587 error = in_pcballoc(so, &tcbinfo);
588 if (error)
589 return (error);
590 inp = sotoinpcb(so);
591 tp = tcp_newtcpcb(inp);
592 if (tp == 0) {
593 int nofd = so->so_state & SS_NOFDREF; /* XXX */
595 so->so_state &= ~SS_NOFDREF; /* don't free the socket yet */
596 in_pcbdetach(inp);
597 so->so_state |= nofd;
598 return (ENOBUFS);
600 tp->t_state = TCPS_CLOSED;
601 return (0);
605 * Initiate (or continue) disconnect.
606 * If embryonic state, just send reset (once).
607 * If in ``let data drain'' option and linger null, just drop.
608 * Otherwise (hard), mark socket disconnecting and drop
609 * current input data; switch states based on user close, and
610 * send segment to peer (with FIN).
612 struct tcpcb *
613 tcp_disconnect(tp)
614 register struct tcpcb *tp;
616 struct socket *so = tp->t_inpcb->inp_socket;
618 if (tp->t_state < TCPS_ESTABLISHED)
619 tp = tcp_close(tp);
620 else if ((so->so_options & SO_LINGER) && so->so_linger.tv_sec == 0)
621 tp = tcp_drop(tp, 0);
622 else {
623 soisdisconnecting(so);
624 sbflush(&so->so_rcv);
625 tp = tcp_usrclosed(tp);
626 if (tp)
627 (void) tcp_output(tp);
629 return (tp);
633 * User issued close, and wish to trail through shutdown states:
634 * if never received SYN, just forget it. If got a SYN from peer,
635 * but haven't sent FIN, then go to FIN_WAIT_1 state to send peer a FIN.
636 * If already got a FIN from peer, then almost done; go to LAST_ACK
637 * state. In all other cases, have already sent FIN to peer (e.g.
638 * after PRU_SHUTDOWN), and just have to play tedious game waiting
639 * for peer to send FIN or not respond to keep-alives, etc.
640 * We can let the user exit from the close as soon as the FIN is acked.
642 struct tcpcb *
643 tcp_usrclosed(tp)
644 register struct tcpcb *tp;
647 switch (tp->t_state) {
649 case TCPS_CLOSED:
650 case TCPS_LISTEN:
651 tp->t_state = TCPS_CLOSED;
652 tp = tcp_close(tp);
653 break;
655 case TCPS_SYN_SENT:
656 case TCPS_SYN_RECEIVED:
657 tp->t_flags |= TF_NEEDFIN;
658 break;
660 case TCPS_ESTABLISHED:
661 tp->t_state = TCPS_FIN_WAIT_1;
662 break;
664 case TCPS_CLOSE_WAIT:
665 tp->t_state = TCPS_LAST_ACK;
666 break;
668 if (tp && tp->t_state >= TCPS_FIN_WAIT_2)
669 soisdisconnected(tp->t_inpcb->inp_socket);
670 return (tp);
#ifdef ENABLE_SYSCTL
/*
 * Sysctl handler for the TCP variables.
 *
 * name[0] selects the variable; namelen must be exactly 1, otherwise
 * ENOTDIR is returned.  TCPCTL_STATS is served by sysctl_rdstruct() from
 * tcpstat; every other known name is passed to sysctl_int().  Unknown
 * names yield ENOPROTOOPT.
 */
int
tcp_sysctl(name, namelen, oldp, oldlenp, newp, newlen)
	int *name;
	u_int namelen;
	void *oldp;
	size_t *oldlenp;
	void *newp;
	size_t newlen;
{
	int *valp;

	/* All sysctl names at this level are terminal. */
	if (namelen != 1)
		return (ENOTDIR);

	switch (name[0]) {
	case TCPCTL_STATS:
		return (sysctl_rdstruct(oldp, oldlenp, newp, &tcpstat,
		    sizeof tcpstat));
	case TCPCTL_DO_RFC1323:
		valp = &tcp_do_rfc1323;
		break;
	case TCPCTL_DO_RFC1644:
		valp = &tcp_do_rfc1644;
		break;
	case TCPCTL_MSSDFLT:
		valp = &tcp_mssdflt;
		break;
	case TCPCTL_RTTDFLT:
		valp = &tcp_rttdflt;
		break;
	case TCPCTL_KEEPIDLE:
		valp = &tcp_keepidle;
		break;
	case TCPCTL_KEEPINTVL:
		valp = &tcp_keepintvl;
		break;
	case TCPCTL_SENDSPACE:
		/* tcp_sendspace is a u_long that is accessed as an int here. */
		valp = (int *)&tcp_sendspace;	/* XXX */
		break;
	case TCPCTL_RECVSPACE:
		/* tcp_recvspace is a u_long that is accessed as an int here. */
		valp = (int *)&tcp_recvspace;	/* XXX */
		break;
	default:
		return (ENOPROTOOPT);
	}
	return (sysctl_int(oldp, oldlenp, newp, newlen, valp));
}
#endif