Merge master.kernel.org:/pub/scm/linux/kernel/git/davem/sparc-2.6
[linux/fpc-iii.git] / net / ipv6 / tcp_ipv6.c
blob4c2a7c0cafef2db93c05e95f1345b02affca3ea9
/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	$Id: tcp_ipv6.c,v 1.144 2002/02/01 22:01:04 davem Exp $
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allows both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

28 #include <linux/module.h>
29 #include <linux/errno.h>
30 #include <linux/types.h>
31 #include <linux/socket.h>
32 #include <linux/sockios.h>
33 #include <linux/net.h>
34 #include <linux/jiffies.h>
35 #include <linux/in.h>
36 #include <linux/in6.h>
37 #include <linux/netdevice.h>
38 #include <linux/init.h>
39 #include <linux/jhash.h>
40 #include <linux/ipsec.h>
41 #include <linux/times.h>
43 #include <linux/ipv6.h>
44 #include <linux/icmpv6.h>
45 #include <linux/random.h>
47 #include <net/tcp.h>
48 #include <net/ndisc.h>
49 #include <net/inet6_hashtables.h>
50 #include <net/inet6_connection_sock.h>
51 #include <net/ipv6.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
58 #include <net/xfrm.h>
59 #include <net/addrconf.h>
60 #include <net/snmp.h>
61 #include <net/dsfield.h>
62 #include <net/timewait_sock.h>
64 #include <asm/uaccess.h>
66 #include <linux/proc_fs.h>
67 #include <linux/seq_file.h>
69 /* Socket used for sending RSTs and ACKs */
70 static struct socket *tcp6_socket;
72 static void tcp_v6_send_reset(struct sk_buff *skb);
73 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req);
74 static void tcp_v6_send_check(struct sock *sk, int len,
75 struct sk_buff *skb);
77 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
79 static struct inet_connection_sock_af_ops ipv6_mapped;
80 static struct inet_connection_sock_af_ops ipv6_specific;
82 static int tcp_v6_get_port(struct sock *sk, unsigned short snum)
84 return inet_csk_get_port(&tcp_hashinfo, sk, snum,
85 inet6_csk_bind_conflict);
88 static void tcp_v6_hash(struct sock *sk)
90 if (sk->sk_state != TCP_CLOSE) {
91 if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
92 tcp_prot.hash(sk);
93 return;
95 local_bh_disable();
96 __inet6_hash(&tcp_hashinfo, sk);
97 local_bh_enable();
101 static __inline__ u16 tcp_v6_check(struct tcphdr *th, int len,
102 struct in6_addr *saddr,
103 struct in6_addr *daddr,
104 unsigned long base)
106 return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
109 static __u32 tcp_v6_init_sequence(struct sock *sk, struct sk_buff *skb)
111 if (skb->protocol == htons(ETH_P_IPV6)) {
112 return secure_tcpv6_sequence_number(skb->nh.ipv6h->daddr.s6_addr32,
113 skb->nh.ipv6h->saddr.s6_addr32,
114 skb->h.th->dest,
115 skb->h.th->source);
116 } else {
117 return secure_tcp_sequence_number(skb->nh.iph->daddr,
118 skb->nh.iph->saddr,
119 skb->h.th->dest,
120 skb->h.th->source);
124 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
125 int addr_len)
127 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
128 struct inet_sock *inet = inet_sk(sk);
129 struct inet_connection_sock *icsk = inet_csk(sk);
130 struct ipv6_pinfo *np = inet6_sk(sk);
131 struct tcp_sock *tp = tcp_sk(sk);
132 struct in6_addr *saddr = NULL, *final_p = NULL, final;
133 struct flowi fl;
134 struct dst_entry *dst;
135 int addr_type;
136 int err;
138 if (addr_len < SIN6_LEN_RFC2133)
139 return -EINVAL;
141 if (usin->sin6_family != AF_INET6)
142 return(-EAFNOSUPPORT);
144 memset(&fl, 0, sizeof(fl));
146 if (np->sndflow) {
147 fl.fl6_flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
148 IP6_ECN_flow_init(fl.fl6_flowlabel);
149 if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
150 struct ip6_flowlabel *flowlabel;
151 flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
152 if (flowlabel == NULL)
153 return -EINVAL;
154 ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
155 fl6_sock_release(flowlabel);
160 * connect() to INADDR_ANY means loopback (BSD'ism).
163 if(ipv6_addr_any(&usin->sin6_addr))
164 usin->sin6_addr.s6_addr[15] = 0x1;
166 addr_type = ipv6_addr_type(&usin->sin6_addr);
168 if(addr_type & IPV6_ADDR_MULTICAST)
169 return -ENETUNREACH;
171 if (addr_type&IPV6_ADDR_LINKLOCAL) {
172 if (addr_len >= sizeof(struct sockaddr_in6) &&
173 usin->sin6_scope_id) {
174 /* If interface is set while binding, indices
175 * must coincide.
177 if (sk->sk_bound_dev_if &&
178 sk->sk_bound_dev_if != usin->sin6_scope_id)
179 return -EINVAL;
181 sk->sk_bound_dev_if = usin->sin6_scope_id;
184 /* Connect to link-local address requires an interface */
185 if (!sk->sk_bound_dev_if)
186 return -EINVAL;
189 if (tp->rx_opt.ts_recent_stamp &&
190 !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
191 tp->rx_opt.ts_recent = 0;
192 tp->rx_opt.ts_recent_stamp = 0;
193 tp->write_seq = 0;
196 ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
197 np->flow_label = fl.fl6_flowlabel;
200 * TCP over IPv4
203 if (addr_type == IPV6_ADDR_MAPPED) {
204 u32 exthdrlen = icsk->icsk_ext_hdr_len;
205 struct sockaddr_in sin;
207 SOCK_DEBUG(sk, "connect: ipv4 mapped\n");
209 if (__ipv6_only_sock(sk))
210 return -ENETUNREACH;
212 sin.sin_family = AF_INET;
213 sin.sin_port = usin->sin6_port;
214 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
216 icsk->icsk_af_ops = &ipv6_mapped;
217 sk->sk_backlog_rcv = tcp_v4_do_rcv;
219 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
221 if (err) {
222 icsk->icsk_ext_hdr_len = exthdrlen;
223 icsk->icsk_af_ops = &ipv6_specific;
224 sk->sk_backlog_rcv = tcp_v6_do_rcv;
225 goto failure;
226 } else {
227 ipv6_addr_set(&np->saddr, 0, 0, htonl(0x0000FFFF),
228 inet->saddr);
229 ipv6_addr_set(&np->rcv_saddr, 0, 0, htonl(0x0000FFFF),
230 inet->rcv_saddr);
233 return err;
236 if (!ipv6_addr_any(&np->rcv_saddr))
237 saddr = &np->rcv_saddr;
239 fl.proto = IPPROTO_TCP;
240 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
241 ipv6_addr_copy(&fl.fl6_src,
242 (saddr ? saddr : &np->saddr));
243 fl.oif = sk->sk_bound_dev_if;
244 fl.fl_ip_dport = usin->sin6_port;
245 fl.fl_ip_sport = inet->sport;
247 if (np->opt && np->opt->srcrt) {
248 struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt;
249 ipv6_addr_copy(&final, &fl.fl6_dst);
250 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
251 final_p = &final;
254 security_sk_classify_flow(sk, &fl);
256 err = ip6_dst_lookup(sk, &dst, &fl);
257 if (err)
258 goto failure;
259 if (final_p)
260 ipv6_addr_copy(&fl.fl6_dst, final_p);
262 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
263 goto failure;
265 if (saddr == NULL) {
266 saddr = &fl.fl6_src;
267 ipv6_addr_copy(&np->rcv_saddr, saddr);
270 /* set the source address */
271 ipv6_addr_copy(&np->saddr, saddr);
272 inet->rcv_saddr = LOOPBACK4_IPV6;
274 sk->sk_gso_type = SKB_GSO_TCPV6;
275 __ip6_dst_store(sk, dst, NULL, NULL);
277 icsk->icsk_ext_hdr_len = 0;
278 if (np->opt)
279 icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
280 np->opt->opt_nflen);
282 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
284 inet->dport = usin->sin6_port;
286 tcp_set_state(sk, TCP_SYN_SENT);
287 err = inet6_hash_connect(&tcp_death_row, sk);
288 if (err)
289 goto late_failure;
291 if (!tp->write_seq)
292 tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
293 np->daddr.s6_addr32,
294 inet->sport,
295 inet->dport);
297 err = tcp_connect(sk);
298 if (err)
299 goto late_failure;
301 return 0;
303 late_failure:
304 tcp_set_state(sk, TCP_CLOSE);
305 __sk_dst_reset(sk);
306 failure:
307 inet->dport = 0;
308 sk->sk_route_caps = 0;
309 return err;
312 static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
313 int type, int code, int offset, __u32 info)
315 struct ipv6hdr *hdr = (struct ipv6hdr*)skb->data;
316 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
317 struct ipv6_pinfo *np;
318 struct sock *sk;
319 int err;
320 struct tcp_sock *tp;
321 __u32 seq;
323 sk = inet6_lookup(&tcp_hashinfo, &hdr->daddr, th->dest, &hdr->saddr,
324 th->source, skb->dev->ifindex);
326 if (sk == NULL) {
327 ICMP6_INC_STATS_BH(__in6_dev_get(skb->dev), ICMP6_MIB_INERRORS);
328 return;
331 if (sk->sk_state == TCP_TIME_WAIT) {
332 inet_twsk_put(inet_twsk(sk));
333 return;
336 bh_lock_sock(sk);
337 if (sock_owned_by_user(sk))
338 NET_INC_STATS_BH(LINUX_MIB_LOCKDROPPEDICMPS);
340 if (sk->sk_state == TCP_CLOSE)
341 goto out;
343 tp = tcp_sk(sk);
344 seq = ntohl(th->seq);
345 if (sk->sk_state != TCP_LISTEN &&
346 !between(seq, tp->snd_una, tp->snd_nxt)) {
347 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
348 goto out;
351 np = inet6_sk(sk);
353 if (type == ICMPV6_PKT_TOOBIG) {
354 struct dst_entry *dst = NULL;
356 if (sock_owned_by_user(sk))
357 goto out;
358 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
359 goto out;
361 /* icmp should have updated the destination cache entry */
362 dst = __sk_dst_check(sk, np->dst_cookie);
364 if (dst == NULL) {
365 struct inet_sock *inet = inet_sk(sk);
366 struct flowi fl;
368 /* BUGGG_FUTURE: Again, it is not clear how
369 to handle rthdr case. Ignore this complexity
370 for now.
372 memset(&fl, 0, sizeof(fl));
373 fl.proto = IPPROTO_TCP;
374 ipv6_addr_copy(&fl.fl6_dst, &np->daddr);
375 ipv6_addr_copy(&fl.fl6_src, &np->saddr);
376 fl.oif = sk->sk_bound_dev_if;
377 fl.fl_ip_dport = inet->dport;
378 fl.fl_ip_sport = inet->sport;
379 security_skb_classify_flow(skb, &fl);
381 if ((err = ip6_dst_lookup(sk, &dst, &fl))) {
382 sk->sk_err_soft = -err;
383 goto out;
386 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0) {
387 sk->sk_err_soft = -err;
388 goto out;
391 } else
392 dst_hold(dst);
394 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
395 tcp_sync_mss(sk, dst_mtu(dst));
396 tcp_simple_retransmit(sk);
397 } /* else let the usual retransmit timer handle it */
398 dst_release(dst);
399 goto out;
402 icmpv6_err_convert(type, code, &err);
404 /* Might be for an request_sock */
405 switch (sk->sk_state) {
406 struct request_sock *req, **prev;
407 case TCP_LISTEN:
408 if (sock_owned_by_user(sk))
409 goto out;
411 req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
412 &hdr->saddr, inet6_iif(skb));
413 if (!req)
414 goto out;
416 /* ICMPs are not backlogged, hence we cannot get
417 * an established socket here.
419 BUG_TRAP(req->sk == NULL);
421 if (seq != tcp_rsk(req)->snt_isn) {
422 NET_INC_STATS_BH(LINUX_MIB_OUTOFWINDOWICMPS);
423 goto out;
426 inet_csk_reqsk_queue_drop(sk, req, prev);
427 goto out;
429 case TCP_SYN_SENT:
430 case TCP_SYN_RECV: /* Cannot happen.
431 It can, it SYNs are crossed. --ANK */
432 if (!sock_owned_by_user(sk)) {
433 sk->sk_err = err;
434 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
436 tcp_done(sk);
437 } else
438 sk->sk_err_soft = err;
439 goto out;
442 if (!sock_owned_by_user(sk) && np->recverr) {
443 sk->sk_err = err;
444 sk->sk_error_report(sk);
445 } else
446 sk->sk_err_soft = err;
448 out:
449 bh_unlock_sock(sk);
450 sock_put(sk);
454 static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
455 struct dst_entry *dst)
457 struct inet6_request_sock *treq = inet6_rsk(req);
458 struct ipv6_pinfo *np = inet6_sk(sk);
459 struct sk_buff * skb;
460 struct ipv6_txoptions *opt = NULL;
461 struct in6_addr * final_p = NULL, final;
462 struct flowi fl;
463 int err = -1;
465 memset(&fl, 0, sizeof(fl));
466 fl.proto = IPPROTO_TCP;
467 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
468 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
469 fl.fl6_flowlabel = 0;
470 fl.oif = treq->iif;
471 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
472 fl.fl_ip_sport = inet_sk(sk)->sport;
473 security_req_classify_flow(req, &fl);
475 if (dst == NULL) {
476 opt = np->opt;
477 if (opt == NULL &&
478 np->rxopt.bits.osrcrt == 2 &&
479 treq->pktopts) {
480 struct sk_buff *pktopts = treq->pktopts;
481 struct inet6_skb_parm *rxopt = IP6CB(pktopts);
482 if (rxopt->srcrt)
483 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr*)(pktopts->nh.raw + rxopt->srcrt));
486 if (opt && opt->srcrt) {
487 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
488 ipv6_addr_copy(&final, &fl.fl6_dst);
489 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
490 final_p = &final;
493 err = ip6_dst_lookup(sk, &dst, &fl);
494 if (err)
495 goto done;
496 if (final_p)
497 ipv6_addr_copy(&fl.fl6_dst, final_p);
498 if ((err = xfrm_lookup(&dst, &fl, sk, 0)) < 0)
499 goto done;
502 skb = tcp_make_synack(sk, dst, req);
503 if (skb) {
504 struct tcphdr *th = skb->h.th;
506 th->check = tcp_v6_check(th, skb->len,
507 &treq->loc_addr, &treq->rmt_addr,
508 csum_partial((char *)th, skb->len, skb->csum));
510 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
511 err = ip6_xmit(sk, skb, &fl, opt, 0);
512 if (err == NET_XMIT_CN)
513 err = 0;
516 done:
517 if (opt && opt != np->opt)
518 sock_kfree_s(sk, opt, opt->tot_len);
519 dst_release(dst);
520 return err;
523 static void tcp_v6_reqsk_destructor(struct request_sock *req)
525 if (inet6_rsk(req)->pktopts)
526 kfree_skb(inet6_rsk(req)->pktopts);
529 static struct request_sock_ops tcp6_request_sock_ops = {
530 .family = AF_INET6,
531 .obj_size = sizeof(struct tcp6_request_sock),
532 .rtx_syn_ack = tcp_v6_send_synack,
533 .send_ack = tcp_v6_reqsk_send_ack,
534 .destructor = tcp_v6_reqsk_destructor,
535 .send_reset = tcp_v6_send_reset
538 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
539 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
540 .twsk_unique = tcp_twsk_unique,
543 static void tcp_v6_send_check(struct sock *sk, int len, struct sk_buff *skb)
545 struct ipv6_pinfo *np = inet6_sk(sk);
546 struct tcphdr *th = skb->h.th;
548 if (skb->ip_summed == CHECKSUM_PARTIAL) {
549 th->check = ~csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP, 0);
550 skb->csum = offsetof(struct tcphdr, check);
551 } else {
552 th->check = csum_ipv6_magic(&np->saddr, &np->daddr, len, IPPROTO_TCP,
553 csum_partial((char *)th, th->doff<<2,
554 skb->csum));
558 static int tcp_v6_gso_send_check(struct sk_buff *skb)
560 struct ipv6hdr *ipv6h;
561 struct tcphdr *th;
563 if (!pskb_may_pull(skb, sizeof(*th)))
564 return -EINVAL;
566 ipv6h = skb->nh.ipv6h;
567 th = skb->h.th;
569 th->check = 0;
570 th->check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, skb->len,
571 IPPROTO_TCP, 0);
572 skb->csum = offsetof(struct tcphdr, check);
573 skb->ip_summed = CHECKSUM_PARTIAL;
574 return 0;
577 static void tcp_v6_send_reset(struct sk_buff *skb)
579 struct tcphdr *th = skb->h.th, *t1;
580 struct sk_buff *buff;
581 struct flowi fl;
583 if (th->rst)
584 return;
586 if (!ipv6_unicast_destination(skb))
587 return;
590 * We need to grab some memory, and put together an RST,
591 * and then put it into the queue to be sent.
594 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr),
595 GFP_ATOMIC);
596 if (buff == NULL)
597 return;
599 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + sizeof(struct tcphdr));
601 t1 = (struct tcphdr *) skb_push(buff,sizeof(struct tcphdr));
603 /* Swap the send and the receive. */
604 memset(t1, 0, sizeof(*t1));
605 t1->dest = th->source;
606 t1->source = th->dest;
607 t1->doff = sizeof(*t1)/4;
608 t1->rst = 1;
610 if(th->ack) {
611 t1->seq = th->ack_seq;
612 } else {
613 t1->ack = 1;
614 t1->ack_seq = htonl(ntohl(th->seq) + th->syn + th->fin
615 + skb->len - (th->doff<<2));
618 buff->csum = csum_partial((char *)t1, sizeof(*t1), 0);
620 memset(&fl, 0, sizeof(fl));
621 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
622 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
624 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
625 sizeof(*t1), IPPROTO_TCP,
626 buff->csum);
628 fl.proto = IPPROTO_TCP;
629 fl.oif = inet6_iif(skb);
630 fl.fl_ip_dport = t1->dest;
631 fl.fl_ip_sport = t1->source;
632 security_skb_classify_flow(skb, &fl);
634 /* sk = NULL, but it is safe for now. RST socket required. */
635 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
637 if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
638 ip6_xmit(tcp6_socket->sk, buff, &fl, NULL, 0);
639 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
640 TCP_INC_STATS_BH(TCP_MIB_OUTRSTS);
641 return;
645 kfree_skb(buff);
648 static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts)
650 struct tcphdr *th = skb->h.th, *t1;
651 struct sk_buff *buff;
652 struct flowi fl;
653 int tot_len = sizeof(struct tcphdr);
655 if (ts)
656 tot_len += TCPOLEN_TSTAMP_ALIGNED;
658 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
659 GFP_ATOMIC);
660 if (buff == NULL)
661 return;
663 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
665 t1 = (struct tcphdr *) skb_push(buff,tot_len);
667 /* Swap the send and the receive. */
668 memset(t1, 0, sizeof(*t1));
669 t1->dest = th->source;
670 t1->source = th->dest;
671 t1->doff = tot_len/4;
672 t1->seq = htonl(seq);
673 t1->ack_seq = htonl(ack);
674 t1->ack = 1;
675 t1->window = htons(win);
677 if (ts) {
678 u32 *ptr = (u32*)(t1 + 1);
679 *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
680 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
681 *ptr++ = htonl(tcp_time_stamp);
682 *ptr = htonl(ts);
685 buff->csum = csum_partial((char *)t1, tot_len, 0);
687 memset(&fl, 0, sizeof(fl));
688 ipv6_addr_copy(&fl.fl6_dst, &skb->nh.ipv6h->saddr);
689 ipv6_addr_copy(&fl.fl6_src, &skb->nh.ipv6h->daddr);
691 t1->check = csum_ipv6_magic(&fl.fl6_src, &fl.fl6_dst,
692 tot_len, IPPROTO_TCP,
693 buff->csum);
695 fl.proto = IPPROTO_TCP;
696 fl.oif = inet6_iif(skb);
697 fl.fl_ip_dport = t1->dest;
698 fl.fl_ip_sport = t1->source;
699 security_skb_classify_flow(skb, &fl);
701 if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) {
702 if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) {
703 ip6_xmit(tcp6_socket->sk, buff, &fl, NULL, 0);
704 TCP_INC_STATS_BH(TCP_MIB_OUTSEGS);
705 return;
709 kfree_skb(buff);
712 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
714 struct inet_timewait_sock *tw = inet_twsk(sk);
715 const struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
717 tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
718 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
719 tcptw->tw_ts_recent);
721 inet_twsk_put(tw);
724 static void tcp_v6_reqsk_send_ack(struct sk_buff *skb, struct request_sock *req)
726 tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent);
730 static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb)
732 struct request_sock *req, **prev;
733 const struct tcphdr *th = skb->h.th;
734 struct sock *nsk;
736 /* Find possible connection requests. */
737 req = inet6_csk_search_req(sk, &prev, th->source,
738 &skb->nh.ipv6h->saddr,
739 &skb->nh.ipv6h->daddr, inet6_iif(skb));
740 if (req)
741 return tcp_check_req(sk, skb, req, prev);
743 nsk = __inet6_lookup_established(&tcp_hashinfo, &skb->nh.ipv6h->saddr,
744 th->source, &skb->nh.ipv6h->daddr,
745 ntohs(th->dest), inet6_iif(skb));
747 if (nsk) {
748 if (nsk->sk_state != TCP_TIME_WAIT) {
749 bh_lock_sock(nsk);
750 return nsk;
752 inet_twsk_put(inet_twsk(nsk));
753 return NULL;
756 #if 0 /*def CONFIG_SYN_COOKIES*/
757 if (!th->rst && !th->syn && th->ack)
758 sk = cookie_v6_check(sk, skb, &(IPCB(skb)->opt));
759 #endif
760 return sk;
763 /* FIXME: this is substantially similar to the ipv4 code.
764 * Can some kind of merge be done? -- erics
766 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
768 struct inet6_request_sock *treq;
769 struct ipv6_pinfo *np = inet6_sk(sk);
770 struct tcp_options_received tmp_opt;
771 struct tcp_sock *tp = tcp_sk(sk);
772 struct request_sock *req = NULL;
773 __u32 isn = TCP_SKB_CB(skb)->when;
775 if (skb->protocol == htons(ETH_P_IP))
776 return tcp_v4_conn_request(sk, skb);
778 if (!ipv6_unicast_destination(skb))
779 goto drop;
782 * There are no SYN attacks on IPv6, yet...
784 if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
785 if (net_ratelimit())
786 printk(KERN_INFO "TCPv6: dropping request, synflood is possible\n");
787 goto drop;
790 if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
791 goto drop;
793 req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
794 if (req == NULL)
795 goto drop;
797 tcp_clear_options(&tmp_opt);
798 tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
799 tmp_opt.user_mss = tp->rx_opt.user_mss;
801 tcp_parse_options(skb, &tmp_opt, 0);
803 tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
804 tcp_openreq_init(req, &tmp_opt, skb);
806 treq = inet6_rsk(req);
807 ipv6_addr_copy(&treq->rmt_addr, &skb->nh.ipv6h->saddr);
808 ipv6_addr_copy(&treq->loc_addr, &skb->nh.ipv6h->daddr);
809 TCP_ECN_create_request(req, skb->h.th);
810 treq->pktopts = NULL;
811 if (ipv6_opt_accepted(sk, skb) ||
812 np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
813 np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
814 atomic_inc(&skb->users);
815 treq->pktopts = skb;
817 treq->iif = sk->sk_bound_dev_if;
819 /* So that link locals have meaning */
820 if (!sk->sk_bound_dev_if &&
821 ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
822 treq->iif = inet6_iif(skb);
824 if (isn == 0)
825 isn = tcp_v6_init_sequence(sk,skb);
827 tcp_rsk(req)->snt_isn = isn;
829 security_inet_conn_request(sk, skb, req);
831 if (tcp_v6_send_synack(sk, req, NULL))
832 goto drop;
834 inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
835 return 0;
837 drop:
838 if (req)
839 reqsk_free(req);
841 return 0; /* don't send reset */
844 static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
845 struct request_sock *req,
846 struct dst_entry *dst)
848 struct inet6_request_sock *treq = inet6_rsk(req);
849 struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
850 struct tcp6_sock *newtcp6sk;
851 struct inet_sock *newinet;
852 struct tcp_sock *newtp;
853 struct sock *newsk;
854 struct ipv6_txoptions *opt;
856 if (skb->protocol == htons(ETH_P_IP)) {
858 * v6 mapped
861 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);
863 if (newsk == NULL)
864 return NULL;
866 newtcp6sk = (struct tcp6_sock *)newsk;
867 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
869 newinet = inet_sk(newsk);
870 newnp = inet6_sk(newsk);
871 newtp = tcp_sk(newsk);
873 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
875 ipv6_addr_set(&newnp->daddr, 0, 0, htonl(0x0000FFFF),
876 newinet->daddr);
878 ipv6_addr_set(&newnp->saddr, 0, 0, htonl(0x0000FFFF),
879 newinet->saddr);
881 ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);
883 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
884 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
885 newnp->pktoptions = NULL;
886 newnp->opt = NULL;
887 newnp->mcast_oif = inet6_iif(skb);
888 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
891 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
892 * here, tcp_create_openreq_child now does this for us, see the comment in
893 * that function for the gory details. -acme
896 /* It is tricky place. Until this moment IPv4 tcp
897 worked with IPv6 icsk.icsk_af_ops.
898 Sync it now.
900 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
902 return newsk;
905 opt = np->opt;
907 if (sk_acceptq_is_full(sk))
908 goto out_overflow;
910 if (np->rxopt.bits.osrcrt == 2 &&
911 opt == NULL && treq->pktopts) {
912 struct inet6_skb_parm *rxopt = IP6CB(treq->pktopts);
913 if (rxopt->srcrt)
914 opt = ipv6_invert_rthdr(sk, (struct ipv6_rt_hdr *)(treq->pktopts->nh.raw + rxopt->srcrt));
917 if (dst == NULL) {
918 struct in6_addr *final_p = NULL, final;
919 struct flowi fl;
921 memset(&fl, 0, sizeof(fl));
922 fl.proto = IPPROTO_TCP;
923 ipv6_addr_copy(&fl.fl6_dst, &treq->rmt_addr);
924 if (opt && opt->srcrt) {
925 struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
926 ipv6_addr_copy(&final, &fl.fl6_dst);
927 ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
928 final_p = &final;
930 ipv6_addr_copy(&fl.fl6_src, &treq->loc_addr);
931 fl.oif = sk->sk_bound_dev_if;
932 fl.fl_ip_dport = inet_rsk(req)->rmt_port;
933 fl.fl_ip_sport = inet_sk(sk)->sport;
934 security_req_classify_flow(req, &fl);
936 if (ip6_dst_lookup(sk, &dst, &fl))
937 goto out;
939 if (final_p)
940 ipv6_addr_copy(&fl.fl6_dst, final_p);
942 if ((xfrm_lookup(&dst, &fl, sk, 0)) < 0)
943 goto out;
946 newsk = tcp_create_openreq_child(sk, req, skb);
947 if (newsk == NULL)
948 goto out;
951 * No need to charge this sock to the relevant IPv6 refcnt debug socks
952 * count here, tcp_create_openreq_child now does this for us, see the
953 * comment in that function for the gory details. -acme
956 newsk->sk_gso_type = SKB_GSO_TCPV6;
957 __ip6_dst_store(newsk, dst, NULL, NULL);
959 newtcp6sk = (struct tcp6_sock *)newsk;
960 inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
962 newtp = tcp_sk(newsk);
963 newinet = inet_sk(newsk);
964 newnp = inet6_sk(newsk);
966 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
968 ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
969 ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
970 ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
971 newsk->sk_bound_dev_if = treq->iif;
973 /* Now IPv6 options...
975 First: no IPv4 options.
977 newinet->opt = NULL;
979 /* Clone RX bits */
980 newnp->rxopt.all = np->rxopt.all;
982 /* Clone pktoptions received with SYN */
983 newnp->pktoptions = NULL;
984 if (treq->pktopts != NULL) {
985 newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
986 kfree_skb(treq->pktopts);
987 treq->pktopts = NULL;
988 if (newnp->pktoptions)
989 skb_set_owner_r(newnp->pktoptions, newsk);
991 newnp->opt = NULL;
992 newnp->mcast_oif = inet6_iif(skb);
993 newnp->mcast_hops = skb->nh.ipv6h->hop_limit;
995 /* Clone native IPv6 options from listening socket (if any)
997 Yes, keeping reference count would be much more clever,
998 but we make one more one thing there: reattach optmem
999 to newsk.
1001 if (opt) {
1002 newnp->opt = ipv6_dup_options(newsk, opt);
1003 if (opt != np->opt)
1004 sock_kfree_s(sk, opt, opt->tot_len);
1007 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1008 if (newnp->opt)
1009 inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
1010 newnp->opt->opt_flen);
1012 tcp_mtup_init(newsk);
1013 tcp_sync_mss(newsk, dst_mtu(dst));
1014 newtp->advmss = dst_metric(dst, RTAX_ADVMSS);
1015 tcp_initialize_rcv_mss(newsk);
1017 newinet->daddr = newinet->saddr = newinet->rcv_saddr = LOOPBACK4_IPV6;
1019 __inet6_hash(&tcp_hashinfo, newsk);
1020 inet_inherit_port(&tcp_hashinfo, sk, newsk);
1022 return newsk;
1024 out_overflow:
1025 NET_INC_STATS_BH(LINUX_MIB_LISTENOVERFLOWS);
1026 out:
1027 NET_INC_STATS_BH(LINUX_MIB_LISTENDROPS);
1028 if (opt && opt != np->opt)
1029 sock_kfree_s(sk, opt, opt->tot_len);
1030 dst_release(dst);
1031 return NULL;
1034 static int tcp_v6_checksum_init(struct sk_buff *skb)
1036 if (skb->ip_summed == CHECKSUM_COMPLETE) {
1037 if (!tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1038 &skb->nh.ipv6h->daddr,skb->csum)) {
1039 skb->ip_summed = CHECKSUM_UNNECESSARY;
1040 return 0;
1044 skb->csum = ~tcp_v6_check(skb->h.th,skb->len,&skb->nh.ipv6h->saddr,
1045 &skb->nh.ipv6h->daddr, 0);
1047 if (skb->len <= 76) {
1048 return __skb_checksum_complete(skb);
1050 return 0;
1053 /* The socket must have it's spinlock held when we get
1054 * here.
1056 * We have a potential double-lock case here, so even when
1057 * doing backlog processing we use the BH locking scheme.
1058 * This is because we cannot sleep with the original spinlock
1059 * held.
1061 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1063 struct ipv6_pinfo *np = inet6_sk(sk);
1064 struct tcp_sock *tp;
1065 struct sk_buff *opt_skb = NULL;
1067 /* Imagine: socket is IPv6. IPv4 packet arrives,
1068 goes to IPv4 receive handler and backlogged.
1069 From backlog it always goes here. Kerboom...
1070 Fortunately, tcp_rcv_established and rcv_established
1071 handle them correctly, but it is not case with
1072 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1075 if (skb->protocol == htons(ETH_P_IP))
1076 return tcp_v4_do_rcv(sk, skb);
1078 if (sk_filter(sk, skb))
1079 goto discard;
1082 * socket locking is here for SMP purposes as backlog rcv
1083 * is currently called with bh processing disabled.
1086 /* Do Stevens' IPV6_PKTOPTIONS.
1088 Yes, guys, it is the only place in our code, where we
1089 may make it not affecting IPv4.
1090 The rest of code is protocol independent,
1091 and I do not like idea to uglify IPv4.
1093 Actually, all the idea behind IPV6_PKTOPTIONS
1094 looks not very well thought. For now we latch
1095 options, received in the last packet, enqueued
1096 by tcp. Feel free to propose better solution.
1097 --ANK (980728)
1099 if (np->rxopt.all)
1100 opt_skb = skb_clone(skb, GFP_ATOMIC);
1102 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1103 TCP_CHECK_TIMER(sk);
1104 if (tcp_rcv_established(sk, skb, skb->h.th, skb->len))
1105 goto reset;
1106 TCP_CHECK_TIMER(sk);
1107 if (opt_skb)
1108 goto ipv6_pktoptions;
1109 return 0;
1112 if (skb->len < (skb->h.th->doff<<2) || tcp_checksum_complete(skb))
1113 goto csum_err;
1115 if (sk->sk_state == TCP_LISTEN) {
1116 struct sock *nsk = tcp_v6_hnd_req(sk, skb);
1117 if (!nsk)
1118 goto discard;
1121 * Queue it on the new socket if the new socket is active,
1122 * otherwise we just shortcircuit this and continue with
1123 * the new socket..
1125 if(nsk != sk) {
1126 if (tcp_child_process(sk, nsk, skb))
1127 goto reset;
1128 if (opt_skb)
1129 __kfree_skb(opt_skb);
1130 return 0;
1134 TCP_CHECK_TIMER(sk);
1135 if (tcp_rcv_state_process(sk, skb, skb->h.th, skb->len))
1136 goto reset;
1137 TCP_CHECK_TIMER(sk);
1138 if (opt_skb)
1139 goto ipv6_pktoptions;
1140 return 0;
1142 reset:
1143 tcp_v6_send_reset(skb);
1144 discard:
1145 if (opt_skb)
1146 __kfree_skb(opt_skb);
1147 kfree_skb(skb);
1148 return 0;
1149 csum_err:
1150 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1151 goto discard;
1154 ipv6_pktoptions:
1155 /* Do you ask, what is it?
1157 1. skb was enqueued by tcp.
1158 2. skb is added to tail of read queue, rather than out of order.
1159 3. socket is not in passive state.
1160 4. Finally, it really contains options, which user wants to receive.
1162 tp = tcp_sk(sk);
1163 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1164 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1165 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1166 np->mcast_oif = inet6_iif(opt_skb);
1167 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1168 np->mcast_hops = opt_skb->nh.ipv6h->hop_limit;
1169 if (ipv6_opt_accepted(sk, opt_skb)) {
1170 skb_set_owner_r(opt_skb, sk);
1171 opt_skb = xchg(&np->pktoptions, opt_skb);
1172 } else {
1173 __kfree_skb(opt_skb);
1174 opt_skb = xchg(&np->pktoptions, NULL);
1178 if (opt_skb)
1179 kfree_skb(opt_skb);
1180 return 0;
1183 static int tcp_v6_rcv(struct sk_buff **pskb)
1185 struct sk_buff *skb = *pskb;
1186 struct tcphdr *th;
1187 struct sock *sk;
1188 int ret;
1190 if (skb->pkt_type != PACKET_HOST)
1191 goto discard_it;
1194 * Count it even if it's bad.
1196 TCP_INC_STATS_BH(TCP_MIB_INSEGS);
1198 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1199 goto discard_it;
1201 th = skb->h.th;
1203 if (th->doff < sizeof(struct tcphdr)/4)
1204 goto bad_packet;
1205 if (!pskb_may_pull(skb, th->doff*4))
1206 goto discard_it;
1208 if ((skb->ip_summed != CHECKSUM_UNNECESSARY &&
1209 tcp_v6_checksum_init(skb)))
1210 goto bad_packet;
1212 th = skb->h.th;
1213 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1214 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1215 skb->len - th->doff*4);
1216 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1217 TCP_SKB_CB(skb)->when = 0;
1218 TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(skb->nh.ipv6h);
1219 TCP_SKB_CB(skb)->sacked = 0;
1221 sk = __inet6_lookup(&tcp_hashinfo, &skb->nh.ipv6h->saddr, th->source,
1222 &skb->nh.ipv6h->daddr, ntohs(th->dest),
1223 inet6_iif(skb));
1225 if (!sk)
1226 goto no_tcp_socket;
1228 process:
1229 if (sk->sk_state == TCP_TIME_WAIT)
1230 goto do_time_wait;
1232 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1233 goto discard_and_relse;
1235 if (sk_filter(sk, skb))
1236 goto discard_and_relse;
1238 skb->dev = NULL;
1240 bh_lock_sock_nested(sk);
1241 ret = 0;
1242 if (!sock_owned_by_user(sk)) {
1243 #ifdef CONFIG_NET_DMA
1244 struct tcp_sock *tp = tcp_sk(sk);
1245 if (tp->ucopy.dma_chan)
1246 ret = tcp_v6_do_rcv(sk, skb);
1247 else
1248 #endif
1250 if (!tcp_prequeue(sk, skb))
1251 ret = tcp_v6_do_rcv(sk, skb);
1253 } else
1254 sk_add_backlog(sk, skb);
1255 bh_unlock_sock(sk);
1257 sock_put(sk);
1258 return ret ? -1 : 0;
1260 no_tcp_socket:
1261 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1262 goto discard_it;
1264 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1265 bad_packet:
1266 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1267 } else {
1268 tcp_v6_send_reset(skb);
1271 discard_it:
1274 * Discard frame
1277 kfree_skb(skb);
1278 return 0;
1280 discard_and_relse:
1281 sock_put(sk);
1282 goto discard_it;
1284 do_time_wait:
1285 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1286 inet_twsk_put(inet_twsk(sk));
1287 goto discard_it;
1290 if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
1291 TCP_INC_STATS_BH(TCP_MIB_INERRS);
1292 inet_twsk_put(inet_twsk(sk));
1293 goto discard_it;
1296 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1297 case TCP_TW_SYN:
1299 struct sock *sk2;
1301 sk2 = inet6_lookup_listener(&tcp_hashinfo,
1302 &skb->nh.ipv6h->daddr,
1303 ntohs(th->dest), inet6_iif(skb));
1304 if (sk2 != NULL) {
1305 struct inet_timewait_sock *tw = inet_twsk(sk);
1306 inet_twsk_deschedule(tw, &tcp_death_row);
1307 inet_twsk_put(tw);
1308 sk = sk2;
1309 goto process;
1311 /* Fall through to ACK */
1313 case TCP_TW_ACK:
1314 tcp_v6_timewait_ack(sk, skb);
1315 break;
1316 case TCP_TW_RST:
1317 goto no_tcp_socket;
1318 case TCP_TW_SUCCESS:;
1320 goto discard_it;
/*
 * tcp_v6_remember_stamp - remember_stamp hook for native IPv6 sockets.
 * @sk: socket (unused).
 *
 * Not implemented for IPv6 yet; always returns 0.
 */
static int tcp_v6_remember_stamp(struct sock *sk)
{
	/* Alas, not yet... */
	return 0;
}
1329 static struct inet_connection_sock_af_ops ipv6_specific = {
1330 .queue_xmit = inet6_csk_xmit,
1331 .send_check = tcp_v6_send_check,
1332 .rebuild_header = inet6_sk_rebuild_header,
1333 .conn_request = tcp_v6_conn_request,
1334 .syn_recv_sock = tcp_v6_syn_recv_sock,
1335 .remember_stamp = tcp_v6_remember_stamp,
1336 .net_header_len = sizeof(struct ipv6hdr),
1337 .setsockopt = ipv6_setsockopt,
1338 .getsockopt = ipv6_getsockopt,
1339 .addr2sockaddr = inet6_csk_addr2sockaddr,
1340 .sockaddr_len = sizeof(struct sockaddr_in6),
1341 #ifdef CONFIG_COMPAT
1342 .compat_setsockopt = compat_ipv6_setsockopt,
1343 .compat_getsockopt = compat_ipv6_getsockopt,
1344 #endif
1348 * TCP over IPv4 via INET6 API
1351 static struct inet_connection_sock_af_ops ipv6_mapped = {
1352 .queue_xmit = ip_queue_xmit,
1353 .send_check = tcp_v4_send_check,
1354 .rebuild_header = inet_sk_rebuild_header,
1355 .conn_request = tcp_v6_conn_request,
1356 .syn_recv_sock = tcp_v6_syn_recv_sock,
1357 .remember_stamp = tcp_v4_remember_stamp,
1358 .net_header_len = sizeof(struct iphdr),
1359 .setsockopt = ipv6_setsockopt,
1360 .getsockopt = ipv6_getsockopt,
1361 .addr2sockaddr = inet6_csk_addr2sockaddr,
1362 .sockaddr_len = sizeof(struct sockaddr_in6),
1363 #ifdef CONFIG_COMPAT
1364 .compat_setsockopt = compat_ipv6_setsockopt,
1365 .compat_getsockopt = compat_ipv6_getsockopt,
1366 #endif
1369 /* NOTE: A lot of things set to zero explicitly by call to
1370 * sk_alloc() so need not be done here.
1372 static int tcp_v6_init_sock(struct sock *sk)
1374 struct inet_connection_sock *icsk = inet_csk(sk);
1375 struct tcp_sock *tp = tcp_sk(sk);
1377 skb_queue_head_init(&tp->out_of_order_queue);
1378 tcp_init_xmit_timers(sk);
1379 tcp_prequeue_init(tp);
1381 icsk->icsk_rto = TCP_TIMEOUT_INIT;
1382 tp->mdev = TCP_TIMEOUT_INIT;
1384 /* So many TCP implementations out there (incorrectly) count the
1385 * initial SYN frame in their delayed-ACK and congestion control
1386 * algorithms that we must have the following bandaid to talk
1387 * efficiently to them. -DaveM
1389 tp->snd_cwnd = 2;
1391 /* See draft-stevens-tcpca-spec-01 for discussion of the
1392 * initialization of these values.
1394 tp->snd_ssthresh = 0x7fffffff;
1395 tp->snd_cwnd_clamp = ~0;
1396 tp->mss_cache = 536;
1398 tp->reordering = sysctl_tcp_reordering;
1400 sk->sk_state = TCP_CLOSE;
1402 icsk->icsk_af_ops = &ipv6_specific;
1403 icsk->icsk_ca_ops = &tcp_init_congestion_ops;
1404 icsk->icsk_sync_mss = tcp_sync_mss;
1405 sk->sk_write_space = sk_stream_write_space;
1406 sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);
1408 sk->sk_sndbuf = sysctl_tcp_wmem[1];
1409 sk->sk_rcvbuf = sysctl_tcp_rmem[1];
1411 atomic_inc(&tcp_sockets_allocated);
1413 return 0;
/*
 * tcp_v6_destroy_sock - tear down a TCPv6 socket.
 * @sk: socket being destroyed.
 *
 * Runs the IPv4 TCP destructor first, then the IPv6 one, and returns
 * the result of inet6_destroy_sock().
 */
static int tcp_v6_destroy_sock(struct sock *sk)
{
	int rc;

	tcp_v4_destroy_sock(sk);
	rc = inet6_destroy_sock(sk);

	return rc;
}
1422 /* Proc filesystem TCPv6 sock list dumping. */
1423 static void get_openreq6(struct seq_file *seq,
1424 struct sock *sk, struct request_sock *req, int i, int uid)
1426 int ttd = req->expires - jiffies;
1427 struct in6_addr *src = &inet6_rsk(req)->loc_addr;
1428 struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;
1430 if (ttd < 0)
1431 ttd = 0;
1433 seq_printf(seq,
1434 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1435 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1437 src->s6_addr32[0], src->s6_addr32[1],
1438 src->s6_addr32[2], src->s6_addr32[3],
1439 ntohs(inet_sk(sk)->sport),
1440 dest->s6_addr32[0], dest->s6_addr32[1],
1441 dest->s6_addr32[2], dest->s6_addr32[3],
1442 ntohs(inet_rsk(req)->rmt_port),
1443 TCP_SYN_RECV,
1444 0,0, /* could print option size, but that is af dependent. */
1445 1, /* timers active (only the expire timer) */
1446 jiffies_to_clock_t(ttd),
1447 req->retrans,
1448 uid,
1449 0, /* non standard timer */
1450 0, /* open_requests have no inode */
1451 0, req);
1454 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1456 struct in6_addr *dest, *src;
1457 __u16 destp, srcp;
1458 int timer_active;
1459 unsigned long timer_expires;
1460 struct inet_sock *inet = inet_sk(sp);
1461 struct tcp_sock *tp = tcp_sk(sp);
1462 const struct inet_connection_sock *icsk = inet_csk(sp);
1463 struct ipv6_pinfo *np = inet6_sk(sp);
1465 dest = &np->daddr;
1466 src = &np->rcv_saddr;
1467 destp = ntohs(inet->dport);
1468 srcp = ntohs(inet->sport);
1470 if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
1471 timer_active = 1;
1472 timer_expires = icsk->icsk_timeout;
1473 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1474 timer_active = 4;
1475 timer_expires = icsk->icsk_timeout;
1476 } else if (timer_pending(&sp->sk_timer)) {
1477 timer_active = 2;
1478 timer_expires = sp->sk_timer.expires;
1479 } else {
1480 timer_active = 0;
1481 timer_expires = jiffies;
1484 seq_printf(seq,
1485 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1486 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %p %u %u %u %u %d\n",
1488 src->s6_addr32[0], src->s6_addr32[1],
1489 src->s6_addr32[2], src->s6_addr32[3], srcp,
1490 dest->s6_addr32[0], dest->s6_addr32[1],
1491 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1492 sp->sk_state,
1493 tp->write_seq-tp->snd_una,
1494 (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
1495 timer_active,
1496 jiffies_to_clock_t(timer_expires - jiffies),
1497 icsk->icsk_retransmits,
1498 sock_i_uid(sp),
1499 icsk->icsk_probes_out,
1500 sock_i_ino(sp),
1501 atomic_read(&sp->sk_refcnt), sp,
1502 icsk->icsk_rto,
1503 icsk->icsk_ack.ato,
1504 (icsk->icsk_ack.quick << 1 ) | icsk->icsk_ack.pingpong,
1505 tp->snd_cwnd, tp->snd_ssthresh>=0xFFFF?-1:tp->snd_ssthresh
1509 static void get_timewait6_sock(struct seq_file *seq,
1510 struct inet_timewait_sock *tw, int i)
1512 struct in6_addr *dest, *src;
1513 __u16 destp, srcp;
1514 struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
1515 int ttd = tw->tw_ttd - jiffies;
1517 if (ttd < 0)
1518 ttd = 0;
1520 dest = &tw6->tw_v6_daddr;
1521 src = &tw6->tw_v6_rcv_saddr;
1522 destp = ntohs(tw->tw_dport);
1523 srcp = ntohs(tw->tw_sport);
1525 seq_printf(seq,
1526 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1527 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %p\n",
1529 src->s6_addr32[0], src->s6_addr32[1],
1530 src->s6_addr32[2], src->s6_addr32[3], srcp,
1531 dest->s6_addr32[0], dest->s6_addr32[1],
1532 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1533 tw->tw_substate, 0, 0,
1534 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
1535 atomic_read(&tw->tw_refcnt), tw);
1538 #ifdef CONFIG_PROC_FS
1539 static int tcp6_seq_show(struct seq_file *seq, void *v)
1541 struct tcp_iter_state *st;
1543 if (v == SEQ_START_TOKEN) {
1544 seq_puts(seq,
1545 " sl "
1546 "local_address "
1547 "remote_address "
1548 "st tx_queue rx_queue tr tm->when retrnsmt"
1549 " uid timeout inode\n");
1550 goto out;
1552 st = seq->private;
1554 switch (st->state) {
1555 case TCP_SEQ_STATE_LISTENING:
1556 case TCP_SEQ_STATE_ESTABLISHED:
1557 get_tcp6_sock(seq, v, st->num);
1558 break;
1559 case TCP_SEQ_STATE_OPENREQ:
1560 get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
1561 break;
1562 case TCP_SEQ_STATE_TIME_WAIT:
1563 get_timewait6_sock(seq, v, st->num);
1564 break;
1566 out:
1567 return 0;
1570 static struct file_operations tcp6_seq_fops;
1571 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
1572 .owner = THIS_MODULE,
1573 .name = "tcp6",
1574 .family = AF_INET6,
1575 .seq_show = tcp6_seq_show,
1576 .seq_fops = &tcp6_seq_fops,
1579 int __init tcp6_proc_init(void)
1581 return tcp_proc_register(&tcp6_seq_afinfo);
1584 void tcp6_proc_exit(void)
1586 tcp_proc_unregister(&tcp6_seq_afinfo);
1588 #endif
1590 struct proto tcpv6_prot = {
1591 .name = "TCPv6",
1592 .owner = THIS_MODULE,
1593 .close = tcp_close,
1594 .connect = tcp_v6_connect,
1595 .disconnect = tcp_disconnect,
1596 .accept = inet_csk_accept,
1597 .ioctl = tcp_ioctl,
1598 .init = tcp_v6_init_sock,
1599 .destroy = tcp_v6_destroy_sock,
1600 .shutdown = tcp_shutdown,
1601 .setsockopt = tcp_setsockopt,
1602 .getsockopt = tcp_getsockopt,
1603 .sendmsg = tcp_sendmsg,
1604 .recvmsg = tcp_recvmsg,
1605 .backlog_rcv = tcp_v6_do_rcv,
1606 .hash = tcp_v6_hash,
1607 .unhash = tcp_unhash,
1608 .get_port = tcp_v6_get_port,
1609 .enter_memory_pressure = tcp_enter_memory_pressure,
1610 .sockets_allocated = &tcp_sockets_allocated,
1611 .memory_allocated = &tcp_memory_allocated,
1612 .memory_pressure = &tcp_memory_pressure,
1613 .orphan_count = &tcp_orphan_count,
1614 .sysctl_mem = sysctl_tcp_mem,
1615 .sysctl_wmem = sysctl_tcp_wmem,
1616 .sysctl_rmem = sysctl_tcp_rmem,
1617 .max_header = MAX_TCP_HEADER,
1618 .obj_size = sizeof(struct tcp6_sock),
1619 .twsk_prot = &tcp6_timewait_sock_ops,
1620 .rsk_prot = &tcp6_request_sock_ops,
1621 #ifdef CONFIG_COMPAT
1622 .compat_setsockopt = compat_tcp_setsockopt,
1623 .compat_getsockopt = compat_tcp_getsockopt,
1624 #endif
1627 static struct inet6_protocol tcpv6_protocol = {
1628 .handler = tcp_v6_rcv,
1629 .err_handler = tcp_v6_err,
1630 .gso_send_check = tcp_v6_gso_send_check,
1631 .gso_segment = tcp_tso_segment,
1632 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
1635 static struct inet_protosw tcpv6_protosw = {
1636 .type = SOCK_STREAM,
1637 .protocol = IPPROTO_TCP,
1638 .prot = &tcpv6_prot,
1639 .ops = &inet6_stream_ops,
1640 .capability = -1,
1641 .no_check = 0,
1642 .flags = INET_PROTOSW_PERMANENT |
1643 INET_PROTOSW_ICSK,
1646 void __init tcpv6_init(void)
1648 /* register inet6 protocol */
1649 if (inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP) < 0)
1650 printk(KERN_ERR "tcpv6_init: Could not register protocol\n");
1651 inet6_register_protosw(&tcpv6_protosw);
1653 if (inet_csk_ctl_sock_create(&tcp6_socket, PF_INET6, SOCK_RAW,
1654 IPPROTO_TCP) < 0)
1655 panic("Failed to create the TCPv6 control socket.\n");