/*
 *	TCP over IPv6
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on:
 *	linux/net/ipv4/tcp.c
 *	linux/net/ipv4/tcp_input.c
 *	linux/net/ipv4/tcp_output.c
 *
 *	Fixes:
 *	Hideaki YOSHIFUJI	:	sin6_scope_id support
 *	YOSHIFUJI Hideaki @USAGI and:	Support IPV6_V6ONLY socket option, which
 *	Alexey Kuznetsov		allow both IPv4 and IPv6 sockets to bind
 *					a single port at the same time.
 *	YOSHIFUJI Hideaki @USAGI:	convert /proc/net/tcp6 to seq_file.
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 */

#include <linux/bottom_half.h>
#include <linux/module.h>
#include <linux/errno.h>
#include <linux/types.h>
#include <linux/socket.h>
#include <linux/sockios.h>
#include <linux/net.h>
#include <linux/jiffies.h>
#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/init.h>
#include <linux/jhash.h>
#include <linux/ipsec.h>
#include <linux/times.h>
#include <linux/slab.h>

#include <linux/ipv6.h>
#include <linux/icmpv6.h>
#include <linux/random.h>

#include <net/tcp.h>
#include <net/ndisc.h>
#include <net/inet6_hashtables.h>
#include <net/inet6_connection_sock.h>
#include <net/ipv6.h>
#include <net/transp_v6.h>
#include <net/addrconf.h>
#include <net/ip6_route.h>
#include <net/ip6_checksum.h>
#include <net/inet_ecn.h>
#include <net/protocol.h>
#include <net/xfrm.h>
#include <net/snmp.h>
#include <net/dsfield.h>
#include <net/timewait_sock.h>
#include <net/netdma.h>
#include <net/inet_common.h>
#include <net/secure_seq.h>

#include <asm/uaccess.h>

#include <linux/proc_fs.h>
#include <linux/seq_file.h>

#include <linux/crypto.h>
#include <linux/scatterlist.h>

static void	tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb);
static void	tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				      struct request_sock *req);

static int	tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
static void	__tcp_v6_send_check(struct sk_buff *skb,
				    const struct in6_addr *saddr,
				    const struct in6_addr *daddr);

static const struct inet_connection_sock_af_ops ipv6_mapped;
static const struct inet_connection_sock_af_ops ipv6_specific;
#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
#else
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
						   const struct in6_addr *addr)
{
	return NULL;
}
#endif

static void tcp_v6_hash(struct sock *sk)
{
	if (sk->sk_state != TCP_CLOSE) {
		if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) {
			tcp_prot.hash(sk);
			return;
		}
		local_bh_disable();
		__inet6_hash(sk, NULL);
		local_bh_enable();
	}
}

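/* Fold the TCP payload checksum into the final checksum over the IPv6
 * pseudo-header (source address, destination address, length, next
 * header), as required by RFC 2460; csum_ipv6_magic() does the fold.
 */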
static __inline__ __sum16 tcp_v6_check(int len,
				       const struct in6_addr *saddr,
				       const struct in6_addr *daddr,
				       __wsum base)
{
	return csum_ipv6_magic(saddr, daddr, len, IPPROTO_TCP, base);
}

static __u32 tcp_v6_init_sequence(struct sk_buff *skb)
{
	return secure_tcpv6_sequence_number(ipv6_hdr(skb)->daddr.s6_addr32,
					    ipv6_hdr(skb)->saddr.s6_addr32,
					    tcp_hdr(skb)->dest,
					    tcp_hdr(skb)->source);
}

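/* Active open: validate the address, pick flow label and source
 * address, route the flow, choose an initial sequence number and send
 * the SYN. Connects to a v4-mapped address are handed over to
 * tcp_v4_connect().
 */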
static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
			  int addr_len)
{
	struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
	struct inet_sock *inet = inet_sk(sk);
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	struct in6_addr *saddr = NULL, *final_p, final;
	struct rt6_info *rt;
	struct flowi6 fl6;
	struct dst_entry *dst;
	int addr_type;
	int err;

	if (addr_len < SIN6_LEN_RFC2133)
		return -EINVAL;

	if (usin->sin6_family != AF_INET6)
		return -EAFNOSUPPORT;

	memset(&fl6, 0, sizeof(fl6));

	if (np->sndflow) {
		fl6.flowlabel = usin->sin6_flowinfo & IPV6_FLOWINFO_MASK;
		IP6_ECN_flow_init(fl6.flowlabel);
		if (fl6.flowlabel & IPV6_FLOWLABEL_MASK) {
			struct ip6_flowlabel *flowlabel;
			flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
			if (flowlabel == NULL)
				return -EINVAL;
			ipv6_addr_copy(&usin->sin6_addr, &flowlabel->dst);
			fl6_sock_release(flowlabel);
		}
	}

	/*
	 *	connect() to INADDR_ANY means loopback (BSD'ism).
	 */

	if (ipv6_addr_any(&usin->sin6_addr))
		usin->sin6_addr.s6_addr[15] = 0x1;

	addr_type = ipv6_addr_type(&usin->sin6_addr);

	if (addr_type & IPV6_ADDR_MULTICAST)
		return -ENETUNREACH;

	if (addr_type & IPV6_ADDR_LINKLOCAL) {
		if (addr_len >= sizeof(struct sockaddr_in6) &&
		    usin->sin6_scope_id) {
			/* If interface is set while binding, indices
			 * must coincide.
			 */
			if (sk->sk_bound_dev_if &&
			    sk->sk_bound_dev_if != usin->sin6_scope_id)
				return -EINVAL;

			sk->sk_bound_dev_if = usin->sin6_scope_id;
		}

		/* Connect to link-local address requires an interface */
		if (!sk->sk_bound_dev_if)
			return -EINVAL;
	}

	if (tp->rx_opt.ts_recent_stamp &&
	    !ipv6_addr_equal(&np->daddr, &usin->sin6_addr)) {
		tp->rx_opt.ts_recent = 0;
		tp->rx_opt.ts_recent_stamp = 0;
		tp->write_seq = 0;
	}

	ipv6_addr_copy(&np->daddr, &usin->sin6_addr);
	np->flow_label = fl6.flowlabel;

	/*
	 *	TCP over IPv4
	 */

	if (addr_type == IPV6_ADDR_MAPPED) {
		u32 exthdrlen = icsk->icsk_ext_hdr_len;
		struct sockaddr_in sin;

		SOCK_DEBUG(sk, "connect: ipv4 mapped\n");

		if (__ipv6_only_sock(sk))
			return -ENETUNREACH;

		sin.sin_family = AF_INET;
		sin.sin_port = usin->sin6_port;
		sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];

		icsk->icsk_af_ops = &ipv6_mapped;
		sk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		tp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));

		if (err) {
			icsk->icsk_ext_hdr_len = exthdrlen;
			icsk->icsk_af_ops = &ipv6_specific;
			sk->sk_backlog_rcv = tcp_v6_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
			tp->af_specific = &tcp_sock_ipv6_specific;
#endif
			goto failure;
		} else {
			ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr);
			ipv6_addr_set_v4mapped(inet->inet_rcv_saddr,
					       &np->rcv_saddr);
		}

		return err;
	}

	if (!ipv6_addr_any(&np->rcv_saddr))
		saddr = &np->rcv_saddr;

	fl6.flowi6_proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl6.daddr, &np->daddr);
	ipv6_addr_copy(&fl6.saddr,
		       (saddr ? saddr : &np->saddr));
	fl6.flowi6_oif = sk->sk_bound_dev_if;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = usin->sin6_port;
	fl6.fl6_sport = inet->inet_sport;

	final_p = fl6_update_dst(&fl6, np->opt, &final);

	security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

	dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		goto failure;
	}

	if (saddr == NULL) {
		saddr = &fl6.saddr;
		ipv6_addr_copy(&np->rcv_saddr, saddr);
	}

	/* set the source address */
	ipv6_addr_copy(&np->saddr, saddr);
	inet->inet_rcv_saddr = LOOPBACK4_IPV6;

	sk->sk_gso_type = SKB_GSO_TCPV6;
	__ip6_dst_store(sk, dst, NULL, NULL);

	rt = (struct rt6_info *) dst;
	if (tcp_death_row.sysctl_tw_recycle &&
	    !tp->rx_opt.ts_recent_stamp &&
	    ipv6_addr_equal(&rt->rt6i_dst.addr, &np->daddr)) {
		struct inet_peer *peer = rt6_get_peer(rt);
		/*
		 * VJ's idea. We save last timestamp seen from
		 * the destination in peer table, when entering state
		 * TIME-WAIT, and initialize rx_opt.ts_recent from it,
		 * when trying new connection.
		 */
		if (peer) {
			inet_peer_refcheck(peer);
			if ((u32)get_seconds() - peer->tcp_ts_stamp <= TCP_PAWS_MSL) {
				tp->rx_opt.ts_recent_stamp = peer->tcp_ts_stamp;
				tp->rx_opt.ts_recent = peer->tcp_ts;
			}
		}
	}

	icsk->icsk_ext_hdr_len = 0;
	if (np->opt)
		icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
					  np->opt->opt_nflen);

	tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

	inet->inet_dport = usin->sin6_port;

	tcp_set_state(sk, TCP_SYN_SENT);
	err = inet6_hash_connect(&tcp_death_row, sk);
	if (err)
		goto late_failure;

	if (!tp->write_seq)
		tp->write_seq = secure_tcpv6_sequence_number(np->saddr.s6_addr32,
							     np->daddr.s6_addr32,
							     inet->inet_sport,
							     inet->inet_dport);

	err = tcp_connect(sk);
	if (err)
		goto late_failure;

	return 0;

late_failure:
	tcp_set_state(sk, TCP_CLOSE);
	__sk_dst_reset(sk);
failure:
	inet->inet_dport = 0;
	sk->sk_route_caps = 0;
	return err;
}

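/* ICMPv6 error handler. Invoked for errors (unreachable, packet too
 * big, ...) raised against a TCP segment we sent; PKT_TOOBIG updates
 * the path MTU, other types are converted into socket errors.
 */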
static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
		u8 type, u8 code, int offset, __be32 info)
{
	const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
	const struct tcphdr *th = (struct tcphdr *)(skb->data + offset);
	struct ipv6_pinfo *np;
	struct sock *sk;
	int err;
	struct tcp_sock *tp;
	__u32 seq;
	struct net *net = dev_net(skb->dev);

	sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr,
			th->dest, &hdr->saddr, th->source, skb->dev->ifindex);

	if (sk == NULL) {
		ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev),
				   ICMP6_MIB_INERRORS);
		return;
	}

	if (sk->sk_state == TCP_TIME_WAIT) {
		inet_twsk_put(inet_twsk(sk));
		return;
	}

	bh_lock_sock(sk);
	if (sock_owned_by_user(sk))
		NET_INC_STATS_BH(net, LINUX_MIB_LOCKDROPPEDICMPS);

	if (sk->sk_state == TCP_CLOSE)
		goto out;

	if (ipv6_hdr(skb)->hop_limit < inet6_sk(sk)->min_hopcount) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto out;
	}

	tp = tcp_sk(sk);
	seq = ntohl(th->seq);
	if (sk->sk_state != TCP_LISTEN &&
	    !between(seq, tp->snd_una, tp->snd_nxt)) {
		NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
		goto out;
	}

	np = inet6_sk(sk);

	if (type == ICMPV6_PKT_TOOBIG) {
		struct dst_entry *dst;

		if (sock_owned_by_user(sk))
			goto out;
		if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
			goto out;

		/* icmp should have updated the destination cache entry */
		dst = __sk_dst_check(sk, np->dst_cookie);

		if (dst == NULL) {
			struct inet_sock *inet = inet_sk(sk);
			struct flowi6 fl6;

			/* BUGGG_FUTURE: Again, it is not clear how
			   to handle rthdr case. Ignore this complexity
			   for now.
			 */
			memset(&fl6, 0, sizeof(fl6));
			fl6.flowi6_proto = IPPROTO_TCP;
			ipv6_addr_copy(&fl6.daddr, &np->daddr);
			ipv6_addr_copy(&fl6.saddr, &np->saddr);
			fl6.flowi6_oif = sk->sk_bound_dev_if;
			fl6.flowi6_mark = sk->sk_mark;
			fl6.fl6_dport = inet->inet_dport;
			fl6.fl6_sport = inet->inet_sport;
			security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

			dst = ip6_dst_lookup_flow(sk, &fl6, NULL, false);
			if (IS_ERR(dst)) {
				sk->sk_err_soft = -PTR_ERR(dst);
				goto out;
			}

		} else
			dst_hold(dst);

		if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
			tcp_sync_mss(sk, dst_mtu(dst));
			tcp_simple_retransmit(sk);
		} /* else let the usual retransmit timer handle it */
		dst_release(dst);
		goto out;
	}

	icmpv6_err_convert(type, code, &err);

	/* Might be for a request_sock */
	switch (sk->sk_state) {
		struct request_sock *req, **prev;
	case TCP_LISTEN:
		if (sock_owned_by_user(sk))
			goto out;

		req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr,
					   &hdr->saddr, inet6_iif(skb));
		if (!req)
			goto out;

		/* ICMPs are not backlogged, hence we cannot get
		 * an established socket here.
		 */
		WARN_ON(req->sk != NULL);

		if (seq != tcp_rsk(req)->snt_isn) {
			NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS);
			goto out;
		}

		inet_csk_reqsk_queue_drop(sk, req, prev);
		goto out;

	case TCP_SYN_SENT:
	case TCP_SYN_RECV:  /* Cannot happen.
			       It can, if SYNs are crossed. --ANK */
		if (!sock_owned_by_user(sk)) {
			sk->sk_err = err;
			sk->sk_error_report(sk);	/* Wake people up to see the error (see connect in sock.c) */

			tcp_done(sk);
		} else
			sk->sk_err_soft = err;
		goto out;
	}

	if (!sock_owned_by_user(sk) && np->recverr) {
		sk->sk_err = err;
		sk->sk_error_report(sk);
	} else
		sk->sk_err_soft = err;

out:
	bh_unlock_sock(sk);
	sock_put(sk);
}

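/* Build and transmit a SYN+ACK for a pending connection request. The
 * caller owns @req; only the addresses and ports cached in it are used
 * to route and checksum the reply.
 */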
static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req,
			      struct request_values *rvp)
{
	struct inet6_request_sock *treq = inet6_rsk(req);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct sk_buff *skb;
	struct ipv6_txoptions *opt = NULL;
	struct in6_addr *final_p, final;
	struct flowi6 fl6;
	struct dst_entry *dst;
	int err;

	memset(&fl6, 0, sizeof(fl6));
	fl6.flowi6_proto = IPPROTO_TCP;
	ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr);
	ipv6_addr_copy(&fl6.saddr, &treq->loc_addr);
	fl6.flowlabel = 0;
	fl6.flowi6_oif = treq->iif;
	fl6.flowi6_mark = sk->sk_mark;
	fl6.fl6_dport = inet_rsk(req)->rmt_port;
	fl6.fl6_sport = inet_rsk(req)->loc_port;
	security_req_classify_flow(req, flowi6_to_flowi(&fl6));

	opt = np->opt;
	final_p = fl6_update_dst(&fl6, opt, &final);

	dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
	if (IS_ERR(dst)) {
		err = PTR_ERR(dst);
		dst = NULL;
		goto done;
	}
	skb = tcp_make_synack(sk, dst, req, rvp);
	err = -ENOMEM;
	if (skb) {
		__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);

		ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr);
		err = ip6_xmit(sk, skb, &fl6, opt);
		err = net_xmit_eval(err);
	}

done:
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	dst_release(dst);
	return err;
}

static int tcp_v6_rtx_synack(struct sock *sk, struct request_sock *req,
			     struct request_values *rvp)
{
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_RETRANSSEGS);
	return tcp_v6_send_synack(sk, req, rvp);
}

static inline void syn_flood_warning(struct sk_buff *skb)
{
#ifdef CONFIG_SYN_COOKIES
	if (sysctl_tcp_syncookies)
		printk(KERN_INFO
		       "TCPv6: Possible SYN flooding on port %d. "
		       "Sending cookies.\n", ntohs(tcp_hdr(skb)->dest));
	else
#endif
		printk(KERN_INFO
		       "TCPv6: Possible SYN flooding on port %d. "
		       "Dropping request.\n", ntohs(tcp_hdr(skb)->dest));
}

static void tcp_v6_reqsk_destructor(struct request_sock *req)
{
	kfree_skb(inet6_rsk(req)->pktopts);
}

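/* RFC 2385 TCP MD5 signatures. Keys are kept per socket in
 * tp->md5sig_info, in separate arrays for IPv4 and IPv6 peers; the
 * helpers below look up, add, delete and hash against those keys.
 */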
#ifdef CONFIG_TCP_MD5SIG
static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk,
						   const struct in6_addr *addr)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	BUG_ON(tp == NULL);

	if (!tp->md5sig_info || !tp->md5sig_info->entries6)
		return NULL;

	for (i = 0; i < tp->md5sig_info->entries6; i++) {
		if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, addr))
			return &tp->md5sig_info->keys6[i].base;
	}
	return NULL;
}

static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk,
						struct sock *addr_sk)
{
	return tcp_v6_md5_do_lookup(sk, &inet6_sk(addr_sk)->daddr);
}

static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk,
						      struct request_sock *req)
{
	return tcp_v6_md5_do_lookup(sk, &inet6_rsk(req)->rmt_addr);
}

static int tcp_v6_md5_do_add(struct sock *sk, const struct in6_addr *peer,
			     char *newkey, u8 newkeylen)
{
	/* Add key to the list */
	struct tcp_md5sig_key *key;
	struct tcp_sock *tp = tcp_sk(sk);
	struct tcp6_md5sig_key *keys;

	key = tcp_v6_md5_do_lookup(sk, peer);
	if (key) {
		/* modify existing entry - just update that one */
		kfree(key->key);
		key->key = newkey;
		key->keylen = newkeylen;
	} else {
		/* reallocate new list if current one is full. */
		if (!tp->md5sig_info) {
			tp->md5sig_info = kzalloc(sizeof(*tp->md5sig_info), GFP_ATOMIC);
			if (!tp->md5sig_info) {
				kfree(newkey);
				return -ENOMEM;
			}
			sk_nocaps_add(sk, NETIF_F_GSO_MASK);
		}
		if (tp->md5sig_info->entries6 == 0 &&
		    tcp_alloc_md5sig_pool(sk) == NULL) {
			kfree(newkey);
			return -ENOMEM;
		}
		if (tp->md5sig_info->alloced6 == tp->md5sig_info->entries6) {
			keys = kmalloc((sizeof (tp->md5sig_info->keys6[0]) *
				       (tp->md5sig_info->entries6 + 1)), GFP_ATOMIC);

			if (!keys) {
				kfree(newkey);
				if (tp->md5sig_info->entries6 == 0)
					tcp_free_md5sig_pool();
				return -ENOMEM;
			}

			if (tp->md5sig_info->entries6)
				memmove(keys, tp->md5sig_info->keys6,
					(sizeof (tp->md5sig_info->keys6[0]) *
					 tp->md5sig_info->entries6));

			kfree(tp->md5sig_info->keys6);
			tp->md5sig_info->keys6 = keys;
			tp->md5sig_info->alloced6++;
		}

		ipv6_addr_copy(&tp->md5sig_info->keys6[tp->md5sig_info->entries6].addr,
			       peer);
		tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.key = newkey;
		tp->md5sig_info->keys6[tp->md5sig_info->entries6].base.keylen = newkeylen;

		tp->md5sig_info->entries6++;
	}
	return 0;
}

static int tcp_v6_md5_add_func(struct sock *sk, struct sock *addr_sk,
			       u8 *newkey, __u8 newkeylen)
{
	return tcp_v6_md5_do_add(sk, &inet6_sk(addr_sk)->daddr,
				 newkey, newkeylen);
}

static int tcp_v6_md5_do_del(struct sock *sk, const struct in6_addr *peer)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	for (i = 0; i < tp->md5sig_info->entries6; i++) {
		if (ipv6_addr_equal(&tp->md5sig_info->keys6[i].addr, peer)) {
			/* Free the key */
			kfree(tp->md5sig_info->keys6[i].base.key);
			tp->md5sig_info->entries6--;

			if (tp->md5sig_info->entries6 == 0) {
				kfree(tp->md5sig_info->keys6);
				tp->md5sig_info->keys6 = NULL;
				tp->md5sig_info->alloced6 = 0;
				tcp_free_md5sig_pool();
			} else {
				/* shrink the database */
				if (tp->md5sig_info->entries6 != i)
					memmove(&tp->md5sig_info->keys6[i],
						&tp->md5sig_info->keys6[i+1],
						(tp->md5sig_info->entries6 - i)
						* sizeof (tp->md5sig_info->keys6[0]));
			}
			return 0;
		}
	}
	return -ENOENT;
}

static void tcp_v6_clear_md5_list(struct sock *sk)
{
	struct tcp_sock *tp = tcp_sk(sk);
	int i;

	if (tp->md5sig_info->entries6) {
		for (i = 0; i < tp->md5sig_info->entries6; i++)
			kfree(tp->md5sig_info->keys6[i].base.key);
		tp->md5sig_info->entries6 = 0;
		tcp_free_md5sig_pool();
	}

	kfree(tp->md5sig_info->keys6);
	tp->md5sig_info->keys6 = NULL;
	tp->md5sig_info->alloced6 = 0;

	if (tp->md5sig_info->entries4) {
		for (i = 0; i < tp->md5sig_info->entries4; i++)
			kfree(tp->md5sig_info->keys4[i].base.key);
		tp->md5sig_info->entries4 = 0;
		tcp_free_md5sig_pool();
	}

	kfree(tp->md5sig_info->keys4);
	tp->md5sig_info->keys4 = NULL;
	tp->md5sig_info->alloced4 = 0;
}

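/* tcp_v6_parse_md5_keys() implements the TCP_MD5SIG socket option.
 * A minimal userspace sketch (illustrative only, not part of this
 * file), assuming a connected IPv6 TCP socket fd and the peer address
 * in 'sin6':
 *
 *	struct tcp_md5sig md5 = { 0 };
 *	memcpy(&md5.tcpm_addr, &sin6, sizeof(sin6));
 *	md5.tcpm_keylen = strlen("secret");
 *	memcpy(md5.tcpm_key, "secret", md5.tcpm_keylen);
 *	if (setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5)))
 *		perror("TCP_MD5SIG");
 *
 * A zero tcpm_keylen deletes the key for that peer, as handled below.
 */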
static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval,
				 int optlen)
{
	struct tcp_md5sig cmd;
	struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
	u8 *newkey;

	if (optlen < sizeof(cmd))
		return -EINVAL;

	if (copy_from_user(&cmd, optval, sizeof(cmd)))
		return -EFAULT;

	if (sin6->sin6_family != AF_INET6)
		return -EINVAL;

	if (!cmd.tcpm_keylen) {
		if (!tcp_sk(sk)->md5sig_info)
			return -ENOENT;
		if (ipv6_addr_v4mapped(&sin6->sin6_addr))
			return tcp_v4_md5_do_del(sk, sin6->sin6_addr.s6_addr32[3]);
		return tcp_v6_md5_do_del(sk, &sin6->sin6_addr);
	}

	if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
		return -EINVAL;

	if (!tcp_sk(sk)->md5sig_info) {
		struct tcp_sock *tp = tcp_sk(sk);
		struct tcp_md5sig_info *p;

		p = kzalloc(sizeof(struct tcp_md5sig_info), GFP_KERNEL);
		if (!p)
			return -ENOMEM;

		tp->md5sig_info = p;
		sk_nocaps_add(sk, NETIF_F_GSO_MASK);
	}

	newkey = kmemdup(cmd.tcpm_key, cmd.tcpm_keylen, GFP_KERNEL);
	if (!newkey)
		return -ENOMEM;
	if (ipv6_addr_v4mapped(&sin6->sin6_addr)) {
		return tcp_v4_md5_do_add(sk, sin6->sin6_addr.s6_addr32[3],
					 newkey, cmd.tcpm_keylen);
	}
	return tcp_v6_md5_do_add(sk, &sin6->sin6_addr, newkey, cmd.tcpm_keylen);
}

static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool *hp,
					const struct in6_addr *daddr,
					const struct in6_addr *saddr, int nbytes)
{
	struct tcp6_pseudohdr *bp;
	struct scatterlist sg;

	bp = &hp->md5_blk.ip6;
	/* 1. TCP pseudo-header (RFC2460) */
	ipv6_addr_copy(&bp->saddr, saddr);
	ipv6_addr_copy(&bp->daddr, daddr);
	bp->protocol = cpu_to_be32(IPPROTO_TCP);
	bp->len = cpu_to_be32(nbytes);

	sg_init_one(&sg, bp, sizeof(*bp));
	return crypto_hash_update(&hp->md5_desc, &sg, sizeof(*bp));
}

static int tcp_v6_md5_hash_hdr(char *md5_hash, struct tcp_md5sig_key *key,
			       const struct in6_addr *daddr, struct in6_addr *saddr,
			       struct tcphdr *th)
{
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;
	if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key,
			       struct sock *sk, struct request_sock *req,
			       struct sk_buff *skb)
{
	const struct in6_addr *saddr, *daddr;
	struct tcp_md5sig_pool *hp;
	struct hash_desc *desc;
	struct tcphdr *th = tcp_hdr(skb);

	if (sk) {
		saddr = &inet6_sk(sk)->saddr;
		daddr = &inet6_sk(sk)->daddr;
	} else if (req) {
		saddr = &inet6_rsk(req)->loc_addr;
		daddr = &inet6_rsk(req)->rmt_addr;
	} else {
		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
		saddr = &ip6h->saddr;
		daddr = &ip6h->daddr;
	}

	hp = tcp_get_md5sig_pool();
	if (!hp)
		goto clear_hash_noput;
	desc = &hp->md5_desc;

	if (crypto_hash_init(desc))
		goto clear_hash;

	if (tcp_v6_md5_hash_pseudoheader(hp, daddr, saddr, skb->len))
		goto clear_hash;
	if (tcp_md5_hash_header(hp, th))
		goto clear_hash;
	if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
		goto clear_hash;
	if (tcp_md5_hash_key(hp, key))
		goto clear_hash;
	if (crypto_hash_final(desc, md5_hash))
		goto clear_hash;

	tcp_put_md5sig_pool();
	return 0;

clear_hash:
	tcp_put_md5sig_pool();
clear_hash_noput:
	memset(md5_hash, 0, 16);
	return 1;
}

static int tcp_v6_inbound_md5_hash(struct sock *sk, struct sk_buff *skb)
{
	__u8 *hash_location = NULL;
	struct tcp_md5sig_key *hash_expected;
	const struct ipv6hdr *ip6h = ipv6_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);
	int genhash;
	u8 newhash[16];

	hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
	hash_location = tcp_parse_md5sig_option(th);

	/* We've parsed the options - do we have a hash? */
	if (!hash_expected && !hash_location)
		return 0;

	if (hash_expected && !hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
		return 1;
	}

	if (!hash_expected && hash_location) {
		NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
		return 1;
	}

	/* check the signature */
	genhash = tcp_v6_md5_hash_skb(newhash,
				      hash_expected,
				      NULL, NULL, skb);

	if (genhash || memcmp(hash_location, newhash, 16) != 0) {
		if (net_ratelimit()) {
			printk(KERN_INFO "MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
			       genhash ? "failed" : "mismatch",
			       &ip6h->saddr, ntohs(th->source),
			       &ip6h->daddr, ntohs(th->dest));
		}
		return 1;
	}
	return 0;
}
#endif

struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
	.family		=	AF_INET6,
	.obj_size	=	sizeof(struct tcp6_request_sock),
	.rtx_syn_ack	=	tcp_v6_rtx_synack,
	.send_ack	=	tcp_v6_reqsk_send_ack,
	.destructor	=	tcp_v6_reqsk_destructor,
	.send_reset	=	tcp_v6_send_reset,
	.syn_ack_timeout =	tcp_syn_ack_timeout,
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
	.md5_lookup	=	tcp_v6_reqsk_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
};
#endif

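/* Prepare the TCP checksum for an outgoing skb. With CHECKSUM_PARTIAL
 * only the pseudo-header sum is filled in and the NIC (or software
 * fallback) completes it; otherwise the full checksum is computed here.
 */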
static void __tcp_v6_send_check(struct sk_buff *skb,
				const struct in6_addr *saddr, const struct in6_addr *daddr)
{
	struct tcphdr *th = tcp_hdr(skb);

	if (skb->ip_summed == CHECKSUM_PARTIAL) {
		th->check = ~tcp_v6_check(skb->len, saddr, daddr, 0);
		skb->csum_start = skb_transport_header(skb) - skb->head;
		skb->csum_offset = offsetof(struct tcphdr, check);
	} else {
		th->check = tcp_v6_check(skb->len, saddr, daddr,
					 csum_partial(th, th->doff << 2,
						      skb->csum));
	}
}

static void tcp_v6_send_check(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);

	__tcp_v6_send_check(skb, &np->saddr, &np->daddr);
}

static int tcp_v6_gso_send_check(struct sk_buff *skb)
{
	const struct ipv6hdr *ipv6h;
	struct tcphdr *th;

	if (!pskb_may_pull(skb, sizeof(*th)))
		return -EINVAL;

	ipv6h = ipv6_hdr(skb);
	th = tcp_hdr(skb);

	th->check = 0;
	skb->ip_summed = CHECKSUM_PARTIAL;
	__tcp_v6_send_check(skb, &ipv6h->saddr, &ipv6h->daddr);
	return 0;
}

static struct sk_buff **tcp6_gro_receive(struct sk_buff **head,
					 struct sk_buff *skb)
{
	const struct ipv6hdr *iph = skb_gro_network_header(skb);

	switch (skb->ip_summed) {
	case CHECKSUM_COMPLETE:
		if (!tcp_v6_check(skb_gro_len(skb), &iph->saddr, &iph->daddr,
				  skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			break;
		}

		/* fall through */
	case CHECKSUM_NONE:
		NAPI_GRO_CB(skb)->flush = 1;
		return NULL;
	}

	return tcp_gro_receive(head, skb);
}

static int tcp6_gro_complete(struct sk_buff *skb)
{
	const struct ipv6hdr *iph = ipv6_hdr(skb);
	struct tcphdr *th = tcp_hdr(skb);

	th->check = ~tcp_v6_check(skb->len - skb_transport_offset(skb),
				  &iph->saddr, &iph->daddr, 0);
	skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6;

	return tcp_gro_complete(skb);
}

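/* Send an unsolicited RST or ACK in reply to a received segment, using
 * the per-namespace control socket. The addresses and ports are simply
 * swapped from the segment that triggered it, so no established socket
 * is needed.
 */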
static void tcp_v6_send_response(struct sk_buff *skb, u32 seq, u32 ack, u32 win,
				 u32 ts, struct tcp_md5sig_key *key, int rst)
{
	struct tcphdr *th = tcp_hdr(skb), *t1;
	struct sk_buff *buff;
	struct flowi6 fl6;
	struct net *net = dev_net(skb_dst(skb)->dev);
	struct sock *ctl_sk = net->ipv6.tcp_sk;
	unsigned int tot_len = sizeof(struct tcphdr);
	struct dst_entry *dst;
	__be32 *topt;

	if (ts)
		tot_len += TCPOLEN_TSTAMP_ALIGNED;
#ifdef CONFIG_TCP_MD5SIG
	if (key)
		tot_len += TCPOLEN_MD5SIG_ALIGNED;
#endif

	buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
			 GFP_ATOMIC);
	if (buff == NULL)
		return;

	skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);

	t1 = (struct tcphdr *) skb_push(buff, tot_len);
	skb_reset_transport_header(buff);

	/* Swap the send and the receive. */
	memset(t1, 0, sizeof(*t1));
	t1->dest = th->source;
	t1->source = th->dest;
	t1->doff = tot_len / 4;
	t1->seq = htonl(seq);
	t1->ack_seq = htonl(ack);
	t1->ack = !rst || !th->ack;
	t1->rst = rst;
	t1->window = htons(win);

	topt = (__be32 *)(t1 + 1);

	if (ts) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
		*topt++ = htonl(tcp_time_stamp);
		*topt++ = htonl(ts);
	}

#ifdef CONFIG_TCP_MD5SIG
	if (key) {
		*topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
				(TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
		tcp_v6_md5_hash_hdr((__u8 *)topt, key,
				    &ipv6_hdr(skb)->saddr,
				    &ipv6_hdr(skb)->daddr, t1);
	}
#endif

	memset(&fl6, 0, sizeof(fl6));
	ipv6_addr_copy(&fl6.daddr, &ipv6_hdr(skb)->saddr);
	ipv6_addr_copy(&fl6.saddr, &ipv6_hdr(skb)->daddr);

	buff->ip_summed = CHECKSUM_PARTIAL;
	buff->csum = 0;

	__tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);

	fl6.flowi6_proto = IPPROTO_TCP;
	fl6.flowi6_oif = inet6_iif(skb);
	fl6.fl6_dport = t1->dest;
	fl6.fl6_sport = t1->source;
	security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));

	/* Pass a socket to ip6_dst_lookup even when it is for a RST;
	 * the underlying function will use it to retrieve the network
	 * namespace.
	 */
	dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL, false);
	if (!IS_ERR(dst)) {
		skb_dst_set(buff, dst);
		ip6_xmit(ctl_sk, buff, &fl6, NULL);
		TCP_INC_STATS_BH(net, TCP_MIB_OUTSEGS);
		if (rst)
			TCP_INC_STATS_BH(net, TCP_MIB_OUTRSTS);
		return;
	}

	kfree_skb(buff);
}

static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb)
{
	struct tcphdr *th = tcp_hdr(skb);
	u32 seq = 0, ack_seq = 0;
	struct tcp_md5sig_key *key = NULL;

	if (th->rst)
		return;

	if (!ipv6_unicast_destination(skb))
		return;

#ifdef CONFIG_TCP_MD5SIG
	if (sk)
		key = tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr);
#endif

	if (th->ack)
		seq = ntohl(th->ack_seq);
	else
		ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
			  (th->doff << 2);

	tcp_v6_send_response(skb, seq, ack_seq, 0, 0, key, 1);
}

static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 ts,
			    struct tcp_md5sig_key *key)
{
	tcp_v6_send_response(skb, seq, ack, win, ts, key, 0);
}

static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
{
	struct inet_timewait_sock *tw = inet_twsk(sk);
	struct tcp_timewait_sock *tcptw = tcp_twsk(sk);

	tcp_v6_send_ack(skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
			tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
			tcptw->tw_ts_recent, tcp_twsk_md5_key(tcptw));

	inet_twsk_put(tw);
}

static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb,
				  struct request_sock *req)
{
	tcp_v6_send_ack(skb, tcp_rsk(req)->snt_isn + 1, tcp_rsk(req)->rcv_isn + 1, req->rcv_wnd, req->ts_recent,
			tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->daddr));
}


static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb)
{
	struct request_sock *req, **prev;
	const struct tcphdr *th = tcp_hdr(skb);
	struct sock *nsk;

	/* Find possible connection requests. */
	req = inet6_csk_search_req(sk, &prev, th->source,
				   &ipv6_hdr(skb)->saddr,
				   &ipv6_hdr(skb)->daddr, inet6_iif(skb));
	if (req)
		return tcp_check_req(sk, skb, req, prev);

	nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo,
			&ipv6_hdr(skb)->saddr, th->source,
			&ipv6_hdr(skb)->daddr, ntohs(th->dest), inet6_iif(skb));

	if (nsk) {
		if (nsk->sk_state != TCP_TIME_WAIT) {
			bh_lock_sock(nsk);
			return nsk;
		}
		inet_twsk_put(inet_twsk(nsk));
		return NULL;
	}

#ifdef CONFIG_SYN_COOKIES
	if (!th->syn)
		sk = cookie_v6_check(sk, skb);
#endif
	return sk;
}

/* FIXME: this is substantially similar to the ipv4 code.
 * Can some kind of merge be done? -- erics
 */
static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
{
	struct tcp_extend_values tmp_ext;
	struct tcp_options_received tmp_opt;
	u8 *hash_location;
	struct request_sock *req;
	struct inet6_request_sock *treq;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp = tcp_sk(sk);
	__u32 isn = TCP_SKB_CB(skb)->when;
	struct dst_entry *dst = NULL;
#ifdef CONFIG_SYN_COOKIES
	int want_cookie = 0;
#else
#define want_cookie 0
#endif

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_conn_request(sk, skb);

	if (!ipv6_unicast_destination(skb))
		goto drop;

	if (inet_csk_reqsk_queue_is_full(sk) && !isn) {
		if (net_ratelimit())
			syn_flood_warning(skb);
#ifdef CONFIG_SYN_COOKIES
		if (sysctl_tcp_syncookies)
			want_cookie = 1;
		else
#endif
		goto drop;
	}

	if (sk_acceptq_is_full(sk) && inet_csk_reqsk_queue_young(sk) > 1)
		goto drop;

	req = inet6_reqsk_alloc(&tcp6_request_sock_ops);
	if (req == NULL)
		goto drop;

#ifdef CONFIG_TCP_MD5SIG
	tcp_rsk(req)->af_specific = &tcp_request_sock_ipv6_ops;
#endif

	tcp_clear_options(&tmp_opt);
	tmp_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
	tmp_opt.user_mss = tp->rx_opt.user_mss;
	tcp_parse_options(skb, &tmp_opt, &hash_location, 0);

	if (tmp_opt.cookie_plus > 0 &&
	    tmp_opt.saw_tstamp &&
	    !tp->rx_opt.cookie_out_never &&
	    (sysctl_tcp_cookie_size > 0 ||
	     (tp->cookie_values != NULL &&
	      tp->cookie_values->cookie_desired > 0))) {
		u8 *c;
		u32 *d;
		u32 *mess = &tmp_ext.cookie_bakery[COOKIE_DIGEST_WORDS];
		int l = tmp_opt.cookie_plus - TCPOLEN_COOKIE_BASE;

		if (tcp_cookie_generator(&tmp_ext.cookie_bakery[0]) != 0)
			goto drop_and_free;

		/* Secret recipe starts with IP addresses */
		d = (__force u32 *)&ipv6_hdr(skb)->daddr.s6_addr32[0];
		*mess++ ^= *d++;
		*mess++ ^= *d++;
		*mess++ ^= *d++;
		*mess++ ^= *d++;
		d = (__force u32 *)&ipv6_hdr(skb)->saddr.s6_addr32[0];
		*mess++ ^= *d++;
		*mess++ ^= *d++;
		*mess++ ^= *d++;
		*mess++ ^= *d++;

		/* plus variable length Initiator Cookie */
		c = (u8 *)mess;
		while (l-- > 0)
			*c++ ^= *hash_location++;

#ifdef CONFIG_SYN_COOKIES
		want_cookie = 0;	/* not our kind of cookie */
#endif
		tmp_ext.cookie_out_never = 0; /* false */
		tmp_ext.cookie_plus = tmp_opt.cookie_plus;
	} else if (!tp->rx_opt.cookie_in_always) {
		/* redundant indications, but ensure initialization. */
		tmp_ext.cookie_out_never = 1; /* true */
		tmp_ext.cookie_plus = 0;
	} else {
		goto drop_and_free;
	}
	tmp_ext.cookie_in_always = tp->rx_opt.cookie_in_always;

	if (want_cookie && !tmp_opt.saw_tstamp)
		tcp_clear_options(&tmp_opt);

	tmp_opt.tstamp_ok = tmp_opt.saw_tstamp;
	tcp_openreq_init(req, &tmp_opt, skb);

	treq = inet6_rsk(req);
	ipv6_addr_copy(&treq->rmt_addr, &ipv6_hdr(skb)->saddr);
	ipv6_addr_copy(&treq->loc_addr, &ipv6_hdr(skb)->daddr);
	if (!want_cookie || tmp_opt.tstamp_ok)
		TCP_ECN_create_request(req, tcp_hdr(skb));

	if (!isn) {
		struct inet_peer *peer = NULL;

		if (ipv6_opt_accepted(sk, skb) ||
		    np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo ||
		    np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim) {
			atomic_inc(&skb->users);
			treq->pktopts = skb;
		}
		treq->iif = sk->sk_bound_dev_if;

		/* So that link locals have meaning */
		if (!sk->sk_bound_dev_if &&
		    ipv6_addr_type(&treq->rmt_addr) & IPV6_ADDR_LINKLOCAL)
			treq->iif = inet6_iif(skb);

		if (want_cookie) {
			isn = cookie_v6_init_sequence(sk, skb, &req->mss);
			req->cookie_ts = tmp_opt.tstamp_ok;
			goto have_isn;
		}

		/* VJ's idea. We save last timestamp seen
		 * from the destination in peer table, when entering
		 * state TIME-WAIT, and check against it before
		 * accepting new connection request.
		 *
		 * If "isn" is not zero, this request hit alive
		 * timewait bucket, so that all the necessary checks
		 * are made in the function processing timewait state.
		 */
		if (tmp_opt.saw_tstamp &&
		    tcp_death_row.sysctl_tw_recycle &&
		    (dst = inet6_csk_route_req(sk, req)) != NULL &&
		    (peer = rt6_get_peer((struct rt6_info *)dst)) != NULL &&
		    ipv6_addr_equal((struct in6_addr *)peer->daddr.addr.a6,
				    &treq->rmt_addr)) {
			inet_peer_refcheck(peer);
			if ((u32)get_seconds() - peer->tcp_ts_stamp < TCP_PAWS_MSL &&
			    (s32)(peer->tcp_ts - req->ts_recent) >
							TCP_PAWS_WINDOW) {
				NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_PAWSPASSIVEREJECTED);
				goto drop_and_release;
			}
		}
		/* Kill the following clause, if you dislike this way. */
		else if (!sysctl_tcp_syncookies &&
			 (sysctl_max_syn_backlog - inet_csk_reqsk_queue_len(sk) <
			  (sysctl_max_syn_backlog >> 2)) &&
			 (!peer || !peer->tcp_ts_stamp) &&
			 (!dst || !dst_metric(dst, RTAX_RTT))) {
			/* Without syncookies last quarter of
			 * backlog is filled with destinations,
			 * proven to be alive.
			 * It means that we continue to communicate
			 * to destinations, already remembered
			 * to the moment of synflood.
			 */
			LIMIT_NETDEBUG(KERN_DEBUG "TCP: drop open request from %pI6/%u\n",
				       &treq->rmt_addr, ntohs(tcp_hdr(skb)->source));
			goto drop_and_release;
		}

		isn = tcp_v6_init_sequence(skb);
	}
have_isn:
	tcp_rsk(req)->snt_isn = isn;

	security_inet_conn_request(sk, skb, req);

	if (tcp_v6_send_synack(sk, req,
			       (struct request_values *)&tmp_ext) ||
	    want_cookie)
		goto drop_and_free;

	inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT);
	return 0;

drop_and_release:
	dst_release(dst);
drop_and_free:
	reqsk_free(req);
drop:
	return 0; /* don't send reset */
}

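/* Create the child socket for a completed handshake. For a SYN that
 * arrived on a v4-mapped address the IPv4 path builds the child and we
 * only patch in the mapped-address ops; the native IPv6 path clones
 * options, routes and (optionally) the MD5 key onto the new socket.
 */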
static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb,
					  struct request_sock *req,
					  struct dst_entry *dst)
{
	struct inet6_request_sock *treq;
	struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
	struct tcp6_sock *newtcp6sk;
	struct inet_sock *newinet;
	struct tcp_sock *newtp;
	struct sock *newsk;
	struct ipv6_txoptions *opt;
#ifdef CONFIG_TCP_MD5SIG
	struct tcp_md5sig_key *key;
#endif

	if (skb->protocol == htons(ETH_P_IP)) {
		/*
		 *	v6 mapped
		 */

		newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst);

		if (newsk == NULL)
			return NULL;

		newtcp6sk = (struct tcp6_sock *)newsk;
		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

		newinet = inet_sk(newsk);
		newnp = inet6_sk(newsk);
		newtp = tcp_sk(newsk);

		memcpy(newnp, np, sizeof(struct ipv6_pinfo));

		ipv6_addr_set_v4mapped(newinet->inet_daddr, &newnp->daddr);

		ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr);

		ipv6_addr_copy(&newnp->rcv_saddr, &newnp->saddr);

		inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
		newsk->sk_backlog_rcv = tcp_v4_do_rcv;
#ifdef CONFIG_TCP_MD5SIG
		newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
#endif

		newnp->ipv6_ac_list = NULL;
		newnp->ipv6_fl_list = NULL;
		newnp->pktoptions  = NULL;
		newnp->opt	   = NULL;
		newnp->mcast_oif   = inet6_iif(skb);
		newnp->mcast_hops  = ipv6_hdr(skb)->hop_limit;

		/*
		 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
		 * here, tcp_create_openreq_child now does this for us, see the comment in
		 * that function for the gory details. -acme
		 */

		/* This is a tricky place. Until this moment IPv4 tcp
		   worked with IPv6 icsk.icsk_af_ops.
		   Sync it now.
		 */
		tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);

		return newsk;
	}

	treq = inet6_rsk(req);
	opt = np->opt;

	if (sk_acceptq_is_full(sk))
		goto out_overflow;

	if (!dst) {
		dst = inet6_csk_route_req(sk, req);
		if (!dst)
			goto out;
	}

	newsk = tcp_create_openreq_child(sk, req, skb);
	if (newsk == NULL)
		goto out_nonewsk;

	/*
	 * No need to charge this sock to the relevant IPv6 refcnt debug socks
	 * count here, tcp_create_openreq_child now does this for us, see the
	 * comment in that function for the gory details. -acme
	 */

	newsk->sk_gso_type = SKB_GSO_TCPV6;
	__ip6_dst_store(newsk, dst, NULL, NULL);

	newtcp6sk = (struct tcp6_sock *)newsk;
	inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;

	newtp = tcp_sk(newsk);
	newinet = inet_sk(newsk);
	newnp = inet6_sk(newsk);

	memcpy(newnp, np, sizeof(struct ipv6_pinfo));

	ipv6_addr_copy(&newnp->daddr, &treq->rmt_addr);
	ipv6_addr_copy(&newnp->saddr, &treq->loc_addr);
	ipv6_addr_copy(&newnp->rcv_saddr, &treq->loc_addr);
	newsk->sk_bound_dev_if = treq->iif;

	/* Now IPv6 options...

	   First: no IPv4 options.
	 */
	newinet->inet_opt = NULL;
	newnp->ipv6_ac_list = NULL;
	newnp->ipv6_fl_list = NULL;

	/* Clone RX bits */
	newnp->rxopt.all = np->rxopt.all;

	/* Clone pktoptions received with SYN */
	newnp->pktoptions = NULL;
	if (treq->pktopts != NULL) {
		newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC);
		kfree_skb(treq->pktopts);
		treq->pktopts = NULL;
		if (newnp->pktoptions)
			skb_set_owner_r(newnp->pktoptions, newsk);
	}
	newnp->opt	  = NULL;
	newnp->mcast_oif  = inet6_iif(skb);
	newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;

	/* Clone native IPv6 options from listening socket (if any)

	   Yes, keeping reference count would be much more clever,
	   but we do one more thing here: reattach optmem
	   to newsk.
	 */
	if (opt) {
		newnp->opt = ipv6_dup_options(newsk, opt);
		if (opt != np->opt)
			sock_kfree_s(sk, opt, opt->tot_len);
	}

	inet_csk(newsk)->icsk_ext_hdr_len = 0;
	if (newnp->opt)
		inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
						     newnp->opt->opt_flen);

	tcp_mtup_init(newsk);
	tcp_sync_mss(newsk, dst_mtu(dst));
	newtp->advmss = dst_metric_advmss(dst);
	if (tcp_sk(sk)->rx_opt.user_mss &&
	    tcp_sk(sk)->rx_opt.user_mss < newtp->advmss)
		newtp->advmss = tcp_sk(sk)->rx_opt.user_mss;

	tcp_initialize_rcv_mss(newsk);

	newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
	newinet->inet_rcv_saddr = LOOPBACK4_IPV6;

#ifdef CONFIG_TCP_MD5SIG
	/* Copy over the MD5 key from the original socket */
	if ((key = tcp_v6_md5_do_lookup(sk, &newnp->daddr)) != NULL) {
		/* We're using one, so create a matching key
		 * on the newsk structure. If we fail to get
		 * memory, then we end up not copying the key
		 * across. Shucks.
		 */
		char *newkey = kmemdup(key->key, key->keylen, GFP_ATOMIC);
		if (newkey != NULL)
			tcp_v6_md5_do_add(newsk, &newnp->daddr,
					  newkey, key->keylen);
	}
#endif

	if (__inet_inherit_port(sk, newsk) < 0) {
		sock_put(newsk);
		goto out;
	}
	__inet6_hash(newsk, NULL);

	return newsk;

out_overflow:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
	if (opt && opt != np->opt)
		sock_kfree_s(sk, opt, opt->tot_len);
	dst_release(dst);
out:
	NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
	return NULL;
}

static __sum16 tcp_v6_checksum_init(struct sk_buff *skb)
{
	if (skb->ip_summed == CHECKSUM_COMPLETE) {
		if (!tcp_v6_check(skb->len, &ipv6_hdr(skb)->saddr,
				  &ipv6_hdr(skb)->daddr, skb->csum)) {
			skb->ip_summed = CHECKSUM_UNNECESSARY;
			return 0;
		}
	}

	skb->csum = ~csum_unfold(tcp_v6_check(skb->len,
					      &ipv6_hdr(skb)->saddr,
					      &ipv6_hdr(skb)->daddr, 0));

	if (skb->len <= 76) {
		return __skb_checksum_complete(skb);
	}
	return 0;
}

/* The socket must have its spinlock held when we get
 * here.
 *
 * We have a potential double-lock case here, so even when
 * doing backlog processing we use the BH locking scheme.
 * This is because we cannot sleep with the original spinlock
 * held.
 */
static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct tcp_sock *tp;
	struct sk_buff *opt_skb = NULL;

	/* Imagine: socket is IPv6. IPv4 packet arrives,
	   goes to IPv4 receive handler and backlogged.
	   From backlog it always goes here. Kerboom...
	   Fortunately, tcp_rcv_established and rcv_established
	   handle them correctly, but it is not the case with
	   tcp_v6_hnd_req and tcp_v6_send_reset().   --ANK
	 */

	if (skb->protocol == htons(ETH_P_IP))
		return tcp_v4_do_rcv(sk, skb);

#ifdef CONFIG_TCP_MD5SIG
	if (tcp_v6_inbound_md5_hash(sk, skb))
		goto discard;
#endif

	if (sk_filter(sk, skb))
		goto discard;

	/*
	 *	socket locking is here for SMP purposes as backlog rcv
	 *	is currently called with bh processing disabled.
	 */

	/* Do Stevens' IPV6_PKTOPTIONS.

	   Yes, guys, it is the only place in our code, where we
	   may make it not affecting IPv4.
	   The rest of code is protocol independent,
	   and I do not like idea to uglify IPv4.

	   Actually, all the idea behind IPV6_PKTOPTIONS
	   looks not very well thought. For now we latch
	   options, received in the last packet, enqueued
	   by tcp. Feel free to propose better solution.
					       --ANK (980728)
	 */
	if (np->rxopt.all)
		opt_skb = skb_clone(skb, GFP_ATOMIC);

	if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
		sock_rps_save_rxhash(sk, skb->rxhash);
		if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len))
			goto reset;
		if (opt_skb)
			goto ipv6_pktoptions;
		return 0;
	}

	if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb))
		goto csum_err;

	if (sk->sk_state == TCP_LISTEN) {
		struct sock *nsk = tcp_v6_hnd_req(sk, skb);
		if (!nsk)
			goto discard;

		/*
		 * Queue it on the new socket if the new socket is active,
		 * otherwise we just shortcircuit this and continue with
		 * the new socket..
		 */
		if (nsk != sk) {
			sock_rps_save_rxhash(nsk, skb->rxhash);
			if (tcp_child_process(sk, nsk, skb))
				goto reset;
			if (opt_skb)
				__kfree_skb(opt_skb);
			return 0;
		}
	} else
		sock_rps_save_rxhash(sk, skb->rxhash);

	if (tcp_rcv_state_process(sk, skb, tcp_hdr(skb), skb->len))
		goto reset;
	if (opt_skb)
		goto ipv6_pktoptions;
	return 0;

reset:
	tcp_v6_send_reset(sk, skb);
discard:
	if (opt_skb)
		__kfree_skb(opt_skb);
	kfree_skb(skb);
	return 0;
csum_err:
	TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_INERRS);
	goto discard;


ipv6_pktoptions:
	/* Do you ask, what is it?

	   1. skb was enqueued by tcp.
	   2. skb is added to tail of read queue, rather than out of order.
	   3. socket is not in passive state.
	   4. Finally, it really contains options, which user wants to receive.
	 */
	tp = tcp_sk(sk);
	if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
		if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
			np->mcast_oif = inet6_iif(opt_skb);
		if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
			np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
		if (ipv6_opt_accepted(sk, opt_skb)) {
			skb_set_owner_r(opt_skb, sk);
			opt_skb = xchg(&np->pktoptions, opt_skb);
		} else {
			__kfree_skb(opt_skb);
			opt_skb = xchg(&np->pktoptions, NULL);
		}
	}

	kfree_skb(opt_skb);
	return 0;
}

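/* Main receive entry point, called from the IPv6 stack in softirq
 * context. Validates the header and checksum, looks up the socket and
 * either processes the segment directly, queues it to the prequeue, or
 * places it on the owner's backlog if the socket is locked.
 */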
static int tcp_v6_rcv(struct sk_buff *skb)
{
	struct tcphdr *th;
	const struct ipv6hdr *hdr;
	struct sock *sk;
	int ret;
	struct net *net = dev_net(skb->dev);

	if (skb->pkt_type != PACKET_HOST)
		goto discard_it;

	/*
	 *	Count it even if it's bad.
	 */
	TCP_INC_STATS_BH(net, TCP_MIB_INSEGS);

	if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
		goto discard_it;

	th = tcp_hdr(skb);

	if (th->doff < sizeof(struct tcphdr)/4)
		goto bad_packet;
	if (!pskb_may_pull(skb, th->doff*4))
		goto discard_it;

	if (!skb_csum_unnecessary(skb) && tcp_v6_checksum_init(skb))
		goto bad_packet;

	th = tcp_hdr(skb);
	hdr = ipv6_hdr(skb);
	TCP_SKB_CB(skb)->seq = ntohl(th->seq);
	TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
				    skb->len - th->doff*4);
	TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
	TCP_SKB_CB(skb)->when = 0;
	TCP_SKB_CB(skb)->flags = ipv6_get_dsfield(hdr);
	TCP_SKB_CB(skb)->sacked = 0;

	sk = __inet6_lookup_skb(&tcp_hashinfo, skb, th->source, th->dest);
	if (!sk)
		goto no_tcp_socket;

process:
	if (sk->sk_state == TCP_TIME_WAIT)
		goto do_time_wait;

	if (hdr->hop_limit < inet6_sk(sk)->min_hopcount) {
		NET_INC_STATS_BH(net, LINUX_MIB_TCPMINTTLDROP);
		goto discard_and_relse;
	}

	if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
		goto discard_and_relse;

	if (sk_filter(sk, skb))
		goto discard_and_relse;

	skb->dev = NULL;

	bh_lock_sock_nested(sk);
	ret = 0;
	if (!sock_owned_by_user(sk)) {
#ifdef CONFIG_NET_DMA
		struct tcp_sock *tp = tcp_sk(sk);
		if (!tp->ucopy.dma_chan && tp->ucopy.pinned_list)
			tp->ucopy.dma_chan = dma_find_channel(DMA_MEMCPY);
		if (tp->ucopy.dma_chan)
			ret = tcp_v6_do_rcv(sk, skb);
		else
#endif
		{
			if (!tcp_prequeue(sk, skb))
				ret = tcp_v6_do_rcv(sk, skb);
		}
	} else if (unlikely(sk_add_backlog(sk, skb))) {
		bh_unlock_sock(sk);
		NET_INC_STATS_BH(net, LINUX_MIB_TCPBACKLOGDROP);
		goto discard_and_relse;
	}
	bh_unlock_sock(sk);

	sock_put(sk);
	return ret ? -1 : 0;

no_tcp_socket:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
		goto discard_it;

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
bad_packet:
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
	} else {
		tcp_v6_send_reset(NULL, skb);
	}

discard_it:

	/*
	 *	Discard frame
	 */

	kfree_skb(skb);
	return 0;

discard_and_relse:
	sock_put(sk);
	goto discard_it;

do_time_wait:
	if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) {
		TCP_INC_STATS_BH(net, TCP_MIB_INERRS);
		inet_twsk_put(inet_twsk(sk));
		goto discard_it;
	}

	switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
	case TCP_TW_SYN:
	{
		struct sock *sk2;

		sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
					    &ipv6_hdr(skb)->daddr,
					    ntohs(th->dest), inet6_iif(skb));
		if (sk2 != NULL) {
			struct inet_timewait_sock *tw = inet_twsk(sk);
			inet_twsk_deschedule(tw, &tcp_death_row);
			inet_twsk_put(tw);
			sk = sk2;
			goto process;
		}
		/* Fall through to ACK */
	}
	case TCP_TW_ACK:
		tcp_v6_timewait_ack(sk, skb);
		break;
	case TCP_TW_RST:
		goto no_tcp_socket;
	case TCP_TW_SUCCESS:;
	}
	goto discard_it;
}

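/* Return the inet_peer entry for the connected destination, reusing
 * the one cached in the route when possible; *release_it tells the
 * caller whether it owns a reference that must be dropped.
 */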
static struct inet_peer *tcp_v6_get_peer(struct sock *sk, bool *release_it)
{
	struct rt6_info *rt = (struct rt6_info *) __sk_dst_get(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct inet_peer *peer;

	if (!rt ||
	    !ipv6_addr_equal(&np->daddr, &rt->rt6i_dst.addr)) {
		peer = inet_getpeer_v6(&np->daddr, 1);
		*release_it = true;
	} else {
		if (!rt->rt6i_peer)
			rt6_bind_peer(rt, 1);
		peer = rt->rt6i_peer;
		*release_it = false;
	}

	return peer;
}

static void *tcp_v6_tw_get_peer(struct sock *sk)
{
	struct inet6_timewait_sock *tw6 = inet6_twsk(sk);
	struct inet_timewait_sock *tw = inet_twsk(sk);

	if (tw->tw_family == AF_INET)
		return tcp_v4_tw_get_peer(sk);

	return inet_getpeer_v6(&tw6->tw_v6_daddr, 1);
}

static struct timewait_sock_ops tcp6_timewait_sock_ops = {
	.twsk_obj_size	= sizeof(struct tcp6_timewait_sock),
	.twsk_unique	= tcp_twsk_unique,
	.twsk_destructor= tcp_twsk_destructor,
	.twsk_getpeer	= tcp_v6_tw_get_peer,
};

static const struct inet_connection_sock_af_ops ipv6_specific = {
	.queue_xmit	   = inet6_csk_xmit,
	.send_check	   = tcp_v6_send_check,
	.rebuild_header	   = inet6_sk_rebuild_header,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.get_peer	   = tcp_v6_get_peer,
	.net_header_len	   = sizeof(struct ipv6hdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.bind_conflict	   = inet6_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	=	tcp_v6_md5_lookup,
	.calc_md5_hash	=	tcp_v6_md5_hash_skb,
	.md5_add	=	tcp_v6_md5_add_func,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif

/*
 *	TCP over IPv4 via INET6 API
 */

static const struct inet_connection_sock_af_ops ipv6_mapped = {
	.queue_xmit	   = ip_queue_xmit,
	.send_check	   = tcp_v4_send_check,
	.rebuild_header	   = inet_sk_rebuild_header,
	.conn_request	   = tcp_v6_conn_request,
	.syn_recv_sock	   = tcp_v6_syn_recv_sock,
	.get_peer	   = tcp_v4_get_peer,
	.net_header_len	   = sizeof(struct iphdr),
	.setsockopt	   = ipv6_setsockopt,
	.getsockopt	   = ipv6_getsockopt,
	.addr2sockaddr	   = inet6_csk_addr2sockaddr,
	.sockaddr_len	   = sizeof(struct sockaddr_in6),
	.bind_conflict	   = inet6_csk_bind_conflict,
#ifdef CONFIG_COMPAT
	.compat_setsockopt = compat_ipv6_setsockopt,
	.compat_getsockopt = compat_ipv6_getsockopt,
#endif
};

#ifdef CONFIG_TCP_MD5SIG
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	=	tcp_v4_md5_lookup,
	.calc_md5_hash	=	tcp_v4_md5_hash_skb,
	.md5_add	=	tcp_v6_md5_add_func,
	.md5_parse	=	tcp_v6_parse_md5_keys,
};
#endif

/* NOTE: A lot of things set to zero explicitly by call to
 *       sk_alloc() so need not be done here.
 */
static int tcp_v6_init_sock(struct sock *sk)
{
	struct inet_connection_sock *icsk = inet_csk(sk);
	struct tcp_sock *tp = tcp_sk(sk);

	skb_queue_head_init(&tp->out_of_order_queue);
	tcp_init_xmit_timers(sk);
	tcp_prequeue_init(tp);

	icsk->icsk_rto = TCP_TIMEOUT_INIT;
	tp->mdev = TCP_TIMEOUT_INIT;

	/* So many TCP implementations out there (incorrectly) count the
	 * initial SYN frame in their delayed-ACK and congestion control
	 * algorithms that we must have the following bandaid to talk
	 * efficiently to them.  -DaveM
	 */
	tp->snd_cwnd = 2;

	/* See draft-stevens-tcpca-spec-01 for discussion of the
	 * initialization of these values.
	 */
	tp->snd_ssthresh = TCP_INFINITE_SSTHRESH;
	tp->snd_cwnd_clamp = ~0;
	tp->mss_cache = TCP_MSS_DEFAULT;

	tp->reordering = sysctl_tcp_reordering;

	sk->sk_state = TCP_CLOSE;

	icsk->icsk_af_ops = &ipv6_specific;
	icsk->icsk_ca_ops = &tcp_init_congestion_ops;
	icsk->icsk_sync_mss = tcp_sync_mss;
	sk->sk_write_space = sk_stream_write_space;
	sock_set_flag(sk, SOCK_USE_WRITE_QUEUE);

#ifdef CONFIG_TCP_MD5SIG
	tp->af_specific = &tcp_sock_ipv6_specific;
#endif

	/* TCP Cookie Transactions */
	if (sysctl_tcp_cookie_size > 0) {
		/* Default, cookies without s_data_payload. */
		tp->cookie_values =
			kzalloc(sizeof(*tp->cookie_values),
				sk->sk_allocation);
		if (tp->cookie_values != NULL)
			kref_init(&tp->cookie_values->kref);
	}
	/* Presumed zeroed, in order of appearance:
	 *	cookie_in_always, cookie_out_never,
	 *	s_data_constant, s_data_in, s_data_out
	 */
	sk->sk_sndbuf = sysctl_tcp_wmem[1];
	sk->sk_rcvbuf = sysctl_tcp_rmem[1];

	local_bh_disable();
	percpu_counter_inc(&tcp_sockets_allocated);
	local_bh_enable();

	return 0;
}

static void tcp_v6_destroy_sock(struct sock *sk)
{
#ifdef CONFIG_TCP_MD5SIG
	/* Clean up the MD5 key list */
	if (tcp_sk(sk)->md5sig_info)
		tcp_v6_clear_md5_list(sk);
#endif
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);
}

#ifdef CONFIG_PROC_FS
/* Proc filesystem TCPv6 sock list dumping. */
static void get_openreq6(struct seq_file *seq,
			 struct sock *sk, struct request_sock *req, int i, int uid)
{
	int ttd = req->expires - jiffies;
	const struct in6_addr *src = &inet6_rsk(req)->loc_addr;
	const struct in6_addr *dest = &inet6_rsk(req)->rmt_addr;

	if (ttd < 0)
		ttd = 0;

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3],
		   ntohs(inet_rsk(req)->loc_port),
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3],
		   ntohs(inet_rsk(req)->rmt_port),
		   TCP_SYN_RECV,
		   0, 0, /* could print option size, but that is af dependent. */
		   1,   /* timers active (only the expire timer) */
		   jiffies_to_clock_t(ttd),
		   req->retrans,
		   uid,
		   0,  /* non standard timer */
		   0, /* open_requests have no inode */
		   0, req);
}

static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	int timer_active;
	unsigned long timer_expires;
	struct inet_sock *inet = inet_sk(sp);
	struct tcp_sock *tp = tcp_sk(sp);
	const struct inet_connection_sock *icsk = inet_csk(sp);
	struct ipv6_pinfo *np = inet6_sk(sp);

	dest  = &np->daddr;
	src   = &np->rcv_saddr;
	destp = ntohs(inet->inet_dport);
	srcp  = ntohs(inet->inet_sport);

	if (icsk->icsk_pending == ICSK_TIME_RETRANS) {
		timer_active	= 1;
		timer_expires	= icsk->icsk_timeout;
	} else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
		timer_active	= 4;
		timer_expires	= icsk->icsk_timeout;
	} else if (timer_pending(&sp->sk_timer)) {
		timer_active	= 2;
		timer_expires	= sp->sk_timer.expires;
	} else {
		timer_active	= 0;
		timer_expires = jiffies;
	}

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %lu %d %pK %lu %lu %u %u %d\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   sp->sk_state,
		   tp->write_seq - tp->snd_una,
		   (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq),
		   timer_active,
		   jiffies_to_clock_t(timer_expires - jiffies),
		   icsk->icsk_retransmits,
		   sock_i_uid(sp),
		   icsk->icsk_probes_out,
		   sock_i_ino(sp),
		   atomic_read(&sp->sk_refcnt), sp,
		   jiffies_to_clock_t(icsk->icsk_rto),
		   jiffies_to_clock_t(icsk->icsk_ack.ato),
		   (icsk->icsk_ack.quick << 1) | icsk->icsk_ack.pingpong,
		   tp->snd_cwnd,
		   tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh
		   );
}

static void get_timewait6_sock(struct seq_file *seq,
			       struct inet_timewait_sock *tw, int i)
{
	const struct in6_addr *dest, *src;
	__u16 destp, srcp;
	struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw);
	int ttd = tw->tw_ttd - jiffies;

	if (ttd < 0)
		ttd = 0;

	dest = &tw6->tw_v6_daddr;
	src  = &tw6->tw_v6_rcv_saddr;
	destp = ntohs(tw->tw_dport);
	srcp  = ntohs(tw->tw_sport);

	seq_printf(seq,
		   "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
		   "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
		   i,
		   src->s6_addr32[0], src->s6_addr32[1],
		   src->s6_addr32[2], src->s6_addr32[3], srcp,
		   dest->s6_addr32[0], dest->s6_addr32[1],
		   dest->s6_addr32[2], dest->s6_addr32[3], destp,
		   tw->tw_substate, 0, 0,
		   3, jiffies_to_clock_t(ttd), 0, 0, 0, 0,
		   atomic_read(&tw->tw_refcnt), tw);
}

static int tcp6_seq_show(struct seq_file *seq, void *v)
{
	struct tcp_iter_state *st;

	if (v == SEQ_START_TOKEN) {
		seq_puts(seq,
			 "  sl  "
			 "local_address                         "
			 "remote_address                        "
			 "st tx_queue rx_queue tr tm->when retrnsmt"
			 "   uid  timeout inode\n");
		goto out;
	}
	st = seq->private;

	switch (st->state) {
	case TCP_SEQ_STATE_LISTENING:
	case TCP_SEQ_STATE_ESTABLISHED:
		get_tcp6_sock(seq, v, st->num);
		break;
	case TCP_SEQ_STATE_OPENREQ:
		get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid);
		break;
	case TCP_SEQ_STATE_TIME_WAIT:
		get_timewait6_sock(seq, v, st->num);
		break;
	}
out:
	return 0;
}

static struct tcp_seq_afinfo tcp6_seq_afinfo = {
	.name		= "tcp6",
	.family		= AF_INET6,
	.seq_fops	= {
		.owner		= THIS_MODULE,
	},
	.seq_ops	= {
		.show		= tcp6_seq_show,
	},
};

int __net_init tcp6_proc_init(struct net *net)
{
	return tcp_proc_register(net, &tcp6_seq_afinfo);
}

void tcp6_proc_exit(struct net *net)
{
	tcp_proc_unregister(net, &tcp6_seq_afinfo);
}
#endif

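/* The AF_INET6 TCP protocol definition: mostly the shared TCP
 * implementation, with the IPv6-specific entry points wired in.
 */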
struct proto tcpv6_prot = {
	.name			= "TCPv6",
	.owner			= THIS_MODULE,
	.close			= tcp_close,
	.connect		= tcp_v6_connect,
	.disconnect		= tcp_disconnect,
	.accept			= inet_csk_accept,
	.ioctl			= tcp_ioctl,
	.init			= tcp_v6_init_sock,
	.destroy		= tcp_v6_destroy_sock,
	.shutdown		= tcp_shutdown,
	.setsockopt		= tcp_setsockopt,
	.getsockopt		= tcp_getsockopt,
	.recvmsg		= tcp_recvmsg,
	.sendmsg		= tcp_sendmsg,
	.sendpage		= tcp_sendpage,
	.backlog_rcv		= tcp_v6_do_rcv,
	.hash			= tcp_v6_hash,
	.unhash			= inet_unhash,
	.get_port		= inet_csk_get_port,
	.enter_memory_pressure	= tcp_enter_memory_pressure,
	.sockets_allocated	= &tcp_sockets_allocated,
	.memory_allocated	= &tcp_memory_allocated,
	.memory_pressure	= &tcp_memory_pressure,
	.orphan_count		= &tcp_orphan_count,
	.sysctl_mem		= sysctl_tcp_mem,
	.sysctl_wmem		= sysctl_tcp_wmem,
	.sysctl_rmem		= sysctl_tcp_rmem,
	.max_header		= MAX_TCP_HEADER,
	.obj_size		= sizeof(struct tcp6_sock),
	.slab_flags		= SLAB_DESTROY_BY_RCU,
	.twsk_prot		= &tcp6_timewait_sock_ops,
	.rsk_prot		= &tcp6_request_sock_ops,
	.h.hashinfo		= &tcp_hashinfo,
	.no_autobind		= true,
#ifdef CONFIG_COMPAT
	.compat_setsockopt	= compat_tcp_setsockopt,
	.compat_getsockopt	= compat_tcp_getsockopt,
#endif
};

static const struct inet6_protocol tcpv6_protocol = {
	.handler	=	tcp_v6_rcv,
	.err_handler	=	tcp_v6_err,
	.gso_send_check	=	tcp_v6_gso_send_check,
	.gso_segment	=	tcp_tso_segment,
	.gro_receive	=	tcp6_gro_receive,
	.gro_complete	=	tcp6_gro_complete,
	.flags		=	INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
};

static struct inet_protosw tcpv6_protosw = {
	.type		=	SOCK_STREAM,
	.protocol	=	IPPROTO_TCP,
	.prot		=	&tcpv6_prot,
	.ops		=	&inet6_stream_ops,
	.no_check	=	0,
	.flags		=	INET_PROTOSW_PERMANENT |
				INET_PROTOSW_ICSK,
};

static int __net_init tcpv6_net_init(struct net *net)
{
	return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
				    SOCK_RAW, IPPROTO_TCP, net);
}

static void __net_exit tcpv6_net_exit(struct net *net)
{
	inet_ctl_sock_destroy(net->ipv6.tcp_sk);
}

static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
{
	inet_twsk_purge(&tcp_hashinfo, &tcp_death_row, AF_INET6);
}

static struct pernet_operations tcpv6_net_ops = {
	.init	    = tcpv6_net_init,
	.exit	    = tcpv6_net_exit,
	.exit_batch = tcpv6_net_exit_batch,
};

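/* Module init/exit: register the protocol handler, the SOCK_STREAM
 * protosw entry and the per-namespace control socket, unwinding in
 * reverse order on failure.
 */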
int __init tcpv6_init(void)
{
	int ret;

	ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
	if (ret)
		goto out;

	/* register inet6 protocol */
	ret = inet6_register_protosw(&tcpv6_protosw);
	if (ret)
		goto out_tcpv6_protocol;

	ret = register_pernet_subsys(&tcpv6_net_ops);
	if (ret)
		goto out_tcpv6_protosw;
out:
	return ret;

out_tcpv6_protosw:
	inet6_unregister_protosw(&tcpv6_protosw);
out_tcpv6_protocol:
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
	goto out;
}

void tcpv6_exit(void)
{
	unregister_pernet_subsys(&tcpv6_net_ops);
	inet6_unregister_protosw(&tcpv6_protosw);
	inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
}