net/ipv6/tcp_ipv6.c
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * TCP over IPv6
4 * Linux INET6 implementation
6 * Authors:
7 * Pedro Roque <roque@di.fc.ul.pt>
9 * Based on:
10 * linux/net/ipv4/tcp.c
11 * linux/net/ipv4/tcp_input.c
12 * linux/net/ipv4/tcp_output.c
14 * Fixes:
15 * Hideaki YOSHIFUJI : sin6_scope_id support
16 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
17 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
18 * a single port at the same time.
19 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 */
22 #include <linux/bottom_half.h>
23 #include <linux/module.h>
24 #include <linux/errno.h>
25 #include <linux/types.h>
26 #include <linux/socket.h>
27 #include <linux/sockios.h>
28 #include <linux/net.h>
29 #include <linux/jiffies.h>
30 #include <linux/in.h>
31 #include <linux/in6.h>
32 #include <linux/netdevice.h>
33 #include <linux/init.h>
34 #include <linux/jhash.h>
35 #include <linux/ipsec.h>
36 #include <linux/times.h>
37 #include <linux/slab.h>
38 #include <linux/uaccess.h>
39 #include <linux/ipv6.h>
40 #include <linux/icmpv6.h>
41 #include <linux/random.h>
42 #include <linux/indirect_call_wrapper.h>
44 #include <net/tcp.h>
45 #include <net/ndisc.h>
46 #include <net/inet6_hashtables.h>
47 #include <net/inet6_connection_sock.h>
48 #include <net/ipv6.h>
49 #include <net/transp_v6.h>
50 #include <net/addrconf.h>
51 #include <net/ip6_route.h>
52 #include <net/ip6_checksum.h>
53 #include <net/inet_ecn.h>
54 #include <net/protocol.h>
55 #include <net/xfrm.h>
56 #include <net/snmp.h>
57 #include <net/dsfield.h>
58 #include <net/timewait_sock.h>
59 #include <net/inet_common.h>
60 #include <net/secure_seq.h>
61 #include <net/busy_poll.h>
63 #include <linux/proc_fs.h>
64 #include <linux/seq_file.h>
66 #include <crypto/hash.h>
67 #include <linux/scatterlist.h>
69 #include <trace/events/tcp.h>
71 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb);
72 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
73 struct request_sock *req);
75 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb);
77 static const struct inet_connection_sock_af_ops ipv6_mapped;
78 static const struct inet_connection_sock_af_ops ipv6_specific;
79 #ifdef CONFIG_TCP_MD5SIG
80 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific;
81 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific;
82 #else
83 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
84 const struct in6_addr *addr)
86 return NULL;
88 #endif
90 /* Helper returning the inet6 address from a given tcp socket.
91 * It can be used in TCP stack instead of inet6_sk(sk).
92 * This avoids a dereference and allow compiler optimizations.
93 * It is a specialized version of inet6_sk_generic().
95 static struct ipv6_pinfo *tcp_inet6_sk(const struct sock *sk)
97 unsigned int offset = sizeof(struct tcp6_sock) - sizeof(struct ipv6_pinfo);
99 return (struct ipv6_pinfo *)(((u8 *)sk) + offset);
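/* Cache the validated input route on the socket, together with its route
 * cookie, so that tcp_v6_do_rcv() and tcp_v6_early_demux() further down in
 * this file can reuse the dst for later packets of the flow without a fresh
 * route lookup.
 */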
102 static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb)
104 struct dst_entry *dst = skb_dst(skb);
106 if (dst && dst_hold_safe(dst)) {
107 const struct rt6_info *rt = (const struct rt6_info *)dst;
109 sk->sk_rx_dst = dst;
110 inet_sk(sk)->rx_dst_ifindex = skb->skb_iif;
111 tcp_inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt);
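/* Initial sequence number and timestamp offset for a new connection.
 * secure_tcpv6_seq()/secure_tcpv6_ts_off() derive these from the packet's
 * addresses (and, for the ISN, the ports) together with a per-boot secret,
 * so the values are hard for an off-path attacker to predict while staying
 * stable for a given flow.
 */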
115 static u32 tcp_v6_init_seq(const struct sk_buff *skb)
117 return secure_tcpv6_seq(ipv6_hdr(skb)->daddr.s6_addr32,
118 ipv6_hdr(skb)->saddr.s6_addr32,
119 tcp_hdr(skb)->dest,
120 tcp_hdr(skb)->source);
123 static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb)
125 return secure_tcpv6_ts_off(net, ipv6_hdr(skb)->daddr.s6_addr32,
126 ipv6_hdr(skb)->saddr.s6_addr32);
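/* Illustrative userspace view of the connect path below (a sketch, not part
 * of the kernel source): an ordinary connect() on an AF_INET6 TCP socket
 * first runs tcp_v6_pre_connect() (cgroup BPF hook), then tcp_v6_connect():
 *
 *	int fd = socket(AF_INET6, SOCK_STREAM, 0);
 *	struct sockaddr_in6 sa = {
 *		.sin6_family = AF_INET6,
 *		.sin6_port = htons(443),
 *	};
 *	inet_pton(AF_INET6, "2001:db8::1", &sa.sin6_addr);
 *	connect(fd, (struct sockaddr *)&sa, sizeof(sa));
 */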
129 static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr,
130 int addr_len)
132 /* This check is replicated from tcp_v6_connect() and intended to
133 * prevent the BPF program called below from accessing bytes that are
134 * outside of the bound specified by the user in addr_len.
135 */
136 if (addr_len < SIN6_LEN_RFC2133)
137 return -EINVAL;
139 sock_owned_by_me(sk);
141 return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr);
144 static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr,
145 int addr_len)
147 struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr;
148 struct inet_sock *inet = inet_sk(sk);
149 struct inet_connection_sock *icsk = inet_csk(sk);
150 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
151 struct tcp_sock *tp = tcp_sk(sk);
152 struct in6_addr *saddr = NULL, *final_p, final;
153 struct ipv6_txoptions *opt;
154 struct flowi6 fl6;
155 struct dst_entry *dst;
156 int addr_type;
157 int err;
158 struct inet_timewait_death_row *tcp_death_row = &sock_net(sk)->ipv4.tcp_death_row;
160 if (addr_len < SIN6_LEN_RFC2133)
161 return -EINVAL;
163 if (usin->sin6_family != AF_INET6)
164 return -EAFNOSUPPORT;
166 memset(&fl6, 0, sizeof(fl6));
168 if (np->sndflow) {
169 fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK;
170 IP6_ECN_flow_init(fl6.flowlabel);
171 if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) {
172 struct ip6_flowlabel *flowlabel;
173 flowlabel = fl6_sock_lookup(sk, fl6.flowlabel);
174 if (!flowlabel)
175 return -EINVAL;
176 fl6_sock_release(flowlabel);
181 * connect() to INADDR_ANY means loopback (BSD'ism).
184 if (ipv6_addr_any(&usin->sin6_addr)) {
185 if (ipv6_addr_v4mapped(&sk->sk_v6_rcv_saddr))
186 ipv6_addr_set_v4mapped(htonl(INADDR_LOOPBACK),
187 &usin->sin6_addr);
188 else
189 usin->sin6_addr = in6addr_loopback;
192 addr_type = ipv6_addr_type(&usin->sin6_addr);
194 if (addr_type & IPV6_ADDR_MULTICAST)
195 return -ENETUNREACH;
197 if (addr_type&IPV6_ADDR_LINKLOCAL) {
198 if (addr_len >= sizeof(struct sockaddr_in6) &&
199 usin->sin6_scope_id) {
200 /* If interface is set while binding, indices
201 * must coincide.
203 if (!sk_dev_equal_l3scope(sk, usin->sin6_scope_id))
204 return -EINVAL;
206 sk->sk_bound_dev_if = usin->sin6_scope_id;
209 /* Connect to link-local address requires an interface */
210 if (!sk->sk_bound_dev_if)
211 return -EINVAL;
214 if (tp->rx_opt.ts_recent_stamp &&
215 !ipv6_addr_equal(&sk->sk_v6_daddr, &usin->sin6_addr)) {
216 tp->rx_opt.ts_recent = 0;
217 tp->rx_opt.ts_recent_stamp = 0;
218 tp->write_seq = 0;
221 sk->sk_v6_daddr = usin->sin6_addr;
222 np->flow_label = fl6.flowlabel;
225 * TCP over IPv4
228 if (addr_type & IPV6_ADDR_MAPPED) {
229 u32 exthdrlen = icsk->icsk_ext_hdr_len;
230 struct sockaddr_in sin;
232 if (__ipv6_only_sock(sk))
233 return -ENETUNREACH;
235 sin.sin_family = AF_INET;
236 sin.sin_port = usin->sin6_port;
237 sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3];
239 icsk->icsk_af_ops = &ipv6_mapped;
240 sk->sk_backlog_rcv = tcp_v4_do_rcv;
241 #ifdef CONFIG_TCP_MD5SIG
242 tp->af_specific = &tcp_sock_ipv6_mapped_specific;
243 #endif
245 err = tcp_v4_connect(sk, (struct sockaddr *)&sin, sizeof(sin));
247 if (err) {
248 icsk->icsk_ext_hdr_len = exthdrlen;
249 icsk->icsk_af_ops = &ipv6_specific;
250 sk->sk_backlog_rcv = tcp_v6_do_rcv;
251 #ifdef CONFIG_TCP_MD5SIG
252 tp->af_specific = &tcp_sock_ipv6_specific;
253 #endif
254 goto failure;
256 np->saddr = sk->sk_v6_rcv_saddr;
258 return err;
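/* Non-mapped destination: from here on this is a native IPv6 connection;
 * pick a source address, build the flow and route it below.
 */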
261 if (!ipv6_addr_any(&sk->sk_v6_rcv_saddr))
262 saddr = &sk->sk_v6_rcv_saddr;
264 fl6.flowi6_proto = IPPROTO_TCP;
265 fl6.daddr = sk->sk_v6_daddr;
266 fl6.saddr = saddr ? *saddr : np->saddr;
267 fl6.flowi6_oif = sk->sk_bound_dev_if;
268 fl6.flowi6_mark = sk->sk_mark;
269 fl6.fl6_dport = usin->sin6_port;
270 fl6.fl6_sport = inet->inet_sport;
271 fl6.flowi6_uid = sk->sk_uid;
273 opt = rcu_dereference_protected(np->opt, lockdep_sock_is_held(sk));
274 final_p = fl6_update_dst(&fl6, opt, &final);
276 security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));
278 dst = ip6_dst_lookup_flow(sk, &fl6, final_p);
279 if (IS_ERR(dst)) {
280 err = PTR_ERR(dst);
281 goto failure;
284 if (!saddr) {
285 saddr = &fl6.saddr;
286 sk->sk_v6_rcv_saddr = *saddr;
289 /* set the source address */
290 np->saddr = *saddr;
291 inet->inet_rcv_saddr = LOOPBACK4_IPV6;
293 sk->sk_gso_type = SKB_GSO_TCPV6;
294 ip6_dst_store(sk, dst, NULL, NULL);
296 icsk->icsk_ext_hdr_len = 0;
297 if (opt)
298 icsk->icsk_ext_hdr_len = opt->opt_flen +
299 opt->opt_nflen;
301 tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);
303 inet->inet_dport = usin->sin6_port;
305 tcp_set_state(sk, TCP_SYN_SENT);
306 err = inet6_hash_connect(tcp_death_row, sk);
307 if (err)
308 goto late_failure;
310 sk_set_txhash(sk);
312 if (likely(!tp->repair)) {
313 if (!tp->write_seq)
314 tp->write_seq = secure_tcpv6_seq(np->saddr.s6_addr32,
315 sk->sk_v6_daddr.s6_addr32,
316 inet->inet_sport,
317 inet->inet_dport);
318 tp->tsoffset = secure_tcpv6_ts_off(sock_net(sk),
319 np->saddr.s6_addr32,
320 sk->sk_v6_daddr.s6_addr32);
323 if (tcp_fastopen_defer_connect(sk, &err))
324 return err;
325 if (err)
326 goto late_failure;
328 err = tcp_connect(sk);
329 if (err)
330 goto late_failure;
332 return 0;
334 late_failure:
335 tcp_set_state(sk, TCP_CLOSE);
336 failure:
337 inet->inet_dport = 0;
338 sk->sk_route_caps = 0;
339 return err;
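/* Called when an ICMPv6 Packet Too Big notification lowered the path MTU,
 * either directly from tcp_v6_err() or deferred via the
 * TCP_MTU_REDUCED_DEFERRED flag when the socket was owned by the user.
 */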
342 static void tcp_v6_mtu_reduced(struct sock *sk)
344 struct dst_entry *dst;
346 if ((1 << sk->sk_state) & (TCPF_LISTEN | TCPF_CLOSE))
347 return;
349 dst = inet6_csk_update_pmtu(sk, tcp_sk(sk)->mtu_info);
350 if (!dst)
351 return;
353 if (inet_csk(sk)->icsk_pmtu_cookie > dst_mtu(dst)) {
354 tcp_sync_mss(sk, dst_mtu(dst));
355 tcp_simple_retransmit(sk);
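/* ICMPv6 error handler for TCP: look up the socket for the offending
 * segment, validate the echoed sequence number, and then handle redirects,
 * Packet Too Big (PMTU) notifications and hard errors accordingly.
 */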
359 static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
360 u8 type, u8 code, int offset, __be32 info)
362 const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data;
363 const struct tcphdr *th = (struct tcphdr *)(skb->data+offset);
364 struct net *net = dev_net(skb->dev);
365 struct request_sock *fastopen;
366 struct ipv6_pinfo *np;
367 struct tcp_sock *tp;
368 __u32 seq, snd_una;
369 struct sock *sk;
370 bool fatal;
371 int err;
373 sk = __inet6_lookup_established(net, &tcp_hashinfo,
374 &hdr->daddr, th->dest,
375 &hdr->saddr, ntohs(th->source),
376 skb->dev->ifindex, inet6_sdif(skb));
378 if (!sk) {
379 __ICMP6_INC_STATS(net, __in6_dev_get(skb->dev),
380 ICMP6_MIB_INERRORS);
381 return -ENOENT;
384 if (sk->sk_state == TCP_TIME_WAIT) {
385 inet_twsk_put(inet_twsk(sk));
386 return 0;
388 seq = ntohl(th->seq);
389 fatal = icmpv6_err_convert(type, code, &err);
390 if (sk->sk_state == TCP_NEW_SYN_RECV) {
391 tcp_req_err(sk, seq, fatal);
392 return 0;
395 bh_lock_sock(sk);
396 if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG)
397 __NET_INC_STATS(net, LINUX_MIB_LOCKDROPPEDICMPS);
399 if (sk->sk_state == TCP_CLOSE)
400 goto out;
402 if (ipv6_hdr(skb)->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
403 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
404 goto out;
407 tp = tcp_sk(sk);
408 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
409 fastopen = tp->fastopen_rsk;
410 snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una;
411 if (sk->sk_state != TCP_LISTEN &&
412 !between(seq, snd_una, tp->snd_nxt)) {
413 __NET_INC_STATS(net, LINUX_MIB_OUTOFWINDOWICMPS);
414 goto out;
417 np = tcp_inet6_sk(sk);
419 if (type == NDISC_REDIRECT) {
420 if (!sock_owned_by_user(sk)) {
421 struct dst_entry *dst = __sk_dst_check(sk, np->dst_cookie);
423 if (dst)
424 dst->ops->redirect(dst, sk, skb);
426 goto out;
429 if (type == ICMPV6_PKT_TOOBIG) {
430 /* We are not interested in TCP_LISTEN and open_requests
431 * (SYN-ACKs sent out by Linux are always < 576 bytes, so
432 * they should go through unfragmented).
433 */
434 if (sk->sk_state == TCP_LISTEN)
435 goto out;
437 if (!ip6_sk_accept_pmtu(sk))
438 goto out;
440 tp->mtu_info = ntohl(info);
441 if (!sock_owned_by_user(sk))
442 tcp_v6_mtu_reduced(sk);
443 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED,
444 &sk->sk_tsq_flags))
445 sock_hold(sk);
446 goto out;
450 /* Might be for a request_sock */
451 switch (sk->sk_state) {
452 case TCP_SYN_SENT:
453 case TCP_SYN_RECV:
454 /* Only in fast or simultaneous open. If a fast open socket is
455 * already accepted, it is treated as a connected one below.
456 */
457 if (fastopen && !fastopen->sk)
458 break;
460 if (!sock_owned_by_user(sk)) {
461 sk->sk_err = err;
462 sk->sk_error_report(sk); /* Wake people up to see the error (see connect in sock.c) */
464 tcp_done(sk);
465 } else
466 sk->sk_err_soft = err;
467 goto out;
470 if (!sock_owned_by_user(sk) && np->recverr) {
471 sk->sk_err = err;
472 sk->sk_error_report(sk);
473 } else
474 sk->sk_err_soft = err;
476 out:
477 bh_unlock_sock(sk);
478 sock_put(sk);
479 return 0;
483 static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst,
484 struct flowi *fl,
485 struct request_sock *req,
486 struct tcp_fastopen_cookie *foc,
487 enum tcp_synack_type synack_type)
489 struct inet_request_sock *ireq = inet_rsk(req);
490 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
491 struct ipv6_txoptions *opt;
492 struct flowi6 *fl6 = &fl->u.ip6;
493 struct sk_buff *skb;
494 int err = -ENOMEM;
496 /* First, grab a route. */
497 if (!dst && (dst = inet6_csk_route_req(sk, fl6, req,
498 IPPROTO_TCP)) == NULL)
499 goto done;
501 skb = tcp_make_synack(sk, dst, req, foc, synack_type);
503 if (skb) {
504 __tcp_v6_send_check(skb, &ireq->ir_v6_loc_addr,
505 &ireq->ir_v6_rmt_addr);
507 fl6->daddr = ireq->ir_v6_rmt_addr;
508 if (np->repflow && ireq->pktopts)
509 fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts));
511 rcu_read_lock();
512 opt = ireq->ipv6_opt;
513 if (!opt)
514 opt = rcu_dereference(np->opt);
515 err = ip6_xmit(sk, skb, fl6, sk->sk_mark, opt, np->tclass);
516 rcu_read_unlock();
517 err = net_xmit_eval(err);
520 done:
521 return err;
525 static void tcp_v6_reqsk_destructor(struct request_sock *req)
527 kfree(inet_rsk(req)->ipv6_opt);
528 kfree_skb(inet_rsk(req)->pktopts);
531 #ifdef CONFIG_TCP_MD5SIG
532 static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(const struct sock *sk,
533 const struct in6_addr *addr)
535 return tcp_md5_do_lookup(sk, (union tcp_md5_addr *)addr, AF_INET6);
538 static struct tcp_md5sig_key *tcp_v6_md5_lookup(const struct sock *sk,
539 const struct sock *addr_sk)
541 return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr);
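/* Userspace installs TCP-MD5 (RFC 2385) keys with the TCP_MD5SIG /
 * TCP_MD5SIG_EXT socket options, which land in the parser below.
 * A minimal illustrative sketch (error handling omitted):
 *
 *	struct tcp_md5sig md5 = { .tcpm_keylen = 6 };
 *	struct sockaddr_in6 *peer = (struct sockaddr_in6 *)&md5.tcpm_addr;
 *
 *	peer->sin6_family = AF_INET6;
 *	inet_pton(AF_INET6, "2001:db8::2", &peer->sin6_addr);
 *	memcpy(md5.tcpm_key, "secret", 6);
 *	setsockopt(fd, IPPROTO_TCP, TCP_MD5SIG, &md5, sizeof(md5));
 */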
544 static int tcp_v6_parse_md5_keys(struct sock *sk, int optname,
545 char __user *optval, int optlen)
547 struct tcp_md5sig cmd;
548 struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&cmd.tcpm_addr;
549 u8 prefixlen;
551 if (optlen < sizeof(cmd))
552 return -EINVAL;
554 if (copy_from_user(&cmd, optval, sizeof(cmd)))
555 return -EFAULT;
557 if (sin6->sin6_family != AF_INET6)
558 return -EINVAL;
560 if (optname == TCP_MD5SIG_EXT &&
561 cmd.tcpm_flags & TCP_MD5SIG_FLAG_PREFIX) {
562 prefixlen = cmd.tcpm_prefixlen;
563 if (prefixlen > 128 || (ipv6_addr_v4mapped(&sin6->sin6_addr) &&
564 prefixlen > 32))
565 return -EINVAL;
566 } else {
567 prefixlen = ipv6_addr_v4mapped(&sin6->sin6_addr) ? 32 : 128;
570 if (!cmd.tcpm_keylen) {
571 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
572 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
573 AF_INET, prefixlen);
574 return tcp_md5_do_del(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
575 AF_INET6, prefixlen);
578 if (cmd.tcpm_keylen > TCP_MD5SIG_MAXKEYLEN)
579 return -EINVAL;
581 if (ipv6_addr_v4mapped(&sin6->sin6_addr))
582 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr.s6_addr32[3],
583 AF_INET, prefixlen, cmd.tcpm_key,
584 cmd.tcpm_keylen, GFP_KERNEL);
586 return tcp_md5_do_add(sk, (union tcp_md5_addr *)&sin6->sin6_addr,
587 AF_INET6, prefixlen, cmd.tcpm_key,
588 cmd.tcpm_keylen, GFP_KERNEL);
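/* Feed the IPv6 pseudo-header (source address, destination address,
 * upper-layer length, next header = TCP) plus the TCP header with its
 * checksum field zeroed into the MD5 transform, as required by the TCP-MD5
 * signature calculation.
 */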
591 static int tcp_v6_md5_hash_headers(struct tcp_md5sig_pool *hp,
592 const struct in6_addr *daddr,
593 const struct in6_addr *saddr,
594 const struct tcphdr *th, int nbytes)
596 struct tcp6_pseudohdr *bp;
597 struct scatterlist sg;
598 struct tcphdr *_th;
600 bp = hp->scratch;
601 /* 1. TCP pseudo-header (RFC2460) */
602 bp->saddr = *saddr;
603 bp->daddr = *daddr;
604 bp->protocol = cpu_to_be32(IPPROTO_TCP);
605 bp->len = cpu_to_be32(nbytes);
607 _th = (struct tcphdr *)(bp + 1);
608 memcpy(_th, th, sizeof(*th));
609 _th->check = 0;
611 sg_init_one(&sg, bp, sizeof(*bp) + sizeof(*th));
612 ahash_request_set_crypt(hp->md5_req, &sg, NULL,
613 sizeof(*bp) + sizeof(*th));
614 return crypto_ahash_update(hp->md5_req);
617 static int tcp_v6_md5_hash_hdr(char *md5_hash, const struct tcp_md5sig_key *key,
618 const struct in6_addr *daddr, struct in6_addr *saddr,
619 const struct tcphdr *th)
621 struct tcp_md5sig_pool *hp;
622 struct ahash_request *req;
624 hp = tcp_get_md5sig_pool();
625 if (!hp)
626 goto clear_hash_noput;
627 req = hp->md5_req;
629 if (crypto_ahash_init(req))
630 goto clear_hash;
631 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, th->doff << 2))
632 goto clear_hash;
633 if (tcp_md5_hash_key(hp, key))
634 goto clear_hash;
635 ahash_request_set_crypt(req, NULL, md5_hash, 0);
636 if (crypto_ahash_final(req))
637 goto clear_hash;
639 tcp_put_md5sig_pool();
640 return 0;
642 clear_hash:
643 tcp_put_md5sig_pool();
644 clear_hash_noput:
645 memset(md5_hash, 0, 16);
646 return 1;
649 static int tcp_v6_md5_hash_skb(char *md5_hash,
650 const struct tcp_md5sig_key *key,
651 const struct sock *sk,
652 const struct sk_buff *skb)
654 const struct in6_addr *saddr, *daddr;
655 struct tcp_md5sig_pool *hp;
656 struct ahash_request *req;
657 const struct tcphdr *th = tcp_hdr(skb);
659 if (sk) { /* valid for establish/request sockets */
660 saddr = &sk->sk_v6_rcv_saddr;
661 daddr = &sk->sk_v6_daddr;
662 } else {
663 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
664 saddr = &ip6h->saddr;
665 daddr = &ip6h->daddr;
668 hp = tcp_get_md5sig_pool();
669 if (!hp)
670 goto clear_hash_noput;
671 req = hp->md5_req;
673 if (crypto_ahash_init(req))
674 goto clear_hash;
676 if (tcp_v6_md5_hash_headers(hp, daddr, saddr, th, skb->len))
677 goto clear_hash;
678 if (tcp_md5_hash_skb_data(hp, skb, th->doff << 2))
679 goto clear_hash;
680 if (tcp_md5_hash_key(hp, key))
681 goto clear_hash;
682 ahash_request_set_crypt(req, NULL, md5_hash, 0);
683 if (crypto_ahash_final(req))
684 goto clear_hash;
686 tcp_put_md5sig_pool();
687 return 0;
689 clear_hash:
690 tcp_put_md5sig_pool();
691 clear_hash_noput:
692 memset(md5_hash, 0, 16);
693 return 1;
696 #endif
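/* Validate an incoming segment against the configured MD5 keys.  Returns
 * true when the packet must be dropped: a key is configured but the option
 * is missing, an unexpected option is present, or the recomputed digest
 * does not match the one carried in the segment.
 */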
698 static bool tcp_v6_inbound_md5_hash(const struct sock *sk,
699 const struct sk_buff *skb)
701 #ifdef CONFIG_TCP_MD5SIG
702 const __u8 *hash_location = NULL;
703 struct tcp_md5sig_key *hash_expected;
704 const struct ipv6hdr *ip6h = ipv6_hdr(skb);
705 const struct tcphdr *th = tcp_hdr(skb);
706 int genhash;
707 u8 newhash[16];
709 hash_expected = tcp_v6_md5_do_lookup(sk, &ip6h->saddr);
710 hash_location = tcp_parse_md5sig_option(th);
712 /* We've parsed the options - do we have a hash? */
713 if (!hash_expected && !hash_location)
714 return false;
716 if (hash_expected && !hash_location) {
717 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND);
718 return true;
721 if (!hash_expected && hash_location) {
722 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED);
723 return true;
726 /* check the signature */
727 genhash = tcp_v6_md5_hash_skb(newhash,
728 hash_expected,
729 NULL, skb);
731 if (genhash || memcmp(hash_location, newhash, 16) != 0) {
732 NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPMD5FAILURE);
733 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
734 genhash ? "failed" : "mismatch",
735 &ip6h->saddr, ntohs(th->source),
736 &ip6h->daddr, ntohs(th->dest));
737 return true;
739 #endif
740 return false;
743 static void tcp_v6_init_req(struct request_sock *req,
744 const struct sock *sk_listener,
745 struct sk_buff *skb)
747 bool l3_slave = ipv6_l3mdev_skb(TCP_SKB_CB(skb)->header.h6.flags);
748 struct inet_request_sock *ireq = inet_rsk(req);
749 const struct ipv6_pinfo *np = tcp_inet6_sk(sk_listener);
751 ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
752 ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
754 /* So that link locals have meaning */
755 if ((!sk_listener->sk_bound_dev_if || l3_slave) &&
756 ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL)
757 ireq->ir_iif = tcp_v6_iif(skb);
759 if (!TCP_SKB_CB(skb)->tcp_tw_isn &&
760 (ipv6_opt_accepted(sk_listener, skb, &TCP_SKB_CB(skb)->header.h6) ||
761 np->rxopt.bits.rxinfo ||
762 np->rxopt.bits.rxoinfo || np->rxopt.bits.rxhlim ||
763 np->rxopt.bits.rxohlim || np->repflow)) {
764 refcount_inc(&skb->users);
765 ireq->pktopts = skb;
769 static struct dst_entry *tcp_v6_route_req(const struct sock *sk,
770 struct flowi *fl,
771 const struct request_sock *req)
773 return inet6_csk_route_req(sk, &fl->u.ip6, req, IPPROTO_TCP);
776 struct request_sock_ops tcp6_request_sock_ops __read_mostly = {
777 .family = AF_INET6,
778 .obj_size = sizeof(struct tcp6_request_sock),
779 .rtx_syn_ack = tcp_rtx_synack,
780 .send_ack = tcp_v6_reqsk_send_ack,
781 .destructor = tcp_v6_reqsk_destructor,
782 .send_reset = tcp_v6_send_reset,
783 .syn_ack_timeout = tcp_syn_ack_timeout,
786 static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = {
787 .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) -
788 sizeof(struct ipv6hdr),
789 #ifdef CONFIG_TCP_MD5SIG
790 .req_md5_lookup = tcp_v6_md5_lookup,
791 .calc_md5_hash = tcp_v6_md5_hash_skb,
792 #endif
793 .init_req = tcp_v6_init_req,
794 #ifdef CONFIG_SYN_COOKIES
795 .cookie_init_seq = cookie_v6_init_sequence,
796 #endif
797 .route_req = tcp_v6_route_req,
798 .init_seq = tcp_v6_init_seq,
799 .init_ts_off = tcp_v6_init_ts_off,
800 .send_synack = tcp_v6_send_synack,
803 static void tcp_v6_send_response(const struct sock *sk, struct sk_buff *skb, u32 seq,
804 u32 ack, u32 win, u32 tsval, u32 tsecr,
805 int oif, struct tcp_md5sig_key *key, int rst,
806 u8 tclass, __be32 label)
808 const struct tcphdr *th = tcp_hdr(skb);
809 struct tcphdr *t1;
810 struct sk_buff *buff;
811 struct flowi6 fl6;
812 struct net *net = sk ? sock_net(sk) : dev_net(skb_dst(skb)->dev);
813 struct sock *ctl_sk = net->ipv6.tcp_sk;
814 unsigned int tot_len = sizeof(struct tcphdr);
815 struct dst_entry *dst;
816 __be32 *topt;
817 __u32 mark = 0;
819 if (tsecr)
820 tot_len += TCPOLEN_TSTAMP_ALIGNED;
821 #ifdef CONFIG_TCP_MD5SIG
822 if (key)
823 tot_len += TCPOLEN_MD5SIG_ALIGNED;
824 #endif
826 buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len,
827 GFP_ATOMIC);
828 if (!buff)
829 return;
831 skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len);
833 t1 = skb_push(buff, tot_len);
834 skb_reset_transport_header(buff);
836 /* Swap the send and the receive. */
837 memset(t1, 0, sizeof(*t1));
838 t1->dest = th->source;
839 t1->source = th->dest;
840 t1->doff = tot_len / 4;
841 t1->seq = htonl(seq);
842 t1->ack_seq = htonl(ack);
843 t1->ack = !rst || !th->ack;
844 t1->rst = rst;
845 t1->window = htons(win);
847 topt = (__be32 *)(t1 + 1);
849 if (tsecr) {
850 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
851 (TCPOPT_TIMESTAMP << 8) | TCPOLEN_TIMESTAMP);
852 *topt++ = htonl(tsval);
853 *topt++ = htonl(tsecr);
856 #ifdef CONFIG_TCP_MD5SIG
857 if (key) {
858 *topt++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) |
859 (TCPOPT_MD5SIG << 8) | TCPOLEN_MD5SIG);
860 tcp_v6_md5_hash_hdr((__u8 *)topt, key,
861 &ipv6_hdr(skb)->saddr,
862 &ipv6_hdr(skb)->daddr, t1);
864 #endif
866 memset(&fl6, 0, sizeof(fl6));
867 fl6.daddr = ipv6_hdr(skb)->saddr;
868 fl6.saddr = ipv6_hdr(skb)->daddr;
869 fl6.flowlabel = label;
871 buff->ip_summed = CHECKSUM_PARTIAL;
872 buff->csum = 0;
874 __tcp_v6_send_check(buff, &fl6.saddr, &fl6.daddr);
876 fl6.flowi6_proto = IPPROTO_TCP;
877 if (rt6_need_strict(&fl6.daddr) && !oif)
878 fl6.flowi6_oif = tcp_v6_iif(skb);
879 else {
880 if (!oif && netif_index_is_l3_master(net, skb->skb_iif))
881 oif = skb->skb_iif;
883 fl6.flowi6_oif = oif;
886 if (sk)
887 mark = (sk->sk_state == TCP_TIME_WAIT) ?
888 inet_twsk(sk)->tw_mark : sk->sk_mark;
889 fl6.flowi6_mark = IP6_REPLY_MARK(net, skb->mark) ?: mark;
890 fl6.fl6_dport = t1->dest;
891 fl6.fl6_sport = t1->source;
892 fl6.flowi6_uid = sock_net_uid(net, sk && sk_fullsock(sk) ? sk : NULL);
893 security_skb_classify_flow(skb, flowi6_to_flowi(&fl6));
895 /* Pass a socket to ip6_dst_lookup_flow whether or not it is for a RST;
896 * the underlying function will use it to retrieve the network
897 * namespace.
898 */
899 dst = ip6_dst_lookup_flow(ctl_sk, &fl6, NULL);
900 if (!IS_ERR(dst)) {
901 skb_dst_set(buff, dst);
902 ip6_xmit(ctl_sk, buff, &fl6, fl6.flowi6_mark, NULL, tclass);
903 TCP_INC_STATS(net, TCP_MIB_OUTSEGS);
904 if (rst)
905 TCP_INC_STATS(net, TCP_MIB_OUTRSTS);
906 return;
909 kfree_skb(buff);
912 static void tcp_v6_send_reset(const struct sock *sk, struct sk_buff *skb)
914 const struct tcphdr *th = tcp_hdr(skb);
915 u32 seq = 0, ack_seq = 0;
916 struct tcp_md5sig_key *key = NULL;
917 #ifdef CONFIG_TCP_MD5SIG
918 const __u8 *hash_location = NULL;
919 struct ipv6hdr *ipv6h = ipv6_hdr(skb);
920 unsigned char newhash[16];
921 int genhash;
922 struct sock *sk1 = NULL;
923 #endif
924 int oif = 0;
926 if (th->rst)
927 return;
929 /* If sk not NULL, it means we did a successful lookup and incoming
930 * route had to be correct. prequeue might have dropped our dst.
932 if (!sk && !ipv6_unicast_destination(skb))
933 return;
935 #ifdef CONFIG_TCP_MD5SIG
936 rcu_read_lock();
937 hash_location = tcp_parse_md5sig_option(th);
938 if (sk && sk_fullsock(sk)) {
939 key = tcp_v6_md5_do_lookup(sk, &ipv6h->saddr);
940 } else if (hash_location) {
942 * active side is lost. Try to find listening socket through
943 * source port, and then find md5 key through listening socket.
944 * we do not lose security here:
945 * Incoming packet is checked with md5 hash with finding key,
946 * no RST generated if md5 hash doesn't match.
948 sk1 = inet6_lookup_listener(dev_net(skb_dst(skb)->dev),
949 &tcp_hashinfo, NULL, 0,
950 &ipv6h->saddr,
951 th->source, &ipv6h->daddr,
952 ntohs(th->source),
953 tcp_v6_iif_l3_slave(skb),
954 tcp_v6_sdif(skb));
955 if (!sk1)
956 goto out;
958 key = tcp_v6_md5_do_lookup(sk1, &ipv6h->saddr);
959 if (!key)
960 goto out;
962 genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb);
963 if (genhash || memcmp(hash_location, newhash, 16) != 0)
964 goto out;
966 #endif
968 if (th->ack)
969 seq = ntohl(th->ack_seq);
970 else
971 ack_seq = ntohl(th->seq) + th->syn + th->fin + skb->len -
972 (th->doff << 2);
974 if (sk) {
975 oif = sk->sk_bound_dev_if;
976 if (sk_fullsock(sk))
977 trace_tcp_send_reset(sk, skb);
980 tcp_v6_send_response(sk, skb, seq, ack_seq, 0, 0, 0, oif, key, 1, 0, 0);
982 #ifdef CONFIG_TCP_MD5SIG
983 out:
984 rcu_read_unlock();
985 #endif
988 static void tcp_v6_send_ack(const struct sock *sk, struct sk_buff *skb, u32 seq,
989 u32 ack, u32 win, u32 tsval, u32 tsecr, int oif,
990 struct tcp_md5sig_key *key, u8 tclass,
991 __be32 label)
993 tcp_v6_send_response(sk, skb, seq, ack, win, tsval, tsecr, oif, key, 0,
994 tclass, label);
997 static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb)
999 struct inet_timewait_sock *tw = inet_twsk(sk);
1000 struct tcp_timewait_sock *tcptw = tcp_twsk(sk);
1002 tcp_v6_send_ack(sk, skb, tcptw->tw_snd_nxt, tcptw->tw_rcv_nxt,
1003 tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale,
1004 tcp_time_stamp_raw() + tcptw->tw_ts_offset,
1005 tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw),
1006 tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel));
1008 inet_twsk_put(tw);
1011 static void tcp_v6_reqsk_send_ack(const struct sock *sk, struct sk_buff *skb,
1012 struct request_sock *req)
1014 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
1015 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
1017 /* RFC 7323 2.3
1018 * The window field (SEG.WND) of every outgoing segment, with the
1019 * exception of <SYN> segments, MUST be right-shifted by
1020 * Rcv.Wind.Shift bits:
1022 tcp_v6_send_ack(sk, skb, (sk->sk_state == TCP_LISTEN) ?
1023 tcp_rsk(req)->snt_isn + 1 : tcp_sk(sk)->snd_nxt,
1024 tcp_rsk(req)->rcv_nxt,
1025 req->rsk_rcv_wnd >> inet_rsk(req)->rcv_wscale,
1026 tcp_time_stamp_raw() + tcp_rsk(req)->ts_off,
1027 req->ts_recent, sk->sk_bound_dev_if,
1028 tcp_v6_md5_do_lookup(sk, &ipv6_hdr(skb)->saddr),
1029 0, 0);
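/* With CONFIG_SYN_COOKIES, an ACK arriving on a listener with no matching
 * request may carry a SYN cookie; cookie_v6_check() tries to reconstruct
 * the request sock from it (note the !th->syn test below: only non-SYN
 * segments are candidates).
 */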
1033 static struct sock *tcp_v6_cookie_check(struct sock *sk, struct sk_buff *skb)
1035 #ifdef CONFIG_SYN_COOKIES
1036 const struct tcphdr *th = tcp_hdr(skb);
1038 if (!th->syn)
1039 sk = cookie_v6_check(sk, skb);
1040 #endif
1041 return sk;
1044 static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
1046 if (skb->protocol == htons(ETH_P_IP))
1047 return tcp_v4_conn_request(sk, skb);
1049 if (!ipv6_unicast_destination(skb))
1050 goto drop;
1052 return tcp_conn_request(&tcp6_request_sock_ops,
1053 &tcp_request_sock_ipv6_ops, sk, skb);
1055 drop:
1056 tcp_listendrop(sk);
1057 return 0; /* don't send reset */
1060 static void tcp_v6_restore_cb(struct sk_buff *skb)
1062 /* We need to move header back to the beginning if xfrm6_policy_check()
1063 * and tcp_v6_fill_cb() are going to be called again.
1064 * ip6_datagram_recv_specific_ctl() also expects IP6CB to be there.
1066 memmove(IP6CB(skb), &TCP_SKB_CB(skb)->header.h6,
1067 sizeof(struct inet6_skb_parm));
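/* Create the child socket once the handshake completes.  Two cases below:
 * a v4-mapped peer (skb->protocol == ETH_P_IP) is handed to the IPv4 code
 * and then re-dressed with the IPv6 mapped ops, while a native IPv6 peer
 * gets a full IPv6 child including cloned txoptions and pktopts.
 */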
1070 static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff *skb,
1071 struct request_sock *req,
1072 struct dst_entry *dst,
1073 struct request_sock *req_unhash,
1074 bool *own_req)
1076 struct inet_request_sock *ireq;
1077 struct ipv6_pinfo *newnp;
1078 const struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1079 struct ipv6_txoptions *opt;
1080 struct inet_sock *newinet;
1081 struct tcp_sock *newtp;
1082 struct sock *newsk;
1083 #ifdef CONFIG_TCP_MD5SIG
1084 struct tcp_md5sig_key *key;
1085 #endif
1086 struct flowi6 fl6;
1088 if (skb->protocol == htons(ETH_P_IP)) {
1090 * v6 mapped
1093 newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst,
1094 req_unhash, own_req);
1096 if (!newsk)
1097 return NULL;
1099 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1101 newinet = inet_sk(newsk);
1102 newnp = tcp_inet6_sk(newsk);
1103 newtp = tcp_sk(newsk);
1105 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1107 newnp->saddr = newsk->sk_v6_rcv_saddr;
1109 inet_csk(newsk)->icsk_af_ops = &ipv6_mapped;
1110 newsk->sk_backlog_rcv = tcp_v4_do_rcv;
1111 #ifdef CONFIG_TCP_MD5SIG
1112 newtp->af_specific = &tcp_sock_ipv6_mapped_specific;
1113 #endif
1115 newnp->ipv6_mc_list = NULL;
1116 newnp->ipv6_ac_list = NULL;
1117 newnp->ipv6_fl_list = NULL;
1118 newnp->pktoptions = NULL;
1119 newnp->opt = NULL;
1120 newnp->mcast_oif = inet_iif(skb);
1121 newnp->mcast_hops = ip_hdr(skb)->ttl;
1122 newnp->rcv_flowinfo = 0;
1123 if (np->repflow)
1124 newnp->flow_label = 0;
1127 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1128 * here, tcp_create_openreq_child now does this for us, see the comment in
1129 * that function for the gory details. -acme
1132 /* This is a tricky place. Until this moment IPv4 tcp
1133 worked with IPv6 icsk.icsk_af_ops.
1134 Sync it now.
1136 tcp_sync_mss(newsk, inet_csk(newsk)->icsk_pmtu_cookie);
1138 return newsk;
1141 ireq = inet_rsk(req);
1143 if (sk_acceptq_is_full(sk))
1144 goto out_overflow;
1146 if (!dst) {
1147 dst = inet6_csk_route_req(sk, &fl6, req, IPPROTO_TCP);
1148 if (!dst)
1149 goto out;
1152 newsk = tcp_create_openreq_child(sk, req, skb);
1153 if (!newsk)
1154 goto out_nonewsk;
1157 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1158 * count here, tcp_create_openreq_child now does this for us, see the
1159 * comment in that function for the gory details. -acme
1162 newsk->sk_gso_type = SKB_GSO_TCPV6;
1163 ip6_dst_store(newsk, dst, NULL, NULL);
1164 inet6_sk_rx_dst_set(newsk, skb);
1166 inet_sk(newsk)->pinet6 = tcp_inet6_sk(newsk);
1168 newtp = tcp_sk(newsk);
1169 newinet = inet_sk(newsk);
1170 newnp = tcp_inet6_sk(newsk);
1172 memcpy(newnp, np, sizeof(struct ipv6_pinfo));
1174 newsk->sk_v6_daddr = ireq->ir_v6_rmt_addr;
1175 newnp->saddr = ireq->ir_v6_loc_addr;
1176 newsk->sk_v6_rcv_saddr = ireq->ir_v6_loc_addr;
1177 newsk->sk_bound_dev_if = ireq->ir_iif;
1179 /* Now IPv6 options...
1181 First: no IPv4 options.
1183 newinet->inet_opt = NULL;
1184 newnp->ipv6_mc_list = NULL;
1185 newnp->ipv6_ac_list = NULL;
1186 newnp->ipv6_fl_list = NULL;
1188 /* Clone RX bits */
1189 newnp->rxopt.all = np->rxopt.all;
1191 newnp->pktoptions = NULL;
1192 newnp->opt = NULL;
1193 newnp->mcast_oif = tcp_v6_iif(skb);
1194 newnp->mcast_hops = ipv6_hdr(skb)->hop_limit;
1195 newnp->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(skb));
1196 if (np->repflow)
1197 newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb));
1199 /* Clone native IPv6 options from listening socket (if any)
1201 Yes, keeping a reference count would be much more clever,
1202 but we do one more thing here: reattach optmem
1203 to newsk.
1205 opt = ireq->ipv6_opt;
1206 if (!opt)
1207 opt = rcu_dereference(np->opt);
1208 if (opt) {
1209 opt = ipv6_dup_options(newsk, opt);
1210 RCU_INIT_POINTER(newnp->opt, opt);
1212 inet_csk(newsk)->icsk_ext_hdr_len = 0;
1213 if (opt)
1214 inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
1215 opt->opt_flen;
1217 tcp_ca_openreq_child(newsk, dst);
1219 tcp_sync_mss(newsk, dst_mtu(dst));
1220 newtp->advmss = tcp_mss_clamp(tcp_sk(sk), dst_metric_advmss(dst));
1222 tcp_initialize_rcv_mss(newsk);
1224 newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6;
1225 newinet->inet_rcv_saddr = LOOPBACK4_IPV6;
1227 #ifdef CONFIG_TCP_MD5SIG
1228 /* Copy over the MD5 key from the original socket */
1229 key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr);
1230 if (key) {
1231 /* We're using one, so create a matching key
1232 * on the newsk structure. If we fail to get
1233 * memory, then we end up not copying the key
1234 * across. Shucks.
1236 tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newsk->sk_v6_daddr,
1237 AF_INET6, 128, key->key, key->keylen,
1238 sk_gfp_mask(sk, GFP_ATOMIC));
1240 #endif
1242 if (__inet_inherit_port(sk, newsk) < 0) {
1243 inet_csk_prepare_forced_close(newsk);
1244 tcp_done(newsk);
1245 goto out;
1247 *own_req = inet_ehash_nolisten(newsk, req_to_sk(req_unhash));
1248 if (*own_req) {
1249 tcp_move_syn(newtp, req);
1251 /* Clone pktoptions received with SYN, if we own the req */
1252 if (ireq->pktopts) {
1253 newnp->pktoptions = skb_clone(ireq->pktopts,
1254 sk_gfp_mask(sk, GFP_ATOMIC));
1255 consume_skb(ireq->pktopts);
1256 ireq->pktopts = NULL;
1257 if (newnp->pktoptions) {
1258 tcp_v6_restore_cb(newnp->pktoptions);
1259 skb_set_owner_r(newnp->pktoptions, newsk);
1264 return newsk;
1266 out_overflow:
1267 __NET_INC_STATS(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
1268 out_nonewsk:
1269 dst_release(dst);
1270 out:
1271 tcp_listendrop(sk);
1272 return NULL;
1275 /* The socket must have its spinlock held when we get
1276 * here, unless it is a TCP_LISTEN socket.
1278 * We have a potential double-lock case here, so even when
1279 * doing backlog processing we use the BH locking scheme.
1280 * This is because we cannot sleep with the original spinlock
1281 * held.
1283 static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
1285 struct ipv6_pinfo *np = tcp_inet6_sk(sk);
1286 struct sk_buff *opt_skb = NULL;
1287 struct tcp_sock *tp;
1289 /* Imagine: socket is IPv6. IPv4 packet arrives,
1290 goes to IPv4 receive handler and backlogged.
1291 From backlog it always goes here. Kerboom...
1292 Fortunately, tcp_rcv_established and rcv_established
1293 handle them correctly, but it is not the case with
1294 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1297 if (skb->protocol == htons(ETH_P_IP))
1298 return tcp_v4_do_rcv(sk, skb);
1301 * socket locking is here for SMP purposes as backlog rcv
1302 * is currently called with bh processing disabled.
1305 /* Do Stevens' IPV6_PKTOPTIONS.
1307 Yes, guys, it is the only place in our code, where we
1308 may make it not affecting IPv4.
1309 The rest of code is protocol independent,
1310 and I do not like idea to uglify IPv4.
1312 Actually, the whole idea behind IPV6_PKTOPTIONS
1313 does not look very well thought out. For now we latch
1314 options, received in the last packet, enqueued
1315 by tcp. Feel free to propose better solution.
1316 --ANK (980728)
1318 if (np->rxopt.all)
1319 opt_skb = skb_clone(skb, sk_gfp_mask(sk, GFP_ATOMIC));
1321 if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
1322 struct dst_entry *dst = sk->sk_rx_dst;
1324 sock_rps_save_rxhash(sk, skb);
1325 sk_mark_napi_id(sk, skb);
1326 if (dst) {
1327 if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif ||
1328 dst->ops->check(dst, np->rx_dst_cookie) == NULL) {
1329 dst_release(dst);
1330 sk->sk_rx_dst = NULL;
1334 tcp_rcv_established(sk, skb);
1335 if (opt_skb)
1336 goto ipv6_pktoptions;
1337 return 0;
1340 if (tcp_checksum_complete(skb))
1341 goto csum_err;
1343 if (sk->sk_state == TCP_LISTEN) {
1344 struct sock *nsk = tcp_v6_cookie_check(sk, skb);
1346 if (!nsk)
1347 goto discard;
1349 if (nsk != sk) {
1350 if (tcp_child_process(sk, nsk, skb))
1351 goto reset;
1352 if (opt_skb)
1353 __kfree_skb(opt_skb);
1354 return 0;
1356 } else
1357 sock_rps_save_rxhash(sk, skb);
1359 if (tcp_rcv_state_process(sk, skb))
1360 goto reset;
1361 if (opt_skb)
1362 goto ipv6_pktoptions;
1363 return 0;
1365 reset:
1366 tcp_v6_send_reset(sk, skb);
1367 discard:
1368 if (opt_skb)
1369 __kfree_skb(opt_skb);
1370 kfree_skb(skb);
1371 return 0;
1372 csum_err:
1373 TCP_INC_STATS(sock_net(sk), TCP_MIB_CSUMERRORS);
1374 TCP_INC_STATS(sock_net(sk), TCP_MIB_INERRS);
1375 goto discard;
1378 ipv6_pktoptions:
1379 /* Do you ask, what is it?
1381 1. skb was enqueued by tcp.
1382 2. skb is added to tail of read queue, rather than out of order.
1383 3. socket is not in passive state.
1384 4. Finally, it really contains options, which user wants to receive.
1386 tp = tcp_sk(sk);
1387 if (TCP_SKB_CB(opt_skb)->end_seq == tp->rcv_nxt &&
1388 !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
1389 if (np->rxopt.bits.rxinfo || np->rxopt.bits.rxoinfo)
1390 np->mcast_oif = tcp_v6_iif(opt_skb);
1391 if (np->rxopt.bits.rxhlim || np->rxopt.bits.rxohlim)
1392 np->mcast_hops = ipv6_hdr(opt_skb)->hop_limit;
1393 if (np->rxopt.bits.rxflow || np->rxopt.bits.rxtclass)
1394 np->rcv_flowinfo = ip6_flowinfo(ipv6_hdr(opt_skb));
1395 if (np->repflow)
1396 np->flow_label = ip6_flowlabel(ipv6_hdr(opt_skb));
1397 if (ipv6_opt_accepted(sk, opt_skb, &TCP_SKB_CB(opt_skb)->header.h6)) {
1398 skb_set_owner_r(opt_skb, sk);
1399 tcp_v6_restore_cb(opt_skb);
1400 opt_skb = xchg(&np->pktoptions, opt_skb);
1401 } else {
1402 __kfree_skb(opt_skb);
1403 opt_skb = xchg(&np->pktoptions, NULL);
1407 kfree_skb(opt_skb);
1408 return 0;
1411 static void tcp_v6_fill_cb(struct sk_buff *skb, const struct ipv6hdr *hdr,
1412 const struct tcphdr *th)
1414 /* This is tricky: we move IP6CB at its correct location into
1415 * TCP_SKB_CB(). It must be done after xfrm6_policy_check(), because
1416 * _decode_session6() uses IP6CB().
1417 * barrier() makes sure compiler won't play aliasing games.
1419 memmove(&TCP_SKB_CB(skb)->header.h6, IP6CB(skb),
1420 sizeof(struct inet6_skb_parm));
1421 barrier();
1423 TCP_SKB_CB(skb)->seq = ntohl(th->seq);
1424 TCP_SKB_CB(skb)->end_seq = (TCP_SKB_CB(skb)->seq + th->syn + th->fin +
1425 skb->len - th->doff*4);
1426 TCP_SKB_CB(skb)->ack_seq = ntohl(th->ack_seq);
1427 TCP_SKB_CB(skb)->tcp_flags = tcp_flag_byte(th);
1428 TCP_SKB_CB(skb)->tcp_tw_isn = 0;
1429 TCP_SKB_CB(skb)->ip_dsfield = ipv6_get_dsfield(hdr);
1430 TCP_SKB_CB(skb)->sacked = 0;
1431 TCP_SKB_CB(skb)->has_rxtstamp =
1432 skb->tstamp || skb_hwtstamps(skb)->hwtstamp;
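/* Main IPv6 TCP receive entry point: pull and sanity-check the TCP header,
 * demux to an established / request / time-wait socket, verify checksum,
 * MD5 and XFRM policy, and either process the segment directly or queue it
 * to the socket backlog when the owner holds the socket lock.
 */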
1435 INDIRECT_CALLABLE_SCOPE int tcp_v6_rcv(struct sk_buff *skb)
1437 struct sk_buff *skb_to_free;
1438 int sdif = inet6_sdif(skb);
1439 const struct tcphdr *th;
1440 const struct ipv6hdr *hdr;
1441 bool refcounted;
1442 struct sock *sk;
1443 int ret;
1444 struct net *net = dev_net(skb->dev);
1446 if (skb->pkt_type != PACKET_HOST)
1447 goto discard_it;
1450 * Count it even if it's bad.
1452 __TCP_INC_STATS(net, TCP_MIB_INSEGS);
1454 if (!pskb_may_pull(skb, sizeof(struct tcphdr)))
1455 goto discard_it;
1457 th = (const struct tcphdr *)skb->data;
1459 if (unlikely(th->doff < sizeof(struct tcphdr)/4))
1460 goto bad_packet;
1461 if (!pskb_may_pull(skb, th->doff*4))
1462 goto discard_it;
1464 if (skb_checksum_init(skb, IPPROTO_TCP, ip6_compute_pseudo))
1465 goto csum_error;
1467 th = (const struct tcphdr *)skb->data;
1468 hdr = ipv6_hdr(skb);
1470 lookup:
1471 sk = __inet6_lookup_skb(&tcp_hashinfo, skb, __tcp_hdrlen(th),
1472 th->source, th->dest, inet6_iif(skb), sdif,
1473 &refcounted);
1474 if (!sk)
1475 goto no_tcp_socket;
1477 process:
1478 if (sk->sk_state == TCP_TIME_WAIT)
1479 goto do_time_wait;
1481 if (sk->sk_state == TCP_NEW_SYN_RECV) {
1482 struct request_sock *req = inet_reqsk(sk);
1483 bool req_stolen = false;
1484 struct sock *nsk;
1486 sk = req->rsk_listener;
1487 if (tcp_v6_inbound_md5_hash(sk, skb)) {
1488 sk_drops_add(sk, skb);
1489 reqsk_put(req);
1490 goto discard_it;
1492 if (tcp_checksum_complete(skb)) {
1493 reqsk_put(req);
1494 goto csum_error;
1496 if (unlikely(sk->sk_state != TCP_LISTEN)) {
1497 inet_csk_reqsk_queue_drop_and_put(sk, req);
1498 goto lookup;
1500 sock_hold(sk);
1501 refcounted = true;
1502 nsk = NULL;
1503 if (!tcp_filter(sk, skb)) {
1504 th = (const struct tcphdr *)skb->data;
1505 hdr = ipv6_hdr(skb);
1506 tcp_v6_fill_cb(skb, hdr, th);
1507 nsk = tcp_check_req(sk, skb, req, false, &req_stolen);
1509 if (!nsk) {
1510 reqsk_put(req);
1511 if (req_stolen) {
1512 /* Another cpu got exclusive access to req
1513 * and created a full blown socket.
1514 * Try to feed this packet to this socket
1515 * instead of discarding it.
1517 tcp_v6_restore_cb(skb);
1518 sock_put(sk);
1519 goto lookup;
1521 goto discard_and_relse;
1523 if (nsk == sk) {
1524 reqsk_put(req);
1525 tcp_v6_restore_cb(skb);
1526 } else if (tcp_child_process(sk, nsk, skb)) {
1527 tcp_v6_send_reset(nsk, skb);
1528 goto discard_and_relse;
1529 } else {
1530 sock_put(sk);
1531 return 0;
1534 if (hdr->hop_limit < tcp_inet6_sk(sk)->min_hopcount) {
1535 __NET_INC_STATS(net, LINUX_MIB_TCPMINTTLDROP);
1536 goto discard_and_relse;
1539 if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
1540 goto discard_and_relse;
1542 if (tcp_v6_inbound_md5_hash(sk, skb))
1543 goto discard_and_relse;
1545 if (tcp_filter(sk, skb))
1546 goto discard_and_relse;
1547 th = (const struct tcphdr *)skb->data;
1548 hdr = ipv6_hdr(skb);
1549 tcp_v6_fill_cb(skb, hdr, th);
1551 skb->dev = NULL;
1553 if (sk->sk_state == TCP_LISTEN) {
1554 ret = tcp_v6_do_rcv(sk, skb);
1555 goto put_and_return;
1558 sk_incoming_cpu_update(sk);
1560 bh_lock_sock_nested(sk);
1561 tcp_segs_in(tcp_sk(sk), skb);
1562 ret = 0;
1563 if (!sock_owned_by_user(sk)) {
1564 skb_to_free = sk->sk_rx_skb_cache;
1565 sk->sk_rx_skb_cache = NULL;
1566 ret = tcp_v6_do_rcv(sk, skb);
1567 } else {
1568 if (tcp_add_backlog(sk, skb))
1569 goto discard_and_relse;
1570 skb_to_free = NULL;
1572 bh_unlock_sock(sk);
1573 if (skb_to_free)
1574 __kfree_skb(skb_to_free);
1575 put_and_return:
1576 if (refcounted)
1577 sock_put(sk);
1578 return ret ? -1 : 0;
1580 no_tcp_socket:
1581 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb))
1582 goto discard_it;
1584 tcp_v6_fill_cb(skb, hdr, th);
1586 if (tcp_checksum_complete(skb)) {
1587 csum_error:
1588 __TCP_INC_STATS(net, TCP_MIB_CSUMERRORS);
1589 bad_packet:
1590 __TCP_INC_STATS(net, TCP_MIB_INERRS);
1591 } else {
1592 tcp_v6_send_reset(NULL, skb);
1595 discard_it:
1596 kfree_skb(skb);
1597 return 0;
1599 discard_and_relse:
1600 sk_drops_add(sk, skb);
1601 if (refcounted)
1602 sock_put(sk);
1603 goto discard_it;
1605 do_time_wait:
1606 if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) {
1607 inet_twsk_put(inet_twsk(sk));
1608 goto discard_it;
1611 tcp_v6_fill_cb(skb, hdr, th);
1613 if (tcp_checksum_complete(skb)) {
1614 inet_twsk_put(inet_twsk(sk));
1615 goto csum_error;
1618 switch (tcp_timewait_state_process(inet_twsk(sk), skb, th)) {
1619 case TCP_TW_SYN:
1621 struct sock *sk2;
1623 sk2 = inet6_lookup_listener(dev_net(skb->dev), &tcp_hashinfo,
1624 skb, __tcp_hdrlen(th),
1625 &ipv6_hdr(skb)->saddr, th->source,
1626 &ipv6_hdr(skb)->daddr,
1627 ntohs(th->dest),
1628 tcp_v6_iif_l3_slave(skb),
1629 sdif);
1630 if (sk2) {
1631 struct inet_timewait_sock *tw = inet_twsk(sk);
1632 inet_twsk_deschedule_put(tw);
1633 sk = sk2;
1634 tcp_v6_restore_cb(skb);
1635 refcounted = false;
1636 goto process;
1639 /* to ACK */
1640 /* fall through */
1641 case TCP_TW_ACK:
1642 tcp_v6_timewait_ack(sk, skb);
1643 break;
1644 case TCP_TW_RST:
1645 tcp_v6_send_reset(sk, skb);
1646 inet_twsk_deschedule_put(inet_twsk(sk));
1647 goto discard_it;
1648 case TCP_TW_SUCCESS:
1651 goto discard_it;
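/* Early demux: before routing, try to match the packet to an established
 * socket so its cached rx dst (validated via the route cookie) can be
 * attached to the skb and the normal route lookup skipped.
 */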
1654 INDIRECT_CALLABLE_SCOPE void tcp_v6_early_demux(struct sk_buff *skb)
1656 const struct ipv6hdr *hdr;
1657 const struct tcphdr *th;
1658 struct sock *sk;
1660 if (skb->pkt_type != PACKET_HOST)
1661 return;
1663 if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr)))
1664 return;
1666 hdr = ipv6_hdr(skb);
1667 th = tcp_hdr(skb);
1669 if (th->doff < sizeof(struct tcphdr) / 4)
1670 return;
1672 /* Note : We use inet6_iif() here, not tcp_v6_iif() */
1673 sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo,
1674 &hdr->saddr, th->source,
1675 &hdr->daddr, ntohs(th->dest),
1676 inet6_iif(skb), inet6_sdif(skb));
1677 if (sk) {
1678 skb->sk = sk;
1679 skb->destructor = sock_edemux;
1680 if (sk_fullsock(sk)) {
1681 struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst);
1683 if (dst)
1684 dst = dst_check(dst, tcp_inet6_sk(sk)->rx_dst_cookie);
1685 if (dst &&
1686 inet_sk(sk)->rx_dst_ifindex == skb->skb_iif)
1687 skb_dst_set_noref(skb, dst);
1692 static struct timewait_sock_ops tcp6_timewait_sock_ops = {
1693 .twsk_obj_size = sizeof(struct tcp6_timewait_sock),
1694 .twsk_unique = tcp_twsk_unique,
1695 .twsk_destructor = tcp_twsk_destructor,
1698 static const struct inet_connection_sock_af_ops ipv6_specific = {
1699 .queue_xmit = inet6_csk_xmit,
1700 .send_check = tcp_v6_send_check,
1701 .rebuild_header = inet6_sk_rebuild_header,
1702 .sk_rx_dst_set = inet6_sk_rx_dst_set,
1703 .conn_request = tcp_v6_conn_request,
1704 .syn_recv_sock = tcp_v6_syn_recv_sock,
1705 .net_header_len = sizeof(struct ipv6hdr),
1706 .net_frag_header_len = sizeof(struct frag_hdr),
1707 .setsockopt = ipv6_setsockopt,
1708 .getsockopt = ipv6_getsockopt,
1709 .addr2sockaddr = inet6_csk_addr2sockaddr,
1710 .sockaddr_len = sizeof(struct sockaddr_in6),
1711 #ifdef CONFIG_COMPAT
1712 .compat_setsockopt = compat_ipv6_setsockopt,
1713 .compat_getsockopt = compat_ipv6_getsockopt,
1714 #endif
1715 .mtu_reduced = tcp_v6_mtu_reduced,
1718 #ifdef CONFIG_TCP_MD5SIG
1719 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
1720 .md5_lookup = tcp_v6_md5_lookup,
1721 .calc_md5_hash = tcp_v6_md5_hash_skb,
1722 .md5_parse = tcp_v6_parse_md5_keys,
1724 #endif
1727 * TCP over IPv4 via INET6 API
1729 static const struct inet_connection_sock_af_ops ipv6_mapped = {
1730 .queue_xmit = ip_queue_xmit,
1731 .send_check = tcp_v4_send_check,
1732 .rebuild_header = inet_sk_rebuild_header,
1733 .sk_rx_dst_set = inet_sk_rx_dst_set,
1734 .conn_request = tcp_v6_conn_request,
1735 .syn_recv_sock = tcp_v6_syn_recv_sock,
1736 .net_header_len = sizeof(struct iphdr),
1737 .setsockopt = ipv6_setsockopt,
1738 .getsockopt = ipv6_getsockopt,
1739 .addr2sockaddr = inet6_csk_addr2sockaddr,
1740 .sockaddr_len = sizeof(struct sockaddr_in6),
1741 #ifdef CONFIG_COMPAT
1742 .compat_setsockopt = compat_ipv6_setsockopt,
1743 .compat_getsockopt = compat_ipv6_getsockopt,
1744 #endif
1745 .mtu_reduced = tcp_v4_mtu_reduced,
1748 #ifdef CONFIG_TCP_MD5SIG
1749 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
1750 .md5_lookup = tcp_v4_md5_lookup,
1751 .calc_md5_hash = tcp_v4_md5_hash_skb,
1752 .md5_parse = tcp_v6_parse_md5_keys,
1754 #endif
1756 /* NOTE: A lot of things are set to zero explicitly by the call to
1757 * sk_alloc(), so they need not be done here.
1758 */
1759 static int tcp_v6_init_sock(struct sock *sk)
1761 struct inet_connection_sock *icsk = inet_csk(sk);
1763 tcp_init_sock(sk);
1765 icsk->icsk_af_ops = &ipv6_specific;
1767 #ifdef CONFIG_TCP_MD5SIG
1768 tcp_sk(sk)->af_specific = &tcp_sock_ipv6_specific;
1769 #endif
1771 return 0;
1774 static void tcp_v6_destroy_sock(struct sock *sk)
1776 tcp_v4_destroy_sock(sk);
1777 inet6_destroy_sock(sk);
1780 #ifdef CONFIG_PROC_FS
1781 /* Proc filesystem TCPv6 sock list dumping. */
1782 static void get_openreq6(struct seq_file *seq,
1783 const struct request_sock *req, int i)
1785 long ttd = req->rsk_timer.expires - jiffies;
1786 const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr;
1787 const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr;
1789 if (ttd < 0)
1790 ttd = 0;
1792 seq_printf(seq,
1793 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1794 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1796 src->s6_addr32[0], src->s6_addr32[1],
1797 src->s6_addr32[2], src->s6_addr32[3],
1798 inet_rsk(req)->ir_num,
1799 dest->s6_addr32[0], dest->s6_addr32[1],
1800 dest->s6_addr32[2], dest->s6_addr32[3],
1801 ntohs(inet_rsk(req)->ir_rmt_port),
1802 TCP_SYN_RECV,
1803 0, 0, /* could print option size, but that is af dependent. */
1804 1, /* timers active (only the expire timer) */
1805 jiffies_to_clock_t(ttd),
1806 req->num_timeout,
1807 from_kuid_munged(seq_user_ns(seq),
1808 sock_i_uid(req->rsk_listener)),
1809 0, /* non standard timer */
1810 0, /* open_requests have no inode */
1811 0, req);
1814 static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i)
1816 const struct in6_addr *dest, *src;
1817 __u16 destp, srcp;
1818 int timer_active;
1819 unsigned long timer_expires;
1820 const struct inet_sock *inet = inet_sk(sp);
1821 const struct tcp_sock *tp = tcp_sk(sp);
1822 const struct inet_connection_sock *icsk = inet_csk(sp);
1823 const struct fastopen_queue *fastopenq = &icsk->icsk_accept_queue.fastopenq;
1824 int rx_queue;
1825 int state;
1827 dest = &sp->sk_v6_daddr;
1828 src = &sp->sk_v6_rcv_saddr;
1829 destp = ntohs(inet->inet_dport);
1830 srcp = ntohs(inet->inet_sport);
1832 if (icsk->icsk_pending == ICSK_TIME_RETRANS ||
1833 icsk->icsk_pending == ICSK_TIME_REO_TIMEOUT ||
1834 icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) {
1835 timer_active = 1;
1836 timer_expires = icsk->icsk_timeout;
1837 } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) {
1838 timer_active = 4;
1839 timer_expires = icsk->icsk_timeout;
1840 } else if (timer_pending(&sp->sk_timer)) {
1841 timer_active = 2;
1842 timer_expires = sp->sk_timer.expires;
1843 } else {
1844 timer_active = 0;
1845 timer_expires = jiffies;
1848 state = inet_sk_state_load(sp);
1849 if (state == TCP_LISTEN)
1850 rx_queue = sp->sk_ack_backlog;
1851 else
1852 /* Because we don't lock the socket,
1853 * we might find a transient negative value.
1855 rx_queue = max_t(int, tp->rcv_nxt - tp->copied_seq, 0);
1857 seq_printf(seq,
1858 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1859 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1861 src->s6_addr32[0], src->s6_addr32[1],
1862 src->s6_addr32[2], src->s6_addr32[3], srcp,
1863 dest->s6_addr32[0], dest->s6_addr32[1],
1864 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1865 state,
1866 tp->write_seq - tp->snd_una,
1867 rx_queue,
1868 timer_active,
1869 jiffies_delta_to_clock_t(timer_expires - jiffies),
1870 icsk->icsk_retransmits,
1871 from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)),
1872 icsk->icsk_probes_out,
1873 sock_i_ino(sp),
1874 refcount_read(&sp->sk_refcnt), sp,
1875 jiffies_to_clock_t(icsk->icsk_rto),
1876 jiffies_to_clock_t(icsk->icsk_ack.ato),
1877 (icsk->icsk_ack.quick << 1) | inet_csk_in_pingpong_mode(sp),
1878 tp->snd_cwnd,
1879 state == TCP_LISTEN ?
1880 fastopenq->max_qlen :
1881 (tcp_in_initial_slowstart(tp) ? -1 : tp->snd_ssthresh)
1885 static void get_timewait6_sock(struct seq_file *seq,
1886 struct inet_timewait_sock *tw, int i)
1888 long delta = tw->tw_timer.expires - jiffies;
1889 const struct in6_addr *dest, *src;
1890 __u16 destp, srcp;
1892 dest = &tw->tw_v6_daddr;
1893 src = &tw->tw_v6_rcv_saddr;
1894 destp = ntohs(tw->tw_dport);
1895 srcp = ntohs(tw->tw_sport);
1897 seq_printf(seq,
1898 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1899 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1901 src->s6_addr32[0], src->s6_addr32[1],
1902 src->s6_addr32[2], src->s6_addr32[3], srcp,
1903 dest->s6_addr32[0], dest->s6_addr32[1],
1904 dest->s6_addr32[2], dest->s6_addr32[3], destp,
1905 tw->tw_substate, 0, 0,
1906 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0,
1907 refcount_read(&tw->tw_refcnt), tw);
1910 static int tcp6_seq_show(struct seq_file *seq, void *v)
1912 struct tcp_iter_state *st;
1913 struct sock *sk = v;
1915 if (v == SEQ_START_TOKEN) {
1916 seq_puts(seq,
1917 " sl "
1918 "local_address "
1919 "remote_address "
1920 "st tx_queue rx_queue tr tm->when retrnsmt"
1921 " uid timeout inode\n");
1922 goto out;
1924 st = seq->private;
1926 if (sk->sk_state == TCP_TIME_WAIT)
1927 get_timewait6_sock(seq, v, st->num);
1928 else if (sk->sk_state == TCP_NEW_SYN_RECV)
1929 get_openreq6(seq, v, st->num);
1930 else
1931 get_tcp6_sock(seq, v, st->num);
1932 out:
1933 return 0;
1936 static const struct seq_operations tcp6_seq_ops = {
1937 .show = tcp6_seq_show,
1938 .start = tcp_seq_start,
1939 .next = tcp_seq_next,
1940 .stop = tcp_seq_stop,
1943 static struct tcp_seq_afinfo tcp6_seq_afinfo = {
1944 .family = AF_INET6,
1947 int __net_init tcp6_proc_init(struct net *net)
1949 if (!proc_create_net_data("tcp6", 0444, net->proc_net, &tcp6_seq_ops,
1950 sizeof(struct tcp_iter_state), &tcp6_seq_afinfo))
1951 return -ENOMEM;
1952 return 0;
1955 void tcp6_proc_exit(struct net *net)
1957 remove_proc_entry("tcp6", net->proc_net);
1959 #endif
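/* Protocol descriptor used by AF_INET6 SOCK_STREAM sockets; it is wired up
 * through tcpv6_protosw and inet6_register_protosw() in tcpv6_init() below.
 */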
1961 struct proto tcpv6_prot = {
1962 .name = "TCPv6",
1963 .owner = THIS_MODULE,
1964 .close = tcp_close,
1965 .pre_connect = tcp_v6_pre_connect,
1966 .connect = tcp_v6_connect,
1967 .disconnect = tcp_disconnect,
1968 .accept = inet_csk_accept,
1969 .ioctl = tcp_ioctl,
1970 .init = tcp_v6_init_sock,
1971 .destroy = tcp_v6_destroy_sock,
1972 .shutdown = tcp_shutdown,
1973 .setsockopt = tcp_setsockopt,
1974 .getsockopt = tcp_getsockopt,
1975 .keepalive = tcp_set_keepalive,
1976 .recvmsg = tcp_recvmsg,
1977 .sendmsg = tcp_sendmsg,
1978 .sendpage = tcp_sendpage,
1979 .backlog_rcv = tcp_v6_do_rcv,
1980 .release_cb = tcp_release_cb,
1981 .hash = inet6_hash,
1982 .unhash = inet_unhash,
1983 .get_port = inet_csk_get_port,
1984 .enter_memory_pressure = tcp_enter_memory_pressure,
1985 .leave_memory_pressure = tcp_leave_memory_pressure,
1986 .stream_memory_free = tcp_stream_memory_free,
1987 .sockets_allocated = &tcp_sockets_allocated,
1988 .memory_allocated = &tcp_memory_allocated,
1989 .memory_pressure = &tcp_memory_pressure,
1990 .orphan_count = &tcp_orphan_count,
1991 .sysctl_mem = sysctl_tcp_mem,
1992 .sysctl_wmem_offset = offsetof(struct net, ipv4.sysctl_tcp_wmem),
1993 .sysctl_rmem_offset = offsetof(struct net, ipv4.sysctl_tcp_rmem),
1994 .max_header = MAX_TCP_HEADER,
1995 .obj_size = sizeof(struct tcp6_sock),
1996 .slab_flags = SLAB_TYPESAFE_BY_RCU,
1997 .twsk_prot = &tcp6_timewait_sock_ops,
1998 .rsk_prot = &tcp6_request_sock_ops,
1999 .h.hashinfo = &tcp_hashinfo,
2000 .no_autobind = true,
2001 #ifdef CONFIG_COMPAT
2002 .compat_setsockopt = compat_tcp_setsockopt,
2003 .compat_getsockopt = compat_tcp_getsockopt,
2004 #endif
2005 .diag_destroy = tcp_abort,
2008 /* thinking of making this const? Don't.
2009 * early_demux can change based on sysctl.
2011 static struct inet6_protocol tcpv6_protocol = {
2012 .early_demux = tcp_v6_early_demux,
2013 .early_demux_handler = tcp_v6_early_demux,
2014 .handler = tcp_v6_rcv,
2015 .err_handler = tcp_v6_err,
2016 .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
2019 static struct inet_protosw tcpv6_protosw = {
2020 .type = SOCK_STREAM,
2021 .protocol = IPPROTO_TCP,
2022 .prot = &tcpv6_prot,
2023 .ops = &inet6_stream_ops,
2024 .flags = INET_PROTOSW_PERMANENT |
2025 INET_PROTOSW_ICSK,
2028 static int __net_init tcpv6_net_init(struct net *net)
2030 return inet_ctl_sock_create(&net->ipv6.tcp_sk, PF_INET6,
2031 SOCK_RAW, IPPROTO_TCP, net);
2034 static void __net_exit tcpv6_net_exit(struct net *net)
2036 inet_ctl_sock_destroy(net->ipv6.tcp_sk);
2039 static void __net_exit tcpv6_net_exit_batch(struct list_head *net_exit_list)
2041 inet_twsk_purge(&tcp_hashinfo, AF_INET6);
2044 static struct pernet_operations tcpv6_net_ops = {
2045 .init = tcpv6_net_init,
2046 .exit = tcpv6_net_exit,
2047 .exit_batch = tcpv6_net_exit_batch,
2050 int __init tcpv6_init(void)
2052 int ret;
2054 ret = inet6_add_protocol(&tcpv6_protocol, IPPROTO_TCP);
2055 if (ret)
2056 goto out;
2058 /* register inet6 protocol */
2059 ret = inet6_register_protosw(&tcpv6_protosw);
2060 if (ret)
2061 goto out_tcpv6_protocol;
2063 ret = register_pernet_subsys(&tcpv6_net_ops);
2064 if (ret)
2065 goto out_tcpv6_protosw;
2066 out:
2067 return ret;
2069 out_tcpv6_protosw:
2070 inet6_unregister_protosw(&tcpv6_protosw);
2071 out_tcpv6_protocol:
2072 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);
2073 goto out;
2076 void tcpv6_exit(void)
2078 unregister_pernet_subsys(&tcpv6_net_ops);
2079 inet6_unregister_protosw(&tcpv6_protosw);
2080 inet6_del_protocol(&tcpv6_protocol, IPPROTO_TCP);