3 * Linux INET6 implementation
6 * Pedro Roque <roque@di.fc.ul.pt>
10 * linux/net/ipv4/tcp_input.c
11 * linux/net/ipv4/tcp_output.c
14 * Hideaki YOSHIFUJI : sin6_scope_id support
15 * YOSHIFUJI Hideaki @USAGI and: Support IPV6_V6ONLY socket option, which
16 * Alexey Kuznetsov allow both IPv4 and IPv6 sockets to bind
17 * a single port at the same time.
18 * YOSHIFUJI Hideaki @USAGI: convert /proc/net/tcp6 to seq_file.
20 * This program is free software; you can redistribute it and/or
21 * modify it under the terms of the GNU General Public License
22 * as published by the Free Software Foundation; either version
23 * 2 of the License, or (at your option) any later version.
26 #include <linux/bottom_half.h>
27 #include <linux/module.h>
28 #include <linux/errno.h>
29 #include <linux/types.h>
30 #include <linux/socket.h>
31 #include <linux/sockios.h>
32 #include <linux/net.h>
33 #include <linux/jiffies.h>
35 #include <linux/in6.h>
36 #include <linux/netdevice.h>
37 #include <linux/init.h>
38 #include <linux/jhash.h>
39 #include <linux/ipsec.h>
40 #include <linux/times.h>
41 #include <linux/slab.h>
42 #include <linux/uaccess.h>
43 #include <linux/ipv6.h>
44 #include <linux/icmpv6.h>
45 #include <linux/random.h>
48 #include <net/ndisc.h>
49 #include <net/inet6_hashtables.h>
50 #include <net/inet6_connection_sock.h>
52 #include <net/transp_v6.h>
53 #include <net/addrconf.h>
54 #include <net/ip6_route.h>
55 #include <net/ip6_checksum.h>
56 #include <net/inet_ecn.h>
57 #include <net/protocol.h>
60 #include <net/dsfield.h>
61 #include <net/timewait_sock.h>
62 #include <net/netdma.h>
63 #include <net/inet_common.h>
64 #include <net/secure_seq.h>
65 #include <net/tcp_memcontrol.h>
66 #include <net/busy_poll.h>
68 #include <linux/proc_fs.h>
69 #include <linux/seq_file.h>
71 #include <linux/crypto.h>
72 #include <linux/scatterlist.h>
74 static void tcp_v6_send_reset(struct sock
*sk
, struct sk_buff
*skb
);
75 static void tcp_v6_reqsk_send_ack(struct sock
*sk
, struct sk_buff
*skb
,
76 struct request_sock
*req
);
78 static int tcp_v6_do_rcv(struct sock
*sk
, struct sk_buff
*skb
);
80 static const struct inet_connection_sock_af_ops ipv6_mapped
;
81 static const struct inet_connection_sock_af_ops ipv6_specific
;
82 #ifdef CONFIG_TCP_MD5SIG
83 static const struct tcp_sock_af_ops tcp_sock_ipv6_specific
;
84 static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific
;
86 static struct tcp_md5sig_key
*tcp_v6_md5_do_lookup(struct sock
*sk
,
87 const struct in6_addr
*addr
)
93 static void inet6_sk_rx_dst_set(struct sock
*sk
, const struct sk_buff
*skb
)
95 struct dst_entry
*dst
= skb_dst(skb
);
96 const struct rt6_info
*rt
= (const struct rt6_info
*)dst
;
100 inet_sk(sk
)->rx_dst_ifindex
= skb
->skb_iif
;
102 inet6_sk(sk
)->rx_dst_cookie
= rt
->rt6i_node
->fn_sernum
;
105 static void tcp_v6_hash(struct sock
*sk
)
107 if (sk
->sk_state
!= TCP_CLOSE
) {
108 if (inet_csk(sk
)->icsk_af_ops
== &ipv6_mapped
) {
113 __inet6_hash(sk
, NULL
);
118 static __u32
tcp_v6_init_sequence(const struct sk_buff
*skb
)
120 return secure_tcpv6_sequence_number(ipv6_hdr(skb
)->daddr
.s6_addr32
,
121 ipv6_hdr(skb
)->saddr
.s6_addr32
,
123 tcp_hdr(skb
)->source
);
126 static int tcp_v6_connect(struct sock
*sk
, struct sockaddr
*uaddr
,
129 struct sockaddr_in6
*usin
= (struct sockaddr_in6
*) uaddr
;
130 struct inet_sock
*inet
= inet_sk(sk
);
131 struct inet_connection_sock
*icsk
= inet_csk(sk
);
132 struct ipv6_pinfo
*np
= inet6_sk(sk
);
133 struct tcp_sock
*tp
= tcp_sk(sk
);
134 struct in6_addr
*saddr
= NULL
, *final_p
, final
;
137 struct dst_entry
*dst
;
141 if (addr_len
< SIN6_LEN_RFC2133
)
144 if (usin
->sin6_family
!= AF_INET6
)
145 return -EAFNOSUPPORT
;
147 memset(&fl6
, 0, sizeof(fl6
));
150 fl6
.flowlabel
= usin
->sin6_flowinfo
&IPV6_FLOWINFO_MASK
;
151 IP6_ECN_flow_init(fl6
.flowlabel
);
152 if (fl6
.flowlabel
&IPV6_FLOWLABEL_MASK
) {
153 struct ip6_flowlabel
*flowlabel
;
154 flowlabel
= fl6_sock_lookup(sk
, fl6
.flowlabel
);
155 if (flowlabel
== NULL
)
157 fl6_sock_release(flowlabel
);
162 * connect() to INADDR_ANY means loopback (BSD'ism).
165 if (ipv6_addr_any(&usin
->sin6_addr
))
166 usin
->sin6_addr
.s6_addr
[15] = 0x1;
168 addr_type
= ipv6_addr_type(&usin
->sin6_addr
);
170 if (addr_type
& IPV6_ADDR_MULTICAST
)
173 if (addr_type
&IPV6_ADDR_LINKLOCAL
) {
174 if (addr_len
>= sizeof(struct sockaddr_in6
) &&
175 usin
->sin6_scope_id
) {
176 /* If interface is set while binding, indices
179 if (sk
->sk_bound_dev_if
&&
180 sk
->sk_bound_dev_if
!= usin
->sin6_scope_id
)
183 sk
->sk_bound_dev_if
= usin
->sin6_scope_id
;
186 /* Connect to link-local address requires an interface */
187 if (!sk
->sk_bound_dev_if
)
191 if (tp
->rx_opt
.ts_recent_stamp
&&
192 !ipv6_addr_equal(&sk
->sk_v6_daddr
, &usin
->sin6_addr
)) {
193 tp
->rx_opt
.ts_recent
= 0;
194 tp
->rx_opt
.ts_recent_stamp
= 0;
198 sk
->sk_v6_daddr
= usin
->sin6_addr
;
199 np
->flow_label
= fl6
.flowlabel
;
207 if (addr_type
== IPV6_ADDR_MAPPED
) {
208 u32 exthdrlen
= icsk
->icsk_ext_hdr_len
;
209 struct sockaddr_in sin
;
211 SOCK_DEBUG(sk
, "connect: ipv4 mapped\n");
213 if (__ipv6_only_sock(sk
))
216 sin
.sin_family
= AF_INET
;
217 sin
.sin_port
= usin
->sin6_port
;
218 sin
.sin_addr
.s_addr
= usin
->sin6_addr
.s6_addr32
[3];
220 icsk
->icsk_af_ops
= &ipv6_mapped
;
221 sk
->sk_backlog_rcv
= tcp_v4_do_rcv
;
222 #ifdef CONFIG_TCP_MD5SIG
223 tp
->af_specific
= &tcp_sock_ipv6_mapped_specific
;
226 err
= tcp_v4_connect(sk
, (struct sockaddr
*)&sin
, sizeof(sin
));
229 icsk
->icsk_ext_hdr_len
= exthdrlen
;
230 icsk
->icsk_af_ops
= &ipv6_specific
;
231 sk
->sk_backlog_rcv
= tcp_v6_do_rcv
;
232 #ifdef CONFIG_TCP_MD5SIG
233 tp
->af_specific
= &tcp_sock_ipv6_specific
;
237 ipv6_addr_set_v4mapped(inet
->inet_saddr
, &np
->saddr
);
238 ipv6_addr_set_v4mapped(inet
->inet_rcv_saddr
,
239 &sk
->sk_v6_rcv_saddr
);
245 if (!ipv6_addr_any(&sk
->sk_v6_rcv_saddr
))
246 saddr
= &sk
->sk_v6_rcv_saddr
;
248 fl6
.flowi6_proto
= IPPROTO_TCP
;
249 fl6
.daddr
= sk
->sk_v6_daddr
;
250 fl6
.saddr
= saddr
? *saddr
: np
->saddr
;
251 fl6
.flowi6_oif
= sk
->sk_bound_dev_if
;
252 fl6
.flowi6_mark
= sk
->sk_mark
;
253 fl6
.fl6_dport
= usin
->sin6_port
;
254 fl6
.fl6_sport
= inet
->inet_sport
;
256 final_p
= fl6_update_dst(&fl6
, np
->opt
, &final
);
258 security_sk_classify_flow(sk
, flowi6_to_flowi(&fl6
));
260 dst
= ip6_dst_lookup_flow(sk
, &fl6
, final_p
);
268 sk
->sk_v6_rcv_saddr
= *saddr
;
271 /* set the source address */
273 inet
->inet_rcv_saddr
= LOOPBACK4_IPV6
;
275 sk
->sk_gso_type
= SKB_GSO_TCPV6
;
276 __ip6_dst_store(sk
, dst
, NULL
, NULL
);
278 rt
= (struct rt6_info
*) dst
;
279 if (tcp_death_row
.sysctl_tw_recycle
&&
280 !tp
->rx_opt
.ts_recent_stamp
&&
281 ipv6_addr_equal(&rt
->rt6i_dst
.addr
, &sk
->sk_v6_daddr
))
282 tcp_fetch_timewait_stamp(sk
, dst
);
284 icsk
->icsk_ext_hdr_len
= 0;
286 icsk
->icsk_ext_hdr_len
= (np
->opt
->opt_flen
+
289 tp
->rx_opt
.mss_clamp
= IPV6_MIN_MTU
- sizeof(struct tcphdr
) - sizeof(struct ipv6hdr
);
291 inet
->inet_dport
= usin
->sin6_port
;
293 tcp_set_state(sk
, TCP_SYN_SENT
);
294 err
= inet6_hash_connect(&tcp_death_row
, sk
);
298 if (!tp
->write_seq
&& likely(!tp
->repair
))
299 tp
->write_seq
= secure_tcpv6_sequence_number(np
->saddr
.s6_addr32
,
300 sk
->sk_v6_daddr
.s6_addr32
,
304 err
= tcp_connect(sk
);
311 tcp_set_state(sk
, TCP_CLOSE
);
314 inet
->inet_dport
= 0;
315 sk
->sk_route_caps
= 0;
319 static void tcp_v6_mtu_reduced(struct sock
*sk
)
321 struct dst_entry
*dst
;
323 if ((1 << sk
->sk_state
) & (TCPF_LISTEN
| TCPF_CLOSE
))
326 dst
= inet6_csk_update_pmtu(sk
, tcp_sk(sk
)->mtu_info
);
330 if (inet_csk(sk
)->icsk_pmtu_cookie
> dst_mtu(dst
)) {
331 tcp_sync_mss(sk
, dst_mtu(dst
));
332 tcp_simple_retransmit(sk
);
336 static void tcp_v6_err(struct sk_buff
*skb
, struct inet6_skb_parm
*opt
,
337 u8 type
, u8 code
, int offset
, __be32 info
)
339 const struct ipv6hdr
*hdr
= (const struct ipv6hdr
*)skb
->data
;
340 const struct tcphdr
*th
= (struct tcphdr
*)(skb
->data
+offset
);
341 struct ipv6_pinfo
*np
;
345 struct request_sock
*fastopen
;
347 struct net
*net
= dev_net(skb
->dev
);
349 sk
= inet6_lookup(net
, &tcp_hashinfo
, &hdr
->daddr
,
350 th
->dest
, &hdr
->saddr
, th
->source
, skb
->dev
->ifindex
);
353 ICMP6_INC_STATS_BH(net
, __in6_dev_get(skb
->dev
),
358 if (sk
->sk_state
== TCP_TIME_WAIT
) {
359 inet_twsk_put(inet_twsk(sk
));
364 if (sock_owned_by_user(sk
) && type
!= ICMPV6_PKT_TOOBIG
)
365 NET_INC_STATS_BH(net
, LINUX_MIB_LOCKDROPPEDICMPS
);
367 if (sk
->sk_state
== TCP_CLOSE
)
370 if (ipv6_hdr(skb
)->hop_limit
< inet6_sk(sk
)->min_hopcount
) {
371 NET_INC_STATS_BH(net
, LINUX_MIB_TCPMINTTLDROP
);
376 seq
= ntohl(th
->seq
);
377 /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child()) */
378 fastopen
= tp
->fastopen_rsk
;
379 snd_una
= fastopen
? tcp_rsk(fastopen
)->snt_isn
: tp
->snd_una
;
380 if (sk
->sk_state
!= TCP_LISTEN
&&
381 !between(seq
, snd_una
, tp
->snd_nxt
)) {
382 NET_INC_STATS_BH(net
, LINUX_MIB_OUTOFWINDOWICMPS
);
388 if (type
== NDISC_REDIRECT
) {
389 struct dst_entry
*dst
= __sk_dst_check(sk
, np
->dst_cookie
);
392 dst
->ops
->redirect(dst
, sk
, skb
);
396 if (type
== ICMPV6_PKT_TOOBIG
) {
397 /* We are not interested in TCP_LISTEN and open_requests
398 * (SYN-ACKs sent out by Linux are always <576bytes so
399 * they should go through unfragmented).
401 if (sk
->sk_state
== TCP_LISTEN
)
404 if (!ip6_sk_accept_pmtu(sk
))
407 tp
->mtu_info
= ntohl(info
);
408 if (!sock_owned_by_user(sk
))
409 tcp_v6_mtu_reduced(sk
);
410 else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED
,
416 icmpv6_err_convert(type
, code
, &err
);
418 /* Might be for a request_sock */
419 switch (sk
->sk_state
) {
420 struct request_sock
*req
, **prev
;
422 if (sock_owned_by_user(sk
))
425 req
= inet6_csk_search_req(sk
, &prev
, th
->dest
, &hdr
->daddr
,
426 &hdr
->saddr
, inet6_iif(skb
));
430 /* ICMPs are not backlogged, hence we cannot get
431 * an established socket here.
433 WARN_ON(req
->sk
!= NULL
);
435 if (seq
!= tcp_rsk(req
)->snt_isn
) {
436 NET_INC_STATS_BH(net
, LINUX_MIB_OUTOFWINDOWICMPS
);
440 inet_csk_reqsk_queue_drop(sk
, req
, prev
);
441 NET_INC_STATS_BH(sock_net(sk
), LINUX_MIB_LISTENDROPS
);
446 /* Only in fast or simultaneous open. If a fast open socket is
447 * already accepted it is treated as a connected one below.
449 if (fastopen
&& fastopen
->sk
== NULL
)
452 if (!sock_owned_by_user(sk
)) {
454 sk
->sk_error_report(sk
); /* Wake people up to see the error (see connect in sock.c) */
458 sk
->sk_err_soft
= err
;
462 if (!sock_owned_by_user(sk
) && np
->recverr
) {
464 sk
->sk_error_report(sk
);
466 sk
->sk_err_soft
= err
;
474 static int tcp_v6_send_synack(struct sock
*sk
, struct dst_entry
*dst
,
476 struct request_sock
*req
,
478 struct tcp_fastopen_cookie
*foc
)
480 struct inet_request_sock
*ireq
= inet_rsk(req
);
481 struct ipv6_pinfo
*np
= inet6_sk(sk
);
482 struct flowi6
*fl6
= &fl
->u
.ip6
;
486 /* First, grab a route. */
487 if (!dst
&& (dst
= inet6_csk_route_req(sk
, fl6
, req
)) == NULL
)
490 skb
= tcp_make_synack(sk
, dst
, req
, foc
);
493 __tcp_v6_send_check(skb
, &ireq
->ir_v6_loc_addr
,
494 &ireq
->ir_v6_rmt_addr
);
496 fl6
->daddr
= ireq
->ir_v6_rmt_addr
;
497 if (np
->repflow
&& (ireq
->pktopts
!= NULL
))
498 fl6
->flowlabel
= ip6_flowlabel(ipv6_hdr(ireq
->pktopts
));
500 skb_set_queue_mapping(skb
, queue_mapping
);
501 err
= ip6_xmit(sk
, skb
, fl6
, np
->opt
, np
->tclass
);
502 err
= net_xmit_eval(err
);
510 static void tcp_v6_reqsk_destructor(struct request_sock
*req
)
512 kfree_skb(inet_rsk(req
)->pktopts
);
515 #ifdef CONFIG_TCP_MD5SIG
516 static struct tcp_md5sig_key
*tcp_v6_md5_do_lookup(struct sock
*sk
,
517 const struct in6_addr
*addr
)
519 return tcp_md5_do_lookup(sk
, (union tcp_md5_addr
*)addr
, AF_INET6
);
522 static struct tcp_md5sig_key
*tcp_v6_md5_lookup(struct sock
*sk
,
523 struct sock
*addr_sk
)
525 return tcp_v6_md5_do_lookup(sk
, &addr_sk
->sk_v6_daddr
);
528 static struct tcp_md5sig_key
*tcp_v6_reqsk_md5_lookup(struct sock
*sk
,
529 struct request_sock
*req
)
531 return tcp_v6_md5_do_lookup(sk
, &inet_rsk(req
)->ir_v6_rmt_addr
);
534 static int tcp_v6_parse_md5_keys(struct sock
*sk
, char __user
*optval
,
537 struct tcp_md5sig cmd
;
538 struct sockaddr_in6
*sin6
= (struct sockaddr_in6
*)&cmd
.tcpm_addr
;
540 if (optlen
< sizeof(cmd
))
543 if (copy_from_user(&cmd
, optval
, sizeof(cmd
)))
546 if (sin6
->sin6_family
!= AF_INET6
)
549 if (!cmd
.tcpm_keylen
) {
550 if (ipv6_addr_v4mapped(&sin6
->sin6_addr
))
551 return tcp_md5_do_del(sk
, (union tcp_md5_addr
*)&sin6
->sin6_addr
.s6_addr32
[3],
553 return tcp_md5_do_del(sk
, (union tcp_md5_addr
*)&sin6
->sin6_addr
,
557 if (cmd
.tcpm_keylen
> TCP_MD5SIG_MAXKEYLEN
)
560 if (ipv6_addr_v4mapped(&sin6
->sin6_addr
))
561 return tcp_md5_do_add(sk
, (union tcp_md5_addr
*)&sin6
->sin6_addr
.s6_addr32
[3],
562 AF_INET
, cmd
.tcpm_key
, cmd
.tcpm_keylen
, GFP_KERNEL
);
564 return tcp_md5_do_add(sk
, (union tcp_md5_addr
*)&sin6
->sin6_addr
,
565 AF_INET6
, cmd
.tcpm_key
, cmd
.tcpm_keylen
, GFP_KERNEL
);
568 static int tcp_v6_md5_hash_pseudoheader(struct tcp_md5sig_pool
*hp
,
569 const struct in6_addr
*daddr
,
570 const struct in6_addr
*saddr
, int nbytes
)
572 struct tcp6_pseudohdr
*bp
;
573 struct scatterlist sg
;
575 bp
= &hp
->md5_blk
.ip6
;
576 /* 1. TCP pseudo-header (RFC2460) */
579 bp
->protocol
= cpu_to_be32(IPPROTO_TCP
);
580 bp
->len
= cpu_to_be32(nbytes
);
582 sg_init_one(&sg
, bp
, sizeof(*bp
));
583 return crypto_hash_update(&hp
->md5_desc
, &sg
, sizeof(*bp
));
586 static int tcp_v6_md5_hash_hdr(char *md5_hash
, struct tcp_md5sig_key
*key
,
587 const struct in6_addr
*daddr
, struct in6_addr
*saddr
,
588 const struct tcphdr
*th
)
590 struct tcp_md5sig_pool
*hp
;
591 struct hash_desc
*desc
;
593 hp
= tcp_get_md5sig_pool();
595 goto clear_hash_noput
;
596 desc
= &hp
->md5_desc
;
598 if (crypto_hash_init(desc
))
600 if (tcp_v6_md5_hash_pseudoheader(hp
, daddr
, saddr
, th
->doff
<< 2))
602 if (tcp_md5_hash_header(hp
, th
))
604 if (tcp_md5_hash_key(hp
, key
))
606 if (crypto_hash_final(desc
, md5_hash
))
609 tcp_put_md5sig_pool();
613 tcp_put_md5sig_pool();
615 memset(md5_hash
, 0, 16);
619 static int tcp_v6_md5_hash_skb(char *md5_hash
, struct tcp_md5sig_key
*key
,
620 const struct sock
*sk
,
621 const struct request_sock
*req
,
622 const struct sk_buff
*skb
)
624 const struct in6_addr
*saddr
, *daddr
;
625 struct tcp_md5sig_pool
*hp
;
626 struct hash_desc
*desc
;
627 const struct tcphdr
*th
= tcp_hdr(skb
);
630 saddr
= &inet6_sk(sk
)->saddr
;
631 daddr
= &sk
->sk_v6_daddr
;
633 saddr
= &inet_rsk(req
)->ir_v6_loc_addr
;
634 daddr
= &inet_rsk(req
)->ir_v6_rmt_addr
;
636 const struct ipv6hdr
*ip6h
= ipv6_hdr(skb
);
637 saddr
= &ip6h
->saddr
;
638 daddr
= &ip6h
->daddr
;
641 hp
= tcp_get_md5sig_pool();
643 goto clear_hash_noput
;
644 desc
= &hp
->md5_desc
;
646 if (crypto_hash_init(desc
))
649 if (tcp_v6_md5_hash_pseudoheader(hp
, daddr
, saddr
, skb
->len
))
651 if (tcp_md5_hash_header(hp
, th
))
653 if (tcp_md5_hash_skb_data(hp
, skb
, th
->doff
<< 2))
655 if (tcp_md5_hash_key(hp
, key
))
657 if (crypto_hash_final(desc
, md5_hash
))
660 tcp_put_md5sig_pool();
664 tcp_put_md5sig_pool();
666 memset(md5_hash
, 0, 16);
670 static int __tcp_v6_inbound_md5_hash(struct sock
*sk
,
671 const struct sk_buff
*skb
)
673 const __u8
*hash_location
= NULL
;
674 struct tcp_md5sig_key
*hash_expected
;
675 const struct ipv6hdr
*ip6h
= ipv6_hdr(skb
);
676 const struct tcphdr
*th
= tcp_hdr(skb
);
680 hash_expected
= tcp_v6_md5_do_lookup(sk
, &ip6h
->saddr
);
681 hash_location
= tcp_parse_md5sig_option(th
);
683 /* We've parsed the options - do we have a hash? */
684 if (!hash_expected
&& !hash_location
)
687 if (hash_expected
&& !hash_location
) {
688 NET_INC_STATS_BH(sock_net(sk
), LINUX_MIB_TCPMD5NOTFOUND
);
692 if (!hash_expected
&& hash_location
) {
693 NET_INC_STATS_BH(sock_net(sk
), LINUX_MIB_TCPMD5UNEXPECTED
);
697 /* check the signature */
698 genhash
= tcp_v6_md5_hash_skb(newhash
,
702 if (genhash
|| memcmp(hash_location
, newhash
, 16) != 0) {
703 net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n",
704 genhash
? "failed" : "mismatch",
705 &ip6h
->saddr
, ntohs(th
->source
),
706 &ip6h
->daddr
, ntohs(th
->dest
));
712 static int tcp_v6_inbound_md5_hash(struct sock
*sk
, const struct sk_buff
*skb
)
717 ret
= __tcp_v6_inbound_md5_hash(sk
, skb
);
725 static void tcp_v6_init_req(struct request_sock
*req
, struct sock
*sk
,
728 struct inet_request_sock
*ireq
= inet_rsk(req
);
729 struct ipv6_pinfo
*np
= inet6_sk(sk
);
731 ireq
->ir_v6_rmt_addr
= ipv6_hdr(skb
)->saddr
;
732 ireq
->ir_v6_loc_addr
= ipv6_hdr(skb
)->daddr
;
734 ireq
->ir_iif
= sk
->sk_bound_dev_if
;
736 /* So that link locals have meaning */
737 if (!sk
->sk_bound_dev_if
&&
738 ipv6_addr_type(&ireq
->ir_v6_rmt_addr
) & IPV6_ADDR_LINKLOCAL
)
739 ireq
->ir_iif
= inet6_iif(skb
);
741 if (!TCP_SKB_CB(skb
)->when
&&
742 (ipv6_opt_accepted(sk
, skb
) || np
->rxopt
.bits
.rxinfo
||
743 np
->rxopt
.bits
.rxoinfo
|| np
->rxopt
.bits
.rxhlim
||
744 np
->rxopt
.bits
.rxohlim
|| np
->repflow
)) {
745 atomic_inc(&skb
->users
);
750 static struct dst_entry
*tcp_v6_route_req(struct sock
*sk
, struct flowi
*fl
,
751 const struct request_sock
*req
,
756 return inet6_csk_route_req(sk
, &fl
->u
.ip6
, req
);
759 struct request_sock_ops tcp6_request_sock_ops __read_mostly
= {
761 .obj_size
= sizeof(struct tcp6_request_sock
),
762 .rtx_syn_ack
= tcp_rtx_synack
,
763 .send_ack
= tcp_v6_reqsk_send_ack
,
764 .destructor
= tcp_v6_reqsk_destructor
,
765 .send_reset
= tcp_v6_send_reset
,
766 .syn_ack_timeout
= tcp_syn_ack_timeout
,
769 static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops
= {
770 .mss_clamp
= IPV6_MIN_MTU
- sizeof(struct tcphdr
) -
771 sizeof(struct ipv6hdr
),
772 #ifdef CONFIG_TCP_MD5SIG
773 .md5_lookup
= tcp_v6_reqsk_md5_lookup
,
774 .calc_md5_hash
= tcp_v6_md5_hash_skb
,
776 .init_req
= tcp_v6_init_req
,
777 #ifdef CONFIG_SYN_COOKIES
778 .cookie_init_seq
= cookie_v6_init_sequence
,
780 .route_req
= tcp_v6_route_req
,
781 .init_seq
= tcp_v6_init_sequence
,
782 .send_synack
= tcp_v6_send_synack
,
783 .queue_hash_add
= inet6_csk_reqsk_queue_hash_add
,
786 static void tcp_v6_send_response(struct sk_buff
*skb
, u32 seq
, u32 ack
, u32 win
,
787 u32 tsval
, u32 tsecr
, int oif
,
788 struct tcp_md5sig_key
*key
, int rst
, u8 tclass
,
791 const struct tcphdr
*th
= tcp_hdr(skb
);
793 struct sk_buff
*buff
;
795 struct net
*net
= dev_net(skb_dst(skb
)->dev
);
796 struct sock
*ctl_sk
= net
->ipv6
.tcp_sk
;
797 unsigned int tot_len
= sizeof(struct tcphdr
);
798 struct dst_entry
*dst
;
802 tot_len
+= TCPOLEN_TSTAMP_ALIGNED
;
803 #ifdef CONFIG_TCP_MD5SIG
805 tot_len
+= TCPOLEN_MD5SIG_ALIGNED
;
808 buff
= alloc_skb(MAX_HEADER
+ sizeof(struct ipv6hdr
) + tot_len
,
813 skb_reserve(buff
, MAX_HEADER
+ sizeof(struct ipv6hdr
) + tot_len
);
815 t1
= (struct tcphdr
*) skb_push(buff
, tot_len
);
816 skb_reset_transport_header(buff
);
818 /* Swap the send and the receive. */
819 memset(t1
, 0, sizeof(*t1
));
820 t1
->dest
= th
->source
;
821 t1
->source
= th
->dest
;
822 t1
->doff
= tot_len
/ 4;
823 t1
->seq
= htonl(seq
);
824 t1
->ack_seq
= htonl(ack
);
825 t1
->ack
= !rst
|| !th
->ack
;
827 t1
->window
= htons(win
);
829 topt
= (__be32
*)(t1
+ 1);
832 *topt
++ = htonl((TCPOPT_NOP
<< 24) | (TCPOPT_NOP
<< 16) |
833 (TCPOPT_TIMESTAMP
<< 8) | TCPOLEN_TIMESTAMP
);
834 *topt
++ = htonl(tsval
);
835 *topt
++ = htonl(tsecr
);
838 #ifdef CONFIG_TCP_MD5SIG
840 *topt
++ = htonl((TCPOPT_NOP
<< 24) | (TCPOPT_NOP
<< 16) |
841 (TCPOPT_MD5SIG
<< 8) | TCPOLEN_MD5SIG
);
842 tcp_v6_md5_hash_hdr((__u8
*)topt
, key
,
843 &ipv6_hdr(skb
)->saddr
,
844 &ipv6_hdr(skb
)->daddr
, t1
);
848 memset(&fl6
, 0, sizeof(fl6
));
849 fl6
.daddr
= ipv6_hdr(skb
)->saddr
;
850 fl6
.saddr
= ipv6_hdr(skb
)->daddr
;
851 fl6
.flowlabel
= label
;
853 buff
->ip_summed
= CHECKSUM_PARTIAL
;
856 __tcp_v6_send_check(buff
, &fl6
.saddr
, &fl6
.daddr
);
858 fl6
.flowi6_proto
= IPPROTO_TCP
;
859 if (rt6_need_strict(&fl6
.daddr
) && !oif
)
860 fl6
.flowi6_oif
= inet6_iif(skb
);
862 fl6
.flowi6_oif
= oif
;
863 fl6
.flowi6_mark
= IP6_REPLY_MARK(net
, skb
->mark
);
864 fl6
.fl6_dport
= t1
->dest
;
865 fl6
.fl6_sport
= t1
->source
;
866 security_skb_classify_flow(skb
, flowi6_to_flowi(&fl6
));
868 /* Pass a socket to ip6_dst_lookup either it is for RST
869 * Underlying function will use this to retrieve the network
872 dst
= ip6_dst_lookup_flow(ctl_sk
, &fl6
, NULL
);
874 skb_dst_set(buff
, dst
);
875 ip6_xmit(ctl_sk
, buff
, &fl6
, NULL
, tclass
);
876 TCP_INC_STATS_BH(net
, TCP_MIB_OUTSEGS
);
878 TCP_INC_STATS_BH(net
, TCP_MIB_OUTRSTS
);
885 static void tcp_v6_send_reset(struct sock
*sk
, struct sk_buff
*skb
)
887 const struct tcphdr
*th
= tcp_hdr(skb
);
888 u32 seq
= 0, ack_seq
= 0;
889 struct tcp_md5sig_key
*key
= NULL
;
890 #ifdef CONFIG_TCP_MD5SIG
891 const __u8
*hash_location
= NULL
;
892 struct ipv6hdr
*ipv6h
= ipv6_hdr(skb
);
893 unsigned char newhash
[16];
895 struct sock
*sk1
= NULL
;
902 if (!ipv6_unicast_destination(skb
))
905 #ifdef CONFIG_TCP_MD5SIG
906 hash_location
= tcp_parse_md5sig_option(th
);
907 if (!sk
&& hash_location
) {
909 * active side is lost. Try to find listening socket through
910 * source port, and then find md5 key through listening socket.
911 * we do not lose security here:
912 * Incoming packet is checked with md5 hash with finding key,
913 * no RST generated if md5 hash doesn't match.
915 sk1
= inet6_lookup_listener(dev_net(skb_dst(skb
)->dev
),
916 &tcp_hashinfo
, &ipv6h
->saddr
,
917 th
->source
, &ipv6h
->daddr
,
918 ntohs(th
->source
), inet6_iif(skb
));
923 key
= tcp_v6_md5_do_lookup(sk1
, &ipv6h
->saddr
);
927 genhash
= tcp_v6_md5_hash_skb(newhash
, key
, NULL
, NULL
, skb
);
928 if (genhash
|| memcmp(hash_location
, newhash
, 16) != 0)
931 key
= sk
? tcp_v6_md5_do_lookup(sk
, &ipv6h
->saddr
) : NULL
;
936 seq
= ntohl(th
->ack_seq
);
938 ack_seq
= ntohl(th
->seq
) + th
->syn
+ th
->fin
+ skb
->len
-
941 oif
= sk
? sk
->sk_bound_dev_if
: 0;
942 tcp_v6_send_response(skb
, seq
, ack_seq
, 0, 0, 0, oif
, key
, 1, 0, 0);
944 #ifdef CONFIG_TCP_MD5SIG
953 static void tcp_v6_send_ack(struct sk_buff
*skb
, u32 seq
, u32 ack
,
954 u32 win
, u32 tsval
, u32 tsecr
, int oif
,
955 struct tcp_md5sig_key
*key
, u8 tclass
,
958 tcp_v6_send_response(skb
, seq
, ack
, win
, tsval
, tsecr
, oif
, key
, 0, tclass
,
962 static void tcp_v6_timewait_ack(struct sock
*sk
, struct sk_buff
*skb
)
964 struct inet_timewait_sock
*tw
= inet_twsk(sk
);
965 struct tcp_timewait_sock
*tcptw
= tcp_twsk(sk
);
967 tcp_v6_send_ack(skb
, tcptw
->tw_snd_nxt
, tcptw
->tw_rcv_nxt
,
968 tcptw
->tw_rcv_wnd
>> tw
->tw_rcv_wscale
,
969 tcp_time_stamp
+ tcptw
->tw_ts_offset
,
970 tcptw
->tw_ts_recent
, tw
->tw_bound_dev_if
, tcp_twsk_md5_key(tcptw
),
971 tw
->tw_tclass
, (tw
->tw_flowlabel
<< 12));
976 static void tcp_v6_reqsk_send_ack(struct sock
*sk
, struct sk_buff
*skb
,
977 struct request_sock
*req
)
979 /* sk->sk_state == TCP_LISTEN -> for regular TCP_SYN_RECV
980 * sk->sk_state == TCP_SYN_RECV -> for Fast Open.
982 tcp_v6_send_ack(skb
, (sk
->sk_state
== TCP_LISTEN
) ?
983 tcp_rsk(req
)->snt_isn
+ 1 : tcp_sk(sk
)->snd_nxt
,
984 tcp_rsk(req
)->rcv_nxt
,
985 req
->rcv_wnd
, tcp_time_stamp
, req
->ts_recent
, sk
->sk_bound_dev_if
,
986 tcp_v6_md5_do_lookup(sk
, &ipv6_hdr(skb
)->daddr
),
991 static struct sock
*tcp_v6_hnd_req(struct sock
*sk
, struct sk_buff
*skb
)
993 struct request_sock
*req
, **prev
;
994 const struct tcphdr
*th
= tcp_hdr(skb
);
997 /* Find possible connection requests. */
998 req
= inet6_csk_search_req(sk
, &prev
, th
->source
,
999 &ipv6_hdr(skb
)->saddr
,
1000 &ipv6_hdr(skb
)->daddr
, inet6_iif(skb
));
1002 return tcp_check_req(sk
, skb
, req
, prev
, false);
1004 nsk
= __inet6_lookup_established(sock_net(sk
), &tcp_hashinfo
,
1005 &ipv6_hdr(skb
)->saddr
, th
->source
,
1006 &ipv6_hdr(skb
)->daddr
, ntohs(th
->dest
), inet6_iif(skb
));
1009 if (nsk
->sk_state
!= TCP_TIME_WAIT
) {
1013 inet_twsk_put(inet_twsk(nsk
));
1017 #ifdef CONFIG_SYN_COOKIES
1019 sk
= cookie_v6_check(sk
, skb
);
1024 static int tcp_v6_conn_request(struct sock
*sk
, struct sk_buff
*skb
)
1026 if (skb
->protocol
== htons(ETH_P_IP
))
1027 return tcp_v4_conn_request(sk
, skb
);
1029 if (!ipv6_unicast_destination(skb
))
1032 return tcp_conn_request(&tcp6_request_sock_ops
,
1033 &tcp_request_sock_ipv6_ops
, sk
, skb
);
1036 NET_INC_STATS_BH(sock_net(sk
), LINUX_MIB_LISTENDROPS
);
1037 return 0; /* don't send reset */
1040 static struct sock
*tcp_v6_syn_recv_sock(struct sock
*sk
, struct sk_buff
*skb
,
1041 struct request_sock
*req
,
1042 struct dst_entry
*dst
)
1044 struct inet_request_sock
*ireq
;
1045 struct ipv6_pinfo
*newnp
, *np
= inet6_sk(sk
);
1046 struct tcp6_sock
*newtcp6sk
;
1047 struct inet_sock
*newinet
;
1048 struct tcp_sock
*newtp
;
1050 #ifdef CONFIG_TCP_MD5SIG
1051 struct tcp_md5sig_key
*key
;
1055 if (skb
->protocol
== htons(ETH_P_IP
)) {
1060 newsk
= tcp_v4_syn_recv_sock(sk
, skb
, req
, dst
);
1065 newtcp6sk
= (struct tcp6_sock
*)newsk
;
1066 inet_sk(newsk
)->pinet6
= &newtcp6sk
->inet6
;
1068 newinet
= inet_sk(newsk
);
1069 newnp
= inet6_sk(newsk
);
1070 newtp
= tcp_sk(newsk
);
1072 memcpy(newnp
, np
, sizeof(struct ipv6_pinfo
));
1074 ipv6_addr_set_v4mapped(newinet
->inet_daddr
, &newsk
->sk_v6_daddr
);
1076 ipv6_addr_set_v4mapped(newinet
->inet_saddr
, &newnp
->saddr
);
1078 newsk
->sk_v6_rcv_saddr
= newnp
->saddr
;
1080 inet_csk(newsk
)->icsk_af_ops
= &ipv6_mapped
;
1081 newsk
->sk_backlog_rcv
= tcp_v4_do_rcv
;
1082 #ifdef CONFIG_TCP_MD5SIG
1083 newtp
->af_specific
= &tcp_sock_ipv6_mapped_specific
;
1086 newnp
->ipv6_ac_list
= NULL
;
1087 newnp
->ipv6_fl_list
= NULL
;
1088 newnp
->pktoptions
= NULL
;
1090 newnp
->mcast_oif
= inet6_iif(skb
);
1091 newnp
->mcast_hops
= ipv6_hdr(skb
)->hop_limit
;
1092 newnp
->rcv_flowinfo
= ip6_flowinfo(ipv6_hdr(skb
));
1094 newnp
->flow_label
= ip6_flowlabel(ipv6_hdr(skb
));
1097 * No need to charge this sock to the relevant IPv6 refcnt debug socks count
1098 * here, tcp_create_openreq_child now does this for us, see the comment in
1099 * that function for the gory details. -acme
1102 /* It is a tricky place. Until this moment IPv4 tcp
1103 worked with IPv6 icsk.icsk_af_ops.
1106 tcp_sync_mss(newsk
, inet_csk(newsk
)->icsk_pmtu_cookie
);
1111 ireq
= inet_rsk(req
);
1113 if (sk_acceptq_is_full(sk
))
1117 dst
= inet6_csk_route_req(sk
, &fl6
, req
);
1122 newsk
= tcp_create_openreq_child(sk
, req
, skb
);
1127 * No need to charge this sock to the relevant IPv6 refcnt debug socks
1128 * count here, tcp_create_openreq_child now does this for us, see the
1129 * comment in that function for the gory details. -acme
1132 newsk
->sk_gso_type
= SKB_GSO_TCPV6
;
1133 __ip6_dst_store(newsk
, dst
, NULL
, NULL
);
1134 inet6_sk_rx_dst_set(newsk
, skb
);
1136 newtcp6sk
= (struct tcp6_sock
*)newsk
;
1137 inet_sk(newsk
)->pinet6
= &newtcp6sk
->inet6
;
1139 newtp
= tcp_sk(newsk
);
1140 newinet
= inet_sk(newsk
);
1141 newnp
= inet6_sk(newsk
);
1143 memcpy(newnp
, np
, sizeof(struct ipv6_pinfo
));
1145 newsk
->sk_v6_daddr
= ireq
->ir_v6_rmt_addr
;
1146 newnp
->saddr
= ireq
->ir_v6_loc_addr
;
1147 newsk
->sk_v6_rcv_saddr
= ireq
->ir_v6_loc_addr
;
1148 newsk
->sk_bound_dev_if
= ireq
->ir_iif
;
1150 ip6_set_txhash(newsk
);
1152 /* Now IPv6 options...
1154 First: no IPv4 options.
1156 newinet
->inet_opt
= NULL
;
1157 newnp
->ipv6_ac_list
= NULL
;
1158 newnp
->ipv6_fl_list
= NULL
;
1161 newnp
->rxopt
.all
= np
->rxopt
.all
;
1163 /* Clone pktoptions received with SYN */
1164 newnp
->pktoptions
= NULL
;
1165 if (ireq
->pktopts
!= NULL
) {
1166 newnp
->pktoptions
= skb_clone(ireq
->pktopts
,
1167 sk_gfp_atomic(sk
, GFP_ATOMIC
));
1168 consume_skb(ireq
->pktopts
);
1169 ireq
->pktopts
= NULL
;
1170 if (newnp
->pktoptions
)
1171 skb_set_owner_r(newnp
->pktoptions
, newsk
);
1174 newnp
->mcast_oif
= inet6_iif(skb
);
1175 newnp
->mcast_hops
= ipv6_hdr(skb
)->hop_limit
;
1176 newnp
->rcv_flowinfo
= ip6_flowinfo(ipv6_hdr(skb
));
1178 newnp
->flow_label
= ip6_flowlabel(ipv6_hdr(skb
));
1180 /* Clone native IPv6 options from listening socket (if any)
1182 Yes, keeping reference count would be much more clever,
1183 but we make one more thing there: reattach optmem
1187 newnp
->opt
= ipv6_dup_options(newsk
, np
->opt
);
1189 inet_csk(newsk
)->icsk_ext_hdr_len
= 0;
1191 inet_csk(newsk
)->icsk_ext_hdr_len
= (newnp
->opt
->opt_nflen
+
1192 newnp
->opt
->opt_flen
);
1194 tcp_sync_mss(newsk
, dst_mtu(dst
));
1195 newtp
->advmss
= dst_metric_advmss(dst
);
1196 if (tcp_sk(sk
)->rx_opt
.user_mss
&&
1197 tcp_sk(sk
)->rx_opt
.user_mss
< newtp
->advmss
)
1198 newtp
->advmss
= tcp_sk(sk
)->rx_opt
.user_mss
;
1200 tcp_initialize_rcv_mss(newsk
);
1202 newinet
->inet_daddr
= newinet
->inet_saddr
= LOOPBACK4_IPV6
;
1203 newinet
->inet_rcv_saddr
= LOOPBACK4_IPV6
;
1205 #ifdef CONFIG_TCP_MD5SIG
1206 /* Copy over the MD5 key from the original socket */
1207 key
= tcp_v6_md5_do_lookup(sk
, &newsk
->sk_v6_daddr
);
1209 /* We're using one, so create a matching key
1210 * on the newsk structure. If we fail to get
1211 * memory, then we end up not copying the key
1214 tcp_md5_do_add(newsk
, (union tcp_md5_addr
*)&newsk
->sk_v6_daddr
,
1215 AF_INET6
, key
->key
, key
->keylen
,
1216 sk_gfp_atomic(sk
, GFP_ATOMIC
));
1220 if (__inet_inherit_port(sk
, newsk
) < 0) {
1221 inet_csk_prepare_forced_close(newsk
);
1225 __inet6_hash(newsk
, NULL
);
1230 NET_INC_STATS_BH(sock_net(sk
), LINUX_MIB_LISTENOVERFLOWS
);
1234 NET_INC_STATS_BH(sock_net(sk
), LINUX_MIB_LISTENDROPS
);
1238 /* The socket must have its spinlock held when we get
1241 * We have a potential double-lock case here, so even when
1242 * doing backlog processing we use the BH locking scheme.
1243 * This is because we cannot sleep with the original spinlock
1246 static int tcp_v6_do_rcv(struct sock
*sk
, struct sk_buff
*skb
)
1248 struct ipv6_pinfo
*np
= inet6_sk(sk
);
1249 struct tcp_sock
*tp
;
1250 struct sk_buff
*opt_skb
= NULL
;
1252 /* Imagine: socket is IPv6. IPv4 packet arrives,
1253 goes to IPv4 receive handler and backlogged.
1254 From backlog it always goes here. Kerboom...
1255 Fortunately, tcp_rcv_established and rcv_established
1256 handle them correctly, but it is not case with
1257 tcp_v6_hnd_req and tcp_v6_send_reset(). --ANK
1260 if (skb
->protocol
== htons(ETH_P_IP
))
1261 return tcp_v4_do_rcv(sk
, skb
);
1263 if (sk_filter(sk
, skb
))
1267 * socket locking is here for SMP purposes as backlog rcv
1268 * is currently called with bh processing disabled.
1271 /* Do Stevens' IPV6_PKTOPTIONS.
1273 Yes, guys, it is the only place in our code, where we
1274 may make it not affecting IPv4.
1275 The rest of code is protocol independent,
1276 and I do not like idea to uglify IPv4.
1278 Actually, all the idea behind IPV6_PKTOPTIONS
1279 looks not very well thought. For now we latch
1280 options, received in the last packet, enqueued
1281 by tcp. Feel free to propose better solution.
1285 opt_skb
= skb_clone(skb
, sk_gfp_atomic(sk
, GFP_ATOMIC
));
1287 if (sk
->sk_state
== TCP_ESTABLISHED
) { /* Fast path */
1288 struct dst_entry
*dst
= sk
->sk_rx_dst
;
1290 sock_rps_save_rxhash(sk
, skb
);
1292 if (inet_sk(sk
)->rx_dst_ifindex
!= skb
->skb_iif
||
1293 dst
->ops
->check(dst
, np
->rx_dst_cookie
) == NULL
) {
1295 sk
->sk_rx_dst
= NULL
;
1299 tcp_rcv_established(sk
, skb
, tcp_hdr(skb
), skb
->len
);
1301 goto ipv6_pktoptions
;
1305 if (skb
->len
< tcp_hdrlen(skb
) || tcp_checksum_complete(skb
))
1308 if (sk
->sk_state
== TCP_LISTEN
) {
1309 struct sock
*nsk
= tcp_v6_hnd_req(sk
, skb
);
1314 * Queue it on the new socket if the new socket is active,
1315 * otherwise we just shortcircuit this and continue with
1319 sock_rps_save_rxhash(nsk
, skb
);
1320 if (tcp_child_process(sk
, nsk
, skb
))
1323 __kfree_skb(opt_skb
);
1327 sock_rps_save_rxhash(sk
, skb
);
1329 if (tcp_rcv_state_process(sk
, skb
, tcp_hdr(skb
), skb
->len
))
1332 goto ipv6_pktoptions
;
1336 tcp_v6_send_reset(sk
, skb
);
1339 __kfree_skb(opt_skb
);
1343 TCP_INC_STATS_BH(sock_net(sk
), TCP_MIB_CSUMERRORS
);
1344 TCP_INC_STATS_BH(sock_net(sk
), TCP_MIB_INERRS
);
1349 /* Do you ask, what is it?
1351 1. skb was enqueued by tcp.
1352 2. skb is added to tail of read queue, rather than out of order.
1353 3. socket is not in passive state.
1354 4. Finally, it really contains options, which user wants to receive.
1357 if (TCP_SKB_CB(opt_skb
)->end_seq
== tp
->rcv_nxt
&&
1358 !((1 << sk
->sk_state
) & (TCPF_CLOSE
| TCPF_LISTEN
))) {
1359 if (np
->rxopt
.bits
.rxinfo
|| np
->rxopt
.bits
.rxoinfo
)
1360 np
->mcast_oif
= inet6_iif(opt_skb
);
1361 if (np
->rxopt
.bits
.rxhlim
|| np
->rxopt
.bits
.rxohlim
)
1362 np
->mcast_hops
= ipv6_hdr(opt_skb
)->hop_limit
;
1363 if (np
->rxopt
.bits
.rxflow
|| np
->rxopt
.bits
.rxtclass
)
1364 np
->rcv_flowinfo
= ip6_flowinfo(ipv6_hdr(opt_skb
));
1366 np
->flow_label
= ip6_flowlabel(ipv6_hdr(opt_skb
));
1367 if (ipv6_opt_accepted(sk
, opt_skb
)) {
1368 skb_set_owner_r(opt_skb
, sk
);
1369 opt_skb
= xchg(&np
->pktoptions
, opt_skb
);
1371 __kfree_skb(opt_skb
);
1372 opt_skb
= xchg(&np
->pktoptions
, NULL
);
1380 static int tcp_v6_rcv(struct sk_buff
*skb
)
1382 const struct tcphdr
*th
;
1383 const struct ipv6hdr
*hdr
;
1386 struct net
*net
= dev_net(skb
->dev
);
1388 if (skb
->pkt_type
!= PACKET_HOST
)
1392 * Count it even if it's bad.
1394 TCP_INC_STATS_BH(net
, TCP_MIB_INSEGS
);
1396 if (!pskb_may_pull(skb
, sizeof(struct tcphdr
)))
1401 if (th
->doff
< sizeof(struct tcphdr
)/4)
1403 if (!pskb_may_pull(skb
, th
->doff
*4))
1406 if (skb_checksum_init(skb
, IPPROTO_TCP
, ip6_compute_pseudo
))
1410 hdr
= ipv6_hdr(skb
);
1411 TCP_SKB_CB(skb
)->seq
= ntohl(th
->seq
);
1412 TCP_SKB_CB(skb
)->end_seq
= (TCP_SKB_CB(skb
)->seq
+ th
->syn
+ th
->fin
+
1413 skb
->len
- th
->doff
*4);
1414 TCP_SKB_CB(skb
)->ack_seq
= ntohl(th
->ack_seq
);
1415 TCP_SKB_CB(skb
)->when
= 0;
1416 TCP_SKB_CB(skb
)->ip_dsfield
= ipv6_get_dsfield(hdr
);
1417 TCP_SKB_CB(skb
)->sacked
= 0;
1419 sk
= __inet6_lookup_skb(&tcp_hashinfo
, skb
, th
->source
, th
->dest
);
1424 if (sk
->sk_state
== TCP_TIME_WAIT
)
1427 if (hdr
->hop_limit
< inet6_sk(sk
)->min_hopcount
) {
1428 NET_INC_STATS_BH(net
, LINUX_MIB_TCPMINTTLDROP
);
1429 goto discard_and_relse
;
1432 if (!xfrm6_policy_check(sk
, XFRM_POLICY_IN
, skb
))
1433 goto discard_and_relse
;
1435 #ifdef CONFIG_TCP_MD5SIG
1436 if (tcp_v6_inbound_md5_hash(sk
, skb
))
1437 goto discard_and_relse
;
1440 if (sk_filter(sk
, skb
))
1441 goto discard_and_relse
;
1443 sk_mark_napi_id(sk
, skb
);
1446 bh_lock_sock_nested(sk
);
1448 if (!sock_owned_by_user(sk
)) {
1449 #ifdef CONFIG_NET_DMA
1450 struct tcp_sock
*tp
= tcp_sk(sk
);
1451 if (!tp
->ucopy
.dma_chan
&& tp
->ucopy
.pinned_list
)
1452 tp
->ucopy
.dma_chan
= net_dma_find_channel();
1453 if (tp
->ucopy
.dma_chan
)
1454 ret
= tcp_v6_do_rcv(sk
, skb
);
1458 if (!tcp_prequeue(sk
, skb
))
1459 ret
= tcp_v6_do_rcv(sk
, skb
);
1461 } else if (unlikely(sk_add_backlog(sk
, skb
,
1462 sk
->sk_rcvbuf
+ sk
->sk_sndbuf
))) {
1464 NET_INC_STATS_BH(net
, LINUX_MIB_TCPBACKLOGDROP
);
1465 goto discard_and_relse
;
1470 return ret
? -1 : 0;
1473 if (!xfrm6_policy_check(NULL
, XFRM_POLICY_IN
, skb
))
1476 if (skb
->len
< (th
->doff
<<2) || tcp_checksum_complete(skb
)) {
1478 TCP_INC_STATS_BH(net
, TCP_MIB_CSUMERRORS
);
1480 TCP_INC_STATS_BH(net
, TCP_MIB_INERRS
);
1482 tcp_v6_send_reset(NULL
, skb
);
1494 if (!xfrm6_policy_check(NULL
, XFRM_POLICY_IN
, skb
)) {
1495 inet_twsk_put(inet_twsk(sk
));
1499 if (skb
->len
< (th
->doff
<<2)) {
1500 inet_twsk_put(inet_twsk(sk
));
1503 if (tcp_checksum_complete(skb
)) {
1504 inet_twsk_put(inet_twsk(sk
));
1508 switch (tcp_timewait_state_process(inet_twsk(sk
), skb
, th
)) {
1513 sk2
= inet6_lookup_listener(dev_net(skb
->dev
), &tcp_hashinfo
,
1514 &ipv6_hdr(skb
)->saddr
, th
->source
,
1515 &ipv6_hdr(skb
)->daddr
,
1516 ntohs(th
->dest
), inet6_iif(skb
));
1518 struct inet_timewait_sock
*tw
= inet_twsk(sk
);
1519 inet_twsk_deschedule(tw
, &tcp_death_row
);
1524 /* Fall through to ACK */
1527 tcp_v6_timewait_ack(sk
, skb
);
1531 case TCP_TW_SUCCESS
:
1537 static void tcp_v6_early_demux(struct sk_buff
*skb
)
1539 const struct ipv6hdr
*hdr
;
1540 const struct tcphdr
*th
;
1543 if (skb
->pkt_type
!= PACKET_HOST
)
1546 if (!pskb_may_pull(skb
, skb_transport_offset(skb
) + sizeof(struct tcphdr
)))
1549 hdr
= ipv6_hdr(skb
);
1552 if (th
->doff
< sizeof(struct tcphdr
) / 4)
1555 sk
= __inet6_lookup_established(dev_net(skb
->dev
), &tcp_hashinfo
,
1556 &hdr
->saddr
, th
->source
,
1557 &hdr
->daddr
, ntohs(th
->dest
),
1561 skb
->destructor
= sock_edemux
;
1562 if (sk
->sk_state
!= TCP_TIME_WAIT
) {
1563 struct dst_entry
*dst
= sk
->sk_rx_dst
;
1566 dst
= dst_check(dst
, inet6_sk(sk
)->rx_dst_cookie
);
1568 inet_sk(sk
)->rx_dst_ifindex
== skb
->skb_iif
)
1569 skb_dst_set_noref(skb
, dst
);
1574 static struct timewait_sock_ops tcp6_timewait_sock_ops
= {
1575 .twsk_obj_size
= sizeof(struct tcp6_timewait_sock
),
1576 .twsk_unique
= tcp_twsk_unique
,
1577 .twsk_destructor
= tcp_twsk_destructor
,
1580 static const struct inet_connection_sock_af_ops ipv6_specific
= {
1581 .queue_xmit
= inet6_csk_xmit
,
1582 .send_check
= tcp_v6_send_check
,
1583 .rebuild_header
= inet6_sk_rebuild_header
,
1584 .sk_rx_dst_set
= inet6_sk_rx_dst_set
,
1585 .conn_request
= tcp_v6_conn_request
,
1586 .syn_recv_sock
= tcp_v6_syn_recv_sock
,
1587 .net_header_len
= sizeof(struct ipv6hdr
),
1588 .net_frag_header_len
= sizeof(struct frag_hdr
),
1589 .setsockopt
= ipv6_setsockopt
,
1590 .getsockopt
= ipv6_getsockopt
,
1591 .addr2sockaddr
= inet6_csk_addr2sockaddr
,
1592 .sockaddr_len
= sizeof(struct sockaddr_in6
),
1593 .bind_conflict
= inet6_csk_bind_conflict
,
1594 #ifdef CONFIG_COMPAT
1595 .compat_setsockopt
= compat_ipv6_setsockopt
,
1596 .compat_getsockopt
= compat_ipv6_getsockopt
,
1598 .mtu_reduced
= tcp_v6_mtu_reduced
,
#ifdef CONFIG_TCP_MD5SIG
/* TCP-MD5 helpers for native IPv6 sockets (set in tcp_v6_init_sock()). */
static const struct tcp_sock_af_ops tcp_sock_ipv6_specific = {
	.md5_lookup	= tcp_v6_md5_lookup,
	.calc_md5_hash	= tcp_v6_md5_hash_skb,
	.md5_parse	= tcp_v6_parse_md5_keys,
};
#endif
1610 * TCP over IPv4 via INET6 API
1612 static const struct inet_connection_sock_af_ops ipv6_mapped
= {
1613 .queue_xmit
= ip_queue_xmit
,
1614 .send_check
= tcp_v4_send_check
,
1615 .rebuild_header
= inet_sk_rebuild_header
,
1616 .sk_rx_dst_set
= inet_sk_rx_dst_set
,
1617 .conn_request
= tcp_v6_conn_request
,
1618 .syn_recv_sock
= tcp_v6_syn_recv_sock
,
1619 .net_header_len
= sizeof(struct iphdr
),
1620 .setsockopt
= ipv6_setsockopt
,
1621 .getsockopt
= ipv6_getsockopt
,
1622 .addr2sockaddr
= inet6_csk_addr2sockaddr
,
1623 .sockaddr_len
= sizeof(struct sockaddr_in6
),
1624 .bind_conflict
= inet6_csk_bind_conflict
,
1625 #ifdef CONFIG_COMPAT
1626 .compat_setsockopt
= compat_ipv6_setsockopt
,
1627 .compat_getsockopt
= compat_ipv6_getsockopt
,
1629 .mtu_reduced
= tcp_v4_mtu_reduced
,
#ifdef CONFIG_TCP_MD5SIG
/*
 * TCP-MD5 helpers for the IPv4-over-INET6 case: lookup and hashing use the
 * IPv4 routines, key parsing uses the IPv6 routine.
 */
static const struct tcp_sock_af_ops tcp_sock_ipv6_mapped_specific = {
	.md5_lookup	= tcp_v4_md5_lookup,
	.calc_md5_hash	= tcp_v4_md5_hash_skb,
	.md5_parse	= tcp_v6_parse_md5_keys,
};
#endif
1640 /* NOTE: A lot of things set to zero explicitly by call to
1641 * sk_alloc() so need not be done here.
1643 static int tcp_v6_init_sock(struct sock
*sk
)
1645 struct inet_connection_sock
*icsk
= inet_csk(sk
);
1649 icsk
->icsk_af_ops
= &ipv6_specific
;
1651 #ifdef CONFIG_TCP_MD5SIG
1652 tcp_sk(sk
)->af_specific
= &tcp_sock_ipv6_specific
;
/*
 * Socket destructor for TCPv6: run the TCP (v4) destroy routine first,
 * then the IPv6-level one.
 */
static void tcp_v6_destroy_sock(struct sock *sk)
{
	tcp_v4_destroy_sock(sk);
	inet6_destroy_sock(sk);
}
1664 #ifdef CONFIG_PROC_FS
1665 /* Proc filesystem TCPv6 sock list dumping. */
1666 static void get_openreq6(struct seq_file
*seq
,
1667 const struct sock
*sk
, struct request_sock
*req
, int i
, kuid_t uid
)
1669 int ttd
= req
->expires
- jiffies
;
1670 const struct in6_addr
*src
= &inet_rsk(req
)->ir_v6_loc_addr
;
1671 const struct in6_addr
*dest
= &inet_rsk(req
)->ir_v6_rmt_addr
;
1677 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1678 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %d %d %pK\n",
1680 src
->s6_addr32
[0], src
->s6_addr32
[1],
1681 src
->s6_addr32
[2], src
->s6_addr32
[3],
1682 inet_rsk(req
)->ir_num
,
1683 dest
->s6_addr32
[0], dest
->s6_addr32
[1],
1684 dest
->s6_addr32
[2], dest
->s6_addr32
[3],
1685 ntohs(inet_rsk(req
)->ir_rmt_port
),
1687 0, 0, /* could print option size, but that is af dependent. */
1688 1, /* timers active (only the expire timer) */
1689 jiffies_to_clock_t(ttd
),
1691 from_kuid_munged(seq_user_ns(seq
), uid
),
1692 0, /* non standard timer */
1693 0, /* open_requests have no inode */
1697 static void get_tcp6_sock(struct seq_file
*seq
, struct sock
*sp
, int i
)
1699 const struct in6_addr
*dest
, *src
;
1702 unsigned long timer_expires
;
1703 const struct inet_sock
*inet
= inet_sk(sp
);
1704 const struct tcp_sock
*tp
= tcp_sk(sp
);
1705 const struct inet_connection_sock
*icsk
= inet_csk(sp
);
1706 struct fastopen_queue
*fastopenq
= icsk
->icsk_accept_queue
.fastopenq
;
1708 dest
= &sp
->sk_v6_daddr
;
1709 src
= &sp
->sk_v6_rcv_saddr
;
1710 destp
= ntohs(inet
->inet_dport
);
1711 srcp
= ntohs(inet
->inet_sport
);
1713 if (icsk
->icsk_pending
== ICSK_TIME_RETRANS
) {
1715 timer_expires
= icsk
->icsk_timeout
;
1716 } else if (icsk
->icsk_pending
== ICSK_TIME_PROBE0
) {
1718 timer_expires
= icsk
->icsk_timeout
;
1719 } else if (timer_pending(&sp
->sk_timer
)) {
1721 timer_expires
= sp
->sk_timer
.expires
;
1724 timer_expires
= jiffies
;
1728 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1729 "%02X %08X:%08X %02X:%08lX %08X %5u %8d %lu %d %pK %lu %lu %u %u %d\n",
1731 src
->s6_addr32
[0], src
->s6_addr32
[1],
1732 src
->s6_addr32
[2], src
->s6_addr32
[3], srcp
,
1733 dest
->s6_addr32
[0], dest
->s6_addr32
[1],
1734 dest
->s6_addr32
[2], dest
->s6_addr32
[3], destp
,
1736 tp
->write_seq
-tp
->snd_una
,
1737 (sp
->sk_state
== TCP_LISTEN
) ? sp
->sk_ack_backlog
: (tp
->rcv_nxt
- tp
->copied_seq
),
1739 jiffies_delta_to_clock_t(timer_expires
- jiffies
),
1740 icsk
->icsk_retransmits
,
1741 from_kuid_munged(seq_user_ns(seq
), sock_i_uid(sp
)),
1742 icsk
->icsk_probes_out
,
1744 atomic_read(&sp
->sk_refcnt
), sp
,
1745 jiffies_to_clock_t(icsk
->icsk_rto
),
1746 jiffies_to_clock_t(icsk
->icsk_ack
.ato
),
1747 (icsk
->icsk_ack
.quick
<< 1) | icsk
->icsk_ack
.pingpong
,
1749 sp
->sk_state
== TCP_LISTEN
?
1750 (fastopenq
? fastopenq
->max_qlen
: 0) :
1751 (tcp_in_initial_slowstart(tp
) ? -1 : tp
->snd_ssthresh
)
1755 static void get_timewait6_sock(struct seq_file
*seq
,
1756 struct inet_timewait_sock
*tw
, int i
)
1758 const struct in6_addr
*dest
, *src
;
1760 s32 delta
= tw
->tw_ttd
- inet_tw_time_stamp();
1762 dest
= &tw
->tw_v6_daddr
;
1763 src
= &tw
->tw_v6_rcv_saddr
;
1764 destp
= ntohs(tw
->tw_dport
);
1765 srcp
= ntohs(tw
->tw_sport
);
1768 "%4d: %08X%08X%08X%08X:%04X %08X%08X%08X%08X:%04X "
1769 "%02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK\n",
1771 src
->s6_addr32
[0], src
->s6_addr32
[1],
1772 src
->s6_addr32
[2], src
->s6_addr32
[3], srcp
,
1773 dest
->s6_addr32
[0], dest
->s6_addr32
[1],
1774 dest
->s6_addr32
[2], dest
->s6_addr32
[3], destp
,
1775 tw
->tw_substate
, 0, 0,
1776 3, jiffies_delta_to_clock_t(delta
), 0, 0, 0, 0,
1777 atomic_read(&tw
->tw_refcnt
), tw
);
1780 static int tcp6_seq_show(struct seq_file
*seq
, void *v
)
1782 struct tcp_iter_state
*st
;
1783 struct sock
*sk
= v
;
1785 if (v
== SEQ_START_TOKEN
) {
1790 "st tx_queue rx_queue tr tm->when retrnsmt"
1791 " uid timeout inode\n");
1796 switch (st
->state
) {
1797 case TCP_SEQ_STATE_LISTENING
:
1798 case TCP_SEQ_STATE_ESTABLISHED
:
1799 if (sk
->sk_state
== TCP_TIME_WAIT
)
1800 get_timewait6_sock(seq
, v
, st
->num
);
1802 get_tcp6_sock(seq
, v
, st
->num
);
1804 case TCP_SEQ_STATE_OPENREQ
:
1805 get_openreq6(seq
, st
->syn_wait_sk
, v
, st
->num
, st
->uid
);
1812 static const struct file_operations tcp6_afinfo_seq_fops
= {
1813 .owner
= THIS_MODULE
,
1814 .open
= tcp_seq_open
,
1816 .llseek
= seq_lseek
,
1817 .release
= seq_release_net
1820 static struct tcp_seq_afinfo tcp6_seq_afinfo
= {
1823 .seq_fops
= &tcp6_afinfo_seq_fops
,
1825 .show
= tcp6_seq_show
,
1829 int __net_init
tcp6_proc_init(struct net
*net
)
1831 return tcp_proc_register(net
, &tcp6_seq_afinfo
);
1834 void tcp6_proc_exit(struct net
*net
)
1836 tcp_proc_unregister(net
, &tcp6_seq_afinfo
);
1840 static void tcp_v6_clear_sk(struct sock
*sk
, int size
)
1842 struct inet_sock
*inet
= inet_sk(sk
);
1844 /* we do not want to clear pinet6 field, because of RCU lookups */
1845 sk_prot_clear_nulls(sk
, offsetof(struct inet_sock
, pinet6
));
1847 size
-= offsetof(struct inet_sock
, pinet6
) + sizeof(inet
->pinet6
);
1848 memset(&inet
->pinet6
+ 1, 0, size
);
1851 struct proto tcpv6_prot
= {
1853 .owner
= THIS_MODULE
,
1855 .connect
= tcp_v6_connect
,
1856 .disconnect
= tcp_disconnect
,
1857 .accept
= inet_csk_accept
,
1859 .init
= tcp_v6_init_sock
,
1860 .destroy
= tcp_v6_destroy_sock
,
1861 .shutdown
= tcp_shutdown
,
1862 .setsockopt
= tcp_setsockopt
,
1863 .getsockopt
= tcp_getsockopt
,
1864 .recvmsg
= tcp_recvmsg
,
1865 .sendmsg
= tcp_sendmsg
,
1866 .sendpage
= tcp_sendpage
,
1867 .backlog_rcv
= tcp_v6_do_rcv
,
1868 .release_cb
= tcp_release_cb
,
1869 .hash
= tcp_v6_hash
,
1870 .unhash
= inet_unhash
,
1871 .get_port
= inet_csk_get_port
,
1872 .enter_memory_pressure
= tcp_enter_memory_pressure
,
1873 .stream_memory_free
= tcp_stream_memory_free
,
1874 .sockets_allocated
= &tcp_sockets_allocated
,
1875 .memory_allocated
= &tcp_memory_allocated
,
1876 .memory_pressure
= &tcp_memory_pressure
,
1877 .orphan_count
= &tcp_orphan_count
,
1878 .sysctl_mem
= sysctl_tcp_mem
,
1879 .sysctl_wmem
= sysctl_tcp_wmem
,
1880 .sysctl_rmem
= sysctl_tcp_rmem
,
1881 .max_header
= MAX_TCP_HEADER
,
1882 .obj_size
= sizeof(struct tcp6_sock
),
1883 .slab_flags
= SLAB_DESTROY_BY_RCU
,
1884 .twsk_prot
= &tcp6_timewait_sock_ops
,
1885 .rsk_prot
= &tcp6_request_sock_ops
,
1886 .h
.hashinfo
= &tcp_hashinfo
,
1887 .no_autobind
= true,
1888 #ifdef CONFIG_COMPAT
1889 .compat_setsockopt
= compat_tcp_setsockopt
,
1890 .compat_getsockopt
= compat_tcp_getsockopt
,
1892 #ifdef CONFIG_MEMCG_KMEM
1893 .proto_cgroup
= tcp_proto_cgroup
,
1895 .clear_sk
= tcp_v6_clear_sk
,
1898 static const struct inet6_protocol tcpv6_protocol
= {
1899 .early_demux
= tcp_v6_early_demux
,
1900 .handler
= tcp_v6_rcv
,
1901 .err_handler
= tcp_v6_err
,
1902 .flags
= INET6_PROTO_NOPOLICY
|INET6_PROTO_FINAL
,
1905 static struct inet_protosw tcpv6_protosw
= {
1906 .type
= SOCK_STREAM
,
1907 .protocol
= IPPROTO_TCP
,
1908 .prot
= &tcpv6_prot
,
1909 .ops
= &inet6_stream_ops
,
1910 .flags
= INET_PROTOSW_PERMANENT
|
1914 static int __net_init
tcpv6_net_init(struct net
*net
)
1916 return inet_ctl_sock_create(&net
->ipv6
.tcp_sk
, PF_INET6
,
1917 SOCK_RAW
, IPPROTO_TCP
, net
);
1920 static void __net_exit
tcpv6_net_exit(struct net
*net
)
1922 inet_ctl_sock_destroy(net
->ipv6
.tcp_sk
);
1925 static void __net_exit
tcpv6_net_exit_batch(struct list_head
*net_exit_list
)
1927 inet_twsk_purge(&tcp_hashinfo
, &tcp_death_row
, AF_INET6
);
1930 static struct pernet_operations tcpv6_net_ops
= {
1931 .init
= tcpv6_net_init
,
1932 .exit
= tcpv6_net_exit
,
1933 .exit_batch
= tcpv6_net_exit_batch
,
1936 int __init
tcpv6_init(void)
1940 ret
= inet6_add_protocol(&tcpv6_protocol
, IPPROTO_TCP
);
1944 /* register inet6 protocol */
1945 ret
= inet6_register_protosw(&tcpv6_protosw
);
1947 goto out_tcpv6_protocol
;
1949 ret
= register_pernet_subsys(&tcpv6_net_ops
);
1951 goto out_tcpv6_protosw
;
1956 inet6_unregister_protosw(&tcpv6_protosw
);
1958 inet6_del_protocol(&tcpv6_protocol
, IPPROTO_TCP
);
1962 void tcpv6_exit(void)
1964 unregister_pernet_subsys(&tcpv6_net_ops
);
1965 inet6_unregister_protosw(&tcpv6_protosw
);
1966 inet6_del_protocol(&tcpv6_protocol
, IPPROTO_TCP
);