/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>

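/* ip6_finish_output2() below is the last IPv6-level stop before the
 * device layer. Roughly: multicast destinations may first be looped
 * back to local listeners via dev_loopback_xmit(), a lightweight
 * tunnel attached to the dst may take over transmission entirely,
 * and unicast output resolves (or creates) the nexthop neighbour
 * entry under rcu_read_lock_bh() before neigh_output() hands the
 * skb down.
 */
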
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = neigh_output(neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}

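/* ip6_finish_output() decides between direct transmission and
 * fragmentation. Three conditions force ip6_fragment(): the packet
 * exceeds the dst MTU and is not GSO, the route carries
 * RTAX_FEATURE_ALLFRAG (path MTU below IPV6_MIN_MTU, so every packet
 * must carry a fragment header), or conntrack defragmentation
 * recorded a smaller incoming fragment size in frag_max_size that
 * this packet now exceeds.
 */
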
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int ret;

	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
	if (ret) {
		kfree_skb(skb);
		return ret;
	}

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
		IPCB(skb)->flags |= IPSKB_REROUTED;
		return dst_output(net, sk, skb);
	}
#endif

	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}

int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}

bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
	if (!np->autoflowlabel_set)
		return ip6_default_np_autolabel(net);
	else
		return np->autoflowlabel;
}

/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note: socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8  proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(skb_headroom(skb) < head_room)) {
		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
		if (!skb2) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
			kfree_skb(skb);
			return -ENOBUFS;
		}
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		consume_skb(skb);
		skb = skb2;
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = sk->sk_priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);

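/*
 * Illustrative sketch of a typical ip6_xmit() call site (modelled on
 * what connection-oriented callers such as inet6_csk_xmit() do; the
 * surrounding code varies by caller and is an assumption here, not
 * part of this file):
 *
 *	struct ipv6_pinfo *np = inet6_sk(sk);
 *
 *	res = ip6_xmit(sk, skb, &fl6, sk->sk_mark,
 *		       rcu_dereference(np->opt), np->tclass);
 *
 * The caller owns routing: skb_dst() must already be set (or the
 * socket's cached dst validated) before ip6_xmit() is invoked.
 */
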
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}

static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}

static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

	return dst_output(net, sk, skb);
}

static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
{
	unsigned int mtu;
	struct inet6_dev *idev;

	if (dst_metric_locked(dst, RTAX_MTU)) {
		mtu = dst_metric_raw(dst, RTAX_MTU);
		if (mtu)
			return mtu;
	}

	mtu = IPV6_MIN_MTU;
	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

	return mtu;
}

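/* ip6_pkt_too_big() below is the forwarding-side "packet too big"
 * test: anything within the MTU passes; packets reassembled by
 * conntrack defrag are judged by the largest fragment seen
 * (frag_max_size); ignore_df otherwise exempts the packet; and a GSO
 * packet passes if every segment it will be split into fits
 * (skb_gso_validate_mtu()).
 */
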
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_mtu(skb, mtu))
		return false;

	return true;
}

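/* ip6_forward() is the forwarding entry point. In order: policy and
 * sanity checks (forwarding enabled, PACKET_HOST, no local socket,
 * LRO, XFRM forward policy), router-alert delivery, hop limit expiry
 * (ICMPV6_TIME_EXCEED), NDISC proxying, XFRM re-routing, redirect
 * generation back onto the ingress link, source address sanity, the
 * MTU check above, and finally the netfilter FORWARD hook with
 * ip6_forward_finish() as its continuation.
 */
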
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be a mistake; RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement hop limit
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			__IP6_INC_STATS(net, ip6_dst_idev(dst),
					IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (IP6CB(skb)->iif == dst->dev->ifindex &&
	    opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}

static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

	skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_copy_secmark(to, from);
}

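/* ip6_fragment() has two strategies. The fast path applies when the
 * skb already carries a well-formed frag_list (each chunk fits the
 * MTU, is 8-byte aligned and unshared): the existing buffers are
 * turned into fragments in place. Otherwise the slow path linearly
 * copies the payload into freshly allocated skbs. A worked example of
 * the sizing (values illustrative): with an outgoing MTU of 1500 and
 * an unfragmentable header chain of hlen = 40, the per-fragment
 * payload budget is 1500 - 40 - 8 = 1452 bytes, rounded down with
 * len &= ~7 to 1448 bytes in every fragment but the last.
 */
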
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len, nexthdr_offset;
	int hroom, troom;
	__be32 frag_id;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			err = -ENOMEM;
			goto fail;
		}

		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);

		__skb_pull(skb, hlen);
		fh = __skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		fh->identification = frag_id;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = __skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		kfree_skb_list(frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		u8 *fragnexthdr_offset;

		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)
			len &= ~7;

		/* Allocate buffer */
		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				 hroom + troom, GFP_ATOMIC);
		if (!frag) {
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		fragnexthdr_offset = skb_network_header(frag);
		fragnexthdr_offset += prevhdr - skb_network_header(skb);
		*fragnexthdr_offset = NEXTHDR_FRAGMENT;

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
				     len));
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	skb->dev = skb_dst(skb)->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}

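/* ip6_rt_check() below returns nonzero when a cached route can NOT be
 * validated against the flow address: the route is neither an exact
 * host route (plen == 128 with a matching address) nor confirmed by
 * the socket's last-used address cache. A zero result means the
 * cached dst may keep being used.
 */
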
static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}

static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in the unconnected
	 * case is not very simple. Take into account
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	     (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}

static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}

/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: network namespace to perform the lookup in
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);

/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);

/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

	dst = ip6_sk_dst_check(sk, dst, fl6);
	if (!dst)
		dst = ip6_dst_lookup_flow(sk, fl6, final_dst);

	return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);

static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

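/* The (src->hdrlen + 1) * 8 above is the on-the-wire size rule for
 * IPv6 extension headers (RFC 8200): hdrlen counts 8-octet units
 * beyond the first mandatory 8 octets, so hdrlen == 0 still means an
 * 8-byte header.
 */
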
static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (!skb) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = orig_mtu;
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}

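/* The maxfraglen formula keeps fragment payloads 8-byte aligned while
 * reserving room for the fragment header. Worked example (values
 * illustrative): mtu = 1500 and fragheaderlen = 40 (bare IPv6 header)
 * give ((1500 - 40) & ~7) + 40 - 8 = 1488; adding the 8-byte fragment
 * header back yields a 1496-byte packet, within the MTU, with a
 * payload length divisible by 8 as required for non-final fragments.
 */
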
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = sizeof(*opt);
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa */
	}
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path);
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < IPV6_MIN_MTU)
		return -EINVAL;
	cork->base.fragsize = mtu;
	if (dst_allfrag(rt->dst.path))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	return 0;
}

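/* Cork lifecycle note: ip6_setup_cork() deep-copies the caller's
 * ipv6_txoptions into v6_cork (via ip6_opt_dup()/ip6_rthdr_dup()), so
 * the caller's option block may go away while data stays corked, and
 * the cork takes over the caller's reference on the route. Every
 * successful setup is balanced by ip6_cork_release(), reached either
 * from the transmit path (__ip6_make_skb()) or from the error/flush
 * path (__ip6_flush_pending_frames()).
 */
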
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6,
			     const struct sockcm_cookie *sockc)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;

	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	mtu = cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    !(flags & MSG_MORE) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
		sock_tx_timestamp(sk, sockc->tsflags, &tx_flags);
		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
			tskey = sk->sk_tskey++;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = tx_flags;
			tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			refcount_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}

int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen,
		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags,
		    const struct sockcm_cookie *sockc)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
				     ipc6, rt, fl6);
		if (err)
			return err;

		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, ipc6, sockc);
}
EXPORT_SYMBOL_GPL(ip6_append_data);

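/*
 * Illustrative use of the corking API (a sketch modelled on the
 * datagram sendmsg paths, which pass ip_generic_getfrag() as the
 * getfrag callback; the exact arguments are assumptions, not code
 * from this file):
 *
 *	err = ip6_append_data(sk, ip_generic_getfrag, msg, ulen,
 *			      sizeof(struct udphdr), &ipc6, &fl6,
 *			      (struct rt6_info *)dst, msg->msg_flags,
 *			      &sockc);
 *	if (err)
 *		ip6_flush_pending_frames(sk);
 *	else if (!(msg->msg_flags & MSG_MORE))
 *		err = ip6_push_pending_frames(sk);
 *
 * Data accumulates on sk->sk_write_queue until pushed or flushed.
 */
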
static void ip6_cork_release(struct inet_cork_full *cork,
			     struct inet6_cork *v6_cork)
{
	if (v6_cork->opt) {
		kfree(v6_cork->opt->dst0opt);
		kfree(v6_cork->opt->dst1opt);
		kfree(v6_cork->opt->hopopt);
		kfree(v6_cork->opt->srcrt);
		kfree(v6_cork->opt);
		v6_cork->opt = NULL;
	}

	if (cork->base.dst) {
		dst_release(cork->base.dst);
		cork->base.dst = NULL;
		cork->base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&cork->fl, 0, sizeof(cork->fl));
}

struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}

*skb
)
1684 struct net
*net
= sock_net(skb
->sk
);
1685 struct rt6_info
*rt
= (struct rt6_info
*)skb_dst(skb
);
1688 err
= ip6_local_out(net
, skb
->sk
, skb
);
1691 err
= net_xmit_errno(err
);
1693 IP6_INC_STATS(net
, rt
->rt6i_idev
,
1694 IPSTATS_MIB_OUTDISCARDS
);
1700 int ip6_push_pending_frames(struct sock
*sk
)
1702 struct sk_buff
*skb
;
1704 skb
= ip6_finish_skb(sk
);
1708 return ip6_send_skb(skb
);
1710 EXPORT_SYMBOL_GPL(ip6_push_pending_frames
);
static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}

void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);

struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
			     struct rt6_info *rt, unsigned int flags,
			     const struct sockcm_cookie *sockc)
{
	struct inet_cork_full cork;
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE)
		return NULL;

	__skb_queue_head_init(&queue);

	cork.base.flags = 0;
	cork.base.addr = 0;
	cork.base.opt = NULL;
	cork.base.dst = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, &cork, &v6_cork, ipc6, rt, fl6);
	if (err) {
		ip6_cork_release(&cork, &v6_cork);
		return ERR_PTR(err);
	}
	if (ipc6->dontfrag < 0)
		ipc6->dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, ipc6, sockc);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
}
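
/* ip6_make_skb() is the uncorked one-shot variant of the above: it
 * sets up a private cork and queue on the stack, appends all data,
 * and collapses the queue into a single skb for the caller to send
 * (e.g. via ip6_send_skb()), without ever touching
 * sk->sk_write_queue. The datagram fast path uses it when no corking
 * is in effect.
 */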