/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetic in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 */
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/bpf-cgroup.h>
#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>

#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
#include <net/lwtunnel.h>
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_is_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			   is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	if (lwtunnel_xmit_redirect(dst->lwtstate)) {
		int res = lwtunnel_xmit(skb);

		if (res < 0 || res == LWTUNNEL_XMIT_DONE)
			return res;
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		sock_confirm_neigh(skb, neigh);
		ret = neigh_output(neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
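
/* Note on the unicast path in ip6_finish_output2() above: when no
 * neighbour entry is cached for the nexthop, __neigh_create() adds one
 * on the fly, and neigh_output() then either transmits immediately via
 * the cached link-layer header (NUD_CONNECTED) or queues the packet
 * while neighbour discovery resolves the address.
 */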
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	int ret;

	ret = BPF_CGROUP_RUN_PROG_INET_EGRESS(sk, skb);
	if (ret) {
		kfree_skb(skb);
		return ret;
	}

#if defined(CONFIG_NETFILTER) && defined(CONFIG_XFRM)
	/* Policy lookup after SNAT yielded a new policy */
	if (skb_dst(skb)->xfrm) {
		IPCB(skb)->flags |= IPSKB_REROUTED;
		return dst_output(net, sk, skb);
	}
#endif

	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}
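
/* Note: ip6_finish_output() above hands the packet to ip6_fragment() in
 * three cases: the packet exceeds the dst MTU and is not GSO (GSO
 * packets are segmented further down the stack), the route demands
 * fragmenting everything (dst_allfrag(), e.g. IPV6_USE_MIN_MTU), or
 * conntrack defragmentation recorded a smaller incoming fragment size
 * in frag_max_size that must not be exceeded on output.
 */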
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
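
/* NF_HOOK_COND above runs the NF_INET_POST_ROUTING hook only while the
 * rerouted flag is unset; a packet re-injected after the xfrm policy
 * reroute in ip6_finish_output() carries that flag and goes straight to
 * ip6_finish_output(), so POST_ROUTING is not traversed twice.
 */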
bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
	if (!np->autoflowlabel_set)
		return ip6_default_np_autolabel(net);
	else
		return np->autoflowlabel;
}
/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     __u32 mark, struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	unsigned int head_room;
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;

	head_room = sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
	if (opt)
		head_room += opt->opt_nflen + opt->opt_flen;

	if (unlikely(skb_headroom(skb) < head_room)) {
		struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
		if (!skb2) {
			IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
			kfree_skb(skb);
			return -ENOBUFS;
		}
		if (skb->sk)
			skb_set_owner_w(skb2, skb->sk);
		consume_skb(skb);
		skb = skb2;
	}

	if (opt) {
		seg_len += opt->opt_nflen + opt->opt_flen;

		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);

		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop,
					     &fl6->saddr);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = sk->sk_priority;
	skb->mark = mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_OUT, skb->len);

		/* if egress device is enslaved to an L3 master device pass the
		 * skb to its handler for processing
		 */
		skb = l3mdev_ip6_out((struct sock *)sk, skb);
		if (unlikely(!skb))
			return 0;

		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);
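
/* Illustrative sketch only (modelled on, but not copied from, a
 * TCP-style caller): a transport that has already routed its flow and
 * built the transport header might transmit with
 *
 *	err = ip6_xmit(sk, skb, &fl6, sk->sk_mark, opt, np->tclass);
 *
 * ip6_xmit() then pushes the extension headers from @opt and the IPv6
 * header in front of the existing transport header.
 */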
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			struct ipv6_pinfo *np = inet6_sk(sk);

			if (np && np->rtalert_isolate &&
			    !net_eq(sock_net(sk), dev_net(skb->dev))) {
				continue;
			}
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	__IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	__IP6_ADD_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

#ifdef CONFIG_NET_SWITCHDEV
	if (skb->offload_l3_fwd_mark) {
		consume_skb(skb);
		return 0;
	}
#endif

	return dst_output(net, sk, skb);
}
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_validate_network_len(skb, mtu))
		return false;

	return true;
}
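
/* A GSO packet is not considered too big as long as
 * skb_gso_validate_network_len() confirms that the segments it will be
 * split into fit @mtu at the network layer; the actual segmentation
 * happens later in the stack.
 */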
int ip6_forward(struct sk_buff *skb)
{
	struct inet6_dev *idev = __in6_dev_get_safely(skb->dev);
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot make anything.
	 *	Defragmentation also would be mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	   send redirects to source routed frames.
	   We don't send redirects to frames decapsulated from IPsec.
	 */
	if (IP6CB(skb)->iif == dst->dev->ifindex &&
	    opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		   and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		__IP6_INC_STATS(net, idev, IPSTATS_MIB_INTOOBIGERRORS);
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		__IP6_INC_STATS(net, ip6_dst_idev(dst),
				IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	__IP6_INC_STATS(net, idev, IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

	skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	skb_ext_copy(to, from);
	skb_copy_secmark(to, from);
}
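
/* ip6_fragment() below uses two strategies: a fast path that reuses an
 * existing frag_list when every fragment already has suitable geometry
 * (each fits the MTU, all but the last are multiples of 8 bytes, and
 * there is headroom for the fragment header), and a slow path that
 * allocates fresh skbs and copies the payload out block by block.
 */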
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len, nexthdr_offset;
	int hroom, troom;
	__be32 frag_id;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;
	nexthdr_offset = prevhdr - skb_network_header(skb);

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb it not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	prevhdr = skb_network_header(skb) + nexthdr_offset;
	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		unsigned int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			err = -ENOMEM;
			goto fail;
		}
		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);

		__skb_pull(skb, hlen);
		fh = __skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		fh->identification = frag_id;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = __skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb_mark_not_on_list(skb);
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			return 0;
		}

		kfree_skb_list(frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		u8 *fragnexthdr_offset;

		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		   then align the next start on an eight byte boundary */
		if (len < left)
			len &= ~7;

		/* Allocate buffer */
		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				 hroom + troom, GFP_ATOMIC);
		if (!frag) {
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		fragnexthdr_offset = skb_network_header(frag);
		fragnexthdr_offset += prevhdr - skb_network_header(skb);
		*fragnexthdr_offset = NEXTHDR_FRAGMENT;

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
				     len));
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in not connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which has not this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	     (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct fib6_info *from;
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;

		rcu_read_lock();
		from = rt ? rcu_dereference(rt->from) : NULL;
		err = ip6_route_get_saddr(net, from, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		rcu_read_unlock();

		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	dst_release(*dst);
	*dst = NULL;

	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	return err;
}
/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: network namespace of the flow
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;

	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *	@connected: whether @sk is connected or not
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	In addition, for a connected socket, cache the dst in the socket
 *	if the current cache is not valid.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst,
					 bool connected)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

	dst = ip6_sk_dst_check(sk, dst, fl6);
	if (dst)
		return dst;

	dst = ip6_dst_lookup_flow(sk, fl6, final_dst);
	if (connected && !IS_ERR(dst))
		ip6_sk_dst_store_flow(sk, dst_clone(dst), fl6);

	return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
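
/* Illustrative sketch only (a datagram-style caller; the surrounding
 * variables are assumptions, not code from this file):
 *
 *	dst = ip6_sk_dst_lookup_flow(sk, &fl6, final_p, connected);
 *	if (IS_ERR(dst)) {
 *		err = PTR_ERR(dst);
 *		goto out;
 *	}
 *
 * relying on the socket's cached dst when it validates, and falling
 * back to a full (possibly IPsec-transformed) lookup otherwise.
 */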
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}
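
/* Worked example of the maxfraglen formula used below: with *mtu = 1500
 * and fragheaderlen = 40 (a bare IPv6 header), (1500 - 40) & ~7 = 1456,
 * so *maxfraglen = 1456 + 40 - 8 = 1488. The masking keeps each
 * fragment's payload a multiple of 8 octets, and the 8 subtracted bytes
 * leave room for the fragment header itself.
 */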
static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (!skb) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = orig_mtu;
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork, struct ipcm6_cookie *ipc6,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;
	struct ipv6_txoptions *opt = ipc6->opt;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = sizeof(*opt);
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above miyazawa*/
	}
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = ipc6->hlimit;
	v6_cork->tclass = ipc6->tclass;
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(xfrm_dst_path(&rt->dst));
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < IPV6_MIN_MTU)
		return -EINVAL;
	cork->base.fragsize = mtu;
	cork->base.gso_size = ipc6->gso_size;
	cork->base.tx_flags = 0;
	sock_tx_timestamp(sk, ipc6->sockc.tsflags, &cork->base.tx_flags);

	if (dst_allfrag(xfrm_dst_path(&rt->dst)))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	cork->base.transmit_time = ipc6->sockc.transmit_time;

	return 0;
}
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, struct ipcm6_cookie *ipc6)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	struct ubuf_info *uarg = NULL;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;
	unsigned int wmem_alloc_delta = 0;
	bool paged, extra_uref = false;

	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	paged = !!cork->gso_size;
	mtu = cork->gso_size ? IP6_MAX_MTU : cork->fragsize;
	orig_mtu = mtu;

	if (cork->tx_flags & SKBTX_ANY_SW_TSTAMP &&
	    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
		tskey = sk->sk_tskey++;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && ipc6->dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length <= mtu - headersize &&
	    (!(flags & MSG_MORE) || cork->gso_size) &&
	    rt->dst.dev->features & (NETIF_F_IPV6_CSUM | NETIF_F_HW_CSUM))
		csummode = CHECKSUM_PARTIAL;

	if (flags & MSG_ZEROCOPY && length && sock_flag(sk, SOCK_ZEROCOPY)) {
		uarg = sock_zerocopy_realloc(sk, length, skb_zcopy(skb));
		if (!uarg)
			return -ENOBUFS;
		extra_uref = !skb_zcopy(skb);	/* only ref on new uarg */
		if (rt->dst.dev->features & NETIF_F_SG &&
		    csummode == CHECKSUM_PARTIAL) {
			paged = true;
		} else {
			uarg->zerocopy = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);
		}
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octects, in total).
	 *
	 * Note that we may need to "move" the data from the tail of
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
			unsigned int pagedlen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;
			pagedlen = 0;

			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else if (!paged)
				alloclen = fraglen;
			else {
				alloclen = min_t(int, fraglen, MAX_HEADER);
				pagedlen = fraglen - alloclen;
			}

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			copy = datalen - transhdrlen - fraggap - pagedlen;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (refcount_read(&sk->sk_wmem_alloc) + wmem_alloc_delta <=
				    2 * sk->sk_sndbuf)
					skb = alloc_skb(alloclen + hh_len,
							sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen - pagedlen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= copy + transhdrlen;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = cork->tx_flags;
			cork->tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;
			skb_zcopy_set(skb, uarg, &extra_uref);

			if ((flags & MSG_CONFIRM) && !skb_prev)
				skb_set_dst_pending_confirm(skb, 1);

			/*
			 * Put the packet on the pending queue
			 */
			if (!skb->destructor) {
				skb->destructor = sock_wfree;
				skb->sk = sk;
				wmem_alloc_delta += skb->truesize;
			}
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else if (!uarg || !uarg->zerocopy) {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			wmem_alloc_delta += copy;
		} else {
			err = skb_zerocopy_iter_dgram(skb, from, copy);
			if (err < 0)
				goto error;
		}
		offset += copy;
		length -= copy;
	}

	if (wmem_alloc_delta)
		refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return 0;

error_efault:
	err = -EFAULT;
error:
	if (uarg)
		sock_zerocopy_put_abort(uarg, extra_uref);
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	refcount_add(wmem_alloc_delta, &sk->sk_wmem_alloc);
	return err;
}
int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen,
		    struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork,
				     ipc6, rt, fl6);
		if (err)
			return err;

		exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, ipc6);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
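
/* Illustrative sketch only (modelled on the datagram senders that use
 * this API; udphdr is just an example transport header): corked
 * transmission pairs ip6_append_data() with the helpers further below,
 * roughly
 *
 *	err = ip6_append_data(sk, getfrag, msg, len, sizeof(struct udphdr),
 *			      &ipc6, &fl6, rt, flags);
 *	if (err)
 *		ip6_flush_pending_frames(sk);
 *	else if (!(flags & MSG_MORE))
 *		err = ip6_push_pending_frames(sk);
 *
 * Appended data sits on sk->sk_write_queue until pushed or flushed.
 */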
static void ip6_cork_release(struct inet_cork_full *cork,
			     struct inet6_cork *v6_cork)
{
	if (v6_cork->opt) {
		kfree(v6_cork->opt->dst0opt);
		kfree(v6_cork->opt->dst1opt);
		kfree(v6_cork->opt->hopopt);
		kfree(v6_cork->opt->srcrt);
		kfree(v6_cork->opt);
		v6_cork->opt = NULL;
	}

	if (cork->base.dst) {
		dst_release(cork->base.dst);
		cork->base.dst = NULL;
		cork->base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&cork->fl, 0, sizeof(cork->fl));
}
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst, &fl6->saddr);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb->tstamp = cork->base.transmit_time;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}
int ip6_send_skb(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	int err;

	err = ip6_local_out(net, skb->sk, skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			IP6_INC_STATS(net, rt->rt6i_idev,
				      IPSTATS_MIB_OUTDISCARDS);
	}

	return err;
}
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	skb = ip6_finish_skb(sk);
	if (!skb)
		return 0;

	return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}
void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
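
/* ip6_make_skb() below mirrors the ip6_append_data() +
 * ip6_push_pending_frames() pair for the uncorked case: the datagram is
 * assembled in one call on a private queue with a caller-supplied cork,
 * so the socket's pending-frames state is never touched.
 */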
struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     struct ipcm6_cookie *ipc6, struct flowi6 *fl6,
			     struct rt6_info *rt, unsigned int flags,
			     struct inet_cork_full *cork)
{
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (ipc6->opt ? ipc6->opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE)
		return NULL;

	__skb_queue_head_init(&queue);

	cork->base.flags = 0;
	cork->base.addr = 0;
	cork->base.opt = NULL;
	cork->base.dst = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, cork, &v6_cork, ipc6, rt, fl6);
	if (err) {
		ip6_cork_release(cork, &v6_cork);
		return ERR_PTR(err);
	}
	if (ipc6->dontfrag < 0)
		ipc6->dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, fl6, &queue, &cork->base, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, ipc6);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, cork, &v6_cork);
}