/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/socket.h>
#include <linux/net.h>
#include <linux/netdevice.h>
#include <linux/if_arp.h>
#include <linux/in6.h>
#include <linux/tcp.h>
#include <linux/route.h>
#include <linux/module.h>
#include <linux/slab.h>

#include <linux/netfilter.h>
#include <linux/netfilter_ipv6.h>

#include <net/sock.h>
#include <net/snmp.h>

#include <net/ipv6.h>
#include <net/ndisc.h>
#include <net/protocol.h>
#include <net/ip6_route.h>
#include <net/addrconf.h>
#include <net/rawv6.h>
#include <net/icmp.h>
#include <net/xfrm.h>
#include <net/checksum.h>
#include <linux/mroute6.h>
#include <net/l3mdev.h>
static int ip6_finish_output2(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct net_device *dev = dst->dev;
	struct neighbour *neigh;
	struct in6_addr *nexthop;
	int ret;

	skb->protocol = htons(ETH_P_IPV6);
	skb->dev = dev;

	if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) &&
		    ((mroute6_socket(net, skb) &&
		     !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
		     ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
					 &ipv6_hdr(skb)->saddr))) {
			struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);

			/* Do not check for IFF_ALLMULTI; multicast routing
			 * is not supported in any case.
			 */
			if (newskb)
				NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
					net, sk, newskb, NULL, newskb->dev,
					dev_loopback_xmit);

			if (ipv6_hdr(skb)->hop_limit == 0) {
				IP6_INC_STATS(net, idev,
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return 0;
			}
		}

		IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUTMCAST, skb->len);

		if (IPV6_ADDR_MC_SCOPE(&ipv6_hdr(skb)->daddr) <=
		    IPV6_ADDR_SCOPE_NODELOCAL &&
		    !(dev->flags & IFF_LOOPBACK)) {
			kfree_skb(skb);
			return 0;
		}
	}

	rcu_read_lock_bh();
	nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr);
	neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop);
	if (unlikely(!neigh))
		neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false);
	if (!IS_ERR(neigh)) {
		ret = dst_neigh_output(dst, neigh, skb);
		rcu_read_unlock_bh();
		return ret;
	}
	rcu_read_unlock_bh();

	IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
	kfree_skb(skb);
	return -EINVAL;
}
static int ip6_finish_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	/* Fragment when the packet exceeds the path MTU (and is not GSO),
	 * when the route demands fragmentation, or when conntrack recorded
	 * a smaller incoming fragment size.
	 */
	if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
	    dst_allfrag(skb_dst(skb)) ||
	    (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size))
		return ip6_fragment(net, sk, skb, ip6_finish_output2);
	else
		return ip6_finish_output2(net, sk, skb);
}
int ip6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
{
	struct net_device *dev = skb_dst(skb)->dev;
	struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

	if (unlikely(idev->cnf.disable_ipv6)) {
		IP6_INC_STATS(net, idev, IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
		return 0;
	}

	return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING,
			    net, sk, skb, NULL, dev,
			    ip6_finish_output,
			    !(IP6CB(skb)->flags & IP6SKB_REROUTED));
}
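
/*
 * Illustrative sketch (added for this edit; not part of the original
 * file): ip6_output() is normally reached through the generic
 * dst_output() indirection once a routed dst has been attached to the
 * skb.  The helper name below is hypothetical and only demonstrates
 * that layering.
 */
static int __maybe_unused example_output_path(struct net *net,
					      struct sock *sk,
					      struct sk_buff *skb)
{
	/* skb must already carry a dst, e.g. from ip6_route_output(). */
	return dst_output(net, sk, skb);	/* resolves to ip6_output() */
}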
bool ip6_autoflowlabel(struct net *net, const struct ipv6_pinfo *np)
{
	if (!np->autoflowlabel_set)
		return ip6_default_np_autolabel(net);
	else
		return np->autoflowlabel;
}
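
/*
 * Illustrative note (added; not in the original file): the pair
 * autoflowlabel_set/autoflowlabel forms a tri-state.  A socket that
 * never touched the option follows the per-namespace sysctl default,
 * while userspace can pin its own choice, e.g.:
 *
 *	int one = 1;
 *	setsockopt(fd, IPPROTO_IPV6, IPV6_AUTOFLOWLABEL,
 *		   &one, sizeof(one));
 *
 * after which the sysctl default no longer applies to that socket.
 */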
/*
 * xmit an sk_buff (used by TCP, SCTP and DCCP)
 * Note : socket lock is not held for SYNACK packets, but might be modified
 * by calls to skb_set_owner_w() and ipv6_local_error(),
 * which are using proper atomic operations or spinlocks.
 */
int ip6_xmit(const struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
	     struct ipv6_txoptions *opt, int tclass)
{
	struct net *net = sock_net(sk);
	const struct ipv6_pinfo *np = inet6_sk(sk);
	struct in6_addr *first_hop = &fl6->daddr;
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr;
	u8 proto = fl6->flowi6_proto;
	int seg_len = skb->len;
	int hlimit = -1;
	u32 mtu;
	unsigned int head_room;

	if (opt) {
		/* First: exthdrs may take lots of space (~8K for now)
		 * MAX_HEADER is not enough.
		 */
		head_room = opt->opt_nflen + opt->opt_flen;
		seg_len += head_room;
		head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);

		if (skb_headroom(skb) < head_room) {
			struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
			if (!skb2) {
				IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
					      IPSTATS_MIB_OUTDISCARDS);
				kfree_skb(skb);
				return -ENOBUFS;
			}
			if (skb->sk)
				skb_set_owner_w(skb2, skb->sk);
			consume_skb(skb);
			skb = skb2;
		}
		if (opt->opt_flen)
			ipv6_push_frag_opts(skb, opt, &proto);
		if (opt->opt_nflen)
			ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
	}

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	/*
	 *	Fill in the IPv6 header
	 */
	if (np)
		hlimit = np->hop_limit;
	if (hlimit < 0)
		hlimit = ip6_dst_hoplimit(dst);

	ip6_flow_hdr(hdr, tclass, ip6_make_flowlabel(net, skb, fl6->flowlabel,
				ip6_autoflowlabel(net, np), fl6));

	hdr->payload_len = htons(seg_len);
	hdr->nexthdr = proto;
	hdr->hop_limit = hlimit;

	hdr->saddr = fl6->saddr;
	hdr->daddr = *first_hop;

	skb->protocol = htons(ETH_P_IPV6);
	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	mtu = dst_mtu(dst);
	if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) {
		IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
				 IPSTATS_MIB_OUT, skb->len);
		/* hooks should never assume socket lock is held.
		 * we promote our socket to non const
		 */
		return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT,
			       net, (struct sock *)sk, skb, NULL, dst->dev,
			       dst_output);
	}

	skb->dev = dst->dev;
	/* ipv6_local_error() does not require socket lock,
	 * we promote our socket to non const
	 */
	ipv6_local_error((struct sock *)sk, EMSGSIZE, fl6, mtu);

	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return -EMSGSIZE;
}
EXPORT_SYMBOL(ip6_xmit);
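
/*
 * Illustrative sketch (added; not part of the original file): a
 * minimal transport-style caller of ip6_xmit().  The helper name and
 * the assumption that the caller already built the transport headers
 * are hypothetical; real users (TCP, SCTP, DCCP) have their own
 * route and header setup paths.
 */
static int __maybe_unused example_ip6_xmit_call(struct sock *sk,
						struct sk_buff *skb,
						struct flowi6 *fl6)
{
	struct dst_entry *dst;

	/* ip6_xmit() reads the route via skb_dst(), so attach it first. */
	dst = ip6_dst_lookup_flow(sk, fl6, NULL);
	if (IS_ERR(dst))
		return PTR_ERR(dst);
	skb_dst_set(skb, dst);

	/* No extension headers, default traffic class. */
	return ip6_xmit(sk, skb, fl6, NULL, 0);
}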
static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
{
	struct ip6_ra_chain *ra;
	struct sock *last = NULL;

	read_lock(&ip6_ra_lock);
	for (ra = ip6_ra_chain; ra; ra = ra->next) {
		struct sock *sk = ra->sk;
		if (sk && ra->sel == sel &&
		    (!sk->sk_bound_dev_if ||
		     sk->sk_bound_dev_if == skb->dev->ifindex)) {
			if (last) {
				struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
				if (skb2)
					rawv6_rcv(last, skb2);
			}
			last = sk;
		}
	}

	if (last) {
		rawv6_rcv(last, skb);
		read_unlock(&ip6_ra_lock);
		return 1;
	}
	read_unlock(&ip6_ra_lock);
	return 0;
}
static int ip6_forward_proxy_check(struct sk_buff *skb)
{
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	u8 nexthdr = hdr->nexthdr;
	__be16 frag_off;
	int offset;

	if (ipv6_ext_hdr(nexthdr)) {
		offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
		if (offset < 0)
			return 0;
	} else
		offset = sizeof(struct ipv6hdr);

	if (nexthdr == IPPROTO_ICMPV6) {
		struct icmp6hdr *icmp6;

		if (!pskb_may_pull(skb, (skb_network_header(skb) +
					 offset + 1 - skb->data)))
			return 0;

		icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);

		switch (icmp6->icmp6_type) {
		case NDISC_ROUTER_SOLICITATION:
		case NDISC_ROUTER_ADVERTISEMENT:
		case NDISC_NEIGHBOUR_SOLICITATION:
		case NDISC_NEIGHBOUR_ADVERTISEMENT:
		case NDISC_REDIRECT:
			/* For reaction involving unicast neighbor discovery
			 * message destined to the proxied address, pass it to
			 * input function.
			 */
			return 1;
		default:
			break;
		}
	}

	/*
	 * The proxying router can't forward traffic sent to a link-local
	 * address, so signal the sender and discard the packet. This
	 * behavior is clarified by the MIPv6 specification.
	 */
	if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
		dst_link_failure(skb);
		return -1;
	}

	return 0;
}
static inline int ip6_forward_finish(struct net *net, struct sock *sk,
				     struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);

	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
	IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);

	skb_sender_cpu_clear(skb);
	return dst_output(net, sk, skb);
}
static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst)
{
	unsigned int mtu;
	struct inet6_dev *idev;

	if (dst_metric_locked(dst, RTAX_MTU)) {
		mtu = dst_metric_raw(dst, RTAX_MTU);
		if (mtu)
			return mtu;
	}

	mtu = IPV6_MIN_MTU;
	rcu_read_lock();
	idev = __in6_dev_get(dst->dev);
	if (idev)
		mtu = idev->cnf.mtu6;
	rcu_read_unlock();

	return mtu;
}
static bool ip6_pkt_too_big(const struct sk_buff *skb, unsigned int mtu)
{
	if (skb->len <= mtu)
		return false;

	/* ipv6 conntrack defrag sets max_frag_size + ignore_df */
	if (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)
		return true;

	if (skb->ignore_df)
		return false;

	if (skb_is_gso(skb) && skb_gso_network_seglen(skb) <= mtu)
		return false;

	return true;
}
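
/*
 * Worked example (added for illustration; not in the original file):
 * a GSO skb with skb->len = 64 KB but skb_gso_network_seglen() = 1300
 * is *not* too big for an mtu of 1400, because the stack segments it
 * into 1300-byte packets before transmission; only the resulting
 * per-segment size has to honour the path MTU.
 */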
int ip6_forward(struct sk_buff *skb)
{
	struct dst_entry *dst = skb_dst(skb);
	struct ipv6hdr *hdr = ipv6_hdr(skb);
	struct inet6_skb_parm *opt = IP6CB(skb);
	struct net *net = dev_net(dst->dev);
	u32 mtu;

	if (net->ipv6.devconf_all->forwarding == 0)
		goto error;

	if (skb->pkt_type != PACKET_HOST)
		goto drop;

	if (unlikely(skb->sk))
		goto drop;

	if (skb_warn_if_lro(skb))
		goto drop;

	if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INDISCARDS);
		goto drop;
	}

	skb_forward_csum(skb);

	/*
	 *	We DO NOT make any processing on
	 *	RA packets, pushing them to user level AS IS
	 *	without any WARRANTY that the application will be able
	 *	to interpret them. The reason is that we
	 *	cannot make anything clever here.
	 *
	 *	We are not end-node, so that if packet contains
	 *	AH/ESP, we cannot do anything.
	 *	Defragmentation also would be a mistake, RA packets
	 *	cannot be fragmented, because there is no warranty
	 *	that different fragments will go along one path. --ANK
	 */
	if (unlikely(opt->flags & IP6SKB_ROUTERALERT)) {
		if (ip6_call_ra_chain(skb, ntohs(opt->ra)))
			return 0;
	}

	/*
	 *	check and decrement ttl
	 */
	if (hdr->hop_limit <= 1) {
		/* Force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INHDRERRORS);

		kfree_skb(skb);
		return -ETIMEDOUT;
	}

	/* XXX: idev->cnf.proxy_ndp? */
	if (net->ipv6.devconf_all->proxy_ndp &&
	    pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
		int proxied = ip6_forward_proxy_check(skb);
		if (proxied > 0)
			return ip6_input(skb);
		else if (proxied < 0) {
			IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
					 IPSTATS_MIB_INDISCARDS);
			goto drop;
		}
	}

	if (!xfrm6_route_forward(skb)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INDISCARDS);
		goto drop;
	}
	dst = skb_dst(skb);

	/* IPv6 specs say nothing about it, but it is clear that we cannot
	 * send redirects to source routed frames.
	 * We don't send redirects to frames decapsulated from IPsec.
	 */
	if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
		struct in6_addr *target = NULL;
		struct inet_peer *peer;
		struct rt6_info *rt;

		/*
		 *	incoming and outgoing devices are the same
		 *	send a redirect.
		 */

		rt = (struct rt6_info *) dst;
		if (rt->rt6i_flags & RTF_GATEWAY)
			target = &rt->rt6i_gateway;
		else
			target = &hdr->daddr;

		peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1);

		/* Limit redirects both by destination (here)
		 * and by source (inside ndisc_send_redirect)
		 */
		if (inet_peer_xrlim_allow(peer, 1*HZ))
			ndisc_send_redirect(skb, target);
		if (peer)
			inet_putpeer(peer);
	} else {
		int addrtype = ipv6_addr_type(&hdr->saddr);

		/* This check is security critical. */
		if (addrtype == IPV6_ADDR_ANY ||
		    addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
			goto error;
		if (addrtype & IPV6_ADDR_LINKLOCAL) {
			icmpv6_send(skb, ICMPV6_DEST_UNREACH,
				    ICMPV6_NOT_NEIGHBOUR, 0);
			goto error;
		}
	}

	mtu = ip6_dst_mtu_forward(dst);
	if (mtu < IPV6_MIN_MTU)
		mtu = IPV6_MIN_MTU;

	if (ip6_pkt_too_big(skb, mtu)) {
		/* Again, force OUTPUT device used as source address */
		skb->dev = dst->dev;
		icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_INTOOBIGERRORS);
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_FRAGFAILS);
		kfree_skb(skb);
		return -EMSGSIZE;
	}

	if (skb_cow(skb, dst->dev->hard_header_len)) {
		IP6_INC_STATS_BH(net, ip6_dst_idev(dst),
				 IPSTATS_MIB_OUTDISCARDS);
		goto drop;
	}

	hdr = ipv6_hdr(skb);

	/* Mangling hops number delayed to point after skb COW */

	hdr->hop_limit--;

	return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD,
		       net, NULL, skb, skb->dev, dst->dev,
		       ip6_forward_finish);

error:
	IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
drop:
	kfree_skb(skb);
	return -EINVAL;
}
static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
{
	to->pkt_type = from->pkt_type;
	to->priority = from->priority;
	to->protocol = from->protocol;
	skb_dst_drop(to);
	skb_dst_set(to, dst_clone(skb_dst(from)));
	to->dev = from->dev;
	to->mark = from->mark;

	skb_copy_hash(to, from);

#ifdef CONFIG_NET_SCHED
	to->tc_index = from->tc_index;
#endif
	nf_copy(to, from);
	skb_copy_secmark(to, from);
}
int ip6_fragment(struct net *net, struct sock *sk, struct sk_buff *skb,
		 int (*output)(struct net *, struct sock *, struct sk_buff *))
{
	struct sk_buff *frag;
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	struct ipv6_pinfo *np = skb->sk && !dev_recursion_level() ?
				inet6_sk(skb->sk) : NULL;
	struct ipv6hdr *tmp_hdr;
	struct frag_hdr *fh;
	unsigned int mtu, hlen, left, len;
	int hroom, troom;
	__be32 frag_id;
	int ptr, offset = 0, err = 0;
	u8 *prevhdr, nexthdr = 0;

	err = ip6_find_1stfragopt(skb, &prevhdr);
	if (err < 0)
		goto fail;
	hlen = err;
	nexthdr = *prevhdr;

	mtu = ip6_skb_dst_mtu(skb);

	/* We must not fragment if the socket is set to force MTU discovery
	 * or if the skb is not generated by a local socket.
	 */
	if (unlikely(!skb->ignore_df && skb->len > mtu))
		goto fail_toobig;

	if (IP6CB(skb)->frag_max_size) {
		if (IP6CB(skb)->frag_max_size > mtu)
			goto fail_toobig;

		/* don't send fragments larger than what we received */
		mtu = IP6CB(skb)->frag_max_size;
		if (mtu < IPV6_MIN_MTU)
			mtu = IPV6_MIN_MTU;
	}

	if (np && np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < hlen + sizeof(struct frag_hdr) + 8)
		goto fail_toobig;
	mtu -= hlen + sizeof(struct frag_hdr);

	frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr,
				    &ipv6_hdr(skb)->saddr);

	if (skb->ip_summed == CHECKSUM_PARTIAL &&
	    (err = skb_checksum_help(skb)))
		goto fail;

	hroom = LL_RESERVED_SPACE(rt->dst.dev);
	if (skb_has_frag_list(skb)) {
		int first_len = skb_pagelen(skb);
		struct sk_buff *frag2;

		if (first_len - hlen > mtu ||
		    ((first_len - hlen) & 7) ||
		    skb_cloned(skb) ||
		    skb_headroom(skb) < (hroom + sizeof(struct frag_hdr)))
			goto slow_path;

		skb_walk_frags(skb, frag) {
			/* Correct geometry. */
			if (frag->len > mtu ||
			    ((frag->len & 7) && frag->next) ||
			    skb_headroom(frag) < (hlen + hroom + sizeof(struct frag_hdr)))
				goto slow_path_clean;

			/* Partially cloned skb? */
			if (skb_shared(frag))
				goto slow_path_clean;

			BUG_ON(frag->sk);
			if (skb->sk) {
				frag->sk = skb->sk;
				frag->destructor = sock_wfree;
			}
			skb->truesize -= frag->truesize;
		}

		err = 0;
		offset = 0;
		/* BUILD HEADER */

		*prevhdr = NEXTHDR_FRAGMENT;
		tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
		if (!tmp_hdr) {
			err = -ENOMEM;
			goto fail;
		}

		frag = skb_shinfo(skb)->frag_list;
		skb_frag_list_init(skb);

		__skb_pull(skb, hlen);
		fh = (struct frag_hdr *)__skb_push(skb, sizeof(struct frag_hdr));
		__skb_push(skb, hlen);
		skb_reset_network_header(skb);
		memcpy(skb_network_header(skb), tmp_hdr, hlen);

		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->frag_off = htons(IP6_MF);
		fh->identification = frag_id;

		first_len = skb_pagelen(skb);
		skb->data_len = first_len - skb_headlen(skb);
		skb->len = first_len;
		ipv6_hdr(skb)->payload_len = htons(first_len -
						   sizeof(struct ipv6hdr));

		dst_hold(&rt->dst);

		for (;;) {
			/* Prepare header of the next frame,
			 * before previous one went down. */
			if (frag) {
				frag->ip_summed = CHECKSUM_NONE;
				skb_reset_transport_header(frag);
				fh = (struct frag_hdr *)__skb_push(frag, sizeof(struct frag_hdr));
				__skb_push(frag, hlen);
				skb_reset_network_header(frag);
				memcpy(skb_network_header(frag), tmp_hdr,
				       hlen);
				offset += skb->len - hlen - sizeof(struct frag_hdr);
				fh->nexthdr = nexthdr;
				fh->reserved = 0;
				fh->frag_off = htons(offset);
				if (frag->next)
					fh->frag_off |= htons(IP6_MF);
				fh->identification = frag_id;
				ipv6_hdr(frag)->payload_len =
						htons(frag->len -
						      sizeof(struct ipv6hdr));
				ip6_copy_metadata(frag, skb);
			}

			err = output(net, sk, skb);
			if (!err)
				IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
					      IPSTATS_MIB_FRAGCREATES);

			if (err || !frag)
				break;

			skb = frag;
			frag = skb->next;
			skb->next = NULL;
		}

		kfree(tmp_hdr);

		if (err == 0) {
			IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
				      IPSTATS_MIB_FRAGOKS);
			ip6_rt_put(rt);
			return 0;
		}

		kfree_skb_list(frag);

		IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
			      IPSTATS_MIB_FRAGFAILS);
		ip6_rt_put(rt);
		return err;

slow_path_clean:
		skb_walk_frags(skb, frag2) {
			if (frag2 == frag)
				break;
			frag2->sk = NULL;
			frag2->destructor = NULL;
			skb->truesize += frag2->truesize;
		}
	}

slow_path:
	left = skb->len - hlen;		/* Space per frame */
	ptr = hlen;			/* Where to start from */

	/*
	 *	Fragment the datagram.
	 */

	troom = rt->dst.dev->needed_tailroom;

	/*
	 *	Keep copying data until we run out.
	 */
	while (left > 0) {
		u8 *fragnexthdr_offset;

		len = left;
		/* IF: it doesn't fit, use 'mtu' - the data space left */
		if (len > mtu)
			len = mtu;
		/* IF: we are not sending up to and including the packet end
		 * then align the next start on an eight byte boundary */
		if (len < left)
			len &= ~7;

		/* Allocate buffer */
		frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
				 hroom + troom, GFP_ATOMIC);
		if (!frag) {
			err = -ENOMEM;
			goto fail;
		}

		/*
		 *	Set up data on packet
		 */

		ip6_copy_metadata(frag, skb);
		skb_reserve(frag, hroom);
		skb_put(frag, len + hlen + sizeof(struct frag_hdr));
		skb_reset_network_header(frag);
		fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
		frag->transport_header = (frag->network_header + hlen +
					  sizeof(struct frag_hdr));

		/*
		 *	Charge the memory for the fragment to any owner
		 *	it might possess
		 */
		if (skb->sk)
			skb_set_owner_w(frag, skb->sk);

		/*
		 *	Copy the packet header into the new buffer.
		 */
		skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);

		fragnexthdr_offset = skb_network_header(frag);
		fragnexthdr_offset += prevhdr - skb_network_header(skb);
		*fragnexthdr_offset = NEXTHDR_FRAGMENT;

		/*
		 *	Build fragment header.
		 */
		fh->nexthdr = nexthdr;
		fh->reserved = 0;
		fh->identification = frag_id;

		/*
		 *	Copy a block of the IP datagram.
		 */
		BUG_ON(skb_copy_bits(skb, ptr, skb_transport_header(frag),
				     len));
		left -= len;

		fh->frag_off = htons(offset);
		if (left > 0)
			fh->frag_off |= htons(IP6_MF);
		ipv6_hdr(frag)->payload_len = htons(frag->len -
						    sizeof(struct ipv6hdr));

		ptr += len;
		offset += len;

		/*
		 *	Put this fragment into the sending queue.
		 */
		err = output(net, sk, frag);
		if (err)
			goto fail;

		IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
			      IPSTATS_MIB_FRAGCREATES);
	}
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGOKS);
	consume_skb(skb);
	return err;

fail_toobig:
	if (skb->sk && dst_allfrag(skb_dst(skb)))
		sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);

	skb->dev = skb_dst(skb)->dev;
	icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
	err = -EMSGSIZE;

fail:
	IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
		      IPSTATS_MIB_FRAGFAILS);
	kfree_skb(skb);
	return err;
}
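
/*
 * Worked example (added for illustration; not in the original file):
 * with an outgoing mtu of 1500 and an unfragmentable part of
 * hlen = 40 (just the IPv6 header), the check above requires
 * 1500 >= 40 + 8 + 8, and the per-fragment payload budget becomes
 *
 *	mtu = 1500 - 40 - 8 = 1452 bytes,
 *
 * which the slow path then rounds down to a multiple of 8 for every
 * fragment but the last: len = 1452 & ~7 = 1448.  Offsets stored in
 * fh->frag_off are byte counts that stay 8-aligned, so their low
 * three bits are free for flags such as IP6_MF.
 */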
static inline int ip6_rt_check(const struct rt6key *rt_key,
			       const struct in6_addr *fl_addr,
			       const struct in6_addr *addr_cache)
{
	return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
		(!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache));
}
static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
					  struct dst_entry *dst,
					  const struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct rt6_info *rt;

	if (!dst)
		goto out;

	if (dst->ops->family != AF_INET6) {
		dst_release(dst);
		return NULL;
	}

	rt = (struct rt6_info *)dst;
	/* Yes, checking route validity in the not-connected
	 * case is not very simple. Take into account,
	 * that we do not support routing by source, TOS,
	 * and MSG_DONTROUTE		--ANK (980726)
	 *
	 * 1. ip6_rt_check(): If route was host route,
	 *    check that cached destination is current.
	 *    If it is network route, we still may
	 *    check its validity using saved pointer
	 *    to the last used address: daddr_cache.
	 *    We do not want to save whole address now,
	 *    (because main consumer of this service
	 *    is tcp, which does not have this problem),
	 *    so that the last trick works only on connected
	 *    sockets.
	 * 2. oif also should be the same.
	 */
	if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
#ifdef CONFIG_IPV6_SUBTREES
	    ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
#endif
	    (!(fl6->flowi6_flags & FLOWI_FLAG_SKIP_NH_OIF) &&
	     (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex))) {
		dst_release(dst);
		dst = NULL;
	}

out:
	return dst;
}
static int ip6_dst_lookup_tail(struct net *net, const struct sock *sk,
			       struct dst_entry **dst, struct flowi6 *fl6)
{
#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	struct neighbour *n;
	struct rt6_info *rt;
#endif
	int err;
	int flags = 0;

	/* The correct way to handle this would be to do
	 * ip6_route_get_saddr, and then ip6_route_output; however,
	 * the route-specific preferred source forces the
	 * ip6_route_output call _before_ ip6_route_get_saddr.
	 *
	 * In source specific routing (no src=any default route),
	 * ip6_route_output will fail given src=any saddr, though, so
	 * that's why we try it again later.
	 */
	if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) {
		struct rt6_info *rt;
		bool had_dst = *dst != NULL;

		if (!had_dst)
			*dst = ip6_route_output(net, sk, fl6);
		rt = (*dst)->error ? NULL : (struct rt6_info *)*dst;
		err = ip6_route_get_saddr(net, rt, &fl6->daddr,
					  sk ? inet6_sk(sk)->srcprefs : 0,
					  &fl6->saddr);
		if (err)
			goto out_err_release;

		/* If we had an erroneous initial result, pretend it
		 * never existed and let the SA-enabled version take
		 * over.
		 */
		if (!had_dst && (*dst)->error) {
			dst_release(*dst);
			*dst = NULL;
		}

		if (fl6->flowi6_oif)
			flags |= RT6_LOOKUP_F_IFACE;
	}

	if (!*dst)
		*dst = ip6_route_output_flags(net, sk, fl6, flags);

	err = (*dst)->error;
	if (err)
		goto out_err_release;

#ifdef CONFIG_IPV6_OPTIMISTIC_DAD
	/*
	 * Here if the dst entry we've looked up
	 * has a neighbour entry that is in the INCOMPLETE
	 * state and the src address from the flow is
	 * marked as OPTIMISTIC, we release the found
	 * dst entry and replace it instead with the
	 * dst entry of the nexthop router
	 */
	rt = (struct rt6_info *) *dst;
	rcu_read_lock_bh();
	n = __ipv6_neigh_lookup_noref(rt->dst.dev,
				      rt6_nexthop(rt, &fl6->daddr));
	err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0;
	rcu_read_unlock_bh();

	if (err) {
		struct inet6_ifaddr *ifp;
		struct flowi6 fl_gw6;
		int redirect;

		ifp = ipv6_get_ifaddr(net, &fl6->saddr,
				      (*dst)->dev, 1);

		redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
		if (ifp)
			in6_ifa_put(ifp);

		if (redirect) {
			/*
			 * We need to get the dst entry for the
			 * default router instead
			 */
			dst_release(*dst);
			memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
			memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
			*dst = ip6_route_output(net, sk, &fl_gw6);
			err = (*dst)->error;
			if (err)
				goto out_err_release;
		}
	}
#endif
	if (ipv6_addr_v4mapped(&fl6->saddr) &&
	    !(ipv6_addr_v4mapped(&fl6->daddr) || ipv6_addr_any(&fl6->daddr))) {
		err = -EAFNOSUPPORT;
		goto out_err_release;
	}

	return 0;

out_err_release:
	if (err == -ENETUNREACH)
		IP6_INC_STATS(net, NULL, IPSTATS_MIB_OUTNOROUTES);
	dst_release(*dst);
	*dst = NULL;
	return err;
}
/**
 *	ip6_dst_lookup - perform route lookup on flow
 *	@net: network namespace to perform the lookup in
 *	@sk: socket which provides route info
 *	@dst: pointer to dst_entry * for result
 *	@fl6: flow to lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns zero on success, or a standard errno code on error.
 */
int ip6_dst_lookup(struct net *net, struct sock *sk, struct dst_entry **dst,
		   struct flowi6 *fl6)
{
	*dst = NULL;
	return ip6_dst_lookup_tail(net, sk, dst, fl6);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup);
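
/*
 * Illustrative sketch (added; not part of the original file): a
 * typical ip6_dst_lookup() call site fills a flowi6 first.  The helper
 * name and field choices here are assumptions for the example.
 */
static int __maybe_unused example_ip6_dst_lookup(struct net *net,
						 struct sock *sk,
						 const struct in6_addr *daddr)
{
	struct dst_entry *dst = NULL;
	struct flowi6 fl6;
	int err;

	memset(&fl6, 0, sizeof(fl6));
	fl6.daddr = *daddr;	/* saddr left as any: resolved in the tail */

	err = ip6_dst_lookup(net, sk, &dst, &fl6);
	if (err)
		return err;

	dst_release(dst);	/* example only; real callers keep the dst */
	return 0;
}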
/**
 *	ip6_dst_lookup_flow - perform route lookup on flow with ipsec
 *	@sk: socket which provides route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_dst_lookup_flow(const struct sock *sk, struct flowi6 *fl6,
				      const struct in6_addr *final_dst)
{
	struct dst_entry *dst = NULL;
	int err;

	err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6);
	if (err)
		return ERR_PTR(err);
	if (final_dst)
		fl6->daddr = *final_dst;
	if (!fl6->flowi6_oif)
		fl6->flowi6_oif = l3mdev_fib_oif(dst->dev);

	return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
}
EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
/**
 *	ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
 *	@sk: socket which provides the dst cache and route info
 *	@fl6: flow to lookup
 *	@final_dst: final destination address for ipsec lookup
 *
 *	This function performs a route lookup on the given flow with the
 *	possibility of using the cached route in the socket if it is valid.
 *	It will take the socket dst lock when operating on the dst cache.
 *	As a result, this function can only be used in process context.
 *
 *	It returns a valid dst pointer on success, or a pointer encoded
 *	error code.
 */
struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
					 const struct in6_addr *final_dst)
{
	struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);

	dst = ip6_sk_dst_check(sk, dst, fl6);
	if (!dst)
		dst = ip6_dst_lookup_flow(sk, fl6, final_dst);

	return dst;
}
EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
static inline int ip6_ufo_append_data(struct sock *sk,
			struct sk_buff_head *queue,
			int getfrag(void *from, char *to, int offset, int len,
			int odd, struct sk_buff *skb),
			void *from, int length, int hh_len, int fragheaderlen,
			int exthdrlen, int transhdrlen, int mtu,
			unsigned int flags, const struct flowi6 *fl6)
{
	struct sk_buff *skb;
	int err;

	/* There is support for UDP large send offload by network
	 * device, so create one single skb packet containing complete
	 * udp datagram
	 */
	skb = skb_peek_tail(queue);
	if (!skb) {
		skb = sock_alloc_send_skb(sk,
			hh_len + fragheaderlen + transhdrlen + 20,
			(flags & MSG_DONTWAIT), &err);
		if (!skb)
			return err;

		/* reserve space for Hardware header */
		skb_reserve(skb, hh_len);

		/* create space for UDP/IP header */
		skb_put(skb, fragheaderlen + transhdrlen);

		/* initialize network header pointer */
		skb_set_network_header(skb, exthdrlen);

		/* initialize protocol header pointer */
		skb->transport_header = skb->network_header + fragheaderlen;

		skb->protocol = htons(ETH_P_IPV6);
		skb->csum = 0;

		__skb_queue_tail(queue, skb);
	} else if (skb_is_gso(skb)) {
		goto append;
	}

	skb->ip_summed = CHECKSUM_PARTIAL;
	/* Specify the length of each IPv6 datagram fragment.
	 * It has to be a multiple of 8.
	 */
	skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
				     sizeof(struct frag_hdr)) & ~7;
	skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
	skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk),
							 &fl6->daddr,
							 &fl6->saddr);

append:
	return skb_append_datato_frags(sk, skb, getfrag, from,
				       (length - transhdrlen));
}
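
/*
 * Worked example (added for illustration; not in the original file):
 * for mtu = 1500 and fragheaderlen = 40,
 *
 *	gso_size = (1500 - 40 - 8) & ~7 = 1448,
 *
 * so the offloading device emits 1448-byte, 8-byte-aligned payload
 * chunks per fragment, matching what the software slow path in
 * ip6_fragment() would produce for the same packet.
 */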
static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
					       gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}

static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
						gfp_t gfp)
{
	return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
}
static void ip6_append_data_mtu(unsigned int *mtu,
				int *maxfraglen,
				unsigned int fragheaderlen,
				struct sk_buff *skb,
				struct rt6_info *rt,
				unsigned int orig_mtu)
{
	if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
		if (!skb) {
			/* first fragment, reserve header_len */
			*mtu = orig_mtu - rt->dst.header_len;

		} else {
			/*
			 * this fragment is not first, the headers
			 * space is regarded as data space.
			 */
			*mtu = orig_mtu;
		}
		*maxfraglen = ((*mtu - fragheaderlen) & ~7)
			      + fragheaderlen - sizeof(struct frag_hdr);
	}
}
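
/*
 * Worked example (added for illustration; not in the original file):
 * with *mtu = 1500 and fragheaderlen = 40 (IPv6 header, no extension
 * headers),
 *
 *	*maxfraglen = ((1500 - 40) & ~7) + 40 - 8
 *		    = 1456 + 32 = 1488,
 *
 * i.e. each non-final fragment may hold 1488 bytes at the network
 * layer: 40 bytes of headers plus 1448 bytes of 8-byte-aligned
 * payload (the 8 bytes subtracted account for the fragment header).
 */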
static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork,
			  struct inet6_cork *v6_cork,
			  int hlimit, int tclass, struct ipv6_txoptions *opt,
			  struct rt6_info *rt, struct flowi6 *fl6)
{
	struct ipv6_pinfo *np = inet6_sk(sk);
	unsigned int mtu;

	/*
	 * setup for corking
	 */
	if (opt) {
		if (WARN_ON(v6_cork->opt))
			return -EINVAL;

		v6_cork->opt = kzalloc(sizeof(*opt), sk->sk_allocation);
		if (unlikely(!v6_cork->opt))
			return -ENOBUFS;

		v6_cork->opt->tot_len = sizeof(*opt);
		v6_cork->opt->opt_flen = opt->opt_flen;
		v6_cork->opt->opt_nflen = opt->opt_nflen;

		v6_cork->opt->dst0opt = ip6_opt_dup(opt->dst0opt,
						    sk->sk_allocation);
		if (opt->dst0opt && !v6_cork->opt->dst0opt)
			return -ENOBUFS;

		v6_cork->opt->dst1opt = ip6_opt_dup(opt->dst1opt,
						    sk->sk_allocation);
		if (opt->dst1opt && !v6_cork->opt->dst1opt)
			return -ENOBUFS;

		v6_cork->opt->hopopt = ip6_opt_dup(opt->hopopt,
						   sk->sk_allocation);
		if (opt->hopopt && !v6_cork->opt->hopopt)
			return -ENOBUFS;

		v6_cork->opt->srcrt = ip6_rthdr_dup(opt->srcrt,
						    sk->sk_allocation);
		if (opt->srcrt && !v6_cork->opt->srcrt)
			return -ENOBUFS;

		/* need source address above - Miyazawa */
	}
	dst_hold(&rt->dst);
	cork->base.dst = &rt->dst;
	cork->fl.u.ip6 = *fl6;
	v6_cork->hop_limit = hlimit;
	v6_cork->tclass = tclass;
	if (rt->dst.flags & DST_XFRM_TUNNEL)
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(&rt->dst);
	else
		mtu = np->pmtudisc >= IPV6_PMTUDISC_PROBE ?
		      READ_ONCE(rt->dst.dev->mtu) : dst_mtu(rt->dst.path);
	if (np->frag_size < mtu) {
		if (np->frag_size)
			mtu = np->frag_size;
	}
	if (mtu < IPV6_MIN_MTU)
		return -EINVAL;
	cork->base.fragsize = mtu;
	if (dst_allfrag(rt->dst.path))
		cork->base.flags |= IPCORK_ALLFRAG;
	cork->base.length = 0;

	return 0;
}
static int __ip6_append_data(struct sock *sk,
			     struct flowi6 *fl6,
			     struct sk_buff_head *queue,
			     struct inet_cork *cork,
			     struct inet6_cork *v6_cork,
			     struct page_frag *pfrag,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     unsigned int flags, int dontfrag)
{
	struct sk_buff *skb, *skb_prev = NULL;
	unsigned int maxfraglen, fragheaderlen, mtu, orig_mtu, pmtu;
	int exthdrlen = 0;
	int dst_exthdrlen = 0;
	int hh_len;
	int copy;
	int err;
	int offset = 0;
	__u8 tx_flags = 0;
	u32 tskey = 0;
	struct rt6_info *rt = (struct rt6_info *)cork->dst;
	struct ipv6_txoptions *opt = v6_cork->opt;
	int csummode = CHECKSUM_NONE;
	unsigned int maxnonfragsize, headersize;

	skb = skb_peek_tail(queue);
	if (!skb) {
		exthdrlen = opt ? opt->opt_flen : 0;
		dst_exthdrlen = rt->dst.header_len - rt->rt6i_nfheader_len;
	}

	mtu = cork->fragsize;
	orig_mtu = mtu;

	hh_len = LL_RESERVED_SPACE(rt->dst.dev);

	fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
			(opt ? opt->opt_nflen : 0);
	maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen -
		     sizeof(struct frag_hdr);

	headersize = sizeof(struct ipv6hdr) +
		     (opt ? opt->opt_flen + opt->opt_nflen : 0) +
		     (dst_allfrag(&rt->dst) ?
		      sizeof(struct frag_hdr) : 0) +
		     rt->rt6i_nfheader_len;

	/* as per RFC 7112 section 5, the entire IPv6 Header Chain must fit
	 * the first fragment
	 */
	if (headersize + transhdrlen > mtu)
		goto emsgsize;

	if (cork->length + length > mtu - headersize && dontfrag &&
	    (sk->sk_protocol == IPPROTO_UDP ||
	     sk->sk_protocol == IPPROTO_RAW)) {
		ipv6_local_rxpmtu(sk, fl6, mtu - headersize +
				sizeof(struct ipv6hdr));
		goto emsgsize;
	}

	if (ip6_sk_ignore_df(sk))
		maxnonfragsize = sizeof(struct ipv6hdr) + IPV6_MAXPLEN;
	else
		maxnonfragsize = mtu;

	if (cork->length + length > maxnonfragsize - headersize) {
emsgsize:
		pmtu = max_t(int, mtu - headersize + sizeof(struct ipv6hdr), 0);
		ipv6_local_error(sk, EMSGSIZE, fl6, pmtu);
		return -EMSGSIZE;
	}

	/* CHECKSUM_PARTIAL only with no extension headers and when
	 * we are not going to fragment
	 */
	if (transhdrlen && sk->sk_protocol == IPPROTO_UDP &&
	    headersize == sizeof(struct ipv6hdr) &&
	    length < mtu - headersize &&
	    !(flags & MSG_MORE) &&
	    rt->dst.dev->features & NETIF_F_V6_CSUM)
		csummode = CHECKSUM_PARTIAL;

	if (sk->sk_type == SOCK_DGRAM || sk->sk_type == SOCK_RAW) {
		sock_tx_timestamp(sk, &tx_flags);
		if (tx_flags & SKBTX_ANY_SW_TSTAMP &&
		    sk->sk_tsflags & SOF_TIMESTAMPING_OPT_ID)
			tskey = sk->sk_tskey++;
	}

	/*
	 * Let's try using as much space as possible.
	 * Use MTU if total length of the message fits into the MTU.
	 * Otherwise, we need to reserve fragment header and
	 * fragment alignment (= 8-15 octets, in total).
	 *
	 * Note that we may need to "move" the data from the tail
	 * of the buffer to the new fragment when we split
	 * the message.
	 *
	 * FIXME: It may be fragmented into multiple chunks
	 *        at once if non-fragmentable extension headers
	 *        are too large.
	 * --yoshfuji
	 */

	cork->length += length;
	if ((skb && skb_is_gso(skb)) ||
	    (((length + (skb ? skb->len : headersize)) > mtu) &&
	    (skb_queue_len(queue) <= 1) &&
	    (sk->sk_protocol == IPPROTO_UDP) &&
	    (rt->dst.dev->features & NETIF_F_UFO) &&
	    (sk->sk_type == SOCK_DGRAM) && !udp_get_no_check6_tx(sk))) {
		err = ip6_ufo_append_data(sk, queue, getfrag, from, length,
					  hh_len, fragheaderlen, exthdrlen,
					  transhdrlen, mtu, flags, fl6);
		if (err)
			goto error;
		return 0;
	}

	if (!skb)
		goto alloc_new_skb;

	while (length > 0) {
		/* Check if the remaining data fits into current packet. */
		copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
		if (copy < length)
			copy = maxfraglen - skb->len;

		if (copy <= 0) {
			char *data;
			unsigned int datalen;
			unsigned int fraglen;
			unsigned int fraggap;
			unsigned int alloclen;
alloc_new_skb:
			/* There's no room in the current skb */
			if (skb)
				fraggap = skb->len - maxfraglen;
			else
				fraggap = 0;
			/* update mtu and maxfraglen if necessary */
			if (!skb || !skb_prev)
				ip6_append_data_mtu(&mtu, &maxfraglen,
						    fragheaderlen, skb, rt,
						    orig_mtu);

			skb_prev = skb;

			/*
			 * If remaining data exceeds the mtu,
			 * we know we need more fragment(s).
			 */
			datalen = length + fraggap;

			if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
				datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
			if ((flags & MSG_MORE) &&
			    !(rt->dst.dev->features&NETIF_F_SG))
				alloclen = mtu;
			else
				alloclen = datalen + fragheaderlen;

			alloclen += dst_exthdrlen;

			if (datalen != length + fraggap) {
				/*
				 * this is not the last fragment, the trailer
				 * space is regarded as data space.
				 */
				datalen += rt->dst.trailer_len;
			}

			alloclen += rt->dst.trailer_len;
			fraglen = datalen + fragheaderlen;

			/*
			 * We just reserve space for fragment header.
			 * Note: this may be overallocation if the message
			 * (without MSG_MORE) fits into the MTU.
			 */
			alloclen += sizeof(struct frag_hdr);

			copy = datalen - transhdrlen - fraggap;
			if (copy < 0) {
				err = -EINVAL;
				goto error;
			}
			if (transhdrlen) {
				skb = sock_alloc_send_skb(sk,
						alloclen + hh_len,
						(flags & MSG_DONTWAIT), &err);
			} else {
				skb = NULL;
				if (atomic_read(&sk->sk_wmem_alloc) <=
				    2 * sk->sk_sndbuf)
					skb = sock_wmalloc(sk,
							   alloclen + hh_len, 1,
							   sk->sk_allocation);
				if (unlikely(!skb))
					err = -ENOBUFS;
			}
			if (!skb)
				goto error;
			/*
			 *	Fill in the control structures
			 */
			skb->protocol = htons(ETH_P_IPV6);
			skb->ip_summed = csummode;
			skb->csum = 0;
			/* reserve for fragmentation and ipsec header */
			skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
				    dst_exthdrlen);

			/* Only the initial fragment is time stamped */
			skb_shinfo(skb)->tx_flags = tx_flags;
			tx_flags = 0;
			skb_shinfo(skb)->tskey = tskey;
			tskey = 0;

			/*
			 *	Find where to start putting bytes
			 */
			data = skb_put(skb, fraglen);
			skb_set_network_header(skb, exthdrlen);
			data += fragheaderlen;
			skb->transport_header = (skb->network_header +
						 fragheaderlen);
			if (fraggap) {
				skb->csum = skb_copy_and_csum_bits(
					skb_prev, maxfraglen,
					data + transhdrlen, fraggap, 0);
				skb_prev->csum = csum_sub(skb_prev->csum,
							  skb->csum);
				data += fraggap;
				pskb_trim_unique(skb_prev, maxfraglen);
			}
			if (copy > 0 &&
			    getfrag(from, data + transhdrlen, offset,
				    copy, fraggap, skb) < 0) {
				err = -EFAULT;
				kfree_skb(skb);
				goto error;
			}

			offset += copy;
			length -= datalen - fraggap;
			transhdrlen = 0;
			exthdrlen = 0;
			dst_exthdrlen = 0;

			/*
			 * Put the packet on the pending queue
			 */
			__skb_queue_tail(queue, skb);
			continue;
		}

		if (copy > length)
			copy = length;

		if (!(rt->dst.dev->features&NETIF_F_SG) &&
		    skb_tailroom(skb) >= copy) {
			unsigned int off;

			off = skb->len;
			if (getfrag(from, skb_put(skb, copy),
						offset, copy, off, skb) < 0) {
				__skb_trim(skb, off);
				err = -EFAULT;
				goto error;
			}
		} else {
			int i = skb_shinfo(skb)->nr_frags;

			err = -ENOMEM;
			if (!sk_page_frag_refill(sk, pfrag))
				goto error;

			if (!skb_can_coalesce(skb, i, pfrag->page,
					      pfrag->offset)) {
				err = -EMSGSIZE;
				if (i == MAX_SKB_FRAGS)
					goto error;

				__skb_fill_page_desc(skb, i, pfrag->page,
						     pfrag->offset, 0);
				skb_shinfo(skb)->nr_frags = ++i;
				get_page(pfrag->page);
			}
			copy = min_t(int, copy, pfrag->size - pfrag->offset);
			if (getfrag(from,
				    page_address(pfrag->page) + pfrag->offset,
				    offset, copy, skb->len, skb) < 0)
				goto error_efault;

			pfrag->offset += copy;
			skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
			skb->len += copy;
			skb->data_len += copy;
			skb->truesize += copy;
			atomic_add(copy, &sk->sk_wmem_alloc);
		}
		offset += copy;
		length -= copy;
	}

	return 0;

error_efault:
	err = -EFAULT;
error:
	cork->length -= length;
	IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
	return err;
}
int ip6_append_data(struct sock *sk,
		    int getfrag(void *from, char *to, int offset, int len,
				int odd, struct sk_buff *skb),
		    void *from, int length, int transhdrlen, int hlimit,
		    int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
		    struct rt6_info *rt, unsigned int flags, int dontfrag)
{
	struct inet_sock *inet = inet_sk(sk);
	struct ipv6_pinfo *np = inet6_sk(sk);
	int exthdrlen;
	int err;

	if (flags&MSG_PROBE)
		return 0;
	if (skb_queue_empty(&sk->sk_write_queue)) {
		/*
		 * setup for corking
		 */
		err = ip6_setup_cork(sk, &inet->cork, &np->cork, hlimit,
				     tclass, opt, rt, fl6);
		if (err)
			return err;

		exthdrlen = (opt ? opt->opt_flen : 0);
		length += exthdrlen;
		transhdrlen += exthdrlen;
	} else {
		fl6 = &inet->cork.fl.u.ip6;
		transhdrlen = 0;
	}

	return __ip6_append_data(sk, fl6, &sk->sk_write_queue, &inet->cork.base,
				 &np->cork, sk_page_frag(sk), getfrag,
				 from, length, transhdrlen, flags, dontfrag);
}
EXPORT_SYMBOL_GPL(ip6_append_data);
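
/*
 * Illustrative sketch (added; not part of the original file): the
 * corking API as a datagram sender would use it - append one chunk,
 * then push the pending frames, or flush them on error.  The helper
 * name and parameter choices (no extension headers, route-default
 * hop limit, tclass 0) are assumptions for the example.
 */
static int __maybe_unused example_corked_send(struct sock *sk,
					      int getfrag(void *from, char *to,
							  int offset, int len,
							  int odd,
							  struct sk_buff *skb),
					      void *data, int len,
					      struct flowi6 *fl6,
					      struct rt6_info *rt)
{
	int hlimit = ip6_dst_hoplimit(&rt->dst);
	int err;

	lock_sock(sk);		/* ip6_append_data() expects the lock held */
	err = ip6_append_data(sk, getfrag, data, len, 0 /* transhdrlen */,
			      hlimit, 0 /* tclass */, NULL, fl6, rt,
			      MSG_DONTWAIT, 0 /* dontfrag */);
	if (err)
		ip6_flush_pending_frames(sk);
	else
		err = ip6_push_pending_frames(sk);
	release_sock(sk);
	return err;
}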
static void ip6_cork_release(struct inet_cork_full *cork,
			     struct inet6_cork *v6_cork)
{
	if (v6_cork->opt) {
		kfree(v6_cork->opt->dst0opt);
		kfree(v6_cork->opt->dst1opt);
		kfree(v6_cork->opt->hopopt);
		kfree(v6_cork->opt->srcrt);
		kfree(v6_cork->opt);
		v6_cork->opt = NULL;
	}

	if (cork->base.dst) {
		dst_release(cork->base.dst);
		cork->base.dst = NULL;
		cork->base.flags &= ~IPCORK_ALLFRAG;
	}
	memset(&cork->fl, 0, sizeof(cork->fl));
}
struct sk_buff *__ip6_make_skb(struct sock *sk,
			       struct sk_buff_head *queue,
			       struct inet_cork_full *cork,
			       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb, *tmp_skb;
	struct sk_buff **tail_skb;
	struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
	struct ipv6_pinfo *np = inet6_sk(sk);
	struct net *net = sock_net(sk);
	struct ipv6hdr *hdr;
	struct ipv6_txoptions *opt = v6_cork->opt;
	struct rt6_info *rt = (struct rt6_info *)cork->base.dst;
	struct flowi6 *fl6 = &cork->fl.u.ip6;
	unsigned char proto = fl6->flowi6_proto;

	skb = __skb_dequeue(queue);
	if (!skb)
		goto out;
	tail_skb = &(skb_shinfo(skb)->frag_list);

	/* move skb->data to ip header from ext header */
	if (skb->data < skb_network_header(skb))
		__skb_pull(skb, skb_network_offset(skb));
	while ((tmp_skb = __skb_dequeue(queue)) != NULL) {
		__skb_pull(tmp_skb, skb_network_header_len(skb));
		*tail_skb = tmp_skb;
		tail_skb = &(tmp_skb->next);
		skb->len += tmp_skb->len;
		skb->data_len += tmp_skb->len;
		skb->truesize += tmp_skb->truesize;
		tmp_skb->destructor = NULL;
		tmp_skb->sk = NULL;
	}

	/* Allow local fragmentation. */
	skb->ignore_df = ip6_sk_ignore_df(sk);

	*final_dst = fl6->daddr;
	__skb_pull(skb, skb_network_header_len(skb));
	if (opt && opt->opt_flen)
		ipv6_push_frag_opts(skb, opt, &proto);
	if (opt && opt->opt_nflen)
		ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);

	skb_push(skb, sizeof(struct ipv6hdr));
	skb_reset_network_header(skb);
	hdr = ipv6_hdr(skb);

	ip6_flow_hdr(hdr, v6_cork->tclass,
		     ip6_make_flowlabel(net, skb, fl6->flowlabel,
					ip6_autoflowlabel(net, np), fl6));
	hdr->hop_limit = v6_cork->hop_limit;
	hdr->nexthdr = proto;
	hdr->saddr = fl6->saddr;
	hdr->daddr = *final_dst;

	skb->priority = sk->sk_priority;
	skb->mark = sk->sk_mark;

	skb_dst_set(skb, dst_clone(&rt->dst));
	IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
	if (proto == IPPROTO_ICMPV6) {
		struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));

		ICMP6MSGOUT_INC_STATS(net, idev, icmp6_hdr(skb)->icmp6_type);
		ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS);
	}

	ip6_cork_release(cork, v6_cork);
out:
	return skb;
}
int ip6_send_skb(struct sk_buff *skb)
{
	struct net *net = sock_net(skb->sk);
	struct rt6_info *rt = (struct rt6_info *)skb_dst(skb);
	int err;

	err = ip6_local_out(net, skb->sk, skb);
	if (err) {
		if (err > 0)
			err = net_xmit_errno(err);
		if (err)
			IP6_INC_STATS(net, rt->rt6i_idev,
				      IPSTATS_MIB_OUTDISCARDS);
	}

	return err;
}
int ip6_push_pending_frames(struct sock *sk)
{
	struct sk_buff *skb;

	skb = ip6_finish_skb(sk);
	if (!skb)
		return 0;

	return ip6_send_skb(skb);
}
EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
static void __ip6_flush_pending_frames(struct sock *sk,
				       struct sk_buff_head *queue,
				       struct inet_cork_full *cork,
				       struct inet6_cork *v6_cork)
{
	struct sk_buff *skb;

	while ((skb = __skb_dequeue_tail(queue)) != NULL) {
		if (skb_dst(skb))
			IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
				      IPSTATS_MIB_OUTDISCARDS);
		kfree_skb(skb);
	}

	ip6_cork_release(cork, v6_cork);
}
void ip6_flush_pending_frames(struct sock *sk)
{
	__ip6_flush_pending_frames(sk, &sk->sk_write_queue,
				   &inet_sk(sk)->cork, &inet6_sk(sk)->cork);
}
EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);
struct sk_buff *ip6_make_skb(struct sock *sk,
			     int getfrag(void *from, char *to, int offset,
					 int len, int odd, struct sk_buff *skb),
			     void *from, int length, int transhdrlen,
			     int hlimit, int tclass,
			     struct ipv6_txoptions *opt, struct flowi6 *fl6,
			     struct rt6_info *rt, unsigned int flags,
			     int dontfrag)
{
	struct inet_cork_full cork;
	struct inet6_cork v6_cork;
	struct sk_buff_head queue;
	int exthdrlen = (opt ? opt->opt_flen : 0);
	int err;

	if (flags & MSG_PROBE)
		return NULL;

	__skb_queue_head_init(&queue);

	cork.base.flags = 0;
	cork.base.addr = 0;
	cork.base.opt = NULL;
	cork.base.dst = NULL;
	v6_cork.opt = NULL;
	err = ip6_setup_cork(sk, &cork, &v6_cork, hlimit, tclass, opt, rt, fl6);
	if (err) {
		ip6_cork_release(&cork, &v6_cork);
		return ERR_PTR(err);
	}

	if (dontfrag < 0)
		dontfrag = inet6_sk(sk)->dontfrag;

	err = __ip6_append_data(sk, fl6, &queue, &cork.base, &v6_cork,
				&current->task_frag, getfrag, from,
				length + exthdrlen, transhdrlen + exthdrlen,
				flags, dontfrag);
	if (err) {
		__ip6_flush_pending_frames(sk, &queue, &cork, &v6_cork);
		return ERR_PTR(err);
	}

	return __ip6_make_skb(sk, &queue, &cork, &v6_cork);
}
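
/*
 * Illustrative sketch (added; not part of the original file): the
 * lockless counterpart to the corked path - build one skb and send
 * it.  Parameter choices mirror the corked example above and are
 * assumptions; the helper name is hypothetical.
 */
static int __maybe_unused example_make_and_send(struct sock *sk,
						int getfrag(void *from, char *to,
							    int offset, int len,
							    int odd,
							    struct sk_buff *skb),
						void *data, int len,
						struct flowi6 *fl6,
						struct rt6_info *rt)
{
	struct sk_buff *skb;

	skb = ip6_make_skb(sk, getfrag, data, len, 0 /* transhdrlen */,
			   ip6_dst_hoplimit(&rt->dst), 0 /* tclass */,
			   NULL, fl6, rt, 0 /* flags */, 0 /* dontfrag */);
	if (IS_ERR_OR_NULL(skb))
		return skb ? PTR_ERR(skb) : 0;

	return ip6_send_skb(skb);	/* consumes the skb */
}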