checkpatch: warn on uapi #includes that #include <uapi/...
[linux/fpc-iii.git] / net / ipv6 / ip6_output.c
blob5552d13ae92f8554c04ec912b82317dfeef065a5
1 /*
2 * IPv6 output functions
3 * Linux INET6 implementation
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * Based on linux/net/ipv4/ip_output.c
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
15 * Changes:
16 * A.N.Kuznetsov : arithmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
42 #include <linux/netfilter.h>
43 #include <linux/netfilter_ipv6.h>
45 #include <net/sock.h>
46 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ndisc.h>
50 #include <net/protocol.h>
51 #include <net/ip6_route.h>
52 #include <net/addrconf.h>
53 #include <net/rawv6.h>
54 #include <net/icmp.h>
55 #include <net/xfrm.h>
56 #include <net/checksum.h>
57 #include <linux/mroute6.h>
59 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
61 int __ip6_local_out(struct sk_buff *skb)
63 int len;
65 len = skb->len - sizeof(struct ipv6hdr);
66 if (len > IPV6_MAXPLEN)
67 len = 0;
68 ipv6_hdr(skb)->payload_len = htons(len);
70 return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
71 skb_dst(skb)->dev, dst_output);
/*
 * Send a locally generated IPv6 packet: run the LOCAL_OUT hook via
 * __ip6_local_out() and, only when that returns 1, pass the packet on to
 * dst_output(). Any other hook result is returned to the caller as is.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int rc = __ip6_local_out(skb);

	if (unlikely(rc != 1))
		return rc;

	return dst_output(skb);
}
EXPORT_SYMBOL_GPL(ip6_local_out);
86 static int ip6_finish_output2(struct sk_buff *skb)
88 struct dst_entry *dst = skb_dst(skb);
89 struct net_device *dev = dst->dev;
90 struct neighbour *neigh;
91 struct rt6_info *rt;
93 skb->protocol = htons(ETH_P_IPV6);
94 skb->dev = dev;
96 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
97 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
99 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
100 ((mroute6_socket(dev_net(dev), skb) &&
101 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
102 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
103 &ipv6_hdr(skb)->saddr))) {
104 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
106 /* Do not check for IFF_ALLMULTI; multicast routing
107 is not supported in any case.
109 if (newskb)
110 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
111 newskb, NULL, newskb->dev,
112 dev_loopback_xmit);
114 if (ipv6_hdr(skb)->hop_limit == 0) {
115 IP6_INC_STATS(dev_net(dev), idev,
116 IPSTATS_MIB_OUTDISCARDS);
117 kfree_skb(skb);
118 return 0;
122 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
123 skb->len);
126 rt = (struct rt6_info *) dst;
127 neigh = rt->n;
128 if (neigh)
129 return dst_neigh_output(dst, neigh, skb);
131 IP6_INC_STATS_BH(dev_net(dst->dev),
132 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
133 kfree_skb(skb);
134 return -EINVAL;
137 static int ip6_finish_output(struct sk_buff *skb)
139 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
140 dst_allfrag(skb_dst(skb)))
141 return ip6_fragment(skb, ip6_finish_output2);
142 else
143 return ip6_finish_output2(skb);
146 int ip6_output(struct sk_buff *skb)
148 struct net_device *dev = skb_dst(skb)->dev;
149 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
150 if (unlikely(idev->cnf.disable_ipv6)) {
151 IP6_INC_STATS(dev_net(dev), idev,
152 IPSTATS_MIB_OUTDISCARDS);
153 kfree_skb(skb);
154 return 0;
157 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
158 ip6_finish_output,
159 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
163 * xmit an sk_buff (used by TCP, SCTP and DCCP)
166 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
167 struct ipv6_txoptions *opt, int tclass)
169 struct net *net = sock_net(sk);
170 struct ipv6_pinfo *np = inet6_sk(sk);
171 struct in6_addr *first_hop = &fl6->daddr;
172 struct dst_entry *dst = skb_dst(skb);
173 struct ipv6hdr *hdr;
174 u8 proto = fl6->flowi6_proto;
175 int seg_len = skb->len;
176 int hlimit = -1;
177 u32 mtu;
179 if (opt) {
180 unsigned int head_room;
182 /* First: exthdrs may take lots of space (~8K for now)
183 MAX_HEADER is not enough.
185 head_room = opt->opt_nflen + opt->opt_flen;
186 seg_len += head_room;
187 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
189 if (skb_headroom(skb) < head_room) {
190 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
191 if (skb2 == NULL) {
192 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
193 IPSTATS_MIB_OUTDISCARDS);
194 kfree_skb(skb);
195 return -ENOBUFS;
197 consume_skb(skb);
198 skb = skb2;
199 skb_set_owner_w(skb, sk);
201 if (opt->opt_flen)
202 ipv6_push_frag_opts(skb, opt, &proto);
203 if (opt->opt_nflen)
204 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
207 skb_push(skb, sizeof(struct ipv6hdr));
208 skb_reset_network_header(skb);
209 hdr = ipv6_hdr(skb);
212 * Fill in the IPv6 header
214 if (np)
215 hlimit = np->hop_limit;
216 if (hlimit < 0)
217 hlimit = ip6_dst_hoplimit(dst);
219 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;
221 hdr->payload_len = htons(seg_len);
222 hdr->nexthdr = proto;
223 hdr->hop_limit = hlimit;
225 hdr->saddr = fl6->saddr;
226 hdr->daddr = *first_hop;
228 skb->priority = sk->sk_priority;
229 skb->mark = sk->sk_mark;
231 mtu = dst_mtu(dst);
232 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
233 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
234 IPSTATS_MIB_OUT, skb->len);
235 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
236 dst->dev, dst_output);
239 net_dbg_ratelimited("IPv6: sending pkt_too_big to self\n");
240 skb->dev = dst->dev;
241 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
242 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
243 kfree_skb(skb);
244 return -EMSGSIZE;
247 EXPORT_SYMBOL(ip6_xmit);
250 * To avoid extra problems ND packets are send through this
251 * routine. It's code duplication but I really want to avoid
252 * extra checks since ipv6_build_header is used by TCP (which
253 * is for us performance critical)
256 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
257 const struct in6_addr *saddr, const struct in6_addr *daddr,
258 int proto, int len)
260 struct ipv6_pinfo *np = inet6_sk(sk);
261 struct ipv6hdr *hdr;
263 skb->protocol = htons(ETH_P_IPV6);
264 skb->dev = dev;
266 skb_reset_network_header(skb);
267 skb_put(skb, sizeof(struct ipv6hdr));
268 hdr = ipv6_hdr(skb);
270 *(__be32*)hdr = htonl(0x60000000);
272 hdr->payload_len = htons(len);
273 hdr->nexthdr = proto;
274 hdr->hop_limit = np->hop_limit;
276 hdr->saddr = *saddr;
277 hdr->daddr = *daddr;
279 return 0;
282 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
284 struct ip6_ra_chain *ra;
285 struct sock *last = NULL;
287 read_lock(&ip6_ra_lock);
288 for (ra = ip6_ra_chain; ra; ra = ra->next) {
289 struct sock *sk = ra->sk;
290 if (sk && ra->sel == sel &&
291 (!sk->sk_bound_dev_if ||
292 sk->sk_bound_dev_if == skb->dev->ifindex)) {
293 if (last) {
294 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
295 if (skb2)
296 rawv6_rcv(last, skb2);
298 last = sk;
302 if (last) {
303 rawv6_rcv(last, skb);
304 read_unlock(&ip6_ra_lock);
305 return 1;
307 read_unlock(&ip6_ra_lock);
308 return 0;
311 static int ip6_forward_proxy_check(struct sk_buff *skb)
313 struct ipv6hdr *hdr = ipv6_hdr(skb);
314 u8 nexthdr = hdr->nexthdr;
315 __be16 frag_off;
316 int offset;
318 if (ipv6_ext_hdr(nexthdr)) {
319 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
320 if (offset < 0)
321 return 0;
322 } else
323 offset = sizeof(struct ipv6hdr);
325 if (nexthdr == IPPROTO_ICMPV6) {
326 struct icmp6hdr *icmp6;
328 if (!pskb_may_pull(skb, (skb_network_header(skb) +
329 offset + 1 - skb->data)))
330 return 0;
332 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
334 switch (icmp6->icmp6_type) {
335 case NDISC_ROUTER_SOLICITATION:
336 case NDISC_ROUTER_ADVERTISEMENT:
337 case NDISC_NEIGHBOUR_SOLICITATION:
338 case NDISC_NEIGHBOUR_ADVERTISEMENT:
339 case NDISC_REDIRECT:
340 /* For reaction involving unicast neighbor discovery
341 * message destined to the proxied address, pass it to
342 * input function.
344 return 1;
345 default:
346 break;
351 * The proxying router can't forward traffic sent to a link-local
352 * address, so signal the sender and discard the packet. This
353 * behavior is clarified by the MIPv6 specification.
355 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
356 dst_link_failure(skb);
357 return -1;
360 return 0;
/*
 * Continuation invoked after the NF_INET_FORWARD hook: pass the packet
 * to the output routine of its route and return that routine's result.
 */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	int rc = dst_output(skb);

	return rc;
}
368 int ip6_forward(struct sk_buff *skb)
370 struct dst_entry *dst = skb_dst(skb);
371 struct ipv6hdr *hdr = ipv6_hdr(skb);
372 struct inet6_skb_parm *opt = IP6CB(skb);
373 struct net *net = dev_net(dst->dev);
374 u32 mtu;
376 if (net->ipv6.devconf_all->forwarding == 0)
377 goto error;
379 if (skb_warn_if_lro(skb))
380 goto drop;
382 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
383 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
384 goto drop;
387 if (skb->pkt_type != PACKET_HOST)
388 goto drop;
390 skb_forward_csum(skb);
393 * We DO NOT make any processing on
394 * RA packets, pushing them to user level AS IS
395 * without ane WARRANTY that application will be able
396 * to interpret them. The reason is that we
397 * cannot make anything clever here.
399 * We are not end-node, so that if packet contains
400 * AH/ESP, we cannot make anything.
401 * Defragmentation also would be mistake, RA packets
402 * cannot be fragmented, because there is no warranty
403 * that different fragments will go along one path. --ANK
405 if (opt->ra) {
406 u8 *ptr = skb_network_header(skb) + opt->ra;
407 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
408 return 0;
412 * check and decrement ttl
414 if (hdr->hop_limit <= 1) {
415 /* Force OUTPUT device used as source address */
416 skb->dev = dst->dev;
417 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
418 IP6_INC_STATS_BH(net,
419 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
421 kfree_skb(skb);
422 return -ETIMEDOUT;
425 /* XXX: idev->cnf.proxy_ndp? */
426 if (net->ipv6.devconf_all->proxy_ndp &&
427 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
428 int proxied = ip6_forward_proxy_check(skb);
429 if (proxied > 0)
430 return ip6_input(skb);
431 else if (proxied < 0) {
432 IP6_INC_STATS(net, ip6_dst_idev(dst),
433 IPSTATS_MIB_INDISCARDS);
434 goto drop;
438 if (!xfrm6_route_forward(skb)) {
439 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
440 goto drop;
442 dst = skb_dst(skb);
444 /* IPv6 specs say nothing about it, but it is clear that we cannot
445 send redirects to source routed frames.
446 We don't send redirects to frames decapsulated from IPsec.
448 if (skb->dev == dst->dev && opt->srcrt == 0 && !skb_sec_path(skb)) {
449 struct in6_addr *target = NULL;
450 struct inet_peer *peer;
451 struct rt6_info *rt;
454 * incoming and outgoing devices are the same
455 * send a redirect.
458 rt = (struct rt6_info *) dst;
459 if (rt->rt6i_flags & RTF_GATEWAY)
460 target = &rt->rt6i_gateway;
461 else
462 target = &hdr->daddr;
464 peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1);
466 /* Limit redirects both by destination (here)
467 and by source (inside ndisc_send_redirect)
469 if (inet_peer_xrlim_allow(peer, 1*HZ))
470 ndisc_send_redirect(skb, target);
471 if (peer)
472 inet_putpeer(peer);
473 } else {
474 int addrtype = ipv6_addr_type(&hdr->saddr);
476 /* This check is security critical. */
477 if (addrtype == IPV6_ADDR_ANY ||
478 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
479 goto error;
480 if (addrtype & IPV6_ADDR_LINKLOCAL) {
481 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
482 ICMPV6_NOT_NEIGHBOUR, 0);
483 goto error;
487 mtu = dst_mtu(dst);
488 if (mtu < IPV6_MIN_MTU)
489 mtu = IPV6_MIN_MTU;
491 if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) ||
492 (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) {
493 /* Again, force OUTPUT device used as source address */
494 skb->dev = dst->dev;
495 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
496 IP6_INC_STATS_BH(net,
497 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
498 IP6_INC_STATS_BH(net,
499 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
500 kfree_skb(skb);
501 return -EMSGSIZE;
504 if (skb_cow(skb, dst->dev->hard_header_len)) {
505 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
506 goto drop;
509 hdr = ipv6_hdr(skb);
511 /* Mangling hops number delayed to point after skb COW */
513 hdr->hop_limit--;
515 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
516 IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len);
517 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
518 ip6_forward_finish);
520 error:
521 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
522 drop:
523 kfree_skb(skb);
524 return -EINVAL;
527 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
529 to->pkt_type = from->pkt_type;
530 to->priority = from->priority;
531 to->protocol = from->protocol;
532 skb_dst_drop(to);
533 skb_dst_set(to, dst_clone(skb_dst(from)));
534 to->dev = from->dev;
535 to->mark = from->mark;
537 #ifdef CONFIG_NET_SCHED
538 to->tc_index = from->tc_index;
539 #endif
540 nf_copy(to, from);
541 #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE)
542 to->nf_trace = from->nf_trace;
543 #endif
544 skb_copy_secmark(to, from);
547 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
549 struct sk_buff *frag;
550 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
551 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
552 struct ipv6hdr *tmp_hdr;
553 struct frag_hdr *fh;
554 unsigned int mtu, hlen, left, len;
555 int hroom, troom;
556 __be32 frag_id = 0;
557 int ptr, offset = 0, err=0;
558 u8 *prevhdr, nexthdr = 0;
559 struct net *net = dev_net(skb_dst(skb)->dev);
561 hlen = ip6_find_1stfragopt(skb, &prevhdr);
562 nexthdr = *prevhdr;
564 mtu = ip6_skb_dst_mtu(skb);
566 /* We must not fragment if the socket is set to force MTU discovery
567 * or if the skb it not generated by a local socket.
569 if (unlikely(!skb->local_df && skb->len > mtu) ||
570 (IP6CB(skb)->frag_max_size &&
571 IP6CB(skb)->frag_max_size > mtu)) {
572 if (skb->sk && dst_allfrag(skb_dst(skb)))
573 sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK);
575 skb->dev = skb_dst(skb)->dev;
576 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
577 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
578 IPSTATS_MIB_FRAGFAILS);
579 kfree_skb(skb);
580 return -EMSGSIZE;
583 if (np && np->frag_size < mtu) {
584 if (np->frag_size)
585 mtu = np->frag_size;
587 mtu -= hlen + sizeof(struct frag_hdr);
589 if (skb_has_frag_list(skb)) {
590 int first_len = skb_pagelen(skb);
591 struct sk_buff *frag2;
593 if (first_len - hlen > mtu ||
594 ((first_len - hlen) & 7) ||
595 skb_cloned(skb))
596 goto slow_path;
598 skb_walk_frags(skb, frag) {
599 /* Correct geometry. */
600 if (frag->len > mtu ||
601 ((frag->len & 7) && frag->next) ||
602 skb_headroom(frag) < hlen)
603 goto slow_path_clean;
605 /* Partially cloned skb? */
606 if (skb_shared(frag))
607 goto slow_path_clean;
609 BUG_ON(frag->sk);
610 if (skb->sk) {
611 frag->sk = skb->sk;
612 frag->destructor = sock_wfree;
614 skb->truesize -= frag->truesize;
617 err = 0;
618 offset = 0;
619 frag = skb_shinfo(skb)->frag_list;
620 skb_frag_list_init(skb);
621 /* BUILD HEADER */
623 *prevhdr = NEXTHDR_FRAGMENT;
624 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
625 if (!tmp_hdr) {
626 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
627 IPSTATS_MIB_FRAGFAILS);
628 return -ENOMEM;
631 __skb_pull(skb, hlen);
632 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
633 __skb_push(skb, hlen);
634 skb_reset_network_header(skb);
635 memcpy(skb_network_header(skb), tmp_hdr, hlen);
637 ipv6_select_ident(fh, rt);
638 fh->nexthdr = nexthdr;
639 fh->reserved = 0;
640 fh->frag_off = htons(IP6_MF);
641 frag_id = fh->identification;
643 first_len = skb_pagelen(skb);
644 skb->data_len = first_len - skb_headlen(skb);
645 skb->len = first_len;
646 ipv6_hdr(skb)->payload_len = htons(first_len -
647 sizeof(struct ipv6hdr));
649 dst_hold(&rt->dst);
651 for (;;) {
652 /* Prepare header of the next frame,
653 * before previous one went down. */
654 if (frag) {
655 frag->ip_summed = CHECKSUM_NONE;
656 skb_reset_transport_header(frag);
657 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
658 __skb_push(frag, hlen);
659 skb_reset_network_header(frag);
660 memcpy(skb_network_header(frag), tmp_hdr,
661 hlen);
662 offset += skb->len - hlen - sizeof(struct frag_hdr);
663 fh->nexthdr = nexthdr;
664 fh->reserved = 0;
665 fh->frag_off = htons(offset);
666 if (frag->next != NULL)
667 fh->frag_off |= htons(IP6_MF);
668 fh->identification = frag_id;
669 ipv6_hdr(frag)->payload_len =
670 htons(frag->len -
671 sizeof(struct ipv6hdr));
672 ip6_copy_metadata(frag, skb);
675 err = output(skb);
676 if(!err)
677 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
678 IPSTATS_MIB_FRAGCREATES);
680 if (err || !frag)
681 break;
683 skb = frag;
684 frag = skb->next;
685 skb->next = NULL;
688 kfree(tmp_hdr);
690 if (err == 0) {
691 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
692 IPSTATS_MIB_FRAGOKS);
693 ip6_rt_put(rt);
694 return 0;
697 while (frag) {
698 skb = frag->next;
699 kfree_skb(frag);
700 frag = skb;
703 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
704 IPSTATS_MIB_FRAGFAILS);
705 ip6_rt_put(rt);
706 return err;
708 slow_path_clean:
709 skb_walk_frags(skb, frag2) {
710 if (frag2 == frag)
711 break;
712 frag2->sk = NULL;
713 frag2->destructor = NULL;
714 skb->truesize += frag2->truesize;
718 slow_path:
719 if ((skb->ip_summed == CHECKSUM_PARTIAL) &&
720 skb_checksum_help(skb))
721 goto fail;
723 left = skb->len - hlen; /* Space per frame */
724 ptr = hlen; /* Where to start from */
727 * Fragment the datagram.
730 *prevhdr = NEXTHDR_FRAGMENT;
731 hroom = LL_RESERVED_SPACE(rt->dst.dev);
732 troom = rt->dst.dev->needed_tailroom;
735 * Keep copying data until we run out.
737 while(left > 0) {
738 len = left;
739 /* IF: it doesn't fit, use 'mtu' - the data space left */
740 if (len > mtu)
741 len = mtu;
742 /* IF: we are not sending up to and including the packet end
743 then align the next start on an eight byte boundary */
744 if (len < left) {
745 len &= ~7;
748 * Allocate buffer.
751 if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
752 hroom + troom, GFP_ATOMIC)) == NULL) {
753 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
754 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
755 IPSTATS_MIB_FRAGFAILS);
756 err = -ENOMEM;
757 goto fail;
761 * Set up data on packet
764 ip6_copy_metadata(frag, skb);
765 skb_reserve(frag, hroom);
766 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
767 skb_reset_network_header(frag);
768 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
769 frag->transport_header = (frag->network_header + hlen +
770 sizeof(struct frag_hdr));
773 * Charge the memory for the fragment to any owner
774 * it might possess
776 if (skb->sk)
777 skb_set_owner_w(frag, skb->sk);
780 * Copy the packet header into the new buffer.
782 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
785 * Build fragment header.
787 fh->nexthdr = nexthdr;
788 fh->reserved = 0;
789 if (!frag_id) {
790 ipv6_select_ident(fh, rt);
791 frag_id = fh->identification;
792 } else
793 fh->identification = frag_id;
796 * Copy a block of the IP datagram.
798 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
799 BUG();
800 left -= len;
802 fh->frag_off = htons(offset);
803 if (left > 0)
804 fh->frag_off |= htons(IP6_MF);
805 ipv6_hdr(frag)->payload_len = htons(frag->len -
806 sizeof(struct ipv6hdr));
808 ptr += len;
809 offset += len;
812 * Put this fragment into the sending queue.
814 err = output(frag);
815 if (err)
816 goto fail;
818 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
819 IPSTATS_MIB_FRAGCREATES);
821 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
822 IPSTATS_MIB_FRAGOKS);
823 consume_skb(skb);
824 return err;
826 fail:
827 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
828 IPSTATS_MIB_FRAGFAILS);
829 kfree_skb(skb);
830 return err;
833 static inline int ip6_rt_check(const struct rt6key *rt_key,
834 const struct in6_addr *fl_addr,
835 const struct in6_addr *addr_cache)
837 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
838 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
841 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
842 struct dst_entry *dst,
843 const struct flowi6 *fl6)
845 struct ipv6_pinfo *np = inet6_sk(sk);
846 struct rt6_info *rt = (struct rt6_info *)dst;
848 if (!dst)
849 goto out;
851 /* Yes, checking route validity in not connected
852 * case is not very simple. Take into account,
853 * that we do not support routing by source, TOS,
854 * and MSG_DONTROUTE --ANK (980726)
856 * 1. ip6_rt_check(): If route was host route,
857 * check that cached destination is current.
858 * If it is network route, we still may
859 * check its validity using saved pointer
860 * to the last used address: daddr_cache.
861 * We do not want to save whole address now,
862 * (because main consumer of this service
863 * is tcp, which has not this problem),
864 * so that the last trick works only on connected
865 * sockets.
866 * 2. oif also should be the same.
868 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
869 #ifdef CONFIG_IPV6_SUBTREES
870 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
871 #endif
872 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
873 dst_release(dst);
874 dst = NULL;
877 out:
878 return dst;
881 static int ip6_dst_lookup_tail(struct sock *sk,
882 struct dst_entry **dst, struct flowi6 *fl6)
884 struct net *net = sock_net(sk);
885 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
886 struct neighbour *n;
887 struct rt6_info *rt;
888 #endif
889 int err;
891 if (*dst == NULL)
892 *dst = ip6_route_output(net, sk, fl6);
894 if ((err = (*dst)->error))
895 goto out_err_release;
897 if (ipv6_addr_any(&fl6->saddr)) {
898 struct rt6_info *rt = (struct rt6_info *) *dst;
899 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
900 sk ? inet6_sk(sk)->srcprefs : 0,
901 &fl6->saddr);
902 if (err)
903 goto out_err_release;
906 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
908 * Here if the dst entry we've looked up
909 * has a neighbour entry that is in the INCOMPLETE
910 * state and the src address from the flow is
911 * marked as OPTIMISTIC, we release the found
912 * dst entry and replace it instead with the
913 * dst entry of the nexthop router
915 rt = (struct rt6_info *) *dst;
916 n = rt->n;
917 if (n && !(n->nud_state & NUD_VALID)) {
918 struct inet6_ifaddr *ifp;
919 struct flowi6 fl_gw6;
920 int redirect;
922 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
923 (*dst)->dev, 1);
925 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
926 if (ifp)
927 in6_ifa_put(ifp);
929 if (redirect) {
931 * We need to get the dst entry for the
932 * default router instead
934 dst_release(*dst);
935 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
936 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
937 *dst = ip6_route_output(net, sk, &fl_gw6);
938 if ((err = (*dst)->error))
939 goto out_err_release;
942 #endif
944 return 0;
946 out_err_release:
947 if (err == -ENETUNREACH)
948 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
949 dst_release(*dst);
950 *dst = NULL;
951 return err;
955 * ip6_dst_lookup - perform route lookup on flow
956 * @sk: socket which provides route info
957 * @dst: pointer to dst_entry * for result
958 * @fl6: flow to lookup
960 * This function performs a route lookup on the given flow.
962 * It returns zero on success, or a standard errno code on error.
964 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
966 *dst = NULL;
967 return ip6_dst_lookup_tail(sk, dst, fl6);
969 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
972 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
973 * @sk: socket which provides route info
974 * @fl6: flow to lookup
975 * @final_dst: final destination address for ipsec lookup
976 * @can_sleep: we are in a sleepable context
978 * This function performs a route lookup on the given flow.
980 * It returns a valid dst pointer on success, or a pointer encoded
981 * error code.
983 struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
984 const struct in6_addr *final_dst,
985 bool can_sleep)
987 struct dst_entry *dst = NULL;
988 int err;
990 err = ip6_dst_lookup_tail(sk, &dst, fl6);
991 if (err)
992 return ERR_PTR(err);
993 if (final_dst)
994 fl6->daddr = *final_dst;
995 if (can_sleep)
996 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
998 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1000 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1003 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1004 * @sk: socket which provides the dst cache and route info
1005 * @fl6: flow to lookup
1006 * @final_dst: final destination address for ipsec lookup
1007 * @can_sleep: we are in a sleepable context
1009 * This function performs a route lookup on the given flow with the
1010 * possibility of using the cached route in the socket if it is valid.
1011 * It will take the socket dst lock when operating on the dst cache.
1012 * As a result, this function can only be used in process context.
1014 * It returns a valid dst pointer on success, or a pointer encoded
1015 * error code.
1017 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1018 const struct in6_addr *final_dst,
1019 bool can_sleep)
1021 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1022 int err;
1024 dst = ip6_sk_dst_check(sk, dst, fl6);
1026 err = ip6_dst_lookup_tail(sk, &dst, fl6);
1027 if (err)
1028 return ERR_PTR(err);
1029 if (final_dst)
1030 fl6->daddr = *final_dst;
1031 if (can_sleep)
1032 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
1034 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1036 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1038 static inline int ip6_ufo_append_data(struct sock *sk,
1039 int getfrag(void *from, char *to, int offset, int len,
1040 int odd, struct sk_buff *skb),
1041 void *from, int length, int hh_len, int fragheaderlen,
1042 int transhdrlen, int mtu,unsigned int flags,
1043 struct rt6_info *rt)
1046 struct sk_buff *skb;
1047 int err;
1049 /* There is support for UDP large send offload by network
1050 * device, so create one single skb packet containing complete
1051 * udp datagram
1053 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1054 skb = sock_alloc_send_skb(sk,
1055 hh_len + fragheaderlen + transhdrlen + 20,
1056 (flags & MSG_DONTWAIT), &err);
1057 if (skb == NULL)
1058 return err;
1060 /* reserve space for Hardware header */
1061 skb_reserve(skb, hh_len);
1063 /* create space for UDP/IP header */
1064 skb_put(skb,fragheaderlen + transhdrlen);
1066 /* initialize network header pointer */
1067 skb_reset_network_header(skb);
1069 /* initialize protocol header pointer */
1070 skb->transport_header = skb->network_header + fragheaderlen;
1072 skb->ip_summed = CHECKSUM_PARTIAL;
1073 skb->csum = 0;
1076 err = skb_append_datato_frags(sk,skb, getfrag, from,
1077 (length - transhdrlen));
1078 if (!err) {
1079 struct frag_hdr fhdr;
1081 /* Specify the length of each IPv6 datagram fragment.
1082 * It has to be a multiple of 8.
1084 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1085 sizeof(struct frag_hdr)) & ~7;
1086 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1087 ipv6_select_ident(&fhdr, rt);
1088 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1089 __skb_queue_tail(&sk->sk_write_queue, skb);
1091 return 0;
1093 /* There is not enough support do UPD LSO,
1094 * so follow normal path
1096 kfree_skb(skb);
1098 return err;
1101 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1102 gfp_t gfp)
1104 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1107 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1108 gfp_t gfp)
1110 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1113 static void ip6_append_data_mtu(int *mtu,
1114 int *maxfraglen,
1115 unsigned int fragheaderlen,
1116 struct sk_buff *skb,
1117 struct rt6_info *rt)
1119 if (!(rt->dst.flags & DST_XFRM_TUNNEL)) {
1120 if (skb == NULL) {
1121 /* first fragment, reserve header_len */
1122 *mtu = *mtu - rt->dst.header_len;
1124 } else {
1126 * this fragment is not first, the headers
1127 * space is regarded as data space.
1129 *mtu = dst_mtu(rt->dst.path);
1131 *maxfraglen = ((*mtu - fragheaderlen) & ~7)
1132 + fragheaderlen - sizeof(struct frag_hdr);
1136 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1137 int offset, int len, int odd, struct sk_buff *skb),
1138 void *from, int length, int transhdrlen,
1139 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
1140 struct rt6_info *rt, unsigned int flags, int dontfrag)
1142 struct inet_sock *inet = inet_sk(sk);
1143 struct ipv6_pinfo *np = inet6_sk(sk);
1144 struct inet_cork *cork;
1145 struct sk_buff *skb, *skb_prev = NULL;
1146 unsigned int maxfraglen, fragheaderlen;
1147 int exthdrlen;
1148 int dst_exthdrlen;
1149 int hh_len;
1150 int mtu;
1151 int copy;
1152 int err;
1153 int offset = 0;
1154 __u8 tx_flags = 0;
1156 if (flags&MSG_PROBE)
1157 return 0;
1158 cork = &inet->cork.base;
1159 if (skb_queue_empty(&sk->sk_write_queue)) {
1161 * setup for corking
1163 if (opt) {
1164 if (WARN_ON(np->cork.opt))
1165 return -EINVAL;
1167 np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
1168 if (unlikely(np->cork.opt == NULL))
1169 return -ENOBUFS;
1171 np->cork.opt->tot_len = opt->tot_len;
1172 np->cork.opt->opt_flen = opt->opt_flen;
1173 np->cork.opt->opt_nflen = opt->opt_nflen;
1175 np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1176 sk->sk_allocation);
1177 if (opt->dst0opt && !np->cork.opt->dst0opt)
1178 return -ENOBUFS;
1180 np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1181 sk->sk_allocation);
1182 if (opt->dst1opt && !np->cork.opt->dst1opt)
1183 return -ENOBUFS;
1185 np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1186 sk->sk_allocation);
1187 if (opt->hopopt && !np->cork.opt->hopopt)
1188 return -ENOBUFS;
1190 np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1191 sk->sk_allocation);
1192 if (opt->srcrt && !np->cork.opt->srcrt)
1193 return -ENOBUFS;
1195 /* need source address above miyazawa*/
1197 dst_hold(&rt->dst);
1198 cork->dst = &rt->dst;
1199 inet->cork.fl.u.ip6 = *fl6;
1200 np->cork.hop_limit = hlimit;
1201 np->cork.tclass = tclass;
1202 if (rt->dst.flags & DST_XFRM_TUNNEL)
1203 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1204 rt->dst.dev->mtu : dst_mtu(&rt->dst);
1205 else
1206 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1207 rt->dst.dev->mtu : dst_mtu(rt->dst.path);
1208 if (np->frag_size < mtu) {
1209 if (np->frag_size)
1210 mtu = np->frag_size;
1212 cork->fragsize = mtu;
1213 if (dst_allfrag(rt->dst.path))
1214 cork->flags |= IPCORK_ALLFRAG;
1215 cork->length = 0;
1216 exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
1217 length += exthdrlen;
1218 transhdrlen += exthdrlen;
1219 dst_exthdrlen = rt->dst.header_len;
1220 } else {
1221 rt = (struct rt6_info *)cork->dst;
1222 fl6 = &inet->cork.fl.u.ip6;
1223 opt = np->cork.opt;
1224 transhdrlen = 0;
1225 exthdrlen = 0;
1226 dst_exthdrlen = 0;
1227 mtu = cork->fragsize;
1230 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1232 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1233 (opt ? opt->opt_nflen : 0);
1234 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1236 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1237 if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1238 ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
1239 return -EMSGSIZE;
1243 /* For UDP, check if TX timestamp is enabled */
1244 if (sk->sk_type == SOCK_DGRAM) {
1245 err = sock_tx_timestamp(sk, &tx_flags);
1246 if (err)
1247 goto error;
1251 * Let's try using as much space as possible.
1252 * Use MTU if total length of the message fits into the MTU.
1253 * Otherwise, we need to reserve fragment header and
1254 * fragment alignment (= 8-15 octets, in total).
1256 * Note that we may need to "move" the data from the tail of
1257 * the buffer to the new fragment when we split
1258 * the message.
1260 * FIXME: It may be fragmented into multiple chunks
1261 * at once if non-fragmentable extension headers
1262 * are too large.
1263 * --yoshfuji
1266 cork->length += length;
1267 if (length > mtu) {
1268 int proto = sk->sk_protocol;
1269 if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
1270 ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
1271 return -EMSGSIZE;
1274 if (proto == IPPROTO_UDP &&
1275 (rt->dst.dev->features & NETIF_F_UFO)) {
1277 err = ip6_ufo_append_data(sk, getfrag, from, length,
1278 hh_len, fragheaderlen,
1279 transhdrlen, mtu, flags, rt);
1280 if (err)
1281 goto error;
1282 return 0;
1286 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1287 goto alloc_new_skb;
1289 while (length > 0) {
1290 /* Check if the remaining data fits into current packet. */
1291 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1292 if (copy < length)
1293 copy = maxfraglen - skb->len;
1295 if (copy <= 0) {
1296 char *data;
1297 unsigned int datalen;
1298 unsigned int fraglen;
1299 unsigned int fraggap;
1300 unsigned int alloclen;
1301 alloc_new_skb:
1302 /* There's no room in the current skb */
1303 if (skb)
1304 fraggap = skb->len - maxfraglen;
1305 else
1306 fraggap = 0;
1307 /* update mtu and maxfraglen if necessary */
1308 if (skb == NULL || skb_prev == NULL)
1309 ip6_append_data_mtu(&mtu, &maxfraglen,
1310 fragheaderlen, skb, rt);
1312 skb_prev = skb;
1315 * If remaining data exceeds the mtu,
1316 * we know we need more fragment(s).
1318 datalen = length + fraggap;
1320 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1321 datalen = maxfraglen - fragheaderlen - rt->dst.trailer_len;
1322 if ((flags & MSG_MORE) &&
1323 !(rt->dst.dev->features&NETIF_F_SG))
1324 alloclen = mtu;
1325 else
1326 alloclen = datalen + fragheaderlen;
1328 alloclen += dst_exthdrlen;
1330 if (datalen != length + fraggap) {
1332 * this is not the last fragment, the trailer
1333 * space is regarded as data space.
1335 datalen += rt->dst.trailer_len;
1338 alloclen += rt->dst.trailer_len;
1339 fraglen = datalen + fragheaderlen;
1342 * We just reserve space for fragment header.
1343 * Note: this may be overallocation if the message
1344 * (without MSG_MORE) fits into the MTU.
1346 alloclen += sizeof(struct frag_hdr);
1348 if (transhdrlen) {
1349 skb = sock_alloc_send_skb(sk,
1350 alloclen + hh_len,
1351 (flags & MSG_DONTWAIT), &err);
1352 } else {
1353 skb = NULL;
1354 if (atomic_read(&sk->sk_wmem_alloc) <=
1355 2 * sk->sk_sndbuf)
1356 skb = sock_wmalloc(sk,
1357 alloclen + hh_len, 1,
1358 sk->sk_allocation);
1359 if (unlikely(skb == NULL))
1360 err = -ENOBUFS;
1361 else {
1362 /* Only the initial fragment
1363 * is time stamped.
1365 tx_flags = 0;
1368 if (skb == NULL)
1369 goto error;
1371 * Fill in the control structures
1373 skb->ip_summed = CHECKSUM_NONE;
1374 skb->csum = 0;
1375 /* reserve for fragmentation and ipsec header */
1376 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1377 dst_exthdrlen);
1379 if (sk->sk_type == SOCK_DGRAM)
1380 skb_shinfo(skb)->tx_flags = tx_flags;
1383 * Find where to start putting bytes
1385 data = skb_put(skb, fraglen);
1386 skb_set_network_header(skb, exthdrlen);
1387 data += fragheaderlen;
1388 skb->transport_header = (skb->network_header +
1389 fragheaderlen);
1390 if (fraggap) {
1391 skb->csum = skb_copy_and_csum_bits(
1392 skb_prev, maxfraglen,
1393 data + transhdrlen, fraggap, 0);
1394 skb_prev->csum = csum_sub(skb_prev->csum,
1395 skb->csum);
1396 data += fraggap;
1397 pskb_trim_unique(skb_prev, maxfraglen);
1399 copy = datalen - transhdrlen - fraggap;
1401 if (copy < 0) {
1402 err = -EINVAL;
1403 kfree_skb(skb);
1404 goto error;
1405 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1406 err = -EFAULT;
1407 kfree_skb(skb);
1408 goto error;
1411 offset += copy;
1412 length -= datalen - fraggap;
1413 transhdrlen = 0;
1414 exthdrlen = 0;
1415 dst_exthdrlen = 0;
1418 * Put the packet on the pending queue
1420 __skb_queue_tail(&sk->sk_write_queue, skb);
1421 continue;
1424 if (copy > length)
1425 copy = length;
1427 if (!(rt->dst.dev->features&NETIF_F_SG)) {
1428 unsigned int off;
1430 off = skb->len;
1431 if (getfrag(from, skb_put(skb, copy),
1432 offset, copy, off, skb) < 0) {
1433 __skb_trim(skb, off);
1434 err = -EFAULT;
1435 goto error;
1437 } else {
1438 int i = skb_shinfo(skb)->nr_frags;
1439 struct page_frag *pfrag = sk_page_frag(sk);
1441 err = -ENOMEM;
1442 if (!sk_page_frag_refill(sk, pfrag))
1443 goto error;
1445 if (!skb_can_coalesce(skb, i, pfrag->page,
1446 pfrag->offset)) {
1447 err = -EMSGSIZE;
1448 if (i == MAX_SKB_FRAGS)
1449 goto error;
1451 __skb_fill_page_desc(skb, i, pfrag->page,
1452 pfrag->offset, 0);
1453 skb_shinfo(skb)->nr_frags = ++i;
1454 get_page(pfrag->page);
1456 copy = min_t(int, copy, pfrag->size - pfrag->offset);
1457 if (getfrag(from,
1458 page_address(pfrag->page) + pfrag->offset,
1459 offset, copy, skb->len, skb) < 0)
1460 goto error_efault;
1462 pfrag->offset += copy;
1463 skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy);
1464 skb->len += copy;
1465 skb->data_len += copy;
1466 skb->truesize += copy;
1467 atomic_add(copy, &sk->sk_wmem_alloc);
1469 offset += copy;
1470 length -= copy;
1473 return 0;
1475 error_efault:
1476 err = -EFAULT;
1477 error:
1478 cork->length -= length;
1479 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1480 return err;
1482 EXPORT_SYMBOL_GPL(ip6_append_data);
1484 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1486 if (np->cork.opt) {
1487 kfree(np->cork.opt->dst0opt);
1488 kfree(np->cork.opt->dst1opt);
1489 kfree(np->cork.opt->hopopt);
1490 kfree(np->cork.opt->srcrt);
1491 kfree(np->cork.opt);
1492 np->cork.opt = NULL;
1495 if (inet->cork.base.dst) {
1496 dst_release(inet->cork.base.dst);
1497 inet->cork.base.dst = NULL;
1498 inet->cork.base.flags &= ~IPCORK_ALLFRAG;
1500 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1503 int ip6_push_pending_frames(struct sock *sk)
1505 struct sk_buff *skb, *tmp_skb;
1506 struct sk_buff **tail_skb;
1507 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1508 struct inet_sock *inet = inet_sk(sk);
1509 struct ipv6_pinfo *np = inet6_sk(sk);
1510 struct net *net = sock_net(sk);
1511 struct ipv6hdr *hdr;
1512 struct ipv6_txoptions *opt = np->cork.opt;
1513 struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
1514 struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
1515 unsigned char proto = fl6->flowi6_proto;
1516 int err = 0;
1518 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1519 goto out;
1520 tail_skb = &(skb_shinfo(skb)->frag_list);
1522 /* move skb->data to ip header from ext header */
1523 if (skb->data < skb_network_header(skb))
1524 __skb_pull(skb, skb_network_offset(skb));
1525 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1526 __skb_pull(tmp_skb, skb_network_header_len(skb));
1527 *tail_skb = tmp_skb;
1528 tail_skb = &(tmp_skb->next);
1529 skb->len += tmp_skb->len;
1530 skb->data_len += tmp_skb->len;
1531 skb->truesize += tmp_skb->truesize;
1532 tmp_skb->destructor = NULL;
1533 tmp_skb->sk = NULL;
1536 /* Allow local fragmentation. */
1537 if (np->pmtudisc < IPV6_PMTUDISC_DO)
1538 skb->local_df = 1;
1540 *final_dst = fl6->daddr;
1541 __skb_pull(skb, skb_network_header_len(skb));
1542 if (opt && opt->opt_flen)
1543 ipv6_push_frag_opts(skb, opt, &proto);
1544 if (opt && opt->opt_nflen)
1545 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1547 skb_push(skb, sizeof(struct ipv6hdr));
1548 skb_reset_network_header(skb);
1549 hdr = ipv6_hdr(skb);
1551 *(__be32*)hdr = fl6->flowlabel |
1552 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1554 hdr->hop_limit = np->cork.hop_limit;
1555 hdr->nexthdr = proto;
1556 hdr->saddr = fl6->saddr;
1557 hdr->daddr = *final_dst;
1559 skb->priority = sk->sk_priority;
1560 skb->mark = sk->sk_mark;
1562 skb_dst_set(skb, dst_clone(&rt->dst));
1563 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1564 if (proto == IPPROTO_ICMPV6) {
1565 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1567 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
1568 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1571 err = ip6_local_out(skb);
1572 if (err) {
1573 if (err > 0)
1574 err = net_xmit_errno(err);
1575 if (err)
1576 goto error;
1579 out:
1580 ip6_cork_release(inet, np);
1581 return err;
1582 error:
1583 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1584 goto out;
1586 EXPORT_SYMBOL_GPL(ip6_push_pending_frames);
1588 void ip6_flush_pending_frames(struct sock *sk)
1590 struct sk_buff *skb;
1592 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1593 if (skb_dst(skb))
1594 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1595 IPSTATS_MIB_OUTDISCARDS);
1596 kfree_skb(skb);
1599 ip6_cork_release(inet_sk(sk), inet6_sk(sk));
1601 EXPORT_SYMBOL_GPL(ip6_flush_pending_frames);