Merge git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/driver-core-2.6
[linux/fpc-iii.git] / net / ipv6 / ip6_output.c
blobcd48801a8d6f465d546b61a2e9629bf6e2091714
/*
 *	IPv6 output functions
 *	Linux INET6 implementation
 *
 *	Authors:
 *	Pedro Roque		<roque@di.fc.ul.pt>
 *
 *	Based on linux/net/ipv4/ip_output.c
 *
 *	This program is free software; you can redistribute it and/or
 *	modify it under the terms of the GNU General Public License
 *	as published by the Free Software Foundation; either version
 *	2 of the License, or (at your option) any later version.
 *
 *	Changes:
 *	A.N.Kuznetsov	:	arithmetics in fragmentation.
 *				extension headers are implemented.
 *				route changes now work.
 *				ip6_forward does not confuse sniffers.
 *				etc.
 *
 *	H. von Brand	:	Added missing #include <linux/string.h>
 *	Imran Patel	:	frag id should be in NBO
 *	Kazunori MIYAZAWA @USAGI
 *			:	add ip6_append_data and related functions
 *				for datagram xmit
 */

29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
41 #include <linux/netfilter.h>
42 #include <linux/netfilter_ipv6.h>
44 #include <net/sock.h>
45 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ndisc.h>
49 #include <net/protocol.h>
50 #include <net/ip6_route.h>
51 #include <net/addrconf.h>
52 #include <net/rawv6.h>
53 #include <net/icmp.h>
54 #include <net/xfrm.h>
55 #include <net/checksum.h>
56 #include <linux/mroute6.h>
58 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
60 int __ip6_local_out(struct sk_buff *skb)
62 int len;
64 len = skb->len - sizeof(struct ipv6hdr);
65 if (len > IPV6_MAXPLEN)
66 len = 0;
67 ipv6_hdr(skb)->payload_len = htons(len);
69 return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
70 dst_output);
/*
 * Run the local-out hook and, when it returns 1, hand the packet to
 * dst_output() directly.  Any other hook result is returned unchanged.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int verdict = __ip6_local_out(skb);

	if (unlikely(verdict != 1))
		return verdict;

	return dst_output(skb);
}
EXPORT_SYMBOL_GPL(ip6_local_out);

85 static int ip6_output_finish(struct sk_buff *skb)
87 struct dst_entry *dst = skb_dst(skb);
89 if (dst->hh)
90 return neigh_hh_output(dst->hh, skb);
91 else if (dst->neighbour)
92 return dst->neighbour->output(skb);
94 IP6_INC_STATS_BH(dev_net(dst->dev),
95 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
96 kfree_skb(skb);
97 return -EINVAL;
101 /* dev_loopback_xmit for use with netfilter. */
102 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
104 skb_reset_mac_header(newskb);
105 __skb_pull(newskb, skb_network_offset(newskb));
106 newskb->pkt_type = PACKET_LOOPBACK;
107 newskb->ip_summed = CHECKSUM_UNNECESSARY;
108 WARN_ON(!skb_dst(newskb));
110 netif_rx(newskb);
111 return 0;
115 static int ip6_output2(struct sk_buff *skb)
117 struct dst_entry *dst = skb_dst(skb);
118 struct net_device *dev = dst->dev;
120 skb->protocol = htons(ETH_P_IPV6);
121 skb->dev = dev;
123 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
124 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
125 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
127 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
128 ((mroute6_socket(dev_net(dev)) &&
129 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
130 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
131 &ipv6_hdr(skb)->saddr))) {
132 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
134 /* Do not check for IFF_ALLMULTI; multicast routing
135 is not supported in any case.
137 if (newskb)
138 NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
139 NULL, newskb->dev,
140 ip6_dev_loopback_xmit);
142 if (ipv6_hdr(skb)->hop_limit == 0) {
143 IP6_INC_STATS(dev_net(dev), idev,
144 IPSTATS_MIB_OUTDISCARDS);
145 kfree_skb(skb);
146 return 0;
150 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
151 skb->len);
154 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
155 ip6_output_finish);
158 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
160 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
162 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
163 skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
166 int ip6_output(struct sk_buff *skb)
168 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
169 if (unlikely(idev->cnf.disable_ipv6)) {
170 IP6_INC_STATS(dev_net(skb_dst(skb)->dev), idev,
171 IPSTATS_MIB_OUTDISCARDS);
172 kfree_skb(skb);
173 return 0;
176 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
177 dst_allfrag(skb_dst(skb)))
178 return ip6_fragment(skb, ip6_output2);
179 else
180 return ip6_output2(skb);
184 * xmit an sk_buff (used by TCP)
187 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
188 struct ipv6_txoptions *opt, int ipfragok)
190 struct net *net = sock_net(sk);
191 struct ipv6_pinfo *np = inet6_sk(sk);
192 struct in6_addr *first_hop = &fl->fl6_dst;
193 struct dst_entry *dst = skb_dst(skb);
194 struct ipv6hdr *hdr;
195 u8 proto = fl->proto;
196 int seg_len = skb->len;
197 int hlimit = -1;
198 int tclass = 0;
199 u32 mtu;
201 if (opt) {
202 unsigned int head_room;
204 /* First: exthdrs may take lots of space (~8K for now)
205 MAX_HEADER is not enough.
207 head_room = opt->opt_nflen + opt->opt_flen;
208 seg_len += head_room;
209 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
211 if (skb_headroom(skb) < head_room) {
212 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
213 if (skb2 == NULL) {
214 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
215 IPSTATS_MIB_OUTDISCARDS);
216 kfree_skb(skb);
217 return -ENOBUFS;
219 kfree_skb(skb);
220 skb = skb2;
221 if (sk)
222 skb_set_owner_w(skb, sk);
224 if (opt->opt_flen)
225 ipv6_push_frag_opts(skb, opt, &proto);
226 if (opt->opt_nflen)
227 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
230 skb_push(skb, sizeof(struct ipv6hdr));
231 skb_reset_network_header(skb);
232 hdr = ipv6_hdr(skb);
234 /* Allow local fragmentation. */
235 if (ipfragok)
236 skb->local_df = 1;
239 * Fill in the IPv6 header
241 if (np) {
242 tclass = np->tclass;
243 hlimit = np->hop_limit;
245 if (hlimit < 0)
246 hlimit = ip6_dst_hoplimit(dst);
248 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
250 hdr->payload_len = htons(seg_len);
251 hdr->nexthdr = proto;
252 hdr->hop_limit = hlimit;
254 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
255 ipv6_addr_copy(&hdr->daddr, first_hop);
257 skb->priority = sk->sk_priority;
258 skb->mark = sk->sk_mark;
260 mtu = dst_mtu(dst);
261 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
262 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
263 IPSTATS_MIB_OUT, skb->len);
264 return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
265 dst_output);
268 if (net_ratelimit())
269 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
270 skb->dev = dst->dev;
271 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
272 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
273 kfree_skb(skb);
274 return -EMSGSIZE;
277 EXPORT_SYMBOL(ip6_xmit);
280 * To avoid extra problems ND packets are send through this
281 * routine. It's code duplication but I really want to avoid
282 * extra checks since ipv6_build_header is used by TCP (which
283 * is for us performance critical)
286 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
287 const struct in6_addr *saddr, const struct in6_addr *daddr,
288 int proto, int len)
290 struct ipv6_pinfo *np = inet6_sk(sk);
291 struct ipv6hdr *hdr;
292 int totlen;
294 skb->protocol = htons(ETH_P_IPV6);
295 skb->dev = dev;
297 totlen = len + sizeof(struct ipv6hdr);
299 skb_reset_network_header(skb);
300 skb_put(skb, sizeof(struct ipv6hdr));
301 hdr = ipv6_hdr(skb);
303 *(__be32*)hdr = htonl(0x60000000);
305 hdr->payload_len = htons(len);
306 hdr->nexthdr = proto;
307 hdr->hop_limit = np->hop_limit;
309 ipv6_addr_copy(&hdr->saddr, saddr);
310 ipv6_addr_copy(&hdr->daddr, daddr);
312 return 0;
315 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
317 struct ip6_ra_chain *ra;
318 struct sock *last = NULL;
320 read_lock(&ip6_ra_lock);
321 for (ra = ip6_ra_chain; ra; ra = ra->next) {
322 struct sock *sk = ra->sk;
323 if (sk && ra->sel == sel &&
324 (!sk->sk_bound_dev_if ||
325 sk->sk_bound_dev_if == skb->dev->ifindex)) {
326 if (last) {
327 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
328 if (skb2)
329 rawv6_rcv(last, skb2);
331 last = sk;
335 if (last) {
336 rawv6_rcv(last, skb);
337 read_unlock(&ip6_ra_lock);
338 return 1;
340 read_unlock(&ip6_ra_lock);
341 return 0;
344 static int ip6_forward_proxy_check(struct sk_buff *skb)
346 struct ipv6hdr *hdr = ipv6_hdr(skb);
347 u8 nexthdr = hdr->nexthdr;
348 int offset;
350 if (ipv6_ext_hdr(nexthdr)) {
351 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
352 if (offset < 0)
353 return 0;
354 } else
355 offset = sizeof(struct ipv6hdr);
357 if (nexthdr == IPPROTO_ICMPV6) {
358 struct icmp6hdr *icmp6;
360 if (!pskb_may_pull(skb, (skb_network_header(skb) +
361 offset + 1 - skb->data)))
362 return 0;
364 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
366 switch (icmp6->icmp6_type) {
367 case NDISC_ROUTER_SOLICITATION:
368 case NDISC_ROUTER_ADVERTISEMENT:
369 case NDISC_NEIGHBOUR_SOLICITATION:
370 case NDISC_NEIGHBOUR_ADVERTISEMENT:
371 case NDISC_REDIRECT:
372 /* For reaction involving unicast neighbor discovery
373 * message destined to the proxied address, pass it to
374 * input function.
376 return 1;
377 default:
378 break;
383 * The proxying router can't forward traffic sent to a link-local
384 * address, so signal the sender and discard the packet. This
385 * behavior is clarified by the MIPv6 specification.
387 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
388 dst_link_failure(skb);
389 return -1;
392 return 0;
/* Continuation of the FORWARD hook: send the packet out through its route. */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	int rc = dst_output(skb);

	return rc;
}

400 int ip6_forward(struct sk_buff *skb)
402 struct dst_entry *dst = skb_dst(skb);
403 struct ipv6hdr *hdr = ipv6_hdr(skb);
404 struct inet6_skb_parm *opt = IP6CB(skb);
405 struct net *net = dev_net(dst->dev);
407 if (net->ipv6.devconf_all->forwarding == 0)
408 goto error;
410 if (skb_warn_if_lro(skb))
411 goto drop;
413 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
414 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
415 goto drop;
418 skb_forward_csum(skb);
421 * We DO NOT make any processing on
422 * RA packets, pushing them to user level AS IS
423 * without ane WARRANTY that application will be able
424 * to interpret them. The reason is that we
425 * cannot make anything clever here.
427 * We are not end-node, so that if packet contains
428 * AH/ESP, we cannot make anything.
429 * Defragmentation also would be mistake, RA packets
430 * cannot be fragmented, because there is no warranty
431 * that different fragments will go along one path. --ANK
433 if (opt->ra) {
434 u8 *ptr = skb_network_header(skb) + opt->ra;
435 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
436 return 0;
440 * check and decrement ttl
442 if (hdr->hop_limit <= 1) {
443 /* Force OUTPUT device used as source address */
444 skb->dev = dst->dev;
445 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
446 0, skb->dev);
447 IP6_INC_STATS_BH(net,
448 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
450 kfree_skb(skb);
451 return -ETIMEDOUT;
454 /* XXX: idev->cnf.proxy_ndp? */
455 if (net->ipv6.devconf_all->proxy_ndp &&
456 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
457 int proxied = ip6_forward_proxy_check(skb);
458 if (proxied > 0)
459 return ip6_input(skb);
460 else if (proxied < 0) {
461 IP6_INC_STATS(net, ip6_dst_idev(dst),
462 IPSTATS_MIB_INDISCARDS);
463 goto drop;
467 if (!xfrm6_route_forward(skb)) {
468 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
469 goto drop;
471 dst = skb_dst(skb);
473 /* IPv6 specs say nothing about it, but it is clear that we cannot
474 send redirects to source routed frames.
475 We don't send redirects to frames decapsulated from IPsec.
477 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
478 !skb_sec_path(skb)) {
479 struct in6_addr *target = NULL;
480 struct rt6_info *rt;
481 struct neighbour *n = dst->neighbour;
484 * incoming and outgoing devices are the same
485 * send a redirect.
488 rt = (struct rt6_info *) dst;
489 if ((rt->rt6i_flags & RTF_GATEWAY))
490 target = (struct in6_addr*)&n->primary_key;
491 else
492 target = &hdr->daddr;
494 /* Limit redirects both by destination (here)
495 and by source (inside ndisc_send_redirect)
497 if (xrlim_allow(dst, 1*HZ))
498 ndisc_send_redirect(skb, n, target);
499 } else {
500 int addrtype = ipv6_addr_type(&hdr->saddr);
502 /* This check is security critical. */
503 if (addrtype == IPV6_ADDR_ANY ||
504 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
505 goto error;
506 if (addrtype & IPV6_ADDR_LINKLOCAL) {
507 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
508 ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
509 goto error;
513 if (skb->len > dst_mtu(dst)) {
514 /* Again, force OUTPUT device used as source address */
515 skb->dev = dst->dev;
516 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
517 IP6_INC_STATS_BH(net,
518 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
519 IP6_INC_STATS_BH(net,
520 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
521 kfree_skb(skb);
522 return -EMSGSIZE;
525 if (skb_cow(skb, dst->dev->hard_header_len)) {
526 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
527 goto drop;
530 hdr = ipv6_hdr(skb);
532 /* Mangling hops number delayed to point after skb COW */
534 hdr->hop_limit--;
536 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
537 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
538 ip6_forward_finish);
540 error:
541 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
542 drop:
543 kfree_skb(skb);
544 return -EINVAL;
547 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
549 to->pkt_type = from->pkt_type;
550 to->priority = from->priority;
551 to->protocol = from->protocol;
552 skb_dst_drop(to);
553 skb_dst_set(to, dst_clone(skb_dst(from)));
554 to->dev = from->dev;
555 to->mark = from->mark;
557 #ifdef CONFIG_NET_SCHED
558 to->tc_index = from->tc_index;
559 #endif
560 nf_copy(to, from);
561 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
562 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
563 to->nf_trace = from->nf_trace;
564 #endif
565 skb_copy_secmark(to, from);
568 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
570 u16 offset = sizeof(struct ipv6hdr);
571 struct ipv6_opt_hdr *exthdr =
572 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
573 unsigned int packet_len = skb->tail - skb->network_header;
574 int found_rhdr = 0;
575 *nexthdr = &ipv6_hdr(skb)->nexthdr;
577 while (offset + 1 <= packet_len) {
579 switch (**nexthdr) {
581 case NEXTHDR_HOP:
582 break;
583 case NEXTHDR_ROUTING:
584 found_rhdr = 1;
585 break;
586 case NEXTHDR_DEST:
587 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
588 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
589 break;
590 #endif
591 if (found_rhdr)
592 return offset;
593 break;
594 default :
595 return offset;
598 offset += ipv6_optlen(exthdr);
599 *nexthdr = &exthdr->nexthdr;
600 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
601 offset);
604 return offset;
607 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
609 struct sk_buff *frag;
610 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
611 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
612 struct ipv6hdr *tmp_hdr;
613 struct frag_hdr *fh;
614 unsigned int mtu, hlen, left, len;
615 __be32 frag_id = 0;
616 int ptr, offset = 0, err=0;
617 u8 *prevhdr, nexthdr = 0;
618 struct net *net = dev_net(skb_dst(skb)->dev);
620 hlen = ip6_find_1stfragopt(skb, &prevhdr);
621 nexthdr = *prevhdr;
623 mtu = ip6_skb_dst_mtu(skb);
625 /* We must not fragment if the socket is set to force MTU discovery
626 * or if the skb it not generated by a local socket. (This last
627 * check should be redundant, but it's free.)
629 if (!skb->local_df) {
630 skb->dev = skb_dst(skb)->dev;
631 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
632 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
633 IPSTATS_MIB_FRAGFAILS);
634 kfree_skb(skb);
635 return -EMSGSIZE;
638 if (np && np->frag_size < mtu) {
639 if (np->frag_size)
640 mtu = np->frag_size;
642 mtu -= hlen + sizeof(struct frag_hdr);
644 if (skb_has_frags(skb)) {
645 int first_len = skb_pagelen(skb);
646 int truesizes = 0;
648 if (first_len - hlen > mtu ||
649 ((first_len - hlen) & 7) ||
650 skb_cloned(skb))
651 goto slow_path;
653 skb_walk_frags(skb, frag) {
654 /* Correct geometry. */
655 if (frag->len > mtu ||
656 ((frag->len & 7) && frag->next) ||
657 skb_headroom(frag) < hlen)
658 goto slow_path;
660 /* Partially cloned skb? */
661 if (skb_shared(frag))
662 goto slow_path;
664 BUG_ON(frag->sk);
665 if (skb->sk) {
666 frag->sk = skb->sk;
667 frag->destructor = sock_wfree;
668 truesizes += frag->truesize;
672 err = 0;
673 offset = 0;
674 frag = skb_shinfo(skb)->frag_list;
675 skb_frag_list_init(skb);
676 /* BUILD HEADER */
678 *prevhdr = NEXTHDR_FRAGMENT;
679 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
680 if (!tmp_hdr) {
681 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
682 IPSTATS_MIB_FRAGFAILS);
683 return -ENOMEM;
686 __skb_pull(skb, hlen);
687 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
688 __skb_push(skb, hlen);
689 skb_reset_network_header(skb);
690 memcpy(skb_network_header(skb), tmp_hdr, hlen);
692 ipv6_select_ident(fh);
693 fh->nexthdr = nexthdr;
694 fh->reserved = 0;
695 fh->frag_off = htons(IP6_MF);
696 frag_id = fh->identification;
698 first_len = skb_pagelen(skb);
699 skb->data_len = first_len - skb_headlen(skb);
700 skb->truesize -= truesizes;
701 skb->len = first_len;
702 ipv6_hdr(skb)->payload_len = htons(first_len -
703 sizeof(struct ipv6hdr));
705 dst_hold(&rt->u.dst);
707 for (;;) {
708 /* Prepare header of the next frame,
709 * before previous one went down. */
710 if (frag) {
711 frag->ip_summed = CHECKSUM_NONE;
712 skb_reset_transport_header(frag);
713 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
714 __skb_push(frag, hlen);
715 skb_reset_network_header(frag);
716 memcpy(skb_network_header(frag), tmp_hdr,
717 hlen);
718 offset += skb->len - hlen - sizeof(struct frag_hdr);
719 fh->nexthdr = nexthdr;
720 fh->reserved = 0;
721 fh->frag_off = htons(offset);
722 if (frag->next != NULL)
723 fh->frag_off |= htons(IP6_MF);
724 fh->identification = frag_id;
725 ipv6_hdr(frag)->payload_len =
726 htons(frag->len -
727 sizeof(struct ipv6hdr));
728 ip6_copy_metadata(frag, skb);
731 err = output(skb);
732 if(!err)
733 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
734 IPSTATS_MIB_FRAGCREATES);
736 if (err || !frag)
737 break;
739 skb = frag;
740 frag = skb->next;
741 skb->next = NULL;
744 kfree(tmp_hdr);
746 if (err == 0) {
747 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
748 IPSTATS_MIB_FRAGOKS);
749 dst_release(&rt->u.dst);
750 return 0;
753 while (frag) {
754 skb = frag->next;
755 kfree_skb(frag);
756 frag = skb;
759 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
760 IPSTATS_MIB_FRAGFAILS);
761 dst_release(&rt->u.dst);
762 return err;
765 slow_path:
766 left = skb->len - hlen; /* Space per frame */
767 ptr = hlen; /* Where to start from */
770 * Fragment the datagram.
773 *prevhdr = NEXTHDR_FRAGMENT;
776 * Keep copying data until we run out.
778 while(left > 0) {
779 len = left;
780 /* IF: it doesn't fit, use 'mtu' - the data space left */
781 if (len > mtu)
782 len = mtu;
783 /* IF: we are not sending upto and including the packet end
784 then align the next start on an eight byte boundary */
785 if (len < left) {
786 len &= ~7;
789 * Allocate buffer.
792 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
793 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
794 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
795 IPSTATS_MIB_FRAGFAILS);
796 err = -ENOMEM;
797 goto fail;
801 * Set up data on packet
804 ip6_copy_metadata(frag, skb);
805 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
806 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
807 skb_reset_network_header(frag);
808 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
809 frag->transport_header = (frag->network_header + hlen +
810 sizeof(struct frag_hdr));
813 * Charge the memory for the fragment to any owner
814 * it might possess
816 if (skb->sk)
817 skb_set_owner_w(frag, skb->sk);
820 * Copy the packet header into the new buffer.
822 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
825 * Build fragment header.
827 fh->nexthdr = nexthdr;
828 fh->reserved = 0;
829 if (!frag_id) {
830 ipv6_select_ident(fh);
831 frag_id = fh->identification;
832 } else
833 fh->identification = frag_id;
836 * Copy a block of the IP datagram.
838 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
839 BUG();
840 left -= len;
842 fh->frag_off = htons(offset);
843 if (left > 0)
844 fh->frag_off |= htons(IP6_MF);
845 ipv6_hdr(frag)->payload_len = htons(frag->len -
846 sizeof(struct ipv6hdr));
848 ptr += len;
849 offset += len;
852 * Put this fragment into the sending queue.
854 err = output(frag);
855 if (err)
856 goto fail;
858 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
859 IPSTATS_MIB_FRAGCREATES);
861 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
862 IPSTATS_MIB_FRAGOKS);
863 kfree_skb(skb);
864 return err;
866 fail:
867 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
868 IPSTATS_MIB_FRAGFAILS);
869 kfree_skb(skb);
870 return err;
873 static inline int ip6_rt_check(struct rt6key *rt_key,
874 struct in6_addr *fl_addr,
875 struct in6_addr *addr_cache)
877 return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
878 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
881 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
882 struct dst_entry *dst,
883 struct flowi *fl)
885 struct ipv6_pinfo *np = inet6_sk(sk);
886 struct rt6_info *rt = (struct rt6_info *)dst;
888 if (!dst)
889 goto out;
891 /* Yes, checking route validity in not connected
892 * case is not very simple. Take into account,
893 * that we do not support routing by source, TOS,
894 * and MSG_DONTROUTE --ANK (980726)
896 * 1. ip6_rt_check(): If route was host route,
897 * check that cached destination is current.
898 * If it is network route, we still may
899 * check its validity using saved pointer
900 * to the last used address: daddr_cache.
901 * We do not want to save whole address now,
902 * (because main consumer of this service
903 * is tcp, which has not this problem),
904 * so that the last trick works only on connected
905 * sockets.
906 * 2. oif also should be the same.
908 if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
909 #ifdef CONFIG_IPV6_SUBTREES
910 ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
911 #endif
912 (fl->oif && fl->oif != dst->dev->ifindex)) {
913 dst_release(dst);
914 dst = NULL;
917 out:
918 return dst;
921 static int ip6_dst_lookup_tail(struct sock *sk,
922 struct dst_entry **dst, struct flowi *fl)
924 int err;
925 struct net *net = sock_net(sk);
927 if (*dst == NULL)
928 *dst = ip6_route_output(net, sk, fl);
930 if ((err = (*dst)->error))
931 goto out_err_release;
933 if (ipv6_addr_any(&fl->fl6_src)) {
934 err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
935 &fl->fl6_dst,
936 sk ? inet6_sk(sk)->srcprefs : 0,
937 &fl->fl6_src);
938 if (err)
939 goto out_err_release;
942 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
944 * Here if the dst entry we've looked up
945 * has a neighbour entry that is in the INCOMPLETE
946 * state and the src address from the flow is
947 * marked as OPTIMISTIC, we release the found
948 * dst entry and replace it instead with the
949 * dst entry of the nexthop router
951 if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
952 struct inet6_ifaddr *ifp;
953 struct flowi fl_gw;
954 int redirect;
956 ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
957 (*dst)->dev, 1);
959 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
960 if (ifp)
961 in6_ifa_put(ifp);
963 if (redirect) {
965 * We need to get the dst entry for the
966 * default router instead
968 dst_release(*dst);
969 memcpy(&fl_gw, fl, sizeof(struct flowi));
970 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
971 *dst = ip6_route_output(net, sk, &fl_gw);
972 if ((err = (*dst)->error))
973 goto out_err_release;
976 #endif
978 return 0;
980 out_err_release:
981 if (err == -ENETUNREACH)
982 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
983 dst_release(*dst);
984 *dst = NULL;
985 return err;
989 * ip6_dst_lookup - perform route lookup on flow
990 * @sk: socket which provides route info
991 * @dst: pointer to dst_entry * for result
992 * @fl: flow to lookup
994 * This function performs a route lookup on the given flow.
996 * It returns zero on success, or a standard errno code on error.
998 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1000 *dst = NULL;
1001 return ip6_dst_lookup_tail(sk, dst, fl);
1003 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1006 * ip6_sk_dst_lookup - perform socket cached route lookup on flow
1007 * @sk: socket which provides the dst cache and route info
1008 * @dst: pointer to dst_entry * for result
1009 * @fl: flow to lookup
1011 * This function performs a route lookup on the given flow with the
1012 * possibility of using the cached route in the socket if it is valid.
1013 * It will take the socket dst lock when operating on the dst cache.
1014 * As a result, this function can only be used in process context.
1016 * It returns zero on success, or a standard errno code on error.
1018 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1020 *dst = NULL;
1021 if (sk) {
1022 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1023 *dst = ip6_sk_dst_check(sk, *dst, fl);
1026 return ip6_dst_lookup_tail(sk, dst, fl);
1028 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1030 static inline int ip6_ufo_append_data(struct sock *sk,
1031 int getfrag(void *from, char *to, int offset, int len,
1032 int odd, struct sk_buff *skb),
1033 void *from, int length, int hh_len, int fragheaderlen,
1034 int transhdrlen, int mtu,unsigned int flags)
1037 struct sk_buff *skb;
1038 int err;
1040 /* There is support for UDP large send offload by network
1041 * device, so create one single skb packet containing complete
1042 * udp datagram
1044 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1045 skb = sock_alloc_send_skb(sk,
1046 hh_len + fragheaderlen + transhdrlen + 20,
1047 (flags & MSG_DONTWAIT), &err);
1048 if (skb == NULL)
1049 return -ENOMEM;
1051 /* reserve space for Hardware header */
1052 skb_reserve(skb, hh_len);
1054 /* create space for UDP/IP header */
1055 skb_put(skb,fragheaderlen + transhdrlen);
1057 /* initialize network header pointer */
1058 skb_reset_network_header(skb);
1060 /* initialize protocol header pointer */
1061 skb->transport_header = skb->network_header + fragheaderlen;
1063 skb->ip_summed = CHECKSUM_PARTIAL;
1064 skb->csum = 0;
1065 sk->sk_sndmsg_off = 0;
1068 err = skb_append_datato_frags(sk,skb, getfrag, from,
1069 (length - transhdrlen));
1070 if (!err) {
1071 struct frag_hdr fhdr;
1073 /* Specify the length of each IPv6 datagram fragment.
1074 * It has to be a multiple of 8.
1076 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1077 sizeof(struct frag_hdr)) & ~7;
1078 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1079 ipv6_select_ident(&fhdr);
1080 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1081 __skb_queue_tail(&sk->sk_write_queue, skb);
1083 return 0;
1085 /* There is not enough support do UPD LSO,
1086 * so follow normal path
1088 kfree_skb(skb);
1090 return err;
1093 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1094 gfp_t gfp)
1096 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1099 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1100 gfp_t gfp)
1102 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1105 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1106 int offset, int len, int odd, struct sk_buff *skb),
1107 void *from, int length, int transhdrlen,
1108 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1109 struct rt6_info *rt, unsigned int flags)
1111 struct inet_sock *inet = inet_sk(sk);
1112 struct ipv6_pinfo *np = inet6_sk(sk);
1113 struct sk_buff *skb;
1114 unsigned int maxfraglen, fragheaderlen;
1115 int exthdrlen;
1116 int hh_len;
1117 int mtu;
1118 int copy;
1119 int err;
1120 int offset = 0;
1121 int csummode = CHECKSUM_NONE;
1123 if (flags&MSG_PROBE)
1124 return 0;
1125 if (skb_queue_empty(&sk->sk_write_queue)) {
1127 * setup for corking
1129 if (opt) {
1130 if (WARN_ON(np->cork.opt))
1131 return -EINVAL;
1133 np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
1134 if (unlikely(np->cork.opt == NULL))
1135 return -ENOBUFS;
1137 np->cork.opt->tot_len = opt->tot_len;
1138 np->cork.opt->opt_flen = opt->opt_flen;
1139 np->cork.opt->opt_nflen = opt->opt_nflen;
1141 np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1142 sk->sk_allocation);
1143 if (opt->dst0opt && !np->cork.opt->dst0opt)
1144 return -ENOBUFS;
1146 np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1147 sk->sk_allocation);
1148 if (opt->dst1opt && !np->cork.opt->dst1opt)
1149 return -ENOBUFS;
1151 np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1152 sk->sk_allocation);
1153 if (opt->hopopt && !np->cork.opt->hopopt)
1154 return -ENOBUFS;
1156 np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1157 sk->sk_allocation);
1158 if (opt->srcrt && !np->cork.opt->srcrt)
1159 return -ENOBUFS;
1161 /* need source address above miyazawa*/
1163 dst_hold(&rt->u.dst);
1164 inet->cork.dst = &rt->u.dst;
1165 inet->cork.fl = *fl;
1166 np->cork.hop_limit = hlimit;
1167 np->cork.tclass = tclass;
1168 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1169 rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
1170 if (np->frag_size < mtu) {
1171 if (np->frag_size)
1172 mtu = np->frag_size;
1174 inet->cork.fragsize = mtu;
1175 if (dst_allfrag(rt->u.dst.path))
1176 inet->cork.flags |= IPCORK_ALLFRAG;
1177 inet->cork.length = 0;
1178 sk->sk_sndmsg_page = NULL;
1179 sk->sk_sndmsg_off = 0;
1180 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
1181 rt->rt6i_nfheader_len;
1182 length += exthdrlen;
1183 transhdrlen += exthdrlen;
1184 } else {
1185 rt = (struct rt6_info *)inet->cork.dst;
1186 fl = &inet->cork.fl;
1187 opt = np->cork.opt;
1188 transhdrlen = 0;
1189 exthdrlen = 0;
1190 mtu = inet->cork.fragsize;
1193 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1195 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1196 (opt ? opt->opt_nflen : 0);
1197 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1199 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1200 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1201 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1202 return -EMSGSIZE;
1207 * Let's try using as much space as possible.
1208 * Use MTU if total length of the message fits into the MTU.
1209 * Otherwise, we need to reserve fragment header and
1210 * fragment alignment (= 8-15 octects, in total).
1212 * Note that we may need to "move" the data from the tail of
1213 * of the buffer to the new fragment when we split
1214 * the message.
1216 * FIXME: It may be fragmented into multiple chunks
1217 * at once if non-fragmentable extension headers
1218 * are too large.
1219 * --yoshfuji
1222 inet->cork.length += length;
1223 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1224 (rt->u.dst.dev->features & NETIF_F_UFO)) {
1226 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1227 fragheaderlen, transhdrlen, mtu,
1228 flags);
1229 if (err)
1230 goto error;
1231 return 0;
1234 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1235 goto alloc_new_skb;
1237 while (length > 0) {
1238 /* Check if the remaining data fits into current packet. */
1239 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1240 if (copy < length)
1241 copy = maxfraglen - skb->len;
1243 if (copy <= 0) {
1244 char *data;
1245 unsigned int datalen;
1246 unsigned int fraglen;
1247 unsigned int fraggap;
1248 unsigned int alloclen;
1249 struct sk_buff *skb_prev;
1250 alloc_new_skb:
1251 skb_prev = skb;
1253 /* There's no room in the current skb */
1254 if (skb_prev)
1255 fraggap = skb_prev->len - maxfraglen;
1256 else
1257 fraggap = 0;
1260 * If remaining data exceeds the mtu,
1261 * we know we need more fragment(s).
1263 datalen = length + fraggap;
1264 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1265 datalen = maxfraglen - fragheaderlen;
1267 fraglen = datalen + fragheaderlen;
1268 if ((flags & MSG_MORE) &&
1269 !(rt->u.dst.dev->features&NETIF_F_SG))
1270 alloclen = mtu;
1271 else
1272 alloclen = datalen + fragheaderlen;
1275 * The last fragment gets additional space at tail.
1276 * Note: we overallocate on fragments with MSG_MORE
1277 * because we have no idea if we're the last one.
1279 if (datalen == length + fraggap)
1280 alloclen += rt->u.dst.trailer_len;
1283 * We just reserve space for fragment header.
1284 * Note: this may be overallocation if the message
1285 * (without MSG_MORE) fits into the MTU.
1287 alloclen += sizeof(struct frag_hdr);
1289 if (transhdrlen) {
1290 skb = sock_alloc_send_skb(sk,
1291 alloclen + hh_len,
1292 (flags & MSG_DONTWAIT), &err);
1293 } else {
1294 skb = NULL;
1295 if (atomic_read(&sk->sk_wmem_alloc) <=
1296 2 * sk->sk_sndbuf)
1297 skb = sock_wmalloc(sk,
1298 alloclen + hh_len, 1,
1299 sk->sk_allocation);
1300 if (unlikely(skb == NULL))
1301 err = -ENOBUFS;
1303 if (skb == NULL)
1304 goto error;
1306 * Fill in the control structures
1308 skb->ip_summed = csummode;
1309 skb->csum = 0;
1310 /* reserve for fragmentation */
1311 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1314 * Find where to start putting bytes
1316 data = skb_put(skb, fraglen);
1317 skb_set_network_header(skb, exthdrlen);
1318 data += fragheaderlen;
1319 skb->transport_header = (skb->network_header +
1320 fragheaderlen);
1321 if (fraggap) {
1322 skb->csum = skb_copy_and_csum_bits(
1323 skb_prev, maxfraglen,
1324 data + transhdrlen, fraggap, 0);
1325 skb_prev->csum = csum_sub(skb_prev->csum,
1326 skb->csum);
1327 data += fraggap;
1328 pskb_trim_unique(skb_prev, maxfraglen);
1330 copy = datalen - transhdrlen - fraggap;
1331 if (copy < 0) {
1332 err = -EINVAL;
1333 kfree_skb(skb);
1334 goto error;
1335 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1336 err = -EFAULT;
1337 kfree_skb(skb);
1338 goto error;
1341 offset += copy;
1342 length -= datalen - fraggap;
1343 transhdrlen = 0;
1344 exthdrlen = 0;
1345 csummode = CHECKSUM_NONE;
1348 * Put the packet on the pending queue
1350 __skb_queue_tail(&sk->sk_write_queue, skb);
1351 continue;
1354 if (copy > length)
1355 copy = length;
1357 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1358 unsigned int off;
1360 off = skb->len;
1361 if (getfrag(from, skb_put(skb, copy),
1362 offset, copy, off, skb) < 0) {
1363 __skb_trim(skb, off);
1364 err = -EFAULT;
1365 goto error;
1367 } else {
1368 int i = skb_shinfo(skb)->nr_frags;
1369 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1370 struct page *page = sk->sk_sndmsg_page;
1371 int off = sk->sk_sndmsg_off;
1372 unsigned int left;
1374 if (page && (left = PAGE_SIZE - off) > 0) {
1375 if (copy >= left)
1376 copy = left;
1377 if (page != frag->page) {
1378 if (i == MAX_SKB_FRAGS) {
1379 err = -EMSGSIZE;
1380 goto error;
1382 get_page(page);
1383 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1384 frag = &skb_shinfo(skb)->frags[i];
1386 } else if(i < MAX_SKB_FRAGS) {
1387 if (copy > PAGE_SIZE)
1388 copy = PAGE_SIZE;
1389 page = alloc_pages(sk->sk_allocation, 0);
1390 if (page == NULL) {
1391 err = -ENOMEM;
1392 goto error;
1394 sk->sk_sndmsg_page = page;
1395 sk->sk_sndmsg_off = 0;
1397 skb_fill_page_desc(skb, i, page, 0, 0);
1398 frag = &skb_shinfo(skb)->frags[i];
1399 } else {
1400 err = -EMSGSIZE;
1401 goto error;
1403 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1404 err = -EFAULT;
1405 goto error;
1407 sk->sk_sndmsg_off += copy;
1408 frag->size += copy;
1409 skb->len += copy;
1410 skb->data_len += copy;
1411 skb->truesize += copy;
1412 atomic_add(copy, &sk->sk_wmem_alloc);
1414 offset += copy;
1415 length -= copy;
1417 return 0;
1418 error:
1419 inet->cork.length -= length;
1420 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1421 return err;
1424 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1426 if (np->cork.opt) {
1427 kfree(np->cork.opt->dst0opt);
1428 kfree(np->cork.opt->dst1opt);
1429 kfree(np->cork.opt->hopopt);
1430 kfree(np->cork.opt->srcrt);
1431 kfree(np->cork.opt);
1432 np->cork.opt = NULL;
1435 if (inet->cork.dst) {
1436 dst_release(inet->cork.dst);
1437 inet->cork.dst = NULL;
1438 inet->cork.flags &= ~IPCORK_ALLFRAG;
1440 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1443 int ip6_push_pending_frames(struct sock *sk)
1445 struct sk_buff *skb, *tmp_skb;
1446 struct sk_buff **tail_skb;
1447 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1448 struct inet_sock *inet = inet_sk(sk);
1449 struct ipv6_pinfo *np = inet6_sk(sk);
1450 struct net *net = sock_net(sk);
1451 struct ipv6hdr *hdr;
1452 struct ipv6_txoptions *opt = np->cork.opt;
1453 struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
1454 struct flowi *fl = &inet->cork.fl;
1455 unsigned char proto = fl->proto;
1456 int err = 0;
1458 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1459 goto out;
1460 tail_skb = &(skb_shinfo(skb)->frag_list);
1462 /* move skb->data to ip header from ext header */
1463 if (skb->data < skb_network_header(skb))
1464 __skb_pull(skb, skb_network_offset(skb));
1465 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1466 __skb_pull(tmp_skb, skb_network_header_len(skb));
1467 *tail_skb = tmp_skb;
1468 tail_skb = &(tmp_skb->next);
1469 skb->len += tmp_skb->len;
1470 skb->data_len += tmp_skb->len;
1471 skb->truesize += tmp_skb->truesize;
1472 tmp_skb->destructor = NULL;
1473 tmp_skb->sk = NULL;
1476 /* Allow local fragmentation. */
1477 if (np->pmtudisc < IPV6_PMTUDISC_DO)
1478 skb->local_df = 1;
1480 ipv6_addr_copy(final_dst, &fl->fl6_dst);
1481 __skb_pull(skb, skb_network_header_len(skb));
1482 if (opt && opt->opt_flen)
1483 ipv6_push_frag_opts(skb, opt, &proto);
1484 if (opt && opt->opt_nflen)
1485 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1487 skb_push(skb, sizeof(struct ipv6hdr));
1488 skb_reset_network_header(skb);
1489 hdr = ipv6_hdr(skb);
1491 *(__be32*)hdr = fl->fl6_flowlabel |
1492 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1494 hdr->hop_limit = np->cork.hop_limit;
1495 hdr->nexthdr = proto;
1496 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1497 ipv6_addr_copy(&hdr->daddr, final_dst);
1499 skb->priority = sk->sk_priority;
1500 skb->mark = sk->sk_mark;
1502 skb_dst_set(skb, dst_clone(&rt->u.dst));
1503 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1504 if (proto == IPPROTO_ICMPV6) {
1505 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1507 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
1508 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1511 err = ip6_local_out(skb);
1512 if (err) {
1513 if (err > 0)
1514 err = net_xmit_errno(err);
1515 if (err)
1516 goto error;
1519 out:
1520 ip6_cork_release(inet, np);
1521 return err;
1522 error:
1523 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1524 goto out;
1527 void ip6_flush_pending_frames(struct sock *sk)
1529 struct sk_buff *skb;
1531 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1532 if (skb_dst(skb))
1533 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1534 IPSTATS_MIB_OUTDISCARDS);
1535 kfree_skb(skb);
1538 ip6_cork_release(inet_sk(sk), inet6_sk(sk));