initial commit with v2.6.32.60
[linux-2.6.32.60-moxart.git] / net / ipv6 / ip6_output.c
blob9ad57923440f4dee631eb14b9e502c03858aa551
1 /*
2 * IPv6 output functions
3 * Linux INET6 implementation
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * Based on linux/net/ipv4/ip_output.c
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
15 * Changes:
16 * A.N.Kuznetsov : arithmetic in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
41 #include <linux/netfilter.h>
42 #include <linux/netfilter_ipv6.h>
44 #include <net/sock.h>
45 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ndisc.h>
49 #include <net/protocol.h>
50 #include <net/ip6_route.h>
51 #include <net/addrconf.h>
52 #include <net/rawv6.h>
53 #include <net/icmp.h>
54 #include <net/xfrm.h>
55 #include <net/checksum.h>
56 #include <linux/mroute6.h>
58 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
60 int __ip6_local_out(struct sk_buff *skb)
62 int len;
64 len = skb->len - sizeof(struct ipv6hdr);
65 if (len > IPV6_MAXPLEN)
66 len = 0;
67 ipv6_hdr(skb)->payload_len = htons(len);
69 return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
70 dst_output);
/*
 * Send a locally generated packet: run __ip6_local_out() and, when it
 * returns 1, pass the packet on to dst_output().  Any other return value
 * from __ip6_local_out() is handed back to the caller unchanged.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int rc = __ip6_local_out(skb);

	if (unlikely(rc != 1))
		return rc;

	return dst_output(skb);
}
EXPORT_SYMBOL_GPL(ip6_local_out);
85 static int ip6_output_finish(struct sk_buff *skb)
87 struct dst_entry *dst = skb_dst(skb);
89 if (dst->hh)
90 return neigh_hh_output(dst->hh, skb);
91 else if (dst->neighbour)
92 return dst->neighbour->output(skb);
94 IP6_INC_STATS_BH(dev_net(dst->dev),
95 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
96 kfree_skb(skb);
97 return -EINVAL;
101 /* dev_loopback_xmit for use with netfilter. */
102 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
104 skb_reset_mac_header(newskb);
105 __skb_pull(newskb, skb_network_offset(newskb));
106 newskb->pkt_type = PACKET_LOOPBACK;
107 newskb->ip_summed = CHECKSUM_UNNECESSARY;
108 WARN_ON(!skb_dst(newskb));
110 netif_rx(newskb);
111 return 0;
115 static int ip6_output2(struct sk_buff *skb)
117 struct dst_entry *dst = skb_dst(skb);
118 struct net_device *dev = dst->dev;
120 skb->protocol = htons(ETH_P_IPV6);
121 skb->dev = dev;
123 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
124 struct ipv6_pinfo* np = skb->sk ? inet6_sk(skb->sk) : NULL;
125 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
127 if (!(dev->flags & IFF_LOOPBACK) && (!np || np->mc_loop) &&
128 ((mroute6_socket(dev_net(dev)) &&
129 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
130 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
131 &ipv6_hdr(skb)->saddr))) {
132 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
134 /* Do not check for IFF_ALLMULTI; multicast routing
135 is not supported in any case.
137 if (newskb)
138 NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
139 NULL, newskb->dev,
140 ip6_dev_loopback_xmit);
142 if (ipv6_hdr(skb)->hop_limit == 0) {
143 IP6_INC_STATS(dev_net(dev), idev,
144 IPSTATS_MIB_OUTDISCARDS);
145 kfree_skb(skb);
146 return 0;
150 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
151 skb->len);
154 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
155 ip6_output_finish);
158 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
160 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
162 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
163 skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
166 int ip6_output(struct sk_buff *skb)
168 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
169 if (unlikely(idev->cnf.disable_ipv6)) {
170 IP6_INC_STATS(dev_net(skb_dst(skb)->dev), idev,
171 IPSTATS_MIB_OUTDISCARDS);
172 kfree_skb(skb);
173 return 0;
176 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
177 dst_allfrag(skb_dst(skb)))
178 return ip6_fragment(skb, ip6_output2);
179 else
180 return ip6_output2(skb);
184 * xmit an sk_buff (used by TCP)
187 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
188 struct ipv6_txoptions *opt, int ipfragok)
190 struct net *net = sock_net(sk);
191 struct ipv6_pinfo *np = inet6_sk(sk);
192 struct in6_addr *first_hop = &fl->fl6_dst;
193 struct dst_entry *dst = skb_dst(skb);
194 struct ipv6hdr *hdr;
195 u8 proto = fl->proto;
196 int seg_len = skb->len;
197 int hlimit = -1;
198 int tclass = 0;
199 u32 mtu;
201 if (opt) {
202 unsigned int head_room;
204 /* First: exthdrs may take lots of space (~8K for now)
205 MAX_HEADER is not enough.
207 head_room = opt->opt_nflen + opt->opt_flen;
208 seg_len += head_room;
209 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
211 if (skb_headroom(skb) < head_room) {
212 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
213 if (skb2 == NULL) {
214 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
215 IPSTATS_MIB_OUTDISCARDS);
216 kfree_skb(skb);
217 return -ENOBUFS;
219 kfree_skb(skb);
220 skb = skb2;
221 if (sk)
222 skb_set_owner_w(skb, sk);
224 if (opt->opt_flen)
225 ipv6_push_frag_opts(skb, opt, &proto);
226 if (opt->opt_nflen)
227 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
230 skb_push(skb, sizeof(struct ipv6hdr));
231 skb_reset_network_header(skb);
232 hdr = ipv6_hdr(skb);
234 /* Allow local fragmentation. */
235 if (ipfragok)
236 skb->local_df = 1;
239 * Fill in the IPv6 header
241 if (np) {
242 tclass = np->tclass;
243 hlimit = np->hop_limit;
245 if (hlimit < 0)
246 hlimit = ip6_dst_hoplimit(dst);
248 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
250 hdr->payload_len = htons(seg_len);
251 hdr->nexthdr = proto;
252 hdr->hop_limit = hlimit;
254 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
255 ipv6_addr_copy(&hdr->daddr, first_hop);
257 skb->priority = sk->sk_priority;
258 skb->mark = sk->sk_mark;
260 mtu = dst_mtu(dst);
261 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
262 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
263 IPSTATS_MIB_OUT, skb->len);
264 return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
265 dst_output);
268 if (net_ratelimit())
269 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
270 skb->dev = dst->dev;
271 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
272 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
273 kfree_skb(skb);
274 return -EMSGSIZE;
277 EXPORT_SYMBOL(ip6_xmit);
280 * To avoid extra problems ND packets are send through this
281 * routine. It's code duplication but I really want to avoid
282 * extra checks since ipv6_build_header is used by TCP (which
283 * is for us performance critical)
286 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
287 const struct in6_addr *saddr, const struct in6_addr *daddr,
288 int proto, int len)
290 struct ipv6_pinfo *np = inet6_sk(sk);
291 struct ipv6hdr *hdr;
292 int totlen;
294 skb->protocol = htons(ETH_P_IPV6);
295 skb->dev = dev;
297 totlen = len + sizeof(struct ipv6hdr);
299 skb_reset_network_header(skb);
300 skb_put(skb, sizeof(struct ipv6hdr));
301 hdr = ipv6_hdr(skb);
303 *(__be32*)hdr = htonl(0x60000000);
305 hdr->payload_len = htons(len);
306 hdr->nexthdr = proto;
307 hdr->hop_limit = np->hop_limit;
309 ipv6_addr_copy(&hdr->saddr, saddr);
310 ipv6_addr_copy(&hdr->daddr, daddr);
312 return 0;
315 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
317 struct ip6_ra_chain *ra;
318 struct sock *last = NULL;
320 read_lock(&ip6_ra_lock);
321 for (ra = ip6_ra_chain; ra; ra = ra->next) {
322 struct sock *sk = ra->sk;
323 if (sk && ra->sel == sel &&
324 (!sk->sk_bound_dev_if ||
325 sk->sk_bound_dev_if == skb->dev->ifindex)) {
326 if (last) {
327 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
328 if (skb2)
329 rawv6_rcv(last, skb2);
331 last = sk;
335 if (last) {
336 rawv6_rcv(last, skb);
337 read_unlock(&ip6_ra_lock);
338 return 1;
340 read_unlock(&ip6_ra_lock);
341 return 0;
344 static int ip6_forward_proxy_check(struct sk_buff *skb)
346 struct ipv6hdr *hdr = ipv6_hdr(skb);
347 u8 nexthdr = hdr->nexthdr;
348 int offset;
350 if (ipv6_ext_hdr(nexthdr)) {
351 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
352 if (offset < 0)
353 return 0;
354 } else
355 offset = sizeof(struct ipv6hdr);
357 if (nexthdr == IPPROTO_ICMPV6) {
358 struct icmp6hdr *icmp6;
360 if (!pskb_may_pull(skb, (skb_network_header(skb) +
361 offset + 1 - skb->data)))
362 return 0;
364 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
366 switch (icmp6->icmp6_type) {
367 case NDISC_ROUTER_SOLICITATION:
368 case NDISC_ROUTER_ADVERTISEMENT:
369 case NDISC_NEIGHBOUR_SOLICITATION:
370 case NDISC_NEIGHBOUR_ADVERTISEMENT:
371 case NDISC_REDIRECT:
372 /* For reaction involving unicast neighbor discovery
373 * message destined to the proxied address, pass it to
374 * input function.
376 return 1;
377 default:
378 break;
383 * The proxying router can't forward traffic sent to a link-local
384 * address, so signal the sender and discard the packet. This
385 * behavior is clarified by the MIPv6 specification.
387 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
388 dst_link_failure(skb);
389 return -1;
392 return 0;
/*
 * Continuation used by ip6_forward() after the NF_INET_FORWARD hook:
 * simply passes the packet to dst_output() and returns its result.
 */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	int rc = dst_output(skb);

	return rc;
}
400 int ip6_forward(struct sk_buff *skb)
402 struct dst_entry *dst = skb_dst(skb);
403 struct ipv6hdr *hdr = ipv6_hdr(skb);
404 struct inet6_skb_parm *opt = IP6CB(skb);
405 struct net *net = dev_net(dst->dev);
407 if (net->ipv6.devconf_all->forwarding == 0)
408 goto error;
410 if (skb_warn_if_lro(skb))
411 goto drop;
413 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
414 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
415 goto drop;
418 skb_forward_csum(skb);
421 * We DO NOT make any processing on
422 * RA packets, pushing them to user level AS IS
423 * without ane WARRANTY that application will be able
424 * to interpret them. The reason is that we
425 * cannot make anything clever here.
427 * We are not end-node, so that if packet contains
428 * AH/ESP, we cannot make anything.
429 * Defragmentation also would be mistake, RA packets
430 * cannot be fragmented, because there is no warranty
431 * that different fragments will go along one path. --ANK
433 if (opt->ra) {
434 u8 *ptr = skb_network_header(skb) + opt->ra;
435 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
436 return 0;
440 * check and decrement ttl
442 if (hdr->hop_limit <= 1) {
443 /* Force OUTPUT device used as source address */
444 skb->dev = dst->dev;
445 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
446 0, skb->dev);
447 IP6_INC_STATS_BH(net,
448 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
450 kfree_skb(skb);
451 return -ETIMEDOUT;
454 /* XXX: idev->cnf.proxy_ndp? */
455 if (net->ipv6.devconf_all->proxy_ndp &&
456 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
457 int proxied = ip6_forward_proxy_check(skb);
458 if (proxied > 0)
459 return ip6_input(skb);
460 else if (proxied < 0) {
461 IP6_INC_STATS(net, ip6_dst_idev(dst),
462 IPSTATS_MIB_INDISCARDS);
463 goto drop;
467 if (!xfrm6_route_forward(skb)) {
468 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
469 goto drop;
471 dst = skb_dst(skb);
473 /* IPv6 specs say nothing about it, but it is clear that we cannot
474 send redirects to source routed frames.
475 We don't send redirects to frames decapsulated from IPsec.
477 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
478 !skb_sec_path(skb)) {
479 struct in6_addr *target = NULL;
480 struct rt6_info *rt;
481 struct neighbour *n = dst->neighbour;
484 * incoming and outgoing devices are the same
485 * send a redirect.
488 rt = (struct rt6_info *) dst;
489 if ((rt->rt6i_flags & RTF_GATEWAY))
490 target = (struct in6_addr*)&n->primary_key;
491 else
492 target = &hdr->daddr;
494 /* Limit redirects both by destination (here)
495 and by source (inside ndisc_send_redirect)
497 if (xrlim_allow(dst, 1*HZ))
498 ndisc_send_redirect(skb, n, target);
499 } else {
500 int addrtype = ipv6_addr_type(&hdr->saddr);
502 /* This check is security critical. */
503 if (addrtype == IPV6_ADDR_ANY ||
504 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
505 goto error;
506 if (addrtype & IPV6_ADDR_LINKLOCAL) {
507 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
508 ICMPV6_NOT_NEIGHBOUR, 0, skb->dev);
509 goto error;
513 if (skb->len > dst_mtu(dst) && !skb_is_gso(skb)) {
514 /* Again, force OUTPUT device used as source address */
515 skb->dev = dst->dev;
516 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, dst_mtu(dst), skb->dev);
517 IP6_INC_STATS_BH(net,
518 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
519 IP6_INC_STATS_BH(net,
520 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
521 kfree_skb(skb);
522 return -EMSGSIZE;
525 if (skb_cow(skb, dst->dev->hard_header_len)) {
526 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
527 goto drop;
530 hdr = ipv6_hdr(skb);
532 /* Mangling hops number delayed to point after skb COW */
534 hdr->hop_limit--;
536 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
537 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
538 ip6_forward_finish);
540 error:
541 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
542 drop:
543 kfree_skb(skb);
544 return -EINVAL;
547 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
549 to->pkt_type = from->pkt_type;
550 to->priority = from->priority;
551 to->protocol = from->protocol;
552 skb_dst_drop(to);
553 skb_dst_set(to, dst_clone(skb_dst(from)));
554 to->dev = from->dev;
555 to->mark = from->mark;
557 #ifdef CONFIG_NET_SCHED
558 to->tc_index = from->tc_index;
559 #endif
560 nf_copy(to, from);
561 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
562 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
563 to->nf_trace = from->nf_trace;
564 #endif
565 skb_copy_secmark(to, from);
568 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
570 u16 offset = sizeof(struct ipv6hdr);
571 struct ipv6_opt_hdr *exthdr =
572 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
573 unsigned int packet_len = skb->tail - skb->network_header;
574 int found_rhdr = 0;
575 *nexthdr = &ipv6_hdr(skb)->nexthdr;
577 while (offset + 1 <= packet_len) {
579 switch (**nexthdr) {
581 case NEXTHDR_HOP:
582 break;
583 case NEXTHDR_ROUTING:
584 found_rhdr = 1;
585 break;
586 case NEXTHDR_DEST:
587 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
588 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
589 break;
590 #endif
591 if (found_rhdr)
592 return offset;
593 break;
594 default :
595 return offset;
598 offset += ipv6_optlen(exthdr);
599 *nexthdr = &exthdr->nexthdr;
600 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
601 offset);
604 return offset;
607 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
609 struct sk_buff *frag;
610 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
611 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
612 struct ipv6hdr *tmp_hdr;
613 struct frag_hdr *fh;
614 unsigned int mtu, hlen, left, len;
615 __be32 frag_id = 0;
616 int ptr, offset = 0, err=0;
617 u8 *prevhdr, nexthdr = 0;
618 struct net *net = dev_net(skb_dst(skb)->dev);
620 hlen = ip6_find_1stfragopt(skb, &prevhdr);
621 nexthdr = *prevhdr;
623 mtu = ip6_skb_dst_mtu(skb);
625 /* We must not fragment if the socket is set to force MTU discovery
626 * or if the skb it not generated by a local socket. (This last
627 * check should be redundant, but it's free.)
629 if (!skb->local_df) {
630 skb->dev = skb_dst(skb)->dev;
631 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu, skb->dev);
632 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
633 IPSTATS_MIB_FRAGFAILS);
634 kfree_skb(skb);
635 return -EMSGSIZE;
638 if (np && np->frag_size < mtu) {
639 if (np->frag_size)
640 mtu = np->frag_size;
642 mtu -= hlen + sizeof(struct frag_hdr);
644 if (skb_has_frags(skb)) {
645 int first_len = skb_pagelen(skb);
646 struct sk_buff *frag2;
648 if (first_len - hlen > mtu ||
649 ((first_len - hlen) & 7) ||
650 skb_cloned(skb))
651 goto slow_path;
653 skb_walk_frags(skb, frag) {
654 /* Correct geometry. */
655 if (frag->len > mtu ||
656 ((frag->len & 7) && frag->next) ||
657 skb_headroom(frag) < hlen)
658 goto slow_path_clean;
660 /* Partially cloned skb? */
661 if (skb_shared(frag))
662 goto slow_path_clean;
664 BUG_ON(frag->sk);
665 if (skb->sk) {
666 frag->sk = skb->sk;
667 frag->destructor = sock_wfree;
669 skb->truesize -= frag->truesize;
672 err = 0;
673 offset = 0;
674 frag = skb_shinfo(skb)->frag_list;
675 skb_frag_list_init(skb);
676 /* BUILD HEADER */
678 *prevhdr = NEXTHDR_FRAGMENT;
679 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
680 if (!tmp_hdr) {
681 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
682 IPSTATS_MIB_FRAGFAILS);
683 return -ENOMEM;
686 __skb_pull(skb, hlen);
687 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
688 __skb_push(skb, hlen);
689 skb_reset_network_header(skb);
690 memcpy(skb_network_header(skb), tmp_hdr, hlen);
692 ipv6_select_ident(fh);
693 fh->nexthdr = nexthdr;
694 fh->reserved = 0;
695 fh->frag_off = htons(IP6_MF);
696 frag_id = fh->identification;
698 first_len = skb_pagelen(skb);
699 skb->data_len = first_len - skb_headlen(skb);
700 skb->len = first_len;
701 ipv6_hdr(skb)->payload_len = htons(first_len -
702 sizeof(struct ipv6hdr));
704 dst_hold(&rt->u.dst);
706 for (;;) {
707 /* Prepare header of the next frame,
708 * before previous one went down. */
709 if (frag) {
710 frag->ip_summed = CHECKSUM_NONE;
711 skb_reset_transport_header(frag);
712 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
713 __skb_push(frag, hlen);
714 skb_reset_network_header(frag);
715 memcpy(skb_network_header(frag), tmp_hdr,
716 hlen);
717 offset += skb->len - hlen - sizeof(struct frag_hdr);
718 fh->nexthdr = nexthdr;
719 fh->reserved = 0;
720 fh->frag_off = htons(offset);
721 if (frag->next != NULL)
722 fh->frag_off |= htons(IP6_MF);
723 fh->identification = frag_id;
724 ipv6_hdr(frag)->payload_len =
725 htons(frag->len -
726 sizeof(struct ipv6hdr));
727 ip6_copy_metadata(frag, skb);
730 err = output(skb);
731 if(!err)
732 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
733 IPSTATS_MIB_FRAGCREATES);
735 if (err || !frag)
736 break;
738 skb = frag;
739 frag = skb->next;
740 skb->next = NULL;
743 kfree(tmp_hdr);
745 if (err == 0) {
746 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
747 IPSTATS_MIB_FRAGOKS);
748 dst_release(&rt->u.dst);
749 return 0;
752 while (frag) {
753 skb = frag->next;
754 kfree_skb(frag);
755 frag = skb;
758 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
759 IPSTATS_MIB_FRAGFAILS);
760 dst_release(&rt->u.dst);
761 return err;
763 slow_path_clean:
764 skb_walk_frags(skb, frag2) {
765 if (frag2 == frag)
766 break;
767 frag2->sk = NULL;
768 frag2->destructor = NULL;
769 skb->truesize += frag2->truesize;
773 slow_path:
774 left = skb->len - hlen; /* Space per frame */
775 ptr = hlen; /* Where to start from */
778 * Fragment the datagram.
781 *prevhdr = NEXTHDR_FRAGMENT;
784 * Keep copying data until we run out.
786 while(left > 0) {
787 len = left;
788 /* IF: it doesn't fit, use 'mtu' - the data space left */
789 if (len > mtu)
790 len = mtu;
791 /* IF: we are not sending upto and including the packet end
792 then align the next start on an eight byte boundary */
793 if (len < left) {
794 len &= ~7;
797 * Allocate buffer.
800 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
801 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
802 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
803 IPSTATS_MIB_FRAGFAILS);
804 err = -ENOMEM;
805 goto fail;
809 * Set up data on packet
812 ip6_copy_metadata(frag, skb);
813 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
814 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
815 skb_reset_network_header(frag);
816 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
817 frag->transport_header = (frag->network_header + hlen +
818 sizeof(struct frag_hdr));
821 * Charge the memory for the fragment to any owner
822 * it might possess
824 if (skb->sk)
825 skb_set_owner_w(frag, skb->sk);
828 * Copy the packet header into the new buffer.
830 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
833 * Build fragment header.
835 fh->nexthdr = nexthdr;
836 fh->reserved = 0;
837 if (!frag_id) {
838 ipv6_select_ident(fh);
839 frag_id = fh->identification;
840 } else
841 fh->identification = frag_id;
844 * Copy a block of the IP datagram.
846 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
847 BUG();
848 left -= len;
850 fh->frag_off = htons(offset);
851 if (left > 0)
852 fh->frag_off |= htons(IP6_MF);
853 ipv6_hdr(frag)->payload_len = htons(frag->len -
854 sizeof(struct ipv6hdr));
856 ptr += len;
857 offset += len;
860 * Put this fragment into the sending queue.
862 err = output(frag);
863 if (err)
864 goto fail;
866 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
867 IPSTATS_MIB_FRAGCREATES);
869 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
870 IPSTATS_MIB_FRAGOKS);
871 kfree_skb(skb);
872 return err;
874 fail:
875 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
876 IPSTATS_MIB_FRAGFAILS);
877 kfree_skb(skb);
878 return err;
881 static inline int ip6_rt_check(struct rt6key *rt_key,
882 struct in6_addr *fl_addr,
883 struct in6_addr *addr_cache)
885 return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
886 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
889 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
890 struct dst_entry *dst,
891 struct flowi *fl)
893 struct ipv6_pinfo *np = inet6_sk(sk);
894 struct rt6_info *rt = (struct rt6_info *)dst;
896 if (!dst)
897 goto out;
899 /* Yes, checking route validity in not connected
900 * case is not very simple. Take into account,
901 * that we do not support routing by source, TOS,
902 * and MSG_DONTROUTE --ANK (980726)
904 * 1. ip6_rt_check(): If route was host route,
905 * check that cached destination is current.
906 * If it is network route, we still may
907 * check its validity using saved pointer
908 * to the last used address: daddr_cache.
909 * We do not want to save whole address now,
910 * (because main consumer of this service
911 * is tcp, which has not this problem),
912 * so that the last trick works only on connected
913 * sockets.
914 * 2. oif also should be the same.
916 if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
917 #ifdef CONFIG_IPV6_SUBTREES
918 ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
919 #endif
920 (fl->oif && fl->oif != dst->dev->ifindex)) {
921 dst_release(dst);
922 dst = NULL;
925 out:
926 return dst;
929 static int ip6_dst_lookup_tail(struct sock *sk,
930 struct dst_entry **dst, struct flowi *fl)
932 int err;
933 struct net *net = sock_net(sk);
935 if (*dst == NULL)
936 *dst = ip6_route_output(net, sk, fl);
938 if ((err = (*dst)->error))
939 goto out_err_release;
941 if (ipv6_addr_any(&fl->fl6_src)) {
942 err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
943 &fl->fl6_dst,
944 sk ? inet6_sk(sk)->srcprefs : 0,
945 &fl->fl6_src);
946 if (err)
947 goto out_err_release;
950 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
952 * Here if the dst entry we've looked up
953 * has a neighbour entry that is in the INCOMPLETE
954 * state and the src address from the flow is
955 * marked as OPTIMISTIC, we release the found
956 * dst entry and replace it instead with the
957 * dst entry of the nexthop router
959 if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
960 struct inet6_ifaddr *ifp;
961 struct flowi fl_gw;
962 int redirect;
964 ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
965 (*dst)->dev, 1);
967 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
968 if (ifp)
969 in6_ifa_put(ifp);
971 if (redirect) {
973 * We need to get the dst entry for the
974 * default router instead
976 dst_release(*dst);
977 memcpy(&fl_gw, fl, sizeof(struct flowi));
978 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
979 *dst = ip6_route_output(net, sk, &fl_gw);
980 if ((err = (*dst)->error))
981 goto out_err_release;
984 #endif
986 return 0;
988 out_err_release:
989 if (err == -ENETUNREACH)
990 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
991 dst_release(*dst);
992 *dst = NULL;
993 return err;
997 * ip6_dst_lookup - perform route lookup on flow
998 * @sk: socket which provides route info
999 * @dst: pointer to dst_entry * for result
1000 * @fl: flow to lookup
1002 * This function performs a route lookup on the given flow.
1004 * It returns zero on success, or a standard errno code on error.
1006 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1008 *dst = NULL;
1009 return ip6_dst_lookup_tail(sk, dst, fl);
1011 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1014 * ip6_sk_dst_lookup - perform socket cached route lookup on flow
1015 * @sk: socket which provides the dst cache and route info
1016 * @dst: pointer to dst_entry * for result
1017 * @fl: flow to lookup
1019 * This function performs a route lookup on the given flow with the
1020 * possibility of using the cached route in the socket if it is valid.
1021 * It will take the socket dst lock when operating on the dst cache.
1022 * As a result, this function can only be used in process context.
1024 * It returns zero on success, or a standard errno code on error.
1026 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1028 *dst = NULL;
1029 if (sk) {
1030 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1031 *dst = ip6_sk_dst_check(sk, *dst, fl);
1034 return ip6_dst_lookup_tail(sk, dst, fl);
1036 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1038 static inline int ip6_ufo_append_data(struct sock *sk,
1039 int getfrag(void *from, char *to, int offset, int len,
1040 int odd, struct sk_buff *skb),
1041 void *from, int length, int hh_len, int fragheaderlen,
1042 int transhdrlen, int mtu,unsigned int flags)
1045 struct sk_buff *skb;
1046 int err;
1048 /* There is support for UDP large send offload by network
1049 * device, so create one single skb packet containing complete
1050 * udp datagram
1052 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1053 skb = sock_alloc_send_skb(sk,
1054 hh_len + fragheaderlen + transhdrlen + 20,
1055 (flags & MSG_DONTWAIT), &err);
1056 if (skb == NULL)
1057 return -ENOMEM;
1059 /* reserve space for Hardware header */
1060 skb_reserve(skb, hh_len);
1062 /* create space for UDP/IP header */
1063 skb_put(skb,fragheaderlen + transhdrlen);
1065 /* initialize network header pointer */
1066 skb_reset_network_header(skb);
1068 /* initialize protocol header pointer */
1069 skb->transport_header = skb->network_header + fragheaderlen;
1071 skb->ip_summed = CHECKSUM_PARTIAL;
1072 skb->csum = 0;
1073 sk->sk_sndmsg_off = 0;
1076 err = skb_append_datato_frags(sk,skb, getfrag, from,
1077 (length - transhdrlen));
1078 if (!err) {
1079 struct frag_hdr fhdr;
1081 /* Specify the length of each IPv6 datagram fragment.
1082 * It has to be a multiple of 8.
1084 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1085 sizeof(struct frag_hdr)) & ~7;
1086 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1087 ipv6_select_ident(&fhdr);
1088 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1089 __skb_queue_tail(&sk->sk_write_queue, skb);
1091 return 0;
1093 /* There is not enough support do UPD LSO,
1094 * so follow normal path
1096 kfree_skb(skb);
1098 return err;
1101 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1102 gfp_t gfp)
1104 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1107 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1108 gfp_t gfp)
1110 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1113 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1114 int offset, int len, int odd, struct sk_buff *skb),
1115 void *from, int length, int transhdrlen,
1116 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1117 struct rt6_info *rt, unsigned int flags)
1119 struct inet_sock *inet = inet_sk(sk);
1120 struct ipv6_pinfo *np = inet6_sk(sk);
1121 struct sk_buff *skb;
1122 unsigned int maxfraglen, fragheaderlen;
1123 int exthdrlen;
1124 int hh_len;
1125 int mtu;
1126 int copy;
1127 int err;
1128 int offset = 0;
1129 int csummode = CHECKSUM_NONE;
1131 if (flags&MSG_PROBE)
1132 return 0;
1133 if (skb_queue_empty(&sk->sk_write_queue)) {
1135 * setup for corking
1137 if (opt) {
1138 if (WARN_ON(np->cork.opt))
1139 return -EINVAL;
1141 np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
1142 if (unlikely(np->cork.opt == NULL))
1143 return -ENOBUFS;
1145 np->cork.opt->tot_len = opt->tot_len;
1146 np->cork.opt->opt_flen = opt->opt_flen;
1147 np->cork.opt->opt_nflen = opt->opt_nflen;
1149 np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1150 sk->sk_allocation);
1151 if (opt->dst0opt && !np->cork.opt->dst0opt)
1152 return -ENOBUFS;
1154 np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1155 sk->sk_allocation);
1156 if (opt->dst1opt && !np->cork.opt->dst1opt)
1157 return -ENOBUFS;
1159 np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1160 sk->sk_allocation);
1161 if (opt->hopopt && !np->cork.opt->hopopt)
1162 return -ENOBUFS;
1164 np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1165 sk->sk_allocation);
1166 if (opt->srcrt && !np->cork.opt->srcrt)
1167 return -ENOBUFS;
1169 /* need source address above miyazawa*/
1171 dst_hold(&rt->u.dst);
1172 inet->cork.dst = &rt->u.dst;
1173 inet->cork.fl = *fl;
1174 np->cork.hop_limit = hlimit;
1175 np->cork.tclass = tclass;
1176 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1177 rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
1178 if (np->frag_size < mtu) {
1179 if (np->frag_size)
1180 mtu = np->frag_size;
1182 inet->cork.fragsize = mtu;
1183 if (dst_allfrag(rt->u.dst.path))
1184 inet->cork.flags |= IPCORK_ALLFRAG;
1185 inet->cork.length = 0;
1186 sk->sk_sndmsg_page = NULL;
1187 sk->sk_sndmsg_off = 0;
1188 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
1189 rt->rt6i_nfheader_len;
1190 length += exthdrlen;
1191 transhdrlen += exthdrlen;
1192 } else {
1193 rt = (struct rt6_info *)inet->cork.dst;
1194 fl = &inet->cork.fl;
1195 opt = np->cork.opt;
1196 transhdrlen = 0;
1197 exthdrlen = 0;
1198 mtu = inet->cork.fragsize;
1201 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1203 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1204 (opt ? opt->opt_nflen : 0);
1205 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1207 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1208 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1209 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1210 return -EMSGSIZE;
1215 * Let's try using as much space as possible.
1216 * Use MTU if total length of the message fits into the MTU.
1217 * Otherwise, we need to reserve fragment header and
1218 * fragment alignment (= 8-15 octects, in total).
1220 * Note that we may need to "move" the data from the tail of
1221 * of the buffer to the new fragment when we split
1222 * the message.
1224 * FIXME: It may be fragmented into multiple chunks
1225 * at once if non-fragmentable extension headers
1226 * are too large.
1227 * --yoshfuji
1230 inet->cork.length += length;
1231 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1232 (rt->u.dst.dev->features & NETIF_F_UFO)) {
1234 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1235 fragheaderlen, transhdrlen, mtu,
1236 flags);
1237 if (err)
1238 goto error;
1239 return 0;
1242 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1243 goto alloc_new_skb;
1245 while (length > 0) {
1246 /* Check if the remaining data fits into current packet. */
1247 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1248 if (copy < length)
1249 copy = maxfraglen - skb->len;
1251 if (copy <= 0) {
1252 char *data;
1253 unsigned int datalen;
1254 unsigned int fraglen;
1255 unsigned int fraggap;
1256 unsigned int alloclen;
1257 struct sk_buff *skb_prev;
1258 alloc_new_skb:
1259 skb_prev = skb;
1261 /* There's no room in the current skb */
1262 if (skb_prev)
1263 fraggap = skb_prev->len - maxfraglen;
1264 else
1265 fraggap = 0;
1268 * If remaining data exceeds the mtu,
1269 * we know we need more fragment(s).
1271 datalen = length + fraggap;
1272 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1273 datalen = maxfraglen - fragheaderlen;
1275 fraglen = datalen + fragheaderlen;
1276 if ((flags & MSG_MORE) &&
1277 !(rt->u.dst.dev->features&NETIF_F_SG))
1278 alloclen = mtu;
1279 else
1280 alloclen = datalen + fragheaderlen;
1283 * The last fragment gets additional space at tail.
1284 * Note: we overallocate on fragments with MSG_MORE
1285 * because we have no idea if we're the last one.
1287 if (datalen == length + fraggap)
1288 alloclen += rt->u.dst.trailer_len;
1291 * We just reserve space for fragment header.
1292 * Note: this may be overallocation if the message
1293 * (without MSG_MORE) fits into the MTU.
1295 alloclen += sizeof(struct frag_hdr);
1297 if (transhdrlen) {
1298 skb = sock_alloc_send_skb(sk,
1299 alloclen + hh_len,
1300 (flags & MSG_DONTWAIT), &err);
1301 } else {
1302 skb = NULL;
1303 if (atomic_read(&sk->sk_wmem_alloc) <=
1304 2 * sk->sk_sndbuf)
1305 skb = sock_wmalloc(sk,
1306 alloclen + hh_len, 1,
1307 sk->sk_allocation);
1308 if (unlikely(skb == NULL))
1309 err = -ENOBUFS;
1311 if (skb == NULL)
1312 goto error;
1314 * Fill in the control structures
1316 skb->ip_summed = csummode;
1317 skb->csum = 0;
1318 /* reserve for fragmentation */
1319 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1322 * Find where to start putting bytes
1324 data = skb_put(skb, fraglen);
1325 skb_set_network_header(skb, exthdrlen);
1326 data += fragheaderlen;
1327 skb->transport_header = (skb->network_header +
1328 fragheaderlen);
1329 if (fraggap) {
1330 skb->csum = skb_copy_and_csum_bits(
1331 skb_prev, maxfraglen,
1332 data + transhdrlen, fraggap, 0);
1333 skb_prev->csum = csum_sub(skb_prev->csum,
1334 skb->csum);
1335 data += fraggap;
1336 pskb_trim_unique(skb_prev, maxfraglen);
1338 copy = datalen - transhdrlen - fraggap;
1339 if (copy < 0) {
1340 err = -EINVAL;
1341 kfree_skb(skb);
1342 goto error;
1343 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1344 err = -EFAULT;
1345 kfree_skb(skb);
1346 goto error;
1349 offset += copy;
1350 length -= datalen - fraggap;
1351 transhdrlen = 0;
1352 exthdrlen = 0;
1353 csummode = CHECKSUM_NONE;
1356 * Put the packet on the pending queue
1358 __skb_queue_tail(&sk->sk_write_queue, skb);
1359 continue;
1362 if (copy > length)
1363 copy = length;
1365 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1366 unsigned int off;
1368 off = skb->len;
1369 if (getfrag(from, skb_put(skb, copy),
1370 offset, copy, off, skb) < 0) {
1371 __skb_trim(skb, off);
1372 err = -EFAULT;
1373 goto error;
1375 } else {
1376 int i = skb_shinfo(skb)->nr_frags;
1377 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1378 struct page *page = sk->sk_sndmsg_page;
1379 int off = sk->sk_sndmsg_off;
1380 unsigned int left;
1382 if (page && (left = PAGE_SIZE - off) > 0) {
1383 if (copy >= left)
1384 copy = left;
1385 if (page != frag->page) {
1386 if (i == MAX_SKB_FRAGS) {
1387 err = -EMSGSIZE;
1388 goto error;
1390 get_page(page);
1391 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1392 frag = &skb_shinfo(skb)->frags[i];
1394 } else if(i < MAX_SKB_FRAGS) {
1395 if (copy > PAGE_SIZE)
1396 copy = PAGE_SIZE;
1397 page = alloc_pages(sk->sk_allocation, 0);
1398 if (page == NULL) {
1399 err = -ENOMEM;
1400 goto error;
1402 sk->sk_sndmsg_page = page;
1403 sk->sk_sndmsg_off = 0;
1405 skb_fill_page_desc(skb, i, page, 0, 0);
1406 frag = &skb_shinfo(skb)->frags[i];
1407 } else {
1408 err = -EMSGSIZE;
1409 goto error;
1411 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1412 err = -EFAULT;
1413 goto error;
1415 sk->sk_sndmsg_off += copy;
1416 frag->size += copy;
1417 skb->len += copy;
1418 skb->data_len += copy;
1419 skb->truesize += copy;
1420 atomic_add(copy, &sk->sk_wmem_alloc);
1422 offset += copy;
1423 length -= copy;
1425 return 0;
1426 error:
1427 inet->cork.length -= length;
1428 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1429 return err;
1432 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1434 if (np->cork.opt) {
1435 kfree(np->cork.opt->dst0opt);
1436 kfree(np->cork.opt->dst1opt);
1437 kfree(np->cork.opt->hopopt);
1438 kfree(np->cork.opt->srcrt);
1439 kfree(np->cork.opt);
1440 np->cork.opt = NULL;
1443 if (inet->cork.dst) {
1444 dst_release(inet->cork.dst);
1445 inet->cork.dst = NULL;
1446 inet->cork.flags &= ~IPCORK_ALLFRAG;
1448 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1451 int ip6_push_pending_frames(struct sock *sk)
1453 struct sk_buff *skb, *tmp_skb;
1454 struct sk_buff **tail_skb;
1455 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1456 struct inet_sock *inet = inet_sk(sk);
1457 struct ipv6_pinfo *np = inet6_sk(sk);
1458 struct net *net = sock_net(sk);
1459 struct ipv6hdr *hdr;
1460 struct ipv6_txoptions *opt = np->cork.opt;
1461 struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
1462 struct flowi *fl = &inet->cork.fl;
1463 unsigned char proto = fl->proto;
1464 int err = 0;
1466 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1467 goto out;
1468 tail_skb = &(skb_shinfo(skb)->frag_list);
1470 /* move skb->data to ip header from ext header */
1471 if (skb->data < skb_network_header(skb))
1472 __skb_pull(skb, skb_network_offset(skb));
1473 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1474 __skb_pull(tmp_skb, skb_network_header_len(skb));
1475 *tail_skb = tmp_skb;
1476 tail_skb = &(tmp_skb->next);
1477 skb->len += tmp_skb->len;
1478 skb->data_len += tmp_skb->len;
1479 skb->truesize += tmp_skb->truesize;
1480 tmp_skb->destructor = NULL;
1481 tmp_skb->sk = NULL;
1484 /* Allow local fragmentation. */
1485 if (np->pmtudisc < IPV6_PMTUDISC_DO)
1486 skb->local_df = 1;
1488 ipv6_addr_copy(final_dst, &fl->fl6_dst);
1489 __skb_pull(skb, skb_network_header_len(skb));
1490 if (opt && opt->opt_flen)
1491 ipv6_push_frag_opts(skb, opt, &proto);
1492 if (opt && opt->opt_nflen)
1493 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1495 skb_push(skb, sizeof(struct ipv6hdr));
1496 skb_reset_network_header(skb);
1497 hdr = ipv6_hdr(skb);
1499 *(__be32*)hdr = fl->fl6_flowlabel |
1500 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1502 hdr->hop_limit = np->cork.hop_limit;
1503 hdr->nexthdr = proto;
1504 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1505 ipv6_addr_copy(&hdr->daddr, final_dst);
1507 skb->priority = sk->sk_priority;
1508 skb->mark = sk->sk_mark;
1510 skb_dst_set(skb, dst_clone(&rt->u.dst));
1511 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1512 if (proto == IPPROTO_ICMPV6) {
1513 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1515 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
1516 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1519 err = ip6_local_out(skb);
1520 if (err) {
1521 if (err > 0)
1522 err = net_xmit_errno(err);
1523 if (err)
1524 goto error;
1527 out:
1528 ip6_cork_release(inet, np);
1529 return err;
1530 error:
1531 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1532 goto out;
1535 void ip6_flush_pending_frames(struct sock *sk)
1537 struct sk_buff *skb;
1539 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1540 if (skb_dst(skb))
1541 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1542 IPSTATS_MIB_OUTDISCARDS);
1543 kfree_skb(skb);
1546 ip6_cork_release(inet_sk(sk), inet6_sk(sk));