ecryptfs: fix use with tmpfs by removing d_drop from ecryptfs_destroy_inode
[linux/fpc-iii.git] / net / ipv6 / ip6_output.c
blobdabf108ad81131138a5923268231809ebdb51494
1 /*
2 * IPv6 output functions
3 * Linux INET6 implementation
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * Based on linux/net/ipv4/ip_output.c
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
15 * Changes:
16 * A.N.Kuznetsov : arithmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
41 #include <linux/netfilter.h>
42 #include <linux/netfilter_ipv6.h>
44 #include <net/sock.h>
45 #include <net/snmp.h>
47 #include <net/ipv6.h>
48 #include <net/ndisc.h>
49 #include <net/protocol.h>
50 #include <net/ip6_route.h>
51 #include <net/addrconf.h>
52 #include <net/rawv6.h>
53 #include <net/icmp.h>
54 #include <net/xfrm.h>
55 #include <net/checksum.h>
56 #include <linux/mroute6.h>
58 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
60 int __ip6_local_out(struct sk_buff *skb)
62 int len;
64 len = skb->len - sizeof(struct ipv6hdr);
65 if (len > IPV6_MAXPLEN)
66 len = 0;
67 ipv6_hdr(skb)->payload_len = htons(len);
69 return nf_hook(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, skb_dst(skb)->dev,
70 dst_output);
/*
 * Send a locally generated packet: run the LOCAL_OUT hook via
 * __ip6_local_out() and, when it returns 1, hand the packet to
 * dst_output().  Any other hook result is returned unchanged.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int rc = __ip6_local_out(skb);

	if (unlikely(rc != 1))
		return rc;

	return dst_output(skb);
}
EXPORT_SYMBOL_GPL(ip6_local_out);
85 static int ip6_output_finish(struct sk_buff *skb)
87 struct dst_entry *dst = skb_dst(skb);
89 if (dst->hh)
90 return neigh_hh_output(dst->hh, skb);
91 else if (dst->neighbour)
92 return dst->neighbour->output(skb);
94 IP6_INC_STATS_BH(dev_net(dst->dev),
95 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
96 kfree_skb(skb);
97 return -EINVAL;
101 /* dev_loopback_xmit for use with netfilter. */
102 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
104 skb_reset_mac_header(newskb);
105 __skb_pull(newskb, skb_network_offset(newskb));
106 newskb->pkt_type = PACKET_LOOPBACK;
107 newskb->ip_summed = CHECKSUM_UNNECESSARY;
108 WARN_ON(!skb_dst(newskb));
110 netif_rx(newskb);
111 return 0;
115 static int ip6_output2(struct sk_buff *skb)
117 struct dst_entry *dst = skb_dst(skb);
118 struct net_device *dev = dst->dev;
120 skb->protocol = htons(ETH_P_IPV6);
121 skb->dev = dev;
123 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
124 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
126 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
127 ((mroute6_socket(dev_net(dev)) &&
128 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
129 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
130 &ipv6_hdr(skb)->saddr))) {
131 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
133 /* Do not check for IFF_ALLMULTI; multicast routing
134 is not supported in any case.
136 if (newskb)
137 NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, newskb,
138 NULL, newskb->dev,
139 ip6_dev_loopback_xmit);
141 if (ipv6_hdr(skb)->hop_limit == 0) {
142 IP6_INC_STATS(dev_net(dev), idev,
143 IPSTATS_MIB_OUTDISCARDS);
144 kfree_skb(skb);
145 return 0;
149 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
150 skb->len);
153 return NF_HOOK(PF_INET6, NF_INET_POST_ROUTING, skb, NULL, skb->dev,
154 ip6_output_finish);
157 static inline int ip6_skb_dst_mtu(struct sk_buff *skb)
159 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
161 return (np && np->pmtudisc == IPV6_PMTUDISC_PROBE) ?
162 skb_dst(skb)->dev->mtu : dst_mtu(skb_dst(skb));
165 int ip6_output(struct sk_buff *skb)
167 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
168 if (unlikely(idev->cnf.disable_ipv6)) {
169 IP6_INC_STATS(dev_net(skb_dst(skb)->dev), idev,
170 IPSTATS_MIB_OUTDISCARDS);
171 kfree_skb(skb);
172 return 0;
175 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
176 dst_allfrag(skb_dst(skb)))
177 return ip6_fragment(skb, ip6_output2);
178 else
179 return ip6_output2(skb);
183 * xmit an sk_buff (used by TCP)
186 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi *fl,
187 struct ipv6_txoptions *opt, int ipfragok)
189 struct net *net = sock_net(sk);
190 struct ipv6_pinfo *np = inet6_sk(sk);
191 struct in6_addr *first_hop = &fl->fl6_dst;
192 struct dst_entry *dst = skb_dst(skb);
193 struct ipv6hdr *hdr;
194 u8 proto = fl->proto;
195 int seg_len = skb->len;
196 int hlimit = -1;
197 int tclass = 0;
198 u32 mtu;
200 if (opt) {
201 unsigned int head_room;
203 /* First: exthdrs may take lots of space (~8K for now)
204 MAX_HEADER is not enough.
206 head_room = opt->opt_nflen + opt->opt_flen;
207 seg_len += head_room;
208 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
210 if (skb_headroom(skb) < head_room) {
211 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
212 if (skb2 == NULL) {
213 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
214 IPSTATS_MIB_OUTDISCARDS);
215 kfree_skb(skb);
216 return -ENOBUFS;
218 kfree_skb(skb);
219 skb = skb2;
220 if (sk)
221 skb_set_owner_w(skb, sk);
223 if (opt->opt_flen)
224 ipv6_push_frag_opts(skb, opt, &proto);
225 if (opt->opt_nflen)
226 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
229 skb_push(skb, sizeof(struct ipv6hdr));
230 skb_reset_network_header(skb);
231 hdr = ipv6_hdr(skb);
233 /* Allow local fragmentation. */
234 if (ipfragok)
235 skb->local_df = 1;
238 * Fill in the IPv6 header
240 if (np) {
241 tclass = np->tclass;
242 hlimit = np->hop_limit;
244 if (hlimit < 0)
245 hlimit = ip6_dst_hoplimit(dst);
247 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl->fl6_flowlabel;
249 hdr->payload_len = htons(seg_len);
250 hdr->nexthdr = proto;
251 hdr->hop_limit = hlimit;
253 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
254 ipv6_addr_copy(&hdr->daddr, first_hop);
256 skb->priority = sk->sk_priority;
257 skb->mark = sk->sk_mark;
259 mtu = dst_mtu(dst);
260 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
261 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
262 IPSTATS_MIB_OUT, skb->len);
263 return NF_HOOK(PF_INET6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev,
264 dst_output);
267 if (net_ratelimit())
268 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
269 skb->dev = dst->dev;
270 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
271 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
272 kfree_skb(skb);
273 return -EMSGSIZE;
276 EXPORT_SYMBOL(ip6_xmit);
279 * To avoid extra problems ND packets are send through this
280 * routine. It's code duplication but I really want to avoid
281 * extra checks since ipv6_build_header is used by TCP (which
282 * is for us performance critical)
285 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
286 const struct in6_addr *saddr, const struct in6_addr *daddr,
287 int proto, int len)
289 struct ipv6_pinfo *np = inet6_sk(sk);
290 struct ipv6hdr *hdr;
291 int totlen;
293 skb->protocol = htons(ETH_P_IPV6);
294 skb->dev = dev;
296 totlen = len + sizeof(struct ipv6hdr);
298 skb_reset_network_header(skb);
299 skb_put(skb, sizeof(struct ipv6hdr));
300 hdr = ipv6_hdr(skb);
302 *(__be32*)hdr = htonl(0x60000000);
304 hdr->payload_len = htons(len);
305 hdr->nexthdr = proto;
306 hdr->hop_limit = np->hop_limit;
308 ipv6_addr_copy(&hdr->saddr, saddr);
309 ipv6_addr_copy(&hdr->daddr, daddr);
311 return 0;
314 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
316 struct ip6_ra_chain *ra;
317 struct sock *last = NULL;
319 read_lock(&ip6_ra_lock);
320 for (ra = ip6_ra_chain; ra; ra = ra->next) {
321 struct sock *sk = ra->sk;
322 if (sk && ra->sel == sel &&
323 (!sk->sk_bound_dev_if ||
324 sk->sk_bound_dev_if == skb->dev->ifindex)) {
325 if (last) {
326 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
327 if (skb2)
328 rawv6_rcv(last, skb2);
330 last = sk;
334 if (last) {
335 rawv6_rcv(last, skb);
336 read_unlock(&ip6_ra_lock);
337 return 1;
339 read_unlock(&ip6_ra_lock);
340 return 0;
343 static int ip6_forward_proxy_check(struct sk_buff *skb)
345 struct ipv6hdr *hdr = ipv6_hdr(skb);
346 u8 nexthdr = hdr->nexthdr;
347 int offset;
349 if (ipv6_ext_hdr(nexthdr)) {
350 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
351 if (offset < 0)
352 return 0;
353 } else
354 offset = sizeof(struct ipv6hdr);
356 if (nexthdr == IPPROTO_ICMPV6) {
357 struct icmp6hdr *icmp6;
359 if (!pskb_may_pull(skb, (skb_network_header(skb) +
360 offset + 1 - skb->data)))
361 return 0;
363 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
365 switch (icmp6->icmp6_type) {
366 case NDISC_ROUTER_SOLICITATION:
367 case NDISC_ROUTER_ADVERTISEMENT:
368 case NDISC_NEIGHBOUR_SOLICITATION:
369 case NDISC_NEIGHBOUR_ADVERTISEMENT:
370 case NDISC_REDIRECT:
371 /* For reaction involving unicast neighbor discovery
372 * message destined to the proxied address, pass it to
373 * input function.
375 return 1;
376 default:
377 break;
382 * The proxying router can't forward traffic sent to a link-local
383 * address, so signal the sender and discard the packet. This
384 * behavior is clarified by the MIPv6 specification.
386 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
387 dst_link_failure(skb);
388 return -1;
391 return 0;
/* Continuation run after the FORWARD hook: hand the packet to dst_output(). */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
399 int ip6_forward(struct sk_buff *skb)
401 struct dst_entry *dst = skb_dst(skb);
402 struct ipv6hdr *hdr = ipv6_hdr(skb);
403 struct inet6_skb_parm *opt = IP6CB(skb);
404 struct net *net = dev_net(dst->dev);
405 u32 mtu;
407 if (net->ipv6.devconf_all->forwarding == 0)
408 goto error;
410 if (skb_warn_if_lro(skb))
411 goto drop;
413 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
414 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
415 goto drop;
418 skb_forward_csum(skb);
421 * We DO NOT make any processing on
422 * RA packets, pushing them to user level AS IS
423 * without ane WARRANTY that application will be able
424 * to interpret them. The reason is that we
425 * cannot make anything clever here.
427 * We are not end-node, so that if packet contains
428 * AH/ESP, we cannot make anything.
429 * Defragmentation also would be mistake, RA packets
430 * cannot be fragmented, because there is no warranty
431 * that different fragments will go along one path. --ANK
433 if (opt->ra) {
434 u8 *ptr = skb_network_header(skb) + opt->ra;
435 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
436 return 0;
440 * check and decrement ttl
442 if (hdr->hop_limit <= 1) {
443 /* Force OUTPUT device used as source address */
444 skb->dev = dst->dev;
445 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
446 IP6_INC_STATS_BH(net,
447 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
449 kfree_skb(skb);
450 return -ETIMEDOUT;
453 /* XXX: idev->cnf.proxy_ndp? */
454 if (net->ipv6.devconf_all->proxy_ndp &&
455 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
456 int proxied = ip6_forward_proxy_check(skb);
457 if (proxied > 0)
458 return ip6_input(skb);
459 else if (proxied < 0) {
460 IP6_INC_STATS(net, ip6_dst_idev(dst),
461 IPSTATS_MIB_INDISCARDS);
462 goto drop;
466 if (!xfrm6_route_forward(skb)) {
467 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
468 goto drop;
470 dst = skb_dst(skb);
472 /* IPv6 specs say nothing about it, but it is clear that we cannot
473 send redirects to source routed frames.
474 We don't send redirects to frames decapsulated from IPsec.
476 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
477 !skb_sec_path(skb)) {
478 struct in6_addr *target = NULL;
479 struct rt6_info *rt;
480 struct neighbour *n = dst->neighbour;
483 * incoming and outgoing devices are the same
484 * send a redirect.
487 rt = (struct rt6_info *) dst;
488 if ((rt->rt6i_flags & RTF_GATEWAY))
489 target = (struct in6_addr*)&n->primary_key;
490 else
491 target = &hdr->daddr;
493 /* Limit redirects both by destination (here)
494 and by source (inside ndisc_send_redirect)
496 if (xrlim_allow(dst, 1*HZ))
497 ndisc_send_redirect(skb, n, target);
498 } else {
499 int addrtype = ipv6_addr_type(&hdr->saddr);
501 /* This check is security critical. */
502 if (addrtype == IPV6_ADDR_ANY ||
503 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
504 goto error;
505 if (addrtype & IPV6_ADDR_LINKLOCAL) {
506 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
507 ICMPV6_NOT_NEIGHBOUR, 0);
508 goto error;
512 mtu = dst_mtu(dst);
513 if (mtu < IPV6_MIN_MTU)
514 mtu = IPV6_MIN_MTU;
516 if (skb->len > mtu) {
517 /* Again, force OUTPUT device used as source address */
518 skb->dev = dst->dev;
519 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
520 IP6_INC_STATS_BH(net,
521 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
522 IP6_INC_STATS_BH(net,
523 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
524 kfree_skb(skb);
525 return -EMSGSIZE;
528 if (skb_cow(skb, dst->dev->hard_header_len)) {
529 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
530 goto drop;
533 hdr = ipv6_hdr(skb);
535 /* Mangling hops number delayed to point after skb COW */
537 hdr->hop_limit--;
539 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
540 return NF_HOOK(PF_INET6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
541 ip6_forward_finish);
543 error:
544 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
545 drop:
546 kfree_skb(skb);
547 return -EINVAL;
550 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
552 to->pkt_type = from->pkt_type;
553 to->priority = from->priority;
554 to->protocol = from->protocol;
555 skb_dst_drop(to);
556 skb_dst_set(to, dst_clone(skb_dst(from)));
557 to->dev = from->dev;
558 to->mark = from->mark;
560 #ifdef CONFIG_NET_SCHED
561 to->tc_index = from->tc_index;
562 #endif
563 nf_copy(to, from);
564 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
565 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
566 to->nf_trace = from->nf_trace;
567 #endif
568 skb_copy_secmark(to, from);
571 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
573 u16 offset = sizeof(struct ipv6hdr);
574 struct ipv6_opt_hdr *exthdr =
575 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
576 unsigned int packet_len = skb->tail - skb->network_header;
577 int found_rhdr = 0;
578 *nexthdr = &ipv6_hdr(skb)->nexthdr;
580 while (offset + 1 <= packet_len) {
582 switch (**nexthdr) {
584 case NEXTHDR_HOP:
585 break;
586 case NEXTHDR_ROUTING:
587 found_rhdr = 1;
588 break;
589 case NEXTHDR_DEST:
590 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
591 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
592 break;
593 #endif
594 if (found_rhdr)
595 return offset;
596 break;
597 default :
598 return offset;
601 offset += ipv6_optlen(exthdr);
602 *nexthdr = &exthdr->nexthdr;
603 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
604 offset);
607 return offset;
610 static int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
612 struct sk_buff *frag;
613 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
614 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
615 struct ipv6hdr *tmp_hdr;
616 struct frag_hdr *fh;
617 unsigned int mtu, hlen, left, len;
618 __be32 frag_id = 0;
619 int ptr, offset = 0, err=0;
620 u8 *prevhdr, nexthdr = 0;
621 struct net *net = dev_net(skb_dst(skb)->dev);
623 hlen = ip6_find_1stfragopt(skb, &prevhdr);
624 nexthdr = *prevhdr;
626 mtu = ip6_skb_dst_mtu(skb);
628 /* We must not fragment if the socket is set to force MTU discovery
629 * or if the skb it not generated by a local socket.
631 if (!skb->local_df) {
632 skb->dev = skb_dst(skb)->dev;
633 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
634 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
635 IPSTATS_MIB_FRAGFAILS);
636 kfree_skb(skb);
637 return -EMSGSIZE;
640 if (np && np->frag_size < mtu) {
641 if (np->frag_size)
642 mtu = np->frag_size;
644 mtu -= hlen + sizeof(struct frag_hdr);
646 if (skb_has_frags(skb)) {
647 int first_len = skb_pagelen(skb);
648 int truesizes = 0;
650 if (first_len - hlen > mtu ||
651 ((first_len - hlen) & 7) ||
652 skb_cloned(skb))
653 goto slow_path;
655 skb_walk_frags(skb, frag) {
656 /* Correct geometry. */
657 if (frag->len > mtu ||
658 ((frag->len & 7) && frag->next) ||
659 skb_headroom(frag) < hlen)
660 goto slow_path;
662 /* Partially cloned skb? */
663 if (skb_shared(frag))
664 goto slow_path;
666 BUG_ON(frag->sk);
667 if (skb->sk) {
668 frag->sk = skb->sk;
669 frag->destructor = sock_wfree;
670 truesizes += frag->truesize;
674 err = 0;
675 offset = 0;
676 frag = skb_shinfo(skb)->frag_list;
677 skb_frag_list_init(skb);
678 /* BUILD HEADER */
680 *prevhdr = NEXTHDR_FRAGMENT;
681 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
682 if (!tmp_hdr) {
683 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
684 IPSTATS_MIB_FRAGFAILS);
685 return -ENOMEM;
688 __skb_pull(skb, hlen);
689 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
690 __skb_push(skb, hlen);
691 skb_reset_network_header(skb);
692 memcpy(skb_network_header(skb), tmp_hdr, hlen);
694 ipv6_select_ident(fh);
695 fh->nexthdr = nexthdr;
696 fh->reserved = 0;
697 fh->frag_off = htons(IP6_MF);
698 frag_id = fh->identification;
700 first_len = skb_pagelen(skb);
701 skb->data_len = first_len - skb_headlen(skb);
702 skb->truesize -= truesizes;
703 skb->len = first_len;
704 ipv6_hdr(skb)->payload_len = htons(first_len -
705 sizeof(struct ipv6hdr));
707 dst_hold(&rt->u.dst);
709 for (;;) {
710 /* Prepare header of the next frame,
711 * before previous one went down. */
712 if (frag) {
713 frag->ip_summed = CHECKSUM_NONE;
714 skb_reset_transport_header(frag);
715 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
716 __skb_push(frag, hlen);
717 skb_reset_network_header(frag);
718 memcpy(skb_network_header(frag), tmp_hdr,
719 hlen);
720 offset += skb->len - hlen - sizeof(struct frag_hdr);
721 fh->nexthdr = nexthdr;
722 fh->reserved = 0;
723 fh->frag_off = htons(offset);
724 if (frag->next != NULL)
725 fh->frag_off |= htons(IP6_MF);
726 fh->identification = frag_id;
727 ipv6_hdr(frag)->payload_len =
728 htons(frag->len -
729 sizeof(struct ipv6hdr));
730 ip6_copy_metadata(frag, skb);
733 err = output(skb);
734 if(!err)
735 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
736 IPSTATS_MIB_FRAGCREATES);
738 if (err || !frag)
739 break;
741 skb = frag;
742 frag = skb->next;
743 skb->next = NULL;
746 kfree(tmp_hdr);
748 if (err == 0) {
749 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
750 IPSTATS_MIB_FRAGOKS);
751 dst_release(&rt->u.dst);
752 return 0;
755 while (frag) {
756 skb = frag->next;
757 kfree_skb(frag);
758 frag = skb;
761 IP6_INC_STATS(net, ip6_dst_idev(&rt->u.dst),
762 IPSTATS_MIB_FRAGFAILS);
763 dst_release(&rt->u.dst);
764 return err;
767 slow_path:
768 left = skb->len - hlen; /* Space per frame */
769 ptr = hlen; /* Where to start from */
772 * Fragment the datagram.
775 *prevhdr = NEXTHDR_FRAGMENT;
778 * Keep copying data until we run out.
780 while(left > 0) {
781 len = left;
782 /* IF: it doesn't fit, use 'mtu' - the data space left */
783 if (len > mtu)
784 len = mtu;
785 /* IF: we are not sending upto and including the packet end
786 then align the next start on an eight byte boundary */
787 if (len < left) {
788 len &= ~7;
791 * Allocate buffer.
794 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->u.dst.dev), GFP_ATOMIC)) == NULL) {
795 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
796 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
797 IPSTATS_MIB_FRAGFAILS);
798 err = -ENOMEM;
799 goto fail;
803 * Set up data on packet
806 ip6_copy_metadata(frag, skb);
807 skb_reserve(frag, LL_RESERVED_SPACE(rt->u.dst.dev));
808 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
809 skb_reset_network_header(frag);
810 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
811 frag->transport_header = (frag->network_header + hlen +
812 sizeof(struct frag_hdr));
815 * Charge the memory for the fragment to any owner
816 * it might possess
818 if (skb->sk)
819 skb_set_owner_w(frag, skb->sk);
822 * Copy the packet header into the new buffer.
824 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
827 * Build fragment header.
829 fh->nexthdr = nexthdr;
830 fh->reserved = 0;
831 if (!frag_id) {
832 ipv6_select_ident(fh);
833 frag_id = fh->identification;
834 } else
835 fh->identification = frag_id;
838 * Copy a block of the IP datagram.
840 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
841 BUG();
842 left -= len;
844 fh->frag_off = htons(offset);
845 if (left > 0)
846 fh->frag_off |= htons(IP6_MF);
847 ipv6_hdr(frag)->payload_len = htons(frag->len -
848 sizeof(struct ipv6hdr));
850 ptr += len;
851 offset += len;
854 * Put this fragment into the sending queue.
856 err = output(frag);
857 if (err)
858 goto fail;
860 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
861 IPSTATS_MIB_FRAGCREATES);
863 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
864 IPSTATS_MIB_FRAGOKS);
865 kfree_skb(skb);
866 return err;
868 fail:
869 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
870 IPSTATS_MIB_FRAGFAILS);
871 kfree_skb(skb);
872 return err;
875 static inline int ip6_rt_check(struct rt6key *rt_key,
876 struct in6_addr *fl_addr,
877 struct in6_addr *addr_cache)
879 return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
880 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)));
883 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
884 struct dst_entry *dst,
885 struct flowi *fl)
887 struct ipv6_pinfo *np = inet6_sk(sk);
888 struct rt6_info *rt = (struct rt6_info *)dst;
890 if (!dst)
891 goto out;
893 /* Yes, checking route validity in not connected
894 * case is not very simple. Take into account,
895 * that we do not support routing by source, TOS,
896 * and MSG_DONTROUTE --ANK (980726)
898 * 1. ip6_rt_check(): If route was host route,
899 * check that cached destination is current.
900 * If it is network route, we still may
901 * check its validity using saved pointer
902 * to the last used address: daddr_cache.
903 * We do not want to save whole address now,
904 * (because main consumer of this service
905 * is tcp, which has not this problem),
906 * so that the last trick works only on connected
907 * sockets.
908 * 2. oif also should be the same.
910 if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) ||
911 #ifdef CONFIG_IPV6_SUBTREES
912 ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) ||
913 #endif
914 (fl->oif && fl->oif != dst->dev->ifindex)) {
915 dst_release(dst);
916 dst = NULL;
919 out:
920 return dst;
923 static int ip6_dst_lookup_tail(struct sock *sk,
924 struct dst_entry **dst, struct flowi *fl)
926 int err;
927 struct net *net = sock_net(sk);
929 if (*dst == NULL)
930 *dst = ip6_route_output(net, sk, fl);
932 if ((err = (*dst)->error))
933 goto out_err_release;
935 if (ipv6_addr_any(&fl->fl6_src)) {
936 err = ipv6_dev_get_saddr(net, ip6_dst_idev(*dst)->dev,
937 &fl->fl6_dst,
938 sk ? inet6_sk(sk)->srcprefs : 0,
939 &fl->fl6_src);
940 if (err)
941 goto out_err_release;
944 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
946 * Here if the dst entry we've looked up
947 * has a neighbour entry that is in the INCOMPLETE
948 * state and the src address from the flow is
949 * marked as OPTIMISTIC, we release the found
950 * dst entry and replace it instead with the
951 * dst entry of the nexthop router
953 if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
954 struct inet6_ifaddr *ifp;
955 struct flowi fl_gw;
956 int redirect;
958 ifp = ipv6_get_ifaddr(net, &fl->fl6_src,
959 (*dst)->dev, 1);
961 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
962 if (ifp)
963 in6_ifa_put(ifp);
965 if (redirect) {
967 * We need to get the dst entry for the
968 * default router instead
970 dst_release(*dst);
971 memcpy(&fl_gw, fl, sizeof(struct flowi));
972 memset(&fl_gw.fl6_dst, 0, sizeof(struct in6_addr));
973 *dst = ip6_route_output(net, sk, &fl_gw);
974 if ((err = (*dst)->error))
975 goto out_err_release;
978 #endif
980 return 0;
982 out_err_release:
983 if (err == -ENETUNREACH)
984 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
985 dst_release(*dst);
986 *dst = NULL;
987 return err;
991 * ip6_dst_lookup - perform route lookup on flow
992 * @sk: socket which provides route info
993 * @dst: pointer to dst_entry * for result
994 * @fl: flow to lookup
996 * This function performs a route lookup on the given flow.
998 * It returns zero on success, or a standard errno code on error.
1000 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1002 *dst = NULL;
1003 return ip6_dst_lookup_tail(sk, dst, fl);
1005 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1008 * ip6_sk_dst_lookup - perform socket cached route lookup on flow
1009 * @sk: socket which provides the dst cache and route info
1010 * @dst: pointer to dst_entry * for result
1011 * @fl: flow to lookup
1013 * This function performs a route lookup on the given flow with the
1014 * possibility of using the cached route in the socket if it is valid.
1015 * It will take the socket dst lock when operating on the dst cache.
1016 * As a result, this function can only be used in process context.
1018 * It returns zero on success, or a standard errno code on error.
1020 int ip6_sk_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi *fl)
1022 *dst = NULL;
1023 if (sk) {
1024 *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1025 *dst = ip6_sk_dst_check(sk, *dst, fl);
1028 return ip6_dst_lookup_tail(sk, dst, fl);
1030 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup);
1032 static inline int ip6_ufo_append_data(struct sock *sk,
1033 int getfrag(void *from, char *to, int offset, int len,
1034 int odd, struct sk_buff *skb),
1035 void *from, int length, int hh_len, int fragheaderlen,
1036 int transhdrlen, int mtu,unsigned int flags)
1039 struct sk_buff *skb;
1040 int err;
1042 /* There is support for UDP large send offload by network
1043 * device, so create one single skb packet containing complete
1044 * udp datagram
1046 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1047 skb = sock_alloc_send_skb(sk,
1048 hh_len + fragheaderlen + transhdrlen + 20,
1049 (flags & MSG_DONTWAIT), &err);
1050 if (skb == NULL)
1051 return -ENOMEM;
1053 /* reserve space for Hardware header */
1054 skb_reserve(skb, hh_len);
1056 /* create space for UDP/IP header */
1057 skb_put(skb,fragheaderlen + transhdrlen);
1059 /* initialize network header pointer */
1060 skb_reset_network_header(skb);
1062 /* initialize protocol header pointer */
1063 skb->transport_header = skb->network_header + fragheaderlen;
1065 skb->ip_summed = CHECKSUM_PARTIAL;
1066 skb->csum = 0;
1067 sk->sk_sndmsg_off = 0;
1070 err = skb_append_datato_frags(sk,skb, getfrag, from,
1071 (length - transhdrlen));
1072 if (!err) {
1073 struct frag_hdr fhdr;
1075 /* Specify the length of each IPv6 datagram fragment.
1076 * It has to be a multiple of 8.
1078 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1079 sizeof(struct frag_hdr)) & ~7;
1080 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1081 ipv6_select_ident(&fhdr);
1082 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1083 __skb_queue_tail(&sk->sk_write_queue, skb);
1085 return 0;
1087 /* There is not enough support do UPD LSO,
1088 * so follow normal path
1090 kfree_skb(skb);
1092 return err;
1095 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1096 gfp_t gfp)
1098 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1101 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1102 gfp_t gfp)
1104 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1107 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1108 int offset, int len, int odd, struct sk_buff *skb),
1109 void *from, int length, int transhdrlen,
1110 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi *fl,
1111 struct rt6_info *rt, unsigned int flags)
1113 struct inet_sock *inet = inet_sk(sk);
1114 struct ipv6_pinfo *np = inet6_sk(sk);
1115 struct sk_buff *skb;
1116 unsigned int maxfraglen, fragheaderlen;
1117 int exthdrlen;
1118 int hh_len;
1119 int mtu;
1120 int copy;
1121 int err;
1122 int offset = 0;
1123 int csummode = CHECKSUM_NONE;
1125 if (flags&MSG_PROBE)
1126 return 0;
1127 if (skb_queue_empty(&sk->sk_write_queue)) {
1129 * setup for corking
1131 if (opt) {
1132 if (WARN_ON(np->cork.opt))
1133 return -EINVAL;
1135 np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
1136 if (unlikely(np->cork.opt == NULL))
1137 return -ENOBUFS;
1139 np->cork.opt->tot_len = opt->tot_len;
1140 np->cork.opt->opt_flen = opt->opt_flen;
1141 np->cork.opt->opt_nflen = opt->opt_nflen;
1143 np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1144 sk->sk_allocation);
1145 if (opt->dst0opt && !np->cork.opt->dst0opt)
1146 return -ENOBUFS;
1148 np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1149 sk->sk_allocation);
1150 if (opt->dst1opt && !np->cork.opt->dst1opt)
1151 return -ENOBUFS;
1153 np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1154 sk->sk_allocation);
1155 if (opt->hopopt && !np->cork.opt->hopopt)
1156 return -ENOBUFS;
1158 np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1159 sk->sk_allocation);
1160 if (opt->srcrt && !np->cork.opt->srcrt)
1161 return -ENOBUFS;
1163 /* need source address above miyazawa*/
1165 dst_hold(&rt->u.dst);
1166 inet->cork.dst = &rt->u.dst;
1167 inet->cork.fl = *fl;
1168 np->cork.hop_limit = hlimit;
1169 np->cork.tclass = tclass;
1170 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1171 rt->u.dst.dev->mtu : dst_mtu(rt->u.dst.path);
1172 if (np->frag_size < mtu) {
1173 if (np->frag_size)
1174 mtu = np->frag_size;
1176 inet->cork.fragsize = mtu;
1177 if (dst_allfrag(rt->u.dst.path))
1178 inet->cork.flags |= IPCORK_ALLFRAG;
1179 inet->cork.length = 0;
1180 sk->sk_sndmsg_page = NULL;
1181 sk->sk_sndmsg_off = 0;
1182 exthdrlen = rt->u.dst.header_len + (opt ? opt->opt_flen : 0) -
1183 rt->rt6i_nfheader_len;
1184 length += exthdrlen;
1185 transhdrlen += exthdrlen;
1186 } else {
1187 rt = (struct rt6_info *)inet->cork.dst;
1188 fl = &inet->cork.fl;
1189 opt = np->cork.opt;
1190 transhdrlen = 0;
1191 exthdrlen = 0;
1192 mtu = inet->cork.fragsize;
1195 hh_len = LL_RESERVED_SPACE(rt->u.dst.dev);
1197 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1198 (opt ? opt->opt_nflen : 0);
1199 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1201 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1202 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1203 ipv6_local_error(sk, EMSGSIZE, fl, mtu-exthdrlen);
1204 return -EMSGSIZE;
1209 * Let's try using as much space as possible.
1210 * Use MTU if total length of the message fits into the MTU.
1211 * Otherwise, we need to reserve fragment header and
1212 * fragment alignment (= 8-15 octects, in total).
1214 * Note that we may need to "move" the data from the tail of
1215 * of the buffer to the new fragment when we split
1216 * the message.
1218 * FIXME: It may be fragmented into multiple chunks
1219 * at once if non-fragmentable extension headers
1220 * are too large.
1221 * --yoshfuji
1224 inet->cork.length += length;
1225 if (((length > mtu) && (sk->sk_protocol == IPPROTO_UDP)) &&
1226 (rt->u.dst.dev->features & NETIF_F_UFO)) {
1228 err = ip6_ufo_append_data(sk, getfrag, from, length, hh_len,
1229 fragheaderlen, transhdrlen, mtu,
1230 flags);
1231 if (err)
1232 goto error;
1233 return 0;
1236 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1237 goto alloc_new_skb;
1239 while (length > 0) {
1240 /* Check if the remaining data fits into current packet. */
1241 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1242 if (copy < length)
1243 copy = maxfraglen - skb->len;
1245 if (copy <= 0) {
1246 char *data;
1247 unsigned int datalen;
1248 unsigned int fraglen;
1249 unsigned int fraggap;
1250 unsigned int alloclen;
1251 struct sk_buff *skb_prev;
1252 alloc_new_skb:
1253 skb_prev = skb;
1255 /* There's no room in the current skb */
1256 if (skb_prev)
1257 fraggap = skb_prev->len - maxfraglen;
1258 else
1259 fraggap = 0;
1262 * If remaining data exceeds the mtu,
1263 * we know we need more fragment(s).
1265 datalen = length + fraggap;
1266 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1267 datalen = maxfraglen - fragheaderlen;
1269 fraglen = datalen + fragheaderlen;
1270 if ((flags & MSG_MORE) &&
1271 !(rt->u.dst.dev->features&NETIF_F_SG))
1272 alloclen = mtu;
1273 else
1274 alloclen = datalen + fragheaderlen;
1277 * The last fragment gets additional space at tail.
1278 * Note: we overallocate on fragments with MSG_MORE
1279 * because we have no idea if we're the last one.
1281 if (datalen == length + fraggap)
1282 alloclen += rt->u.dst.trailer_len;
1285 * We just reserve space for fragment header.
1286 * Note: this may be overallocation if the message
1287 * (without MSG_MORE) fits into the MTU.
1289 alloclen += sizeof(struct frag_hdr);
1291 if (transhdrlen) {
1292 skb = sock_alloc_send_skb(sk,
1293 alloclen + hh_len,
1294 (flags & MSG_DONTWAIT), &err);
1295 } else {
1296 skb = NULL;
1297 if (atomic_read(&sk->sk_wmem_alloc) <=
1298 2 * sk->sk_sndbuf)
1299 skb = sock_wmalloc(sk,
1300 alloclen + hh_len, 1,
1301 sk->sk_allocation);
1302 if (unlikely(skb == NULL))
1303 err = -ENOBUFS;
1305 if (skb == NULL)
1306 goto error;
1308 * Fill in the control structures
1310 skb->ip_summed = csummode;
1311 skb->csum = 0;
1312 /* reserve for fragmentation */
1313 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1316 * Find where to start putting bytes
1318 data = skb_put(skb, fraglen);
1319 skb_set_network_header(skb, exthdrlen);
1320 data += fragheaderlen;
1321 skb->transport_header = (skb->network_header +
1322 fragheaderlen);
1323 if (fraggap) {
1324 skb->csum = skb_copy_and_csum_bits(
1325 skb_prev, maxfraglen,
1326 data + transhdrlen, fraggap, 0);
1327 skb_prev->csum = csum_sub(skb_prev->csum,
1328 skb->csum);
1329 data += fraggap;
1330 pskb_trim_unique(skb_prev, maxfraglen);
1332 copy = datalen - transhdrlen - fraggap;
1333 if (copy < 0) {
1334 err = -EINVAL;
1335 kfree_skb(skb);
1336 goto error;
1337 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1338 err = -EFAULT;
1339 kfree_skb(skb);
1340 goto error;
1343 offset += copy;
1344 length -= datalen - fraggap;
1345 transhdrlen = 0;
1346 exthdrlen = 0;
1347 csummode = CHECKSUM_NONE;
1350 * Put the packet on the pending queue
1352 __skb_queue_tail(&sk->sk_write_queue, skb);
1353 continue;
1356 if (copy > length)
1357 copy = length;
1359 if (!(rt->u.dst.dev->features&NETIF_F_SG)) {
1360 unsigned int off;
1362 off = skb->len;
1363 if (getfrag(from, skb_put(skb, copy),
1364 offset, copy, off, skb) < 0) {
1365 __skb_trim(skb, off);
1366 err = -EFAULT;
1367 goto error;
1369 } else {
1370 int i = skb_shinfo(skb)->nr_frags;
1371 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1372 struct page *page = sk->sk_sndmsg_page;
1373 int off = sk->sk_sndmsg_off;
1374 unsigned int left;
1376 if (page && (left = PAGE_SIZE - off) > 0) {
1377 if (copy >= left)
1378 copy = left;
1379 if (page != frag->page) {
1380 if (i == MAX_SKB_FRAGS) {
1381 err = -EMSGSIZE;
1382 goto error;
1384 get_page(page);
1385 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1386 frag = &skb_shinfo(skb)->frags[i];
1388 } else if(i < MAX_SKB_FRAGS) {
1389 if (copy > PAGE_SIZE)
1390 copy = PAGE_SIZE;
1391 page = alloc_pages(sk->sk_allocation, 0);
1392 if (page == NULL) {
1393 err = -ENOMEM;
1394 goto error;
1396 sk->sk_sndmsg_page = page;
1397 sk->sk_sndmsg_off = 0;
1399 skb_fill_page_desc(skb, i, page, 0, 0);
1400 frag = &skb_shinfo(skb)->frags[i];
1401 } else {
1402 err = -EMSGSIZE;
1403 goto error;
1405 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1406 err = -EFAULT;
1407 goto error;
1409 sk->sk_sndmsg_off += copy;
1410 frag->size += copy;
1411 skb->len += copy;
1412 skb->data_len += copy;
1413 skb->truesize += copy;
1414 atomic_add(copy, &sk->sk_wmem_alloc);
1416 offset += copy;
1417 length -= copy;
1419 return 0;
1420 error:
1421 inet->cork.length -= length;
1422 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1423 return err;
1426 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1428 if (np->cork.opt) {
1429 kfree(np->cork.opt->dst0opt);
1430 kfree(np->cork.opt->dst1opt);
1431 kfree(np->cork.opt->hopopt);
1432 kfree(np->cork.opt->srcrt);
1433 kfree(np->cork.opt);
1434 np->cork.opt = NULL;
1437 if (inet->cork.dst) {
1438 dst_release(inet->cork.dst);
1439 inet->cork.dst = NULL;
1440 inet->cork.flags &= ~IPCORK_ALLFRAG;
1442 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1445 int ip6_push_pending_frames(struct sock *sk)
1447 struct sk_buff *skb, *tmp_skb;
1448 struct sk_buff **tail_skb;
1449 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1450 struct inet_sock *inet = inet_sk(sk);
1451 struct ipv6_pinfo *np = inet6_sk(sk);
1452 struct net *net = sock_net(sk);
1453 struct ipv6hdr *hdr;
1454 struct ipv6_txoptions *opt = np->cork.opt;
1455 struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
1456 struct flowi *fl = &inet->cork.fl;
1457 unsigned char proto = fl->proto;
1458 int err = 0;
1460 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1461 goto out;
1462 tail_skb = &(skb_shinfo(skb)->frag_list);
1464 /* move skb->data to ip header from ext header */
1465 if (skb->data < skb_network_header(skb))
1466 __skb_pull(skb, skb_network_offset(skb));
1467 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1468 __skb_pull(tmp_skb, skb_network_header_len(skb));
1469 *tail_skb = tmp_skb;
1470 tail_skb = &(tmp_skb->next);
1471 skb->len += tmp_skb->len;
1472 skb->data_len += tmp_skb->len;
1473 skb->truesize += tmp_skb->truesize;
1474 tmp_skb->destructor = NULL;
1475 tmp_skb->sk = NULL;
1478 /* Allow local fragmentation. */
1479 if (np->pmtudisc < IPV6_PMTUDISC_DO)
1480 skb->local_df = 1;
1482 ipv6_addr_copy(final_dst, &fl->fl6_dst);
1483 __skb_pull(skb, skb_network_header_len(skb));
1484 if (opt && opt->opt_flen)
1485 ipv6_push_frag_opts(skb, opt, &proto);
1486 if (opt && opt->opt_nflen)
1487 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1489 skb_push(skb, sizeof(struct ipv6hdr));
1490 skb_reset_network_header(skb);
1491 hdr = ipv6_hdr(skb);
1493 *(__be32*)hdr = fl->fl6_flowlabel |
1494 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1496 hdr->hop_limit = np->cork.hop_limit;
1497 hdr->nexthdr = proto;
1498 ipv6_addr_copy(&hdr->saddr, &fl->fl6_src);
1499 ipv6_addr_copy(&hdr->daddr, final_dst);
1501 skb->priority = sk->sk_priority;
1502 skb->mark = sk->sk_mark;
1504 skb_dst_set(skb, dst_clone(&rt->u.dst));
1505 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1506 if (proto == IPPROTO_ICMPV6) {
1507 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1509 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
1510 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1513 err = ip6_local_out(skb);
1514 if (err) {
1515 if (err > 0)
1516 err = net_xmit_errno(err);
1517 if (err)
1518 goto error;
1521 out:
1522 ip6_cork_release(inet, np);
1523 return err;
1524 error:
1525 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1526 goto out;
1529 void ip6_flush_pending_frames(struct sock *sk)
1531 struct sk_buff *skb;
1533 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1534 if (skb_dst(skb))
1535 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1536 IPSTATS_MIB_OUTDISCARDS);
1537 kfree_skb(skb);
1540 ip6_cork_release(inet_sk(sk), inet6_sk(sk));