Add linux-next specific files for 20110421
[linux-2.6/next.git] / net / ipv6 / ip6_output.c
blobc614d02bf429c3d91912636b71149f9d9816d3b9
1 /*
2 * IPv6 output functions
3 * Linux INET6 implementation
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * Based on linux/net/ipv4/ip_output.c
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
15 * Changes:
16 * A.N.Kuznetsov : arithmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
42 #include <linux/netfilter.h>
43 #include <linux/netfilter_ipv6.h>
45 #include <net/sock.h>
46 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ndisc.h>
50 #include <net/protocol.h>
51 #include <net/ip6_route.h>
52 #include <net/addrconf.h>
53 #include <net/rawv6.h>
54 #include <net/icmp.h>
55 #include <net/xfrm.h>
56 #include <net/checksum.h>
57 #include <linux/mroute6.h>
59 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
61 int __ip6_local_out(struct sk_buff *skb)
63 int len;
65 len = skb->len - sizeof(struct ipv6hdr);
66 if (len > IPV6_MAXPLEN)
67 len = 0;
68 ipv6_hdr(skb)->payload_len = htons(len);
70 return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
71 skb_dst(skb)->dev, dst_output);
/*
 * Send a locally generated packet: run __ip6_local_out() and, when it
 * returns 1, continue with dst_output().  Any other return value from
 * __ip6_local_out() is passed straight back to the caller.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int rc = __ip6_local_out(skb);

	if (unlikely(rc != 1))
		return rc;

	return dst_output(skb);
}
EXPORT_SYMBOL_GPL(ip6_local_out);
86 /* dev_loopback_xmit for use with netfilter. */
87 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
89 skb_reset_mac_header(newskb);
90 __skb_pull(newskb, skb_network_offset(newskb));
91 newskb->pkt_type = PACKET_LOOPBACK;
92 newskb->ip_summed = CHECKSUM_UNNECESSARY;
93 WARN_ON(!skb_dst(newskb));
95 netif_rx_ni(newskb);
96 return 0;
99 static int ip6_finish_output2(struct sk_buff *skb)
101 struct dst_entry *dst = skb_dst(skb);
102 struct net_device *dev = dst->dev;
104 skb->protocol = htons(ETH_P_IPV6);
105 skb->dev = dev;
107 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
108 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
110 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
111 ((mroute6_socket(dev_net(dev), skb) &&
112 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
113 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
114 &ipv6_hdr(skb)->saddr))) {
115 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
117 /* Do not check for IFF_ALLMULTI; multicast routing
118 is not supported in any case.
120 if (newskb)
121 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
122 newskb, NULL, newskb->dev,
123 ip6_dev_loopback_xmit);
125 if (ipv6_hdr(skb)->hop_limit == 0) {
126 IP6_INC_STATS(dev_net(dev), idev,
127 IPSTATS_MIB_OUTDISCARDS);
128 kfree_skb(skb);
129 return 0;
133 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
134 skb->len);
137 if (dst->hh)
138 return neigh_hh_output(dst->hh, skb);
139 else if (dst->neighbour)
140 return dst->neighbour->output(skb);
142 IP6_INC_STATS_BH(dev_net(dst->dev),
143 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
144 kfree_skb(skb);
145 return -EINVAL;
148 static int ip6_finish_output(struct sk_buff *skb)
150 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
151 dst_allfrag(skb_dst(skb)))
152 return ip6_fragment(skb, ip6_finish_output2);
153 else
154 return ip6_finish_output2(skb);
157 int ip6_output(struct sk_buff *skb)
159 struct net_device *dev = skb_dst(skb)->dev;
160 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
161 if (unlikely(idev->cnf.disable_ipv6)) {
162 IP6_INC_STATS(dev_net(dev), idev,
163 IPSTATS_MIB_OUTDISCARDS);
164 kfree_skb(skb);
165 return 0;
168 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
169 ip6_finish_output,
170 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
174 * xmit an sk_buff (used by TCP, SCTP and DCCP)
177 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
178 struct ipv6_txoptions *opt)
180 struct net *net = sock_net(sk);
181 struct ipv6_pinfo *np = inet6_sk(sk);
182 struct in6_addr *first_hop = &fl6->daddr;
183 struct dst_entry *dst = skb_dst(skb);
184 struct ipv6hdr *hdr;
185 u8 proto = fl6->flowi6_proto;
186 int seg_len = skb->len;
187 int hlimit = -1;
188 int tclass = 0;
189 u32 mtu;
191 if (opt) {
192 unsigned int head_room;
194 /* First: exthdrs may take lots of space (~8K for now)
195 MAX_HEADER is not enough.
197 head_room = opt->opt_nflen + opt->opt_flen;
198 seg_len += head_room;
199 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
201 if (skb_headroom(skb) < head_room) {
202 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
203 if (skb2 == NULL) {
204 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
205 IPSTATS_MIB_OUTDISCARDS);
206 kfree_skb(skb);
207 return -ENOBUFS;
209 kfree_skb(skb);
210 skb = skb2;
211 skb_set_owner_w(skb, sk);
213 if (opt->opt_flen)
214 ipv6_push_frag_opts(skb, opt, &proto);
215 if (opt->opt_nflen)
216 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
219 skb_push(skb, sizeof(struct ipv6hdr));
220 skb_reset_network_header(skb);
221 hdr = ipv6_hdr(skb);
224 * Fill in the IPv6 header
226 if (np) {
227 tclass = np->tclass;
228 hlimit = np->hop_limit;
230 if (hlimit < 0)
231 hlimit = ip6_dst_hoplimit(dst);
233 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;
235 hdr->payload_len = htons(seg_len);
236 hdr->nexthdr = proto;
237 hdr->hop_limit = hlimit;
239 ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
240 ipv6_addr_copy(&hdr->daddr, first_hop);
242 skb->priority = sk->sk_priority;
243 skb->mark = sk->sk_mark;
245 mtu = dst_mtu(dst);
246 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
247 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
248 IPSTATS_MIB_OUT, skb->len);
249 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
250 dst->dev, dst_output);
253 if (net_ratelimit())
254 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
255 skb->dev = dst->dev;
256 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
257 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
258 kfree_skb(skb);
259 return -EMSGSIZE;
262 EXPORT_SYMBOL(ip6_xmit);
265 * To avoid extra problems ND packets are send through this
266 * routine. It's code duplication but I really want to avoid
267 * extra checks since ipv6_build_header is used by TCP (which
268 * is for us performance critical)
271 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
272 const struct in6_addr *saddr, const struct in6_addr *daddr,
273 int proto, int len)
275 struct ipv6_pinfo *np = inet6_sk(sk);
276 struct ipv6hdr *hdr;
278 skb->protocol = htons(ETH_P_IPV6);
279 skb->dev = dev;
281 skb_reset_network_header(skb);
282 skb_put(skb, sizeof(struct ipv6hdr));
283 hdr = ipv6_hdr(skb);
285 *(__be32*)hdr = htonl(0x60000000);
287 hdr->payload_len = htons(len);
288 hdr->nexthdr = proto;
289 hdr->hop_limit = np->hop_limit;
291 ipv6_addr_copy(&hdr->saddr, saddr);
292 ipv6_addr_copy(&hdr->daddr, daddr);
294 return 0;
297 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
299 struct ip6_ra_chain *ra;
300 struct sock *last = NULL;
302 read_lock(&ip6_ra_lock);
303 for (ra = ip6_ra_chain; ra; ra = ra->next) {
304 struct sock *sk = ra->sk;
305 if (sk && ra->sel == sel &&
306 (!sk->sk_bound_dev_if ||
307 sk->sk_bound_dev_if == skb->dev->ifindex)) {
308 if (last) {
309 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
310 if (skb2)
311 rawv6_rcv(last, skb2);
313 last = sk;
317 if (last) {
318 rawv6_rcv(last, skb);
319 read_unlock(&ip6_ra_lock);
320 return 1;
322 read_unlock(&ip6_ra_lock);
323 return 0;
326 static int ip6_forward_proxy_check(struct sk_buff *skb)
328 struct ipv6hdr *hdr = ipv6_hdr(skb);
329 u8 nexthdr = hdr->nexthdr;
330 int offset;
332 if (ipv6_ext_hdr(nexthdr)) {
333 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
334 if (offset < 0)
335 return 0;
336 } else
337 offset = sizeof(struct ipv6hdr);
339 if (nexthdr == IPPROTO_ICMPV6) {
340 struct icmp6hdr *icmp6;
342 if (!pskb_may_pull(skb, (skb_network_header(skb) +
343 offset + 1 - skb->data)))
344 return 0;
346 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
348 switch (icmp6->icmp6_type) {
349 case NDISC_ROUTER_SOLICITATION:
350 case NDISC_ROUTER_ADVERTISEMENT:
351 case NDISC_NEIGHBOUR_SOLICITATION:
352 case NDISC_NEIGHBOUR_ADVERTISEMENT:
353 case NDISC_REDIRECT:
354 /* For reaction involving unicast neighbor discovery
355 * message destined to the proxied address, pass it to
356 * input function.
358 return 1;
359 default:
360 break;
365 * The proxying router can't forward traffic sent to a link-local
366 * address, so signal the sender and discard the packet. This
367 * behavior is clarified by the MIPv6 specification.
369 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
370 dst_link_failure(skb);
371 return -1;
374 return 0;
/*
 * Continuation of ip6_forward() after the NF_INET_FORWARD hook: hand
 * the packet to the output routine of its route.
 */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	int rc = dst_output(skb);

	return rc;
}
382 int ip6_forward(struct sk_buff *skb)
384 struct dst_entry *dst = skb_dst(skb);
385 struct ipv6hdr *hdr = ipv6_hdr(skb);
386 struct inet6_skb_parm *opt = IP6CB(skb);
387 struct net *net = dev_net(dst->dev);
388 u32 mtu;
390 if (net->ipv6.devconf_all->forwarding == 0)
391 goto error;
393 if (skb_warn_if_lro(skb))
394 goto drop;
396 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
397 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
398 goto drop;
401 if (skb->pkt_type != PACKET_HOST)
402 goto drop;
404 skb_forward_csum(skb);
407 * We DO NOT make any processing on
408 * RA packets, pushing them to user level AS IS
409 * without ane WARRANTY that application will be able
410 * to interpret them. The reason is that we
411 * cannot make anything clever here.
413 * We are not end-node, so that if packet contains
414 * AH/ESP, we cannot make anything.
415 * Defragmentation also would be mistake, RA packets
416 * cannot be fragmented, because there is no warranty
417 * that different fragments will go along one path. --ANK
419 if (opt->ra) {
420 u8 *ptr = skb_network_header(skb) + opt->ra;
421 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
422 return 0;
426 * check and decrement ttl
428 if (hdr->hop_limit <= 1) {
429 /* Force OUTPUT device used as source address */
430 skb->dev = dst->dev;
431 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
432 IP6_INC_STATS_BH(net,
433 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
435 kfree_skb(skb);
436 return -ETIMEDOUT;
439 /* XXX: idev->cnf.proxy_ndp? */
440 if (net->ipv6.devconf_all->proxy_ndp &&
441 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
442 int proxied = ip6_forward_proxy_check(skb);
443 if (proxied > 0)
444 return ip6_input(skb);
445 else if (proxied < 0) {
446 IP6_INC_STATS(net, ip6_dst_idev(dst),
447 IPSTATS_MIB_INDISCARDS);
448 goto drop;
452 if (!xfrm6_route_forward(skb)) {
453 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
454 goto drop;
456 dst = skb_dst(skb);
458 /* IPv6 specs say nothing about it, but it is clear that we cannot
459 send redirects to source routed frames.
460 We don't send redirects to frames decapsulated from IPsec.
462 if (skb->dev == dst->dev && dst->neighbour && opt->srcrt == 0 &&
463 !skb_sec_path(skb)) {
464 struct in6_addr *target = NULL;
465 struct rt6_info *rt;
466 struct neighbour *n = dst->neighbour;
469 * incoming and outgoing devices are the same
470 * send a redirect.
473 rt = (struct rt6_info *) dst;
474 if ((rt->rt6i_flags & RTF_GATEWAY))
475 target = (struct in6_addr*)&n->primary_key;
476 else
477 target = &hdr->daddr;
479 if (!rt->rt6i_peer)
480 rt6_bind_peer(rt, 1);
482 /* Limit redirects both by destination (here)
483 and by source (inside ndisc_send_redirect)
485 if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
486 ndisc_send_redirect(skb, n, target);
487 } else {
488 int addrtype = ipv6_addr_type(&hdr->saddr);
490 /* This check is security critical. */
491 if (addrtype == IPV6_ADDR_ANY ||
492 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
493 goto error;
494 if (addrtype & IPV6_ADDR_LINKLOCAL) {
495 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
496 ICMPV6_NOT_NEIGHBOUR, 0);
497 goto error;
501 mtu = dst_mtu(dst);
502 if (mtu < IPV6_MIN_MTU)
503 mtu = IPV6_MIN_MTU;
505 if (skb->len > mtu && !skb_is_gso(skb)) {
506 /* Again, force OUTPUT device used as source address */
507 skb->dev = dst->dev;
508 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
509 IP6_INC_STATS_BH(net,
510 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
511 IP6_INC_STATS_BH(net,
512 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
513 kfree_skb(skb);
514 return -EMSGSIZE;
517 if (skb_cow(skb, dst->dev->hard_header_len)) {
518 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
519 goto drop;
522 hdr = ipv6_hdr(skb);
524 /* Mangling hops number delayed to point after skb COW */
526 hdr->hop_limit--;
528 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
529 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
530 ip6_forward_finish);
532 error:
533 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
534 drop:
535 kfree_skb(skb);
536 return -EINVAL;
539 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
541 to->pkt_type = from->pkt_type;
542 to->priority = from->priority;
543 to->protocol = from->protocol;
544 skb_dst_drop(to);
545 skb_dst_set(to, dst_clone(skb_dst(from)));
546 to->dev = from->dev;
547 to->mark = from->mark;
549 #ifdef CONFIG_NET_SCHED
550 to->tc_index = from->tc_index;
551 #endif
552 nf_copy(to, from);
553 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
554 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
555 to->nf_trace = from->nf_trace;
556 #endif
557 skb_copy_secmark(to, from);
560 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
562 u16 offset = sizeof(struct ipv6hdr);
563 struct ipv6_opt_hdr *exthdr =
564 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
565 unsigned int packet_len = skb->tail - skb->network_header;
566 int found_rhdr = 0;
567 *nexthdr = &ipv6_hdr(skb)->nexthdr;
569 while (offset + 1 <= packet_len) {
571 switch (**nexthdr) {
573 case NEXTHDR_HOP:
574 break;
575 case NEXTHDR_ROUTING:
576 found_rhdr = 1;
577 break;
578 case NEXTHDR_DEST:
579 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
580 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
581 break;
582 #endif
583 if (found_rhdr)
584 return offset;
585 break;
586 default :
587 return offset;
590 offset += ipv6_optlen(exthdr);
591 *nexthdr = &exthdr->nexthdr;
592 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
593 offset);
596 return offset;
599 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
601 struct sk_buff *frag;
602 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
603 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
604 struct ipv6hdr *tmp_hdr;
605 struct frag_hdr *fh;
606 unsigned int mtu, hlen, left, len;
607 __be32 frag_id = 0;
608 int ptr, offset = 0, err=0;
609 u8 *prevhdr, nexthdr = 0;
610 struct net *net = dev_net(skb_dst(skb)->dev);
612 hlen = ip6_find_1stfragopt(skb, &prevhdr);
613 nexthdr = *prevhdr;
615 mtu = ip6_skb_dst_mtu(skb);
617 /* We must not fragment if the socket is set to force MTU discovery
618 * or if the skb it not generated by a local socket.
620 if (!skb->local_df && skb->len > mtu) {
621 skb->dev = skb_dst(skb)->dev;
622 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
623 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
624 IPSTATS_MIB_FRAGFAILS);
625 kfree_skb(skb);
626 return -EMSGSIZE;
629 if (np && np->frag_size < mtu) {
630 if (np->frag_size)
631 mtu = np->frag_size;
633 mtu -= hlen + sizeof(struct frag_hdr);
635 if (skb_has_frag_list(skb)) {
636 int first_len = skb_pagelen(skb);
637 struct sk_buff *frag2;
639 if (first_len - hlen > mtu ||
640 ((first_len - hlen) & 7) ||
641 skb_cloned(skb))
642 goto slow_path;
644 skb_walk_frags(skb, frag) {
645 /* Correct geometry. */
646 if (frag->len > mtu ||
647 ((frag->len & 7) && frag->next) ||
648 skb_headroom(frag) < hlen)
649 goto slow_path_clean;
651 /* Partially cloned skb? */
652 if (skb_shared(frag))
653 goto slow_path_clean;
655 BUG_ON(frag->sk);
656 if (skb->sk) {
657 frag->sk = skb->sk;
658 frag->destructor = sock_wfree;
660 skb->truesize -= frag->truesize;
663 err = 0;
664 offset = 0;
665 frag = skb_shinfo(skb)->frag_list;
666 skb_frag_list_init(skb);
667 /* BUILD HEADER */
669 *prevhdr = NEXTHDR_FRAGMENT;
670 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
671 if (!tmp_hdr) {
672 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
673 IPSTATS_MIB_FRAGFAILS);
674 return -ENOMEM;
677 __skb_pull(skb, hlen);
678 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
679 __skb_push(skb, hlen);
680 skb_reset_network_header(skb);
681 memcpy(skb_network_header(skb), tmp_hdr, hlen);
683 ipv6_select_ident(fh);
684 fh->nexthdr = nexthdr;
685 fh->reserved = 0;
686 fh->frag_off = htons(IP6_MF);
687 frag_id = fh->identification;
689 first_len = skb_pagelen(skb);
690 skb->data_len = first_len - skb_headlen(skb);
691 skb->len = first_len;
692 ipv6_hdr(skb)->payload_len = htons(first_len -
693 sizeof(struct ipv6hdr));
695 dst_hold(&rt->dst);
697 for (;;) {
698 /* Prepare header of the next frame,
699 * before previous one went down. */
700 if (frag) {
701 frag->ip_summed = CHECKSUM_NONE;
702 skb_reset_transport_header(frag);
703 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
704 __skb_push(frag, hlen);
705 skb_reset_network_header(frag);
706 memcpy(skb_network_header(frag), tmp_hdr,
707 hlen);
708 offset += skb->len - hlen - sizeof(struct frag_hdr);
709 fh->nexthdr = nexthdr;
710 fh->reserved = 0;
711 fh->frag_off = htons(offset);
712 if (frag->next != NULL)
713 fh->frag_off |= htons(IP6_MF);
714 fh->identification = frag_id;
715 ipv6_hdr(frag)->payload_len =
716 htons(frag->len -
717 sizeof(struct ipv6hdr));
718 ip6_copy_metadata(frag, skb);
721 err = output(skb);
722 if(!err)
723 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
724 IPSTATS_MIB_FRAGCREATES);
726 if (err || !frag)
727 break;
729 skb = frag;
730 frag = skb->next;
731 skb->next = NULL;
734 kfree(tmp_hdr);
736 if (err == 0) {
737 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
738 IPSTATS_MIB_FRAGOKS);
739 dst_release(&rt->dst);
740 return 0;
743 while (frag) {
744 skb = frag->next;
745 kfree_skb(frag);
746 frag = skb;
749 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
750 IPSTATS_MIB_FRAGFAILS);
751 dst_release(&rt->dst);
752 return err;
754 slow_path_clean:
755 skb_walk_frags(skb, frag2) {
756 if (frag2 == frag)
757 break;
758 frag2->sk = NULL;
759 frag2->destructor = NULL;
760 skb->truesize += frag2->truesize;
764 slow_path:
765 left = skb->len - hlen; /* Space per frame */
766 ptr = hlen; /* Where to start from */
769 * Fragment the datagram.
772 *prevhdr = NEXTHDR_FRAGMENT;
775 * Keep copying data until we run out.
777 while(left > 0) {
778 len = left;
779 /* IF: it doesn't fit, use 'mtu' - the data space left */
780 if (len > mtu)
781 len = mtu;
782 /* IF: we are not sending up to and including the packet end
783 then align the next start on an eight byte boundary */
784 if (len < left) {
785 len &= ~7;
788 * Allocate buffer.
791 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) {
792 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
793 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
794 IPSTATS_MIB_FRAGFAILS);
795 err = -ENOMEM;
796 goto fail;
800 * Set up data on packet
803 ip6_copy_metadata(frag, skb);
804 skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
805 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
806 skb_reset_network_header(frag);
807 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
808 frag->transport_header = (frag->network_header + hlen +
809 sizeof(struct frag_hdr));
812 * Charge the memory for the fragment to any owner
813 * it might possess
815 if (skb->sk)
816 skb_set_owner_w(frag, skb->sk);
819 * Copy the packet header into the new buffer.
821 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
824 * Build fragment header.
826 fh->nexthdr = nexthdr;
827 fh->reserved = 0;
828 if (!frag_id) {
829 ipv6_select_ident(fh);
830 frag_id = fh->identification;
831 } else
832 fh->identification = frag_id;
835 * Copy a block of the IP datagram.
837 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
838 BUG();
839 left -= len;
841 fh->frag_off = htons(offset);
842 if (left > 0)
843 fh->frag_off |= htons(IP6_MF);
844 ipv6_hdr(frag)->payload_len = htons(frag->len -
845 sizeof(struct ipv6hdr));
847 ptr += len;
848 offset += len;
851 * Put this fragment into the sending queue.
853 err = output(frag);
854 if (err)
855 goto fail;
857 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
858 IPSTATS_MIB_FRAGCREATES);
860 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
861 IPSTATS_MIB_FRAGOKS);
862 kfree_skb(skb);
863 return err;
865 fail:
866 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
867 IPSTATS_MIB_FRAGFAILS);
868 kfree_skb(skb);
869 return err;
872 static inline int ip6_rt_check(struct rt6key *rt_key,
873 struct in6_addr *fl_addr,
874 struct in6_addr *addr_cache)
876 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
877 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
880 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
881 struct dst_entry *dst,
882 struct flowi6 *fl6)
884 struct ipv6_pinfo *np = inet6_sk(sk);
885 struct rt6_info *rt = (struct rt6_info *)dst;
887 if (!dst)
888 goto out;
890 /* Yes, checking route validity in not connected
891 * case is not very simple. Take into account,
892 * that we do not support routing by source, TOS,
893 * and MSG_DONTROUTE --ANK (980726)
895 * 1. ip6_rt_check(): If route was host route,
896 * check that cached destination is current.
897 * If it is network route, we still may
898 * check its validity using saved pointer
899 * to the last used address: daddr_cache.
900 * We do not want to save whole address now,
901 * (because main consumer of this service
902 * is tcp, which has not this problem),
903 * so that the last trick works only on connected
904 * sockets.
905 * 2. oif also should be the same.
907 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
908 #ifdef CONFIG_IPV6_SUBTREES
909 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
910 #endif
911 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
912 dst_release(dst);
913 dst = NULL;
916 out:
917 return dst;
920 static int ip6_dst_lookup_tail(struct sock *sk,
921 struct dst_entry **dst, struct flowi6 *fl6)
923 int err;
924 struct net *net = sock_net(sk);
926 if (*dst == NULL)
927 *dst = ip6_route_output(net, sk, fl6);
929 if ((err = (*dst)->error))
930 goto out_err_release;
932 if (ipv6_addr_any(&fl6->saddr)) {
933 struct rt6_info *rt = (struct rt6_info *) *dst;
934 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
935 sk ? inet6_sk(sk)->srcprefs : 0,
936 &fl6->saddr);
937 if (err)
938 goto out_err_release;
941 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
943 * Here if the dst entry we've looked up
944 * has a neighbour entry that is in the INCOMPLETE
945 * state and the src address from the flow is
946 * marked as OPTIMISTIC, we release the found
947 * dst entry and replace it instead with the
948 * dst entry of the nexthop router
950 if ((*dst)->neighbour && !((*dst)->neighbour->nud_state & NUD_VALID)) {
951 struct inet6_ifaddr *ifp;
952 struct flowi6 fl_gw6;
953 int redirect;
955 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
956 (*dst)->dev, 1);
958 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
959 if (ifp)
960 in6_ifa_put(ifp);
962 if (redirect) {
964 * We need to get the dst entry for the
965 * default router instead
967 dst_release(*dst);
968 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
969 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
970 *dst = ip6_route_output(net, sk, &fl_gw6);
971 if ((err = (*dst)->error))
972 goto out_err_release;
975 #endif
977 return 0;
979 out_err_release:
980 if (err == -ENETUNREACH)
981 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
982 dst_release(*dst);
983 *dst = NULL;
984 return err;
988 * ip6_dst_lookup - perform route lookup on flow
989 * @sk: socket which provides route info
990 * @dst: pointer to dst_entry * for result
991 * @fl6: flow to lookup
993 * This function performs a route lookup on the given flow.
995 * It returns zero on success, or a standard errno code on error.
997 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
999 *dst = NULL;
1000 return ip6_dst_lookup_tail(sk, dst, fl6);
1002 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1005 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1006 * @sk: socket which provides route info
1007 * @fl6: flow to lookup
1008 * @final_dst: final destination address for ipsec lookup
1009 * @can_sleep: we are in a sleepable context
1011 * This function performs a route lookup on the given flow.
1013 * It returns a valid dst pointer on success, or a pointer encoded
1014 * error code.
1016 struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1017 const struct in6_addr *final_dst,
1018 bool can_sleep)
1020 struct dst_entry *dst = NULL;
1021 int err;
1023 err = ip6_dst_lookup_tail(sk, &dst, fl6);
1024 if (err)
1025 return ERR_PTR(err);
1026 if (final_dst)
1027 ipv6_addr_copy(&fl6->daddr, final_dst);
1028 if (can_sleep)
1029 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
1031 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1033 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1036 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1037 * @sk: socket which provides the dst cache and route info
1038 * @fl6: flow to lookup
1039 * @final_dst: final destination address for ipsec lookup
1040 * @can_sleep: we are in a sleepable context
1042 * This function performs a route lookup on the given flow with the
1043 * possibility of using the cached route in the socket if it is valid.
1044 * It will take the socket dst lock when operating on the dst cache.
1045 * As a result, this function can only be used in process context.
1047 * It returns a valid dst pointer on success, or a pointer encoded
1048 * error code.
1050 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1051 const struct in6_addr *final_dst,
1052 bool can_sleep)
1054 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1055 int err;
1057 dst = ip6_sk_dst_check(sk, dst, fl6);
1059 err = ip6_dst_lookup_tail(sk, &dst, fl6);
1060 if (err)
1061 return ERR_PTR(err);
1062 if (final_dst)
1063 ipv6_addr_copy(&fl6->daddr, final_dst);
1064 if (can_sleep)
1065 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
1067 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1069 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1071 static inline int ip6_ufo_append_data(struct sock *sk,
1072 int getfrag(void *from, char *to, int offset, int len,
1073 int odd, struct sk_buff *skb),
1074 void *from, int length, int hh_len, int fragheaderlen,
1075 int transhdrlen, int mtu,unsigned int flags)
1078 struct sk_buff *skb;
1079 int err;
1081 /* There is support for UDP large send offload by network
1082 * device, so create one single skb packet containing complete
1083 * udp datagram
1085 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1086 skb = sock_alloc_send_skb(sk,
1087 hh_len + fragheaderlen + transhdrlen + 20,
1088 (flags & MSG_DONTWAIT), &err);
1089 if (skb == NULL)
1090 return -ENOMEM;
1092 /* reserve space for Hardware header */
1093 skb_reserve(skb, hh_len);
1095 /* create space for UDP/IP header */
1096 skb_put(skb,fragheaderlen + transhdrlen);
1098 /* initialize network header pointer */
1099 skb_reset_network_header(skb);
1101 /* initialize protocol header pointer */
1102 skb->transport_header = skb->network_header + fragheaderlen;
1104 skb->ip_summed = CHECKSUM_PARTIAL;
1105 skb->csum = 0;
1108 err = skb_append_datato_frags(sk,skb, getfrag, from,
1109 (length - transhdrlen));
1110 if (!err) {
1111 struct frag_hdr fhdr;
1113 /* Specify the length of each IPv6 datagram fragment.
1114 * It has to be a multiple of 8.
1116 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1117 sizeof(struct frag_hdr)) & ~7;
1118 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1119 ipv6_select_ident(&fhdr);
1120 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1121 __skb_queue_tail(&sk->sk_write_queue, skb);
1123 return 0;
1125 /* There is not enough support do UPD LSO,
1126 * so follow normal path
1128 kfree_skb(skb);
1130 return err;
1133 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1134 gfp_t gfp)
1136 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1139 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1140 gfp_t gfp)
1142 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1145 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1146 int offset, int len, int odd, struct sk_buff *skb),
1147 void *from, int length, int transhdrlen,
1148 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
1149 struct rt6_info *rt, unsigned int flags, int dontfrag)
1151 struct inet_sock *inet = inet_sk(sk);
1152 struct ipv6_pinfo *np = inet6_sk(sk);
1153 struct sk_buff *skb;
1154 unsigned int maxfraglen, fragheaderlen;
1155 int exthdrlen;
1156 int hh_len;
1157 int mtu;
1158 int copy;
1159 int err;
1160 int offset = 0;
1161 int csummode = CHECKSUM_NONE;
1162 __u8 tx_flags = 0;
1164 if (flags&MSG_PROBE)
1165 return 0;
1166 if (skb_queue_empty(&sk->sk_write_queue)) {
1168 * setup for corking
1170 if (opt) {
1171 if (WARN_ON(np->cork.opt))
1172 return -EINVAL;
1174 np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
1175 if (unlikely(np->cork.opt == NULL))
1176 return -ENOBUFS;
1178 np->cork.opt->tot_len = opt->tot_len;
1179 np->cork.opt->opt_flen = opt->opt_flen;
1180 np->cork.opt->opt_nflen = opt->opt_nflen;
1182 np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1183 sk->sk_allocation);
1184 if (opt->dst0opt && !np->cork.opt->dst0opt)
1185 return -ENOBUFS;
1187 np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1188 sk->sk_allocation);
1189 if (opt->dst1opt && !np->cork.opt->dst1opt)
1190 return -ENOBUFS;
1192 np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1193 sk->sk_allocation);
1194 if (opt->hopopt && !np->cork.opt->hopopt)
1195 return -ENOBUFS;
1197 np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1198 sk->sk_allocation);
1199 if (opt->srcrt && !np->cork.opt->srcrt)
1200 return -ENOBUFS;
1202 /* need source address above miyazawa*/
1204 dst_hold(&rt->dst);
1205 inet->cork.dst = &rt->dst;
1206 inet->cork.fl.u.ip6 = *fl6;
1207 np->cork.hop_limit = hlimit;
1208 np->cork.tclass = tclass;
1209 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1210 rt->dst.dev->mtu : dst_mtu(rt->dst.path);
1211 if (np->frag_size < mtu) {
1212 if (np->frag_size)
1213 mtu = np->frag_size;
1215 inet->cork.fragsize = mtu;
1216 if (dst_allfrag(rt->dst.path))
1217 inet->cork.flags |= IPCORK_ALLFRAG;
1218 inet->cork.length = 0;
1219 sk->sk_sndmsg_page = NULL;
1220 sk->sk_sndmsg_off = 0;
1221 exthdrlen = rt->dst.header_len + (opt ? opt->opt_flen : 0) -
1222 rt->rt6i_nfheader_len;
1223 length += exthdrlen;
1224 transhdrlen += exthdrlen;
1225 } else {
1226 rt = (struct rt6_info *)inet->cork.dst;
1227 fl6 = &inet->cork.fl.u.ip6;
1228 opt = np->cork.opt;
1229 transhdrlen = 0;
1230 exthdrlen = 0;
1231 mtu = inet->cork.fragsize;
1234 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1236 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1237 (opt ? opt->opt_nflen : 0);
1238 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1240 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1241 if (inet->cork.length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1242 ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
1243 return -EMSGSIZE;
1247 /* For UDP, check if TX timestamp is enabled */
1248 if (sk->sk_type == SOCK_DGRAM) {
1249 err = sock_tx_timestamp(sk, &tx_flags);
1250 if (err)
1251 goto error;
1255 * Let's try using as much space as possible.
1256 * Use MTU if total length of the message fits into the MTU.
1257 * Otherwise, we need to reserve fragment header and
1258 * fragment alignment (= 8-15 octets, in total).
1260 * Note that we may need to "move" the data from the tail of
1261 * the buffer to the new fragment when we split
1262 * the message.
1264 * FIXME: It may be fragmented into multiple chunks
1265 * at once if non-fragmentable extension headers
1266 * are too large.
1267 * --yoshfuji
1270 inet->cork.length += length;
1271 if (length > mtu) {
1272 int proto = sk->sk_protocol;
1273 if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
1274 ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
1275 return -EMSGSIZE;
1278 if (proto == IPPROTO_UDP &&
1279 (rt->dst.dev->features & NETIF_F_UFO)) {
1281 err = ip6_ufo_append_data(sk, getfrag, from, length,
1282 hh_len, fragheaderlen,
1283 transhdrlen, mtu, flags);
1284 if (err)
1285 goto error;
1286 return 0;
1290 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1291 goto alloc_new_skb;
1293 while (length > 0) {
1294 /* Check if the remaining data fits into current packet. */
1295 copy = (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1296 if (copy < length)
1297 copy = maxfraglen - skb->len;
1299 if (copy <= 0) {
1300 char *data;
1301 unsigned int datalen;
1302 unsigned int fraglen;
1303 unsigned int fraggap;
1304 unsigned int alloclen;
1305 struct sk_buff *skb_prev;
1306 alloc_new_skb:
1307 skb_prev = skb;
1309 /* There's no room in the current skb */
1310 if (skb_prev)
1311 fraggap = skb_prev->len - maxfraglen;
1312 else
1313 fraggap = 0;
1316 * If remaining data exceeds the mtu,
1317 * we know we need more fragment(s).
1319 datalen = length + fraggap;
1320 if (datalen > (inet->cork.length <= mtu && !(inet->cork.flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1321 datalen = maxfraglen - fragheaderlen;
1323 fraglen = datalen + fragheaderlen;
1324 if ((flags & MSG_MORE) &&
1325 !(rt->dst.dev->features&NETIF_F_SG))
1326 alloclen = mtu;
1327 else
1328 alloclen = datalen + fragheaderlen;
1331 * The last fragment gets additional space at tail.
1332 * Note: we overallocate on fragments with MSG_MORE
1333 * because we have no idea if we're the last one.
1335 if (datalen == length + fraggap)
1336 alloclen += rt->dst.trailer_len;
1339 * We just reserve space for fragment header.
1340 * Note: this may be overallocation if the message
1341 * (without MSG_MORE) fits into the MTU.
1343 alloclen += sizeof(struct frag_hdr);
1345 if (transhdrlen) {
1346 skb = sock_alloc_send_skb(sk,
1347 alloclen + hh_len,
1348 (flags & MSG_DONTWAIT), &err);
1349 } else {
1350 skb = NULL;
1351 if (atomic_read(&sk->sk_wmem_alloc) <=
1352 2 * sk->sk_sndbuf)
1353 skb = sock_wmalloc(sk,
1354 alloclen + hh_len, 1,
1355 sk->sk_allocation);
1356 if (unlikely(skb == NULL))
1357 err = -ENOBUFS;
1358 else {
1359 /* Only the initial fragment
1360 * is time stamped.
1362 tx_flags = 0;
1365 if (skb == NULL)
1366 goto error;
1368 * Fill in the control structures
1370 skb->ip_summed = csummode;
1371 skb->csum = 0;
1372 /* reserve for fragmentation */
1373 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1375 if (sk->sk_type == SOCK_DGRAM)
1376 skb_shinfo(skb)->tx_flags = tx_flags;
1379 * Find where to start putting bytes
1381 data = skb_put(skb, fraglen);
1382 skb_set_network_header(skb, exthdrlen);
1383 data += fragheaderlen;
1384 skb->transport_header = (skb->network_header +
1385 fragheaderlen);
1386 if (fraggap) {
1387 skb->csum = skb_copy_and_csum_bits(
1388 skb_prev, maxfraglen,
1389 data + transhdrlen, fraggap, 0);
1390 skb_prev->csum = csum_sub(skb_prev->csum,
1391 skb->csum);
1392 data += fraggap;
1393 pskb_trim_unique(skb_prev, maxfraglen);
1395 copy = datalen - transhdrlen - fraggap;
1396 if (copy < 0) {
1397 err = -EINVAL;
1398 kfree_skb(skb);
1399 goto error;
1400 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1401 err = -EFAULT;
1402 kfree_skb(skb);
1403 goto error;
1406 offset += copy;
1407 length -= datalen - fraggap;
1408 transhdrlen = 0;
1409 exthdrlen = 0;
1410 csummode = CHECKSUM_NONE;
1413 * Put the packet on the pending queue
1415 __skb_queue_tail(&sk->sk_write_queue, skb);
1416 continue;
1419 if (copy > length)
1420 copy = length;
1422 if (!(rt->dst.dev->features&NETIF_F_SG)) {
1423 unsigned int off;
1425 off = skb->len;
1426 if (getfrag(from, skb_put(skb, copy),
1427 offset, copy, off, skb) < 0) {
1428 __skb_trim(skb, off);
1429 err = -EFAULT;
1430 goto error;
1432 } else {
1433 int i = skb_shinfo(skb)->nr_frags;
1434 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1435 struct page *page = sk->sk_sndmsg_page;
1436 int off = sk->sk_sndmsg_off;
1437 unsigned int left;
1439 if (page && (left = PAGE_SIZE - off) > 0) {
1440 if (copy >= left)
1441 copy = left;
1442 if (page != frag->page) {
1443 if (i == MAX_SKB_FRAGS) {
1444 err = -EMSGSIZE;
1445 goto error;
1447 get_page(page);
1448 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1449 frag = &skb_shinfo(skb)->frags[i];
1451 } else if(i < MAX_SKB_FRAGS) {
1452 if (copy > PAGE_SIZE)
1453 copy = PAGE_SIZE;
1454 page = alloc_pages(sk->sk_allocation, 0);
1455 if (page == NULL) {
1456 err = -ENOMEM;
1457 goto error;
1459 sk->sk_sndmsg_page = page;
1460 sk->sk_sndmsg_off = 0;
1462 skb_fill_page_desc(skb, i, page, 0, 0);
1463 frag = &skb_shinfo(skb)->frags[i];
1464 } else {
1465 err = -EMSGSIZE;
1466 goto error;
1468 if (getfrag(from, page_address(frag->page)+frag->page_offset+frag->size, offset, copy, skb->len, skb) < 0) {
1469 err = -EFAULT;
1470 goto error;
1472 sk->sk_sndmsg_off += copy;
1473 frag->size += copy;
1474 skb->len += copy;
1475 skb->data_len += copy;
1476 skb->truesize += copy;
1477 atomic_add(copy, &sk->sk_wmem_alloc);
1479 offset += copy;
1480 length -= copy;
1482 return 0;
1483 error:
1484 inet->cork.length -= length;
1485 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1486 return err;
1489 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1491 if (np->cork.opt) {
1492 kfree(np->cork.opt->dst0opt);
1493 kfree(np->cork.opt->dst1opt);
1494 kfree(np->cork.opt->hopopt);
1495 kfree(np->cork.opt->srcrt);
1496 kfree(np->cork.opt);
1497 np->cork.opt = NULL;
1500 if (inet->cork.dst) {
1501 dst_release(inet->cork.dst);
1502 inet->cork.dst = NULL;
1503 inet->cork.flags &= ~IPCORK_ALLFRAG;
1505 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1508 int ip6_push_pending_frames(struct sock *sk)
1510 struct sk_buff *skb, *tmp_skb;
1511 struct sk_buff **tail_skb;
1512 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1513 struct inet_sock *inet = inet_sk(sk);
1514 struct ipv6_pinfo *np = inet6_sk(sk);
1515 struct net *net = sock_net(sk);
1516 struct ipv6hdr *hdr;
1517 struct ipv6_txoptions *opt = np->cork.opt;
1518 struct rt6_info *rt = (struct rt6_info *)inet->cork.dst;
1519 struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
1520 unsigned char proto = fl6->flowi6_proto;
1521 int err = 0;
1523 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1524 goto out;
1525 tail_skb = &(skb_shinfo(skb)->frag_list);
1527 /* move skb->data to ip header from ext header */
1528 if (skb->data < skb_network_header(skb))
1529 __skb_pull(skb, skb_network_offset(skb));
1530 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1531 __skb_pull(tmp_skb, skb_network_header_len(skb));
1532 *tail_skb = tmp_skb;
1533 tail_skb = &(tmp_skb->next);
1534 skb->len += tmp_skb->len;
1535 skb->data_len += tmp_skb->len;
1536 skb->truesize += tmp_skb->truesize;
1537 tmp_skb->destructor = NULL;
1538 tmp_skb->sk = NULL;
1541 /* Allow local fragmentation. */
1542 if (np->pmtudisc < IPV6_PMTUDISC_DO)
1543 skb->local_df = 1;
1545 ipv6_addr_copy(final_dst, &fl6->daddr);
1546 __skb_pull(skb, skb_network_header_len(skb));
1547 if (opt && opt->opt_flen)
1548 ipv6_push_frag_opts(skb, opt, &proto);
1549 if (opt && opt->opt_nflen)
1550 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1552 skb_push(skb, sizeof(struct ipv6hdr));
1553 skb_reset_network_header(skb);
1554 hdr = ipv6_hdr(skb);
1556 *(__be32*)hdr = fl6->flowlabel |
1557 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1559 hdr->hop_limit = np->cork.hop_limit;
1560 hdr->nexthdr = proto;
1561 ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
1562 ipv6_addr_copy(&hdr->daddr, final_dst);
1564 skb->priority = sk->sk_priority;
1565 skb->mark = sk->sk_mark;
1567 skb_dst_set(skb, dst_clone(&rt->dst));
1568 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1569 if (proto == IPPROTO_ICMPV6) {
1570 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1572 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
1573 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1576 err = ip6_local_out(skb);
1577 if (err) {
1578 if (err > 0)
1579 err = net_xmit_errno(err);
1580 if (err)
1581 goto error;
1584 out:
1585 ip6_cork_release(inet, np);
1586 return err;
1587 error:
1588 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1589 goto out;
1592 void ip6_flush_pending_frames(struct sock *sk)
1594 struct sk_buff *skb;
1596 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1597 if (skb_dst(skb))
1598 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1599 IPSTATS_MIB_OUTDISCARDS);
1600 kfree_skb(skb);
1603 ip6_cork_release(inet_sk(sk), inet6_sk(sk));