spi-topcliff-pch: Fix issue for transmitting over 4KByte
[zen-stable.git] / net / ipv6 / ip6_output.c
blobea58e27a42b230a4dfb29bba806210292e3ac6f2
1 /*
2 * IPv6 output functions
3 * Linux INET6 implementation
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * Based on linux/net/ipv4/ip_output.c
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
15 * Changes:
16 * A.N.Kuznetsov : arithmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
42 #include <linux/netfilter.h>
43 #include <linux/netfilter_ipv6.h>
45 #include <net/sock.h>
46 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ndisc.h>
50 #include <net/protocol.h>
51 #include <net/ip6_route.h>
52 #include <net/addrconf.h>
53 #include <net/rawv6.h>
54 #include <net/icmp.h>
55 #include <net/xfrm.h>
56 #include <net/checksum.h>
57 #include <linux/mroute6.h>
/* Defined further down; declared here because ip6_finish_output() calls it first. */
int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
61 int __ip6_local_out(struct sk_buff *skb)
63 int len;
65 len = skb->len - sizeof(struct ipv6hdr);
66 if (len > IPV6_MAXPLEN)
67 len = 0;
68 ipv6_hdr(skb)->payload_len = htons(len);
70 return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
71 skb_dst(skb)->dev, dst_output);
/*
 * Send a locally generated IPv6 packet.
 *
 * Runs __ip6_local_out(); only when that returns exactly 1 is the packet
 * passed on to dst_output(). Any other value is returned to the caller as is.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int rc = __ip6_local_out(skb);

	if (unlikely(rc != 1))
		return rc;

	return dst_output(skb);
}
EXPORT_SYMBOL_GPL(ip6_local_out);
86 /* dev_loopback_xmit for use with netfilter. */
87 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
89 skb_reset_mac_header(newskb);
90 __skb_pull(newskb, skb_network_offset(newskb));
91 newskb->pkt_type = PACKET_LOOPBACK;
92 newskb->ip_summed = CHECKSUM_UNNECESSARY;
93 WARN_ON(!skb_dst(newskb));
95 netif_rx_ni(newskb);
96 return 0;
99 static int ip6_finish_output2(struct sk_buff *skb)
101 struct dst_entry *dst = skb_dst(skb);
102 struct net_device *dev = dst->dev;
103 struct neighbour *neigh;
105 skb->protocol = htons(ETH_P_IPV6);
106 skb->dev = dev;
108 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
109 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
111 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
112 ((mroute6_socket(dev_net(dev), skb) &&
113 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
114 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
115 &ipv6_hdr(skb)->saddr))) {
116 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
118 /* Do not check for IFF_ALLMULTI; multicast routing
119 is not supported in any case.
121 if (newskb)
122 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
123 newskb, NULL, newskb->dev,
124 ip6_dev_loopback_xmit);
126 if (ipv6_hdr(skb)->hop_limit == 0) {
127 IP6_INC_STATS(dev_net(dev), idev,
128 IPSTATS_MIB_OUTDISCARDS);
129 kfree_skb(skb);
130 return 0;
134 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
135 skb->len);
138 rcu_read_lock();
139 neigh = dst_get_neighbour_noref(dst);
140 if (neigh) {
141 int res = neigh_output(neigh, skb);
143 rcu_read_unlock();
144 return res;
146 rcu_read_unlock();
147 IP6_INC_STATS_BH(dev_net(dst->dev),
148 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
149 kfree_skb(skb);
150 return -EINVAL;
153 static int ip6_finish_output(struct sk_buff *skb)
155 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
156 dst_allfrag(skb_dst(skb)))
157 return ip6_fragment(skb, ip6_finish_output2);
158 else
159 return ip6_finish_output2(skb);
162 int ip6_output(struct sk_buff *skb)
164 struct net_device *dev = skb_dst(skb)->dev;
165 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
166 if (unlikely(idev->cnf.disable_ipv6)) {
167 IP6_INC_STATS(dev_net(dev), idev,
168 IPSTATS_MIB_OUTDISCARDS);
169 kfree_skb(skb);
170 return 0;
173 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
174 ip6_finish_output,
175 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
179 * xmit an sk_buff (used by TCP, SCTP and DCCP)
182 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
183 struct ipv6_txoptions *opt, int tclass)
185 struct net *net = sock_net(sk);
186 struct ipv6_pinfo *np = inet6_sk(sk);
187 struct in6_addr *first_hop = &fl6->daddr;
188 struct dst_entry *dst = skb_dst(skb);
189 struct ipv6hdr *hdr;
190 u8 proto = fl6->flowi6_proto;
191 int seg_len = skb->len;
192 int hlimit = -1;
193 u32 mtu;
195 if (opt) {
196 unsigned int head_room;
198 /* First: exthdrs may take lots of space (~8K for now)
199 MAX_HEADER is not enough.
201 head_room = opt->opt_nflen + opt->opt_flen;
202 seg_len += head_room;
203 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
205 if (skb_headroom(skb) < head_room) {
206 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
207 if (skb2 == NULL) {
208 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
209 IPSTATS_MIB_OUTDISCARDS);
210 kfree_skb(skb);
211 return -ENOBUFS;
213 kfree_skb(skb);
214 skb = skb2;
215 skb_set_owner_w(skb, sk);
217 if (opt->opt_flen)
218 ipv6_push_frag_opts(skb, opt, &proto);
219 if (opt->opt_nflen)
220 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
223 skb_push(skb, sizeof(struct ipv6hdr));
224 skb_reset_network_header(skb);
225 hdr = ipv6_hdr(skb);
228 * Fill in the IPv6 header
230 if (np)
231 hlimit = np->hop_limit;
232 if (hlimit < 0)
233 hlimit = ip6_dst_hoplimit(dst);
235 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;
237 hdr->payload_len = htons(seg_len);
238 hdr->nexthdr = proto;
239 hdr->hop_limit = hlimit;
241 hdr->saddr = fl6->saddr;
242 hdr->daddr = *first_hop;
244 skb->priority = sk->sk_priority;
245 skb->mark = sk->sk_mark;
247 mtu = dst_mtu(dst);
248 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
249 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
250 IPSTATS_MIB_OUT, skb->len);
251 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
252 dst->dev, dst_output);
255 if (net_ratelimit())
256 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
257 skb->dev = dst->dev;
258 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
259 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
260 kfree_skb(skb);
261 return -EMSGSIZE;
264 EXPORT_SYMBOL(ip6_xmit);
267 * To avoid extra problems ND packets are send through this
268 * routine. It's code duplication but I really want to avoid
269 * extra checks since ipv6_build_header is used by TCP (which
270 * is for us performance critical)
273 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
274 const struct in6_addr *saddr, const struct in6_addr *daddr,
275 int proto, int len)
277 struct ipv6_pinfo *np = inet6_sk(sk);
278 struct ipv6hdr *hdr;
280 skb->protocol = htons(ETH_P_IPV6);
281 skb->dev = dev;
283 skb_reset_network_header(skb);
284 skb_put(skb, sizeof(struct ipv6hdr));
285 hdr = ipv6_hdr(skb);
287 *(__be32*)hdr = htonl(0x60000000);
289 hdr->payload_len = htons(len);
290 hdr->nexthdr = proto;
291 hdr->hop_limit = np->hop_limit;
293 hdr->saddr = *saddr;
294 hdr->daddr = *daddr;
296 return 0;
299 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
301 struct ip6_ra_chain *ra;
302 struct sock *last = NULL;
304 read_lock(&ip6_ra_lock);
305 for (ra = ip6_ra_chain; ra; ra = ra->next) {
306 struct sock *sk = ra->sk;
307 if (sk && ra->sel == sel &&
308 (!sk->sk_bound_dev_if ||
309 sk->sk_bound_dev_if == skb->dev->ifindex)) {
310 if (last) {
311 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
312 if (skb2)
313 rawv6_rcv(last, skb2);
315 last = sk;
319 if (last) {
320 rawv6_rcv(last, skb);
321 read_unlock(&ip6_ra_lock);
322 return 1;
324 read_unlock(&ip6_ra_lock);
325 return 0;
328 static int ip6_forward_proxy_check(struct sk_buff *skb)
330 struct ipv6hdr *hdr = ipv6_hdr(skb);
331 u8 nexthdr = hdr->nexthdr;
332 __be16 frag_off;
333 int offset;
335 if (ipv6_ext_hdr(nexthdr)) {
336 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr, &frag_off);
337 if (offset < 0)
338 return 0;
339 } else
340 offset = sizeof(struct ipv6hdr);
342 if (nexthdr == IPPROTO_ICMPV6) {
343 struct icmp6hdr *icmp6;
345 if (!pskb_may_pull(skb, (skb_network_header(skb) +
346 offset + 1 - skb->data)))
347 return 0;
349 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
351 switch (icmp6->icmp6_type) {
352 case NDISC_ROUTER_SOLICITATION:
353 case NDISC_ROUTER_ADVERTISEMENT:
354 case NDISC_NEIGHBOUR_SOLICITATION:
355 case NDISC_NEIGHBOUR_ADVERTISEMENT:
356 case NDISC_REDIRECT:
357 /* For reaction involving unicast neighbor discovery
358 * message destined to the proxied address, pass it to
359 * input function.
361 return 1;
362 default:
363 break;
368 * The proxying router can't forward traffic sent to a link-local
369 * address, so signal the sender and discard the packet. This
370 * behavior is clarified by the MIPv6 specification.
372 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
373 dst_link_failure(skb);
374 return -1;
377 return 0;
/* Continuation run after the FORWARD hook: pass the packet to dst_output(). */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	return dst_output(skb);
}
385 int ip6_forward(struct sk_buff *skb)
387 struct dst_entry *dst = skb_dst(skb);
388 struct ipv6hdr *hdr = ipv6_hdr(skb);
389 struct inet6_skb_parm *opt = IP6CB(skb);
390 struct net *net = dev_net(dst->dev);
391 struct neighbour *n;
392 u32 mtu;
394 if (net->ipv6.devconf_all->forwarding == 0)
395 goto error;
397 if (skb_warn_if_lro(skb))
398 goto drop;
400 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
401 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
402 goto drop;
405 if (skb->pkt_type != PACKET_HOST)
406 goto drop;
408 skb_forward_csum(skb);
411 * We DO NOT make any processing on
412 * RA packets, pushing them to user level AS IS
413 * without ane WARRANTY that application will be able
414 * to interpret them. The reason is that we
415 * cannot make anything clever here.
417 * We are not end-node, so that if packet contains
418 * AH/ESP, we cannot make anything.
419 * Defragmentation also would be mistake, RA packets
420 * cannot be fragmented, because there is no warranty
421 * that different fragments will go along one path. --ANK
423 if (opt->ra) {
424 u8 *ptr = skb_network_header(skb) + opt->ra;
425 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
426 return 0;
430 * check and decrement ttl
432 if (hdr->hop_limit <= 1) {
433 /* Force OUTPUT device used as source address */
434 skb->dev = dst->dev;
435 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
436 IP6_INC_STATS_BH(net,
437 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
439 kfree_skb(skb);
440 return -ETIMEDOUT;
443 /* XXX: idev->cnf.proxy_ndp? */
444 if (net->ipv6.devconf_all->proxy_ndp &&
445 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
446 int proxied = ip6_forward_proxy_check(skb);
447 if (proxied > 0)
448 return ip6_input(skb);
449 else if (proxied < 0) {
450 IP6_INC_STATS(net, ip6_dst_idev(dst),
451 IPSTATS_MIB_INDISCARDS);
452 goto drop;
456 if (!xfrm6_route_forward(skb)) {
457 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
458 goto drop;
460 dst = skb_dst(skb);
462 /* IPv6 specs say nothing about it, but it is clear that we cannot
463 send redirects to source routed frames.
464 We don't send redirects to frames decapsulated from IPsec.
466 n = dst_get_neighbour_noref(dst);
467 if (skb->dev == dst->dev && n && opt->srcrt == 0 && !skb_sec_path(skb)) {
468 struct in6_addr *target = NULL;
469 struct rt6_info *rt;
472 * incoming and outgoing devices are the same
473 * send a redirect.
476 rt = (struct rt6_info *) dst;
477 if ((rt->rt6i_flags & RTF_GATEWAY))
478 target = (struct in6_addr*)&n->primary_key;
479 else
480 target = &hdr->daddr;
482 if (!rt->rt6i_peer)
483 rt6_bind_peer(rt, 1);
485 /* Limit redirects both by destination (here)
486 and by source (inside ndisc_send_redirect)
488 if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
489 ndisc_send_redirect(skb, n, target);
490 } else {
491 int addrtype = ipv6_addr_type(&hdr->saddr);
493 /* This check is security critical. */
494 if (addrtype == IPV6_ADDR_ANY ||
495 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
496 goto error;
497 if (addrtype & IPV6_ADDR_LINKLOCAL) {
498 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
499 ICMPV6_NOT_NEIGHBOUR, 0);
500 goto error;
504 mtu = dst_mtu(dst);
505 if (mtu < IPV6_MIN_MTU)
506 mtu = IPV6_MIN_MTU;
508 if (skb->len > mtu && !skb_is_gso(skb)) {
509 /* Again, force OUTPUT device used as source address */
510 skb->dev = dst->dev;
511 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
512 IP6_INC_STATS_BH(net,
513 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
514 IP6_INC_STATS_BH(net,
515 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
516 kfree_skb(skb);
517 return -EMSGSIZE;
520 if (skb_cow(skb, dst->dev->hard_header_len)) {
521 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
522 goto drop;
525 hdr = ipv6_hdr(skb);
527 /* Mangling hops number delayed to point after skb COW */
529 hdr->hop_limit--;
531 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
532 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
533 ip6_forward_finish);
535 error:
536 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
537 drop:
538 kfree_skb(skb);
539 return -EINVAL;
542 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
544 to->pkt_type = from->pkt_type;
545 to->priority = from->priority;
546 to->protocol = from->protocol;
547 skb_dst_drop(to);
548 skb_dst_set(to, dst_clone(skb_dst(from)));
549 to->dev = from->dev;
550 to->mark = from->mark;
552 #ifdef CONFIG_NET_SCHED
553 to->tc_index = from->tc_index;
554 #endif
555 nf_copy(to, from);
556 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
557 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
558 to->nf_trace = from->nf_trace;
559 #endif
560 skb_copy_secmark(to, from);
563 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
565 u16 offset = sizeof(struct ipv6hdr);
566 struct ipv6_opt_hdr *exthdr =
567 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
568 unsigned int packet_len = skb->tail - skb->network_header;
569 int found_rhdr = 0;
570 *nexthdr = &ipv6_hdr(skb)->nexthdr;
572 while (offset + 1 <= packet_len) {
574 switch (**nexthdr) {
576 case NEXTHDR_HOP:
577 break;
578 case NEXTHDR_ROUTING:
579 found_rhdr = 1;
580 break;
581 case NEXTHDR_DEST:
582 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
583 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
584 break;
585 #endif
586 if (found_rhdr)
587 return offset;
588 break;
589 default :
590 return offset;
593 offset += ipv6_optlen(exthdr);
594 *nexthdr = &exthdr->nexthdr;
595 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
596 offset);
599 return offset;
602 void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
604 static atomic_t ipv6_fragmentation_id;
605 int old, new;
607 if (rt && !(rt->dst.flags & DST_NOPEER)) {
608 struct inet_peer *peer;
610 if (!rt->rt6i_peer)
611 rt6_bind_peer(rt, 1);
612 peer = rt->rt6i_peer;
613 if (peer) {
614 fhdr->identification = htonl(inet_getid(peer, 0));
615 return;
618 do {
619 old = atomic_read(&ipv6_fragmentation_id);
620 new = old + 1;
621 if (!new)
622 new = 1;
623 } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
624 fhdr->identification = htonl(new);
627 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
629 struct sk_buff *frag;
630 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
631 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
632 struct ipv6hdr *tmp_hdr;
633 struct frag_hdr *fh;
634 unsigned int mtu, hlen, left, len;
635 int hroom, troom;
636 __be32 frag_id = 0;
637 int ptr, offset = 0, err=0;
638 u8 *prevhdr, nexthdr = 0;
639 struct net *net = dev_net(skb_dst(skb)->dev);
641 hlen = ip6_find_1stfragopt(skb, &prevhdr);
642 nexthdr = *prevhdr;
644 mtu = ip6_skb_dst_mtu(skb);
646 /* We must not fragment if the socket is set to force MTU discovery
647 * or if the skb it not generated by a local socket.
649 if (!skb->local_df && skb->len > mtu) {
650 skb->dev = skb_dst(skb)->dev;
651 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
652 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
653 IPSTATS_MIB_FRAGFAILS);
654 kfree_skb(skb);
655 return -EMSGSIZE;
658 if (np && np->frag_size < mtu) {
659 if (np->frag_size)
660 mtu = np->frag_size;
662 mtu -= hlen + sizeof(struct frag_hdr);
664 if (skb_has_frag_list(skb)) {
665 int first_len = skb_pagelen(skb);
666 struct sk_buff *frag2;
668 if (first_len - hlen > mtu ||
669 ((first_len - hlen) & 7) ||
670 skb_cloned(skb))
671 goto slow_path;
673 skb_walk_frags(skb, frag) {
674 /* Correct geometry. */
675 if (frag->len > mtu ||
676 ((frag->len & 7) && frag->next) ||
677 skb_headroom(frag) < hlen)
678 goto slow_path_clean;
680 /* Partially cloned skb? */
681 if (skb_shared(frag))
682 goto slow_path_clean;
684 BUG_ON(frag->sk);
685 if (skb->sk) {
686 frag->sk = skb->sk;
687 frag->destructor = sock_wfree;
689 skb->truesize -= frag->truesize;
692 err = 0;
693 offset = 0;
694 frag = skb_shinfo(skb)->frag_list;
695 skb_frag_list_init(skb);
696 /* BUILD HEADER */
698 *prevhdr = NEXTHDR_FRAGMENT;
699 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
700 if (!tmp_hdr) {
701 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
702 IPSTATS_MIB_FRAGFAILS);
703 return -ENOMEM;
706 __skb_pull(skb, hlen);
707 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
708 __skb_push(skb, hlen);
709 skb_reset_network_header(skb);
710 memcpy(skb_network_header(skb), tmp_hdr, hlen);
712 ipv6_select_ident(fh, rt);
713 fh->nexthdr = nexthdr;
714 fh->reserved = 0;
715 fh->frag_off = htons(IP6_MF);
716 frag_id = fh->identification;
718 first_len = skb_pagelen(skb);
719 skb->data_len = first_len - skb_headlen(skb);
720 skb->len = first_len;
721 ipv6_hdr(skb)->payload_len = htons(first_len -
722 sizeof(struct ipv6hdr));
724 dst_hold(&rt->dst);
726 for (;;) {
727 /* Prepare header of the next frame,
728 * before previous one went down. */
729 if (frag) {
730 frag->ip_summed = CHECKSUM_NONE;
731 skb_reset_transport_header(frag);
732 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
733 __skb_push(frag, hlen);
734 skb_reset_network_header(frag);
735 memcpy(skb_network_header(frag), tmp_hdr,
736 hlen);
737 offset += skb->len - hlen - sizeof(struct frag_hdr);
738 fh->nexthdr = nexthdr;
739 fh->reserved = 0;
740 fh->frag_off = htons(offset);
741 if (frag->next != NULL)
742 fh->frag_off |= htons(IP6_MF);
743 fh->identification = frag_id;
744 ipv6_hdr(frag)->payload_len =
745 htons(frag->len -
746 sizeof(struct ipv6hdr));
747 ip6_copy_metadata(frag, skb);
750 err = output(skb);
751 if(!err)
752 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
753 IPSTATS_MIB_FRAGCREATES);
755 if (err || !frag)
756 break;
758 skb = frag;
759 frag = skb->next;
760 skb->next = NULL;
763 kfree(tmp_hdr);
765 if (err == 0) {
766 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
767 IPSTATS_MIB_FRAGOKS);
768 dst_release(&rt->dst);
769 return 0;
772 while (frag) {
773 skb = frag->next;
774 kfree_skb(frag);
775 frag = skb;
778 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
779 IPSTATS_MIB_FRAGFAILS);
780 dst_release(&rt->dst);
781 return err;
783 slow_path_clean:
784 skb_walk_frags(skb, frag2) {
785 if (frag2 == frag)
786 break;
787 frag2->sk = NULL;
788 frag2->destructor = NULL;
789 skb->truesize += frag2->truesize;
793 slow_path:
794 left = skb->len - hlen; /* Space per frame */
795 ptr = hlen; /* Where to start from */
798 * Fragment the datagram.
801 *prevhdr = NEXTHDR_FRAGMENT;
802 hroom = LL_RESERVED_SPACE(rt->dst.dev);
803 troom = rt->dst.dev->needed_tailroom;
806 * Keep copying data until we run out.
808 while(left > 0) {
809 len = left;
810 /* IF: it doesn't fit, use 'mtu' - the data space left */
811 if (len > mtu)
812 len = mtu;
813 /* IF: we are not sending up to and including the packet end
814 then align the next start on an eight byte boundary */
815 if (len < left) {
816 len &= ~7;
819 * Allocate buffer.
822 if ((frag = alloc_skb(len + hlen + sizeof(struct frag_hdr) +
823 hroom + troom, GFP_ATOMIC)) == NULL) {
824 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
825 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
826 IPSTATS_MIB_FRAGFAILS);
827 err = -ENOMEM;
828 goto fail;
832 * Set up data on packet
835 ip6_copy_metadata(frag, skb);
836 skb_reserve(frag, hroom);
837 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
838 skb_reset_network_header(frag);
839 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
840 frag->transport_header = (frag->network_header + hlen +
841 sizeof(struct frag_hdr));
844 * Charge the memory for the fragment to any owner
845 * it might possess
847 if (skb->sk)
848 skb_set_owner_w(frag, skb->sk);
851 * Copy the packet header into the new buffer.
853 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
856 * Build fragment header.
858 fh->nexthdr = nexthdr;
859 fh->reserved = 0;
860 if (!frag_id) {
861 ipv6_select_ident(fh, rt);
862 frag_id = fh->identification;
863 } else
864 fh->identification = frag_id;
867 * Copy a block of the IP datagram.
869 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
870 BUG();
871 left -= len;
873 fh->frag_off = htons(offset);
874 if (left > 0)
875 fh->frag_off |= htons(IP6_MF);
876 ipv6_hdr(frag)->payload_len = htons(frag->len -
877 sizeof(struct ipv6hdr));
879 ptr += len;
880 offset += len;
883 * Put this fragment into the sending queue.
885 err = output(frag);
886 if (err)
887 goto fail;
889 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
890 IPSTATS_MIB_FRAGCREATES);
892 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
893 IPSTATS_MIB_FRAGOKS);
894 kfree_skb(skb);
895 return err;
897 fail:
898 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
899 IPSTATS_MIB_FRAGFAILS);
900 kfree_skb(skb);
901 return err;
904 static inline int ip6_rt_check(const struct rt6key *rt_key,
905 const struct in6_addr *fl_addr,
906 const struct in6_addr *addr_cache)
908 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
909 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
912 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
913 struct dst_entry *dst,
914 const struct flowi6 *fl6)
916 struct ipv6_pinfo *np = inet6_sk(sk);
917 struct rt6_info *rt = (struct rt6_info *)dst;
919 if (!dst)
920 goto out;
922 /* Yes, checking route validity in not connected
923 * case is not very simple. Take into account,
924 * that we do not support routing by source, TOS,
925 * and MSG_DONTROUTE --ANK (980726)
927 * 1. ip6_rt_check(): If route was host route,
928 * check that cached destination is current.
929 * If it is network route, we still may
930 * check its validity using saved pointer
931 * to the last used address: daddr_cache.
932 * We do not want to save whole address now,
933 * (because main consumer of this service
934 * is tcp, which has not this problem),
935 * so that the last trick works only on connected
936 * sockets.
937 * 2. oif also should be the same.
939 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
940 #ifdef CONFIG_IPV6_SUBTREES
941 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
942 #endif
943 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
944 dst_release(dst);
945 dst = NULL;
948 out:
949 return dst;
952 static int ip6_dst_lookup_tail(struct sock *sk,
953 struct dst_entry **dst, struct flowi6 *fl6)
955 struct net *net = sock_net(sk);
956 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
957 struct neighbour *n;
958 #endif
959 int err;
961 if (*dst == NULL)
962 *dst = ip6_route_output(net, sk, fl6);
964 if ((err = (*dst)->error))
965 goto out_err_release;
967 if (ipv6_addr_any(&fl6->saddr)) {
968 struct rt6_info *rt = (struct rt6_info *) *dst;
969 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
970 sk ? inet6_sk(sk)->srcprefs : 0,
971 &fl6->saddr);
972 if (err)
973 goto out_err_release;
976 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
978 * Here if the dst entry we've looked up
979 * has a neighbour entry that is in the INCOMPLETE
980 * state and the src address from the flow is
981 * marked as OPTIMISTIC, we release the found
982 * dst entry and replace it instead with the
983 * dst entry of the nexthop router
985 rcu_read_lock();
986 n = dst_get_neighbour_noref(*dst);
987 if (n && !(n->nud_state & NUD_VALID)) {
988 struct inet6_ifaddr *ifp;
989 struct flowi6 fl_gw6;
990 int redirect;
992 rcu_read_unlock();
993 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
994 (*dst)->dev, 1);
996 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
997 if (ifp)
998 in6_ifa_put(ifp);
1000 if (redirect) {
1002 * We need to get the dst entry for the
1003 * default router instead
1005 dst_release(*dst);
1006 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1007 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1008 *dst = ip6_route_output(net, sk, &fl_gw6);
1009 if ((err = (*dst)->error))
1010 goto out_err_release;
1012 } else {
1013 rcu_read_unlock();
1015 #endif
1017 return 0;
1019 out_err_release:
1020 if (err == -ENETUNREACH)
1021 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1022 dst_release(*dst);
1023 *dst = NULL;
1024 return err;
1028 * ip6_dst_lookup - perform route lookup on flow
1029 * @sk: socket which provides route info
1030 * @dst: pointer to dst_entry * for result
1031 * @fl6: flow to lookup
1033 * This function performs a route lookup on the given flow.
1035 * It returns zero on success, or a standard errno code on error.
1037 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
1039 *dst = NULL;
1040 return ip6_dst_lookup_tail(sk, dst, fl6);
1042 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1045 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1046 * @sk: socket which provides route info
1047 * @fl6: flow to lookup
1048 * @final_dst: final destination address for ipsec lookup
1049 * @can_sleep: we are in a sleepable context
1051 * This function performs a route lookup on the given flow.
1053 * It returns a valid dst pointer on success, or a pointer encoded
1054 * error code.
1056 struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1057 const struct in6_addr *final_dst,
1058 bool can_sleep)
1060 struct dst_entry *dst = NULL;
1061 int err;
1063 err = ip6_dst_lookup_tail(sk, &dst, fl6);
1064 if (err)
1065 return ERR_PTR(err);
1066 if (final_dst)
1067 fl6->daddr = *final_dst;
1068 if (can_sleep)
1069 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
1071 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1073 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1076 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1077 * @sk: socket which provides the dst cache and route info
1078 * @fl6: flow to lookup
1079 * @final_dst: final destination address for ipsec lookup
1080 * @can_sleep: we are in a sleepable context
1082 * This function performs a route lookup on the given flow with the
1083 * possibility of using the cached route in the socket if it is valid.
1084 * It will take the socket dst lock when operating on the dst cache.
1085 * As a result, this function can only be used in process context.
1087 * It returns a valid dst pointer on success, or a pointer encoded
1088 * error code.
1090 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1091 const struct in6_addr *final_dst,
1092 bool can_sleep)
1094 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1095 int err;
1097 dst = ip6_sk_dst_check(sk, dst, fl6);
1099 err = ip6_dst_lookup_tail(sk, &dst, fl6);
1100 if (err)
1101 return ERR_PTR(err);
1102 if (final_dst)
1103 fl6->daddr = *final_dst;
1104 if (can_sleep)
1105 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
1107 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1109 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1111 static inline int ip6_ufo_append_data(struct sock *sk,
1112 int getfrag(void *from, char *to, int offset, int len,
1113 int odd, struct sk_buff *skb),
1114 void *from, int length, int hh_len, int fragheaderlen,
1115 int transhdrlen, int mtu,unsigned int flags,
1116 struct rt6_info *rt)
1119 struct sk_buff *skb;
1120 int err;
1122 /* There is support for UDP large send offload by network
1123 * device, so create one single skb packet containing complete
1124 * udp datagram
1126 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1127 skb = sock_alloc_send_skb(sk,
1128 hh_len + fragheaderlen + transhdrlen + 20,
1129 (flags & MSG_DONTWAIT), &err);
1130 if (skb == NULL)
1131 return err;
1133 /* reserve space for Hardware header */
1134 skb_reserve(skb, hh_len);
1136 /* create space for UDP/IP header */
1137 skb_put(skb,fragheaderlen + transhdrlen);
1139 /* initialize network header pointer */
1140 skb_reset_network_header(skb);
1142 /* initialize protocol header pointer */
1143 skb->transport_header = skb->network_header + fragheaderlen;
1145 skb->ip_summed = CHECKSUM_PARTIAL;
1146 skb->csum = 0;
1149 err = skb_append_datato_frags(sk,skb, getfrag, from,
1150 (length - transhdrlen));
1151 if (!err) {
1152 struct frag_hdr fhdr;
1154 /* Specify the length of each IPv6 datagram fragment.
1155 * It has to be a multiple of 8.
1157 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1158 sizeof(struct frag_hdr)) & ~7;
1159 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1160 ipv6_select_ident(&fhdr, rt);
1161 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1162 __skb_queue_tail(&sk->sk_write_queue, skb);
1164 return 0;
1166 /* There is not enough support do UPD LSO,
1167 * so follow normal path
1169 kfree_skb(skb);
1171 return err;
1174 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1175 gfp_t gfp)
1177 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1180 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1181 gfp_t gfp)
1183 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1186 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1187 int offset, int len, int odd, struct sk_buff *skb),
1188 void *from, int length, int transhdrlen,
1189 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
1190 struct rt6_info *rt, unsigned int flags, int dontfrag)
1192 struct inet_sock *inet = inet_sk(sk);
1193 struct ipv6_pinfo *np = inet6_sk(sk);
1194 struct inet_cork *cork;
1195 struct sk_buff *skb;
1196 unsigned int maxfraglen, fragheaderlen;
1197 int exthdrlen;
1198 int dst_exthdrlen;
1199 int hh_len;
1200 int mtu;
1201 int copy;
1202 int err;
1203 int offset = 0;
1204 int csummode = CHECKSUM_NONE;
1205 __u8 tx_flags = 0;
1207 if (flags&MSG_PROBE)
1208 return 0;
1209 cork = &inet->cork.base;
1210 if (skb_queue_empty(&sk->sk_write_queue)) {
1212 * setup for corking
1214 if (opt) {
1215 if (WARN_ON(np->cork.opt))
1216 return -EINVAL;
1218 np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
1219 if (unlikely(np->cork.opt == NULL))
1220 return -ENOBUFS;
1222 np->cork.opt->tot_len = opt->tot_len;
1223 np->cork.opt->opt_flen = opt->opt_flen;
1224 np->cork.opt->opt_nflen = opt->opt_nflen;
1226 np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1227 sk->sk_allocation);
1228 if (opt->dst0opt && !np->cork.opt->dst0opt)
1229 return -ENOBUFS;
1231 np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1232 sk->sk_allocation);
1233 if (opt->dst1opt && !np->cork.opt->dst1opt)
1234 return -ENOBUFS;
1236 np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1237 sk->sk_allocation);
1238 if (opt->hopopt && !np->cork.opt->hopopt)
1239 return -ENOBUFS;
1241 np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1242 sk->sk_allocation);
1243 if (opt->srcrt && !np->cork.opt->srcrt)
1244 return -ENOBUFS;
1246 /* need source address above miyazawa*/
1248 dst_hold(&rt->dst);
1249 cork->dst = &rt->dst;
1250 inet->cork.fl.u.ip6 = *fl6;
1251 np->cork.hop_limit = hlimit;
1252 np->cork.tclass = tclass;
1253 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1254 rt->dst.dev->mtu : dst_mtu(&rt->dst);
1255 if (np->frag_size < mtu) {
1256 if (np->frag_size)
1257 mtu = np->frag_size;
1259 cork->fragsize = mtu;
1260 if (dst_allfrag(rt->dst.path))
1261 cork->flags |= IPCORK_ALLFRAG;
1262 cork->length = 0;
1263 sk->sk_sndmsg_page = NULL;
1264 sk->sk_sndmsg_off = 0;
1265 exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
1266 length += exthdrlen;
1267 transhdrlen += exthdrlen;
1268 dst_exthdrlen = rt->dst.header_len;
1269 } else {
1270 rt = (struct rt6_info *)cork->dst;
1271 fl6 = &inet->cork.fl.u.ip6;
1272 opt = np->cork.opt;
1273 transhdrlen = 0;
1274 exthdrlen = 0;
1275 dst_exthdrlen = 0;
1276 mtu = cork->fragsize;
1279 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1281 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1282 (opt ? opt->opt_nflen : 0);
1283 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1285 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1286 if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1287 ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
1288 return -EMSGSIZE;
1292 /* For UDP, check if TX timestamp is enabled */
1293 if (sk->sk_type == SOCK_DGRAM) {
1294 err = sock_tx_timestamp(sk, &tx_flags);
1295 if (err)
1296 goto error;
1300 * Let's try using as much space as possible.
1301 * Use MTU if total length of the message fits into the MTU.
1302 * Otherwise, we need to reserve fragment header and
1303 * fragment alignment (= 8-15 octects, in total).
1305 * Note that we may need to "move" the data from the tail of
1306 * of the buffer to the new fragment when we split
1307 * the message.
1309 * FIXME: It may be fragmented into multiple chunks
1310 * at once if non-fragmentable extension headers
1311 * are too large.
1312 * --yoshfuji
1315 cork->length += length;
1316 if (length > mtu) {
1317 int proto = sk->sk_protocol;
1318 if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
1319 ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
1320 return -EMSGSIZE;
1323 if (proto == IPPROTO_UDP &&
1324 (rt->dst.dev->features & NETIF_F_UFO)) {
1326 err = ip6_ufo_append_data(sk, getfrag, from, length,
1327 hh_len, fragheaderlen,
1328 transhdrlen, mtu, flags, rt);
1329 if (err)
1330 goto error;
1331 return 0;
1335 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1336 goto alloc_new_skb;
1338 while (length > 0) {
1339 /* Check if the remaining data fits into current packet. */
1340 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1341 if (copy < length)
1342 copy = maxfraglen - skb->len;
1344 if (copy <= 0) {
1345 char *data;
1346 unsigned int datalen;
1347 unsigned int fraglen;
1348 unsigned int fraggap;
1349 unsigned int alloclen;
1350 struct sk_buff *skb_prev;
1351 alloc_new_skb:
1352 skb_prev = skb;
1354 /* There's no room in the current skb */
1355 if (skb_prev)
1356 fraggap = skb_prev->len - maxfraglen;
1357 else
1358 fraggap = 0;
1361 * If remaining data exceeds the mtu,
1362 * we know we need more fragment(s).
1364 datalen = length + fraggap;
1365 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1366 datalen = maxfraglen - fragheaderlen;
1368 fraglen = datalen + fragheaderlen;
1369 if ((flags & MSG_MORE) &&
1370 !(rt->dst.dev->features&NETIF_F_SG))
1371 alloclen = mtu;
1372 else
1373 alloclen = datalen + fragheaderlen;
1375 alloclen += dst_exthdrlen;
1378 * The last fragment gets additional space at tail.
1379 * Note: we overallocate on fragments with MSG_MODE
1380 * because we have no idea if we're the last one.
1382 if (datalen == length + fraggap)
1383 alloclen += rt->dst.trailer_len;
1386 * We just reserve space for fragment header.
1387 * Note: this may be overallocation if the message
1388 * (without MSG_MORE) fits into the MTU.
1390 alloclen += sizeof(struct frag_hdr);
1392 if (transhdrlen) {
1393 skb = sock_alloc_send_skb(sk,
1394 alloclen + hh_len,
1395 (flags & MSG_DONTWAIT), &err);
1396 } else {
1397 skb = NULL;
1398 if (atomic_read(&sk->sk_wmem_alloc) <=
1399 2 * sk->sk_sndbuf)
1400 skb = sock_wmalloc(sk,
1401 alloclen + hh_len, 1,
1402 sk->sk_allocation);
1403 if (unlikely(skb == NULL))
1404 err = -ENOBUFS;
1405 else {
1406 /* Only the initial fragment
1407 * is time stamped.
1409 tx_flags = 0;
1412 if (skb == NULL)
1413 goto error;
1415 * Fill in the control structures
1417 skb->ip_summed = csummode;
1418 skb->csum = 0;
1419 /* reserve for fragmentation and ipsec header */
1420 skb_reserve(skb, hh_len + sizeof(struct frag_hdr) +
1421 dst_exthdrlen);
1423 if (sk->sk_type == SOCK_DGRAM)
1424 skb_shinfo(skb)->tx_flags = tx_flags;
1427 * Find where to start putting bytes
1429 data = skb_put(skb, fraglen);
1430 skb_set_network_header(skb, exthdrlen);
1431 data += fragheaderlen;
1432 skb->transport_header = (skb->network_header +
1433 fragheaderlen);
1434 if (fraggap) {
1435 skb->csum = skb_copy_and_csum_bits(
1436 skb_prev, maxfraglen,
1437 data + transhdrlen, fraggap, 0);
1438 skb_prev->csum = csum_sub(skb_prev->csum,
1439 skb->csum);
1440 data += fraggap;
1441 pskb_trim_unique(skb_prev, maxfraglen);
1443 copy = datalen - transhdrlen - fraggap;
1445 if (copy < 0) {
1446 err = -EINVAL;
1447 kfree_skb(skb);
1448 goto error;
1449 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1450 err = -EFAULT;
1451 kfree_skb(skb);
1452 goto error;
1455 offset += copy;
1456 length -= datalen - fraggap;
1457 transhdrlen = 0;
1458 exthdrlen = 0;
1459 dst_exthdrlen = 0;
1460 csummode = CHECKSUM_NONE;
1463 * Put the packet on the pending queue
1465 __skb_queue_tail(&sk->sk_write_queue, skb);
1466 continue;
1469 if (copy > length)
1470 copy = length;
1472 if (!(rt->dst.dev->features&NETIF_F_SG)) {
1473 unsigned int off;
1475 off = skb->len;
1476 if (getfrag(from, skb_put(skb, copy),
1477 offset, copy, off, skb) < 0) {
1478 __skb_trim(skb, off);
1479 err = -EFAULT;
1480 goto error;
1482 } else {
1483 int i = skb_shinfo(skb)->nr_frags;
1484 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1485 struct page *page = sk->sk_sndmsg_page;
1486 int off = sk->sk_sndmsg_off;
1487 unsigned int left;
1489 if (page && (left = PAGE_SIZE - off) > 0) {
1490 if (copy >= left)
1491 copy = left;
1492 if (page != skb_frag_page(frag)) {
1493 if (i == MAX_SKB_FRAGS) {
1494 err = -EMSGSIZE;
1495 goto error;
1497 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1498 skb_frag_ref(skb, i);
1499 frag = &skb_shinfo(skb)->frags[i];
1501 } else if(i < MAX_SKB_FRAGS) {
1502 if (copy > PAGE_SIZE)
1503 copy = PAGE_SIZE;
1504 page = alloc_pages(sk->sk_allocation, 0);
1505 if (page == NULL) {
1506 err = -ENOMEM;
1507 goto error;
1509 sk->sk_sndmsg_page = page;
1510 sk->sk_sndmsg_off = 0;
1512 skb_fill_page_desc(skb, i, page, 0, 0);
1513 frag = &skb_shinfo(skb)->frags[i];
1514 } else {
1515 err = -EMSGSIZE;
1516 goto error;
1518 if (getfrag(from,
1519 skb_frag_address(frag) + skb_frag_size(frag),
1520 offset, copy, skb->len, skb) < 0) {
1521 err = -EFAULT;
1522 goto error;
1524 sk->sk_sndmsg_off += copy;
1525 skb_frag_size_add(frag, copy);
1526 skb->len += copy;
1527 skb->data_len += copy;
1528 skb->truesize += copy;
1529 atomic_add(copy, &sk->sk_wmem_alloc);
1531 offset += copy;
1532 length -= copy;
1534 return 0;
1535 error:
1536 cork->length -= length;
1537 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1538 return err;
1541 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1543 if (np->cork.opt) {
1544 kfree(np->cork.opt->dst0opt);
1545 kfree(np->cork.opt->dst1opt);
1546 kfree(np->cork.opt->hopopt);
1547 kfree(np->cork.opt->srcrt);
1548 kfree(np->cork.opt);
1549 np->cork.opt = NULL;
1552 if (inet->cork.base.dst) {
1553 dst_release(inet->cork.base.dst);
1554 inet->cork.base.dst = NULL;
1555 inet->cork.base.flags &= ~IPCORK_ALLFRAG;
1557 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1560 int ip6_push_pending_frames(struct sock *sk)
1562 struct sk_buff *skb, *tmp_skb;
1563 struct sk_buff **tail_skb;
1564 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1565 struct inet_sock *inet = inet_sk(sk);
1566 struct ipv6_pinfo *np = inet6_sk(sk);
1567 struct net *net = sock_net(sk);
1568 struct ipv6hdr *hdr;
1569 struct ipv6_txoptions *opt = np->cork.opt;
1570 struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
1571 struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
1572 unsigned char proto = fl6->flowi6_proto;
1573 int err = 0;
1575 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1576 goto out;
1577 tail_skb = &(skb_shinfo(skb)->frag_list);
1579 /* move skb->data to ip header from ext header */
1580 if (skb->data < skb_network_header(skb))
1581 __skb_pull(skb, skb_network_offset(skb));
1582 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1583 __skb_pull(tmp_skb, skb_network_header_len(skb));
1584 *tail_skb = tmp_skb;
1585 tail_skb = &(tmp_skb->next);
1586 skb->len += tmp_skb->len;
1587 skb->data_len += tmp_skb->len;
1588 skb->truesize += tmp_skb->truesize;
1589 tmp_skb->destructor = NULL;
1590 tmp_skb->sk = NULL;
1593 /* Allow local fragmentation. */
1594 if (np->pmtudisc < IPV6_PMTUDISC_DO)
1595 skb->local_df = 1;
1597 *final_dst = fl6->daddr;
1598 __skb_pull(skb, skb_network_header_len(skb));
1599 if (opt && opt->opt_flen)
1600 ipv6_push_frag_opts(skb, opt, &proto);
1601 if (opt && opt->opt_nflen)
1602 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1604 skb_push(skb, sizeof(struct ipv6hdr));
1605 skb_reset_network_header(skb);
1606 hdr = ipv6_hdr(skb);
1608 *(__be32*)hdr = fl6->flowlabel |
1609 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1611 hdr->hop_limit = np->cork.hop_limit;
1612 hdr->nexthdr = proto;
1613 hdr->saddr = fl6->saddr;
1614 hdr->daddr = *final_dst;
1616 skb->priority = sk->sk_priority;
1617 skb->mark = sk->sk_mark;
1619 skb_dst_set(skb, dst_clone(&rt->dst));
1620 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1621 if (proto == IPPROTO_ICMPV6) {
1622 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1624 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
1625 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1628 err = ip6_local_out(skb);
1629 if (err) {
1630 if (err > 0)
1631 err = net_xmit_errno(err);
1632 if (err)
1633 goto error;
1636 out:
1637 ip6_cork_release(inet, np);
1638 return err;
1639 error:
1640 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1641 goto out;
1644 void ip6_flush_pending_frames(struct sock *sk)
1646 struct sk_buff *skb;
1648 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1649 if (skb_dst(skb))
1650 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1651 IPSTATS_MIB_OUTDISCARDS);
1652 kfree_skb(skb);
1655 ip6_cork_release(inet_sk(sk), inet6_sk(sk));