Merge branch 'drm-fixes' of git://people.freedesktop.org/~airlied/linux
[zen-stable.git] / net / ipv6 / ip6_output.c
blob84d0bd5cac939814edaed4379f09464a958d61bf
1 /*
2 * IPv6 output functions
3 * Linux INET6 implementation
5 * Authors:
6 * Pedro Roque <roque@di.fc.ul.pt>
8 * Based on linux/net/ipv4/ip_output.c
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version
13 * 2 of the License, or (at your option) any later version.
15 * Changes:
16 * A.N.Kuznetsov : arithmetics in fragmentation.
17 * extension headers are implemented.
18 * route changes now work.
19 * ip6_forward does not confuse sniffers.
20 * etc.
22 * H. von Brand : Added missing #include <linux/string.h>
23 * Imran Patel : frag id should be in NBO
24 * Kazunori MIYAZAWA @USAGI
25 * : add ip6_append_data and related functions
26 * for datagram xmit
29 #include <linux/errno.h>
30 #include <linux/kernel.h>
31 #include <linux/string.h>
32 #include <linux/socket.h>
33 #include <linux/net.h>
34 #include <linux/netdevice.h>
35 #include <linux/if_arp.h>
36 #include <linux/in6.h>
37 #include <linux/tcp.h>
38 #include <linux/route.h>
39 #include <linux/module.h>
40 #include <linux/slab.h>
42 #include <linux/netfilter.h>
43 #include <linux/netfilter_ipv6.h>
45 #include <net/sock.h>
46 #include <net/snmp.h>
48 #include <net/ipv6.h>
49 #include <net/ndisc.h>
50 #include <net/protocol.h>
51 #include <net/ip6_route.h>
52 #include <net/addrconf.h>
53 #include <net/rawv6.h>
54 #include <net/icmp.h>
55 #include <net/xfrm.h>
56 #include <net/checksum.h>
57 #include <linux/mroute6.h>
59 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *));
61 int __ip6_local_out(struct sk_buff *skb)
63 int len;
65 len = skb->len - sizeof(struct ipv6hdr);
66 if (len > IPV6_MAXPLEN)
67 len = 0;
68 ipv6_hdr(skb)->payload_len = htons(len);
70 return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
71 skb_dst(skb)->dev, dst_output);
/*
 * Send a locally generated packet: run __ip6_local_out() and, when it
 * returns 1, continue with dst_output().  Any other value from
 * __ip6_local_out() is returned unchanged.
 */
int ip6_local_out(struct sk_buff *skb)
{
	int rc = __ip6_local_out(skb);

	if (likely(rc == 1))
		rc = dst_output(skb);

	return rc;
}
EXPORT_SYMBOL_GPL(ip6_local_out);
86 /* dev_loopback_xmit for use with netfilter. */
87 static int ip6_dev_loopback_xmit(struct sk_buff *newskb)
89 skb_reset_mac_header(newskb);
90 __skb_pull(newskb, skb_network_offset(newskb));
91 newskb->pkt_type = PACKET_LOOPBACK;
92 newskb->ip_summed = CHECKSUM_UNNECESSARY;
93 WARN_ON(!skb_dst(newskb));
95 netif_rx_ni(newskb);
96 return 0;
99 static int ip6_finish_output2(struct sk_buff *skb)
101 struct dst_entry *dst = skb_dst(skb);
102 struct net_device *dev = dst->dev;
103 struct neighbour *neigh;
105 skb->protocol = htons(ETH_P_IPV6);
106 skb->dev = dev;
108 if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) {
109 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
111 if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) &&
112 ((mroute6_socket(dev_net(dev), skb) &&
113 !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) ||
114 ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr,
115 &ipv6_hdr(skb)->saddr))) {
116 struct sk_buff *newskb = skb_clone(skb, GFP_ATOMIC);
118 /* Do not check for IFF_ALLMULTI; multicast routing
119 is not supported in any case.
121 if (newskb)
122 NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING,
123 newskb, NULL, newskb->dev,
124 ip6_dev_loopback_xmit);
126 if (ipv6_hdr(skb)->hop_limit == 0) {
127 IP6_INC_STATS(dev_net(dev), idev,
128 IPSTATS_MIB_OUTDISCARDS);
129 kfree_skb(skb);
130 return 0;
134 IP6_UPD_PO_STATS(dev_net(dev), idev, IPSTATS_MIB_OUTMCAST,
135 skb->len);
138 rcu_read_lock();
139 neigh = dst_get_neighbour(dst);
140 if (neigh) {
141 int res = neigh_output(neigh, skb);
143 rcu_read_unlock();
144 return res;
146 rcu_read_unlock();
147 IP6_INC_STATS_BH(dev_net(dst->dev),
148 ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES);
149 kfree_skb(skb);
150 return -EINVAL;
153 static int ip6_finish_output(struct sk_buff *skb)
155 if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) ||
156 dst_allfrag(skb_dst(skb)))
157 return ip6_fragment(skb, ip6_finish_output2);
158 else
159 return ip6_finish_output2(skb);
162 int ip6_output(struct sk_buff *skb)
164 struct net_device *dev = skb_dst(skb)->dev;
165 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
166 if (unlikely(idev->cnf.disable_ipv6)) {
167 IP6_INC_STATS(dev_net(dev), idev,
168 IPSTATS_MIB_OUTDISCARDS);
169 kfree_skb(skb);
170 return 0;
173 return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev,
174 ip6_finish_output,
175 !(IP6CB(skb)->flags & IP6SKB_REROUTED));
179 * xmit an sk_buff (used by TCP, SCTP and DCCP)
182 int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6,
183 struct ipv6_txoptions *opt, int tclass)
185 struct net *net = sock_net(sk);
186 struct ipv6_pinfo *np = inet6_sk(sk);
187 struct in6_addr *first_hop = &fl6->daddr;
188 struct dst_entry *dst = skb_dst(skb);
189 struct ipv6hdr *hdr;
190 u8 proto = fl6->flowi6_proto;
191 int seg_len = skb->len;
192 int hlimit = -1;
193 u32 mtu;
195 if (opt) {
196 unsigned int head_room;
198 /* First: exthdrs may take lots of space (~8K for now)
199 MAX_HEADER is not enough.
201 head_room = opt->opt_nflen + opt->opt_flen;
202 seg_len += head_room;
203 head_room += sizeof(struct ipv6hdr) + LL_RESERVED_SPACE(dst->dev);
205 if (skb_headroom(skb) < head_room) {
206 struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room);
207 if (skb2 == NULL) {
208 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
209 IPSTATS_MIB_OUTDISCARDS);
210 kfree_skb(skb);
211 return -ENOBUFS;
213 kfree_skb(skb);
214 skb = skb2;
215 skb_set_owner_w(skb, sk);
217 if (opt->opt_flen)
218 ipv6_push_frag_opts(skb, opt, &proto);
219 if (opt->opt_nflen)
220 ipv6_push_nfrag_opts(skb, opt, &proto, &first_hop);
223 skb_push(skb, sizeof(struct ipv6hdr));
224 skb_reset_network_header(skb);
225 hdr = ipv6_hdr(skb);
228 * Fill in the IPv6 header
230 if (np)
231 hlimit = np->hop_limit;
232 if (hlimit < 0)
233 hlimit = ip6_dst_hoplimit(dst);
235 *(__be32 *)hdr = htonl(0x60000000 | (tclass << 20)) | fl6->flowlabel;
237 hdr->payload_len = htons(seg_len);
238 hdr->nexthdr = proto;
239 hdr->hop_limit = hlimit;
241 ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
242 ipv6_addr_copy(&hdr->daddr, first_hop);
244 skb->priority = sk->sk_priority;
245 skb->mark = sk->sk_mark;
247 mtu = dst_mtu(dst);
248 if ((skb->len <= mtu) || skb->local_df || skb_is_gso(skb)) {
249 IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)),
250 IPSTATS_MIB_OUT, skb->len);
251 return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL,
252 dst->dev, dst_output);
255 if (net_ratelimit())
256 printk(KERN_DEBUG "IPv6: sending pkt_too_big to self\n");
257 skb->dev = dst->dev;
258 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
259 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS);
260 kfree_skb(skb);
261 return -EMSGSIZE;
264 EXPORT_SYMBOL(ip6_xmit);
267 * To avoid extra problems ND packets are send through this
268 * routine. It's code duplication but I really want to avoid
269 * extra checks since ipv6_build_header is used by TCP (which
270 * is for us performance critical)
273 int ip6_nd_hdr(struct sock *sk, struct sk_buff *skb, struct net_device *dev,
274 const struct in6_addr *saddr, const struct in6_addr *daddr,
275 int proto, int len)
277 struct ipv6_pinfo *np = inet6_sk(sk);
278 struct ipv6hdr *hdr;
280 skb->protocol = htons(ETH_P_IPV6);
281 skb->dev = dev;
283 skb_reset_network_header(skb);
284 skb_put(skb, sizeof(struct ipv6hdr));
285 hdr = ipv6_hdr(skb);
287 *(__be32*)hdr = htonl(0x60000000);
289 hdr->payload_len = htons(len);
290 hdr->nexthdr = proto;
291 hdr->hop_limit = np->hop_limit;
293 ipv6_addr_copy(&hdr->saddr, saddr);
294 ipv6_addr_copy(&hdr->daddr, daddr);
296 return 0;
299 static int ip6_call_ra_chain(struct sk_buff *skb, int sel)
301 struct ip6_ra_chain *ra;
302 struct sock *last = NULL;
304 read_lock(&ip6_ra_lock);
305 for (ra = ip6_ra_chain; ra; ra = ra->next) {
306 struct sock *sk = ra->sk;
307 if (sk && ra->sel == sel &&
308 (!sk->sk_bound_dev_if ||
309 sk->sk_bound_dev_if == skb->dev->ifindex)) {
310 if (last) {
311 struct sk_buff *skb2 = skb_clone(skb, GFP_ATOMIC);
312 if (skb2)
313 rawv6_rcv(last, skb2);
315 last = sk;
319 if (last) {
320 rawv6_rcv(last, skb);
321 read_unlock(&ip6_ra_lock);
322 return 1;
324 read_unlock(&ip6_ra_lock);
325 return 0;
328 static int ip6_forward_proxy_check(struct sk_buff *skb)
330 struct ipv6hdr *hdr = ipv6_hdr(skb);
331 u8 nexthdr = hdr->nexthdr;
332 int offset;
334 if (ipv6_ext_hdr(nexthdr)) {
335 offset = ipv6_skip_exthdr(skb, sizeof(*hdr), &nexthdr);
336 if (offset < 0)
337 return 0;
338 } else
339 offset = sizeof(struct ipv6hdr);
341 if (nexthdr == IPPROTO_ICMPV6) {
342 struct icmp6hdr *icmp6;
344 if (!pskb_may_pull(skb, (skb_network_header(skb) +
345 offset + 1 - skb->data)))
346 return 0;
348 icmp6 = (struct icmp6hdr *)(skb_network_header(skb) + offset);
350 switch (icmp6->icmp6_type) {
351 case NDISC_ROUTER_SOLICITATION:
352 case NDISC_ROUTER_ADVERTISEMENT:
353 case NDISC_NEIGHBOUR_SOLICITATION:
354 case NDISC_NEIGHBOUR_ADVERTISEMENT:
355 case NDISC_REDIRECT:
356 /* For reaction involving unicast neighbor discovery
357 * message destined to the proxied address, pass it to
358 * input function.
360 return 1;
361 default:
362 break;
367 * The proxying router can't forward traffic sent to a link-local
368 * address, so signal the sender and discard the packet. This
369 * behavior is clarified by the MIPv6 specification.
371 if (ipv6_addr_type(&hdr->daddr) & IPV6_ADDR_LINKLOCAL) {
372 dst_link_failure(skb);
373 return -1;
376 return 0;
/* Continuation of the NF_INET_FORWARD hook: transmit via the skb's dst. */
static inline int ip6_forward_finish(struct sk_buff *skb)
{
	int rc = dst_output(skb);

	return rc;
}
384 int ip6_forward(struct sk_buff *skb)
386 struct dst_entry *dst = skb_dst(skb);
387 struct ipv6hdr *hdr = ipv6_hdr(skb);
388 struct inet6_skb_parm *opt = IP6CB(skb);
389 struct net *net = dev_net(dst->dev);
390 struct neighbour *n;
391 u32 mtu;
393 if (net->ipv6.devconf_all->forwarding == 0)
394 goto error;
396 if (skb_warn_if_lro(skb))
397 goto drop;
399 if (!xfrm6_policy_check(NULL, XFRM_POLICY_FWD, skb)) {
400 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
401 goto drop;
404 if (skb->pkt_type != PACKET_HOST)
405 goto drop;
407 skb_forward_csum(skb);
410 * We DO NOT make any processing on
411 * RA packets, pushing them to user level AS IS
412 * without ane WARRANTY that application will be able
413 * to interpret them. The reason is that we
414 * cannot make anything clever here.
416 * We are not end-node, so that if packet contains
417 * AH/ESP, we cannot make anything.
418 * Defragmentation also would be mistake, RA packets
419 * cannot be fragmented, because there is no warranty
420 * that different fragments will go along one path. --ANK
422 if (opt->ra) {
423 u8 *ptr = skb_network_header(skb) + opt->ra;
424 if (ip6_call_ra_chain(skb, (ptr[2]<<8) + ptr[3]))
425 return 0;
429 * check and decrement ttl
431 if (hdr->hop_limit <= 1) {
432 /* Force OUTPUT device used as source address */
433 skb->dev = dst->dev;
434 icmpv6_send(skb, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT, 0);
435 IP6_INC_STATS_BH(net,
436 ip6_dst_idev(dst), IPSTATS_MIB_INHDRERRORS);
438 kfree_skb(skb);
439 return -ETIMEDOUT;
442 /* XXX: idev->cnf.proxy_ndp? */
443 if (net->ipv6.devconf_all->proxy_ndp &&
444 pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) {
445 int proxied = ip6_forward_proxy_check(skb);
446 if (proxied > 0)
447 return ip6_input(skb);
448 else if (proxied < 0) {
449 IP6_INC_STATS(net, ip6_dst_idev(dst),
450 IPSTATS_MIB_INDISCARDS);
451 goto drop;
455 if (!xfrm6_route_forward(skb)) {
456 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_INDISCARDS);
457 goto drop;
459 dst = skb_dst(skb);
461 /* IPv6 specs say nothing about it, but it is clear that we cannot
462 send redirects to source routed frames.
463 We don't send redirects to frames decapsulated from IPsec.
465 n = dst_get_neighbour(dst);
466 if (skb->dev == dst->dev && n && opt->srcrt == 0 && !skb_sec_path(skb)) {
467 struct in6_addr *target = NULL;
468 struct rt6_info *rt;
471 * incoming and outgoing devices are the same
472 * send a redirect.
475 rt = (struct rt6_info *) dst;
476 if ((rt->rt6i_flags & RTF_GATEWAY))
477 target = (struct in6_addr*)&n->primary_key;
478 else
479 target = &hdr->daddr;
481 if (!rt->rt6i_peer)
482 rt6_bind_peer(rt, 1);
484 /* Limit redirects both by destination (here)
485 and by source (inside ndisc_send_redirect)
487 if (inet_peer_xrlim_allow(rt->rt6i_peer, 1*HZ))
488 ndisc_send_redirect(skb, n, target);
489 } else {
490 int addrtype = ipv6_addr_type(&hdr->saddr);
492 /* This check is security critical. */
493 if (addrtype == IPV6_ADDR_ANY ||
494 addrtype & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LOOPBACK))
495 goto error;
496 if (addrtype & IPV6_ADDR_LINKLOCAL) {
497 icmpv6_send(skb, ICMPV6_DEST_UNREACH,
498 ICMPV6_NOT_NEIGHBOUR, 0);
499 goto error;
503 mtu = dst_mtu(dst);
504 if (mtu < IPV6_MIN_MTU)
505 mtu = IPV6_MIN_MTU;
507 if (skb->len > mtu && !skb_is_gso(skb)) {
508 /* Again, force OUTPUT device used as source address */
509 skb->dev = dst->dev;
510 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
511 IP6_INC_STATS_BH(net,
512 ip6_dst_idev(dst), IPSTATS_MIB_INTOOBIGERRORS);
513 IP6_INC_STATS_BH(net,
514 ip6_dst_idev(dst), IPSTATS_MIB_FRAGFAILS);
515 kfree_skb(skb);
516 return -EMSGSIZE;
519 if (skb_cow(skb, dst->dev->hard_header_len)) {
520 IP6_INC_STATS(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTDISCARDS);
521 goto drop;
524 hdr = ipv6_hdr(skb);
526 /* Mangling hops number delayed to point after skb COW */
528 hdr->hop_limit--;
530 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS);
531 return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev,
532 ip6_forward_finish);
534 error:
535 IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_INADDRERRORS);
536 drop:
537 kfree_skb(skb);
538 return -EINVAL;
541 static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from)
543 to->pkt_type = from->pkt_type;
544 to->priority = from->priority;
545 to->protocol = from->protocol;
546 skb_dst_drop(to);
547 skb_dst_set(to, dst_clone(skb_dst(from)));
548 to->dev = from->dev;
549 to->mark = from->mark;
551 #ifdef CONFIG_NET_SCHED
552 to->tc_index = from->tc_index;
553 #endif
554 nf_copy(to, from);
555 #if defined(CONFIG_NETFILTER_XT_TARGET_TRACE) || \
556 defined(CONFIG_NETFILTER_XT_TARGET_TRACE_MODULE)
557 to->nf_trace = from->nf_trace;
558 #endif
559 skb_copy_secmark(to, from);
562 int ip6_find_1stfragopt(struct sk_buff *skb, u8 **nexthdr)
564 u16 offset = sizeof(struct ipv6hdr);
565 struct ipv6_opt_hdr *exthdr =
566 (struct ipv6_opt_hdr *)(ipv6_hdr(skb) + 1);
567 unsigned int packet_len = skb->tail - skb->network_header;
568 int found_rhdr = 0;
569 *nexthdr = &ipv6_hdr(skb)->nexthdr;
571 while (offset + 1 <= packet_len) {
573 switch (**nexthdr) {
575 case NEXTHDR_HOP:
576 break;
577 case NEXTHDR_ROUTING:
578 found_rhdr = 1;
579 break;
580 case NEXTHDR_DEST:
581 #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE)
582 if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0)
583 break;
584 #endif
585 if (found_rhdr)
586 return offset;
587 break;
588 default :
589 return offset;
592 offset += ipv6_optlen(exthdr);
593 *nexthdr = &exthdr->nexthdr;
594 exthdr = (struct ipv6_opt_hdr *)(skb_network_header(skb) +
595 offset);
598 return offset;
601 void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt)
603 static atomic_t ipv6_fragmentation_id;
604 int old, new;
606 if (rt) {
607 struct inet_peer *peer;
609 if (!rt->rt6i_peer)
610 rt6_bind_peer(rt, 1);
611 peer = rt->rt6i_peer;
612 if (peer) {
613 fhdr->identification = htonl(inet_getid(peer, 0));
614 return;
617 do {
618 old = atomic_read(&ipv6_fragmentation_id);
619 new = old + 1;
620 if (!new)
621 new = 1;
622 } while (atomic_cmpxchg(&ipv6_fragmentation_id, old, new) != old);
623 fhdr->identification = htonl(new);
626 int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *))
628 struct sk_buff *frag;
629 struct rt6_info *rt = (struct rt6_info*)skb_dst(skb);
630 struct ipv6_pinfo *np = skb->sk ? inet6_sk(skb->sk) : NULL;
631 struct ipv6hdr *tmp_hdr;
632 struct frag_hdr *fh;
633 unsigned int mtu, hlen, left, len;
634 __be32 frag_id = 0;
635 int ptr, offset = 0, err=0;
636 u8 *prevhdr, nexthdr = 0;
637 struct net *net = dev_net(skb_dst(skb)->dev);
639 hlen = ip6_find_1stfragopt(skb, &prevhdr);
640 nexthdr = *prevhdr;
642 mtu = ip6_skb_dst_mtu(skb);
644 /* We must not fragment if the socket is set to force MTU discovery
645 * or if the skb it not generated by a local socket.
647 if (!skb->local_df && skb->len > mtu) {
648 skb->dev = skb_dst(skb)->dev;
649 icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
650 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
651 IPSTATS_MIB_FRAGFAILS);
652 kfree_skb(skb);
653 return -EMSGSIZE;
656 if (np && np->frag_size < mtu) {
657 if (np->frag_size)
658 mtu = np->frag_size;
660 mtu -= hlen + sizeof(struct frag_hdr);
662 if (skb_has_frag_list(skb)) {
663 int first_len = skb_pagelen(skb);
664 struct sk_buff *frag2;
666 if (first_len - hlen > mtu ||
667 ((first_len - hlen) & 7) ||
668 skb_cloned(skb))
669 goto slow_path;
671 skb_walk_frags(skb, frag) {
672 /* Correct geometry. */
673 if (frag->len > mtu ||
674 ((frag->len & 7) && frag->next) ||
675 skb_headroom(frag) < hlen)
676 goto slow_path_clean;
678 /* Partially cloned skb? */
679 if (skb_shared(frag))
680 goto slow_path_clean;
682 BUG_ON(frag->sk);
683 if (skb->sk) {
684 frag->sk = skb->sk;
685 frag->destructor = sock_wfree;
687 skb->truesize -= frag->truesize;
690 err = 0;
691 offset = 0;
692 frag = skb_shinfo(skb)->frag_list;
693 skb_frag_list_init(skb);
694 /* BUILD HEADER */
696 *prevhdr = NEXTHDR_FRAGMENT;
697 tmp_hdr = kmemdup(skb_network_header(skb), hlen, GFP_ATOMIC);
698 if (!tmp_hdr) {
699 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
700 IPSTATS_MIB_FRAGFAILS);
701 return -ENOMEM;
704 __skb_pull(skb, hlen);
705 fh = (struct frag_hdr*)__skb_push(skb, sizeof(struct frag_hdr));
706 __skb_push(skb, hlen);
707 skb_reset_network_header(skb);
708 memcpy(skb_network_header(skb), tmp_hdr, hlen);
710 ipv6_select_ident(fh, rt);
711 fh->nexthdr = nexthdr;
712 fh->reserved = 0;
713 fh->frag_off = htons(IP6_MF);
714 frag_id = fh->identification;
716 first_len = skb_pagelen(skb);
717 skb->data_len = first_len - skb_headlen(skb);
718 skb->len = first_len;
719 ipv6_hdr(skb)->payload_len = htons(first_len -
720 sizeof(struct ipv6hdr));
722 dst_hold(&rt->dst);
724 for (;;) {
725 /* Prepare header of the next frame,
726 * before previous one went down. */
727 if (frag) {
728 frag->ip_summed = CHECKSUM_NONE;
729 skb_reset_transport_header(frag);
730 fh = (struct frag_hdr*)__skb_push(frag, sizeof(struct frag_hdr));
731 __skb_push(frag, hlen);
732 skb_reset_network_header(frag);
733 memcpy(skb_network_header(frag), tmp_hdr,
734 hlen);
735 offset += skb->len - hlen - sizeof(struct frag_hdr);
736 fh->nexthdr = nexthdr;
737 fh->reserved = 0;
738 fh->frag_off = htons(offset);
739 if (frag->next != NULL)
740 fh->frag_off |= htons(IP6_MF);
741 fh->identification = frag_id;
742 ipv6_hdr(frag)->payload_len =
743 htons(frag->len -
744 sizeof(struct ipv6hdr));
745 ip6_copy_metadata(frag, skb);
748 err = output(skb);
749 if(!err)
750 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
751 IPSTATS_MIB_FRAGCREATES);
753 if (err || !frag)
754 break;
756 skb = frag;
757 frag = skb->next;
758 skb->next = NULL;
761 kfree(tmp_hdr);
763 if (err == 0) {
764 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
765 IPSTATS_MIB_FRAGOKS);
766 dst_release(&rt->dst);
767 return 0;
770 while (frag) {
771 skb = frag->next;
772 kfree_skb(frag);
773 frag = skb;
776 IP6_INC_STATS(net, ip6_dst_idev(&rt->dst),
777 IPSTATS_MIB_FRAGFAILS);
778 dst_release(&rt->dst);
779 return err;
781 slow_path_clean:
782 skb_walk_frags(skb, frag2) {
783 if (frag2 == frag)
784 break;
785 frag2->sk = NULL;
786 frag2->destructor = NULL;
787 skb->truesize += frag2->truesize;
791 slow_path:
792 left = skb->len - hlen; /* Space per frame */
793 ptr = hlen; /* Where to start from */
796 * Fragment the datagram.
799 *prevhdr = NEXTHDR_FRAGMENT;
802 * Keep copying data until we run out.
804 while(left > 0) {
805 len = left;
806 /* IF: it doesn't fit, use 'mtu' - the data space left */
807 if (len > mtu)
808 len = mtu;
809 /* IF: we are not sending up to and including the packet end
810 then align the next start on an eight byte boundary */
811 if (len < left) {
812 len &= ~7;
815 * Allocate buffer.
818 if ((frag = alloc_skb(len+hlen+sizeof(struct frag_hdr)+LL_ALLOCATED_SPACE(rt->dst.dev), GFP_ATOMIC)) == NULL) {
819 NETDEBUG(KERN_INFO "IPv6: frag: no memory for new fragment!\n");
820 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
821 IPSTATS_MIB_FRAGFAILS);
822 err = -ENOMEM;
823 goto fail;
827 * Set up data on packet
830 ip6_copy_metadata(frag, skb);
831 skb_reserve(frag, LL_RESERVED_SPACE(rt->dst.dev));
832 skb_put(frag, len + hlen + sizeof(struct frag_hdr));
833 skb_reset_network_header(frag);
834 fh = (struct frag_hdr *)(skb_network_header(frag) + hlen);
835 frag->transport_header = (frag->network_header + hlen +
836 sizeof(struct frag_hdr));
839 * Charge the memory for the fragment to any owner
840 * it might possess
842 if (skb->sk)
843 skb_set_owner_w(frag, skb->sk);
846 * Copy the packet header into the new buffer.
848 skb_copy_from_linear_data(skb, skb_network_header(frag), hlen);
851 * Build fragment header.
853 fh->nexthdr = nexthdr;
854 fh->reserved = 0;
855 if (!frag_id) {
856 ipv6_select_ident(fh, rt);
857 frag_id = fh->identification;
858 } else
859 fh->identification = frag_id;
862 * Copy a block of the IP datagram.
864 if (skb_copy_bits(skb, ptr, skb_transport_header(frag), len))
865 BUG();
866 left -= len;
868 fh->frag_off = htons(offset);
869 if (left > 0)
870 fh->frag_off |= htons(IP6_MF);
871 ipv6_hdr(frag)->payload_len = htons(frag->len -
872 sizeof(struct ipv6hdr));
874 ptr += len;
875 offset += len;
878 * Put this fragment into the sending queue.
880 err = output(frag);
881 if (err)
882 goto fail;
884 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
885 IPSTATS_MIB_FRAGCREATES);
887 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
888 IPSTATS_MIB_FRAGOKS);
889 kfree_skb(skb);
890 return err;
892 fail:
893 IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)),
894 IPSTATS_MIB_FRAGFAILS);
895 kfree_skb(skb);
896 return err;
899 static inline int ip6_rt_check(const struct rt6key *rt_key,
900 const struct in6_addr *fl_addr,
901 const struct in6_addr *addr_cache)
903 return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) &&
904 (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache));
907 static struct dst_entry *ip6_sk_dst_check(struct sock *sk,
908 struct dst_entry *dst,
909 const struct flowi6 *fl6)
911 struct ipv6_pinfo *np = inet6_sk(sk);
912 struct rt6_info *rt = (struct rt6_info *)dst;
914 if (!dst)
915 goto out;
917 /* Yes, checking route validity in not connected
918 * case is not very simple. Take into account,
919 * that we do not support routing by source, TOS,
920 * and MSG_DONTROUTE --ANK (980726)
922 * 1. ip6_rt_check(): If route was host route,
923 * check that cached destination is current.
924 * If it is network route, we still may
925 * check its validity using saved pointer
926 * to the last used address: daddr_cache.
927 * We do not want to save whole address now,
928 * (because main consumer of this service
929 * is tcp, which has not this problem),
930 * so that the last trick works only on connected
931 * sockets.
932 * 2. oif also should be the same.
934 if (ip6_rt_check(&rt->rt6i_dst, &fl6->daddr, np->daddr_cache) ||
935 #ifdef CONFIG_IPV6_SUBTREES
936 ip6_rt_check(&rt->rt6i_src, &fl6->saddr, np->saddr_cache) ||
937 #endif
938 (fl6->flowi6_oif && fl6->flowi6_oif != dst->dev->ifindex)) {
939 dst_release(dst);
940 dst = NULL;
943 out:
944 return dst;
947 static int ip6_dst_lookup_tail(struct sock *sk,
948 struct dst_entry **dst, struct flowi6 *fl6)
950 struct net *net = sock_net(sk);
951 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
952 struct neighbour *n;
953 #endif
954 int err;
956 if (*dst == NULL)
957 *dst = ip6_route_output(net, sk, fl6);
959 if ((err = (*dst)->error))
960 goto out_err_release;
962 if (ipv6_addr_any(&fl6->saddr)) {
963 struct rt6_info *rt = (struct rt6_info *) *dst;
964 err = ip6_route_get_saddr(net, rt, &fl6->daddr,
965 sk ? inet6_sk(sk)->srcprefs : 0,
966 &fl6->saddr);
967 if (err)
968 goto out_err_release;
971 #ifdef CONFIG_IPV6_OPTIMISTIC_DAD
973 * Here if the dst entry we've looked up
974 * has a neighbour entry that is in the INCOMPLETE
975 * state and the src address from the flow is
976 * marked as OPTIMISTIC, we release the found
977 * dst entry and replace it instead with the
978 * dst entry of the nexthop router
980 rcu_read_lock();
981 n = dst_get_neighbour(*dst);
982 if (n && !(n->nud_state & NUD_VALID)) {
983 struct inet6_ifaddr *ifp;
984 struct flowi6 fl_gw6;
985 int redirect;
987 rcu_read_unlock();
988 ifp = ipv6_get_ifaddr(net, &fl6->saddr,
989 (*dst)->dev, 1);
991 redirect = (ifp && ifp->flags & IFA_F_OPTIMISTIC);
992 if (ifp)
993 in6_ifa_put(ifp);
995 if (redirect) {
997 * We need to get the dst entry for the
998 * default router instead
1000 dst_release(*dst);
1001 memcpy(&fl_gw6, fl6, sizeof(struct flowi6));
1002 memset(&fl_gw6.daddr, 0, sizeof(struct in6_addr));
1003 *dst = ip6_route_output(net, sk, &fl_gw6);
1004 if ((err = (*dst)->error))
1005 goto out_err_release;
1007 } else {
1008 rcu_read_unlock();
1010 #endif
1012 return 0;
1014 out_err_release:
1015 if (err == -ENETUNREACH)
1016 IP6_INC_STATS_BH(net, NULL, IPSTATS_MIB_OUTNOROUTES);
1017 dst_release(*dst);
1018 *dst = NULL;
1019 return err;
1023 * ip6_dst_lookup - perform route lookup on flow
1024 * @sk: socket which provides route info
1025 * @dst: pointer to dst_entry * for result
1026 * @fl6: flow to lookup
1028 * This function performs a route lookup on the given flow.
1030 * It returns zero on success, or a standard errno code on error.
1032 int ip6_dst_lookup(struct sock *sk, struct dst_entry **dst, struct flowi6 *fl6)
1034 *dst = NULL;
1035 return ip6_dst_lookup_tail(sk, dst, fl6);
1037 EXPORT_SYMBOL_GPL(ip6_dst_lookup);
1040 * ip6_dst_lookup_flow - perform route lookup on flow with ipsec
1041 * @sk: socket which provides route info
1042 * @fl6: flow to lookup
1043 * @final_dst: final destination address for ipsec lookup
1044 * @can_sleep: we are in a sleepable context
1046 * This function performs a route lookup on the given flow.
1048 * It returns a valid dst pointer on success, or a pointer encoded
1049 * error code.
1051 struct dst_entry *ip6_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1052 const struct in6_addr *final_dst,
1053 bool can_sleep)
1055 struct dst_entry *dst = NULL;
1056 int err;
1058 err = ip6_dst_lookup_tail(sk, &dst, fl6);
1059 if (err)
1060 return ERR_PTR(err);
1061 if (final_dst)
1062 ipv6_addr_copy(&fl6->daddr, final_dst);
1063 if (can_sleep)
1064 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
1066 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1068 EXPORT_SYMBOL_GPL(ip6_dst_lookup_flow);
1071 * ip6_sk_dst_lookup_flow - perform socket cached route lookup on flow
1072 * @sk: socket which provides the dst cache and route info
1073 * @fl6: flow to lookup
1074 * @final_dst: final destination address for ipsec lookup
1075 * @can_sleep: we are in a sleepable context
1077 * This function performs a route lookup on the given flow with the
1078 * possibility of using the cached route in the socket if it is valid.
1079 * It will take the socket dst lock when operating on the dst cache.
1080 * As a result, this function can only be used in process context.
1082 * It returns a valid dst pointer on success, or a pointer encoded
1083 * error code.
1085 struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6,
1086 const struct in6_addr *final_dst,
1087 bool can_sleep)
1089 struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie);
1090 int err;
1092 dst = ip6_sk_dst_check(sk, dst, fl6);
1094 err = ip6_dst_lookup_tail(sk, &dst, fl6);
1095 if (err)
1096 return ERR_PTR(err);
1097 if (final_dst)
1098 ipv6_addr_copy(&fl6->daddr, final_dst);
1099 if (can_sleep)
1100 fl6->flowi6_flags |= FLOWI_FLAG_CAN_SLEEP;
1102 return xfrm_lookup(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0);
1104 EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow);
1106 static inline int ip6_ufo_append_data(struct sock *sk,
1107 int getfrag(void *from, char *to, int offset, int len,
1108 int odd, struct sk_buff *skb),
1109 void *from, int length, int hh_len, int fragheaderlen,
1110 int transhdrlen, int mtu,unsigned int flags,
1111 struct rt6_info *rt)
1114 struct sk_buff *skb;
1115 int err;
1117 /* There is support for UDP large send offload by network
1118 * device, so create one single skb packet containing complete
1119 * udp datagram
1121 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL) {
1122 skb = sock_alloc_send_skb(sk,
1123 hh_len + fragheaderlen + transhdrlen + 20,
1124 (flags & MSG_DONTWAIT), &err);
1125 if (skb == NULL)
1126 return err;
1128 /* reserve space for Hardware header */
1129 skb_reserve(skb, hh_len);
1131 /* create space for UDP/IP header */
1132 skb_put(skb,fragheaderlen + transhdrlen);
1134 /* initialize network header pointer */
1135 skb_reset_network_header(skb);
1137 /* initialize protocol header pointer */
1138 skb->transport_header = skb->network_header + fragheaderlen;
1140 skb->ip_summed = CHECKSUM_PARTIAL;
1141 skb->csum = 0;
1144 err = skb_append_datato_frags(sk,skb, getfrag, from,
1145 (length - transhdrlen));
1146 if (!err) {
1147 struct frag_hdr fhdr;
1149 /* Specify the length of each IPv6 datagram fragment.
1150 * It has to be a multiple of 8.
1152 skb_shinfo(skb)->gso_size = (mtu - fragheaderlen -
1153 sizeof(struct frag_hdr)) & ~7;
1154 skb_shinfo(skb)->gso_type = SKB_GSO_UDP;
1155 ipv6_select_ident(&fhdr, rt);
1156 skb_shinfo(skb)->ip6_frag_id = fhdr.identification;
1157 __skb_queue_tail(&sk->sk_write_queue, skb);
1159 return 0;
1161 /* There is not enough support do UPD LSO,
1162 * so follow normal path
1164 kfree_skb(skb);
1166 return err;
1169 static inline struct ipv6_opt_hdr *ip6_opt_dup(struct ipv6_opt_hdr *src,
1170 gfp_t gfp)
1172 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1175 static inline struct ipv6_rt_hdr *ip6_rthdr_dup(struct ipv6_rt_hdr *src,
1176 gfp_t gfp)
1178 return src ? kmemdup(src, (src->hdrlen + 1) * 8, gfp) : NULL;
1181 int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to,
1182 int offset, int len, int odd, struct sk_buff *skb),
1183 void *from, int length, int transhdrlen,
1184 int hlimit, int tclass, struct ipv6_txoptions *opt, struct flowi6 *fl6,
1185 struct rt6_info *rt, unsigned int flags, int dontfrag)
1187 struct inet_sock *inet = inet_sk(sk);
1188 struct ipv6_pinfo *np = inet6_sk(sk);
1189 struct inet_cork *cork;
1190 struct sk_buff *skb;
1191 unsigned int maxfraglen, fragheaderlen;
1192 int exthdrlen;
1193 int dst_exthdrlen;
1194 int hh_len;
1195 int mtu;
1196 int copy;
1197 int err;
1198 int offset = 0;
1199 int csummode = CHECKSUM_NONE;
1200 __u8 tx_flags = 0;
1202 if (flags&MSG_PROBE)
1203 return 0;
1204 cork = &inet->cork.base;
1205 if (skb_queue_empty(&sk->sk_write_queue)) {
1207 * setup for corking
1209 if (opt) {
1210 if (WARN_ON(np->cork.opt))
1211 return -EINVAL;
1213 np->cork.opt = kmalloc(opt->tot_len, sk->sk_allocation);
1214 if (unlikely(np->cork.opt == NULL))
1215 return -ENOBUFS;
1217 np->cork.opt->tot_len = opt->tot_len;
1218 np->cork.opt->opt_flen = opt->opt_flen;
1219 np->cork.opt->opt_nflen = opt->opt_nflen;
1221 np->cork.opt->dst0opt = ip6_opt_dup(opt->dst0opt,
1222 sk->sk_allocation);
1223 if (opt->dst0opt && !np->cork.opt->dst0opt)
1224 return -ENOBUFS;
1226 np->cork.opt->dst1opt = ip6_opt_dup(opt->dst1opt,
1227 sk->sk_allocation);
1228 if (opt->dst1opt && !np->cork.opt->dst1opt)
1229 return -ENOBUFS;
1231 np->cork.opt->hopopt = ip6_opt_dup(opt->hopopt,
1232 sk->sk_allocation);
1233 if (opt->hopopt && !np->cork.opt->hopopt)
1234 return -ENOBUFS;
1236 np->cork.opt->srcrt = ip6_rthdr_dup(opt->srcrt,
1237 sk->sk_allocation);
1238 if (opt->srcrt && !np->cork.opt->srcrt)
1239 return -ENOBUFS;
1241 /* need source address above miyazawa*/
1243 dst_hold(&rt->dst);
1244 cork->dst = &rt->dst;
1245 inet->cork.fl.u.ip6 = *fl6;
1246 np->cork.hop_limit = hlimit;
1247 np->cork.tclass = tclass;
1248 mtu = np->pmtudisc == IPV6_PMTUDISC_PROBE ?
1249 rt->dst.dev->mtu : dst_mtu(&rt->dst);
1250 if (np->frag_size < mtu) {
1251 if (np->frag_size)
1252 mtu = np->frag_size;
1254 cork->fragsize = mtu;
1255 if (dst_allfrag(rt->dst.path))
1256 cork->flags |= IPCORK_ALLFRAG;
1257 cork->length = 0;
1258 sk->sk_sndmsg_page = NULL;
1259 sk->sk_sndmsg_off = 0;
1260 exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len;
1261 length += exthdrlen;
1262 transhdrlen += exthdrlen;
1263 dst_exthdrlen = rt->dst.header_len;
1264 } else {
1265 rt = (struct rt6_info *)cork->dst;
1266 fl6 = &inet->cork.fl.u.ip6;
1267 opt = np->cork.opt;
1268 transhdrlen = 0;
1269 exthdrlen = 0;
1270 dst_exthdrlen = 0;
1271 mtu = cork->fragsize;
1274 hh_len = LL_RESERVED_SPACE(rt->dst.dev);
1276 fragheaderlen = sizeof(struct ipv6hdr) + rt->rt6i_nfheader_len +
1277 (opt ? opt->opt_nflen : 0);
1278 maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr);
1280 if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) {
1281 if (cork->length + length > sizeof(struct ipv6hdr) + IPV6_MAXPLEN - fragheaderlen) {
1282 ipv6_local_error(sk, EMSGSIZE, fl6, mtu-exthdrlen);
1283 return -EMSGSIZE;
1287 /* For UDP, check if TX timestamp is enabled */
1288 if (sk->sk_type == SOCK_DGRAM) {
1289 err = sock_tx_timestamp(sk, &tx_flags);
1290 if (err)
1291 goto error;
1295 * Let's try using as much space as possible.
1296 * Use MTU if total length of the message fits into the MTU.
1297 * Otherwise, we need to reserve fragment header and
1298 * fragment alignment (= 8-15 octects, in total).
1300 * Note that we may need to "move" the data from the tail of
1301 * of the buffer to the new fragment when we split
1302 * the message.
1304 * FIXME: It may be fragmented into multiple chunks
1305 * at once if non-fragmentable extension headers
1306 * are too large.
1307 * --yoshfuji
1310 cork->length += length;
1311 if (length > mtu) {
1312 int proto = sk->sk_protocol;
1313 if (dontfrag && (proto == IPPROTO_UDP || proto == IPPROTO_RAW)){
1314 ipv6_local_rxpmtu(sk, fl6, mtu-exthdrlen);
1315 return -EMSGSIZE;
1318 if (proto == IPPROTO_UDP &&
1319 (rt->dst.dev->features & NETIF_F_UFO)) {
1321 err = ip6_ufo_append_data(sk, getfrag, from, length,
1322 hh_len, fragheaderlen,
1323 transhdrlen, mtu, flags, rt);
1324 if (err)
1325 goto error;
1326 return 0;
1330 if ((skb = skb_peek_tail(&sk->sk_write_queue)) == NULL)
1331 goto alloc_new_skb;
1333 while (length > 0) {
1334 /* Check if the remaining data fits into current packet. */
1335 copy = (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - skb->len;
1336 if (copy < length)
1337 copy = maxfraglen - skb->len;
1339 if (copy <= 0) {
1340 char *data;
1341 unsigned int datalen;
1342 unsigned int fraglen;
1343 unsigned int fraggap;
1344 unsigned int alloclen;
1345 struct sk_buff *skb_prev;
1346 alloc_new_skb:
1347 skb_prev = skb;
1349 /* There's no room in the current skb */
1350 if (skb_prev)
1351 fraggap = skb_prev->len - maxfraglen;
1352 else
1353 fraggap = 0;
1356 * If remaining data exceeds the mtu,
1357 * we know we need more fragment(s).
1359 datalen = length + fraggap;
1360 if (datalen > (cork->length <= mtu && !(cork->flags & IPCORK_ALLFRAG) ? mtu : maxfraglen) - fragheaderlen)
1361 datalen = maxfraglen - fragheaderlen;
1363 fraglen = datalen + fragheaderlen;
1364 if ((flags & MSG_MORE) &&
1365 !(rt->dst.dev->features&NETIF_F_SG))
1366 alloclen = mtu;
1367 else
1368 alloclen = datalen + fragheaderlen;
1370 alloclen += dst_exthdrlen;
1373 * The last fragment gets additional space at tail.
1374 * Note: we overallocate on fragments with MSG_MODE
1375 * because we have no idea if we're the last one.
1377 if (datalen == length + fraggap)
1378 alloclen += rt->dst.trailer_len;
1381 * We just reserve space for fragment header.
1382 * Note: this may be overallocation if the message
1383 * (without MSG_MORE) fits into the MTU.
1385 alloclen += sizeof(struct frag_hdr);
1387 if (transhdrlen) {
1388 skb = sock_alloc_send_skb(sk,
1389 alloclen + hh_len,
1390 (flags & MSG_DONTWAIT), &err);
1391 } else {
1392 skb = NULL;
1393 if (atomic_read(&sk->sk_wmem_alloc) <=
1394 2 * sk->sk_sndbuf)
1395 skb = sock_wmalloc(sk,
1396 alloclen + hh_len, 1,
1397 sk->sk_allocation);
1398 if (unlikely(skb == NULL))
1399 err = -ENOBUFS;
1400 else {
1401 /* Only the initial fragment
1402 * is time stamped.
1404 tx_flags = 0;
1407 if (skb == NULL)
1408 goto error;
1410 * Fill in the control structures
1412 skb->ip_summed = csummode;
1413 skb->csum = 0;
1414 /* reserve for fragmentation */
1415 skb_reserve(skb, hh_len+sizeof(struct frag_hdr));
1417 if (sk->sk_type == SOCK_DGRAM)
1418 skb_shinfo(skb)->tx_flags = tx_flags;
1421 * Find where to start putting bytes
1423 data = skb_put(skb, fraglen + dst_exthdrlen);
1424 skb_set_network_header(skb, exthdrlen + dst_exthdrlen);
1425 data += fragheaderlen + dst_exthdrlen;
1426 skb->transport_header = (skb->network_header +
1427 fragheaderlen);
1428 if (fraggap) {
1429 skb->csum = skb_copy_and_csum_bits(
1430 skb_prev, maxfraglen,
1431 data + transhdrlen, fraggap, 0);
1432 skb_prev->csum = csum_sub(skb_prev->csum,
1433 skb->csum);
1434 data += fraggap;
1435 pskb_trim_unique(skb_prev, maxfraglen);
1437 copy = datalen - transhdrlen - fraggap;
1439 if (copy < 0) {
1440 err = -EINVAL;
1441 kfree_skb(skb);
1442 goto error;
1443 } else if (copy > 0 && getfrag(from, data + transhdrlen, offset, copy, fraggap, skb) < 0) {
1444 err = -EFAULT;
1445 kfree_skb(skb);
1446 goto error;
1449 offset += copy;
1450 length -= datalen - fraggap;
1451 transhdrlen = 0;
1452 exthdrlen = 0;
1453 dst_exthdrlen = 0;
1454 csummode = CHECKSUM_NONE;
1457 * Put the packet on the pending queue
1459 __skb_queue_tail(&sk->sk_write_queue, skb);
1460 continue;
1463 if (copy > length)
1464 copy = length;
1466 if (!(rt->dst.dev->features&NETIF_F_SG)) {
1467 unsigned int off;
1469 off = skb->len;
1470 if (getfrag(from, skb_put(skb, copy),
1471 offset, copy, off, skb) < 0) {
1472 __skb_trim(skb, off);
1473 err = -EFAULT;
1474 goto error;
1476 } else {
1477 int i = skb_shinfo(skb)->nr_frags;
1478 skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1];
1479 struct page *page = sk->sk_sndmsg_page;
1480 int off = sk->sk_sndmsg_off;
1481 unsigned int left;
1483 if (page && (left = PAGE_SIZE - off) > 0) {
1484 if (copy >= left)
1485 copy = left;
1486 if (page != skb_frag_page(frag)) {
1487 if (i == MAX_SKB_FRAGS) {
1488 err = -EMSGSIZE;
1489 goto error;
1491 skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0);
1492 skb_frag_ref(skb, i);
1493 frag = &skb_shinfo(skb)->frags[i];
1495 } else if(i < MAX_SKB_FRAGS) {
1496 if (copy > PAGE_SIZE)
1497 copy = PAGE_SIZE;
1498 page = alloc_pages(sk->sk_allocation, 0);
1499 if (page == NULL) {
1500 err = -ENOMEM;
1501 goto error;
1503 sk->sk_sndmsg_page = page;
1504 sk->sk_sndmsg_off = 0;
1506 skb_fill_page_desc(skb, i, page, 0, 0);
1507 frag = &skb_shinfo(skb)->frags[i];
1508 } else {
1509 err = -EMSGSIZE;
1510 goto error;
1512 if (getfrag(from,
1513 skb_frag_address(frag) + skb_frag_size(frag),
1514 offset, copy, skb->len, skb) < 0) {
1515 err = -EFAULT;
1516 goto error;
1518 sk->sk_sndmsg_off += copy;
1519 skb_frag_size_add(frag, copy);
1520 skb->len += copy;
1521 skb->data_len += copy;
1522 skb->truesize += copy;
1523 atomic_add(copy, &sk->sk_wmem_alloc);
1525 offset += copy;
1526 length -= copy;
1528 return 0;
1529 error:
1530 cork->length -= length;
1531 IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1532 return err;
1535 static void ip6_cork_release(struct inet_sock *inet, struct ipv6_pinfo *np)
1537 if (np->cork.opt) {
1538 kfree(np->cork.opt->dst0opt);
1539 kfree(np->cork.opt->dst1opt);
1540 kfree(np->cork.opt->hopopt);
1541 kfree(np->cork.opt->srcrt);
1542 kfree(np->cork.opt);
1543 np->cork.opt = NULL;
1546 if (inet->cork.base.dst) {
1547 dst_release(inet->cork.base.dst);
1548 inet->cork.base.dst = NULL;
1549 inet->cork.base.flags &= ~IPCORK_ALLFRAG;
1551 memset(&inet->cork.fl, 0, sizeof(inet->cork.fl));
1554 int ip6_push_pending_frames(struct sock *sk)
1556 struct sk_buff *skb, *tmp_skb;
1557 struct sk_buff **tail_skb;
1558 struct in6_addr final_dst_buf, *final_dst = &final_dst_buf;
1559 struct inet_sock *inet = inet_sk(sk);
1560 struct ipv6_pinfo *np = inet6_sk(sk);
1561 struct net *net = sock_net(sk);
1562 struct ipv6hdr *hdr;
1563 struct ipv6_txoptions *opt = np->cork.opt;
1564 struct rt6_info *rt = (struct rt6_info *)inet->cork.base.dst;
1565 struct flowi6 *fl6 = &inet->cork.fl.u.ip6;
1566 unsigned char proto = fl6->flowi6_proto;
1567 int err = 0;
1569 if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
1570 goto out;
1571 tail_skb = &(skb_shinfo(skb)->frag_list);
1573 /* move skb->data to ip header from ext header */
1574 if (skb->data < skb_network_header(skb))
1575 __skb_pull(skb, skb_network_offset(skb));
1576 while ((tmp_skb = __skb_dequeue(&sk->sk_write_queue)) != NULL) {
1577 __skb_pull(tmp_skb, skb_network_header_len(skb));
1578 *tail_skb = tmp_skb;
1579 tail_skb = &(tmp_skb->next);
1580 skb->len += tmp_skb->len;
1581 skb->data_len += tmp_skb->len;
1582 skb->truesize += tmp_skb->truesize;
1583 tmp_skb->destructor = NULL;
1584 tmp_skb->sk = NULL;
1587 /* Allow local fragmentation. */
1588 if (np->pmtudisc < IPV6_PMTUDISC_DO)
1589 skb->local_df = 1;
1591 ipv6_addr_copy(final_dst, &fl6->daddr);
1592 __skb_pull(skb, skb_network_header_len(skb));
1593 if (opt && opt->opt_flen)
1594 ipv6_push_frag_opts(skb, opt, &proto);
1595 if (opt && opt->opt_nflen)
1596 ipv6_push_nfrag_opts(skb, opt, &proto, &final_dst);
1598 skb_push(skb, sizeof(struct ipv6hdr));
1599 skb_reset_network_header(skb);
1600 hdr = ipv6_hdr(skb);
1602 *(__be32*)hdr = fl6->flowlabel |
1603 htonl(0x60000000 | ((int)np->cork.tclass << 20));
1605 hdr->hop_limit = np->cork.hop_limit;
1606 hdr->nexthdr = proto;
1607 ipv6_addr_copy(&hdr->saddr, &fl6->saddr);
1608 ipv6_addr_copy(&hdr->daddr, final_dst);
1610 skb->priority = sk->sk_priority;
1611 skb->mark = sk->sk_mark;
1613 skb_dst_set(skb, dst_clone(&rt->dst));
1614 IP6_UPD_PO_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len);
1615 if (proto == IPPROTO_ICMPV6) {
1616 struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb));
1618 ICMP6MSGOUT_INC_STATS_BH(net, idev, icmp6_hdr(skb)->icmp6_type);
1619 ICMP6_INC_STATS_BH(net, idev, ICMP6_MIB_OUTMSGS);
1622 err = ip6_local_out(skb);
1623 if (err) {
1624 if (err > 0)
1625 err = net_xmit_errno(err);
1626 if (err)
1627 goto error;
1630 out:
1631 ip6_cork_release(inet, np);
1632 return err;
1633 error:
1634 IP6_INC_STATS(net, rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS);
1635 goto out;
1638 void ip6_flush_pending_frames(struct sock *sk)
1640 struct sk_buff *skb;
1642 while ((skb = __skb_dequeue_tail(&sk->sk_write_queue)) != NULL) {
1643 if (skb_dst(skb))
1644 IP6_INC_STATS(sock_net(sk), ip6_dst_idev(skb_dst(skb)),
1645 IPSTATS_MIB_OUTDISCARDS);
1646 kfree_skb(skb);
1649 ip6_cork_release(inet_sk(sk), inet6_sk(sk));