1 /* $NetBSD: frag6.c,v 1.46 2008/05/21 17:08:07 drochner Exp $ */
2 /* $KAME: frag6.c,v 1.40 2002/05/27 21:40:31 itojun Exp $ */
5 * Copyright (C) 1995, 1996, 1997, and 1998 WIDE Project.
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. Neither the name of the project nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
20 * THIS SOFTWARE IS PROVIDED BY THE PROJECT AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE PROJECT OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
33 #include <sys/cdefs.h>
34 __KERNEL_RCSID(0, "$NetBSD: frag6.c,v 1.46 2008/05/21 17:08:07 drochner Exp $");
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/malloc.h>
40 #include <sys/domain.h>
41 #include <sys/protosw.h>
42 #include <sys/socket.h>
43 #include <sys/socketvar.h>
44 #include <sys/errno.h>
46 #include <sys/kernel.h>
47 #include <sys/syslog.h>
50 #include <net/route.h>
52 #include <netinet/in.h>
53 #include <netinet/in_var.h>
54 #include <netinet/ip6.h>
55 #include <netinet6/ip6_var.h>
56 #include <netinet6/ip6_private.h>
57 #include <netinet/icmp6.h>
59 #include <net/net_osdep.h>
61 static void frag6_enq(struct ip6asfrag
*, struct ip6asfrag
*);
62 static void frag6_deq(struct ip6asfrag
*);
63 static void frag6_insque(struct ip6q
*, struct ip6q
*);
64 static void frag6_remque(struct ip6q
*);
65 static void frag6_freef(struct ip6q
*);
67 static int ip6q_locked
;
68 u_int frag6_nfragpackets
;
70 struct ip6q ip6q
; /* ip6 reassemble queue */
72 static inline int ip6q_lock_try(void);
73 static inline void ip6q_unlock(void);
81 * Use splvm() -- we're blocking things that would cause
105 #define IP6Q_LOCK() \
107 if (ip6q_lock_try() == 0) { \
108 printf("%s:%d: ip6q already locked\n", __FILE__, __LINE__); \
109 panic("ip6q_lock"); \
111 } while (/*CONSTCOND*/ 0)
112 #define IP6Q_LOCK_CHECK() \
114 if (ip6q_locked == 0) { \
115 printf("%s:%d: ip6q lock not held\n", __FILE__, __LINE__); \
116 panic("ip6q lock check"); \
118 } while (/*CONSTCOND*/ 0)
120 #define IP6Q_LOCK() (void) ip6q_lock_try()
121 #define IP6Q_LOCK_CHECK() /* nothing */
124 #define IP6Q_UNLOCK() ip6q_unlock()
126 #ifndef offsetof /* XXX */
127 #define offsetof(type, member) ((size_t)(&((type *)0)->member))
131 * Initialise reassembly queue and fragment identifier.
137 ip6q
.ip6q_next
= ip6q
.ip6q_prev
= &ip6q
;
141 * In RFC2460, the fragment and reassembly rules do not agree with each other,
142 * in terms of next header field handling in fragment header.
143 * While the sender will use the same value for all of the fragmented packets,
144 * receiver is suggested not to check the consistency.
146 * fragment rule (p20):
147 * (2) A Fragment header containing:
148 * The Next Header value that identifies the first header of
149 * the Fragmentable Part of the original packet.
150 * -> next header field is same for all fragments
152 * reassembly rule (p21):
153 * The Next Header field of the last header of the Unfragmentable
154 * Part is obtained from the Next Header field of the first
155 * fragment's Fragment header.
156 * -> should grab it from the first fragment only
158 * The following note also contradicts the fragment rule - no one is going to
159 * send different fragments with different next header fields.
161 * additional note (p22):
162 * The Next Header values in the Fragment headers of different
163 * fragments of the same original packet may differ. Only the value
164 * from the Offset zero fragment packet is used for reassembly.
165 * -> should grab it from the first fragment only
167 * There is no explicit reason given in the RFC. Historical reason maybe?
173 frag6_input(struct mbuf
**mp
, int *offp
, int proto
)
176 struct mbuf
*m
= *mp
, *t
;
178 struct ip6_frag
*ip6f
;
180 struct ip6asfrag
*af6
, *ip6af
, *af6dwn
;
181 int offset
= *offp
, nxt
, i
, next
;
183 int fragoff
, frgpartlen
; /* must be larger than u_int16_t */
184 struct ifnet
*dstifp
;
185 static struct route ro
;
188 struct sockaddr_in6 dst6
;
191 ip6
= mtod(m
, struct ip6_hdr
*);
192 IP6_EXTHDR_GET(ip6f
, struct ip6_frag
*, m
, offset
, sizeof(*ip6f
));
197 /* find the destination interface of the packet. */
198 sockaddr_in6_init(&u
.dst6
, &ip6
->ip6_dst
, 0, 0, 0);
199 if ((rt
= rtcache_lookup(&ro
, &u
.dst
)) != NULL
&& rt
->rt_ifa
!= NULL
)
200 dstifp
= ((struct in6_ifaddr
*)rt
->rt_ifa
)->ia_ifp
;
202 /* jumbo payload can't contain a fragment header */
203 if (ip6
->ip6_plen
== 0) {
204 icmp6_error(m
, ICMP6_PARAM_PROB
, ICMP6_PARAMPROB_HEADER
, offset
);
205 in6_ifstat_inc(dstifp
, ifs6_reass_fail
);
210 * check whether fragment packet's fragment length is
211 * multiple of 8 octets.
212 * sizeof(struct ip6_frag) == 8
213 * sizeof(struct ip6_hdr) = 40
215 if ((ip6f
->ip6f_offlg
& IP6F_MORE_FRAG
) &&
216 (((ntohs(ip6
->ip6_plen
) - offset
) & 0x7) != 0)) {
217 icmp6_error(m
, ICMP6_PARAM_PROB
, ICMP6_PARAMPROB_HEADER
,
218 offsetof(struct ip6_hdr
, ip6_plen
));
219 in6_ifstat_inc(dstifp
, ifs6_reass_fail
);
223 IP6_STATINC(IP6_STAT_FRAGMENTS
);
224 in6_ifstat_inc(dstifp
, ifs6_reass_reqd
);
226 /* offset now points to data portion */
227 offset
+= sizeof(struct ip6_frag
);
232 * Enforce upper bound on number of fragments.
233 * If maxfrag is 0, never accept fragments.
234 * If maxfrag is -1, accept all fragments without limitation.
236 if (ip6_maxfrags
< 0)
238 else if (frag6_nfrags
>= (u_int
)ip6_maxfrags
)
241 for (q6
= ip6q
.ip6q_next
; q6
!= &ip6q
; q6
= q6
->ip6q_next
)
242 if (ip6f
->ip6f_ident
== q6
->ip6q_ident
&&
243 IN6_ARE_ADDR_EQUAL(&ip6
->ip6_src
, &q6
->ip6q_src
) &&
244 IN6_ARE_ADDR_EQUAL(&ip6
->ip6_dst
, &q6
->ip6q_dst
))
249 * the first fragment to arrive, create a reassembly queue.
254 * Enforce upper bound on number of fragmented packets
255 * for which we attempt reassembly;
256 * If maxfragpackets is 0, never accept fragments.
257 * If maxfragpackets is -1, accept all fragments without
260 if (ip6_maxfragpackets
< 0)
262 else if (frag6_nfragpackets
>= (u_int
)ip6_maxfragpackets
)
264 frag6_nfragpackets
++;
265 q6
= (struct ip6q
*)malloc(sizeof(struct ip6q
), M_FTABLE
,
269 memset(q6
, 0, sizeof(*q6
));
271 frag6_insque(q6
, &ip6q
);
273 /* ip6q_nxt will be filled afterwards, from 1st fragment */
274 q6
->ip6q_down
= q6
->ip6q_up
= (struct ip6asfrag
*)q6
;
276 q6
->ip6q_nxtp
= (u_char
*)nxtp
;
278 q6
->ip6q_ident
= ip6f
->ip6f_ident
;
279 q6
->ip6q_arrive
= 0; /* Is it used anywhere? */
280 q6
->ip6q_ttl
= IPV6_FRAGTTL
;
281 q6
->ip6q_src
= ip6
->ip6_src
;
282 q6
->ip6q_dst
= ip6
->ip6_dst
;
283 q6
->ip6q_unfrglen
= -1; /* The 1st fragment has not arrived. */
289 * If it's the 1st fragment, record the length of the
290 * unfragmentable part and the next header of the fragment header.
292 fragoff
= ntohs(ip6f
->ip6f_offlg
& IP6F_OFF_MASK
);
294 q6
->ip6q_unfrglen
= offset
- sizeof(struct ip6_hdr
) -
295 sizeof(struct ip6_frag
);
296 q6
->ip6q_nxt
= ip6f
->ip6f_nxt
;
300 * Check that the reassembled packet would not exceed 65535 bytes
302 * If it would exceed, discard the fragment and return an ICMP error.
304 frgpartlen
= sizeof(struct ip6_hdr
) + ntohs(ip6
->ip6_plen
) - offset
;
305 if (q6
->ip6q_unfrglen
>= 0) {
306 /* The 1st fragment has already arrived. */
307 if (q6
->ip6q_unfrglen
+ fragoff
+ frgpartlen
> IPV6_MAXPACKET
) {
308 icmp6_error(m
, ICMP6_PARAM_PROB
, ICMP6_PARAMPROB_HEADER
,
309 offset
- sizeof(struct ip6_frag
) +
310 offsetof(struct ip6_frag
, ip6f_offlg
));
312 return (IPPROTO_DONE
);
314 } else if (fragoff
+ frgpartlen
> IPV6_MAXPACKET
) {
315 icmp6_error(m
, ICMP6_PARAM_PROB
, ICMP6_PARAMPROB_HEADER
,
316 offset
- sizeof(struct ip6_frag
) +
317 offsetof(struct ip6_frag
, ip6f_offlg
));
319 return (IPPROTO_DONE
);
322 * If it's the first fragment, do the above check for each
323 * fragment already stored in the reassembly queue.
326 for (af6
= q6
->ip6q_down
; af6
!= (struct ip6asfrag
*)q6
;
328 af6dwn
= af6
->ip6af_down
;
330 if (q6
->ip6q_unfrglen
+ af6
->ip6af_off
+ af6
->ip6af_frglen
>
332 struct mbuf
*merr
= IP6_REASS_MBUF(af6
);
333 struct ip6_hdr
*ip6err
;
334 int erroff
= af6
->ip6af_offset
;
336 /* dequeue the fragment. */
340 /* adjust pointer. */
341 ip6err
= mtod(merr
, struct ip6_hdr
*);
344 * Restore source and destination addresses
345 * in the erroneous IPv6 header.
347 ip6err
->ip6_src
= q6
->ip6q_src
;
348 ip6err
->ip6_dst
= q6
->ip6q_dst
;
350 icmp6_error(merr
, ICMP6_PARAM_PROB
,
351 ICMP6_PARAMPROB_HEADER
,
352 erroff
- sizeof(struct ip6_frag
) +
353 offsetof(struct ip6_frag
, ip6f_offlg
));
358 ip6af
= (struct ip6asfrag
*)malloc(sizeof(struct ip6asfrag
), M_FTABLE
,
362 memset(ip6af
, 0, sizeof(*ip6af
));
363 ip6af
->ip6af_head
= ip6
->ip6_flow
;
364 ip6af
->ip6af_len
= ip6
->ip6_plen
;
365 ip6af
->ip6af_nxt
= ip6
->ip6_nxt
;
366 ip6af
->ip6af_hlim
= ip6
->ip6_hlim
;
367 ip6af
->ip6af_mff
= ip6f
->ip6f_offlg
& IP6F_MORE_FRAG
;
368 ip6af
->ip6af_off
= fragoff
;
369 ip6af
->ip6af_frglen
= frgpartlen
;
370 ip6af
->ip6af_offset
= offset
;
371 IP6_REASS_MBUF(ip6af
) = m
;
374 af6
= (struct ip6asfrag
*)q6
;
379 * Find a segment which begins after this one does.
381 for (af6
= q6
->ip6q_down
; af6
!= (struct ip6asfrag
*)q6
;
382 af6
= af6
->ip6af_down
)
383 if (af6
->ip6af_off
> ip6af
->ip6af_off
)
388 * If there is a preceding segment, it may provide some of
389 * our data already. If so, drop the data from the incoming
390 * segment. If it provides all of our data, drop us.
392 if (af6
->ip6af_up
!= (struct ip6asfrag
*)q6
) {
393 i
= af6
->ip6af_up
->ip6af_off
+ af6
->ip6af_up
->ip6af_frglen
396 if (i
>= ip6af
->ip6af_frglen
)
398 m_adj(IP6_REASS_MBUF(ip6af
), i
);
399 ip6af
->ip6af_off
+= i
;
400 ip6af
->ip6af_frglen
-= i
;
405 * While we overlap succeeding segments trim them or,
406 * if they are completely covered, dequeue them.
408 while (af6
!= (struct ip6asfrag
*)q6
&&
409 ip6af
->ip6af_off
+ ip6af
->ip6af_frglen
> af6
->ip6af_off
) {
410 i
= (ip6af
->ip6af_off
+ ip6af
->ip6af_frglen
) - af6
->ip6af_off
;
411 if (i
< af6
->ip6af_frglen
) {
412 af6
->ip6af_frglen
-= i
;
414 m_adj(IP6_REASS_MBUF(af6
), i
);
417 af6
= af6
->ip6af_down
;
418 m_freem(IP6_REASS_MBUF(af6
->ip6af_up
));
419 frag6_deq(af6
->ip6af_up
);
423 * If the incoming fragment overlaps some existing fragments in
424 * the reassembly queue, drop it, since it is dangerous to override
425 * existing fragments from a security point of view.
426 * We don't know which fragment is the bad guy - here we trust
427 * fragment that came in earlier, with no real reason.
429 if (af6
->ip6af_up
!= (struct ip6asfrag
*)q6
) {
430 i
= af6
->ip6af_up
->ip6af_off
+ af6
->ip6af_up
->ip6af_frglen
433 #if 0 /* suppress the noisy log */
434 log(LOG_ERR
, "%d bytes of a fragment from %s "
435 "overlaps the previous fragment\n",
436 i
, ip6_sprintf(&q6
->ip6q_src
));
438 free(ip6af
, M_FTABLE
);
442 if (af6
!= (struct ip6asfrag
*)q6
) {
443 i
= (ip6af
->ip6af_off
+ ip6af
->ip6af_frglen
) - af6
->ip6af_off
;
445 #if 0 /* suppress the noisy log */
446 log(LOG_ERR
, "%d bytes of a fragment from %s "
447 "overlaps the succeeding fragment",
448 i
, ip6_sprintf(&q6
->ip6q_src
));
450 free(ip6af
, M_FTABLE
);
459 * Stick new segment in its place;
460 * check for complete reassembly.
461 * Move to front of packet queue, as we are
462 * the most recently active fragmented packet.
464 frag6_enq(ip6af
, af6
->ip6af_up
);
468 if (q6
!= ip6q
.ip6q_next
) {
470 frag6_insque(q6
, &ip6q
);
474 for (af6
= q6
->ip6q_down
; af6
!= (struct ip6asfrag
*)q6
;
475 af6
= af6
->ip6af_down
) {
476 if (af6
->ip6af_off
!= next
) {
480 next
+= af6
->ip6af_frglen
;
482 if (af6
->ip6af_up
->ip6af_mff
) {
488 * Reassembly is complete; concatenate fragments.
490 ip6af
= q6
->ip6q_down
;
491 t
= m
= IP6_REASS_MBUF(ip6af
);
492 af6
= ip6af
->ip6af_down
;
494 while (af6
!= (struct ip6asfrag
*)q6
) {
495 af6dwn
= af6
->ip6af_down
;
499 t
->m_next
= IP6_REASS_MBUF(af6
);
500 m_adj(t
->m_next
, af6
->ip6af_offset
);
505 /* adjust offset to point where the original next header starts */
506 offset
= ip6af
->ip6af_offset
- sizeof(struct ip6_frag
);
507 free(ip6af
, M_FTABLE
);
508 ip6
= mtod(m
, struct ip6_hdr
*);
509 ip6
->ip6_plen
= htons(next
+ offset
- sizeof(struct ip6_hdr
));
510 ip6
->ip6_src
= q6
->ip6q_src
;
511 ip6
->ip6_dst
= q6
->ip6q_dst
;
514 *q6
->ip6q_nxtp
= (u_char
)(nxt
& 0xff);
518 * Delete frag6 header at as little cost as possible.
520 if (offset
< m
->m_len
) {
521 memmove((char *)ip6
+ sizeof(struct ip6_frag
), ip6
, offset
);
522 m
->m_data
+= sizeof(struct ip6_frag
);
523 m
->m_len
-= sizeof(struct ip6_frag
);
525 /* this comes with no copy if the boundary is on cluster */
526 if ((t
= m_split(m
, offset
, M_DONTWAIT
)) == NULL
) {
528 frag6_nfrags
-= q6
->ip6q_nfrag
;
530 frag6_nfragpackets
--;
533 m_adj(t
, sizeof(struct ip6_frag
));
538 * Store NXT to the original.
541 u_int8_t
*prvnxtp
= ip6_get_prevhdr(m
, offset
); /* XXX */
546 frag6_nfrags
-= q6
->ip6q_nfrag
;
548 frag6_nfragpackets
--;
550 if (m
->m_flags
& M_PKTHDR
) { /* Isn't it always true? */
552 for (t
= m
; t
; t
= t
->m_next
)
554 m
->m_pkthdr
.len
= plen
;
557 IP6_STATINC(IP6_STAT_REASSEMBLED
);
558 in6_ifstat_inc(dstifp
, ifs6_reass_ok
);
561 * Tell launch routine the next header
571 in6_ifstat_inc(dstifp
, ifs6_reass_fail
);
572 IP6_STATINC(IP6_STAT_FRAGDROPPED
);
579 * Free a fragment reassembly header and all
580 * associated datagrams.
583 frag6_freef(struct ip6q
*q6
)
585 struct ip6asfrag
*af6
, *down6
;
589 for (af6
= q6
->ip6q_down
; af6
!= (struct ip6asfrag
*)q6
;
591 struct mbuf
*m
= IP6_REASS_MBUF(af6
);
593 down6
= af6
->ip6af_down
;
597 * Return ICMP time exceeded error for the 1st fragment.
598 * Just free other fragments.
600 if (af6
->ip6af_off
== 0) {
604 ip6
= mtod(m
, struct ip6_hdr
*);
606 /* restoure source and destination addresses */
607 ip6
->ip6_src
= q6
->ip6q_src
;
608 ip6
->ip6_dst
= q6
->ip6q_dst
;
610 icmp6_error(m
, ICMP6_TIME_EXCEEDED
,
611 ICMP6_TIME_EXCEED_REASSEMBLY
, 0);
617 frag6_nfrags
-= q6
->ip6q_nfrag
;
619 frag6_nfragpackets
--;
623 * Put an ip fragment on a reassembly chain.
624 * Like insque, but pointers in middle of structure.
627 frag6_enq(struct ip6asfrag
*af6
, struct ip6asfrag
*up6
)
633 af6
->ip6af_down
= up6
->ip6af_down
;
634 up6
->ip6af_down
->ip6af_up
= af6
;
635 up6
->ip6af_down
= af6
;
639 * To frag6_enq as remque is to insque.
642 frag6_deq(struct ip6asfrag
*af6
)
647 af6
->ip6af_up
->ip6af_down
= af6
->ip6af_down
;
648 af6
->ip6af_down
->ip6af_up
= af6
->ip6af_up
;
652 frag6_insque(struct ip6q
*new, struct ip6q
*old
)
657 new->ip6q_prev
= old
;
658 new->ip6q_next
= old
->ip6q_next
;
659 old
->ip6q_next
->ip6q_prev
= new;
660 old
->ip6q_next
= new;
664 frag6_remque(struct ip6q
*p6
)
669 p6
->ip6q_prev
->ip6q_next
= p6
->ip6q_next
;
670 p6
->ip6q_next
->ip6q_prev
= p6
->ip6q_prev
;
674 * IPv6 reassembling timer processing;
675 * if a timer expires on a reassembly
683 mutex_enter(softnet_lock
);
684 KERNEL_LOCK(1, NULL
);
689 while (q6
!= &ip6q
) {
692 if (q6
->ip6q_prev
->ip6q_ttl
== 0) {
693 IP6_STATINC(IP6_STAT_FRAGTIMEOUT
);
694 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
695 frag6_freef(q6
->ip6q_prev
);
699 * If we are over the maximum number of fragments
700 * (due to the limit being lowered), drain off
701 * enough to get down to the new limit.
703 while (frag6_nfragpackets
> (u_int
)ip6_maxfragpackets
&&
705 IP6_STATINC(IP6_STAT_FRAGOVERFLOW
);
706 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
707 frag6_freef(ip6q
.ip6q_prev
);
713 * Routing changes might produce a better route than we last used;
714 * make sure we notice eventually, even if forwarding only for one
715 * destination and the cache is never replaced.
717 rtcache_free(&ip6_forward_rt
);
718 rtcache_free(&ipsrcchk_rt
);
721 KERNEL_UNLOCK_ONE(NULL
);
722 mutex_exit(softnet_lock
);
726 * Drain off all datagram fragments.
732 KERNEL_LOCK(1, NULL
);
733 if (ip6q_lock_try() != 0) {
734 while (ip6q
.ip6q_next
!= &ip6q
) {
735 IP6_STATINC(IP6_STAT_FRAGDROPPED
);
736 /* XXX in6_ifstat_inc(ifp, ifs6_reass_fail) */
737 frag6_freef(ip6q
.ip6q_next
);
741 KERNEL_UNLOCK_ONE(NULL
);