/*	$NetBSD: uipc_mbuf.c,v 1.131 2009/03/15 17:14:40 cegger Exp $	*/

/*
 * Copyright (c) 1999, 2001 The NetBSD Foundation, Inc.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
 * NASA Ames Research Center.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

/*
 * Copyright (c) 1982, 1986, 1988, 1991, 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)uipc_mbuf.c	8.4 (Berkeley) 2/14/95
 */
#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: uipc_mbuf.c,v 1.131 2009/03/15 17:14:40 cegger Exp $");

#include "opt_mbuftrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/atomic.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/syslog.h>
#include <sys/domain.h>
#include <sys/protosw.h>
#include <sys/percpu.h>
#include <sys/pool.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
pool_cache_t mb_cache;	/* mbuf cache */
pool_cache_t mcl_cache;	/* mbuf cluster cache */

static int mb_ctor(void *, void *, int);

static void	*mclpool_alloc(struct pool *, int);
static void	mclpool_release(struct pool *, void *);

static void	sysctl_kern_mbuf_setup(void);

static struct sysctllog *mbuf_sysctllog;

static struct pool_allocator mclpool_allocator = {
	.pa_alloc = mclpool_alloc,
	.pa_free = mclpool_release,
};

static struct mbuf *m_copym0(struct mbuf *, int, int, int, int);
static struct mbuf *m_split0(struct mbuf *, int, int, int);
static int m_copyback0(struct mbuf **, int, int, const void *, int, int);

/* flags for m_copyback0 */
#define	M_COPYBACK0_COPYBACK	0x0001	/* copyback from cp */
#define	M_COPYBACK0_PRESERVE	0x0002	/* preserve original data */
#define	M_COPYBACK0_COW		0x0004	/* do copy-on-write */
#define	M_COPYBACK0_EXTEND	0x0008	/* extend chain */

static const char mclpool_warnmsg[] =
    "WARNING: mclpool limit reached; increase NMBCLUSTERS";

MALLOC_DEFINE(M_MBUF, "mbuf", "mbuf");

static percpu_t *mbstat_percpu;

#if defined(MBUFTRACE)
struct mownerhead mowners = LIST_HEAD_INITIALIZER(mowners);
struct mowner unknown_mowners[] = {
	MOWNER_INIT("unknown", "free"),
	MOWNER_INIT("unknown", "data"),
	MOWNER_INIT("unknown", "header"),
	MOWNER_INIT("unknown", "soname"),
	MOWNER_INIT("unknown", "soopts"),
	MOWNER_INIT("unknown", "ftable"),
	MOWNER_INIT("unknown", "control"),
	MOWNER_INIT("unknown", "oobdata"),
};
struct mowner revoked_mowner = MOWNER_INIT("revoked", "");
#endif

#define	MEXT_ISEMBEDDED(m)	((m)->m_ext_ref == (m))

#define	MCLADDREFERENCE(o, n)						\
do {									\
	KASSERT(((o)->m_flags & M_EXT) != 0);				\
	KASSERT(((n)->m_flags & M_EXT) == 0);				\
	KASSERT((o)->m_ext.ext_refcnt >= 1);				\
	(n)->m_flags |= ((o)->m_flags & M_EXTCOPYFLAGS);		\
	atomic_inc_uint(&(o)->m_ext.ext_refcnt);			\
	(n)->m_ext_ref = (o)->m_ext_ref;				\
	mowner_ref((n), (n)->m_flags);					\
	MCLREFDEBUGN((n), __FILE__, __LINE__);				\
} while (/* CONSTCOND */ 0)
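
/*
 * Editor's sketch (not part of the original source): MCLADDREFERENCE is the
 * internal mechanism behind the "shallow" copy routines below.  Instead of
 * duplicating external (cluster) storage, the new mbuf bumps the reference
 * count and points at the same buffer; the storage is returned to its pool
 * only when the last referencing mbuf is freed.  With hypothetical locals
 * "m" and "clone", the caller-visible pattern looks like:
 *
 *	struct mbuf *m, *clone;
 *
 *	m = m_gethdr(M_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return;
 *	MCLGET(m, M_DONTWAIT);
 *	if ((m->m_flags & M_EXT) == 0) {
 *		m_freem(m);
 *		return;
 *	}
 *	clone = m_copym(m, 0, M_COPYALL, M_DONTWAIT);	(shares the cluster)
 *	m_freem(m);					(cluster still referenced)
 *	m_freem(clone);					(last reference: cluster freed)
 */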
/*
 * Initialize the mbuf allocator.
 */
void
mbinit(void)
{

	CTASSERT(sizeof(struct _m_ext) <= MHLEN);
	CTASSERT(sizeof(struct mbuf) == MSIZE);

	sysctl_kern_mbuf_setup();

	mclpool_allocator.pa_backingmap = mb_map;

	mb_cache = pool_cache_init(msize, 0, 0, 0, "mbpl",
	    NULL, IPL_VM, mb_ctor, NULL, NULL);
	KASSERT(mb_cache != NULL);

	mcl_cache = pool_cache_init(mclbytes, 0, 0, 0, "mclpl",
	    &mclpool_allocator, IPL_VM, NULL, NULL, NULL);
	KASSERT(mcl_cache != NULL);

	pool_cache_set_drain_hook(mb_cache, m_reclaim, NULL);
	pool_cache_set_drain_hook(mcl_cache, m_reclaim, NULL);

	/*
	 * Set the hard limit on the mclpool to the number of
	 * mbuf clusters the kernel is to support.  Log the limit
	 * reached message at most once a minute.
	 */
	pool_cache_sethardlimit(mcl_cache, nmbclusters, mclpool_warnmsg, 60);

	mbstat_percpu = percpu_alloc(sizeof(struct mbstat_cpu));

	/*
	 * Set a low water mark for both mbufs and clusters.  This should
	 * help ensure that they can be allocated in a memory starvation
	 * situation.  This is important for e.g. diskless systems which
	 * must allocate mbufs in order for the pagedaemon to clean pages.
	 */
	pool_cache_setlowat(mb_cache, mblowat);
	pool_cache_setlowat(mcl_cache, mcllowat);

#ifdef MBUFTRACE
	{
		/*
		 * Attach the unknown mowners.
		 */
		int i;
		MOWNER_ATTACH(&revoked_mowner);
		for (i = sizeof(unknown_mowners)/sizeof(unknown_mowners[0]);
		     i-- > 0; )
			MOWNER_ATTACH(&unknown_mowners[i]);
	}
#endif
}
/*
 * sysctl helper routine for the kern.mbuf subtree.  nmbclusters may
 * or may not be writable, and mblowat and mcllowat need range
 * checking and pool tweaking after being reset.
 */
static int
sysctl_kern_mbuf(SYSCTLFN_ARGS)
{
	int error, newval;
	struct sysctlnode node;

	node = *rnode;
	node.sysctl_data = &newval;
	switch (rnode->sysctl_num) {
	case MBUF_NMBCLUSTERS:
		if (mb_map != NULL) {
			node.sysctl_flags &= ~CTLFLAG_READWRITE;
			node.sysctl_flags |= CTLFLAG_READONLY;
		}
		/* FALLTHROUGH */
	case MBUF_MBLOWAT:
	case MBUF_MCLLOWAT:
		newval = *(int*)rnode->sysctl_data;
		break;
	default:
		return (EOPNOTSUPP);
	}

	error = sysctl_lookup(SYSCTLFN_CALL(&node));
	if (error || newp == NULL)
		return (error);
	if (newval < 0)
		return (EINVAL);

	switch (node.sysctl_num) {
	case MBUF_NMBCLUSTERS:
		if (newval < nmbclusters)
			return (EINVAL);
		nmbclusters = newval;
		pool_cache_sethardlimit(mcl_cache, nmbclusters,
		    mclpool_warnmsg, 60);
		break;
	case MBUF_MBLOWAT:
		mblowat = newval;
		pool_cache_setlowat(mb_cache, mblowat);
		break;
	case MBUF_MCLLOWAT:
		mcllowat = newval;
		pool_cache_setlowat(mcl_cache, mcllowat);
		break;
	}

	return (0);
}
#ifdef MBUFTRACE
static void
mowner_conver_to_user_cb(void *v1, void *v2, struct cpu_info *ci)
{
	struct mowner_counter *mc = v1;
	struct mowner_user *mo_user = v2;
	int i;

	for (i = 0; i < MOWNER_COUNTER_NCOUNTERS; i++) {
		mo_user->mo_counter[i] += mc->mc_counter[i];
	}
}

static void
mowner_convert_to_user(struct mowner *mo, struct mowner_user *mo_user)
{

	memset(mo_user, 0, sizeof(*mo_user));
	CTASSERT(sizeof(mo_user->mo_name) == sizeof(mo->mo_name));
	CTASSERT(sizeof(mo_user->mo_descr) == sizeof(mo->mo_descr));
	memcpy(mo_user->mo_name, mo->mo_name, sizeof(mo->mo_name));
	memcpy(mo_user->mo_descr, mo->mo_descr, sizeof(mo->mo_descr));
	percpu_foreach(mo->mo_counters, mowner_conver_to_user_cb, mo_user);
}

static int
sysctl_kern_mbuf_mowners(SYSCTLFN_ARGS)
{
	struct mowner *mo;
	size_t len = 0;
	int error = 0;

	if (namelen != 0)
		return (EINVAL);
	if (newp != NULL)
		return (EPERM);

	LIST_FOREACH(mo, &mowners, mo_link) {
		struct mowner_user mo_user;

		mowner_convert_to_user(mo, &mo_user);

		if (oldp != NULL) {
			if (*oldlenp - len < sizeof(mo_user)) {
				error = ENOMEM;
				break;
			}
			error = copyout(&mo_user, (char *)oldp + len,
			    sizeof(mo_user));
			if (error)
				break;
		}
		len += sizeof(mo_user);
	}

	if (error == 0)
		*oldlenp = len;

	return (error);
}
#endif /* MBUFTRACE */
static void
mbstat_conver_to_user_cb(void *v1, void *v2, struct cpu_info *ci)
{
	struct mbstat_cpu *mbsc = v1;
	struct mbstat *mbs = v2;
	int i;

	for (i = 0; i < __arraycount(mbs->m_mtypes); i++) {
		mbs->m_mtypes[i] += mbsc->m_mtypes[i];
	}
}

static void
mbstat_convert_to_user(struct mbstat *mbs)
{

	memset(mbs, 0, sizeof(*mbs));
	mbs->m_drain = mbstat.m_drain;
	percpu_foreach(mbstat_percpu, mbstat_conver_to_user_cb, mbs);
}

static int
sysctl_kern_mbuf_stats(SYSCTLFN_ARGS)
{
	struct sysctlnode node;
	struct mbstat mbs;

	mbstat_convert_to_user(&mbs);
	node = *rnode;
	node.sysctl_data = &mbs;
	node.sysctl_size = sizeof(mbs);
	return sysctl_lookup(SYSCTLFN_CALL(&node));
}
static void
sysctl_kern_mbuf_setup(void)
{

	KASSERT(mbuf_sysctllog == NULL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "kern", NULL,
		       NULL, 0, NULL, 0,
		       CTL_KERN, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_NODE, "mbuf",
		       SYSCTL_DESCR("mbuf control variables"),
		       NULL, 0, NULL, 0,
		       CTL_KERN, KERN_MBUF, CTL_EOL);

	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
		       CTLTYPE_INT, "msize",
		       SYSCTL_DESCR("mbuf base size"),
		       NULL, msize, NULL, 0,
		       CTL_KERN, KERN_MBUF, MBUF_MSIZE, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_IMMEDIATE,
		       CTLTYPE_INT, "mclbytes",
		       SYSCTL_DESCR("mbuf cluster size"),
		       NULL, mclbytes, NULL, 0,
		       CTL_KERN, KERN_MBUF, MBUF_MCLBYTES, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "nmbclusters",
		       SYSCTL_DESCR("Limit on the number of mbuf clusters"),
		       sysctl_kern_mbuf, 0, &nmbclusters, 0,
		       CTL_KERN, KERN_MBUF, MBUF_NMBCLUSTERS, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "mblowat",
		       SYSCTL_DESCR("mbuf low water mark"),
		       sysctl_kern_mbuf, 0, &mblowat, 0,
		       CTL_KERN, KERN_MBUF, MBUF_MBLOWAT, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT|CTLFLAG_READWRITE,
		       CTLTYPE_INT, "mcllowat",
		       SYSCTL_DESCR("mbuf cluster low water mark"),
		       sysctl_kern_mbuf, 0, &mcllowat, 0,
		       CTL_KERN, KERN_MBUF, MBUF_MCLLOWAT, CTL_EOL);
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_STRUCT, "stats",
		       SYSCTL_DESCR("mbuf allocation statistics"),
		       sysctl_kern_mbuf_stats, 0, NULL, 0,
		       CTL_KERN, KERN_MBUF, MBUF_STATS, CTL_EOL);
#ifdef MBUFTRACE
	sysctl_createv(&mbuf_sysctllog, 0, NULL, NULL,
		       CTLFLAG_PERMANENT,
		       CTLTYPE_STRUCT, "mowners",
		       SYSCTL_DESCR("Information about mbuf owners"),
		       sysctl_kern_mbuf_mowners, 0, NULL, 0,
		       CTL_KERN, KERN_MBUF, MBUF_MOWNERS, CTL_EOL);
#endif /* MBUFTRACE */
}
static void *
mclpool_alloc(struct pool *pp, int flags)
{
	bool waitok = (flags & PR_WAITOK) ? true : false;

	return ((void *)uvm_km_alloc_poolpage(mb_map, waitok));
}

static void
mclpool_release(struct pool *pp, void *v)
{

	uvm_km_free_poolpage(mb_map, (vaddr_t)v);
}

static int
mb_ctor(void *arg, void *object, int flags)
{
	struct mbuf *m = object;

#ifdef POOL_VTOPHYS
	m->m_paddr = POOL_VTOPHYS(m);
#else
	m->m_paddr = M_PADDR_INVALID;
#endif
	return (0);
}

void
m_reclaim(void *arg, int flags)
{
	struct domain *dp;
	const struct protosw *pr;
	struct ifnet *ifp;

	KERNEL_LOCK(1, NULL);
	DOMAIN_FOREACH(dp) {
		for (pr = dp->dom_protosw;
		     pr < dp->dom_protoswNPROTOSW; pr++)
			if (pr->pr_drain)
				(*pr->pr_drain)();
	}
	IFNET_FOREACH(ifp) {
		if (ifp->if_drain)
			(*ifp->if_drain)(ifp);
	}
	mbstat.m_drain++;
	KERNEL_UNLOCK_ONE(NULL);
}
/*
 * Space allocation routines.
 * These are also available as macros
 * for critical paths.
 */
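
/*
 * Editor's sketch (not part of the original source): a typical caller of
 * these allocation routines obtains a packet header mbuf, attaches a
 * cluster when the payload will not fit in the internal data area, and
 * checks M_EXT before using it.  "len" and the error handling here are
 * hypothetical and only illustrate the calling convention:
 *
 *	struct mbuf *m;
 *
 *	m = m_gethdr(M_DONTWAIT, MT_DATA);
 *	if (m == NULL)
 *		return ENOBUFS;
 *	if (len > MHLEN) {
 *		MCLGET(m, M_DONTWAIT);
 *		if ((m->m_flags & M_EXT) == 0) {
 *			m_freem(m);
 *			return ENOBUFS;
 *		}
 *	}
 *	m->m_len = m->m_pkthdr.len = len;
 */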
struct mbuf *
m_get(int nowait, int type)
{
	struct mbuf *m;

	m = pool_cache_get(mb_cache,
	    nowait == M_WAIT ? PR_WAITOK|PR_LIMITFAIL : 0);
	if (m == NULL)
		return NULL;

	mbstat_type_add(type, 1);
	mowner_init(m, type);
	m->m_ext_ref = m;
	m->m_type = type;
	m->m_next = NULL;
	m->m_nextpkt = NULL;
	m->m_data = m->m_dat;
	m->m_flags = 0;

	return m;
}

struct mbuf *
m_gethdr(int nowait, int type)
{
	struct mbuf *m;

	m = m_get(nowait, type);
	if (m == NULL)
		return NULL;

	m->m_data = m->m_pktdat;
	m->m_flags = M_PKTHDR;
	m->m_pkthdr.rcvif = NULL;
	m->m_pkthdr.csum_flags = 0;
	m->m_pkthdr.csum_data = 0;
	SLIST_INIT(&m->m_pkthdr.tags);

	return m;
}

struct mbuf *
m_getclr(int nowait, int type)
{
	struct mbuf *m;

	MGET(m, nowait, type);
	if (m == 0)
		return (NULL);
	memset(mtod(m, void *), 0, MLEN);
	return (m);
}

void
m_clget(struct mbuf *m, int nowait)
{

	MCLGET(m, nowait);
}

struct mbuf *
m_free(struct mbuf *m)
{
	struct mbuf *n;

	MFREE(m, n);
	return (n);
}

void
m_freem(struct mbuf *m)
{
	struct mbuf *n;

	if (m == NULL)
		return;
	do {
		MFREE(m, n);
		m = n;
	} while (m);
}
/*
 * Walk a chain of mbufs, claiming ownership of each mbuf in the chain.
 */
void
m_claimm(struct mbuf *m, struct mowner *mo)
{

	for (; m != NULL; m = m->m_next)
		MCLAIM(m, mo);
}
/*
 * Mbuffer utility routines.
 */

/*
 * Lesser-used path for M_PREPEND:
 * allocate new mbuf to prepend to chain,
 * copy junk along.
 */
struct mbuf *
m_prepend(struct mbuf *m, int len, int how)
{
	struct mbuf *mn;

	MGET(mn, how, m->m_type);
	if (mn == (struct mbuf *)NULL) {
		m_freem(m);
		return ((struct mbuf *)NULL);
	}
	if (m->m_flags & M_PKTHDR) {
		M_MOVE_PKTHDR(mn, m);
	} else {
		MCLAIM(mn, m->m_owner);
	}
	mn->m_next = m;
	m = mn;
	if (len < MHLEN)
		MH_ALIGN(m, len);
	m->m_len = len;
	return (m);
}
/*
 * Make a copy of an mbuf chain starting "off0" bytes from the beginning,
 * continuing for "len" bytes.  If len is M_COPYALL, copy to end of mbuf.
 * The wait parameter is a choice of M_WAIT/M_DONTWAIT from caller.
 */
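
/*
 * Editor's sketch (not part of the original source): a common use of
 * m_copym() is keeping a reference to data that is handed to a consumer
 * which will free it, e.g. retaining an unacknowledged segment.  The copy
 * is shallow for clustered data, so it is cheap but read-only; "m0" is a
 * hypothetical chain owned by the caller:
 *
 *	struct mbuf *copy;
 *
 *	copy = m_copym(m0, 0, M_COPYALL, M_DONTWAIT);
 *	if (copy == NULL)
 *		return ENOBUFS;
 *	(hand "copy" to the consumer, keep "m0")
 *
 * m_dup() has the same interface but performs a deep copy, so the result
 * is writable.
 */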
struct mbuf *
m_copym(struct mbuf *m, int off0, int len, int wait)
{

	return m_copym0(m, off0, len, wait, 0);	/* shallow copy on M_EXT */
}

struct mbuf *
m_dup(struct mbuf *m, int off0, int len, int wait)
{

	return m_copym0(m, off0, len, wait, 1);	/* deep copy */
}

static struct mbuf *
m_copym0(struct mbuf *m, int off0, int len, int wait, int deep)
{
	struct mbuf *n, **np;
	int off = off0;
	struct mbuf *top;
	int copyhdr = 0;

	if (off < 0 || len < 0)
		panic("m_copym: off %d, len %d", off, len);
	if (off == 0 && m->m_flags & M_PKTHDR)
		copyhdr = 1;
	while (off > 0) {
		if (m == 0)
			panic("m_copym: m == 0, off %d", off);
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	np = &top;
	top = 0;
	while (len > 0) {
		if (m == 0) {
			if (len != M_COPYALL)
				panic("m_copym: m == 0, len %d [!COPYALL]",
				    len);
			break;
		}
		MGET(n, wait, m->m_type);
		*np = n;
		if (n == 0)
			goto nospace;
		MCLAIM(n, m->m_owner);
		if (copyhdr) {
			M_COPY_PKTHDR(n, m);
			if (len == M_COPYALL)
				n->m_pkthdr.len -= off0;
			else
				n->m_pkthdr.len = len;
			copyhdr = 0;
		}
		n->m_len = min(len, m->m_len - off);
		if (m->m_flags & M_EXT) {
			if (!deep) {
				n->m_data = m->m_data + off;
				MCLADDREFERENCE(m, n);
			} else {
				/*
				 * we are unsure about the way m was allocated.
				 * copy into multiple MCLBYTES cluster mbufs.
				 */
				MCLGET(n, wait);
				n->m_len = 0;
				n->m_len = M_TRAILINGSPACE(n);
				n->m_len = min(n->m_len, len);
				n->m_len = min(n->m_len, m->m_len - off);
				memcpy(mtod(n, void *), mtod(m, char *) + off,
				    (unsigned)n->m_len);
			}
		} else
			memcpy(mtod(n, void *), mtod(m, char *) + off,
			    (unsigned)n->m_len);
		if (len != M_COPYALL)
			len -= n->m_len;
		off += n->m_len;
#ifdef DIAGNOSTIC
		if (off > m->m_len)
			panic("m_copym0 overrun");
#endif
		if (off == m->m_len) {
			m = m->m_next;
			off = 0;
		}
		np = &n->m_next;
	}
	return (top);
nospace:
	m_freem(top);
	return (NULL);
}
/*
 * Copy an entire packet, including header (which must be present).
 * An optimization of the common case `m_copym(m, 0, M_COPYALL, how)'.
 */
struct mbuf *
m_copypacket(struct mbuf *m, int how)
{
	struct mbuf *top, *n, *o;

	MGET(n, how, m->m_type);
	top = n;
	if (!n)
		goto nospace;

	MCLAIM(n, m->m_owner);
	M_COPY_PKTHDR(n, m);
	n->m_len = m->m_len;
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data;
		MCLADDREFERENCE(m, n);
	} else {
		memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
	}

	m = m->m_next;
	while (m) {
		MGET(o, how, m->m_type);
		if (!o)
			goto nospace;

		MCLAIM(o, m->m_owner);
		n->m_next = o;
		n = n->m_next;

		n->m_len = m->m_len;
		if (m->m_flags & M_EXT) {
			n->m_data = m->m_data;
			MCLADDREFERENCE(m, n);
		} else {
			memcpy(mtod(n, char *), mtod(m, char *), n->m_len);
		}

		m = m->m_next;
	}
	return (top);
nospace:
	m_freem(top);
	return (NULL);
}
/*
 * Copy data from an mbuf chain starting "off" bytes from the beginning,
 * continuing for "len" bytes, into the indicated buffer.
 */
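
/*
 * Editor's sketch (not part of the original source): m_copydata() is the
 * usual way to extract a fixed-size header from a chain that may be split
 * across several mbufs, without requiring the chain to be contiguous.
 * "m0" and the use of an IP header here are hypothetical:
 *
 *	struct ip iphdr;
 *
 *	if (m0->m_pkthdr.len < sizeof(iphdr))
 *		return EINVAL;
 *	m_copydata(m0, 0, sizeof(iphdr), &iphdr);
 */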
void
m_copydata(struct mbuf *m, int off, int len, void *vp)
{
	unsigned count;
	void *cp = vp;

	if (off < 0 || len < 0)
		panic("m_copydata: off %d, len %d", off, len);
	while (off > 0) {
		if (m == NULL)
			panic("m_copydata: m == NULL, off %d", off);
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		if (m == NULL)
			panic("m_copydata: m == NULL, len %d", len);
		count = min(m->m_len - off, len);
		memcpy(cp, mtod(m, char *) + off, count);
		len -= count;
		cp = (char *)cp + count;
		off = 0;
		m = m->m_next;
	}
}
/*
 * Concatenate mbuf chain n to m.
 * n might be copied into m (when n->m_len is small), therefore the data
 * portion of n could end up in an mbuf of a different mbuf type.
 * The m_pkthdr of either chain is not updated.
 */
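
/*
 * Editor's sketch (not part of the original source): m_cat() and m_adj()
 * are often used together when reassembling: trim a fragment's header and
 * append the remaining data to the tail of a chain.  "frag", "chain" and
 * "hdrlen" are hypothetical, and note that m_cat() does not touch
 * m_pkthdr.len:
 *
 *	int fraglen = frag->m_pkthdr.len - hdrlen;
 *
 *	m_adj(frag, hdrlen);		(drop hdrlen bytes from the front)
 *	m_cat(chain, frag);		(frag's data now ends the chain)
 *	chain->m_pkthdr.len += fraglen;	(caller maintains the header length)
 */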
void
m_cat(struct mbuf *m, struct mbuf *n)
{

	while (m->m_next)
		m = m->m_next;
	while (n) {
		if (M_READONLY(m) || n->m_len > M_TRAILINGSPACE(m)) {
			/* just join the two chains */
			m->m_next = n;
			return;
		}
		/* splat the data from one into the other */
		memcpy(mtod(m, char *) + m->m_len, mtod(n, void *),
		    n->m_len);
		m->m_len += n->m_len;
		n = m_free(n);
	}
}
void
m_adj(struct mbuf *mp, int req_len)
{
	int len = req_len;
	struct mbuf *m;
	int count;

	if ((m = mp) == NULL)
		return;
	if (len >= 0) {
		/*
		 * Trim from head.
		 */
		while (m != NULL && len > 0) {
			if (m->m_len <= len) {
				len -= m->m_len;
				m->m_len = 0;
				m = m->m_next;
			} else {
				m->m_len -= len;
				m->m_data += len;
				len = 0;
			}
		}
		m = mp;
		if (mp->m_flags & M_PKTHDR)
			m->m_pkthdr.len -= (req_len - len);
	} else {
		/*
		 * Trim from tail.  Scan the mbuf chain,
		 * calculating its length and finding the last mbuf.
		 * If the adjustment only affects this mbuf, then just
		 * adjust and return.  Otherwise, rescan and truncate
		 * after the remaining size.
		 */
		len = -len;
		count = 0;
		for (;;) {
			count += m->m_len;
			if (m->m_next == (struct mbuf *)0)
				break;
			m = m->m_next;
		}
		if (m->m_len >= len) {
			m->m_len -= len;
			if (mp->m_flags & M_PKTHDR)
				mp->m_pkthdr.len -= len;
			return;
		}
		count -= len;
		if (count < 0)
			count = 0;
		/*
		 * Correct length for chain is "count".
		 * Find the mbuf with last data, adjust its length,
		 * and toss data from remaining mbufs on chain.
		 */
		m = mp;
		if (m->m_flags & M_PKTHDR)
			m->m_pkthdr.len = count;
		for (; m; m = m->m_next) {
			if (m->m_len >= count) {
				m->m_len = count;
				break;
			}
			count -= m->m_len;
		}
		if (m)
			while (m->m_next)
				(m = m->m_next)->m_len = 0;
	}
}
/*
 * Rearrange an mbuf chain so that len bytes are contiguous
 * and in the data area of an mbuf (so that mtod and dtom
 * will work for a structure of size len).  Returns the resulting
 * mbuf chain on success, frees it and returns null on failure.
 * If there is room, it will add up to max_protohdr-len extra bytes to the
 * contiguous region in an attempt to avoid being called next time.
 */
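
/*
 * Editor's sketch (not part of the original source): protocol input paths
 * call m_pullup() before casting m_data to a header structure, so that the
 * whole header is contiguous.  The routine frees the chain on failure, so
 * the caller must not touch the old pointer afterwards; "m" and the IP
 * header here are hypothetical:
 *
 *	struct ip *ip;
 *
 *	if (m->m_len < sizeof(struct ip)) {
 *		m = m_pullup(m, sizeof(struct ip));
 *		if (m == NULL)
 *			return;		(chain already freed)
 *	}
 *	ip = mtod(m, struct ip *);
 */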
struct mbuf *
m_pullup(struct mbuf *n, int len)
{
	struct mbuf *m;
	int count;
	int space;

	/*
	 * If first mbuf has no cluster, and has room for len bytes
	 * without shifting current data, pullup into it,
	 * otherwise allocate a new mbuf to prepend to the chain.
	 */
	if ((n->m_flags & M_EXT) == 0 &&
	    n->m_data + len < &n->m_dat[MLEN] && n->m_next) {
		if (n->m_len >= len)
			return (n);
		m = n;
		n = n->m_next;
		len -= m->m_len;
	} else {
		if (len > MHLEN)
			goto bad;
		MGET(m, M_DONTWAIT, n->m_type);
		if (m == 0)
			goto bad;
		MCLAIM(m, n->m_owner);
		m->m_len = 0;
		if (n->m_flags & M_PKTHDR) {
			M_MOVE_PKTHDR(m, n);
		}
	}
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		memcpy(mtod(m, char *) + m->m_len, mtod(n, void *),
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	return (NULL);
}
/*
 * Like m_pullup(), except a new mbuf is always allocated, and we allow
 * the amount of empty space before the data in the new mbuf to be specified
 * (in the event that the caller expects to prepend later).
 */
struct mbuf *
m_copyup(struct mbuf *n, int len, int dstoff)
{
	struct mbuf *m;
	int count, space;

	if (len > (MHLEN - dstoff))
		goto bad;
	MGET(m, M_DONTWAIT, n->m_type);
	if (m == NULL)
		goto bad;
	MCLAIM(m, n->m_owner);
	m->m_len = 0;
	if (n->m_flags & M_PKTHDR) {
		M_MOVE_PKTHDR(m, n);
	}
	m->m_data += dstoff;
	space = &m->m_dat[MLEN] - (m->m_data + m->m_len);
	do {
		count = min(min(max(len, max_protohdr), space), n->m_len);
		memcpy(mtod(m, char *) + m->m_len, mtod(n, void *),
		    (unsigned)count);
		len -= count;
		m->m_len += count;
		n->m_len -= count;
		space -= count;
		if (n->m_len)
			n->m_data += count;
		else
			n = m_free(n);
	} while (len > 0 && n);
	if (len > 0) {
		(void) m_free(m);
		goto bad;
	}
	m->m_next = n;
	return (m);
bad:
	m_freem(n);
	return (NULL);
}
/*
 * Partition an mbuf chain in two pieces, returning the tail --
 * all but the first len0 bytes.  In case of failure, it returns NULL and
 * attempts to restore the chain to its original state.
 */
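
/*
 * Editor's sketch (not part of the original source): m_split() is handy for
 * segmentation: cut a chain at "mss" bytes and transmit the head while
 * keeping the tail for later.  "m0" and "mss" are hypothetical:
 *
 *	struct mbuf *tail;
 *
 *	tail = m_split(m0, mss, M_DONTWAIT);
 *	if (tail == NULL)
 *		return ENOBUFS;		(m0 is left intact)
 *	(m0 now holds the first mss bytes, tail holds the rest)
 */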
struct mbuf *
m_split(struct mbuf *m0, int len0, int wait)
{

	return m_split0(m0, len0, wait, 1);
}

static struct mbuf *
m_split0(struct mbuf *m0, int len0, int wait, int copyhdr)
{
	struct mbuf *m, *n;
	unsigned len = len0, remain, len_save;

	for (m = m0; m && len > m->m_len; m = m->m_next)
		len -= m->m_len;
	if (m == 0)
		return (NULL);
	remain = m->m_len - len;
	if (copyhdr && (m0->m_flags & M_PKTHDR)) {
		MGETHDR(n, wait, m0->m_type);
		if (n == 0)
			return (NULL);
		MCLAIM(n, m0->m_owner);
		n->m_pkthdr.rcvif = m0->m_pkthdr.rcvif;
		n->m_pkthdr.len = m0->m_pkthdr.len - len0;
		len_save = m0->m_pkthdr.len;
		m0->m_pkthdr.len = len0;
		if (m->m_flags & M_EXT)
			goto extpacket;
		if (remain > MHLEN) {
			/* m can't be the lead packet */
			MH_ALIGN(n, 0);
			n->m_next = m_split(m, len, wait);
			if (n->m_next == 0) {
				(void) m_free(n);
				m0->m_pkthdr.len = len_save;
				return (NULL);
			} else
				return (n);
		} else
			MH_ALIGN(n, remain);
	} else if (remain == 0) {
		n = m->m_next;
		m->m_next = 0;
		return (n);
	} else {
		MGET(n, wait, m->m_type);
		if (n == 0)
			return (NULL);
		MCLAIM(n, m->m_owner);
		M_ALIGN(n, remain);
	}
extpacket:
	if (m->m_flags & M_EXT) {
		n->m_data = m->m_data + len;
		MCLADDREFERENCE(m, n);
	} else {
		memcpy(mtod(n, void *), mtod(m, char *) + len, remain);
	}
	n->m_len = remain;
	m->m_len = len;
	n->m_next = m->m_next;
	m->m_next = 0;
	return (n);
}
/*
 * Routine to copy from device local memory into mbufs.
 */
struct mbuf *
m_devget(char *buf, int totlen, int off0, struct ifnet *ifp,
    void (*copy)(const void *from, void *to, size_t len))
{
	struct mbuf *m;
	struct mbuf *top = 0, **mp = &top;
	int off = off0, len;
	char *cp;
	char *epkt;

	cp = buf;
	epkt = cp + totlen;
	if (off) {
		/*
		 * If 'off' is non-zero, packet is trailer-encapsulated,
		 * so we have to skip the type and length fields.
		 */
		cp += off + 2 * sizeof(uint16_t);
		totlen -= 2 * sizeof(uint16_t);
	}
	MGETHDR(m, M_DONTWAIT, MT_DATA);
	if (m == 0)
		return (NULL);
	m->m_pkthdr.rcvif = ifp;
	m->m_pkthdr.len = totlen;
	m->m_len = MHLEN;

	while (totlen > 0) {
		if (top) {
			MGET(m, M_DONTWAIT, MT_DATA);
			if (m == 0) {
				m_freem(top);
				return (NULL);
			}
			m->m_len = MLEN;
		}
		len = min(totlen, epkt - cp);
		if (len >= MINCLSIZE) {
			MCLGET(m, M_DONTWAIT);
			if ((m->m_flags & M_EXT) == 0) {
				m_free(m);
				m_freem(top);
				return (NULL);
			}
			m->m_len = len = min(len, MCLBYTES);
		} else {
			/*
			 * Place initial small packet/header at end of mbuf.
			 */
			if (len < m->m_len) {
				if (top == 0 && len + max_linkhdr <= m->m_len)
					m->m_data += max_linkhdr;
				m->m_len = len;
			} else
				len = m->m_len;
		}
		if (copy)
			copy(cp, mtod(m, void *), (size_t)len);
		else
			memcpy(mtod(m, void *), cp, (size_t)len);
		cp += len;
		*mp = m;
		mp = &m->m_next;
		totlen -= len;
		if (cp == epkt)
			cp = buf;
	}
	return (top);
}
/*
 * Copy data from a buffer back into the indicated mbuf chain,
 * starting "off" bytes from the beginning, extending the mbuf
 * chain if necessary.
 */
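
/*
 * Editor's sketch (not part of the original source): m_copyback() writes a
 * caller-supplied buffer over part of a chain, growing the chain when "off"
 * plus "len" reaches past its end.  A hypothetical example that stamps a
 * 2-byte checksum field at offset 16:
 *
 *	uint16_t sum;
 *
 *	m_copyback(m0, 16, sizeof(sum), &sum);
 *
 * Because it allocates with M_DONTWAIT and returns no error, callers that
 * must detect failure should prefer m_copyback_cow() or m_makewritable(),
 * which report it.
 */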
void
m_copyback(struct mbuf *m0, int off, int len, const void *cp)
{
#if defined(DEBUG)
	struct mbuf *origm = m0;
	int error;
#endif /* defined(DEBUG) */

	if (m0 == NULL)
		return;

#if defined(DEBUG)
	error =
#endif /* defined(DEBUG) */
	m_copyback0(&m0, off, len, cp,
	    M_COPYBACK0_COPYBACK|M_COPYBACK0_EXTEND, M_DONTWAIT);

#if defined(DEBUG)
	if (error != 0 || (m0 != NULL && origm != m0))
		panic("m_copyback");
#endif /* defined(DEBUG) */
}

struct mbuf *
m_copyback_cow(struct mbuf *m0, int off, int len, const void *cp, int how)
{
	int error;

	/* don't support chain expansion */
	KDASSERT(off + len <= m_length(m0));

	error = m_copyback0(&m0, off, len, cp,
	    M_COPYBACK0_COPYBACK|M_COPYBACK0_COW, how);
	if (error) {
		/*
		 * no way to recover from partial success.
		 * just free the chain.
		 */
		m_freem(m0);
		return NULL;
	}
	return m0;
}
/*
 * m_makewritable: ensure the specified range writable.
 */
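
/*
 * Editor's sketch (not part of the original source): callers that need to
 * modify packet data in place (e.g. NAT rewriting an address) use
 * m_makewritable() first, since parts of the chain may be shared read-only
 * clusters.  The chain head can be replaced, hence the struct mbuf **;
 * "m0", "off" and "new_addr" are hypothetical:
 *
 *	int error;
 *
 *	error = m_makewritable(&m0, off, sizeof(new_addr), M_DONTWAIT);
 *	if (error)
 *		return error;
 *	m_copyback(m0, off, sizeof(new_addr), &new_addr);
 */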
int
m_makewritable(struct mbuf **mp, int off, int len, int how)
{
	int error;
#if defined(DEBUG)
	struct mbuf *n;
	int origlen, reslen;

	origlen = m_length(*mp);
#endif /* defined(DEBUG) */

#if 0 /* M_COPYALL is large enough */
	if (len == M_COPYALL)
		len = m_length(*mp) - off; /* XXX */
#endif

	error = m_copyback0(mp, off, len, NULL,
	    M_COPYBACK0_PRESERVE|M_COPYBACK0_COW, how);

#if defined(DEBUG)
	reslen = 0;
	for (n = *mp; n; n = n->m_next)
		reslen += n->m_len;
	if (origlen != reslen)
		panic("m_makewritable: length changed");
	if (((*mp)->m_flags & M_PKTHDR) != 0 && reslen != (*mp)->m_pkthdr.len)
		panic("m_makewritable: inconsist");
#endif /* defined(DEBUG) */

	return error;
}
static int
m_copyback0(struct mbuf **mp0, int off, int len, const void *vp, int flags,
    int how)
{
	int mlen;
	struct mbuf *m, *n;
	struct mbuf **mp;
	int totlen = 0;
	const char *cp = vp;

	KASSERT(mp0 != NULL);
	KASSERT(*mp0 != NULL);
	KASSERT((flags & M_COPYBACK0_PRESERVE) == 0 || cp == NULL);
	KASSERT((flags & M_COPYBACK0_COPYBACK) == 0 || cp != NULL);

	/*
	 * we don't bother to update "totlen" in the case of M_COPYBACK0_COW,
	 * assuming that M_COPYBACK0_EXTEND and M_COPYBACK0_COW are exclusive.
	 */

	KASSERT((~flags & (M_COPYBACK0_EXTEND|M_COPYBACK0_COW)) != 0);

	mp = mp0;
	m = *mp;
	while (off > (mlen = m->m_len)) {
		off -= mlen;
		totlen += mlen;
		if (m->m_next == NULL) {
			int tspace;
extend:
			if ((flags & M_COPYBACK0_EXTEND) == 0)
				goto out;

			/*
			 * try to make some space at the end of "m".
			 */

			mlen = m->m_len;
			if (off + len >= MINCLSIZE &&
			    (m->m_flags & M_EXT) == 0 && m->m_len == 0) {
				MCLGET(m, how);
			}
			tspace = M_TRAILINGSPACE(m);
			if (tspace > 0) {
				tspace = min(tspace, off + len);
				KASSERT(tspace > 0);
				memset(mtod(m, char *) + m->m_len, 0,
				    min(off, tspace));
				m->m_len += tspace;
				off += mlen;
				totlen -= mlen;
				continue;
			}

			/*
			 * need to allocate an mbuf.
			 */

			if (off + len >= MINCLSIZE) {
				n = m_getcl(how, m->m_type, 0);
			} else {
				n = m_get(how, m->m_type);
			}
			if (n == NULL) {
				goto out;
			}
			n->m_len = 0;
			n->m_len = min(M_TRAILINGSPACE(n), off + len);
			memset(mtod(n, char *), 0, min(n->m_len, off));
			m->m_next = n;
		}
		mp = &m->m_next;
		m = m->m_next;
	}
	while (len > 0) {
		mlen = m->m_len - off;
		if (mlen != 0 && M_READONLY(m)) {
			char *datap;
			int eatlen;

			/*
			 * this mbuf is read-only.
			 * allocate a new writable mbuf and try again.
			 */

#if defined(DIAGNOSTIC)
			if ((flags & M_COPYBACK0_COW) == 0)
				panic("m_copyback0: read-only");
#endif /* defined(DIAGNOSTIC) */

			/*
			 * if we're going to write into the middle of
			 * a mbuf, split it first.
			 */
			if (off > 0 && len < mlen) {
				n = m_split0(m, off, how, 0);
				if (n == NULL)
					goto enobufs;
				m->m_next = n;
				mp = &m->m_next;
				m = n;
				off = 0;
				continue;
			}

			/*
			 * XXX TODO coalesce into the trailingspace of
			 * the previous mbuf when possible.
			 */

			/*
			 * allocate a new mbuf.  copy packet header if needed.
			 */
			MGET(n, how, m->m_type);
			if (n == NULL)
				goto enobufs;
			MCLAIM(n, m->m_owner);
			if (off == 0 && (m->m_flags & M_PKTHDR) != 0) {
				M_MOVE_PKTHDR(n, m);
				n->m_len = MHLEN;
			} else {
				if (len >= MINCLSIZE)
					MCLGET(n, M_DONTWAIT);
				n->m_len =
				    (n->m_flags & M_EXT) ? MCLBYTES : MLEN;
			}
			if (n->m_len > len)
				n->m_len = len;

			/*
			 * free the region which has been overwritten.
			 * copying data from old mbufs if requested.
			 */
			if (flags & M_COPYBACK0_PRESERVE)
				datap = mtod(n, char *);
			else
				datap = NULL;
			eatlen = n->m_len;
			KDASSERT(off == 0 || eatlen >= mlen);
			if (off > 0) {
				KDASSERT(len >= mlen);
				m->m_len = off;
				m->m_next = n;
				if (datap) {
					m_copydata(m, off, mlen, datap);
					datap += mlen;
				}
				eatlen -= mlen;
				mp = &m->m_next;
				m = m->m_next;
			}
			while (m != NULL && M_READONLY(m) &&
			    n->m_type == m->m_type && eatlen > 0) {
				mlen = min(eatlen, m->m_len);
				if (datap) {
					m_copydata(m, 0, mlen, datap);
					datap += mlen;
				}
				m->m_data += mlen;
				m->m_len -= mlen;
				eatlen -= mlen;
				if (m->m_len == 0)
					*mp = m = m_free(m);
			}
			if (eatlen > 0)
				n->m_len -= eatlen;
			n->m_next = m;
			*mp = m = n;
			continue;
		}
		mlen = min(mlen, len);
		if (flags & M_COPYBACK0_COPYBACK) {
			memcpy(mtod(m, char *) + off, cp, (unsigned)mlen);
			cp += mlen;
		}
		len -= mlen;
		mlen += off;
		off = 0;
		totlen += mlen;
		if (len == 0)
			break;
		if (m->m_next == NULL) {
			goto extend;
		}
		mp = &m->m_next;
		m = m->m_next;
	}
out:	if (((m = *mp0)->m_flags & M_PKTHDR) && (m->m_pkthdr.len < totlen)) {
		KASSERT((flags & M_COPYBACK0_EXTEND) != 0);
		m->m_pkthdr.len = totlen;
	}

	return (0);

enobufs:
	return (ENOBUFS);
}
void
m_move_pkthdr(struct mbuf *to, struct mbuf *from)
{

	KASSERT((to->m_flags & M_EXT) == 0);
	KASSERT((to->m_flags & M_PKTHDR) == 0 || m_tag_first(to) == NULL);
	KASSERT((from->m_flags & M_PKTHDR) != 0);

	to->m_pkthdr = from->m_pkthdr;
	to->m_flags = from->m_flags & M_COPYFLAGS;
	to->m_data = to->m_pktdat;

	from->m_flags &= ~M_PKTHDR;
}
/*
 * Apply function f to the data in an mbuf chain starting "off" bytes from the
 * beginning, continuing for "len" bytes.
 */
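
/*
 * Editor's sketch (not part of the original source): m_apply() walks the
 * chain segment by segment, so a caller can, for example, fold a checksum
 * over non-contiguous data without pulling it up first.  The callback and
 * its accumulator are hypothetical:
 *
 *	static int
 *	sum_segment(void *arg, void *seg, unsigned int len)
 *	{
 *		uint32_t *sum = arg;
 *		uint8_t *p = seg;
 *
 *		while (len-- > 0)
 *			*sum += *p++;
 *		return 0;	(a non-zero return aborts the walk)
 *	}
 *
 *	uint32_t sum = 0;
 *	(void)m_apply(m0, 0, m0->m_pkthdr.len, sum_segment, &sum);
 */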
int
m_apply(struct mbuf *m, int off, int len,
    int (*f)(void *, void *, unsigned int), void *arg)
{
	unsigned int count;
	int rval;

	KASSERT(len >= 0);
	KASSERT(off >= 0);

	while (off > 0) {
		KASSERT(m != NULL);
		if (off < m->m_len)
			break;
		off -= m->m_len;
		m = m->m_next;
	}
	while (len > 0) {
		KASSERT(m != NULL);
		count = min(m->m_len - off, len);

		rval = (*f)(arg, mtod(m, char *) + off, count);
		if (rval)
			return (rval);

		len -= count;
		off = 0;
		m = m->m_next;
	}

	return (0);
}
/*
 * Return a pointer to mbuf/offset of location in mbuf chain.
 */
struct mbuf *
m_getptr(struct mbuf *m, int loc, int *off)
{

	while (loc >= 0) {
		/* Normal end of search */
		if (m->m_len > loc) {
			*off = loc;
			return (m);
		} else {
			loc -= m->m_len;

			if (m->m_next == NULL) {
				if (loc == 0) {
					/* Point at the end of valid data */
					*off = m->m_len;
					return (m);
				} else
					return (NULL);
			} else
				m = m->m_next;
		}
	}

	return (NULL);
}
/*
 * m_ext_free: release a reference to the mbuf external storage.
 *
 * => free the mbuf m itself as well.
 */
void
m_ext_free(struct mbuf *m)
{
	bool embedded = MEXT_ISEMBEDDED(m);
	bool dofree = true;
	u_int refcnt;

	KASSERT((m->m_flags & M_EXT) != 0);
	KASSERT(MEXT_ISEMBEDDED(m->m_ext_ref));
	KASSERT((m->m_ext_ref->m_flags & M_EXT) != 0);
	KASSERT((m->m_flags & M_EXT_CLUSTER) ==
	    (m->m_ext_ref->m_flags & M_EXT_CLUSTER));

	if (__predict_true(m->m_ext.ext_refcnt == 1)) {
		refcnt = m->m_ext.ext_refcnt = 0;
	} else {
		refcnt = atomic_dec_uint_nv(&m->m_ext.ext_refcnt);
	}
	if (refcnt > 0) {
		if (embedded) {
			/*
			 * other mbuf's m_ext_ref still points to us.
			 */
			dofree = false;
		} else {
			m->m_ext_ref = m;
		}
	} else {
		/*
		 * dropping the last reference
		 */
		if (!embedded) {
			m->m_ext.ext_refcnt++; /* XXX */
			m_ext_free(m->m_ext_ref);
			m->m_ext_ref = m;
		} else if ((m->m_flags & M_EXT_CLUSTER) != 0) {
			pool_cache_put_paddr((struct pool_cache *)
			    m->m_ext.ext_arg,
			    m->m_ext.ext_buf, m->m_ext.ext_paddr);
		} else if (m->m_ext.ext_free) {
			(*m->m_ext.ext_free)(m,
			    m->m_ext.ext_buf, m->m_ext.ext_size,
			    m->m_ext.ext_arg);
			/*
			 * 'm' is already freed by the ext_free callback.
			 */
			dofree = false;
		} else {
			free(m->m_ext.ext_buf, m->m_ext.ext_type);
		}
	}
	if (dofree) {
		pool_cache_put(mb_cache, m);
	}
}
#if defined(DDB)
void
m_print(const struct mbuf *m, const char *modif, void (*pr)(const char *, ...))
{
	char ch;
	bool opt_c = false;
	char buf[512];

	while ((ch = *(modif++)) != '\0') {
		switch (ch) {
		case 'c':
			opt_c = true;
			break;
		}
	}

nextchain:
	(*pr)("MBUF %p\n", m);
	snprintb(buf, sizeof(buf), M_FLAGS_BITS, (u_int)m->m_flags);
	(*pr)("  data=%p, len=%d, type=%d, flags=0x%s\n",
	    m->m_data, m->m_len, m->m_type, buf);
	(*pr)("  owner=%p, next=%p, nextpkt=%p\n", m->m_owner, m->m_next,
	    m->m_nextpkt);
	(*pr)("  leadingspace=%u, trailingspace=%u, readonly=%u\n",
	    (int)M_LEADINGSPACE(m), (int)M_TRAILINGSPACE(m),
	    (int)M_READONLY(m));
	if ((m->m_flags & M_PKTHDR) != 0) {
		snprintb(buf, sizeof(buf), M_CSUM_BITS, m->m_pkthdr.csum_flags);
		(*pr)("  pktlen=%d, rcvif=%p, csum_flags=0x%s, csum_data=0x%"
		    PRIx32 ", segsz=%u\n",
		    m->m_pkthdr.len, m->m_pkthdr.rcvif,
		    buf, m->m_pkthdr.csum_data, m->m_pkthdr.segsz);
	}
	if ((m->m_flags & M_EXT)) {
		(*pr)("  ext_refcnt=%u, ext_buf=%p, ext_size=%zd, "
		    "ext_free=%p, ext_arg=%p\n",
		    m->m_ext.ext_refcnt,
		    m->m_ext.ext_buf, m->m_ext.ext_size,
		    m->m_ext.ext_free, m->m_ext.ext_arg);
	}
	if ((~m->m_flags & (M_EXT|M_EXT_PAGES)) == 0) {
		vaddr_t sva = (vaddr_t)m->m_ext.ext_buf;
		vaddr_t eva = sva + m->m_ext.ext_size;
		int n = (round_page(eva) - trunc_page(sva)) >> PAGE_SHIFT;
		int i;

		(*pr)("  pages:");
		for (i = 0; i < n; i++) {
			(*pr)(" %p", m->m_ext.ext_pgs[i]);
		}
		(*pr)("\n");
	}

	if (opt_c) {
		m = m->m_next;
		if (m != NULL) {
			goto nextchain;
		}
	}
}
#endif /* defined(DDB) */
void
mbstat_type_add(int type, int diff)
{
	struct mbstat_cpu *mb;

	mb = percpu_getref(mbstat_percpu);
	mb->m_mtypes[type] += diff;
	percpu_putref(mbstat_percpu);
}

#if defined(MBUFTRACE)
void
mowner_attach(struct mowner *mo)
{

	KASSERT(mo->mo_counters == NULL);
	mo->mo_counters = percpu_alloc(sizeof(struct mowner_counter));

	LIST_INSERT_HEAD(&mowners, mo, mo_link);
}

void
mowner_detach(struct mowner *mo)
{

	KASSERT(mo->mo_counters != NULL);

	LIST_REMOVE(mo, mo_link);

	percpu_free(mo->mo_counters, sizeof(struct mowner_counter));
	mo->mo_counters = NULL;
}

void
mowner_init(struct mbuf *m, int type)
{
	struct mowner_counter *mc;
	struct mowner *mo;

	m->m_owner = mo = &unknown_mowners[type];
	mc = percpu_getref(mo->mo_counters);
	mc->mc_counter[MOWNER_COUNTER_CLAIMS]++;
	percpu_putref(mo->mo_counters);
}

void
mowner_ref(struct mbuf *m, int flags)
{
	struct mowner *mo = m->m_owner;
	struct mowner_counter *mc;

	mc = percpu_getref(mo->mo_counters);
	if ((flags & M_EXT) != 0)
		mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++;
	if ((flags & M_CLUSTER) != 0)
		mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++;
	percpu_putref(mo->mo_counters);
}

void
mowner_revoke(struct mbuf *m, bool all, int flags)
{
	struct mowner *mo = m->m_owner;
	struct mowner_counter *mc;

	mc = percpu_getref(mo->mo_counters);
	if ((flags & M_EXT) != 0)
		mc->mc_counter[MOWNER_COUNTER_EXT_RELEASES]++;
	if ((flags & M_CLUSTER) != 0)
		mc->mc_counter[MOWNER_COUNTER_CLUSTER_RELEASES]++;
	if (all)
		mc->mc_counter[MOWNER_COUNTER_RELEASES]++;
	percpu_putref(mo->mo_counters);

	if (all)
		m->m_owner = &revoked_mowner;
}

static void
mowner_claim(struct mbuf *m, struct mowner *mo)
{
	struct mowner_counter *mc;
	int flags = m->m_flags;

	mc = percpu_getref(mo->mo_counters);
	mc->mc_counter[MOWNER_COUNTER_CLAIMS]++;
	if ((flags & M_EXT) != 0)
		mc->mc_counter[MOWNER_COUNTER_EXT_CLAIMS]++;
	if ((flags & M_CLUSTER) != 0)
		mc->mc_counter[MOWNER_COUNTER_CLUSTER_CLAIMS]++;
	percpu_putref(mo->mo_counters);

	m->m_owner = mo;
}

void
m_claim(struct mbuf *m, struct mowner *mo)
{

	if (m->m_owner == mo || mo == NULL)
		return;

	mowner_revoke(m, true, m->m_flags);
	mowner_claim(m, mo);
}
#endif /* defined(MBUFTRACE) */