/*	$NetBSD: subr_percpu.c,v 1.9 2008/12/15 11:59:22 ad Exp $	*/

/*-
 * Copyright (c)2007,2008 YAMAMOTO Takashi,
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * per-cpu storage.
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: subr_percpu.c,v 1.9 2008/12/15 11:59:22 ad Exp $");

#include <sys/param.h>
#include <sys/cpu.h>
#include <sys/kmem.h>
#include <sys/kernel.h>
#include <sys/mutex.h>
#include <sys/percpu.h>
#include <sys/rwlock.h>
#include <sys/vmem.h>
#include <sys/xcall.h>

#include <uvm/uvm_extern.h>

#define	PERCPU_QUANTUM_SIZE	(ALIGNBYTES + 1)
#define	PERCPU_QCACHE_MAX	0
#define	PERCPU_IMPORT_SIZE	2048

#if defined(DIAGNOSTIC)
#define	MAGIC			0x50435055	/* "PCPU" */
#define	percpu_encrypt(pc)	((pc) ^ MAGIC)
#define	percpu_decrypt(pc)	((pc) ^ MAGIC)
#else /* defined(DIAGNOSTIC) */
#define	percpu_encrypt(pc)	(pc)
#define	percpu_decrypt(pc)	(pc)
#endif /* defined(DIAGNOSTIC) */
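
/*
 * Note (not part of the original file): a percpu_t handle is just the vmem
 * offset of the object within each cpu's pcc_data area, XOR'ed with MAGIC
 * under DIAGNOSTIC, presumably so that accidental use of the handle as a
 * real pointer is caught early.  A minimal sketch of the round trip:
 *
 *	uintptr_t off = 64;
 *	percpu_t *pc = (percpu_t *)percpu_encrypt(off);
 *	KASSERT(percpu_decrypt((uintptr_t)pc) == off);	/. XOR is its own inverse ./
 */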

static krwlock_t	percpu_swap_lock;
static kmutex_t		percpu_allocation_lock;
static vmem_t		*percpu_offset_arena;
static unsigned int	percpu_nextoff = PERCPU_QUANTUM_SIZE;

static percpu_cpu_t *
cpu_percpu(struct cpu_info *ci)
{

	return &ci->ci_data.cpu_percpu;
}

static unsigned int
percpu_offset(percpu_t *pc)
{
	const unsigned int off = percpu_decrypt((uintptr_t)pc);

	KASSERT(off < percpu_nextoff);
	return off;
}

/*
 * percpu_cpu_swap: crosscall handler for percpu_cpu_enlarge
 */

static void
percpu_cpu_swap(void *p1, void *p2)
{
	struct cpu_info * const ci = p1;
	percpu_cpu_t * const newpcc = p2;
	percpu_cpu_t * const pcc = cpu_percpu(ci);

	/*
	 * swap *pcc and *newpcc unless anyone has beaten us.
	 */

	rw_enter(&percpu_swap_lock, RW_WRITER);
	if (newpcc->pcc_size > pcc->pcc_size) {
		percpu_cpu_t tmp;
		int s;

		tmp = *pcc;

		/*
		 * block interrupts so that we don't lose their modifications.
		 */

		s = splhigh();

		/*
		 * copy data to new storage.
		 */

		memcpy(newpcc->pcc_data, pcc->pcc_data, pcc->pcc_size);

		/*
		 * this assignment needs to be atomic for percpu_getptr_remote.
		 */

		pcc->pcc_data = newpcc->pcc_data;

		splx(s);

		pcc->pcc_size = newpcc->pcc_size;
		*newpcc = tmp;
	}
	rw_exit(&percpu_swap_lock);
}

/*
 * percpu_cpu_enlarge: ensure that percpu_cpu_t of each cpu has enough space
 */

static void
percpu_cpu_enlarge(size_t size)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	for (CPU_INFO_FOREACH(cii, ci)) {
		percpu_cpu_t pcc;

		pcc.pcc_data = kmem_alloc(size, KM_SLEEP); /* XXX cacheline */
		pcc.pcc_size = size;
		if (!mp_online) {
			percpu_cpu_swap(ci, &pcc);
		} else {
			uint64_t where;

			where = xc_unicast(0, percpu_cpu_swap, ci, &pcc, ci);
			xc_wait(where);
		}
		KASSERT(pcc.pcc_size < size);
		if (pcc.pcc_data != NULL) {
			kmem_free(pcc.pcc_data, pcc.pcc_size);
		}
	}
}

/*
 * percpu_backend_alloc: vmem import callback for percpu_offset_arena
 */

static vmem_addr_t
percpu_backend_alloc(vmem_t *dummy, vmem_size_t size, vmem_size_t *resultsize,
    vm_flag_t vmflags)
{
	unsigned int offset;
	unsigned int nextoff;

	ASSERT_SLEEPABLE();
	KASSERT(dummy == NULL);

	if ((vmflags & VM_NOSLEEP) != 0)
		return VMEM_ADDR_NULL;

	size = roundup(size, PERCPU_IMPORT_SIZE);
	mutex_enter(&percpu_allocation_lock);
	offset = percpu_nextoff;
	percpu_nextoff = nextoff = percpu_nextoff + size;
	mutex_exit(&percpu_allocation_lock);

	percpu_cpu_enlarge(nextoff);

	*resultsize = size;
	return (vmem_addr_t)offset;
}
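
/*
 * Worked example (not part of the original file), assuming ALIGNBYTES == 7:
 *
 *	percpu_alloc(8)
 *	  -> vmem has no free span, so it imports via percpu_backend_alloc()
 *	  -> size = roundup(8, PERCPU_IMPORT_SIZE) = 2048
 *	  -> offset = percpu_nextoff = 8 (PERCPU_QUANTUM_SIZE)
 *	  -> percpu_nextoff becomes 8 + 2048 = 2056
 *	  -> percpu_cpu_enlarge(2056) grows every cpu's pcc_data to 2056 bytes
 *	  -> the span [8, 2056) is handed back to vmem, which carves out the
 *	     original 8-byte allocation from it.
 */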

static void
percpu_zero_cb(void *vp, void *vp2, struct cpu_info *ci)
{
	size_t sz = (uintptr_t)vp2;

	memset(vp, 0, sz);
}

/*
 * percpu_zero: initialize percpu storage with zero.
 */

static void
percpu_zero(percpu_t *pc, size_t sz)
{

	percpu_foreach(pc, percpu_zero_cb, (void *)(uintptr_t)sz);
}

/*
 * percpu_init: subsystem initialization
 */

void
percpu_init(void)
{

	ASSERT_SLEEPABLE();
	rw_init(&percpu_swap_lock);
	mutex_init(&percpu_allocation_lock, MUTEX_DEFAULT, IPL_NONE);

	percpu_offset_arena = vmem_create("percpu", 0, 0, PERCPU_QUANTUM_SIZE,
	    percpu_backend_alloc, NULL, NULL, PERCPU_QCACHE_MAX, VM_SLEEP,
	    IPL_NONE);
}

/*
 * percpu_init_cpu: cpu initialization
 *
 * => should be called before the cpu appears on the list for CPU_INFO_FOREACH.
 */

void
percpu_init_cpu(struct cpu_info *ci)
{
	percpu_cpu_t * const pcc = cpu_percpu(ci);
	size_t size = percpu_nextoff; /* XXX racy */

	ASSERT_SLEEPABLE();
	pcc->pcc_size = size;
	if (size) {
		pcc->pcc_data = kmem_zalloc(pcc->pcc_size, KM_SLEEP);
	}
}

/*
 * percpu_alloc: allocate percpu storage
 *
 * => called in thread context.
 * => considered as an expensive and rare operation.
 * => allocated storage is initialized with zeros.
 */

percpu_t *
percpu_alloc(size_t size)
{
	unsigned int offset;
	percpu_t *pc;

	ASSERT_SLEEPABLE();
	offset = vmem_alloc(percpu_offset_arena, size, VM_SLEEP | VM_BESTFIT);
	pc = (percpu_t *)percpu_encrypt((uintptr_t)offset);
	percpu_zero(pc, size);
	return pc;
}

/*
 * percpu_free: free percpu storage
 *
 * => called in thread context.
 * => considered as an expensive and rare operation.
 */

void
percpu_free(percpu_t *pc, size_t size)
{

	ASSERT_SLEEPABLE();
	vmem_free(percpu_offset_arena, (vmem_addr_t)percpu_offset(pc), size);
}
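
/*
 * Usage sketch (not part of the original file): allocate a per-cpu counter
 * at attach time and release it at detach time.  The foo_* names are
 * hypothetical.
 *
 *	static percpu_t *foo_percpu;
 *
 *	void
 *	foo_init(void)
 *	{
 *		foo_percpu = percpu_alloc(sizeof(uint64_t));
 *	}
 *
 *	void
 *	foo_fini(void)
 *	{
 *		percpu_free(foo_percpu, sizeof(uint64_t));
 *	}
 */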

/*
 * percpu_getref:
 * => safe to be used in either thread or interrupt context
 * => disables preemption; must be bracketed with a percpu_putref()
 */

void *
percpu_getref(percpu_t *pc)
{

	KPREEMPT_DISABLE(curlwp);
	return percpu_getptr_remote(pc, curcpu());
}

/*
 * percpu_putref:
 * => drops the preemption-disabled count after caller is done with per-cpu
 *    data
 */

void
percpu_putref(percpu_t *pc)
{

	KPREEMPT_ENABLE(curlwp);
}
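
/*
 * Usage sketch (not part of the original file): percpu_getref() disables
 * preemption so the returned pointer stays bound to the local cpu until
 * percpu_putref().  foo_percpu is the hypothetical handle from above.
 *
 *	uint64_t *p;
 *
 *	p = percpu_getref(foo_percpu);
 *	(*p)++;
 *	percpu_putref(foo_percpu);
 */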

/*
 * percpu_traverse_enter, percpu_traverse_exit, percpu_getptr_remote:
 * helpers to access remote cpu's percpu data.
 *
 * => called in thread context.
 * => percpu_traverse_enter can block low-priority xcalls.
 * => typical usage would be:
 *
 *	sum = 0;
 *	percpu_traverse_enter();
 *	for (CPU_INFO_FOREACH(cii, ci)) {
 *		unsigned int *p = percpu_getptr_remote(pc, ci);
 *		sum += *p;
 *	}
 *	percpu_traverse_exit();
 */

void
percpu_traverse_enter(void)
{

	ASSERT_SLEEPABLE();
	rw_enter(&percpu_swap_lock, RW_READER);
}

void
percpu_traverse_exit(void)
{

	rw_exit(&percpu_swap_lock);
}

void *
percpu_getptr_remote(percpu_t *pc, struct cpu_info *ci)
{

	return &((char *)cpu_percpu(ci)->pcc_data)[percpu_offset(pc)];
}

/*
 * percpu_foreach: call the specified callback function for each cpu.
 *
 * => called in thread context.
 * => caller should not rely on the cpu iteration order.
 * => the callback function should be minimal because it is executed while
 *    holding a global lock, which can block low-priority xcalls.
 *    eg. it's illegal for a callback function to sleep for memory allocation.
 */
void
percpu_foreach(percpu_t *pc, percpu_callback_t cb, void *arg)
{
	CPU_INFO_ITERATOR cii;
	struct cpu_info *ci;

	percpu_traverse_enter();
	for (CPU_INFO_FOREACH(cii, ci)) {
		(*cb)(percpu_getptr_remote(pc, ci), arg, ci);
	}
	percpu_traverse_exit();
}
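
/*
 * Usage sketch (not part of the original file): summing a per-cpu uint64_t
 * counter with percpu_foreach().  The callback name, the sum variable and
 * foo_percpu are hypothetical.
 *
 *	static void
 *	foo_sum_cb(void *p, void *arg, struct cpu_info *ci)
 *	{
 *		uint64_t *sump = arg;
 *
 *		*sump += *(uint64_t *)p;
 *	}
 *
 *	uint64_t sum = 0;
 *
 *	percpu_foreach(foo_percpu, foo_sum_cb, &sum);
 */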