 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2009-2010, Intel Corporation.
 * All rights reserved.
 */
/*
 * This file implements a CPU event notification mechanism used to signal
 * clients that are interested in CPU-related events.
 * Currently it only supports CPU idle state change events, which are
 * triggered just before a CPU enters a hardware idle state and just after
 * it wakes up from a hardware idle state.
 * Please refer to PSARC/2009/115 for detailed information.
 *
 * Lock strategy:
 * 1) cpu_idle_prop_busy/free are protected by cpu_idle_prop_lock.
 * 2) No protection for cpu_idle_cb_state because it's per-CPU data.
 * 3) cpu_idle_cb_busy is protected by cpu_idle_cb_lock.
 * 4) cpu_idle_cb_array is protected by pause_cpus/start_cpus logic.
 * 5) cpu_idle_cb_max/curr are protected by both cpu_idle_cb_lock and
 *    pause_cpus/start_cpus logic.
 * The algorithm is optimized for the hot path, read-side access, which is
 * lock free.
 * On the write side, pause_cpus() is used to keep all other CPUs in the
 * pause thread, which guarantees that no other thread will access the
 * cpu_idle_cb_max/curr/array data structures.
 */

#include <sys/types.h>
#include <sys/cmn_err.h>
#include <sys/cpuvar.h>
#include <sys/kmem.h>
#include <sys/machcpuvar.h>
#include <sys/sdt.h>
#include <sys/sysmacros.h>
#include <sys/synch.h>
#include <sys/systm.h>
#include <sys/sunddi.h>
#include <sys/atomic.h>
#include <sys/machsystm.h>
#include <sys/archsystm.h>
#include <sys/cpu_event.h>

/* Define normal state for CPU on different platforms. */
#if defined(__x86)
#define	CPU_IDLE_STATE_NORMAL		IDLE_STATE_C0
#elif defined(__sparc)
/*
 * At the time of this implementation IDLE_STATE_NORMAL is defined
 * in mach_startup.c, and not in a header file. So if we find it is
 * undefined, then we set it to the value as defined in mach_startup.c.
 * Should it eventually be defined, we will pick it up.
 */
#ifndef	IDLE_STATE_NORMAL
#define	IDLE_STATE_NORMAL	0
#endif
#define	CPU_IDLE_STATE_NORMAL	IDLE_STATE_NORMAL
#endif

/*
 * To improve cache efficiency and avoid cache false sharing, CPU idle
 * properties are grouped into cache lines as below:
 * |     CPU0      |     CPU1      |.........|     CPUn      |
 * | cache line 0  | cache line 1  |.........| cache line n  |
 * | v0 | ... | vm | v0 | ... | vm |.........| v0 | ... | vm |
 * To access the value of property m for CPU n, use the following index:
 *    index = seq_id_of_CPUn * CPU_IDLE_VALUE_GROUP_SIZE + m
 */
#define	CPU_IDLE_VALUE_GROUP_SIZE	\
	(CPU_CACHE_COHERENCE_SIZE / sizeof (cpu_idle_prop_value_t))
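
/*
 * Worked example of the index computation above, with illustrative numbers
 * only: assuming CPU_CACHE_COHERENCE_SIZE is 64 bytes and
 * cpu_idle_prop_value_t is 8 bytes, CPU_IDLE_VALUE_GROUP_SIZE is 64 / 8 = 8.
 *
 *	Property m = 3 of the CPU with cpu_seqid n = 5 then lives at
 *	index = 5 * 8 + 3 = 43,
 *
 * i.e. within the cache line owned by CPU 5, so no CPU ever writes to a
 * line holding another CPU's property values.
 */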

/* Get callback context handle for current CPU. */
#define	CPU_IDLE_GET_CTX(cp)		\
	((cpu_idle_callback_context_t)(intptr_t)((cp)->cpu_seqid))

/* Get CPU sequential id from ctx. */
#define	CPU_IDLE_CTX2CPUID(ctx)		((processorid_t)(intptr_t)(ctx))

/* Compute index from callback context handle. */
#define	CPU_IDLE_CTX2IDX(ctx)		\
	(((int)(intptr_t)(ctx)) * CPU_IDLE_VALUE_GROUP_SIZE)

#define	CPU_IDLE_HDL2VALP(hdl, idx)	\
	(&((cpu_idle_prop_impl_t *)(hdl))->value[(idx)])

/*
 * When cpu_idle_cb_array is NULL or full, grow it by
 * CPU_IDLE_ARRAY_CAPACITY_INC entries at a time. Linear growth is preferred
 * over exponential growth here, as shown in the example below.
 */
#define	CPU_IDLE_ARRAY_CAPACITY_INC	0x10
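
/*
 * For example, with the value above (0x10 == 16) the array capacity grows
 * 0 -> 16 -> 32 -> 48 ... as callbacks accumulate, rather than doubling on
 * each expansion.
 */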

typedef struct cpu_idle_prop_impl {
	cpu_idle_prop_value_t		*value;
	struct cpu_idle_prop_impl	*next;
	char				*name;
	cpu_idle_prop_update_t		update;
	void				*private;
	cpu_idle_prop_type_t		type;
	uint32_t			refcnt;
} cpu_idle_prop_impl_t;

typedef struct cpu_idle_prop_item {
	cpu_idle_prop_type_t		type;
	char				*name;
	cpu_idle_prop_update_t		update;
	void				*arg;
	cpu_idle_prop_handle_t		handle;
} cpu_idle_prop_item_t;

/* Structure to maintain registered callbacks in a list. */
typedef struct cpu_idle_cb_impl {
	struct cpu_idle_cb_impl		*next;
	cpu_idle_callback_t		*callback;
	void				*argument;
	int				priority;
} cpu_idle_cb_impl_t;

/*
 * Structure to maintain registered callbacks in priority order, also
 * optimized for cache efficiency on read access.
 */
typedef struct cpu_idle_cb_item {
	cpu_idle_enter_cbfn_t		enter;
	cpu_idle_exit_cbfn_t		exit;
	void				*arg;
	cpu_idle_cb_impl_t		*impl;
} cpu_idle_cb_item_t;

/* Per-CPU state aligned to CPU_CACHE_COHERENCE_SIZE to avoid false sharing. */
typedef union cpu_idle_cb_state {
	struct {
		/* Index of already invoked callbacks. */
		int			index;
		/* Invoke registered callbacks if true. */
		boolean_t		enabled;
		/* Property values are valid if true. */
		boolean_t		ready;
		/* Pointers to per-CPU properties. */
		cpu_idle_prop_value_t	*idle_state;
		cpu_idle_prop_value_t	*enter_ts;
		cpu_idle_prop_value_t	*exit_ts;
		cpu_idle_prop_value_t	*last_idle;
		cpu_idle_prop_value_t	*last_busy;
		cpu_idle_prop_value_t	*total_idle;
		cpu_idle_prop_value_t	*total_busy;
		cpu_idle_prop_value_t	*intr_cnt;
	} v;
#ifdef	_LP64
	char				align[2 * CPU_CACHE_COHERENCE_SIZE];
#else
	char				align[CPU_CACHE_COHERENCE_SIZE];
#endif
} cpu_idle_cb_state_t;

static kmutex_t				cpu_idle_prop_lock;
static cpu_idle_prop_impl_t		*cpu_idle_prop_busy = NULL;
static cpu_idle_prop_impl_t		*cpu_idle_prop_free = NULL;

static kmutex_t				cpu_idle_cb_lock;
static cpu_idle_cb_impl_t		*cpu_idle_cb_busy = NULL;
static cpu_idle_cb_item_t		*cpu_idle_cb_array = NULL;
static int				cpu_idle_cb_curr = 0;
static int				cpu_idle_cb_max = 0;

static cpu_idle_cb_state_t		*cpu_idle_cb_state;

/*
 * cpuset used to intercept CPUs before powering them off.
 * The control CPU sets the bit corresponding to the target CPU and waits
 * until the bit is cleared.
 * The target CPU disables interrupts before clearing the corresponding bit
 * and then loops forever.
 * A sketch of the handshake between the two sides follows this declaration.
 */
static cpuset_t				cpu_idle_intercept_set;
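
/*
 * Sketch of the interception handshake; both sides are implemented later in
 * this file, in cpu_idle_intercept_cpu() (control side) and cpu_idle_enter()
 * (target side):
 *
 *	control CPU:				target CPU (idle thread):
 *	CPUSET_ATOMIC_ADD(set, cpu_id)
 *	poke_cpu(cpu_id)			finds its bit set in the set,
 *	spin while CPU_IN_SET(set, cpu_id)	disables interrupts,
 *						CPUSET_ATOMIC_DEL(set, cpu_id),
 *	proceeds to power off the CPU		then spins forever.
 */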

static int cpu_idle_prop_update_intr_cnt(void *arg, uint64_t seqnum,
    cpu_idle_prop_value_t *valp);

static cpu_idle_prop_item_t cpu_idle_prop_array[] = {
	{
	    CPU_IDLE_PROP_TYPE_INTPTR, CPU_IDLE_PROP_IDLE_STATE,
	    NULL, NULL, NULL
	},
	{
	    CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_ENTER_TIMESTAMP,
	    NULL, NULL, NULL
	},
	{
	    CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_EXIT_TIMESTAMP,
	    NULL, NULL, NULL
	},
	{
	    CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_LAST_IDLE_TIME,
	    NULL, NULL, NULL
	},
	{
	    CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_LAST_BUSY_TIME,
	    NULL, NULL, NULL
	},
	{
	    CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_TOTAL_IDLE_TIME,
	    NULL, NULL, NULL
	},
	{
	    CPU_IDLE_PROP_TYPE_HRTIME, CPU_IDLE_PROP_TOTAL_BUSY_TIME,
	    NULL, NULL, NULL
	},
	{
	    CPU_IDLE_PROP_TYPE_UINT64, CPU_IDLE_PROP_INTERRUPT_COUNT,
	    cpu_idle_prop_update_intr_cnt, NULL, NULL
	},
};

#define	CPU_IDLE_PROP_IDX_IDLE_STATE	0
#define	CPU_IDLE_PROP_IDX_ENTER_TS	1
#define	CPU_IDLE_PROP_IDX_EXIT_TS	2
#define	CPU_IDLE_PROP_IDX_LAST_IDLE	3
#define	CPU_IDLE_PROP_IDX_LAST_BUSY	4
#define	CPU_IDLE_PROP_IDX_TOTAL_IDLE	5
#define	CPU_IDLE_PROP_IDX_TOTAL_BUSY	6
#define	CPU_IDLE_PROP_IDX_INTR_CNT	7

static void
cpu_idle_dtrace_enter(void *arg, cpu_idle_callback_context_t ctx,
    cpu_idle_check_wakeup_t check_func, void *check_arg)
{
	intptr_t state;

	state = cpu_idle_prop_get_intptr(
	    cpu_idle_prop_array[CPU_IDLE_PROP_IDX_IDLE_STATE].handle, ctx);
	DTRACE_PROBE1(idle__state__transition, uint_t, state);
}

static void
cpu_idle_dtrace_exit(void *arg, cpu_idle_callback_context_t ctx, int flag)
{
	DTRACE_PROBE1(idle__state__transition, uint_t, CPU_IDLE_STATE_NORMAL);
}

static cpu_idle_callback_handle_t cpu_idle_cb_handle_dtrace;
static cpu_idle_callback_t cpu_idle_callback_dtrace = {
	CPU_IDLE_CALLBACK_VERS,
	cpu_idle_dtrace_enter,
	cpu_idle_dtrace_exit,
};

#if defined(__x86) && !defined(__xpv)
extern void tlb_going_idle(void);
extern void tlb_service(void);

static cpu_idle_callback_handle_t cpu_idle_cb_handle_tlb;
static cpu_idle_callback_t cpu_idle_callback_tlb = {
	CPU_IDLE_CALLBACK_VERS,
	(cpu_idle_enter_cbfn_t)tlb_going_idle,
	(cpu_idle_exit_cbfn_t)tlb_service,
};
#endif

void
cpu_event_init(void)
{
	int i, idx;
	size_t sz;
	intptr_t buf;
	cpu_idle_cb_state_t *sp;
	cpu_idle_prop_item_t *ip;

	mutex_init(&cpu_idle_cb_lock, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&cpu_idle_prop_lock, NULL, MUTEX_DRIVER, NULL);

	/* Create internal properties. */
	for (i = 0, ip = cpu_idle_prop_array;
	    i < sizeof (cpu_idle_prop_array) / sizeof (cpu_idle_prop_array[0]);
	    i++, ip++) {
		(void) cpu_idle_prop_create_property(ip->name, ip->type,
		    ip->update, ip->arg, &ip->handle);
		ASSERT(ip->handle != NULL);
	}

	/* Allocate buffer and align to CPU_CACHE_COHERENCE_SIZE. */
	sz = sizeof (cpu_idle_cb_state_t) * max_ncpus;
	sz += CPU_CACHE_COHERENCE_SIZE;
	buf = (intptr_t)kmem_zalloc(sz, KM_SLEEP);
	cpu_idle_cb_state = (cpu_idle_cb_state_t *)P2ROUNDUP(buf,
	    CPU_CACHE_COHERENCE_SIZE);

	/* Cache frequently used property value pointers. */
	for (sp = cpu_idle_cb_state, i = 0; i < max_ncpus; i++, sp++) {
		idx = CPU_IDLE_CTX2IDX(i);
#define	___INIT_P(f, i)	\
	sp->v.f = CPU_IDLE_HDL2VALP(cpu_idle_prop_array[(i)].handle, idx)
		___INIT_P(idle_state, CPU_IDLE_PROP_IDX_IDLE_STATE);
		___INIT_P(enter_ts, CPU_IDLE_PROP_IDX_ENTER_TS);
		___INIT_P(exit_ts, CPU_IDLE_PROP_IDX_EXIT_TS);
		___INIT_P(last_idle, CPU_IDLE_PROP_IDX_LAST_IDLE);
		___INIT_P(last_busy, CPU_IDLE_PROP_IDX_LAST_BUSY);
		___INIT_P(total_idle, CPU_IDLE_PROP_IDX_TOTAL_IDLE);
		___INIT_P(total_busy, CPU_IDLE_PROP_IDX_TOTAL_BUSY);
		___INIT_P(intr_cnt, CPU_IDLE_PROP_IDX_INTR_CNT);
#undef	___INIT_P
	}

	/* Register built-in callbacks. */
	if (cpu_idle_register_callback(CPU_IDLE_CB_PRIO_DTRACE,
	    &cpu_idle_callback_dtrace, NULL, &cpu_idle_cb_handle_dtrace) != 0) {
		cmn_err(CE_PANIC,
		    "cpu_idle: failed to register callback for dtrace.");
	}
#if defined(__x86) && !defined(__xpv)
	if (cpu_idle_register_callback(CPU_IDLE_CB_PRIO_TLB,
	    &cpu_idle_callback_tlb, NULL, &cpu_idle_cb_handle_tlb) != 0) {
		cmn_err(CE_PANIC,
		    "cpu_idle: failed to register callback for tlb_flush.");
	}
#endif
}

/*
 * This function is called to initialize per CPU state when starting CPUs.
 */
void
cpu_event_init_cpu(cpu_t *cp)
{
	ASSERT(cp->cpu_seqid < max_ncpus);
	cpu_idle_cb_state[cp->cpu_seqid].v.index = 0;
	cpu_idle_cb_state[cp->cpu_seqid].v.ready = B_FALSE;
	cpu_idle_cb_state[cp->cpu_seqid].v.enabled = B_TRUE;
}

/*
 * This function is called to clean up per CPU state when stopping CPUs.
 */
void
cpu_event_fini_cpu(cpu_t *cp)
{
	ASSERT(cp->cpu_seqid < max_ncpus);
	cpu_idle_cb_state[cp->cpu_seqid].v.enabled = B_FALSE;
	cpu_idle_cb_state[cp->cpu_seqid].v.ready = B_FALSE;
}

static void
cpu_idle_insert_callback(cpu_idle_cb_impl_t *cip)
{
	int unlock = 0, unpause = 0;
	int i, cnt_new = 0, cnt_old = 0;
	char *buf_new = NULL, *buf_old = NULL;

	ASSERT(MUTEX_HELD(&cpu_idle_cb_lock));

	/*
	 * Expand array if it's full.
	 * Memory must be allocated out of pause/start_cpus() scope because
	 * kmem_zalloc() can't be called with KM_SLEEP flag within that scope.
	 */
	if (cpu_idle_cb_curr == cpu_idle_cb_max) {
		cnt_new = cpu_idle_cb_max + CPU_IDLE_ARRAY_CAPACITY_INC;
		buf_new = kmem_zalloc(cnt_new *
		    sizeof (cpu_idle_cb_item_t), KM_SLEEP);
	}

	/* Try to acquire cpu_lock if not held yet. */
	if (!MUTEX_HELD(&cpu_lock)) {
		mutex_enter(&cpu_lock);
		unlock = 1;
	}

	/*
	 * Pause all other CPUs (and let them run the pause thread).
	 * It's guaranteed that no other threads will access cpu_idle_cb_array
	 * after pause_cpus().
	 */
	if (!cpus_paused()) {
		pause_cpus(NULL, NULL);
		unpause = 1;
	}

	/* Copy content to new buffer if needed. */
	if (buf_new != NULL) {
		buf_old = (char *)cpu_idle_cb_array;
		cnt_old = cpu_idle_cb_max;
		if (buf_old != NULL) {
			ASSERT(cnt_old != 0);
			bcopy(cpu_idle_cb_array, buf_new,
			    sizeof (cpu_idle_cb_item_t) * cnt_old);
		}
		cpu_idle_cb_array = (cpu_idle_cb_item_t *)buf_new;
		cpu_idle_cb_max = cnt_new;
	}

	/* Insert into array according to priority. */
	ASSERT(cpu_idle_cb_curr < cpu_idle_cb_max);
	for (i = cpu_idle_cb_curr; i > 0; i--) {
		if (cpu_idle_cb_array[i - 1].impl->priority >= cip->priority) {
			break;
		}
		cpu_idle_cb_array[i] = cpu_idle_cb_array[i - 1];
	}
	cpu_idle_cb_array[i].arg = cip->argument;
	cpu_idle_cb_array[i].enter = cip->callback->idle_enter;
	cpu_idle_cb_array[i].exit = cip->callback->idle_exit;
	cpu_idle_cb_array[i].impl = cip;
	cpu_idle_cb_curr++;

	/* Resume other CPUs from paused state if needed. */
	if (unpause) {
		start_cpus();
	}
	if (unlock) {
		mutex_exit(&cpu_lock);
	}

	/* Free old resource if needed. */
	if (buf_old != NULL) {
		ASSERT(cnt_old != 0);
		kmem_free(buf_old, cnt_old * sizeof (cpu_idle_cb_item_t));
	}
}

static void
cpu_idle_remove_callback(cpu_idle_cb_impl_t *cip)
{
	int i, found = 0;
	int unlock = 0, unpause = 0;
	cpu_idle_cb_state_t *sp;

	ASSERT(MUTEX_HELD(&cpu_idle_cb_lock));

	/* Try to acquire cpu_lock if not held yet. */
	if (!MUTEX_HELD(&cpu_lock)) {
		mutex_enter(&cpu_lock);
		unlock = 1;
	}

	/*
	 * Pause all other CPUs.
	 * It's guaranteed that no other threads will access cpu_idle_cb_array
	 * after pause_cpus().
	 */
	if (!cpus_paused()) {
		pause_cpus(NULL, NULL);
		unpause = 1;
	}

	/* Remove cip from array, moving later entries down one slot. */
	for (i = 0; i < cpu_idle_cb_curr; i++) {
		if (found == 0) {
			if (cpu_idle_cb_array[i].impl == cip) {
				found = 1;
			}
		} else {
			cpu_idle_cb_array[i - 1] = cpu_idle_cb_array[i];
		}
	}
	ASSERT(found != 0);
	cpu_idle_cb_curr--;

	/*
	 * Reset property ready flag for all CPUs if no registered callback
	 * is left, because cpu_idle_enter/exit will stop updating properties
	 * when there's no callback registered.
	 */
	if (cpu_idle_cb_curr == 0) {
		for (sp = cpu_idle_cb_state, i = 0; i < max_ncpus; i++, sp++) {
			sp->v.ready = B_FALSE;
		}
	}

	/* Resume other CPUs from paused state if needed. */
	if (unpause) {
		start_cpus();
	}
	if (unlock) {
		mutex_exit(&cpu_lock);
	}
}

int
cpu_idle_register_callback(uint_t prio, cpu_idle_callback_t *cbp,
    void *arg, cpu_idle_callback_handle_t *hdlp)
{
	cpu_idle_cb_state_t *sp;
	cpu_idle_cb_impl_t *cip = NULL;

	/* First validate parameters. */
	ASSERT(!CPU_ON_INTR(CPU));
	ASSERT(CPU->cpu_seqid < max_ncpus);
	sp = &cpu_idle_cb_state[CPU->cpu_seqid];
	if (sp->v.index != 0) {
		cmn_err(CE_NOTE,
		    "!cpu_event: register_callback called from callback.");
		return (EBUSY);
	} else if (cbp == NULL || hdlp == NULL) {
		cmn_err(CE_NOTE,
		    "!cpu_event: NULL parameters in register_callback.");
		return (EINVAL);
	} else if (prio < CPU_IDLE_CB_PRIO_LOW_BASE ||
	    prio >= CPU_IDLE_CB_PRIO_RESV_BASE) {
		cmn_err(CE_NOTE,
		    "!cpu_event: priority 0x%x out of range.", prio);
		return (EINVAL);
	} else if (cbp->version != CPU_IDLE_CALLBACK_VERS) {
		cmn_err(CE_NOTE,
		    "!cpu_event: callback version %d is not supported.",
		    cbp->version);
		return (EINVAL);
	}

	mutex_enter(&cpu_idle_cb_lock);
	/* Check whether a callback with this priority exists if not dynamic. */
	if (prio != CPU_IDLE_CB_PRIO_DYNAMIC) {
		for (cip = cpu_idle_cb_busy; cip != NULL; cip = cip->next) {
			if (cip->priority == prio) {
				mutex_exit(&cpu_idle_cb_lock);
				cmn_err(CE_NOTE, "!cpu_event: callback with "
				    "priority 0x%x already exists.", prio);
				return (EEXIST);
			}
		}
	}

	cip = kmem_zalloc(sizeof (*cip), KM_SLEEP);
	cip->callback = cbp;
	cip->argument = arg;
	cip->priority = prio;
	cip->next = cpu_idle_cb_busy;
	cpu_idle_cb_busy = cip;
	cpu_idle_insert_callback(cip);
	mutex_exit(&cpu_idle_cb_lock);

	*hdlp = (cpu_idle_callback_handle_t)cip;

	return (0);
}

int
cpu_idle_unregister_callback(cpu_idle_callback_handle_t hdl)
{
	int rc = ENODEV;
	cpu_idle_cb_state_t *sp;
	cpu_idle_cb_impl_t *ip, **ipp;

	ASSERT(!CPU_ON_INTR(CPU));
	ASSERT(CPU->cpu_seqid < max_ncpus);
	sp = &cpu_idle_cb_state[CPU->cpu_seqid];
	if (sp->v.index != 0) {
		cmn_err(CE_NOTE,
		    "!cpu_event: unregister_callback called from callback.");
		return (EBUSY);
	} else if (hdl == NULL) {
		cmn_err(CE_NOTE,
		    "!cpu_event: hdl is NULL in unregister_callback.");
		return (EINVAL);
	}

	ip = (cpu_idle_cb_impl_t *)hdl;
	mutex_enter(&cpu_idle_cb_lock);
	for (ipp = &cpu_idle_cb_busy; *ipp != NULL; ipp = &(*ipp)->next) {
		if (*ipp == ip) {
			*ipp = ip->next;
			cpu_idle_remove_callback(ip);
			rc = 0;
			break;
		}
	}
	mutex_exit(&cpu_idle_cb_lock);

	if (rc == 0) {
		kmem_free(ip, sizeof (*ip));
	} else {
		cmn_err(CE_NOTE,
		    "!cpu_event: callback handle %p not found.", (void *)hdl);
	}

	return (rc);
}
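
/*
 * Example usage of the registration interface above. This is an illustrative
 * sketch only; "my_idle_enter", "my_idle_exit", "my_arg", "my_cb" and
 * "my_hdl" are hypothetical client names, not part of this file:
 *
 *	static cpu_idle_callback_t my_cb = {
 *		CPU_IDLE_CALLBACK_VERS,
 *		my_idle_enter,
 *		my_idle_exit,
 *	};
 *	static cpu_idle_callback_handle_t my_hdl;
 *
 *	if (cpu_idle_register_callback(CPU_IDLE_CB_PRIO_DYNAMIC,
 *	    &my_cb, my_arg, &my_hdl) == 0) {
 *		...
 *		(void) cpu_idle_unregister_callback(my_hdl);
 *	}
 */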

static int
cpu_idle_enter_state(cpu_idle_cb_state_t *sp, intptr_t state)
{
	sp->v.idle_state->cipv_intptr = state;
	sp->v.enter_ts->cipv_hrtime = gethrtime_unscaled();
	sp->v.last_busy->cipv_hrtime = sp->v.enter_ts->cipv_hrtime -
	    sp->v.exit_ts->cipv_hrtime;
	sp->v.total_busy->cipv_hrtime += sp->v.last_busy->cipv_hrtime;
	if (sp->v.ready == B_FALSE) {
		sp->v.ready = B_TRUE;
		return (0);
	}

	return (1);
}

static void
cpu_idle_exit_state(cpu_idle_cb_state_t *sp)
{
	sp->v.idle_state->cipv_intptr = CPU_IDLE_STATE_NORMAL;
	sp->v.exit_ts->cipv_hrtime = gethrtime_unscaled();
	sp->v.last_idle->cipv_hrtime = sp->v.exit_ts->cipv_hrtime -
	    sp->v.enter_ts->cipv_hrtime;
	sp->v.total_idle->cipv_hrtime += sp->v.last_idle->cipv_hrtime;
}
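
/*
 * Illustrative timeline for the bookkeeping done by cpu_idle_enter_state()
 * and cpu_idle_exit_state() above (the times are made-up unscaled hrtime
 * values, used only to show the arithmetic):
 *
 *	exit_ts = 100	(previous wakeup)
 *	... CPU runs ...
 *	cpu_idle_enter_state() at t = 160:
 *		enter_ts   = 160
 *		last_busy  = 160 - 100 = 60
 *		total_busy += 60
 *	... CPU idles ...
 *	cpu_idle_exit_state() at t = 200:
 *		exit_ts    = 200
 *		last_idle  = 200 - 160 = 40
 *		total_idle += 40
 */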

int
cpu_idle_enter(int state, int flag,
    cpu_idle_check_wakeup_t check_func, void *check_arg)
{
	int i;
	cpu_idle_cb_item_t *cip;
	cpu_idle_cb_state_t *sp;
	cpu_idle_callback_context_t ctx;
#if defined(__x86)
	ulong_t iflags;
#endif

	ctx = CPU_IDLE_GET_CTX(CPU);
	ASSERT(CPU->cpu_seqid < max_ncpus);
	sp = &cpu_idle_cb_state[CPU->cpu_seqid];
	ASSERT(sp->v.index == 0);
	if (sp->v.enabled == B_FALSE) {
#if defined(__x86)
		/* Intercept CPU at a safe point before powering it off. */
		if (CPU_IN_SET(cpu_idle_intercept_set, CPU->cpu_id)) {
			iflags = intr_clear();
			CPUSET_ATOMIC_DEL(cpu_idle_intercept_set, CPU->cpu_id);
			/*CONSTCOND*/
			while (1) {
				SMT_PAUSE();
			}
		}
#endif

		return (0);
	}

	/*
	 * On x86, cpu_idle_enter can be called from the idle thread with
	 * interrupts either enabled or disabled, so interrupts must be
	 * disabled here before handling the idle state transition.
	 * On SPARC, cpu_idle_enter is called from the idle thread with
	 * interrupts disabled, so no special handling is necessary.
	 */
#if defined(__x86)
	iflags = intr_clear();
#endif

	/* Skip calling callbacks if state is not ready for the current CPU. */
	if (cpu_idle_enter_state(sp, state) == 0) {
#if defined(__x86)
		intr_restore(iflags);
#endif
		return (0);
	}

	for (i = 0, cip = cpu_idle_cb_array; i < cpu_idle_cb_curr; i++, cip++) {
		/*
		 * Increase index so the corresponding idle_exit callback
		 * will be invoked should an interrupt happen during the
		 * idle_enter callback.
		 */
		sp->v.index++;

		/* Call idle_enter callback function if it's not NULL. */
		if (cip->enter != NULL) {
			cip->enter(cip->arg, ctx, check_func, check_arg);

			/*
			 * cpu_idle_enter runs with interrupts disabled, so
			 * the idle_enter callbacks are also called with
			 * interrupts disabled. It is permissible for the
			 * callbacks to enable interrupts, if they can also
			 * handle the condition when an interrupt occurs.
			 *
			 * However, if an interrupt occurs and we return here
			 * without dealing with it, we return to the
			 * cpu_idle_enter() caller with EBUSY, and the caller
			 * will not enter the idle state.
			 *
			 * We detect the interrupt by checking the index value
			 * of the state pointer. If it is not the index we
			 * incremented above, then it was cleared while
			 * processing the interrupt.
			 *
			 * Also note that at this point of the code the normal
			 * index value will be one greater than the variable
			 * 'i' in the loop, as it hasn't yet been incremented.
			 */
			if (sp->v.index != i + 1) {
#if defined(__x86)
				intr_restore(iflags);
#endif
				return (EBUSY);
			}
		}
	}
#if defined(__x86)
	intr_restore(iflags);
#endif

	return (0);
}

void
cpu_idle_exit(int flag)
{
	int i;
	cpu_idle_cb_item_t *cip;
	cpu_idle_cb_state_t *sp;
	cpu_idle_callback_context_t ctx;
#if defined(__x86)
	ulong_t iflags;
#endif

	ASSERT(CPU->cpu_seqid < max_ncpus);
	sp = &cpu_idle_cb_state[CPU->cpu_seqid];

#if defined(__sparc)
	/*
	 * On SPARC, cpu_idle_exit will only be called from the idle thread
	 * with interrupts disabled.
	 */
	if (sp->v.index != 0) {
		ctx = CPU_IDLE_GET_CTX(CPU);
		cpu_idle_exit_state(sp);
		for (i = sp->v.index - 1; i >= 0; i--) {
			cip = &cpu_idle_cb_array[i];
			if (cip->exit != NULL) {
				cip->exit(cip->arg, ctx, flag);
			}
		}
		sp->v.index = 0;
	}
#elif defined(__x86)
	/*
	 * On x86, cpu_idle_exit will be called from the idle thread or an
	 * interrupt handler. When called from an interrupt handler,
	 * interrupts are disabled. When called from the idle thread,
	 * interrupts may be disabled or enabled.
	 */

	/* Called from interrupt, interrupts are already disabled. */
	if (flag & CPU_IDLE_CB_FLAG_INTR) {
		/*
		 * Return if cpu_idle_exit has already been called or
		 * there is no registered callback.
		 */
		if (sp->v.index == 0) {
			return;
		}
		ctx = CPU_IDLE_GET_CTX(CPU);
		cpu_idle_exit_state(sp);
		for (i = sp->v.index - 1; i >= 0; i--) {
			cip = &cpu_idle_cb_array[i];
			if (cip->exit != NULL) {
				cip->exit(cip->arg, ctx, flag);
			}
		}
		sp->v.index = 0;
	/* Called from idle thread, need to disable interrupts. */
	} else {
		iflags = intr_clear();
		if (sp->v.index != 0) {
			ctx = CPU_IDLE_GET_CTX(CPU);
			cpu_idle_exit_state(sp);
			for (i = sp->v.index - 1; i >= 0; i--) {
				cip = &cpu_idle_cb_array[i];
				if (cip->exit != NULL) {
					cip->exit(cip->arg, ctx, flag);
				}
			}
			sp->v.index = 0;
		}
		intr_restore(iflags);
	}
#endif
}

cpu_idle_callback_context_t
cpu_idle_get_context(void)
{
	return (CPU_IDLE_GET_CTX(CPU));
}

/*
 * Allocate property structures in groups of CPU_IDLE_VALUE_GROUP_SIZE to
 * improve cache efficiency. To simplify the implementation, memory allocated
 * for property structures is never freed.
 */
static void
cpu_idle_prop_allocate_impl(void)
{
	int i;
	size_t sz;
	intptr_t buf;
	cpu_idle_prop_impl_t *prop;
	cpu_idle_prop_value_t *valp;

	ASSERT(!CPU_ON_INTR(CPU));
	prop = kmem_zalloc(sizeof (*prop) * CPU_IDLE_VALUE_GROUP_SIZE,
	    KM_SLEEP);
	sz = sizeof (*valp) * CPU_IDLE_VALUE_GROUP_SIZE * max_ncpus;
	sz += CPU_CACHE_COHERENCE_SIZE;
	buf = (intptr_t)kmem_zalloc(sz, KM_SLEEP);
	valp = (cpu_idle_prop_value_t *)P2ROUNDUP(buf,
	    CPU_CACHE_COHERENCE_SIZE);

	for (i = 0; i < CPU_IDLE_VALUE_GROUP_SIZE; i++, prop++, valp++) {
		prop->value = valp;
		prop->next = cpu_idle_prop_free;
		cpu_idle_prop_free = prop;
	}
}

int
cpu_idle_prop_create_property(const char *name, cpu_idle_prop_type_t type,
    cpu_idle_prop_update_t update, void *arg, cpu_idle_prop_handle_t *hdlp)
{
	int rc = EEXIST;
	cpu_idle_prop_impl_t *prop;

	ASSERT(!CPU_ON_INTR(CPU));
	if (name == NULL || hdlp == NULL) {
		cmn_err(CE_NOTE,
		    "!cpu_event: NULL parameters in create_property.");
		return (EINVAL);
	}

	mutex_enter(&cpu_idle_prop_lock);
	for (prop = cpu_idle_prop_busy; prop != NULL; prop = prop->next) {
		if (strcmp(prop->name, name) == 0) {
			cmn_err(CE_NOTE,
			    "!cpu_event: property %s already exists.", name);
			break;
		}
	}
	if (prop == NULL) {
		if (cpu_idle_prop_free == NULL) {
			cpu_idle_prop_allocate_impl();
		}
		ASSERT(cpu_idle_prop_free != NULL);
		prop = cpu_idle_prop_free;
		cpu_idle_prop_free = prop->next;
		prop->next = cpu_idle_prop_busy;
		cpu_idle_prop_busy = prop;

		ASSERT(prop->value != NULL);
		prop->name = strdup(name);
		prop->type = type;
		prop->update = update;
		prop->private = arg;
		prop->refcnt = 1;
		*hdlp = (cpu_idle_prop_handle_t)prop;
		rc = 0;
	}
	mutex_exit(&cpu_idle_prop_lock);

	return (rc);
}

int
cpu_idle_prop_destroy_property(cpu_idle_prop_handle_t hdl)
{
	int rc = ENODEV;
	cpu_idle_prop_impl_t *prop, **propp;
	cpu_idle_prop_value_t *valp;

	ASSERT(!CPU_ON_INTR(CPU));
	if (hdl == NULL) {
		cmn_err(CE_NOTE,
		    "!cpu_event: hdl is NULL in destroy_property.");
		return (EINVAL);
	}

	prop = (cpu_idle_prop_impl_t *)hdl;
	mutex_enter(&cpu_idle_prop_lock);
	for (propp = &cpu_idle_prop_busy; *propp != NULL;
	    propp = &(*propp)->next) {
		if (*propp == prop) {
			ASSERT(prop->refcnt > 0);
			if (atomic_cas_32(&prop->refcnt, 1, 0) == 1) {
				*propp = prop->next;
				strfree(prop->name);
				valp = prop->value;
				bzero(prop, sizeof (*prop));
				prop->value = valp;
				prop->next = cpu_idle_prop_free;
				cpu_idle_prop_free = prop;
				rc = 0;
			} else {
				rc = EBUSY;
			}
			break;
		}
	}
	mutex_exit(&cpu_idle_prop_lock);

	return (rc);
}

int
cpu_idle_prop_create_handle(const char *name, cpu_idle_prop_handle_t *hdlp)
{
	int rc = ENODEV;
	cpu_idle_prop_impl_t *prop;

	ASSERT(!CPU_ON_INTR(CPU));
	if (name == NULL || hdlp == NULL) {
		cmn_err(CE_NOTE,
		    "!cpu_event: NULL parameters in create_handle.");
		return (EINVAL);
	}

	mutex_enter(&cpu_idle_prop_lock);
	for (prop = cpu_idle_prop_busy; prop != NULL; prop = prop->next) {
		if (strcmp(prop->name, name) == 0) {
			/* Hold one refcount on object. */
			ASSERT(prop->refcnt > 0);
			atomic_inc_32(&prop->refcnt);
			*hdlp = (cpu_idle_prop_handle_t)prop;
			rc = 0;
			break;
		}
	}
	mutex_exit(&cpu_idle_prop_lock);

	return (rc);
}

int
cpu_idle_prop_destroy_handle(cpu_idle_prop_handle_t hdl)
{
	int rc = ENODEV;
	cpu_idle_prop_impl_t *prop;

	ASSERT(!CPU_ON_INTR(CPU));
	if (hdl == NULL) {
		cmn_err(CE_NOTE,
		    "!cpu_event: hdl is NULL in destroy_handle.");
		return (EINVAL);
	}

	mutex_enter(&cpu_idle_prop_lock);
	for (prop = cpu_idle_prop_busy; prop != NULL; prop = prop->next) {
		if (prop == hdl) {
			/* Release refcnt held in create_handle. */
			ASSERT(prop->refcnt > 1);
			atomic_dec_32(&prop->refcnt);
			rc = 0;
			break;
		}
	}
	mutex_exit(&cpu_idle_prop_lock);

	return (rc);
}
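
/*
 * Example usage of the property handle interface above. This is an
 * illustrative sketch only; the local variables "hdl", "ctx" and "val" are
 * hypothetical client code:
 *
 *	cpu_idle_prop_handle_t hdl;
 *	cpu_idle_callback_context_t ctx;
 *	cpu_idle_prop_value_t val;
 *
 *	if (cpu_idle_prop_create_handle(CPU_IDLE_PROP_TOTAL_IDLE_TIME,
 *	    &hdl) == 0) {
 *		ctx = cpu_idle_get_context();
 *		if (cpu_idle_prop_get_value(hdl, ctx, &val) == 0) {
 *			... use val.cipv_hrtime ...
 *		}
 *		(void) cpu_idle_prop_destroy_handle(hdl);
 *	}
 */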

cpu_idle_prop_type_t
cpu_idle_prop_get_type(cpu_idle_prop_handle_t hdl)
{
	ASSERT(hdl != NULL);
	return (((cpu_idle_prop_impl_t *)hdl)->type);
}

const char *
cpu_idle_prop_get_name(cpu_idle_prop_handle_t hdl)
{
	ASSERT(hdl != NULL);
	return (((cpu_idle_prop_impl_t *)hdl)->name);
}

int
cpu_idle_prop_get_value(cpu_idle_prop_handle_t hdl,
    cpu_idle_callback_context_t ctx, cpu_idle_prop_value_t *valp)
{
	int idx, rc = 0;
	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;

	ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
	if (hdl == NULL || valp == NULL) {
		cmn_err(CE_NOTE, "!cpu_event: NULL parameters in prop_get.");
		return (EINVAL);
	}
	idx = CPU_IDLE_CTX2IDX(ctx);
	if (prop->update != NULL) {
		cpu_idle_cb_state_t *sp;

		ASSERT(CPU->cpu_seqid < max_ncpus);
		sp = &cpu_idle_cb_state[CPU->cpu_seqid];
		/* CPU's idle enter timestamp as sequence number. */
		rc = prop->update(prop->private,
		    (uint64_t)sp->v.enter_ts->cipv_hrtime, &prop->value[idx]);
	}
	if (rc == 0) {
		*valp = prop->value[idx];
	}

	return (rc);
}

uint32_t
cpu_idle_prop_get_uint32(cpu_idle_prop_handle_t hdl,
    cpu_idle_callback_context_t ctx)
{
	int idx;
	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;

	ASSERT(hdl != NULL);
	ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
	idx = CPU_IDLE_CTX2IDX(ctx);
	return (prop->value[idx].cipv_uint32);
}

uint64_t
cpu_idle_prop_get_uint64(cpu_idle_prop_handle_t hdl,
    cpu_idle_callback_context_t ctx)
{
	int idx;
	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;

	ASSERT(hdl != NULL);
	ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
	idx = CPU_IDLE_CTX2IDX(ctx);
	return (prop->value[idx].cipv_uint64);
}

intptr_t
cpu_idle_prop_get_intptr(cpu_idle_prop_handle_t hdl,
    cpu_idle_callback_context_t ctx)
{
	int idx;
	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;

	ASSERT(hdl != NULL);
	ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
	idx = CPU_IDLE_CTX2IDX(ctx);
	return (prop->value[idx].cipv_intptr);
}

hrtime_t
cpu_idle_prop_get_hrtime(cpu_idle_prop_handle_t hdl,
    cpu_idle_callback_context_t ctx)
{
	int idx;
	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;

	ASSERT(hdl != NULL);
	ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
	idx = CPU_IDLE_CTX2IDX(ctx);
	return (prop->value[idx].cipv_hrtime);
}

void
cpu_idle_prop_set_value(cpu_idle_prop_handle_t hdl,
    cpu_idle_callback_context_t ctx, cpu_idle_prop_value_t val)
{
	int idx;
	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;

	ASSERT(hdl != NULL);
	ASSERT(CPU_IDLE_CTX2CPUID(ctx) < max_ncpus);
	idx = CPU_IDLE_CTX2IDX(ctx);
	prop->value[idx] = val;
}

void
cpu_idle_prop_set_all(cpu_idle_prop_handle_t hdl, cpu_idle_prop_value_t val)
{
	int i, idx;
	cpu_idle_prop_impl_t *prop = (cpu_idle_prop_impl_t *)hdl;

	ASSERT(hdl != NULL);
	for (i = 0; i < max_ncpus; i++) {
		idx = CPU_IDLE_CTX2IDX(i);
		prop->value[idx] = val;
	}
}

static int cpu_idle_prop_update_intr_cnt(void *arg, uint64_t seqnum,
    cpu_idle_prop_value_t *valp)
{
	int i;
	uint64_t val;

	/* Sum per-PIL interrupt counters for the current CPU. */
	for (val = 0, i = 0; i < PIL_MAX; i++) {
		val += CPU->cpu_stats.sys.intr[i];
	}
	valp->cipv_uint64 = val;

	return (0);
}

uint_t
cpu_idle_get_cpu_state(cpu_t *cp)
{
	ASSERT(cp != NULL && cp->cpu_seqid < max_ncpus);
	return ((uint_t)cpu_idle_prop_get_uint32(
	    cpu_idle_prop_array[CPU_IDLE_PROP_IDX_IDLE_STATE].handle,
	    CPU_IDLE_GET_CTX(cp)));
}

/*
 * Intercept CPU at a safe point in idle() before powering it off.
 */
void
cpu_idle_intercept_cpu(cpu_t *cp)
{
	ASSERT(cp->cpu_seqid < max_ncpus);
	ASSERT(cpu_idle_cb_state[cp->cpu_seqid].v.enabled == B_FALSE);

	/* Set flag to intercept CPU. */
	CPUSET_ATOMIC_ADD(cpu_idle_intercept_set, cp->cpu_id);
	/* Wake up CPU from possible sleep state. */
	poke_cpu(cp->cpu_id);
	while (CPU_IN_SET(cpu_idle_intercept_set, cp->cpu_id)) {
		DELAY(1);
	}
	/*
	 * Now the target CPU is spinning in a pause loop with interrupts
	 * disabled.
	 */
}