5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License (the "License").
7 * You may not use this file except in compliance with the License.
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
26 * Copyright (c) 2009-2010, Intel Corporation.
27 * All rights reserved.
31 #include <sys/smp_impldefs.h>
33 #include <sys/psm_modctl.h>
35 #include <sys/cmn_err.h>
36 #include <sys/strlog.h>
37 #include <sys/clock.h>
38 #include <sys/debug.h>
40 #include <sys/x86_archext.h>
41 #include <sys/cpupart.h>
42 #include <sys/cpuvar.h>
43 #include <sys/cpu_event.h>
47 #include <sys/archsystm.h>
48 #include <sys/machsystm.h>
49 #include <sys/sysmacros.h>
50 #include <sys/memlist.h>
51 #include <sys/param.h>
52 #include <sys/promif.h>
53 #include <sys/cpu_pm.h>
55 #include <sys/hypervisor.h>
57 #include <sys/mach_intr.h>
58 #include <vm/hat_i86.h>
59 #include <sys/kdi_machimpl.h>
62 #include <sys/sunddi.h>
63 #include <sys/sunndi.h>
64 #include <sys/cpc_pcbe.h>
/*
 * Byte offset of member "m" within aggregate type "s", as a size_t.
 *
 * The whole expansion is wrapped in parentheses so that the macro behaves as
 * a single primary expression wherever it is used (for example as the operand
 * of sizeof or next to a postfix operator), instead of exposing a bare cast.
 */
#define	OFFSETOF(s, m)	((size_t)(&(((s *)0)->m)))
69 * Local function prototypes
71 static int mp_disable_intr(processorid_t cpun
);
72 static void mp_enable_intr(processorid_t cpun
);
73 static void mach_init();
74 static void mach_picinit();
75 static int machhztomhz(uint64_t cpu_freq_hz
);
76 static uint64_t mach_getcpufreq(void);
77 static void mach_fixcpufreq(void);
78 static int mach_clkinit(int, int *);
79 static void mach_smpinit(void);
80 static int mach_softlvl_to_vect(int ipl
);
81 static void mach_get_platform(int owner
);
82 static void mach_construct_info();
83 static int mach_translate_irq(dev_info_t
*dip
, int irqno
);
84 static int mach_intr_ops(dev_info_t
*, ddi_intr_handle_impl_t
*,
85 psm_intr_op_t
, int *);
86 static void mach_notify_error(int level
, char *errmsg
);
87 static hrtime_t
dummy_hrtime(void);
88 static void dummy_scalehrtime(hrtime_t
*);
89 static uint64_t dummy_unscalehrtime(hrtime_t
);
91 static void cpu_wakeup(cpu_t
*, int);
93 void cpu_idle_mwait(void);
94 static void cpu_wakeup_mwait(cpu_t
*, int);
96 static int mach_cpu_create_devinfo(cpu_t
*cp
, dev_info_t
**dipp
);
99 * External reference functions
101 extern void return_instr();
102 extern uint64_t freq_tsc(uint32_t *);
104 extern uint64_t freq_notsc(uint32_t *);
106 extern void pc_gethrestime(timestruc_t
*);
107 extern int cpuid_get_coreid(cpu_t
*);
108 extern int cpuid_get_chipid(cpu_t
*);
111 * PSM functions initialization
113 void (*psm_shutdownf
)(int, int) = (void (*)(int, int))return_instr
;
114 void (*psm_preshutdownf
)(int, int) = (void (*)(int, int))return_instr
;
115 void (*psm_notifyf
)(int) = (void (*)(int))return_instr
;
116 void (*psm_set_idle_cpuf
)(int) = (void (*)(int))return_instr
;
117 void (*psm_unset_idle_cpuf
)(int) = (void (*)(int))return_instr
;
118 void (*psminitf
)() = mach_init
;
119 void (*picinitf
)() = return_instr
;
120 int (*clkinitf
)(int, int *) = (int (*)(int, int *))return_instr
;
121 int (*ap_mlsetup
)() = (int (*)(void))return_instr
;
122 void (*send_dirintf
)() = return_instr
;
123 void (*setspl
)(int) = (void (*)(int))return_instr
;
124 int (*addspl
)(int, int, int, int) = (int (*)(int, int, int, int))return_instr
;
125 int (*delspl
)(int, int, int, int) = (int (*)(int, int, int, int))return_instr
;
126 int (*get_pending_spl
)(void) = (int (*)(void))return_instr
;
127 int (*addintr
)(void *, int, avfunc
, char *, int, caddr_t
, caddr_t
,
128 uint64_t *, dev_info_t
*) = NULL
;
129 void (*remintr
)(void *, int, avfunc
, int) = NULL
;
130 void (*kdisetsoftint
)(int, struct av_softinfo
*)=
131 (void (*)(int, struct av_softinfo
*))return_instr
;
132 void (*setsoftint
)(int, struct av_softinfo
*)=
133 (void (*)(int, struct av_softinfo
*))return_instr
;
134 int (*slvltovect
)(int) = (int (*)(int))return_instr
;
135 int (*setlvl
)(int, int *) = (int (*)(int, int *))return_instr
;
136 void (*setlvlx
)(int, int) = (void (*)(int, int))return_instr
;
137 int (*psm_disable_intr
)(int) = mp_disable_intr
;
138 void (*psm_enable_intr
)(int) = mp_enable_intr
;
139 hrtime_t (*gethrtimef
)(void) = dummy_hrtime
;
140 hrtime_t (*gethrtimeunscaledf
)(void) = dummy_hrtime
;
141 void (*scalehrtimef
)(hrtime_t
*) = dummy_scalehrtime
;
142 uint64_t (*unscalehrtimef
)(hrtime_t
) = dummy_unscalehrtime
;
143 int (*psm_translate_irq
)(dev_info_t
*, int) = mach_translate_irq
;
144 void (*gethrestimef
)(timestruc_t
*) = pc_gethrestime
;
145 void (*psm_notify_error
)(int, char *) = (void (*)(int, char *))NULL
;
146 int (*psm_get_clockirq
)(int) = NULL
;
147 int (*psm_get_ipivect
)(int, int) = NULL
;
148 uchar_t (*psm_get_ioapicid
)(uchar_t
) = NULL
;
149 uint32_t (*psm_get_localapicid
)(uint32_t) = NULL
;
150 uchar_t (*psm_xlate_vector_by_irq
)(uchar_t
) = NULL
;
152 int (*psm_clkinit
)(int) = NULL
;
153 void (*psm_timer_reprogram
)(hrtime_t
) = NULL
;
154 void (*psm_timer_enable
)(void) = NULL
;
155 void (*psm_timer_disable
)(void) = NULL
;
156 void (*psm_post_cyclic_setup
)(void *arg
) = NULL
;
157 int (*psm_intr_ops
)(dev_info_t
*, ddi_intr_handle_impl_t
*, psm_intr_op_t
,
158 int *) = mach_intr_ops
;
159 int (*psm_state
)(psm_state_request_t
*) = (int (*)(psm_state_request_t
*))
162 void (*notify_error
)(int, char *) = (void (*)(int, char *))return_instr
;
163 void (*hrtime_tick
)(void) = return_instr
;
165 int (*psm_cpu_create_devinfo
)(cpu_t
*, dev_info_t
**) = mach_cpu_create_devinfo
;
166 int (*psm_cpu_get_devinfo
)(cpu_t
*, dev_info_t
**) = NULL
;
168 /* global IRM pool for APIX (PSM) module */
169 ddi_irm_pool_t
*apix_irm_pool_p
= NULL
;
172 * True if the generic TSC code is our source of hrtime, rather than whatever
173 * the PSM can provide.
176 int tsc_gethrtime_enable
= 0;
178 int tsc_gethrtime_enable
= 1;
180 int tsc_gethrtime_initted
= 0;
183 * True if the hrtime implementation is "hires"; namely, better than microdata.
185 int gethrtime_hires
= 0;
190 static struct psm_ops mach_ops
;
191 static struct psm_ops
*mach_set
[4] = {&mach_ops
, NULL
, NULL
, NULL
};
192 static ushort_t mach_ver
[4] = {0, 0, 0, 0};
195 * virtualization support for psm
197 void *psm_vt_ops
= NULL
;
199 * If non-zero, idle cpus will become "halted" when there's
202 int idle_cpu_use_hlt
= 1;
206 * If non-zero, idle cpus will use mwait if available to halt instead of hlt.
208 int idle_cpu_prefer_mwait
= 1;
210 * Set to 0 to avoid MONITOR+CLFLUSH assertion.
212 int idle_cpu_assert_cflush_monitor
= 1;
215 * If non-zero, idle cpus will not use power saving Deep C-States idle loop.
217 int idle_cpu_no_deep_c
= 0;
219 * Non-power saving idle loop and wakeup pointers.
220 * Allows user to toggle Deep Idle power saving feature on/off.
222 void (*non_deep_idle_cpu
)() = cpu_idle
;
223 void (*non_deep_idle_disp_enq_thread
)(cpu_t
*, int);
226 * Object for the kernel to access the HPET.
230 #endif /* ifndef __xpv */
232 uint_t cp_haltset_fanout
= 0;
236 pg_plat_hw_shared(cpu_t
*cp
, pghw_type_t hw
)
240 if (is_x86_feature(x86_featureset
, X86FSET_HTT
)) {
242 * Hyper-threading is SMT
249 if (cpuid_get_cores_per_compunit(cp
) > 1)
254 if (cpuid_get_procnodes_per_pkg(cp
) > 1)
259 if (is_x86_feature(x86_featureset
, X86FSET_CMP
) ||
260 is_x86_feature(x86_featureset
, X86FSET_HTT
))
265 if (cpuid_get_ncpu_sharing_last_cache(cp
) > 1)
269 case PGHW_POW_ACTIVE
:
270 if (cpupm_domain_id(cp
, CPUPM_DTYPE_ACTIVE
) != (id_t
)-1)
275 if (cpupm_domain_id(cp
, CPUPM_DTYPE_IDLE
) != (id_t
)-1)
285 * Compare two CPUs and see if they have a pghw_type_t sharing relationship
286 * If pghw_type_t is an unsupported hardware type, then return -1
289 pg_plat_cpus_share(cpu_t
*cpu_a
, cpu_t
*cpu_b
, pghw_type_t hw
)
293 pgp_a
= pg_plat_hw_instance_id(cpu_a
, hw
);
294 pgp_b
= pg_plat_hw_instance_id(cpu_b
, hw
);
296 if (pgp_a
== -1 || pgp_b
== -1)
299 return (pgp_a
== pgp_b
);
303 * Return a physical instance identifier for known hardware sharing
307 pg_plat_hw_instance_id(cpu_t
*cpu
, pghw_type_t hw
)
311 return (cpuid_get_coreid(cpu
));
313 return (cpuid_get_last_lvl_cacheid(cpu
));
315 return (cpuid_get_compunitid(cpu
));
317 return (cpuid_get_procnodeid(cpu
));
319 return (cpuid_get_chipid(cpu
));
320 case PGHW_POW_ACTIVE
:
321 return (cpupm_domain_id(cpu
, CPUPM_DTYPE_ACTIVE
));
323 return (cpupm_domain_id(cpu
, CPUPM_DTYPE_IDLE
));
330 * Express preference for optimizing for sharing relationship
334 pg_plat_hw_rank(pghw_type_t hw1
, pghw_type_t hw2
)
338 static pghw_type_t hw_hier
[] = {
349 for (i
= 0; hw_hier
[i
] != PGHW_NUM_COMPONENTS
; i
++) {
350 if (hw_hier
[i
] == hw1
)
352 if (hw_hier
[i
] == hw2
)
363 * Override the default CMT dispatcher policy for the specified
364 * hardware sharing relationship
367 pg_plat_cmt_policy(pghw_type_t hw
)
370 * For shared caches, also load balance across them to
371 * maximize aggregate cache capacity
373 * On AMD family 0x15 CPUs, cores come in pairs called
374 * compute units, sharing the FPU and the I$ and L2
375 * caches. Use balancing and cache affinity.
380 return (CMT_BALANCE
|CMT_AFFINITY
);
382 return (CMT_NO_POLICY
);
387 pg_plat_get_core_id(cpu_t
*cpu
)
389 return ((id_t
)cpuid_get_coreid(cpu
));
393 cmp_set_nosteal_interval(void)
395 /* Set the nosteal interval (used by disp_getbest()) to 100us */
396 nosteal_nsec
= 100000UL;
400 * Routine to ensure initial callers to hrtime get 0 as return
410 dummy_scalehrtime(hrtime_t
*ticks
)
414 dummy_unscalehrtime(hrtime_t nsecs
)
416 return ((uint64_t)nsecs
);
420 * Supports Deep C-State power saving idle loop.
423 cpu_idle_adaptive(void)
425 (*CPU
->cpu_m
.mcpu_idle_cpu
)();
429 * Function called by CPU idle notification framework to check whether CPU
430 * has been awakened. It will be called with interrupt disabled.
431 * If CPU has been awakened, call cpu_idle_exit() to notify CPU idle
432 * notification framework.
436 cpu_idle_check_wakeup(void *arg
)
439 * Toggle interrupt flag to detect pending interrupts.
440 * If interrupt happened, do_interrupt() will notify CPU idle
441 * notification framework so no need to call cpu_idle_exit() here.
449 * Idle the present CPU until wakened via an interrupt
455 processorid_t cpu_sid
= cpup
->cpu_seqid
;
456 cpupart_t
*cp
= cpup
->cpu_part
;
460 * If this CPU is online, and there are multiple CPUs
461 * in the system, then we should note our halting
462 * by adding ourselves to the partition's halted CPU
463 * bitmap. This allows other CPUs to find/awaken us when
464 * work becomes available.
466 if (cpup
->cpu_flags
& CPU_OFFLINE
|| ncpus
== 1)
470 * Add ourselves to the partition's halted CPUs bitmap
471 * and set our HALTED flag, if necessary.
473 * When a thread becomes runnable, it is placed on the queue
474 * and then the halted CPU bitmap is checked to determine who
475 * (if anyone) should be awakened. We therefore need to first
476 * add ourselves to the bitmap, and then check if there
477 * is any work available. The order is important to prevent a race
478 * that can lead to work languishing on a run queue somewhere while
479 * this CPU remains halted.
481 * Either the producing CPU will see we're halted and will awaken us,
482 * or this CPU will see the work available in disp_anywork().
484 * Note that memory barriers after updating the HALTED flag
485 * are not necessary since an atomic operation (updating the bitset)
486 * immediately follows. On x86 the atomic operation acts as a
487 * memory barrier for the update of cpu_disp_flags.
490 cpup
->cpu_disp_flags
|= CPU_DISP_HALTED
;
491 bitset_atomic_add(&cp
->cp_haltset
, cpu_sid
);
495 * Check to make sure there's really nothing to do.
496 * Work destined for this CPU may become available after
497 * this check. We'll be notified through the clearing of our
498 * bit in the halted CPU bitmap, and a poke.
500 if (disp_anywork()) {
502 cpup
->cpu_disp_flags
&= ~CPU_DISP_HALTED
;
503 bitset_atomic_del(&cp
->cp_haltset
, cpu_sid
);
509 * We're on our way to being halted.
511 * Disable interrupts now, so that we'll awaken immediately
512 * after halting if someone tries to poke us between now and
513 * the time we actually halt.
515 * We check for the presence of our bit after disabling interrupts.
516 * If it's cleared, we'll return. If the bit is cleared after
517 * we check then the poke will pop us out of the halted state.
519 * This means that the ordering of the poke and the clearing
520 * of the bit by cpu_wakeup is important.
521 * cpu_wakeup() must clear, then poke.
522 * cpu_idle() must disable interrupts, then check for the bit.
526 if (hset_update
&& bitset_in_set(&cp
->cp_haltset
, cpu_sid
) == 0) {
527 cpup
->cpu_disp_flags
&= ~CPU_DISP_HALTED
;
533 * The check for anything locally runnable is here for performance
534 * and isn't needed for correctness. disp_nrunnable ought to be
535 * in our cache still, so it's inexpensive to check, and if there
536 * is anything runnable we won't have to wait for the poke.
538 if (cpup
->cpu_disp
->disp_nrunnable
!= 0) {
540 cpup
->cpu_disp_flags
&= ~CPU_DISP_HALTED
;
541 bitset_atomic_del(&cp
->cp_haltset
, cpu_sid
);
547 if (cpu_idle_enter(IDLE_STATE_C1
, 0,
548 cpu_idle_check_wakeup
, NULL
) == 0) {
550 cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE
);
554 * We're no longer halted
557 cpup
->cpu_disp_flags
&= ~CPU_DISP_HALTED
;
558 bitset_atomic_del(&cp
->cp_haltset
, cpu_sid
);
564 * If "cpu" is halted, then wake it up clearing its halted bit in advance.
565 * Otherwise, see if other CPUs in the cpu partition are halted and need to
566 * be woken up so that they can steal the thread we placed on this CPU.
567 * This function is only used on MP systems.
570 cpu_wakeup(cpu_t
*cpu
, int bound
)
573 processorid_t cpu_sid
;
577 cpu_sid
= cpu
->cpu_seqid
;
578 if (bitset_in_set(&cp
->cp_haltset
, cpu_sid
)) {
580 * Clear the halted bit for that CPU since it will be
583 bitset_atomic_del(&cp
->cp_haltset
, cpu_sid
);
585 * We may find the current CPU present in the halted cpuset
586 * if we're in the context of an interrupt that occurred
587 * before we had a chance to clear our bit in cpu_idle().
588 * Poking ourself is obviously unnecessary, since if
589 * we're here, we're not halted.
592 poke_cpu(cpu
->cpu_id
);
596 * This cpu isn't halted, but it's idle or undergoing a
597 * context switch. No need to awaken anyone else.
599 if (cpu
->cpu_thread
== cpu
->cpu_idle_thread
||
600 cpu
->cpu_disp_flags
& CPU_DISP_DONTSTEAL
)
605 * No need to wake up other CPUs if this is for a bound thread.
611 * The CPU specified for wakeup isn't currently halted, so check
612 * to see if there are any other halted CPUs in the partition,
613 * and if there are then awaken one.
616 cpu_found
= bitset_find(&cp
->cp_haltset
);
617 if (cpu_found
== (uint_t
)-1)
619 } while (bitset_atomic_test_and_del(&cp
->cp_haltset
, cpu_found
) < 0);
621 if (cpu_found
!= CPU
->cpu_seqid
) {
622 poke_cpu(cpu_seq
[cpu_found
]->cpu_id
);
628 * Function called by CPU idle notification framework to check whether CPU
629 * has been awakened. It will be called with interrupt disabled.
630 * If CPU has been awakened, call cpu_idle_exit() to notify CPU idle
631 * notification framework.
634 cpu_idle_mwait_check_wakeup(void *arg
)
636 volatile uint32_t *mcpu_mwait
= (volatile uint32_t *)arg
;
639 if (*mcpu_mwait
!= MWAIT_HALTED
) {
641 * CPU has been awakened, notify CPU idle notification system.
643 cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE
);
646 * Toggle interrupt flag to detect pending interrupts.
647 * If interrupt happened, do_interrupt() will notify CPU idle
648 * notification framework so no need to call cpu_idle_exit()
658 * Idle the present CPU until awakened via touching its monitored line
663 volatile uint32_t *mcpu_mwait
= CPU
->cpu_m
.mcpu_mwait
;
665 processorid_t cpu_sid
= cpup
->cpu_seqid
;
666 cpupart_t
*cp
= cpup
->cpu_part
;
670 * Set our mcpu_mwait here, so we can tell if anyone tries to
671 * wake us between now and when we call mwait. No other cpu will
672 * attempt to set our mcpu_mwait until we add ourself to the halted
675 *mcpu_mwait
= MWAIT_HALTED
;
678 * If this CPU is online, and there are multiple CPUs
679 * in the system, then we should note our halting
680 * by adding ourselves to the partition's halted CPU
681 * bitmap. This allows other CPUs to find/awaken us when
682 * work becomes available.
684 if (cpup
->cpu_flags
& CPU_OFFLINE
|| ncpus
== 1)
688 * Add ourselves to the partition's halted CPUs bitmap
689 * and set our HALTED flag, if necessary.
691 * When a thread becomes runnable, it is placed on the queue
692 * and then the halted CPU bitmap is checked to determine who
693 * (if anyone) should be awakened. We therefore need to first
694 * add ourselves to the bitmap, and then check if there
695 * is any work available.
697 * Note that memory barriers after updating the HALTED flag
698 * are not necessary since an atomic operation (updating the bitmap)
699 * immediately follows. On x86 the atomic operation acts as a
700 * memory barrier for the update of cpu_disp_flags.
703 cpup
->cpu_disp_flags
|= CPU_DISP_HALTED
;
704 bitset_atomic_add(&cp
->cp_haltset
, cpu_sid
);
708 * Check to make sure there's really nothing to do.
709 * Work destined for this CPU may become available after
710 * this check. We'll be notified through the clearing of our
711 * bit in the halted CPU bitmap, and a write to our mcpu_mwait.
713 * disp_anywork() checks disp_nrunnable, so we do not have to later.
715 if (disp_anywork()) {
717 cpup
->cpu_disp_flags
&= ~CPU_DISP_HALTED
;
718 bitset_atomic_del(&cp
->cp_haltset
, cpu_sid
);
724 * We're on our way to being halted.
725 * To avoid a lost wakeup, arm the monitor before checking if another
726 * cpu wrote to mcpu_mwait to wake us up.
728 i86_monitor(mcpu_mwait
, 0, 0);
729 if (*mcpu_mwait
== MWAIT_HALTED
) {
730 if (cpu_idle_enter(IDLE_STATE_C1
, 0,
731 cpu_idle_mwait_check_wakeup
, (void *)mcpu_mwait
) == 0) {
732 if (*mcpu_mwait
== MWAIT_HALTED
) {
735 cpu_idle_exit(CPU_IDLE_CB_FLAG_IDLE
);
740 * We're no longer halted
743 cpup
->cpu_disp_flags
&= ~CPU_DISP_HALTED
;
744 bitset_atomic_del(&cp
->cp_haltset
, cpu_sid
);
749 * If "cpu" is halted in mwait, then wake it up clearing its halted bit in
750 * advance. Otherwise, see if other CPUs in the cpu partition are halted and
751 * need to be woken up so that they can steal the thread we placed on this CPU.
752 * This function is only used on MP systems.
755 cpu_wakeup_mwait(cpu_t
*cp
, int bound
)
759 processorid_t cpu_sid
;
761 cpu_part
= cp
->cpu_part
;
762 cpu_sid
= cp
->cpu_seqid
;
765 * Clear the halted bit for that CPU since it will be woken up
768 if (bitset_in_set(&cpu_part
->cp_haltset
, cpu_sid
)) {
770 * Clear the halted bit for that CPU since it will be
773 bitset_atomic_del(&cpu_part
->cp_haltset
, cpu_sid
);
775 * We may find the current CPU present in the halted cpuset
776 * if we're in the context of an interrupt that occurred
777 * before we had a chance to clear our bit in cpu_idle().
778 * Waking ourself is obviously unnecessary, since if
779 * we're here, we're not halted.
781 * monitor/mwait wakeup via writing to our cache line is
782 * harmless and less expensive than always checking if we
783 * are waking ourself which is an uncommon case.
785 MWAIT_WAKEUP(cp
); /* write to monitored line */
789 * This cpu isn't halted, but it's idle or undergoing a
790 * context switch. No need to awaken anyone else.
792 if (cp
->cpu_thread
== cp
->cpu_idle_thread
||
793 cp
->cpu_disp_flags
& CPU_DISP_DONTSTEAL
)
798 * No need to wake up other CPUs if the thread we just enqueued
801 if (bound
|| ncpus
== 1)
805 * See if there's any other halted CPUs. If there are, then
806 * select one, and awaken it.
807 * It's possible that after we find a CPU, somebody else
808 * will awaken it before we get the chance.
809 * In that case, look again.
812 cpu_found
= bitset_find(&cpu_part
->cp_haltset
);
813 if (cpu_found
== (uint_t
)-1)
815 } while (bitset_atomic_test_and_del(&cpu_part
->cp_haltset
,
819 * Do not check if cpu_found is ourself as monitor/mwait
822 MWAIT_WAKEUP(cpu_seq
[cpu_found
]); /* write to monitored line */
827 void (*cpu_pause_handler
)(volatile char *) = NULL
;
830 mp_disable_intr(int cpun
)
833 * switch to the offline cpu
837 * raise ipl to just below cross call
839 splx(XC_SYS_PIL
- 1);
841 * set base spl to prevent the next swtch to idle from
842 * lowering back to ipl 0
844 CPU
->cpu_intr_actv
|= (1 << (XC_SYS_PIL
- 1));
847 return (DDI_SUCCESS
);
851 mp_enable_intr(int cpun
)
854 * switch to the online cpu
858 * clear the interrupt active mask
860 CPU
->cpu_intr_actv
&= ~(1 << (XC_SYS_PIL
- 1));
867 mach_get_platform(int owner
)
875 srv_opsp
= (void **)mach_set
[0];
876 clt_opsp
= (void **)mach_set
[owner
];
877 if (mach_ver
[owner
] == (ushort_t
)PSM_INFO_VER01
)
878 total_ops
= sizeof (struct psm_ops_ver01
) /
879 sizeof (void (*)(void));
880 else if (mach_ver
[owner
] == (ushort_t
)PSM_INFO_VER01_1
)
881 /* no psm_notify_func */
882 total_ops
= OFFSETOF(struct psm_ops
, psm_notify_func
) /
883 sizeof (void (*)(void));
884 else if (mach_ver
[owner
] == (ushort_t
)PSM_INFO_VER01_2
)
885 /* no psm_timer funcs */
886 total_ops
= OFFSETOF(struct psm_ops
, psm_timer_reprogram
) /
887 sizeof (void (*)(void));
888 else if (mach_ver
[owner
] == (ushort_t
)PSM_INFO_VER01_3
)
889 /* no psm_preshutdown function */
890 total_ops
= OFFSETOF(struct psm_ops
, psm_preshutdown
) /
891 sizeof (void (*)(void));
892 else if (mach_ver
[owner
] == (ushort_t
)PSM_INFO_VER01_4
)
893 /* no psm_intr_ops function */
894 total_ops
= OFFSETOF(struct psm_ops
, psm_intr_ops
) /
895 sizeof (void (*)(void));
896 else if (mach_ver
[owner
] == (ushort_t
)PSM_INFO_VER01_5
)
897 /* no psm_state function */
898 total_ops
= OFFSETOF(struct psm_ops
, psm_state
) /
899 sizeof (void (*)(void));
900 else if (mach_ver
[owner
] == (ushort_t
)PSM_INFO_VER01_6
)
901 /* no psm_cpu_ops function */
902 total_ops
= OFFSETOF(struct psm_ops
, psm_cpu_ops
) /
903 sizeof (void (*)(void));
905 total_ops
= sizeof (struct psm_ops
) / sizeof (void (*)(void));
908 * Save the version of the PSM module, in case we need to
909 * behave differently based on version.
911 mach_ver
[0] = mach_ver
[owner
];
913 for (i
= 0; i
< total_ops
; i
++)
914 if (clt_opsp
[i
] != NULL
)
915 srv_opsp
[i
] = clt_opsp
[i
];
919 mach_construct_info()
922 int mach_cnt
[PSM_OWN_OVERRIDE
+1] = {0};
923 int conflict_owner
= 0;
925 if (psmsw
->psw_forw
== psmsw
)
926 panic("No valid PSM modules found");
927 mutex_enter(&psmsw_lock
);
928 for (swp
= psmsw
->psw_forw
; swp
!= psmsw
; swp
= swp
->psw_forw
) {
929 if (!(swp
->psw_flag
& PSM_MOD_IDENTIFY
))
931 mach_set
[swp
->psw_infop
->p_owner
] = swp
->psw_infop
->p_ops
;
932 mach_ver
[swp
->psw_infop
->p_owner
] = swp
->psw_infop
->p_version
;
933 mach_cnt
[swp
->psw_infop
->p_owner
]++;
935 mutex_exit(&psmsw_lock
);
937 mach_get_platform(PSM_OWN_SYS_DEFAULT
);
939 /* check whether there are any conflicts */
940 if (mach_cnt
[PSM_OWN_EXCLUSIVE
] > 1)
941 conflict_owner
= PSM_OWN_EXCLUSIVE
;
942 if (mach_cnt
[PSM_OWN_OVERRIDE
] > 1)
943 conflict_owner
= PSM_OWN_OVERRIDE
;
944 if (conflict_owner
) {
945 /* remove all psm modules except uppc */
947 "Conflicts detected on the following PSM modules:");
948 mutex_enter(&psmsw_lock
);
949 for (swp
= psmsw
->psw_forw
; swp
!= psmsw
; swp
= swp
->psw_forw
) {
950 if (swp
->psw_infop
->p_owner
== conflict_owner
)
951 cmn_err(CE_WARN
, "%s ",
952 swp
->psw_infop
->p_mach_idstring
);
954 mutex_exit(&psmsw_lock
);
956 "Setting the system back to SINGLE processor mode!");
958 "Please edit /etc/mach to remove the invalid PSM module.");
962 if (mach_set
[PSM_OWN_EXCLUSIVE
])
963 mach_get_platform(PSM_OWN_EXCLUSIVE
);
965 if (mach_set
[PSM_OWN_OVERRIDE
])
966 mach_get_platform(PSM_OWN_OVERRIDE
);
972 struct psm_ops
*pops
;
974 mach_construct_info();
978 /* register the interrupt and clock initialization routines */
979 picinitf
= mach_picinit
;
980 clkinitf
= mach_clkinit
;
981 psm_get_clockirq
= pops
->psm_get_clockirq
;
983 /* register the interrupt setup code */
984 slvltovect
= mach_softlvl_to_vect
;
985 addspl
= pops
->psm_addspl
;
986 delspl
= pops
->psm_delspl
;
988 if (pops
->psm_translate_irq
)
989 psm_translate_irq
= pops
->psm_translate_irq
;
990 if (pops
->psm_intr_ops
)
991 psm_intr_ops
= pops
->psm_intr_ops
;
993 #if defined(PSMI_1_2) || defined(PSMI_1_3) || defined(PSMI_1_4)
995 * Time-of-day functionality now handled in TOD modules.
996 * (Warn about PSM modules that think that we're going to use
997 * their ops vectors.)
999 if (pops
->psm_tod_get
)
1000 cmn_err(CE_WARN
, "obsolete psm_tod_get op %p",
1001 (void *)pops
->psm_tod_get
);
1003 if (pops
->psm_tod_set
)
1004 cmn_err(CE_WARN
, "obsolete psm_tod_set op %p",
1005 (void *)pops
->psm_tod_set
);
1008 if (pops
->psm_notify_error
) {
1009 psm_notify_error
= mach_notify_error
;
1010 notify_error
= pops
->psm_notify_error
;
1013 (*pops
->psm_softinit
)();
1016 * Initialize the dispatcher's function hooks to enable CPU halting
1017 * when idle. Set both the deep-idle and non-deep-idle hooks.
1019 * Assume we can use power saving deep-idle loop cpu_idle_adaptive.
1020 * Platform deep-idle driver will reset our idle loop to
1021 * non_deep_idle_cpu if power saving deep-idle feature is not available.
1023 * Do not use monitor/mwait if idle_cpu_use_hlt is not set(spin idle)
1024 * or idle_cpu_prefer_mwait is not set.
1025 * Allocate monitor/mwait buffer for cpu0.
1028 non_deep_idle_disp_enq_thread
= disp_enq_thread
;
1030 if (idle_cpu_use_hlt
) {
1031 idle_cpu
= cpu_idle_adaptive
;
1032 CPU
->cpu_m
.mcpu_idle_cpu
= cpu_idle
;
1034 if (is_x86_feature(x86_featureset
, X86FSET_MWAIT
) &&
1035 idle_cpu_prefer_mwait
) {
1036 CPU
->cpu_m
.mcpu_mwait
= cpuid_mwait_alloc(CPU
);
1038 * Protect ourself from insane mwait size.
1040 if (CPU
->cpu_m
.mcpu_mwait
== NULL
) {
1042 cmn_err(CE_NOTE
, "Using hlt idle. Cannot "
1043 "handle cpu 0 mwait size.");
1045 idle_cpu_prefer_mwait
= 0;
1046 CPU
->cpu_m
.mcpu_idle_cpu
= cpu_idle
;
1048 CPU
->cpu_m
.mcpu_idle_cpu
= cpu_idle_mwait
;
1051 CPU
->cpu_m
.mcpu_idle_cpu
= cpu_idle
;
1053 non_deep_idle_cpu
= CPU
->cpu_m
.mcpu_idle_cpu
;
1056 * Disable power saving deep idle loop?
1058 if (idle_cpu_no_deep_c
) {
1059 idle_cpu
= non_deep_idle_cpu
;
1070 struct psm_ops
*pops
;
1071 processorid_t cpu_id
;
1076 CPUSET_ZERO(cpumask
);
1079 cpu_id
= (*pops
->psm_get_next_processorid
)(cpu_id
);
1081 * Only add boot_ncpus CPUs to mp_cpus. Other CPUs will be handled
1082 * by CPU DR driver at runtime.
1084 for (cnt
= 0; cpu_id
!= -1 && cnt
< boot_ncpus
; cnt
++) {
1085 CPUSET_ADD(cpumask
, cpu_id
);
1086 cpu_id
= (*pops
->psm_get_next_processorid
)(cpu_id
);
1091 /* MP related routines */
1092 ap_mlsetup
= pops
->psm_post_cpu_start
;
1093 send_dirintf
= pops
->psm_send_ipi
;
1095 /* optional MP related routines */
1096 if (pops
->psm_shutdown
)
1097 psm_shutdownf
= pops
->psm_shutdown
;
1098 if (pops
->psm_preshutdown
)
1099 psm_preshutdownf
= pops
->psm_preshutdown
;
1100 if (pops
->psm_notify_func
)
1101 psm_notifyf
= pops
->psm_notify_func
;
1102 if (pops
->psm_set_idlecpu
)
1103 psm_set_idle_cpuf
= pops
->psm_set_idlecpu
;
1104 if (pops
->psm_unset_idlecpu
)
1105 psm_unset_idle_cpuf
= pops
->psm_unset_idlecpu
;
1107 psm_clkinit
= pops
->psm_clkinit
;
1109 if (pops
->psm_timer_reprogram
)
1110 psm_timer_reprogram
= pops
->psm_timer_reprogram
;
1112 if (pops
->psm_timer_enable
)
1113 psm_timer_enable
= pops
->psm_timer_enable
;
1115 if (pops
->psm_timer_disable
)
1116 psm_timer_disable
= pops
->psm_timer_disable
;
1118 if (pops
->psm_post_cyclic_setup
)
1119 psm_post_cyclic_setup
= pops
->psm_post_cyclic_setup
;
1121 if (pops
->psm_state
)
1122 psm_state
= pops
->psm_state
;
1125 * Set these vectors here so they can be used by Suspend/Resume
1128 if (pops
->psm_disable_intr
)
1129 psm_disable_intr
= pops
->psm_disable_intr
;
1130 if (pops
->psm_enable_intr
)
1131 psm_enable_intr
= pops
->psm_enable_intr
;
1133 /* check for multiple CPUs */
1134 if (cnt
< 2 && plat_dr_support_cpu() == B_FALSE
)
1137 /* check for MP platforms */
1138 if (pops
->psm_cpu_start
== NULL
)
1142 * Set the dispatcher hook to enable cpu "wake up"
1143 * when a thread becomes runnable.
1145 if (idle_cpu_use_hlt
) {
1146 disp_enq_thread
= cpu_wakeup
;
1148 if (is_x86_feature(x86_featureset
, X86FSET_MWAIT
) &&
1149 idle_cpu_prefer_mwait
)
1150 disp_enq_thread
= cpu_wakeup_mwait
;
1151 non_deep_idle_disp_enq_thread
= disp_enq_thread
;
1155 psm_get_ipivect
= pops
->psm_get_ipivect
;
1157 (void) add_avintr((void *)NULL
, XC_HI_PIL
, xc_serv
, "xc_intr",
1158 (*pops
->psm_get_ipivect
)(XC_HI_PIL
, PSM_INTR_IPI_HI
),
1159 NULL
, NULL
, NULL
, NULL
);
1161 (void) (*pops
->psm_get_ipivect
)(XC_CPUPOKE_PIL
, PSM_INTR_POKE
);
1167 struct psm_ops
*pops
;
1171 /* register the interrupt handlers */
1172 setlvl
= pops
->psm_intr_enter
;
1173 setlvlx
= pops
->psm_intr_exit
;
1175 /* initialize the interrupt hardware */
1176 (*pops
->psm_picinit
)();
1178 /* set interrupt mask for current ipl */
1179 setspl
= pops
->psm_setspl
;
1181 setspl(CPU
->cpu_pri
);
1184 uint_t cpu_freq
; /* MHz */
1185 uint64_t cpu_freq_hz
; /* measured (in hertz) */
1187 #define MEGA_HZ 1000000
1191 int xpv_cpufreq_workaround
= 1;
1192 int xpv_cpufreq_verbose
= 0;
1197 mach_calchz(uint32_t pit_counter
, uint64_t *processor_clks
)
1201 if ((pit_counter
== 0) || (*processor_clks
== 0) ||
1202 (*processor_clks
> (((uint64_t)-1) / PIT_HZ
)))
1205 cpu_hz
= ((uint64_t)PIT_HZ
* *processor_clks
) / pit_counter
;
1213 mach_getcpufreq(void)
1216 vcpu_time_info_t
*vti
= &CPU
->cpu_m
.mcpu_vcpu_info
->time
;
1220 * During dom0 bringup, it was noted that on at least one older
1221 * Intel HT machine, the hypervisor initially gives a tsc_to_system_mul
1222 * value that is quite wrong (the 3.06GHz clock was reported
1225 * The curious thing is, that if you stop the kernel at entry,
1226 * breakpoint here and inspect the value with kmdb, the value
1227 * is correct - but if you don't stop and simply enable the
1228 * printf statement (below), you can see the bad value printed
1229 * here. Almost as if something kmdb did caused the hypervisor to
1230 * figure it out correctly. And, note that the hypervisor
1231 * eventually -does- figure it out correctly ... if you look at
1232 * the field later in the life of dom0, it is correct.
1234 * For now, on dom0, we employ a slightly cheesy workaround of
1235 * using the DOM0_PHYSINFO hypercall.
1237 if (DOMAIN_IS_INITDOMAIN(xen_info
) && xpv_cpufreq_workaround
) {
1238 cpu_hz
= 1000 * xpv_cpu_khz();
1240 cpu_hz
= (UINT64_C(1000000000) << 32) / vti
->tsc_to_system_mul
;
1242 if (vti
->tsc_shift
< 0)
1243 cpu_hz
<<= -vti
->tsc_shift
;
1245 cpu_hz
>>= vti
->tsc_shift
;
1248 if (xpv_cpufreq_verbose
)
1249 printf("mach_getcpufreq: system_mul 0x%x, shift %d, "
1250 "cpu_hz %" PRId64
"Hz\n",
1251 vti
->tsc_to_system_mul
, vti
->tsc_shift
, cpu_hz
);
1255 uint32_t pit_counter
;
1256 uint64_t processor_clks
;
1258 if (is_x86_feature(x86_featureset
, X86FSET_TSC
)) {
1260 * We have a TSC. freq_tsc() knows how to measure the number
1261 * of clock cycles sampled against the PIT.
1263 ulong_t flags
= clear_int_flag();
1264 processor_clks
= freq_tsc(&pit_counter
);
1265 restore_int_flag(flags
);
1266 return (mach_calchz(pit_counter
, &processor_clks
));
1267 } else if (x86_vendor
== X86_VENDOR_Cyrix
|| x86_type
== X86_TYPE_P5
) {
1268 #if defined(__amd64)
1269 panic("mach_getcpufreq: no TSC!");
1270 #elif defined(__i386)
1272 * We are a Cyrix based on a 6x86 core or an Intel Pentium
1273 * for which freq_notsc() knows how to measure the number of
1274 * elapsed clock cycles sampled against the PIT
1276 ulong_t flags
= clear_int_flag();
1277 processor_clks
= freq_notsc(&pit_counter
);
1278 restore_int_flag(flags
);
1279 return (mach_calchz(pit_counter
, &processor_clks
));
1283 /* We do not know how to calculate cpu frequency for this cpu. */
/*
 * Core clock speeds, in MHz and in ascending order, of older slower
 * processors whose rated speed mach_fixcpufreq() would not otherwise be
 * able to account for.
 *
 * If the clock speed of a cpu is found to be reported incorrectly, do not
 * add to this array.  Instead improve the accuracy of the algorithm that
 * determines the clock speed of the processor, or extend the implementation
 * to support the vendor as appropriate.
 */
static int x86_cpu_freq[] = {
	60, 75, 80, 90, 120,
	160, 166, 175, 180, 233
};
1299 * On fast processors the clock frequency that is measured may be off by
1300 * a few MHz from the value printed on the part. This is a combination of
1301 * the factors that for such fast parts being off by this much is within
1302 * the tolerances for manufacture and because of the difficulties in the
1303 * measurement that can lead to small error. This function uses some
1304 * heuristics in order to tweak the value that was measured to match what
1305 * is most likely printed on the part.
1308 * AMD Athlon 1000 mhz measured as 998 mhz
1309 * Intel Pentium III Xeon 733 mhz measured as 731 mhz
1310 * Intel Pentium IV 1500 mhz measured as 1495mhz
1312 * If in the future this function is no longer sufficient to correct
1313 * for the error in the measurement, then the algorithm used to perform
1314 * the measurement will have to be improved in order to increase accuracy
1315 * rather than adding horrible and questionable kludges here.
1317 * This is called after the cyclics subsystem because of the potential
1318 * that the heuristics within may give a worse estimate of the clock
1319 * frequency than the value that was measured.
1322 mach_fixcpufreq(void)
1324 uint32_t freq
, mul
, near66
, delta66
, near50
, delta50
, fixed
, delta
, i
;
1326 freq
= (uint32_t)cpu_freq
;
1329 * Find the nearest integer multiple of 200/3 (about 66) MHz to the
1330 * measured speed taking into account that the 667 MHz parts were
1331 * the first to round-up.
1333 mul
= (uint32_t)((3 * (uint64_t)freq
+ 100) / 200);
1334 near66
= (uint32_t)((200 * (uint64_t)mul
+ ((mul
>= 10) ? 1 : 0)) / 3);
1335 delta66
= (near66
> freq
) ? (near66
- freq
) : (freq
- near66
);
1337 /* Find the nearest integer multiple of 50 MHz to the measured speed */
1338 mul
= (freq
+ 25) / 50;
1340 delta50
= (near50
> freq
) ? (near50
- freq
) : (freq
- near50
);
1342 /* Find the closer of the two */
1343 if (delta66
< delta50
) {
1351 if (fixed
> INT_MAX
)
1355 * Some older parts have a core clock frequency that is not an
1356 * integral multiple of 50 or 66 MHz. Check if one of the old
1357 * clock frequencies is closer to the measured value than any
1358 * of the integral multiples of 50 an 66, and if so set fixed
1359 * and delta appropriately to represent the closest value.
1361 i
= sizeof (x86_cpu_freq
) / sizeof (int);
1365 if (x86_cpu_freq
[i
] <= freq
) {
1366 mul
= freq
- x86_cpu_freq
[i
];
1369 fixed
= x86_cpu_freq
[i
];
1376 mul
= x86_cpu_freq
[i
] - freq
;
1379 fixed
= x86_cpu_freq
[i
];
1385 * Set a reasonable maximum for how much to correct the measured
1386 * result by. This check is here to prevent the adjustment made
1387 * by this function from being more harm than good. It is entirely
1388 * possible that in the future parts will be made that are not
1389 * integral multiples of 66 or 50 in clock frequency or that
1390 * someone may overclock a part to some odd frequency. If the
1391 * measured value is farther from the corrected value than
1392 * allowed, then assume the corrected value is in error and use
1393 * the measured value.
1398 cpu_freq
= (int)fixed
;
1403 machhztomhz(uint64_t cpu_freq_hz
)
1407 /* Round to nearest MHZ */
1408 cpu_mhz
= (cpu_freq_hz
+ (MEGA_HZ
/ 2)) / MEGA_HZ
;
1410 if (cpu_mhz
> INT_MAX
)
1413 return ((int)cpu_mhz
);
1419 mach_clkinit(int preferred_mode
, int *set_mode
)
1421 struct psm_ops
*pops
;
1426 cpu_freq_hz
= mach_getcpufreq();
1428 cpu_freq
= machhztomhz(cpu_freq_hz
);
1430 if (!is_x86_feature(x86_featureset
, X86FSET_TSC
) || (cpu_freq
== 0))
1431 tsc_gethrtime_enable
= 0;
1434 if (tsc_gethrtime_enable
) {
1435 tsc_hrtimeinit(cpu_freq_hz
);
1439 if (pops
->psm_hrtimeinit
)
1440 (*pops
->psm_hrtimeinit
)();
1441 gethrtimef
= pops
->psm_gethrtime
;
1442 gethrtimeunscaledf
= gethrtimef
;
1443 /* scalehrtimef will remain dummy */
1448 if (mach_ver
[0] >= PSM_INFO_VER01_3
) {
1449 if (preferred_mode
== TIMER_ONESHOT
) {
1451 resolution
= (*pops
->psm_clkinit
)(0);
1452 if (resolution
!= 0) {
1453 *set_mode
= TIMER_ONESHOT
;
1454 return (resolution
);
1459 * either periodic mode was requested or could not set to
1462 resolution
= (*pops
->psm_clkinit
)(hz
);
1464 * psm should be able to do periodic, so we do not check
1465 * for return value of psm_clkinit here.
1467 *set_mode
= TIMER_PERIODIC
;
1468 return (resolution
);
1471 * PSMI interface prior to PSMI_3 does not define a return
1472 * value for psm_clkinit, so the return value is ignored.
1474 (void) (*pops
->psm_clkinit
)(hz
);
1475 *set_mode
= TIMER_PERIODIC
;
1476 return (nsec_per_tick
);
1483 mach_softlvl_to_vect(int ipl
)
1485 setsoftint
= av_set_softint_pending
;
1486 kdisetsoftint
= kdi_av_set_softint_pending
;
1488 return (PSM_SV_SOFTWARE
);
1493 * This is here to allow us to simulate cpus that refuse to start.
1495 cpuset_t cpufailset
;
1499 mach_cpu_start(struct cpu
*cp
, void *ctx
)
1501 struct psm_ops
*pops
= mach_set
[0];
1502 processorid_t id
= cp
->cpu_id
;
1505 if (CPU_IN_SET(cpufailset
, id
))
1508 return ((*pops
->psm_cpu_start
)(id
, ctx
));
1512 mach_cpuid_start(processorid_t id
, void *ctx
)
1514 struct psm_ops
*pops
= mach_set
[0];
1517 if (CPU_IN_SET(cpufailset
, id
))
1520 return ((*pops
->psm_cpu_start
)(id
, ctx
));
1524 mach_cpu_stop(cpu_t
*cp
, void *ctx
)
1526 struct psm_ops
*pops
= mach_set
[0];
1527 psm_cpu_request_t request
;
1529 if (pops
->psm_cpu_ops
== NULL
) {
1533 ASSERT(cp
->cpu_id
!= -1);
1534 request
.pcr_cmd
= PSM_CPU_STOP
;
1535 request
.req
.cpu_stop
.cpuid
= cp
->cpu_id
;
1536 request
.req
.cpu_stop
.ctx
= ctx
;
1538 return ((*pops
->psm_cpu_ops
)(&request
));
1542 mach_cpu_add(mach_cpu_add_arg_t
*argp
, processorid_t
*cpuidp
)
1545 struct psm_ops
*pops
= mach_set
[0];
1546 psm_cpu_request_t request
;
1548 if (pops
->psm_cpu_ops
== NULL
) {
1552 request
.pcr_cmd
= PSM_CPU_ADD
;
1553 request
.req
.cpu_add
.argp
= argp
;
1554 request
.req
.cpu_add
.cpuid
= -1;
1555 rc
= (*pops
->psm_cpu_ops
)(&request
);
1557 ASSERT(request
.req
.cpu_add
.cpuid
!= -1);
1558 *cpuidp
= request
.req
.cpu_add
.cpuid
;
1565 mach_cpu_remove(processorid_t cpuid
)
1567 struct psm_ops
*pops
= mach_set
[0];
1568 psm_cpu_request_t request
;
1570 if (pops
->psm_cpu_ops
== NULL
) {
1574 request
.pcr_cmd
= PSM_CPU_REMOVE
;
1575 request
.req
.cpu_remove
.cpuid
= cpuid
;
1577 return ((*pops
->psm_cpu_ops
)(&request
));
1581 * Default handler to create device node for CPU.
1582 * One reference count will be held on created device node.
1585 mach_cpu_create_devinfo(cpu_t
*cp
, dev_info_t
**dipp
)
1589 static kmutex_t cpu_node_lock
;
1590 static dev_info_t
*cpu_nex_devi
= NULL
;
1593 ASSERT(dipp
!= NULL
);
1596 if (cpu_nex_devi
== NULL
) {
1597 mutex_enter(&cpu_node_lock
);
1598 /* First check whether cpus exists. */
1599 cpu_nex_devi
= ddi_find_devinfo("cpus", -1, 0);
1600 /* Create cpus if it doesn't exist. */
1601 if (cpu_nex_devi
== NULL
) {
1602 ndi_devi_enter(ddi_root_node(), &circ
);
1603 rv
= ndi_devi_alloc(ddi_root_node(), "cpus",
1604 (pnode_t
)DEVI_SID_NODEID
, &dip
);
1605 if (rv
!= NDI_SUCCESS
) {
1606 mutex_exit(&cpu_node_lock
);
1608 "?failed to create cpu nexus device.\n");
1609 return (PSM_FAILURE
);
1611 ASSERT(dip
!= NULL
);
1612 (void) ndi_devi_online(dip
, 0);
1613 ndi_devi_exit(ddi_root_node(), circ
);
1616 mutex_exit(&cpu_node_lock
);
1620 * create a child node for cpu identified as 'cpu_id'
1622 ndi_devi_enter(cpu_nex_devi
, &circ
);
1623 dip
= ddi_add_child(cpu_nex_devi
, "cpu", DEVI_SID_NODEID
, -1);
1626 "?failed to create device node for cpu%d.\n", cp
->cpu_id
);
1630 (void) ndi_hold_devi(dip
);
1633 ndi_devi_exit(cpu_nex_devi
, circ
);
1639 * Create cpu device node in device tree and online it.
1640 * Return created dip with reference count held if requested.
1643 mach_cpu_create_device_node(struct cpu
*cp
, dev_info_t
**dipp
)
1646 dev_info_t
*dip
= NULL
;
1648 ASSERT(psm_cpu_create_devinfo
!= NULL
);
1649 rv
= psm_cpu_create_devinfo(cp
, &dip
);
1650 if (rv
== PSM_SUCCESS
) {
1651 cpuid_set_cpu_properties(dip
, cp
->cpu_id
, cp
->cpu_m
.mcpu_cpi
);
1652 /* Recursively attach driver for parent nexus device. */
1653 if (i_ddi_attach_node_hierarchy(ddi_get_parent(dip
)) ==
1655 /* Configure cpu itself and descendants. */
1656 (void) ndi_devi_online(dip
,
1657 NDI_ONLINE_ATTACH
| NDI_CONFIG
);
1662 (void) ndi_rele_devi(dip
);
1670 * The dipp contains one of following values on return:
1671 * - NULL if no device node found
1672 * - pointer to device node if found
1675 mach_cpu_get_device_node(struct cpu
*cp
, dev_info_t
**dipp
)
1678 if (psm_cpu_get_devinfo
!= NULL
) {
1679 if (psm_cpu_get_devinfo(cp
, dipp
) == PSM_SUCCESS
) {
1680 return (PSM_SUCCESS
);
1684 return (PSM_FAILURE
);
1689 mach_translate_irq(dev_info_t
*dip
, int irqno
)
1691 return (irqno
); /* default to NO translation */
1695 mach_notify_error(int level
, char *errmsg
)
1698 * SL_FATAL is pass in once panicstr is set, deliver it
1699 * as CE_PANIC. Also, translate SL_ codes back to CE_
1700 * codes for the psmi handler
1702 if (level
& SL_FATAL
)
1703 (*notify_error
)(CE_PANIC
, errmsg
);
1704 else if (level
& SL_WARN
)
1705 (*notify_error
)(CE_WARN
, errmsg
);
1706 else if (level
& SL_NOTE
)
1707 (*notify_error
)(CE_NOTE
, errmsg
);
1708 else if (level
& SL_CONSOLE
)
1709 (*notify_error
)(CE_CONT
, errmsg
);
1713 * It provides the default basic intr_ops interface for the new DDI
1714 * interrupt framework if the PSM doesn't have one.
1717 * dip - pointer to the dev_info structure of the requested device
1718 * hdlp - pointer to the internal interrupt handle structure for the
1719 * requested interrupt
1720 * intr_op - opcode for this call
1721 * result - pointer to the integer that will hold the result to be
1722 * passed back if return value is PSM_SUCCESS
1725 * return value is either PSM_SUCCESS or PSM_FAILURE
1728 mach_intr_ops(dev_info_t
*dip
, ddi_intr_handle_impl_t
*hdlp
,
1729 psm_intr_op_t intr_op
, int *result
)
1731 struct intrspec
*ispec
;
1734 case PSM_INTR_OP_CHECK_MSI
:
1735 *result
= hdlp
->ih_type
& ~(DDI_INTR_TYPE_MSI
|
1736 DDI_INTR_TYPE_MSIX
);
1738 case PSM_INTR_OP_ALLOC_VECTORS
:
1739 if (hdlp
->ih_type
== DDI_INTR_TYPE_FIXED
)
1744 case PSM_INTR_OP_FREE_VECTORS
:
1746 case PSM_INTR_OP_NAVAIL_VECTORS
:
1747 if (hdlp
->ih_type
== DDI_INTR_TYPE_FIXED
)
1752 case PSM_INTR_OP_XLATE_VECTOR
:
1753 ispec
= ((ihdl_plat_t
*)hdlp
->ih_private
)->ip_ispecp
;
1754 *result
= psm_translate_irq(dip
, ispec
->intrspec_vec
);
1756 case PSM_INTR_OP_GET_CAP
:
1759 case PSM_INTR_OP_GET_PENDING
:
1760 case PSM_INTR_OP_CLEAR_MASK
:
1761 case PSM_INTR_OP_SET_MASK
:
1762 case PSM_INTR_OP_GET_SHARED
:
1763 case PSM_INTR_OP_SET_PRI
:
1764 case PSM_INTR_OP_SET_CAP
:
1765 case PSM_INTR_OP_SET_CPU
:
1766 case PSM_INTR_OP_GET_INTR
:
1768 return (PSM_FAILURE
);
1770 return (PSM_SUCCESS
);
1773 * Return 1 if CMT load balancing policies should be
1774 * implemented across instances of the specified hardware
1775 * sharing relationship.
1778 pg_cmt_load_bal_hw(pghw_type_t hw
)
1780 if (hw
== PGHW_IPIPE
||
1782 hw
== PGHW_PROCNODE
||
1789 * Return 1 if thread affinity polices should be implemented
1790 * for instances of the specifed hardware sharing relationship.
1793 pg_cmt_affinity_hw(pghw_type_t hw
)
1795 if (hw
== PGHW_CACHE
)
1802 * Return number of counter events requested to measure hardware capacity and
1803 * utilization and setup CPC requests for specified CPU as needed
1805 * May return 0 when platform or processor specific code knows that no CPC
1806 * events should be programmed on this CPU or -1 when platform or processor
1807 * specific code doesn't know which counter events are best to use and common
1808 * code should decide for itself
1811 /* LINTED E_FUNC_ARG_UNUSED */
1812 cu_plat_cpc_init(cpu_t
*cp
, kcpc_request_list_t
*reqs
, int nreqs
)
1814 const char *impl_name
;
1817 * Return error if pcbe_ops not set
1819 if (pcbe_ops
== NULL
)
1823 * Return that no CPC events should be programmed on hyperthreaded
1824 * Pentium 4 and return error for all other x86 processors to tell
1825 * common code to decide what counter events to program on those CPUs
1826 * for measuring hardware capacity and utilization
1828 impl_name
= pcbe_ops
->pcbe_impl_name();
1829 if (impl_name
!= NULL
&& strcmp(impl_name
, PCBE_IMPL_NAME_P4HT
) == 0)