/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright (c) 2009, Intel Corporation.
 * All rights reserved.
 */
#include <sys/atomic.h>
#include <sys/cpuvar.h>
#include <sys/cpu_event.h>
#include <sys/cmn_err.h>
#include <sys/ddi.h>
#include <sys/kstat.h>
#include <sys/pci.h>
#include <sys/sunddi.h>
#include <sys/sunndi.h>
#include <sys/synch.h>
#include <sys/sysmacros.h>
/* For hat_getpfnum() and the fipe_pm_policy_t definitions used below. */
#include <vm/hat.h>
#include <sys/fipe.h>
/* Current PM policy, configurable through /etc/system and fipe.conf. */
fipe_pm_policy_t fipe_pm_policy = FIPE_PM_POLICY_BALANCE;
int fipe_pm_throttle_level = 1;
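
/*
 * For example, assuming the module is named "fipe", the policy could be
 * overridden at boot with a line such as the following in /etc/system:
 *
 *	set fipe:fipe_pm_policy = 1
 *
 * The value must lie within the range checked in fipe_init() below.
 */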
/* Enable kstat support. */
#define	FIPE_KSTAT_SUPPORT	1

/* Enable detailed performance statistics. */
#define	FIPE_KSTAT_DETAIL	1

/* Enable the builtin IOAT driver if no standalone IOAT driver is available. */
#define	FIPE_IOAT_BUILTIN	0
#if defined(FIPE_IOAT_BUILTIN) && (FIPE_IOAT_BUILTIN == 0)
#undef	FIPE_IOAT_BUILTIN
#endif
#ifdef	FIPE_IOAT_BUILTIN
/* Use IOAT channel 3 to generate memory transactions. */
#define	FIPE_IOAT_CHAN_CTRL	0x200
#define	FIPE_IOAT_CHAN_STS_LO	0x204
#define	FIPE_IOAT_CHAN_STS_HI	0x208
#define	FIPE_IOAT_CHAN_ADDR_LO	0x20C
#define	FIPE_IOAT_CHAN_ADDR_HI	0x210
#define	FIPE_IOAT_CHAN_CMD	0x214
#define	FIPE_IOAT_CHAN_ERR	0x228
#else	/* FIPE_IOAT_BUILTIN */
#include <sys/dcopy.h>
#endif	/* FIPE_IOAT_BUILTIN */
/* Memory controller related PCI configuration constants. */
#define	FIPE_MC_GBLACT		0x60
#define	FIPE_MC_THRTLOW		0x64
#define	FIPE_MC_THRTCTRL	0x67
#define	FIPE_MC_THRTCTRL_HUNT	0x1

/* Hardware recommended values. */
#define	FIPE_MC_MEMORY_OFFSET	1024
#define	FIPE_MC_MEMORY_SIZE	128
/* Number of IOAT commands posted when entering idle. */
#define	FIPE_IOAT_CMD_NUM	2

/* Resource allocation retry interval in microseconds. */
#define	FIPE_IOAT_RETRY_INTERVAL	(15 * 1000 * 1000)

/* Statistics update interval in nanoseconds. */
#define	FIPE_STAT_INTERVAL	(10 * 1000 * 1000)
/* Configuration profile support. */
#define	FIPE_PROFILE_FIELD(field)	(fipe_profile_curr->field)
#define	FIPE_PROF_IDLE_COUNT		FIPE_PROFILE_FIELD(idle_count)
#define	FIPE_PROF_BUSY_THRESHOLD	FIPE_PROFILE_FIELD(busy_threshold)
#define	FIPE_PROF_INTR_THRESHOLD	FIPE_PROFILE_FIELD(intr_threshold)
#define	FIPE_PROF_INTR_BUSY_THRESHOLD	FIPE_PROFILE_FIELD(intr_busy_threshold)
#define	FIPE_PROF_INTR_BUSY_THROTTLE	FIPE_PROFILE_FIELD(intr_busy_throttle)

/* Priority assigned to the FIPE memory power management driver on x86. */
#define	CPU_IDLE_CB_PRIO_FIPE	(CPU_IDLE_CB_PRIO_LOW_BASE + 0x4000000)
/* Structure to support a power management profile. */
#pragma align CPU_CACHE_COHERENCE_SIZE(fipe_profiles)
static struct fipe_profile {
	uint32_t	idle_count;
	uint32_t	busy_threshold;
	uint32_t	intr_threshold;
	uint32_t	intr_busy_threshold;
	uint32_t	intr_busy_throttle;
} fipe_profiles[FIPE_PM_POLICY_MAX] = {
	{ 0, 0, 0, 0, 0 },
	{ 5, 30, 20, 50, 5 },
	{ 10, 40, 40, 75, 4 },
	{ 15, 50, 60, 100, 2 },
};
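
/*
 * Each row of fipe_profiles corresponds to one fipe_pm_policy_t value in
 * order (the all-zero row covering FIPE_PM_POLICY_DISABLE), and the columns
 * map to idle_count, busy_threshold, intr_threshold, intr_busy_threshold
 * and intr_busy_throttle in declaration order. They are read through the
 * FIPE_PROF_* macros above via fipe_profile_curr.
 */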
/* Structure to store memory controller related data. */
#pragma align CPU_CACHE_COHERENCE_SIZE(fipe_mc_ctrl)
static struct fipe_mc_ctrl {
	ddi_acc_handle_t	mc_pci_hdl;
	unsigned char		mc_thrtctrl;
	unsigned char		mc_thrtlow;
	unsigned char		mc_gblact;
	dev_info_t		*mc_dip;
	boolean_t		mc_initialized;
} fipe_mc_ctrl;
/* Structure to store IOAT related information. */
#pragma align CPU_CACHE_COHERENCE_SIZE(fipe_ioat_ctrl)
static struct fipe_ioat_control {
	kmutex_t		ioat_lock;
	boolean_t		ioat_ready;
#ifdef	FIPE_IOAT_BUILTIN
	boolean_t		ioat_reg_mapped;
	ddi_acc_handle_t	ioat_reg_handle;
	uint8_t			*ioat_reg_addr;
	uint64_t		ioat_cmd_physaddr;
#else	/* FIPE_IOAT_BUILTIN */
	dcopy_cmd_t		ioat_cmds[FIPE_IOAT_CMD_NUM + 1];
	dcopy_handle_t		ioat_handle;
#endif	/* FIPE_IOAT_BUILTIN */
	dev_info_t		*ioat_dev_info;
	uint64_t		ioat_buf_physaddr;
	char			*ioat_buf_virtaddr;
	char			*ioat_buf_start;
	size_t			ioat_buf_size;
	timeout_id_t		ioat_timerid;
	boolean_t		ioat_failed;
	boolean_t		ioat_cancel;
	boolean_t		ioat_try_alloc;
} fipe_ioat_ctrl;
#pragma align CPU_CACHE_COHERENCE_SIZE(fipe_idle_ctrl)
static struct fipe_idle_ctrl {
	boolean_t		idle_ready;
	cpu_idle_callback_handle_t	cb_handle;
	cpu_idle_prop_handle_t	prop_enter;
	cpu_idle_prop_handle_t	prop_exit;
	cpu_idle_prop_handle_t	prop_busy;
	cpu_idle_prop_handle_t	prop_idle;
	cpu_idle_prop_handle_t	prop_intr;

	/*
	 * Put here for cache efficiency; logically it belongs in
	 * fipe_global_ctrl.
	 */
	hrtime_t		tick_interval;
} fipe_idle_ctrl;
/*
 * Global control structure.
 * The Solaris idle thread has no reentrance issue, so it's enough to count
 * CPUs in the idle state; otherwise a cpuset_t bitmap would be needed to
 * track idle CPUs.
 */
#pragma align CPU_CACHE_COHERENCE_SIZE(fipe_gbl_ctrl)
static struct fipe_global_ctrl {
	kmutex_t		lock;
	boolean_t		pm_enabled;
	volatile boolean_t	pm_active;
	volatile uint32_t	cpu_count;
	volatile uint64_t	io_waiters;
	hrtime_t		enter_ts;
	hrtime_t		time_in_pm;
	size_t			state_size;
	char			*state_buf;
#ifdef	FIPE_KSTAT_SUPPORT
	kstat_t			*fipe_kstat;
#endif	/* FIPE_KSTAT_SUPPORT */
} fipe_gbl_ctrl;
#define	FIPE_CPU_STATE_PAD	(128 - \
	2 * sizeof (boolean_t) - 4 * sizeof (hrtime_t) - \
	2 * sizeof (uint64_t) - 2 * sizeof (uint32_t))
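
/*
 * The pad sizes each fipe_cpu_state_t to 128 bytes so that per-CPU entries
 * do not share cache lines. Assuming 4-byte boolean_t/uint32_t and 8-byte
 * hrtime_t/uint64_t, the counted fields total 2*4 + 4*8 + 2*8 + 2*4 = 64
 * bytes, leaving a 64-byte pad.
 */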
/* Per-CPU status. */
typedef struct fipe_cpu_state {
	boolean_t	cond_ready;
	boolean_t	state_ready;
	uint32_t	idle_count;
	uint32_t	throttle_cnt;
	hrtime_t	throttle_ts;
	hrtime_t	next_ts;
	hrtime_t	last_busy;
	hrtime_t	last_idle;
	uint64_t	last_intr;
	uint64_t	last_iowait;
	char		pad1[FIPE_CPU_STATE_PAD];
} fipe_cpu_state_t;

#ifdef	FIPE_KSTAT_SUPPORT
#pragma align CPU_CACHE_COHERENCE_SIZE(fipe_kstat)
static struct fipe_kstat_s {
	kstat_named_t	fipe_enabled;
	kstat_named_t	fipe_policy;
	kstat_named_t	fipe_pm_time;
#ifdef	FIPE_KSTAT_DETAIL
	kstat_named_t	ioat_ready;
	kstat_named_t	pm_tryenter_cnt;
	kstat_named_t	pm_success_cnt;
	kstat_named_t	pm_race_cnt;
	kstat_named_t	cpu_loop_cnt;
	kstat_named_t	cpu_busy_cnt;
	kstat_named_t	cpu_idle_cnt;
	kstat_named_t	cpu_intr_busy_cnt;
	kstat_named_t	cpu_intr_throttle_cnt;
	kstat_named_t	bio_busy_cnt;
	kstat_named_t	ioat_start_fail_cnt;
	kstat_named_t	ioat_stop_fail_cnt;
#endif	/* FIPE_KSTAT_DETAIL */
} fipe_kstat = {
	{ "fipe_enabled", KSTAT_DATA_INT32 },
	{ "fipe_policy", KSTAT_DATA_INT32 },
	{ "fipe_pm_time", KSTAT_DATA_UINT64 },
#ifdef	FIPE_KSTAT_DETAIL
	{ "ioat_ready", KSTAT_DATA_INT32 },
	{ "pm_tryenter_cnt", KSTAT_DATA_UINT64 },
	{ "pm_success_cnt", KSTAT_DATA_UINT64 },
	{ "pm_race_cnt", KSTAT_DATA_UINT64 },
	{ "cpu_loop_cnt", KSTAT_DATA_UINT64 },
	{ "cpu_busy_cnt", KSTAT_DATA_UINT64 },
	{ "cpu_idle_cnt", KSTAT_DATA_UINT64 },
	{ "cpu_intr_busy_cnt", KSTAT_DATA_UINT64 },
	{ "cpu_intr_thrt_cnt", KSTAT_DATA_UINT64 },
	{ "bio_busy_cnt", KSTAT_DATA_UINT64 },
	{ "ioat_start_fail_cnt", KSTAT_DATA_UINT64 },
	{ "ioat_stop_fail_cnt", KSTAT_DATA_UINT64 }
#endif	/* FIPE_KSTAT_DETAIL */
};

#define	FIPE_KSTAT_INC(v)		\
	atomic_inc_64(&fipe_kstat.v.value.ui64)
#ifdef	FIPE_KSTAT_DETAIL
#define	FIPE_KSTAT_DETAIL_INC(v)	\
	atomic_inc_64(&fipe_kstat.v.value.ui64)
#else	/* FIPE_KSTAT_DETAIL */
#define	FIPE_KSTAT_DETAIL_INC(v)
#endif	/* FIPE_KSTAT_DETAIL */

#else	/* FIPE_KSTAT_SUPPORT */

#define	FIPE_KSTAT_INC(v)
#define	FIPE_KSTAT_DETAIL_INC(v)

#endif	/* FIPE_KSTAT_SUPPORT */
/* Save the current power management policy across suspend/resume. */
static fipe_pm_policy_t fipe_pm_policy_saved = FIPE_PM_POLICY_BALANCE;

static fipe_cpu_state_t *fipe_cpu_states = NULL;
/*
 * There is no lock to protect fipe_profile_curr, so fipe_profile_curr
 * may change under threads executing fipe_idle_enter. This is not an issue,
 * as it always points to a valid profile, and even if a thread makes its
 * choice against a stale profile, that choice is still valid for some
 * profile and will be corrected on the next cpu_idle_enter cycle. Since
 * any selection is always valid for some profile, the overhead of a lock
 * is not justified.
 */
static struct fipe_profile *fipe_profile_curr = NULL;
static void fipe_idle_enter(void *arg, cpu_idle_callback_context_t ctx,
    cpu_idle_check_wakeup_t check_func, void *check_arg);
static void fipe_idle_exit(void *arg, cpu_idle_callback_context_t ctx,
    int flags);

static cpu_idle_callback_t fipe_idle_cb = {
	CPU_IDLE_CALLBACK_VER0,
	fipe_idle_enter,
	fipe_idle_exit,
};
/*
 * Configure the memory controller into power saving mode:
 * 1) the OLTT activation limit is set to unlimited
 * 2) the MC works in S-CLTT mode
 */
static int
fipe_mc_change(int throttle)
{
	/* Enable OLTT/disable S-CLTT mode */
	pci_config_put8(fipe_mc_ctrl.mc_pci_hdl, FIPE_MC_THRTCTRL,
	    fipe_mc_ctrl.mc_thrtctrl & ~FIPE_MC_THRTCTRL_HUNT);
	/* Set OLTT activation limit to unlimited */
	pci_config_put8(fipe_mc_ctrl.mc_pci_hdl, FIPE_MC_GBLACT, 0);
	/*
	 * Set S-CLTT low throttling to the desired value. The lower the
	 * value, the more power is saved and the less memory bandwidth
	 * is available.
	 */
	pci_config_put8(fipe_mc_ctrl.mc_pci_hdl, FIPE_MC_THRTLOW, throttle);
	/* Enable S-CLTT/disable OLTT mode */
	pci_config_put8(fipe_mc_ctrl.mc_pci_hdl, FIPE_MC_THRTCTRL,
	    fipe_mc_ctrl.mc_thrtctrl | FIPE_MC_THRTCTRL_HUNT);

	return (0);
}
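
/*
 * Note the ordering above: S-CLTT (HUNT) mode is switched off before the
 * activation limit and low-throttle registers are reprogrammed, and only
 * switched back on afterwards, so the controller never runs S-CLTT with a
 * half-updated configuration.
 */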
/*
 * Restore the memory controller's original configuration.
 */
static void
fipe_mc_restore(void)
{
	pci_config_put8(fipe_mc_ctrl.mc_pci_hdl, FIPE_MC_THRTCTRL,
	    fipe_mc_ctrl.mc_thrtctrl & ~FIPE_MC_THRTCTRL_HUNT);
	pci_config_put8(fipe_mc_ctrl.mc_pci_hdl, FIPE_MC_GBLACT,
	    fipe_mc_ctrl.mc_gblact);
	pci_config_put8(fipe_mc_ctrl.mc_pci_hdl, FIPE_MC_THRTLOW,
	    fipe_mc_ctrl.mc_thrtlow);
	pci_config_put8(fipe_mc_ctrl.mc_pci_hdl, FIPE_MC_THRTCTRL,
	    fipe_mc_ctrl.mc_thrtctrl);
}
/*
 * Initialize the memory controller's data structure and status.
 */
static int
fipe_mc_init(dev_info_t *dip)
{
	ddi_acc_handle_t handle;

	bzero(&fipe_mc_ctrl, sizeof (fipe_mc_ctrl));

	/* Hold one reference count; it will be released in fipe_mc_fini. */
	ndi_hold_devi(dip);

	/* Set up the PCI configuration access handle. */
	if (pci_config_setup(dip, &handle) != DDI_SUCCESS) {
		cmn_err(CE_WARN,
		    "!fipe: failed to setup pcicfg handler in mc_init.");
		ndi_rele_devi(dip);
		return (-1);
	}

	/* Save the original configuration. */
	fipe_mc_ctrl.mc_thrtctrl = pci_config_get8(handle, FIPE_MC_THRTCTRL);
	fipe_mc_ctrl.mc_thrtlow = pci_config_get8(handle, FIPE_MC_THRTLOW);
	fipe_mc_ctrl.mc_gblact = pci_config_get8(handle, FIPE_MC_GBLACT);
	fipe_mc_ctrl.mc_dip = dip;
	fipe_mc_ctrl.mc_pci_hdl = handle;
	fipe_mc_ctrl.mc_initialized = B_TRUE;

	return (0);
}
/*
 * Restore the memory controller's configuration and release resources.
 */
static void
fipe_mc_fini(void)
{
	if (fipe_mc_ctrl.mc_initialized) {
		fipe_mc_restore();
		pci_config_teardown(&fipe_mc_ctrl.mc_pci_hdl);
		ndi_rele_devi(fipe_mc_ctrl.mc_dip);
		fipe_mc_ctrl.mc_initialized = B_FALSE;
	}
	bzero(&fipe_mc_ctrl, sizeof (fipe_mc_ctrl));
}
/* PCI IDs used to search for IOAT devices. */
struct fipe_pci_ioat_id {
	uint16_t	venid;
	uint16_t	devid;
	uint16_t	subvenid;
	uint16_t	subsysid;
	char		*unitaddr;
};

static struct fipe_pci_ioat_id fipe_pci_ioat_ids[] = {
	{ 0x8086, 0x1a38, 0xffff, 0xffff, NULL },
	{ 0x8086, 0x360b, 0xffff, 0xffff, NULL },
};
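
/*
 * The table above matches the Intel (vendor 0x8086) IOAT DMA engine
 * functions; subsystem IDs and unit address are wildcarded. A field value
 * of 0xffff (or a NULL unitaddr) matches anything, as implemented in
 * fipe_search_ioat_dev() below.
 */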
/*ARGSUSED*/
static int
fipe_search_ioat_dev(dev_info_t *dip, void *arg)
{
	char *unit;
	struct fipe_pci_ioat_id *id;
	int i, max, venid, devid, subvenid, subsysid;

	/* Query PCI id properties. */
	venid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "vendor-id", 0xffffffff);
	if (venid == 0xffffffff) {
		return (DDI_WALK_CONTINUE);
	}
	devid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "device-id", 0xffffffff);
	if (devid == 0xffffffff) {
		return (DDI_WALK_CONTINUE);
	}
	subvenid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "subsystem-vendor-id", 0xffffffff);
	if (subvenid == 0xffffffff) {
		return (DDI_WALK_CONTINUE);
	}
	subsysid = ddi_prop_get_int(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "subsystem-id", 0xffffffff);
	if (subsysid == 0xffffffff) {
		return (DDI_WALK_CONTINUE);
	}
	if (ddi_prop_lookup_string(DDI_DEV_T_ANY, dip, DDI_PROP_DONTPASS,
	    "unit-address", &unit) != DDI_PROP_SUCCESS) {
		return (DDI_WALK_CONTINUE);
	}

	max = sizeof (fipe_pci_ioat_ids) / sizeof (fipe_pci_ioat_ids[0]);
	for (i = 0; i < max; i++) {
		id = &fipe_pci_ioat_ids[i];
		if ((id->venid == 0xffffu || id->venid == venid) &&
		    (id->devid == 0xffffu || id->devid == devid) &&
		    (id->subvenid == 0xffffu || id->subvenid == subvenid) &&
		    (id->subsysid == 0xffffu || id->subsysid == subsysid) &&
		    (id->unitaddr == NULL || strcmp(id->unitaddr, unit) == 0)) {
			break;
		}
	}
	ddi_prop_free(unit);
	if (i >= max) {
		return (DDI_WALK_CONTINUE);
	}

	/* Found an IOAT device, hold one reference count. */
	ndi_hold_devi(dip);
	fipe_ioat_ctrl.ioat_dev_info = dip;

	return (DDI_WALK_TERMINATE);
}
/*
 * To enable the FBDIMM idle power enhancement mechanism, IOAT is used to
 * generate enough memory traffic to trigger the memory controller thermal
 * throttling logic.
 * If dcopy/ioat is available, the dcopy interface is used to communicate
 * with IOAT; otherwise the built-in driver below talks to the IOAT
 * hardware directly.
 */
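
/*
 * Note that FIPE_IOAT_BUILTIN is defined to 0 and #undef'd above, so the
 * built-in path is compiled out by default and the dcopy-based path below
 * the #else is the one normally built.
 */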
#ifdef	FIPE_IOAT_BUILTIN
static int
fipe_ioat_trigger(void)
{
	uint16_t ctrl;
	uint32_t err;
	uint8_t *addr = fipe_ioat_ctrl.ioat_reg_addr;
	ddi_acc_handle_t handle = fipe_ioat_ctrl.ioat_reg_handle;

	/* Check the channel-in-use flag. */
	ctrl = ddi_get16(handle, (uint16_t *)(addr + FIPE_IOAT_CHAN_CTRL));
	if (ctrl & 0x100) {
		/*
		 * The channel is in use by somebody else. The IOAT driver may
		 * have been loaded; forbid fipe from accessing the IOAT
		 * hardware from now on.
		 */
		fipe_ioat_ctrl.ioat_ready = B_FALSE;
		fipe_ioat_ctrl.ioat_failed = B_TRUE;
		FIPE_KSTAT_INC(ioat_start_fail_cnt);
		return (-1);
	} else {
		/* Set the channel-in-use flag. */
		ddi_put16(handle,
		    (uint16_t *)(addr + FIPE_IOAT_CHAN_CTRL), 0x100);
	}

	/* Write the command address. */
	ddi_put32(handle,
	    (uint32_t *)(addr + FIPE_IOAT_CHAN_ADDR_LO),
	    (uint32_t)fipe_ioat_ctrl.ioat_cmd_physaddr);
	ddi_put32(handle, (uint32_t *)(addr + FIPE_IOAT_CHAN_ADDR_HI),
	    (uint32_t)(fipe_ioat_ctrl.ioat_cmd_physaddr >> 32));

	/* Check and clear error flags. */
	err = ddi_get32(handle, (uint32_t *)(addr + FIPE_IOAT_CHAN_ERR));
	if (err != 0) {
		ddi_put32(handle, (uint32_t *)(addr + FIPE_IOAT_CHAN_ERR), err);
	}

	/* Start the channel. */
	ddi_put8(handle, (uint8_t *)(addr + FIPE_IOAT_CHAN_CMD), 0x1);

	return (0);
}
static void
fipe_ioat_cancel(void)
{
	uint32_t status;
	uint8_t *addr = fipe_ioat_ctrl.ioat_reg_addr;
	ddi_acc_handle_t handle = fipe_ioat_ctrl.ioat_reg_handle;

	/*
	 * Reset the channel. Sometimes the reset is not reliable,
	 * so check for completion or abort status after the reset.
	 */
	/* LINTED: constant in conditional context */
	while (1) {
		/* Issue the reset channel command. */
		ddi_put8(handle, (uint8_t *)(addr + FIPE_IOAT_CHAN_CMD), 0x20);

		/* Query the command status. */
		status = ddi_get32(handle,
		    (uint32_t *)(addr + FIPE_IOAT_CHAN_STS_LO));
		/* Check the low status bits (exact encoding assumed here). */
		if ((status & 0x7) != 0) {
			/* Reset channel completed. */
			break;
		}
	}

	/* Put the channel into the "not in use" state. */
	ddi_put16(handle, (uint16_t *)(addr + FIPE_IOAT_CHAN_CTRL), 0);
}

/*ARGSUSED*/
static void
fipe_ioat_alloc(void *arg)
{
	int rc, nregs;
	dev_info_t *dip;
	ddi_device_acc_attr_t attr;
	boolean_t fatal = B_FALSE;

	mutex_enter(&fipe_ioat_ctrl.ioat_lock);
	/*
	 * fipe_ioat_alloc() is called in DEVICE ATTACH context when loaded.
	 * In DEVICE ATTACH context it can't call ddi_walk_devs(), so just
	 * schedule a timer and exit.
	 */
	if (fipe_ioat_ctrl.ioat_try_alloc == B_FALSE) {
		fipe_ioat_ctrl.ioat_try_alloc = B_TRUE;
		goto out_error;
	}

	/* Check whether initialization is done or a permanent error was hit. */
	if (fipe_ioat_ctrl.ioat_ready || fipe_ioat_ctrl.ioat_failed ||
	    fipe_ioat_ctrl.ioat_cancel) {
		fipe_ioat_ctrl.ioat_timerid = 0;
		mutex_exit(&fipe_ioat_ctrl.ioat_lock);
		return;
	}

	if (fipe_ioat_ctrl.ioat_dev_info == NULL) {
		/* Find the dev_info_t for the IOAT engine. */
		ddi_walk_devs(ddi_root_node(), fipe_search_ioat_dev, NULL);
		if (fipe_ioat_ctrl.ioat_dev_info == NULL) {
			cmn_err(CE_NOTE,
			    "!fipe: no IOAT hardware found, disable pm.");
			fatal = B_TRUE;
			goto out_error;
		}
	}

	/* Map in the IOAT control register window. */
	ASSERT(fipe_ioat_ctrl.ioat_dev_info != NULL);
	ASSERT(fipe_ioat_ctrl.ioat_reg_mapped == B_FALSE);
	dip = fipe_ioat_ctrl.ioat_dev_info;
	if (ddi_dev_nregs(dip, &nregs) != DDI_SUCCESS || nregs < 2) {
		cmn_err(CE_WARN,
		    "!fipe: ioat doesn't have enough register bars.");
		fatal = B_TRUE;
		goto out_error;
	}
	attr.devacc_attr_version = DDI_DEVICE_ATTR_V0;
	attr.devacc_attr_endian_flags = DDI_NEVERSWAP_ACC;
	attr.devacc_attr_dataorder = DDI_STRICTORDER_ACC;
	rc = ddi_regs_map_setup(dip, 1,
	    (caddr_t *)&fipe_ioat_ctrl.ioat_reg_addr,
	    0, 0, &attr, &fipe_ioat_ctrl.ioat_reg_handle);
	if (rc != DDI_SUCCESS) {
		cmn_err(CE_WARN, "!fipe: failed to map IOAT registers.");
		fatal = B_TRUE;
		goto out_error;
	}

	/* Mark the IOAT status. */
	fipe_ioat_ctrl.ioat_reg_mapped = B_TRUE;
	fipe_ioat_ctrl.ioat_ready = B_TRUE;
	fipe_ioat_ctrl.ioat_failed = B_FALSE;
	fipe_ioat_ctrl.ioat_timerid = 0;
	mutex_exit(&fipe_ioat_ctrl.ioat_lock);

	return;

out_error:
	fipe_ioat_ctrl.ioat_timerid = 0;
	if (!fipe_ioat_ctrl.ioat_ready && !fipe_ioat_ctrl.ioat_cancel) {
		if (fatal) {
			/* Mark the permanent error and give up. */
			fipe_ioat_ctrl.ioat_failed = B_TRUE;
			/* Release the reference count held in the walk. */
			if (fipe_ioat_ctrl.ioat_dev_info != NULL) {
				ndi_rele_devi(fipe_ioat_ctrl.ioat_dev_info);
				fipe_ioat_ctrl.ioat_dev_info = NULL;
			}
		} else {
			/*
			 * Schedule another timer to keep on trying.
			 * timeout() should always succeed, so there is no
			 * need to check its return value.
			 */
			fipe_ioat_ctrl.ioat_timerid = timeout(fipe_ioat_alloc,
			    NULL, drv_usectohz(FIPE_IOAT_RETRY_INTERVAL));
		}
	}
	mutex_exit(&fipe_ioat_ctrl.ioat_lock);
}

static void
fipe_ioat_free(void)
{
	mutex_enter(&fipe_ioat_ctrl.ioat_lock);

	/* Cancel the timeout to avoid a race condition. */
	if (fipe_ioat_ctrl.ioat_timerid != 0) {
		fipe_ioat_ctrl.ioat_cancel = B_TRUE;
		mutex_exit(&fipe_ioat_ctrl.ioat_lock);
		(void) untimeout(fipe_ioat_ctrl.ioat_timerid);
		mutex_enter(&fipe_ioat_ctrl.ioat_lock);
		fipe_ioat_ctrl.ioat_timerid = 0;
		fipe_ioat_ctrl.ioat_cancel = B_FALSE;
	}

	if (fipe_ioat_ctrl.ioat_reg_mapped) {
		ddi_regs_map_free(&fipe_ioat_ctrl.ioat_reg_handle);
		fipe_ioat_ctrl.ioat_reg_mapped = B_FALSE;
	}

	fipe_ioat_ctrl.ioat_ready = B_FALSE;
	mutex_exit(&fipe_ioat_ctrl.ioat_lock);
}
#else	/* FIPE_IOAT_BUILTIN */

/*
 * Trigger IOAT memory copy operations when entering the power saving state.
 * A group of commands is posted to the IOAT driver and placed into an IOAT
 * ring buffer.
 */
static int
fipe_ioat_trigger(void)
{
	int idx;
	dcopy_cmd_t *cmds = fipe_ioat_ctrl.ioat_cmds;

	for (idx = FIPE_IOAT_CMD_NUM; idx > 0; idx--) {
		if (dcopy_cmd_post(cmds[idx]) == DCOPY_SUCCESS) {
			continue;
		} else {
			/*
			 * Don't roll back on failure; at worst a few small
			 * memory copy operations are wasted.
			 */
			FIPE_KSTAT_DETAIL_INC(ioat_start_fail_cnt);
			return (-1);
		}
	}

	return (0);
}
/*
 * Cancel the memory copy operations posted by fipe_ioat_trigger.
 * This is achieved by posting a new command which breaks the ring
 * created by fipe_ioat_trigger. If it fails, the best way to recover
 * is to just let it go; IOAT will recover when the next command is
 * posted on the same channel.
 */
static void
fipe_ioat_cancel(void)
{
	if (dcopy_cmd_post(fipe_ioat_ctrl.ioat_cmds[0]) != DCOPY_SUCCESS) {
		FIPE_KSTAT_DETAIL_INC(ioat_stop_fail_cnt);
	}
}
/*
 * This function is called to allocate IOAT resources.
 * Allocation may fail for the following reasons:
 * 1) The IOAT driver hasn't been loaded yet. Keep on trying in this case.
 * 2) IOAT resources are temporarily unavailable. Keep on trying in this case.
 * 3) Other unrecoverable reasons. Disable the power management function.
 */
/*ARGSUSED*/
static void
fipe_ioat_alloc(void *arg)
{
	int idx, flags, rc = 0;
	uint64_t physaddr;
	boolean_t fatal = B_FALSE;
	dcopy_query_t info;
	dcopy_handle_t handle;
	dcopy_cmd_t cmds[FIPE_IOAT_CMD_NUM + 1];

	mutex_enter(&fipe_ioat_ctrl.ioat_lock);
	/*
	 * fipe_ioat_alloc() is called in DEVICE ATTACH context when loaded.
	 * In DEVICE ATTACH context it can't call ddi_walk_devs(), so just
	 * schedule a timer and exit.
	 */
	if (fipe_ioat_ctrl.ioat_try_alloc == B_FALSE) {
		fipe_ioat_ctrl.ioat_try_alloc = B_TRUE;
		mutex_exit(&fipe_ioat_ctrl.ioat_lock);
		goto out_error;
	}

	/*
	 * Check whether the device has been initialized or whether it
	 * encountered some permanent error.
	 */
	if (fipe_ioat_ctrl.ioat_ready || fipe_ioat_ctrl.ioat_failed ||
	    fipe_ioat_ctrl.ioat_cancel) {
		fipe_ioat_ctrl.ioat_timerid = 0;
		mutex_exit(&fipe_ioat_ctrl.ioat_lock);
		return;
	}

	if (fipe_ioat_ctrl.ioat_dev_info == NULL) {
		/* Find the dev_info_t for the IOAT engine. */
		ddi_walk_devs(ddi_root_node(), fipe_search_ioat_dev, NULL);
		if (fipe_ioat_ctrl.ioat_dev_info == NULL) {
			cmn_err(CE_NOTE,
			    "!fipe: no IOAT hardware found, disable pm.");
			mutex_exit(&fipe_ioat_ctrl.ioat_lock);
			fatal = B_TRUE;
			goto out_error;
		}
	}
	mutex_exit(&fipe_ioat_ctrl.ioat_lock);

	/* Check, allocate and initialize IOAT resources with the lock released. */
	dcopy_query(&info);
	if (info.dq_version < DCOPY_QUERY_V0) {
		/* Permanent error, give up. */
		cmn_err(CE_WARN, "!fipe: IOAT driver version mismatch.");
		fatal = B_TRUE;
		goto out_error;
	} else if (info.dq_num_channels == 0) {
		/* The IOAT driver hasn't been loaded, keep trying. */
		goto out_error;
	}

	/* Allocate an IOAT channel. */
	rc = dcopy_alloc(DCOPY_NOSLEEP, &handle);
	if (rc == DCOPY_NORESOURCES) {
		/* Resources temporarily unavailable, keep trying. */
		goto out_error;
	} else if (rc != DCOPY_SUCCESS) {
		/* Permanent error, give up. */
		cmn_err(CE_WARN, "!fipe: failed to allocate IOAT channel.");
		fatal = B_TRUE;
		goto out_error;
	}

	/*
	 * Allocate multiple IOAT commands and organize them into a ring that
	 * loops forever. The number of commands is determined by the IOAT
	 * descriptor size and the memory interleave pattern.
	 * cmd[0] is used to break the loop and disable IOAT operation.
	 * cmd[1 .. FIPE_IOAT_CMD_NUM] are grouped into a ring and cmd[1] is
	 * the list head.
	 */
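
	/*
	 * The resulting structure (with FIPE_IOAT_CMD_NUM == 2): cmd[1],
	 * flagged DCOPY_CMD_LOOP below, heads a ring containing cmd[2],
	 * which is flagged DCOPY_CMD_QUEUE; once posted, the engine keeps
	 * replaying these copies until cmd[0] is posted to break the ring.
	 */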
	bzero(cmds, sizeof (cmds));
	physaddr = fipe_ioat_ctrl.ioat_buf_physaddr;
	for (idx = FIPE_IOAT_CMD_NUM; idx >= 0; idx--) {
		/* Allocate an IOAT command. */
		if (idx == 0 || idx == FIPE_IOAT_CMD_NUM) {
			flags = DCOPY_NOSLEEP;
		} else {
			/*
			 * To link commands into a list, the initial value of
			 * a cmd needs to be set to the next cmd on the list.
			 */
			flags = DCOPY_NOSLEEP | DCOPY_ALLOC_LINK;
			cmds[idx] = cmds[idx + 1];
		}
		rc = dcopy_cmd_alloc(handle, flags, &cmds[idx]);
		if (rc == DCOPY_NORESOURCES) {
			/* Resources temporarily unavailable, keep trying. */
			goto out_freecmd;
		} else if (rc != DCOPY_SUCCESS) {
			/* Permanent error, give up. */
			cmn_err(CE_WARN,
			    "!fipe: failed to allocate IOAT command.");
			fatal = B_TRUE;
			goto out_freecmd;
		}
		/* Disable src/dst snoop to improve CPU cache efficiency. */
		cmds[idx]->dp_flags = DCOPY_CMD_NOSRCSNP | DCOPY_CMD_NODSTSNP;
		/* Specially handle commands on the list. */
		if (idx != 0) {
			/* Disable IOAT status reporting. */
			cmds[idx]->dp_flags |= DCOPY_CMD_NOSTAT;
			/* Disable waiting for resources. */
			cmds[idx]->dp_flags |= DCOPY_CMD_NOWAIT;
			if (idx == 1) {
				/* The list head, chain commands into a loop. */
				cmds[idx]->dp_flags |= DCOPY_CMD_LOOP;
			} else {
				/* Queue all other commands except the head. */
				cmds[idx]->dp_flags |= DCOPY_CMD_QUEUE;
			}
		}
		cmds[idx]->dp_cmd = DCOPY_CMD_COPY;
		cmds[idx]->dp.copy.cc_source = physaddr;
		cmds[idx]->dp.copy.cc_dest = physaddr + FIPE_MC_MEMORY_OFFSET;
		if (idx == 0) {
			/*
			 * Command 0 is used to cancel the memory copy by
			 * breaking the ring created in fipe_ioat_trigger().
			 * For efficiency, use the smallest memory copy size.
			 */
			cmds[idx]->dp.copy.cc_size = 1;
		} else {
			cmds[idx]->dp.copy.cc_size = FIPE_MC_MEMORY_SIZE;
		}
	}
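
	/*
	 * With FIPE_MC_MEMORY_OFFSET == 1024 and FIPE_MC_MEMORY_SIZE == 128
	 * (the hardware recommended values above), each ring command copies
	 * 128 bytes from offset 0 to offset 1024 of the dedicated buffer
	 * page, generating steady read/write traffic against a single page.
	 */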

	/* Update the IOAT control status if it hasn't been initialized yet. */
	mutex_enter(&fipe_ioat_ctrl.ioat_lock);
	if (!fipe_ioat_ctrl.ioat_ready && !fipe_ioat_ctrl.ioat_cancel) {
		fipe_ioat_ctrl.ioat_handle = handle;
		for (idx = 0; idx <= FIPE_IOAT_CMD_NUM; idx++) {
			fipe_ioat_ctrl.ioat_cmds[idx] = cmds[idx];
		}
		fipe_ioat_ctrl.ioat_ready = B_TRUE;
		fipe_ioat_ctrl.ioat_failed = B_FALSE;
		fipe_ioat_ctrl.ioat_timerid = 0;
		mutex_exit(&fipe_ioat_ctrl.ioat_lock);
		return;
	}
	mutex_exit(&fipe_ioat_ctrl.ioat_lock);
	/* Initialized by another thread, fall through to free resources. */

out_freecmd:
	if (cmds[0] != NULL) {
		dcopy_cmd_free(&cmds[0]);
	}
	/* Only need to free the head; dcopy frees all commands on the list. */
	for (idx = 1; idx <= FIPE_IOAT_CMD_NUM; idx++) {
		if (cmds[idx] != NULL) {
			dcopy_cmd_free(&cmds[idx]);
			break;
		}
	}
	dcopy_free(&handle);

out_error:
	mutex_enter(&fipe_ioat_ctrl.ioat_lock);
	fipe_ioat_ctrl.ioat_timerid = 0;
	if (!fipe_ioat_ctrl.ioat_ready && !fipe_ioat_ctrl.ioat_cancel) {
		if (fatal) {
			/* Mark the permanent error and give up. */
			fipe_ioat_ctrl.ioat_failed = B_TRUE;
			/* Release the reference count held in the walk. */
			if (fipe_ioat_ctrl.ioat_dev_info != NULL) {
				ndi_rele_devi(fipe_ioat_ctrl.ioat_dev_info);
				fipe_ioat_ctrl.ioat_dev_info = NULL;
			}
		} else {
			/*
			 * Schedule another timer to keep on trying.
			 * timeout() should always succeed, so there is no
			 * need to check its return value.
			 */
			fipe_ioat_ctrl.ioat_timerid = timeout(fipe_ioat_alloc,
			    NULL, drv_usectohz(FIPE_IOAT_RETRY_INTERVAL));
		}
	}
	mutex_exit(&fipe_ioat_ctrl.ioat_lock);
}

/*
 * Free resources allocated in fipe_ioat_alloc.
 */
static void
fipe_ioat_free(void)
{
	int idx = 0;
	dcopy_cmd_t *cmds = fipe_ioat_ctrl.ioat_cmds;

	mutex_enter(&fipe_ioat_ctrl.ioat_lock);

	/* Cancel the timeout to avoid a race condition. */
	if (fipe_ioat_ctrl.ioat_timerid != 0) {
		fipe_ioat_ctrl.ioat_cancel = B_TRUE;
		mutex_exit(&fipe_ioat_ctrl.ioat_lock);
		(void) untimeout(fipe_ioat_ctrl.ioat_timerid);
		mutex_enter(&fipe_ioat_ctrl.ioat_lock);
		fipe_ioat_ctrl.ioat_timerid = 0;
		fipe_ioat_ctrl.ioat_cancel = B_FALSE;
	}

	/* Free the ioat resources. */
	if (fipe_ioat_ctrl.ioat_ready) {
		if (cmds[0] != NULL) {
			dcopy_cmd_free(&cmds[0]);
		}
		/* Only need to free the head; dcopy frees the whole list. */
		for (idx = 1; idx <= FIPE_IOAT_CMD_NUM; idx++) {
			if (cmds[idx] != NULL) {
				dcopy_cmd_free(&cmds[idx]);
				break;
			}
		}
		bzero(fipe_ioat_ctrl.ioat_cmds,
		    sizeof (fipe_ioat_ctrl.ioat_cmds));
		dcopy_free(&fipe_ioat_ctrl.ioat_handle);
		fipe_ioat_ctrl.ioat_handle = NULL;
		fipe_ioat_ctrl.ioat_ready = B_FALSE;
	}

	/* Release the reference count held by fipe_search_ioat_dev. */
	if (fipe_ioat_ctrl.ioat_dev_info != NULL) {
		ndi_rele_devi(fipe_ioat_ctrl.ioat_dev_info);
		fipe_ioat_ctrl.ioat_dev_info = NULL;
	}

	mutex_exit(&fipe_ioat_ctrl.ioat_lock);
}
#endif	/* FIPE_IOAT_BUILTIN */

/*
 * Initialize IOAT related resources.
 */
static int
fipe_ioat_init(void)
{
	char *buf;
	size_t size;

	bzero(&fipe_ioat_ctrl, sizeof (fipe_ioat_ctrl));
	mutex_init(&fipe_ioat_ctrl.ioat_lock, NULL, MUTEX_DRIVER, NULL);

	/*
	 * Allocate memory for the IOAT memory copy operations.
	 * The allocated memory should be page aligned to achieve better
	 * power savings.
	 * Don't use ddi_dma_mem_alloc here, to keep things simple. This
	 * also makes quiesce easier.
	 */
	size = PAGESIZE;
	buf = kmem_zalloc(size, KM_SLEEP);
	if ((intptr_t)buf & PAGEOFFSET) {
		kmem_free(buf, PAGESIZE);
		size <<= 1;
		buf = kmem_zalloc(size, KM_SLEEP);
	}
	fipe_ioat_ctrl.ioat_buf_size = size;
	fipe_ioat_ctrl.ioat_buf_start = buf;
	buf = (char *)P2ROUNDUP((intptr_t)buf, PAGESIZE);
	fipe_ioat_ctrl.ioat_buf_virtaddr = buf;
	fipe_ioat_ctrl.ioat_buf_physaddr = hat_getpfnum(kas.a_hat, buf);
	fipe_ioat_ctrl.ioat_buf_physaddr <<= PAGESHIFT;
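
	/*
	 * hat_getpfnum() returns the page frame number backing the page-
	 * aligned buffer; shifting it by PAGESHIFT yields the physical
	 * address of the buffer, which is valid for the whole page since
	 * the buffer starts on a page boundary.
	 */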

#ifdef	FIPE_IOAT_BUILTIN
	{
		uint64_t bufpa;
		/* IOAT descriptor data structure copied from ioat.h. */
		struct fipe_ioat_cmd_desc {
			uint32_t	dd_size;
			uint32_t	dd_ctrl;
			uint64_t	dd_src_paddr;
			uint64_t	dd_dest_paddr;
			uint64_t	dd_next_desc;
		} *desc;

		/*
		 * Build two IOAT command descriptors and chain them into a
		 * ring. Control flags are as below:
		 *	0x2: disable source snoop
		 *	0x4: disable destination snoop
		 *	0x0 << 24: memory copy operation
		 * The layout of the command descriptors and memory buffers
		 * is organized for power saving effect; please don't
		 * change it.
		 */
		buf = fipe_ioat_ctrl.ioat_buf_virtaddr;
		bufpa = fipe_ioat_ctrl.ioat_buf_physaddr;
		fipe_ioat_ctrl.ioat_cmd_physaddr = bufpa;

		/* First command descriptor. */
		desc = (struct fipe_ioat_cmd_desc *)(buf);
		desc->dd_size = 128;
		desc->dd_ctrl = 0x6;
		desc->dd_src_paddr = bufpa + 2048;
		desc->dd_dest_paddr = bufpa + 3072;
		/* Point to the second descriptor. */
		desc->dd_next_desc = bufpa + 64;

		/* Second command descriptor. */
		desc = (struct fipe_ioat_cmd_desc *)(buf + 64);
		desc->dd_size = 128;
		desc->dd_ctrl = 0x6;
		desc->dd_src_paddr = bufpa + 2048;
		desc->dd_dest_paddr = bufpa + 3072;
		/* Point back to the first descriptor. */
		desc->dd_next_desc = bufpa;
	}
#endif	/* FIPE_IOAT_BUILTIN */

	return (0);
}
static void
fipe_ioat_fini(void)
{
	/* Release the reference count held by fipe_search_ioat_dev. */
	if (fipe_ioat_ctrl.ioat_dev_info != NULL) {
		ndi_rele_devi(fipe_ioat_ctrl.ioat_dev_info);
		fipe_ioat_ctrl.ioat_dev_info = NULL;
	}

	if (fipe_ioat_ctrl.ioat_buf_start != NULL) {
		ASSERT(fipe_ioat_ctrl.ioat_buf_size != 0);
		kmem_free(fipe_ioat_ctrl.ioat_buf_start,
		    fipe_ioat_ctrl.ioat_buf_size);
	}

	mutex_destroy(&fipe_ioat_ctrl.ioat_lock);
	bzero(&fipe_ioat_ctrl, sizeof (fipe_ioat_ctrl));
}
static int
fipe_idle_start(void)
{
	int rc;

	if (fipe_idle_ctrl.idle_ready) {
		return (0);
	}

	if (cpu_idle_prop_create_handle(CPU_IDLE_PROP_ENTER_TIMESTAMP,
	    &fipe_idle_ctrl.prop_enter) != 0) {
		cmn_err(CE_WARN, "!fipe: failed to get enter_ts property.");
		return (-1);
	}
	if (cpu_idle_prop_create_handle(CPU_IDLE_PROP_EXIT_TIMESTAMP,
	    &fipe_idle_ctrl.prop_exit) != 0) {
		cmn_err(CE_WARN, "!fipe: failed to get exit_ts property.");
		(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_enter);
		return (-1);
	}
	if (cpu_idle_prop_create_handle(CPU_IDLE_PROP_TOTAL_IDLE_TIME,
	    &fipe_idle_ctrl.prop_idle) != 0) {
		cmn_err(CE_WARN, "!fipe: failed to get idle_time property.");
		(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_exit);
		(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_enter);
		return (-1);
	}
	if (cpu_idle_prop_create_handle(CPU_IDLE_PROP_TOTAL_BUSY_TIME,
	    &fipe_idle_ctrl.prop_busy) != 0) {
		cmn_err(CE_WARN, "!fipe: failed to get busy_time property.");
		(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_idle);
		(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_exit);
		(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_enter);
		return (-1);
	}
	if (cpu_idle_prop_create_handle(CPU_IDLE_PROP_INTERRUPT_COUNT,
	    &fipe_idle_ctrl.prop_intr) != 0) {
		cmn_err(CE_WARN, "!fipe: failed to get intr_count property.");
		(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_busy);
		(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_idle);
		(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_exit);
		(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_enter);
		return (-1);
	}

	/* Register the idle state notification callback. */
	rc = cpu_idle_register_callback(CPU_IDLE_CB_PRIO_FIPE, &fipe_idle_cb,
	    NULL, &fipe_idle_ctrl.cb_handle);
	if (rc != 0) {
		cmn_err(CE_WARN, "!fipe: failed to register cpuidle callback.");
		(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_intr);
		(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_busy);
		(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_idle);
		(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_exit);
		(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_enter);
		return (-1);
	}

	fipe_idle_ctrl.idle_ready = B_TRUE;

	return (0);
}
static int
fipe_idle_stop(void)
{
	int rc;

	if (fipe_idle_ctrl.idle_ready == B_FALSE) {
		return (0);
	}

	rc = cpu_idle_unregister_callback(fipe_idle_ctrl.cb_handle);
	if (rc != 0) {
		cmn_err(CE_WARN,
		    "!fipe: failed to unregister cpuidle callback.");
		return (-1);
	}

	(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_intr);
	(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_busy);
	(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_idle);
	(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_exit);
	(void) cpu_idle_prop_destroy_handle(fipe_idle_ctrl.prop_enter);

	fipe_idle_ctrl.idle_ready = B_FALSE;

	return (0);
}

#ifdef	FIPE_KSTAT_SUPPORT
static int
fipe_kstat_update(kstat_t *ksp, int rw)
{
	struct fipe_kstat_s *sp;
	hrtime_t hrt;

	if (rw == KSTAT_WRITE) {
		return (EACCES);
	}

	sp = ksp->ks_data;
	sp->fipe_enabled.value.i32 = fipe_gbl_ctrl.pm_enabled ? 1 : 0;
	sp->fipe_policy.value.i32 = fipe_pm_policy;

	/* time_in_pm is kept unscaled; convert it before exporting. */
	hrt = fipe_gbl_ctrl.time_in_pm;
	scalehrtime(&hrt);
	sp->fipe_pm_time.value.ui64 = (uint64_t)hrt;

#ifdef	FIPE_KSTAT_DETAIL
	sp->ioat_ready.value.i32 = fipe_ioat_ctrl.ioat_ready ? 1 : 0;
#endif	/* FIPE_KSTAT_DETAIL */

	return (0);
}
#endif	/* FIPE_KSTAT_SUPPORT */
/*
 * Initialize the memory power management subsystem.
 * Note: This function should only be called from ATTACH.
 * Note: the caller must ensure exclusive access to all fipe_xxx interfaces.
 */
int
fipe_init(dev_info_t *dip)
{
	size_t nsize;
	hrtime_t hrt;

	/* Initialize the global control structure. */
	bzero(&fipe_gbl_ctrl, sizeof (fipe_gbl_ctrl));
	mutex_init(&fipe_gbl_ctrl.lock, NULL, MUTEX_DRIVER, NULL);

	/* Query the power management policy from the device property. */
	fipe_pm_policy = ddi_prop_get_int(DDI_DEV_T_ANY, dip, 0,
	    FIPE_PROP_PM_POLICY, fipe_pm_policy);
	if (fipe_pm_policy < 0 || fipe_pm_policy >= FIPE_PM_POLICY_MAX) {
		cmn_err(CE_CONT,
		    "?fipe: invalid power management policy %d.\n",
		    fipe_pm_policy);
		fipe_pm_policy = FIPE_PM_POLICY_BALANCE;
	}
	fipe_profile_curr = &fipe_profiles[fipe_pm_policy];

	/*
	 * Compute the unscaled hrtime value corresponding to
	 * FIPE_STAT_INTERVAL. (1 << 36) should be big enough here.
	 */
	hrt = 1ULL << 36;
	scalehrtime(&hrt);
	fipe_idle_ctrl.tick_interval = FIPE_STAT_INTERVAL * (1ULL << 36) / hrt;
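
	/*
	 * scalehrtime() converts 2^36 unscaled ticks into nanoseconds, so
	 * tick_interval = FIPE_STAT_INTERVAL * 2^36 / hrt is the number of
	 * unscaled ticks per FIPE_STAT_INTERVAL (10ms) window; e.g. at an
	 * effective 1 ns per tick it evaluates to roughly 10^7 ticks.
	 */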

	if (fipe_mc_init(dip) != 0) {
		cmn_err(CE_WARN, "!fipe: failed to initialize mc state.");
		goto out_mc_error;
	}
	if (fipe_ioat_init() != 0) {
		cmn_err(CE_NOTE, "!fipe: failed to initialize ioat state.");
		goto out_ioat_error;
	}

	/* Allocate the per-CPU structure. */
	nsize = max_ncpus * sizeof (fipe_cpu_state_t);
	nsize += CPU_CACHE_COHERENCE_SIZE;
	fipe_gbl_ctrl.state_buf = kmem_zalloc(nsize, KM_SLEEP);
	fipe_gbl_ctrl.state_size = nsize;
	fipe_cpu_states = (fipe_cpu_state_t *)P2ROUNDUP(
	    (intptr_t)fipe_gbl_ctrl.state_buf, CPU_CACHE_COHERENCE_SIZE);

#ifdef	FIPE_KSTAT_SUPPORT
	fipe_gbl_ctrl.fipe_kstat = kstat_create("fipe", 0, "fipe-pm", "misc",
	    KSTAT_TYPE_NAMED, sizeof (fipe_kstat) / sizeof (kstat_named_t),
	    KSTAT_FLAG_VIRTUAL);
	if (fipe_gbl_ctrl.fipe_kstat == NULL) {
		cmn_err(CE_CONT, "?fipe: failed to create kstat object.\n");
	} else {
		fipe_gbl_ctrl.fipe_kstat->ks_lock = &fipe_gbl_ctrl.lock;
		fipe_gbl_ctrl.fipe_kstat->ks_data = &fipe_kstat;
		fipe_gbl_ctrl.fipe_kstat->ks_update = fipe_kstat_update;
		kstat_install(fipe_gbl_ctrl.fipe_kstat);
	}
#endif	/* FIPE_KSTAT_SUPPORT */

	return (0);

out_ioat_error:
	fipe_mc_fini();
out_mc_error:
	mutex_destroy(&fipe_gbl_ctrl.lock);
	bzero(&fipe_gbl_ctrl, sizeof (fipe_gbl_ctrl));

	return (-1);
}

/*
 * Destroy the memory power management subsystem.
 * Note: This function should only be called from DETACH.
 * Note: the caller must ensure exclusive access to all fipe_xxx interfaces.
 */
int
fipe_fini(void)
{
	if (fipe_gbl_ctrl.pm_enabled) {
		cmn_err(CE_NOTE,
		    "!fipe: fipe_fini called without stopping PM.");
		return (EBUSY);
	}

	ASSERT(!fipe_gbl_ctrl.pm_active);
	fipe_ioat_fini();
	fipe_mc_fini();

#ifdef	FIPE_KSTAT_SUPPORT
	if (fipe_gbl_ctrl.fipe_kstat != NULL) {
		kstat_delete(fipe_gbl_ctrl.fipe_kstat);
		fipe_gbl_ctrl.fipe_kstat = NULL;
	}
#endif	/* FIPE_KSTAT_SUPPORT */

	if (fipe_gbl_ctrl.state_buf != NULL) {
		ASSERT(fipe_gbl_ctrl.state_size != 0);
		kmem_free(fipe_gbl_ctrl.state_buf, fipe_gbl_ctrl.state_size);
		fipe_cpu_states = NULL;
	}

	fipe_profile_curr = NULL;
	mutex_destroy(&fipe_gbl_ctrl.lock);
	bzero(&fipe_gbl_ctrl, sizeof (fipe_gbl_ctrl));

	return (0);
}

/*
 * Start the memory power management subsystem.
 * Note: the caller must ensure exclusive access to all fipe_xxx interfaces.
 */
int
fipe_start(void)
{
	if (fipe_gbl_ctrl.pm_enabled == B_TRUE) {
		return (0);
	}

	bzero(fipe_cpu_states, max_ncpus * sizeof (fipe_cpu_states[0]));
	fipe_ioat_alloc(NULL);
	if (fipe_idle_start() != 0) {
		cmn_err(CE_NOTE, "!fipe: failed to start PM subsystem.");
		fipe_ioat_free();
		return (-1);
	}

	fipe_gbl_ctrl.pm_enabled = B_TRUE;

	return (0);
}

/*
 * Stop the memory power management subsystem.
 * Note: the caller must ensure exclusive access to all fipe_xxx interfaces.
 */
int
fipe_stop(void)
{
	if (fipe_gbl_ctrl.pm_enabled) {
		if (fipe_idle_stop() != 0) {
			cmn_err(CE_NOTE,
			    "!fipe: failed to stop PM subsystem.");
			return (-1);
		}
		fipe_ioat_free();
		fipe_gbl_ctrl.pm_enabled = B_FALSE;
	}
	ASSERT(!fipe_gbl_ctrl.pm_active);

	return (0);
}

int
fipe_suspend(void)
{
	/* Save the current power management policy. */
	fipe_pm_policy_saved = fipe_pm_policy;
	/* Disable PM by switching to the FIPE_PM_POLICY_DISABLE profile. */
	fipe_pm_policy = FIPE_PM_POLICY_DISABLE;
	fipe_profile_curr = &fipe_profiles[fipe_pm_policy];

	return (0);
}

int
fipe_resume(void)
{
	/* Restore the saved power management policy. */
	fipe_pm_policy = fipe_pm_policy_saved;
	fipe_profile_curr = &fipe_profiles[fipe_pm_policy];

	return (0);
}

fipe_pm_policy_t
fipe_get_pmpolicy(void)
{
	return (fipe_pm_policy);
}

int
fipe_set_pmpolicy(fipe_pm_policy_t policy)
{
	if (policy < 0 || policy >= FIPE_PM_POLICY_MAX) {
		return (EINVAL);
	}
	fipe_pm_policy = policy;
	fipe_profile_curr = &fipe_profiles[fipe_pm_policy];

	return (0);
}

/*
 * Check the condition (fipe_gbl_ctrl.cpu_count == ncpus) to make sure that
 * no other CPU is trying to wake up the system from the memory power saving
 * state. If a CPU is waking up the system, fipe_disable() will set
 * fipe_gbl_ctrl.pm_active to false as soon as possible to allow other CPUs
 * to continue, and it takes the responsibility of recovering the system
 * from the memory power saving state.
 */
static void
fipe_enable(int throttle, cpu_idle_check_wakeup_t check_func, void *check_arg)
{
	extern void membar_sync(void);

	FIPE_KSTAT_DETAIL_INC(pm_tryenter_cnt);

	/* Check for CPU wakeup events. */
	if (check_func != NULL) {
		(*check_func)(check_arg);
	}

	/*
	 * Try to acquire the mutex, which also implicitly has the same
	 * effect as calling membar_sync().
	 * If mutex_tryenter fails, that means another CPU is waking up.
	 */
	if (mutex_tryenter(&fipe_gbl_ctrl.lock) == 0) {
		FIPE_KSTAT_DETAIL_INC(pm_race_cnt);
	/*
	 * Handle a special race condition for the case that a CPU wakes
	 * and then enters the idle state again within a short period.
	 * This case can't be reliably detected by the cpu_count mechanism.
	 */
	} else if (fipe_gbl_ctrl.pm_active) {
		FIPE_KSTAT_DETAIL_INC(pm_race_cnt);
		mutex_exit(&fipe_gbl_ctrl.lock);
	} else {
		fipe_gbl_ctrl.pm_active = B_TRUE;
		membar_sync();
		if (fipe_gbl_ctrl.cpu_count != ncpus) {
			/* Some CPU has already exited the idle state. */
			FIPE_KSTAT_DETAIL_INC(pm_race_cnt);
			fipe_gbl_ctrl.pm_active = B_FALSE;
		} else if (fipe_ioat_trigger() != 0) {
			fipe_gbl_ctrl.pm_active = B_FALSE;
		} else if (fipe_gbl_ctrl.cpu_count != ncpus ||
		    fipe_mc_change(throttle) != 0) {
			fipe_gbl_ctrl.pm_active = B_FALSE;
			fipe_ioat_cancel();
			if (fipe_gbl_ctrl.cpu_count != ncpus) {
				FIPE_KSTAT_DETAIL_INC(pm_race_cnt);
			}
		} else if (fipe_gbl_ctrl.cpu_count != ncpus) {
			/* A CPU woke up after the MC was reconfigured. */
			fipe_gbl_ctrl.pm_active = B_FALSE;
			fipe_mc_restore();
			fipe_ioat_cancel();
			FIPE_KSTAT_DETAIL_INC(pm_race_cnt);
		} else {
			FIPE_KSTAT_DETAIL_INC(pm_success_cnt);
		}
		mutex_exit(&fipe_gbl_ctrl.lock);
	}
}

static void
fipe_disable(void)
{
	/*
	 * Try to acquire the lock, which also implicitly has the same
	 * effect as calling membar_sync().
	 */
	while (mutex_tryenter(&fipe_gbl_ctrl.lock) == 0) {
		/*
		 * If power saving is inactive, just return; all dirty
		 * housekeeping work will be handled in fipe_enable().
		 */
		if (fipe_gbl_ctrl.pm_active == B_FALSE) {
			return;
		}
	}

	/* Disable power saving if it's active. */
	if (fipe_gbl_ctrl.pm_active) {
		/*
		 * Set pm_active to FALSE as soon as possible to prevent
		 * other CPUs from waiting on the pm_active flag.
		 */
		fipe_gbl_ctrl.pm_active = B_FALSE;
		fipe_mc_restore();
		fipe_ioat_cancel();
	}

	mutex_exit(&fipe_gbl_ctrl.lock);
}

static boolean_t
fipe_check_cpu(struct fipe_cpu_state *sp, cpu_idle_callback_context_t ctx,
    hrtime_t ts)
{
	if (cpu_flagged_offline(CPU->cpu_flags)) {
		/* Treat a CPU in the offline state as ready. */
		sp->cond_ready = B_TRUE;
		sp->state_ready = B_FALSE;
	} else if (sp->next_ts <= ts) {
		uint64_t intr;
		hrtime_t idle, busy, diff;
		cpu_idle_prop_value_t val;

		/* Set the default value. */
		sp->cond_ready = B_TRUE;
		sp->idle_count = 0;

		/* Calculate the idle percentage. */
		idle = sp->last_idle;
		sp->last_idle = cpu_idle_prop_get_hrtime(
		    fipe_idle_ctrl.prop_idle, ctx);
		idle = sp->last_idle - idle;
		busy = sp->last_busy;
		sp->last_busy = cpu_idle_prop_get_hrtime(
		    fipe_idle_ctrl.prop_busy, ctx);
		busy = sp->last_busy - busy;
		/* Check the idle condition. */
		if (idle > 0 && busy > 0) {
			if (busy * (100 - FIPE_PROF_BUSY_THRESHOLD) >
			    idle * FIPE_PROF_BUSY_THRESHOLD) {
				FIPE_KSTAT_DETAIL_INC(cpu_busy_cnt);
				sp->cond_ready = B_FALSE;
			} else {
				FIPE_KSTAT_DETAIL_INC(cpu_idle_cnt);
			}
		} else {
			FIPE_KSTAT_DETAIL_INC(cpu_busy_cnt);
			sp->cond_ready = B_FALSE;
		}
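
		/*
		 * Note: busy * (100 - T) > idle * T above is the cross-
		 * multiplied form of busy / (busy + idle) > T / 100, i.e.
		 * the CPU's busy percentage over the last tick exceeds
		 * FIPE_PROF_BUSY_THRESHOLD, written this way to avoid
		 * division in the idle path.
		 */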

		/* Calculate the interrupt count. */
		diff = sp->next_ts;
		sp->next_ts = ts + fipe_idle_ctrl.tick_interval;
		diff = sp->next_ts - diff;
		intr = sp->last_intr;
		if (cpu_idle_prop_get_value(fipe_idle_ctrl.prop_intr, ctx,
		    &val) == 0) {
			sp->last_intr = val.cipv_uint64;
			intr = sp->last_intr - intr;
			if (diff != 0) {
				intr = intr * fipe_idle_ctrl.tick_interval;
				intr /= diff;
			} else {
				intr = FIPE_PROF_INTR_THRESHOLD;
			}
		} else {
			intr = FIPE_PROF_INTR_THRESHOLD;
		}

		/*
		 * The system is busy with interrupts, so disable all PM
		 * status checks for INTR_BUSY_THROTTLE ticks.
		 * Interrupts are disabled when the FIPE callbacks are called,
		 * so this optimization helps to reduce interrupt latency.
		 */
		if (intr >= FIPE_PROF_INTR_BUSY_THRESHOLD) {
			FIPE_KSTAT_DETAIL_INC(cpu_intr_busy_cnt);
			sp->throttle_ts = ts + FIPE_PROF_INTR_BUSY_THROTTLE *
			    fipe_idle_ctrl.tick_interval;
			sp->cond_ready = B_FALSE;
		} else if (intr >= FIPE_PROF_INTR_THRESHOLD) {
			FIPE_KSTAT_DETAIL_INC(cpu_intr_throttle_cnt);
			sp->cond_ready = B_FALSE;
		}
	} else if (++sp->idle_count >= FIPE_PROF_IDLE_COUNT) {
		/* Too many idle enter/exit cycles in this tick. */
		FIPE_KSTAT_DETAIL_INC(cpu_loop_cnt);
		sp->throttle_ts = sp->next_ts + fipe_idle_ctrl.tick_interval;
		sp->idle_count = 0;
		sp->cond_ready = B_FALSE;
	}

	return (sp->cond_ready);
}

/*ARGSUSED*/
static void
fipe_idle_enter(void *arg, cpu_idle_callback_context_t ctx,
    cpu_idle_check_wakeup_t check_func, void *check_arg)
{
	hrtime_t ts;
	uint32_t cnt;
	uint64_t iowait;
	cpu_t *cp = CPU;
	struct fipe_cpu_state *sp;

	sp = &fipe_cpu_states[cp->cpu_id];
	ts = cpu_idle_prop_get_hrtime(fipe_idle_ctrl.prop_enter, ctx);

	if (fipe_pm_policy != FIPE_PM_POLICY_DISABLE &&
	    fipe_ioat_ctrl.ioat_ready &&
	    sp->state_ready && sp->throttle_ts <= ts) {
		/* Adjust the iowait count for the local CPU. */
		iowait = CPU_STATS(cp, sys.iowait);
		if (iowait != sp->last_iowait) {
			atomic_add_64(&fipe_gbl_ctrl.io_waiters,
			    iowait - sp->last_iowait);
			sp->last_iowait = iowait;
		}

		/* Check the current CPU status. */
		if (fipe_check_cpu(sp, ctx, ts)) {
			/* Increase the count of CPUs ready for power saving. */
			do {
				cnt = fipe_gbl_ctrl.cpu_count;
				ASSERT(cnt < ncpus);
			} while (atomic_cas_32(&fipe_gbl_ctrl.cpu_count,
			    cnt, cnt + 1) != cnt);

			/*
			 * Enable power saving if all CPUs are idle.
			 */
			if (cnt + 1 == ncpus) {
				if (fipe_gbl_ctrl.io_waiters == 0) {
					fipe_gbl_ctrl.enter_ts = ts;
					fipe_enable(fipe_pm_throttle_level,
					    check_func, check_arg);
				/* There are ongoing block io operations. */
				} else {
					FIPE_KSTAT_DETAIL_INC(bio_busy_cnt);
				}
			}
		}
	} else if (fipe_pm_policy == FIPE_PM_POLICY_DISABLE ||
	    fipe_ioat_ctrl.ioat_ready == B_FALSE) {
		if (sp->cond_ready == B_TRUE) {
			sp->cond_ready = B_FALSE;
		}
	} else if (sp->state_ready == B_FALSE) {
		/* Initialize the per-CPU state on its first pass. */
		sp->cond_ready = B_FALSE;
		sp->state_ready = B_TRUE;
		sp->throttle_ts = 0;
		sp->next_ts = ts + fipe_idle_ctrl.tick_interval;
		sp->last_busy = cpu_idle_prop_get_hrtime(
		    fipe_idle_ctrl.prop_busy, ctx);
		sp->last_idle = cpu_idle_prop_get_hrtime(
		    fipe_idle_ctrl.prop_idle, ctx);
		sp->last_intr = cpu_idle_prop_get_hrtime(
		    fipe_idle_ctrl.prop_intr, ctx);
	}
}
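
/*
 * fipe_gbl_ctrl.io_waiters above aggregates the per-CPU sys.iowait counters,
 * so a non-zero value means some thread still has block I/O outstanding;
 * entering memory power saving in that situation would add latency to the
 * pending I/O, hence the bio_busy_cnt bail-out in fipe_idle_enter().
 */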

/*ARGSUSED*/
static void
fipe_idle_exit(void *arg, cpu_idle_callback_context_t ctx, int flags)
{
	uint32_t cnt;
	hrtime_t ts;
	struct fipe_cpu_state *sp;

	sp = &fipe_cpu_states[CPU->cpu_id];
	if (sp->cond_ready) {
		/* Decrease the count of CPUs ready for power saving. */
		do {
			cnt = fipe_gbl_ctrl.cpu_count;
			ASSERT(cnt > 0);
		} while (atomic_cas_32(&fipe_gbl_ctrl.cpu_count,
		    cnt, cnt - 1) != cnt);

		/*
		 * Try to disable the power saving state.
		 * Only the first CPU waking from the idle state will try to
		 * disable the power saving state; all other CPUs will just go
		 * on and not wait for memory to recover from the power
		 * saving state.
		 * So there are possible periods during which some CPUs are in
		 * the active state while memory is in the power saving state.
		 * This is OK, since it is an uncommon case, and it is
		 * better for performance to let them continue: their
		 * blocking latency is smaller than a mutex and is only
		 * hit in the uncommon condition.
		 */
		if (cnt == ncpus) {
			fipe_disable();
			ts = cpu_idle_prop_get_hrtime(fipe_idle_ctrl.prop_exit,
			    ctx);
			fipe_gbl_ctrl.time_in_pm += ts - fipe_gbl_ctrl.enter_ts;
		}
	}
}