4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright (c) 2004, 2010, Oracle and/or its affiliates. All rights reserved.
26 * This file contains preset event names from the Performance Application
27 * Programming Interface v3.5 which included the following notice:
29 * Copyright (c) 2005,6
30 * Innovative Computing Labs
31 * Computer Science Department,
32 * University of Tennessee,
34 * All Rights Reserved.
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions are met:
40 * * Redistributions of source code must retain the above copyright notice,
41 * this list of conditions and the following disclaimer.
42 * * Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * * Neither the name of the University of Tennessee nor the names of its
46 * contributors may be used to endorse or promote products derived from
47 * this software without specific prior written permission.
49 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
50 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
51 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
52 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
53 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
54 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
55 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
56 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
57 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
58 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
59 * POSSIBILITY OF SUCH DAMAGE.
62 * This open source software license conforms to the BSD License template.
66 * Performance Counter Back-End for Pentiums I, II, and III.
69 #include <sys/cpuvar.h>
70 #include <sys/param.h>
71 #include <sys/cpc_impl.h>
72 #include <sys/cpc_pcbe.h>
73 #include <sys/modctl.h>
74 #include <sys/inttypes.h>
75 #include <sys/systm.h>
76 #include <sys/cmn_err.h>
77 #include <sys/x86_archext.h>
79 #include <sys/archsystm.h>
80 #include <sys/privregs.h>
82 #include <sys/sunddi.h>
84 static int64_t diff3931(uint64_t sample
, uint64_t old
);
85 static uint64_t trunc3931(uint64_t value
);
87 static int ptm_pcbe_init(void);
88 static uint_t
ptm_pcbe_ncounters(void);
89 static const char *ptm_pcbe_impl_name(void);
90 static const char *ptm_pcbe_cpuref(void);
91 static char *ptm_pcbe_list_events(uint_t picnum
);
92 static char *ptm_pcbe_list_attrs(void);
93 static uint64_t ptm_pcbe_event_coverage(char *event
);
94 static int ptm_pcbe_pic_index(char *picname
);
95 static uint64_t ptm_pcbe_overflow_bitmap(void);
96 static int ptm_pcbe_configure(uint_t picnum
, char *event
, uint64_t preset
,
97 uint32_t flags
, uint_t nattrs
, kcpc_attr_t
*attrs
, void **data
,
99 static void ptm_pcbe_program(void *token
);
100 static void ptm_pcbe_allstop(void);
101 static void ptm_pcbe_sample(void *token
);
102 static void ptm_pcbe_free(void *config
);
104 pcbe_ops_t ptm_pcbe_ops
= {
110 ptm_pcbe_list_events
,
112 ptm_pcbe_event_coverage
,
113 ptm_pcbe_overflow_bitmap
,
121 typedef enum _ptm_ver
{
126 static ptm_ver_t ptm_ver
;
127 static const char *ptm_impl_name
;
128 static const char *ptm_cpuref
;
129 static char *pic_events
[2] = { NULL
, NULL
};
132 * Indicates whether the "rdpmc" instruction is available on this processor.
134 static int ptm_rdpmc_avail
= 0;
136 #define ALL_STOPPED 0ULL
138 typedef struct _ptm_pcbe_config
{
139 uint8_t ptm_picno
; /* 0 for pic0 or 1 for pic1 */
140 uint32_t ptm_ctl
; /* P6: PerfEventSelect; P5: cesr, shifted */
149 typedef struct _ptm_generic_events
{
153 } ptm_generic_event_t
;
156 #define CPC_GEN_END { NULL, NULL }
159 * Basic Pentium events
162 {0x0, "data_read"}, \
163 {0x1, "data_write"}, \
164 {0x2, "data_tlb_miss"}, \
165 {0x3, "data_read_miss"}, \
166 {0x4, "data_write_miss"}, \
167 {0x5, "write_hit_to_M_or_E"}, \
168 {0x6, "dcache_lines_wrback"}, \
169 {0x7, "external_snoops"}, \
170 {0x8, "external_dcache_snoop_hits"}, \
171 {0x9, "memory_access_in_both_pipes"}, \
172 {0xa, "bank_conflicts"}, \
173 {0xb, "misaligned_ref"}, \
174 {0xc, "code_read"}, \
175 {0xd, "code_tlb_miss"}, \
176 {0xe, "code_cache_miss"}, \
177 {0xf, "any_segreg_loaded"}, \
178 {0x12, "branches"}, \
179 {0x13, "btb_hits"}, \
180 {0x14, "taken_or_btb_hit"}, \
181 {0x15, "pipeline_flushes"}, \
182 {0x16, "instr_exec"}, \
183 {0x17, "instr_exec_V_pipe"}, \
184 {0x18, "clks_bus_cycle"}, \
185 {0x19, "clks_full_wbufs"}, \
186 {0x1a, "pipe_stall_read"}, \
187 {0x1b, "stall_on_write_ME"}, \
188 {0x1c, "locked_bus_cycle"}, \
189 {0x1d, "io_rw_cycles"}, \
190 {0x1e, "reads_noncache_mem"}, \
191 {0x1f, "pipeline_agi_stalls"}, \
193 {0x23, "bp_match_dr0"}, \
194 {0x24, "bp_match_dr1"}, \
195 {0x25, "bp_match_dr2"}, \
196 {0x26, "bp_match_dr3"}, \
197 {0x27, "hw_intrs"}, \
199 {0x29, "data_rw_miss"}
201 static const struct nametable P5mmx_names0
[] = {
203 {0x2a, "bus_ownership_latency"},
204 {0x2b, "mmx_instr_upipe"},
205 {0x2c, "cache_M_line_sharing"},
206 {0x2d, "emms_instr"},
207 {0x2e, "bus_util_processor"},
208 {0x2f, "sat_mmx_instr"},
209 {0x30, "clks_not_HLT"},
210 {0x31, "mmx_data_read"},
211 {0x32, "clks_fp_stall"},
212 {0x33, "d1_starv_fifo_0"},
213 {0x34, "mmx_data_write"},
214 {0x35, "pipe_flush_wbp"},
215 {0x36, "mmx_misalign_data_refs"},
216 {0x37, "rets_pred_incorrect"},
217 {0x38, "mmx_multiply_unit_interlock"},
219 {0x3a, "btb_false_entries"},
220 {0x3b, "clocks_stall_full_wb"},
224 static const struct nametable P5mmx_names1
[] = {
226 {0x2a, "bus_ownership_transfers"},
227 {0x2b, "mmx_instr_vpipe"},
228 {0x2c, "cache_lint_sharing"},
229 {0x2d, "mmx_fp_transitions"},
230 {0x2e, "writes_noncache_mem"},
231 {0x2f, "sats_performed"},
232 {0x30, "clks_dcache_tlb_miss"},
233 {0x31, "mmx_data_read_miss"},
235 {0x33, "d1_starv_fifo_1"},
236 {0x34, "mmx_data_write_miss"},
237 {0x35, "pipe_flush_wbp_wb"},
238 {0x36, "mmx_pipe_stall_data_read"},
240 {0x38, "movd_movq_stall"},
241 {0x39, "rsb_overflow"},
242 {0x3a, "btb_mispred_nt"},
243 {0x3b, "mmx_stall_write_ME"},
247 static const struct nametable
*P5mmx_names
[2] = {
253 * Pentium Pro and Pentium II events
255 static const struct nametable _P6_names
[] = {
259 {0x43, "data_mem_refs"},
260 {0x45, "dcu_lines_in"},
261 {0x46, "dcu_m_lines_in"},
262 {0x47, "dcu_m_lines_out"},
263 {0x48, "dcu_miss_outstanding"},
266 * Instruction fetch unit
268 {0x80, "ifu_ifetch"},
269 {0x81, "ifu_ifetch_miss"},
271 {0x86, "ifu_mem_stall"},
280 {0x24, "l2_lines_in"},
281 {0x26, "l2_lines_out"},
282 {0x25, "l2_m_lines_inm"},
283 {0x27, "l2_m_lines_outm"},
286 {0x22, "l2_dbus_busy"},
287 {0x23, "l2_dbus_busy_rd"},
292 {0x62, "bus_drdy_clocks"},
293 {0x63, "bus_lock_clocks"},
294 {0x60, "bus_req_outstanding"},
295 {0x65, "bus_tran_brd"},
296 {0x66, "bus_tran_rfo"},
297 {0x67, "bus_trans_wb"},
298 {0x68, "bus_tran_ifetch"},
299 {0x69, "bus_tran_inval"},
300 {0x6a, "bus_tran_pwr"},
301 {0x6b, "bus_trans_p"},
302 {0x6c, "bus_trans_io"},
303 {0x6d, "bus_tran_def"},
304 {0x6e, "bus_tran_burst"},
305 {0x70, "bus_tran_any"},
306 {0x6f, "bus_tran_mem"},
307 {0x64, "bus_data_rcv"},
308 {0x61, "bus_bnr_drv"},
309 {0x7a, "bus_hit_drv"},
310 {0x7b, "bus_hitm_drv"},
311 {0x7e, "bus_snoop_stall"},
314 * Floating point unit
316 {0xc1, "flops"}, /* 0 only */
317 {0x10, "fp_comp_ops_exe"}, /* 0 only */
318 {0x11, "fp_assist"}, /* 1 only */
319 {0x12, "mul"}, /* 1 only */
320 {0x13, "div"}, /* 1 only */
321 {0x14, "cycles_div_busy"}, /* 0 only */
328 {0x5, "misalign_mem_ref"},
331 * Instruction decoding and retirement
333 {0xc0, "inst_retired"},
334 {0xc2, "uops_retired"},
335 {0xd0, "inst_decoder"},
341 {0xc6, "cycles_int_masked"},
342 {0xc7, "cycles_int_pending_and_masked"},
347 {0xc4, "br_inst_retired"},
348 {0xc5, "br_miss_pred_retired"},
349 {0xc9, "br_taken_retired"},
350 {0xca, "br_miss_pred_taken_ret"},
351 {0xe0, "br_inst_decoded"},
352 {0xe2, "btb_misses"},
359 {0xa2, "resource_stalls"},
360 {0xd2, "partial_rat_stalls"},
363 * Segment register loads
365 {0x6, "segment_reg_loads"},
370 {0x79, "cpu_clk_unhalted"},
375 {0xb0, "mmx_instr_exec"},
376 {0xb1, "mmx_sat_instr_exec"},
377 {0xb2, "mmx_uops_exec"},
378 {0xb3, "mmx_instr_type_exec"},
379 {0xcc, "fp_mmx_trans"},
380 {0xcd, "mmx_assists"},
381 {0xce, "mmx_instr_ret"},
382 {0xd4, "seg_rename_stalls"},
383 {0xd5, "seg_reg_renames"},
384 {0xd6, "ret_seg_renames"},
389 static const struct nametable
*P6_names
[2] = {
394 #define P5_GENERIC_EVENTS \
395 { "PAPI_tot_ins", "instr_exec", 0x0 }, \
396 { "PAPI_tlb_dm", "data_tlb_miss", 0x0 }, \
397 { "PAPI_tlb_im", "code_tlb_miss", 0x0 }, \
398 { "PAPI_fp_ops", "flops" }
400 static const ptm_generic_event_t P5mmx_generic_names0
[] = {
402 { "PAPI_tot_cyc", "clks_not_HLT", 0x0 },
406 static const ptm_generic_event_t P5mmx_generic_names1
[] = {
408 { "PAPI_br_ins", "taken_br", 0x0 },
412 static const ptm_generic_event_t
*P5mmx_generic_names
[2] = {
413 P5mmx_generic_names0
,
417 static const ptm_generic_event_t _P6_generic_names
[] = {
418 { "PAPI_ca_shr", "l2_ifetch", 0xf },
419 { "PAPI_ca_cln", "bus_tran_rfo", 0x0 },
420 { "PAPI_ca_itv", "bus_tran_inval", 0x0 },
421 { "PAPI_tlb_im", "itlb_miss", 0x0 },
422 { "PAPI_btac_m", "btb_misses", 0x0 },
423 { "PAPI_hw_int", "hw_int_rx", 0x0 },
424 { "PAPI_br_cn", "br_inst_retired", 0x0 },
425 { "PAPI_br_tkn", "br_taken_retired", 0x0 },
426 { "PAPI_br_msp", "br_miss_pred_taken_ret", 0x0 },
427 { "PAPI_br_ins", "br_inst_retired", 0x0 },
428 { "PAPI_res_stl", "resource_stalls", 0x0 },
429 { "PAPI_tot_iis", "inst_decoder", 0x0 },
430 { "PAPI_tot_ins", "inst_retired", 0x0 },
431 { "PAPI_tot_cyc", "cpu_clk_unhalted", 0x0 },
432 { "PAPI_l1_dcm", "dcu_lines_in", 0x0 },
433 { "PAPI_l1_icm", "l2_ifetch", 0xf },
434 { "PAPI_l1_tcm", "l2_rqsts", 0xf },
435 { "PAPI_l1_dca", "data_mem_refs", 0x0 },
436 { "PAPI_l1_stm", "l2_st", 0xf },
437 { "PAPI_l2_icm", "bus_tran_ifetch", 0x0 },
438 { "PAPI_l2_dcr", "l2_ld", 0xf },
439 { "PAPI_l2_dcw", "l2_st", 0xf },
440 { "PAPI_l2_tcm", "l2_lines_in", 0x0 },
441 { "PAPI_l2_tca", "l2_rqsts", 0xf },
442 { "PAPI_l2_tcw", "l2_st", 0xf },
443 { "PAPI_l2_stm", "l2_m_lines_inm", 0x0 },
444 { "PAPI_fp_ins", "flops", 0x0 },
445 { "PAPI_fp_ops", "flops", 0x0 },
446 { "PAPI_fml_ins", "mul", 0x0 },
447 { "PAPI_fdv_ins", "div", 0x0 },
451 static const ptm_generic_event_t
*P6_generic_names
[2] = {
456 static const struct nametable
**events
;
457 static const ptm_generic_event_t
**generic_events
;
/*
 * Extract the inclusive bit field [u:l] from v, right-justified.
 *
 * v - value to extract from
 * u - index of the most significant bit of the field
 * l - index of the least significant bit of the field (l <= u)
 *
 * The mask is built in 64-bit unsigned arithmetic so that fields of 31
 * bits or more (for example a 40-bit counter) do not overflow an int
 * shift.  "2 << (u - l)" is used instead of "1 << (1 + u - l)" so that a
 * full 64-bit field wraps to zero and yields an all-ones mask, keeping
 * the shift count below the width of the type.
 */
#define	BITS(v, u, l)	\
	(((v) >> (l)) & ((UINT64_C(2) << ((u) - (l))) - 1))
463 * "Well known" bit fields in the Pentium CES register
464 * The interfaces in libcpc should make these #defines uninteresting.
466 #define CPC_P5_CESR_ES0_SHIFT 0
467 #define CPC_P5_CESR_ES0_MASK 0x3f
468 #define CPC_P5_CESR_ES1_SHIFT 16
469 #define CPC_P5_CESR_ES1_MASK 0x3f
471 #define CPC_P5_CESR_OS0 6
472 #define CPC_P5_CESR_USR0 7
473 #define CPC_P5_CESR_CLK0 8
474 #define CPC_P5_CESR_PC0 9
475 #define CPC_P5_CESR_OS1 (CPC_P5_CESR_OS0 + 16)
476 #define CPC_P5_CESR_USR1 (CPC_P5_CESR_USR0 + 16)
477 #define CPC_P5_CESR_CLK1 (CPC_P5_CESR_CLK0 + 16)
478 #define CPC_P5_CESR_PC1 (CPC_P5_CESR_PC0 + 16)
481 * "Well known" bit fields in the Pentium Pro PerfEvtSel registers
482 * The interfaces in libcpc should make these #defines uninteresting.
484 #define CPC_P6_PES_INV 23
485 #define CPC_P6_PES_EN 22
486 #define CPC_P6_PES_INT 20
487 #define CPC_P6_PES_PC 19
488 #define CPC_P6_PES_E 18
489 #define CPC_P6_PES_OS 17
490 #define CPC_P6_PES_USR 16
492 #define CPC_P6_PES_UMASK_SHIFT 8
493 #define CPC_P6_PES_UMASK_MASK (0xffu)
495 #define CPC_P6_PES_CMASK_SHIFT 24
496 #define CPC_P6_PES_CMASK_MASK (0xffu)
498 #define CPC_P6_PES_PIC0_MASK (0xffu)
499 #define CPC_P6_PES_PIC1_MASK (0xffu)
501 #define P6_PES_EN (UINT32_C(1) << CPC_P6_PES_EN)
502 #define P6_PES_INT (UINT32_C(1) << CPC_P6_PES_INT)
503 #define P6_PES_OS (UINT32_C(1) << CPC_P6_PES_OS)
506 * P5 (Pentium, Pentium with MMX) attributes
508 #define P5_NOEDGE 0x1 /* "noedge" - no edge detection */
509 #define P5_PC 0x2 /* "pc" - pin control */
512 * P6 (Pentium Pro, Pentium II and III) attributes
514 #define P6_NOEDGE 0x1
516 #define P6_INV 0x4 /* "inv" - count inverted transitions */
517 #define P6_INT 0x8 /* "int" - interrupt on overflow */
520 * CPU reference strings
523 #define P5_CPUREF "See Appendix A.4 of the \"IA-32 Intel Architecture " \
524 "Software Developer's Manual Volume 3: System " \
525 "Programming Guide,\" Order # 245472-012, 2003"
527 #define P6_CPUREF "See Appendix A.3 of the \"IA-32 Intel Architecture " \
528 "Software Developer's Manual Volume 3: System " \
529 "Programming Guide,\" Order # 245472-012, 2003"
534 const struct nametable
*n
;
535 const ptm_generic_event_t
*gevp
;
539 if (is_x86_feature(x86_featureset
, X86FSET_MMX
))
543 * Discover type of CPU and set events pointer appropriately.
545 * Map family and model into the performance
546 * counter architectures we currently understand.
548 * See application note AP485 (from developer.intel.com)
549 * for further explanation.
551 if (cpuid_getvendor(CPU
) != X86_VENDOR_Intel
)
553 switch (cpuid_getfamily(CPU
)) {
554 case 5: /* Pentium and Pentium with MMX */
555 events
= P5mmx_names
;
556 generic_events
= P5mmx_generic_names
;
557 ptm_ver
= PTM_VER_P5
;
558 ptm_cpuref
= P5_CPUREF
;
559 if (cpuid_getmodel(CPU
) < 4)
560 ptm_impl_name
= "Pentium";
562 ptm_impl_name
= "Pentium with MMX";
564 case 6: /* Pentium Pro and Pentium II and III */
566 generic_events
= P6_generic_names
;
567 ptm_ver
= PTM_VER_P6
;
568 ptm_cpuref
= P6_CPUREF
;
569 ptm_pcbe_ops
.pcbe_caps
= CPC_CAP_OVERFLOW_INTERRUPT
;
570 if (is_x86_feature(x86_featureset
, X86FSET_MMX
))
571 ptm_impl_name
= "Pentium Pro with MMX, Pentium II";
573 ptm_impl_name
= "Pentium Pro, Pentium II";
580 * Initialize the list of events for each PIC.
581 * Do two passes: one to compute the size necessary and another
582 * to copy the strings. Need room for event, comma, and NUL terminator.
584 for (i
= 0; i
< 2; i
++) {
586 for (n
= events
[i
]; n
->bits
!= NT_END
; n
++)
587 size
+= strlen(n
->name
) + 1;
588 for (gevp
= generic_events
[i
]; gevp
->name
!= NULL
; gevp
++)
589 size
+= strlen(gevp
->name
) + 1;
590 pic_events
[i
] = kmem_alloc(size
+ 1, KM_SLEEP
);
591 *pic_events
[i
] = '\0';
592 for (n
= events
[i
]; n
->bits
!= NT_END
; n
++) {
593 (void) strcat(pic_events
[i
], n
->name
);
594 (void) strcat(pic_events
[i
], ",");
596 for (gevp
= generic_events
[i
]; gevp
->name
!= NULL
; gevp
++) {
597 (void) strcat(pic_events
[i
], gevp
->name
);
598 (void) strcat(pic_events
[i
], ",");
602 * Remove trailing comma.
604 pic_events
[i
][size
- 1] = '\0';
611 ptm_pcbe_ncounters(void)
617 ptm_pcbe_impl_name(void)
619 return (ptm_impl_name
);
623 ptm_pcbe_cpuref(void)
629 ptm_pcbe_list_events(uint_t picnum
)
631 ASSERT(picnum
>= 0 && picnum
< cpc_ncounters
);
633 if (pic_events
[0] == NULL
) {
634 ASSERT(pic_events
[1] == NULL
);
637 return (pic_events
[picnum
]);
641 ptm_pcbe_list_attrs(void)
643 if (ptm_ver
== PTM_VER_P5
)
644 return ("noedge,pc");
646 return ("noedge,pc,inv,int,umask,cmask");
649 static const ptm_generic_event_t
*
650 find_generic_event(int regno
, char *name
)
652 const ptm_generic_event_t
*gevp
;
654 for (gevp
= generic_events
[regno
]; gevp
->name
!= NULL
; gevp
++)
655 if (strcmp(name
, gevp
->name
) == 0)
661 static const struct nametable
*
662 find_event(int regno
, char *name
)
664 const struct nametable
*n
;
668 for (; n
->bits
!= NT_END
; n
++)
669 if (strcmp(name
, n
->name
) == 0)
676 ptm_pcbe_event_coverage(char *event
)
680 if ((find_event(0, event
) != NULL
) ||
681 (find_generic_event(0, event
) != NULL
))
683 if ((find_event(1, event
) != NULL
) ||
684 (find_generic_event(1, event
) != NULL
))
691 ptm_pcbe_overflow_bitmap(void)
697 * P5 is not capable of generating interrupts.
699 ASSERT(ptm_ver
== PTM_VER_P6
);
702 * CPC could have caused an interrupt provided that
704 * 1) Counters are enabled
705 * 2) Either counter has requested an interrupt
708 pes
[0] = rdmsr(REG_PERFEVNT0
);
709 if (((uint32_t)pes
[0] & P6_PES_EN
) != P6_PES_EN
)
713 * If a particular counter requested an interrupt, assume it caused
714 * this interrupt. There is no way to determine which counter overflowed
715 * on this hardware other than by using unreliable heuristics.
718 pes
[1] = rdmsr(REG_PERFEVNT1
);
719 if ((uint32_t)pes
[0] & P6_PES_INT
)
721 if ((uint32_t)pes
[1] & P6_PES_INT
)
729 ptm_pcbe_configure(uint_t picnum
, char *eventname
, uint64_t preset
,
730 uint32_t flags
, uint_t nattrs
, kcpc_attr_t
*attrs
, void **data
,
733 ptm_pcbe_config_t
*conf
;
734 const struct nametable
*n
;
735 const ptm_generic_event_t
*gevp
;
736 struct nametable nt_raw
= { 0, "raw" };
741 * If we've been handed an existing configuration, we need only preset
746 conf
->ptm_rawpic
= trunc3931(preset
);
750 if (picnum
!= 0 && picnum
!= 1)
751 return (CPC_INVALID_PICNUM
);
753 conf
= kmem_alloc(sizeof (ptm_pcbe_config_t
), KM_SLEEP
);
755 conf
->ptm_picno
= picnum
;
756 conf
->ptm_rawpic
= trunc3931(preset
);
759 if ((n
= find_event(picnum
, eventname
)) == NULL
) {
760 if ((gevp
= find_generic_event(picnum
, eventname
)) != NULL
) {
761 n
= find_event(picnum
, gevp
->event
);
765 kmem_free(conf
, sizeof (ptm_pcbe_config_t
));
766 return (CPC_ATTRIBUTE_OUT_OF_RANGE
);
769 if (ptm_ver
== PTM_VER_P6
)
770 conf
->ptm_ctl
|= gevp
->umask
<<
771 CPC_P6_PES_UMASK_SHIFT
;
776 * If ddi_strtol() likes this event, use it as a raw
779 if (ddi_strtol(eventname
, NULL
, 0, &tmp
) != 0) {
780 kmem_free(conf
, sizeof (ptm_pcbe_config_t
));
781 return (CPC_INVALID_EVENT
);
786 if (ptm_ver
== PTM_VER_P5
)
787 nt_raw
.bits
&= CPC_P5_CESR_ES0_MASK
;
789 nt_raw
.bits
&= CPC_P6_PES_PIC0_MASK
;
795 if (ptm_ver
== PTM_VER_P5
) {
797 picshift
= (picnum
== 0) ? 0 : 16;
799 for (i
= 0; i
< nattrs
; i
++) {
801 * A zero value leaves the corresponding flag clear; any
802 * nonzero value sets it.
804 if (strncmp(attrs
[i
].ka_name
, "noedge", 7) == 0) {
805 if (attrs
[i
].ka_val
!= 0)
806 ptm_flags
|= P5_NOEDGE
;
807 } else if (strncmp(attrs
[i
].ka_name
, "pc", 3) == 0) {
808 if (attrs
[i
].ka_val
!= 0)
811 kmem_free(conf
, sizeof (ptm_pcbe_config_t
));
812 return (CPC_INVALID_ATTRIBUTE
);
816 if (flags
& CPC_COUNT_USER
)
817 conf
->ptm_ctl
|= (1 << (CPC_P5_CESR_USR0
+ picshift
));
818 if (flags
& CPC_COUNT_SYSTEM
)
819 conf
->ptm_ctl
|= (1 << (CPC_P5_CESR_OS0
+ picshift
));
820 if (ptm_flags
& P5_NOEDGE
)
821 conf
->ptm_ctl
|= (1 << (CPC_P5_CESR_CLK0
+ picshift
));
822 if (ptm_flags
& P5_PC
)
823 conf
->ptm_ctl
|= (1 << (CPC_P5_CESR_PC0
+ picshift
));
825 ASSERT((n
->bits
| CPC_P5_CESR_ES0_MASK
) ==
826 CPC_P5_CESR_ES0_MASK
);
828 conf
->ptm_ctl
|= (n
->bits
<< picshift
);
830 for (i
= 0; i
< nattrs
; i
++) {
831 if (strncmp(attrs
[i
].ka_name
, "noedge", 6) == 0) {
832 if (attrs
[i
].ka_val
!= 0)
833 ptm_flags
|= P6_NOEDGE
;
834 } else if (strncmp(attrs
[i
].ka_name
, "pc", 2) == 0) {
835 if (attrs
[i
].ka_val
!= 0)
837 } else if (strncmp(attrs
[i
].ka_name
, "inv", 3) == 0) {
838 if (attrs
[i
].ka_val
!= 0)
840 } else if (strncmp(attrs
[i
].ka_name
, "umask", 5) == 0) {
841 if ((attrs
[i
].ka_val
| CPC_P6_PES_UMASK_MASK
) !=
842 CPC_P6_PES_UMASK_MASK
) {
844 sizeof (ptm_pcbe_config_t
));
845 return (CPC_ATTRIBUTE_OUT_OF_RANGE
);
847 conf
->ptm_ctl
|= (uint8_t)attrs
[i
].ka_val
<<
848 CPC_P6_PES_UMASK_SHIFT
;
849 } else if (strncmp(attrs
[i
].ka_name
, "cmask", 5) == 0) {
850 if ((attrs
[i
].ka_val
| CPC_P6_PES_CMASK_MASK
) !=
851 CPC_P6_PES_CMASK_MASK
) {
853 sizeof (ptm_pcbe_config_t
));
854 return (CPC_ATTRIBUTE_OUT_OF_RANGE
);
856 conf
->ptm_ctl
|= (uint8_t)attrs
[i
].ka_val
<<
857 CPC_P6_PES_CMASK_SHIFT
;
858 } else if (strncmp(attrs
[i
].ka_name
, "int", 3) == 0) {
859 if (attrs
[i
].ka_val
!= 0)
862 kmem_free(conf
, sizeof (ptm_pcbe_config_t
));
863 return (CPC_INVALID_ATTRIBUTE
);
867 if (flags
& CPC_OVF_NOTIFY_EMT
)
869 * If the user has requested notification of overflows,
870 * we automatically program the hardware to generate
871 * overflow interrupts.
874 if (flags
& CPC_COUNT_USER
)
875 conf
->ptm_ctl
|= (1 << CPC_P6_PES_USR
);
876 if (flags
& CPC_COUNT_SYSTEM
)
877 conf
->ptm_ctl
|= (1 << CPC_P6_PES_OS
);
878 if ((ptm_flags
& P6_NOEDGE
) == 0)
879 conf
->ptm_ctl
|= (1 << CPC_P6_PES_E
);
880 if (ptm_flags
& P6_PC
)
881 conf
->ptm_ctl
|= (1 << CPC_P6_PES_PC
);
882 if (ptm_flags
& P6_INV
)
883 conf
->ptm_ctl
|= (1 << CPC_P6_PES_INV
);
884 if (ptm_flags
& P6_INT
)
885 conf
->ptm_ctl
|= (1 << CPC_P6_PES_INT
);
887 ASSERT((n
->bits
| CPC_P6_PES_PIC0_MASK
) ==
888 CPC_P6_PES_PIC0_MASK
);
890 conf
->ptm_ctl
|= n
->bits
;
898 ptm_pcbe_program(void *token
)
900 ptm_pcbe_config_t
*pic0
;
901 ptm_pcbe_config_t
*pic1
;
902 ptm_pcbe_config_t
*tmp
;
903 ptm_pcbe_config_t empty
= { 1, 0, 0 }; /* assume pic1 to start */
905 if ((pic0
= kcpc_next_config(token
, NULL
, NULL
)) == NULL
)
906 panic("ptm_pcbe: token %p has no configs", token
);
908 if ((pic1
= kcpc_next_config(token
, pic0
, NULL
)) == NULL
)
911 if (pic0
->ptm_picno
!= 0) {
918 ASSERT(pic0
->ptm_picno
== 0 && pic1
->ptm_picno
== 1);
920 if (ptm_rdpmc_avail
) {
921 ulong_t curcr4
= getcr4();
922 if (kcpc_allow_nonpriv(token
))
923 setcr4(curcr4
| CR4_PCE
);
925 setcr4(curcr4
& ~CR4_PCE
);
928 if (ptm_ver
== PTM_VER_P5
) {
929 wrmsr(P5_CESR
, ALL_STOPPED
);
930 wrmsr(P5_CTR0
, pic0
->ptm_rawpic
);
931 wrmsr(P5_CTR1
, pic1
->ptm_rawpic
);
932 wrmsr(P5_CESR
, pic0
->ptm_ctl
| pic1
->ptm_ctl
);
933 pic0
->ptm_rawpic
= rdmsr(P5_CTR0
);
934 pic1
->ptm_rawpic
= rdmsr(P5_CTR1
);
937 wrmsr(REG_PERFEVNT0
, ALL_STOPPED
);
938 wrmsr(REG_PERFCTR0
, pic0
->ptm_rawpic
);
939 wrmsr(REG_PERFCTR1
, pic1
->ptm_rawpic
);
941 DTRACE_PROBE1(ptm__pes1
, uint64_t, pes
);
942 wrmsr(REG_PERFEVNT1
, pes
);
943 pes
= pic0
->ptm_ctl
| (1 << CPC_P6_PES_EN
);
944 DTRACE_PROBE1(ptm__pes0
, uint64_t, pes
);
945 wrmsr(REG_PERFEVNT0
, pes
);
950 ptm_pcbe_allstop(void)
952 if (ptm_ver
== PTM_VER_P5
)
953 wrmsr(P5_CESR
, ALL_STOPPED
);
955 wrmsr(REG_PERFEVNT0
, ALL_STOPPED
);
956 setcr4(getcr4() & ~CR4_PCE
);
961 ptm_pcbe_sample(void *token
)
963 ptm_pcbe_config_t
*pic0
;
964 ptm_pcbe_config_t
*pic1
;
965 ptm_pcbe_config_t
*swap
;
966 ptm_pcbe_config_t empty
= { 1, 0, 0 }; /* assume pic1 to start */
973 if ((pic0
= kcpc_next_config(token
, NULL
, &pic0_data
)) == NULL
)
974 panic("ptm_pcbe: token %p has no configs", token
);
976 if ((pic1
= kcpc_next_config(token
, pic0
, &pic1_data
)) == NULL
) {
981 if (pic0
->ptm_picno
!= 0) {
987 pic0_data
= pic1_data
;
991 ASSERT(pic0
->ptm_picno
== 0 && pic1
->ptm_picno
== 1);
993 if (ptm_ver
== PTM_VER_P5
) {
994 curpic
[0] = rdmsr(P5_CTR0
);
995 curpic
[1] = rdmsr(P5_CTR1
);
997 curpic
[0] = rdmsr(REG_PERFCTR0
);
998 curpic
[1] = rdmsr(REG_PERFCTR1
);
1001 DTRACE_PROBE1(ptm__curpic0
, uint64_t, curpic
[0]);
1002 DTRACE_PROBE1(ptm__curpic1
, uint64_t, curpic
[1]);
1004 *pic0_data
+= diff3931(curpic
[0], pic0
->ptm_rawpic
);
1005 pic0
->ptm_rawpic
= trunc3931(*pic0_data
);
1007 *pic1_data
+= diff3931(curpic
[1], pic1
->ptm_rawpic
);
1008 pic1
->ptm_rawpic
= trunc3931(*pic1_data
);
1012 ptm_pcbe_free(void *config
)
1014 kmem_free(config
, sizeof (ptm_pcbe_config_t
));
1018 * Virtualizes the 40-bit field of the %pic
1019 * register into a 64-bit software register.
1021 * We can retrieve 40 (signed) bits from the counters,
1022 * but we can set only 32 (signed) bits into the counters.
1023 * This makes virtualizing more than 31-bits of registers
1026 * If bits 39 to 31 are set in the virtualized pic register,
1027 * then we can preset the counter to this value using the fact
1028 * that wrmsr sign extends bit 31. Though it might look easier
1029 * to only use the bottom 31-bits of the register, we have to allow
1030 * the full 40-bits to be used to perform overflow profiling.
1033 #define MASK40 UINT64_C(0xffffffffff)
1034 #define MASK31 UINT64_C(0x7fffffff)
1035 #define BITS_39_31 UINT64_C(0xff80000000)
1038 diff3931(uint64_t sample
, uint64_t old
)
1042 if ((old
& BITS_39_31
) == BITS_39_31
) {
1043 diff
= (MASK40
& sample
) - old
;
1045 diff
+= (UINT64_C(1) << 40);
1047 diff
= (MASK31
& sample
) - old
;
1049 diff
+= (UINT64_C(1) << 31);
1055 trunc3931(uint64_t value
)
1057 if ((value
& BITS_39_31
) == BITS_39_31
)
1058 return (MASK40
& value
);
1059 return (MASK31
& value
);
1062 static struct modlpcbe modlpcbe
= {
1064 "Pentium Performance Counters",
1068 static struct modlinkage modl
= {
1076 if (ptm_pcbe_init() != 0)
1078 return (mod_install(&modl
));
1084 return (mod_remove(&modl
));
1088 _info(struct modinfo
*mi
)
1090 return (mod_info(&modl
, mi
));