4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
27 #include <sys/modctl.h>
28 #include <sys/dtrace.h>
32 #include <sys/sunddi.h>
35 #define FBT_PUSHL_EBP 0x55
36 #define FBT_MOVL_ESP_EBP0_V0 0x8b
37 #define FBT_MOVL_ESP_EBP1_V0 0xec
38 #define FBT_MOVL_ESP_EBP0_V1 0x89
39 #define FBT_MOVL_ESP_EBP1_V1 0xe5
40 #define FBT_REX_RSP_RBP 0x48
42 #define FBT_POPL_EBP 0x5d
44 #define FBT_RET_IMM16 0xc2
45 #define FBT_LEAVE 0xc9
48 #define FBT_PATCHVAL 0xcc
50 #define FBT_PATCHVAL 0xf0
53 #define FBT_ENTRY "entry"
54 #define FBT_RETURN "return"
55 #define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask)
56 #define FBT_PROBETAB_SIZE 0x8000 /* 32k entries -- 128K total */
58 typedef struct fbt_probe
{
59 struct fbt_probe
*fbtp_hashnext
;
60 uint8_t *fbtp_patchpoint
;
62 uint8_t fbtp_patchval
;
63 uint8_t fbtp_savedval
;
64 uintptr_t fbtp_roffset
;
67 struct modctl
*fbtp_ctl
;
71 struct fbt_probe
*fbtp_next
;
74 static dev_info_t
*fbt_devi
;
75 static dtrace_provider_id_t fbt_id
;
76 static fbt_probe_t
**fbt_probetab
;
77 static int fbt_probetab_size
;
78 static int fbt_probetab_mask
;
79 static int fbt_verbose
= 0;
82 fbt_invop(uintptr_t addr
, uintptr_t *stack
, uintptr_t rval
)
84 uintptr_t stack0
, stack1
, stack2
, stack3
, stack4
;
85 fbt_probe_t
*fbt
= fbt_probetab
[FBT_ADDR2NDX(addr
)];
87 for (; fbt
!= NULL
; fbt
= fbt
->fbtp_hashnext
) {
88 if ((uintptr_t)fbt
->fbtp_patchpoint
== addr
) {
89 if (fbt
->fbtp_roffset
== 0) {
92 * When accessing the arguments on the stack,
93 * we must protect against accessing beyond
94 * the stack. We can safely set NOFAULT here
95 * -- we know that interrupts are already
98 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
99 CPU
->cpu_dtrace_caller
= stack
[i
++];
102 * On amd64, stack[0] contains the dereferenced
103 * stack pointer, stack[1] contains savfp,
104 * stack[2] contains savpc. We want to step
105 * over these entries.
114 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
|
117 dtrace_probe(fbt
->fbtp_id
, stack0
, stack1
,
118 stack2
, stack3
, stack4
);
120 CPU
->cpu_dtrace_caller
= NULL
;
124 * On amd64, we instrument the ret, not the
125 * leave. We therefore need to set the caller
126 * to assure that the top frame of a stack()
129 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
130 CPU
->cpu_dtrace_caller
= stack
[0];
131 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
|
135 dtrace_probe(fbt
->fbtp_id
, fbt
->fbtp_roffset
,
137 CPU
->cpu_dtrace_caller
= NULL
;
140 return (fbt
->fbtp_rval
);
149 fbt_provide_module(void *arg
, struct modctl
*ctl
)
151 struct module
*mp
= ctl
->mod_mp
;
152 char *str
= mp
->strings
;
153 int nsyms
= mp
->nsyms
;
154 Shdr
*symhdr
= mp
->symhdr
;
155 char *modname
= ctl
->mod_modname
;
157 fbt_probe_t
*fbt
, *retfbt
;
162 * Employees of dtrace and their families are ineligible. Void
165 if (strcmp(modname
, "dtrace") == 0)
168 if (ctl
->mod_requisites
!= NULL
) {
169 struct modctl_list
*list
;
171 list
= (struct modctl_list
*)ctl
->mod_requisites
;
173 for (; list
!= NULL
; list
= list
->modl_next
) {
174 if (strcmp(list
->modl_modp
->mod_modname
, "dtrace") == 0)
180 * KMDB is ineligible for instrumentation -- it may execute in
181 * any context, including probe context.
183 if (strcmp(modname
, "kmdbmod") == 0)
186 if (str
== NULL
|| symhdr
== NULL
|| symhdr
->sh_addr
== NULL
) {
188 * If this module doesn't (yet) have its string or symbol
189 * table allocated, clear out.
194 symsize
= symhdr
->sh_entsize
;
196 if (mp
->fbt_nentries
) {
198 * This module has some FBT entries allocated; we're afraid
204 for (i
= 1; i
< nsyms
; i
++) {
205 uint8_t *instr
, *limit
;
206 Sym
*sym
= (Sym
*)(symhdr
->sh_addr
+ i
* symsize
);
209 if (ELF_ST_TYPE(sym
->st_info
) != STT_FUNC
)
213 * Weak symbols are not candidates. This could be made to
214 * work (where weak functions and their underlying function
215 * appear as two disjoint probes), but it's not simple.
217 if (ELF_ST_BIND(sym
->st_info
) == STB_WEAK
)
220 name
= str
+ sym
->st_name
;
222 if (strstr(name
, "dtrace_") == name
&&
223 strstr(name
, "dtrace_safe_") != name
) {
225 * Anything beginning with "dtrace_" may be called
226 * from probe context unless it explicitly indicates
227 * that it won't be called from probe context by
228 * using the prefix "dtrace_safe_".
233 if (strstr(name
, "kdi_") == name
||
234 strstr(name
, "_kdi_") != NULL
) {
236 * Any function name beginning with "kdi_" or
237 * containing the string "_kdi_" is a part of the
238 * kernel debugger interface and may be called in
239 * arbitrary context -- including probe context.
245 * Due to 4524008, _init and _fini may have a bloated st_size.
246 * While this bug was fixed quite some time ago, old drivers
247 * may be lurking. We need to develop a better solution to
248 * this problem, such that correct _init and _fini functions
249 * (the vast majority) may be correctly traced. One solution
250 * may be to scan through the entire symbol table to see if
251 * any symbol overlaps with _init. If none does, set a bit in
252 * the module structure that this module has correct _init and
253 * _fini sizes. This will cause some pain the first time a
254 * module is scanned, but at least it would be O(N) instead of
257 if (strcmp(name
, "_init") == 0)
260 if (strcmp(name
, "_fini") == 0)
264 * In order to be eligible, the function must begin with the
265 * following sequence:
270 * Note that there are two variants of encodings that generate
271 * the movl; we must check for both. For 64-bit, we would
272 * normally insist that a function begin with the following
278 * However, the compiler for 64-bit often splits these two
279 * instructions -- and the first instruction in the function
280 * is often not the pushq. As a result, on 64-bit we look
281 * for any "pushq %rbp" in the function and we instrument
282 * this with a breakpoint instruction.
284 instr
= (uint8_t *)sym
->st_value
;
285 limit
= (uint8_t *)(sym
->st_value
+ sym
->st_size
);
288 while (instr
< limit
) {
289 if (*instr
== FBT_PUSHL_EBP
)
292 if ((size
= dtrace_instr_size(instr
)) <= 0)
298 if (instr
>= limit
|| *instr
!= FBT_PUSHL_EBP
) {
300 * We either don't save the frame pointer in this
301 * function, or we ran into some disassembly
302 * screw-up. Either way, we bail.
307 if (instr
[0] != FBT_PUSHL_EBP
)
310 if (!(instr
[1] == FBT_MOVL_ESP_EBP0_V0
&&
311 instr
[2] == FBT_MOVL_ESP_EBP1_V0
) &&
312 !(instr
[1] == FBT_MOVL_ESP_EBP0_V1
&&
313 instr
[2] == FBT_MOVL_ESP_EBP1_V1
))
317 fbt
= kmem_zalloc(sizeof (fbt_probe_t
), KM_SLEEP
);
318 fbt
->fbtp_name
= name
;
319 fbt
->fbtp_id
= dtrace_probe_create(fbt_id
, modname
,
320 name
, FBT_ENTRY
, 3, fbt
);
321 fbt
->fbtp_patchpoint
= instr
;
323 fbt
->fbtp_loadcnt
= ctl
->mod_loadcnt
;
324 fbt
->fbtp_rval
= DTRACE_INVOP_PUSHL_EBP
;
325 fbt
->fbtp_savedval
= *instr
;
326 fbt
->fbtp_patchval
= FBT_PATCHVAL
;
328 fbt
->fbtp_hashnext
= fbt_probetab
[FBT_ADDR2NDX(instr
)];
329 fbt
->fbtp_symndx
= i
;
330 fbt_probetab
[FBT_ADDR2NDX(instr
)] = fbt
;
340 * If this disassembly fails, then we've likely walked off into
341 * a jump table or some other unsuitable area. Bail out of the
344 if ((size
= dtrace_instr_size(instr
)) <= 0)
349 * We only instrument "ret" on amd64 -- we don't yet instrument
350 * ret imm16, largely because the compiler doesn't seem to
351 * (yet) emit them in the kernel...
353 if (*instr
!= FBT_RET
) {
359 (*instr
== FBT_POPL_EBP
|| *instr
== FBT_LEAVE
) &&
360 (*(instr
+ 1) == FBT_RET
||
361 *(instr
+ 1) == FBT_RET_IMM16
))) {
368 * We (desperately) want to avoid erroneously instrumenting a
369 * jump table, especially given that our markers are pretty
370 * short: two bytes on x86, and just one byte on amd64. To
371 * determine if we're looking at a true instruction sequence
372 * or an inline jump table that happens to contain the same
373 * byte sequences, we resort to some heuristic sleaze: we
374 * treat this instruction as being contained within a pointer,
375 * and see if that pointer points to within the body of the
376 * function. If it does, we refuse to instrument it.
378 for (j
= 0; j
< sizeof (uintptr_t); j
++) {
379 uintptr_t check
= (uintptr_t)instr
- j
;
382 if (check
< sym
->st_value
)
385 if (check
+ sizeof (uintptr_t) > (uintptr_t)limit
)
388 ptr
= *(uint8_t **)check
;
390 if (ptr
>= (uint8_t *)sym
->st_value
&& ptr
< limit
) {
399 fbt
= kmem_zalloc(sizeof (fbt_probe_t
), KM_SLEEP
);
400 fbt
->fbtp_name
= name
;
402 if (retfbt
== NULL
) {
403 fbt
->fbtp_id
= dtrace_probe_create(fbt_id
, modname
,
404 name
, FBT_RETURN
, 3, fbt
);
406 retfbt
->fbtp_next
= fbt
;
407 fbt
->fbtp_id
= retfbt
->fbtp_id
;
411 fbt
->fbtp_patchpoint
= instr
;
413 fbt
->fbtp_loadcnt
= ctl
->mod_loadcnt
;
416 if (*instr
== FBT_POPL_EBP
) {
417 fbt
->fbtp_rval
= DTRACE_INVOP_POPL_EBP
;
419 ASSERT(*instr
== FBT_LEAVE
);
420 fbt
->fbtp_rval
= DTRACE_INVOP_LEAVE
;
423 (uintptr_t)(instr
- (uint8_t *)sym
->st_value
) + 1;
426 ASSERT(*instr
== FBT_RET
);
427 fbt
->fbtp_rval
= DTRACE_INVOP_RET
;
429 (uintptr_t)(instr
- (uint8_t *)sym
->st_value
);
432 fbt
->fbtp_savedval
= *instr
;
433 fbt
->fbtp_patchval
= FBT_PATCHVAL
;
434 fbt
->fbtp_hashnext
= fbt_probetab
[FBT_ADDR2NDX(instr
)];
435 fbt
->fbtp_symndx
= i
;
436 fbt_probetab
[FBT_ADDR2NDX(instr
)] = fbt
;
447 fbt_destroy(void *arg
, dtrace_id_t id
, void *parg
)
449 fbt_probe_t
*fbt
= parg
, *next
, *hash
, *last
;
450 struct modctl
*ctl
= fbt
->fbtp_ctl
;
454 if (ctl
!= NULL
&& ctl
->mod_loadcnt
== fbt
->fbtp_loadcnt
) {
455 if ((ctl
->mod_loadcnt
== fbt
->fbtp_loadcnt
&&
458 (ctl
->mod_mp
))->fbt_nentries
--;
463 * Now we need to remove this probe from the fbt_probetab.
465 ndx
= FBT_ADDR2NDX(fbt
->fbtp_patchpoint
);
467 hash
= fbt_probetab
[ndx
];
469 while (hash
!= fbt
) {
470 ASSERT(hash
!= NULL
);
472 hash
= hash
->fbtp_hashnext
;
476 last
->fbtp_hashnext
= fbt
->fbtp_hashnext
;
478 fbt_probetab
[ndx
] = fbt
->fbtp_hashnext
;
481 next
= fbt
->fbtp_next
;
482 kmem_free(fbt
, sizeof (fbt_probe_t
));
485 } while (fbt
!= NULL
);
490 fbt_enable(void *arg
, dtrace_id_t id
, void *parg
)
492 fbt_probe_t
*fbt
= parg
;
493 struct modctl
*ctl
= fbt
->fbtp_ctl
;
497 if (!ctl
->mod_loaded
) {
499 cmn_err(CE_NOTE
, "fbt is failing for probe %s "
500 "(module %s unloaded)",
501 fbt
->fbtp_name
, ctl
->mod_modname
);
508 * Now check that our modctl has the expected load count. If it
509 * doesn't, this module must have been unloaded and reloaded -- and
510 * we're not going to touch it.
512 if (ctl
->mod_loadcnt
!= fbt
->fbtp_loadcnt
) {
514 cmn_err(CE_NOTE
, "fbt is failing for probe %s "
515 "(module %s reloaded)",
516 fbt
->fbtp_name
, ctl
->mod_modname
);
522 for (; fbt
!= NULL
; fbt
= fbt
->fbtp_next
)
523 *fbt
->fbtp_patchpoint
= fbt
->fbtp_patchval
;
530 fbt_disable(void *arg
, dtrace_id_t id
, void *parg
)
532 fbt_probe_t
*fbt
= parg
;
533 struct modctl
*ctl
= fbt
->fbtp_ctl
;
535 ASSERT(ctl
->mod_nenabled
> 0);
538 if (!ctl
->mod_loaded
|| (ctl
->mod_loadcnt
!= fbt
->fbtp_loadcnt
))
541 for (; fbt
!= NULL
; fbt
= fbt
->fbtp_next
)
542 *fbt
->fbtp_patchpoint
= fbt
->fbtp_savedval
;
547 fbt_suspend(void *arg
, dtrace_id_t id
, void *parg
)
549 fbt_probe_t
*fbt
= parg
;
550 struct modctl
*ctl
= fbt
->fbtp_ctl
;
552 ASSERT(ctl
->mod_nenabled
> 0);
554 if (!ctl
->mod_loaded
|| (ctl
->mod_loadcnt
!= fbt
->fbtp_loadcnt
))
557 for (; fbt
!= NULL
; fbt
= fbt
->fbtp_next
)
558 *fbt
->fbtp_patchpoint
= fbt
->fbtp_savedval
;
563 fbt_resume(void *arg
, dtrace_id_t id
, void *parg
)
565 fbt_probe_t
*fbt
= parg
;
566 struct modctl
*ctl
= fbt
->fbtp_ctl
;
568 ASSERT(ctl
->mod_nenabled
> 0);
570 if (!ctl
->mod_loaded
|| (ctl
->mod_loadcnt
!= fbt
->fbtp_loadcnt
))
573 for (; fbt
!= NULL
; fbt
= fbt
->fbtp_next
)
574 *fbt
->fbtp_patchpoint
= fbt
->fbtp_patchval
;
579 fbt_getargdesc(void *arg
, dtrace_id_t id
, void *parg
, dtrace_argdesc_t
*desc
)
581 fbt_probe_t
*fbt
= parg
;
582 struct modctl
*ctl
= fbt
->fbtp_ctl
;
583 struct module
*mp
= ctl
->mod_mp
;
584 ctf_file_t
*fp
= NULL
, *pfp
;
587 ctf_id_t argv
[32], type
;
588 int argc
= sizeof (argv
) / sizeof (ctf_id_t
);
591 if (!ctl
->mod_loaded
|| (ctl
->mod_loadcnt
!= fbt
->fbtp_loadcnt
))
594 if (fbt
->fbtp_roffset
!= 0 && desc
->dtargd_ndx
== 0) {
595 (void) strcpy(desc
->dtargd_native
, "int");
599 if ((fp
= ctf_modopen(mp
, &error
)) == NULL
) {
601 * We have no CTF information for this module -- and therefore
602 * no args[] information.
608 * If we have a parent container, we must manually import it.
610 if ((parent
= ctf_parent_name(fp
)) != NULL
) {
611 struct modctl
*mp
= &modules
;
612 struct modctl
*mod
= NULL
;
615 * We must iterate over all modules to find the module that
619 if (strcmp(mp
->mod_modname
, parent
) == 0) {
623 } while ((mp
= mp
->mod_next
) != &modules
);
628 if ((pfp
= ctf_modopen(mod
->mod_mp
, &error
)) == NULL
) {
632 if (ctf_import(fp
, pfp
) != 0) {
640 if (ctf_func_info(fp
, fbt
->fbtp_symndx
, &f
) == CTF_ERR
)
643 if (fbt
->fbtp_roffset
!= 0) {
644 if (desc
->dtargd_ndx
> 1)
647 ASSERT(desc
->dtargd_ndx
== 1);
650 if (desc
->dtargd_ndx
+ 1 > f
.ctc_argc
)
653 if (ctf_func_args(fp
, fbt
->fbtp_symndx
, argc
, argv
) == CTF_ERR
)
656 type
= argv
[desc
->dtargd_ndx
];
659 if (ctf_type_name(fp
, type
, desc
->dtargd_native
,
660 DTRACE_ARGTYPELEN
) != NULL
) {
668 desc
->dtargd_ndx
= DTRACE_ARGNONE
;
671 static dtrace_pattr_t fbt_attr
= {
672 { DTRACE_STABILITY_EVOLVING
, DTRACE_STABILITY_EVOLVING
, DTRACE_CLASS_ISA
},
673 { DTRACE_STABILITY_PRIVATE
, DTRACE_STABILITY_PRIVATE
, DTRACE_CLASS_UNKNOWN
},
674 { DTRACE_STABILITY_PRIVATE
, DTRACE_STABILITY_PRIVATE
, DTRACE_CLASS_UNKNOWN
},
675 { DTRACE_STABILITY_EVOLVING
, DTRACE_STABILITY_EVOLVING
, DTRACE_CLASS_ISA
},
676 { DTRACE_STABILITY_PRIVATE
, DTRACE_STABILITY_PRIVATE
, DTRACE_CLASS_ISA
},
679 static dtrace_pops_t fbt_pops
= {
693 fbt_cleanup(dev_info_t
*devi
)
695 dtrace_invop_remove(fbt_invop
);
696 ddi_remove_minor_node(devi
, NULL
);
697 kmem_free(fbt_probetab
, fbt_probetab_size
* sizeof (fbt_probe_t
*));
699 fbt_probetab_mask
= 0;
703 fbt_attach(dev_info_t
*devi
, ddi_attach_cmd_t cmd
)
709 return (DDI_SUCCESS
);
711 return (DDI_FAILURE
);
714 if (fbt_probetab_size
== 0)
715 fbt_probetab_size
= FBT_PROBETAB_SIZE
;
717 fbt_probetab_mask
= fbt_probetab_size
- 1;
719 kmem_zalloc(fbt_probetab_size
* sizeof (fbt_probe_t
*), KM_SLEEP
);
721 dtrace_invop_add(fbt_invop
);
723 if (ddi_create_minor_node(devi
, "fbt", S_IFCHR
, 0,
724 DDI_PSEUDO
, NULL
) == DDI_FAILURE
||
725 dtrace_register("fbt", &fbt_attr
, DTRACE_PRIV_KERNEL
, NULL
,
726 &fbt_pops
, NULL
, &fbt_id
) != 0) {
728 return (DDI_FAILURE
);
731 ddi_report_dev(devi
);
734 return (DDI_SUCCESS
);
738 fbt_detach(dev_info_t
*devi
, ddi_detach_cmd_t cmd
)
744 return (DDI_SUCCESS
);
746 return (DDI_FAILURE
);
749 if (dtrace_unregister(fbt_id
) != 0)
750 return (DDI_FAILURE
);
754 return (DDI_SUCCESS
);
759 fbt_info(dev_info_t
*dip
, ddi_info_cmd_t infocmd
, void *arg
, void **result
)
764 case DDI_INFO_DEVT2DEVINFO
:
765 *result
= (void *)fbt_devi
;
768 case DDI_INFO_DEVT2INSTANCE
:
780 fbt_open(dev_t
*devp
, int flag
, int otyp
, cred_t
*cred_p
)
785 static struct cb_ops fbt_cb_ops
= {
788 nulldev
, /* strategy */
798 ddi_prop_op
, /* cb_prop_op */
800 D_NEW
| D_MP
/* Driver compatibility flag */
803 static struct dev_ops fbt_ops
= {
804 DEVO_REV
, /* devo_rev */
806 fbt_info
, /* get_dev_info */
807 nulldev
, /* identify */
809 fbt_attach
, /* attach */
810 fbt_detach
, /* detach */
812 &fbt_cb_ops
, /* driver operations */
813 NULL
, /* bus operations */
814 nodev
, /* dev power */
815 ddi_quiesce_not_needed
, /* quiesce */
819 * Module linkage information for the kernel.
821 static struct modldrv modldrv
= {
822 &mod_driverops
, /* module type (this is a pseudo driver) */
823 "Function Boundary Tracing", /* name of module */
824 &fbt_ops
, /* driver ops */
827 static struct modlinkage modlinkage
= {
836 return (mod_install(&modlinkage
));
840 _info(struct modinfo
*modinfop
)
842 return (mod_info(&modlinkage
, modinfop
));
848 return (mod_remove(&modlinkage
));