4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
22 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
26 #include <mach-o/loader.h>
27 #include <libkern/kernel_mach_header.h>
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/sysctl.h>
32 #include <sys/errno.h>
34 #include <sys/ioctl.h>
36 #include <sys/fcntl.h>
37 #include <miscfs/devfs/devfs.h>
38 #include <pexpert/pexpert.h>
40 #include <sys/dtrace.h>
41 #include <sys/dtrace_impl.h>
44 #include <sys/dtrace_glue.h>
45 #include <san/kasan.h>
49 /* #include <machine/trap.h> */
50 struct savearea_t
; /* Used anonymously */
52 #if defined(__arm__) || defined(__arm64__)
53 typedef kern_return_t (*perfCallback
)(int, struct savearea_t
*, __unused
int, __unused
int);
54 extern perfCallback tempDTraceTrapHook
;
55 extern kern_return_t
fbt_perfCallback(int, struct savearea_t
*, __unused
int, __unused
int);
56 #elif defined(__x86_64__)
57 typedef kern_return_t (*perfCallback
)(int, struct savearea_t
*, uintptr_t *, __unused
int);
58 extern perfCallback tempDTraceTrapHook
;
59 extern kern_return_t
fbt_perfCallback(int, struct savearea_t
*, uintptr_t *, __unused
int);
61 #error Unknown architecture
66 qsort(void *a
, size_t n
, size_t es
, int (*cmp
)(const void *, const void *));
68 #define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask)
69 #define FBT_PROBETAB_SIZE 0x8000 /* 32k entries -- 128K total */
71 static int fbt_probetab_size
;
72 dtrace_provider_id_t fbt_id
;
73 fbt_probe_t
**fbt_probetab
;
74 int fbt_probetab_mask
;
75 static int fbt_verbose
= 0;
77 extern int ignore_fbt_blacklist
;
79 extern int dtrace_kernel_symbol_mode
;
82 void fbt_init( void );
86 fbt_destroy(void *arg
, dtrace_id_t id
, void *parg
)
88 #pragma unused(arg,id)
89 fbt_probe_t
*fbt
= parg
, *next
, *hash
, *last
;
94 * Now we need to remove this probe from the fbt_probetab.
96 ndx
= FBT_ADDR2NDX(fbt
->fbtp_patchpoint
);
98 hash
= fbt_probetab
[ndx
];
100 while (hash
!= fbt
) {
101 ASSERT(hash
!= NULL
);
103 hash
= hash
->fbtp_hashnext
;
107 last
->fbtp_hashnext
= fbt
->fbtp_hashnext
;
109 fbt_probetab
[ndx
] = fbt
->fbtp_hashnext
;
112 next
= fbt
->fbtp_next
;
113 kmem_free(fbt
, sizeof(fbt_probe_t
));
116 } while (fbt
!= NULL
);
121 fbt_enable(void *arg
, dtrace_id_t id
, void *parg
)
123 #pragma unused(arg,id)
124 fbt_probe_t
*fbt
= parg
;
125 struct modctl
*ctl
= NULL
;
127 for (; fbt
!= NULL
; fbt
= fbt
->fbtp_next
) {
130 if (!ctl
->mod_loaded
) {
132 cmn_err(CE_NOTE
, "fbt is failing for probe %s "
133 "(module %s unloaded)",
134 fbt
->fbtp_name
, ctl
->mod_modname
);
141 * Now check that our modctl has the expected load count. If it
142 * doesn't, this module must have been unloaded and reloaded -- and
143 * we're not going to touch it.
145 if (ctl
->mod_loadcnt
!= fbt
->fbtp_loadcnt
) {
147 cmn_err(CE_NOTE
, "fbt is failing for probe %s "
148 "(module %s reloaded)",
149 fbt
->fbtp_name
, ctl
->mod_modname
);
155 dtrace_casptr(&tempDTraceTrapHook
, NULL
, ptrauth_nop_cast(void *, &fbt_perfCallback
));
156 if (tempDTraceTrapHook
!= (perfCallback
)fbt_perfCallback
) {
158 cmn_err(CE_NOTE
, "fbt_enable is failing for probe %s "
159 "in module %s: tempDTraceTrapHook already occupied.",
160 fbt
->fbtp_name
, ctl
->mod_modname
);
165 if (fbt
->fbtp_currentval
!= fbt
->fbtp_patchval
) {
167 /* Since dtrace probes can call into KASan and vice versa, things can get
168 * very slow if we have a lot of probes. This call will disable the KASan
169 * fakestack after a threshold of probes is reached. */
170 kasan_fakestack_suspend();
173 (void)ml_nofault_copy((vm_offset_t
)&fbt
->fbtp_patchval
, (vm_offset_t
)fbt
->fbtp_patchpoint
,
174 sizeof(fbt
->fbtp_patchval
));
176 * Make the patched instruction visible via a data + instruction
177 * cache flush for the platforms that need it
179 flush_dcache((vm_offset_t
)fbt
->fbtp_patchpoint
, (vm_size_t
)sizeof(fbt
->fbtp_patchval
), 0);
180 invalidate_icache((vm_offset_t
)fbt
->fbtp_patchpoint
, (vm_size_t
)sizeof(fbt
->fbtp_patchval
), 0);
181 fbt
->fbtp_currentval
= fbt
->fbtp_patchval
;
187 dtrace_membar_consumer();
194 fbt_disable(void *arg
, dtrace_id_t id
, void *parg
)
196 #pragma unused(arg,id)
197 fbt_probe_t
*fbt
= parg
;
198 struct modctl
*ctl
= NULL
;
200 for (; fbt
!= NULL
; fbt
= fbt
->fbtp_next
) {
203 if (!ctl
->mod_loaded
|| (ctl
->mod_loadcnt
!= fbt
->fbtp_loadcnt
)) {
207 if (fbt
->fbtp_currentval
!= fbt
->fbtp_savedval
) {
208 (void)ml_nofault_copy((vm_offset_t
)&fbt
->fbtp_savedval
, (vm_offset_t
)fbt
->fbtp_patchpoint
,
209 sizeof(fbt
->fbtp_savedval
));
211 * Make the patched instruction visible via a data + instruction
212 * cache flush for the platforms that need it
214 flush_dcache((vm_offset_t
)fbt
->fbtp_patchpoint
, (vm_size_t
)sizeof(fbt
->fbtp_patchval
), 0);
215 invalidate_icache((vm_offset_t
)fbt
->fbtp_patchpoint
, (vm_size_t
)sizeof(fbt
->fbtp_patchval
), 0);
217 fbt
->fbtp_currentval
= fbt
->fbtp_savedval
;
218 ASSERT(ctl
->mod_nenabled
> 0);
222 kasan_fakestack_resume();
226 dtrace_membar_consumer();
231 fbt_suspend(void *arg
, dtrace_id_t id
, void *parg
)
233 #pragma unused(arg,id)
234 fbt_probe_t
*fbt
= parg
;
235 struct modctl
*ctl
= NULL
;
237 for (; fbt
!= NULL
; fbt
= fbt
->fbtp_next
) {
240 ASSERT(ctl
->mod_nenabled
> 0);
241 if (!ctl
->mod_loaded
|| (ctl
->mod_loadcnt
!= fbt
->fbtp_loadcnt
)) {
245 (void)ml_nofault_copy((vm_offset_t
)&fbt
->fbtp_savedval
, (vm_offset_t
)fbt
->fbtp_patchpoint
,
246 sizeof(fbt
->fbtp_savedval
));
249 * Make the patched instruction visible via a data + instruction
250 * cache flush for the platforms that need it
252 flush_dcache((vm_offset_t
)fbt
->fbtp_patchpoint
, (vm_size_t
)sizeof(fbt
->fbtp_savedval
), 0);
253 invalidate_icache((vm_offset_t
)fbt
->fbtp_patchpoint
, (vm_size_t
)sizeof(fbt
->fbtp_savedval
), 0);
255 fbt
->fbtp_currentval
= fbt
->fbtp_savedval
;
258 dtrace_membar_consumer();
263 fbt_resume(void *arg
, dtrace_id_t id
, void *parg
)
265 #pragma unused(arg,id)
266 fbt_probe_t
*fbt
= parg
;
267 struct modctl
*ctl
= NULL
;
269 for (; fbt
!= NULL
; fbt
= fbt
->fbtp_next
) {
272 ASSERT(ctl
->mod_nenabled
> 0);
273 if (!ctl
->mod_loaded
|| (ctl
->mod_loadcnt
!= fbt
->fbtp_loadcnt
)) {
277 dtrace_casptr(&tempDTraceTrapHook
, NULL
, ptrauth_nop_cast(void *, &fbt_perfCallback
));
278 if (tempDTraceTrapHook
!= (perfCallback
)fbt_perfCallback
) {
280 cmn_err(CE_NOTE
, "fbt_resume is failing for probe %s "
281 "in module %s: tempDTraceTrapHook already occupied.",
282 fbt
->fbtp_name
, ctl
->mod_modname
);
287 (void)ml_nofault_copy((vm_offset_t
)&fbt
->fbtp_patchval
, (vm_offset_t
)fbt
->fbtp_patchpoint
,
288 sizeof(fbt
->fbtp_patchval
));
291 * Make the patched instruction visible via a data + instruction cache flush.
293 flush_dcache((vm_offset_t
)fbt
->fbtp_patchpoint
, (vm_size_t
)sizeof(fbt
->fbtp_patchval
), 0);
294 invalidate_icache((vm_offset_t
)fbt
->fbtp_patchpoint
, (vm_size_t
)sizeof(fbt
->fbtp_patchval
), 0);
296 fbt
->fbtp_currentval
= fbt
->fbtp_patchval
;
299 dtrace_membar_consumer();
303 fbt_provide_module_user_syms(struct modctl
*ctl
)
306 char *modname
= ctl
->mod_modname
;
308 dtrace_module_symbols_t
* module_symbols
= ctl
->mod_user_symbols
;
309 if (module_symbols
) {
310 for (i
= 0; i
< module_symbols
->dtmodsyms_count
; i
++) {
312 * symbol->dtsym_addr (the symbol address) passed in from
313 * user space, is already slid for both kexts and kernel.
315 dtrace_symbol_t
* symbol
= &module_symbols
->dtmodsyms_symbols
[i
];
317 char* name
= symbol
->dtsym_name
;
319 /* Lop off omnipresent leading underscore. */
324 if (fbt_excluded(name
)) {
329 * Ignore symbols with a null address
331 if (!symbol
->dtsym_addr
) {
336 * Ignore symbols not part of this module
338 if (!dtrace_addr_in_module((void*)symbol
->dtsym_addr
, ctl
)) {
342 fbt_provide_probe(ctl
, modname
, name
, (machine_inst_t
*)(uintptr_t)symbol
->dtsym_addr
, (machine_inst_t
*)(uintptr_t)(symbol
->dtsym_addr
+ symbol
->dtsym_size
));
347 fbt_provide_kernel_section(struct modctl
*ctl
, kernel_section_t
*sect
, kernel_nlist_t
*sym
, uint32_t nsyms
, const char *strings
)
349 uintptr_t sect_start
= (uintptr_t)sect
->addr
;
350 uintptr_t sect_end
= (uintptr_t)sect
->size
+ sect
->addr
;
353 if ((sect
->flags
& S_ATTR_PURE_INSTRUCTIONS
) != S_ATTR_PURE_INSTRUCTIONS
) {
357 for (i
= 0; i
< nsyms
; i
++) {
358 uint8_t n_type
= sym
[i
].n_type
& (N_TYPE
| N_EXT
);
359 const char *name
= strings
+ sym
[i
].n_un
.n_strx
;
362 if (sym
[i
].n_value
< sect_start
|| sym
[i
].n_value
> sect_end
) {
366 /* Check that the symbol is a global and that it has a name. */
367 if (((N_SECT
| N_EXT
) != n_type
&& (N_ABS
| N_EXT
) != n_type
)) {
371 if (0 == sym
[i
].n_un
.n_strx
) { /* iff a null, "", name. */
375 /* Lop off omnipresent leading underscore. */
381 // Skip non-thumb functions on arm32
382 if (sym
[i
].n_sect
== 1 && !(sym
[i
].n_desc
& N_ARM_THUMB_DEF
)) {
385 #endif /* defined(__arm__) */
387 if (fbt_excluded(name
)) {
392 * Find the function boundary by looking at either the
393 * end of the section or the beginning of the next symbol
395 if (i
== nsyms
- 1) {
398 limit
= sym
[i
+ 1].n_value
;
401 fbt_provide_probe(ctl
, ctl
->mod_modname
, name
, (machine_inst_t
*)sym
[i
].n_value
, (machine_inst_t
*)limit
);
406 fbt_sym_cmp(const void *ap
, const void *bp
)
408 return (int)(((const kernel_nlist_t
*)ap
)->n_value
- ((const kernel_nlist_t
*)bp
)->n_value
);
412 fbt_provide_module_kernel_syms(struct modctl
*ctl
)
414 kernel_mach_header_t
*mh
= (kernel_mach_header_t
*)(ctl
->mod_address
);
415 kernel_segment_command_t
*seg
;
416 struct load_command
*cmd
;
417 kernel_segment_command_t
*linkedit
= NULL
;
418 struct symtab_command
*symtab
= NULL
;
419 kernel_nlist_t
*syms
= NULL
, *sorted_syms
= NULL
;
424 if (mh
->magic
!= MH_MAGIC_KERNEL
) {
428 cmd
= (struct load_command
*) &mh
[1];
429 for (i
= 0; i
< mh
->ncmds
; i
++) {
430 if (cmd
->cmd
== LC_SEGMENT_KERNEL
) {
431 kernel_segment_command_t
*orig_sg
= (kernel_segment_command_t
*) cmd
;
432 if (LIT_STRNEQL(orig_sg
->segname
, SEG_LINKEDIT
)) {
435 } else if (cmd
->cmd
== LC_SYMTAB
) {
436 symtab
= (struct symtab_command
*) cmd
;
438 if (symtab
&& linkedit
) {
441 cmd
= (struct load_command
*) ((caddr_t
) cmd
+ cmd
->cmdsize
);
444 if ((symtab
== NULL
) || (linkedit
== NULL
)) {
448 syms
= (kernel_nlist_t
*)(linkedit
->vmaddr
+ symtab
->symoff
- linkedit
->fileoff
);
449 strings
= (const char *)(linkedit
->vmaddr
+ symtab
->stroff
- linkedit
->fileoff
);
452 * Make a copy of the symbol table and sort it to not cross into the next function
453 * when disassembling the function
455 symlen
= sizeof(kernel_nlist_t
) * symtab
->nsyms
;
456 sorted_syms
= kmem_alloc(symlen
, KM_SLEEP
);
457 bcopy(syms
, sorted_syms
, symlen
);
458 qsort(sorted_syms
, symtab
->nsyms
, sizeof(kernel_nlist_t
), fbt_sym_cmp
);
460 for (seg
= firstsegfromheader(mh
); seg
!= NULL
; seg
= nextsegfromheader(mh
, seg
)) {
461 kernel_section_t
*sect
= firstsect(seg
);
463 if (strcmp(seg
->segname
, "__KLD") == 0) {
467 for (sect
= firstsect(seg
); sect
!= NULL
; sect
= nextsect(seg
, sect
)) {
468 fbt_provide_kernel_section(ctl
, sect
, sorted_syms
, symtab
->nsyms
, strings
);
472 kmem_free(sorted_syms
, symlen
);
476 fbt_provide_module(void *arg
, struct modctl
*ctl
)
480 ASSERT(dtrace_kernel_symbol_mode
!= DTRACE_KERNEL_SYMBOLS_NEVER
);
481 LCK_MTX_ASSERT(&mod_lock
, LCK_MTX_ASSERT_OWNED
);
483 if (dtrace_fbt_probes_restricted()) {
487 // Update the "ignore blacklist" bit
488 if (ignore_fbt_blacklist
) {
489 ctl
->mod_flags
|= MODCTL_FBT_PROVIDE_BLACKLISTED_PROBES
;
492 if (MOD_FBT_DONE(ctl
)) {
496 if (fbt_module_excluded(ctl
)) {
497 ctl
->mod_flags
|= MODCTL_FBT_INVALID
;
501 if (MOD_HAS_KERNEL_SYMBOLS(ctl
)) {
502 fbt_provide_module_kernel_syms(ctl
);
503 ctl
->mod_flags
|= MODCTL_FBT_PROBES_PROVIDED
;
504 if (MOD_FBT_PROVIDE_BLACKLISTED_PROBES(ctl
)) {
505 ctl
->mod_flags
|= MODCTL_FBT_BLACKLISTED_PROBES_PROVIDED
;
510 if (MOD_HAS_USERSPACE_SYMBOLS(ctl
)) {
511 fbt_provide_module_user_syms(ctl
);
512 ctl
->mod_flags
|= MODCTL_FBT_PROBES_PROVIDED
;
513 if (MOD_FBT_PROVIDE_BLACKLISTED_PROBES(ctl
)) {
514 ctl
->mod_flags
|= MODCTL_FBT_BLACKLISTED_PROBES_PROVIDED
;
520 static dtrace_pattr_t fbt_attr
= {
521 { DTRACE_STABILITY_EVOLVING
, DTRACE_STABILITY_EVOLVING
, DTRACE_CLASS_ISA
},
522 { DTRACE_STABILITY_PRIVATE
, DTRACE_STABILITY_PRIVATE
, DTRACE_CLASS_UNKNOWN
},
523 { DTRACE_STABILITY_PRIVATE
, DTRACE_STABILITY_PRIVATE
, DTRACE_CLASS_UNKNOWN
},
524 { DTRACE_STABILITY_EVOLVING
, DTRACE_STABILITY_EVOLVING
, DTRACE_CLASS_ISA
},
525 { DTRACE_STABILITY_PRIVATE
, DTRACE_STABILITY_PRIVATE
, DTRACE_CLASS_ISA
},
528 static dtrace_pops_t fbt_pops
= {
529 .dtps_provide
= NULL
,
530 .dtps_provide_module
= fbt_provide_module
,
531 .dtps_enable
= fbt_enable
,
532 .dtps_disable
= fbt_disable
,
533 .dtps_suspend
= fbt_suspend
,
534 .dtps_resume
= fbt_resume
,
535 .dtps_getargdesc
= NULL
, /* APPLE NOTE: fbt_getargdesc implemented in userspace */
536 .dtps_getargval
= NULL
,
537 .dtps_usermode
= NULL
,
538 .dtps_destroy
= fbt_destroy
542 fbt_cleanup(dev_info_t
*devi
)
544 dtrace_invop_remove(fbt_invop
);
545 ddi_remove_minor_node(devi
, NULL
);
546 kmem_free(fbt_probetab
, fbt_probetab_size
* sizeof(fbt_probe_t
*));
548 fbt_probetab_mask
= 0;
552 fbt_attach(dev_info_t
*devi
)
554 if (fbt_probetab_size
== 0) {
555 fbt_probetab_size
= FBT_PROBETAB_SIZE
;
558 fbt_probetab_mask
= fbt_probetab_size
- 1;
560 kmem_zalloc(fbt_probetab_size
* sizeof(fbt_probe_t
*), KM_SLEEP
);
562 dtrace_invop_add(fbt_invop
);
564 if (ddi_create_minor_node(devi
, "fbt", S_IFCHR
, 0,
565 DDI_PSEUDO
, 0) == DDI_FAILURE
||
566 dtrace_register("fbt", &fbt_attr
, DTRACE_PRIV_KERNEL
, NULL
,
567 &fbt_pops
, NULL
, &fbt_id
) != 0) {
575 static d_open_t _fbt_open
;
578 _fbt_open(dev_t dev
, int flags
, int devtype
, struct proc
*p
)
580 #pragma unused(dev,flags,devtype,p)
584 #define FBT_MAJOR -24 /* let the kernel pick the device number */
586 static const struct cdevsw fbt_cdevsw
=
591 .d_write
= eno_rdwrt
,
592 .d_ioctl
= eno_ioctl
,
593 .d_stop
= (stop_fcn_t
*)nulldev
,
594 .d_reset
= (reset_fcn_t
*)nulldev
,
595 .d_select
= eno_select
,
597 .d_strategy
= eno_strat
,
598 .d_reserved_1
= eno_getc
,
599 .d_reserved_2
= eno_putc
,
602 #undef kmem_alloc /* from its binding to dt_kmem_alloc glue */
603 #undef kmem_free /* from its binding to dt_kmem_free glue */
604 #include <vm/vm_kern.h>
610 int majdevno
= cdevsw_add(FBT_MAJOR
, &fbt_cdevsw
);
613 printf("fbt_init: failed to allocate a major number!\n");
617 fbt_blacklist_init();
618 fbt_attach((dev_info_t
*)(uintptr_t)majdevno
);