4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 /* #pragma ident "@(#)fbt.c 1.15 05/09/19 SMI" */
31 #define _KERNEL /* Solaris vs. Darwin */
35 #define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
36 #include <kern/cpu_data.h>
37 #include <kern/thread.h>
38 #include <mach/thread_status.h>
40 #include <mach-o/loader.h>
41 #include <mach-o/nlist.h>
43 extern struct mach_header _mh_execute_header
; /* the kernel's mach header */
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/errno.h>
49 #include <sys/ioctl.h>
51 #include <sys/fcntl.h>
52 #include <miscfs/devfs/devfs.h>
54 #include <sys/dtrace.h>
55 #include <sys/dtrace_impl.h>
58 #include <sys/dtrace_glue.h>
59 #include <machine/cpu_capabilities.h>
61 #define DTRACE_INVOP_NOP_SKIP 4
63 #define DTRACE_INVOP_MFLR_R0 11
64 #define DTRACE_INVOP_MFLR_R0_SKIP 4
66 #define FBT_MFLR_R0 0x7c0802a6
68 #define FBT_MTLR_R0 0x7c0803a6
69 #define FBT_BLR 0x4e800020
70 #define FBT_BCTR 0x4e800420
72 #define FBT_LI_MASK 0x03fffffc
73 #define FBT_JUMP 0x48000000
74 #define IS_JUMP(instr) (((instr) & ~FBT_LI_MASK) == FBT_JUMP) /* Relative, No LR update -- AA == 0b, LK == 0b */
75 #define FBT_LI_EXTD64(instr) \
76 (((instr) & 0x02000000) ? \
77 (((uint64_t)((instr) & FBT_LI_MASK)) | 0xfffffffffc000000ULL) : \
78 ((uint64_t)((instr) & FBT_LI_MASK)))
80 #define FBT_PATCHVAL 0x7c810808
81 #define FBT_AFRAMES_ENTRY 6
82 #define FBT_AFRAMES_RETURN 6
84 #define FBT_ENTRY "entry"
85 #define FBT_RETURN "return"
86 #define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask)
88 extern dtrace_provider_id_t fbt_id
;
89 extern fbt_probe_t
**fbt_probetab
;
90 extern int fbt_probetab_mask
;
92 kern_return_t
fbt_perfCallback(int, ppc_saved_state_t
*, int, int);
93 kern_return_t
fbt_perfIntCallback(int, ppc_saved_state_t
*, int, int);
96 * Critical routines that must not be probed. PR_5221096, PR_5379018.
/*
 * Table of routine names that __fbt_provide_module() looks up (via the
 * file-local bsearch() with _cmp()) before placing a probe; the lookup is
 * bypassed when the "IgnoreFBTBlacklist" boot-arg is non-zero.
 * Because the lookup is a binary search using strncmp() ordering, entries
 * must be kept in ascending byte order (upper-case sorts before lower-case).
 */
99 static const char * critical_blacklist
[] =
109 "cpu_per_proc_alloc",
111 "cpu_signal_handler",
118 "mapSkipListVerifyC",
120 "register_cpu_setup_func",
121 "unregister_cpu_setup_func"
/* Number of entries in critical_blacklist, computed at compile time. */
123 #define CRITICAL_BLACKLIST_COUNT (sizeof(critical_blacklist)/sizeof(critical_blacklist[0]))
126 * The transitive closure of entry points that can be reached from probe context.
127 * (Apart from routines whose names begin with dtrace_).
/*
 * Table of routine names consulted by __fbt_provide_module() (via the
 * file-local bsearch() with _cmp()) unless the "IgnoreFBTBlacklist"
 * boot-arg is non-zero.
 * The lookup is a binary search using strncmp() ordering, so entries must
 * be kept in ascending byte order (upper-case sorts before lower-case).
 */
129 static const char * probe_ctx_closure
[] =
132 "MapUserMemoryWindow",
134 "absolutetime_to_microtime",
136 "clock_get_calendar_nanotime_nowait",
150 "get_bsdthread_info",
153 "hw_compare_and_store",
158 "mach_absolute_time",
161 "mapping_phys_lookup",
162 "max_valid_stack_address",
163 "ml_at_interrupt_context",
164 "ml_phys_write_byte_64",
165 "ml_phys_write_half_64",
166 "ml_phys_write_word_64",
167 "ml_set_interrupts_enabled",
/* Number of entries in probe_ctx_closure, computed at compile time. */
183 #define PROBE_CTX_CLOSURE_COUNT (sizeof(probe_ctx_closure)/sizeof(probe_ctx_closure[0]))
/*
 * _cmp -- comparator handed to the file-local bsearch().
 *
 * a: the search key, a NUL-terminated string.
 * b: address of a table slot that holds a `const char *` entry.
 *
 * strlen(a) + 1 bytes are compared so that the key's terminating NUL takes
 * part in the comparison: the result is zero only for an exact match, never
 * for a key that is merely a prefix of the entry.
 *
 * Returns the strncmp() result: negative, zero or positive.
 */
static int _cmp(const void *a, const void *b)
{
	const char *key = (const char *)a;
	/* Keep the slot const-qualified; the tables searched are read-only. */
	const char *entry = *(const char *const *)b;

	return strncmp(key, entry, strlen(key) + 1);
}
/*
 * bsearch -- file-local binary search over an array of fixed-size elements.
 *
 * key:    passed unchanged as the first argument of every compar() call.
 * size:   element stride in bytes, used to step from `base` to the probe slot.
 * compar: called as compar(key, element); a positive result moves the
 *         search window to the right of the probed element.
 *
 * The loop starts with `lim = nmemb` and halves `lim` on every iteration,
 * probing the element at index lim/2 of the current window.
 *
 * NOTE(review): the declarations of base0, nmemb, lim and cmp, the handling
 * of cmp == 0 and the final return are not visible in this view; callers
 * compare the result against NULL, so presumably a match returns the element
 * and a miss returns NULL -- confirm against the full file.
 */
190 static const void * bsearch(
191 register const void *key
,
194 register size_t size
,
195 register int (*compar
)(const void *, const void *)) {
197 register const char *base
= base0
;
200 register const void *p
;
202 for (lim
= nmemb
; lim
!= 0; lim
>>= 1) {
/* Probe the middle element of the remaining window. */
203 p
= base
+ (lim
>> 1) * size
;
204 cmp
= (*compar
)(key
, p
);
207 if (cmp
> 0) { /* key > p: move right */
/* Restart the window just past the probed element. */
208 base
= (const char *)p
+ size
;
210 } /* else move left */
/*
 * fbt_invop -- find the FBT probe patched at `addr` and fire it.
 *
 * addr:  address to match; hashed with FBT_ADDR2NDX() to select an
 *        fbt_probetab chain, then compared with each fbtp_patchpoint.
 * stack: reinterpreted as the ppc_saved_state_t of the trapped context.
 * rval:  forwarded as the second dtrace_probe() argument when
 *        fbtp_roffset is non-zero.
 *
 * Returns the matching probe's fbtp_rval.
 * NOTE(review): the return type, the `else` separating the two
 * dtrace_probe() calls and the no-match return are not visible in this view.
 */
216 fbt_invop(uintptr_t addr
, uintptr_t *stack
, uintptr_t rval
)
218 fbt_probe_t
*fbt
= fbt_probetab
[FBT_ADDR2NDX(addr
)];
/* Register values are truncated to 32 bits unless _cpu_capabilities has k64Bit set. */
219 uint64_t mask
= (_cpu_capabilities
& k64Bit
) ? 0xffffffffffffffffULL
: 0x00000000ffffffffULL
;
/* Walk the hash chain looking for the probe whose patch point is addr. */
221 for (; fbt
!= NULL
; fbt
= fbt
->fbtp_hashnext
) {
222 if ((uintptr_t)fbt
->fbtp_patchpoint
== addr
) {
/* fbtp_roffset == 0: presumably an entry probe (return probes record an offset) -- TODO confirm. */
224 if (fbt
->fbtp_roffset
== 0) {
225 ppc_saved_state_t
*regs
= (ppc_saved_state_t
*)stack
;
/* Publish the saved link register as the caller while the probe fires. */
227 CPU
->cpu_dtrace_caller
= regs
->save_lr
;
/* Probe arguments are the masked saved r3..r7. */
229 dtrace_probe(fbt
->fbtp_id
, regs
->save_r3
& mask
, regs
->save_r4
& mask
,
230 regs
->save_r5
& mask
, regs
->save_r6
& mask
, regs
->save_r7
& mask
);
232 CPU
->cpu_dtrace_caller
= (uintptr_t)NULL
;
/* Non-zero fbtp_roffset: probe arguments are the offset and rval. */
235 dtrace_probe(fbt
->fbtp_id
, fbt
->fbtp_roffset
, rval
, 0, 0, 0);
237 if (fbt
->fbtp_rval
== DTRACE_INVOP_TAILJUMP
) {
238 ppc_saved_state_t
*regs
= (ppc_saved_state_t
*)stack
;
/*
 * Emulate the patched relative jump: resume at the patch point plus the
 * sign-extended LI field of the saved instruction, masked to the
 * current address width.
 */
240 regs
->save_srr0
= (uint64_t)fbt
->fbtp_patchpoint
+ FBT_LI_EXTD64(fbt
->fbtp_savedval
);
241 regs
->save_srr0
&= mask
;
244 CPU
->cpu_dtrace_caller
= (uintptr_t)NULL
;
/* Tell the caller which instruction emulation to perform. */
247 return (fbt
->fbtp_rval
);
254 #include <ppc/proc_reg.h> /* For USER_MODE */
255 #define IS_USER_TRAP(regs) USER_MODE((regs)->save_srr1)
256 #define T_VECTOR_SIZE 4 /* function pointer size */
257 #define T_PROGRAM (0x07 * T_VECTOR_SIZE)
258 #define FBT_EXCEPTION_CODE T_PROGRAM
/*
 * Trap callback that emulates the instruction displaced by an FBT probe.
 * NOTE(review): the header line carrying this function's name is not visible
 * in this view; presumably this is fbt_perfCallback(trapno, regs, unused1,
 * unused2) as prototyped earlier in the file -- confirm.
 *
 * Only traps that are not user-mode (per IS_USER_TRAP) and whose trapno
 * equals FBT_EXCEPTION_CODE are handled. Interrupts are disabled around the
 * dtrace_invop() call and the previous level is restored afterwards.
 * retval starts as KERN_FAILURE and is set to KERN_SUCCESS by each
 * recognised DTRACE_INVOP_* case.
 */
263 ppc_saved_state_t
*regs
,
267 #pragma unused (unused1)
268 #pragma unused (unused2)
269 kern_return_t retval
= KERN_FAILURE
;
271 if (!IS_USER_TRAP(regs
) && FBT_EXCEPTION_CODE
== trapno
) {
274 oldlevel
= ml_set_interrupts_enabled(FALSE
);
/* dtrace_invop() gets the trapping PC, the saved state and the saved r3. */
276 switch (dtrace_invop( regs
->save_srr0
, (uintptr_t *)regs
, regs
->save_r3
)) {
277 case DTRACE_INVOP_NOP
:
278 regs
->save_srr0
+= DTRACE_INVOP_NOP_SKIP
; /* Skip over the bytes of the patched NOP */
279 retval
= KERN_SUCCESS
;
282 case DTRACE_INVOP_MFLR_R0
:
283 regs
->save_r0
= regs
->save_lr
; /* Emulate patched mflr r0 */
284 regs
->save_srr0
+= DTRACE_INVOP_MFLR_R0_SKIP
; /* Skip over the bytes of the patched mflr r0 */
285 retval
= KERN_SUCCESS
;
288 case DTRACE_INVOP_RET
:
289 regs
->save_srr0
= regs
->save_lr
; /* Emulate patched blr by resuming execution at the LR */
290 retval
= KERN_SUCCESS
;
293 case DTRACE_INVOP_BCTR
:
294 regs
->save_srr0
= regs
->save_ctr
; /* Emulate patched bctr by resuming execution at the CTR */
295 retval
= KERN_SUCCESS
;
/* No srr0 update here; presumably fbt_invop() already redirected save_srr0 for tail jumps -- TODO confirm. */
298 case DTRACE_INVOP_TAILJUMP
:
299 retval
= KERN_SUCCESS
;
/* NOTE(review): presumably the default case; its label is not visible in this view. */
303 retval
= KERN_FAILURE
;
/* Restore the interrupt level saved before the dispatch. */
306 ml_set_interrupts_enabled(oldlevel
);
/*
 * Wrapper that forwards its arguments to fbt_perfCallback() and tests the
 * result for KERN_SUCCESS.
 * NOTE(review): the header line carrying this function's name is not visible
 * in this view; presumably this is fbt_perfIntCallback() as prototyped
 * earlier in the file. What happens on success is also not visible -- confirm.
 */
315 ppc_saved_state_t
*regs
,
319 kern_return_t retval
= KERN_FAILURE
;
/* retval is overwritten with whatever fbt_perfCallback() returns. */
321 if (KERN_SUCCESS
== (retval
= fbt_perfCallback(trapno
, regs
, unused1
, unused2
)))
329 __fbt_provide_module(void *arg
, struct modctl
*ctl
)
332 struct mach_header
*mh
;
333 struct load_command
*cmd
;
334 struct segment_command
*orig_ts
= NULL
, *orig_le
= NULL
;
335 struct symtab_command
*orig_st
= NULL
;
336 struct nlist
*sym
= NULL
;
338 uintptr_t instrLow
, instrHigh
;
342 int gIgnoreFBTBlacklist
= 0;
343 PE_parse_boot_argn("IgnoreFBTBlacklist", &gIgnoreFBTBlacklist
, sizeof (gIgnoreFBTBlacklist
));
345 mh
= (struct mach_header
*)(ctl
->address
);
346 modname
= ctl
->mod_modname
;
348 if (0 == ctl
->address
|| 0 == ctl
->size
) /* Has the linker been jettisoned? */
352 * Employees of dtrace and their families are ineligible. Void
356 if (LIT_STRNEQL(modname
, "com.apple.driver.dtrace"))
359 if (strstr(modname
, "CHUD") != NULL
)
362 if (mh
->magic
!= MH_MAGIC
)
365 cmd
= (struct load_command
*) &mh
[1];
366 for (i
= 0; i
< mh
->ncmds
; i
++) {
367 if (cmd
->cmd
== LC_SEGMENT
) {
368 struct segment_command
*orig_sg
= (struct segment_command
*) cmd
;
370 if (LIT_STRNEQL(orig_sg
->segname
, SEG_TEXT
))
372 else if (LIT_STRNEQL(orig_sg
->segname
, SEG_LINKEDIT
))
374 else if (LIT_STRNEQL(orig_sg
->segname
, ""))
375 orig_ts
= orig_sg
; /* kexts have a single unnamed segment */
377 else if (cmd
->cmd
== LC_SYMTAB
)
378 orig_st
= (struct symtab_command
*) cmd
;
380 cmd
= (struct load_command
*) ((caddr_t
) cmd
+ cmd
->cmdsize
);
383 if ((orig_ts
== NULL
) || (orig_st
== NULL
) || (orig_le
== NULL
))
386 sym
= (struct nlist
*)(orig_le
->vmaddr
+ orig_st
->symoff
- orig_le
->fileoff
);
387 strings
= (char *)(orig_le
->vmaddr
+ orig_st
->stroff
- orig_le
->fileoff
);
389 /* Find extent of the TEXT section */
390 instrLow
= (uintptr_t)orig_ts
->vmaddr
;
391 instrHigh
= (uintptr_t)(orig_ts
->vmaddr
+ orig_ts
->vmsize
);
393 for (i
= 0; i
< orig_st
->nsyms
; i
++) {
394 fbt_probe_t
*fbt
, *retfbt
;
395 machine_inst_t
*instr
, *limit
, theInstr
;
396 uint8_t n_type
= sym
[i
].n_type
& (N_TYPE
| N_EXT
);
397 char *name
= strings
+ sym
[i
].n_un
.n_strx
;
400 /* Check that the symbol is a global and that it has a name. */
401 if (((N_SECT
| N_EXT
) != n_type
&& (N_ABS
| N_EXT
) != n_type
))
404 if (0 == sym
[i
].n_un
.n_strx
) /* iff a null, "", name. */
407 /* Lop off omnipresent leading underscore. */
411 if (LIT_STRNSTART(name
, "dtrace_") && !LIT_STRNSTART(name
, "dtrace_safe_")) {
413 * Anything beginning with "dtrace_" may be called
414 * from probe context unless it explicitly indicates
415 * that it won't be called from probe context by
416 * using the prefix "dtrace_safe_".
421 if (LIT_STRNSTART(name
, "fasttrap_") ||
422 LIT_STRNSTART(name
, "fuword") ||
423 LIT_STRNSTART(name
, "suword") ||
424 LIT_STRNEQL(name
, "sprlock") ||
425 LIT_STRNEQL(name
, "sprunlock") ||
426 LIT_STRNEQL(name
, "uread") ||
427 LIT_STRNEQL(name
, "uwrite"))
428 continue; /* Fasttrap inner-workings. */
430 if (LIT_STRNSTART(name
, "dsmos_"))
431 continue; /* Don't Steal Mac OS X! */
433 if (LIT_STRNSTART(name
, "_dtrace"))
434 continue; /* Shims in dtrace.c */
436 if (LIT_STRNSTART(name
, "chud"))
437 continue; /* Professional courtesy. */
439 if (LIT_STRNSTART(name
, "hibernate_"))
440 continue; /* Let sleeping dogs lie. */
442 if (LIT_STRNEQL(name
, "_ZN9IOService14newTemperatureElPS_") || /* IOService::newTemperature */
443 LIT_STRNEQL(name
, "_ZN9IOService26temperatureCriticalForZoneEPS_")) /* IOService::temperatureCriticalForZone */
444 continue; /* Per the fire code */
447 * Place no probes (illegal instructions) in the exception handling path!
449 if (LIT_STRNEQL(name
, "L_handler700") ||
450 LIT_STRNEQL(name
, "save_get_phys_64") ||
451 LIT_STRNEQL(name
, "save_get_phys_32") ||
452 LIT_STRNEQL(name
, "EmulExit") ||
453 LIT_STRNEQL(name
, "Emulate") ||
454 LIT_STRNEQL(name
, "Emulate64") ||
455 LIT_STRNEQL(name
, "switchSegs") ||
456 LIT_STRNEQL(name
, "save_ret_phys"))
459 if (LIT_STRNEQL(name
, "thandler") ||
460 LIT_STRNEQL(name
, "versave") ||
461 LIT_STRNEQL(name
, "timer_event") ||
462 LIT_STRNEQL(name
, "hw_atomic_or") ||
463 LIT_STRNEQL(name
, "trap"))
466 if (LIT_STRNEQL(name
, "fbt_perfCallback") ||
467 LIT_STRNEQL(name
, "fbt_perfIntCallback") ||
468 LIT_STRNEQL(name
, "ml_set_interrupts_enabled") ||
469 LIT_STRNEQL(name
, "dtrace_invop") ||
470 LIT_STRNEQL(name
, "fbt_invop") ||
471 LIT_STRNEQL(name
, "sdt_invop") ||
472 LIT_STRNEQL(name
, "max_valid_stack_address"))
476 * Probes encountered while we're on the interrupt stack are routed along
477 * the interrupt handling path. No probes allowed there either!
479 if (LIT_STRNEQL(name
, "ihandler") ||
480 LIT_STRNEQL(name
, "interrupt") ||
481 LIT_STRNEQL(name
, "disable_preemption"))
485 * Avoid weird stack voodoo in and under machine_stack_handoff et al
487 if (LIT_STRNSTART(name
, "machine_stack") ||
488 LIT_STRNEQL(name
, "getPerProc") || /* Called in machine_stack_handoff with weird stack state */
489 LIT_STRNEQL(name
, "fpu_save") || /* Called in machine_stack_handoff with weird stack state */
490 LIT_STRNEQL(name
, "vec_save") || /* Called in machine_stack_handoff with weird stack state */
491 LIT_STRNEQL(name
, "pmap_switch")) /* Called in machine_stack_handoff with weird stack state */
495 * Avoid machine_ routines. PR_5346750.
497 if (LIT_STRNSTART(name
, "machine_"))
501 * Avoid low level pmap and virtual machine monitor PowerPC routines. See PR_5379018.
504 if (LIT_STRNSTART(name
, "hw_") ||
505 LIT_STRNSTART(name
, "mapping_") ||
506 LIT_STRNSTART(name
, "commpage_") ||
507 LIT_STRNSTART(name
, "pmap_") ||
508 LIT_STRNSTART(name
, "vmm_"))
511 * Place no probes on critical routines. PR_5221096
513 if (!gIgnoreFBTBlacklist
&&
514 bsearch( name
, critical_blacklist
, CRITICAL_BLACKLIST_COUNT
, sizeof(name
), _cmp
) != NULL
)
518 * Place no probes that could be hit in probe context.
520 if (!gIgnoreFBTBlacklist
&&
521 bsearch( name
, probe_ctx_closure
, PROBE_CTX_CLOSURE_COUNT
, sizeof(name
), _cmp
) != NULL
)
525 * Place no probes that could be hit on the way to the debugger.
527 if (LIT_STRNSTART(name
, "kdp_") ||
528 LIT_STRNSTART(name
, "kdb_") ||
529 LIT_STRNSTART(name
, "kdbg_") ||
530 LIT_STRNSTART(name
, "kdebug_") ||
531 LIT_STRNEQL(name
, "kernel_debug") ||
532 LIT_STRNEQL(name
, "Debugger") ||
533 LIT_STRNEQL(name
, "Call_DebuggerC") ||
534 LIT_STRNEQL(name
, "lock_debugger") ||
535 LIT_STRNEQL(name
, "unlock_debugger") ||
536 LIT_STRNEQL(name
, "SysChoked"))
540 * Place no probes that could be hit on the way to a panic.
542 if (NULL
!= strstr(name
, "panic_") ||
543 LIT_STRNEQL(name
, "panic") ||
544 LIT_STRNEQL(name
, "handleMck") ||
545 LIT_STRNEQL(name
, "unresolved_kernel_trap"))
548 if (dtrace_probe_lookup(fbt_id
, modname
, name
, NULL
) != 0)
552 * Scan forward for mflr r0.
554 for (j
= 0, instr
= (machine_inst_t
*)sym
[i
].n_value
, theInstr
= 0;
555 (j
< 4) && ((uintptr_t)instr
>= instrLow
) && (instrHigh
> (uintptr_t)instr
);
559 if (theInstr
== FBT_MFLR_R0
) /* Place the entry probe here. */
561 if (theInstr
== FBT_MTLR_R0
) /* We've gone too far, bail. */
563 if (theInstr
== FBT_BLR
) /* We've gone too far, bail. */
567 if (theInstr
!= FBT_MFLR_R0
)
570 limit
= (machine_inst_t
*)instrHigh
;
572 fbt
= kmem_zalloc(sizeof (fbt_probe_t
), KM_SLEEP
);
573 strlcpy( (char *)&(fbt
->fbtp_name
), name
, MAX_FBTP_NAME_CHARS
);
574 fbt
->fbtp_id
= dtrace_probe_create(fbt_id
, modname
, name
, FBT_ENTRY
, FBT_AFRAMES_ENTRY
, fbt
);
575 fbt
->fbtp_patchpoint
= instr
;
577 fbt
->fbtp_loadcnt
= ctl
->mod_loadcnt
;
578 fbt
->fbtp_rval
= DTRACE_INVOP_MFLR_R0
;
579 fbt
->fbtp_savedval
= theInstr
;
580 fbt
->fbtp_patchval
= FBT_PATCHVAL
;
582 fbt
->fbtp_hashnext
= fbt_probetab
[FBT_ADDR2NDX(instr
)];
583 fbt
->fbtp_symndx
= i
;
584 fbt_probetab
[FBT_ADDR2NDX(instr
)] = fbt
;
586 instr
++; /* Move on down the line */
593 * We (desperately) want to avoid erroneously instrumenting a
594 * jump table. To determine if we're looking at a true instruction
595 * or an inline jump table that happens to contain the same
596 * byte sequences, we resort to some heuristic sleaze: we
597 * treat this instruction as being contained within a pointer,
598 * and see if that pointer points to within the body of the
599 * function. If it does, we refuse to instrument it.
602 machine_inst_t
*ptr
= *(machine_inst_t
**)instr
;
604 if (ptr
>= (machine_inst_t
*)sym
[i
].n_value
&& ptr
< limit
) {
611 * OK, it's an instruction.
615 /* Walked onto the start of the next routine? If so, bail out from this function. */
616 if (theInstr
== FBT_MFLR_R0
)
619 if (theInstr
!= FBT_MTLR_R0
) {
626 * Scan forward for a blr, bctr, or a jump (relative, no LR change).
629 for (j
= 0; (j
< 12) && (instr
< limit
); j
++, instr
++) {
631 if (theInstr
== FBT_BLR
|| theInstr
== FBT_BCTR
|| IS_JUMP(theInstr
) ||
632 theInstr
== FBT_MFLR_R0
|| theInstr
== FBT_MTLR_R0
)
636 if (!(theInstr
== FBT_BLR
|| theInstr
== FBT_BCTR
|| IS_JUMP(theInstr
)))
640 * We have a winner: "mtlr r0; ... ; {blr, bctr, j}" !
642 fbt
= kmem_zalloc(sizeof (fbt_probe_t
), KM_SLEEP
);
643 strlcpy( (char *)&(fbt
->fbtp_name
), name
, MAX_FBTP_NAME_CHARS
);
645 if (retfbt
== NULL
) {
646 fbt
->fbtp_id
= dtrace_probe_create(fbt_id
, modname
,
647 name
, FBT_RETURN
, FBT_AFRAMES_RETURN
, fbt
);
649 retfbt
->fbtp_next
= fbt
;
650 fbt
->fbtp_id
= retfbt
->fbtp_id
;
654 fbt
->fbtp_patchpoint
= instr
;
656 fbt
->fbtp_loadcnt
= ctl
->mod_loadcnt
;
658 if (theInstr
== FBT_BLR
)
659 fbt
->fbtp_rval
= DTRACE_INVOP_RET
;
660 else if (theInstr
== FBT_BCTR
)
661 fbt
->fbtp_rval
= DTRACE_INVOP_BCTR
;
663 fbt
->fbtp_rval
= DTRACE_INVOP_TAILJUMP
;
666 (uintptr_t)((uint8_t *)instr
- (uint8_t *)sym
[i
].n_value
);
668 fbt
->fbtp_savedval
= *instr
;
669 fbt
->fbtp_patchval
= FBT_PATCHVAL
;
670 fbt
->fbtp_hashnext
= fbt_probetab
[FBT_ADDR2NDX(instr
)];
671 fbt
->fbtp_symndx
= i
;
672 fbt_probetab
[FBT_ADDR2NDX(instr
)] = fbt
;
678 extern struct modctl g_fbt_kernctl
;
679 #undef kmem_alloc /* from its binding to dt_kmem_alloc glue */
680 #undef kmem_free /* from its binding to dt_kmem_free glue */
681 #include <vm/vm_kern.h>
/*
 * fbt_provide_module -- run __fbt_provide_module() against the global
 * g_fbt_kernctl module record, then release the memory that record describes.
 *
 * arg: forwarded unchanged to __fbt_provide_module().
 * ctl: not referenced in the visible lines; g_fbt_kernctl is used instead.
 *
 * kmem_alloc/kmem_free are #undef'd just above this function, so the
 * kmem_free() call below is no longer the dt_kmem_free glue binding.
 * NOTE(review): braces are not visible in this view, so whether the two
 * zeroing assignments sit inside the `if` cannot be told from here.
 */
685 fbt_provide_module(void *arg
, struct modctl
*ctl
)
688 __fbt_provide_module(arg
, &g_fbt_kernctl
);
/* Free only if the record still holds an address. */
690 if ( (vm_offset_t
)g_fbt_kernctl
.address
!= (vm_offset_t
)NULL
)
/* The size is rounded up to a whole page for the free. */
691 kmem_free(kernel_map
, (vm_offset_t
)g_fbt_kernctl
.address
, round_page(g_fbt_kernctl
.size
));
/* Clear the record. */
692 g_fbt_kernctl
.address
= 0;
693 g_fbt_kernctl
.size
= 0;