4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
27 /* #pragma ident "@(#)fbt.c 1.15 05/09/19 SMI" */
31 #define _KERNEL /* Solaris vs. Darwin */
35 #define MACH__POSIX_C_SOURCE_PRIVATE 1 /* pulls in suitable savearea from mach/ppc/thread_status.h */
36 #include <kern/cpu_data.h>
37 #include <kern/thread.h>
38 #include <mach/thread_status.h>
40 #include <mach-o/loader.h>
41 #include <mach-o/nlist.h>
43 extern struct mach_header _mh_execute_header
; /* the kernel's mach header */
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/errno.h>
49 #include <sys/ioctl.h>
51 #include <sys/fcntl.h>
52 #include <miscfs/devfs/devfs.h>
54 #include <sys/dtrace.h>
55 #include <sys/dtrace_impl.h>
58 #include <sys/dtrace_glue.h>
59 #include <machine/cpu_capabilities.h>
/*
 * Constants shared by the probe-creation code and the trap callback in
 * this file.
 *
 * DTRACE_INVOP_NOP_SKIP and DTRACE_INVOP_MFLR_R0_SKIP are the byte counts
 * the trap callback adds to save_srr0 to step over a patched instruction.
 * DTRACE_INVOP_MFLR_R0 is the code stored in fbtp_rval for entry probes
 * and matched by the switch in the trap callback.
 */
61 #define DTRACE_INVOP_NOP_SKIP 4
63 #define DTRACE_INVOP_MFLR_R0 11
64 #define DTRACE_INVOP_MFLR_R0_SKIP 4
/*
 * Instruction words compared against the words read while scanning a
 * function body: mflr r0, mtlr r0, blr and bctr.
 */
66 #define FBT_MFLR_R0 0x7c0802a6
68 #define FBT_MTLR_R0 0x7c0803a6
69 #define FBT_BLR 0x4e800020
70 #define FBT_BCTR 0x4e800420
/*
 * IS_JUMP is true when every bit outside FBT_LI_MASK equals FBT_JUMP.
 * FBT_LI_EXTD64 keeps the FBT_LI_MASK bits of the word and, when bit
 * 0x02000000 is set, ORs in 0xfffffffffc000000 so the displacement is
 * sign-extended to 64 bits; fbt_invop adds the result to the patch point
 * address to form the tail-jump target.
 */
72 #define FBT_LI_MASK 0x03fffffc
73 #define FBT_JUMP 0x48000000
74 #define IS_JUMP(instr) (((instr) & ~FBT_LI_MASK) == FBT_JUMP) /* Relative, No LR update -- AA == 0b, LK == 0b */
75 #define FBT_LI_EXTD64(instr) \
76 (((instr) & 0x02000000) ? \
77 (((uint64_t)((instr) & FBT_LI_MASK)) | 0xfffffffffc000000ULL) : \
78 ((uint64_t)((instr) & FBT_LI_MASK)))
/*
 * FBT_PATCHVAL is recorded in fbtp_patchval for every probe created here.
 * NOTE(review): presumably the trapping instruction later written over the
 * patch point; the code that writes it is not in this part of the file.
 * FBT_AFRAMES_ENTRY and FBT_AFRAMES_RETURN are passed to
 * dtrace_probe_create for entry and return probes respectively.
 */
80 #define FBT_PATCHVAL 0x7c810808
81 #define FBT_AFRAMES_ENTRY 6
82 #define FBT_AFRAMES_RETURN 6
/* Probe names handed to dtrace_probe_create. */
84 #define FBT_ENTRY "entry"
85 #define FBT_RETURN "return"
/* Hash bucket index for a patch point: address shifted right by 4, then masked with fbt_probetab_mask. */
86 #define FBT_ADDR2NDX(addr) ((((uintptr_t)(addr)) >> 4) & fbt_probetab_mask)
/* Provider id, probe hash table and its index mask; declared here, defined outside this file section. */
88 extern dtrace_provider_id_t fbt_id
;
89 extern fbt_probe_t
**fbt_probetab
;
90 extern int fbt_probetab_mask
;
93 * Critical routines that must not be probed. PR_5221096, PR_5379018.
/*
 * Function names that __fbt_provide_module declines to instrument unless
 * the IgnoreFBTBlacklist boot-arg is nonzero.  The table is searched with
 * the file-local bsearch() and _cmp(), which is a binary search, so the
 * entries must remain in strcmp() order.
 * NOTE(review): the initializer braces and most entries are missing from
 * this text; restore them from the pristine file before building.
 */
96 static const char * critical_blacklist
[] =
106 "cpu_per_proc_alloc",
108 "cpu_signal_handler",
115 "mapSkipListVerifyC",
117 "register_cpu_setup_func",
118 "unregister_cpu_setup_func"
/* Element count of critical_blacklist, passed to bsearch() as nmemb. */
120 #define CRITICAL_BLACKLIST_COUNT (sizeof(critical_blacklist)/sizeof(critical_blacklist[0]))
123 * The transitive closure of entry points that can be reached from probe context.
124 * (Apart from routines whose names begin with dtrace_ or dtxnu_.)
/*
 * Second exclusion table consulted by __fbt_provide_module, again only
 * when the IgnoreFBTBlacklist boot-arg is zero.  It is searched with the
 * file-local bsearch() and _cmp(), so entries must remain in strcmp()
 * order (upper-case letters sort before lower-case ones).
 * NOTE(review): the initializer braces and most entries are missing from
 * this text; restore them from the pristine file before building.
 */
126 static const char * probe_ctx_closure
[] =
129 "MapUserMemoryWindow",
131 "absolutetime_to_microtime",
133 "clock_get_calendar_nanotime_nowait",
147 "get_bsdthread_info",
150 "hw_compare_and_store",
155 "mach_absolute_time",
158 "mapping_phys_lookup",
159 "max_valid_stack_address",
160 "ml_at_interrupt_context",
161 "ml_phys_write_byte_64",
162 "ml_phys_write_half_64",
163 "ml_phys_write_word_64",
164 "ml_set_interrupts_enabled",
/* Element count of probe_ctx_closure, passed to bsearch() as nmemb. */
177 #define PROBE_CTX_CLOSURE_COUNT (sizeof(probe_ctx_closure)/sizeof(probe_ctx_closure[0]))
/*
 * Comparator used with the file-local bsearch() over the name tables.
 *
 * a: the search key, a NUL-terminated string passed directly.
 * b: address of a table slot, i.e. a pointer to a string pointer.
 *
 * Returns the strcmp() ordering of the key against the slot's string:
 * negative, zero or positive.
 */
static int
_cmp(const void *a, const void *b)
{
	const char *key = (const char *)a;
	const char *const *slot = (const char *const *)b;

	return (strcmp(key, *slot));
}
/*
 * File-local binary search over a sorted array.
 *
 * key:    object to look for; handed to compar as its first argument.
 * base0:  first element of the array, which must be sorted consistently
 *         with compar.
 * nmemb:  number of elements in the array.
 * size:   size in bytes of one element.
 * compar: returns <0, 0 or >0 as key orders before, equal to or after the
 *         element passed as its second argument.
 *
 * Returns a pointer to a matching element, or NULL when none matches
 * (including when nmemb is 0).
 */
static const void *
bsearch(const void *key, const void *base0, size_t nmemb, size_t size,
    int (*compar)(const void *, const void *))
{
	const char *base = base0;
	size_t lim;

	for (lim = nmemb; lim != 0; lim >>= 1) {
		const void *p = base + (lim >> 1) * size;
		int cmp = (*compar)(key, p);

		if (cmp == 0)
			return (p);
		if (cmp > 0) {	/* key > p: move right */
			base = (const char *)p + size;
			/* The probed element is excluded, so one fewer remains. */
			lim--;
		}		/* else move left */
	}
	return (NULL);
}
/*
 * fbt_invop: find the FBT probe whose patch point equals addr and fire it.
 *
 * addr:  address to look up; hashed with FBT_ADDR2NDX to pick a bucket of
 *        fbt_probetab, then compared against each fbtp_patchpoint.
 * stack: reinterpreted as a ppc_saved_state_t pointer to read the argument
 *        registers and, for tail jumps, to rewrite save_srr0.
 * rval:  passed as the second probe argument when fbtp_roffset is nonzero.
 *
 * On a match the function returns that probe's fbtp_rval.
 *
 * NOTE(review): several original lines are missing from this text (the
 * return type, braces, the branch separating the fbtp_roffset == 0 case
 * from the second dtrace_probe call, and the value returned when no probe
 * matches).  Confirm against the pristine file.
 */
210 fbt_invop(uintptr_t addr
, uintptr_t *stack
, uintptr_t rval
)
212 fbt_probe_t
*fbt
= fbt_probetab
[FBT_ADDR2NDX(addr
)];
/* Register values are truncated to their low 32 bits unless _cpu_capabilities has k64Bit set. */
213 uint64_t mask
= (_cpu_capabilities
& k64Bit
) ? 0xffffffffffffffffULL
: 0x00000000ffffffffULL
;
/* Walk the hash chain for this bucket. */
215 for (; fbt
!= NULL
; fbt
= fbt
->fbtp_hashnext
) {
216 if ((uintptr_t)fbt
->fbtp_patchpoint
== addr
) {
/* fbtp_roffset is left zero for entry probes and set to the offset within the function for return probes. */
218 if (fbt
->fbtp_roffset
== 0) {
219 ppc_saved_state_t
*regs
= (ppc_saved_state_t
*)stack
;
/* cpu_dtrace_caller holds the probed address for the duration of the dtrace_probe call. */
221 CPU
->cpu_dtrace_caller
= addr
;
/* Probe arguments are the saved r3 through r7, masked. */
223 dtrace_probe(fbt
->fbtp_id
, regs
->save_r3
& mask
, regs
->save_r4
& mask
,
224 regs
->save_r5
& mask
, regs
->save_r6
& mask
, regs
->save_r7
& mask
);
226 CPU
->cpu_dtrace_caller
= NULL
;
/* This call passes the offset within the function and rval as the first two probe arguments. */
229 dtrace_probe(fbt
->fbtp_id
, fbt
->fbtp_roffset
, rval
, 0, 0, 0);
/* For a patched relative jump, resume at patch point plus the sign-extended displacement of the saved instruction. */
231 if (fbt
->fbtp_rval
== DTRACE_INVOP_TAILJUMP
) {
232 ppc_saved_state_t
*regs
= (ppc_saved_state_t
*)stack
;
234 regs
->save_srr0
= (uint64_t)fbt
->fbtp_patchpoint
+ FBT_LI_EXTD64(fbt
->fbtp_savedval
);
235 regs
->save_srr0
&= mask
;
238 CPU
->cpu_dtrace_caller
= NULL
;
/* The caller switches on this code to emulate the displaced instruction. */
241 return (fbt
->fbtp_rval
);
248 #include <ppc/proc_reg.h> /* For USER_MODE */
/*
 * Trap classification used by the callback below: IS_USER_TRAP applies
 * USER_MODE to the saved srr1, and FBT_EXCEPTION_CODE is the trap number
 * the callback accepts (T_PROGRAM, vector 0x07 scaled by T_VECTOR_SIZE).
 */
249 #define IS_USER_TRAP(regs) USER_MODE((regs)->save_srr1)
250 #define T_VECTOR_SIZE 4 /* function pointer size */
251 #define T_PROGRAM (0x07 * T_VECTOR_SIZE)
252 #define FBT_EXCEPTION_CODE T_PROGRAM
/*
 * Trap callback that services FBT probe traps.
 *
 * When the trap did not come from user mode and trapno equals
 * FBT_EXCEPTION_CODE, interrupts are disabled, dtrace_invop is called with
 * the saved srr0, the saved-state pointer and the saved r3, and the code it
 * returns selects how the displaced instruction is emulated by editing
 * regs.  Interrupts are then restored to their previous setting.  retval
 * starts as KERN_FAILURE and is set to KERN_SUCCESS in each recognised
 * case.
 *
 * NOTE(review): the line carrying the function name and the trapno,
 * unused1 and unused2 parameters is missing from this text, as are the
 * braces, break statements, default label and return.  The name
 * fbt_perfCallback is taken from the call in the wrapper that follows;
 * confirm against the pristine file.
 */
257 ppc_saved_state_t
*regs
,
261 #pragma unused (unused1)
262 #pragma unused (unused2)
263 kern_return_t retval
= KERN_FAILURE
;
/* Only kernel-mode traps with the expected trap number are handled here. */
265 if (!IS_USER_TRAP(regs
) && FBT_EXCEPTION_CODE
== trapno
) {
/* The previous interrupt setting is saved so it can be restored after dispatch. */
268 oldlevel
= ml_set_interrupts_enabled(FALSE
);
270 switch (dtrace_invop( regs
->save_srr0
, (uintptr_t *)regs
, regs
->save_r3
)) {
271 case DTRACE_INVOP_NOP
:
272 regs
->save_srr0
+= DTRACE_INVOP_NOP_SKIP
; /* Skip over the bytes of the patched NOP */
273 retval
= KERN_SUCCESS
;
276 case DTRACE_INVOP_MFLR_R0
:
277 regs
->save_r0
= regs
->save_lr
; /* Emulate patched mflr r0 */
278 regs
->save_srr0
+= DTRACE_INVOP_MFLR_R0_SKIP
; /* Skip over the bytes of the patched mflr r0 */
279 retval
= KERN_SUCCESS
;
282 case DTRACE_INVOP_RET
:
283 regs
->save_srr0
= regs
->save_lr
; /* Emulate patched blr by resuming execution at the LR */
284 retval
= KERN_SUCCESS
;
287 case DTRACE_INVOP_BCTR
:
288 regs
->save_srr0
= regs
->save_ctr
; /* Emulate patched bctr by resuming execution at the CTR */
289 retval
= KERN_SUCCESS
;
/* No register edit in this case: fbt_invop has already rewritten save_srr0 for tail jumps. */
292 case DTRACE_INVOP_TAILJUMP
:
293 retval
= KERN_SUCCESS
;
/* NOTE(review): presumably the default case of the switch; its label is not visible. */
297 retval
= KERN_FAILURE
;
300 ml_set_interrupts_enabled(oldlevel
);
/*
 * Wrapper that forwards trapno, regs, unused1 and unused2 to
 * fbt_perfCallback and tests whether the result is KERN_SUCCESS.
 *
 * NOTE(review): the line carrying the function name and the other
 * parameters, the statement executed on success and the return are all
 * missing from this text.  This is presumably fbt_perfIntCallback, a name
 * that appears in the exclusion list of __fbt_provide_module; confirm
 * against the pristine file.
 */
309 ppc_saved_state_t
*regs
,
313 kern_return_t retval
= KERN_FAILURE
;
315 if (KERN_SUCCESS
== (retval
= fbt_perfCallback(trapno
, regs
, unused1
, unused2
)))
/*
 * __fbt_provide_module: create FBT entry and return probes for the
 * functions of one module.
 *
 * arg: not referenced in the visible text.
 * ctl: module descriptor; address and size locate the Mach-O image,
 *      mod_modname names it and mod_loadcnt is copied into each probe.
 *
 * Outline of the visible code:
 *  1. Reject modules with a zero address or size, the module named
 *     com.apple.driver.dtrace, any module whose name contains CHUD, and
 *     images whose magic is not MH_MAGIC.
 *  2. Walk the load commands to find the text segment (SEG_TEXT, or the
 *     unnamed segment), the SEG_LINKEDIT segment and the LC_SYMTAB command;
 *     all three are required.
 *  3. For each symbol, apply a long series of name filters, then the two
 *     bsearch() lookups in critical_blacklist and probe_ctx_closure (both
 *     bypassed when the IgnoreFBTBlacklist boot-arg is nonzero), then
 *     dtrace_probe_lookup.
 *  4. Scan up to four instructions from the symbol address for mflr r0; if
 *     found, allocate an fbt_probe_t, create the entry probe and push the
 *     record on its fbt_probetab hash chain.
 *  5. Continue scanning for mtlr r0 followed within twelve instructions by
 *     blr, bctr or a relative jump; each hit gets a return-probe record
 *     with fbtp_roffset set to the byte offset from the symbol address.
 *     The first return record creates the probe, later ones are linked
 *     through fbtp_next and reuse its id.
 *
 * The bsearch() calls pass sizeof(name), the size of a char pointer, as
 * the element size; this equals the size of the const char pointer
 * elements of the tables.
 *
 * NOTE(review): many original lines are missing from this text, including
 * braces, the statements guarded by most of the if tests (presumably
 * return or continue), several declarations and the labels and gotos of
 * the return-probe scan.  Confirm every detail against the pristine file.
 */
323 __fbt_provide_module(void *arg
, struct modctl
*ctl
)
326 struct mach_header
*mh
;
327 struct load_command
*cmd
;
328 struct segment_command
*orig_ts
= NULL
, *orig_le
= NULL
;
329 struct symtab_command
*orig_st
= NULL
;
330 struct nlist
*sym
= NULL
;
332 uintptr_t instrLow
, instrHigh
;
/* Boot-arg IgnoreFBTBlacklist; when nonzero the two bsearch() name filters further down are bypassed. */
336 int gIgnoreFBTBlacklist
= 0;
337 PE_parse_boot_argn("IgnoreFBTBlacklist", &gIgnoreFBTBlacklist
, sizeof (gIgnoreFBTBlacklist
));
/* The module image is treated as a Mach-O header located at ctl->address. */
339 mh
= (struct mach_header
*)(ctl
->address
);
340 modname
= ctl
->mod_modname
;
342 if (0 == ctl
->address
|| 0 == ctl
->size
) /* Has the linker been jettisoned? */
346 * Employees of dtrace and their families are ineligible. Void
350 if (strcmp(modname
, "com.apple.driver.dtrace") == 0)
353 if (strstr(modname
, "CHUD") != NULL
)
356 if (mh
->magic
!= MH_MAGIC
)
/* Load commands begin immediately after the mach_header; visit all ncmds of them, advancing by cmdsize. */
359 cmd
= (struct load_command
*) &mh
[1];
360 for (i
= 0; i
< mh
->ncmds
; i
++) {
361 if (cmd
->cmd
== LC_SEGMENT
) {
362 struct segment_command
*orig_sg
= (struct segment_command
*) cmd
;
364 if (strcmp(SEG_TEXT
, orig_sg
->segname
) == 0)
366 else if (strcmp(SEG_LINKEDIT
, orig_sg
->segname
) == 0)
368 else if (strcmp("", orig_sg
->segname
) == 0)
369 orig_ts
= orig_sg
; /* kexts have a single unnamed segment */
371 else if (cmd
->cmd
== LC_SYMTAB
)
372 orig_st
= (struct symtab_command
*) cmd
;
374 cmd
= (struct load_command
*) ((caddr_t
) cmd
+ cmd
->cmdsize
);
/* Text segment, symtab command and link-edit segment must all have been found. */
377 if ((orig_ts
== NULL
) || (orig_st
== NULL
) || (orig_le
== NULL
))
/* The nlist array is read from the start of the link-edit segment; the string table follows its nsyms entries. */
380 sym
= (struct nlist
*)orig_le
->vmaddr
;
381 strings
= ((char *)sym
) + orig_st
->nsyms
* sizeof(struct nlist
);
383 /* Find extent of the TEXT section */
384 instrLow
= (uintptr_t)orig_ts
->vmaddr
;
385 instrHigh
= (uintptr_t)(orig_ts
->vmaddr
+ orig_ts
->vmsize
);
/* Examine every symbol in the table. */
387 for (i
= 0; i
< orig_st
->nsyms
; i
++) {
388 fbt_probe_t
*fbt
, *retfbt
;
389 machine_inst_t
*instr
, *limit
, theInstr
;
390 uint8_t n_type
= sym
[i
].n_type
& (N_TYPE
| N_EXT
);
391 char *name
= strings
+ sym
[i
].n_un
.n_strx
;
394 /* Check that the symbol is a global and that it has a name. */
395 if (((N_SECT
| N_EXT
) != n_type
&& (N_ABS
| N_EXT
) != n_type
))
398 if (0 == sym
[i
].n_un
.n_strx
) /* iff a null, "", name. */
401 /* Lop off omnipresent leading underscore. */
/* The tests of the form strstr(name, prefix) == name are prefix matches: strstr returns name itself only when the match is at offset 0. */
405 if (strstr(name
, "dtrace_") == name
&&
406 strstr(name
, "dtrace_safe_") != name
) {
408 * Anything beginning with "dtrace_" may be called
409 * from probe context unless it explitly indicates
410 * that it won't be called from probe context by
411 * using the prefix "dtrace_safe_".
416 if (strstr(name
, "dsmos_") == name
)
417 continue; /* Don't Steal Mac OS X! */
419 if (strstr(name
, "dtxnu_") == name
||
420 strstr(name
, "_dtrace") == name
)
421 continue; /* Shims in dtrace.c */
423 if (strstr(name
, "chud") == name
)
424 continue; /* Professional courtesy. */
426 if (strstr(name
, "hibernate_") == name
)
427 continue; /* Let sleeping dogs lie. */
429 if (0 == strcmp(name
, "ZN9IOService14newTemperatureElPS_") || /* IOService::newTemperature */
430 0 == strcmp(name
, "ZN9IOService26temperatureCriticalForZoneEPS_")) /* IOService::temperatureCriticalForZone */
431 continue; /* Per the fire code */
434 * Place no probes (illegal instructions) in the exception handling path!
436 if (0 == strcmp(name
, "L_handler700") ||
437 0 == strcmp(name
, "save_get_phys_64") ||
438 0 == strcmp(name
, "save_get_phys_32") ||
439 0 == strcmp(name
, "EmulExit") ||
440 0 == strcmp(name
, "Emulate") ||
441 0 == strcmp(name
, "Emulate64") ||
442 0 == strcmp(name
, "switchSegs") ||
443 0 == strcmp(name
, "save_ret_phys"))
446 if (0 == strcmp(name
, "thandler") ||
447 0 == strcmp(name
, "versave") ||
448 0 == strcmp(name
, "timer_event") ||
449 0 == strcmp(name
, "hw_atomic_or") ||
450 0 == strcmp(name
, "trap"))
/* The routines on the probe-dispatch path of this file are themselves excluded. */
453 if (0 == strcmp(name
, "fbt_perfCallback") ||
454 0 == strcmp(name
, "fbt_perfIntCallback") ||
455 0 == strcmp(name
, "ml_set_interrupts_enabled") ||
456 0 == strcmp(name
, "dtrace_invop") ||
457 0 == strcmp(name
, "fbt_invop") ||
458 0 == strcmp(name
, "sdt_invop") ||
459 0 == strcmp(name
, "max_valid_stack_address"))
463 * Probes encountered while we're on the interrupt stack are routed along
464 * the interrupt handling path. No probes allowed there either!
466 if (0 == strcmp(name
, "ihandler") ||
467 0 == strcmp(name
, "interrupt") ||
468 0 == strcmp(name
, "disable_preemption"))
472 * Avoid weird stack voodoo in and under machine_stack_handoff et al
474 if (strstr(name
, "machine_stack") == name
||
475 0 == strcmp(name
, "getPerProc") || /* Called in machine_stack_handoff with weird stack state */
476 0 == strcmp(name
, "fpu_save") || /* Called in machine_stack_handoff with weird stack state */
477 0 == strcmp(name
, "vec_save") || /* Called in machine_stack_handoff with weird stack state */
478 0 == strcmp(name
, "pmap_switch")) /* Called in machine_stack_handoff with weird stack state */
482 * Avoid machine_ routines. PR_5346750.
484 if (strstr(name
, "machine_") == name
)
488 * Avoid low level pmap and virtual machine monitor PowerPC routines. See PR_5379018.
491 if (strstr(name
, "hw_") == name
||
492 strstr(name
, "mapping_") == name
||
493 strstr(name
, "commpage_") == name
||
494 strstr(name
, "pmap_") == name
||
495 strstr(name
, "vmm_") == name
)
498 * Place no probes on critical routines. PR_5221096
500 if (!gIgnoreFBTBlacklist
&&
501 bsearch( name
, critical_blacklist
, CRITICAL_BLACKLIST_COUNT
, sizeof(name
), _cmp
) != NULL
)
505 * Place no probes that could be hit in probe context.
507 if (!gIgnoreFBTBlacklist
&&
508 bsearch( name
, probe_ctx_closure
, PROBE_CTX_CLOSURE_COUNT
, sizeof(name
), _cmp
) != NULL
)
512 * Place no probes that could be hit on the way to the debugger.
514 if (strstr(name
, "kdp_") == name
||
515 strstr(name
, "kdb_") == name
||
516 strstr(name
, "kdbg_") == name
||
517 strstr(name
, "kdebug_") == name
||
518 0 == strcmp(name
, "kernel_debug") ||
519 0 == strcmp(name
, "Debugger") ||
520 0 == strcmp(name
, "Call_DebuggerC") ||
521 0 == strcmp(name
, "lock_debugger") ||
522 0 == strcmp(name
, "unlock_debugger") ||
523 0 == strcmp(name
, "SysChoked"))
527 * Place no probes that could be hit on the way to a panic.
529 if (NULL
!= strstr(name
, "panic_") ||
530 0 == strcmp(name
, "panic") ||
531 0 == strcmp(name
, "handleMck") ||
532 0 == strcmp(name
, "unresolved_kernel_trap"))
535 if (dtrace_probe_lookup(fbt_id
, modname
, name
, NULL
) != 0)
539 * Scan forward for mflr r0.
541 for (j
= 0, instr
= (machine_inst_t
*)sym
[i
].n_value
, theInstr
= 0;
542 (j
< 4) && ((uintptr_t)instr
>= instrLow
) && (instrHigh
> (uintptr_t)instr
);
546 if (theInstr
== FBT_MFLR_R0
) /* Place the entry probe here. */
548 if (theInstr
== FBT_MTLR_R0
) /* We've gone too far, bail. */
550 if (theInstr
== FBT_BLR
) /* We've gone too far, bail. */
554 if (theInstr
!= FBT_MFLR_R0
)
557 limit
= (machine_inst_t
*)instrHigh
;
559 fbt
= kmem_zalloc(sizeof (fbt_probe_t
), KM_SLEEP
);
560 strlcpy( (char *)&(fbt
->fbtp_name
), name
, MAX_FBTP_NAME_CHARS
);
561 fbt
->fbtp_id
= dtrace_probe_create(fbt_id
, modname
, name
, FBT_ENTRY
, FBT_AFRAMES_ENTRY
, fbt
);
562 fbt
->fbtp_patchpoint
= instr
;
564 fbt
->fbtp_loadcnt
= ctl
->mod_loadcnt
;
565 fbt
->fbtp_rval
= DTRACE_INVOP_MFLR_R0
;
566 fbt
->fbtp_savedval
= theInstr
;
567 fbt
->fbtp_patchval
= FBT_PATCHVAL
;
569 fbt
->fbtp_hashnext
= fbt_probetab
[FBT_ADDR2NDX(instr
)];
570 fbt
->fbtp_symndx
= i
;
571 fbt_probetab
[FBT_ADDR2NDX(instr
)] = fbt
;
573 instr
++; /* Move on down the line */
580 * We (desperately) want to avoid erroneously instrumenting a
581 * jump table. To determine if we're looking at a true instruction
582 * or an inline jump table that happens to contain the same
583 * byte sequences, we resort to some heuristic sleeze: we
584 * treat this instruction as being contained within a pointer,
585 * and see if that pointer points to within the body of the
586 * function. If it does, we refuse to instrument it.
589 machine_inst_t
*ptr
= *(machine_inst_t
**)instr
;
591 if (ptr
>= (machine_inst_t
*)sym
[i
].n_value
&& ptr
< limit
) {
598 * OK, it's an instruction.
602 /* Walked onto the start of the next routine? If so, bail out from this function. */
603 if (theInstr
== FBT_MFLR_R0
)
606 if (theInstr
!= FBT_MTLR_R0
) {
613 * Scan forward for a blr, bctr, or a jump (relative, no LR change).
616 for (j
= 0; (j
< 12) && (instr
< limit
); j
++, instr
++) {
618 if (theInstr
== FBT_BLR
|| theInstr
== FBT_BCTR
|| IS_JUMP(theInstr
) ||
619 theInstr
== FBT_MFLR_R0
|| theInstr
== FBT_MTLR_R0
)
623 if (!(theInstr
== FBT_BLR
|| theInstr
== FBT_BCTR
|| IS_JUMP(theInstr
)))
627 * We have a winner: "mtlr r0; ... ; {blr, bctr, j}" !
629 fbt
= kmem_zalloc(sizeof (fbt_probe_t
), KM_SLEEP
);
630 strlcpy( (char *)&(fbt
->fbtp_name
), name
, MAX_FBTP_NAME_CHARS
);
632 if (retfbt
== NULL
) {
633 fbt
->fbtp_id
= dtrace_probe_create(fbt_id
, modname
,
634 name
, FBT_RETURN
, FBT_AFRAMES_RETURN
, fbt
);
636 retfbt
->fbtp_next
= fbt
;
637 fbt
->fbtp_id
= retfbt
->fbtp_id
;
641 fbt
->fbtp_patchpoint
= instr
;
643 fbt
->fbtp_loadcnt
= ctl
->mod_loadcnt
;
645 if (theInstr
== FBT_BLR
)
646 fbt
->fbtp_rval
= DTRACE_INVOP_RET
;
647 else if (theInstr
== FBT_BCTR
)
648 fbt
->fbtp_rval
= DTRACE_INVOP_BCTR
;
650 fbt
->fbtp_rval
= DTRACE_INVOP_TAILJUMP
;
653 (uintptr_t)((uint8_t *)instr
- (uint8_t *)sym
[i
].n_value
);
655 fbt
->fbtp_savedval
= *instr
;
656 fbt
->fbtp_patchval
= FBT_PATCHVAL
;
657 fbt
->fbtp_hashnext
= fbt_probetab
[FBT_ADDR2NDX(instr
)];
658 fbt
->fbtp_symndx
= i
;
659 fbt_probetab
[FBT_ADDR2NDX(instr
)] = fbt
;
665 extern struct modctl g_fbt_kernctl
;
666 #undef kmem_alloc /* from its binding to dt_kmem_alloc glue */
667 #undef kmem_free /* from its binding to dt_kmem_free glue */
668 #include <vm/vm_kern.h>
/*
 * fbt_provide_module: provider entry point for module probing.
 *
 * arg: forwarded unchanged to __fbt_provide_module.
 * ctl: not referenced in the visible text; the file-scope g_fbt_kernctl
 *      descriptor is passed to __fbt_provide_module instead.
 *
 * After probing, the region described by g_fbt_kernctl.address and
 * g_fbt_kernctl.size (rounded with round_page_32) is released to
 * kernel_map and both fields are zeroed.  A zero address or size is the
 * condition __fbt_provide_module tests for at its top.
 *
 * NOTE(review): the return type, braces and any lines between the
 * signature and the first call are missing from this text; confirm
 * against the pristine file.
 */
672 fbt_provide_module(void *arg
, struct modctl
*ctl
)
675 __fbt_provide_module(arg
, &g_fbt_kernctl
);
/* kmem_free here is the VM routine from vm/vm_kern.h, since the glue binding was removed with #undef just above this function. */
677 kmem_free(kernel_map
, (vm_offset_t
)g_fbt_kernctl
.address
, round_page_32(g_fbt_kernctl
.size
));
678 g_fbt_kernctl
.address
= 0;
679 g_fbt_kernctl
.size
= 0;