#include <mach-o/loader.h>
#include <mach-o/nlist.h>
#include <libkern/kernel_mach_header.h>
+#include <libkern/OSAtomic.h>
#include <sys/param.h>
#include <sys/systm.h>
extern fbt_probe_t **fbt_probetab;
extern int fbt_probetab_mask;
-kern_return_t fbt_perfCallback(int, x86_saved_state_t *, __unused int, __unused int);
+extern int gIgnoreFBTBlacklist; /* From fbt_init */
+
+kern_return_t fbt_perfCallback(int, x86_saved_state_t *, uintptr_t *, __unused int);
/*
* Critical routines that must not be probed. PR_5221096, PR_5379018.
"cpu_topology_start_cpu",
"cpu_type",
"cpuid_cpu_display",
+ "cpuid_extfeatures",
"handle_pending_TLB_flushes",
"hw_compare_and_store",
"machine_idle_cstate",
"IS_64BIT_PROCESS",
"OSCompareAndSwap",
"absolutetime_to_microtime",
+ "act_set_astbsd",
"ast_pending",
- "astbsd_on",
"clock_get_calendar_nanotime_nowait",
"copyin",
"copyin_user",
"prf",
"proc_is64bit",
"proc_selfname",
- "proc_selfpid",
- "proc_selfppid",
"psignal_lock",
"rtc_nanotime_load",
"rtc_nanotime_read",
return (NULL);
}
-#if defined(__i386__)
+/*
+ * Module validation
+ *
+ * Returns TRUE when FBT may provide probes for the module described by
+ * ctl, FALSE otherwise.  A module is rejected when it has no address or
+ * size, is not loaded, or has "CHUD" in its name.  Unless the
+ * gIgnoreFBTBlacklist override is set, it is also rejected when its name
+ * contains any entry of the kext blacklist below.
+ */
+static int
+is_module_valid(struct modctl* ctl)
+{
+	/*
+	 * These drivers control low level functions that when traced
+	 * cause problems often in the sleep/wake paths as well as
+	 * critical debug and panic paths.
+	 * If somebody really wants to drill in on one of these kexts, then
+	 * they can override blacklisting using the gIgnoreFBTBlacklist boot-arg.
+	 */
+	static const char * const blacklisted_kexts[] = {
+		"AppleACPIEC",
+		"AppleACPIPlatform",
+		"AppleRTC",
+		"IOACPIFamily",
+		"AppleIntelCPUPowerManagement",
+		"AppleProfile",
+		"AppleIntelProfile",
+		"AppleEFI",
+	};
+	unsigned int idx;
+
+	ASSERT(!MOD_FBT_PROBES_PROVIDED(ctl));
+	ASSERT(!MOD_FBT_INVALID(ctl));
+
+	/* Nothing to instrument without an address, a size and a loaded image. */
+	if (0 == ctl->mod_address || 0 == ctl->mod_size || 0 == ctl->mod_loaded)
+		return FALSE;
+
+	/* This check runs before the override, so CHUD is always rejected. */
+	if (NULL != strstr(ctl->mod_modname, "CHUD"))
+		return FALSE;
+
+	/*
+	 * If the user sets this (per boot-arg set in fbt_init()), trust they
+	 * know what they are doing and skip the kext blacklist.
+	 */
+	if (gIgnoreFBTBlacklist)
+		return TRUE;
+
+	for (idx = 0; idx < sizeof(blacklisted_kexts) / sizeof(blacklisted_kexts[0]); idx++) {
+		if (NULL != strstr(ctl->mod_modname, blacklisted_kexts[idx]))
+			return FALSE;
+	}
+
+	return TRUE;
+}
+
+/*
+ * FBT probe name validation
+ *
+ * Decides whether a function-boundary probe may be placed on the symbol
+ * called name.  Returns TRUE when the symbol may be instrumented and FALSE
+ * as soon as it matches any of the blacklists below.  When
+ * gIgnoreFBTBlacklist is non-zero every symbol is accepted and none of the
+ * checks run.
+ *
+ * NOTE(review): LIT_STRNSTART and LIT_STRNEQL are defined outside this
+ * function; from their use here they appear to test for a literal prefix
+ * and for an exact match respectively -- confirm against their definitions.
+ */
+static int
+is_symbol_valid(const char* name)
+{
+ /*
+ * If the user set this, trust they know what they are doing.
+ */
+ if (gIgnoreFBTBlacklist)
+ return TRUE;
+
+ if (LIT_STRNSTART(name, "dtrace_") && !LIT_STRNSTART(name, "dtrace_safe_")) {
+ /*
+ * Anything beginning with "dtrace_" may be called
+ * from probe context unless it explicitly indicates
+ * that it won't be called from probe context by
+ * using the prefix "dtrace_safe_".
+ */
+ return FALSE;
+ }
+
+ if (LIT_STRNSTART(name, "fasttrap_") ||
+ LIT_STRNSTART(name, "fuword") ||
+ LIT_STRNSTART(name, "suword") ||
+ LIT_STRNEQL(name, "sprlock") ||
+ LIT_STRNEQL(name, "sprunlock") ||
+ LIT_STRNEQL(name, "uread") ||
+ LIT_STRNEQL(name, "uwrite")) {
+ return FALSE; /* Fasttrap inner-workings. */
+ }
+
+ if (LIT_STRNSTART(name, "dsmos_"))
+ return FALSE; /* Don't Steal Mac OS X! */
+
+ if (LIT_STRNSTART(name, "_dtrace"))
+ return FALSE; /* Shims in dtrace.c */
+
+ if (LIT_STRNSTART(name, "chud"))
+ return FALSE; /* Professional courtesy. */
+
+ if (LIT_STRNSTART(name, "hibernate_"))
+ return FALSE; /* Let sleeping dogs lie. */
+
+ if (LIT_STRNEQL(name, "_ZNK6OSData14getBytesNoCopyEv"))
+ return FALSE; /* OSData::getBytesNoCopy, IOHibernateSystemWake path */
+
+ if (LIT_STRNEQL(name, "_ZN9IOService14newTemperatureElPS_") || /* IOService::newTemperature */
+ LIT_STRNEQL(name, "_ZN9IOService26temperatureCriticalForZoneEPS_")) { /* IOService::temperatureCriticalForZone */
+ return FALSE; /* Per the fire code */
+ }
+
+ /*
+ * Place no probes (illegal instructions) in the exception handling path!
+ */
+ if (LIT_STRNEQL(name, "t_invop") ||
+ LIT_STRNEQL(name, "enter_lohandler") ||
+ LIT_STRNEQL(name, "lo_alltraps") ||
+ LIT_STRNEQL(name, "kernel_trap") ||
+ LIT_STRNEQL(name, "interrupt") ||
+ LIT_STRNEQL(name, "i386_astintr")) {
+ return FALSE;
+ }
+
+ /*
+ * NOTE(review): at least ml_set_interrupts_enabled and dtrace_invop are
+ * called from fbt_perfCallback while a probe trap is being handled; the
+ * rest of this group is presumably excluded for the same reason -- confirm.
+ */
+ if (LIT_STRNEQL(name, "current_thread") ||
+ LIT_STRNEQL(name, "ast_pending") ||
+ LIT_STRNEQL(name, "fbt_perfCallback") ||
+ LIT_STRNEQL(name, "machine_thread_get_kern_state") ||
+ LIT_STRNEQL(name, "get_threadtask") ||
+ LIT_STRNEQL(name, "ml_set_interrupts_enabled") ||
+ LIT_STRNEQL(name, "dtrace_invop") ||
+ LIT_STRNEQL(name, "fbt_invop") ||
+ LIT_STRNEQL(name, "sdt_invop") ||
+ LIT_STRNEQL(name, "max_valid_stack_address")) {
+ return FALSE;
+ }
+
+ /*
+ * Voodoo.
+ */
+ if (LIT_STRNSTART(name, "machine_stack_") ||
+ LIT_STRNSTART(name, "mapping_") ||
+ LIT_STRNEQL(name, "tmrCvt") ||
+
+ LIT_STRNSTART(name, "tsc_") ||
+
+ LIT_STRNSTART(name, "pmCPU") ||
+ LIT_STRNEQL(name, "pmKextRegister") ||
+ LIT_STRNEQL(name, "pmMarkAllCPUsOff") ||
+ LIT_STRNEQL(name, "pmSafeMode") ||
+ LIT_STRNEQL(name, "pmTimerSave") ||
+ LIT_STRNEQL(name, "pmTimerRestore") ||
+ LIT_STRNEQL(name, "pmUnRegister") ||
+ LIT_STRNSTART(name, "pms") ||
+ LIT_STRNEQL(name, "power_management_init") ||
+ LIT_STRNSTART(name, "usimple_") ||
+ LIT_STRNSTART(name, "lck_spin_lock") ||
+ LIT_STRNSTART(name, "lck_spin_unlock") ||
+
+ LIT_STRNSTART(name, "rtc_") ||
+ LIT_STRNSTART(name, "_rtc_") ||
+ LIT_STRNSTART(name, "rtclock_") ||
+ LIT_STRNSTART(name, "clock_") ||
+ LIT_STRNSTART(name, "absolutetime_to_") ||
+ LIT_STRNEQL(name, "setPop") ||
+ LIT_STRNEQL(name, "nanoseconds_to_absolutetime") ||
+ LIT_STRNEQL(name, "nanotime_to_absolutetime") ||
+
+ LIT_STRNSTART(name, "etimer_") ||
+
+ LIT_STRNSTART(name, "commpage_") ||
+ LIT_STRNSTART(name, "pmap_") ||
+ LIT_STRNSTART(name, "ml_") ||
+ LIT_STRNSTART(name, "PE_") ||
+ LIT_STRNEQL(name, "kprintf") ||
+ LIT_STRNSTART(name, "lapic_") ||
+ LIT_STRNSTART(name, "act_machine") ||
+ LIT_STRNSTART(name, "acpi_") ||
+ LIT_STRNSTART(name, "pal_")){
+ return FALSE;
+ }
+
+ /*
+ * Avoid machine_ routines. PR_5346750.
+ */
+ if (LIT_STRNSTART(name, "machine_"))
+ return FALSE;
+
+ if (LIT_STRNEQL(name, "handle_pending_TLB_flushes"))
+ return FALSE;
+
+ /*
+ * Place no probes on critical routines. PR_5221096
+ *
+ * NOTE(review): sizeof(name) is the size of a "const char *" and is
+ * passed as the bsearch element width, so this presumably relies on
+ * critical_blacklist (and probe_ctx_closure below) being arrays of
+ * string pointers kept in the order _cmp expects -- verify against
+ * their declarations.
+ */
+ if (bsearch( name, critical_blacklist, CRITICAL_BLACKLIST_COUNT, sizeof(name), _cmp ) != NULL)
+ return FALSE;
+
+ /*
+ * Place no probes that could be hit in probe context.
+ */
+ if (bsearch( name, probe_ctx_closure, PROBE_CTX_CLOSURE_COUNT, sizeof(name), _cmp ) != NULL) {
+ return FALSE;
+ }
+
+ /*
+ * Place no probes that could be hit on the way to the debugger.
+ */
+ if (LIT_STRNSTART(name, "kdp_") ||
+ LIT_STRNSTART(name, "kdb_") ||
+ LIT_STRNSTART(name, "kdbg_") ||
+ LIT_STRNSTART(name, "kdebug_") ||
+ LIT_STRNSTART(name, "kernel_debug") ||
+ LIT_STRNSTART(name, "debug_") ||
+ LIT_STRNEQL(name, "Debugger") ||
+ LIT_STRNEQL(name, "Call_DebuggerC") ||
+ LIT_STRNEQL(name, "lock_debugger") ||
+ LIT_STRNEQL(name, "unlock_debugger") ||
+ LIT_STRNEQL(name, "packA") ||
+ LIT_STRNEQL(name, "unpackA") ||
+ LIT_STRNEQL(name, "SysChoked")) {
+ return FALSE;
+ }
+
+
+ /*
+ * Place no probes that could be hit on the way to a panic.
+ * The strstr test rejects "panic_" anywhere in the name, not just
+ * as a prefix.
+ */
+ if (NULL != strstr(name, "panic_") ||
+ LIT_STRNEQL(name, "panic") ||
+ LIT_STRNEQL(name, "preemption_underflow_panic")) {
+ return FALSE;
+ }
+
+ return TRUE;
+}
+
int
-fbt_invop(uintptr_t addr, uintptr_t *stack, uintptr_t rval)
+fbt_invop(uintptr_t addr, uintptr_t *state, uintptr_t rval)
{
- uintptr_t stack0 = 0, stack1 = 0, stack2 = 0, stack3 = 0, stack4 = 0;
fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)];
-
+
for (; fbt != NULL; fbt = fbt->fbtp_hashnext) {
if ((uintptr_t)fbt->fbtp_patchpoint == addr) {
if (fbt->fbtp_roffset == 0) {
- uintptr_t *stacktop;
- if (CPU_ON_INTR(CPU))
- stacktop = (uintptr_t *)dtrace_get_cpu_int_stack_top();
- else
- stacktop = (uintptr_t *)(dtrace_get_kernel_stack(current_thread()) + kernel_stack_size);
-
- stack += 1; /* skip over the target's pushl'd %ebp */
-
- if (stack <= stacktop)
- CPU->cpu_dtrace_caller = *stack++;
- if (stack <= stacktop)
- stack0 = *stack++;
- if (stack <= stacktop)
- stack1 = *stack++;
- if (stack <= stacktop)
- stack2 = *stack++;
- if (stack <= stacktop)
- stack3 = *stack++;
- if (stack <= stacktop)
- stack4 = *stack++;
-
- /* 32-bit ABI, arguments passed on stack. */
- dtrace_probe(fbt->fbtp_id, stack0, stack1, stack2, stack3, stack4);
+ x86_saved_state64_t *regs = (x86_saved_state64_t *)state;
+
+ CPU->cpu_dtrace_caller = *(uintptr_t *)(((uintptr_t)(regs->isf.rsp))+sizeof(uint64_t)); // 8(%rsp)
+ /* 64-bit ABI, arguments passed in registers. */
+ dtrace_probe(fbt->fbtp_id, regs->rdi, regs->rsi, regs->rdx, regs->rcx, regs->r8);
CPU->cpu_dtrace_caller = 0;
} else {
+
dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, rval, 0, 0, 0);
CPU->cpu_dtrace_caller = 0;
}
return (0);
}
-#define IS_USER_TRAP(regs) (regs && (((regs)->cs & 3) != 0))
+#define IS_USER_TRAP(regs) (regs && (((regs)->isf.cs & 3) != 0))
#define T_INVALID_OPCODE 6
#define FBT_EXCEPTION_CODE T_INVALID_OPCODE
#define T_PREEMPT 255
fbt_perfCallback(
int trapno,
x86_saved_state_t *tagged_regs,
- __unused int unused1,
+ uintptr_t *lo_spp,
__unused int unused2)
{
kern_return_t retval = KERN_FAILURE;
- x86_saved_state32_t *saved_state = saved_state32(tagged_regs);
- struct x86_saved_state32_from_kernel *regs = (struct x86_saved_state32_from_kernel *)saved_state;
+ x86_saved_state64_t *saved_state = saved_state64(tagged_regs);
if (FBT_EXCEPTION_CODE == trapno && !IS_USER_TRAP(saved_state)) {
- boolean_t oldlevel, cpu_64bit;
- uint32_t esp_probe, *ebp, edi, fp, *pDst, delta = 0;
+ boolean_t oldlevel;
+ uint64_t rsp_probe, fp, delta = 0;
+ uintptr_t old_sp;
+ uint32_t *pDst;
int emul;
- cpu_64bit = ml_is64bit();
+
oldlevel = ml_set_interrupts_enabled(FALSE);
/* Calculate where the stack pointer was when the probe instruction "fired." */
- if (cpu_64bit) {
- esp_probe = saved_state->uesp; /* Easy, x86_64 establishes this value in idt64.s */
- } else {
- esp_probe = (uint32_t)&(regs[1]); /* Nasty, infer the location above the save area */
- }
+ rsp_probe = saved_state->isf.rsp; /* Easy, x86_64 establishes this value in idt64.s */
- emul = dtrace_invop( saved_state->eip, (uintptr_t *)esp_probe, saved_state->eax );
- __asm__ volatile(".globl _dtrace_invop_callsite");
- __asm__ volatile("_dtrace_invop_callsite:");
+ __asm__ volatile(
+ "Ldtrace_invop_callsite_pre_label:\n"
+ ".data\n"
+ ".private_extern _dtrace_invop_callsite_pre\n"
+ "_dtrace_invop_callsite_pre:\n"
+ " .quad Ldtrace_invop_callsite_pre_label\n"
+ ".text\n"
+ );
+
+ emul = dtrace_invop( saved_state->isf.rip, (uintptr_t *)saved_state, saved_state->rax );
+
+ __asm__ volatile(
+ "Ldtrace_invop_callsite_post_label:\n"
+ ".data\n"
+ ".private_extern _dtrace_invop_callsite_post\n"
+ "_dtrace_invop_callsite_post:\n"
+ " .quad Ldtrace_invop_callsite_post_label\n"
+ ".text\n"
+ );
switch (emul) {
case DTRACE_INVOP_NOP:
- saved_state->eip += DTRACE_INVOP_NOP_SKIP; /* Skip over the patched NOP (planted by sdt.) */
+ saved_state->isf.rip += DTRACE_INVOP_NOP_SKIP; /* Skip over the patched NOP (planted by sdt). */
retval = KERN_SUCCESS;
break;
- case DTRACE_INVOP_MOVL_ESP_EBP:
- saved_state->ebp = esp_probe; /* Emulate patched movl %esp,%ebp */
- saved_state->eip += DTRACE_INVOP_MOVL_ESP_EBP_SKIP; /* Skip over the bytes of the patched movl %esp,%ebp */
+ case DTRACE_INVOP_MOV_RSP_RBP:
+ saved_state->rbp = rsp_probe; /* Emulate patched mov %rsp,%rbp */
+ saved_state->isf.rip += DTRACE_INVOP_MOV_RSP_RBP_SKIP; /* Skip over the bytes of the patched mov %rsp,%rbp */
retval = KERN_SUCCESS;
break;
- case DTRACE_INVOP_POPL_EBP:
+ case DTRACE_INVOP_POP_RBP:
case DTRACE_INVOP_LEAVE:
/*
- * Emulate first micro-op of patched leave: movl %ebp,%esp
+ * Emulate first micro-op of patched leave: mov %rbp,%rsp
* fp points just below the return address slot for target's ret
* and at the slot holding the frame pointer saved by the target's prologue.
*/
- fp = saved_state->ebp;
-/* Emulate second micro-op of patched leave: patched popl %ebp
- * savearea ebp is set for the frame of the caller to target
- * The *live* %esp will be adjusted below for pop increment(s)
+ fp = saved_state->rbp;
+/* Emulate second micro-op of patched leave: patched pop %rbp
+ * savearea rbp is set for the frame of the caller to target
+ * The *live* %rsp will be adjusted below for pop increment(s)
*/
- saved_state->ebp = *(uint32_t *)fp;
+ saved_state->rbp = *(uint64_t *)fp;
/* Skip over the patched leave */
- saved_state->eip += DTRACE_INVOP_LEAVE_SKIP;
+ saved_state->isf.rip += DTRACE_INVOP_LEAVE_SKIP;
/*
* Lift the stack to account for the emulated leave
* Account for words local in this frame
- * (in "case DTRACE_INVOP_POPL_EBP:" this is zero.)
+ * (in "case DTRACE_INVOP_POP_RBP:" this is zero.)
*/
- delta = ((uint32_t *)fp) - ((uint32_t *)esp_probe);
-/* Account for popping off the ebp (just accomplished by the emulation
+ delta = ((uint32_t *)fp) - ((uint32_t *)rsp_probe); /* delta is a *word* increment */
+/* Account for popping off the rbp (just accomplished by the emulation
* above...)
*/
- delta += 1;
-
- if (cpu_64bit)
- saved_state->uesp += (delta << 2);
-
-/* XXX Fragile in the extreme. Obtain the value of %edi that our caller pushed
- * (on behalf of its caller -- trap_from_kernel()). Ultimately,
- * trap_from_kernel's stack pointer is restored from this slot.
- * This is sensitive to the manner in which the compiler preserves %edi,
- * and trap_from_kernel()'s internals.
- */
- ebp = (uint32_t *)__builtin_frame_address(0);
- ebp = (uint32_t *)*ebp;
- edi = *(ebp - 1);
+ delta += 2;
+ saved_state->isf.rsp += (delta << 2);
+/* Obtain the stack pointer recorded by the trampolines */
+ old_sp = *lo_spp;
/* Shift contents of stack */
for (pDst = (uint32_t *)fp;
- pDst > (((uint32_t *)edi));
+ pDst > (((uint32_t *)old_sp));
pDst--)
*pDst = pDst[-delta];
/* Track the stack lift in "saved_state". */
- saved_state = (x86_saved_state32_t *) (((uintptr_t)saved_state) + (delta << 2));
-
-/* Now adjust the value of %edi in our caller (kernel_trap)'s frame */
- *(ebp - 1) = edi + (delta << 2);
+ saved_state = (x86_saved_state64_t *) (((uintptr_t)saved_state) + (delta << 2));
+/* Adjust the stack pointer utilized by the trampolines */
+ *lo_spp = old_sp + (delta << 2);
retval = KERN_SUCCESS;
break;
retval = KERN_FAILURE;
break;
}
- saved_state->trapno = T_PREEMPT; /* Avoid call to i386_astintr()! */
+ saved_state->isf.trapno = T_PREEMPT; /* Avoid call to i386_astintr()! */
ml_set_interrupts_enabled(oldlevel);
}
/*ARGSUSED*/
static void
-__fbt_provide_module(void *arg, struct modctl *ctl)
+__provide_probe_64(struct modctl *ctl, uintptr_t instrLow, uintptr_t instrHigh, char *modname, char* symbolName, machine_inst_t* symbolStart)
{
-#pragma unused(arg)
- kernel_mach_header_t *mh;
- struct load_command *cmd;
- kernel_segment_command_t *orig_ts = NULL, *orig_le = NULL;
- struct symtab_command *orig_st = NULL;
- struct nlist *sym = NULL;
- char *strings;
- uintptr_t instrLow, instrHigh;
- char *modname;
- unsigned int i, j;
-
- int gIgnoreFBTBlacklist = 0;
- PE_parse_boot_argn("IgnoreFBTBlacklist", &gIgnoreFBTBlacklist, sizeof (gIgnoreFBTBlacklist));
+ unsigned int j;
+ unsigned int doenable = 0;
+ dtrace_id_t thisid;
- mh = (kernel_mach_header_t *)(ctl->address);
- modname = ctl->mod_modname;
-
- if (0 == ctl->address || 0 == ctl->size) /* Has the linker been jettisoned? */
+ fbt_probe_t *newfbt, *retfbt, *entryfbt;
+ machine_inst_t *instr, *limit, theInstr, i1, i2, i3;
+ int size;
+
+ for (j = 0, instr = symbolStart, theInstr = 0;
+ (j < 4) && ((uintptr_t)instr >= instrLow) && (instrHigh > (uintptr_t)(instr + 2));
+ j++) {
+ theInstr = instr[0];
+ if (theInstr == FBT_PUSH_RBP || theInstr == FBT_RET || theInstr == FBT_RET_IMM16)
+ break;
+
+ if ((size = dtrace_instr_size(instr)) <= 0)
+ break;
+
+ instr += size;
+ }
+
+ if (theInstr != FBT_PUSH_RBP)
return;
-
+
+ i1 = instr[1];
+ i2 = instr[2];
+ i3 = instr[3];
+
+ limit = (machine_inst_t *)instrHigh;
+
+ if (i1 == FBT_REX_RSP_RBP && i2 == FBT_MOV_RSP_RBP0 && i3 == FBT_MOV_RSP_RBP1) {
+ instr += 1; /* Advance to the mov %rsp,%rbp */
+ theInstr = i1;
+ } else {
+ return;
+ }
+#if 0
+ else {
+ /*
+ * Sometimes, the compiler will schedule an intervening instruction
+ * in the function prologue. Example:
+ *
+ * _mach_vm_read:
+ * 000006d8 pushl %ebp
+ * 000006d9 movl $0x00000004,%edx
+ * 000006de movl %esp,%ebp
+ *
+ * Try the next instruction, to see if it is a movl %esp,%ebp
+ */
+
+ instr += 1; /* Advance past the pushl %ebp */
+ if ((size = dtrace_instr_size(instr)) <= 0)
+ return;
+
+ instr += size;
+
+ if ((instr + 1) >= limit)
+ return;
+
+ i1 = instr[0];
+ i2 = instr[1];
+
+ if (!(i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) &&
+ !(i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1))
+ return;
+
+ /* instr already points at the movl %esp,%ebp */
+ theInstr = i1;
+ }
+#endif
+ thisid = dtrace_probe_lookup(fbt_id, modname, symbolName, FBT_ENTRY);
+ newfbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
+ strlcpy( (char *)&(newfbt->fbtp_name), symbolName, MAX_FBTP_NAME_CHARS );
+
+ if (thisid != 0) {
+ /*
+ * The dtrace_probe previously existed, so we have to hook
+ * the newfbt entry onto the end of the existing fbt's chain.
+ * If we find an fbt entry that was previously patched to
+ * fire, (as indicated by the current patched value), then
+ * we want to enable this newfbt on the spot.
+ */
+ entryfbt = dtrace_probe_arg (fbt_id, thisid);
+ ASSERT (entryfbt != NULL);
+ for(; entryfbt != NULL; entryfbt = entryfbt->fbtp_next) {
+ if (entryfbt->fbtp_currentval == entryfbt->fbtp_patchval)
+ doenable++;
+
+ if (entryfbt->fbtp_next == NULL) {
+ entryfbt->fbtp_next = newfbt;
+ newfbt->fbtp_id = entryfbt->fbtp_id;
+ break;
+ }
+ }
+ }
+ else {
+ /*
+ * The dtrace_probe did not previously exist, so we
+ * create it and hook in the newfbt. Since the probe is
+ * new, we obviously do not need to enable it on the spot.
+ */
+ newfbt->fbtp_id = dtrace_probe_create(fbt_id, modname, symbolName, FBT_ENTRY, FBT_AFRAMES_ENTRY, newfbt);
+ doenable = 0;
+ }
+
+ newfbt->fbtp_patchpoint = instr;
+ newfbt->fbtp_ctl = ctl;
+ newfbt->fbtp_loadcnt = ctl->mod_loadcnt;
+ newfbt->fbtp_rval = DTRACE_INVOP_MOV_RSP_RBP;
+ newfbt->fbtp_savedval = theInstr;
+ newfbt->fbtp_patchval = FBT_PATCHVAL;
+ newfbt->fbtp_currentval = 0;
+ newfbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
+ fbt_probetab[FBT_ADDR2NDX(instr)] = newfbt;
+
+ if (doenable)
+ fbt_enable(NULL, newfbt->fbtp_id, newfbt);
+
/*
- * Employees of dtrace and their families are ineligible. Void
- * where prohibited.
+ * The fbt entry chain is in place, one entry point per symbol.
+ * The fbt return chain can have multiple return points per symbol.
+ * Here we find the end of the fbt return chain.
*/
-
- if (LIT_STRNEQL(modname, "com.apple.driver.dtrace"))
+
+ doenable=0;
+
+ thisid = dtrace_probe_lookup(fbt_id, modname, symbolName, FBT_RETURN);
+ if (thisid != 0) {
+ /* The dtrace_probe previously existed, so we have to
+ * find the end of the existing fbt chain. If we find
+ * an fbt return that was previously patched to fire,
+ * (as indicated by the current patched value), then
+ * we want to enable any new fbts on the spot.
+ */
+ retfbt = dtrace_probe_arg (fbt_id, thisid);
+ ASSERT(retfbt != NULL);
+ for (; retfbt != NULL; retfbt = retfbt->fbtp_next) {
+ if (retfbt->fbtp_currentval == retfbt->fbtp_patchval)
+ doenable++;
+ if(retfbt->fbtp_next == NULL)
+ break;
+ }
+ }
+ else {
+ doenable = 0;
+ retfbt = NULL;
+ }
+
+again:
+ if (instr >= limit)
return;
-
- if (strstr(modname, "CHUD") != NULL)
+
+ /*
+ * If this disassembly fails, then we've likely walked off into
+ * a jump table or some other unsuitable area. Bail out of the
+ * disassembly now.
+ */
+ if ((size = dtrace_instr_size(instr)) <= 0)
return;
-
- if (mh->magic != MH_MAGIC)
+
+ /*
+ * We (desperately) want to avoid erroneously instrumenting a
+ * jump table, especially given that our markers are pretty
+ * short: two bytes on x86, and just one byte on amd64. To
+ * determine if we're looking at a true instruction sequence
+ * or an inline jump table that happens to contain the same
+ * byte sequences, we resort to some heuristic sleaze: we
+ * treat this instruction as being contained within a pointer,
+ * and see if that pointer points to within the body of the
+ * function. If it does, we refuse to instrument it.
+ */
+ for (j = 0; j < sizeof (uintptr_t); j++) {
+ uintptr_t check = (uintptr_t)instr - j;
+ uint8_t *ptr;
+
+ if (check < (uintptr_t)symbolStart)
+ break;
+
+ if (check + sizeof (uintptr_t) > (uintptr_t)limit)
+ continue;
+
+ ptr = *(uint8_t **)check;
+
+ if (ptr >= (uint8_t *)symbolStart && ptr < limit) {
+ instr += size;
+ goto again;
+ }
+ }
+
+ /*
+ * OK, it's an instruction.
+ */
+ theInstr = instr[0];
+
+ /* Walked onto the start of the next routine? If so, bail out of this function. */
+ if (theInstr == FBT_PUSH_RBP)
+ return;
+
+ if (!(size == 1 && (theInstr == FBT_POP_RBP || theInstr == FBT_LEAVE))) {
+ instr += size;
+ goto again;
+ }
+
+ /*
+ * Found the pop %rbp; or leave.
+ */
+ machine_inst_t *patch_instr = instr;
+
+ /*
+ * Scan forward for a "ret", or "jmp".
+ */
+ instr += size;
+ if (instr >= limit)
+ return;
+
+ size = dtrace_instr_size(instr);
+ if (size <= 0) /* Failed instruction decode? */
+ return;
+
+ theInstr = instr[0];
+
+ if (!(size == FBT_RET_LEN && (theInstr == FBT_RET)) &&
+ !(size == FBT_RET_IMM16_LEN && (theInstr == FBT_RET_IMM16)) &&
+ !(size == FBT_JMP_SHORT_REL_LEN && (theInstr == FBT_JMP_SHORT_REL)) &&
+ !(size == FBT_JMP_NEAR_REL_LEN && (theInstr == FBT_JMP_NEAR_REL)) &&
+ !(size == FBT_JMP_FAR_ABS_LEN && (theInstr == FBT_JMP_FAR_ABS)))
return;
+
+ /*
+ * pop %rbp; ret; or leave; ret; or leave; jmp tailCalledFun; -- We have a winner!
+ */
+ newfbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
+ strlcpy( (char *)&(newfbt->fbtp_name), symbolName, MAX_FBTP_NAME_CHARS );
+
+ if (retfbt == NULL) {
+ newfbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
+ symbolName, FBT_RETURN, FBT_AFRAMES_RETURN, newfbt);
+ } else {
+ retfbt->fbtp_next = newfbt;
+ newfbt->fbtp_id = retfbt->fbtp_id;
+ }
+
+ retfbt = newfbt;
+ newfbt->fbtp_patchpoint = patch_instr;
+ newfbt->fbtp_ctl = ctl;
+ newfbt->fbtp_loadcnt = ctl->mod_loadcnt;
+
+ if (*patch_instr == FBT_POP_RBP) {
+ newfbt->fbtp_rval = DTRACE_INVOP_POP_RBP;
+ } else {
+ ASSERT(*patch_instr == FBT_LEAVE);
+ newfbt->fbtp_rval = DTRACE_INVOP_LEAVE;
+ }
+ newfbt->fbtp_roffset =
+ (uintptr_t)(patch_instr - (uint8_t *)symbolStart);
+
+ newfbt->fbtp_savedval = *patch_instr;
+ newfbt->fbtp_patchval = FBT_PATCHVAL;
+ newfbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(patch_instr)];
+ fbt_probetab[FBT_ADDR2NDX(patch_instr)] = newfbt;
+
+ if (doenable)
+ fbt_enable(NULL, newfbt->fbtp_id, newfbt);
+
+ instr += size;
+ goto again;
+}
+static void
+__kernel_syms_provide_module(void *arg, struct modctl *ctl)
+{
+#pragma unused(arg)
+ kernel_mach_header_t *mh;
+ struct load_command *cmd;
+ kernel_segment_command_t *orig_ts = NULL, *orig_le = NULL;
+ struct symtab_command *orig_st = NULL;
+ kernel_nlist_t *sym = NULL;
+ char *strings;
+ uintptr_t instrLow, instrHigh;
+ char *modname;
+ unsigned int i;
+
+ mh = (kernel_mach_header_t *)(ctl->mod_address);
+ modname = ctl->mod_modname;
+
+ if (mh->magic != MH_MAGIC_KERNEL)
+ return;
+
cmd = (struct load_command *) &mh[1];
for (i = 0; i < mh->ncmds; i++) {
if (cmd->cmd == LC_SEGMENT_KERNEL) {
kernel_segment_command_t *orig_sg = (kernel_segment_command_t *) cmd;
-
+
if (LIT_STRNEQL(orig_sg->segname, SEG_TEXT))
orig_ts = orig_sg;
else if (LIT_STRNEQL(orig_sg->segname, SEG_LINKEDIT))
}
else if (cmd->cmd == LC_SYMTAB)
orig_st = (struct symtab_command *) cmd;
-
+
cmd = (struct load_command *) ((caddr_t) cmd + cmd->cmdsize);
}
-
+
if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL))
return;
-
- sym = (struct nlist *)(orig_le->vmaddr + orig_st->symoff - orig_le->fileoff);
+
+ sym = (kernel_nlist_t *)(orig_le->vmaddr + orig_st->symoff - orig_le->fileoff);
strings = (char *)(orig_le->vmaddr + orig_st->stroff - orig_le->fileoff);
-
+
/* Find extent of the TEXT section */
instrLow = (uintptr_t)orig_ts->vmaddr;
instrHigh = (uintptr_t)(orig_ts->vmaddr + orig_ts->vmsize);
-
+
for (i = 0; i < orig_st->nsyms; i++) {
- fbt_probe_t *fbt, *retfbt;
- machine_inst_t *instr, *limit, theInstr, i1, i2;
uint8_t n_type = sym[i].n_type & (N_TYPE | N_EXT);
char *name = strings + sym[i].n_un.n_strx;
- int size;
-
+
/* Check that the symbol is a global and that it has a name. */
if (((N_SECT | N_EXT) != n_type && (N_ABS | N_EXT) != n_type))
continue;
-
+
if (0 == sym[i].n_un.n_strx) /* iff a null, "", name. */
continue;
/* Lop off omnipresent leading underscore. */
if (*name == '_')
name += 1;
-
- if (LIT_STRNSTART(name, "dtrace_") && !LIT_STRNSTART(name, "dtrace_safe_")) {
- /*
- * Anything beginning with "dtrace_" may be called
- * from probe context unless it explitly indicates
- * that it won't be called from probe context by
- * using the prefix "dtrace_safe_".
- */
- continue;
- }
-
- if (LIT_STRNSTART(name, "dsmos_"))
- continue; /* Don't Steal Mac OS X! */
-
- if (LIT_STRNSTART(name, "_dtrace"))
- continue; /* Shims in dtrace.c */
-
- if (LIT_STRNSTART(name, "chud"))
- continue; /* Professional courtesy. */
-
- if (LIT_STRNSTART(name, "hibernate_"))
- continue; /* Let sleeping dogs lie. */
- if (LIT_STRNEQL(name, "_ZN9IOService14newTemperatureElPS_") || /* IOService::newTemperature */
- LIT_STRNEQL(name, "_ZN9IOService26temperatureCriticalForZoneEPS_")) /* IOService::temperatureCriticalForZone */
- continue; /* Per the fire code */
-
- /*
- * Place no probes (illegal instructions) in the exception handling path!
- */
- if (LIT_STRNEQL(name, "t_invop") ||
- LIT_STRNEQL(name, "enter_lohandler") ||
- LIT_STRNEQL(name, "lo_alltraps") ||
- LIT_STRNEQL(name, "kernel_trap") ||
- LIT_STRNEQL(name, "interrupt") ||
- LIT_STRNEQL(name, "i386_astintr"))
- continue;
-
- if (LIT_STRNEQL(name, "current_thread") ||
- LIT_STRNEQL(name, "ast_pending") ||
- LIT_STRNEQL(name, "fbt_perfCallback") ||
- LIT_STRNEQL(name, "machine_thread_get_kern_state") ||
- LIT_STRNEQL(name, "get_threadtask") ||
- LIT_STRNEQL(name, "ml_set_interrupts_enabled") ||
- LIT_STRNEQL(name, "dtrace_invop") ||
- LIT_STRNEQL(name, "fbt_invop") ||
- LIT_STRNEQL(name, "sdt_invop") ||
- LIT_STRNEQL(name, "max_valid_stack_address"))
- continue;
-
- /*
- * Voodoo.
- */
- if (LIT_STRNSTART(name, "machine_stack_") ||
- LIT_STRNSTART(name, "mapping_") ||
- LIT_STRNEQL(name, "tmrCvt") ||
-
- LIT_STRNSTART(name, "tsc_") ||
-
- LIT_STRNSTART(name, "pmCPU") ||
- LIT_STRNEQL(name, "pmKextRegister") ||
- LIT_STRNEQL(name, "pmMarkAllCPUsOff") ||
- LIT_STRNEQL(name, "pmSafeMode") ||
- LIT_STRNEQL(name, "pmTimerSave") ||
- LIT_STRNEQL(name, "pmTimerRestore") ||
- LIT_STRNEQL(name, "pmUnRegister") ||
- LIT_STRNSTART(name, "pms") ||
- LIT_STRNEQL(name, "power_management_init") ||
- LIT_STRNSTART(name, "usimple_") ||
- LIT_STRNEQL(name, "lck_spin_lock") ||
- LIT_STRNEQL(name, "lck_spin_unlock") ||
-
- LIT_STRNSTART(name, "rtc_") ||
- LIT_STRNSTART(name, "_rtc_") ||
- LIT_STRNSTART(name, "rtclock_") ||
- LIT_STRNSTART(name, "clock_") ||
- LIT_STRNSTART(name, "absolutetime_to_") ||
- LIT_STRNEQL(name, "setPop") ||
- LIT_STRNEQL(name, "nanoseconds_to_absolutetime") ||
- LIT_STRNEQL(name, "nanotime_to_absolutetime") ||
-
- LIT_STRNSTART(name, "etimer_") ||
-
- LIT_STRNSTART(name, "commpage_") ||
- LIT_STRNSTART(name, "pmap_") ||
- LIT_STRNSTART(name, "ml_") ||
- LIT_STRNSTART(name, "PE_") ||
- LIT_STRNEQL(name, "kprintf") ||
- LIT_STRNSTART(name, "lapic_") ||
- LIT_STRNSTART(name, "acpi_"))
- continue;
-
- /*
- * Avoid machine_ routines. PR_5346750.
- */
- if (LIT_STRNSTART(name, "machine_"))
- continue;
-
- if (LIT_STRNEQL(name, "handle_pending_TLB_flushes"))
- continue;
-
- /*
- * Place no probes on critical routines. PR_5221096
- */
- if (!gIgnoreFBTBlacklist &&
- bsearch( name, critical_blacklist, CRITICAL_BLACKLIST_COUNT, sizeof(name), _cmp ) != NULL)
- continue;
-
- /*
- * Place no probes that could be hit in probe context.
- */
- if (!gIgnoreFBTBlacklist &&
- bsearch( name, probe_ctx_closure, PROBE_CTX_CLOSURE_COUNT, sizeof(name), _cmp ) != NULL)
- continue;
-
/*
- * Place no probes that could be hit on the way to the debugger.
+ * We're only blacklisting functions in the kernel for now.
*/
- if (LIT_STRNSTART(name, "kdp_") ||
- LIT_STRNSTART(name, "kdb_") ||
- LIT_STRNSTART(name, "kdbg_") ||
- LIT_STRNSTART(name, "kdebug_") ||
- LIT_STRNEQL(name, "kernel_debug") ||
- LIT_STRNEQL(name, "Debugger") ||
- LIT_STRNEQL(name, "Call_DebuggerC") ||
- LIT_STRNEQL(name, "lock_debugger") ||
- LIT_STRNEQL(name, "unlock_debugger") ||
- LIT_STRNEQL(name, "SysChoked"))
- continue;
-
- /*
- * Place no probes that could be hit on the way to a panic.
- */
- if (NULL != strstr(name, "panic_") ||
- LIT_STRNEQL(name, "panic") ||
- LIT_STRNEQL(name, "handleMck") ||
- LIT_STRNEQL(name, "unresolved_kernel_trap"))
+ if (MOD_IS_MACH_KERNEL(ctl) && !is_symbol_valid(name))
continue;
- if (dtrace_probe_lookup(fbt_id, modname, name, NULL) != 0)
- continue;
-
- for (j = 0, instr = (machine_inst_t *)sym[i].n_value, theInstr = 0;
- (j < 4) && ((uintptr_t)instr >= instrLow) && (instrHigh > (uintptr_t)(instr + 2));
- j++) {
- theInstr = instr[0];
- if (theInstr == FBT_PUSHL_EBP || theInstr == FBT_RET || theInstr == FBT_RET_IMM16)
- break;
-
- if ((size = dtrace_instr_size(instr)) <= 0)
- break;
-
- instr += size;
- }
-
- if (theInstr != FBT_PUSHL_EBP)
- continue;
-
- i1 = instr[1];
- i2 = instr[2];
-
- limit = (machine_inst_t *)instrHigh;
-
- if ((i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) ||
- (i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1)) {
- instr += 1; /* Advance to the movl %esp,%ebp */
- theInstr = i1;
- } else {
- /*
- * Sometimes, the compiler will schedule an intervening instruction
- * in the function prologue. Example:
- *
- * _mach_vm_read:
- * 000006d8 pushl %ebp
- * 000006d9 movl $0x00000004,%edx
- * 000006de movl %esp,%ebp
- *
- * Try the next instruction, to see if it is a movl %esp,%ebp
- */
-
- instr += 1; /* Advance past the pushl %ebp */
- if ((size = dtrace_instr_size(instr)) <= 0)
- continue;
-
- instr += size;
-
- if ((instr + 1) >= limit)
- continue;
-
- i1 = instr[0];
- i2 = instr[1];
-
- if (!(i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) &&
- !(i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1))
- continue;
-
- /* instr already points at the movl %esp,%ebp */
- theInstr = i1;
- }
-
- fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
- strlcpy( (char *)&(fbt->fbtp_name), name, MAX_FBTP_NAME_CHARS );
- fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, name, FBT_ENTRY, FBT_AFRAMES_ENTRY, fbt);
- fbt->fbtp_patchpoint = instr;
- fbt->fbtp_ctl = ctl;
- fbt->fbtp_loadcnt = ctl->mod_loadcnt;
- fbt->fbtp_rval = DTRACE_INVOP_MOVL_ESP_EBP;
- fbt->fbtp_savedval = theInstr;
- fbt->fbtp_patchval = FBT_PATCHVAL;
-
- fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
- fbt->fbtp_symndx = i;
- fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;
-
- retfbt = NULL;
-again:
- if (instr >= limit)
- continue;
-
- /*
- * If this disassembly fails, then we've likely walked off into
- * a jump table or some other unsuitable area. Bail out of the
- * disassembly now.
- */
- if ((size = dtrace_instr_size(instr)) <= 0)
- continue;
-
- /*
- * We (desperately) want to avoid erroneously instrumenting a
- * jump table, especially given that our markers are pretty
- * short: two bytes on x86, and just one byte on amd64. To
- * determine if we're looking at a true instruction sequence
- * or an inline jump table that happens to contain the same
- * byte sequences, we resort to some heuristic sleeze: we
- * treat this instruction as being contained within a pointer,
- * and see if that pointer points to within the body of the
- * function. If it does, we refuse to instrument it.
- */
- for (j = 0; j < sizeof (uintptr_t); j++) {
- uintptr_t check = (uintptr_t)instr - j;
- uint8_t *ptr;
-
- if (check < sym[i].n_value)
- break;
-
- if (check + sizeof (uintptr_t) > (uintptr_t)limit)
- continue;
-
- ptr = *(uint8_t **)check;
-
- if (ptr >= (uint8_t *)sym[i].n_value && ptr < limit) {
- instr += size;
- goto again;
- }
- }
-
- /*
- * OK, it's an instruction.
- */
- theInstr = instr[0];
-
- /* Walked onto the start of the next routine? If so, bail out of this function. */
- if (theInstr == FBT_PUSHL_EBP)
- continue;
-
- if (!(size == 1 && (theInstr == FBT_POPL_EBP || theInstr == FBT_LEAVE))) {
- instr += size;
- goto again;
- }
-
- /*
- * Found the popl %ebp; or leave.
- */
- machine_inst_t *patch_instr = instr;
-
- /*
- * Scan forward for a "ret", or "jmp".
- */
- instr += size;
- if (instr >= limit)
- continue;
-
- size = dtrace_instr_size(instr);
- if (size <= 0) /* Failed instruction decode? */
- continue;
-
- theInstr = instr[0];
-
- if (!(size == FBT_RET_LEN && (theInstr == FBT_RET)) &&
- !(size == FBT_RET_IMM16_LEN && (theInstr == FBT_RET_IMM16)) &&
- !(size == FBT_JMP_SHORT_REL_LEN && (theInstr == FBT_JMP_SHORT_REL)) &&
- !(size == FBT_JMP_NEAR_REL_LEN && (theInstr == FBT_JMP_NEAR_REL)) &&
- !(size == FBT_JMP_FAR_ABS_LEN && (theInstr == FBT_JMP_FAR_ABS)))
- continue;
-
- /*
- * popl %ebp; ret; or leave; ret; or leave; jmp tailCalledFun; -- We have a winner!
- */
- fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
- strlcpy( (char *)&(fbt->fbtp_name), name, MAX_FBTP_NAME_CHARS );
-
- if (retfbt == NULL) {
- fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
- name, FBT_RETURN, FBT_AFRAMES_RETURN, fbt);
- } else {
- retfbt->fbtp_next = fbt;
- fbt->fbtp_id = retfbt->fbtp_id;
- }
-
- retfbt = fbt;
- fbt->fbtp_patchpoint = patch_instr;
- fbt->fbtp_ctl = ctl;
- fbt->fbtp_loadcnt = ctl->mod_loadcnt;
-
- if (*patch_instr == FBT_POPL_EBP) {
- fbt->fbtp_rval = DTRACE_INVOP_POPL_EBP;
- } else {
- ASSERT(*patch_instr == FBT_LEAVE);
- fbt->fbtp_rval = DTRACE_INVOP_LEAVE;
- }
- fbt->fbtp_roffset =
- (uintptr_t)(patch_instr - (uint8_t *)sym[i].n_value);
-
- fbt->fbtp_savedval = *patch_instr;
- fbt->fbtp_patchval = FBT_PATCHVAL;
- fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(patch_instr)];
- fbt->fbtp_symndx = i;
- fbt_probetab[FBT_ADDR2NDX(patch_instr)] = fbt;
-
- instr += size;
- goto again;
- }
-}
-#elif defined(__x86_64__)
-int
-fbt_invop(uintptr_t addr, uintptr_t *state, uintptr_t rval)
-{
- fbt_probe_t *fbt = fbt_probetab[FBT_ADDR2NDX(addr)];
-
- for (; fbt != NULL; fbt = fbt->fbtp_hashnext) {
- if ((uintptr_t)fbt->fbtp_patchpoint == addr) {
-
- if (fbt->fbtp_roffset == 0) {
- x86_saved_state64_t *regs = (x86_saved_state64_t *)state;
-
- CPU->cpu_dtrace_caller = *(uintptr_t *)(((uintptr_t)(regs->isf.rsp))+sizeof(uint64_t)); // 8(%rsp)
- /* 64-bit ABI, arguments passed in registers. */
- dtrace_probe(fbt->fbtp_id, regs->rdi, regs->rsi, regs->rdx, regs->rcx, regs->r8);
- CPU->cpu_dtrace_caller = 0;
- } else {
-
- dtrace_probe(fbt->fbtp_id, fbt->fbtp_roffset, rval, 0, 0, 0);
- CPU->cpu_dtrace_caller = 0;
- }
-
- return (fbt->fbtp_rval);
- }
- }
-
- return (0);
-}
-
-#define IS_USER_TRAP(regs) (regs && (((regs)->isf.cs & 3) != 0))
-#define T_INVALID_OPCODE 6
-#define FBT_EXCEPTION_CODE T_INVALID_OPCODE
-#define T_PREEMPT 255
-
-kern_return_t
-fbt_perfCallback(
- int trapno,
- x86_saved_state_t *tagged_regs,
- __unused int unused1,
- __unused int unused2)
-{
- kern_return_t retval = KERN_FAILURE;
- x86_saved_state64_t *saved_state = saved_state64(tagged_regs);
-
- if (FBT_EXCEPTION_CODE == trapno && !IS_USER_TRAP(saved_state)) {
- boolean_t oldlevel;
- uint64_t rsp_probe, *rbp, r12, fp, delta = 0;
- uint32_t *pDst;
- int emul;
-
- oldlevel = ml_set_interrupts_enabled(FALSE);
-
- /* Calculate where the stack pointer was when the probe instruction "fired." */
- rsp_probe = saved_state->isf.rsp; /* Easy, x86_64 establishes this value in idt64.s */
-
- emul = dtrace_invop( saved_state->isf.rip, (uintptr_t *)saved_state, saved_state->rax );
- __asm__ volatile(".globl _dtrace_invop_callsite");
- __asm__ volatile("_dtrace_invop_callsite:");
-
- switch (emul) {
- case DTRACE_INVOP_NOP:
- saved_state->isf.rip += DTRACE_INVOP_NOP_SKIP; /* Skip over the patched NOP (planted by sdt). */
- retval = KERN_SUCCESS;
- break;
-
- case DTRACE_INVOP_MOV_RSP_RBP:
- saved_state->rbp = rsp_probe; /* Emulate patched mov %rsp,%rbp */
- saved_state->isf.rip += DTRACE_INVOP_MOV_RSP_RBP_SKIP; /* Skip over the bytes of the patched mov %rsp,%rbp */
- retval = KERN_SUCCESS;
- break;
-
- case DTRACE_INVOP_POP_RBP:
- case DTRACE_INVOP_LEAVE:
-/*
- * Emulate first micro-op of patched leave: mov %rbp,%rsp
- * fp points just below the return address slot for target's ret
- * and at the slot holding the frame pointer saved by the target's prologue.
- */
- fp = saved_state->rbp;
-/* Emulate second micro-op of patched leave: patched pop %rbp
- * savearea rbp is set for the frame of the caller to target
- * The *live* %rsp will be adjusted below for pop increment(s)
- */
- saved_state->rbp = *(uint64_t *)fp;
-/* Skip over the patched leave */
- saved_state->isf.rip += DTRACE_INVOP_LEAVE_SKIP;
-/*
- * Lift the stack to account for the emulated leave
- * Account for words local in this frame
- * (in "case DTRACE_INVOP_POPL_EBP:" this is zero.)
- */
- delta = ((uint32_t *)fp) - ((uint32_t *)rsp_probe); /* delta is a *word* increment */
-/* Account for popping off the rbp (just accomplished by the emulation
- * above...)
- */
- delta += 2;
- saved_state->isf.rsp += (delta << 2);
-
-/* XXX Fragile in the extreme.
- * This is sensitive to trap_from_kernel()'s internals.
- */
- rbp = (uint64_t *)__builtin_frame_address(0);
- rbp = (uint64_t *)*rbp;
- r12 = *(rbp - 4);
-
-/* Shift contents of stack */
- for (pDst = (uint32_t *)fp;
- pDst > (((uint32_t *)r12));
- pDst--)
- *pDst = pDst[-delta];
-
-/* Track the stack lift in "saved_state". */
- saved_state = (x86_saved_state64_t *) (((uintptr_t)saved_state) + (delta << 2));
-
-/* Now adjust the value of %r12 in our caller (kernel_trap)'s frame */
- *(rbp - 4) = r12 + (delta << 2);
-
- retval = KERN_SUCCESS;
- break;
-
- default:
- retval = KERN_FAILURE;
- break;
- }
- saved_state->isf.trapno = T_PREEMPT; /* Avoid call to i386_astintr()! */
-
- ml_set_interrupts_enabled(oldlevel);
+ __provide_probe_64(ctl, instrLow, instrHigh, modname, name, (machine_inst_t*)sym[i].n_value);
}
-
- return retval;
}
-/*ARGSUSED*/
static void
-__fbt_provide_module(void *arg, struct modctl *ctl)
+__user_syms_provide_module(void *arg, struct modctl *ctl)
{
#pragma unused(arg)
- kernel_mach_header_t *mh;
- struct load_command *cmd;
- kernel_segment_command_t *orig_ts = NULL, *orig_le = NULL;
- struct symtab_command *orig_st = NULL;
- struct nlist_64 *sym = NULL;
- char *strings;
- uintptr_t instrLow, instrHigh;
- char *modname;
- unsigned int i, j;
-
- int gIgnoreFBTBlacklist = 0;
- PE_parse_boot_argn("IgnoreFBTBlacklist", &gIgnoreFBTBlacklist, sizeof (gIgnoreFBTBlacklist));
-
- mh = (kernel_mach_header_t *)(ctl->address);
+ char *modname;
+ unsigned int i;
+
modname = ctl->mod_modname;
+
+ dtrace_module_symbols_t* module_symbols = ctl->mod_user_symbols;
+ if (module_symbols) {
+ for (i=0; i<module_symbols->dtmodsyms_count; i++) {
- if (0 == ctl->address || 0 == ctl->size) /* Has the linker been jettisoned? */
- return;
-
- /*
- * Employees of dtrace and their families are ineligible. Void
- * where prohibited.
- */
-
- if (LIT_STRNEQL(modname, "com.apple.driver.dtrace"))
- return;
-
- if (strstr(modname, "CHUD") != NULL)
- return;
-
- if (mh->magic != MH_MAGIC_64)
- return;
-
- cmd = (struct load_command *) &mh[1];
- for (i = 0; i < mh->ncmds; i++) {
- if (cmd->cmd == LC_SEGMENT_KERNEL) {
- kernel_segment_command_t *orig_sg = (kernel_segment_command_t *) cmd;
-
- if (LIT_STRNEQL(orig_sg->segname, SEG_TEXT))
- orig_ts = orig_sg;
- else if (LIT_STRNEQL(orig_sg->segname, SEG_LINKEDIT))
- orig_le = orig_sg;
- else if (LIT_STRNEQL(orig_sg->segname, ""))
- orig_ts = orig_sg; /* kexts have a single unnamed segment */
- }
- else if (cmd->cmd == LC_SYMTAB)
- orig_st = (struct symtab_command *) cmd;
-
- cmd = (struct load_command *) ((caddr_t) cmd + cmd->cmdsize);
- }
-
- if ((orig_ts == NULL) || (orig_st == NULL) || (orig_le == NULL))
- return;
-
- sym = (struct nlist_64 *)(orig_le->vmaddr + orig_st->symoff - orig_le->fileoff);
- strings = (char *)(orig_le->vmaddr + orig_st->stroff - orig_le->fileoff);
-
- /* Find extent of the TEXT section */
- instrLow = (uintptr_t)orig_ts->vmaddr;
- instrHigh = (uintptr_t)(orig_ts->vmaddr + orig_ts->vmsize);
-
- for (i = 0; i < orig_st->nsyms; i++) {
- fbt_probe_t *fbt, *retfbt;
- machine_inst_t *instr, *limit, theInstr, i1, i2, i3;
- uint8_t n_type = sym[i].n_type & (N_TYPE | N_EXT);
- char *name = strings + sym[i].n_un.n_strx;
- int size;
-
- /* Check that the symbol is a global and that it has a name. */
- if (((N_SECT | N_EXT) != n_type && (N_ABS | N_EXT) != n_type))
- continue;
-
- if (0 == sym[i].n_un.n_strx) /* iff a null, "", name. */
- continue;
-
- /* Lop off omnipresent leading underscore. */
- if (*name == '_')
- name += 1;
-
- if (LIT_STRNSTART(name, "dtrace_") && !LIT_STRNSTART(name, "dtrace_safe_")) {
- /*
- * Anything beginning with "dtrace_" may be called
- * from probe context unless it explitly indicates
- * that it won't be called from probe context by
- * using the prefix "dtrace_safe_".
+ /*
+ * The symbol address (symbol->dtsym_addr), passed in from
+ * user space, is already slid for both kexts and the kernel.
*/
- continue;
- }
-
- if (LIT_STRNSTART(name, "fasttrap_") ||
- LIT_STRNSTART(name, "fuword") ||
- LIT_STRNSTART(name, "suword") ||
- LIT_STRNEQL(name, "sprlock") ||
- LIT_STRNEQL(name, "sprunlock") ||
- LIT_STRNEQL(name, "uread") ||
- LIT_STRNEQL(name, "uwrite"))
- continue; /* Fasttrap inner-workings. */
-
- if (LIT_STRNSTART(name, "dsmos_"))
- continue; /* Don't Steal Mac OS X! */
-
- if (LIT_STRNSTART(name, "_dtrace"))
- continue; /* Shims in dtrace.c */
-
- if (LIT_STRNSTART(name, "chud"))
- continue; /* Professional courtesy. */
-
- if (LIT_STRNSTART(name, "hibernate_"))
- continue; /* Let sleeping dogs lie. */
-
- if (LIT_STRNEQL(name, "ZN9IOService14newTemperatureElPS_") || /* IOService::newTemperature */
- LIT_STRNEQL(name, "ZN9IOService26temperatureCriticalForZoneEPS_")) /* IOService::temperatureCriticalForZone */
- continue; /* Per the fire code */
-
- /*
- * Place no probes (illegal instructions) in the exception handling path!
- */
- if (LIT_STRNEQL(name, "t_invop") ||
- LIT_STRNEQL(name, "enter_lohandler") ||
- LIT_STRNEQL(name, "lo_alltraps") ||
- LIT_STRNEQL(name, "kernel_trap") ||
- LIT_STRNEQL(name, "interrupt") ||
- LIT_STRNEQL(name, "i386_astintr"))
- continue;
-
- if (LIT_STRNEQL(name, "current_thread") ||
- LIT_STRNEQL(name, "ast_pending") ||
- LIT_STRNEQL(name, "fbt_perfCallback") ||
- LIT_STRNEQL(name, "machine_thread_get_kern_state") ||
- LIT_STRNEQL(name, "get_threadtask") ||
- LIT_STRNEQL(name, "ml_set_interrupts_enabled") ||
- LIT_STRNEQL(name, "dtrace_invop") ||
- LIT_STRNEQL(name, "fbt_invop") ||
- LIT_STRNEQL(name, "sdt_invop") ||
- LIT_STRNEQL(name, "max_valid_stack_address"))
- continue;
-
- /*
- * Voodoo.
- */
- if (LIT_STRNSTART(name, "machine_stack_") ||
- LIT_STRNSTART(name, "mapping_") ||
- LIT_STRNEQL(name, "tmrCvt") ||
-
- LIT_STRNSTART(name, "tsc_") ||
-
- LIT_STRNSTART(name, "pmCPU") ||
- LIT_STRNEQL(name, "pmKextRegister") ||
- LIT_STRNEQL(name, "pmMarkAllCPUsOff") ||
- LIT_STRNEQL(name, "pmSafeMode") ||
- LIT_STRNEQL(name, "pmTimerSave") ||
- LIT_STRNEQL(name, "pmTimerRestore") ||
- LIT_STRNEQL(name, "pmUnRegister") ||
- LIT_STRNSTART(name, "pms") ||
- LIT_STRNEQL(name, "power_management_init") ||
- LIT_STRNSTART(name, "usimple_") ||
- LIT_STRNSTART(name, "lck_spin_lock") ||
- LIT_STRNSTART(name, "lck_spin_unlock") ||
-
- LIT_STRNSTART(name, "rtc_") ||
- LIT_STRNSTART(name, "_rtc_") ||
- LIT_STRNSTART(name, "rtclock_") ||
- LIT_STRNSTART(name, "clock_") ||
- LIT_STRNSTART(name, "absolutetime_to_") ||
- LIT_STRNEQL(name, "setPop") ||
- LIT_STRNEQL(name, "nanoseconds_to_absolutetime") ||
- LIT_STRNEQL(name, "nanotime_to_absolutetime") ||
-
- LIT_STRNSTART(name, "etimer_") ||
-
- LIT_STRNSTART(name, "commpage_") ||
- LIT_STRNSTART(name, "pmap_") ||
- LIT_STRNSTART(name, "ml_") ||
- LIT_STRNSTART(name, "PE_") ||
- LIT_STRNEQL(name, "kprintf") ||
- LIT_STRNSTART(name, "lapic_") ||
- LIT_STRNSTART(name, "acpi_"))
- continue;
-
- /*
- * Avoid machine_ routines. PR_5346750.
- */
- if (LIT_STRNSTART(name, "machine_"))
- continue;
-
- if (LIT_STRNEQL(name, "handle_pending_TLB_flushes"))
- continue;
+ dtrace_symbol_t* symbol = &module_symbols->dtmodsyms_symbols[i];
- /*
- * Place no probes on critical routines. PR_5221096
- */
- if (!gIgnoreFBTBlacklist &&
- bsearch( name, critical_blacklist, CRITICAL_BLACKLIST_COUNT, sizeof(name), _cmp ) != NULL)
- continue;
-
- /*
- * Place no probes that could be hit in probe context.
- */
- if (!gIgnoreFBTBlacklist &&
- bsearch( name, probe_ctx_closure, PROBE_CTX_CLOSURE_COUNT, sizeof(name), _cmp ) != NULL)
- continue;
-
- /*
- * Place no probes that could be hit on the way to the debugger.
- */
- if (LIT_STRNSTART(name, "kdp_") ||
- LIT_STRNSTART(name, "kdb_") ||
- LIT_STRNSTART(name, "kdbg_") ||
- LIT_STRNSTART(name, "kdebug_") ||
- LIT_STRNEQL(name, "kernel_debug") ||
- LIT_STRNEQL(name, "Debugger") ||
- LIT_STRNEQL(name, "Call_DebuggerC") ||
- LIT_STRNEQL(name, "lock_debugger") ||
- LIT_STRNEQL(name, "unlock_debugger") ||
- LIT_STRNEQL(name, "SysChoked"))
- continue;
-
- /*
- * Place no probes that could be hit on the way to a panic.
- */
- if (NULL != strstr(name, "panic_") ||
- LIT_STRNEQL(name, "panic") ||
- LIT_STRNEQL(name, "handleMck") ||
- LIT_STRNEQL(name, "unresolved_kernel_trap"))
- continue;
-
- if (dtrace_probe_lookup(fbt_id, modname, name, NULL) != 0)
- continue;
-
- for (j = 0, instr = (machine_inst_t *)sym[i].n_value, theInstr = 0;
- (j < 4) && ((uintptr_t)instr >= instrLow) && (instrHigh > (uintptr_t)(instr + 2));
- j++) {
- theInstr = instr[0];
- if (theInstr == FBT_PUSH_RBP || theInstr == FBT_RET || theInstr == FBT_RET_IMM16)
- break;
-
- if ((size = dtrace_instr_size(instr)) <= 0)
- break;
-
- instr += size;
- }
-
- if (theInstr != FBT_PUSH_RBP)
- continue;
-
- i1 = instr[1];
- i2 = instr[2];
- i3 = instr[3];
-
- limit = (machine_inst_t *)instrHigh;
-
- if (i1 == FBT_REX_RSP_RBP && i2 == FBT_MOV_RSP_RBP0 && i3 == FBT_MOV_RSP_RBP1) {
- instr += 1; /* Advance to the mov %rsp,%rbp */
- theInstr = i1;
- } else {
- continue;
- }
-#if 0
- else {
+ char* name = symbol->dtsym_name;
+
+ /* Lop off omnipresent leading underscore. */
+ if (*name == '_')
+ name += 1;
+
/*
- * Sometimes, the compiler will schedule an intervening instruction
- * in the function prologue. Example:
- *
- * _mach_vm_read:
- * 000006d8 pushl %ebp
- * 000006d9 movl $0x00000004,%edx
- * 000006de movl %esp,%ebp
- *
- * Try the next instruction, to see if it is a movl %esp,%ebp
+ * For now, only symbols in the kernel itself are blacklisted; symbols from other modules are not filtered.
*/
-
- instr += 1; /* Advance past the pushl %ebp */
- if ((size = dtrace_instr_size(instr)) <= 0)
- continue;
-
- instr += size;
-
- if ((instr + 1) >= limit)
- continue;
-
- i1 = instr[0];
- i2 = instr[1];
-
- if (!(i1 == FBT_MOVL_ESP_EBP0_V0 && i2 == FBT_MOVL_ESP_EBP1_V0) &&
- !(i1 == FBT_MOVL_ESP_EBP0_V1 && i2 == FBT_MOVL_ESP_EBP1_V1))
- continue;
-
- /* instr already points at the movl %esp,%ebp */
- theInstr = i1;
- }
-#endif
-
- fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
- strlcpy( (char *)&(fbt->fbtp_name), name, MAX_FBTP_NAME_CHARS );
- fbt->fbtp_id = dtrace_probe_create(fbt_id, modname, name, FBT_ENTRY, FBT_AFRAMES_ENTRY, fbt);
- fbt->fbtp_patchpoint = instr;
- fbt->fbtp_ctl = ctl;
- fbt->fbtp_loadcnt = ctl->mod_loadcnt;
- fbt->fbtp_rval = DTRACE_INVOP_MOV_RSP_RBP;
- fbt->fbtp_savedval = theInstr;
- fbt->fbtp_patchval = FBT_PATCHVAL;
-
- fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(instr)];
- fbt->fbtp_symndx = i;
- fbt_probetab[FBT_ADDR2NDX(instr)] = fbt;
-
- retfbt = NULL;
-again:
- if (instr >= limit)
- continue;
-
- /*
- * If this disassembly fails, then we've likely walked off into
- * a jump table or some other unsuitable area. Bail out of the
- * disassembly now.
- */
- if ((size = dtrace_instr_size(instr)) <= 0)
- continue;
-
- /*
- * We (desperately) want to avoid erroneously instrumenting a
- * jump table, especially given that our markers are pretty
- * short: two bytes on x86, and just one byte on amd64. To
- * determine if we're looking at a true instruction sequence
- * or an inline jump table that happens to contain the same
- * byte sequences, we resort to some heuristic sleeze: we
- * treat this instruction as being contained within a pointer,
- * and see if that pointer points to within the body of the
- * function. If it does, we refuse to instrument it.
- */
- for (j = 0; j < sizeof (uintptr_t); j++) {
- uintptr_t check = (uintptr_t)instr - j;
- uint8_t *ptr;
-
- if (check < sym[i].n_value)
- break;
-
- if (check + sizeof (uintptr_t) > (uintptr_t)limit)
- continue;
-
- ptr = *(uint8_t **)check;
-
- if (ptr >= (uint8_t *)sym[i].n_value && ptr < limit) {
- instr += size;
- goto again;
- }
- }
-
- /*
- * OK, it's an instruction.
- */
- theInstr = instr[0];
-
- /* Walked onto the start of the next routine? If so, bail out of this function. */
- if (theInstr == FBT_PUSH_RBP)
- continue;
-
- if (!(size == 1 && (theInstr == FBT_POP_RBP || theInstr == FBT_LEAVE))) {
- instr += size;
- goto again;
- }
-
- /*
- * Found the pop %rbp; or leave.
- */
- machine_inst_t *patch_instr = instr;
-
- /*
- * Scan forward for a "ret", or "jmp".
- */
- instr += size;
- if (instr >= limit)
- continue;
-
- size = dtrace_instr_size(instr);
- if (size <= 0) /* Failed instruction decode? */
- continue;
-
- theInstr = instr[0];
-
- if (!(size == FBT_RET_LEN && (theInstr == FBT_RET)) &&
- !(size == FBT_RET_IMM16_LEN && (theInstr == FBT_RET_IMM16)) &&
- !(size == FBT_JMP_SHORT_REL_LEN && (theInstr == FBT_JMP_SHORT_REL)) &&
- !(size == FBT_JMP_NEAR_REL_LEN && (theInstr == FBT_JMP_NEAR_REL)) &&
- !(size == FBT_JMP_FAR_ABS_LEN && (theInstr == FBT_JMP_FAR_ABS)))
- continue;
-
- /*
- * pop %rbp; ret; or leave; ret; or leave; jmp tailCalledFun; -- We have a winner!
- */
- fbt = kmem_zalloc(sizeof (fbt_probe_t), KM_SLEEP);
- strlcpy( (char *)&(fbt->fbtp_name), name, MAX_FBTP_NAME_CHARS );
-
- if (retfbt == NULL) {
- fbt->fbtp_id = dtrace_probe_create(fbt_id, modname,
- name, FBT_RETURN, FBT_AFRAMES_RETURN, fbt);
- } else {
- retfbt->fbtp_next = fbt;
- fbt->fbtp_id = retfbt->fbtp_id;
- }
-
- retfbt = fbt;
- fbt->fbtp_patchpoint = patch_instr;
- fbt->fbtp_ctl = ctl;
- fbt->fbtp_loadcnt = ctl->mod_loadcnt;
-
- if (*patch_instr == FBT_POP_RBP) {
- fbt->fbtp_rval = DTRACE_INVOP_POP_RBP;
- } else {
- ASSERT(*patch_instr == FBT_LEAVE);
- fbt->fbtp_rval = DTRACE_INVOP_LEAVE;
+ if (MOD_IS_MACH_KERNEL(ctl) && !is_symbol_valid(name))
+ continue;
+
+ __provide_probe_64(ctl, (uintptr_t)symbol->dtsym_addr, (uintptr_t)(symbol->dtsym_addr + symbol->dtsym_size), modname, name, (machine_inst_t*)(uintptr_t)symbol->dtsym_addr);
}
- fbt->fbtp_roffset =
- (uintptr_t)(patch_instr - (uint8_t *)sym[i].n_value);
-
- fbt->fbtp_savedval = *patch_instr;
- fbt->fbtp_patchval = FBT_PATCHVAL;
- fbt->fbtp_hashnext = fbt_probetab[FBT_ADDR2NDX(patch_instr)];
- fbt->fbtp_symndx = i;
- fbt_probetab[FBT_ADDR2NDX(patch_instr)] = fbt;
-
- instr += size;
- goto again;
}
}
-#else
-#error Unknown arch
-#endif
-extern struct modctl g_fbt_kernctl;
-#undef kmem_alloc /* from its binding to dt_kmem_alloc glue */
-#undef kmem_free /* from its binding to dt_kmem_free glue */
-#include <vm/vm_kern.h>
+extern int dtrace_kernel_symbol_mode;
/*ARGSUSED*/
void
fbt_provide_module(void *arg, struct modctl *ctl)
{
-#pragma unused(ctl)
- __fbt_provide_module(arg, &g_fbt_kernctl);
+ ASSERT(ctl != NULL);
+ ASSERT(dtrace_kernel_symbol_mode != DTRACE_KERNEL_SYMBOLS_NEVER);
+ lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED);
- if ( (vm_offset_t)g_fbt_kernctl.address != (vm_offset_t )NULL )
- kmem_free(kernel_map, (vm_offset_t)g_fbt_kernctl.address, round_page(g_fbt_kernctl.size));
- g_fbt_kernctl.address = 0;
- g_fbt_kernctl.size = 0;
+ if (MOD_FBT_DONE(ctl))
+ return;
+
+ if (!is_module_valid(ctl)) {
+ ctl->mod_flags |= MODCTL_FBT_INVALID;
+ return;
+ }
+
+ if (MOD_HAS_KERNEL_SYMBOLS(ctl)) {
+ __kernel_syms_provide_module(arg, ctl);
+ ctl->mod_flags |= MODCTL_FBT_PROBES_PROVIDED;
+ return;
+ }
+
+ if (MOD_HAS_USERSPACE_SYMBOLS(ctl)) {
+ __user_syms_provide_module(arg, ctl);
+ ctl->mod_flags |= MODCTL_FBT_PROBES_PROVIDED;
+ if (MOD_FBT_PROVIDE_PRIVATE_PROBES(ctl))
+ ctl->mod_flags |= MODCTL_FBT_PRIVATE_PROBES_PROVIDED;
+ return;
+ }
}