X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/4d15aeb193b2c68f1d38666c317f8d3734f5f083..5ba3f43ea354af8ad55bea84372a2bc834d8757c:/bsd/dev/dtrace/fbt.c?ds=inline diff --git a/bsd/dev/dtrace/fbt.c b/bsd/dev/dtrace/fbt.c index e05d5a922..25f052f1a 100644 --- a/bsd/dev/dtrace/fbt.c +++ b/bsd/dev/dtrace/fbt.c @@ -31,11 +31,12 @@ #endif #endif -#include +#include #include #include #include +#include #include #include #include @@ -53,7 +54,11 @@ /* #include */ struct savearea_t; /* Used anonymously */ -#if defined(__x86_64__) +#if defined(__arm__) || defined(__arm64__) +typedef kern_return_t (*perfCallback)(int, struct savearea_t *, __unused int, __unused int); +extern perfCallback tempDTraceTrapHook; +extern kern_return_t fbt_perfCallback(int, struct savearea_t *, __unused int, __unused int); +#elif defined(__x86_64__) typedef kern_return_t (*perfCallback)(int, struct savearea_t *, uintptr_t *, __unused int); extern perfCallback tempDTraceTrapHook; extern kern_return_t fbt_perfCallback(int, struct savearea_t *, uintptr_t *, __unused int); @@ -71,8 +76,407 @@ fbt_probe_t **fbt_probetab; int fbt_probetab_mask; static int fbt_verbose = 0; +int ignore_fbt_blacklist = 0; + +extern int dtrace_kernel_symbol_mode; + + void fbt_init( void ); +/* + * Critical routines that must not be probed. PR_5221096, PR_5379018. + * The blacklist must be kept in alphabetic order for purposes of bsearch(). 
+ */ +static const char * critical_blacklist[] = +{ + "Call_DebuggerC", + "SysChoked", + "_ZN9IOService14newTemperatureElPS_", /* IOService::newTemperature */ + "_ZN9IOService26temperatureCriticalForZoneEPS_", /* IOService::temperatureCriticalForZone */ + "_ZNK6OSData14getBytesNoCopyEv", /* Data::getBytesNoCopy, IOHibernateSystemWake path */ + "_disable_preemption", + "_enable_preemption", + "bcopy_phys", + "console_cpu_alloc", + "console_cpu_free", + "cpu_IA32e_disable", + "cpu_IA32e_enable", + "cpu_NMI_interrupt", + "cpu_control", + "cpu_data_alloc", + "cpu_desc_init", + "cpu_desc_init64", + "cpu_desc_load", + "cpu_desc_load64", + "cpu_exit_wait", + "cpu_info", + "cpu_info_count", + "cpu_init", + "cpu_interrupt", + "cpu_machine_init", + "cpu_mode_init", + "cpu_processor_alloc", + "cpu_processor_free", + "cpu_signal_handler", + "cpu_sleep", + "cpu_start", + "cpu_subtype", + "cpu_thread_alloc", + "cpu_thread_halt", + "cpu_thread_init", + "cpu_threadtype", + "cpu_to_processor", + "cpu_topology_sort", + "cpu_topology_start_cpu", + "cpu_type", + "cpuid_cpu_display", + "cpuid_extfeatures", + "dtrace_invop", + "enter_lohandler", + "fbt_invop", + "fbt_perfCallback", + "get_threadtask", + "handle_pending_TLB_flushes", + "hw_compare_and_store", + "interrupt", + "kernel_trap", + "kprintf", + "lo_alltraps", + "lock_debugger", + "machine_idle_cstate", + "machine_thread_get_kern_state", + "mca_cpu_alloc", + "mca_cpu_init", + "ml_nofault_copy", + "nanoseconds_to_absolutetime", + "nanotime_to_absolutetime", + "packA", + "panic", + "pmKextRegister", + "pmMarkAllCPUsOff", + "pmSafeMode", + "pmTimerRestore", + "pmTimerSave", + "pmUnRegister", + "pmap_cpu_alloc", + "pmap_cpu_free", + "pmap_cpu_high_map_vaddr", + "pmap_cpu_high_shared_remap", + "pmap_cpu_init", + "power_management_init", + "preemption_underflow_panic", + "register_cpu_setup_func", + "sdt_invop", + "sprlock", + "sprunlock", + "t_invop", + "tmrCvt", + "unlock_debugger", + "unpackA", +
"unregister_cpu_setup_func", + "uread", /* byte-wise order: "un..." sorts before "ur..."; bsearch() depends on it */ + "uwrite", + "vstart" +}; +#define CRITICAL_BLACKLIST_COUNT (sizeof(critical_blacklist)/sizeof(critical_blacklist[0])) + +/* + * The transitive closure of entry points that can be reached from probe context. + * (Apart from routines whose names begin with dtrace_). + */ +static const char * probe_ctx_closure[] = +{ + "ClearIdlePop", + "Debugger", + "IS_64BIT_PROCESS", + "OSCompareAndSwap", + "SetIdlePop", + "absolutetime_to_microtime", + "act_set_astbsd", + "arm_init_idle_cpu", + "ast_dtrace_on", + "ast_pending", + "clean_dcache", + "clean_mmu_dcache", + "clock_get_calendar_nanotime_nowait", + "copyin", + "copyin_kern", + "copyin_user", + "copyinstr", + "copyout", + "copyoutstr", + "cpu_number", + "current_proc", + "current_processor", + "current_task", + "current_thread", + "debug_enter", + "drain_write_buffer", + "find_user_regs", + "flush_dcache", + "flush_tlb64", + "get_bsdtask_info", + "get_bsdthread_info", + "hertz_tick", + "hw_atomic_and", + "invalidate_mmu_icache", + "kauth_cred_get", + "kauth_getgid", + "kauth_getuid", + "kernel_preempt_check", + "kvtophys", + "mach_absolute_time", + "max_valid_stack_address", + "memcpy", + "memmove", + "ml_at_interrupt_context", + "ml_phys_write_byte_64", + "ml_phys_write_half_64", + "ml_phys_write_word_64", + "ml_set_interrupts_enabled", + "mt_core_snap", + "mt_cur_cpu_cycles", + "mt_cur_cpu_instrs", + "mt_cur_thread_cycles", + "mt_cur_thread_instrs", + "mt_fixed_counts", + "mt_fixed_counts_internal", + "mt_mtc_update_count", + "mt_update_thread", + "ovbcopy", + "panic", + "pmap64_pde", + "pmap64_pdpt", + "pmap_find_phys", + "pmap_get_mapwindow", + "pmap_pde", + "pmap_pte", + "pmap_put_mapwindow", + "pmap_valid_page", + "prf", + "proc_is64bit", + "proc_selfname", + "psignal_lock", + "rtc_nanotime_load", + "rtc_nanotime_read", + "sdt_getargdesc", + "setPop", + "strlcpy", + "sync_iss_to_iks_unconditionally", + "systrace_stub", + "timer_grab" +}; +#define PROBE_CTX_CLOSURE_COUNT
(sizeof(probe_ctx_closure)/sizeof(probe_ctx_closure[0])) + +#pragma clang diagnostic push +#pragma clang diagnostic ignored "-Wcast-qual" +static int _cmp(const void *a, const void *b) +{ + return strncmp((const char *)a, *(const char **)b, strlen((const char *)a) + 1); +} +#pragma clang diagnostic pop +/* + * Module validation + */ +int +fbt_module_excluded(struct modctl* ctl) +{ + ASSERT(!MOD_FBT_DONE(ctl)); + + if (ctl->mod_address == 0 || ctl->mod_size == 0) { + return TRUE; + } + + if (ctl->mod_loaded == 0) { + return TRUE; + } + + /* + * If the user sets this, trust they know what they are doing. + */ + if (ignore_fbt_blacklist) + return FALSE; + + /* + * These drivers control low level functions that when traced + * cause problems often in the sleep/wake paths as well as + * critical debug and panic paths. + * If somebody really wants to drill in on one of these kexts, then + * they can override blacklisting using the boot-arg above. + */ + +#ifdef __x86_64__ + if (strstr(ctl->mod_modname, "AppleACPIEC") != NULL) + return TRUE; + + if (strstr(ctl->mod_modname, "AppleACPIPlatform") != NULL) + return TRUE; + + if (strstr(ctl->mod_modname, "AppleRTC") != NULL) + return TRUE; + + if (strstr(ctl->mod_modname, "IOACPIFamily") != NULL) + return TRUE; + + if (strstr(ctl->mod_modname, "AppleIntelCPUPowerManagement") != NULL) + return TRUE; + + if (strstr(ctl->mod_modname, "AppleProfile") != NULL) + return TRUE; + + if (strstr(ctl->mod_modname, "AppleIntelProfile") != NULL) + return TRUE; + + if (strstr(ctl->mod_modname, "AppleEFI") != NULL) + return TRUE; + +#elif __arm__ || __arm64__ + if (LIT_STRNEQL(ctl->mod_modname, "com.apple.driver.AppleARMPlatform") || + LIT_STRNEQL(ctl->mod_modname, "com.apple.driver.AppleARMPL192VIC") || + LIT_STRNEQL(ctl->mod_modname, "com.apple.driver.AppleInterruptController")) + return TRUE; +#endif + + return FALSE; +} + +/* + * FBT probe name validation + */ +int +fbt_excluded(const char* name) +{ + /* + * If the user set this, trust 
they know what they are doing. + */ + if (ignore_fbt_blacklist) + return FALSE; + + if (LIT_STRNSTART(name, "dtrace_") && !LIT_STRNSTART(name, "dtrace_safe_")) { + /* + * Anything beginning with "dtrace_" may be called + * from probe context unless it explicitly indicates + * that it won't be called from probe context by + * using the prefix "dtrace_safe_". + */ + return TRUE; + } + + /* + * Place no probes on critical routines (5221096) + */ + if (bsearch( name, critical_blacklist, CRITICAL_BLACKLIST_COUNT, sizeof(name), _cmp ) != NULL) + return TRUE; + + /* + * Place no probes that could be hit in probe context. + */ + if (bsearch( name, probe_ctx_closure, PROBE_CTX_CLOSURE_COUNT, sizeof(name), _cmp ) != NULL) { + return TRUE; + } + + /* + * Place no probes that could be hit in probe context. + * In the interests of safety, some of these may be overly cautious. + * Also exclude very low-level "firmware" class calls. + */ + if (LIT_STRNSTART(name, "cpu_") || /* Coarse */ + LIT_STRNSTART(name, "platform_") || /* Coarse */ + LIT_STRNSTART(name, "machine_") || /* Coarse */ + LIT_STRNSTART(name, "ml_") || /* Coarse */ + LIT_STRNSTART(name, "PE_") || /* Coarse */ + LIT_STRNSTART(name, "rtc_") || /* Coarse */ + LIT_STRNSTART(name, "_rtc_") || + LIT_STRNSTART(name, "rtclock_") || + LIT_STRNSTART(name, "clock_") || + LIT_STRNSTART(name, "bcopy") || + LIT_STRNSTART(name, "pmap_") || + LIT_STRNSTART(name, "hw_") || /* Coarse */ + LIT_STRNSTART(name, "lapic_") || /* Coarse */ + LIT_STRNSTART(name, "OSAdd") || + LIT_STRNSTART(name, "OSBit") || + LIT_STRNSTART(name, "OSDecrement") || + LIT_STRNSTART(name, "OSIncrement") || + LIT_STRNSTART(name, "OSCompareAndSwap") || + LIT_STRNSTART(name, "etimer_") || + LIT_STRNSTART(name, "dtxnu_kern_") || + LIT_STRNSTART(name, "flush_mmu_tlb_")) + return TRUE; + /* + * Fasttrap inner-workings we can't instrument + * on Intel (6230149) + */ + if (LIT_STRNSTART(name, "fasttrap_") || + LIT_STRNSTART(name, "fuword") || + LIT_STRNSTART(name,
"suword")) + return TRUE; + + if (LIT_STRNSTART(name, "_dtrace")) + return TRUE; /* Shims in dtrace.c */ + + if (LIT_STRNSTART(name, "hibernate_")) + return TRUE; + + /* + * Place no probes in the exception handling path + */ +#if __arm__ || __arm64__ + if (LIT_STRNSTART(name, "fleh_") || + LIT_STRNSTART(name, "sleh_") || + LIT_STRNSTART(name, "timer_state_event") || + LIT_STRNEQL(name, "get_vfp_enabled")) + return TRUE; + + if (LIT_STRNSTART(name, "_ZNK15OSMetaClassBase8metaCastEPK11OSMetaClass") || + LIT_STRNSTART(name, "_ZN15OSMetaClassBase12safeMetaCastEPKS_PK11OSMetaClass") || + LIT_STRNSTART(name, "_ZNK11OSMetaClass13checkMetaCastEPK15OSMetaClassBase")) + return TRUE; +#endif + + +#ifdef __x86_64__ + if (LIT_STRNSTART(name, "machine_") || + LIT_STRNSTART(name, "mapping_") || + LIT_STRNSTART(name, "tsc_") || + LIT_STRNSTART(name, "pmCPU") || + LIT_STRNSTART(name, "pms") || + LIT_STRNSTART(name, "usimple_") || + LIT_STRNSTART(name, "lck_spin_lock") || + LIT_STRNSTART(name, "lck_spin_unlock") || + LIT_STRNSTART(name, "absolutetime_to_") || + LIT_STRNSTART(name, "commpage_") || + LIT_STRNSTART(name, "ml_") || + LIT_STRNSTART(name, "PE_") || + LIT_STRNSTART(name, "act_machine") || + LIT_STRNSTART(name, "acpi_") || + LIT_STRNSTART(name, "pal_")) { + return TRUE; + } + // Don't Steal Mac OS X + if (LIT_STRNSTART(name, "dsmos_")) + return TRUE; + +#endif + + /* + * Place no probes that could be hit on the way to the debugger. + */ + if (LIT_STRNSTART(name, "kdp_") || + LIT_STRNSTART(name, "kdb_") || + LIT_STRNSTART(name, "debug_")) { + return TRUE; + } + + /* + * Place no probes that could be hit on the way to a panic. 
+ */ + if (NULL != strstr(name, "panic_")) + return TRUE; + + return FALSE; +} + + /*ARGSUSED*/ static void fbt_destroy(void *arg, dtrace_id_t id, void *parg) @@ -267,6 +671,13 @@ fbt_resume(void *arg, dtrace_id_t id, void *parg) (void)ml_nofault_copy( (vm_offset_t)&fbt->fbtp_patchval, (vm_offset_t)fbt->fbtp_patchpoint, sizeof(fbt->fbtp_patchval)); +#if CONFIG_EMBEDDED + /* + * Make the patched instruction visible via a data + instruction cache flush. + */ + flush_dcache((vm_offset_t)fbt->fbtp_patchpoint,(vm_size_t)sizeof(fbt->fbtp_patchval), 0); + invalidate_icache((vm_offset_t)fbt->fbtp_patchpoint,(vm_size_t)sizeof(fbt->fbtp_patchval), 0); +#endif fbt->fbtp_currentval = fbt->fbtp_patchval; } @@ -374,6 +785,85 @@ err: } #endif /* __APPLE__ */ +static void +fbt_provide_module_user_syms(struct modctl *ctl) +{ + unsigned int i; + char *modname = ctl->mod_modname; + + dtrace_module_symbols_t* module_symbols = ctl->mod_user_symbols; + if (module_symbols) { + for (i=0; i<module_symbols->dtmodsyms_count; i++) { + + /* + * symbol->dtsym_addr (the symbol address) passed in from + * user space, is already slid for both kexts and kernel. + */ + dtrace_symbol_t* symbol = &module_symbols->dtmodsyms_symbols[i]; + + char* name = symbol->dtsym_name; + + /* Lop off omnipresent leading underscore. */ + if (*name == '_') + name += 1; + + /* + * We're only blacklisting functions in the kernel for now.
+ */ + if (MOD_IS_MACH_KERNEL(ctl) && fbt_excluded(name)) + continue; + + /* + * Ignore symbols with a null address + */ + if (!symbol->dtsym_addr) + continue; + + fbt_provide_probe(ctl, (uintptr_t)symbol->dtsym_addr, (uintptr_t)(symbol->dtsym_addr + symbol->dtsym_size), modname, name, (machine_inst_t*)(uintptr_t)symbol->dtsym_addr); + } + } +} + + +void +fbt_provide_module(void *arg, struct modctl *ctl) +{ +#pragma unused(arg) + ASSERT(ctl != NULL); + ASSERT(dtrace_kernel_symbol_mode != DTRACE_KERNEL_SYMBOLS_NEVER); + LCK_MTX_ASSERT(&mod_lock, LCK_MTX_ASSERT_OWNED); + + // Update the "ignore blacklist" bit + if (ignore_fbt_blacklist) + ctl->mod_flags |= MODCTL_FBT_PROVIDE_BLACKLISTED_PROBES; + + if (MOD_FBT_DONE(ctl)) + return; + + if (fbt_module_excluded(ctl)) { + ctl->mod_flags |= MODCTL_FBT_INVALID; + return; + } + + if (MOD_HAS_KERNEL_SYMBOLS(ctl)) { + fbt_provide_module_kernel_syms(ctl); + ctl->mod_flags |= MODCTL_FBT_PROBES_PROVIDED; + if (MOD_FBT_PROVIDE_BLACKLISTED_PROBES(ctl)) + ctl->mod_flags |= MODCTL_FBT_BLACKLISTED_PROBES_PROVIDED; + return; + } + + if (MOD_HAS_USERSPACE_SYMBOLS(ctl)) { + fbt_provide_module_user_syms(ctl); + ctl->mod_flags |= MODCTL_FBT_PROBES_PROVIDED; + if (MOD_FBT_PROVIDE_PRIVATE_PROBES(ctl)) + ctl->mod_flags |= MODCTL_FBT_PRIVATE_PROBES_PROVIDED; + if (MOD_FBT_PROVIDE_BLACKLISTED_PROBES(ctl)) + ctl->mod_flags |= MODCTL_FBT_BLACKLISTED_PROBES_PROVIDED; + return; + } +} + static dtrace_pattr_t fbt_attr = { { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA }, { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN }, @@ -451,6 +941,47 @@ _fbt_open(dev_t dev, int flags, int devtype, struct proc *p) #define FBT_MAJOR -24 /* let the kernel pick the device number */ +SYSCTL_DECL(_kern_dtrace); + +static int +sysctl_dtrace_ignore_fbt_blacklist SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg2) + int err; + int value = *(int*)arg1; + + err = sysctl_io_number(req, value, sizeof(value), &value, 
NULL); + if (err) + return (err); + if (req->newptr) { + if (!(value == 0 || value == 1)) + return (ERANGE); + + /* + * We do not allow setting the blacklist back to on, as we have no way + * of knowing if those unsafe probes are still used. + * + * If we are using kernel symbols, we also do not allow any change, + * since the symbols are jettison'd after the first pass. + * + * We do not need to take any locks here because those symbol modes + * are permanent and do not change after boot. + */ + if (value != 1 || dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER || + dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL) + return (EPERM); + + ignore_fbt_blacklist = 1; + } + + return (0); +} + +SYSCTL_PROC(_kern_dtrace, OID_AUTO, ignore_fbt_blacklist, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + &ignore_fbt_blacklist, 0, + sysctl_dtrace_ignore_fbt_blacklist, "I", "fbt provider ignore blacklist"); + /* * A struct describing which functions will get invoked for certain * actions. @@ -473,8 +1004,8 @@ static struct cdevsw fbt_cdevsw = 0 /* type */ }; -int gIgnoreFBTBlacklist = 0; -static int gFBTInited = 0; +static int fbt_inited = 0; + #undef kmem_alloc /* from its binding to dt_kmem_alloc glue */ #undef kmem_free /* from its binding to dt_kmem_free glue */ #include @@ -482,7 +1013,7 @@ static int gFBTInited = 0; void fbt_init( void ) { - if (0 == gFBTInited) + if (0 == fbt_inited) { int majdevno = cdevsw_add(FBT_MAJOR, &fbt_cdevsw); @@ -491,11 +1022,11 @@ fbt_init( void ) return; } - PE_parse_boot_argn("IgnoreFBTBlacklist", &gIgnoreFBTBlacklist, sizeof (gIgnoreFBTBlacklist)); + PE_parse_boot_argn("IgnoreFBTBlacklist", &ignore_fbt_blacklist, sizeof (ignore_fbt_blacklist)); fbt_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH ); - gFBTInited = 1; /* Ensure this initialization occurs just one time. */ + fbt_inited = 1; /* Ensure this initialization occurs just one time. */ } else panic("fbt_init: called twice!\n");