git.saurik.com Git - apple/xnu.git/blobdiff - bsd/dev/dtrace/fbt.c
xnu-4570.1.46.tar.gz
[apple/xnu.git] / bsd / dev / dtrace / fbt.c
index e05d5a92275868aeb561cf08dde21d94b4f1c526..25f052f1a4b3dd0993ce86b871cc958c4598e7c0 100644 (file)
 #endif
 #endif
 
-#include <mach-o/loader.h> 
+#include <mach-o/loader.h>
 #include <libkern/kernel_mach_header.h>
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/sysctl.h>
 #include <sys/errno.h>
 #include <sys/stat.h>
 #include <sys/ioctl.h>
 /* #include <machine/trap.h> */
 struct savearea_t; /* Used anonymously */
 
-#if   defined(__x86_64__)
+#if defined(__arm__) || defined(__arm64__)
+typedef kern_return_t (*perfCallback)(int, struct savearea_t *, __unused int, __unused int);
+extern perfCallback tempDTraceTrapHook;
+extern kern_return_t fbt_perfCallback(int, struct savearea_t *, __unused int, __unused int);
+#elif defined(__x86_64__)
 typedef kern_return_t (*perfCallback)(int, struct savearea_t *, uintptr_t *, __unused int);
 extern perfCallback tempDTraceTrapHook;
 extern kern_return_t fbt_perfCallback(int, struct savearea_t *, uintptr_t *, __unused int);
@@ -71,8 +76,407 @@ fbt_probe_t                         **fbt_probetab;
 int                                            fbt_probetab_mask;
 static int                             fbt_verbose = 0;
 
+int ignore_fbt_blacklist = 0; /* Non-zero makes fbt_excluded() and
+                               * fbt_module_excluded() skip their blacklist
+                               * checks. Set from the "IgnoreFBTBlacklist"
+                               * boot-arg in fbt_init() or by the
+                               * ignore_fbt_blacklist sysctl handler. */
+
+extern int dtrace_kernel_symbol_mode; /* Compared against the DTRACE_KERNEL_SYMBOLS_* constants in this file. */
+
+
 void fbt_init( void );
 
+/*
+ * Critical routines that must not be probed. PR_5221096, PR_5379018.
+ * The blacklist is searched with bsearch() using _cmp (strncmp), so it must
+ * be kept sorted in ASCII byte order: upper case sorts before '_', which
+ * sorts before lower case. An out-of-order entry may silently never be found.
+ */
+static const char * critical_blacklist[] =
+{
+       "Call_DebuggerC",
+       "SysChoked",
+       "_ZN9IOService14newTemperatureElPS_", /* IOService::newTemperature */
+       "_ZN9IOService26temperatureCriticalForZoneEPS_", /* IOService::temperatureCriticalForZone */
+       "_ZNK6OSData14getBytesNoCopyEv", /* Data::getBytesNoCopy, IOHibernateSystemWake path */
+       "_disable_preemption",
+       "_enable_preemption",
+       "bcopy_phys",
+       "console_cpu_alloc",
+       "console_cpu_free",
+       "cpu_IA32e_disable",
+       "cpu_IA32e_enable",
+       "cpu_NMI_interrupt",
+       "cpu_control",
+       "cpu_data_alloc",
+       "cpu_desc_init",
+       "cpu_desc_init64",
+       "cpu_desc_load",
+       "cpu_desc_load64",
+       "cpu_exit_wait",
+       "cpu_info",
+       "cpu_info_count",
+       "cpu_init",
+       "cpu_interrupt",
+       "cpu_machine_init",
+       "cpu_mode_init",
+       "cpu_processor_alloc",
+       "cpu_processor_free",
+       "cpu_signal_handler",
+       "cpu_sleep",
+       "cpu_start",
+       "cpu_subtype",
+       "cpu_thread_alloc",
+       "cpu_thread_halt",
+       "cpu_thread_init",
+       "cpu_threadtype",
+       "cpu_to_processor",
+       "cpu_topology_sort",
+       "cpu_topology_start_cpu",
+       "cpu_type",
+       "cpuid_cpu_display",
+       "cpuid_extfeatures",
+       "dtrace_invop",
+       "enter_lohandler",
+       "fbt_invop",
+       "fbt_perfCallback",
+       "get_threadtask",
+       "handle_pending_TLB_flushes",
+       "hw_compare_and_store",
+       "interrupt",
+       "kernel_trap",
+       "kprintf",
+       "lo_alltraps",
+       "lock_debugger",
+       "machine_idle_cstate",
+       "machine_thread_get_kern_state",
+       "mca_cpu_alloc",
+       "mca_cpu_init",
+       "ml_nofault_copy",
+       "nanoseconds_to_absolutetime",
+       "nanotime_to_absolutetime",
+       "packA",
+       "panic",
+       "pmKextRegister",
+       "pmMarkAllCPUsOff",
+       "pmSafeMode",
+       "pmTimerRestore",
+       "pmTimerSave",
+       "pmUnRegister",
+       "pmap_cpu_alloc",
+       "pmap_cpu_free",
+       "pmap_cpu_high_map_vaddr",
+       "pmap_cpu_high_shared_remap",
+       "pmap_cpu_init",
+       "power_management_init",
+       "preemption_underflow_panic",
+       "register_cpu_setup_func",
+       "sdt_invop",
+       "sprlock",
+       "sprunlock",
+       "t_invop",
+       "tmrCvt",
+       "unlock_debugger",
+       "unpackA",
+       "unregister_cpu_setup_func",
+       "uread",
+       "uwrite",
+       "vstart"
+};
+#define CRITICAL_BLACKLIST_COUNT (sizeof(critical_blacklist)/sizeof(critical_blacklist[0]))
+
+/*
+ * The transitive closure of entry points that can be reached from probe context.
+ * (Apart from routines whose names begin with dtrace_).
+ *
+ * fbt_excluded() looks names up in this table with bsearch() and _cmp
+ * (strncmp), so the entries must stay sorted in ASCII byte order: upper case
+ * before '_', '_' before lower case, digits before both.
+ */
+static const char * probe_ctx_closure[] =
+{
+       "ClearIdlePop",
+       "Debugger",
+       "IS_64BIT_PROCESS",
+       "OSCompareAndSwap",
+       "SetIdlePop",
+       "absolutetime_to_microtime",
+       "act_set_astbsd",
+       "arm_init_idle_cpu",
+       "ast_dtrace_on",
+       "ast_pending",
+       "clean_dcache",
+       "clean_mmu_dcache",
+       "clock_get_calendar_nanotime_nowait",
+       "copyin",
+       "copyin_kern",
+       "copyin_user",
+       "copyinstr",
+       "copyout",
+       "copyoutstr",
+       "cpu_number",
+       "current_proc",
+       "current_processor",
+       "current_task",
+       "current_thread",
+       "debug_enter",
+       "drain_write_buffer",
+       "find_user_regs",
+       "flush_dcache",
+       "flush_tlb64",
+       "get_bsdtask_info",
+       "get_bsdthread_info",
+       "hertz_tick",
+       "hw_atomic_and",
+       "invalidate_mmu_icache",
+       "kauth_cred_get",
+       "kauth_getgid",
+       "kauth_getuid",
+       "kernel_preempt_check",
+       "kvtophys",
+       "mach_absolute_time",
+       "max_valid_stack_address",
+       "memcpy",
+       "memmove",
+       "ml_at_interrupt_context",
+       "ml_phys_write_byte_64",
+       "ml_phys_write_half_64",
+       "ml_phys_write_word_64",
+       "ml_set_interrupts_enabled",
+       "mt_core_snap",
+       "mt_cur_cpu_cycles",
+       "mt_cur_cpu_instrs",
+       "mt_cur_thread_cycles",
+       "mt_cur_thread_instrs",
+       "mt_fixed_counts",
+       "mt_fixed_counts_internal",
+       "mt_mtc_update_count",
+       "mt_update_thread",
+       "ovbcopy",
+       "panic",
+       "pmap64_pde",
+       "pmap64_pdpt",
+       "pmap_find_phys",
+       "pmap_get_mapwindow",
+       "pmap_pde",
+       "pmap_pte",
+       "pmap_put_mapwindow",
+       "pmap_valid_page",
+       "prf",
+       "proc_is64bit",
+       "proc_selfname",
+       "psignal_lock",
+       "rtc_nanotime_load",
+       "rtc_nanotime_read",
+       "sdt_getargdesc",
+       "setPop",
+       "strlcpy",
+       "sync_iss_to_iks_unconditionally",
+       "systrace_stub",
+       "timer_grab"
+};
+#define PROBE_CTX_CLOSURE_COUNT (sizeof(probe_ctx_closure)/sizeof(probe_ctx_closure[0]))
+
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wcast-qual"
+/*
+ * bsearch() comparator for the string tables above. 'a' is the search key
+ * (a C string); 'b' points at a table slot holding a C string pointer.
+ * Comparing strlen(key) + 1 bytes includes the key's terminating NUL, so
+ * the result is 0 only for an exact, full-length match.
+ */
+static int _cmp(const void *a, const void *b)
+{
+    const char *key = (const char *)a;
+    const char *entry = *(const char **)b;
+
+    return strncmp(key, entry, strlen(key) + 1);
+}
+#pragma clang diagnostic pop
+/*
+ * Module validation
+ *
+ * Returns TRUE when FBT must not provide probes for the module described
+ * by ctl, FALSE when the module may be instrumented. The caller must not
+ * pass a module for which MOD_FBT_DONE() already holds (asserted).
+ */
+int
+fbt_module_excluded(struct modctl* ctl)
+{
+       ASSERT(!MOD_FBT_DONE(ctl));
+
+       /*
+        * Reject modules with no recorded address or size, and modules
+        * that are not marked as loaded.
+        */
+       if (ctl->mod_address == 0 || ctl->mod_size == 0 || ctl->mod_loaded == 0)
+               return TRUE;
+
+       /*
+        * An explicit user override switches off the kext blacklist below;
+        * we trust that the user knows what they are doing.
+        */
+       if (ignore_fbt_blacklist)
+               return FALSE;
+
+       /*
+        * The kexts named here drive low level functions. Tracing them
+        * tends to cause trouble in the sleep/wake paths as well as in the
+        * critical debug and panic paths. Anybody who really needs to drill
+        * into one of them can lift the blacklist via ignore_fbt_blacklist.
+        */
+#ifdef __x86_64__
+       if (strstr(ctl->mod_modname, "AppleACPIEC") != NULL ||
+           strstr(ctl->mod_modname, "AppleACPIPlatform") != NULL ||
+           strstr(ctl->mod_modname, "AppleRTC") != NULL ||
+           strstr(ctl->mod_modname, "IOACPIFamily") != NULL ||
+           strstr(ctl->mod_modname, "AppleIntelCPUPowerManagement") != NULL ||
+           strstr(ctl->mod_modname, "AppleProfile") != NULL ||
+           strstr(ctl->mod_modname, "AppleIntelProfile") != NULL ||
+           strstr(ctl->mod_modname, "AppleEFI") != NULL)
+               return TRUE;
+#elif __arm__ || __arm64__
+       if (LIT_STRNEQL(ctl->mod_modname, "com.apple.driver.AppleARMPlatform") ||
+           LIT_STRNEQL(ctl->mod_modname, "com.apple.driver.AppleARMPL192VIC") ||
+           LIT_STRNEQL(ctl->mod_modname, "com.apple.driver.AppleInterruptController"))
+               return TRUE;
+#endif
+
+       return FALSE;
+}
+
+/*
+ * FBT probe name validation
+ *
+ * Returns TRUE when no FBT probe may be placed on the function called
+ * 'name', FALSE when the function may be probed. Always returns FALSE once
+ * ignore_fbt_blacklist is set.
+ */
+int
+fbt_excluded(const char* name)
+{
+       /*
+        * If the user set this, trust they know what they are doing.
+        */
+       if (ignore_fbt_blacklist)
+               return FALSE;
+
+       if (LIT_STRNSTART(name, "dtrace_") && !LIT_STRNSTART(name, "dtrace_safe_")) {
+               /*
+                * Anything beginning with "dtrace_" may be called
+                * from probe context unless it explicitly indicates
+                * that it won't be called from probe context by
+                * using the prefix "dtrace_safe_".
+                */
+               return TRUE;
+       }
+
+       /*
+       * Place no probes on critical routines (5221096).
+       * The element size handed to bsearch() is the size of a table slot,
+       * not the size of the key pointer.
+       */
+       if (bsearch( name, critical_blacklist, CRITICAL_BLACKLIST_COUNT, sizeof(critical_blacklist[0]), _cmp ) != NULL)
+               return TRUE;
+
+       /*
+       * Place no probes that could be hit in probe context.
+       */
+       if (bsearch( name, probe_ctx_closure, PROBE_CTX_CLOSURE_COUNT, sizeof(probe_ctx_closure[0]), _cmp ) != NULL) {
+               return TRUE;
+       }
+
+       /*
+       * Place no probes that could be hit in probe context.
+       * In the interests of safety, some of these may be overly cautious.
+       * Also exclude very low-level "firmware" class calls.
+       */
+       if (LIT_STRNSTART(name, "cpu_") ||      /* Coarse */
+               LIT_STRNSTART(name, "platform_") ||     /* Coarse */
+               LIT_STRNSTART(name, "machine_") ||      /* Coarse */
+               LIT_STRNSTART(name, "ml_") ||   /* Coarse */
+               LIT_STRNSTART(name, "PE_") ||   /* Coarse */
+               LIT_STRNSTART(name, "rtc_") ||  /* Coarse */
+               LIT_STRNSTART(name, "_rtc_") ||
+               LIT_STRNSTART(name, "rtclock_") ||
+               LIT_STRNSTART(name, "clock_") ||
+               LIT_STRNSTART(name, "bcopy") ||
+               LIT_STRNSTART(name, "pmap_") ||
+               LIT_STRNSTART(name, "hw_") ||   /* Coarse */
+               LIT_STRNSTART(name, "lapic_") ||        /* Coarse */
+               LIT_STRNSTART(name, "OSAdd") ||
+               LIT_STRNSTART(name, "OSBit") ||
+               LIT_STRNSTART(name, "OSDecrement") ||
+               LIT_STRNSTART(name, "OSIncrement") ||
+               LIT_STRNSTART(name, "OSCompareAndSwap") ||
+               LIT_STRNSTART(name, "etimer_") ||
+               LIT_STRNSTART(name, "dtxnu_kern_") ||
+               LIT_STRNSTART(name, "flush_mmu_tlb_"))
+               return TRUE;
+       /*
+        * Fasttrap inner-workings we can't instrument
+        * on Intel (6230149)
+       */
+       if (LIT_STRNSTART(name, "fasttrap_") ||
+               LIT_STRNSTART(name, "fuword") ||
+               LIT_STRNSTART(name, "suword"))
+               return TRUE;
+
+       if (LIT_STRNSTART(name, "_dtrace"))
+               return TRUE; /* Shims in dtrace.c */
+
+       if (LIT_STRNSTART(name, "hibernate_"))
+               return TRUE;
+
+       /*
+        * Place no probes in the exception handling path
+        */
+#if __arm__ || __arm64__
+       if (LIT_STRNSTART(name, "fleh_") ||
+               LIT_STRNSTART(name, "sleh_") ||
+               LIT_STRNSTART(name, "timer_state_event") ||
+               LIT_STRNEQL(name, "get_vfp_enabled"))
+               return TRUE;
+
+       if (LIT_STRNSTART(name, "_ZNK15OSMetaClassBase8metaCastEPK11OSMetaClass") ||
+               LIT_STRNSTART(name, "_ZN15OSMetaClassBase12safeMetaCastEPKS_PK11OSMetaClass") ||
+               LIT_STRNSTART(name, "_ZNK11OSMetaClass13checkMetaCastEPK15OSMetaClassBase"))
+               return TRUE;
+#endif
+
+
+#ifdef __x86_64__
+       /*
+        * "machine_", "ml_" and "PE_" are already rejected by the common
+        * prefix list above, so they are not repeated here.
+        */
+       if (LIT_STRNSTART(name, "mapping_") ||
+               LIT_STRNSTART(name, "tsc_") ||
+               LIT_STRNSTART(name, "pmCPU") ||
+               LIT_STRNSTART(name, "pms") ||
+               LIT_STRNSTART(name, "usimple_") ||
+               LIT_STRNSTART(name, "lck_spin_lock") ||
+               LIT_STRNSTART(name, "lck_spin_unlock") ||
+               LIT_STRNSTART(name, "absolutetime_to_") ||
+               LIT_STRNSTART(name, "commpage_") ||
+               LIT_STRNSTART(name, "act_machine") ||
+               LIT_STRNSTART(name, "acpi_")  ||
+               LIT_STRNSTART(name, "pal_")) {
+               return TRUE;
+       }
+       // Don't Steal Mac OS X
+       if (LIT_STRNSTART(name, "dsmos_"))
+               return TRUE;
+
+#endif
+
+       /*
+       * Place no probes that could be hit on the way to the debugger.
+       */
+       if (LIT_STRNSTART(name, "kdp_") ||
+               LIT_STRNSTART(name, "kdb_") ||
+               LIT_STRNSTART(name, "debug_")) {
+               return TRUE;
+       }
+
+       /*
+        * Place no probes that could be hit on the way to a panic.
+        * This is a substring match: "panic_" anywhere in the name excludes it.
+        */
+       if (NULL != strstr(name, "panic_"))
+               return TRUE;
+
+       return FALSE;
+}
+
+
 /*ARGSUSED*/
 static void
 fbt_destroy(void *arg, dtrace_id_t id, void *parg)
@@ -267,6 +671,13 @@ fbt_resume(void *arg, dtrace_id_t id, void *parg)
            (void)ml_nofault_copy( (vm_offset_t)&fbt->fbtp_patchval, (vm_offset_t)fbt->fbtp_patchpoint, 
                                                                sizeof(fbt->fbtp_patchval));
 
+#if CONFIG_EMBEDDED
+               /*
+                * Make the patched instruction visible via a data + instruction cache flush.
+                */
+               flush_dcache((vm_offset_t)fbt->fbtp_patchpoint,(vm_size_t)sizeof(fbt->fbtp_patchval), 0);
+               invalidate_icache((vm_offset_t)fbt->fbtp_patchpoint,(vm_size_t)sizeof(fbt->fbtp_patchval), 0);
+#endif
                
            fbt->fbtp_currentval = fbt->fbtp_patchval;
        }
@@ -374,6 +785,85 @@ err:
 }
 #endif /* __APPLE__ */
 
+/*
+ * Provide FBT probes for every usable symbol in the symbol table attached
+ * to ctl as mod_user_symbols. Does nothing when no such table is attached.
+ */
+static void
+fbt_provide_module_user_syms(struct modctl *ctl)
+{
+       dtrace_module_symbols_t* user_syms = ctl->mod_user_symbols;
+       char *modname = ctl->mod_modname;
+       unsigned int idx;
+
+       if (!user_syms)
+               return;
+
+       for (idx = 0; idx < user_syms->dtmodsyms_count; idx++) {
+               /*
+                * sym->dtsym_addr (the symbol address) passed in from
+                * user space, is already slid for both kexts and kernel.
+                */
+               dtrace_symbol_t* sym = &user_syms->dtmodsyms_symbols[idx];
+               char* name = sym->dtsym_name;
+
+               /* Drop the omnipresent leading underscore. */
+               if (name[0] == '_')
+                       name++;
+
+               /*
+                * For now only functions in the kernel itself are
+                * checked against the blacklist.
+                */
+               if (MOD_IS_MACH_KERNEL(ctl) && fbt_excluded(name))
+                       continue;
+
+               /*
+                * Skip symbols with a null address.
+                */
+               if (!sym->dtsym_addr)
+                       continue;
+
+               fbt_provide_probe(ctl, (uintptr_t)sym->dtsym_addr, (uintptr_t)(sym->dtsym_addr + sym->dtsym_size), modname, name, (machine_inst_t*)(uintptr_t)sym->dtsym_addr);
+       }
+}
+
+
+/*
+ * Provide FBT probes for one module. Modules that are already done, or
+ * that fbt_module_excluded() rejects, get no probes; rejected modules are
+ * additionally flagged MODCTL_FBT_INVALID. Otherwise probes come from the
+ * module's kernel symbols when it has them, else from its userspace
+ * supplied symbols, and the matching "provided" flags are recorded.
+ * mod_lock must be held by the caller (asserted).
+ */
+void
+fbt_provide_module(void *arg, struct modctl *ctl)
+{
+#pragma unused(arg)
+       ASSERT(ctl != NULL);
+       ASSERT(dtrace_kernel_symbol_mode != DTRACE_KERNEL_SYMBOLS_NEVER);
+       LCK_MTX_ASSERT(&mod_lock, LCK_MTX_ASSERT_OWNED);
+
+       /* Propagate the "ignore blacklist" setting into the module flags. */
+       if (ignore_fbt_blacklist) {
+               ctl->mod_flags |= MODCTL_FBT_PROVIDE_BLACKLISTED_PROBES;
+       }
+
+       if (MOD_FBT_DONE(ctl)) {
+               return;
+       }
+
+       if (fbt_module_excluded(ctl)) {
+               ctl->mod_flags |= MODCTL_FBT_INVALID;
+               return;
+       }
+
+       if (MOD_HAS_KERNEL_SYMBOLS(ctl)) {
+               fbt_provide_module_kernel_syms(ctl);
+               ctl->mod_flags |= MODCTL_FBT_PROBES_PROVIDED;
+               if (MOD_FBT_PROVIDE_BLACKLISTED_PROBES(ctl)) {
+                       ctl->mod_flags |= MODCTL_FBT_BLACKLISTED_PROBES_PROVIDED;
+               }
+       } else if (MOD_HAS_USERSPACE_SYMBOLS(ctl)) {
+               fbt_provide_module_user_syms(ctl);
+               ctl->mod_flags |= MODCTL_FBT_PROBES_PROVIDED;
+               if (MOD_FBT_PROVIDE_PRIVATE_PROBES(ctl)) {
+                       ctl->mod_flags |= MODCTL_FBT_PRIVATE_PROBES_PROVIDED;
+               }
+               if (MOD_FBT_PROVIDE_BLACKLISTED_PROBES(ctl)) {
+                       ctl->mod_flags |= MODCTL_FBT_BLACKLISTED_PROBES_PROVIDED;
+               }
+       }
+}
+
 static dtrace_pattr_t fbt_attr = {
 { DTRACE_STABILITY_EVOLVING, DTRACE_STABILITY_EVOLVING, DTRACE_CLASS_ISA },
 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
@@ -451,6 +941,47 @@ _fbt_open(dev_t dev, int flags, int devtype, struct proc *p)
 
 #define FBT_MAJOR  -24 /* let the kernel pick the device number */
 
+SYSCTL_DECL(_kern_dtrace);
+
+/*
+ * Handler for the ignore_fbt_blacklist sysctl. The value is exchanged with
+ * the caller through sysctl_io_number(); a write is only honoured when it
+ * sets the value to 1. Returns ERANGE for values other than 0 or 1, EPERM
+ * for a refused write, and 0 otherwise.
+ */
+static int
+sysctl_dtrace_ignore_fbt_blacklist SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg2)
+       int value = *(int*)arg1;
+       int err = sysctl_io_number(req, value, sizeof(value), &value, NULL);
+
+       if (err)
+               return (err);
+
+       /* Nothing further to do unless a new value was supplied. */
+       if (!req->newptr)
+               return (0);
+
+       if (value != 0 && value != 1)
+               return (ERANGE);
+
+       /*
+        * Turning the blacklist back on is refused, because there is no way
+        * to tell whether the unsafe probes are still in use.
+        */
+       if (value != 1)
+               return (EPERM);
+
+       /*
+        * When kernel symbols are in use no change is allowed either, since
+        * those symbols are jettison'd after the first pass.
+        *
+        * No lock is needed here: the symbol modes are permanent and do not
+        * change after boot.
+        */
+       if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER ||
+           dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL)
+               return (EPERM);
+
+       ignore_fbt_blacklist = 1;
+
+       return (0);
+}
+
+SYSCTL_PROC(_kern_dtrace, OID_AUTO, ignore_fbt_blacklist,
+       CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
+       &ignore_fbt_blacklist, 0,
+       sysctl_dtrace_ignore_fbt_blacklist, "I", "fbt provider ignore blacklist");
+
 /*
  * A struct describing which functions will get invoked for certain
  * actions.
@@ -473,8 +1004,8 @@ static struct cdevsw fbt_cdevsw =
        0                                       /* type */
 };
 
-int gIgnoreFBTBlacklist = 0;
-static int gFBTInited = 0;
+static int fbt_inited = 0;
+
 #undef kmem_alloc /* from its binding to dt_kmem_alloc glue */
 #undef kmem_free /* from its binding to dt_kmem_free glue */
 #include <vm/vm_kern.h>
@@ -482,7 +1013,7 @@ static int gFBTInited = 0;
 void
 fbt_init( void )
 {
-       if (0 == gFBTInited)
+       if (0 == fbt_inited)
        {
                int majdevno = cdevsw_add(FBT_MAJOR, &fbt_cdevsw);
                
@@ -491,11 +1022,11 @@ fbt_init( void )
                        return;
                }
                
-               PE_parse_boot_argn("IgnoreFBTBlacklist", &gIgnoreFBTBlacklist, sizeof (gIgnoreFBTBlacklist));
+               PE_parse_boot_argn("IgnoreFBTBlacklist", &ignore_fbt_blacklist, sizeof (ignore_fbt_blacklist));
 
                fbt_attach( (dev_info_t *)(uintptr_t)majdevno, DDI_ATTACH );
                
-               gFBTInited = 1; /* Ensure this initialization occurs just one time. */
+               fbt_inited = 1; /* Ensure this initialization occurs just one time. */
        }
        else
                panic("fbt_init: called twice!\n");