X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/c910b4d9d2451126ae3917b931cd4390c11e1d52..a991bd8d3e7fe02dbca0644054bab73c5b75324a:/bsd/dev/dtrace/dtrace.c diff --git a/bsd/dev/dtrace/dtrace.c b/bsd/dev/dtrace/dtrace.c index bdbe6a874..36d4f8223 100644 --- a/bsd/dev/dtrace/dtrace.c +++ b/bsd/dev/dtrace/dtrace.c @@ -20,11 +20,14 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. - * Use is subject to license terms. + * Portions Copyright (c) 2013, 2016, Joyent, Inc. All rights reserved. + * Portions Copyright (c) 2013 by Delphix. All rights reserved. */ -/* #pragma ident "@(#)dtrace.c 1.49 06/08/11 SMI" */ +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. + * Use is subject to license terms. + */ /* * DTrace - Dynamic Tracing for Solaris @@ -56,6 +59,7 @@ * - Enabling functions * - DOF functions * - Anonymous enabling functions + * - Process functions * - Consumer state functions * - Helper functions * - Hook functions @@ -65,16 +69,15 @@ * [Group] Functions", allowing one to find each block by searching forward * on capital-f functions. */ - -#define _DTRACE_WANT_PROC_GLUE_ 1 - #include #include #include #include +#include #include #include #include +#include #include #include #include @@ -87,31 +90,61 @@ #include #include #include +#include #include +#include +#include +#include +#include +#include +#include + +#if MONOTONIC +#include +#include +#endif /* MONOTONIC */ + +#include "dtrace_xoroshiro128_plus.h" + +#include + +#include + +extern addr64_t kvtophys(vm_offset_t va); + +extern uint32_t pmap_find_phys(void *, uint64_t); +extern boolean_t pmap_valid_page(uint32_t); +extern void OSKextRegisterKextsWithDTrace(void); +extern kmod_info_t g_kernel_kmod_info; +extern void commpage_update_dof(boolean_t enabled); + +/* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. */ +#define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */ #define t_predcache t_dtrace_predcache /* Cosmetic. Helps readability of thread.h */ extern void dtrace_suspend(void); extern void dtrace_resume(void); +extern void dtrace_early_init(void); +extern int dtrace_keep_kernel_symbols(void); extern void dtrace_init(void); extern void helper_init(void); +extern void fasttrap_init(void); -#if defined(__APPLE__) - -#include "../../../osfmk/chud/chud_dtrace.h" +static int dtrace_lazy_dofs_duplicate(proc_t *, proc_t *); +extern void dtrace_lazy_dofs_destroy(proc_t *); +extern void dtrace_postinit(void); -extern kern_return_t chudxnu_dtrace_callback - (uint64_t selector, uint64_t *args, uint32_t count); -#endif +extern void dtrace_proc_fork(proc_t*, proc_t*, int); +extern void dtrace_proc_exec(proc_t*); +extern void dtrace_proc_exit(proc_t*); /* * DTrace Tunable Variables * - * The following variables may be tuned by adding a line to /etc/system that - * includes both the name of the DTrace module ("dtrace") and the name of the - * variable. For example: - * - * set dtrace:dtrace_destructive_disallow = 1 + * The following variables may be dynamically tuned by using sysctl(8), the + * variables being stored in the kern.dtrace namespace. 
For example: + * sysctl kern.dtrace.dof_maxsize = 1048575 # 1M * * In general, the only variables that one should be tuning this way are those * that affect system-wide DTrace behavior, and for which the default behavior @@ -120,26 +153,26 @@ extern kern_return_t chudxnu_dtrace_callback * When tuning these variables, avoid pathological values; while some attempt * is made to verify the integrity of these variables, they are not considered * part of the supported interface to DTrace, and they are therefore not - * checked comprehensively. Further, these variables should not be tuned - * dynamically via "mdb -kw" or other means; they should only be tuned via - * /etc/system. + * checked comprehensively. */ -int dtrace_destructive_disallow = 0; -#if defined(__APPLE__) -#define proc_t struct proc -#endif /* __APPLE__ */ +uint64_t dtrace_buffer_memory_maxsize = 0; /* initialized in dtrace_init */ +uint64_t dtrace_buffer_memory_inuse = 0; +int dtrace_destructive_disallow = 1; dtrace_optval_t dtrace_nonroot_maxsize = (16 * 1024 * 1024); size_t dtrace_difo_maxsize = (256 * 1024); -dtrace_optval_t dtrace_dof_maxsize = (256 * 1024); -size_t dtrace_global_maxsize = (16 * 1024); +dtrace_optval_t dtrace_dof_maxsize = (512 * 1024); +dtrace_optval_t dtrace_statvar_maxsize = (16 * 1024); +dtrace_optval_t dtrace_statvar_maxsize_max = (16 * 10 * 1024); size_t dtrace_actions_max = (16 * 1024); size_t dtrace_retain_max = 1024; dtrace_optval_t dtrace_helper_actions_max = 32; -dtrace_optval_t dtrace_helper_providers_max = 32; +dtrace_optval_t dtrace_helper_providers_max = 64; dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024); size_t dtrace_strsize_default = 256; -dtrace_optval_t dtrace_cleanrate_default = 9900990; /* 101 hz */ -dtrace_optval_t dtrace_cleanrate_min = 200000; /* 5000 hz */ +dtrace_optval_t dtrace_strsize_min = 8; +dtrace_optval_t dtrace_strsize_max = 65536; +dtrace_optval_t dtrace_cleanrate_default = 990099000; /* 1.1 hz */ +dtrace_optval_t dtrace_cleanrate_min = 20000000; /* 50 hz */ dtrace_optval_t dtrace_cleanrate_max = (uint64_t)60 * NANOSEC; /* 1/minute */ dtrace_optval_t dtrace_aggrate_default = NANOSEC; /* 1 hz */ dtrace_optval_t dtrace_statusrate_default = NANOSEC; /* 1 hz */ @@ -151,6 +184,10 @@ dtrace_optval_t dtrace_stackframes_default = 20; dtrace_optval_t dtrace_ustackframes_default = 20; dtrace_optval_t dtrace_jstackframes_default = 50; dtrace_optval_t dtrace_jstackstrsize_default = 512; +dtrace_optval_t dtrace_buflimit_default = 75; +dtrace_optval_t dtrace_buflimit_min = 1; +dtrace_optval_t dtrace_buflimit_max = 99; +size_t dtrace_nprobes_default = 4; int dtrace_msgdsize_max = 128; hrtime_t dtrace_chill_max = 500 * (NANOSEC / MILLISEC); /* 500 ms */ hrtime_t dtrace_chill_interval = NANOSEC; /* 1000 ms */ @@ -170,21 +207,20 @@ hrtime_t dtrace_deadman_user = (hrtime_t)30 * NANOSEC; * it is used by some translators as an implementation detail. 
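+ * (As an illustration only, not an exhaustive list of its uses: a
+ * translator that cannot fill in a member may simply return a pointer
+ * into dtrace_zero rather than allocating and zeroing scratch space.)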
*/ const char dtrace_zero[256] = { 0 }; /* zero-filled memory */ - +unsigned int dtrace_max_cpus = 0; /* number of enabled cpus */ /* * DTrace Internal Variables */ static dev_info_t *dtrace_devi; /* device info */ static vmem_t *dtrace_arena; /* probe ID arena */ -static vmem_t *dtrace_minor; /* minor number arena */ -static taskq_t *dtrace_taskq; /* task queue */ static dtrace_probe_t **dtrace_probes; /* array of all probes */ static int dtrace_nprobes; /* number of probes */ static dtrace_provider_t *dtrace_provider; /* provider list */ static dtrace_meta_t *dtrace_meta_pid; /* user-land meta provider */ static int dtrace_opens; /* number of opens */ static int dtrace_helpers; /* number of helpers */ -static void *dtrace_softstate; /* softstate pointer */ +static dtrace_hash_t *dtrace_strings; +static dtrace_hash_t *dtrace_byprov; /* probes hashed by provider */ static dtrace_hash_t *dtrace_bymod; /* probes hashed by module */ static dtrace_hash_t *dtrace_byfunc; /* probes hashed by function */ static dtrace_hash_t *dtrace_byname; /* probes hashed by name */ @@ -199,23 +235,30 @@ static dtrace_ecb_t *dtrace_ecb_create_cache; /* cached created ECB */ static dtrace_genid_t dtrace_probegen; /* current probe generation */ static dtrace_helpers_t *dtrace_deferred_pid; /* deferred helper list */ static dtrace_enabling_t *dtrace_retained; /* list of retained enablings */ +static dtrace_genid_t dtrace_retained_gen; /* current retained enab gen */ static dtrace_dynvar_t dtrace_dynhash_sink; /* end of dynamic hash chains */ -#if defined(__APPLE__) -static int dtrace_dof_mode; /* dof mode */ -#endif -#if defined(__APPLE__) +static int dtrace_dof_mode; /* See dtrace_impl.h for a description of Darwin's dof modes. */ + + /* + * This does't quite fit as an internal variable, as it must be accessed in + * fbt_provide and sdt_provide. Its clearly not a dtrace tunable variable either... + */ +int dtrace_kernel_symbol_mode; /* See dtrace_impl.h for a description of Darwin's kernel symbol modes. */ +static uint32_t dtrace_wake_clients; +static uint8_t dtrace_kerneluuid[16]; /* the 128-bit uuid */ /* * To save memory, some common memory allocations are given a - * unique zone. In example, dtrace_probe_t is 72 bytes in size, + * unique zone. For example, dtrace_probe_t is 72 bytes in size, * which means it would fall into the kalloc.128 bucket. With * 20k elements allocated, the space saved is substantial. */ -struct zone *dtrace_probe_t_zone; +static ZONE_DECLARE(dtrace_probe_t_zone, "dtrace.dtrace_probe_t", + sizeof(dtrace_probe_t), ZC_NONE); -#endif +static int dtrace_module_unloaded(struct kmod_info *kmod); /* * DTrace Locking @@ -248,12 +291,14 @@ struct zone *dtrace_probe_t_zone; * acquired _between_ dtrace_provider_lock and dtrace_lock. */ + /* * APPLE NOTE: * - * All kmutex_t vars have been changed to lck_mtx_t. - * Note that lck_mtx_t's require explicit initialization. + * For porting purposes, all kmutex_t vars have been changed + * to lck_mtx_t, which require explicit initialization. * + * kmutex_t becomes lck_mtx_t * mutex_enter() becomes lck_mtx_lock() * mutex_exit() becomes lck_mtx_unlock() * @@ -261,16 +306,13 @@ struct zone *dtrace_probe_t_zone; * * ASSERT(MUTEX_HELD(&cpu_lock)); * becomes: - * lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); + * LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); * - * Due to the number of these changes, they are not called out explicitly. 
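+ *
+ * As a worked illustration of the mapping above, an illumos sequence
+ * such as:
+ *
+ *	mutex_enter(&dtrace_lock);
+ *	...
+ *	mutex_exit(&dtrace_lock);
+ *
+ * appears throughout this file as:
+ *
+ *	lck_mtx_lock(&dtrace_lock);
+ *	...
+ *	lck_mtx_unlock(&dtrace_lock);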
*/ static lck_mtx_t dtrace_lock; /* probe state lock */ static lck_mtx_t dtrace_provider_lock; /* provider state lock */ static lck_mtx_t dtrace_meta_lock; /* meta-provider state lock */ -#if defined(__APPLE__) static lck_rw_t dtrace_dof_mode_lock; /* dof mode lock */ -#endif /* * DTrace Provider Variables @@ -287,20 +329,60 @@ static dtrace_pattr_t dtrace_provider_attr = { }; static void -dtrace_nullop(void) -{} - -static dtrace_pops_t dtrace_provider_ops = { - (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop, - (void (*)(void *, struct modctl *))dtrace_nullop, - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, - NULL, - NULL, - NULL, - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop +dtrace_provide_nullop(void *arg, const dtrace_probedesc_t *desc) +{ +#pragma unused(arg, desc) +} + +static void +dtrace_provide_module_nullop(void *arg, struct modctl *ctl) +{ +#pragma unused(arg, ctl) +} + +static int +dtrace_enable_nullop(void *arg, dtrace_id_t id, void *parg) +{ +#pragma unused(arg, id, parg) + return (0); +} + +static void +dtrace_disable_nullop(void *arg, dtrace_id_t id, void *parg) +{ +#pragma unused(arg, id, parg) +} + +static void +dtrace_suspend_nullop(void *arg, dtrace_id_t id, void *parg) +{ +#pragma unused(arg, id, parg) +} + +static void +dtrace_resume_nullop(void *arg, dtrace_id_t id, void *parg) +{ +#pragma unused(arg, id, parg) +} + +static void +dtrace_destroy_nullop(void *arg, dtrace_id_t id, void *parg) +{ +#pragma unused(arg, id, parg) +} + + +static dtrace_pops_t dtrace_provider_ops = { + .dtps_provide = dtrace_provide_nullop, + .dtps_provide_module = dtrace_provide_module_nullop, + .dtps_enable = dtrace_enable_nullop, + .dtps_disable = dtrace_disable_nullop, + .dtps_suspend = dtrace_suspend_nullop, + .dtps_resume = dtrace_resume_nullop, + .dtps_getargdesc = NULL, + .dtps_getargval = NULL, + .dtps_usermode = NULL, + .dtps_destroy = dtrace_destroy_nullop, }; static dtrace_id_t dtrace_probeid_begin; /* special BEGIN probe */ @@ -313,14 +395,24 @@ dtrace_id_t dtrace_probeid_error; /* special ERROR probe */ uint32_t dtrace_helptrace_next = 0; uint32_t dtrace_helptrace_nlocals; char *dtrace_helptrace_buffer; -int dtrace_helptrace_bufsize = 512 * 1024; +size_t dtrace_helptrace_bufsize = 512 * 1024; -#ifdef DEBUG +#if DEBUG int dtrace_helptrace_enabled = 1; #else int dtrace_helptrace_enabled = 0; #endif +#if defined (__arm64__) +/* + * The ioctl for adding helper DOF is based on the + * size of a user_addr_t. We need to recognize both + * U32 and U64 as the same action. + */ +#define DTRACEHIOC_ADDDOF_U32 _IOW('h', 4, user32_addr_t) +#define DTRACEHIOC_ADDDOF_U64 _IOW('h', 4, user64_addr_t) +#endif /* __arm64__ */ + /* * DTrace Error Hashing * @@ -330,7 +422,7 @@ int dtrace_helptrace_enabled = 0; * debugging problems in the DIF code generator or in DOF generation . The * error hash may be examined with the ::dtrace_errhash MDB dcmd. */ -#ifdef DEBUG +#if DEBUG static dtrace_errhash_t dtrace_errhash[DTRACE_ERRHASHSZ]; static const char *dtrace_errlast; static kthread_t *dtrace_errthread; @@ -345,24 +437,30 @@ static lck_mtx_t dtrace_errlock; * outside of the implementation. There is no real structure to this cpp * mishmash -- but is there ever? 
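+ *
+ * As a rough sketch of how these macros compose (bucket_head and
+ * template are hypothetical names; the real lookup code differs in
+ * detail), walking one hash chain looks like:
+ *
+ *	for (p = bucket_head; p != NULL; p = *(DTRACE_HASHNEXT(hash, p)))
+ *		if (DTRACE_HASHEQ(hash, p, template))
+ *			return (p);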
*/ -#define DTRACE_HASHSTR(hash, probe) \ - dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs))) -#define DTRACE_HASHNEXT(hash, probe) \ - (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs) +#define DTRACE_GETSTR(hash, elm) \ + (hash->dth_getstr(elm, hash->dth_stroffs)) + +#define DTRACE_HASHSTR(hash, elm) \ + dtrace_hash_str(DTRACE_GETSTR(hash, elm)) -#define DTRACE_HASHPREV(hash, probe) \ - (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs) +#define DTRACE_HASHNEXT(hash, elm) \ + (void**)((uintptr_t)(elm) + (hash)->dth_nextoffs) + +#define DTRACE_HASHPREV(hash, elm) \ + (void**)((uintptr_t)(elm) + (hash)->dth_prevoffs) #define DTRACE_HASHEQ(hash, lhs, rhs) \ - (strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \ - *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0) + (strcmp(DTRACE_GETSTR(hash, lhs), \ + DTRACE_GETSTR(hash, rhs)) == 0) #define DTRACE_AGGHASHSIZE_SLEW 17 +#define DTRACE_V4MAPPED_OFFSET (sizeof (uint32_t) * 3) + /* * The key for a thread-local variable consists of the lower 61 bits of the - * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL. + * current_thread(), plus the 3 bits of the highest active interrupt above LOCK_LEVEL. * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never * equal to a variable identifier. This is necessary (but not sufficient) to * assure that global associative arrays never collide with thread-local @@ -375,79 +473,49 @@ static lck_mtx_t dtrace_errlock; * no way for a global variable key signature to match a thread-local key * signature. */ -#if !defined(__APPLE__) +#if defined (__x86_64__) +/* FIXME: two function calls!! */ #define DTRACE_TLS_THRKEY(where) { \ - uint_t intr = 0; \ - uint_t actv = CPU->cpu_intr_actv >> (LOCK_LEVEL + 1); \ - for (; actv; actv >>= 1) \ - intr++; \ + uint_t intr = ml_at_interrupt_context(); /* Note: just one measly bit */ \ + uint64_t thr = (uintptr_t)current_thread(); \ ASSERT(intr < (1 << 3)); \ - (where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \ + (where) = ((thr + DIF_VARIABLE_MAX) & \ (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ } -#else +#elif defined(__arm__) +/* FIXME: three function calls!!! */ #define DTRACE_TLS_THRKEY(where) { \ - uint_t intr = ml_at_interrupt_context(); /* XXX just one measely bit */ \ - uint_t thr = (uint_t)current_thread(); \ - uint_t pid = (uint_t)proc_selfpid(); \ + uint_t intr = ml_at_interrupt_context(); /* Note: just one measly bit */ \ + uint64_t thr = (uintptr_t)current_thread(); \ + uint_t pid = (uint_t)dtrace_proc_selfpid(); \ ASSERT(intr < (1 << 3)); \ - (where) = ((((uint64_t)thr << 32 | pid) + DIF_VARIABLE_MAX) & \ + (where) = (((thr << 32 | pid) + DIF_VARIABLE_MAX) & \ (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ } -#endif /* __APPLE__ */ +#elif defined (__arm64__) +/* FIXME: two function calls!! 
*/ +#define DTRACE_TLS_THRKEY(where) { \ + uint_t intr = ml_at_interrupt_context(); /* Note: just one measly bit */ \ + uint64_t thr = (uintptr_t)current_thread(); \ + ASSERT(intr < (1 << 3)); \ + (where) = ((thr + DIF_VARIABLE_MAX) & \ + (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ +} +#else +#error Unknown architecture +#endif + +#define DT_BSWAP_8(x) ((x) & 0xff) +#define DT_BSWAP_16(x) ((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8)) +#define DT_BSWAP_32(x) ((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((x) >> 16)) +#define DT_BSWAP_64(x) ((DT_BSWAP_32(x) << 32) | DT_BSWAP_32((x) >> 32)) + +#define DT_MASK_LO 0x00000000FFFFFFFFULL #define DTRACE_STORE(type, tomax, offset, what) \ *((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what); -#if !defined(__APPLE__) -#if !(defined(__i386__) || defined (__x86_64__)) -#define DTRACE_ALIGNCHECK(addr, size, flags) \ - if (addr & (size - 1)) { \ - *flags |= CPU_DTRACE_BADALIGN; \ - cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \ - return (0); \ - } -#else -#define DTRACE_ALIGNCHECK(addr, size, flags) -#endif -#define DTRACE_LOADFUNC(bits) \ -/*CSTYLED*/ \ -uint##bits##_t \ -dtrace_load##bits(uintptr_t addr) \ -{ \ - size_t size = bits / NBBY; \ - /*CSTYLED*/ \ - uint##bits##_t rval; \ - int i; \ - volatile uint16_t *flags = (volatile uint16_t *) \ - &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; \ - \ - DTRACE_ALIGNCHECK(addr, size, flags); \ - \ - for (i = 0; i < dtrace_toxranges; i++) { \ - if (addr >= dtrace_toxrange[i].dtt_limit) \ - continue; \ - \ - if (addr + size <= dtrace_toxrange[i].dtt_base) \ - continue; \ - \ - /* \ - * This address falls within a toxic region; return 0. \ - */ \ - *flags |= CPU_DTRACE_BADADDR; \ - cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \ - return (0); \ - } \ - \ - *flags |= CPU_DTRACE_NOFAULT; \ - /*CSTYLED*/ \ - rval = *((volatile uint##bits##_t *)addr); \ - *flags &= ~CPU_DTRACE_NOFAULT; \ - \ - return (rval); \ -} -#else #define DTRACE_ALIGNCHECK(addr, size, flags) \ if (addr & (MIN(size,4) - 1)) { \ *flags |= CPU_DTRACE_BADALIGN; \ @@ -455,11 +523,41 @@ dtrace_load##bits(uintptr_t addr) \ return (0); \ } -#define RECOVER_LABEL(bits) __asm__ volatile("_dtraceLoadRecover" #bits ":" ); +#define DTRACE_RANGE_REMAIN(remp, addr, baseaddr, basesz) \ +do { \ + if ((remp) != NULL) { \ + *(remp) = (uintptr_t)(baseaddr) + (basesz) - (addr); \ + } \ +} while (0) + + +/* + * Test whether a range of memory starting at testaddr of size testsz falls + * within the range of memory described by addr, sz. We take care to avoid + * problems with overflow and underflow of the unsigned quantities, and + * disallow all negative sizes. Ranges of size 0 are allowed. + */ +#define DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \ + ((testaddr) - (baseaddr) < (basesz) && \ + (testaddr) + (testsz) - (baseaddr) <= (basesz) && \ + (testaddr) + (testsz) >= (testaddr)) + +/* + * Test whether alloc_sz bytes will fit in the scratch region. We isolate + * alloc_sz on the righthand side of the comparison in order to avoid overflow + * or underflow in the comparison with it. This is simpler than the INRANGE + * check above, because we know that the dtms_scratch_ptr is valid in the + * range. Allocations of size zero are allowed. 
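+ *
+ * A worked example with illustrative numbers: if dtms_scratch_base is
+ * 0x1000, dtms_scratch_size is 0x100 and dtms_scratch_ptr is 0x10f0,
+ * then 0x10 bytes remain, so DTRACE_INSCRATCH(mstate, 0x10) succeeds
+ * while DTRACE_INSCRATCH(mstate, 0x11) fails.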
+ */ +#define DTRACE_INSCRATCH(mstate, alloc_sz) \ + ((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \ + (mstate)->dtms_scratch_ptr >= (alloc_sz)) +#define RECOVER_LABEL(bits) dtraceLoadRecover##bits: + +#if defined (__x86_64__) || (defined (__arm__) || defined (__arm64__)) #define DTRACE_LOADFUNC(bits) \ /*CSTYLED*/ \ -extern vm_offset_t dtraceLoadRecover##bits; \ uint##bits##_t dtrace_load##bits(uintptr_t addr); \ \ uint##bits##_t \ @@ -469,7 +567,6 @@ dtrace_load##bits(uintptr_t addr) \ /*CSTYLED*/ \ uint##bits##_t rval = 0; \ int i; \ - ppnum_t pp; \ volatile uint16_t *flags = (volatile uint16_t *) \ &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; \ \ @@ -490,21 +587,23 @@ dtrace_load##bits(uintptr_t addr) \ return (0); \ } \ \ - pp = pmap_find_phys(kernel_pmap, addr); \ - \ - if (0 == pp || /* pmap_find_phys failed ? */ \ - !dtxnu_is_RAM_page(pp) /* Backed by RAM? */ ) { \ + { \ + volatile vm_offset_t recover = (vm_offset_t)&&dtraceLoadRecover##bits; \ + *flags |= CPU_DTRACE_NOFAULT; \ + recover = dtrace_sign_and_set_thread_recover(current_thread(), recover); \ + /*CSTYLED*/ \ + /* \ + * PR6394061 - avoid device memory that is unpredictably \ + * mapped and unmapped \ + */ \ + if (pmap_valid_page(pmap_find_phys(kernel_pmap, addr))) \ + rval = *((volatile uint##bits##_t *)addr); \ + else { \ *flags |= CPU_DTRACE_BADADDR; \ cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \ return (0); \ } \ \ - { \ - volatile vm_offset_t recover = (vm_offset_t)&dtraceLoadRecover##bits; \ - *flags |= CPU_DTRACE_NOFAULT; \ - recover = dtrace_set_thread_recover(current_thread(), recover); \ - /*CSTYLED*/ \ - rval = *((volatile uint##bits##_t *)addr); \ RECOVER_LABEL(bits); \ (void)dtrace_set_thread_recover(current_thread(), recover); \ *flags &= ~CPU_DTRACE_NOFAULT; \ @@ -512,8 +611,9 @@ dtrace_load##bits(uintptr_t addr) \ \ return (rval); \ } -#endif /* __APPLE__ */ - +#else /* all other architectures */ +#error Unknown Architecture +#endif #ifdef __LP64__ #define dtrace_loadptr dtrace_load64 @@ -525,6 +625,7 @@ dtrace_load##bits(uintptr_t addr) \ #define DTRACE_DYNHASH_SINK 1 #define DTRACE_DYNHASH_VALID 2 +#define DTRACE_MATCH_FAIL -1 #define DTRACE_MATCH_NEXT 0 #define DTRACE_MATCH_DONE 1 #define DTRACE_ANCHORED(probe) ((probe)->dtpr_func[0] != '\0') @@ -539,15 +640,19 @@ dtrace_load##bits(uintptr_t addr) \ ((flags) & CPU_DTRACE_TUPOFLOW) ? DTRACEFLT_TUPOFLOW : \ ((flags) & CPU_DTRACE_BADALIGN) ? DTRACEFLT_BADALIGN : \ ((flags) & CPU_DTRACE_NOSCRATCH) ? DTRACEFLT_NOSCRATCH : \ + ((flags) & CPU_DTRACE_BADSTACK) ? 
DTRACEFLT_BADSTACK : \ DTRACEFLT_UNKNOWN) #define DTRACEACT_ISSTRING(act) \ ((act)->dta_kind == DTRACEACT_DIFEXPR && \ (act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) + +static size_t dtrace_strlen(const char *, size_t); static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id); static void dtrace_enabling_provide(dtrace_provider_t *); -static int dtrace_enabling_match(dtrace_enabling_t *, int *); +static int dtrace_enabling_match(dtrace_enabling_t *, int *, dtrace_match_cond_t *cond); +static void dtrace_enabling_matchall_with_cond(dtrace_match_cond_t *cond); static void dtrace_enabling_matchall(void); static dtrace_state_t *dtrace_anon_grab(void); static uint64_t dtrace_helper(int, dtrace_mstate_t *, @@ -558,8 +663,223 @@ static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t, dtrace_state_t *, dtrace_mstate_t *); static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t, dtrace_optval_t); -static int dtrace_ecb_create_enable(dtrace_probe_t *, void *); +static int dtrace_ecb_create_enable(dtrace_probe_t *, void *, void *); static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *); +static int dtrace_canload_remains(uint64_t, size_t, size_t *, + dtrace_mstate_t *, dtrace_vstate_t *); +static int dtrace_canstore_remains(uint64_t, size_t, size_t *, + dtrace_mstate_t *, dtrace_vstate_t *); + + +/* + * DTrace sysctl handlers + * + * These declarations and functions are used for a deeper DTrace configuration. + * Most of them are not per-consumer basis and may impact the other DTrace + * consumers. Correctness may not be supported for all the variables, so you + * should be careful about what values you are using. + */ + +SYSCTL_DECL(_kern_dtrace); +SYSCTL_NODE(_kern, OID_AUTO, dtrace, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "dtrace"); + +static int +sysctl_dtrace_err_verbose SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg2) + int changed, error; + int value = *(int *) arg1; + + error = sysctl_io_number(req, value, sizeof(value), &value, &changed); + if (error || !changed) + return (error); + + if (value != 0 && value != 1) + return (ERANGE); + + lck_mtx_lock(&dtrace_lock); + dtrace_err_verbose = value; + lck_mtx_unlock(&dtrace_lock); + + return (0); +} + +/* + * kern.dtrace.err_verbose + * + * Set DTrace verbosity when an error occured (0 = disabled, 1 = enabld). + * Errors are reported when a DIFO or a DOF has been rejected by the kernel. + */ +SYSCTL_PROC(_kern_dtrace, OID_AUTO, err_verbose, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + &dtrace_err_verbose, 0, + sysctl_dtrace_err_verbose, "I", "dtrace error verbose"); + +static int +sysctl_dtrace_buffer_memory_maxsize SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg2, req) + int changed, error; + uint64_t value = *(uint64_t *) arg1; + + error = sysctl_io_number(req, value, sizeof(value), &value, &changed); + if (error || !changed) + return (error); + + if (value <= dtrace_buffer_memory_inuse) + return (ERANGE); + + lck_mtx_lock(&dtrace_lock); + dtrace_buffer_memory_maxsize = value; + lck_mtx_unlock(&dtrace_lock); + + return (0); +} + +/* + * kern.dtrace.buffer_memory_maxsize + * + * Set DTrace maximal size in bytes used by all the consumers' state buffers. By default + * the limit is PHYS_MEM / 3 for *all* consumers. Attempting to set a null, a negative value + * or a value <= to dtrace_buffer_memory_inuse will result in a failure. 
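+ *
+ * For example (value illustrative only), raising the cap to 1 GiB:
+ *
+ *	sysctl kern.dtrace.buffer_memory_maxsize=1073741824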
+ */ +SYSCTL_PROC(_kern_dtrace, OID_AUTO, buffer_memory_maxsize, + CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, + &dtrace_buffer_memory_maxsize, 0, + sysctl_dtrace_buffer_memory_maxsize, "Q", "dtrace state buffer memory maxsize"); + +/* + * kern.dtrace.buffer_memory_inuse + * + * Current state buffer memory used, in bytes, by all the DTrace consumers. + * This value is read-only. + */ +SYSCTL_QUAD(_kern_dtrace, OID_AUTO, buffer_memory_inuse, CTLFLAG_RD | CTLFLAG_LOCKED, + &dtrace_buffer_memory_inuse, "dtrace state buffer memory in-use"); + +static int +sysctl_dtrace_difo_maxsize SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg2, req) + int changed, error; + size_t value = *(size_t*) arg1; + + error = sysctl_io_number(req, value, sizeof(value), &value, &changed); + if (error || !changed) + return (error); + + if (value <= 0) + return (ERANGE); + + lck_mtx_lock(&dtrace_lock); + dtrace_difo_maxsize = value; + lck_mtx_unlock(&dtrace_lock); + + return (0); +} + +/* + * kern.dtrace.difo_maxsize + * + * Set the DIFO max size in bytes, check the definition of dtrace_difo_maxsize + * to get the default value. Attempting to set a null or negative size will + * result in a failure. + */ +SYSCTL_PROC(_kern_dtrace, OID_AUTO, difo_maxsize, + CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, + &dtrace_difo_maxsize, 0, + sysctl_dtrace_difo_maxsize, "Q", "dtrace difo maxsize"); + +static int +sysctl_dtrace_dof_maxsize SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg2, req) + int changed, error; + dtrace_optval_t value = *(dtrace_optval_t *) arg1; + + error = sysctl_io_number(req, value, sizeof(value), &value, &changed); + if (error || !changed) + return (error); + + if (value <= 0) + return (ERANGE); + + if (value >= dtrace_copy_maxsize()) + return (ERANGE); + + lck_mtx_lock(&dtrace_lock); + dtrace_dof_maxsize = value; + lck_mtx_unlock(&dtrace_lock); + + return (0); +} + +/* + * kern.dtrace.dof_maxsize + * + * Set the DOF max size in bytes, check the definition of dtrace_dof_maxsize to + * get the default value. Attempting to set a null or negative size will result + * in a failure. + */ +SYSCTL_PROC(_kern_dtrace, OID_AUTO, dof_maxsize, + CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, + &dtrace_dof_maxsize, 0, + sysctl_dtrace_dof_maxsize, "Q", "dtrace dof maxsize"); + +static int +sysctl_dtrace_statvar_maxsize SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg2, req) + int changed, error; + dtrace_optval_t value = *(dtrace_optval_t*) arg1; + + error = sysctl_io_number(req, value, sizeof(value), &value, &changed); + if (error || !changed) + return (error); + + if (value <= 0) + return (ERANGE); + if (value > dtrace_statvar_maxsize_max) + return (ERANGE); + + lck_mtx_lock(&dtrace_lock); + dtrace_statvar_maxsize = value; + lck_mtx_unlock(&dtrace_lock); + + return (0); +} + +/* + * kern.dtrace.global_maxsize + * + * Set the variable max size in bytes, check the definition of + * dtrace_statvar_maxsize to get the default value. Attempting to set a null, + * too high or negative size will result in a failure. + */ +SYSCTL_PROC(_kern_dtrace, OID_AUTO, global_maxsize, + CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, + &dtrace_statvar_maxsize, 0, + sysctl_dtrace_statvar_maxsize, "Q", "dtrace statvar maxsize"); + + +/* + * kern.dtrace.provide_private_probes + * + * Set whether the providers must provide the private probes. This is + * kept as compatibility as they are always provided. 
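+ * Because the declaration below passes a NULL variable pointer with a
+ * constant of 1, reading this OID always reports 1.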
+ */ +SYSCTL_INT(_kern_dtrace, OID_AUTO, provide_private_probes, + CTLFLAG_RD | CTLFLAG_LOCKED, + (int *)NULL, 1, "provider must provide the private probes"); + +/* + * kern.dtrace.dof_mode + * + * Returns the current DOF mode. + * This value is read-only. + */ +SYSCTL_INT(_kern_dtrace, OID_AUTO, dof_mode, CTLFLAG_RD | CTLFLAG_LOCKED, + &dtrace_dof_mode, 0, "dtrace dof mode"); /* * DTrace Probe Context Functions @@ -578,20 +898,11 @@ static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *); * for these functions, there will be a comment above the function reading * "Note: not called from probe context." */ -void -dtrace_panic(const char *format, ...) -{ - va_list alist; - - va_start(alist, format); - dtrace_vpanic(format, alist); - va_end(alist); -} int dtrace_assfail(const char *a, const char *f, int l) { - dtrace_panic("assertion failed: %s, file: %s, line: %d", a, f, l); + panic("dtrace: assertion failed: %s, file: %s, line: %d", a, f, l); /* * We just need something here that even the most clever compiler @@ -664,20 +975,43 @@ dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate) } static int -dtrace_canstore_statvar(uint64_t addr, size_t sz, +dtrace_canstore_statvar(uint64_t addr, size_t sz, size_t *remain, dtrace_statvar_t **svars, int nsvars) { int i; + size_t maxglobalsize, maxlocalsize; + + maxglobalsize = dtrace_statvar_maxsize + sizeof (uint64_t); + maxlocalsize = (maxglobalsize) * NCPU; + + if (nsvars == 0) + return (0); + for (i = 0; i < nsvars; i++) { dtrace_statvar_t *svar = svars[i]; + uint8_t scope; + size_t size; - if (svar == NULL || svar->dtsv_size == 0) + if (svar == NULL || (size = svar->dtsv_size) == 0) continue; - if (addr - svar->dtsv_data < svar->dtsv_size && - addr + sz <= svar->dtsv_data + svar->dtsv_size) + scope = svar->dtsv_var.dtdv_scope; + + /** + * We verify that our size is valid in the spirit of providing + * defense in depth: we want to prevent attackers from using + * DTrace to escalate an orthogonal kernel heap corruption bug + * into the ability to store to arbitrary locations in memory. + */ + VERIFY((scope == DIFV_SCOPE_GLOBAL && size <= maxglobalsize) || + (scope == DIFV_SCOPE_LOCAL && size <= maxlocalsize)); + + if (DTRACE_INRANGE(addr, sz, svar->dtsv_data, svar->dtsv_size)) { + DTRACE_RANGE_REMAIN(remain, addr, svar->dtsv_data, + svar->dtsv_size); return (1); + } } return (0); @@ -693,48 +1027,314 @@ static int dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { - uintptr_t a; - size_t s; - - /* - * First, check to see if the address is in scratch space... - */ - a = mstate->dtms_scratch_base; - s = mstate->dtms_scratch_size; - - if (addr - a < s && addr + sz <= a + s) + return (dtrace_canstore_remains(addr, sz, NULL, mstate, vstate)); +} +/* + * Implementation of dtrace_canstore which communicates the upper bound of the + * allowed memory region. + */ +static int +dtrace_canstore_remains(uint64_t addr, size_t sz, size_t *remain, + dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) +{ + /* + * First, check to see if the address is in scratch space... + */ + if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base, + mstate->dtms_scratch_size)) { + DTRACE_RANGE_REMAIN(remain, addr, mstate->dtms_scratch_base, + mstate->dtms_scratch_size); return (1); - + } /* * Now check to see if it's a dynamic variable. This check will pick * up both thread-local variables and any global dynamically-allocated * variables. 
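+ * (Both kinds are carved out of the dtvs_dynvars arena whose base and
+ * size are checked below.)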
*/ - a = (uintptr_t)vstate->dtvs_dynvars.dtds_base; - s = vstate->dtvs_dynvars.dtds_size; - if (addr - a < s && addr + sz <= a + s) + if (DTRACE_INRANGE(addr, sz, (uintptr_t)vstate->dtvs_dynvars.dtds_base, + vstate->dtvs_dynvars.dtds_size)) { + dtrace_dstate_t *dstate = &vstate->dtvs_dynvars; + uintptr_t base = (uintptr_t)dstate->dtds_base + + (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t)); + uintptr_t chunkoffs; + dtrace_dynvar_t *dvar; + + /* + * Before we assume that we can store here, we need to make + * sure that it isn't in our metadata -- storing to our + * dynamic variable metadata would corrupt our state. For + * the range to not include any dynamic variable metadata, + * it must: + * + * (1) Start above the hash table that is at the base of + * the dynamic variable space + * + * (2) Have a starting chunk offset that is beyond the + * dtrace_dynvar_t that is at the base of every chunk + * + * (3) Not span a chunk boundary + * + * (4) Not be in the tuple space of a dynamic variable + * + */ + if (addr < base) + return (0); + + chunkoffs = (addr - base) % dstate->dtds_chunksize; + + if (chunkoffs < sizeof (dtrace_dynvar_t)) + return (0); + + if (chunkoffs + sz > dstate->dtds_chunksize) + return (0); + + dvar = (dtrace_dynvar_t *)((uintptr_t)addr - chunkoffs); + + if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) + return (0); + + if (chunkoffs < sizeof (dtrace_dynvar_t) + + ((dvar->dtdv_tuple.dtt_nkeys - 1) * sizeof (dtrace_key_t))) + return (0); + return (1); + } /* * Finally, check the static local and global variables. These checks * take the longest, so we perform them last. */ - if (dtrace_canstore_statvar(addr, sz, + if (dtrace_canstore_statvar(addr, sz, remain, vstate->dtvs_locals, vstate->dtvs_nlocals)) return (1); - if (dtrace_canstore_statvar(addr, sz, + if (dtrace_canstore_statvar(addr, sz, remain, vstate->dtvs_globals, vstate->dtvs_nglobals)) return (1); return (0); } + +/* + * Convenience routine to check to see if the address is within a memory + * region in which a load may be issued given the user's privilege level; + * if not, it sets the appropriate error flags and loads 'addr' into the + * illegal value slot. + * + * DTrace subroutines (DIF_SUBR_*) should use this helper to implement + * appropriate memory access protection. + */ +int +dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, + dtrace_vstate_t *vstate) +{ + return (dtrace_canload_remains(addr, sz, NULL, mstate, vstate)); +} + +/* + * Implementation of dtrace_canload which communicates the upper bound of the + * allowed memory region. + */ +static int +dtrace_canload_remains(uint64_t addr, size_t sz, size_t *remain, + dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) +{ + volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; + + /* + * If we hold the privilege to read from kernel memory, then + * everything is readable. + */ + if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) { + DTRACE_RANGE_REMAIN(remain, addr, addr, sz); + return (1); + } + + /* + * You can obviously read that which you can store. + */ + if (dtrace_canstore_remains(addr, sz, remain, mstate, vstate)) + return (1); + + /* + * We're allowed to read from our own string table. 
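+ * (dtdo_strtab and dtdo_strlen bound the DIFO's own string constants,
+ * which DIF instructions legitimately reference.)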
+ */ + if (DTRACE_INRANGE(addr, sz, (uintptr_t)mstate->dtms_difo->dtdo_strtab, + mstate->dtms_difo->dtdo_strlen)) { + DTRACE_RANGE_REMAIN(remain, addr, + mstate->dtms_difo->dtdo_strtab, + mstate->dtms_difo->dtdo_strlen); + return (1); + } + + DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV); + *illval = addr; + return (0); +} + +/* + * Convenience routine to check to see if a given string is within a memory + * region in which a load may be issued given the user's privilege level; + * this exists so that we don't need to issue unnecessary dtrace_strlen() + * calls in the event that the user has all privileges. + */ +static int +dtrace_strcanload(uint64_t addr, size_t sz, size_t *remain, + dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) +{ + size_t rsize; + + /* + * If we hold the privilege to read from kernel memory, then + * everything is readable. + */ + if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) { + DTRACE_RANGE_REMAIN(remain, addr, addr, sz); + return (1); + } + + /* + * Even if the caller is uninterested in querying the remaining valid + * range, it is required to ensure that the access is allowed. + */ + if (remain == NULL) { + remain = &rsize; + } + if (dtrace_canload_remains(addr, 0, remain, mstate, vstate)) { + size_t strsz; + /* + * Perform the strlen after determining the length of the + * memory region which is accessible. This prevents timing + * information from being used to find NULs in memory which is + * not accessible to the caller. + */ + strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr, + MIN(sz, *remain)); + if (strsz <= *remain) { + return (1); + } + } + + return (0); +} + +/* + * Convenience routine to check to see if a given variable is within a memory + * region in which a load may be issued given the user's privilege level. + */ +static int +dtrace_vcanload(void *src, dtrace_diftype_t *type, size_t *remain, + dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) +{ + size_t sz; + ASSERT(type->dtdt_flags & DIF_TF_BYREF); + + /* + * Calculate the max size before performing any checks since even + * DTRACE_ACCESS_KERNEL-credentialed callers expect that this function + * return the max length via 'remain'. + */ + if (type->dtdt_kind == DIF_TYPE_STRING) { + dtrace_state_t *state = vstate->dtvs_state; + + if (state != NULL) { + sz = state->dts_options[DTRACEOPT_STRSIZE]; + } else { + /* + * In helper context, we have a NULL state; fall back + * to using the system-wide default for the string size + * in this case. + */ + sz = dtrace_strsize_default; + } + } else { + sz = type->dtdt_size; + } + + /* + * If we hold the privilege to read from kernel memory, then + * everything is readable. + */ + if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) { + DTRACE_RANGE_REMAIN(remain, (uintptr_t)src, src, sz); + return (1); + } + + if (type->dtdt_kind == DIF_TYPE_STRING) { + return (dtrace_strcanload((uintptr_t)src, sz, remain, mstate, + vstate)); + } + return (dtrace_canload_remains((uintptr_t)src, sz, remain, mstate, + vstate)); +} + +#define isdigit(ch) ((ch) >= '0' && (ch) <= '9') +#define islower(ch) ((ch) >= 'a' && (ch) <= 'z') +#define isspace(ch) (((ch) == ' ') || ((ch) == '\r') || ((ch) == '\n') || \ + ((ch) == '\t') || ((ch) == '\f')) +#define isxdigit(ch) (isdigit(ch) || ((ch) >= 'a' && (ch) <= 'f') || \ + ((ch) >= 'A' && (ch) <= 'F')) +#define lisalnum(x) \ + (isdigit(x) || ((x) >= 'a' && (x) <= 'z') || ((x) >= 'A' && (x) <= 'Z')) + +#define DIGIT(x) \ + (isdigit(x) ? (x) - '0' : islower(x) ? 
(x) + 10 - 'a' : (x) + 10 - 'A') + +/* + * Convert a string to a signed integer using safe loads. + */ +static int64_t +dtrace_strtoll(char *input, int base, size_t limit) +{ + uintptr_t pos = (uintptr_t)input; + int64_t val = 0; + int x; + boolean_t neg = B_FALSE; + char c, cc, ccc; + uintptr_t end = pos + limit; + + /* + * Consume any whitespace preceding digits. + */ + while ((c = dtrace_load8(pos)) == ' ' || c == '\t') + pos++; + + /* + * Handle an explicit sign if one is present. + */ + if (c == '-' || c == '+') { + if (c == '-') + neg = B_TRUE; + c = dtrace_load8(++pos); + } + + /* + * Check for an explicit hexadecimal prefix ("0x" or "0X") and skip it + * if present. + */ + if (base == 16 && c == '0' && ((cc = dtrace_load8(pos + 1)) == 'x' || + cc == 'X') && isxdigit(ccc = dtrace_load8(pos + 2))) { + pos += 2; + c = ccc; + } + + /* + * Read in contiguous digits until the first non-digit character. + */ + for (; pos < end && c != '\0' && lisalnum(c) && (x = DIGIT(c)) < base; + c = dtrace_load8(++pos)) + val = val * base + x; + + return (neg ? -val : val); +} + + /* * Compare two strings using safe loads. */ static int -dtrace_strncmp(char *s1, char *s2, size_t limit) +dtrace_strncmp(const char *s1, const char *s2, size_t limit) { uint8_t c1, c2; volatile uint16_t *flags; @@ -745,15 +1345,17 @@ dtrace_strncmp(char *s1, char *s2, size_t limit) flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags; do { - if (s1 == NULL) + if (s1 == NULL) { c1 = '\0'; - else + } else { c1 = dtrace_load8((uintptr_t)s1++); + } - if (s2 == NULL) + if (s2 == NULL) { c2 = '\0'; - else + } else { c2 = dtrace_load8((uintptr_t)s2++); + } if (c1 != c2) return (c1 - c2); @@ -771,9 +1373,10 @@ dtrace_strlen(const char *s, size_t lim) { uint_t len; - for (len = 0; len != lim; len++) + for (len = 0; len != lim; len++) { if (dtrace_load8((uintptr_t)s++) == '\0') break; + } return (len); } @@ -862,14 +1465,15 @@ dtrace_strcpy(const void *src, void *dst, size_t len) * specified type; we assume that we can store to directly. */ static void -dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type) +dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type, size_t limit) { ASSERT(type->dtdt_flags & DIF_TF_BYREF); - if (type->dtdt_kind == DIF_TYPE_STRING) - dtrace_strcpy(src, dst, type->dtdt_size); - else - dtrace_bcopy(src, dst, type->dtdt_size); + if (type->dtdt_kind == DIF_TYPE_STRING) { + dtrace_strcpy(src, dst, MIN(type->dtdt_size, limit)); + } else { + dtrace_bcopy(src, dst, MIN(type->dtdt_size, limit)); + } } /* @@ -915,6 +1519,93 @@ dtrace_bzero(void *dst, size_t len) *cp++ = 0; } +static void +dtrace_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum) +{ + uint64_t result[2]; + + result[0] = addend1[0] + addend2[0]; + result[1] = addend1[1] + addend2[1] + + (result[0] < addend1[0] || result[0] < addend2[0] ? 1 : 0); + + sum[0] = result[0]; + sum[1] = result[1]; +} + +/* + * Shift the 128-bit value in a by b. If b is positive, shift left. + * If b is negative, shift right. 
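+ *
+ * The value is kept little-endian in the pair: a[0] holds the low 64
+ * bits and a[1] the high 64 bits. For example, with a = { 0, 1 } --
+ * i.e. the value (1 << 64) -- dtrace_shift_128(a, -64) yields
+ * a = { 1, 0 }.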
+ */ +static void +dtrace_shift_128(uint64_t *a, int b) +{ + uint64_t mask; + + if (b == 0) + return; + + if (b < 0) { + b = -b; + if (b >= 64) { + a[0] = a[1] >> (b - 64); + a[1] = 0; + } else { + a[0] >>= b; + mask = 1LL << (64 - b); + mask -= 1; + a[0] |= ((a[1] & mask) << (64 - b)); + a[1] >>= b; + } + } else { + if (b >= 64) { + a[1] = a[0] << (b - 64); + a[0] = 0; + } else { + a[1] <<= b; + mask = a[0] >> (64 - b); + a[1] |= mask; + a[0] <<= b; + } + } +} + +/* + * The basic idea is to break the 2 64-bit values into 4 32-bit values, + * use native multiplication on those, and then re-combine into the + * resulting 128-bit value. + * + * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) = + * hi1 * hi2 << 64 + + * hi1 * lo2 << 32 + + * hi2 * lo1 << 32 + + * lo1 * lo2 + */ +static void +dtrace_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product) +{ + uint64_t hi1, hi2, lo1, lo2; + uint64_t tmp[2]; + + hi1 = factor1 >> 32; + hi2 = factor2 >> 32; + + lo1 = factor1 & DT_MASK_LO; + lo2 = factor2 & DT_MASK_LO; + + product[0] = lo1 * lo2; + product[1] = hi1 * hi2; + + tmp[0] = hi1 * lo2; + tmp[1] = 0; + dtrace_shift_128(tmp, 32); + dtrace_add_128(product, tmp, product); + + tmp[0] = hi2 * lo1; + tmp[1] = 0; + dtrace_shift_128(tmp, 32); + dtrace_add_128(product, tmp, product); +} + /* * This privilege check should be used by actions and subroutines to * verify that the user credentials of the process that enabled the @@ -931,17 +1622,13 @@ dtrace_priv_proc_common_user(dtrace_state_t *state) */ ASSERT(s_cr != NULL); -#if !defined(__APPLE__) - if ((cr = CRED()) != NULL && -#else if ((cr = dtrace_CRED()) != NULL && -#endif /* __APPLE__ */ - s_cr->cr_uid == cr->cr_uid && - s_cr->cr_uid == cr->cr_ruid && - s_cr->cr_uid == cr->cr_suid && - s_cr->cr_gid == cr->cr_gid && - s_cr->cr_gid == cr->cr_rgid && - s_cr->cr_gid == cr->cr_sgid) + posix_cred_get(s_cr)->cr_uid == posix_cred_get(cr)->cr_uid && + posix_cred_get(s_cr)->cr_uid == posix_cred_get(cr)->cr_ruid && + posix_cred_get(s_cr)->cr_uid == posix_cred_get(cr)->cr_suid && + posix_cred_get(s_cr)->cr_gid == posix_cred_get(cr)->cr_gid && + posix_cred_get(s_cr)->cr_gid == posix_cred_get(cr)->cr_rgid && + posix_cred_get(s_cr)->cr_gid == posix_cred_get(cr)->cr_sgid) return (1); return (0); @@ -956,6 +1643,7 @@ static int dtrace_priv_proc_common_zone(dtrace_state_t *state) { cred_t *cr, *s_cr = state->dts_cred.dcr_cred; +#pragma unused(cr, s_cr, state) /* __APPLE__ */ /* * We should always have a non-NULL state cred here, since if cred @@ -963,52 +1651,29 @@ dtrace_priv_proc_common_zone(dtrace_state_t *state) */ ASSERT(s_cr != NULL); -#if !defined(__APPLE__) - if ((cr = CRED()) != NULL && - s_cr->cr_zone == cr->cr_zone) - return (1); - - return (0); -#else -#pragma unused(state) - - return 1; /* Darwin doesn't do zones. */ -#endif /* __APPLE__ */ + return 1; /* APPLE NOTE: Darwin doesn't do zones. */ } /* * This privilege check should be used by actions and subroutines to * verify that the process has not setuid or changed credentials. */ -#if !defined(__APPLE__) -static int -dtrace_priv_proc_common_nocd() -{ - proc_t *proc; - - if ((proc = ttoproc(curthread)) != NULL && - !(proc->p_flag & SNOCD)) - return (1); - - return (0); -} -#else static int dtrace_priv_proc_common_nocd(void) { return 1; /* Darwin omits "No Core Dump" flag. 
*/ } -#endif /* __APPLE__ */ static int dtrace_priv_proc_destructive(dtrace_state_t *state) { int action = state->dts_cred.dcr_action; -#if defined(__APPLE__) if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) goto bad; -#endif /* __APPLE__ */ + + if (dtrace_is_restricted() && !dtrace_can_attach_to_proc(current_proc())) + goto bad; if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) && dtrace_priv_proc_common_zone(state) == 0) @@ -1033,10 +1698,11 @@ bad: static int dtrace_priv_proc_control(dtrace_state_t *state) { -#if defined(__APPLE__) if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) goto bad; -#endif /* __APPLE__ */ + + if (dtrace_is_restricted() && !dtrace_can_attach_to_proc(current_proc())) + goto bad; if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL) return (1); @@ -1046,9 +1712,7 @@ dtrace_priv_proc_control(dtrace_state_t *state) dtrace_priv_proc_common_nocd()) return (1); -#if defined(__APPLE__) bad: -#endif /* __APPLE__ */ cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; return (0); @@ -1057,24 +1721,26 @@ bad: static int dtrace_priv_proc(dtrace_state_t *state) { -#if defined(__APPLE__) if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) goto bad; -#endif /* __APPLE__ */ + + if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed() && !dtrace_can_attach_to_proc(current_proc())) + goto bad; if (state->dts_cred.dcr_action & DTRACE_CRA_PROC) return (1); -#if defined(__APPLE__) bad: -#endif /* __APPLE__ */ cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; return (0); } -#if defined(__APPLE__) -/* dtrace_priv_proc() omitting the P_LNOATTACH check. For PID and EXECNAME accesses. */ +/* + * The P_LNOATTACH check is an Apple specific check. + * We need a version of dtrace_priv_proc() that omits + * that check for PID and EXECNAME accesses + */ static int dtrace_priv_proc_relaxed(dtrace_state_t *state) { @@ -1086,14 +1752,17 @@ dtrace_priv_proc_relaxed(dtrace_state_t *state) return (0); } -#endif /* __APPLE__ */ static int dtrace_priv_kernel(dtrace_state_t *state) { + if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed()) + goto bad; + if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL) return (1); +bad: cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV; return (0); @@ -1102,9 +1771,13 @@ dtrace_priv_kernel(dtrace_state_t *state) static int dtrace_priv_kernel_destructive(dtrace_state_t *state) { + if (dtrace_is_restricted()) + goto bad; + if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL_DESTRUCTIVE) return (1); +bad: cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV; return (0); @@ -1116,10 +1789,7 @@ dtrace_priv_kernel_destructive(dtrace_state_t *state) * clean the dirty dynamic variable lists on all CPUs. Dynamic variable * cleaning is explained in detail in . */ -#if defined(__APPLE__) -static -#endif /* __APPLE__ */ -void +static void dtrace_dynvar_clean(dtrace_dstate_t *dstate) { dtrace_dynvar_t *dirty; @@ -1211,12 +1881,10 @@ dtrace_dynvar_clean(dtrace_dstate_t *dstate) * variable can be allocated. If NULL is returned, the appropriate counter * will be incremented. 
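+ *
+ * A hypothetical call site (a sketch only; the real callers live in
+ * the DIF emulator) allocating a 64-bit dynamic variable:
+ *
+ *	dvar = dtrace_dynvar(dstate, nkeys, key, sizeof (uint64_t),
+ *	    DTRACE_DYNVAR_ALLOC, mstate, vstate);
+ *	if (dvar == NULL)
+ *		return;		(the drop counter has been bumped)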
*/ -#if defined(__APPLE__) -static -#endif /* __APPLE__ */ -dtrace_dynvar_t * +static dtrace_dynvar_t * dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys, - dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op) + dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op, + dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { uint64_t hashval = DTRACE_DYNHASH_VALID; dtrace_dynhash_t *hash = dstate->dtds_hash; @@ -1268,6 +1936,9 @@ dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys, uint64_t j, size = key[i].dttk_size; uintptr_t base = (uintptr_t)key[i].dttk_value; + if (!dtrace_canload(base, size, mstate, vstate)) + break; + for (j = 0; j < size; j++) { hashval += dtrace_load8(base + j); hashval += (hashval << 10); @@ -1276,6 +1947,9 @@ dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys, } } + if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT)) + return (NULL); + hashval += (hashval << 3); hashval ^= (hashval >> 11); hashval += (hashval << 15); @@ -1306,7 +1980,7 @@ dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys, while ((lock = *lockp) & 1) continue; - if (dtrace_casptr((void *)lockp, + if (dtrace_casptr((void *)(uintptr_t)lockp, (void *)lock, (void *)(lock + 1)) == (void *)lock) break; } @@ -1662,15 +2336,15 @@ retry: dvar->dtdv_next = free; } while (dtrace_casptr(&dcpu->dtdsc_dirty, free, dvar) != free); - return (dtrace_dynvar(dstate, nkeys, key, dsize, op)); + return (dtrace_dynvar(dstate, nkeys, key, dsize, op, mstate, vstate)); } /*ARGSUSED*/ static void dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg) { -#pragma unused(arg) - if (nval < *oval) +#pragma unused(arg) /* __APPLE__ */ + if ((int64_t)nval < (int64_t)*oval) *oval = nval; } @@ -1678,8 +2352,8 @@ dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg) static void dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg) { -#pragma unused(arg) - if (nval > *oval) +#pragma unused(arg) /* __APPLE__ */ + if ((int64_t)nval > (int64_t)*oval) *oval = nval; } @@ -1744,30 +2418,124 @@ dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr) lquanta[levels + 1] += incr; } -/*ARGSUSED*/ -static void -dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg) -{ -#pragma unused(arg) - data[0]++; - data[1] += nval; -} - -/*ARGSUSED*/ -static void -dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg) +static int +dtrace_aggregate_llquantize_bucket(int16_t factor, int16_t low, int16_t high, + int16_t nsteps, int64_t value) { -#pragma unused(nval,arg) - *oval = *oval + 1; -} + int64_t this = 1, last, next; + int base = 1, order; -/*ARGSUSED*/ -static void -dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg) -{ -#pragma unused(arg) - *oval += nval; -} + for (order = 0; order < low; ++order) + this *= factor; + + /* + * If our value is less than our factor taken to the power of the + * low order of magnitude, it goes into the zeroth bucket. + */ + if (value < this) + return 0; + else + last = this; + + for (this *= factor; order <= high; ++order) { + int nbuckets = this > nsteps ? nsteps : this; + + /* + * We should not generally get log/linear quantizations + * with a high magnitude that allows 64-bits to + * overflow, but we nonetheless protect against this + * by explicitly checking for overflow, and clamping + * our value accordingly. 
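+ * (Concretely: when next overflows, value is clamped to this - 1 so
+ * that it falls into the last bucket of the current magnitude.)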
+ */ + next = this * factor; + if (next < this) { + value = this - 1; + } + + /* + * If our value lies within this order of magnitude, + * determine its position by taking the offset within + * the order of magnitude, dividing by the bucket + * width, and adding to our (accumulated) base. + */ + if (value < this) { + return (base + (value - last) / (this / nbuckets)); + } + + base += nbuckets - (nbuckets / factor); + last = this; + this = next; + } + + /* + * Our value is greater than or equal to our factor taken to the + * power of one plus the high magnitude -- return the top bucket. + */ + return base; +} + +static void +dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr) +{ + uint64_t arg = *llquanta++; + uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg); + uint16_t low = DTRACE_LLQUANTIZE_LOW(arg); + uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg); + uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg); + + llquanta[dtrace_aggregate_llquantize_bucket(factor, low, high, nsteps, nval)] += incr; +} + +/*ARGSUSED*/ +static void +dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg) +{ +#pragma unused(arg) /* __APPLE__ */ + data[0]++; + data[1] += nval; +} + +/*ARGSUSED*/ +static void +dtrace_aggregate_stddev(uint64_t *data, uint64_t nval, uint64_t arg) +{ +#pragma unused(arg) /* __APPLE__ */ + int64_t snval = (int64_t)nval; + uint64_t tmp[2]; + + data[0]++; + data[1] += nval; + + /* + * What we want to say here is: + * + * data[2] += nval * nval; + * + * But given that nval is 64-bit, we could easily overflow, so + * we do this as 128-bit arithmetic. + */ + if (snval < 0) + snval = -snval; + + dtrace_multiply_128((uint64_t)snval, (uint64_t)snval, tmp); + dtrace_add_128(data + 2, tmp, data + 2); +} + +/*ARGSUSED*/ +static void +dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg) +{ +#pragma unused(nval, arg) /* __APPLE__ */ + *oval = *oval + 1; +} + +/*ARGSUSED*/ +static void +dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg) +{ +#pragma unused(arg) /* __APPLE__ */ + *oval += nval; +} /* * Aggregate given the tuple in the principal data buffer, and the aggregating @@ -1776,6 +2544,7 @@ dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg) * failure; if there is no space in the aggregation buffer, the data will be * dropped, and a corresponding counter incremented. 
*/ +__attribute__((noinline)) static void dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf, intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg) @@ -2083,18 +2852,19 @@ dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu, { dtrace_speculation_t *spec; dtrace_buffer_t *src, *dest; - uintptr_t daddr, saddr, dlimit; - dtrace_speculation_state_t current, new; + uintptr_t daddr, saddr, dlimit, slimit; + dtrace_speculation_state_t current, new = DTRACESPEC_INACTIVE; intptr_t offs; + uint64_t timestamp; if (which == 0) return; - if (which > state->dts_nspeculations) { + if (which > (dtrace_specid_t)state->dts_nspeculations) { cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; return; } - + spec = &state->dts_speculations[which - 1]; src = &spec->dtsp_buffer[cpu]; dest = &state->dts_buffer[cpu]; @@ -2137,7 +2907,7 @@ dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu, new = DTRACESPEC_COMMITTING; break; } - /*FALLTHROUGH*/ + OS_FALLTHROUGH; case DTRACESPEC_ACTIVEMANY: new = DTRACESPEC_COMMITTINGMANY; @@ -2161,7 +2931,38 @@ dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu, } /* - * We have the space; copy the buffer across. (Note that this is a + * We have sufficient space to copy the speculative buffer into the + * primary buffer. First, modify the speculative buffer, filling + * in the timestamp of all entries with the current time. The data + * must have the commit() time rather than the time it was traced, + * so that all entries in the primary buffer are in timestamp order. + */ + timestamp = dtrace_gethrtime(); + saddr = (uintptr_t)src->dtb_tomax; + slimit = saddr + src->dtb_offset; + while (saddr < slimit) { + size_t size; + dtrace_rechdr_t *dtrh = (dtrace_rechdr_t *)saddr; + + if (dtrh->dtrh_epid == DTRACE_EPIDNONE) { + saddr += sizeof (dtrace_epid_t); + continue; + } + + ASSERT(dtrh->dtrh_epid <= ((dtrace_epid_t) state->dts_necbs)); + size = state->dts_ecbs[dtrh->dtrh_epid - 1]->dte_size; + + ASSERT(saddr + size <= slimit); + ASSERT(size >= sizeof(dtrace_rechdr_t)); + ASSERT(DTRACE_RECORD_LOAD_TIMESTAMP(dtrh) == UINT64_MAX); + + DTRACE_RECORD_STORE_TIMESTAMP(dtrh, timestamp); + + saddr += size; + } + + /* + * Copy the buffer across. (Note that this is a * highly subobtimal bcopy(); in the unlikely event that this becomes * a serious performance issue, a high-performance DTrace-specific * bcopy() should obviously be invented.) @@ -2200,6 +3001,7 @@ out: (current == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) { uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state, DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE); +#pragma unused(rval) /* __APPLE__ */ ASSERT(rval == DTRACESPEC_COMMITTING); } @@ -2215,18 +3017,19 @@ out: * do nothing. 
The state of the specified speculation is transitioned * according to the state transition diagram outlined in */ +__attribute__((noinline)) static void dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu, dtrace_specid_t which) { dtrace_speculation_t *spec; - dtrace_speculation_state_t current, new; + dtrace_speculation_state_t current, new = DTRACESPEC_INACTIVE; dtrace_buffer_t *buf; if (which == 0) return; - if (which > state->dts_nspeculations) { + if (which > (dtrace_specid_t)state->dts_nspeculations) { cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; return; } @@ -2289,7 +3092,7 @@ dtrace_speculation_clean_here(dtrace_state_t *state) return; } - for (i = 0; i < state->dts_nspeculations; i++) { + for (i = 0; i < (dtrace_specid_t)state->dts_nspeculations; i++) { dtrace_speculation_t *spec = &state->dts_speculations[i]; dtrace_buffer_t *src = &spec->dtsp_buffer[cpu]; @@ -2324,10 +3127,11 @@ dtrace_speculation_clean_here(dtrace_state_t *state) static void dtrace_speculation_clean(dtrace_state_t *state) { - int work = 0, rv; + int work = 0; + uint32_t rv; dtrace_specid_t i; - for (i = 0; i < state->dts_nspeculations; i++) { + for (i = 0; i < (dtrace_specid_t)state->dts_nspeculations; i++) { dtrace_speculation_t *spec = &state->dts_speculations[i]; ASSERT(!spec->dtsp_cleaning); @@ -2351,7 +3155,7 @@ dtrace_speculation_clean(dtrace_state_t *state) * speculation buffers, as appropriate. We can now set the state * to inactive. */ - for (i = 0; i < state->dts_nspeculations; i++) { + for (i = 0; i < (dtrace_specid_t)state->dts_nspeculations; i++) { dtrace_speculation_t *spec = &state->dts_speculations[i]; dtrace_speculation_state_t current, new; @@ -2377,18 +3181,19 @@ dtrace_speculation_clean(dtrace_state_t *state) * the active CPU is not the specified CPU -- the speculation will be * atomically transitioned into the ACTIVEMANY state. */ +__attribute__((noinline)) static dtrace_buffer_t * dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid, dtrace_specid_t which) { dtrace_speculation_t *spec; - dtrace_speculation_state_t current, new; + dtrace_speculation_state_t current, new = DTRACESPEC_INACTIVE; dtrace_buffer_t *buf; if (which == 0) return (NULL); - if (which > state->dts_nspeculations) { + if (which > (dtrace_specid_t)state->dts_nspeculations) { cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; return (NULL); } @@ -2441,6 +3246,51 @@ dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid, return (buf); } +/* + * Return a string. In the event that the user lacks the privilege to access + * arbitrary kernel memory, we copy the string out to scratch memory so that we + * don't fail access checking. + * + * dtrace_dif_variable() uses this routine as a helper for various + * builtin values such as 'execname' and 'probefunc.' + */ +static +uintptr_t +dtrace_dif_varstr(uintptr_t addr, dtrace_state_t *state, + dtrace_mstate_t *mstate) +{ + uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; + uintptr_t ret; + size_t strsz; + + /* + * The easy case: this probe is allowed to read all of memory, so + * we can just return this as a vanilla pointer. + */ + if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) + return (addr); + + /* + * This is the tougher case: we copy the string in question from + * kernel memory into scratch memory and return it that way: this + * ensures that we won't trip up when access checking tests the + * BYREF return value. 
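+ * (Note that strsz below includes the terminating NUL -- hence the
+ * + 1 -- so the copied string is always terminated in scratch.)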
+ */ + strsz = dtrace_strlen((char *)addr, size) + 1; + + if (mstate->dtms_scratch_ptr + strsz > + mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); + return (0); + } + + dtrace_strcpy((const void *)addr, (void *)mstate->dtms_scratch_ptr, + strsz); + ret = mstate->dtms_scratch_ptr; + mstate->dtms_scratch_ptr += strsz; + return (ret); +} + /* * This function implements the DIF emulator's variable lookups. The emulator * passes a reserved variable identifier and optional built-in array index. @@ -2463,12 +3313,8 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS); if (ndx >= sizeof (mstate->dtms_arg) / sizeof (mstate->dtms_arg[0])) { -#if !defined(__APPLE__) int aframes = mstate->dtms_probe->dtpr_aframes + 2; -#else - /* Account for introduction of __dtrace_probe() on xnu. */ - int aframes = mstate->dtms_probe->dtpr_aframes + 3; -#endif /* __APPLE__ */ + dtrace_vstate_t *vstate = &state->dts_vstate; dtrace_provider_t *pv; uint64_t val; @@ -2477,14 +3323,13 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, val = pv->dtpv_pops.dtps_getargval(pv->dtpv_arg, mstate->dtms_probe->dtpr_id, mstate->dtms_probe->dtpr_arg, ndx, aframes); -#if defined(__APPLE__) - /* Special case access of arg5 as passed to dtrace_probeid_error (which see.) */ + /* Special case access of arg5 as passed to dtrace_probe_error() (which see.) */ else if (mstate->dtms_probe->dtpr_id == dtrace_probeid_error && ndx == 5) { - return ((dtrace_state_t *)(mstate->dtms_arg[0]))->dts_arg_error_illval; + return ((dtrace_state_t *)(uintptr_t)(mstate->dtms_arg[0]))->dts_arg_error_illval; } -#endif /* __APPLE__ */ + else - val = dtrace_getarg(ndx, aframes); + val = dtrace_getarg(ndx, aframes, mstate, vstate); /* * This is regrettably required to keep the compiler @@ -2503,22 +3348,6 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return (mstate->dtms_arg[ndx]); -#if !defined(__APPLE__) - case DIF_VAR_UREGS: { - klwp_t *lwp; - - if (!dtrace_priv_proc(state)) - return (0); - - if ((lwp = curthread->t_lwp) == NULL) { - DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); - cpu_core[CPU->cpu_id].cpuc_dtrace_illval = NULL; - return (0); - } - - return (dtrace_getreg(lwp->lwp_regs, ndx)); - } -#else case DIF_VAR_UREGS: { thread_t thread; @@ -2533,20 +3362,27 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return (dtrace_getreg(find_user_regs(thread), ndx)); } -#endif /* __APPLE__ */ -#if !defined(__APPLE__) - case DIF_VAR_CURTHREAD: + case DIF_VAR_VMREGS: { + uint64_t rval; + if (!dtrace_priv_kernel(state)) return (0); - return ((uint64_t)(uintptr_t)curthread); -#else + + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + + rval = dtrace_getvmreg(ndx); + + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); + + return (rval); + } + case DIF_VAR_CURTHREAD: if (!dtrace_priv_kernel(state)) return (0); return ((uint64_t)(uintptr_t)current_thread()); -#endif /* __APPLE__ */ case DIF_VAR_TIMESTAMP: if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) { @@ -2555,15 +3391,9 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, } return (mstate->dtms_timestamp); -#if !defined(__APPLE__) - case DIF_VAR_VTIMESTAMP: - ASSERT(dtrace_vtime_references != 0); - return (curthread->t_dtrace_vtime); -#else case DIF_VAR_VTIMESTAMP: ASSERT(dtrace_vtime_references != 0); return (dtrace_get_thread_vtime(current_thread())); -#endif /* 
__APPLE__ */ case DIF_VAR_WALLTIMESTAMP: if (!(mstate->dtms_present & DTRACE_MSTATE_WALLTIMESTAMP)) { @@ -2572,6 +3402,24 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, } return (mstate->dtms_walltimestamp); + case DIF_VAR_MACHTIMESTAMP: + if (!(mstate->dtms_present & DTRACE_MSTATE_MACHTIMESTAMP)) { + mstate->dtms_machtimestamp = mach_absolute_time(); + mstate->dtms_present |= DTRACE_MSTATE_MACHTIMESTAMP; + } + return (mstate->dtms_machtimestamp); + + case DIF_VAR_MACHCTIMESTAMP: + if (!(mstate->dtms_present & DTRACE_MSTATE_MACHCTIMESTAMP)) { + mstate->dtms_machctimestamp = mach_continuous_time(); + mstate->dtms_present |= DTRACE_MSTATE_MACHCTIMESTAMP; + } + return (mstate->dtms_machctimestamp); + + + case DIF_VAR_CPU: + return ((uint64_t) dtrace_get_thread_last_cpu_id(current_thread())); + case DIF_VAR_IPL: if (!dtrace_priv_kernel(state)) return (0); @@ -2593,12 +3441,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, if (!dtrace_priv_kernel(state)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_STACKDEPTH)) { -#if !defined(__APPLE__) int aframes = mstate->dtms_probe->dtpr_aframes + 2; -#else - /* Account for introduction of __dtrace_probe() on xnu. */ - int aframes = mstate->dtms_probe->dtpr_aframes + 3; -#endif /* __APPLE__ */ mstate->dtms_stackdepth = dtrace_getstackdepth(aframes); mstate->dtms_present |= DTRACE_MSTATE_STACKDEPTH; @@ -2629,12 +3472,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, if (!dtrace_priv_kernel(state)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_CALLER)) { -#if !defined(__APPLE__) int aframes = mstate->dtms_probe->dtpr_aframes + 2; -#else - /* Account for introduction of __dtrace_probe() on xnu. */ - int aframes = mstate->dtms_probe->dtpr_aframes + 3; -#endif /* __APPLE__ */ if (!DTRACE_ANCHORED(mstate->dtms_probe)) { /* @@ -2649,7 +3487,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, (uint32_t *)(uintptr_t)mstate->dtms_arg[0]); mstate->dtms_caller = caller[1]; } else if ((mstate->dtms_caller = - dtrace_caller(aframes)) == -1) { + dtrace_caller(aframes)) == (uintptr_t)-1) { /* * We have failed to do this the quick way; * we must resort to the slower approach of @@ -2679,8 +3517,10 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, * uint64_t will contain the caller, which is what * we're after. 
*/ - ustack[2] = NULL; + ustack[2] = 0; + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_getupcstack(ustack, 3); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); mstate->dtms_ucaller = ustack[2]; mstate->dtms_present |= DTRACE_MSTATE_UCALLER; } @@ -2689,48 +3529,28 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, case DIF_VAR_PROBEPROV: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); - return ((uint64_t)(uintptr_t) - mstate->dtms_probe->dtpr_provider->dtpv_name); + return (dtrace_dif_varstr( + (uintptr_t)mstate->dtms_probe->dtpr_provider->dtpv_name, + state, mstate)); case DIF_VAR_PROBEMOD: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); - return ((uint64_t)(uintptr_t) - mstate->dtms_probe->dtpr_mod); + return (dtrace_dif_varstr( + (uintptr_t)mstate->dtms_probe->dtpr_mod, + state, mstate)); case DIF_VAR_PROBEFUNC: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); - return ((uint64_t)(uintptr_t) - mstate->dtms_probe->dtpr_func); + return (dtrace_dif_varstr( + (uintptr_t)mstate->dtms_probe->dtpr_func, + state, mstate)); case DIF_VAR_PROBENAME: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); - return ((uint64_t)(uintptr_t) - mstate->dtms_probe->dtpr_name); - -#if !defined(__APPLE__) - case DIF_VAR_PID: - if (!dtrace_priv_proc(state)) - return (0); - - /* - * Note that we are assuming that an unanchored probe is - * always due to a high-level interrupt. (And we're assuming - * that there is only a single high level interrupt.) - */ - if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) - return (pid0.pid_id); - - /* - * It is always safe to dereference one's own t_procp pointer: - * it always points to a valid, allocated proc structure. - * Further, it is always safe to dereference the p_pidp member - * of one's own proc structure. (These are truisms becuase - * threads and processes don't clean up their own state -- - * they leave that task to whomever reaps them.) - */ - return ((uint64_t)curthread->t_procp->p_pidp->pid_id); + return (dtrace_dif_varstr( + (uintptr_t)mstate->dtms_probe->dtpr_name, + state, mstate)); -#else case DIF_VAR_PID: if (!dtrace_priv_proc_relaxed(state)) return (0); @@ -2744,22 +3564,8 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, /* Anchored probe that fires while on an interrupt accrues to process 0 */ return 0; - return ((uint64_t)proc_selfpid()); -#endif /* __APPLE__ */ - -#if !defined(__APPLE__) - case DIF_VAR_PPID: - if (!dtrace_priv_proc(state)) - return (0); - - /* - * See comment in DIF_VAR_PID. - */ - if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) - return (pid0.pid_id); + return ((uint64_t)dtrace_proc_selfpid()); - return ((uint64_t)curthread->t_procp->p_ppid); -#else case DIF_VAR_PPID: if (!dtrace_priv_proc_relaxed(state)) return (0); @@ -2770,110 +3576,104 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return (0); - return ((uint64_t)(uintptr_t)(current_proc()->p_ppid)); -#endif /* __APPLE__ */ + return ((uint64_t)dtrace_proc_selfppid()); -#if !defined(__APPLE__) case DIF_VAR_TID: - /* - * See comment in DIF_VAR_PID. - */ - if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) - return (0); + /* We do not need to check for null current_thread() */ + return thread_tid(current_thread()); /* globally unique */ - return ((uint64_t)curthread->t_tid); -#else - case DIF_VAR_TID: - /* - * See comment in DIF_VAR_PID. 
- */ - if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) + case DIF_VAR_PTHREAD_SELF: + if (!dtrace_priv_proc(state)) return (0); - return ((uint64_t)(uintptr_t)current_thread()); /* Is user's (pthread_t)t->kernel_thread */ -#endif /* __APPLE__ */ + /* Not currently supported, but we should be able to delta the dispatchqaddr and dispatchqoffset to get pthread_self */ + return 0; -#if !defined(__APPLE__) - case DIF_VAR_EXECNAME: + case DIF_VAR_DISPATCHQADDR: if (!dtrace_priv_proc(state)) return (0); - /* - * See comment in DIF_VAR_PID. - */ - if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) - return ((uint64_t)(uintptr_t)p0.p_user.u_comm); + /* We do not need to check for null current_thread() */ + return thread_dispatchqaddr(current_thread()); - /* - * It is always safe to dereference one's own t_procp pointer: - * it always points to a valid, allocated proc structure. - * (This is true because threads don't clean up their own - * state -- they leave that task to whomever reaps them.) - */ - return ((uint64_t)(uintptr_t) - curthread->t_procp->p_user.u_comm); -#else case DIF_VAR_EXECNAME: { char *xname = (char *)mstate->dtms_scratch_ptr; - size_t scratch_size = MAXCOMLEN+1; + char *pname = proc_best_name(curproc); + size_t scratch_size = sizeof(proc_name_t); /* The scratch allocation's lifetime is that of the clause. */ - if (mstate->dtms_scratch_ptr + scratch_size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) + if (!DTRACE_INSCRATCH(mstate, scratch_size)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); return 0; + } if (!dtrace_priv_proc_relaxed(state)) return (0); mstate->dtms_scratch_ptr += scratch_size; - proc_selfname( xname, MAXCOMLEN ); + strlcpy(xname, pname, scratch_size); return ((uint64_t)(uintptr_t)xname); } -#endif /* __APPLE__ */ -#if !defined(__APPLE__) + + case DIF_VAR_ZONENAME: + { + /* scratch_size is equal to length('global') + 1 for the null-terminator. */ + char *zname = (char *)mstate->dtms_scratch_ptr; + size_t scratch_size = 6 + 1; + if (!dtrace_priv_proc(state)) return (0); - /* - * See comment in DIF_VAR_PID. - */ - if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) - return ((uint64_t)(uintptr_t)p0.p_zone->zone_name); + /* The scratch allocation's lifetime is that of the clause. */ + if (!DTRACE_INSCRATCH(mstate, scratch_size)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); + return 0; + } - /* - * It is always safe to dereference one's own t_procp pointer: - * it always points to a valid, allocated proc structure. - * (This is true because threads don't clean up their own - * state -- they leave that task to whomever reaps them.) - */ - return ((uint64_t)(uintptr_t) - curthread->t_procp->p_zone->zone_name); + mstate->dtms_scratch_ptr += scratch_size; -#else - case DIF_VAR_ZONENAME: - if (!dtrace_priv_proc(state)) - return (0); - - return ((uint64_t)(uintptr_t)NULL); /* Darwin doesn't do "zones" */ -#endif /* __APPLE__ */ + /* The kernel does not provide zonename, it will always return 'global'. 
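+ * Returning the Solaris global-zone name keeps D scripts that
+ * compare zonename against "global" working unmodified on Darwin.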
*/ + strlcpy(zname, "global", scratch_size); + + return ((uint64_t)(uintptr_t)zname); + } + +#if MONOTONIC + case DIF_VAR_CPUINSTRS: + return mt_cur_cpu_instrs(); + + case DIF_VAR_CPUCYCLES: + return mt_cur_cpu_cycles(); + + case DIF_VAR_VINSTRS: + return mt_cur_thread_instrs(); + + case DIF_VAR_VCYCLES: + return mt_cur_thread_cycles(); +#else /* MONOTONIC */ + case DIF_VAR_CPUINSTRS: /* FALLTHROUGH */ + case DIF_VAR_CPUCYCLES: /* FALLTHROUGH */ + case DIF_VAR_VINSTRS: /* FALLTHROUGH */ + case DIF_VAR_VCYCLES: /* FALLTHROUGH */ + return 0; +#endif /* !MONOTONIC */ -#if !defined(__APPLE__) case DIF_VAR_UID: - if (!dtrace_priv_proc(state)) + if (!dtrace_priv_proc_relaxed(state)) return (0); /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) - return ((uint64_t)p0.p_cred->cr_uid); + return (0); - return ((uint64_t)curthread->t_cred->cr_uid); -#else - case DIF_VAR_UID: + return ((uint64_t) dtrace_proc_selfruid()); + + case DIF_VAR_GID: if (!dtrace_priv_proc(state)) return (0); @@ -2884,13 +3684,16 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return (0); if (dtrace_CRED() != NULL) - return ((uint64_t)kauth_getuid()); - else - return -1LL; -#endif /* __APPLE__ */ + /* Credential does not require lazy initialization. */ + return ((uint64_t)kauth_getgid()); + else { + /* proc_lock would be taken under kauth_cred_proc_ref() in kauth_cred_get(). */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return -1ULL; + } -#if !defined(__APPLE__) - case DIF_VAR_GID: + case DIF_VAR_ERRNO: { + uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); if (!dtrace_priv_proc(state)) return (0); @@ -2898,63 +3701,472 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) - return ((uint64_t)p0.p_cred->cr_gid); - - return ((uint64_t)curthread->t_cred->cr_gid); -#else - case DIF_VAR_GID: - if (!dtrace_priv_proc(state)) return (0); - /* - * See comment in DIF_VAR_PID. - */ - if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) - return (0); + if (uthread) + return (uint64_t)uthread->t_dtrace_errno; + else { + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return -1ULL; + } + } - if (dtrace_CRED() != NULL) - return ((uint64_t)kauth_getgid()); - else - return -1LL; -#endif /* __APPLE__ */ + default: + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return (0); + } +} -#if !defined(__APPLE__) - case DIF_VAR_ERRNO: { - klwp_t *lwp; - if (!dtrace_priv_proc(state)) - return (0); +typedef enum dtrace_json_state { + DTRACE_JSON_REST = 1, + DTRACE_JSON_OBJECT, + DTRACE_JSON_STRING, + DTRACE_JSON_STRING_ESCAPE, + DTRACE_JSON_STRING_ESCAPE_UNICODE, + DTRACE_JSON_COLON, + DTRACE_JSON_COMMA, + DTRACE_JSON_VALUE, + DTRACE_JSON_IDENTIFIER, + DTRACE_JSON_NUMBER, + DTRACE_JSON_NUMBER_FRAC, + DTRACE_JSON_NUMBER_EXP, + DTRACE_JSON_COLLECT_OBJECT +} dtrace_json_state_t; - /* - * See comment in DIF_VAR_PID. - */ - if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) - return (0); +/* + * This function possesses just enough knowledge about JSON to extract a single + * value from a JSON string and store it in the scratch buffer. It is able + * to extract nested object values, and members of arrays by index. + * + * elemlist is a list of JSON keys, stored as packed NUL-terminated strings, to + * be looked up as we descend into the object tree. e.g. 
+ * + * foo[0].bar.baz[32] --> "foo" NUL "0" NUL "bar" NUL "baz" NUL "32" NUL + * with nelems = 5. + * + * The run time of this function must be bounded above by strsize to limit the + * amount of work done in probe context. As such, it is implemented as a + * simple state machine, reading one character at a time using safe loads + * until we find the requested element, hit a parsing error or run off the + * end of the object or string. + * + * As there is no way for a subroutine to return an error without interrupting + * clause execution, we simply return NULL in the event of a missing key or any + * other error condition. Each NULL return in this function is commented with + * the error condition it represents -- parsing or otherwise. + * + * The set of states for the state machine closely matches the JSON + * specification (http://json.org/). Briefly: + * + * DTRACE_JSON_REST: + * Skip whitespace until we find either a top-level Object, moving + * to DTRACE_JSON_OBJECT; or an Array, moving to DTRACE_JSON_VALUE. + * + * DTRACE_JSON_OBJECT: + * Locate the next key String in an Object. Sets a flag to denote + * the next String as a key string and moves to DTRACE_JSON_STRING. + * + * DTRACE_JSON_COLON: + * Skip whitespace until we find the colon that separates key Strings + * from their values. Once found, move to DTRACE_JSON_VALUE. + * + * DTRACE_JSON_VALUE: + * Detects the type of the next value (String, Number, Identifier, Object + * or Array) and routes to the states that process that type. Here we also + * deal with the element selector list if we are requested to traverse down + * into the object tree. + * + * DTRACE_JSON_COMMA: + * Skip whitespace until we find the comma that separates key-value pairs + * in Objects (returning to DTRACE_JSON_OBJECT) or values in Arrays + * (similarly DTRACE_JSON_VALUE). All following literal value processing + * states return to this state at the end of their value, unless otherwise + * noted. + * + * DTRACE_JSON_NUMBER, DTRACE_JSON_NUMBER_FRAC, DTRACE_JSON_NUMBER_EXP: + * Processes a Number literal from the JSON, including any exponent + * component that may be present. Numbers are returned as strings, which + * may be passed to strtoll() if an integer is required. + * + * DTRACE_JSON_IDENTIFIER: + * Processes a "true", "false" or "null" literal in the JSON. + * + * DTRACE_JSON_STRING, DTRACE_JSON_STRING_ESCAPE, + * DTRACE_JSON_STRING_ESCAPE_UNICODE: + * Processes a String literal from the JSON, whether the String denotes + * a key, a value or part of a larger Object. Handles all escape sequences + * present in the specification, including four-digit unicode characters, + * but merely includes the escape sequence without converting it to the + * actual escaped character. If the String is flagged as a key, we + * move to DTRACE_JSON_COLON rather than DTRACE_JSON_COMMA. + * + * DTRACE_JSON_COLLECT_OBJECT: + * This state collects an entire Object (or Array), correctly handling + * embedded strings. If the full element selector list matches this nested + * object, we return the Object in full as a string. If not, we use this + * state to skip to the next value at this level and continue processing. 
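+ *
+ * As a worked example (illustrative input, not from the code): given
+ * the JSON string '{"a": {"b": [7, 8]}}' and the element selector
+ * "a.b[1]", elemlist arrives packed as "a" NUL "b" NUL "1" NUL with
+ * nelems = 3; the machine descends through both objects, counts array
+ * elements until array_pos matches array_elem, and returns "8" in the
+ * scratch buffer pointed to by dest.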
+ */ +static char * +dtrace_json(uint64_t size, uintptr_t json, char *elemlist, int nelems, + char *dest) +{ + dtrace_json_state_t state = DTRACE_JSON_REST; + int64_t array_elem = INT64_MIN; + int64_t array_pos = 0; + uint8_t escape_unicount = 0; + boolean_t string_is_key = B_FALSE; + boolean_t collect_object = B_FALSE; + boolean_t found_key = B_FALSE; + boolean_t in_array = B_FALSE; + uint32_t braces = 0, brackets = 0; + char *elem = elemlist; + char *dd = dest; + uintptr_t cur; + + for (cur = json; cur < json + size; cur++) { + char cc = dtrace_load8(cur); + if (cc == '\0') + return (NULL); - if ((lwp = curthread->t_lwp) == NULL) - return (0); + switch (state) { + case DTRACE_JSON_REST: + if (isspace(cc)) + break; - return ((uint64_t)lwp->lwp_errno); - } -#else - case DIF_VAR_ERRNO: { - uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); - if (!dtrace_priv_proc(state)) - return (0); + if (cc == '{') { + state = DTRACE_JSON_OBJECT; + break; + } - /* - * See comment in DIF_VAR_PID. - */ - if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) - return (0); + if (cc == '[') { + in_array = B_TRUE; + array_pos = 0; + array_elem = dtrace_strtoll(elem, 10, size); + found_key = array_elem == 0 ? B_TRUE : B_FALSE; + state = DTRACE_JSON_VALUE; + break; + } - return (uthread ? uthread->t_dtrace_errno : -1); - } -#endif /* __APPLE__ */ + /* + * ERROR: expected to find a top-level object or array. + */ + return (NULL); + case DTRACE_JSON_OBJECT: + if (isspace(cc)) + break; - default: - DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); - return (0); + if (cc == '"') { + state = DTRACE_JSON_STRING; + string_is_key = B_TRUE; + break; + } + + /* + * ERROR: either the object did not start with a key + * string, or we've run off the end of the object + * without finding the requested key. + */ + return (NULL); + case DTRACE_JSON_STRING: + if (cc == '\\') { + *dd++ = '\\'; + state = DTRACE_JSON_STRING_ESCAPE; + break; + } + + if (cc == '"') { + if (collect_object) { + /* + * We don't reset the dest here, as + * the string is part of a larger + * object being collected. + */ + *dd++ = cc; + collect_object = B_FALSE; + state = DTRACE_JSON_COLLECT_OBJECT; + break; + } + *dd = '\0'; + dd = dest; /* reset string buffer */ + if (string_is_key) { + if (dtrace_strncmp(dest, elem, + size) == 0) + found_key = B_TRUE; + } else if (found_key) { + if (nelems > 1) { + /* + * We expected an object, not + * this string. + */ + return (NULL); + } + return (dest); + } + state = string_is_key ? DTRACE_JSON_COLON : + DTRACE_JSON_COMMA; + string_is_key = B_FALSE; + break; + } + + *dd++ = cc; + break; + case DTRACE_JSON_STRING_ESCAPE: + *dd++ = cc; + if (cc == 'u') { + escape_unicount = 0; + state = DTRACE_JSON_STRING_ESCAPE_UNICODE; + } else { + state = DTRACE_JSON_STRING; + } + break; + case DTRACE_JSON_STRING_ESCAPE_UNICODE: + if (!isxdigit(cc)) { + /* + * ERROR: invalid unicode escape, expected + * four valid hexidecimal digits. + */ + return (NULL); + } + + *dd++ = cc; + if (++escape_unicount == 4) + state = DTRACE_JSON_STRING; + break; + case DTRACE_JSON_COLON: + if (isspace(cc)) + break; + + if (cc == ':') { + state = DTRACE_JSON_VALUE; + break; + } + + /* + * ERROR: expected a colon. 
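+ * (e.g. malformed input such as {"a" 1}, where the ':' separator
+ * between the key and its value is missing)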
+ */ + return (NULL); + case DTRACE_JSON_COMMA: + if (isspace(cc)) + break; + + if (cc == ',') { + if (in_array) { + state = DTRACE_JSON_VALUE; + if (++array_pos == array_elem) + found_key = B_TRUE; + } else { + state = DTRACE_JSON_OBJECT; + } + break; + } + + /* + * ERROR: either we hit an unexpected character, or + * we reached the end of the object or array without + * finding the requested key. + */ + return (NULL); + case DTRACE_JSON_IDENTIFIER: + if (islower(cc)) { + *dd++ = cc; + break; + } + + *dd = '\0'; + dd = dest; /* reset string buffer */ + + if (dtrace_strncmp(dest, "true", 5) == 0 || + dtrace_strncmp(dest, "false", 6) == 0 || + dtrace_strncmp(dest, "null", 5) == 0) { + if (found_key) { + if (nelems > 1) { + /* + * ERROR: We expected an object, + * not this identifier. + */ + return (NULL); + } + return (dest); + } else { + cur--; + state = DTRACE_JSON_COMMA; + break; + } + } + + /* + * ERROR: we did not recognise the identifier as one + * of those in the JSON specification. + */ + return (NULL); + case DTRACE_JSON_NUMBER: + if (cc == '.') { + *dd++ = cc; + state = DTRACE_JSON_NUMBER_FRAC; + break; + } + + if (cc == 'x' || cc == 'X') { + /* + * ERROR: specification explicitly excludes + * hexidecimal or octal numbers. + */ + return (NULL); + } + + OS_FALLTHROUGH; + case DTRACE_JSON_NUMBER_FRAC: + if (cc == 'e' || cc == 'E') { + *dd++ = cc; + state = DTRACE_JSON_NUMBER_EXP; + break; + } + + if (cc == '+' || cc == '-') { + /* + * ERROR: expect sign as part of exponent only. + */ + return (NULL); + } + OS_FALLTHROUGH; + case DTRACE_JSON_NUMBER_EXP: + if (isdigit(cc) || cc == '+' || cc == '-') { + *dd++ = cc; + break; + } + + *dd = '\0'; + dd = dest; /* reset string buffer */ + if (found_key) { + if (nelems > 1) { + /* + * ERROR: We expected an object, not + * this number. + */ + return (NULL); + } + return (dest); + } + + cur--; + state = DTRACE_JSON_COMMA; + break; + case DTRACE_JSON_VALUE: + if (isspace(cc)) + break; + + if (cc == '{' || cc == '[') { + if (nelems > 1 && found_key) { + in_array = cc == '[' ? B_TRUE : B_FALSE; + /* + * If our element selector directs us + * to descend into this nested object, + * then move to the next selector + * element in the list and restart the + * state machine. + */ + while (*elem != '\0') + elem++; + elem++; /* skip the inter-element NUL */ + nelems--; + dd = dest; + if (in_array) { + state = DTRACE_JSON_VALUE; + array_pos = 0; + array_elem = dtrace_strtoll( + elem, 10, size); + found_key = array_elem == 0 ? + B_TRUE : B_FALSE; + } else { + found_key = B_FALSE; + state = DTRACE_JSON_OBJECT; + } + break; + } + + /* + * Otherwise, we wish to either skip this + * nested object or return it in full. + */ + if (cc == '[') + brackets = 1; + else + braces = 1; + *dd++ = cc; + state = DTRACE_JSON_COLLECT_OBJECT; + break; + } + + if (cc == '"') { + state = DTRACE_JSON_STRING; + break; + } + + if (islower(cc)) { + /* + * Here we deal with true, false and null. + */ + *dd++ = cc; + state = DTRACE_JSON_IDENTIFIER; + break; + } + + if (cc == '-' || isdigit(cc)) { + *dd++ = cc; + state = DTRACE_JSON_NUMBER; + break; + } + + /* + * ERROR: unexpected character at start of value. + */ + return (NULL); + case DTRACE_JSON_COLLECT_OBJECT: + if (cc == '\0') + /* + * ERROR: unexpected end of input. + */ + return (NULL); + + *dd++ = cc; + if (cc == '"') { + collect_object = B_TRUE; + state = DTRACE_JSON_STRING; + break; + } + + if (cc == ']') { + if (brackets-- == 0) { + /* + * ERROR: unbalanced brackets. 
+ */ + return (NULL); + } + } else if (cc == '}') { + if (braces-- == 0) { + /* + * ERROR: unbalanced braces. + */ + return (NULL); + } + } else if (cc == '{') { + braces++; + } else if (cc == '[') { + brackets++; + } + + if (brackets == 0 && braces == 0) { + if (found_key) { + *dd = '\0'; + return (dest); + } + dd = dest; /* reset string buffer */ + state = DTRACE_JSON_COMMA; + } + break; + } } + return (NULL); } /* @@ -2970,11 +4182,8 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, dtrace_mstate_t *mstate, dtrace_state_t *state) { volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; -#if !defined(__APPLE__) - volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; -#else volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; -#endif /* __APPLE__ */ + dtrace_vstate_t *vstate = &state->dts_vstate; #if !defined(__APPLE__) union { @@ -2987,16 +4196,23 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, uintptr_t rw; } r; #else -/* XXX awaits lock/mutex work */ +/* FIXME: awaits lock/mutex work */ #endif /* __APPLE__ */ switch (subr) { case DIF_SUBR_RAND: - regs[rd] = (dtrace_gethrtime() * 2416 + 374441) % 1771875; + regs[rd] = dtrace_xoroshiro128_plus_next( + state->dts_rstate[CPU->cpu_id]); break; #if !defined(__APPLE__) case DIF_SUBR_MUTEX_OWNED: + if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), + mstate, vstate)) { + regs[rd] = 0; + break; + } + m.mx = dtrace_load64(tupregs[0].dttk_value); if (MUTEX_TYPE_ADAPTIVE(&m.mi)) regs[rd] = MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER; @@ -3005,6 +4221,12 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; case DIF_SUBR_MUTEX_OWNER: + if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), + mstate, vstate)) { + regs[rd] = 0; + break; + } + m.mx = dtrace_load64(tupregs[0].dttk_value); if (MUTEX_TYPE_ADAPTIVE(&m.mi) && MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER) @@ -3014,11 +4236,23 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; case DIF_SUBR_MUTEX_TYPE_ADAPTIVE: + if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), + mstate, vstate)) { + regs[rd] = 0; + break; + } + m.mx = dtrace_load64(tupregs[0].dttk_value); regs[rd] = MUTEX_TYPE_ADAPTIVE(&m.mi); break; case DIF_SUBR_MUTEX_TYPE_SPIN: + if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), + mstate, vstate)) { + regs[rd] = 0; + break; + } + m.mx = dtrace_load64(tupregs[0].dttk_value); regs[rd] = MUTEX_TYPE_SPIN(&m.mi); break; @@ -3026,22 +4260,40 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, case DIF_SUBR_RW_READ_HELD: { uintptr_t tmp; + if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t), + mstate, vstate)) { + regs[rd] = 0; + break; + } + r.rw = dtrace_loadptr(tupregs[0].dttk_value); regs[rd] = _RW_READ_HELD(&r.ri, tmp); break; } case DIF_SUBR_RW_WRITE_HELD: + if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t), + mstate, vstate)) { + regs[rd] = 0; + break; + } + r.rw = dtrace_loadptr(tupregs[0].dttk_value); regs[rd] = _RW_WRITE_HELD(&r.ri); break; case DIF_SUBR_RW_ISWRITER: + if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t), + mstate, vstate)) { + regs[rd] = 0; + break; + } + r.rw = dtrace_loadptr(tupregs[0].dttk_value); regs[rd] = _RW_ISWRITER(&r.ri); break; #else -/* XXX awaits lock/mutex work */ +/* FIXME: awaits lock/mutex work */ #endif /* __APPLE__ */ case DIF_SUBR_BCOPY: { @@ -3059,6 +4311,11 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; } + if (!dtrace_canload(src, size, mstate, vstate)) { + regs[rd] 
= 0; + break; + } + dtrace_bcopy((void *)src, (void *)dest, size); break; } @@ -3070,26 +4327,35 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, tupregs[subr == DIF_SUBR_ALLOCA ? 0 : 1].dttk_value; size_t scratch_size = (dest - mstate->dtms_scratch_ptr) + size; + /* + * Check whether the user can access kernel memory + */ + if (dtrace_priv_kernel(state) == 0) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV); + regs[rd] = 0; + break; + } /* * This action doesn't require any credential checks since * probes will not activate in user contexts to which the * enabling user does not have permissions. */ - if (mstate->dtms_scratch_ptr + scratch_size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + + /* + * Rounding up the user allocation size could have overflowed + * a large, bogus allocation (like -1ULL) to 0. + */ + if (scratch_size < size || + !DTRACE_INSCRATCH(mstate, scratch_size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } if (subr == DIF_SUBR_COPYIN) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); -#if !defined(__APPLE__) - dtrace_copyin(tupregs[0].dttk_value, dest, size); -#else if (dtrace_priv_proc(state)) - dtrace_copyin(tupregs[0].dttk_value, dest, size); -#endif /* __APPLE__ */ + dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); } @@ -3114,12 +4380,8 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, } DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); -#if !defined(__APPLE__) - dtrace_copyin(tupregs[0].dttk_value, dest, size); -#else if (dtrace_priv_proc(state)) - dtrace_copyin(tupregs[0].dttk_value, dest, size); -#endif /* __APPLE__ */ + dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; } @@ -3136,20 +4398,15 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * probes will not activate in user contexts to which the * enabling user does not have permissions. */ - if (mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); -#if !defined(__APPLE__) - dtrace_copyinstr(tupregs[0].dttk_value, dest, size); -#else if (dtrace_priv_proc(state)) - dtrace_copyinstr(tupregs[0].dttk_value, dest, size); -#endif /* __APPLE__ */ + dtrace_copyinstr(tupregs[0].dttk_value, dest, size, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); ((char *)dest)[size - 1] = '\0'; @@ -3158,87 +4415,14 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; } -#if !defined(__APPLE__) - case DIF_SUBR_MSGSIZE: - case DIF_SUBR_MSGDSIZE: { - uintptr_t baddr = tupregs[0].dttk_value, daddr; - uintptr_t wptr, rptr; - size_t count = 0; - int cont = 0; - - while (baddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { - wptr = dtrace_loadptr(baddr + - offsetof(mblk_t, b_wptr)); - - rptr = dtrace_loadptr(baddr + - offsetof(mblk_t, b_rptr)); - - if (wptr < rptr) { - *flags |= CPU_DTRACE_BADADDR; - *illval = tupregs[0].dttk_value; - break; - } - - daddr = dtrace_loadptr(baddr + - offsetof(mblk_t, b_datap)); - - baddr = dtrace_loadptr(baddr + - offsetof(mblk_t, b_cont)); - - /* - * We want to prevent against denial-of-service here, - * so we're only going to search the list for - * dtrace_msgdsize_max mblks. 
- */ - if (cont++ > dtrace_msgdsize_max) { - *flags |= CPU_DTRACE_ILLOP; - break; - } - - if (subr == DIF_SUBR_MSGDSIZE) { - if (dtrace_load8(daddr + - offsetof(dblk_t, db_type)) != M_DATA) - continue; - } - - count += wptr - rptr; - } - - if (!(*flags & CPU_DTRACE_FAULT)) - regs[rd] = count; - - break; - } -#else case DIF_SUBR_MSGSIZE: case DIF_SUBR_MSGDSIZE: { /* Darwin does not implement SysV streams messages */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); regs[rd] = 0; break; } -#endif /* __APPLE__ */ -#if !defined(__APPLE__) - case DIF_SUBR_PROGENYOF: { - pid_t pid = tupregs[0].dttk_value; - proc_t *p; - int rval = 0; - - DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); - - for (p = curthread->t_procp; p != NULL; p = p->p_parent) { - if (p->p_pidp->pid_id == pid) { - rval = 1; - break; - } - } - - DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); - - regs[rd] = rval; - break; - } -#else case DIF_SUBR_PROGENYOF: { pid_t pid = tupregs[0].dttk_value; struct proc *p = current_proc(); @@ -3260,6 +4444,9 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; /* Can't climb process tree any further. */ p = (struct proc *)dtrace_loadptr((uintptr_t)&(p->p_pptr)); +#if __has_feature(ptrauth_calls) + p = ptrauth_strip(p, ptrauth_key_process_independent_data); +#endif if (*flags & CPU_DTRACE_FAULT) break; } @@ -3267,79 +4454,59 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, regs[rd] = rval; break; } -#endif /* __APPLE__ */ case DIF_SUBR_SPECULATION: regs[rd] = dtrace_speculation(state); break; -#if !defined(__APPLE__) + case DIF_SUBR_COPYOUT: { uintptr_t kaddr = tupregs[0].dttk_value; - uintptr_t uaddr = tupregs[1].dttk_value; + user_addr_t uaddr = tupregs[1].dttk_value; uint64_t size = tupregs[2].dttk_value; if (!dtrace_destructive_disallow && dtrace_priv_proc_control(state) && - !dtrace_istoxic(kaddr, size)) { + !dtrace_istoxic(kaddr, size) && + dtrace_canload(kaddr, size, mstate, vstate)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); - dtrace_copyout(kaddr, uaddr, size); + dtrace_copyout(kaddr, uaddr, size, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); } break; } case DIF_SUBR_COPYOUTSTR: { - uintptr_t kaddr = tupregs[0].dttk_value; - uintptr_t uaddr = tupregs[1].dttk_value; - uint64_t size = tupregs[2].dttk_value; - - if (!dtrace_destructive_disallow && - dtrace_priv_proc_control(state) && - !dtrace_istoxic(kaddr, size)) { - DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); - dtrace_copyoutstr(kaddr, uaddr, size); - DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); - } - break; - } -#else - case DIF_SUBR_COPYOUT: { uintptr_t kaddr = tupregs[0].dttk_value; user_addr_t uaddr = tupregs[1].dttk_value; uint64_t size = tupregs[2].dttk_value; + size_t lim; if (!dtrace_destructive_disallow && dtrace_priv_proc_control(state) && - !dtrace_istoxic(kaddr, size)) { + !dtrace_istoxic(kaddr, size) && + dtrace_strcanload(kaddr, size, &lim, mstate, vstate)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); - dtrace_copyout(kaddr, uaddr, size); + dtrace_copyoutstr(kaddr, uaddr, lim, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); } break; } - case DIF_SUBR_COPYOUTSTR: { - uintptr_t kaddr = tupregs[0].dttk_value; - user_addr_t uaddr = tupregs[1].dttk_value; - uint64_t size = tupregs[2].dttk_value; + case DIF_SUBR_STRLEN: { + size_t size = state->dts_options[DTRACEOPT_STRSIZE]; + uintptr_t addr = (uintptr_t)tupregs[0].dttk_value; + size_t lim; - if (!dtrace_destructive_disallow && - dtrace_priv_proc_control(state) && - !dtrace_istoxic(kaddr, size)) { - DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); - dtrace_copyoutstr(kaddr, uaddr, size); - 
DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); + if (!dtrace_strcanload(addr, size, &lim, mstate, vstate)) { + regs[rd] = 0; + break; } - break; - } -#endif /* __APPLE__ */ - case DIF_SUBR_STRLEN: - regs[rd] = dtrace_strlen((char *)(uintptr_t) - tupregs[0].dttk_value, - state->dts_options[DTRACEOPT_STRSIZE]); + regs[rd] = dtrace_strlen((char *)addr, lim); + break; + } case DIF_SUBR_STRCHR: case DIF_SUBR_STRRCHR: { @@ -3351,10 +4518,18 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * of the specified character instead of the first. */ uintptr_t addr = tupregs[0].dttk_value; - uintptr_t limit = addr + state->dts_options[DTRACEOPT_STRSIZE]; + uintptr_t addr_limit; + uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; + size_t lim; char c, target = (char)tupregs[1].dttk_value; - for (regs[rd] = NULL; addr < limit; addr++) { + if (!dtrace_strcanload(addr, size, &lim, mstate, vstate)) { + regs[rd] = 0; + break; + } + addr_limit = addr + lim; + + for (regs[rd] = 0; addr < addr_limit; addr++) { if ((c = dtrace_load8(addr)) == target) { regs[rd] = addr; @@ -3392,6 +4567,17 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, regs[rd] = notfound; + if (!dtrace_canload((uintptr_t)addr, len + 1, mstate, vstate)) { + regs[rd] = 0; + break; + } + + if (!dtrace_canload((uintptr_t)substr, sublen + 1, mstate, + vstate)) { + regs[rd] = 0; + break; + } + /* * strstr() and index()/rindex() have similar semantics if * both strings are the empty string: strstr() returns a @@ -3466,13 +4652,13 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; } - if (pos > len) + if ((size_t)pos > len) pos = len; } else { if (pos < 0) pos = 0; - if (pos >= len) { + if ((size_t)pos >= len) { if (sublen == 0) regs[rd] = len; break; @@ -3512,19 +4698,29 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, uintptr_t addr = tupregs[0].dttk_value; uintptr_t tokaddr = tupregs[1].dttk_value; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; - uintptr_t limit, toklimit = tokaddr + size; - uint8_t c, tokmap[32]; /* 256 / 8 */ + uintptr_t limit, toklimit; + size_t clim; char *dest = (char *)mstate->dtms_scratch_ptr; - int i; + uint8_t c='\0', tokmap[32]; /* 256 / 8 */ + uint64_t i = 0; - if (mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + /* + * Check both the token buffer and (later) the input buffer, + * since both could be non-scratch addresses. + */ + if (!dtrace_strcanload(tokaddr, size, &clim, mstate, vstate)) { + regs[rd] = 0; + break; + } + toklimit = tokaddr + clim; + + if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } - if (addr == NULL) { + if (addr == 0) { /* * If the address specified is NULL, we use our saved * strtok pointer from the mstate. Note that this @@ -3533,6 +4729,22 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * it behaves like an implicit clause-local variable. */ addr = mstate->dtms_strtok; + limit = mstate->dtms_strtok_limit; + } else { + /* + * If the user-specified address is non-NULL we must + * access check it. This is the only time we have + * a chance to do so, since this address may reside + * in the string table of this clause-- future calls + * (when we fetch addr from mstate->dtms_strtok) + * would fail this access check. 
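+ *
+ * The usual D idiom looks like this (names illustrative):
+ *
+ *	this->head = strtok(this->str, ":");
+ *	this->next = strtok(NULL, ":");
+ *
+ * Only the first call carries a user-specified address; subsequent
+ * calls resume from dtms_strtok, bounded by the dtms_strtok_limit
+ * that was saved alongside it.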
+ */ + if (!dtrace_strcanload(addr, size, &clim, mstate, + vstate)) { + regs[rd] = 0; + break; + } + limit = addr + clim; } /* @@ -3551,10 +4763,10 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, tokmap[c >> 3] |= (1 << (c & 0x7)); } - for (limit = addr + size; addr < limit; addr++) { + for (; addr < limit; addr++) { /* - * We're looking for a character that is _not_ contained - * in the token string. + * We're looking for a character that is _not_ + * contained in the token string. */ if ((c = dtrace_load8(addr)) == '\0') break; @@ -3570,8 +4782,9 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * We return NULL in this case, and we set the saved * address to NULL as well. */ - regs[rd] = NULL; - mstate->dtms_strtok = NULL; + regs[rd] = 0; + mstate->dtms_strtok = 0; + mstate->dtms_strtok_limit = 0; break; } @@ -3594,6 +4807,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, regs[rd] = (uintptr_t)dest; mstate->dtms_scratch_ptr += size; mstate->dtms_strtok = addr; + mstate->dtms_strtok_limit = limit; break; } @@ -3606,16 +4820,20 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, size_t len = dtrace_strlen((char *)s, size); int64_t i = 0; - if (nargs <= 2) - remaining = (int64_t)size; + if (!dtrace_canload(s, len + 1, mstate, vstate)) { + regs[rd] = 0; + break; + } - if (mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } + if (nargs <= 2) + remaining = (int64_t)size; + if (index < 0) { index += len; @@ -3625,267 +4843,70 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, } } - if (index >= len || index < 0) - index = len; - - for (d[0] = '\0'; remaining > 0; remaining--) { - if ((d[i++] = dtrace_load8(s++ + index)) == '\0') - break; + if ((size_t)index >= len || index < 0) { + remaining = 0; + } else if (remaining < 0) { + remaining += len - index; + } else if ((uint64_t)index + (uint64_t)remaining > size) { + remaining = size - index; + } - if (i == size) { - d[i - 1] = '\0'; + for (i = 0; i < remaining; i++) { + if ((d[i] = dtrace_load8(s + index + i)) == '\0') break; } - } + + d[i] = '\0'; mstate->dtms_scratch_ptr += size; regs[rd] = (uintptr_t)d; break; } -#if !defined(__APPLE__) - case DIF_SUBR_GETMAJOR: -#ifdef __LP64__ - regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64; -#else - regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR) & MAXMAJ; -#endif - break; - -#else /* __APPLE__ */ case DIF_SUBR_GETMAJOR: regs[rd] = (uintptr_t)major( (dev_t)tupregs[0].dttk_value ); break; -#endif /* __APPLE__ */ -#if !defined(__APPLE__) - case DIF_SUBR_GETMINOR: -#ifdef __LP64__ - regs[rd] = tupregs[0].dttk_value & MAXMIN64; -#else - regs[rd] = tupregs[0].dttk_value & MAXMIN; -#endif - break; - -#else /* __APPLE__ */ case DIF_SUBR_GETMINOR: regs[rd] = (uintptr_t)minor( (dev_t)tupregs[0].dttk_value ); break; -#endif /* __APPLE__ */ -#if !defined(__APPLE__) case DIF_SUBR_DDI_PATHNAME: { - /* - * This one is a galactic mess. We are going to roughly - * emulate ddi_pathname(), but it's made more complicated - * by the fact that we (a) want to include the minor name and - * (b) must proceed iteratively instead of recursively. 
- */ - uintptr_t dest = mstate->dtms_scratch_ptr; - uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; - char *start = (char *)dest, *end = start + size - 1; - uintptr_t daddr = tupregs[0].dttk_value; - int64_t minor = (int64_t)tupregs[1].dttk_value; - char *s; - int i, len, depth = 0; - - if (size == 0 || mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { - DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; - break; - } - - *end = '\0'; - - /* - * We want to have a name for the minor. In order to do this, - * we need to walk the minor list from the devinfo. We want - * to be sure that we don't infinitely walk a circular list, - * so we check for circularity by sending a scout pointer - * ahead two elements for every element that we iterate over; - * if the list is circular, these will ultimately point to the - * same element. You may recognize this little trick as the - * answer to a stupid interview question -- one that always - * seems to be asked by those who had to have it laboriously - * explained to them, and who can't even concisely describe - * the conditions under which one would be forced to resort to - * this technique. Needless to say, those conditions are - * found here -- and probably only here. Is this is the only - * use of this infamous trick in shipping, production code? - * If it isn't, it probably should be... - */ - if (minor != -1) { - uintptr_t maddr = dtrace_loadptr(daddr + - offsetof(struct dev_info, devi_minor)); - - uintptr_t next = offsetof(struct ddi_minor_data, next); - uintptr_t name = offsetof(struct ddi_minor_data, - d_minor) + offsetof(struct ddi_minor, name); - uintptr_t dev = offsetof(struct ddi_minor_data, - d_minor) + offsetof(struct ddi_minor, dev); - uintptr_t scout; - - if (maddr != NULL) - scout = dtrace_loadptr(maddr + next); - - while (maddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { - uint64_t m; -#ifdef __LP64__ - m = dtrace_load64(maddr + dev) & MAXMIN64; -#else - m = dtrace_load32(maddr + dev) & MAXMIN; -#endif - if (m != minor) { - maddr = dtrace_loadptr(maddr + next); - - if (scout == NULL) - continue; - - scout = dtrace_loadptr(scout + next); - - if (scout == NULL) - continue; - - scout = dtrace_loadptr(scout + next); - - if (scout == NULL) - continue; - - if (scout == maddr) { - *flags |= CPU_DTRACE_ILLOP; - break; - } - - continue; - } - - /* - * We have the minor data. Now we need to - * copy the minor's name into the end of the - * pathname. - */ - s = (char *)dtrace_loadptr(maddr + name); - len = dtrace_strlen(s, size); - - if (*flags & CPU_DTRACE_FAULT) - break; - - if (len != 0) { - if ((end -= (len + 1)) < start) - break; - - *end = ':'; - } - - for (i = 1; i <= len; i++) - end[i] = dtrace_load8((uintptr_t)s++); - break; - } - } - - while (daddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { - ddi_node_state_t devi_state; - - devi_state = dtrace_load32(daddr + - offsetof(struct dev_info, devi_node_state)); - - if (*flags & CPU_DTRACE_FAULT) - break; - - if (devi_state >= DS_INITIALIZED) { - s = (char *)dtrace_loadptr(daddr + - offsetof(struct dev_info, devi_addr)); - len = dtrace_strlen(s, size); - - if (*flags & CPU_DTRACE_FAULT) - break; - - if (len != 0) { - if ((end -= (len + 1)) < start) - break; - - *end = '@'; - } - - for (i = 1; i <= len; i++) - end[i] = dtrace_load8((uintptr_t)s++); - } - - /* - * Now for the node name... 
- */ - s = (char *)dtrace_loadptr(daddr + - offsetof(struct dev_info, devi_node_name)); - - daddr = dtrace_loadptr(daddr + - offsetof(struct dev_info, devi_parent)); - - /* - * If our parent is NULL (that is, if we're the root - * node), we're going to use the special path - * "devices". - */ - if (daddr == NULL) - s = "devices"; - - len = dtrace_strlen(s, size); - if (*flags & CPU_DTRACE_FAULT) - break; - - if ((end -= (len + 1)) < start) - break; - - for (i = 1; i <= len; i++) - end[i] = dtrace_load8((uintptr_t)s++); - *end = '/'; - - if (depth++ > dtrace_devdepth_max) { - *flags |= CPU_DTRACE_ILLOP; - break; - } - } - - if (end < start) - DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - - if (daddr == NULL) { - regs[rd] = (uintptr_t)end; - mstate->dtms_scratch_ptr += size; - } - - break; - } -#else - case DIF_SUBR_DDI_PATHNAME: { - /* XXX awaits galactic disentanglement ;-} */ - regs[rd] = NULL; + /* APPLE NOTE: currently unsupported on Darwin */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + regs[rd] = 0; break; } -#endif /* __APPLE__ */ case DIF_SUBR_STRJOIN: { char *d = (char *)mstate->dtms_scratch_ptr; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t s1 = tupregs[0].dttk_value; uintptr_t s2 = tupregs[1].dttk_value; - int i = 0; + uint64_t i = 0, j = 0; + size_t lim1, lim2; + char c; - if (mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + if (!dtrace_strcanload(s1, size, &lim1, mstate, vstate) || + !dtrace_strcanload(s2, size, &lim2, mstate, vstate)) { + regs[rd] = 0; + break; + } + + if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } for (;;) { if (i >= size) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } - - if ((d[i++] = dtrace_load8(s1++)) == '\0') { + c = (i >= lim1) ? '\0' : dtrace_load8(s1++); + if ((d[i++] = c) == '\0') { i--; break; } @@ -3894,11 +4915,11 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, for (;;) { if (i >= size) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } - - if ((d[i++] = dtrace_load8(s2++)) == '\0') + c = (j++ >= lim2) ? '\0' : dtrace_load8(s2++); + if ((d[i++] = c) == '\0') break; } @@ -3910,26 +4931,70 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; } + case DIF_SUBR_STRTOLL: { + uintptr_t s = tupregs[0].dttk_value; + uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; + size_t lim; + int base = 10; + + if (nargs > 1) { + if ((base = tupregs[1].dttk_value) <= 1 || + base > ('z' - 'a' + 1) + ('9' - '0' + 1)) { + *flags |= CPU_DTRACE_ILLOP; + break; + } + } + + if (!dtrace_strcanload(s, size, &lim, mstate, vstate)) { + regs[rd] = INT64_MIN; + break; + } + + regs[rd] = dtrace_strtoll((char *)s, base, lim); + break; + } + case DIF_SUBR_LLTOSTR: { int64_t i = (int64_t)tupregs[0].dttk_value; - int64_t val = i < 0 ? i * -1 : i; - uint64_t size = 22; /* enough room for 2^64 in decimal */ + uint64_t val, digit; + uint64_t size = 65; /* enough room for 2^64 in binary */ char *end = (char *)mstate->dtms_scratch_ptr + size - 1; + int base = 10; + + if (nargs > 1) { + if ((base = tupregs[1].dttk_value) <= 1 || + base > ('z' - 'a' + 1) + ('9' - '0' + 1)) { + *flags |= CPU_DTRACE_ILLOP; + break; + } + } - if (mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + val = (base == 10 && i < 0) ? 
i * -1 : i; + + if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } - for (*end-- = '\0'; val; val /= 10) - *end-- = '0' + (val % 10); + for (*end-- = '\0'; val; val /= base) { + if ((digit = val % base) <= '9' - '0') { + *end-- = '0' + digit; + } else { + *end-- = 'a' + (digit - ('9' - '0') - 1); + } + } + + if (i == 0 && base == 16) + *end-- = '0'; + + if (base == 16) + *end-- = 'x'; - if (i == 0) + if (i == 0 || base == 8 || base == 16) *end-- = '0'; - if (i < 0) + if (i < 0 && base == 10) *end-- = '-'; regs[rd] = (uintptr_t)end + 1; @@ -3937,6 +5002,36 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; } + case DIF_SUBR_HTONS: + case DIF_SUBR_NTOHS: +#ifdef _BIG_ENDIAN + regs[rd] = (uint16_t)tupregs[0].dttk_value; +#else + regs[rd] = DT_BSWAP_16((uint16_t)tupregs[0].dttk_value); +#endif + break; + + + case DIF_SUBR_HTONL: + case DIF_SUBR_NTOHL: +#ifdef _BIG_ENDIAN + regs[rd] = (uint32_t)tupregs[0].dttk_value; +#else + regs[rd] = DT_BSWAP_32((uint32_t)tupregs[0].dttk_value); +#endif + break; + + + case DIF_SUBR_HTONLL: + case DIF_SUBR_NTOHLL: +#ifdef _BIG_ENDIAN + regs[rd] = (uint64_t)tupregs[0].dttk_value; +#else + regs[rd] = DT_BSWAP_64((uint64_t)tupregs[0].dttk_value); +#endif + break; + + case DIF_SUBR_DIRNAME: case DIF_SUBR_BASENAME: { char *dest = (char *)mstate->dtms_scratch_ptr; @@ -3946,10 +5041,14 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, int lastbase = -1, firstbase = -1, lastdir = -1; int start, end; - if (mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + if (!dtrace_canload(src, len + 1, mstate, vstate)) { + regs[rd] = 0; + break; + } + + if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } @@ -4055,7 +5154,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, end = lastbase; } - for (i = start, j = 0; i <= end && j < size - 1; i++, j++) + for (i = start, j = 0; i <= end && (uint64_t)j < size - 1; i++, j++) dest[j] = dtrace_load8(src + i); dest[j] = '\0'; @@ -4068,12 +5167,17 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, char *dest = (char *)mstate->dtms_scratch_ptr, c; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t src = tupregs[0].dttk_value; - int i = 0, j = 0; + size_t lim; + size_t i = 0, j = 0; - if (mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + if (!dtrace_strcanload(src, size, &lim, mstate, vstate)) { + regs[rd] = 0; + break; + } + + if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } @@ -4081,9 +5185,9 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * Move forward, loading each character. */ do { - c = dtrace_load8(src + i++); + c = (i >= lim) ? '\0' : dtrace_load8(src + i++); next: - if (j + 5 >= size) /* 5 = strlen("/..c\0") */ + if ((uint64_t)(j + 5) >= size) /* 5 = strlen("/..c\0") */ break; if (c != '/') { @@ -4091,7 +5195,7 @@ next: continue; } - c = dtrace_load8(src + i++); + c = (i >= lim) ? '\0' : dtrace_load8(src + i++); if (c == '/') { /* @@ -4112,7 +5216,7 @@ next: continue; } - c = dtrace_load8(src + i++); + c = (i >= lim) ? '\0' : dtrace_load8(src + i++); if (c == '/') { /* @@ -4135,7 +5239,7 @@ next: continue; } - c = dtrace_load8(src + i++); + c = (i >= lim) ? 
'\0' : dtrace_load8(src + i++); if (c != '/' && c != '\0') { /* @@ -4150,45 +5254,478 @@ next: continue; } - /* - * This is "/../" or "/..\0". We need to back up - * our destination pointer until we find a "/". - */ - i--; - while (j != 0 && dest[--j] != '/') - continue; + /* + * This is "/../" or "/..\0". We need to back up + * our destination pointer until we find a "/". + */ + i--; + while (j != 0 && dest[--j] != '/') + continue; + + if (c == '\0') + dest[++j] = '/'; + } while (c != '\0'); + + dest[j] = '\0'; + regs[rd] = (uintptr_t)dest; + mstate->dtms_scratch_ptr += size; + break; + } + + case DIF_SUBR_INET_NTOA: + case DIF_SUBR_INET_NTOA6: + case DIF_SUBR_INET_NTOP: { + size_t size; + int af, argi, i; + char *base, *end; + + if (subr == DIF_SUBR_INET_NTOP) { + af = (int)tupregs[0].dttk_value; + argi = 1; + } else { + af = subr == DIF_SUBR_INET_NTOA ? AF_INET: AF_INET6; + argi = 0; + } + + if (af == AF_INET) { +#if !defined(__APPLE__) + ipaddr_t ip4; +#else + uint32_t ip4; +#endif /* __APPLE__ */ + uint8_t *ptr8, val; + + /* + * Safely load the IPv4 address. + */ +#if !defined(__APPLE__) + ip4 = dtrace_load32(tupregs[argi].dttk_value); +#else + if (!dtrace_canload(tupregs[argi].dttk_value, sizeof(ip4), + mstate, vstate)) { + regs[rd] = 0; + break; + } + + dtrace_bcopy( + (void *)(uintptr_t)tupregs[argi].dttk_value, + (void *)(uintptr_t)&ip4, sizeof (ip4)); +#endif /* __APPLE__ */ + /* + * Check an IPv4 string will fit in scratch. + */ +#if !defined(__APPLE__) + size = INET_ADDRSTRLEN; +#else + size = MAX_IPv4_STR_LEN; +#endif /* __APPLE__ */ + if (!DTRACE_INSCRATCH(mstate, size)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); + regs[rd] = 0; + break; + } + base = (char *)mstate->dtms_scratch_ptr; + end = (char *)mstate->dtms_scratch_ptr + size - 1; + + /* + * Stringify as a dotted decimal quad. + */ + *end-- = '\0'; + ptr8 = (uint8_t *)&ip4; + for (i = 3; i >= 0; i--) { + val = ptr8[i]; + + if (val == 0) { + *end-- = '0'; + } else { + for (; val; val /= 10) { + *end-- = '0' + (val % 10); + } + } + + if (i > 0) + *end-- = '.'; + } + ASSERT(end + 1 >= base); + + } else if (af == AF_INET6) { +#if defined(__APPLE__) +#define _S6_un __u6_addr +#define _S6_u8 __u6_addr8 +#endif /* __APPLE__ */ + struct in6_addr ip6; + int firstzero, tryzero, numzero, v6end; + uint16_t val; + const char digits[] = "0123456789abcdef"; + + /* + * Stringify using RFC 1884 convention 2 - 16 bit + * hexadecimal values with a zero-run compression. + * Lower case hexadecimal digits are used. + * eg, fe80::214:4fff:fe0b:76c8. + * The IPv4 embedded form is returned for inet_ntop, + * just the IPv4 string is returned for inet_ntoa6. + */ + + if (!dtrace_canload(tupregs[argi].dttk_value, + sizeof(struct in6_addr), mstate, vstate)) { + regs[rd] = 0; + break; + } + + /* + * Safely load the IPv6 address. + */ + dtrace_bcopy( + (void *)(uintptr_t)tupregs[argi].dttk_value, + (void *)(uintptr_t)&ip6, sizeof (struct in6_addr)); + + /* + * Check an IPv6 string will fit in scratch. + */ + size = INET6_ADDRSTRLEN; + if (!DTRACE_INSCRATCH(mstate, size)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); + regs[rd] = 0; + break; + } + base = (char *)mstate->dtms_scratch_ptr; + end = (char *)mstate->dtms_scratch_ptr + size - 1; + *end-- = '\0'; + + /* + * Find the longest run of 16 bit zero values + * for the single allowed zero compression - "::". 
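+ *
+ * For example, scanning the address ::1 leaves firstzero = 0 and
+ * numzero = 14, so the fourteen leading zero bytes collapse to a
+ * single "::" and only the final group is emitted, yielding "::1".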
+ */ + firstzero = -1; + tryzero = -1; + numzero = 1; + for (i = 0; i < (int)sizeof (struct in6_addr); i++) { + if (ip6._S6_un._S6_u8[i] == 0 && + tryzero == -1 && i % 2 == 0) { + tryzero = i; + continue; + } + + if (tryzero != -1 && + (ip6._S6_un._S6_u8[i] != 0 || + i == sizeof (struct in6_addr) - 1)) { + + if (i - tryzero <= numzero) { + tryzero = -1; + continue; + } + + firstzero = tryzero; + numzero = i - i % 2 - tryzero; + tryzero = -1; + + if (ip6._S6_un._S6_u8[i] == 0 && + i == sizeof (struct in6_addr) - 1) + numzero += 2; + } + } + ASSERT(firstzero + numzero <= (int)sizeof (struct in6_addr)); + + /* + * Check for an IPv4 embedded address. + */ + v6end = sizeof (struct in6_addr) - 2; + if (IN6_IS_ADDR_V4MAPPED(&ip6) || + IN6_IS_ADDR_V4COMPAT(&ip6)) { + for (i = sizeof (struct in6_addr) - 1; + i >= (int)DTRACE_V4MAPPED_OFFSET; i--) { + ASSERT(end >= base); + + val = ip6._S6_un._S6_u8[i]; + + if (val == 0) { + *end-- = '0'; + } else { + for (; val; val /= 10) { + *end-- = '0' + val % 10; + } + } + + if (i > (int)DTRACE_V4MAPPED_OFFSET) + *end-- = '.'; + } + + if (subr == DIF_SUBR_INET_NTOA6) + goto inetout; + + /* + * Set v6end to skip the IPv4 address that + * we have already stringified. + */ + v6end = 10; + } + + /* + * Build the IPv6 string by working through the + * address in reverse. + */ + for (i = v6end; i >= 0; i -= 2) { + ASSERT(end >= base); + + if (i == firstzero + numzero - 2) { + *end-- = ':'; + *end-- = ':'; + i -= numzero - 2; + continue; + } + + if (i < 14 && i != firstzero - 2) + *end-- = ':'; + + val = (ip6._S6_un._S6_u8[i] << 8) + + ip6._S6_un._S6_u8[i + 1]; + + if (val == 0) { + *end-- = '0'; + } else { + for (; val; val /= 16) { + *end-- = digits[val % 16]; + } + } + } + ASSERT(end + 1 >= base); + +#if defined(__APPLE__) +#undef _S6_un +#undef _S6_u8 +#endif /* __APPLE__ */ + } else { + /* + * The user didn't use AH_INET or AH_INET6. + */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + regs[rd] = 0; + break; + } + +inetout: regs[rd] = (uintptr_t)end + 1; + mstate->dtms_scratch_ptr += size; + break; + } + + case DIF_SUBR_JSON: { + uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; + uintptr_t json = tupregs[0].dttk_value; + size_t jsonlen = dtrace_strlen((char *)json, size); + uintptr_t elem = tupregs[1].dttk_value; + size_t elemlen = dtrace_strlen((char *)elem, size); + + char *dest = (char *)mstate->dtms_scratch_ptr; + char *elemlist = (char *)mstate->dtms_scratch_ptr + jsonlen + 1; + char *ee = elemlist; + int nelems = 1; + uintptr_t cur; + + if (!dtrace_canload(json, jsonlen + 1, mstate, vstate) || + !dtrace_canload(elem, elemlen + 1, mstate, vstate)) { + regs[rd] = 0; + break; + } + + if (!DTRACE_INSCRATCH(mstate, jsonlen + 1 + elemlen + 1)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); + regs[rd] = 0; + break; + } + + /* + * Read the element selector and split it up into a packed list + * of strings. + */ + for (cur = elem; cur < elem + elemlen; cur++) { + char cc = dtrace_load8(cur); + + if (cur == elem && cc == '[') { + /* + * If the first element selector key is + * actually an array index then ignore the + * bracket. + */ + continue; + } + + if (cc == ']') + continue; + + if (cc == '.' 
|| cc == '[') { + nelems++; + cc = '\0'; + } + + *ee++ = cc; + } + *ee++ = '\0'; + + if ((regs[rd] = (uintptr_t)dtrace_json(size, json, elemlist, + nelems, dest)) != 0) + mstate->dtms_scratch_ptr += jsonlen + 1; + break; + } + + case DIF_SUBR_TOUPPER: + case DIF_SUBR_TOLOWER: { + uintptr_t src = tupregs[0].dttk_value; + char *dest = (char *)mstate->dtms_scratch_ptr; + char lower, upper, base, c; + uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; + size_t len = dtrace_strlen((char*) src, size); + size_t i = 0; + + lower = (subr == DIF_SUBR_TOUPPER) ? 'a' : 'A'; + upper = (subr == DIF_SUBR_TOUPPER) ? 'z' : 'Z'; + base = (subr == DIF_SUBR_TOUPPER) ? 'A' : 'a'; + + if (!dtrace_canload(src, len + 1, mstate, vstate)) { + regs[rd] = 0; + break; + } + + if (!DTRACE_INSCRATCH(mstate, size)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); + regs[rd] = 0; + break; + } + + for (i = 0; i < size - 1; ++i) { + if ((c = dtrace_load8(src + i)) == '\0') + break; + if (c >= lower && c <= upper) + c = base + (c - lower); + dest[i] = c; + } + + ASSERT(i < size); + + dest[i] = '\0'; + regs[rd] = (uintptr_t) dest; + mstate->dtms_scratch_ptr += size; + + break; + } + + case DIF_SUBR_STRIP: + if (!dtrace_is_valid_ptrauth_key(tupregs[1].dttk_value)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + break; + } + regs[rd] = (uint64_t)dtrace_ptrauth_strip( + (void*)tupregs[0].dttk_value, tupregs[1].dttk_value); + break; + +#if defined(__APPLE__) + case DIF_SUBR_VM_KERNEL_ADDRPERM: { + if (!dtrace_priv_kernel(state)) { + regs[rd] = 0; + } else { + regs[rd] = VM_KERNEL_ADDRPERM((vm_offset_t) tupregs[0].dttk_value); + } + + break; + } + + case DIF_SUBR_KDEBUG_TRACE: { + uint32_t debugid; + uintptr_t args[4] = {0}; + int i; + + if (nargs < 2 || nargs > 5) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + break; + } + + if (dtrace_destructive_disallow || + !dtrace_priv_kernel_destructive(state)) { + return; + } + + debugid = tupregs[0].dttk_value; + for (i = 0; i < nargs - 1; i++) + args[i] = tupregs[i + 1].dttk_value; - if (c == '\0') - dest[++j] = '/'; - } while (c != '\0'); + kernel_debug(debugid, args[0], args[1], args[2], args[3], 0); - dest[j] = '\0'; - regs[rd] = (uintptr_t)dest; - mstate->dtms_scratch_ptr += size; break; } -#ifdef __APPLE__ - /* CHUD callback ('chud(uint64_t, [uint64_t], [uint64_t] ...)') */ - case DIF_SUBR_CHUD: { - uint64_t selector = tupregs[0].dttk_value; - uint64_t args[DIF_DTR_NREGS-1] = {0ULL}; - uint32_t ii; + case DIF_SUBR_KDEBUG_TRACE_STRING: { + if (nargs != 3) { + break; + } - /* copy in any variadic argument list */ - for(ii = 0; ii < DIF_DTR_NREGS-1; ii++) { - args[ii] = tupregs[ii+1].dttk_value; + if (dtrace_destructive_disallow || + !dtrace_priv_kernel_destructive(state)) { + return; } - kern_return_t ret = - chudxnu_dtrace_callback(selector, args, DIF_DTR_NREGS-1); - if(KERN_SUCCESS != ret) { - /* error */ + uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; + uint32_t debugid = tupregs[0].dttk_value; + uint64_t str_id = tupregs[1].dttk_value; + uintptr_t src = tupregs[2].dttk_value; + size_t lim; + char buf[size]; + char* str = NULL; + + if (src != (uintptr_t)0) { + str = buf; + if (!dtrace_strcanload(src, size, &lim, mstate, vstate)) { + break; + } + dtrace_strcpy((void*)src, buf, size); } + + (void)kernel_debug_string(debugid, &str_id, str); + regs[rd] = str_id; + break; } -#endif /* __APPLE__ */ + case DIF_SUBR_MTONS: + absolutetime_to_nanoseconds(tupregs[0].dttk_value, ®s[rd]); + + break; + case DIF_SUBR_PHYSMEM_READ: { +#if DEBUG || DEVELOPMENT + if (dtrace_destructive_disallow 
|| + !dtrace_priv_kernel_destructive(state)) { + return; + } + regs[rd] = dtrace_physmem_read(tupregs[0].dttk_value, + tupregs[1].dttk_value); +#else + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); +#endif /* DEBUG || DEVELOPMENT */ + break; + } + case DIF_SUBR_PHYSMEM_WRITE: { +#if DEBUG || DEVELOPMENT + if (dtrace_destructive_disallow || + !dtrace_priv_kernel_destructive(state)) { + return; + } + + dtrace_physmem_write(tupregs[0].dttk_value, + tupregs[1].dttk_value, (size_t)tupregs[2].dttk_value); +#else + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); +#endif /* DEBUG || DEVELOPMENT */ + break; + } + + case DIF_SUBR_KVTOPHYS: { +#if DEBUG || DEVELOPMENT + regs[rd] = kvtophys(tupregs[0].dttk_value); +#else + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); +#endif /* DEBUG || DEVELOPMENT */ + break; + } +#endif /* defined(__APPLE__) */ } } @@ -4212,11 +5749,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, dtrace_dstate_t *dstate = &vstate->dtvs_dynvars; dtrace_difv_t *v; volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; -#if !defined(__APPLE__) - volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; -#else volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; -#endif /* __APPLE__ */ dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */ uint64_t regs[DIF_DIR_NREGS]; @@ -4224,11 +5757,17 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0; int64_t cc_r; - uint_t pc = 0, id, opc; + uint_t pc = 0, id, opc = 0; uint8_t ttop = 0; dif_instr_t instr; uint_t r1, r2, rd; + /* + * We stash the current DIF object into the machine state: we need it + * for subsequent access checking. + */ + mstate->dtms_difo = difo; + regs[DIF_REG_R0] = 0; /* %r0 is fixed at zero */ while (pc < textlen && !(*flags & CPU_DTRACE_FAULT)) { @@ -4368,7 +5907,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, *illval = regs[r1]; break; } - /*FALLTHROUGH*/ + OS_FALLTHROUGH; case DIF_OP_LDSB: regs[rd] = (int8_t)dtrace_load8(regs[r1]); break; @@ -4378,7 +5917,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, *illval = regs[r1]; break; } - /*FALLTHROUGH*/ + OS_FALLTHROUGH; case DIF_OP_LDSH: regs[rd] = (int16_t)dtrace_load16(regs[r1]); break; @@ -4388,7 +5927,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, *illval = regs[r1]; break; } - /*FALLTHROUGH*/ + OS_FALLTHROUGH; case DIF_OP_LDSW: regs[rd] = (int32_t)dtrace_load32(regs[r1]); break; @@ -4398,7 +5937,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, *illval = regs[r1]; break; } - /*FALLTHROUGH*/ + OS_FALLTHROUGH; case DIF_OP_LDUB: regs[rd] = dtrace_load8(regs[r1]); break; @@ -4408,7 +5947,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, *illval = regs[r1]; break; } - /*FALLTHROUGH*/ + OS_FALLTHROUGH; case DIF_OP_LDUH: regs[rd] = dtrace_load16(regs[r1]); break; @@ -4418,7 +5957,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, *illval = regs[r1]; break; } - /*FALLTHROUGH*/ + OS_FALLTHROUGH; case DIF_OP_LDUW: regs[rd] = dtrace_load32(regs[r1]); break; @@ -4428,10 +5967,16 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, *illval = regs[r1]; break; } - /*FALLTHROUGH*/ + OS_FALLTHROUGH; case DIF_OP_LDX: regs[rd] = dtrace_load64(regs[r1]); break; +/* + * Darwin 32-bit kernel may fetch from 64-bit user. 
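+ * A 64-bit user address may not fit in the kernel's 32-bit
+ * uintptr_t.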
+ * Do not cast regs to uintptr_t + * DIF_OP_ULDSB,DIF_OP_ULDSH, DIF_OP_ULDSW, DIF_OP_ULDUB + * DIF_OP_ULDUH, DIF_OP_ULDUW, DIF_OP_ULDX + */ case DIF_OP_ULDSB: regs[rd] = (int8_t) dtrace_fuword8(regs[r1]); @@ -4462,6 +6007,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, break; case DIF_OP_RET: rval = regs[rd]; + pc = textlen; break; case DIF_OP_NOP: break; @@ -4472,15 +6018,27 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, regs[rd] = (uint64_t)(uintptr_t) (strtab + DIF_INSTR_STRING(instr)); break; - case DIF_OP_SCMP: - cc_r = dtrace_strncmp((char *)(uintptr_t)regs[r1], - (char *)(uintptr_t)regs[r2], - state->dts_options[DTRACEOPT_STRSIZE]); + case DIF_OP_SCMP: { + size_t sz = state->dts_options[DTRACEOPT_STRSIZE]; + uintptr_t s1 = regs[r1]; + uintptr_t s2 = regs[r2]; + size_t lim1 = sz, lim2 = sz; + + if (s1 != 0 && + !dtrace_strcanload(s1, sz, &lim1, mstate, vstate)) + break; + if (s2 != 0 && + !dtrace_strcanload(s2, sz, &lim2, mstate, vstate)) + break; + + cc_r = dtrace_strncmp((char *)s1, (char *)s2, + MIN(lim1, lim2)); cc_n = cc_r < 0; cc_z = cc_r == 0; cc_v = cc_c = 0; break; + } case DIF_OP_LDGA: regs[rd] = dtrace_dif_variable(mstate, state, r1, regs[r2]); @@ -4509,7 +6067,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, * then this is to be treated as a * reference to a NULL variable. */ - regs[rd] = NULL; + regs[rd] = 0; } else { regs[rd] = a + sizeof (uint64_t); } @@ -4526,26 +6084,32 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; + VERIFY(id < (uint_t)vstate->dtvs_nglobals); svar = vstate->dtvs_globals[id]; ASSERT(svar != NULL); v = &svar->dtsv_var; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { uintptr_t a = (uintptr_t)svar->dtsv_data; + size_t lim; - ASSERT(a != NULL); + ASSERT(a != 0); ASSERT(svar->dtsv_size != 0); - if (regs[rd] == NULL) { + if (regs[rd] == 0) { *(uint8_t *)a = UINT8_MAX; break; } else { *(uint8_t *)a = 0; a += sizeof (uint64_t); } + if (!dtrace_vcanload( + (void *)(uintptr_t)regs[rd], &v->dtdv_type, + &lim, mstate, vstate)) + break; dtrace_vcopy((void *)(uintptr_t)regs[rd], - (void *)a, &v->dtdv_type); + (void *)a, &v->dtdv_type, lim); break; } @@ -4574,9 +6138,8 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, id -= DIF_VAR_OTHER_UBASE; - ASSERT(id < vstate->dtvs_nlocals); + ASSERT(id < (uint_t)vstate->dtvs_nlocals); ASSERT(vstate->dtvs_locals != NULL); - svar = vstate->dtvs_locals[id]; ASSERT(svar != NULL); v = &svar->dtsv_var; @@ -4595,7 +6158,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, * then this is to be treated as a * reference to a NULL variable. 
*/ - regs[rd] = NULL; + regs[rd] = 0; } else { regs[rd] = a + sizeof (uint64_t); } @@ -4613,8 +6176,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; - ASSERT(id < vstate->dtvs_nlocals); - + VERIFY(id < (uint_t)vstate->dtvs_nlocals); ASSERT(vstate->dtvs_locals != NULL); svar = vstate->dtvs_locals[id]; ASSERT(svar != NULL); @@ -4623,12 +6185,13 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { uintptr_t a = (uintptr_t)svar->dtsv_data; size_t sz = v->dtdv_type.dtdt_size; + size_t lim; sz += sizeof (uint64_t); ASSERT(svar->dtsv_size == (int)NCPU * sz); a += CPU->cpu_id * sz; - if (regs[rd] == NULL) { + if (regs[rd] == 0) { *(uint8_t *)a = UINT8_MAX; break; } else { @@ -4636,8 +6199,13 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, a += sizeof (uint64_t); } + if (!dtrace_vcanload( + (void *)(uintptr_t)regs[rd], &v->dtdv_type, + &lim, mstate, vstate)) + break; + dtrace_vcopy((void *)(uintptr_t)regs[rd], - (void *)a, &v->dtdv_type); + (void *)a, &v->dtdv_type, lim); break; } @@ -4662,7 +6230,8 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, key[1].dttk_size = 0; dvar = dtrace_dynvar(dstate, 2, key, - sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC); + sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC, + mstate, vstate); if (dvar == NULL) { regs[rd] = 0; @@ -4685,6 +6254,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, id = DIF_INSTR_VAR(instr); ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; + VERIFY(id < (uint_t)vstate->dtvs_ntlocals); key = &tupregs[DIF_DTR_NREGS]; key[0].dttk_value = (uint64_t)id; @@ -4697,25 +6267,27 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, v->dtdv_type.dtdt_size > sizeof (uint64_t) ? v->dtdv_type.dtdt_size : sizeof (uint64_t), regs[rd] ? DTRACE_DYNVAR_ALLOC : - DTRACE_DYNVAR_DEALLOC); + DTRACE_DYNVAR_DEALLOC, mstate, vstate); /* * Given that we're storing to thread-local data, * we need to flush our predicate cache. */ -#if !defined(__APPLE__) - curthread->t_predcache = NULL; -#else dtrace_set_thread_predcache(current_thread(), 0); -#endif /* __APPLE__ */ - if (dvar == NULL) break; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { + size_t lim; + + if (!dtrace_vcanload( + (void *)(uintptr_t)regs[rd], + &v->dtdv_type, &lim, mstate, vstate)) + break; + dtrace_vcopy((void *)(uintptr_t)regs[rd], - dvar->dtdv_data, &v->dtdv_type); + dvar->dtdv_data, &v->dtdv_type, lim); } else { *((uint64_t *)dvar->dtdv_data) = regs[rd]; } @@ -4752,6 +6324,10 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, regs[r2] ? regs[r2] : dtrace_strsize_default) + 1; } else { + if (regs[r2] > LONG_MAX) { + *flags |= CPU_DTRACE_ILLOP; + break; + } tupregs[ttop].dttk_size = regs[r2]; } @@ -4793,15 +6369,17 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, if (DIF_INSTR_OP(instr) == DIF_OP_LDTAA) { DTRACE_TLS_THRKEY(key[nkeys].dttk_value); key[nkeys++].dttk_size = 0; + VERIFY(id < (uint_t)vstate->dtvs_ntlocals); v = &vstate->dtvs_tlocals[id]; } else { + VERIFY(id < (uint_t)vstate->dtvs_nglobals); v = &vstate->dtvs_globals[id]->dtsv_var; } dvar = dtrace_dynvar(dstate, nkeys, key, v->dtdv_type.dtdt_size > sizeof (uint64_t) ? 
v->dtdv_type.dtdt_size : sizeof (uint64_t), - DTRACE_DYNVAR_NOALLOC); + DTRACE_DYNVAR_NOALLOC, mstate, vstate); if (dvar == NULL) { regs[rd] = 0; @@ -4833,8 +6411,10 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) { DTRACE_TLS_THRKEY(key[nkeys].dttk_value); key[nkeys++].dttk_size = 0; + VERIFY(id < (uint_t)vstate->dtvs_ntlocals); v = &vstate->dtvs_tlocals[id]; } else { + VERIFY(id < (uint_t)vstate->dtvs_nglobals); v = &vstate->dtvs_globals[id]->dtsv_var; } @@ -4842,14 +6422,21 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, v->dtdv_type.dtdt_size > sizeof (uint64_t) ? v->dtdv_type.dtdt_size : sizeof (uint64_t), regs[rd] ? DTRACE_DYNVAR_ALLOC : - DTRACE_DYNVAR_DEALLOC); + DTRACE_DYNVAR_DEALLOC, mstate, vstate); if (dvar == NULL) break; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { + size_t lim; + + if (!dtrace_vcanload( + (void *)(uintptr_t)regs[rd], &v->dtdv_type, + &lim, mstate, vstate)) + break; + dtrace_vcopy((void *)(uintptr_t)regs[rd], - dvar->dtdv_data, &v->dtdv_type); + dvar->dtdv_data, &v->dtdv_type, lim); } else { *((uint64_t *)dvar->dtdv_data) = regs[rd]; } @@ -4861,17 +6448,21 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, uintptr_t ptr = P2ROUNDUP(mstate->dtms_scratch_ptr, 8); size_t size = ptr - mstate->dtms_scratch_ptr + regs[r1]; - if (mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + - mstate->dtms_scratch_size) { + /* + * Rounding up the user allocation size could have + * overflowed large, bogus allocations (like -1ULL) to + * 0. + */ + if (size < regs[r1] || + !DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; - } else { - dtrace_bzero((void *) - mstate->dtms_scratch_ptr, size); + regs[rd] = 0; + break; + } + + dtrace_bzero((void *) mstate->dtms_scratch_ptr, size); mstate->dtms_scratch_ptr += size; regs[rd] = ptr; - } break; } @@ -4883,6 +6474,9 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, break; } + if (!dtrace_canload(regs[r1], regs[r2], mstate, vstate)) + break; + dtrace_bcopy((void *)(uintptr_t)regs[r1], (void *)(uintptr_t)regs[rd], (size_t)regs[r2]); break; @@ -4930,17 +6524,22 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, *illval = regs[rd]; break; } -#if !defined(__APPLE__) - if (regs[rd] & 7) { -#else - if (regs[rd] & 3) { /* Darwin kmem_zalloc() called from dtrace_difo_init() is 4-byte aligned. */ -#endif /* __APPLE__ */ + + /* + * Darwin kmem_zalloc() called from + * dtrace_difo_init() is 4-byte aligned. 
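+			 * Only 4-byte alignment can therefore be demanded
+			 * of the 64-bit store target checked below.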
+ */ + if (regs[rd] & 3) { *flags |= CPU_DTRACE_BADALIGN; *illval = regs[rd]; break; } *((uint64_t *)(uintptr_t)regs[rd]) = regs[r1]; break; + case DIF_OP_STRIP: + regs[rd] = (uint64_t)dtrace_ptrauth_strip( + (void*)regs[r1], r2); + break; } } @@ -4953,14 +6552,15 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, return (0); } +__attribute__((noinline)) static void dtrace_action_breakpoint(dtrace_ecb_t *ecb) { dtrace_probe_t *probe = ecb->dte_probe; dtrace_provider_t *prov = probe->dtpr_provider; char c[DTRACE_FULLNAMELEN + 80], *str; - char *msg = "dtrace: breakpoint action at probe "; - char *ecbmsg = " (ecb "; + const char *msg = "dtrace: breakpoint action at probe "; + const char *ecbmsg = " (ecb "; uintptr_t mask = (0xf << (sizeof (uintptr_t) * NBBY / 4)); uintptr_t val = (uintptr_t)ecb; int shift = (sizeof (uintptr_t) * NBBY) - 4, i = 0; @@ -5015,6 +6615,7 @@ dtrace_action_breakpoint(dtrace_ecb_t *ecb) debug_enter(c); } +__attribute__((noinline)) static void dtrace_action_panic(dtrace_ecb_t *ecb) { @@ -5031,27 +6632,23 @@ dtrace_action_panic(dtrace_ecb_t *ecb) if (dtrace_panicked != NULL) return; -#if !defined(__APPLE__) - if (dtrace_casptr(&dtrace_panicked, NULL, curthread) != NULL) - return; -#else if (dtrace_casptr(&dtrace_panicked, NULL, current_thread()) != NULL) return; -#endif /* __APPLE__ */ /* * We won the right to panic. (We want to be sure that only one * thread calls panic() from dtrace_probe(), and that panic() is * called exactly once.) */ - dtrace_panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)", + panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)", probe->dtpr_provider->dtpv_name, probe->dtpr_mod, probe->dtpr_func, probe->dtpr_name, (void *)ecb); -#if defined(__APPLE__) - /* Mac OS X debug feature -- can return from panic() */ + /* + * APPLE NOTE: this was for an old Mac OS X debug feature + * allowing a return from panic(). Revisit someday. + */ dtrace_panicked = NULL; -#endif /* __APPLE__ */ } static void @@ -5065,24 +6662,17 @@ dtrace_action_raise(uint64_t sig) return; } -#if !defined(__APPLE__) /* * raise() has a queue depth of 1 -- we ignore all subsequent * invocations of the raise() action. */ - if (curthread->t_dtrace_sig == 0) - curthread->t_dtrace_sig = (uint8_t)sig; - curthread->t_sig_check = 1; - aston(curthread); -#else uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); if (uthread && uthread->t_dtrace_sig == 0) { uthread->t_dtrace_sig = sig; - psignal(current_proc(), (int)sig); + act_set_astbsd(current_thread()); } -#endif /* __APPLE__ */ } static void @@ -5091,23 +6681,56 @@ dtrace_action_stop(void) if (dtrace_destructive_disallow) return; -#if !defined(__APPLE__) - if (!curthread->t_dtrace_stop) { - curthread->t_dtrace_stop = 1; - curthread->t_sig_check = 1; - aston(curthread); + uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); + if (uthread) { + /* + * The currently running process will be set to task_suspend + * when it next leaves the kernel. + */ + uthread->t_dtrace_stop = 1; + act_set_astbsd(current_thread()); + } +} + + +/* + * APPLE NOTE: pidresume works in conjunction with the dtrace stop action. + * Both activate only when the currently running process next leaves the + * kernel. 
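+ * A D consumer can therefore park a traced process with the stop()
+ * action and later release it with pidresume(pid).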
+ */ +static void +dtrace_action_pidresume(uint64_t pid) +{ + if (dtrace_destructive_disallow) + return; + + if (kauth_cred_issuser(kauth_cred_get()) == 0) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return; + } + uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); + + /* + * When the currently running process leaves the kernel, it attempts to + * task_resume the process (denoted by pid), if that pid appears to have + * been stopped by dtrace_action_stop(). + * The currently running process has a pidresume() queue depth of 1 -- + * subsequent invocations of the pidresume() action are ignored. + */ + + if (pid != 0 && uthread && uthread->t_dtrace_resumepid == 0) { + uthread->t_dtrace_resumepid = pid; + act_set_astbsd(current_thread()); } -#else - psignal(current_proc(), SIGSTOP); -#endif /* __APPLE__ */ } +__attribute__((noinline)) static void dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val) { hrtime_t now; volatile uint16_t *flags; - cpu_t *cpu = CPU; + dtrace_cpu_t *cpu = CPU; if (dtrace_destructive_disallow) return; @@ -5147,6 +6770,7 @@ dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val) cpu->cpu_dtrace_chilled += val; } +__attribute__((noinline)) static void dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t *buf, uint64_t arg) @@ -5173,8 +6797,7 @@ dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state, size = (uintptr_t)fps - mstate->dtms_scratch_ptr + (nframes * sizeof (uint64_t)); - if (mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + if (!DTRACE_INSCRATCH(mstate, (uintptr_t)size)) { /* * Not enough room for our frame pointers -- need to indicate * that we ran out of scratch space. @@ -5261,20 +6884,123 @@ out: mstate->dtms_scratch_ptr = old; } +__attribute__((noinline)) +static void +dtrace_store_by_ref(dtrace_difo_t *dp, caddr_t tomax, size_t size, + size_t *valoffsp, uint64_t *valp, uint64_t end, int intuple, int dtkind) +{ + volatile uint16_t *flags; + uint64_t val = *valp; + size_t valoffs = *valoffsp; + + flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags; + ASSERT(dtkind == DIF_TF_BYREF || dtkind == DIF_TF_BYUREF); + + /* + * If this is a string, we're going to only load until we find the zero + * byte -- after which we'll store zero bytes. + */ + if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) { + char c = '\0' + 1; + size_t s; + + for (s = 0; s < size; s++) { + if (c != '\0' && dtkind == DIF_TF_BYREF) { + c = dtrace_load8(val++); + } else if (c != '\0' && dtkind == DIF_TF_BYUREF) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + c = dtrace_fuword8((user_addr_t)(uintptr_t)val++); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); + if (*flags & CPU_DTRACE_FAULT) + break; + } + + DTRACE_STORE(uint8_t, tomax, valoffs++, c); + + if (c == '\0' && intuple) + break; + } + } else { + uint8_t c; + while (valoffs < end) { + if (dtkind == DIF_TF_BYREF) { + c = dtrace_load8(val++); + } else if (dtkind == DIF_TF_BYUREF) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + c = dtrace_fuword8((user_addr_t)(uintptr_t)val++); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); + if (*flags & CPU_DTRACE_FAULT) + break; + } + + DTRACE_STORE(uint8_t, tomax, + valoffs++, c); + } + } + + *valp = val; + *valoffsp = valoffs; +} + +/* + * Disables interrupts and sets the per-thread inprobe flag. When DEBUG is + * defined, we also assert that we are not recursing unless the probe ID is an + * error probe. 
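+ * Callers bracket probe-context work with the matching pair, as
+ * dtrace_probe() does below:
+ *
+ *	cookie = dtrace_probe_enter(id);
+ *	...fire the probe...
+ *	dtrace_probe_exit(cookie);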
+ */ +static dtrace_icookie_t +dtrace_probe_enter(dtrace_id_t id) +{ + thread_t thread = current_thread(); + uint16_t inprobe; + + dtrace_icookie_t cookie; + + cookie = dtrace_interrupt_disable(); + + /* + * Unless this is an ERROR probe, we are not allowed to recurse in + * dtrace_probe(). Recursing into DTrace probe usually means that a + * function is instrumented that should not have been instrumented or + * that the ordering guarantee of the records will be violated, + * resulting in unexpected output. If there is an exception to this + * assertion, a new case should be added. + */ + inprobe = dtrace_get_thread_inprobe(thread); + VERIFY(inprobe == 0 || + id == dtrace_probeid_error); + ASSERT(inprobe < UINT16_MAX); + dtrace_set_thread_inprobe(thread, inprobe + 1); + + return (cookie); +} + +/* + * Clears the per-thread inprobe flag and enables interrupts. + */ +static void +dtrace_probe_exit(dtrace_icookie_t cookie) +{ + thread_t thread = current_thread(); + uint16_t inprobe = dtrace_get_thread_inprobe(thread); + + ASSERT(inprobe > 0); + dtrace_set_thread_inprobe(thread, inprobe - 1); + +#if INTERRUPT_MASKED_DEBUG + ml_spin_debug_reset(thread); +#endif /* INTERRUPT_MASKED_DEBUG */ + + dtrace_interrupt_enable(cookie); +} + /* * If you're looking for the epicenter of DTrace, you just found it. This * is the function called by the provider to fire a probe -- from which all * subsequent probe-context DTrace activity emanates. */ -#if !defined(__APPLE__) void -dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, - uintptr_t arg2, uintptr_t arg3, uintptr_t arg4) -#else -static void -__dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, +dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4) -#endif /* __APPLE__ */ { processorid_t cpuid; dtrace_icookie_t cookie; @@ -5288,33 +7014,30 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, volatile uint16_t *flags; hrtime_t now; -#if !defined(__APPLE__) - /* - * Kick out immediately if this CPU is still being born (in which case - * curthread will be set to -1) - */ - if ((uintptr_t)curthread & 1) + cookie = dtrace_probe_enter(id); + + /* Ensure that probe id is valid. */ + if (id - 1 >= (dtrace_id_t)dtrace_nprobes) { + dtrace_probe_exit(cookie); return; -#else -#endif /* __APPLE__ */ + } - cookie = dtrace_interrupt_disable(); probe = dtrace_probes[id - 1]; + if (probe == NULL) { + dtrace_probe_exit(cookie); + return; + } + cpuid = CPU->cpu_id; onintr = CPU_ON_INTR(CPU); -#if !defined(__APPLE__) - if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE && - probe->dtpr_predcache == curthread->t_predcache) { -#else if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE && probe->dtpr_predcache == dtrace_get_thread_predcache(current_thread())) { -#endif /* __APPLE__ */ /* * We have hit in the predicate cache; we know that * this predicate would evaluate to be false. */ - dtrace_interrupt_enable(cookie); + dtrace_probe_exit(cookie); return; } @@ -5322,7 +7045,7 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, /* * We don't trace anything if we're panicking. */ - dtrace_interrupt_enable(cookie); + dtrace_probe_exit(cookie); return; } @@ -5333,6 +7056,11 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, if (vtime && curthread->t_dtrace_start) curthread->t_dtrace_vtime += now - curthread->t_dtrace_start; #else + /* + * APPLE NOTE: The time spent entering DTrace and arriving + * to this point, is attributed to the current thread. 
+ * Instead it should accrue to DTrace. FIXME + */ vtime = dtrace_vtime_references != 0; if (vtime) @@ -5354,14 +7082,14 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, now = dtrace_gethrtime(); /* must not precede dtrace_calc_thread_recent_vtime() call! */ #endif /* __APPLE__ */ -#if defined(__APPLE__) /* - * A provider may call dtrace_probe_error() in lieu of dtrace_probe() in some circumstances. - * See, e.g. fasttrap_isa.c. However the provider has no access to ECB context, so passes - * NULL through "arg0" and the probe_id of the ovedrriden probe as arg1. Detect that here - * and cons up a viable state (from the probe_id). + * APPLE NOTE: A provider may call dtrace_probe_error() in lieu of + * dtrace_probe() in some circumstances. See, e.g. fasttrap_isa.c. + * However the provider has no access to ECB context, so passes + * 0 through "arg0" and the probe_id of the overridden probe as arg1. + * Detect that here and cons up a viable state (from the probe_id). */ - if (dtrace_probeid_error == id && NULL == arg0) { + if (dtrace_probeid_error == id && 0 == arg0) { dtrace_id_t ftp_id = (dtrace_id_t)arg1; dtrace_probe_t *ftp_probe = dtrace_probes[ftp_id - 1]; dtrace_ecb_t *ftp_ecb = ftp_probe->dtpr_ecb; @@ -5377,9 +7105,10 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, ftp_state->dts_arg_error_illval = -1; /* arg5 */ } } -#endif /* __APPLE__ */ + mstate.dtms_difo = NULL; mstate.dtms_probe = probe; + mstate.dtms_strtok = 0; mstate.dtms_arg[0] = arg0; mstate.dtms_arg[1] = arg1; mstate.dtms_arg[2] = arg2; @@ -5395,6 +7124,7 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid]; dtrace_vstate_t *vstate = &state->dts_vstate; dtrace_provider_t *prov = probe->dtpr_provider; + uint64_t tracememsize = 0; int committed = 0; caddr_t tomax; @@ -5462,6 +7192,7 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, * not the case. */ if ((ecb->dte_cond & DTRACE_COND_USERMODE) && + prov->dtpv_pops.dtps_usermode && prov->dtpv_pops.dtps_usermode(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg) == 0) continue; @@ -5483,25 +7214,26 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, cred_t *s_cr = ecb->dte_state->dts_cred.dcr_cred; proc_t *proc; +#pragma unused(proc) /* __APPLE__ */ ASSERT(s_cr != NULL); -#if !defined(__APPLE__) - if ((cr = CRED()) == NULL || -#else + /* + * XXX this is hackish, but so is setting a variable + * XXX in a McCarthy OR... + */ if ((cr = dtrace_CRED()) == NULL || -#endif /* __APPLE__ */ - s_cr->cr_uid != cr->cr_uid || - s_cr->cr_uid != cr->cr_ruid || - s_cr->cr_uid != cr->cr_suid || - s_cr->cr_gid != cr->cr_gid || - s_cr->cr_gid != cr->cr_rgid || - s_cr->cr_gid != cr->cr_sgid || + posix_cred_get(s_cr)->cr_uid != posix_cred_get(cr)->cr_uid || + posix_cred_get(s_cr)->cr_uid != posix_cred_get(cr)->cr_ruid || + posix_cred_get(s_cr)->cr_uid != posix_cred_get(cr)->cr_suid || + posix_cred_get(s_cr)->cr_gid != posix_cred_get(cr)->cr_gid || + posix_cred_get(s_cr)->cr_gid != posix_cred_get(cr)->cr_rgid || + posix_cred_get(s_cr)->cr_gid != posix_cred_get(cr)->cr_sgid || #if !defined(__APPLE__) (proc = ttoproc(curthread)) == NULL || (proc->p_flag & SNOCD)) #else - 1) /* Darwin omits "No Core Dump" flag. 
*/ + 1) /* APPLE NOTE: Darwin omits "No Core Dump" flag */ #endif /* __APPLE__ */ continue; } @@ -5510,14 +7242,17 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, cred_t *cr; cred_t *s_cr = ecb->dte_state->dts_cred.dcr_cred; +#pragma unused(cr, s_cr) /* __APPLE__ */ ASSERT(s_cr != NULL); -#if !defined(__APPLE__) /* Darwin doesn't do zones. */ +#if !defined(__APPLE__) if ((cr = CRED()) == NULL || s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) continue; +#else + /* APPLE NOTE: Darwin doesn't do zones. */ #endif /* __APPLE__ */ } } @@ -5553,15 +7288,35 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, tomax = buf->dtb_tomax; ASSERT(tomax != NULL); - if (ecb->dte_size != 0) - DTRACE_STORE(uint32_t, tomax, offs, ecb->dte_epid); + /* + * Build and store the record header corresponding to the ECB. + */ + if (ecb->dte_size != 0) { + dtrace_rechdr_t dtrh; + + if (!(mstate.dtms_present & DTRACE_MSTATE_TIMESTAMP)) { + mstate.dtms_timestamp = dtrace_gethrtime(); + mstate.dtms_present |= DTRACE_MSTATE_TIMESTAMP; + } + + ASSERT(ecb->dte_size >= sizeof(dtrace_rechdr_t)); + + dtrh.dtrh_epid = ecb->dte_epid; + DTRACE_RECORD_STORE_TIMESTAMP(&dtrh, mstate.dtms_timestamp); + DTRACE_STORE(dtrace_rechdr_t, tomax, offs, dtrh); + } mstate.dtms_epid = ecb->dte_epid; mstate.dtms_present |= DTRACE_MSTATE_EPID; + if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) + mstate.dtms_access = DTRACE_ACCESS_KERNEL; + else + mstate.dtms_access = 0; + if (pred != NULL) { dtrace_difo_t *dp = pred->dtp_difo; - int rval; + uint64_t rval; rval = dtrace_dif_emulate(dp, &mstate, vstate, state); @@ -5573,11 +7328,8 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, * Update the predicate cache... */ ASSERT(cid == pred->dtp_cacheid); -#if !defined(__APPLE__) - curthread->t_predcache = cid; -#else + dtrace_set_thread_predcache(current_thread(), cid); -#endif /* __APPLE__ */ } continue; @@ -5642,8 +7394,7 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, dtrace_getpcstack((pc_t *)(tomax + valoffs), size / sizeof (pc_t), probe->dtpr_aframes, DTRACE_ANCHORED(probe) ? NULL : - (uint32_t *)arg0); - + (uint32_t *)(uintptr_t)arg0); continue; case DTRACEACT_JSTACK: @@ -5701,7 +7452,9 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, continue; switch (act->dta_kind) { - case DTRACEACT_SPECULATE: + case DTRACEACT_SPECULATE: { + dtrace_rechdr_t *dtrh = NULL; + ASSERT(buf == &state->dts_buffer[cpuid]); buf = dtrace_speculation_buffer(state, cpuid, val); @@ -5723,10 +7476,24 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, tomax = buf->dtb_tomax; ASSERT(tomax != NULL); - if (ecb->dte_size != 0) - DTRACE_STORE(uint32_t, tomax, offs, - ecb->dte_epid); - continue; + if (ecb->dte_size == 0) + continue; + + ASSERT(ecb->dte_size >= sizeof(dtrace_rechdr_t)); + dtrh = ((void *)(tomax + offs)); + dtrh->dtrh_epid = ecb->dte_epid; + + /* + * When the speculation is committed, all of + * the records in the speculative buffer will + * have their timestamps set to the commit + * time. Until then, it is set to a sentinel + * value, for debugability. 
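+				 * (The sentinel is UINT64_MAX, stored
+				 * immediately below.)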
+ */ + DTRACE_RECORD_STORE_TIMESTAMP(dtrh, UINT64_MAX); + + continue; + } case DTRACEACT_CHILL: if (dtrace_priv_kernel_destructive(state)) @@ -5738,6 +7505,11 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, dtrace_action_raise(val); continue; + case DTRACEACT_PIDRESUME: /* __APPLE__ */ + if (dtrace_priv_proc_destructive(state)) + dtrace_action_pidresume(val); + continue; + case DTRACEACT_COMMIT: ASSERT(!committed); @@ -5761,31 +7533,20 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, case DTRACEACT_PRINTA: case DTRACEACT_SYSTEM: case DTRACEACT_FREOPEN: + case DTRACEACT_APPLEBINARY: /* __APPLE__ */ + case DTRACEACT_TRACEMEM: + break; + + case DTRACEACT_TRACEMEM_DYNSIZE: + tracememsize = val; break; case DTRACEACT_SYM: case DTRACEACT_MOD: if (!dtrace_priv_kernel(state)) continue; - break; - -#if !defined(__APPLE__) - case DTRACEACT_USYM: - case DTRACEACT_UMOD: - case DTRACEACT_UADDR: { - struct pid *pid = curthread->t_procp->p_pidp; - - if (!dtrace_priv_proc(state)) - continue; - - DTRACE_STORE(uint64_t, tomax, - valoffs, (uint64_t)pid->pid_id); - DTRACE_STORE(uint64_t, tomax, - valoffs + sizeof (uint64_t), val); - - continue; - } -#else + break; + case DTRACEACT_USYM: case DTRACEACT_UMOD: case DTRACEACT_UADDR: { @@ -5793,13 +7554,12 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, continue; DTRACE_STORE(uint64_t, tomax, - valoffs, (uint64_t)proc_selfpid()); + valoffs, (uint64_t)dtrace_proc_selfpid()); DTRACE_STORE(uint64_t, tomax, valoffs + sizeof (uint64_t), val); continue; } -#endif /* __APPLE__ */ case DTRACEACT_EXIT: { /* @@ -5839,38 +7599,27 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, ASSERT(0); } - if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF) { + if (dp->dtdo_rtype.dtdt_flags & (DIF_TF_BYREF | DIF_TF_BYUREF)) { uintptr_t end = valoffs + size; - /* - * If this is a string, we're going to only - * load until we find the zero byte -- after - * which we'll store zero bytes. - */ - if (dp->dtdo_rtype.dtdt_kind == - DIF_TYPE_STRING) { - char c = '\0' + 1; - int intuple = act->dta_intuple; - size_t s; - - for (s = 0; s < size; s++) { - if (c != '\0') - c = dtrace_load8(val++); - - DTRACE_STORE(uint8_t, tomax, - valoffs++, c); - - if (c == '\0' && intuple) - break; - } - + if (tracememsize != 0 && + valoffs + tracememsize < end) + { + end = valoffs + tracememsize; + tracememsize = 0; + } + + if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF && + !dtrace_vcanload((void *)(uintptr_t)val, + &dp->dtdo_rtype, NULL, &mstate, vstate)) + { continue; } - while (valoffs < end) { - DTRACE_STORE(uint8_t, tomax, valoffs++, - dtrace_load8(val++)); - } + dtrace_store_by_ref(dp, tomax, size, &valoffs, + &val, end, act->dta_intuple, + dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF ? + DIF_TF_BYREF: DIF_TF_BYUREF); continue; } @@ -5928,13 +7677,14 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, * time to prevent it from being accumulated * into t_dtrace_vtime. */ -#if !defined(__APPLE__) - curthread->t_dtrace_start = 0; -#else - /* Set the sign bit on t_dtrace_tracing to suspend accumulation to it. */ + + /* + * Darwin sets the sign bit on t_dtrace_tracing + * to suspend accumulation to it. 
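+			 * The bit is cleared again on the way out of
+			 * dtrace_probe() when the error recursion returns.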
+ */ dtrace_set_thread_tracing(current_thread(), - (1ULL<<63) | dtrace_get_thread_tracing(current_thread())); -#endif /* __APPLE__ */ + (1ULL<<63) | dtrace_get_thread_tracing(current_thread())); + } /* @@ -5961,45 +7711,23 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, buf->dtb_offset = offs + ecb->dte_size; } -#if !defined(__APPLE__) - if (vtime) - curthread->t_dtrace_start = dtrace_gethrtime(); -#else + /* FIXME: On Darwin the time spent leaving DTrace from this point to the rti is attributed + to the current thread. Instead it should accrue to DTrace. */ if (vtime) { thread_t thread = current_thread(); int64_t t = dtrace_get_thread_tracing(thread); - if (t >= 0) { + if (t >= 0) { /* Usual case, accumulate time spent here into t_dtrace_tracing */ dtrace_set_thread_tracing(thread, t + (dtrace_gethrtime() - now)); - } else { + } else { /* Return from error recursion. No accumulation, just clear the sign bit on t_dtrace_tracing. */ - dtrace_set_thread_tracing(thread, (~(1ULL<<63)) & t); + dtrace_set_thread_tracing(thread, (~(1ULL<<63)) & t); } } -#endif /* __APPLE__ */ - - dtrace_interrupt_enable(cookie); -} - -#if defined(__APPLE__) -/* Don't allow a thread to re-enter dtrace_probe() */ -void -dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, - uint64_t arg2, uint64_t arg3, uint64_t arg4) -{ - thread_t thread = current_thread(); - if (id == dtrace_probeid_error) { - __dtrace_probe(id, arg0, arg1, arg2, arg3, arg4); - dtrace_getfp(); /* Defeat tail-call optimization of __dtrace_probe() */ - } else if (!dtrace_get_thread_reentering(thread)) { - dtrace_set_thread_reentering(thread, TRUE); - __dtrace_probe(id, arg0, arg1, arg2, arg3, arg4); - dtrace_set_thread_reentering(thread, FALSE); - } + dtrace_probe_exit(cookie); } -#endif /* __APPLE__ */ /* * DTrace Probe Hashing Functions @@ -6013,7 +7741,7 @@ dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, * specified.) */ static uint_t -dtrace_hash_str(char *p) +dtrace_hash_str(const char *p) { unsigned int g; uint_t hval = 0; @@ -6027,12 +7755,33 @@ dtrace_hash_str(char *p) return (hval); } +static const char* +dtrace_strkey_probe_provider(void *elm, uintptr_t offs) +{ +#pragma unused(offs) + dtrace_probe_t *probe = (dtrace_probe_t*)elm; + return probe->dtpr_provider->dtpv_name; +} + +static const char* +dtrace_strkey_offset(void *elm, uintptr_t offs) +{ + return ((char *)((uintptr_t)(elm) + offs)); +} + +static const char* +dtrace_strkey_deref_offset(void *elm, uintptr_t offs) +{ + return *((char **)((uintptr_t)(elm) + offs)); +} + static dtrace_hash_t * -dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs) +dtrace_hash_create(dtrace_strkey_f func, uintptr_t arg, uintptr_t nextoffs, uintptr_t prevoffs) { dtrace_hash_t *hash = kmem_zalloc(sizeof (dtrace_hash_t), KM_SLEEP); - hash->dth_stroffs = stroffs; + hash->dth_getstr = func; + hash->dth_stroffs = arg; hash->dth_nextoffs = nextoffs; hash->dth_prevoffs = prevoffs; @@ -6045,11 +7794,16 @@ dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs) return (hash); } -#if !defined(__APPLE__) /* Quiet compiler warning */ +/* + * APPLE NOTE: dtrace_hash_destroy is not used. + * It is called by dtrace_detach which is not + * currently implemented. Revisit someday. 
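+ * Until then, the definition below stays compiled out under
+ * !defined(__APPLE__).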
+ */ +#if !defined(__APPLE__) static void dtrace_hash_destroy(dtrace_hash_t *hash) { -#ifdef DEBUG +#if DEBUG int i; for (i = 0; i < hash->dth_size; i++) @@ -6076,10 +7830,10 @@ dtrace_hash_resize(dtrace_hash_t *hash) for (i = 0; i < size; i++) { for (bucket = hash->dth_tab[i]; bucket != NULL; bucket = next) { - dtrace_probe_t *probe = bucket->dthb_chain; + void *elm = bucket->dthb_chain; - ASSERT(probe != NULL); - ndx = DTRACE_HASHSTR(hash, probe) & new_mask; + ASSERT(elm != NULL); + ndx = DTRACE_HASHSTR(hash, elm) & new_mask; next = bucket->dthb_next; bucket->dthb_next = new_tab[ndx]; @@ -6094,12 +7848,12 @@ dtrace_hash_resize(dtrace_hash_t *hash) } static void -dtrace_hash_add(dtrace_hash_t *hash, dtrace_probe_t *new) +dtrace_hash_add(dtrace_hash_t *hash, void *new) { int hashval = DTRACE_HASHSTR(hash, new); int ndx = hashval & hash->dth_mask; dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; - dtrace_probe_t **nextp, **prevp; + void **nextp, **prevp; for (; bucket != NULL; bucket = bucket->dthb_next) { if (DTRACE_HASHEQ(hash, bucket->dthb_chain, new)) @@ -6132,23 +7886,29 @@ add: bucket->dthb_len++; } -static dtrace_probe_t * -dtrace_hash_lookup(dtrace_hash_t *hash, dtrace_probe_t *template) +static void * +dtrace_hash_lookup_string(dtrace_hash_t *hash, const char *str) { - int hashval = DTRACE_HASHSTR(hash, template); + int hashval = dtrace_hash_str(str); int ndx = hashval & hash->dth_mask; dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; for (; bucket != NULL; bucket = bucket->dthb_next) { - if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template)) + if (strcmp(str, DTRACE_GETSTR(hash, bucket->dthb_chain)) == 0) return (bucket->dthb_chain); } return (NULL); } +static dtrace_probe_t * +dtrace_hash_lookup(dtrace_hash_t *hash, void *template) +{ + return dtrace_hash_lookup_string(hash, DTRACE_GETSTR(hash, template)); +} + static int -dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template) +dtrace_hash_collisions(dtrace_hash_t *hash, void *template) { int hashval = DTRACE_HASHSTR(hash, template); int ndx = hashval & hash->dth_mask; @@ -6159,23 +7919,23 @@ dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template) return (bucket->dthb_len); } - return (NULL); + return (0); } static void -dtrace_hash_remove(dtrace_hash_t *hash, dtrace_probe_t *probe) +dtrace_hash_remove(dtrace_hash_t *hash, void *elm) { - int ndx = DTRACE_HASHSTR(hash, probe) & hash->dth_mask; + int ndx = DTRACE_HASHSTR(hash, elm) & hash->dth_mask; dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; - dtrace_probe_t **prevp = DTRACE_HASHPREV(hash, probe); - dtrace_probe_t **nextp = DTRACE_HASHNEXT(hash, probe); + void **prevp = DTRACE_HASHPREV(hash, elm); + void **nextp = DTRACE_HASHNEXT(hash, elm); /* - * Find the bucket that we're removing this probe from. + * Find the bucket that we're removing this elm from. */ for (; bucket != NULL; bucket = bucket->dthb_next) { - if (DTRACE_HASHEQ(hash, bucket->dthb_chain, probe)) + if (DTRACE_HASHEQ(hash, bucket->dthb_chain, elm)) break; } @@ -6184,12 +7944,12 @@ dtrace_hash_remove(dtrace_hash_t *hash, dtrace_probe_t *probe) if (*prevp == NULL) { if (*nextp == NULL) { /* - * The removed probe was the only probe on this + * The removed element was the only element on this * bucket; we need to remove the bucket. 
*/ dtrace_hashbucket_t *b = hash->dth_tab[ndx]; - ASSERT(bucket->dthb_chain == probe); + ASSERT(bucket->dthb_chain == elm); ASSERT(b != NULL); if (b == bucket) { @@ -6229,18 +7989,63 @@ dtrace_badattr(const dtrace_attribute_t *a) } /* - * Return a duplicate copy of a string. If the specified string is NULL, - * this function returns a zero-length string. + * Returns a dtrace-managed copy of a string, and will + * deduplicate copies of the same string. + * If the specified string is NULL, returns an empty string */ static char * -dtrace_strdup(const char *str) +dtrace_strref(const char *str) { - char *new = kmem_zalloc((str != NULL ? strlen(str) : 0) + 1, KM_SLEEP); + dtrace_string_t *s = NULL; + size_t bufsize = (str != NULL ? strlen(str) : 0) + 1; - if (str != NULL) - (void) strcpy(new, str); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - return (new); + if (str == NULL) + str = ""; + + for (s = dtrace_hash_lookup_string(dtrace_strings, str); s != NULL; + s = *(DTRACE_HASHNEXT(dtrace_strings, s))) { + if (strncmp(str, s->dtst_str, bufsize) != 0) { + continue; + } + ASSERT(s->dtst_refcount != UINT32_MAX); + s->dtst_refcount++; + return s->dtst_str; + } + + s = kmem_zalloc(sizeof(dtrace_string_t) + bufsize, KM_SLEEP); + s->dtst_refcount = 1; + (void) strlcpy(s->dtst_str, str, bufsize); + + dtrace_hash_add(dtrace_strings, s); + + return s->dtst_str; +} + +static void +dtrace_strunref(const char *str) +{ + ASSERT(str != NULL); + dtrace_string_t *s = NULL; + size_t bufsize = strlen(str) + 1; + + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + + for (s = dtrace_hash_lookup_string(dtrace_strings, str); s != NULL; + s = *(DTRACE_HASHNEXT(dtrace_strings, s))) { + if (strncmp(str, s->dtst_str, bufsize) != 0) { + continue; + } + ASSERT(s->dtst_refcount != 0); + s->dtst_refcount--; + if (s->dtst_refcount == 0) { + dtrace_hash_remove(dtrace_strings, s); + kmem_free(s, sizeof(dtrace_string_t) + bufsize); + } + return; + } + panic("attempt to unref non-existent string %s", str); } #define DTRACE_ISALPHA(c) \ @@ -6272,10 +8077,14 @@ dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp) uint32_t priv; if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) { - /* - * For DTRACE_PRIV_ALL, the uid and zoneid don't matter. 
- */ - priv = DTRACE_PRIV_ALL; + if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed()) { + priv = DTRACE_PRIV_USER | DTRACE_PRIV_PROC | DTRACE_PRIV_OWNER; + } + else { + priv = DTRACE_PRIV_ALL; + } + *uidp = 0; + *zoneidp = 0; } else { *uidp = crgetuid(cr); *zoneidp = crgetzoneid(cr); @@ -6300,16 +8109,12 @@ dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp) static void dtrace_errdebug(const char *str) { - int hval = dtrace_hash_str((char *)str) % DTRACE_ERRHASHSZ; + int hval = dtrace_hash_str(str) % DTRACE_ERRHASHSZ; int occupied = 0; lck_mtx_lock(&dtrace_errlock); dtrace_errlast = str; -#if !defined(__APPLE__) - dtrace_errthread = curthread; -#else - dtrace_errthread = current_thread(); -#endif /* __APPLE__ */ + dtrace_errthread = (kthread_t *)current_thread(); while (occupied++ < DTRACE_ERRHASHSZ) { if (dtrace_errhash[hval].dter_msg == str) { @@ -6504,12 +8309,12 @@ top: case '\\': if ((c = *p++) == '\0') return (0); - /*FALLTHRU*/ + OS_FALLTHROUGH; default: if (c != s1) return (0); - /*FALLTHRU*/ + OS_FALLTHROUGH; case '?': if (s1 != '\0') @@ -6536,14 +8341,35 @@ top: static int dtrace_match_string(const char *s, const char *p, int depth) { - return (s != NULL && strcmp(s, p) == 0); +#pragma unused(depth) /* __APPLE__ */ + return (s != NULL && s == p); +} + +/*ARGSUSED*/ +static int +dtrace_match_module(const char *s, const char *p, int depth) +{ +#pragma unused(depth) /* __APPLE__ */ + size_t len; + if (s == NULL || p == NULL) + return (0); + + len = strlen(p); + + if (strncmp(p, s, len) != 0) + return (0); + + if (s[len] == '.' || s[len] == '\0') + return (1); + + return (0); } /*ARGSUSED*/ static int dtrace_match_nul(const char *s, const char *p, int depth) { -#pragma unused(s,p,depth) +#pragma unused(s, p, depth) /* __APPLE__ */ return (1); /* always match the empty pattern */ } @@ -6551,20 +8377,31 @@ dtrace_match_nul(const char *s, const char *p, int depth) static int dtrace_match_nonzero(const char *s, const char *p, int depth) { -#pragma unused(p,depth) +#pragma unused(p, depth) /* __APPLE__ */ return (s != NULL && s[0] != '\0'); } static int dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, - zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *), void *arg) + zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *, void *), void *arg1, void *arg2) { - dtrace_probe_t template, *probe; + dtrace_probe_t *probe; + dtrace_provider_t prov_template = { + .dtpv_name = (char *)(uintptr_t)pkp->dtpk_prov + }; + + dtrace_probe_t template = { + .dtpr_provider = &prov_template, + .dtpr_mod = (char *)(uintptr_t)pkp->dtpk_mod, + .dtpr_func = (char *)(uintptr_t)pkp->dtpk_func, + .dtpr_name = (char *)(uintptr_t)pkp->dtpk_name + }; + dtrace_hash_t *hash = NULL; - int len, best = INT_MAX, nmatched = 0; + int len, rc, best = INT_MAX, nmatched = 0; dtrace_id_t i; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); /* * If the probe ID is specified in the key, just lookup by ID and @@ -6573,22 +8410,26 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, if (pkp->dtpk_id != DTRACE_IDNONE) { if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL && dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) { - (void) (*matched)(probe, arg); + if ((*matched)(probe, arg1, arg2) == DTRACE_MATCH_FAIL) + return (DTRACE_MATCH_FAIL); nmatched++; } return (nmatched); } - template.dtpr_mod = (char *)pkp->dtpk_mod; - template.dtpr_func = (char *)pkp->dtpk_func; - 
template.dtpr_name = (char *)pkp->dtpk_name; - /* - * We want to find the most distinct of the module name, function - * name, and name. So for each one that is not a glob pattern or - * empty string, we perform a lookup in the corresponding hash and - * use the hash table with the fewest collisions to do our search. + * We want to find the most distinct of the provider name, module name, + * function name, and name. So for each one that is not a glob + * pattern or empty string, we perform a lookup in the corresponding + * hash and use the hash table with the fewest collisions to do our + * search. */ + if (pkp->dtpk_pmatch == &dtrace_match_string && + (len = dtrace_hash_collisions(dtrace_byprov, &template)) < best) { + best = len; + hash = dtrace_byprov; + } + if (pkp->dtpk_mmatch == &dtrace_match_string && (len = dtrace_hash_collisions(dtrace_bymod, &template)) < best) { best = len; @@ -6612,7 +8453,7 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, * invoke our callback for each one that matches our input probe key. */ if (hash == NULL) { - for (i = 0; i < dtrace_nprobes; i++) { + for (i = 0; i < (dtrace_id_t)dtrace_nprobes; i++) { if ((probe = dtrace_probes[i]) == NULL || dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0) @@ -6620,8 +8461,11 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, nmatched++; - if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT) - break; + if ((rc = (*matched)(probe, arg1, arg2)) != DTRACE_MATCH_NEXT) { + if (rc == DTRACE_MATCH_FAIL) + return (DTRACE_MATCH_FAIL); + break; + } } return (nmatched); @@ -6640,8 +8484,11 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, nmatched++; - if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT) - break; + if ((rc = (*matched)(probe, arg1, arg2)) != DTRACE_MATCH_NEXT) { + if (rc == DTRACE_MATCH_FAIL) + return (DTRACE_MATCH_FAIL); + break; + } } return (nmatched); @@ -6669,6 +8516,24 @@ dtrace_probekey_func(const char *p) return (&dtrace_match_string); } +static dtrace_probekey_f * +dtrace_probekey_module_func(const char *p) +{ + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + + dtrace_probekey_f *f = dtrace_probekey_func(p); + if (f == &dtrace_match_string) { + dtrace_probe_t template = { + .dtpr_mod = (char *)(uintptr_t)p, + }; + if (dtrace_hash_lookup(dtrace_bymod, &template) == NULL) { + return (&dtrace_match_module); + } + return (&dtrace_match_string); + } + return f; +} + /* * Build a probe comparison key for use with dtrace_match_probe() from the * given probe description. 
By convention, a null key only matches anchored @@ -6678,16 +8543,17 @@ dtrace_probekey_func(const char *p) static void dtrace_probekey(const dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp) { - pkp->dtpk_prov = pdp->dtpd_provider; + + pkp->dtpk_prov = dtrace_strref(pdp->dtpd_provider); pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider); - pkp->dtpk_mod = pdp->dtpd_mod; - pkp->dtpk_mmatch = dtrace_probekey_func(pdp->dtpd_mod); + pkp->dtpk_mod = dtrace_strref(pdp->dtpd_mod); + pkp->dtpk_mmatch = dtrace_probekey_module_func(pdp->dtpd_mod); - pkp->dtpk_func = pdp->dtpd_func; + pkp->dtpk_func = dtrace_strref(pdp->dtpd_func); pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func); - pkp->dtpk_name = pdp->dtpd_name; + pkp->dtpk_name = dtrace_strref(pdp->dtpd_name); pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name); pkp->dtpk_id = pdp->dtpd_id; @@ -6700,6 +8566,26 @@ dtrace_probekey(const dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp) pkp->dtpk_fmatch = &dtrace_match_nonzero; } +static void +dtrace_probekey_release(dtrace_probekey_t *pkp) +{ + dtrace_strunref(pkp->dtpk_prov); + dtrace_strunref(pkp->dtpk_mod); + dtrace_strunref(pkp->dtpk_func); + dtrace_strunref(pkp->dtpk_name); +} + +static int +dtrace_cond_provider_match(dtrace_probedesc_t *desc, void *data) +{ + if (desc == NULL) + return 1; + + dtrace_probekey_f *func = dtrace_probekey_func(desc->dtpd_provider); + + return func((char*)data, desc->dtpd_provider, 0); +} + /* * DTrace Provider-to-Framework API Functions * @@ -6765,8 +8651,6 @@ dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv, } provider = kmem_zalloc(sizeof (dtrace_provider_t), KM_SLEEP); - provider->dtpv_name = kmem_alloc(strlen(name) + 1, KM_SLEEP); - (void) strcpy(provider->dtpv_name, name); provider->dtpv_attr = *pap; provider->dtpv_priv.dtpp_flags = priv; @@ -6778,30 +8662,30 @@ dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv, if (pops->dtps_provide == NULL) { ASSERT(pops->dtps_provide_module != NULL); - provider->dtpv_pops.dtps_provide = - (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop; + provider->dtpv_pops.dtps_provide = dtrace_provide_nullop; } if (pops->dtps_provide_module == NULL) { ASSERT(pops->dtps_provide != NULL); provider->dtpv_pops.dtps_provide_module = - (void (*)(void *, struct modctl *))dtrace_nullop; + dtrace_provide_module_nullop; } if (pops->dtps_suspend == NULL) { ASSERT(pops->dtps_resume == NULL); - provider->dtpv_pops.dtps_suspend = - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop; - provider->dtpv_pops.dtps_resume = - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop; + provider->dtpv_pops.dtps_suspend = dtrace_suspend_nullop; + provider->dtpv_pops.dtps_resume = dtrace_resume_nullop; } provider->dtpv_arg = arg; *idp = (dtrace_provider_id_t)provider; if (pops == &dtrace_provider_ops) { - lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + + provider->dtpv_name = dtrace_strref(name); + ASSERT(dtrace_anon.dta_enabling == NULL); /* @@ -6816,6 +8700,8 @@ dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv, lck_mtx_lock(&dtrace_provider_lock); lck_mtx_lock(&dtrace_lock); + provider->dtpv_name = dtrace_strref(name); + /* * If there is at least one provider registered, we'll add this * provider after the first provider. 
@@ -6831,13 +8717,16 @@ dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv, dtrace_enabling_provide(provider); /* - * Now we need to call dtrace_enabling_matchall() -- which - * will acquire cpu_lock and dtrace_lock. We therefore need + * Now we need to call dtrace_enabling_matchall_with_cond() -- + * with a condition matching the provider name we just added, + * which will acquire cpu_lock and dtrace_lock. We therefore need * to drop all of our locks before calling into it... */ lck_mtx_unlock(&dtrace_lock); lck_mtx_unlock(&dtrace_provider_lock); - dtrace_enabling_matchall(); + + dtrace_match_cond_t cond = {dtrace_cond_provider_match, provider->dtpv_name}; + dtrace_enabling_matchall_with_cond(&cond); return (0); } @@ -6857,20 +8746,22 @@ dtrace_unregister(dtrace_provider_id_t id) { dtrace_provider_t *old = (dtrace_provider_t *)id; dtrace_provider_t *prev = NULL; - int i, self = 0; - dtrace_probe_t *probe, *first = NULL; + int self = 0; + dtrace_probe_t *probe, *first = NULL, *next = NULL; + dtrace_probe_t template = { + .dtpr_provider = old + }; if (old->dtpv_pops.dtps_enable == - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop) { + (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop) { /* * If DTrace itself is the provider, we're called with locks * already held. */ ASSERT(old == dtrace_provider); ASSERT(dtrace_devi != NULL); - lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - + LCK_MTX_ASSERT(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); self = 1; if (dtrace_provider->dtpv_next != NULL) { @@ -6904,16 +8795,7 @@ dtrace_unregister(dtrace_provider_id_t id) /* * Attempt to destroy the probes associated with this provider. */ - for (i = 0; i < dtrace_nprobes; i++) { - if ((probe = dtrace_probes[i]) == NULL) - continue; - - if (probe->dtpr_provider != old) - continue; - - if (probe->dtpr_ecb == NULL) - continue; - + if (old->dtpv_ecb_count!=0) { /* * We have at least one ECB; we can't remove this provider. */ @@ -6929,14 +8811,13 @@ dtrace_unregister(dtrace_provider_id_t id) * All of the probes for this provider are disabled; we can safely * remove all of them from their hash chains and from the probe array. */ - for (i = 0; i < dtrace_nprobes; i++) { - if ((probe = dtrace_probes[i]) == NULL) - continue; - + for (probe = dtrace_hash_lookup(dtrace_byprov, &template); probe != NULL; + probe = *(DTRACE_HASHNEXT(dtrace_byprov, probe))) { if (probe->dtpr_provider != old) continue; - dtrace_probes[i] = NULL; + dtrace_probes[probe->dtpr_id - 1] = NULL; + old->dtpv_probe_count--; dtrace_hash_remove(dtrace_bymod, probe); dtrace_hash_remove(dtrace_byfunc, probe); @@ -6946,11 +8827,19 @@ dtrace_unregister(dtrace_provider_id_t id) first = probe; probe->dtpr_nextmod = NULL; } else { + /* + * Use nextmod as the chain of probes to remove + */ probe->dtpr_nextmod = first; first = probe; } } + for (probe = first; probe != NULL; probe = next) { + next = probe->dtpr_nextmod; + dtrace_hash_remove(dtrace_byprov, probe); + } + /* * The provider's probes have been removed from the hash chains and * from the probe array. 
Now issue a dtrace_sync() to be sure that @@ -6958,20 +8847,16 @@ dtrace_unregister(dtrace_provider_id_t id) */ dtrace_sync(); - for (probe = first; probe != NULL; probe = first) { - first = probe->dtpr_nextmod; + for (probe = first; probe != NULL; probe = next) { + next = probe->dtpr_nextmod; old->dtpv_pops.dtps_destroy(old->dtpv_arg, probe->dtpr_id, probe->dtpr_arg); - kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); - kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); - kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); + dtrace_strunref(probe->dtpr_mod); + dtrace_strunref(probe->dtpr_func); + dtrace_strunref(probe->dtpr_name); vmem_free(dtrace_arena, (void *)(uintptr_t)(probe->dtpr_id), 1); -#if !defined(__APPLE__) - kmem_free(probe, sizeof (dtrace_probe_t)); -#else zfree(dtrace_probe_t_zone, probe); -#endif } if ((prev = dtrace_provider) == old) { @@ -6990,13 +8875,14 @@ dtrace_unregister(dtrace_provider_id_t id) prev->dtpv_next = old->dtpv_next; } + dtrace_strunref(old->dtpv_name); + if (!self) { lck_mtx_unlock(&dtrace_lock); lck_mtx_unlock(&mod_lock); lck_mtx_unlock(&dtrace_provider_lock); } - kmem_free(old->dtpv_name, strlen(old->dtpv_name) + 1); kmem_free(old, sizeof (dtrace_provider_t)); return (0); @@ -7012,7 +8898,7 @@ dtrace_invalidate(dtrace_provider_id_t id) dtrace_provider_t *pvp = (dtrace_provider_t *)id; ASSERT(pvp->dtpv_pops.dtps_enable != - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop); + (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop); lck_mtx_lock(&dtrace_provider_lock); lck_mtx_lock(&dtrace_lock); @@ -7046,14 +8932,16 @@ int dtrace_condense(dtrace_provider_id_t id) { dtrace_provider_t *prov = (dtrace_provider_t *)id; - int i; - dtrace_probe_t *probe; + dtrace_probe_t *probe, *first = NULL; + dtrace_probe_t template = { + .dtpr_provider = prov + }; /* * Make sure this isn't the dtrace provider itself. */ ASSERT(prov->dtpv_pops.dtps_enable != - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop); + (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop); lck_mtx_lock(&dtrace_provider_lock); lck_mtx_lock(&dtrace_lock); @@ -7061,9 +8949,8 @@ dtrace_condense(dtrace_provider_id_t id) /* * Attempt to destroy the probes associated with this provider. 
*/ - for (i = 0; i < dtrace_nprobes; i++) { - if ((probe = dtrace_probes[i]) == NULL) - continue; + for (probe = dtrace_hash_lookup(dtrace_byprov, &template); probe != NULL; + probe = *(DTRACE_HASHNEXT(dtrace_byprov, probe))) { if (probe->dtpr_provider != prov) continue; @@ -7071,23 +8958,35 @@ dtrace_condense(dtrace_provider_id_t id) if (probe->dtpr_ecb != NULL) continue; - dtrace_probes[i] = NULL; + dtrace_probes[probe->dtpr_id - 1] = NULL; + prov->dtpv_probe_count--; dtrace_hash_remove(dtrace_bymod, probe); dtrace_hash_remove(dtrace_byfunc, probe); dtrace_hash_remove(dtrace_byname, probe); - prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, i + 1, + prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg); - kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); - kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); - kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); -#if !defined(__APPLE__) - kmem_free(probe, sizeof (dtrace_probe_t)); -#else + dtrace_strunref(probe->dtpr_mod); + dtrace_strunref(probe->dtpr_func); + dtrace_strunref(probe->dtpr_name); + if (first == NULL) { + first = probe; + probe->dtpr_nextmod = NULL; + } else { + /* + * Use nextmod as the chain of probes to remove + */ + probe->dtpr_nextmod = first; + first = probe; + } + } + + for (probe = first; probe != NULL; probe = first) { + first = probe->dtpr_nextmod; + dtrace_hash_remove(dtrace_byprov, probe); + vmem_free(dtrace_arena, (void *)((uintptr_t)probe->dtpr_id), 1); zfree(dtrace_probe_t_zone, probe); -#endif - vmem_free(dtrace_arena, (void *)((uintptr_t)i + 1), 1); } lck_mtx_unlock(&dtrace_lock); @@ -7118,71 +9017,58 @@ dtrace_probe_create(dtrace_provider_id_t prov, const char *mod, dtrace_id_t id; if (provider == dtrace_provider) { - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); } else { lck_mtx_lock(&dtrace_lock); } id = (dtrace_id_t)(uintptr_t)vmem_alloc(dtrace_arena, 1, VM_BESTFIT | VM_SLEEP); -#if !defined(__APPLE__) - probe = kmem_zalloc(sizeof (dtrace_probe_t), KM_SLEEP); -#else + probe = zalloc(dtrace_probe_t_zone); bzero(probe, sizeof (dtrace_probe_t)); -#endif probe->dtpr_id = id; probe->dtpr_gen = dtrace_probegen++; - probe->dtpr_mod = dtrace_strdup(mod); - probe->dtpr_func = dtrace_strdup(func); - probe->dtpr_name = dtrace_strdup(name); + probe->dtpr_mod = dtrace_strref(mod); + probe->dtpr_func = dtrace_strref(func); + probe->dtpr_name = dtrace_strref(name); probe->dtpr_arg = arg; probe->dtpr_aframes = aframes; probe->dtpr_provider = provider; + dtrace_hash_add(dtrace_byprov, probe); dtrace_hash_add(dtrace_bymod, probe); dtrace_hash_add(dtrace_byfunc, probe); dtrace_hash_add(dtrace_byname, probe); - if (id - 1 >= dtrace_nprobes) { + if (id - 1 >= (dtrace_id_t)dtrace_nprobes) { size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *); - size_t nsize = osize << 1; - - if (nsize == 0) { - ASSERT(osize == 0); - ASSERT(dtrace_probes == NULL); - nsize = sizeof (dtrace_probe_t *); - } + size_t nsize = osize * 2; probes = kmem_zalloc(nsize, KM_SLEEP); - if (dtrace_probes == NULL) { - ASSERT(osize == 0); - dtrace_probes = probes; - dtrace_nprobes = 1; - } else { - dtrace_probe_t **oprobes = dtrace_probes; + dtrace_probe_t **oprobes = dtrace_probes; - bcopy(oprobes, probes, osize); - dtrace_membar_producer(); - dtrace_probes = probes; + bcopy(oprobes, probes, osize); + dtrace_membar_producer(); + dtrace_probes = probes; - dtrace_sync(); + dtrace_sync(); - /* - * All CPUs are now seeing the new probes array; we can - * 
safely free the old array. - */ - kmem_free(oprobes, osize); - dtrace_nprobes <<= 1; - } + /* + * All CPUs are now seeing the new probes array; we can + * safely free the old array. + */ + kmem_free(oprobes, osize); + dtrace_nprobes *= 2; - ASSERT(id - 1 < dtrace_nprobes); + ASSERT(id - 1 < (dtrace_id_t)dtrace_nprobes); } ASSERT(dtrace_probes[id - 1] == NULL); dtrace_probes[id - 1] = probe; + provider->dtpv_probe_count++; if (provider != dtrace_provider) lck_mtx_unlock(&dtrace_lock); @@ -7193,18 +9079,19 @@ dtrace_probe_create(dtrace_provider_id_t prov, const char *mod, static dtrace_probe_t * dtrace_probe_lookup_id(dtrace_id_t id) { - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - if (id == 0 || id > dtrace_nprobes) + if (id == 0 || id > (dtrace_id_t)dtrace_nprobes) return (NULL); return (dtrace_probes[id - 1]); } static int -dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg) +dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg1, void *arg2) { - *((dtrace_id_t *)arg) = probe->dtpr_id; +#pragma unused(arg2) + *((dtrace_id_t *)arg1) = probe->dtpr_id; return (DTRACE_MATCH_DONE); } @@ -7221,19 +9108,23 @@ dtrace_probe_lookup(dtrace_provider_id_t prid, const char *mod, dtrace_id_t id; int match; - pkey.dtpk_prov = ((dtrace_provider_t *)prid)->dtpv_name; + lck_mtx_lock(&dtrace_lock); + + pkey.dtpk_prov = dtrace_strref(((dtrace_provider_t *)prid)->dtpv_name); pkey.dtpk_pmatch = &dtrace_match_string; - pkey.dtpk_mod = mod; + pkey.dtpk_mod = dtrace_strref(mod); pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul; - pkey.dtpk_func = func; + pkey.dtpk_func = dtrace_strref(func); pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul; - pkey.dtpk_name = name; + pkey.dtpk_name = dtrace_strref(name); pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul; pkey.dtpk_id = DTRACE_IDNONE; - lck_mtx_lock(&dtrace_lock); match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0, - dtrace_probe_lookup_match, &id); + dtrace_probe_lookup_match, &id, NULL); + + dtrace_probekey_release(&pkey); + lck_mtx_unlock(&dtrace_lock); ASSERT(match == 1 || match == 0); @@ -7269,6 +9160,7 @@ dtrace_probe_description(const dtrace_probe_t *prp, dtrace_probedesc_t *pdp) bzero(pdp, sizeof (dtrace_probedesc_t)); pdp->dtpd_id = prp->dtpr_id; + /* APPLE NOTE: Darwin employs size bounded string operation. */ (void) strlcpy(pdp->dtpd_provider, prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN); @@ -7298,20 +9190,19 @@ dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv) struct modctl *ctl; int all = 0; - lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); if (prv == NULL) { all = 1; prv = dtrace_provider; } - + do { /* * First, call the blanket provide operation. */ prv->dtpv_pops.dtps_provide(prv->dtpv_arg, desc); - -#if !defined(__APPLE__) + /* * Now call the per-module provide operation. We will grab * mod_lock to prevent the list from being modified. Note @@ -7319,37 +9210,14 @@ dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv) * (mod_busy can only be changed with mod_lock held.) 
*/ lck_mtx_lock(&mod_lock); - - ctl = &modules; - do { - if (ctl->mod_busy || ctl->mod_mp == NULL) - continue; - - prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl); - - } while ((ctl = ctl->mod_next) != &modules); - - lck_mtx_unlock(&mod_lock); -#else -#if 0 /* FIXME: Workaround for PR_4643546 */ - simple_lock(&kmod_lock); - kmod_info_t *ktl = kmod; - while (ktl) { - prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ktl); - ktl = ktl->next; + ctl = dtrace_modctl_list; + while (ctl) { + prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl); + ctl = ctl->mod_next; } - simple_unlock(&kmod_lock); -#else - /* - * Don't bother to iterate over the kmod list. At present only fbt - * offers a provide_module in its dtpv_pops, and then it ignores the - * module anyway. - */ - prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, NULL); -#endif -#endif /* __APPLE__ */ + lck_mtx_unlock(&mod_lock); } while (all && (prv = prv->dtpv_next) != NULL); } @@ -7395,14 +9263,15 @@ dtrace_probe_foreach(uintptr_t offs) } static int -dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab) +dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab, dtrace_ecbdesc_t *ep) { dtrace_probekey_t pkey; uint32_t priv; uid_t uid; zoneid_t zoneid; + int err; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); dtrace_ecb_create_cache = NULL; @@ -7411,7 +9280,7 @@ dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab) * If we're passed a NULL description, we're being asked to * create an ECB with a NULL probe. */ - (void) dtrace_ecb_create_enable(NULL, enab); + (void) dtrace_ecb_create_enable(NULL, enab, ep); return (0); } @@ -7419,8 +9288,11 @@ dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab) dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred, &priv, &uid, &zoneid); - return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable, - enab)); + err = dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable, enab, ep); + + dtrace_probekey_release(&pkey); + + return err; } /* @@ -7452,7 +9324,7 @@ dtrace_dofprov2hprov(dtrace_helper_provdesc_t *hprov, } static void -dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) +dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, proc_t *p) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; @@ -7501,7 +9373,7 @@ dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) */ dtrace_dofprov2hprov(&dhpv, provider, strtab); - if ((parg = mops->dtms_provide_pid(meta->dtm_arg, &dhpv, pid)) == NULL) + if ((parg = mops->dtms_provide_proc(meta->dtm_arg, &dhpv, p)) == NULL) return; meta->dtm_count++; @@ -7516,15 +9388,15 @@ dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) dhpb.dthpb_mod = dhp->dofhp_mod; dhpb.dthpb_func = strtab + probe->dofpr_func; dhpb.dthpb_name = strtab + probe->dofpr_name; -#if defined(__APPLE__) - dhpb.dthpb_base = dhp->dofhp_addr; -#else +#if !defined(__APPLE__) dhpb.dthpb_base = probe->dofpr_addr; +#else + dhpb.dthpb_base = dhp->dofhp_addr; /* FIXME: James, why? 
*/ #endif - dhpb.dthpb_offs = off + probe->dofpr_offidx; + dhpb.dthpb_offs = (int32_t *)(off + probe->dofpr_offidx); dhpb.dthpb_noffs = probe->dofpr_noffs; if (enoff != NULL) { - dhpb.dthpb_enoffs = enoff + probe->dofpr_enoffidx; + dhpb.dthpb_enoffs = (int32_t *)(enoff + probe->dofpr_enoffidx); dhpb.dthpb_nenoffs = probe->dofpr_nenoffs; } else { dhpb.dthpb_enoffs = NULL; @@ -7538,16 +9410,27 @@ dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) mops->dtms_create_probe(meta->dtm_arg, parg, &dhpb); } + + /* + * Since we just created probes, we need to match our enablings + * against those, with a precondition knowing that we have only + * added probes from this provider + */ + char *prov_name = mops->dtms_provider_name(parg); + ASSERT(prov_name != NULL); + dtrace_match_cond_t cond = {dtrace_cond_provider_match, (void*)prov_name}; + + dtrace_enabling_matchall_with_cond(&cond); } static void -dtrace_helper_provide(dof_helper_t *dhp, pid_t pid) +dtrace_helper_provide(dof_helper_t *dhp, proc_t *p) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; - int i; + uint32_t i; - lck_mtx_assert(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); for (i = 0; i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + @@ -7556,21 +9439,12 @@ dtrace_helper_provide(dof_helper_t *dhp, pid_t pid) if (sec->dofs_type != DOF_SECT_PROVIDER) continue; - dtrace_helper_provide_one(dhp, sec, pid); + dtrace_helper_provide_one(dhp, sec, p); } - - /* - * We may have just created probes, so we must now rematch against - * any retained enablings. Note that this call will acquire both - * cpu_lock and dtrace_lock; the fact that we are holding - * dtrace_meta_lock now is what defines the ordering with respect to - * these three locks. 
- */ - dtrace_enabling_matchall(); } static void -dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) +dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, proc_t *p) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; @@ -7592,19 +9466,19 @@ dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) */ dtrace_dofprov2hprov(&dhpv, provider, strtab); - mops->dtms_remove_pid(meta->dtm_arg, &dhpv, pid); + mops->dtms_remove_proc(meta->dtm_arg, &dhpv, p); meta->dtm_count--; } static void -dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid) +dtrace_helper_provider_remove(dof_helper_t *dhp, proc_t *p) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; - int i; + uint32_t i; - lck_mtx_assert(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); for (i = 0; i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + @@ -7613,7 +9487,7 @@ dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid) if (sec->dofs_type != DOF_SECT_PROVIDER) continue; - dtrace_helper_provider_remove_one(dhp, sec, pid); + dtrace_helper_provider_remove_one(dhp, sec, p); } } @@ -7629,7 +9503,7 @@ dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg, { dtrace_meta_t *meta; dtrace_helpers_t *help, *next; - int i; + uint_t i; *idp = DTRACE_METAPROVNONE; @@ -7645,8 +9519,8 @@ dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg, if (mops == NULL || mops->dtms_create_probe == NULL || - mops->dtms_provide_pid == NULL || - mops->dtms_remove_pid == NULL) { + mops->dtms_provide_proc == NULL || + mops->dtms_remove_proc == NULL) { cmn_err(CE_WARN, "failed to register meta-register %s: " "invalid ops", name); return (EINVAL); @@ -7654,8 +9528,6 @@ dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg, meta = kmem_zalloc(sizeof (dtrace_meta_t), KM_SLEEP); meta->dtm_mops = *mops; - meta->dtm_name = kmem_alloc(strlen(name) + 1, KM_SLEEP); - (void) strcpy(meta->dtm_name, name); meta->dtm_arg = arg; lck_mtx_lock(&dtrace_meta_lock); @@ -7666,11 +9538,12 @@ dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg, lck_mtx_unlock(&dtrace_meta_lock); cmn_err(CE_WARN, "failed to register meta-register %s: " "user-land meta-provider exists", name); - kmem_free(meta->dtm_name, strlen(meta->dtm_name) + 1); kmem_free(meta, sizeof (dtrace_meta_t)); return (EINVAL); } + meta->dtm_name = dtrace_strref(name); + dtrace_meta_pid = meta; *idp = (dtrace_meta_provider_id_t)meta; @@ -7686,8 +9559,12 @@ dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg, while (help != NULL) { for (i = 0; i < help->dthps_nprovs; i++) { + proc_t *p = proc_find(help->dthps_pid); + if (p == PROC_NULL) + continue; dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov, - help->dthps_pid); + p); + proc_rele(p); } next = help->dthps_next; @@ -7725,10 +9602,11 @@ dtrace_meta_unregister(dtrace_meta_provider_id_t id) *pp = NULL; + dtrace_strunref(old->dtm_name); + lck_mtx_unlock(&dtrace_lock); lck_mtx_unlock(&dtrace_meta_lock); - kmem_free(old->dtm_name, strlen(old->dtm_name) + 1); kmem_free(old, sizeof (dtrace_meta_t)); return (0); @@ -7771,13 +9649,16 @@ static int dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, cred_t *cr) { - int err = 0, i; + int err = 0; + uint_t i; + int (*efunc)(uint_t pc, const char *, ...) 
= dtrace_difo_err; - int kcheck; + int kcheckload; uint_t pc; + int maxglobal = -1, maxlocal = -1, maxtlocal = -1; - kcheck = cr == NULL || - PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE) == 0; + kcheckload = cr == NULL || + (vstate->dtvs_state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) == 0; dp->dtdo_destructive = 0; @@ -7816,7 +9697,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) - err += efunc(pc, "cannot write to %r0\n"); + err += efunc(pc, "cannot write to %%r0\n"); break; case DIF_OP_NOT: case DIF_OP_MOV: @@ -7828,7 +9709,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) - err += efunc(pc, "cannot write to %r0\n"); + err += efunc(pc, "cannot write to %%r0\n"); break; case DIF_OP_LDSB: case DIF_OP_LDSH: @@ -7844,8 +9725,8 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) - err += efunc(pc, "cannot write to %r0\n"); - if (kcheck) + err += efunc(pc, "cannot write to %%r0\n"); + if (kcheckload) dp->dtdo_buf[pc] = DIF_INSTR_LOAD(op + DIF_OP_RLDSB - DIF_OP_LDSB, r1, rd); break; @@ -7863,7 +9744,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) - err += efunc(pc, "cannot write to %r0\n"); + err += efunc(pc, "cannot write to %%r0\n"); break; case DIF_OP_ULDSB: case DIF_OP_ULDSH: @@ -7879,7 +9760,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) - err += efunc(pc, "cannot write to %r0\n"); + err += efunc(pc, "cannot write to %%r0\n"); break; case DIF_OP_STB: case DIF_OP_STH: @@ -7949,7 +9830,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) - err += efunc(pc, "cannot write to %r0\n"); + err += efunc(pc, "cannot write to %%r0\n"); break; case DIF_OP_SETS: if (DIF_INSTR_STRING(instr) >= dp->dtdo_strlen) { @@ -7959,7 +9840,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) - err += efunc(pc, "cannot write to %r0\n"); + err += efunc(pc, "cannot write to %%r0\n"); break; case DIF_OP_LDGA: case DIF_OP_LDTA: @@ -7970,7 +9851,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) - err += efunc(pc, "cannot write to %r0\n"); + err += efunc(pc, "cannot write to %%r0\n"); break; case DIF_OP_LDGS: case DIF_OP_LDTS: @@ -7982,7 +9863,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) - err += efunc(pc, "cannot write to %r0\n"); + err += efunc(pc, "cannot write to %%r0\n"); break; case DIF_OP_STGS: case DIF_OP_STTS: @@ -7995,16 +9876,25 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, err += efunc(pc, "invalid register %u\n", rd); break; case DIF_OP_CALL: - if (subr > DIF_SUBR_MAX) + if (subr > DIF_SUBR_MAX && + !(subr >= DIF_SUBR_APPLE_MIN && subr <= DIF_SUBR_APPLE_MAX)) err += efunc(pc, "invalid 
subr %u\n", subr); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) - err += efunc(pc, "cannot write to %r0\n"); - - if (subr == DIF_SUBR_COPYOUT || - subr == DIF_SUBR_COPYOUTSTR) { + err += efunc(pc, "cannot write to %%r0\n"); + + switch (subr) { + case DIF_SUBR_COPYOUT: + case DIF_SUBR_COPYOUTSTR: + case DIF_SUBR_KDEBUG_TRACE: + case DIF_SUBR_KDEBUG_TRACE_STRING: + case DIF_SUBR_PHYSMEM_READ: + case DIF_SUBR_PHYSMEM_WRITE: dp->dtdo_destructive = 1; + break; + default: + break; } break; case DIF_OP_PUSHTR: @@ -8023,6 +9913,16 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, if (rs >= nregs) err += efunc(pc, "invalid register %u\n", rs); break; + case DIF_OP_STRIP: + if (r1 >= nregs) + err += efunc(pc, "invalid register %u\n", r1); + if (!dtrace_is_valid_ptrauth_key(r2)) + err += efunc(pc, "invalid key\n"); + if (rd >= nregs) + err += efunc(pc, "invalid register %u\n", rd); + if (rd == 0) + err += efunc(pc, "cannot write to %%r0\n"); + break; default: err += efunc(pc, "invalid opcode %u\n", DIF_INSTR_OP(instr)); @@ -8035,7 +9935,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, "expected 'ret' as last DIF instruction\n"); } - if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) { + if (!(dp->dtdo_rtype.dtdt_flags & (DIF_TF_BYREF | DIF_TF_BYUREF))) { /* * If we're not returning by reference, the size must be either * 0 or the size of one of the base types. @@ -8049,14 +9949,15 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, break; default: - err += efunc(dp->dtdo_len - 1, "bad return size"); + err += efunc(dp->dtdo_len - 1, "bad return size\n"); } } for (i = 0; i < dp->dtdo_varlen && err == 0; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i], *existing = NULL; dtrace_diftype_t *vt, *et; - uint_t id, ndx; + uint_t id; + int ndx; if (v->dtdv_scope != DIFV_SCOPE_GLOBAL && v->dtdv_scope != DIFV_SCOPE_THREAD && @@ -8090,6 +9991,9 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, switch (v->dtdv_scope) { case DIFV_SCOPE_GLOBAL: + if (maxglobal == -1 || ndx > maxglobal) + maxglobal = ndx; + if (ndx < vstate->dtvs_nglobals) { dtrace_statvar_t *svar; @@ -8100,11 +10004,16 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, break; case DIFV_SCOPE_THREAD: + if (maxtlocal == -1 || ndx > maxtlocal) + maxtlocal = ndx; + if (ndx < vstate->dtvs_ntlocals) existing = &vstate->dtvs_tlocals[ndx]; break; case DIFV_SCOPE_LOCAL: + if (maxlocal == -1 || ndx > maxlocal) + maxlocal = ndx; if (ndx < vstate->dtvs_nlocals) { dtrace_statvar_t *svar; @@ -8123,9 +10032,10 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, break; } - if (v->dtdv_scope == DIFV_SCOPE_GLOBAL && - vt->dtdt_size > dtrace_global_maxsize) { - err += efunc(i, "oversized by-ref global\n"); + if ((v->dtdv_scope == DIFV_SCOPE_GLOBAL || + v->dtdv_scope == DIFV_SCOPE_LOCAL) && + vt->dtdt_size > dtrace_statvar_maxsize) { + err += efunc(i, "oversized by-ref static\n"); break; } } @@ -8152,6 +10062,37 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, } } + for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) { + dif_instr_t instr = dp->dtdo_buf[pc]; + + uint_t v = DIF_INSTR_VAR(instr); + uint_t op = DIF_INSTR_OP(instr); + + switch (op) { + case DIF_OP_LDGS: + case DIF_OP_LDGAA: + case DIF_OP_STGS: + case DIF_OP_STGAA: + if (v > (uint_t)(DIF_VAR_OTHER_UBASE + maxglobal)) + err += efunc(pc, "invalid variable %u\n", 
v); + break; + case DIF_OP_LDTS: + case DIF_OP_LDTAA: + case DIF_OP_STTS: + case DIF_OP_STTAA: + if (v > (uint_t)(DIF_VAR_OTHER_UBASE + maxtlocal)) + err += efunc(pc, "invalid variable %u\n", v); + break; + case DIF_OP_LDLS: + case DIF_OP_STLS: + if (v > (uint_t)(DIF_VAR_OTHER_UBASE + maxlocal)) + err += efunc(pc, "invalid variable %u\n", v); + break; + default: + break; + } + } + return (err); } @@ -8275,22 +10216,34 @@ dtrace_difo_validate_helper(dtrace_difo_t *dp) break; case DIF_OP_CALL: - if (subr == DIF_SUBR_ALLOCA || - subr == DIF_SUBR_BCOPY || - subr == DIF_SUBR_COPYIN || - subr == DIF_SUBR_COPYINTO || - subr == DIF_SUBR_COPYINSTR || - subr == DIF_SUBR_INDEX || - subr == DIF_SUBR_LLTOSTR || - subr == DIF_SUBR_RINDEX || - subr == DIF_SUBR_STRCHR || - subr == DIF_SUBR_STRJOIN || - subr == DIF_SUBR_STRRCHR || - subr == DIF_SUBR_STRSTR || - subr == DIF_SUBR_CHUD) + switch (subr) { + case DIF_SUBR_ALLOCA: + case DIF_SUBR_BCOPY: + case DIF_SUBR_COPYIN: + case DIF_SUBR_COPYINTO: + case DIF_SUBR_COPYINSTR: + case DIF_SUBR_HTONS: + case DIF_SUBR_HTONL: + case DIF_SUBR_HTONLL: + case DIF_SUBR_INDEX: + case DIF_SUBR_INET_NTOA: + case DIF_SUBR_INET_NTOA6: + case DIF_SUBR_INET_NTOP: + case DIF_SUBR_JSON: + case DIF_SUBR_LLTOSTR: + case DIF_SUBR_NTOHS: + case DIF_SUBR_NTOHL: + case DIF_SUBR_NTOHLL: + case DIF_SUBR_RINDEX: + case DIF_SUBR_STRCHR: + case DIF_SUBR_STRTOLL: + case DIF_SUBR_STRJOIN: + case DIF_SUBR_STRRCHR: + case DIF_SUBR_STRSTR: break; - - err += efunc(pc, "invalid subr %u\n", subr); + default: + err += efunc(pc, "invalid subr %u\n", subr); + } break; default: @@ -8309,7 +10262,7 @@ dtrace_difo_validate_helper(dtrace_difo_t *dp) static int dtrace_difo_cacheable(dtrace_difo_t *dp) { - int i; + uint_t i; if (dp == NULL) return (0); @@ -8354,9 +10307,9 @@ dtrace_difo_cacheable(dtrace_difo_t *dp) static void dtrace_difo_hold(dtrace_difo_t *dp) { - int i; + uint_t i; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); dp->dtdo_refcnt++; ASSERT(dp->dtdo_refcnt != 0); @@ -8386,7 +10339,7 @@ dtrace_difo_hold(dtrace_difo_t *dp) static void dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { - uint64_t sval; + uint64_t sval = 0; dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */ const dif_instr_t *text = dp->dtdo_buf; uint_t pc, srd = 0; @@ -8452,6 +10405,9 @@ dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate) if (srd == 0) return; + if (sval > LONG_MAX) + return; + tupregs[ttop++].dttk_size = sval; } @@ -8513,6 +10469,19 @@ dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate) */ size = P2ROUNDUP(size, sizeof (uint64_t)); + /* + * Before setting the chunk size, check that we're not going + * to set it to a negative value... + */ + if (size > LONG_MAX) + return; + + /* + * ...and make certain that we didn't badly overflow. 
+ */ + if (size < ksize || size < sizeof (dtrace_dynvar_t)) + return; + if (size > vstate->dtvs_dynvars.dtds_chunksize) vstate->dtvs_dynvars.dtds_chunksize = size; } @@ -8521,18 +10490,19 @@ dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate) static void dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { - int i, oldsvars, osz, nsz, otlocals, ntlocals; - uint_t id; + int oldsvars, osz, nsz, otlocals, ntlocals; + uint_t i, id; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dp->dtdo_buf != NULL && dp->dtdo_len != 0); for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; - dtrace_statvar_t *svar, ***svarp; + dtrace_statvar_t *svar; + dtrace_statvar_t ***svarp = NULL; size_t dsize = 0; uint8_t scope = v->dtdv_scope; - int *np; + int *np = (int *)NULL; if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE) continue; @@ -8541,7 +10511,7 @@ dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate) switch (scope) { case DIFV_SCOPE_THREAD: - while (id >= (otlocals = vstate->dtvs_ntlocals)) { + while (id >= (uint_t)(otlocals = vstate->dtvs_ntlocals)) { dtrace_difv_t *tlocals; if ((ntlocals = (otlocals << 1)) == 0) @@ -8591,7 +10561,7 @@ dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate) ASSERT(0); } - while (id >= (oldsvars = *np)) { + while (id >= (uint_t)(oldsvars = *np)) { dtrace_statvar_t **statics; int newsvars, oldsize, newsize; @@ -8678,16 +10648,17 @@ dtrace_difo_duplicate(dtrace_difo_t *dp, dtrace_vstate_t *vstate) static void dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { - int i; + uint_t i; ASSERT(dp->dtdo_refcnt == 0); for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; - dtrace_statvar_t *svar, **svarp; + dtrace_statvar_t *svar; + dtrace_statvar_t **svarp = NULL; uint_t id; uint8_t scope = v->dtdv_scope; - int *np; + int *np = NULL; switch (scope) { case DIFV_SCOPE_THREAD: @@ -8711,7 +10682,8 @@ dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate) continue; id -= DIF_VAR_OTHER_UBASE; - ASSERT(id < *np); + + ASSERT(id < (uint_t)*np); svar = svarp[id]; ASSERT(svar != NULL); @@ -8721,7 +10693,7 @@ dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate) continue; if (svar->dtsv_size != 0) { - ASSERT(svar->dtsv_data != NULL); + ASSERT(svar->dtsv_data != 0); kmem_free((void *)(uintptr_t)svar->dtsv_data, svar->dtsv_size); } @@ -8741,9 +10713,9 @@ dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate) static void dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { - int i; + uint_t i; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dp->dtdo_refcnt != 0); for (i = 0; i < dp->dtdo_varlen; i++) { @@ -8764,18 +10736,35 @@ dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate) /* * DTrace Format Functions */ + +static dtrace_format_t* +dtrace_format_new(char *str) +{ + dtrace_format_t *fmt = NULL; + size_t bufsize = strlen(str) + 1; + + fmt = kmem_zalloc(sizeof(*fmt) + bufsize, KM_SLEEP); + + fmt->dtf_refcount = 1; + (void) strlcpy(fmt->dtf_str, str, bufsize); + + return fmt; +} + static uint16_t dtrace_format_add(dtrace_state_t *state, char *str) { - char *fmt, **new; - uint16_t ndx, len = strlen(str) + 1; - - fmt = kmem_zalloc(len, KM_SLEEP); - bcopy(str, fmt, len); + dtrace_format_t **new; + uint16_t ndx; for (ndx = 0; ndx < state->dts_nformats; ndx++) { if (state->dts_formats[ndx] == NULL) { - 
state->dts_formats[ndx] = fmt; + state->dts_formats[ndx] = dtrace_format_new(str); + return (ndx + 1); + } + else if (strcmp(state->dts_formats[ndx]->dtf_str, str) == 0) { + VERIFY(state->dts_formats[ndx]->dtf_refcount < UINT64_MAX); + state->dts_formats[ndx]->dtf_refcount++; return (ndx + 1); } } @@ -8785,7 +10774,6 @@ dtrace_format_add(dtrace_state_t *state, char *str) * This is only likely if a denial-of-service attack is being * attempted. As such, it's okay to fail silently here. */ - kmem_free(fmt, len); return (0); } @@ -8794,16 +10782,16 @@ dtrace_format_add(dtrace_state_t *state, char *str) * number of formats. */ ndx = state->dts_nformats++; - new = kmem_alloc((ndx + 1) * sizeof (char *), KM_SLEEP); + new = kmem_alloc((ndx + 1) * sizeof (*state->dts_formats), KM_SLEEP); if (state->dts_formats != NULL) { ASSERT(ndx != 0); - bcopy(state->dts_formats, new, ndx * sizeof (char *)); - kmem_free(state->dts_formats, ndx * sizeof (char *)); + bcopy(state->dts_formats, new, ndx * sizeof (*state->dts_formats)); + kmem_free(state->dts_formats, ndx * sizeof (*state->dts_formats)); } state->dts_formats = new; - state->dts_formats[ndx] = fmt; + state->dts_formats[ndx] = dtrace_format_new(str); return (ndx + 1); } @@ -8811,15 +10799,22 @@ dtrace_format_add(dtrace_state_t *state, char *str) static void dtrace_format_remove(dtrace_state_t *state, uint16_t format) { - char *fmt; + dtrace_format_t *fmt; ASSERT(state->dts_formats != NULL); ASSERT(format <= state->dts_nformats); - ASSERT(state->dts_formats[format - 1] != NULL); fmt = state->dts_formats[format - 1]; - kmem_free(fmt, strlen(fmt) + 1); - state->dts_formats[format - 1] = NULL; + + ASSERT(fmt != NULL); + VERIFY(fmt->dtf_refcount > 0); + + fmt->dtf_refcount--; + + if (fmt->dtf_refcount == 0) { + kmem_free(fmt, DTRACE_FORMAT_SIZE(fmt)); + state->dts_formats[format - 1] = NULL; + } } static void @@ -8835,15 +10830,15 @@ dtrace_format_destroy(dtrace_state_t *state) ASSERT(state->dts_formats != NULL); for (i = 0; i < state->dts_nformats; i++) { - char *fmt = state->dts_formats[i]; + dtrace_format_t *fmt = state->dts_formats[i]; if (fmt == NULL) continue; - kmem_free(fmt, strlen(fmt) + 1); + kmem_free(fmt, DTRACE_FORMAT_SIZE(fmt)); } - kmem_free(state->dts_formats, state->dts_nformats * sizeof (char *)); + kmem_free(state->dts_formats, state->dts_nformats * sizeof (*state->dts_formats)); state->dts_nformats = 0; state->dts_formats = NULL; } @@ -8856,7 +10851,7 @@ dtrace_predicate_create(dtrace_difo_t *dp) { dtrace_predicate_t *pred; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dp->dtdo_refcnt != 0); pred = kmem_zalloc(sizeof (dtrace_predicate_t), KM_SLEEP); @@ -8886,7 +10881,7 @@ dtrace_predicate_create(dtrace_difo_t *dp) static void dtrace_predicate_hold(dtrace_predicate_t *pred) { - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(pred->dtp_difo != NULL && pred->dtp_difo->dtdo_refcnt != 0); ASSERT(pred->dtp_refcnt > 0); @@ -8897,8 +10892,9 @@ static void dtrace_predicate_release(dtrace_predicate_t *pred, dtrace_vstate_t *vstate) { dtrace_difo_t *dp = pred->dtp_difo; +#pragma unused(dp) /* __APPLE__ */ - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dp != NULL && dp->dtdo_refcnt != 0); ASSERT(pred->dtp_refcnt > 0); @@ -8917,8 +10913,8 @@ dtrace_actdesc_create(dtrace_actkind_t kind, uint32_t ntuple, { dtrace_actdesc_t *act; -/* 
ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != NULL && - arg >= KERNELBASE) || (arg == NULL && kind == DTRACEACT_PRINTA));*/ + ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != 0 && + arg >= KERNELBASE) || (arg == 0 && kind == DTRACEACT_PRINTA)); act = kmem_zalloc(sizeof (dtrace_actdesc_t), KM_SLEEP); act->dtad_kind = kind; @@ -8954,8 +10950,8 @@ dtrace_actdesc_release(dtrace_actdesc_t *act, dtrace_vstate_t *vstate) if (DTRACEACT_ISPRINTFLIKE(kind)) { char *str = (char *)(uintptr_t)act->dtad_arg; -/* ASSERT((str != NULL && (uintptr_t)str >= KERNELBASE) || - (str == NULL && act->dtad_kind == DTRACEACT_PRINTA));*/ + ASSERT((str != NULL && (uintptr_t)str >= KERNELBASE) || + (str == NULL && act->dtad_kind == DTRACEACT_PRINTA)); if (str != NULL) kmem_free(str, strlen(str) + 1); @@ -8973,7 +10969,7 @@ dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe) dtrace_ecb_t *ecb; dtrace_epid_t epid; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ecb = kmem_zalloc(sizeof (dtrace_ecb_t), KM_SLEEP); ecb->dte_predicate = NULL; @@ -8981,18 +10977,18 @@ dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe) /* * The default size is the size of the default action: recording - * the epid. + * the header. */ - ecb->dte_size = ecb->dte_needed = sizeof (dtrace_epid_t); + ecb->dte_size = ecb->dte_needed = sizeof (dtrace_rechdr_t); ecb->dte_alignment = sizeof (dtrace_epid_t); epid = state->dts_epid++; - if (epid - 1 >= state->dts_necbs) { + if (epid - 1 >= (dtrace_epid_t)state->dts_necbs) { dtrace_ecb_t **oecbs = state->dts_ecbs, **ecbs; int necbs = state->dts_necbs << 1; - ASSERT(epid == state->dts_necbs + 1); + ASSERT(epid == (dtrace_epid_t)state->dts_necbs + 1); if (necbs == 0) { ASSERT(oecbs == NULL); @@ -9034,22 +11030,23 @@ dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe) return (ecb); } -static void +static int dtrace_ecb_enable(dtrace_ecb_t *ecb) { dtrace_probe_t *probe = ecb->dte_probe; - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(ecb->dte_next == NULL); if (probe == NULL) { /* * This is the NULL probe -- there's nothing to do. */ - return; + return(0); } + probe->dtpr_provider->dtpv_ecb_count++; if (probe->dtpr_ecb == NULL) { dtrace_provider_t *prov = probe->dtpr_provider; @@ -9061,8 +11058,8 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb) if (ecb->dte_predicate != NULL) probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid; - prov->dtpv_pops.dtps_enable(prov->dtpv_arg, - probe->dtpr_id, probe->dtpr_arg); + return (prov->dtpv_pops.dtps_enable(prov->dtpv_arg, + probe->dtpr_id, probe->dtpr_arg)); } else { /* * This probe is already active. Swing the last pointer to @@ -9075,128 +11072,98 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb) probe->dtpr_predcache = 0; dtrace_sync(); + return(0); } } -static void +static int dtrace_ecb_resize(dtrace_ecb_t *ecb) { - uint32_t maxalign = sizeof (dtrace_epid_t); - uint32_t align = sizeof (uint8_t), offs, diff; dtrace_action_t *act; - int wastuple = 0; + uint32_t curneeded = UINT32_MAX; uint32_t aggbase = UINT32_MAX; - dtrace_state_t *state = ecb->dte_state; /* - * If we record anything, we always record the epid. (And we always - * record it first.) + * If we record anything, we always record the dtrace_rechdr_t. (And + * we always record it first.) 
*/ - offs = sizeof (dtrace_epid_t); - ecb->dte_size = ecb->dte_needed = sizeof (dtrace_epid_t); + ecb->dte_size = sizeof (dtrace_rechdr_t); + ecb->dte_alignment = sizeof (dtrace_epid_t); for (act = ecb->dte_action; act != NULL; act = act->dta_next) { dtrace_recdesc_t *rec = &act->dta_rec; + ASSERT(rec->dtrd_size > 0 || rec->dtrd_alignment == 1); - if ((align = rec->dtrd_alignment) > maxalign) - maxalign = align; - - if (!wastuple && act->dta_intuple) { - /* - * This is the first record in a tuple. Align the - * offset to be at offset 4 in an 8-byte aligned - * block. - */ - diff = offs + sizeof (dtrace_aggid_t); - - if ((diff = (diff & (sizeof (uint64_t) - 1)))) - offs += sizeof (uint64_t) - diff; - - aggbase = offs - sizeof (dtrace_aggid_t); - ASSERT(!(aggbase & (sizeof (uint64_t) - 1))); - } - - /*LINTED*/ - if (rec->dtrd_size != 0 && (diff = (offs & (align - 1)))) { - /* - * The current offset is not properly aligned; align it. - */ - offs += align - diff; - } - - rec->dtrd_offset = offs; - - if (offs + rec->dtrd_size > ecb->dte_needed) { - ecb->dte_needed = offs + rec->dtrd_size; - - if (ecb->dte_needed > state->dts_needed) - state->dts_needed = ecb->dte_needed; - } + ecb->dte_alignment = MAX(ecb->dte_alignment, rec->dtrd_alignment); if (DTRACEACT_ISAGG(act->dta_kind)) { dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act; - dtrace_action_t *first = agg->dtag_first, *prev; - ASSERT(rec->dtrd_size != 0 && first != NULL); - ASSERT(wastuple); + ASSERT(rec->dtrd_size != 0); + ASSERT(agg->dtag_first != NULL); + ASSERT(act->dta_prev->dta_intuple); ASSERT(aggbase != UINT32_MAX); + ASSERT(curneeded != UINT32_MAX); agg->dtag_base = aggbase; + curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment); + rec->dtrd_offset = curneeded; + if (curneeded + rec->dtrd_size < curneeded) + return (EINVAL); + curneeded += rec->dtrd_size; + ecb->dte_needed = MAX(ecb->dte_needed, curneeded); - while ((prev = first->dta_prev) != NULL && - DTRACEACT_ISAGG(prev->dta_kind)) { - agg = (dtrace_aggregation_t *)prev; - first = agg->dtag_first; - } + aggbase = UINT32_MAX; + curneeded = UINT32_MAX; + } else if (act->dta_intuple) { + if (curneeded == UINT32_MAX) { + /* + * This is the first record in a tuple. Align + * curneeded to be at offset 4 in an 8-byte + * aligned block. 
+ */ + ASSERT(act->dta_prev == NULL || !act->dta_prev->dta_intuple); + ASSERT(aggbase == UINT32_MAX); - if (prev != NULL) { - offs = prev->dta_rec.dtrd_offset + - prev->dta_rec.dtrd_size; - } else { - offs = sizeof (dtrace_epid_t); + curneeded = P2PHASEUP(ecb->dte_size, + sizeof (uint64_t), sizeof (dtrace_aggid_t)); + + aggbase = curneeded - sizeof (dtrace_aggid_t); + ASSERT(IS_P2ALIGNED(aggbase, + sizeof (uint64_t))); } - wastuple = 0; - } else { - if (!act->dta_intuple) - ecb->dte_size = offs + rec->dtrd_size; - offs += rec->dtrd_size; + curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment); + rec->dtrd_offset = curneeded; + curneeded += rec->dtrd_size; + if (curneeded + rec->dtrd_size < curneeded) + return (EINVAL); + } else { + /* tuples must be followed by an aggregation */ + ASSERT(act->dta_prev == NULL || !act->dta_prev->dta_intuple); + ecb->dte_size = P2ROUNDUP(ecb->dte_size, rec->dtrd_alignment); + rec->dtrd_offset = ecb->dte_size; + if (ecb->dte_size + rec->dtrd_size < ecb->dte_size) + return (EINVAL); + ecb->dte_size += rec->dtrd_size; + ecb->dte_needed = MAX(ecb->dte_needed, ecb->dte_size); } - - wastuple = act->dta_intuple; } if ((act = ecb->dte_action) != NULL && !(act->dta_kind == DTRACEACT_SPECULATE && act->dta_next == NULL) && - ecb->dte_size == sizeof (dtrace_epid_t)) { + ecb->dte_size == sizeof (dtrace_rechdr_t)) { /* - * If the size is still sizeof (dtrace_epid_t), then all + * If the size is still sizeof (dtrace_rechdr_t), then all * actions store no data; set the size to 0. */ - ecb->dte_alignment = maxalign; ecb->dte_size = 0; - - /* - * If the needed space is still sizeof (dtrace_epid_t), then - * all actions need no additional space; set the needed - * size to 0. - */ - if (ecb->dte_needed == sizeof (dtrace_epid_t)) - ecb->dte_needed = 0; - - return; } - /* - * Set our alignment, and make sure that the dte_size and dte_needed - * are aligned to the size of an EPID. - */ - ecb->dte_alignment = maxalign; - ecb->dte_size = (ecb->dte_size + (sizeof (dtrace_epid_t) - 1)) & - ~(sizeof (dtrace_epid_t) - 1); - ecb->dte_needed = (ecb->dte_needed + (sizeof (dtrace_epid_t) - 1)) & - ~(sizeof (dtrace_epid_t) - 1); - ASSERT(ecb->dte_size <= ecb->dte_needed); + ecb->dte_size = P2ROUNDUP(ecb->dte_size, sizeof (dtrace_epid_t)); + ecb->dte_needed = P2ROUNDUP(ecb->dte_needed, (sizeof (dtrace_epid_t))); + ecb->dte_state->dts_needed = MAX(ecb->dte_state->dts_needed, ecb->dte_needed); + return (0); } static dtrace_action_t * @@ -9217,11 +11184,12 @@ dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) switch (desc->dtad_kind) { case DTRACEAGG_MIN: - agg->dtag_initial = UINT64_MAX; + agg->dtag_initial = INT64_MAX; agg->dtag_aggregate = dtrace_aggregate_min; break; case DTRACEAGG_MAX: + agg->dtag_initial = INT64_MIN; agg->dtag_aggregate = dtrace_aggregate_max; break; @@ -9249,11 +11217,44 @@ dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) break; } + case DTRACEAGG_LLQUANTIZE: { + uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg); + uint16_t low = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg); + uint16_t high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg); + uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg); + int64_t v; + + agg->dtag_initial = desc->dtad_arg; + agg->dtag_aggregate = dtrace_aggregate_llquantize; + + if (factor < 2 || low >= high || nsteps < factor) + goto err; + + /* + * Now check that the number of steps evenly divides a power + * of the factor. 
(This assures both integer bucket size and + * linearity within each magnitude.) + */ + for (v = factor; v < nsteps; v *= factor) + continue; + + if ((v % nsteps) || (nsteps % factor)) + goto err; + + size = (dtrace_aggregate_llquantize_bucket(factor, low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t); + break; + } + case DTRACEAGG_AVG: agg->dtag_aggregate = dtrace_aggregate_avg; size = sizeof (uint64_t) * 2; break; + case DTRACEAGG_STDDEV: + agg->dtag_aggregate = dtrace_aggregate_stddev; + size = sizeof (uint64_t) * 4; + break; + case DTRACEAGG_SUM: agg->dtag_aggregate = dtrace_aggregate_sum; break; @@ -9312,13 +11313,13 @@ success: aggid = (dtrace_aggid_t)(uintptr_t)vmem_alloc(state->dts_aggid_arena, 1, VM_BESTFIT | VM_SLEEP); - if (aggid - 1 >= state->dts_naggregations) { + if (aggid - 1 >= (dtrace_aggid_t)state->dts_naggregations) { dtrace_aggregation_t **oaggs = state->dts_aggregations; dtrace_aggregation_t **aggs; int naggs = state->dts_naggregations << 1; int onaggs = state->dts_naggregations; - ASSERT(aggid == state->dts_naggregations + 1); + ASSERT(aggid == (dtrace_aggid_t)state->dts_naggregations + 1); if (naggs == 0) { ASSERT(oaggs == NULL); @@ -9376,10 +11377,11 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) uint16_t format = 0; dtrace_recdesc_t *rec; dtrace_state_t *state = ecb->dte_state; - dtrace_optval_t *opt = state->dts_options, nframes, strsize; + dtrace_optval_t *opt = state->dts_options; + dtrace_optval_t nframes=0, strsize; uint64_t arg = desc->dtad_arg; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(ecb->dte_action == NULL || ecb->dte_action->dta_refcnt == 1); if (DTRACEACT_ISAGG(desc->dtad_kind)) { @@ -9413,23 +11415,27 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) case DTRACEACT_PRINTA: case DTRACEACT_SYSTEM: case DTRACEACT_FREOPEN: + case DTRACEACT_DIFEXPR: /* * We know that our arg is a string -- turn it into a * format. 
*/ - if (arg == NULL) { - ASSERT(desc->dtad_kind == DTRACEACT_PRINTA); + if (arg == 0) { + ASSERT(desc->dtad_kind == DTRACEACT_PRINTA || + desc->dtad_kind == DTRACEACT_DIFEXPR); format = 0; } else { - ASSERT(arg != NULL); - /* ASSERT(arg > KERNELBASE); */ + ASSERT(arg != 0); + ASSERT(arg > KERNELBASE); format = dtrace_format_add(state, (char *)(uintptr_t)arg); } - /*FALLTHROUGH*/ + OS_FALLTHROUGH; case DTRACEACT_LIBACT: - case DTRACEACT_DIFEXPR: + case DTRACEACT_TRACEMEM: + case DTRACEACT_TRACEMEM_DYNSIZE: + case DTRACEACT_APPLEBINARY: /* __APPLE__ */ if (dp == NULL) return (EINVAL); @@ -9464,7 +11470,7 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) arg = DTRACE_USTACK_ARG(nframes, strsize); - /*FALLTHROUGH*/ + OS_FALLTHROUGH; case DTRACEACT_USTACK: if (desc->dtad_kind != DTRACEACT_JSTACK && (nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) { @@ -9516,6 +11522,7 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) case DTRACEACT_CHILL: case DTRACEACT_DISCARD: case DTRACEACT_RAISE: + case DTRACEACT_PIDRESUME: /* __APPLE__ */ if (dp == NULL) return (EINVAL); break; @@ -9528,7 +11535,7 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) break; case DTRACEACT_SPECULATE: - if (ecb->dte_size > sizeof (dtrace_epid_t)) + if (ecb->dte_size > sizeof (dtrace_rechdr_t)) return (EINVAL); if (dp == NULL) @@ -9641,7 +11648,7 @@ dtrace_ecb_action_remove(dtrace_ecb_t *ecb) ecb->dte_action = NULL; ecb->dte_action_last = NULL; - ecb->dte_size = sizeof (dtrace_epid_t); + ecb->dte_size = 0; } static void @@ -9653,7 +11660,7 @@ dtrace_ecb_disable(dtrace_ecb_t *ecb) dtrace_ecb_t *pecb, *prev = NULL; dtrace_probe_t *probe = ecb->dte_probe; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if (probe == NULL) { /* @@ -9681,6 +11688,7 @@ dtrace_ecb_disable(dtrace_ecb_t *ecb) probe->dtpr_ecb_last = prev; } + probe->dtpr_provider->dtpv_ecb_count--; /* * The ECB has been disconnected from the probe; now sync to assure * that all CPUs have seen the change before returning. 
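Those dtpv_ecb_count adjustments (incremented in dtrace_ecb_enable(), decremented here in dtrace_ecb_disable()) are what let the rewritten dtrace_unregister() earlier in this diff refuse a busy provider with one comparison instead of walking every probe. In miniature, and only as a hedged model (hypothetical names, no locking shown):

#include <errno.h>

typedef struct provider {
	unsigned long	ecb_count;	/* enabled ECBs that reference us */
} provider_t;

static void
ecb_enable(provider_t *pv)
{
	pv->ecb_count++;
}

static void
ecb_disable(provider_t *pv)
{
	pv->ecb_count--;
}

static int
provider_unregister(provider_t *pv)
{
	if (pv->ecb_count != 0)
		return (EBUSY);	/* a consumer still has an enabling live */
	/* ... safe to destroy the provider's probes and free it ... */
	return (0);
}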
@@ -9731,7 +11739,7 @@ dtrace_ecb_destroy(dtrace_ecb_t *ecb) dtrace_predicate_t *pred; dtrace_epid_t epid = ecb->dte_epid; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(ecb->dte_next == NULL); ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb); @@ -9756,7 +11764,7 @@ dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe, dtrace_provider_t *prov; dtrace_ecbdesc_t *desc = enab->dten_current; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(state != NULL); ecb = dtrace_ecb_add(state, probe); @@ -9826,21 +11834,25 @@ dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe, } } - dtrace_ecb_resize(ecb); + if ((enab->dten_error = dtrace_ecb_resize(ecb)) != 0) { + dtrace_ecb_destroy(ecb); + return (NULL); + } return (dtrace_ecb_create_cache = ecb); } static int -dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg) +dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg1, void *arg2) { dtrace_ecb_t *ecb; - dtrace_enabling_t *enab = arg; + dtrace_enabling_t *enab = arg1; + dtrace_ecbdesc_t *ep = arg2; dtrace_state_t *state = enab->dten_vstate->dtvs_state; ASSERT(state != NULL); - if (probe != NULL && probe->dtpr_gen < enab->dten_probegen) { + if (probe != NULL && ep != NULL && probe->dtpr_gen < ep->dted_probegen) { /* * This probe was created in a generation for which this * enabling has previously created ECBs; we don't want to @@ -9852,7 +11864,9 @@ dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg) if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL) return (DTRACE_MATCH_DONE); - dtrace_ecb_enable(ecb); + if (dtrace_ecb_enable(ecb) < 0) + return (DTRACE_MATCH_FAIL); + return (DTRACE_MATCH_NEXT); } @@ -9860,10 +11874,11 @@ static dtrace_ecb_t * dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id) { dtrace_ecb_t *ecb; +#pragma unused(ecb) /* __APPLE__ */ - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - if (id == 0 || id > state->dts_necbs) + if (id == 0 || id > (dtrace_epid_t)state->dts_necbs) return (NULL); ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL); @@ -9876,10 +11891,11 @@ static dtrace_aggregation_t * dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id) { dtrace_aggregation_t *agg; +#pragma unused(agg) /* __APPLE__ */ - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - if (id == 0 || id > state->dts_naggregations) + if (id == 0 || id > (dtrace_aggid_t)state->dts_naggregations) return (NULL); ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL); @@ -9910,11 +11926,13 @@ dtrace_buffer_switch(dtrace_buffer_t *buf) caddr_t tomax = buf->dtb_tomax; caddr_t xamot = buf->dtb_xamot; dtrace_icookie_t cookie; + hrtime_t now; ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); ASSERT(!(buf->dtb_flags & DTRACEBUF_RING)); cookie = dtrace_interrupt_disable(); + now = dtrace_gethrtime(); buf->dtb_tomax = xamot; buf->dtb_xamot = tomax; buf->dtb_xamot_drops = buf->dtb_drops; @@ -9925,6 +11943,10 @@ dtrace_buffer_switch(dtrace_buffer_t *buf) buf->dtb_drops = 0; buf->dtb_errors = 0; buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED); + buf->dtb_interval = now - buf->dtb_switched; + buf->dtb_switched = now; + buf->dtb_cur_limit = buf->dtb_limit; + dtrace_interrupt_enable(cookie); } @@ -9956,24 +11978,31 @@ dtrace_buffer_activate(dtrace_state_t *state) } static int 
-dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, +dtrace_buffer_canalloc(size_t size) +{ + if (size > (UINT64_MAX - dtrace_buffer_memory_inuse)) + return (B_FALSE); + if ((size + dtrace_buffer_memory_inuse) > dtrace_buffer_memory_maxsize) + return (B_FALSE); + + return (B_TRUE); +} + +static int +dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t limit, size_t size, int flags, processorid_t cpu) { - cpu_t *cp; + dtrace_cpu_t *cp; dtrace_buffer_t *buf; + size_t size_before_alloc = dtrace_buffer_memory_inuse; - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - if (size > dtrace_nonroot_maxsize && + if (size > (size_t)dtrace_nonroot_maxsize && !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE)) return (EFBIG); -#if defined(__APPLE__) - if (size > (sane_size / 8) / (int)NCPU) /* As in kdbg_set_nkdbufs(), roughly. */ - return (ENOMEM); -#endif /* __APPLE__ */ - cp = cpu_list; do { @@ -9994,9 +12023,17 @@ dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, ASSERT(buf->dtb_xamot == NULL); + /* DTrace, please do not eat all the memory. */ + if (dtrace_buffer_canalloc(size) == B_FALSE) + goto err; if ((buf->dtb_tomax = kmem_zalloc(size, KM_NOSLEEP)) == NULL) goto err; + dtrace_buffer_memory_inuse += size; + /* Unsure that limit is always lower than size */ + limit = limit == size ? limit - 1 : limit; + buf->dtb_cur_limit = limit; + buf->dtb_limit = limit; buf->dtb_size = size; buf->dtb_flags = flags; buf->dtb_offset = 0; @@ -10005,10 +12042,16 @@ dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, if (flags & DTRACEBUF_NOSWITCH) continue; + /* DTrace, please do not eat all the memory. */ + if (dtrace_buffer_canalloc(size) == B_FALSE) + goto err; if ((buf->dtb_xamot = kmem_zalloc(size, KM_NOSLEEP)) == NULL) goto err; + dtrace_buffer_memory_inuse += size; } while ((cp = cp->cpu_next) != cpu_list); + ASSERT(dtrace_buffer_memory_inuse <= dtrace_buffer_memory_maxsize); + return (0); err: @@ -10036,6 +12079,9 @@ err: buf->dtb_size = 0; } while ((cp = cp->cpu_next) != cpu_list); + /* Restore the size saved before allocating memory */ + dtrace_buffer_memory_inuse = size_before_alloc; + return (ENOMEM); } @@ -10087,9 +12133,27 @@ dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align, offs += sizeof (uint32_t); } - if ((soffs = offs + needed) > buf->dtb_size) { - dtrace_buffer_drop(buf); - return (-1); + if ((uint64_t)(soffs = offs + needed) > buf->dtb_cur_limit) { + if (buf->dtb_cur_limit == buf->dtb_limit) { + buf->dtb_cur_limit = buf->dtb_size; + + os_atomic_inc(&state->dts_buf_over_limit, relaxed); + /** + * Set an AST on the current processor + * so that we can wake up the process + * outside of probe context, when we know + * it is safe to do so + */ + minor_t minor = getminor(state->dts_dev); + ASSERT(minor < 32); + + os_atomic_or(&dtrace_wake_clients, 1 << minor, relaxed); + ast_dtrace_on(); + } + if ((uint64_t)soffs > buf->dtb_size) { + dtrace_buffer_drop(buf); + return (-1); + } } if (mstate == NULL) @@ -10158,7 +12222,7 @@ dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align, * there. We need to clear the buffer from the current * offset to the end (there may be old gunk there). 
*/ - while (offs < buf->dtb_size) + while ((uint64_t)offs < buf->dtb_size) tomax[offs++] = 0; /* @@ -10195,14 +12259,14 @@ dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align, } } - while (offs + total_off > woffs) { + while (offs + total_off > (size_t)woffs) { dtrace_epid_t epid = *(uint32_t *)(tomax + woffs); size_t size; if (epid == DTRACE_EPIDNONE) { size = sizeof (uint32_t); } else { - ASSERT(epid <= state->dts_necbs); + ASSERT(epid <= (dtrace_epid_t)state->dts_necbs); ASSERT(state->dts_ecbs[epid - 1] != NULL); size = state->dts_ecbs[epid - 1]->dte_size; @@ -10237,7 +12301,7 @@ dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align, buf->dtb_offset = 0; woffs = total_off; - while (woffs < buf->dtb_size) + while ((uint64_t)woffs < buf->dtb_size) tomax[woffs++] = 0; } @@ -10295,7 +12359,7 @@ static void dtrace_buffer_polish(dtrace_buffer_t *buf) { ASSERT(buf->dtb_flags & DTRACEBUF_RING); - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if (!(buf->dtb_flags & DTRACEBUF_WRAPPED)) return; @@ -10354,9 +12418,15 @@ dtrace_buffer_free(dtrace_buffer_t *bufs) if (buf->dtb_xamot != NULL) { ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); kmem_free(buf->dtb_xamot, buf->dtb_size); + + ASSERT(dtrace_buffer_memory_inuse >= buf->dtb_size); + dtrace_buffer_memory_inuse -= buf->dtb_size; } kmem_free(buf->dtb_tomax, buf->dtb_size); + ASSERT(dtrace_buffer_memory_inuse >= buf->dtb_size); + dtrace_buffer_memory_inuse -= buf->dtb_size; + buf->dtb_size = 0; buf->dtb_tomax = NULL; buf->dtb_xamot = NULL; @@ -10390,9 +12460,8 @@ dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb) ASSERT(enab->dten_probegen == 0); ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL); -#if defined(__APPLE__) - if (ecb == NULL) return; /* XXX protection against gcc 4.0 botch on x86 */ -#endif /* __APPLE__ */ + /* APPLE NOTE: this protects against gcc 4.0 botch on x86 */ + if (ecb == NULL) return; if (enab->dten_ndesc < enab->dten_maxdesc) { enab->dten_desc[enab->dten_ndesc++] = ecb; @@ -10467,7 +12536,7 @@ dtrace_enabling_destroy(dtrace_enabling_t *enab) dtrace_ecbdesc_t *ep; dtrace_vstate_t *vstate = enab->dten_vstate; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); for (i = 0; i < enab->dten_ndesc; i++) { dtrace_actdesc_t *act, *next; @@ -10498,6 +12567,7 @@ dtrace_enabling_destroy(dtrace_enabling_t *enab) ASSERT(enab->dten_vstate->dtvs_state != NULL); ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0); enab->dten_vstate->dtvs_state->dts_nretained--; + dtrace_retained_gen++; } if (enab->dten_prev == NULL) { @@ -10526,7 +12596,7 @@ dtrace_enabling_retain(dtrace_enabling_t *enab) { dtrace_state_t *state; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL); ASSERT(enab->dten_vstate != NULL); @@ -10540,6 +12610,7 @@ dtrace_enabling_retain(dtrace_enabling_t *enab) return (ENOSPC); state->dts_nretained++; + dtrace_retained_gen++; if (dtrace_retained == NULL) { dtrace_retained = enab; @@ -10560,7 +12631,7 @@ dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match, dtrace_enabling_t *new, *enab; int found = 0, err = ENOENT; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(strlen(match->dtpd_provider) < DTRACE_PROVNAMELEN); ASSERT(strlen(match->dtpd_mod) < 
DTRACE_MODNAMELEN);
 	ASSERT(strlen(match->dtpd_func) < DTRACE_FUNCNAMELEN);
@@ -10592,16 +12663,17 @@ dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match,
 		dtrace_ecbdesc_t *ep = enab->dten_desc[i];
 		dtrace_probedesc_t *pd = &ep->dted_probe;
 
-		if (strcmp(pd->dtpd_provider, match->dtpd_provider))
+		/* APPLE NOTE: Darwin employs size bounded string operation. */
+		if (strncmp(pd->dtpd_provider, match->dtpd_provider, DTRACE_PROVNAMELEN))
 			continue;
 
-		if (strcmp(pd->dtpd_mod, match->dtpd_mod))
+		if (strncmp(pd->dtpd_mod, match->dtpd_mod, DTRACE_MODNAMELEN))
 			continue;
 
-		if (strcmp(pd->dtpd_func, match->dtpd_func))
+		if (strncmp(pd->dtpd_func, match->dtpd_func, DTRACE_FUNCNAMELEN))
 			continue;
 
-		if (strcmp(pd->dtpd_name, match->dtpd_name))
+		if (strncmp(pd->dtpd_name, match->dtpd_name, DTRACE_NAMELEN))
 			continue;
 
 		/*
@@ -10626,7 +12698,7 @@ dtrace_enabling_retract(dtrace_state_t *state)
 {
 	dtrace_enabling_t *enab, *next;
 
-	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
+	LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
 
 	/*
 	 * Iterate over all retained enablings, destroy the enablings retained
@@ -10651,13 +12723,13 @@ dtrace_enabling_retract(dtrace_state_t *state)
 }
 
 static int
-dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)
+dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched, dtrace_match_cond_t *cond)
 {
 	int i = 0;
-	int matched = 0;
+	int total_matched = 0, matched = 0;
 
-	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
-	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
+	LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED);
+	LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
 
 	for (i = 0; i < enab->dten_ndesc; i++) {
 		dtrace_ecbdesc_t *ep = enab->dten_desc[i];
@@ -10665,7 +12737,22 @@ dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)
 		enab->dten_current = ep;
 		enab->dten_error = 0;
 
-		matched += dtrace_probe_enable(&ep->dted_probe, enab);
+		/**
+		 * Before doing a dtrace_probe_enable, which is really
+		 * expensive, check that this enabling satisfies the match
+		 * precondition, if one was supplied.
+		 */
+		if (cond && (cond->dmc_func(&ep->dted_probe, cond->dmc_data) == 0)) {
+			continue;
+		}
+		/*
+		 * If a provider failed to enable a probe then get out and
+		 * let the consumer know we failed.
+		 */
+		if ((matched = dtrace_probe_enable(&ep->dted_probe, enab, ep)) < 0)
+			return (EBUSY);
+
+		total_matched += matched;
 
 		if (enab->dten_error != 0) {
 			/*
@@ -10689,17 +12776,18 @@ dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)
 
 			return (enab->dten_error);
 		}
+
+		ep->dted_probegen = dtrace_probegen;
 	}
 
-	enab->dten_probegen = dtrace_probegen;
-
 	if (nmatched != NULL)
-		*nmatched = matched;
+		*nmatched = total_matched;
 
 	return (0);
 }
 
 static void
-dtrace_enabling_matchall(void)
+dtrace_enabling_matchall_with_cond(dtrace_match_cond_t *cond)
 {
 	dtrace_enabling_t *enab;
 
@@ -10707,44 +12795,36 @@
 	lck_mtx_lock(&dtrace_lock);
 
 	/*
-	 * Because we can be called after dtrace_detach() has been called, we
-	 * cannot assert that there are retained enablings.  We can safely
-	 * load from dtrace_retained, however: the taskq_destroy() at the
-	 * end of dtrace_detach() will block pending our completion.
+	 * Iterate over all retained enablings to see if any probes match
+	 * against them.  We only perform this operation on enablings for which
+	 * we have sufficient permissions by virtue of being in the global zone
+	 * or in the same zone as the DTrace client.  Because we can be called
+	 * after dtrace_detach() has been called, we cannot assert that there
+	 * are retained enablings.  We can safely load from dtrace_retained,
+	 * however: the taskq_destroy() at the end of dtrace_detach() will
+	 * block pending our completion.
+	 */
+
+	/*
+	 * Darwin doesn't do zones.
+	 * Behave as if always in "global" zone.
 	 */
-	for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next)
-		(void) dtrace_enabling_match(enab, NULL);
+	for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
+		(void) dtrace_enabling_match(enab, NULL, cond);
+	}
 
 	lck_mtx_unlock(&dtrace_lock);
 	lck_mtx_unlock(&cpu_lock);
+
 }
 
-static int
-dtrace_enabling_matchstate(dtrace_state_t *state, int *nmatched)
+static void
+dtrace_enabling_matchall(void)
 {
-	dtrace_enabling_t *enab;
-	int matched, total_matched = 0, err;
-
-	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
-	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
-
-	for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
-		ASSERT(enab->dten_vstate->dtvs_state != NULL);
-
-		if (enab->dten_vstate->dtvs_state != state)
-			continue;
-
-		if ((err = dtrace_enabling_match(enab, &matched)) != 0)
-			return (err);
-
-		total_matched += matched;
-	}
+	dtrace_enabling_matchall_with_cond(NULL);
+}
-
-	if (nmatched != NULL)
-		*nmatched = total_matched;
 
-	return (0);
-}
 
 /*
  * If an enabling is to be enabled without having matched probes (that is, if
@@ -10780,7 +12860,7 @@ dtrace_enabling_prime(dtrace_state_t *state)
 		for (i = 0; i < enab->dten_ndesc; i++) {
 			enab->dten_current = enab->dten_desc[i];
 
-			(void) dtrace_probe_enable(NULL, enab);
+			(void) dtrace_probe_enable(NULL, enab, NULL);
 		}
 
 		enab->dten_primed = 1;
@@ -10798,9 +12878,10 @@ dtrace_enabling_provide(dtrace_provider_t *prv)
 {
 	int i, all = 0;
 	dtrace_probedesc_t desc;
+	dtrace_genid_t gen;
 
-	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
-	lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED);
+	LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
+	LCK_MTX_ASSERT(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED);
 
 	if (prv == NULL) {
 		all = 1;
@@ -10808,15 +12889,25 @@ dtrace_enabling_provide(dtrace_provider_t *prv)
 	}
 
 	do {
-		dtrace_enabling_t *enab = dtrace_retained;
+		dtrace_enabling_t *enab;
 		void *parg = prv->dtpv_arg;
 
-		for (; enab != NULL; enab = enab->dten_next) {
+retry:
+		gen = dtrace_retained_gen;
+		for (enab = dtrace_retained; enab != NULL;
+		    enab = enab->dten_next) {
 			for (i = 0; i < enab->dten_ndesc; i++) {
 				desc = enab->dten_desc[i]->dted_probe;
 				lck_mtx_unlock(&dtrace_lock);
 				prv->dtpv_pops.dtps_provide(parg, &desc);
 				lck_mtx_lock(&dtrace_lock);
+				/*
+				 * Process the retained enablings again if
+				 * they have changed while we weren't holding
+				 * dtrace_lock.
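+				 *
+				 * (Editorial sketch: this is a standard
+				 * generation-count retry.  Assuming every
+				 * retain/destroy of an enabling bumps
+				 * dtrace_retained_gen while dtrace_lock is
+				 * held, the shape is:
+				 *
+				 *	gen = dtrace_retained_gen;
+				 *	drop dtrace_lock; dtps_provide();
+				 *	retake dtrace_lock;
+				 *	if (gen != dtrace_retained_gen)
+				 *		goto retry;
+				 *
+				 * so we never keep walking a retained list
+				 * that mutated while the lock was dropped.)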
+ */ + if (gen != dtrace_retained_gen) + goto retry; } } } while (all && (prv = prv->dtpv_next) != NULL); @@ -10833,7 +12924,7 @@ dtrace_enabling_provide(dtrace_provider_t *prv) static void dtrace_dof_error(dof_hdr_t *dof, const char *str) { -#pragma unused(dof) +#pragma unused(dof) /* __APPLE__ */ if (dtrace_err_verbose) cmn_err(CE_WARN, "failed to process DOF: %s", str); @@ -10857,9 +12948,9 @@ dtrace_dof_create(dtrace_state_t *state) roundup(sizeof (dof_sec_t), sizeof (uint64_t)) + sizeof (dof_optdesc_t) * DTRACEOPT_MAX; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - dof = dt_kmem_zalloc_aligned(len, 8, KM_SLEEP); + dof = kmem_zalloc_aligned(len, 8, KM_SLEEP); dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0; dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1; dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2; @@ -10906,24 +12997,16 @@ dtrace_dof_create(dtrace_state_t *state) } static dof_hdr_t * -#if defined(__APPLE__) dtrace_dof_copyin(user_addr_t uarg, int *errp) -#else -dtrace_dof_copyin(uintptr_t uarg, int *errp) -#endif { dof_hdr_t hdr, *dof; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); /* * First, we're going to copyin() the sizeof (dof_hdr_t). */ -#if defined(__APPLE__) if (copyin(uarg, &hdr, sizeof (hdr)) != 0) { -#else - if (copyin((void *)uarg, &hdr, sizeof (hdr)) != 0) { -#endif dtrace_dof_error(NULL, "failed to copyin DOF header"); *errp = EFAULT; return (NULL); @@ -10933,7 +13016,7 @@ dtrace_dof_copyin(uintptr_t uarg, int *errp) * Now we'll allocate the entire DOF and copy it in -- provided * that the length isn't outrageous. */ - if (hdr.dofh_loadsz >= dtrace_dof_maxsize) { + if (hdr.dofh_loadsz >= (uint64_t)dtrace_dof_maxsize) { dtrace_dof_error(&hdr, "load size exceeds maximum"); *errp = E2BIG; return (NULL); @@ -10945,29 +13028,24 @@ dtrace_dof_copyin(uintptr_t uarg, int *errp) return (NULL); } - dof = dt_kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP); + dof = kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP); -#if defined(__APPLE__) - if (copyin(uarg, dof, hdr.dofh_loadsz) != 0) { -#else - if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0) { -#endif - dt_kmem_free_aligned(dof, hdr.dofh_loadsz); - *errp = EFAULT; - return (NULL); - } + if (copyin(uarg, dof, hdr.dofh_loadsz) != 0 || + dof->dofh_loadsz != hdr.dofh_loadsz) { + kmem_free_aligned(dof, hdr.dofh_loadsz); + *errp = EFAULT; + return (NULL); + } return (dof); } -#if defined(__APPLE__) - static dof_hdr_t * dtrace_dof_copyin_from_proc(proc_t* p, user_addr_t uarg, int *errp) { dof_hdr_t hdr, *dof; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); /* * First, we're going to copyin() the sizeof (dof_hdr_t). @@ -10982,7 +13060,7 @@ dtrace_dof_copyin_from_proc(proc_t* p, user_addr_t uarg, int *errp) * Now we'll allocate the entire DOF and copy it in -- provided * that the length isn't outrageous. 
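 	 *
 	 * (Editorial note: like dtrace_dof_copyin() above, this is a
 	 * two-phase read -- fetch just the dof_hdr_t, bound
 	 * hdr.dofh_loadsz by the dof_maxsize tunable, and only then
 	 * allocate and fetch the full object -- so no untrusted,
 	 * unbounded size ever reaches the allocator.)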
*/ - if (hdr.dofh_loadsz >= dtrace_dof_maxsize) { + if (hdr.dofh_loadsz >= (uint64_t)dtrace_dof_maxsize) { dtrace_dof_error(&hdr, "load size exceeds maximum"); *errp = E2BIG; return (NULL); @@ -10994,10 +13072,10 @@ dtrace_dof_copyin_from_proc(proc_t* p, user_addr_t uarg, int *errp) return (NULL); } - dof = dt_kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP); + dof = kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP); if (uread(p, dof, hdr.dofh_loadsz, uarg) != KERN_SUCCESS) { - dt_kmem_free_aligned(dof, hdr.dofh_loadsz); + kmem_free_aligned(dof, hdr.dofh_loadsz); *errp = EFAULT; return (NULL); } @@ -11005,59 +13083,61 @@ dtrace_dof_copyin_from_proc(proc_t* p, user_addr_t uarg, int *errp) return (dof); } -#endif /* __APPLE__ */ +static void +dtrace_dof_destroy(dof_hdr_t *dof) +{ + kmem_free_aligned(dof, dof->dofh_loadsz); +} static dof_hdr_t * dtrace_dof_property(const char *name) { - uchar_t *buf; - uint64_t loadsz; - unsigned int len, i; + unsigned int len = 0; dof_hdr_t *dof; - /* - * Unfortunately, array of values in .conf files are always (and - * only) interpreted to be integer arrays. We must read our DOF - * as an integer array, and then squeeze it into a byte array. - */ - if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0, - (char *)name, (int **)&buf, &len) != DDI_PROP_SUCCESS) - return (NULL); + if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed()) { + return NULL; + } + + if (!PEReadNVRAMProperty(name, NULL, &len)) { + return NULL; + } + + dof = kmem_alloc_aligned(len, 8, KM_SLEEP); - for (i = 0; i < len; i++) - buf[i] = (uchar_t)(((int *)buf)[i]); + if (!PEReadNVRAMProperty(name, dof, &len)) { + dtrace_dof_destroy(dof); + dtrace_dof_error(NULL, "unreadable DOF"); + return NULL; + } if (len < sizeof (dof_hdr_t)) { - ddi_prop_free(buf); + dtrace_dof_destroy(dof); dtrace_dof_error(NULL, "truncated header"); return (NULL); } - if (len < (loadsz = ((dof_hdr_t *)buf)->dofh_loadsz)) { - ddi_prop_free(buf); + if (len < dof->dofh_loadsz) { + dtrace_dof_destroy(dof); dtrace_dof_error(NULL, "truncated DOF"); return (NULL); } - if (loadsz >= dtrace_dof_maxsize) { - ddi_prop_free(buf); - dtrace_dof_error(NULL, "oversized DOF"); + if (len != dof->dofh_loadsz) { + dtrace_dof_destroy(dof); + dtrace_dof_error(NULL, "invalid DOF size"); return (NULL); } - dof = dt_kmem_alloc_aligned(loadsz, 8, KM_SLEEP); - bcopy(buf, dof, loadsz); - ddi_prop_free(buf); + if (dof->dofh_loadsz >= (uint64_t)dtrace_dof_maxsize) { + dtrace_dof_destroy(dof); + dtrace_dof_error(NULL, "oversized DOF"); + return (NULL); + } return (dof); } -static void -dtrace_dof_destroy(dof_hdr_t *dof) -{ - dt_kmem_free_aligned(dof, dof->dofh_loadsz); -} - /* * Return the dof_sec_t pointer corresponding to a given section index. If the * index is not valid, dtrace_dof_error() is called and NULL is returned. If @@ -11130,6 +13210,9 @@ dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc) (char *)(str + probe->dofp_provider), MIN(DTRACE_PROVNAMELEN - 1, size - probe->dofp_provider)); + /* APPLE NOTE: Darwin employs size bounded string operation. */ + desc->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; + if (probe->dofp_mod >= strtab->dofs_size) { dtrace_dof_error(dof, "corrupt probe module"); return (NULL); @@ -11138,6 +13221,9 @@ dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc) (void) strncpy(desc->dtpd_mod, (char *)(str + probe->dofp_mod), MIN(DTRACE_MODNAMELEN - 1, size - probe->dofp_mod)); + /* APPLE NOTE: Darwin employs size bounded string operation. 
*/ + desc->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; + if (probe->dofp_func >= strtab->dofs_size) { dtrace_dof_error(dof, "corrupt probe function"); return (NULL); @@ -11146,6 +13232,9 @@ dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc) (void) strncpy(desc->dtpd_func, (char *)(str + probe->dofp_func), MIN(DTRACE_FUNCNAMELEN - 1, size - probe->dofp_func)); + /* APPLE NOTE: Darwin employs size bounded string operation. */ + desc->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; + if (probe->dofp_name >= strtab->dofs_size) { dtrace_dof_error(dof, "corrupt probe name"); return (NULL); @@ -11154,6 +13243,9 @@ dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc) (void) strncpy(desc->dtpd_name, (char *)(str + probe->dofp_name), MIN(DTRACE_NAMELEN - 1, size - probe->dofp_name)); + /* APPLE NOTE: Darwin employs size bounded string operation. */ + desc->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; + return (desc); } @@ -11166,7 +13258,9 @@ dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, dof_difohdr_t *dofd; uintptr_t daddr = (uintptr_t)dof; size_t max_size = dtrace_difo_maxsize; - int i, l, n; + uint_t i; + int l, n; + static const struct { int section; @@ -11192,11 +13286,7 @@ dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, offsetof(dtrace_difo_t, dtdo_varlen), sizeof (dtrace_difv_t), sizeof (uint_t), "multiple variable tables" }, -#if !defined(__APPLE__) - { DOF_SECT_NONE, 0, 0, 0, NULL } -#else { DOF_SECT_NONE, 0, 0, 0, 0, NULL } -#endif /* __APPLE__ */ }; if (sec->dofs_type != DOF_SECT_DIFOHDR) { @@ -11238,7 +13328,8 @@ dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, ttl += subsec->dofs_size; for (i = 0; difo[i].section != DOF_SECT_NONE; i++) { - if (subsec->dofs_type != difo[i].section) + + if (subsec->dofs_type != (uint32_t)difo[i].section) continue; if (!(subsec->dofs_flags & DOF_SECF_LOAD)) { @@ -11246,7 +13337,7 @@ dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, goto err; } - if (subsec->dofs_align != difo[i].align) { + if (subsec->dofs_align != (uint32_t)difo[i].align) { dtrace_dof_error(dof, "bad alignment"); goto err; } @@ -11259,7 +13350,7 @@ dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, goto err; } - if (difo[i].entsize != subsec->dofs_entsize) { + if ((uint32_t)difo[i].entsize != subsec->dofs_entsize) { dtrace_dof_error(dof, "entry size mismatch"); goto err; } @@ -11291,7 +13382,7 @@ dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, goto err; } } - + if (dp->dtdo_buf == NULL) { /* * We can't have a DIF object without DIF text. @@ -11394,15 +13485,19 @@ dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, (uintptr_t)sec->dofs_offset + offs); kind = (dtrace_actkind_t)desc->dofa_kind; - if (DTRACEACT_ISPRINTFLIKE(kind) && - (kind != DTRACEACT_PRINTA || - desc->dofa_strtab != DOF_SECIDX_NONE)) { + if ((DTRACEACT_ISPRINTFLIKE(kind) && + (kind != DTRACEACT_PRINTA || desc->dofa_strtab != DOF_SECIDX_NONE)) || + (kind == DTRACEACT_DIFEXPR && desc->dofa_strtab != DOF_SECIDX_NONE)) + { dof_sec_t *strtab; char *str, *fmt; uint64_t i; /* - * printf()-like actions must have a format string. + * The argument to these actions is an index into the + * DOF string table. For printf()-like actions, this + * is the format string. For print(), this is the + * CTF type of the expression result. 
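+		 *
+		 * (Editorial example: for a hypothetical D clause such as
+		 *
+		 *	print(*(struct task *)arg0)
+		 *
+		 * the emitted DIFEXPR action's dofa_strtab entry names
+		 * the CTF type of the expression, which the consumer
+		 * uses to render the value; for printf()-like actions
+		 * the same slot carries the format string instead.)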
*/ if ((strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL) @@ -11538,76 +13633,10 @@ err: return (NULL); } -#if !defined(__APPLE__) /* APPLE dyld has already done this for us */ /* - * Apply the relocations from the specified 'sec' (a DOF_SECT_URELHDR) to the - * specified DOF. At present, this amounts to simply adding 'ubase' to the - * site of any user SETX relocations to account for load object base address. - * In the future, if we need other relocations, this function can be extended. + * APPLE NOTE: dyld handles dof relocation. + * Darwin does not need dtrace_dof_relocate() */ -static int -dtrace_dof_relocate(dof_hdr_t *dof, dof_sec_t *sec, uint64_t ubase) -{ - uintptr_t daddr = (uintptr_t)dof; - dof_relohdr_t *dofr = - (dof_relohdr_t *)(uintptr_t)(daddr + sec->dofs_offset); - dof_sec_t *ss, *rs, *ts; - dof_relodesc_t *r; - uint_t i, n; - - if (sec->dofs_size < sizeof (dof_relohdr_t) || - sec->dofs_align != sizeof (dof_secidx_t)) { - dtrace_dof_error(dof, "invalid relocation header"); - return (-1); - } - - ss = dtrace_dof_sect(dof, DOF_SECT_STRTAB, dofr->dofr_strtab); - rs = dtrace_dof_sect(dof, DOF_SECT_RELTAB, dofr->dofr_relsec); - ts = dtrace_dof_sect(dof, DOF_SECT_NONE, dofr->dofr_tgtsec); - - if (ss == NULL || rs == NULL || ts == NULL) - return (-1); /* dtrace_dof_error() has been called already */ - - if (rs->dofs_entsize < sizeof (dof_relodesc_t) || - rs->dofs_align != sizeof (uint64_t)) { - dtrace_dof_error(dof, "invalid relocation section"); - return (-1); - } - - r = (dof_relodesc_t *)(uintptr_t)(daddr + rs->dofs_offset); - n = rs->dofs_size / rs->dofs_entsize; - - for (i = 0; i < n; i++) { - uintptr_t taddr = daddr + ts->dofs_offset + r->dofr_offset; - - switch (r->dofr_type) { - case DOF_RELO_NONE: - break; - case DOF_RELO_SETX: - if (r->dofr_offset >= ts->dofs_size || r->dofr_offset + - sizeof (uint64_t) > ts->dofs_size) { - dtrace_dof_error(dof, "bad relocation offset"); - return (-1); - } - - if (!IS_P2ALIGNED(taddr, sizeof (uint64_t))) { - dtrace_dof_error(dof, "misaligned setx relo"); - return (-1); - } - - *(uint64_t *)taddr += ubase; - break; - default: - dtrace_dof_error(dof, "invalid relocation type"); - return (-1); - } - - r = (dof_relodesc_t *)((uintptr_t)r + rs->dofs_entsize); - } - - return (0); -} -#endif /* __APPLE__ */ /* * The dof_hdr_t passed to dtrace_dof_slurp() should be a partially validated @@ -11619,13 +13648,14 @@ static int dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, dtrace_enabling_t **enabp, uint64_t ubase, int noprobes) { +#pragma unused(ubase) /* __APPLE__ */ uint64_t len = dof->dofh_loadsz, seclen; uintptr_t daddr = (uintptr_t)dof; dtrace_ecbdesc_t *ep; dtrace_enabling_t *enab; uint_t i; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dof->dofh_loadsz >= sizeof (dof_hdr_t)); /* @@ -11650,21 +13680,13 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, return (-1); } -#if !defined(__APPLE__) - if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 && - dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_2) { - dtrace_dof_error(dof, "DOF version mismatch"); - return (-1); - } -#else /* - * We only support DOF_VERSION_3 for now. + * APPLE NOTE: Darwin only supports DOF_VERSION_3 for now. 
 */
 	if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_3) {
 		dtrace_dof_error(dof, "DOF version mismatch");
 		return (-1);
 	}
-#endif
 
 	if (dof->dofh_ident[DOF_ID_DIFVERS] != DIF_VERSION_2) {
 		dtrace_dof_error(dof, "DOF uses unsupported instruction set");
@@ -11693,8 +13715,8 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr,
 		return (-1);
 	}
 
-	if (dof->dofh_secsize == 0) {
-		dtrace_dof_error(dof, "zero section header size");
+	if (dof->dofh_secsize < sizeof(dof_sec_t)) {
+		dtrace_dof_error(dof, "invalid section header size");
 		return (-1);
 	}
 
@@ -11768,32 +13790,10 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr,
 		}
 	}
 
-#if !defined(__APPLE__)
-	/*
-	 * APPLE NOTE: We have no relocation to perform. All dof values are
-	 * relative offsets.
-	 */
-
 	/*
-	 * Take a second pass through the sections and locate and perform any
-	 * relocations that are present. We do this after the first pass to
-	 * be sure that all sections have had their headers validated.
+	 * APPLE NOTE: We have no further relocation to perform.
+	 * All dof values are relative offsets.
 	 */
-	for (i = 0; i < dof->dofh_secnum; i++) {
-		dof_sec_t *sec = (dof_sec_t *)(daddr +
-		    (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);
-
-		if (!(sec->dofs_flags & DOF_SECF_LOAD))
-			continue; /* skip sections that are not loadable */
-
-		switch (sec->dofs_type) {
-		case DOF_SECT_URELHDR:
-			if (dtrace_dof_relocate(dof, sec, ubase) != 0)
-				return (-1);
-			break;
-		}
-	}
-#endif /* __APPLE__ */
 
 	if ((enab = *enabp) == NULL)
 		enab = *enabp = dtrace_enabling_create(vstate);
@@ -11805,22 +13805,18 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr,
 
 		if (sec->dofs_type != DOF_SECT_ECBDESC)
 			continue;
 
-#if !defined(__APPLE__)
-		if ((ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr)) == NULL) {
-			dtrace_enabling_destroy(enab);
-			*enabp = NULL;
-			return (-1);
-		}
-#else
-		/* XXX Defend against gcc 4.0 botch on x86 (not all paths out of inlined dtrace_dof_ecbdesc
-		   are checked for the NULL return value.) */
+		/*
+		 * APPLE NOTE: Defend against gcc 4.0 botch on x86.
+		 * Not all paths out of inlined dtrace_dof_ecbdesc
+		 * are checked for the NULL return value.
+		 * Check for NULL explicitly here.
+ */ ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr); if (ep == NULL) { dtrace_enabling_destroy(enab); *enabp = NULL; return (-1); } -#endif /* __APPLE__ */ dtrace_enabling_add(enab, ep); } @@ -11835,7 +13831,8 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, static int dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state) { - int i, rval; + uint_t i; + int rval; uint32_t entsize; size_t offs; dof_optdesc_t *desc; @@ -11872,7 +13869,7 @@ dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state) return (EINVAL); } - if (desc->dofo_value == DTRACEOPT_UNSET) { + if (desc->dofo_value == (uint64_t)DTRACEOPT_UNSET) { dtrace_dof_error(dof, "unset option"); return (EINVAL); } @@ -11891,19 +13888,16 @@ dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state) /* * DTrace Consumer State Functions */ -#if defined(__APPLE__) -static -#endif /* __APPLE__ */ -int +static int dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) { size_t hashsize, maxper, min_size, chunksize = dstate->dtds_chunksize; void *base; uintptr_t limit; dtrace_dynvar_t *dvar, *next, *start; - int i; + size_t i; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL); bzero(dstate, sizeof (dtrace_dstate_t)); @@ -11911,6 +13905,8 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) if ((dstate->dtds_chunksize = chunksize) == 0) dstate->dtds_chunksize = DTRACE_DYNVAR_CHUNKSIZE; + VERIFY(dstate->dtds_chunksize < (LONG_MAX - sizeof (dtrace_dynhash_t))); + if (size < (min_size = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t))) size = min_size; @@ -11951,10 +13947,13 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) ((uintptr_t)base + hashsize * sizeof (dtrace_dynhash_t)); limit = (uintptr_t)base + size; + VERIFY((uintptr_t)start < limit); + VERIFY((uintptr_t)start >= (uintptr_t)base); + maxper = (limit - (uintptr_t)start) / (int)NCPU; maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize; - for (i = 0; i < (int)NCPU; i++) { + for (i = 0; i < NCPU; i++) { dstate->dtds_percpu[i].dtdsc_free = dvar = start; /* @@ -11964,7 +13963,7 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) * whatever is left over. In either case, we set the limit to * be the limit of the dynamic variable space. 
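 		 *
 		 * (Editorial worked example: with size = 1MB, NCPU = 4
 		 * and a hypothetical 256-byte chunksize, maxper is
 		 * roughly 256KB rounded down to a chunk multiple; CPUs
 		 * 0 through 2 each get exactly that many chunks, while
 		 * CPU 3's free list runs to the end of the region and
 		 * absorbs any remainder.)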
*/ - if (maxper == 0 || i == (int)NCPU - 1) { + if (maxper == 0 || i == NCPU - 1) { limit = (uintptr_t)base + size; start = NULL; } else { @@ -11972,7 +13971,7 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) start = (dtrace_dynvar_t *)limit; } - ASSERT(limit <= (uintptr_t)base + size); + VERIFY(limit <= (uintptr_t)base + size); for (;;) { next = (dtrace_dynvar_t *)((uintptr_t)dvar + @@ -11981,6 +13980,8 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) if ((uintptr_t)next + dstate->dtds_chunksize >= limit) break; + VERIFY((uintptr_t)dvar >= (uintptr_t)base && + (uintptr_t)dvar <= (uintptr_t)base + size); dvar->dtdv_next = next; dvar = next; } @@ -11992,13 +13993,10 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) return (0); } -#if defined(__APPLE__) -static -#endif /* __APPLE__ */ -void +static void dtrace_dstate_fini(dtrace_dstate_t *dstate) { - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); if (dstate->dtds_base == NULL) return; @@ -12070,11 +14068,8 @@ dtrace_state_deadman(dtrace_state_t *state) state->dts_alive = now; } -#if defined(__APPLE__) -static -#endif /* __APPLE__ */ -dtrace_state_t * -dtrace_state_create(dev_t *devp, cred_t *cr) +static int +dtrace_state_create(dev_t *devp, cred_t *cr, dtrace_state_t **new_state) { minor_t minor; major_t major; @@ -12082,50 +14077,31 @@ dtrace_state_create(dev_t *devp, cred_t *cr) dtrace_state_t *state; dtrace_optval_t *opt; int bufsize = (int)NCPU * sizeof (dtrace_buffer_t), i; + unsigned int cpu_it; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); - -#if !defined(__APPLE__) - minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1, - VM_BESTFIT | VM_SLEEP); -#else - /* - * Darwin's DEVFS layer acquired the minor number for this "device" when it called - * dtrace_devfs_clone_func(). At that time, dtrace_devfs_clone_func() proposed a minor number - * (next unused according to vmem_alloc()) and then immediately put the number back in play - * (by calling vmem_free()). Now that minor number is being used for an open, so committing it - * to use. The following vmem_alloc() must deliver that same minor number. - */ - - minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1, - VM_BESTFIT | VM_SLEEP); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); - if (NULL != devp) { - ASSERT(getminor(*devp) == minor); - if (getminor(*devp) != minor) { - printf("dtrace_open: couldn't re-acquire vended minor number %d. Instead got %d\n", - getminor(*devp), minor); - vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); - return NULL; - } - } else { - /* NULL==devp iff "Anonymous state" (see dtrace_anon_property), - * so just vend the minor device number here de novo since no "open" has occurred. */ + /* Cause restart */ + *new_state = NULL; + + if (devp != NULL) { + minor = getminor(*devp); + } + else { + minor = DTRACE_NCLIENTS - 1; } -#endif /* __APPLE__ */ - - if (ddi_soft_state_zalloc(dtrace_softstate, minor) != DDI_SUCCESS) { - vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); - return (NULL); + state = dtrace_state_allocate(minor); + if (NULL == state) { + printf("dtrace_open: couldn't acquire minor number %d. 
This usually means that too many DTrace clients are in use at the moment", minor); + return (ERESTART); /* can't reacquire */ } - state = ddi_get_soft_state(dtrace_softstate, minor); state->dts_epid = DTRACE_EPIDNONE + 1; (void) snprintf(c, sizeof (c), "dtrace_aggid_%d", minor); - state->dts_aggid_arena = vmem_create(c, (void *)1, UINT32_MAX, 1, + state->dts_aggid_arena = vmem_create(c, (void *)1, INT32_MAX, 1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER); if (devp != NULL) { @@ -12134,7 +14110,7 @@ dtrace_state_create(dev_t *devp, cred_t *cr) major = ddi_driver_major(dtrace_devi); } - state->dts_dev = makedevice(major, minor); + state->dts_dev = makedev(major, minor); if (devp != NULL) *devp = state->dts_dev; @@ -12147,6 +14123,26 @@ dtrace_state_create(dev_t *devp, cred_t *cr) */ state->dts_buffer = kmem_zalloc(bufsize, KM_SLEEP); state->dts_aggbuffer = kmem_zalloc(bufsize, KM_SLEEP); + state->dts_buf_over_limit = 0; + + /* + * Allocate and initialise the per-process per-CPU random state. + * SI_SUB_RANDOM < SI_SUB_DTRACE_ANON therefore entropy device is + * assumed to be seeded at this point (if from Fortuna seed file). + */ + state->dts_rstate = kmem_zalloc(NCPU * sizeof(uint64_t*), KM_SLEEP); + state->dts_rstate[0] = kmem_zalloc(2 * sizeof(uint64_t), KM_SLEEP); + (void) read_random(state->dts_rstate[0], 2 * sizeof(uint64_t)); + for (cpu_it = 1; cpu_it < NCPU; cpu_it++) { + state->dts_rstate[cpu_it] = kmem_zalloc(2 * sizeof(uint64_t), KM_SLEEP); + /* + * Each CPU is assigned a 2^64 period, non-overlapping + * subsequence. + */ + dtrace_xoroshiro128_plus_jump(state->dts_rstate[cpu_it-1], + state->dts_rstate[cpu_it]); + } + state->dts_cleaner = CYCLIC_NONE; state->dts_deadman = CYCLIC_NONE; state->dts_vstate.dtvs_state = state; @@ -12172,8 +14168,7 @@ dtrace_state_create(dev_t *devp, cred_t *cr) opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_default; opt[DTRACEOPT_JSTACKFRAMES] = dtrace_jstackframes_default; opt[DTRACEOPT_JSTACKSTRSIZE] = dtrace_jstackstrsize_default; - - state->dts_activity = DTRACE_ACTIVITY_INACTIVE; + opt[DTRACEOPT_BUFLIMIT] = dtrace_buflimit_default; /* * Depending on the user credentials, we set flag bits which alter probe @@ -12181,10 +14176,32 @@ dtrace_state_create(dev_t *devp, cred_t *cr) * actual anonymous tracing, or the possession of all privileges, all of * the normal checks are bypassed. */ +#if defined(__APPLE__) + if (cr != NULL) { + kauth_cred_ref(cr); + state->dts_cred.dcr_cred = cr; + } + if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) { + if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed()) { + /* + * Allow only proc credentials when DTrace is + * restricted by the current security policy + */ + state->dts_cred.dcr_visible = DTRACE_CRV_ALLPROC; + state->dts_cred.dcr_action = DTRACE_CRA_PROC | DTRACE_CRA_PROC_CONTROL | DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER; + } + else { + state->dts_cred.dcr_visible = DTRACE_CRV_ALL; + state->dts_cred.dcr_action = DTRACE_CRA_ALL; + } + } + +#else if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) { state->dts_cred.dcr_visible = DTRACE_CRV_ALL; state->dts_cred.dcr_action = DTRACE_CRA_ALL; - } else { + } + else { /* * Set up the credentials for this instantiation. We take a * hold on the credential to prevent it from disappearing on @@ -12231,18 +14248,13 @@ dtrace_state_create(dev_t *devp, cred_t *cr) * If we have all privs in whatever zone this is, * we can do destructive things to processes which * have altered credentials. + * + * APPLE NOTE: Darwin doesn't do zones. 
+ * Behave as if zone always has destructive privs. */ -#if !defined(__APPLE__) - if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE), - cr->cr_zone->zone_privset)) { - state->dts_cred.dcr_action |= - DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG; - } -#else - /* Darwin doesn't do zones. */ + state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG; -#endif /* __APPLE__ */ } /* @@ -12282,18 +14294,12 @@ dtrace_state_create(dev_t *devp, cred_t *cr) * If we have all privs in whatever zone this is, * we can do destructive things to processes which * have altered credentials. - */ -#if !defined(__APPLE__) - if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE), - cr->cr_zone->zone_privset)) { - state->dts_cred.dcr_action |= - DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG; - } -#else - /* Darwin doesn't do zones. */ + * + * APPLE NOTE: Darwin doesn't do zones. + * Behave as if zone always has destructive privs. + */ state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG; -#endif /* __APPLE__ */ } /* @@ -12312,8 +14318,10 @@ dtrace_state_create(dev_t *devp, cred_t *cr) DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE; } } +#endif - return (state); + *new_state = state; + return(0); /* Success */ } static int @@ -12321,10 +14329,11 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) { dtrace_optval_t *opt = state->dts_options, size; processorid_t cpu = 0; + size_t limit = buf->dtb_size; int flags = 0, rval; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); ASSERT(which < DTRACEOPT_MAX); ASSERT(state->dts_activity == DTRACE_ACTIVITY_INACTIVE || (state == dtrace_anon.dta_state && @@ -12351,7 +14360,7 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) flags |= DTRACEBUF_INACTIVE; } - for (size = opt[which]; size >= sizeof (uint64_t); size >>= 1) { + for (size = opt[which]; (size_t)size >= sizeof (uint64_t); size >>= 1) { /* * The size must be 8-byte aligned. If the size is not 8-byte * aligned, drop it down by the difference. @@ -12368,8 +14377,8 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) */ return (E2BIG); } - - rval = dtrace_buffer_alloc(buf, size, flags, cpu); + limit = opt[DTRACEOPT_BUFLIMIT] * size / 100; + rval = dtrace_buffer_alloc(buf, limit, size, flags, cpu); if (rval != ENOMEM) { opt[which] = size; @@ -12546,7 +14555,7 @@ dtrace_state_go(dtrace_state_t *state, processorid_t *cpu) * a buffer to use as scratch. 
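 		 *
 		 * (Editorial note: e.g. if the largest enabled record
 		 * needs 8K including scratch but the consumer asked for
 		 * only a 4K principal buffer, BUFSIZE is quietly raised
 		 * to dts_needed below so a single record can never
 		 * outgrow its buffer.)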
*/ if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET || - opt[DTRACEOPT_BUFSIZE] < state->dts_needed) { + (size_t)opt[DTRACEOPT_BUFSIZE] < state->dts_needed) { opt[DTRACEOPT_BUFSIZE] = state->dts_needed; } } @@ -12617,6 +14626,18 @@ dtrace_state_go(dtrace_state_t *state, processorid_t *cpu) if (opt[DTRACEOPT_CLEANRATE] > dtrace_cleanrate_max) opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max; + if (opt[DTRACEOPT_STRSIZE] > dtrace_strsize_max) + opt[DTRACEOPT_STRSIZE] = dtrace_strsize_max; + + if (opt[DTRACEOPT_STRSIZE] < dtrace_strsize_min) + opt[DTRACEOPT_STRSIZE] = dtrace_strsize_min; + + if (opt[DTRACEOPT_BUFLIMIT] > dtrace_buflimit_max) + opt[DTRACEOPT_BUFLIMIT] = dtrace_buflimit_max; + + if (opt[DTRACEOPT_BUFLIMIT] < dtrace_buflimit_min) + opt[DTRACEOPT_BUFLIMIT] = dtrace_buflimit_min; + hdlr.cyh_func = (cyc_func_t)dtrace_state_clean; hdlr.cyh_arg = state; hdlr.cyh_level = CY_LOW_LEVEL; @@ -12710,7 +14731,7 @@ dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu) { dtrace_icookie_t cookie; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE && state->dts_activity != DTRACE_ACTIVITY_DRAINING) @@ -12761,7 +14782,7 @@ static int dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option, dtrace_optval_t val) { - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) return (EBUSY); @@ -12823,8 +14844,8 @@ dtrace_state_destroy(dtrace_state_t *state) int nspec = state->dts_nspeculations; uint32_t match; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); /* * First, retract any retained enablings for this state. @@ -12851,7 +14872,7 @@ dtrace_state_destroy(dtrace_state_t *state) * Release the credential hold we took in dtrace_state_create(). */ if (state->dts_cred.dcr_cred != NULL) - crfree(state->dts_cred.dcr_cred); + kauth_cred_unref(&state->dts_cred.dcr_cred); /* * Now we can safely disable and destroy any enabled probes. 
Because @@ -12890,6 +14911,11 @@ dtrace_state_destroy(dtrace_state_t *state) dtrace_buffer_free(state->dts_buffer); dtrace_buffer_free(state->dts_aggbuffer); + for (i = 0; i < (int)NCPU; i++) { + kmem_free(state->dts_rstate[i], 2 * sizeof(uint64_t)); + } + kmem_free(state->dts_rstate, NCPU * sizeof(uint64_t*)); + for (i = 0; i < nspec; i++) dtrace_buffer_free(spec[i].dtsp_buffer); @@ -12904,7 +14930,7 @@ dtrace_state_destroy(dtrace_state_t *state) kmem_free(state->dts_ecbs, state->dts_necbs * sizeof (dtrace_ecb_t *)); if (state->dts_aggregations != NULL) { -#ifdef DEBUG +#if DEBUG for (i = 0; i < state->dts_naggregations; i++) ASSERT(state->dts_aggregations[i] == NULL); #endif @@ -12924,19 +14950,32 @@ dtrace_state_destroy(dtrace_state_t *state) dtrace_format_destroy(state); vmem_destroy(state->dts_aggid_arena); - ddi_soft_state_free(dtrace_softstate, minor); - vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); + dtrace_state_free(minor); } /* * DTrace Anonymous Enabling Functions */ + +int +dtrace_keep_kernel_symbols(void) +{ + if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed()) { + return 0; + } + + if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL) + return 1; + + return 0; +} + static dtrace_state_t * dtrace_anon_grab(void) { dtrace_state_t *state; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if ((state = dtrace_anon.dta_state) == NULL) { ASSERT(dtrace_anon.dta_enabling == NULL); @@ -12961,8 +15000,8 @@ dtrace_anon_property(void) dof_hdr_t *dof; char c[32]; /* enough for "dof-data-" + digits */ - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); for (i = 0; ; i++) { (void) snprintf(c, sizeof (c), "dof-data-%d", i); @@ -12974,6 +15013,7 @@ dtrace_anon_property(void) break; } +#ifdef illumos /* * We want to create anonymous state, so we need to transition * the kernel debugger to indicate that DTrace is active. If @@ -12986,15 +15026,15 @@ dtrace_anon_property(void) dtrace_dof_destroy(dof); break; } +#endif /* * If we haven't allocated an anonymous state, we'll do so now. 
*/ if ((state = dtrace_anon.dta_state) == NULL) { - state = dtrace_state_create(NULL, NULL); + rv = dtrace_state_create(NULL, NULL, &state); dtrace_anon.dta_state = state; - - if (state == NULL) { + if (rv != 0 || state == NULL) { /* * This basically shouldn't happen: the only * failure mode from dtrace_state_create() is a @@ -13059,14 +15099,15 @@ static void dtrace_helper_trace(dtrace_helper_action_t *helper, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, int where) { - uint32_t size, next, nnext, i; + uint32_t size, next, nnext; + int i; dtrace_helptrace_t *ent; uint16_t flags = cpu_core[CPU->cpu_id].cpuc_dtrace_flags; if (!dtrace_helptrace_enabled) return; - ASSERT(vstate->dtvs_nlocals <= dtrace_helptrace_nlocals); + ASSERT((uint32_t)vstate->dtvs_nlocals <= dtrace_helptrace_nlocals); /* * What would a tracing framework be without its own tracing @@ -13116,6 +15157,7 @@ dtrace_helper_trace(dtrace_helper_action_t *helper, } } +__attribute__((noinline)) static uint64_t dtrace_helper(int which, dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t arg0, uint64_t arg1) @@ -13204,7 +15246,7 @@ err: mstate->dtms_arg[0] = sarg0; mstate->dtms_arg[1] = sarg1; - return (NULL); + return (0); } static void @@ -13226,21 +15268,15 @@ dtrace_helper_action_destroy(dtrace_helper_action_t *helper, kmem_free(helper, sizeof (dtrace_helper_action_t)); } -#if !defined(__APPLE__) -static int -dtrace_helper_destroygen(int gen) -{ - proc_t *p = curproc; -#else static int dtrace_helper_destroygen(proc_t* p, int gen) { -#endif dtrace_helpers_t *help = p->p_dtrace_helpers; dtrace_vstate_t *vstate; - int i; + uint_t i; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if (help == NULL || gen > help->dthps_generation) return (EINVAL); @@ -13305,13 +15341,11 @@ dtrace_helper_destroygen(proc_t* p, int gen) /* * If we have a meta provider, remove this helper provider. 
*/ - lck_mtx_lock(&dtrace_meta_lock); if (dtrace_meta_pid != NULL) { ASSERT(dtrace_deferred_pid == NULL); dtrace_helper_provider_remove(&prov->dthp_prov, - p->p_pid); + p); } - lck_mtx_unlock(&dtrace_meta_lock); dtrace_helper_provider_destroy(prov); @@ -13336,13 +15370,8 @@ dtrace_helper_validate(dtrace_helper_action_t *helper) return (err == 0); } -#if !defined(__APPLE__) -static int -dtrace_helper_action_add(int which, dtrace_ecbdesc_t *ep) -#else static int dtrace_helper_action_add(proc_t* p, int which, dtrace_ecbdesc_t *ep) -#endif { dtrace_helpers_t *help; dtrace_helper_action_t *helper, *last; @@ -13354,11 +15383,7 @@ dtrace_helper_action_add(proc_t* p, int which, dtrace_ecbdesc_t *ep) if (which < 0 || which >= DTRACE_NHELPER_ACTIONS) return (EINVAL); -#if !defined(__APPLE__) - help = curproc->p_dtrace_helpers; -#else help = p->p_dtrace_helpers; -#endif last = help->dthps_actions[which]; vstate = &help->dthps_vstate; @@ -13411,7 +15436,7 @@ dtrace_helper_action_add(proc_t* p, int which, dtrace_ecbdesc_t *ep) last->dtha_next = helper; } - if (vstate->dtvs_nlocals > dtrace_helptrace_nlocals) { + if ((uint32_t)vstate->dtvs_nlocals > dtrace_helptrace_nlocals) { dtrace_helptrace_nlocals = vstate->dtvs_nlocals; dtrace_helptrace_next = 0; } @@ -13426,9 +15451,9 @@ static void dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help, dof_helper_t *dofhp) { - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); + LCK_MTX_ASSERT(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); - lck_mtx_lock(&dtrace_meta_lock); lck_mtx_lock(&dtrace_lock); if (!dtrace_attached() || dtrace_meta_pid == NULL) { @@ -13461,7 +15486,7 @@ dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help, lck_mtx_unlock(&dtrace_lock); - dtrace_helper_provide(dofhp, p->p_pid); + dtrace_helper_provide(dofhp, p); } else { /* @@ -13469,37 +15494,25 @@ dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help, * off to the meta provider. 
*/ - int i; + uint_t i; lck_mtx_unlock(&dtrace_lock); for (i = 0; i < help->dthps_nprovs; i++) { dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov, - p->p_pid); + p); } } - - lck_mtx_unlock(&dtrace_meta_lock); } -#if !defined(__APPLE__) -static int -dtrace_helper_provider_add(dof_helper_t *dofhp, int gen) -#else static int dtrace_helper_provider_add(proc_t* p, dof_helper_t *dofhp, int gen) -#endif { dtrace_helpers_t *help; dtrace_helper_provider_t *hprov, **tmp_provs; uint_t tmp_maxprovs, i; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - -#if !defined(__APPLE__) - help = curproc->p_dtrace_helpers; -#else + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); help = p->p_dtrace_helpers; -#endif ASSERT(help != NULL); /* @@ -13785,13 +15798,8 @@ dtrace_helper_provider_validate(dof_hdr_t *dof, dof_sec_t *sec) return (0); } -#if !defined(__APPLE__) -static int -dtrace_helper_slurp(dof_hdr_t *dof, dof_helper_t *dhp) -#else static int dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp) -#endif { dtrace_helpers_t *help; dtrace_vstate_t *vstate; @@ -13799,15 +15807,11 @@ dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp) int i, gen, rv, nhelpers = 0, nprovs = 0, destroy = 1; uintptr_t daddr = (uintptr_t)dof; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); -#if !defined(__APPLE__) - if ((help = curproc->p_dtrace_helpers) == NULL) - help = dtrace_helpers_create(curproc); -#else if ((help = p->p_dtrace_helpers) == NULL) help = dtrace_helpers_create(p); -#endif vstate = &help->dthps_vstate; @@ -13821,7 +15825,7 @@ dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp) * Look for helper providers and validate their descriptions. */ if (dhp != NULL) { - for (i = 0; i < dof->dofh_secnum; i++) { + for (i = 0; (uint32_t)i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + i * dof->dofh_secsize); @@ -13845,30 +15849,23 @@ dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp) dtrace_ecbdesc_t *ep = enab->dten_desc[i]; dtrace_probedesc_t *desc = &ep->dted_probe; - if (strcmp(desc->dtpd_provider, "dtrace") != 0) + /* APPLE NOTE: Darwin employs size bounded string operation. */ + if (!LIT_STRNEQL(desc->dtpd_provider, "dtrace")) continue; - if (strcmp(desc->dtpd_mod, "helper") != 0) + if (!LIT_STRNEQL(desc->dtpd_mod, "helper")) continue; - if (strcmp(desc->dtpd_func, "ustack") != 0) + if (!LIT_STRNEQL(desc->dtpd_func, "ustack")) continue; -#if !defined(__APPLE__) - if ((rv = dtrace_helper_action_add(DTRACE_HELPER_ACTION_USTACK, ep)) != 0) -#else - if ((rv = dtrace_helper_action_add(p, DTRACE_HELPER_ACTION_USTACK, ep)) != 0) -#endif - { + if ((rv = dtrace_helper_action_add(p, DTRACE_HELPER_ACTION_USTACK, + ep)) != 0) { /* * Adding this helper action failed -- we are now going * to rip out the entire generation and return failure. 
 			 */
-#if !defined(__APPLE__)
-			(void) dtrace_helper_destroygen(help->dthps_generation);
-#else
 			(void) dtrace_helper_destroygen(p, help->dthps_generation);
-#endif
 			dtrace_enabling_destroy(enab);
 			dtrace_dof_destroy(dof);
 			return (-1);
@@ -13885,17 +15882,9 @@ dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp)
 
 	if (dhp != NULL && nprovs > 0) {
 		dhp->dofhp_dof = (uint64_t)(uintptr_t)dof;
-#if !defined(__APPLE__)
-		if (dtrace_helper_provider_add(dhp, gen) == 0) {
-#else
 		if (dtrace_helper_provider_add(p, dhp, gen) == 0) {
-#endif
 			lck_mtx_unlock(&dtrace_lock);
-#if !defined(__APPLE__)
-			dtrace_helper_provider_register(curproc, help, dhp);
-#else
 			dtrace_helper_provider_register(p, help, dhp);
-#endif
 			lck_mtx_lock(&dtrace_lock);
 
 			destroy = 0;
@@ -13908,10 +15897,8 @@ dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp)
 	return (gen);
 }
 
-#if defined(__APPLE__)
-
 /*
- * DTrace lazy dof
+ * APPLE NOTE: DTrace lazy dof implementation
  *
 * DTrace user static probes (USDT probes) and helper actions are loaded
 * in a process by processing dof sections. The dof sections are passed
@@ -13968,7 +15955,7 @@ dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp)
 * If the dofs data is claimed by this method, dofs_claimed will be set.
 * Callers should not free claimed dofs.
 */
-int
+static int
 dtrace_lazy_dofs_add(proc_t *p, dof_ioctl_data_t* incoming_dofs, int *dofs_claimed)
 {
 	ASSERT(p);
@@ -13979,10 +15966,6 @@ dtrace_lazy_dofs_add(proc_t *p, dof_ioctl_data_t* incoming_dofs, int *dofs_claim
 
 	lck_rw_lock_shared(&dtrace_dof_mode_lock);
 
-	/*
-	 * If we have lazy dof, dof mode better be LAZY_ON.
-	 */
-	ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON);
 	ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);
 	ASSERT(dtrace_dof_mode != DTRACE_DOF_MODE_NEVER);
 
@@ -13990,7 +15973,7 @@ dtrace_lazy_dofs_add(proc_t *p, dof_ioctl_data_t* incoming_dofs, int *dofs_claim
 	 * Any existing helpers force non-lazy behavior.
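 	 *
 	 * (Editorial sketch of the protocol described above, matching
 	 * the branch below and the EACCES convention:
 	 *
 	 *	if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON &&
 	 *	    p->p_dtrace_helpers == NULL)
 	 *		stash the dofs on p->p_dtrace_lazy_dofs;
 	 *	else
 	 *		return EACCES, so the caller slurps eagerly;
 	 *
 	 * keeping lazy dofs and materialized helpers mutually
 	 * exclusive per process -- exactly what the ASSERTs encode.)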
*/ if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON && (p->p_dtrace_helpers == NULL)) { - lck_mtx_lock(&p->p_dtrace_sprlock); + dtrace_sprlock(p); dof_ioctl_data_t* existing_dofs = p->p_dtrace_lazy_dofs; @@ -14144,81 +16123,33 @@ dtrace_lazy_dofs_remove(proc_t *p, int generation) #endif } - - lck_mtx_unlock(&p->p_dtrace_sprlock); - } else { + dtrace_sprunlock(p); + } else { rval = EACCES; } lck_rw_unlock_shared(&dtrace_dof_mode_lock); - + return rval; } void dtrace_lazy_dofs_destroy(proc_t *p) { - lck_rw_lock_shared(&dtrace_dof_mode_lock); - lck_mtx_lock(&p->p_dtrace_sprlock); - - /* - * If we have lazy dof, dof mode better be LAZY_ON, or we must be exiting. - * We cannot assert against DTRACE_DOF_MODE_NEVER here, because we are called from - * kern_exit.c and kern_exec.c. - */ - ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON || p->p_lflag & P_LEXIT); - ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL); - - dof_ioctl_data_t* lazy_dofs = p->p_dtrace_lazy_dofs; - p->p_dtrace_lazy_dofs = NULL; - - lck_mtx_unlock(&p->p_dtrace_sprlock); - lck_rw_unlock_shared(&dtrace_dof_mode_lock); - - if (lazy_dofs) { - kmem_free(lazy_dofs, DOF_IOCTL_DATA_T_SIZE(lazy_dofs->dofiod_count)); - } -} - -void -dtrace_lazy_dofs_duplicate(proc_t *parent, proc_t *child) -{ - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); - lck_mtx_assert(&parent->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED); - lck_mtx_assert(&child->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED); - - lck_rw_lock_shared(&dtrace_dof_mode_lock); - lck_mtx_lock(&parent->p_dtrace_sprlock); - - /* - * If we have lazy dof, dof mode better be LAZY_ON, or we must be exiting. - * We cannot assert against DTRACE_DOF_MODE_NEVER here, because we are called from - * kern_fork.c - */ - ASSERT(parent->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON); - ASSERT(parent->p_dtrace_lazy_dofs == NULL || parent->p_dtrace_helpers == NULL); - /* - * In theory we should hold the child sprlock, but this is safe... - */ - ASSERT(child->p_dtrace_lazy_dofs == NULL && child->p_dtrace_helpers == NULL); - - dof_ioctl_data_t* parent_dofs = parent->p_dtrace_lazy_dofs; - dof_ioctl_data_t* child_dofs = NULL; - if (parent_dofs) { - size_t parent_dofs_size = DOF_IOCTL_DATA_T_SIZE(parent_dofs->dofiod_count); - child_dofs = kmem_alloc(parent_dofs_size, KM_SLEEP); - bcopy(parent_dofs, child_dofs, parent_dofs_size); - } + lck_rw_lock_shared(&dtrace_dof_mode_lock); + dtrace_sprlock(p); + + ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL); - lck_mtx_unlock(&parent->p_dtrace_sprlock); - - if (child_dofs) { - lck_mtx_lock(&child->p_dtrace_sprlock); - child->p_dtrace_lazy_dofs = child_dofs; - lck_mtx_unlock(&child->p_dtrace_sprlock); - } + dof_ioctl_data_t* lazy_dofs = p->p_dtrace_lazy_dofs; + p->p_dtrace_lazy_dofs = NULL; + dtrace_sprunlock(p); lck_rw_unlock_shared(&dtrace_dof_mode_lock); + + if (lazy_dofs) { + kmem_free(lazy_dofs, DOF_IOCTL_DATA_T_SIZE(lazy_dofs->dofiod_count)); + } } static int @@ -14231,29 +16162,24 @@ dtrace_lazy_dofs_proc_iterate_filter(proc_t *p, void* ignored) return p->p_dtrace_lazy_dofs != NULL; } -static int -dtrace_lazy_dofs_proc_iterate_doit(proc_t *p, void* ignored) -{ -#pragma unused(ignored) +static void +dtrace_lazy_dofs_process(proc_t *p) { /* * It is possible this process may exit during our attempt to * fault in the dof. We could fix this by holding locks longer, * but the errors are benign. 
*/ - lck_mtx_lock(&p->p_dtrace_sprlock); + dtrace_sprlock(p); + - /* - * In this case only, it is okay to have lazy dof when dof mode is DTRACE_DOF_MODE_LAZY_OFF - */ ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL); ASSERT(dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF); - dof_ioctl_data_t* lazy_dofs = p->p_dtrace_lazy_dofs; p->p_dtrace_lazy_dofs = NULL; - lck_mtx_unlock(&p->p_dtrace_sprlock); - + dtrace_sprunlock(p); + lck_mtx_lock(&dtrace_meta_lock); /* * Process each dof_helper_t */ @@ -14276,7 +16202,7 @@ dtrace_lazy_dofs_proc_iterate_doit(proc_t *p, void* ignored) dhp->dofhp_dof = dhp->dofhp_addr; dof_hdr_t *dof = dtrace_dof_copyin_from_proc(p, dhp->dofhp_dof, &rval); - + if (dof != NULL) { dtrace_helpers_t *help; @@ -14308,21 +16234,84 @@ dtrace_lazy_dofs_proc_iterate_doit(proc_t *p, void* ignored) lck_mtx_unlock(&dtrace_lock); } } - + lck_mtx_unlock(&dtrace_meta_lock); kmem_free(lazy_dofs, DOF_IOCTL_DATA_T_SIZE(lazy_dofs->dofiod_count)); + } else { + lck_mtx_unlock(&dtrace_meta_lock); } +} + +static int +dtrace_lazy_dofs_proc_iterate_doit(proc_t *p, void* ignored) +{ +#pragma unused(ignored) + + dtrace_lazy_dofs_process(p); return PROC_RETURNED; } -#endif /* __APPLE__ */ +#define DTRACE_LAZY_DOFS_DUPLICATED 1 + +static int +dtrace_lazy_dofs_duplicate(proc_t *parent, proc_t *child) +{ + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); + LCK_MTX_ASSERT(&parent->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED); + LCK_MTX_ASSERT(&child->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED); + + lck_rw_lock_shared(&dtrace_dof_mode_lock); + dtrace_sprlock(parent); + + /* + * We need to make sure that the transition to lazy dofs -> helpers + * was atomic for our parent + */ + ASSERT(parent->p_dtrace_lazy_dofs == NULL || parent->p_dtrace_helpers == NULL); + /* + * In theory we should hold the child sprlock, but this is safe... + */ + ASSERT(child->p_dtrace_lazy_dofs == NULL && child->p_dtrace_helpers == NULL); + + dof_ioctl_data_t* parent_dofs = parent->p_dtrace_lazy_dofs; + dof_ioctl_data_t* child_dofs = NULL; + if (parent_dofs) { + size_t parent_dofs_size = DOF_IOCTL_DATA_T_SIZE(parent_dofs->dofiod_count); + child_dofs = kmem_alloc(parent_dofs_size, KM_SLEEP); + bcopy(parent_dofs, child_dofs, parent_dofs_size); + } + + dtrace_sprunlock(parent); + + if (child_dofs) { + dtrace_sprlock(child); + child->p_dtrace_lazy_dofs = child_dofs; + dtrace_sprunlock(child); + /** + * We process the DOF at this point if the mode is set to + * LAZY_OFF. 
This can happen if DTrace is still processing the + * DOF of other process (which can happen because the + * protected pager can have a huge latency) + * but has not processed our parent yet + */ + if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF) { + dtrace_lazy_dofs_process(child); + } + lck_rw_unlock_shared(&dtrace_dof_mode_lock); + + return DTRACE_LAZY_DOFS_DUPLICATED; + } + lck_rw_unlock_shared(&dtrace_dof_mode_lock); + + return 0; +} static dtrace_helpers_t * dtrace_helpers_create(proc_t *p) { dtrace_helpers_t *help; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(p->p_dtrace_helpers == NULL); help = kmem_zalloc(sizeof (dtrace_helpers_t), KM_SLEEP); @@ -14335,20 +16324,14 @@ dtrace_helpers_create(proc_t *p) return (help); } -#if !defined(__APPLE__) -static void -dtrace_helpers_destroy(void) -{ - proc_t *p = curproc; -#else static void dtrace_helpers_destroy(proc_t* p) { -#endif dtrace_helpers_t *help; dtrace_vstate_t *vstate; - int i; + uint_t i; + lck_mtx_lock(&dtrace_meta_lock); lck_mtx_lock(&dtrace_lock); ASSERT(p->p_dtrace_helpers != NULL); @@ -14382,13 +16365,12 @@ dtrace_helpers_destroy(proc_t* p) * Destroy the helper providers. */ if (help->dthps_maxprovs > 0) { - lck_mtx_lock(&dtrace_meta_lock); if (dtrace_meta_pid != NULL) { ASSERT(dtrace_deferred_pid == NULL); for (i = 0; i < help->dthps_nprovs; i++) { dtrace_helper_provider_remove( - &help->dthps_provs[i]->dthp_prov, p->p_pid); + &help->dthps_provs[i]->dthp_prov, p); } } else { lck_mtx_lock(&dtrace_lock); @@ -14412,7 +16394,6 @@ dtrace_helpers_destroy(proc_t* p) lck_mtx_unlock(&dtrace_lock); } - lck_mtx_unlock(&dtrace_meta_lock); for (i = 0; i < help->dthps_nprovs; i++) { dtrace_helper_provider_destroy(help->dthps_provs[i]); @@ -14431,6 +16412,7 @@ dtrace_helpers_destroy(proc_t* p) --dtrace_helpers; lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&dtrace_meta_lock); } static void @@ -14440,8 +16422,10 @@ dtrace_helpers_duplicate(proc_t *from, proc_t *to) dtrace_helper_action_t *helper, *new, *last; dtrace_difo_t *dp; dtrace_vstate_t *vstate; - int i, j, sz, hasprovs = 0; + uint_t i; + int j, sz, hasprovs = 0; + lck_mtx_lock(&dtrace_meta_lock); lck_mtx_lock(&dtrace_lock); ASSERT(from->p_dtrace_helpers != NULL); ASSERT(dtrace_helpers > 0); @@ -14475,11 +16459,11 @@ dtrace_helpers_duplicate(proc_t *from, proc_t *to) new->dtha_actions = kmem_alloc(sz, KM_SLEEP); for (j = 0; j < new->dtha_nactions; j++) { - dtrace_difo_t *dp = helper->dtha_actions[j]; + dtrace_difo_t *dpj = helper->dtha_actions[j]; - ASSERT(dp != NULL); - dp = dtrace_difo_duplicate(dp, vstate); - new->dtha_actions[j] = dp; + ASSERT(dpj != NULL); + dpj = dtrace_difo_duplicate(dpj, vstate); + new->dtha_actions[j] = dpj; } if (last != NULL) { @@ -14506,38 +16490,427 @@ dtrace_helpers_duplicate(proc_t *from, proc_t *to) newhelp->dthps_provs[i]->dthp_ref++; } - hasprovs = 1; + hasprovs = 1; + } + + lck_mtx_unlock(&dtrace_lock); + + if (hasprovs) + dtrace_helper_provider_register(to, newhelp, NULL); + + lck_mtx_unlock(&dtrace_meta_lock); +} + +/** + * DTrace Process functions + */ + +void +dtrace_proc_fork(proc_t *parent_proc, proc_t *child_proc, int spawn) +{ + /* + * This code applies to new processes who are copying the task + * and thread state and address spaces of their parent process. + */ + if (!spawn) { + /* + * APPLE NOTE: Solaris does a sprlock() and drops the + * proc_lock here. We're cheating a bit and only taking + * the p_dtrace_sprlock lock. 
A full sprlock would + * task_suspend the parent. + */ + dtrace_sprlock(parent_proc); + + /* + * Remove all DTrace tracepoints from the child process. We + * need to do this _before_ duplicating USDT providers since + * any associated probes may be immediately enabled. + */ + if (parent_proc->p_dtrace_count > 0) { + dtrace_fasttrap_fork(parent_proc, child_proc); + } + + dtrace_sprunlock(parent_proc); + + /* + * Duplicate any lazy dof(s). This must be done while NOT + * holding the parent sprlock! Lock ordering is + * dtrace_dof_mode_lock, then sprlock. It is imperative we + * always call dtrace_lazy_dofs_duplicate, rather than null + * check and call if !NULL. If we NULL test, during lazy dof + * faulting we can race with the faulting code and proceed + * from here to beyond the helpers copy. The lazy dof + * faulting will then fail to copy the helpers to the child + * process. We return if we duplicated lazy dofs as a process + * can only have one at the same time to avoid a race between + * a dtrace client and dtrace_proc_fork where a process would + * end up with both lazy dofs and helpers. + */ + if (dtrace_lazy_dofs_duplicate(parent_proc, child_proc) == DTRACE_LAZY_DOFS_DUPLICATED) { + return; + } + + /* + * Duplicate any helper actions and providers if they haven't + * already. + */ +#if !defined(__APPLE__) + /* + * The SFORKING + * we set above informs the code to enable USDT probes that + * sprlock() may fail because the child is being forked. + */ +#endif + /* + * APPLE NOTE: As best I can tell, Apple's sprlock() equivalent + * never fails to find the child. We do not set SFORKING. + */ + if (parent_proc->p_dtrace_helpers != NULL && dtrace_helpers_fork) { + (*dtrace_helpers_fork)(parent_proc, child_proc); + } + } +} + +void +dtrace_proc_exec(proc_t *p) +{ + /* + * Invalidate any predicate evaluation already cached for this thread by DTrace. + * That's because we've just stored to p_comm and DTrace refers to that when it + * evaluates the "execname" special variable. uid and gid may have changed as well. + */ + dtrace_set_thread_predcache(current_thread(), 0); + + /* + * Free any outstanding lazy dof entries. It is imperative we + * always call dtrace_lazy_dofs_destroy, rather than null check + * and call if !NULL. If we NULL test, during lazy dof faulting + * we can race with the faulting code and proceed from here to + * beyond the helpers cleanup. The lazy dof faulting will then + * install new helpers which no longer belong to this process! + */ + dtrace_lazy_dofs_destroy(p); + + + /* + * Clean up any DTrace helpers for the process. + */ + if (p->p_dtrace_helpers != NULL && dtrace_helpers_cleanup) { + (*dtrace_helpers_cleanup)(p); + } + + /* + * Cleanup the DTrace provider associated with this process. + */ + proc_lock(p); + if (p->p_dtrace_probes && dtrace_fasttrap_exec_ptr) { + (*dtrace_fasttrap_exec_ptr)(p); + } + proc_unlock(p); +} + +void +dtrace_proc_exit(proc_t *p) +{ + /* + * Free any outstanding lazy dof entries. It is imperative we + * always call dtrace_lazy_dofs_destroy, rather than null check + * and call if !NULL. If we NULL test, during lazy dof faulting + * we can race with the faulting code and proceed from here to + * beyond the helpers cleanup. The lazy dof faulting will then + * install new helpers which will never be cleaned up, and leak. + */ + dtrace_lazy_dofs_destroy(p); + + /* + * Clean up any DTrace helper actions or probes for the process. 
+ */ + if (p->p_dtrace_helpers != NULL) { + (*dtrace_helpers_cleanup)(p); + } + + /* + * Clean up any DTrace probes associated with this process. + */ + /* + * APPLE NOTE: We release ptss pages/entries in dtrace_fasttrap_exit_ptr(), + * call this after dtrace_helpers_cleanup() + */ + proc_lock(p); + if (p->p_dtrace_probes && dtrace_fasttrap_exit_ptr) { + (*dtrace_fasttrap_exit_ptr)(p); + } + proc_unlock(p); +} + +/* + * DTrace Hook Functions + */ + +/* + * APPLE NOTE: dtrace_modctl_* routines for kext support. + * Used to manipulate the modctl list within dtrace xnu. + */ + +modctl_t *dtrace_modctl_list; + +static void +dtrace_modctl_add(struct modctl * newctl) +{ + struct modctl *nextp, *prevp; + + ASSERT(newctl != NULL); + LCK_MTX_ASSERT(&mod_lock, LCK_MTX_ASSERT_OWNED); + + // Insert new module at the front of the list, + + newctl->mod_next = dtrace_modctl_list; + dtrace_modctl_list = newctl; + + /* + * If a module exists with the same name, then that module + * must have been unloaded with enabled probes. We will move + * the unloaded module to the new module's stale chain and + * then stop traversing the list. + */ + + prevp = newctl; + nextp = newctl->mod_next; + + while (nextp != NULL) { + if (nextp->mod_loaded) { + /* This is a loaded module. Keep traversing. */ + prevp = nextp; + nextp = nextp->mod_next; + continue; + } + else { + /* Found an unloaded module */ + if (strncmp (newctl->mod_modname, nextp->mod_modname, KMOD_MAX_NAME)) { + /* Names don't match. Keep traversing. */ + prevp = nextp; + nextp = nextp->mod_next; + continue; + } + else { + /* We found a stale entry, move it. We're done. */ + prevp->mod_next = nextp->mod_next; + newctl->mod_stale = nextp; + nextp->mod_next = NULL; + break; + } + } + } +} + +static modctl_t * +dtrace_modctl_lookup(struct kmod_info * kmod) +{ + LCK_MTX_ASSERT(&mod_lock, LCK_MTX_ASSERT_OWNED); + + struct modctl * ctl; + + for (ctl = dtrace_modctl_list; ctl; ctl=ctl->mod_next) { + if (ctl->mod_id == kmod->id) + return(ctl); + } + return (NULL); +} + +/* + * This routine is called from dtrace_module_unloaded(). + * It removes a modctl structure and its stale chain + * from the kext shadow list. + */ +static void +dtrace_modctl_remove(struct modctl * ctl) +{ + ASSERT(ctl != NULL); + LCK_MTX_ASSERT(&mod_lock, LCK_MTX_ASSERT_OWNED); + modctl_t *prevp, *nextp, *curp; + + // Remove stale chain first + for (curp=ctl->mod_stale; curp != NULL; curp=nextp) { + nextp = curp->mod_stale; + /* There should NEVER be user symbols allocated at this point */ + ASSERT(curp->mod_user_symbols == NULL); + kmem_free(curp, sizeof(modctl_t)); + } + + prevp = NULL; + curp = dtrace_modctl_list; + + while (curp != ctl) { + prevp = curp; + curp = curp->mod_next; + } + + if (prevp != NULL) { + prevp->mod_next = ctl->mod_next; + } + else { + dtrace_modctl_list = ctl->mod_next; + } + + /* There should NEVER be user symbols allocated at this point */ + ASSERT(ctl->mod_user_symbols == NULL); + + kmem_free (ctl, sizeof(modctl_t)); +} + +/* + * APPLE NOTE: The kext loader will call dtrace_module_loaded + * when the kext is loaded in memory, but before calling the + * kext's start routine. 
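+ * Because the start routine has not yet run, probes can be created for
+ * a kext before any of the kext's own code executes.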
+ * + * Return 0 on success + * Return -1 on failure + */ + +static int +dtrace_module_loaded(struct kmod_info *kmod, uint32_t flag) +{ + dtrace_provider_t *prv; + + /* + * If kernel symbols have been disabled, return immediately + * DTRACE_KERNEL_SYMBOLS_NEVER is a permanent mode, it is safe to test without holding locks + */ + if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER) + return 0; + + struct modctl *ctl = NULL; + if (!kmod || kmod->address == 0 || kmod->size == 0) + return(-1); + + lck_mtx_lock(&dtrace_provider_lock); + lck_mtx_lock(&mod_lock); + + /* + * Have we seen this kext before? + */ + + ctl = dtrace_modctl_lookup(kmod); + + if (ctl != NULL) { + /* bail... we already have this kext in the modctl list */ + lck_mtx_unlock(&mod_lock); + lck_mtx_unlock(&dtrace_provider_lock); + if (dtrace_err_verbose) + cmn_err(CE_WARN, "dtrace load module already exists '%s %u' is failing against '%s %u'", kmod->name, (uint_t)kmod->id, ctl->mod_modname, ctl->mod_id); + return(-1); + } + else { + ctl = kmem_alloc(sizeof(struct modctl), KM_SLEEP); + if (ctl == NULL) { + if (dtrace_err_verbose) + cmn_err(CE_WARN, "dtrace module load '%s %u' is failing ", kmod->name, (uint_t)kmod->id); + lck_mtx_unlock(&mod_lock); + lck_mtx_unlock(&dtrace_provider_lock); + return (-1); + } + ctl->mod_next = NULL; + ctl->mod_stale = NULL; + strlcpy (ctl->mod_modname, kmod->name, sizeof(ctl->mod_modname)); + ctl->mod_loadcnt = kmod->id; + ctl->mod_nenabled = 0; + ctl->mod_address = kmod->address; + ctl->mod_size = kmod->size; + ctl->mod_id = kmod->id; + ctl->mod_loaded = 1; + ctl->mod_flags = 0; + ctl->mod_user_symbols = NULL; + + /* + * Find the UUID for this module, if it has one + */ + kernel_mach_header_t* header = (kernel_mach_header_t *)ctl->mod_address; + struct load_command* load_cmd = (struct load_command *)&header[1]; + uint32_t i; + for (i = 0; i < header->ncmds; i++) { + if (load_cmd->cmd == LC_UUID) { + struct uuid_command* uuid_cmd = (struct uuid_command *)load_cmd; + memcpy(ctl->mod_uuid, uuid_cmd->uuid, sizeof(uuid_cmd->uuid)); + ctl->mod_flags |= MODCTL_HAS_UUID; + break; + } + load_cmd = (struct load_command *)((caddr_t)load_cmd + load_cmd->cmdsize); + } + + if (ctl->mod_address == g_kernel_kmod_info.address) { + ctl->mod_flags |= MODCTL_IS_MACH_KERNEL; + memcpy(dtrace_kerneluuid, ctl->mod_uuid, sizeof(dtrace_kerneluuid)); + } + /* + * Static kexts have a UUID that is not used for symbolication, as all their + * symbols are in kernel + */ + else if ((flag & KMOD_DTRACE_STATIC_KEXT) == KMOD_DTRACE_STATIC_KEXT) { + memcpy(ctl->mod_uuid, dtrace_kerneluuid, sizeof(dtrace_kerneluuid)); + ctl->mod_flags |= MODCTL_IS_STATIC_KEXT; + } + } + dtrace_modctl_add(ctl); + + /* + * We must hold the dtrace_lock to safely test non permanent dtrace_fbt_symbol_mode(s) + */ + lck_mtx_lock(&dtrace_lock); + + /* + * DTrace must decide if it will instrument modules lazily via + * userspace symbols (default mode), or instrument immediately via + * kernel symbols (non-default mode) + * + * When in default/lazy mode, DTrace will only support modules + * built with a valid UUID. + * + * Overriding the default can be done explicitly in one of + * the following two ways. + * + * A module can force symbols from kernel space using the plist key, + * OSBundleForceDTraceInit (see kmod.h). If this per kext state is set, + * we fall through and instrument this module now. + * + * Or, the boot-arg, dtrace_kernel_symbol_mode, can be set to force symbols + * from kernel space (see dtrace_impl.h). 
If this system state is set + * to a non-userspace mode, we fall through and instrument the module now. + */ + + if ((dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) && + (!(flag & KMOD_DTRACE_FORCE_INIT))) + { + /* We will instrument the module lazily -- this is the default */ + lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&mod_lock); + lck_mtx_unlock(&dtrace_provider_lock); + return 0; } - + + /* We will instrument the module immediately using kernel symbols */ + if (!(flag & KMOD_DTRACE_NO_KERNEL_SYMS)) { + ctl->mod_flags |= MODCTL_HAS_KERNEL_SYMBOLS; + } + lck_mtx_unlock(&dtrace_lock); - - if (hasprovs) - dtrace_helper_provider_register(to, newhelp, NULL); -} - -/* - * DTrace Hook Functions - */ -static void -dtrace_module_loaded(struct modctl *ctl) -{ - dtrace_provider_t *prv; - - lck_mtx_lock(&dtrace_provider_lock); - lck_mtx_lock(&mod_lock); - - // ASSERT(ctl->mod_busy); - + /* * We're going to call each providers per-module provide operation * specifying only this module. */ for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next) - prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl); - + prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl); + + /* + * APPLE NOTE: The contract with the kext loader is that once this function + * has completed, it may delete kernel symbols at will. + * We must set this while still holding the mod_lock. + */ + ctl->mod_flags &= ~MODCTL_HAS_KERNEL_SYMBOLS; + lck_mtx_unlock(&mod_lock); lck_mtx_unlock(&dtrace_provider_lock); - + /* * If we have any retained enablings, we need to match against them. * Enabling probes requires that cpu_lock be held, and we cannot hold @@ -14547,60 +16920,106 @@ dtrace_module_loaded(struct modctl *ctl) * our task queue to do the match for us. */ lck_mtx_lock(&dtrace_lock); - + if (dtrace_retained == NULL) { lck_mtx_unlock(&dtrace_lock); - return; + return 0; } - - (void) taskq_dispatch(dtrace_taskq, - (task_func_t *)dtrace_enabling_matchall, NULL, TQ_SLEEP); - - lck_mtx_unlock(&dtrace_lock); - - /* - * And now, for a little heuristic sleaze: in general, we want to - * match modules as soon as they load. However, we cannot guarantee - * this, because it would lead us to the lock ordering violation - * outlined above. The common case, of course, is that cpu_lock is - * _not_ held -- so we delay here for a clock tick, hoping that that's - * long enough for the task queue to do its work. If it's not, it's - * not a serious problem -- it just means that the module that we - * just loaded may not be immediately instrumentable. + + /* APPLE NOTE! + * + * The cpu_lock mentioned above is only held by dtrace code, Apple's xnu never actually + * holds it for any reason. Thus the comment above is invalid, we can directly invoke + * dtrace_enabling_matchall without jumping through all the hoops, and we can avoid + * the delay call as well. 
*/ - delay(1); + lck_mtx_unlock(&dtrace_lock); + + dtrace_enabling_matchall(); + + return 0; } -static void -dtrace_module_unloaded(struct modctl *ctl) +/* + * Return 0 on success + * Return -1 on failure + */ +static int +dtrace_module_unloaded(struct kmod_info *kmod) { dtrace_probe_t template, *probe, *first, *next; dtrace_provider_t *prov; - - template.dtpr_mod = ctl->mod_modname; - - lck_mtx_lock(&dtrace_provider_lock); + struct modctl *ctl = NULL; + struct modctl *syncctl = NULL; + struct modctl *nextsyncctl = NULL; + int syncmode = 0; + + lck_mtx_lock(&dtrace_provider_lock); lck_mtx_lock(&mod_lock); lck_mtx_lock(&dtrace_lock); + if (kmod == NULL) { + syncmode = 1; + } + else { + ctl = dtrace_modctl_lookup(kmod); + if (ctl == NULL) + { + lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&mod_lock); + lck_mtx_unlock(&dtrace_provider_lock); + return (-1); + } + ctl->mod_loaded = 0; + ctl->mod_address = 0; + ctl->mod_size = 0; + } + if (dtrace_bymod == NULL) { /* * The DTrace module is loaded (obviously) but not attached; * we don't have any work to do. */ - lck_mtx_unlock(&dtrace_provider_lock); - lck_mtx_unlock(&mod_lock); + if (ctl != NULL) + (void)dtrace_modctl_remove(ctl); + lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&mod_lock); + lck_mtx_unlock(&dtrace_provider_lock); + return(0); + } + + /* Syncmode set means we target and traverse entire modctl list. */ + if (syncmode) + nextsyncctl = dtrace_modctl_list; + +syncloop: + if (syncmode) + { + /* find a stale modctl struct */ + for (syncctl = nextsyncctl; syncctl != NULL; syncctl=syncctl->mod_next) { + if (syncctl->mod_address == 0) + break; + } + if (syncctl==NULL) + { + /* We have no more work to do */ lck_mtx_unlock(&dtrace_lock); - return; + lck_mtx_unlock(&mod_lock); + lck_mtx_unlock(&dtrace_provider_lock); + return(0); + } + else { + /* keep track of next syncctl in case this one is removed */ + nextsyncctl = syncctl->mod_next; + ctl = syncctl; + } } + template.dtpr_mod = ctl->mod_modname; + for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template); probe != NULL; probe = probe->dtpr_nextmod) { - if (probe->dtpr_ecb != NULL) { - lck_mtx_unlock(&dtrace_provider_lock); - lck_mtx_unlock(&mod_lock); - lck_mtx_unlock(&dtrace_lock); - + if (probe->dtpr_ecb != NULL) { /* * This shouldn't _actually_ be possible -- we're * unloading a module that has an enabled probe in it. @@ -14611,12 +17030,22 @@ dtrace_module_unloaded(struct modctl *ctl) * assert, but we're not going to disable the * probe, either. 
*/ + + + if (syncmode) { + /* We're syncing, let's look at next in list */ + goto syncloop; + } + + lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&mod_lock); + lck_mtx_unlock(&dtrace_provider_lock); + if (dtrace_err_verbose) { cmn_err(CE_WARN, "unloaded module '%s' had " "enabled probes", ctl->mod_modname); } - - return; + return(-1); } } @@ -14626,8 +17055,10 @@ dtrace_module_unloaded(struct modctl *ctl) ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe); dtrace_probes[probe->dtpr_id - 1] = NULL; + probe->dtpr_provider->dtpv_probe_count--; next = probe->dtpr_nextmod; + dtrace_hash_remove(dtrace_byprov, probe); dtrace_hash_remove(dtrace_bymod, probe); dtrace_hash_remove(dtrace_byfunc, probe); dtrace_hash_remove(dtrace_byname, probe); @@ -14653,20 +17084,24 @@ dtrace_module_unloaded(struct modctl *ctl) prov = probe->dtpr_provider; prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg); - kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); - kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); - kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); + dtrace_strunref(probe->dtpr_mod); + dtrace_strunref(probe->dtpr_func); + dtrace_strunref(probe->dtpr_name); vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1); -#if !defined(__APPLE__) - kmem_free(probe, sizeof (dtrace_probe_t)); -#else + zfree(dtrace_probe_t_zone, probe); -#endif } + dtrace_modctl_remove(ctl); + + if (syncmode) + goto syncloop; + lck_mtx_unlock(&dtrace_lock); lck_mtx_unlock(&mod_lock); lck_mtx_unlock(&dtrace_provider_lock); + + return(0); } void @@ -14684,7 +17119,7 @@ dtrace_resume(void) static int dtrace_cpu_setup(cpu_setup_t what, processorid_t cpu) { - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); lck_mtx_lock(&dtrace_lock); switch (what) { @@ -14775,8 +17210,8 @@ dtrace_toxrange_add(uintptr_t base, uintptr_t limit) dtrace_toxrange = range; } - ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == NULL); - ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == NULL); + ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == 0); + ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == 0); dtrace_toxrange[dtrace_toxranges].dtt_base = base; dtrace_toxrange[dtrace_toxranges].dtt_limit = limit; @@ -14788,7 +17223,7 @@ dtrace_toxrange_add(uintptr_t base, uintptr_t limit) */ /*ARGSUSED*/ static int -dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) +dtrace_attach(dev_info_t *devi) { dtrace_provider_id_t id; dtrace_state_t *state = NULL; @@ -14798,31 +17233,7 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) lck_mtx_lock(&dtrace_provider_lock); lck_mtx_lock(&dtrace_lock); - if (ddi_soft_state_init(&dtrace_softstate, - sizeof (dtrace_state_t), 0) != 0) { - cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state"); - lck_mtx_unlock(&cpu_lock); - lck_mtx_unlock(&dtrace_provider_lock); - lck_mtx_unlock(&dtrace_lock); - return (DDI_FAILURE); - } - -#if !defined(__APPLE__) - if (ddi_create_minor_node(devi, DTRACEMNR_DTRACE, S_IFCHR, - DTRACEMNRN_DTRACE, DDI_PSEUDO, NULL) == DDI_FAILURE || - ddi_create_minor_node(devi, DTRACEMNR_HELPER, S_IFCHR, - DTRACEMNRN_HELPER, DDI_PSEUDO, NULL) == DDI_FAILURE) { - cmn_err(CE_NOTE, "/dev/dtrace couldn't create minor nodes"); - ddi_remove_minor_node(devi, NULL); - ddi_soft_state_fini(&dtrace_softstate); - lck_mtx_unlock(&cpu_lock); - lck_mtx_unlock(&dtrace_provider_lock); - lck_mtx_unlock(&dtrace_lock); - return (DDI_FAILURE); - } -#endif /* __APPLE__ */ - - ddi_report_dev(devi); + /* 
Darwin uses BSD cloning device driver to automagically obtain minor device number. */ dtrace_devi = devi; dtrace_modload = dtrace_module_loaded; @@ -14834,36 +17245,41 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) dtrace_cpustart_fini = dtrace_resume; dtrace_debugger_init = dtrace_suspend; dtrace_debugger_fini = dtrace_resume; - dtrace_kreloc_init = dtrace_suspend; - dtrace_kreloc_fini = dtrace_resume; register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL); - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); - dtrace_arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1, + dtrace_arena = vmem_create("dtrace", (void *)1, INT32_MAX, 1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER); - dtrace_minor = vmem_create("dtrace_minor", (void *)DTRACEMNRN_CLONE, - UINT32_MAX - DTRACEMNRN_CLONE, 1, NULL, NULL, NULL, 0, - VM_SLEEP | VMC_IDENTIFIER); - dtrace_taskq = taskq_create("dtrace_taskq", 1, maxclsyspri, - 1, INT_MAX, 0); dtrace_state_cache = kmem_cache_create("dtrace_state_cache", sizeof (dtrace_dstate_percpu_t) * (int)NCPU, DTRACE_STATE_ALIGN, NULL, NULL, NULL, NULL, NULL, 0); - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); - dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod), + dtrace_nprobes = dtrace_nprobes_default; + dtrace_probes = kmem_zalloc(sizeof(dtrace_probe_t*) * dtrace_nprobes, + KM_SLEEP); + + dtrace_byprov = dtrace_hash_create(dtrace_strkey_probe_provider, + 0, /* unused */ + offsetof(dtrace_probe_t, dtpr_nextprov), + offsetof(dtrace_probe_t, dtpr_prevprov)); + + dtrace_bymod = dtrace_hash_create(dtrace_strkey_deref_offset, + offsetof(dtrace_probe_t, dtpr_mod), offsetof(dtrace_probe_t, dtpr_nextmod), offsetof(dtrace_probe_t, dtpr_prevmod)); - dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func), + dtrace_byfunc = dtrace_hash_create(dtrace_strkey_deref_offset, + offsetof(dtrace_probe_t, dtpr_func), offsetof(dtrace_probe_t, dtpr_nextfunc), offsetof(dtrace_probe_t, dtpr_prevfunc)); - dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name), + dtrace_byname = dtrace_hash_create(dtrace_strkey_deref_offset, + offsetof(dtrace_probe_t, dtpr_name), offsetof(dtrace_probe_t, dtpr_nextname), offsetof(dtrace_probe_t, dtpr_prevname)); @@ -14891,30 +17307,23 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) ASSERT(dtrace_provider != NULL); ASSERT((dtrace_provider_id_t)dtrace_provider == id); -#if !defined(__APPLE__) +#if defined (__x86_64__) dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t) - dtrace_provider, NULL, NULL, "BEGIN", 0, NULL); + dtrace_provider, NULL, NULL, "BEGIN", 1, NULL); dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t) dtrace_provider, NULL, NULL, "END", 0, NULL); dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t) - dtrace_provider, NULL, NULL, "ERROR", 1, NULL); -#elif defined(__ppc__) || defined(__ppc64__) + dtrace_provider, NULL, NULL, "ERROR", 3, NULL); +#elif (defined(__arm__) || defined(__arm64__)) dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t) dtrace_provider, NULL, NULL, "BEGIN", 2, NULL); dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t) dtrace_provider, NULL, NULL, "END", 1, NULL); dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t) dtrace_provider, NULL, NULL, "ERROR", 4, NULL); -#elif (defined(__i386__) || defined (__x86_64__)) - dtrace_probeid_begin = 
dtrace_probe_create((dtrace_provider_id_t) - dtrace_provider, NULL, NULL, "BEGIN", 1, NULL); - dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t) - dtrace_provider, NULL, NULL, "END", 0, NULL); - dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t) - dtrace_provider, NULL, NULL, "ERROR", 3, NULL); #else #error Unknown Architecture -#endif /* __APPLE__ */ +#endif dtrace_anon_property(); lck_mtx_unlock(&cpu_lock); @@ -14940,6 +17349,13 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) if (dtrace_anon.dta_enabling != NULL) { ASSERT(dtrace_retained == dtrace_anon.dta_enabling); + /* + * APPLE NOTE: if handling anonymous dof, switch symbol modes. + */ + if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) { + dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_KERNEL; + } + dtrace_enabling_provide(NULL); state = dtrace_anon.dta_state; @@ -14958,7 +17374,7 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) lck_mtx_lock(&dtrace_lock); if ((enab = dtrace_anon.dta_enabling) != NULL) - (void) dtrace_enabling_match(enab, NULL); + (void) dtrace_enabling_match(enab, NULL, NULL); lck_mtx_unlock(&cpu_lock); } @@ -14976,8 +17392,6 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) return (DDI_SUCCESS); } -extern void fasttrap_init(void); - /*ARGSUSED*/ static int dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) @@ -14987,19 +17401,9 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) uint32_t priv; uid_t uid; zoneid_t zoneid; + int rv; -#if !defined(__APPLE__) - if (getminor(*devp) == DTRACEMNRN_HELPER) - return (0); - - /* - * If this wasn't an open with the "helper" minor, then it must be - * the "dtrace" minor. - */ - ASSERT(getminor(*devp) == DTRACEMNRN_DTRACE); -#else - /* Darwin puts Helper on its own major device. */ -#endif /* __APPLE__ */ + /* APPLE: Darwin puts Helper on its own major device. */ /* * If no DTRACE_PRIV_* bits are set in the credential, then the @@ -15009,13 +17413,11 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) if (priv == DTRACE_PRIV_NONE) return (EACCES); -#if defined(__APPLE__) /* - * We delay the initialization of fasttrap as late as possible. + * APPLE NOTE: We delay the initialization of fasttrap as late as possible. * It certainly can't be later than now! */ fasttrap_init(); -#endif /* __APPLE__ */ /* * Ask all providers to provide all their probes. @@ -15029,30 +17431,35 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) dtrace_opens++; dtrace_membar_producer(); +#ifdef illumos /* * If the kernel debugger is active (that is, if the kernel debugger * modified text in some way), we won't allow the open. 
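 	 * (This check is illumos-only; on Darwin the kdi_dtrace_set() calls
 	 * are compiled out, as kernel debugger state is not routed through
 	 * this interface.)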
*/
 	if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
 		dtrace_opens--;
-		lck_mtx_unlock(&cpu_lock);
 		lck_mtx_unlock(&dtrace_lock);
+		lck_mtx_unlock(&cpu_lock);
 		return (EBUSY);
 	}
+#endif
 
-	state = dtrace_state_create(devp, cred_p);
+	rv = dtrace_state_create(devp, cred_p, &state);
 	lck_mtx_unlock(&cpu_lock);
 
-	if (state == NULL) {
-		if (--dtrace_opens == 0)
+	if (rv != 0 || state == NULL) {
+		if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) {
+#ifdef illumos
 			(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
+#endif
+		}
 		lck_mtx_unlock(&dtrace_lock);
-		return (EAGAIN);
+		/* propagate EAGAIN or ERESTART */
+		return (rv);
 	}
-	
+
 	lck_mtx_unlock(&dtrace_lock);
 
-#if defined(__APPLE__)
 	lck_rw_lock_exclusive(&dtrace_dof_mode_lock);
 
 	/*
@@ -15064,7 +17471,16 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
 	 */
 	if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON) {
 		dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_OFF;
-		
+		/*
+		 * We do not need to hold the exclusive lock while processing
+		 * DOF on processes. We do need to make sure the mode does not
+		 * get changed back to DTRACE_DOF_MODE_LAZY_ON during that stage,
+		 * though (which should not happen anyway, since that only occurs
+		 * in dtrace_close). There is no way incomplete USDT probes can be
+		 * activated by any DTrace client here, since every client has to
+		 * call dtrace_open and block on dtrace_dof_mode_lock.
+		 */
+		lck_rw_lock_exclusive_to_shared(&dtrace_dof_mode_lock);
 		/*
 		 * Iterate all existing processes and load lazy dofs.
 		 */
@@ -15073,10 +17489,34 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
 			     NULL,
 			     dtrace_lazy_dofs_proc_iterate_filter,
 			     NULL);
+
+		lck_rw_unlock_shared(&dtrace_dof_mode_lock);
+	}
+	else {
+		lck_rw_unlock_exclusive(&dtrace_dof_mode_lock);
 	}
 
-	lck_rw_unlock_exclusive(&dtrace_dof_mode_lock);
-#endif
+
+	/*
+	 * Update kernel symbol state.
+	 *
+	 * We must own the provider and dtrace locks.
+	 *
+	 * NOTE! It may appear there is a race by setting this value so late
+	 * after dtrace_probe_provide. However, any kext loaded after the
+	 * call to probe provide and before we set LAZY_OFF will be marked as
+	 * eligible for symbols from userspace. The same dtrace client that is
+	 * currently calling dtrace_open() (this call!) will get a list of
+	 * kexts needing symbols and fill them in, thus closing the race window.
+	 *
+	 * We want to set this value only after it is certain it will succeed,
+	 * as this significantly reduces the complexity of error exits.
+	 */
+	lck_mtx_lock(&dtrace_lock);
+	if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) {
+		dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_KERNEL;
+	}
+	lck_mtx_unlock(&dtrace_lock);
 
 	return (0);
 }
@@ -15085,18 +17525,12 @@
 static int
 dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
 {
-#pragma unused(flag,otyp,cred_p)
+#pragma unused(flag, otyp, cred_p) /* __APPLE__ */
 	minor_t minor = getminor(dev);
 	dtrace_state_t *state;
 
-#if !defined(__APPLE__)
-	if (minor == DTRACEMNRN_HELPER)
-		return (0);
-#else
-	/* Darwin puts Helper on its own major device. */
-#endif /* __APPLE__ */
-
-	state = ddi_get_soft_state(dtrace_softstate, minor);
+	/* APPLE NOTE: Darwin puts Helper on its own major device.
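+	 * The helper device is registered separately by helper_init() with
+	 * its own cdevsw (helper_cdevsw, below) and is serviced by
+	 * dtrace_ioctl_helper() rather than by dtrace_ioctl().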
*/
+	state = dtrace_state_get(minor);
 
 	lck_mtx_lock(&cpu_lock);
 	lck_mtx_lock(&dtrace_lock);
@@ -15111,49 +17545,61 @@
 	dtrace_state_destroy(state);
 	ASSERT(dtrace_opens > 0);
-	if (--dtrace_opens == 0)
+
+	/*
+	 * Only relinquish control of the kernel debugger interface when there
+	 * are no consumers and no anonymous enablings.
+	 */
+	if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) {
+#ifdef illumos
 		(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
+#endif
+	}
 
 	lck_mtx_unlock(&dtrace_lock);
 	lck_mtx_unlock(&cpu_lock);
 
-#if defined(__APPLE__)
-
 	/*
 	 * Lock ordering requires the dof mode lock be taken before
 	 * the dtrace_lock.
 	 */
 	lck_rw_lock_exclusive(&dtrace_dof_mode_lock);
 	lck_mtx_lock(&dtrace_lock);
+
+	if (dtrace_opens == 0) {
+		/*
+		 * If we are currently lazy-off, and this is the last close, transition to
+		 * lazy state.
+		 */
+		if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF) {
+			dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
+		}
 
-	/*
-	 * If we are currently lazy-off, and this is the last close, transition to
-	 * lazy state.
-	 */
-	if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF && dtrace_opens == 0) {
-		dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
+		/*
+		 * If we are the last dtrace client, switch back to lazy
+		 * (from userspace) symbols.
+		 */
+		if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_KERNEL) {
+			dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE;
+		}
 	}
-	
+
 	lck_mtx_unlock(&dtrace_lock);
 	lck_rw_unlock_exclusive(&dtrace_dof_mode_lock);
-#endif
+
+	/*
+	 * Kext probes may be retained past the end of the kext's lifespan. The
+	 * probes are kept until the last reference to them has been removed.
+	 * Since closing an active dtrace context is likely to drop that last reference,
+	 * let's take a shot at cleaning out the orphaned probes now.
+	 */
+	dtrace_module_unloaded(NULL);
 
 	return (0);
 }
 
-#if defined(__APPLE__)
-/*
- * Introduce cast to quiet warnings.
- * XXX: This hides a lot of brokenness.
- */
-#define copyin(src, dst, len) copyin( (user_addr_t)(src), (dst), (len) )
-#define copyout(src, dst, len) copyout( (src), (user_addr_t)(dst), (len) )
-#endif /* __APPLE__ */
-
-#if defined(__APPLE__)
 /*ARGSUSED*/
 static int
-dtrace_ioctl_helper(int cmd, caddr_t arg, int *rv)
+dtrace_ioctl_helper(u_long cmd, caddr_t arg, int *rv)
 {
 #pragma unused(rv)
 	/*
@@ -15163,7 +17609,13 @@
 		return KERN_SUCCESS;
 
 	switch (cmd) {
-	case DTRACEHIOC_ADDDOF: {
+#if defined (__arm64__)
+	case DTRACEHIOC_ADDDOF_U32:
+	case DTRACEHIOC_ADDDOF_U64:
+#else
+	case DTRACEHIOC_ADDDOF:
+#endif /* __arm64__*/
+	{
 		dof_helper_t *dhp = NULL;
 		size_t dof_ioctl_data_size;
 		dof_ioctl_data_t* multi_dof;
@@ -15174,6 +17626,16 @@
 		int multi_dof_claimed = 0;
 		proc_t* p = current_proc();
 
+		/*
+		 * If this is a restricted process and dtrace is restricted,
+		 * do not allow DOFs to be registered.
+		 */
+		if (dtrace_is_restricted() &&
+			!dtrace_are_restrictions_relaxed() &&
+			!dtrace_can_attach_to_proc(current_proc())) {
+			return (EACCES);
+		}
+
 		/*
 		 * Read the number of DOF sections being passed in.
 		 */
 		if (copyin(user_addr + offsetof(dof_ioctl_data_t, dofiod_count),
 			   &dof_count,
 			   sizeof(dof_count))) {
 			dtrace_dof_error(NULL, "failed to copyin dofiod_count");
 			return (EFAULT);
 		}
-
+
 		/*
 		 * Range check the count.
*/ @@ -15234,6 +17696,7 @@ dtrace_ioctl_helper(int cmd, caddr_t arg, int *rv) dof_hdr_t *dof = dtrace_dof_copyin(dhp->dofhp_dof, &rval); if (dof != NULL) { + lck_mtx_lock(&dtrace_meta_lock); lck_mtx_lock(&dtrace_lock); /* @@ -15245,6 +17708,7 @@ dtrace_ioctl_helper(int cmd, caddr_t arg, int *rv) } lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&dtrace_meta_lock); } } while (++i < multi_dof->dofiod_count && rval == 0); } @@ -15285,9 +17749,11 @@ dtrace_ioctl_helper(int cmd, caddr_t arg, int *rv) * EACCES means non-lazy */ if (rval == EACCES) { + lck_mtx_lock(&dtrace_meta_lock); lck_mtx_lock(&dtrace_lock); rval = dtrace_helper_destroygen(p, generation); lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&dtrace_meta_lock); } return (rval); @@ -15299,30 +17765,23 @@ dtrace_ioctl_helper(int cmd, caddr_t arg, int *rv) return ENOTTY; } -#endif /* __APPLE__ */ /*ARGSUSED*/ static int -dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) +dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv) { #pragma unused(md) - minor_t minor = getminor(dev); dtrace_state_t *state; int rval; -#if !defined(__APPLE__) - if (minor == DTRACEMNRN_HELPER) - return (dtrace_ioctl_helper(cmd, arg, rv)); -#else /* Darwin puts Helper on its own major device. */ -#endif /* __APPLE__ */ - state = ddi_get_soft_state(dtrace_softstate, minor); + state = dtrace_state_get(minor); if (state->dts_anon) { - ASSERT(dtrace_anon.dta_state == NULL); - state = state->dts_anon; + ASSERT(dtrace_anon.dta_state == NULL); + state = state->dts_anon; } switch (cmd) { @@ -15330,14 +17789,14 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) dtrace_providerdesc_t pvd; dtrace_provider_t *pvp; - if (copyin((void *)arg, &pvd, sizeof (pvd)) != 0) + if (copyin(arg, &pvd, sizeof (pvd)) != 0) return (EFAULT); pvd.dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0'; lck_mtx_lock(&dtrace_provider_lock); for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) { - if (strcmp(pvp->dtpv_name, pvd.dtvd_name) == 0) + if (strncmp(pvp->dtpv_name, pvd.dtvd_name, DTRACE_PROVNAMELEN) == 0) break; } @@ -15348,7 +17807,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) bcopy(&pvp->dtpv_priv, &pvd.dtvd_priv, sizeof (dtrace_ppriv_t)); bcopy(&pvp->dtpv_attr, &pvd.dtvd_attr, sizeof (dtrace_pattr_t)); - if (copyout(&pvd, (void *)arg, sizeof (pvd)) != 0) + if (copyout(&pvd, arg, sizeof (pvd)) != 0) return (EFAULT); return (0); @@ -15363,7 +17822,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) uintptr_t dest; int nrecs; - if (copyin((void *)arg, &epdesc, sizeof (epdesc)) != 0) + if (copyin(arg, &epdesc, sizeof (epdesc)) != 0) return (EFAULT); lck_mtx_lock(&dtrace_lock); @@ -15398,7 +17857,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) * across the copyout(), below. 
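 	 * (The snapshot buffer is laid out as a single dtrace_eprobedesc_t
 	 * header followed by dtepd_nrecs trailing dtrace_recdesc_t records,
 	 * matching the flexible layout the consumer expects back.)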
*/ size = sizeof (dtrace_eprobedesc_t) + - (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t)); + (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t)); buf = kmem_alloc(size, KM_SLEEP); dest = (uintptr_t)buf; @@ -15414,13 +17873,13 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) break; bcopy(&act->dta_rec, (void *)dest, - sizeof (dtrace_recdesc_t)); + sizeof (dtrace_recdesc_t)); dest += sizeof (dtrace_recdesc_t); } lck_mtx_unlock(&dtrace_lock); - if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) { + if (copyout(buf, arg, dest - (uintptr_t)buf) != 0) { kmem_free(buf, size); return (EFAULT); } @@ -15440,7 +17899,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) size_t size; uintptr_t dest; - if (copyin((void *)arg, &aggdesc, sizeof (aggdesc)) != 0) + if (copyin(arg, &aggdesc, sizeof (aggdesc)) != 0) return (EFAULT); lck_mtx_lock(&dtrace_lock); @@ -15461,7 +17920,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) for (act = agg->dtag_first; ; act = act->dta_next) { ASSERT(act->dta_intuple || - DTRACEACT_ISAGG(act->dta_kind)); + DTRACEACT_ISAGG(act->dta_kind)); /* * If this action has a record size of zero, it @@ -15489,7 +17948,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) * across the copyout(), below. */ size = sizeof (dtrace_aggdesc_t) + - (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t)); + (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t)); buf = kmem_alloc(size, KM_SLEEP); dest = (uintptr_t)buf; @@ -15522,7 +17981,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) lck_mtx_unlock(&dtrace_lock); - if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) { + if (copyout(buf, arg, dest - (uintptr_t)buf) != 0) { kmem_free(buf, size); return (EFAULT); } @@ -15543,14 +18002,10 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) * If a NULL argument has been passed, we take this as our * cue to reevaluate our enablings. 
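 	 * (On Darwin this reevaluation is a direct call to
 	 * dtrace_enabling_matchall(); the dtrace_enabling_matchstate() pass
 	 * under cpu_lock that illumos performs here has been dropped.)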
*/ - if (arg == NULL) { - lck_mtx_lock(&cpu_lock); - lck_mtx_lock(&dtrace_lock); - err = dtrace_enabling_matchstate(state, rv); - lck_mtx_unlock(&dtrace_lock); - lck_mtx_unlock(&cpu_lock); + if (arg == 0) { + dtrace_enabling_matchall(); - return (err); + return (0); } if ((dof = dtrace_dof_copyin(arg, &rval)) == NULL) @@ -15582,14 +18037,14 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) return (rval); } - if ((err = dtrace_enabling_match(enab, rv)) == 0) { + if ((err = dtrace_enabling_match(enab, rv, NULL)) == 0) { err = dtrace_enabling_retain(enab); } else { dtrace_enabling_destroy(enab); } - lck_mtx_unlock(&cpu_lock); lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&cpu_lock); dtrace_dof_destroy(dof); return (err); @@ -15601,7 +18056,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) dtrace_probedesc_t *create = &desc.dtrpd_create; int err; - if (copyin((void *)arg, &desc, sizeof (desc)) != 0) + if (copyin(arg, &desc, sizeof (desc)) != 0) return (EFAULT); match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; @@ -15632,7 +18087,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) uid_t uid; zoneid_t zoneid; - if (copyin((void *)arg, &desc, sizeof (desc)) != 0) + if (copyin(arg, &desc, sizeof (desc)) != 0) return (EFAULT); desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; @@ -15651,20 +18106,19 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) desc.dtpd_id++; } - if (cmd == DTRACEIOC_PROBEMATCH) { - dtrace_probekey(&desc, &pkey); - pkey.dtpk_id = DTRACE_IDNONE; - } - dtrace_cred2priv(cr, &priv, &uid, &zoneid); lck_mtx_lock(&dtrace_lock); - if (cmd == DTRACEIOC_PROBEMATCH) { - for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) { + if (cmd == DTRACEIOC_PROBEMATCH) { + dtrace_probekey(&desc, &pkey); + pkey.dtpk_id = DTRACE_IDNONE; + + /* Quiet compiler warning */ + for (i = desc.dtpd_id; i <= (dtrace_id_t)dtrace_nprobes; i++) { if ((probe = dtrace_probes[i - 1]) != NULL && - (m = dtrace_match_probe(probe, &pkey, - priv, uid, zoneid)) != 0) + (m = dtrace_match_probe(probe, &pkey, + priv, uid, zoneid)) != 0) break; } @@ -15672,11 +18126,13 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) lck_mtx_unlock(&dtrace_lock); return (EINVAL); } + dtrace_probekey_release(&pkey); } else { - for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) { + /* Quiet compiler warning */ + for (i = desc.dtpd_id; i <= (dtrace_id_t)dtrace_nprobes; i++) { if ((probe = dtrace_probes[i - 1]) != NULL && - dtrace_match_priv(probe, priv, uid, zoneid)) + dtrace_match_priv(probe, priv, uid, zoneid)) break; } } @@ -15689,7 +18145,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) dtrace_probe_description(probe, &desc); lck_mtx_unlock(&dtrace_lock); - if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) + if (copyout(&desc, arg, sizeof (desc)) != 0) return (EFAULT); return (0); @@ -15700,7 +18156,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) dtrace_probe_t *probe; dtrace_provider_t *prov; - if (copyin((void *)arg, &desc, sizeof (desc)) != 0) + if (copyin(arg, &desc, sizeof (desc)) != 0) return (EFAULT); if (desc.dtargd_id == DTRACE_IDNONE) @@ -15713,7 +18169,8 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) lck_mtx_lock(&mod_lock); lck_mtx_lock(&dtrace_lock); - if (desc.dtargd_id > dtrace_nprobes) { + /* Quiet compiler warning */ + if (desc.dtargd_id > (dtrace_id_t)dtrace_nprobes) { 
lck_mtx_unlock(&dtrace_lock); lck_mtx_unlock(&mod_lock); lck_mtx_unlock(&dtrace_provider_lock); @@ -15732,10 +18189,10 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) prov = probe->dtpr_provider; if (prov->dtpv_pops.dtps_getargdesc == NULL) { - /* - * There isn't any typed information for this probe. - * Set the argument number to DTRACE_ARGNONE. - */ + /* + * There isn't any typed information for this probe. + * Set the argument number to DTRACE_ARGNONE. + */ desc.dtargd_ndx = DTRACE_ARGNONE; } else { desc.dtargd_native[0] = '\0'; @@ -15743,13 +18200,13 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) desc.dtargd_mapping = desc.dtargd_ndx; prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg, - probe->dtpr_id, probe->dtpr_arg, &desc); + probe->dtpr_id, probe->dtpr_arg, &desc); } lck_mtx_unlock(&mod_lock); lck_mtx_unlock(&dtrace_provider_lock); - if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) + if (copyout(&desc, arg, sizeof (desc)) != 0) return (EFAULT); return (0); @@ -15762,7 +18219,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) if (rval != 0) return (rval); - if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0) + if (copyout(&cpuid, arg, sizeof (cpuid)) != 0) return (EFAULT); return (0); @@ -15778,7 +18235,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) if (rval != 0) return (rval); - if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0) + if (copyout(&cpuid, arg, sizeof (cpuid)) != 0) return (EFAULT); return (0); @@ -15788,7 +18245,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) dof_hdr_t hdr, *dof; uint64_t len; - if (copyin((void *)arg, &hdr, sizeof (hdr)) != 0) + if (copyin(arg, &hdr, sizeof (hdr)) != 0) return (EFAULT); lck_mtx_lock(&dtrace_lock); @@ -15796,22 +18253,57 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) lck_mtx_unlock(&dtrace_lock); len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz); - rval = copyout(dof, (void *)arg, len); + rval = copyout(dof, arg, len); dtrace_dof_destroy(dof); return (rval == 0 ? 
0 : EFAULT);
 	}
 
+	case DTRACEIOC_SLEEP: {
+		int64_t time;
+		uint64_t abstime;
+		uint64_t rvalue = DTRACE_WAKE_TIMEOUT;
+
+		if (copyin(arg, &time, sizeof(time)) != 0)
+			return (EFAULT);
+
+		nanoseconds_to_absolutetime((uint64_t)time, &abstime);
+		clock_absolutetime_interval_to_deadline(abstime, &abstime);
+
+		if (assert_wait_deadline(state, THREAD_ABORTSAFE, abstime) == THREAD_WAITING) {
+			if (state->dts_buf_over_limit > 0) {
+				clear_wait(current_thread(), THREAD_INTERRUPTED);
+				rvalue = DTRACE_WAKE_BUF_LIMIT;
+			} else {
+				thread_block(THREAD_CONTINUE_NULL);
+				if (state->dts_buf_over_limit > 0) {
+					rvalue = DTRACE_WAKE_BUF_LIMIT;
+				}
+			}
+		}
+
+		if (copyout(&rvalue, arg, sizeof(rvalue)) != 0)
+			return (EFAULT);
+
+		return (0);
+	}
+
+	case DTRACEIOC_SIGNAL: {
+		wakeup(state);
+		return (0);
+	}
+
 	case DTRACEIOC_AGGSNAP:
 	case DTRACEIOC_BUFSNAP: {
 		dtrace_bufdesc_t desc;
 		caddr_t cached;
+		boolean_t over_limit;
 		dtrace_buffer_t *buf;
 
-		if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
+		if (copyin(arg, &desc, sizeof (desc)) != 0)
 			return (EFAULT);
 
-		if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= (int)NCPU)
+		if ((int)desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU)
 			return (EINVAL);
 
 		lck_mtx_lock(&dtrace_lock);
@@ -15844,7 +18336,7 @@
 			desc.dtbd_oldest = 0;
 			sz = sizeof (desc);
 
-			if (copyout(&desc, (void *)arg, sz) != 0)
+			if (copyout(&desc, arg, sz) != 0)
 				return (EFAULT);
 
 			return (0);
@@ -15859,7 +18351,7 @@
 				sz = buf->dtb_size;
 			}
 
-			if (copyout(buf->dtb_tomax, desc.dtbd_data, sz) != 0) {
+			if (copyout(buf->dtb_tomax, (user_addr_t)desc.dtbd_data, sz) != 0) {
 				lck_mtx_unlock(&dtrace_lock);
 				return (EFAULT);
 			}
@@ -15868,10 +18360,11 @@
 			desc.dtbd_drops = buf->dtb_drops;
 			desc.dtbd_errors = buf->dtb_errors;
 			desc.dtbd_oldest = buf->dtb_xamot_offset;
+			desc.dtbd_timestamp = dtrace_gethrtime();
 
 			lck_mtx_unlock(&dtrace_lock);
 
-			if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
+			if (copyout(&desc, arg, sizeof (desc)) != 0)
 				return (EFAULT);
 
 			buf->dtb_flags |= DTRACEBUF_CONSUMED;
@@ -15886,32 +18379,51 @@
 		}
 
 		cached = buf->dtb_tomax;
+		over_limit = buf->dtb_cur_limit == buf->dtb_size;
+
 		ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
 
 		dtrace_xcall(desc.dtbd_cpu,
-		    (dtrace_xcall_t)dtrace_buffer_switch, buf);
+			(dtrace_xcall_t)dtrace_buffer_switch, buf);
 
 		state->dts_errors += buf->dtb_xamot_errors;
 
 		/*
-		 * If the buffers did not actually switch, then the cross call
-		 * did not take place -- presumably because the given CPU is
-		 * not in the ready set.  If this is the case, we'll return
-		 * ENOENT.
-		 */
+		 * If the buffers did not actually switch, then the cross call
+		 * did not take place -- presumably because the given CPU is
+		 * not in the ready set.  If this is the case, we'll return
+		 * ENOENT.
+		 */
 		if (buf->dtb_tomax == cached) {
 			ASSERT(buf->dtb_xamot != cached);
 			lck_mtx_unlock(&dtrace_lock);
 			return (ENOENT);
 		}
 
-		ASSERT(cached == buf->dtb_xamot);
+		ASSERT(cached == buf->dtb_xamot);
+		/*
+		 * At this point we know the buffers have switched, so we
+		 * can decrement the over-limit count if the old buffer was
+		 * over its limit. The new buffer might already be over its
+		 * limit, but we don't care, since we're guaranteed not to be
+		 * checking the buffer over-limit count at this point.
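+		 *
+		 * (dts_buf_over_limit tracks how many per-CPU buffers in this
+		 * consumer state are currently at their limit; DTRACEIOC_SLEEP,
+		 * above, consults it so that a blocked consumer can be woken
+		 * early with DTRACE_WAKE_BUF_LIMIT.)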
+ */ + if (over_limit) { + uint32_t old = os_atomic_dec_orig(&state->dts_buf_over_limit, relaxed); + #pragma unused(old) + + /* + * Verify that we didn't underflow the value + */ + ASSERT(old != 0); + } /* - * We have our snapshot; now copy it out. - */ - if (copyout(buf->dtb_xamot, desc.dtbd_data, - buf->dtb_xamot_offset) != 0) { + * We have our snapshot; now copy it out. + */ + if (dtrace_buffer_copyout(buf->dtb_xamot, + (user_addr_t)desc.dtbd_data, + buf->dtb_xamot_offset) != 0) { lck_mtx_unlock(&dtrace_lock); return (EFAULT); } @@ -15920,13 +18432,14 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) desc.dtbd_drops = buf->dtb_xamot_drops; desc.dtbd_errors = buf->dtb_xamot_errors; desc.dtbd_oldest = 0; + desc.dtbd_timestamp = buf->dtb_switched; lck_mtx_unlock(&dtrace_lock); /* * Finally, copy out the buffer description. */ - if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) + if (copyout(&desc, arg, sizeof (desc)) != 0) return (EFAULT); return (0); @@ -15941,7 +18454,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) conf.dtc_diftupregs = DIF_DTR_NREGS; conf.dtc_ctfmodel = CTF_MODEL_NATIVE; - if (copyout(&conf, (void *)arg, sizeof (conf)) != 0) + if (copyout(&conf, arg, sizeof (conf)) != 0) return (EFAULT); return (0); @@ -15954,10 +18467,10 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) uint64_t nerrs; /* - * See the comment in dtrace_state_deadman() for the reason - * for setting dts_laststatus to INT64_MAX before setting - * it to the correct value. - */ + * See the comment in dtrace_state_deadman() for the reason + * for setting dts_laststatus to INT64_MAX before setting + * it to the correct value. + */ state->dts_laststatus = INT64_MAX; dtrace_membar_producer(); state->dts_laststatus = dtrace_gethrtime(); @@ -16004,12 +18517,12 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) stat.dtst_stkstroverflows = state->dts_stkstroverflows; stat.dtst_dblerrors = state->dts_dblerrors; stat.dtst_killed = - (state->dts_activity == DTRACE_ACTIVITY_KILLED); + (state->dts_activity == DTRACE_ACTIVITY_KILLED); stat.dtst_errors = nerrs; lck_mtx_unlock(&dtrace_lock); - if (copyout(&stat, (void *)arg, sizeof (stat)) != 0) + if (copyout(&stat, arg, sizeof (stat)) != 0) return (EFAULT); return (0); @@ -16020,13 +18533,13 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) char *str; int len; - if (copyin((void *)arg, &fmt, sizeof (fmt)) != 0) + if (copyin(arg, &fmt, sizeof (fmt)) != 0) return (EFAULT); lck_mtx_lock(&dtrace_lock); if (fmt.dtfd_format == 0 || - fmt.dtfd_format > state->dts_nformats) { + fmt.dtfd_format > state->dts_nformats) { lck_mtx_unlock(&dtrace_lock); return (EINVAL); } @@ -16038,7 +18551,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) * and that the format for the specified index is non-NULL. 
*/ ASSERT(state->dts_formats != NULL); - str = state->dts_formats[fmt.dtfd_format - 1]; + str = state->dts_formats[fmt.dtfd_format - 1]->dtf_str; ASSERT(str != NULL); len = strlen(str) + 1; @@ -16046,12 +18559,12 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) if (len > fmt.dtfd_length) { fmt.dtfd_length = len; - if (copyout(&fmt, (void *)arg, sizeof (fmt)) != 0) { + if (copyout(&fmt, arg, sizeof (fmt)) != 0) { lck_mtx_unlock(&dtrace_lock); return (EINVAL); } } else { - if (copyout(str, fmt.dtfd_string, len) != 0) { + if (copyout(str, (user_addr_t)fmt.dtfd_string, len) != 0) { lck_mtx_unlock(&dtrace_lock); return (EINVAL); } @@ -16061,6 +18574,298 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) return (0); } + case DTRACEIOC_MODUUIDSLIST: { + size_t module_uuids_list_size; + dtrace_module_uuids_list_t* uuids_list; + uint64_t dtmul_count; + + /* + * Security restrictions make this operation illegal, if this is enabled DTrace + * must refuse to provide any fbt probes. + */ + if (dtrace_fbt_probes_restricted()) { + cmn_err(CE_WARN, "security restrictions disallow DTRACEIOC_MODUUIDSLIST"); + return (EPERM); + } + + /* + * Fail if the kernel symbol mode makes this operation illegal. + * Both NEVER & ALWAYS_FROM_KERNEL are permanent states, it is legal to check + * for them without holding the dtrace_lock. + */ + if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER || + dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL) { + cmn_err(CE_WARN, "dtrace_kernel_symbol_mode of %u disallows DTRACEIOC_MODUUIDSLIST", dtrace_kernel_symbol_mode); + return (EPERM); + } + + /* + * Read the number of symbolsdesc structs being passed in. + */ + if (copyin(arg + offsetof(dtrace_module_uuids_list_t, dtmul_count), + &dtmul_count, + sizeof(dtmul_count))) { + cmn_err(CE_WARN, "failed to copyin dtmul_count"); + return (EFAULT); + } + + /* + * Range check the count. More than 2k kexts is probably an error. + */ + if (dtmul_count > 2048) { + cmn_err(CE_WARN, "dtmul_count is not valid"); + return (EINVAL); + } + + /* + * For all queries, we return EINVAL when the user specified + * count does not match the actual number of modules we find + * available. + * + * If the user specified count is zero, then this serves as a + * simple query to count the available modules in need of symbols. + */ + + rval = 0; + + if (dtmul_count == 0) + { + lck_mtx_lock(&mod_lock); + struct modctl* ctl = dtrace_modctl_list; + while (ctl) { + ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl)); + if (!MOD_SYMBOLS_DONE(ctl) && !MOD_IS_STATIC_KEXT(ctl)) { + dtmul_count++; + rval = EINVAL; + } + ctl = ctl->mod_next; + } + lck_mtx_unlock(&mod_lock); + + if (copyout(&dtmul_count, arg, sizeof (dtmul_count)) != 0) + return (EFAULT); + else + return (rval); + } + + /* + * If we reach this point, then we have a request for full list data. + * Allocate a correctly sized structure and copyin the data. + */ + module_uuids_list_size = DTRACE_MODULE_UUIDS_LIST_SIZE(dtmul_count); + if ((uuids_list = kmem_alloc(module_uuids_list_size, KM_SLEEP)) == NULL) + return (ENOMEM); + + /* NOTE! We can no longer exit this method via return */ + if (copyin(arg, uuids_list, module_uuids_list_size) != 0) { + cmn_err(CE_WARN, "failed copyin of dtrace_module_uuids_list_t"); + rval = EFAULT; + goto moduuidslist_cleanup; + } + + /* + * Check that the count didn't change between the first copyin and the second. 
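+	 * A caller racing with itself could otherwise size the request for
+	 * one count and have the kernel walk a different one -- a
+	 * time-of-check/time-of-use hazard across the two copyins.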
+ */ + if (uuids_list->dtmul_count != dtmul_count) { + rval = EINVAL; + goto moduuidslist_cleanup; + } + + /* + * Build the list of UUID's that need symbols + */ + lck_mtx_lock(&mod_lock); + + dtmul_count = 0; + + struct modctl* ctl = dtrace_modctl_list; + while (ctl) { + /* + * We assume that userspace symbols will be "better" than kernel level symbols, + * as userspace can search for dSYM(s) and symbol'd binaries. Even if kernel syms + * are available, add user syms if the module might use them. + */ + ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl)); + if (!MOD_SYMBOLS_DONE(ctl) && !MOD_IS_STATIC_KEXT(ctl)) { + UUID* uuid = &uuids_list->dtmul_uuid[dtmul_count]; + if (dtmul_count++ < uuids_list->dtmul_count) { + memcpy(uuid, ctl->mod_uuid, sizeof(UUID)); + } + } + ctl = ctl->mod_next; + } + + lck_mtx_unlock(&mod_lock); + + if (uuids_list->dtmul_count < dtmul_count) + rval = EINVAL; + + uuids_list->dtmul_count = dtmul_count; + + /* + * Copyout the symbols list (or at least the count!) + */ + if (copyout(uuids_list, arg, module_uuids_list_size) != 0) { + cmn_err(CE_WARN, "failed copyout of dtrace_symbolsdesc_list_t"); + rval = EFAULT; + } + + moduuidslist_cleanup: + /* + * If we had to allocate struct memory, free it. + */ + if (uuids_list != NULL) { + kmem_free(uuids_list, module_uuids_list_size); + } + + return rval; + } + + case DTRACEIOC_PROVMODSYMS: { + size_t module_symbols_size; + dtrace_module_symbols_t* module_symbols; + uint64_t dtmodsyms_count; + + /* + * Security restrictions make this operation illegal, if this is enabled DTrace + * must refuse to provide any fbt probes. + */ + if (dtrace_fbt_probes_restricted()) { + cmn_err(CE_WARN, "security restrictions disallow DTRACEIOC_MODUUIDSLIST"); + return (EPERM); + } + + /* + * Fail if the kernel symbol mode makes this operation illegal. + * Both NEVER & ALWAYS_FROM_KERNEL are permanent states, it is legal to check + * for them without holding the dtrace_lock. + */ + if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER || + dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL) { + cmn_err(CE_WARN, "dtrace_kernel_symbol_mode of %u disallows DTRACEIOC_PROVMODSYMS", dtrace_kernel_symbol_mode); + return (EPERM); + } + + /* + * Read the number of module symbols structs being passed in. + */ + if (copyin(arg + offsetof(dtrace_module_symbols_t, dtmodsyms_count), + &dtmodsyms_count, + sizeof(dtmodsyms_count))) { + cmn_err(CE_WARN, "failed to copyin dtmodsyms_count"); + return (EFAULT); + } + + /* + * Range check the count. How much data can we pass around? + * FIX ME! + */ + if (dtmodsyms_count == 0) { + cmn_err(CE_WARN, "dtmodsyms_count is not valid"); + return (EINVAL); + } + + /* + * Allocate a correctly sized structure and copyin the data. + */ + module_symbols_size = DTRACE_MODULE_SYMBOLS_SIZE(dtmodsyms_count); + if (module_symbols_size > (size_t)dtrace_copy_maxsize()) { + size_t dtmodsyms_max = DTRACE_MODULE_SYMBOLS_COUNT(dtrace_copy_maxsize()); + cmn_err(CE_WARN, "dtmodsyms_count %ld is too high, maximum is %ld", dtmodsyms_count, dtmodsyms_max); + return (ENOBUFS); + } + + if ((module_symbols = kmem_alloc(module_symbols_size, KM_SLEEP)) == NULL) + return (ENOMEM); + + rval = 0; + + /* NOTE! We can no longer exit this method via return */ + if (copyin(arg, module_symbols, module_symbols_size) != 0) { + cmn_err(CE_WARN, "failed copyin of dtrace_module_symbols_t"); + rval = EFAULT; + goto module_symbols_cleanup; + } + + /* + * Check that the count didn't change between the first copyin and the second. 
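+	 * (This is the same double-copyin consistency check performed for
+	 * DTRACEIOC_MODUUIDSLIST above.)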
+ */ + if (module_symbols->dtmodsyms_count != dtmodsyms_count) { + rval = EINVAL; + goto module_symbols_cleanup; + } + + /* + * Find the modctl to add symbols to. + */ + lck_mtx_lock(&dtrace_provider_lock); + lck_mtx_lock(&mod_lock); + + struct modctl* ctl = dtrace_modctl_list; + while (ctl) { + ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl)); + if (MOD_HAS_UUID(ctl) && !MOD_SYMBOLS_DONE(ctl) && memcmp(module_symbols->dtmodsyms_uuid, ctl->mod_uuid, sizeof(UUID)) == 0) { + dtrace_provider_t *prv; + ctl->mod_user_symbols = module_symbols; + + /* + * We're going to call each providers per-module provide operation + * specifying only this module. + */ + for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next) + prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl); + /* + * We gave every provider a chance to provide with the user syms, go ahead and clear them + */ + ctl->mod_user_symbols = NULL; /* MUST reset this to clear HAS_USERSPACE_SYMBOLS */ + } + ctl = ctl->mod_next; + } + + lck_mtx_unlock(&mod_lock); + lck_mtx_unlock(&dtrace_provider_lock); + + module_symbols_cleanup: + /* + * If we had to allocate struct memory, free it. + */ + if (module_symbols != NULL) { + kmem_free(module_symbols, module_symbols_size); + } + + return rval; + } + + case DTRACEIOC_PROCWAITFOR: { + dtrace_procdesc_t pdesc = { + .p_name = {0}, + .p_pid = -1 + }; + + if ((rval = copyin(arg, &pdesc, sizeof(pdesc))) != 0) + goto proc_waitfor_error; + + if ((rval = dtrace_proc_waitfor(&pdesc)) != 0) + goto proc_waitfor_error; + + if ((rval = copyout(&pdesc, arg, sizeof(pdesc))) != 0) + goto proc_waitfor_error; + + return 0; + + proc_waitfor_error: + /* The process was suspended, revert this since the client will not do it. */ + if (pdesc.p_pid != -1) { + proc_t *proc = proc_find(pdesc.p_pid); + if (proc != PROC_NULL) { + task_pidresume(proc->task); + proc_rele(proc); + } + } + + return rval; + } + default: break; } @@ -16068,11 +18873,9 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) return (ENOTTY); } -#if defined(__APPLE__) -#undef copyin -#undef copyout -#endif /* __APPLE__ */ - +/* + * APPLE NOTE: dtrace_detach not implemented + */ #if !defined(__APPLE__) /*ARGSUSED*/ static int @@ -16098,15 +18901,15 @@ dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) ASSERT(dtrace_opens == 0); if (dtrace_helpers > 0) { - lck_mtx_unlock(&dtrace_provider_lock); lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&dtrace_provider_lock); lck_mtx_unlock(&cpu_lock); return (DDI_FAILURE); } if (dtrace_unregister((dtrace_provider_id_t)dtrace_provider) != 0) { - lck_mtx_unlock(&dtrace_provider_lock); lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&dtrace_provider_lock); lck_mtx_unlock(&cpu_lock); return (DDI_FAILURE); } @@ -16154,15 +18957,18 @@ dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) dtrace_probes = NULL; dtrace_nprobes = 0; + dtrace_hash_destroy(dtrace_strings); + dtrace_hash_destroy(dtrace_byprov); dtrace_hash_destroy(dtrace_bymod); dtrace_hash_destroy(dtrace_byfunc); dtrace_hash_destroy(dtrace_byname); + dtrace_strings = NULL; + dtrace_byprov = NULL; dtrace_bymod = NULL; dtrace_byfunc = NULL; dtrace_byname = NULL; kmem_cache_destroy(dtrace_state_cache); - vmem_destroy(dtrace_minor); vmem_destroy(dtrace_arena); if (dtrace_toxrange != NULL) { @@ -16185,6 +18991,7 @@ dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) lck_mtx_unlock(&dtrace_lock); lck_mtx_unlock(&dtrace_provider_lock); +#ifdef illumos /* * We don't destroy the task queue until after we have dropped our * locks (taskq_destroy() may 
@@ -16195,93 +19002,11 @@
 	 */
 	taskq_destroy(dtrace_taskq);
 	dtrace_taskq = NULL;
+#endif
 
 	return (DDI_SUCCESS);
 }
-
-/*ARGSUSED*/
-static int
-dtrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
-{
-	int error;
-
-	switch (infocmd) {
-	case DDI_INFO_DEVT2DEVINFO:
-		*result = (void *)dtrace_devi;
-		error = DDI_SUCCESS;
-		break;
-	case DDI_INFO_DEVT2INSTANCE:
-		*result = (void *)0;
-		error = DDI_SUCCESS;
-		break;
-	default:
-		error = DDI_FAILURE;
-	}
-	return (error);
-}
-
-static struct cb_ops dtrace_cb_ops = {
-	dtrace_open,		/* open */
-	dtrace_close,		/* close */
-	nulldev,		/* strategy */
-	nulldev,		/* print */
-	nodev,			/* dump */
-	nodev,			/* read */
-	nodev,			/* write */
-	dtrace_ioctl,		/* ioctl */
-	nodev,			/* devmap */
-	nodev,			/* mmap */
-	nodev,			/* segmap */
-	nochpoll,		/* poll */
-	ddi_prop_op,		/* cb_prop_op */
-	0,			/* streamtab */
-	D_NEW | D_MP		/* Driver compatibility flag */
-};
-
-static struct dev_ops dtrace_ops = {
-	DEVO_REV,		/* devo_rev */
-	0,			/* refcnt */
-	dtrace_info,		/* get_dev_info */
-	nulldev,		/* identify */
-	nulldev,		/* probe */
-	dtrace_attach,		/* attach */
-	dtrace_detach,		/* detach */
-	nodev,			/* reset */
-	&dtrace_cb_ops,		/* driver operations */
-	NULL,			/* bus operations */
-	nodev			/* dev power */
-};
-
-static struct modldrv modldrv = {
-	&mod_driverops,		/* module type (this is a pseudo driver) */
-	"Dynamic Tracing",	/* name of module */
-	&dtrace_ops,		/* driver ops */
-};
-
-static struct modlinkage modlinkage = {
-	MODREV_1,
-	(void *)&modldrv,
-	NULL
-};
-
-int
-_init(void)
-{
-	return (mod_install(&modlinkage));
-}
-
-int
-_info(struct modinfo *modinfop)
-{
-	return (mod_info(&modlinkage, modinfop));
-}
-
-int
-_fini(void)
-{
-	return (mod_remove(&modlinkage));
-}
-#else
+#endif /* __APPLE__ */
 
 d_open_t _dtrace_open, helper_open;
 d_close_t _dtrace_close, helper_close;
@@ -16322,16 +19047,22 @@ _dtrace_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p)
 {
-#pragma unused(p)
 	int err, rv = 0;
+	user_addr_t uaddrp;
+
+	if (proc_is64bit(p))
+		uaddrp = *(user_addr_t *)data;
+	else
+		uaddrp = (user_addr_t) *(uint32_t *)data;
 
-	err = dtrace_ioctl(dev, (int)cmd, *(intptr_t *)data, fflag, CRED(), &rv);
+	err = dtrace_ioctl(dev, cmd, uaddrp, fflag, CRED(), &rv);
 
-	/* XXX Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */
+	/* Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */
 	if (err != 0) {
 		ASSERT( (err & 0xfffff000) == 0 );
-		return (err & 0xfff); /* ioctl returns -1 and errno set to an error code < 4096 */
+		return (err & 0xfff); /* ioctl will return -1 and will set errno to an error code < 4096 */
 	} else if (rv != 0) {
 		ASSERT( (rv & 0xfff00000) == 0 );
-		return (((rv & 0xfffff) << 12)); /* ioctl returns -1 and errno set to a return value >= 4096 */
+		return (((rv & 0xfffff) << 12)); /* ioctl will return -1 and will set errno to a value >= 4096 */
 	} else
 		return 0;
 }
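Given the packing above, a consumer has to unfold errno on its side: values below 4096 are genuine error codes, while anything at or above 4096 carries a 20-bit dtrace_ioctl() return value shifted left by 12. A minimal userspace sketch of the decode, with a hypothetical wrapper name and placeholder request/argument values:

#include <errno.h>
#include <sys/ioctl.h>

/*
 * Decode the overloaded errno described above: error codes stay below
 * 4096; larger values carry a 20-bit return value shifted left by 12.
 * The fd/request/arg here are placeholders, not a real DTrace call.
 */
int
dtrace_ioctl_decoded(int fd, unsigned long request, void *arg, int *rvp)
{
	if (ioctl(fd, request, arg) == 0)
		return (0);

	if (errno < 4096)
		return (errno);		/* genuine error code */

	*rvp = errno >> 12;		/* recovered dtrace_ioctl() rv */
	errno = 0;
	return (0);
}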
@@ -16342,40 +19073,34 @@ helper_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p)
 #pragma unused(dev,fflag,p)
 	int err, rv = 0;
 
-	err = dtrace_ioctl_helper((int)cmd, data, &rv);
-	/* XXX Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */
+	err = dtrace_ioctl_helper(cmd, data, &rv);
+	/* Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */
 	if (err != 0) {
 		ASSERT( (err & 0xfffff000) == 0 );
-		return (err & 0xfff); /* ioctl returns -1 and errno set to an error code < 4096 */
+		return (err & 0xfff); /* ioctl will return -1 and will set errno to an error code < 4096 */
 	} else if (rv != 0) {
 		ASSERT( (rv & 0xfff00000) == 0 );
-		return (((rv & 0xfffff) << 20)); /* ioctl returns -1 and errno set to a return value >= 4096 */
+		return (((rv & 0xfffff) << 12)); /* ioctl will return -1 and will set errno to a value >= 4096 */
 	} else
 		return 0;
 }
 
 #define HELPER_MAJOR  -24 /* let the kernel pick the device number */
 
-/*
- * A struct describing which functions will get invoked for certain
- * actions.
- */
-static struct cdevsw helper_cdevsw =
-{
-	helper_open,		/* open */
-	helper_close,		/* close */
-	eno_rdwrt,		/* read */
-	eno_rdwrt,		/* write */
-	helper_ioctl,		/* ioctl */
-	(stop_fcn_t *)nulldev,	/* stop */
-	(reset_fcn_t *)nulldev,	/* reset */
-	NULL,			/* tty's */
-	eno_select,		/* select */
-	eno_mmap,		/* mmap */
-	eno_strat,		/* strategy */
-	eno_getc,		/* getc */
-	eno_putc,		/* putc */
-	0			/* type */
+static const struct cdevsw helper_cdevsw =
+{
+	.d_open = helper_open,
+	.d_close = helper_close,
+	.d_read = eno_rdwrt,
+	.d_write = eno_rdwrt,
+	.d_ioctl = helper_ioctl,
+	.d_stop = (stop_fcn_t *)nulldev,
+	.d_reset = (reset_fcn_t *)nulldev,
+	.d_select = eno_select,
+	.d_mmap = eno_mmap,
+	.d_strategy = eno_strat,
+	.d_reserved_1 = eno_getc,
+	.d_reserved_2 = eno_putc,
 };
 
 static int helper_majdevno = 0;
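The cdevsw hunk above replaces positional initializers with C99 designated initializers: each entry now names its field, the dropped tty and type slots default to zero, and inserting or reordering struct members can no longer silently shift every later entry. A toy illustration with a made-up ops table:

#include <stddef.h>

/* A made-up dispatch table, standing in for struct cdevsw. */
struct toy_ops {
	int  (*op_open)(void);
	int  (*op_close)(void);
	int  (*op_ioctl)(void);
	void  *op_reserved;	/* defaults to NULL when omitted */
};

static int toy_open(void)  { return 0; }
static int toy_close(void) { return 0; }

/*
 * Positional form: every later field is silently misassigned if a new
 * member is ever inserted before op_ioctl.
 */
static const struct toy_ops positional = { toy_open, toy_close, 0, NULL };

/*
 * Designated form: each initializer names its field, unmentioned
 * fields are zero-initialized, and member reordering is harmless.
 */
static const struct toy_ops designated = {
	.op_open  = toy_open,
	.op_close = toy_close,
	/* .op_ioctl and .op_reserved default to 0/NULL */
};

int
main(void)
{
	/* Both tables wire up the same handlers; only the syntax differs. */
	return (positional.op_open == designated.op_open) ? 0 : 1;
}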
@@ -16415,28 +19140,13 @@ helper_init( void )
 
 #undef HELPER_MAJOR
 
-/*
- * Called with DEVFS_LOCK held, so vmem_alloc's underlying blist structures are protected.
- */
 static int
 dtrace_clone_func(dev_t dev, int action)
 {
 #pragma unused(dev)
 
 	if (action == DEVFS_CLONE_ALLOC) {
-		if (NULL == dtrace_minor) /* Arena not created yet!?! */
-			return 0;
-		else {
-			/*
-			 * Propose a minor number, namely the next number that vmem_alloc() will return.
-			 * Immediately put it back in play by calling vmem_free().
-			 */
-			int ret = (int)(uintptr_t)vmem_alloc(dtrace_minor, 1, VM_BESTFIT | VM_SLEEP);
-
-			vmem_free(dtrace_minor, (void *)(uintptr_t)ret, 1);
-
-			return ret;
-		}
+		return dtrace_state_reserve();
 	}
 	else if (action == DEVFS_CLONE_FREE) {
 		return 0;
 	}
@@ -16444,24 +19154,50 @@ dtrace_clone_func(dev_t dev, int action)
 	else
 		return -1;
 }
 
+void dtrace_ast(void);
+
+void
+dtrace_ast(void)
+{
+	int i;
+	uint32_t clients = os_atomic_xchg(&dtrace_wake_clients, 0, relaxed);
+	if (clients == 0)
+		return;
+	/**
+	 * We disable preemption here so that a wakeup to a higher-priority
+	 * thread cannot preempt us before we have issued all of the wakeups.
+	 */
+	disable_preemption();
+	for (i = 0; i < DTRACE_NCLIENTS; i++) {
+		if (clients & (1 << i)) {
+			dtrace_state_t *state = dtrace_state_get(i);
+			if (state) {
+				wakeup(state);
+			}
+		}
+	}
+	enable_preemption();
+}
+
+
 #define DTRACE_MAJOR  -24 /* let the kernel pick the device number */
 
-static struct cdevsw dtrace_cdevsw =
-{
-	_dtrace_open,		/* open */
-	_dtrace_close,		/* close */
-	eno_rdwrt,		/* read */
-	eno_rdwrt,		/* write */
-	_dtrace_ioctl,		/* ioctl */
-	(stop_fcn_t *)nulldev,	/* stop */
-	(reset_fcn_t *)nulldev,	/* reset */
-	NULL,			/* tty's */
-	eno_select,		/* select */
-	eno_mmap,		/* mmap */
-	eno_strat,		/* strategy */
-	eno_getc,		/* getc */
-	eno_putc,		/* putc */
-	0			/* type */
+static const struct cdevsw dtrace_cdevsw =
+{
+	.d_open = _dtrace_open,
+	.d_close = _dtrace_close,
+	.d_read = eno_rdwrt,
+	.d_write = eno_rdwrt,
+	.d_ioctl = _dtrace_ioctl,
+	.d_stop = (stop_fcn_t *)nulldev,
+	.d_reset = (reset_fcn_t *)nulldev,
+	.d_select = eno_select,
+	.d_mmap = eno_mmap,
+	.d_strategy = eno_strat,
+	.d_reserved_1 = eno_getc,
+	.d_reserved_2 = eno_putc,
 };
 
 lck_attr_t* dtrace_lck_attr;
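dtrace_ast() above claims the entire pending-client bitmask with a single atomic exchange, so each posted bit is observed exactly once even if new wakeups arrive while draining. A compact userspace analogue of that claim-and-drain pattern, using C11 atomics in place of the kernel's os_atomic_xchg():

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define NCLIENTS 32

static _Atomic uint32_t pending;	/* one bit per client, as in dtrace_wake_clients */

/* Producer side: post client i (the kernel would also arm the AST). */
static void
post_wakeup(int i)
{
	atomic_fetch_or_explicit(&pending, 1u << i, memory_order_relaxed);
}

/* Consumer side: claim every posted bit in one shot, then drain. */
static void
drain_wakeups(void)
{
	uint32_t clients = atomic_exchange_explicit(&pending, 0, memory_order_relaxed);

	for (int i = 0; i < NCLIENTS; i++) {
		if (clients & (1u << i))
			printf("wakeup(client %d)\n", i);	/* stands in for wakeup(state) */
	}
}

int
main(void)
{
	post_wakeup(3);
	post_wakeup(17);
	drain_wakeups();	/* prints wakeups for clients 3 and 17 */
	return 0;
}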
@@ -16470,11 +19206,61 @@
 lck_grp_t* dtrace_lck_grp;
 
 static int gMajDevNo;
 
+void dtrace_early_init(void)
+{
+	dtrace_restriction_policy_load();
+
+	/*
+	 * See dtrace_impl.h for a description of kernel symbol modes.
+	 * The default is to wait for symbols from userspace (lazy symbols).
+	 */
+	if (!PE_parse_boot_argn("dtrace_kernel_symbol_mode", &dtrace_kernel_symbol_mode, sizeof (dtrace_kernel_symbol_mode))) {
+		dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE;
+	}
+}
+
 void
 dtrace_init( void )
 {
 	if (0 == gDTraceInited) {
-		int i, ncpu = NCPU;
+		unsigned int i, ncpu;
+		size_t size = sizeof(dtrace_buffer_memory_maxsize);
+
+		/*
+		 * Disable destructive actions when DTrace is running
+		 * in a restricted environment.
+		 */
+		dtrace_destructive_disallow = dtrace_is_restricted() &&
+		    !dtrace_are_restrictions_relaxed();
+
+		/*
+		 * DTrace allocates buffers based on the maximum number
+		 * of enabled CPUs. This call avoids any race when finding
+		 * that count.
+		 */
+		ASSERT(dtrace_max_cpus == 0);
+		ncpu = dtrace_max_cpus = ml_wait_max_cpus();
+
+		/*
+		 * Retrieve the size of physical memory in order to define
+		 * the maximum size of the state buffer memory. If we cannot
+		 * retrieve this value, assume 1GB of memory per CPU; that is
+		 * still better than raising a kernel panic.
+		 */
+		if (0 != kernel_sysctlbyname("hw.memsize", &dtrace_buffer_memory_maxsize,
+		                             &size, NULL, 0))
+		{
+			dtrace_buffer_memory_maxsize = ncpu * 1024 * 1024 * 1024;
+			printf("dtrace_init: failed to retrieve the hw.memsize, defaulted to %lld bytes\n",
+			       dtrace_buffer_memory_maxsize);
+		}
+
+		/*
+		 * Finally, divide by three to prevent DTrace from eating too
+		 * much memory.
+		 */
+		dtrace_buffer_memory_maxsize /= 3;
+		ASSERT(dtrace_buffer_memory_maxsize > 0);
 
 		gMajDevNo = cdevsw_add(DTRACE_MAJOR, &dtrace_cdevsw);
 
@@ -16491,28 +19277,11 @@ dtrace_init( void )
 			return;
 		}
 
-#if defined(DTRACE_MEMORY_ZONES)
-
-		/*
-		 * Initialize the dtrace kalloc-emulation zones.
-		 */
-		dtrace_alloc_init();
-
-#endif /* DTRACE_MEMORY_ZONES */
-
-		/*
-		 * Allocate the dtrace_probe_t zone
-		 */
-		dtrace_probe_t_zone = zinit(sizeof(dtrace_probe_t),
-		    1024 * sizeof(dtrace_probe_t),
-		    sizeof(dtrace_probe_t),
-		    "dtrace.dtrace_probe_t");
-
 		/*
 		 * Create the dtrace lock group and attrs.
 		 */
 		dtrace_lck_attr = lck_attr_alloc_init();
-		dtrace_lck_grp_attr= lck_grp_attr_alloc_init();
+		dtrace_lck_grp_attr = lck_grp_attr_alloc_init();
 		dtrace_lck_grp = lck_grp_alloc_init("dtrace", dtrace_lck_grp_attr);
 
 		/*
@@ -16521,7 +19290,8 @@ dtrace_init( void )
 		lck_mtx_init(&dtrace_lock, dtrace_lck_grp, dtrace_lck_attr);
 		lck_mtx_init(&dtrace_provider_lock, dtrace_lck_grp, dtrace_lck_attr);
 		lck_mtx_init(&dtrace_meta_lock, dtrace_lck_grp, dtrace_lck_attr);
-#ifdef DEBUG
+		lck_mtx_init(&dtrace_procwaitfor_lock, dtrace_lck_grp, dtrace_lck_attr);
+#if DEBUG
 		lck_mtx_init(&dtrace_errlock, dtrace_lck_grp, dtrace_lck_attr);
 #endif
 		lck_rw_init(&dtrace_dof_mode_lock, dtrace_lck_grp, dtrace_lck_attr);
@@ -16534,36 +19304,56 @@ dtrace_init( void )
 		 * the structure is sized to avoid false sharing.
 		 */
 		lck_mtx_init(&cpu_lock, dtrace_lck_grp, dtrace_lck_attr);
+		lck_mtx_init(&cyc_lock, dtrace_lck_grp, dtrace_lck_attr);
 		lck_mtx_init(&mod_lock, dtrace_lck_grp, dtrace_lck_attr);
 
+		/*
+		 * Initialize the CPU offline/online hooks.
+		 */
+		dtrace_install_cpu_hooks();
+
+		dtrace_modctl_list = NULL;
+
 		cpu_core = (cpu_core_t *)kmem_zalloc( ncpu * sizeof(cpu_core_t), KM_SLEEP );
 		for (i = 0; i < ncpu; ++i) {
 			lck_mtx_init(&cpu_core[i].cpuc_pid_lock, dtrace_lck_grp, dtrace_lck_attr);
 		}
 
-		cpu_list = (cpu_t *)kmem_zalloc( ncpu * sizeof(cpu_t), KM_SLEEP );
+		cpu_list = (dtrace_cpu_t *)kmem_zalloc( ncpu * sizeof(dtrace_cpu_t), KM_SLEEP );
 		for (i = 0; i < ncpu; ++i) {
 			cpu_list[i].cpu_id = (processorid_t)i;
 			cpu_list[i].cpu_next = &(cpu_list[(i+1) % ncpu]);
+			LIST_INIT(&cpu_list[i].cpu_cyc_list);
 			lck_rw_init(&cpu_list[i].cpu_ft_lock, dtrace_lck_grp, dtrace_lck_attr);
 		}
 
 		lck_mtx_lock(&cpu_lock);
 		for (i = 0; i < ncpu; ++i)
+			/* FIXME: track CPU configuration */
			dtrace_cpu_setup_initial( (processorid_t)i ); /* In lieu of register_cpu_setup_func() callback */
 		lck_mtx_unlock(&cpu_lock);
 
 		(void)dtrace_abs_to_nano(0LL); /* Force a once-only call to clock_timebase_info (which can take a lock) */
 
+		dtrace_strings = dtrace_hash_create(dtrace_strkey_offset,
+		    offsetof(dtrace_string_t, dtst_str),
+		    offsetof(dtrace_string_t, dtst_next),
+		    offsetof(dtrace_string_t, dtst_prev));
+
+		dtrace_isa_init();
 
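The dtrace_strings table just above is created by handing dtrace_hash_create() a set of offsetof() values, letting one generic hash walk link fields embedded in any element type. A stripped-down userspace sketch of that intrusive, offset-driven linking (the real hash also takes a key function and buckets; only the offset trick is shown, with made-up names):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Example element type; the link pointer lives inside the element. */
typedef struct str_node {
	const char *sn_str;
	struct str_node *sn_next;
} str_node_t;

/* A generic list head that knows only a field offset, not the type. */
typedef struct ohash {
	size_t oh_nextoffs;	/* offset of the next pointer in the element */
	void *oh_head;
} ohash_t;

#define NEXTP(h, elm) ((void **)((uintptr_t)(elm) + (h)->oh_nextoffs))

static void
ohash_push(ohash_t *h, void *elm)
{
	*NEXTP(h, elm) = h->oh_head;	/* elm->next = head, located via the offset */
	h->oh_head = elm;
}

int
main(void)
{
	ohash_t h = { offsetof(str_node_t, sn_next), NULL };
	str_node_t a = { "first", NULL }, b = { "second", NULL };

	ohash_push(&h, &a);
	ohash_push(&h, &b);

	for (str_node_t *n = h.oh_head; n != NULL; n = n->sn_next)
		printf("%s\n", n->sn_str);	/* prints "second", then "first" */
	return 0;
}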
 	/*
 	 * See dtrace_impl.h for a description of dof modes.
 	 * The default is lazy dof.
 	 *
-	 * XXX Warn if state is LAZY_OFF? It won't break anything, but
+	 * FIXME: Warn if state is LAZY_OFF? It won't break anything, but
 	 * makes no sense...
 	 */
 	if (!PE_parse_boot_argn("dtrace_dof_mode", &dtrace_dof_mode, sizeof (dtrace_dof_mode))) {
+#if defined(XNU_TARGET_OS_OSX)
 		dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
+#else
+		dtrace_dof_mode = DTRACE_DOF_MODE_NEVER;
+#endif
 	}
 
 	/*
@@ -16588,6 +19378,11 @@ dtrace_init( void )
 		break;
 	}
 
+#if CONFIG_DTRACE
+	if (dtrace_dof_mode != DTRACE_DOF_MODE_NEVER)
+		commpage_update_dof(true);
+#endif
+
 	gDTraceInited = 1;
 
 	} else
@@ -16597,7 +19392,29 @@
 void
 dtrace_postinit(void)
 {
-	dtrace_attach( (dev_info_t *)makedev(gMajDevNo, 0), 0 );
+	/*
+	 * Called from bsd_init after all providers' *_init() routines have been
+	 * run. That way, anonymous DOF enabled under dtrace_attach() is safe
+	 * to process.
+	 */
+	dtrace_attach( (dev_info_t *)(uintptr_t)makedev(gMajDevNo, 0)); /* Punning a dev_t to a dev_info_t* */
+
+	/*
+	 * Add the mach_kernel to the module list for lazy processing.
+	 */
+	struct kmod_info fake_kernel_kmod;
+	memset(&fake_kernel_kmod, 0, sizeof(fake_kernel_kmod));
+
+	strlcpy(fake_kernel_kmod.name, "mach_kernel", sizeof(fake_kernel_kmod.name));
+	fake_kernel_kmod.id = 1;
+	fake_kernel_kmod.address = g_kernel_kmod_info.address;
+	fake_kernel_kmod.size = g_kernel_kmod_info.size;
+
+	if (dtrace_module_loaded(&fake_kernel_kmod, 0) != 0) {
+		printf("dtrace_postinit: Could not register mach_kernel modctl\n");
+	}
+
+	(void)OSKextRegisterKextsWithDTrace();
 }
 
 #undef DTRACE_MAJOR
 
@@ -16616,4 +19433,3 @@ unregister_cpu_setup_func(cpu_setup_func_t *ignore1, void *ignore2)
 {
 #pragma unused(ignore1,ignore2)
 }
-#endif /* __APPLE__ */
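For completeness, the DTRACEIOC_PROCWAITFOR path added earlier is driven from userspace roughly as sketched below. The header, device path, and error handling are illustrative rather than the canonical libdtrace flow, and the matched process is returned suspended, so the caller must resume it:

#include <fcntl.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/types.h>
#include <unistd.h>
#include <sys/dtrace.h>	/* private XNU header: dtrace_procdesc_t, DTRACEIOC_PROCWAITFOR */

/*
 * Illustrative only: blocks until a process with the given name
 * launches; the kernel fills in p_pid and leaves the task suspended.
 */
int
waitfor_process(const char *name, pid_t *pid_out)
{
	dtrace_procdesc_t pdesc;
	int fd, err;

	memset(&pdesc, 0, sizeof(pdesc));
	strlcpy(pdesc.p_name, name, sizeof(pdesc.p_name));
	pdesc.p_pid = -1;

	if ((fd = open("/dev/dtrace", O_RDWR)) < 0)
		return (-1);

	err = ioctl(fd, DTRACEIOC_PROCWAITFOR, &pdesc);
	close(fd);
	if (err != 0)
		return (-1);

	*pid_out = pdesc.p_pid;	/* caller must eventually resume the task */
	return (0);
}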