X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/ecc0ceb4089d506a0b8d16686a95817b331af9cb..d9a64523371fa019c4575bb400cbbc3a50ac9903:/bsd/dev/dtrace/dtrace.c diff --git a/bsd/dev/dtrace/dtrace.c b/bsd/dev/dtrace/dtrace.c index 4a2e5e23a..a83adc712 100644 --- a/bsd/dev/dtrace/dtrace.c +++ b/bsd/dev/dtrace/dtrace.c @@ -20,7 +20,7 @@ */ /* - * Portions Copyright (c) 2013, Joyent, Inc. All rights reserved. + * Portions Copyright (c) 2013, 2016, Joyent, Inc. All rights reserved. * Portions Copyright (c) 2013 by Delphix. All rights reserved. */ @@ -61,6 +61,7 @@ * - Enabling functions * - DOF functions * - Anonymous enabling functions + * - Process functions * - Consumer state functions * - Helper functions * - Hook functions @@ -93,8 +94,18 @@ #include #include #include +#include #include #include +#include +#include + +#if MONOTONIC +#include +#include +#endif /* MONOTONIC */ + +#include #include extern uint32_t pmap_find_phys(void *, uint64_t); @@ -109,21 +120,19 @@ extern kmod_info_t g_kernel_kmod_info; extern void dtrace_suspend(void); extern void dtrace_resume(void); +extern void dtrace_early_init(void); +extern int dtrace_keep_kernel_symbols(void); extern void dtrace_init(void); extern void helper_init(void); extern void fasttrap_init(void); -extern void dtrace_lazy_dofs_duplicate(proc_t *, proc_t *); + +static int dtrace_lazy_dofs_duplicate(proc_t *, proc_t *); extern void dtrace_lazy_dofs_destroy(proc_t *); extern void dtrace_postinit(void); -#include "../../../osfmk/chud/chud_dtrace.h" - -extern kern_return_t chudxnu_dtrace_callback - (uint64_t selector, uint64_t *args, uint32_t count); - -/* Import this function to retrieve the physical memory. 
*/ -extern int kernel_sysctlbyname(const char *name, void *oldp, - size_t *oldlenp, void *newp, size_t newlen); +extern void dtrace_proc_fork(proc_t*, proc_t*, int); +extern void dtrace_proc_exec(proc_t*); +extern void dtrace_proc_exit(proc_t*); /* * DTrace Tunable Variables @@ -146,7 +155,7 @@ uint64_t dtrace_buffer_memory_inuse = 0; int dtrace_destructive_disallow = 0; dtrace_optval_t dtrace_nonroot_maxsize = (16 * 1024 * 1024); size_t dtrace_difo_maxsize = (256 * 1024); -dtrace_optval_t dtrace_dof_maxsize = (384 * 1024); +dtrace_optval_t dtrace_dof_maxsize = (512 * 1024); dtrace_optval_t dtrace_statvar_maxsize = (16 * 1024); dtrace_optval_t dtrace_statvar_maxsize_max = (16 * 10 * 1024); size_t dtrace_actions_max = (16 * 1024); @@ -155,6 +164,8 @@ dtrace_optval_t dtrace_helper_actions_max = 32; dtrace_optval_t dtrace_helper_providers_max = 64; dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024); size_t dtrace_strsize_default = 256; +dtrace_optval_t dtrace_strsize_min = 8; +dtrace_optval_t dtrace_strsize_max = 65536; dtrace_optval_t dtrace_cleanrate_default = 990099000; /* 1.1 hz */ dtrace_optval_t dtrace_cleanrate_min = 20000000; /* 50 hz */ dtrace_optval_t dtrace_cleanrate_max = (uint64_t)60 * NANOSEC; /* 1/minute */ @@ -168,6 +179,9 @@ dtrace_optval_t dtrace_stackframes_default = 20; dtrace_optval_t dtrace_ustackframes_default = 20; dtrace_optval_t dtrace_jstackframes_default = 50; dtrace_optval_t dtrace_jstackstrsize_default = 512; +dtrace_optval_t dtrace_buflimit_default = 75; +dtrace_optval_t dtrace_buflimit_min = 1; +dtrace_optval_t dtrace_buflimit_max = 99; int dtrace_msgdsize_max = 128; hrtime_t dtrace_chill_max = 500 * (NANOSEC / MILLISEC); /* 500 ms */ hrtime_t dtrace_chill_interval = NANOSEC; /* 1000 ms */ @@ -194,15 +208,14 @@ unsigned int dtrace_max_cpus = 0; /* number of enabled cpus */ */ static dev_info_t *dtrace_devi; /* device info */ static vmem_t *dtrace_arena; /* probe ID arena */ -static vmem_t *dtrace_minor; /* minor number arena */ 
-static taskq_t *dtrace_taskq; /* task queue */ static dtrace_probe_t **dtrace_probes; /* array of all probes */ static int dtrace_nprobes; /* number of probes */ static dtrace_provider_t *dtrace_provider; /* provider list */ static dtrace_meta_t *dtrace_meta_pid; /* user-land meta provider */ static int dtrace_opens; /* number of opens */ static int dtrace_helpers; /* number of helpers */ -static void *dtrace_softstate; /* softstate pointer */ +static dtrace_hash_t *dtrace_strings; +static dtrace_hash_t *dtrace_byprov; /* probes hashed by provider */ static dtrace_hash_t *dtrace_bymod; /* probes hashed by module */ static dtrace_hash_t *dtrace_byfunc; /* probes hashed by function */ static dtrace_hash_t *dtrace_byname; /* probes hashed by name */ @@ -227,7 +240,8 @@ static int dtrace_dof_mode; /* See dtrace_impl.h for a description of Darwin's * fbt_provide and sdt_provide. Its clearly not a dtrace tunable variable either... */ int dtrace_kernel_symbol_mode; /* See dtrace_impl.h for a description of Darwin's kernel symbol modes. 
*/ - +static uint32_t dtrace_wake_clients; +static uint8_t dtrace_kerneluuid[16]; /* the 128-bit uuid */ /* * To save memory, some common memory allocations are given a @@ -286,7 +300,7 @@ static int dtrace_module_unloaded(struct kmod_info *kmod); * * ASSERT(MUTEX_HELD(&cpu_lock)); * becomes: - * lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); + * LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); * */ static lck_mtx_t dtrace_lock; /* probe state lock */ @@ -318,17 +332,17 @@ dtrace_enable_nullop(void) return (0); } -static dtrace_pops_t dtrace_provider_ops = { - (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop, - (void (*)(void *, struct modctl *))dtrace_nullop, - (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop, - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, - NULL, - NULL, - NULL, - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop +static dtrace_pops_t dtrace_provider_ops = { + .dtps_provide = (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop, + .dtps_provide_module = (void (*)(void *, struct modctl *))dtrace_nullop, + .dtps_enable = (int (*)(void *, dtrace_id_t, void *))dtrace_nullop, + .dtps_disable = (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, + .dtps_suspend = (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, + .dtps_resume = (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, + .dtps_getargdesc = NULL, + .dtps_getargval = NULL, + .dtps_usermode = NULL, + .dtps_destroy = (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, }; static dtrace_id_t dtrace_probeid_begin; /* special BEGIN probe */ @@ -349,6 +363,15 @@ int dtrace_helptrace_enabled = 1; int dtrace_helptrace_enabled = 0; #endif +#if defined (__arm64__) +/* + * The ioctl for adding helper DOF is based on the + * size of a user_addr_t. We need to recognize both + * U32 and U64 as the same action. 
+ */ +#define DTRACEHIOC_ADDDOF_U32 _IOW('h', 4, user32_addr_t) +#define DTRACEHIOC_ADDDOF_U64 _IOW('h', 4, user64_addr_t) +#endif /* __arm64__ */ /* * DTrace Error Hashing @@ -374,18 +397,22 @@ static lck_mtx_t dtrace_errlock; * outside of the implementation. There is no real structure to this cpp * mishmash -- but is there ever? */ -#define DTRACE_HASHSTR(hash, probe) \ - dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs))) -#define DTRACE_HASHNEXT(hash, probe) \ - (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs) +#define DTRACE_GETSTR(hash, elm) \ + (hash->dth_getstr(elm, hash->dth_stroffs)) -#define DTRACE_HASHPREV(hash, probe) \ - (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs) +#define DTRACE_HASHSTR(hash, elm) \ + dtrace_hash_str(DTRACE_GETSTR(hash, elm)) + +#define DTRACE_HASHNEXT(hash, elm) \ + (void**)((uintptr_t)(elm) + (hash)->dth_nextoffs) + +#define DTRACE_HASHPREV(hash, elm) \ + (void**)((uintptr_t)(elm) + (hash)->dth_prevoffs) #define DTRACE_HASHEQ(hash, lhs, rhs) \ - (strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \ - *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0) + (strcmp(DTRACE_GETSTR(hash, lhs), \ + DTRACE_GETSTR(hash, rhs)) == 0) #define DTRACE_AGGHASHSIZE_SLEW 17 @@ -415,6 +442,25 @@ static lck_mtx_t dtrace_errlock; (where) = ((thr + DIF_VARIABLE_MAX) & \ (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ } +#elif defined(__arm__) +/* FIXME: three function calls!!! */ +#define DTRACE_TLS_THRKEY(where) { \ + uint_t intr = ml_at_interrupt_context(); /* Note: just one measly bit */ \ + uint64_t thr = (uintptr_t)current_thread(); \ + uint_t pid = (uint_t)dtrace_proc_selfpid(); \ + ASSERT(intr < (1 << 3)); \ + (where) = (((thr << 32 | pid) + DIF_VARIABLE_MAX) & \ + (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ +} +#elif defined (__arm64__) +/* FIXME: two function calls!! 
*/ +#define DTRACE_TLS_THRKEY(where) { \ + uint_t intr = ml_at_interrupt_context(); /* Note: just one measly bit */ \ + uint64_t thr = (uintptr_t)current_thread(); \ + ASSERT(intr < (1 << 3)); \ + (where) = ((thr + DIF_VARIABLE_MAX) & \ + (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ +} #else #error Unknown architecture #endif @@ -437,6 +483,14 @@ static lck_mtx_t dtrace_errlock; return (0); \ } +#define DTRACE_RANGE_REMAIN(remp, addr, baseaddr, basesz) \ +do { \ + if ((remp) != NULL) { \ + *(remp) = (uintptr_t)(baseaddr) + (basesz) - (addr); \ + } \ +} while (0) + + /* * Test whether a range of memory starting at testaddr of size testsz falls * within the range of memory described by addr, sz. We take care to avoid @@ -461,7 +515,7 @@ static lck_mtx_t dtrace_errlock; #define RECOVER_LABEL(bits) dtraceLoadRecover##bits: -#if defined (__x86_64__) +#if defined (__x86_64__) || (defined (__arm__) || defined (__arm64__)) #define DTRACE_LOADFUNC(bits) \ /*CSTYLED*/ \ uint##bits##_t dtrace_load##bits(uintptr_t addr); \ @@ -504,6 +558,12 @@ dtrace_load##bits(uintptr_t addr) \ */ \ if (pmap_valid_page(pmap_find_phys(kernel_pmap, addr))) \ rval = *((volatile uint##bits##_t *)addr); \ + else { \ + *flags |= CPU_DTRACE_BADADDR; \ + cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \ + return (0); \ + } \ + \ RECOVER_LABEL(bits); \ (void)dtrace_set_thread_recover(current_thread(), recover); \ *flags &= ~CPU_DTRACE_NOFAULT; \ @@ -551,7 +611,8 @@ dtrace_load##bits(uintptr_t addr) \ static size_t dtrace_strlen(const char *, size_t); static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id); static void dtrace_enabling_provide(dtrace_provider_t *); -static int dtrace_enabling_match(dtrace_enabling_t *, int *); +static int dtrace_enabling_match(dtrace_enabling_t *, int *, dtrace_match_cond_t *cond); +static void dtrace_enabling_matchall_with_cond(dtrace_match_cond_t *cond); static void dtrace_enabling_matchall(void); static dtrace_state_t *dtrace_anon_grab(void); static 
uint64_t dtrace_helper(int, dtrace_mstate_t *, @@ -562,8 +623,12 @@ static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t, dtrace_state_t *, dtrace_mstate_t *); static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t, dtrace_optval_t); -static int dtrace_ecb_create_enable(dtrace_probe_t *, void *); +static int dtrace_ecb_create_enable(dtrace_probe_t *, void *, void *); static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *); +static int dtrace_canload_remains(uint64_t, size_t, size_t *, + dtrace_mstate_t *, dtrace_vstate_t *); +static int dtrace_canstore_remains(uint64_t, size_t, size_t *, + dtrace_mstate_t *, dtrace_vstate_t *); /* @@ -699,6 +764,9 @@ sysctl_dtrace_dof_maxsize SYSCTL_HANDLER_ARGS if (value <= 0) return (ERANGE); + if (value >= dtrace_copy_maxsize()) + return (ERANGE); + lck_mtx_lock(&dtrace_lock); dtrace_dof_maxsize = value; lck_mtx_unlock(&dtrace_lock); @@ -764,13 +832,21 @@ sysctl_dtrace_provide_private_probes SYSCTL_HANDLER_ARGS if (error) return (error); - if (value != 0 && value != 1) - return (ERANGE); + if (req->newptr) { + if (value != 0 && value != 1) + return (ERANGE); - lck_mtx_lock(&dtrace_lock); - dtrace_provide_private_probes = value; - lck_mtx_unlock(&dtrace_lock); + /* + * We do not allow changing this back to zero, as private probes + * would still be left registered + */ + if (value != 1) + return (EPERM); + lck_mtx_lock(&dtrace_lock); + dtrace_provide_private_probes = value; + lck_mtx_unlock(&dtrace_lock); + } return (0); } @@ -786,6 +862,15 @@ SYSCTL_PROC(_kern_dtrace, OID_AUTO, provide_private_probes, &dtrace_provide_private_probes, 0, sysctl_dtrace_provide_private_probes, "I", "provider must provide the private probes"); +/* + * kern.dtrace.dof_mode + * + * Returns the current DOF mode. + * This value is read-only. 
+ */ +SYSCTL_INT(_kern_dtrace, OID_AUTO, dof_mode, CTLFLAG_RD | CTLFLAG_LOCKED, + &dtrace_dof_mode, 0, "dtrace dof mode"); + /* * DTrace Probe Context Functions * @@ -880,15 +965,15 @@ dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate) } static int -dtrace_canstore_statvar(uint64_t addr, size_t sz, +dtrace_canstore_statvar(uint64_t addr, size_t sz, size_t *remain, dtrace_statvar_t **svars, int nsvars) { int i; size_t maxglobalsize, maxlocalsize; - maxglobalsize = dtrace_statvar_maxsize; - maxlocalsize = (maxglobalsize + sizeof (uint64_t)) * NCPU; + maxglobalsize = dtrace_statvar_maxsize + sizeof (uint64_t); + maxlocalsize = (maxglobalsize) * NCPU; if (nsvars == 0) return (0); @@ -909,11 +994,14 @@ dtrace_canstore_statvar(uint64_t addr, size_t sz, * DTrace to escalate an orthogonal kernel heap corruption bug * into the ability to store to arbitrary locations in memory. */ - VERIFY((scope == DIFV_SCOPE_GLOBAL && size < maxglobalsize) || - (scope == DIFV_SCOPE_LOCAL && size < maxlocalsize)); + VERIFY((scope == DIFV_SCOPE_GLOBAL && size <= maxglobalsize) || + (scope == DIFV_SCOPE_LOCAL && size <= maxlocalsize)); - if (DTRACE_INRANGE(addr, sz, svar->dtsv_data, svar->dtsv_size)) + if (DTRACE_INRANGE(addr, sz, svar->dtsv_data, svar->dtsv_size)) { + DTRACE_RANGE_REMAIN(remain, addr, svar->dtsv_data, + svar->dtsv_size); return (1); + } } return (0); @@ -928,14 +1016,26 @@ dtrace_canstore_statvar(uint64_t addr, size_t sz, static int dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) +{ + return (dtrace_canstore_remains(addr, sz, NULL, mstate, vstate)); +} +/* + * Implementation of dtrace_canstore which communicates the upper bound of the + * allowed memory region. + */ +static int +dtrace_canstore_remains(uint64_t addr, size_t sz, size_t *remain, + dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { /* * First, check to see if the address is in scratch space... 
*/ if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base, - mstate->dtms_scratch_size)) + mstate->dtms_scratch_size)) { + DTRACE_RANGE_REMAIN(remain, addr, mstate->dtms_scratch_base, + mstate->dtms_scratch_size); return (1); - + } /* * Now check to see if it's a dynamic variable. This check will pick * up both thread-local variables and any global dynamically-allocated @@ -947,6 +1047,7 @@ dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, uintptr_t base = (uintptr_t)dstate->dtds_base + (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t)); uintptr_t chunkoffs; + dtrace_dynvar_t *dvar; /* * Before we assume that we can store here, we need to make @@ -963,6 +1064,8 @@ dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, * * (3) Not span a chunk boundary * + * (4) Not be in the tuple space of a dynamic variable + * */ if (addr < base) return (0); @@ -975,6 +1078,15 @@ dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, if (chunkoffs + sz > dstate->dtds_chunksize) return (0); + dvar = (dtrace_dynvar_t *)((uintptr_t)addr - chunkoffs); + + if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) + return (0); + + if (chunkoffs < sizeof (dtrace_dynvar_t) + + ((dvar->dtdv_tuple.dtt_nkeys - 1) * sizeof (dtrace_key_t))) + return (0); + return (1); } @@ -982,11 +1094,11 @@ dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, * Finally, check the static local and global variables. These checks * take the longest, so we perform them last. */ - if (dtrace_canstore_statvar(addr, sz, + if (dtrace_canstore_statvar(addr, sz, remain, vstate->dtvs_locals, vstate->dtvs_nlocals)) return (1); - if (dtrace_canstore_statvar(addr, sz, + if (dtrace_canstore_statvar(addr, sz, remain, vstate->dtvs_globals, vstate->dtvs_nglobals)) return (1); @@ -1003,9 +1115,20 @@ dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, * DTrace subroutines (DIF_SUBR_*) should use this helper to implement * appropriate memory access protection. 
*/ -static int +int dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) +{ + return (dtrace_canload_remains(addr, sz, NULL, mstate, vstate)); +} + +/* + * Implementation of dtrace_canload which communicates the upper bound of the + * allowed memory region. + */ +static int +dtrace_canload_remains(uint64_t addr, size_t sz, size_t *remain, + dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; @@ -1013,21 +1136,27 @@ dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, * If we hold the privilege to read from kernel memory, then * everything is readable. */ - if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) + if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) { + DTRACE_RANGE_REMAIN(remain, addr, addr, sz); return (1); + } /* * You can obviously read that which you can store. */ - if (dtrace_canstore(addr, sz, mstate, vstate)) + if (dtrace_canstore_remains(addr, sz, remain, mstate, vstate)) return (1); /* * We're allowed to read from our own string table. */ if (DTRACE_INRANGE(addr, sz, (uintptr_t)mstate->dtms_difo->dtdo_strtab, - mstate->dtms_difo->dtdo_strlen)) + mstate->dtms_difo->dtdo_strlen)) { + DTRACE_RANGE_REMAIN(remain, addr, + mstate->dtms_difo->dtdo_strtab, + mstate->dtms_difo->dtdo_strlen); return (1); + } DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV); *illval = addr; @@ -1041,21 +1170,41 @@ dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, * calls in the event that the user has all privileges. */ static int -dtrace_strcanload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, - dtrace_vstate_t *vstate) +dtrace_strcanload(uint64_t addr, size_t sz, size_t *remain, + dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { - size_t strsz; + size_t rsize; /* * If we hold the privilege to read from kernel memory, then * everything is readable. 
*/ - if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) + if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) { + DTRACE_RANGE_REMAIN(remain, addr, addr, sz); return (1); + } - strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr, sz); - if (dtrace_canload(addr, strsz, mstate, vstate)) - return (1); + /* + * Even if the caller is uninterested in querying the remaining valid + * range, it is required to ensure that the access is allowed. + */ + if (remain == NULL) { + remain = &rsize; + } + if (dtrace_canload_remains(addr, 0, remain, mstate, vstate)) { + size_t strsz; + /* + * Perform the strlen after determining the length of the + * memory region which is accessible. This prevents timing + * information from being used to find NULs in memory which is + * not accessible to the caller. + */ + strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr, + MIN(sz, *remain)); + if (strsz <= *remain) { + return (1); + } + } return (0); } @@ -1065,26 +1214,49 @@ dtrace_strcanload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, * region in which a load may be issued given the user's privilege level. */ static int -dtrace_vcanload(void *src, dtrace_diftype_t *type, dtrace_mstate_t *mstate, - dtrace_vstate_t *vstate) +dtrace_vcanload(void *src, dtrace_diftype_t *type, size_t *remain, + dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { size_t sz; ASSERT(type->dtdt_flags & DIF_TF_BYREF); + /* + * Calculate the max size before performing any checks since even + * DTRACE_ACCESS_KERNEL-credentialed callers expect that this function + * return the max length via 'remain'. + */ + if (type->dtdt_kind == DIF_TYPE_STRING) { + dtrace_state_t *state = vstate->dtvs_state; + + if (state != NULL) { + sz = state->dts_options[DTRACEOPT_STRSIZE]; + } else { + /* + * In helper context, we have a NULL state; fall back + * to using the system-wide default for the string size + * in this case. 
+ */ + sz = dtrace_strsize_default; + } + } else { + sz = type->dtdt_size; + } + /* * If we hold the privilege to read from kernel memory, then * everything is readable. */ - if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) + if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) { + DTRACE_RANGE_REMAIN(remain, (uintptr_t)src, src, sz); return (1); + } - if (type->dtdt_kind == DIF_TYPE_STRING) - sz = dtrace_strlen(src, - vstate->dtvs_state->dts_options[DTRACEOPT_STRSIZE]) + 1; - else - sz = type->dtdt_size; - - return (dtrace_canload((uintptr_t)src, sz, mstate, vstate)); + if (type->dtdt_kind == DIF_TYPE_STRING) { + return (dtrace_strcanload((uintptr_t)src, sz, remain, mstate, + vstate)); + } + return (dtrace_canload_remains((uintptr_t)src, sz, remain, mstate, + vstate)); } /* @@ -1222,15 +1394,15 @@ dtrace_strcpy(const void *src, void *dst, size_t len) * specified type; we assume that we can store to directly. */ static void -dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type) +dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type, size_t limit) { ASSERT(type->dtdt_flags & DIF_TF_BYREF); if (type->dtdt_kind == DIF_TYPE_STRING) { - dtrace_strcpy(src, dst, type->dtdt_size); + dtrace_strcpy(src, dst, MIN(type->dtdt_size, limit)); } else { - dtrace_bcopy(src, dst, type->dtdt_size); -} + dtrace_bcopy(src, dst, MIN(type->dtdt_size, limit)); + } } /* @@ -1481,7 +1653,7 @@ dtrace_priv_proc(dtrace_state_t *state) if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) goto bad; - if (dtrace_is_restricted() && !dtrace_is_running_apple_internal() && !dtrace_can_attach_to_proc(current_proc())) + if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed() && !dtrace_can_attach_to_proc(current_proc())) goto bad; if (state->dts_cred.dcr_action & DTRACE_CRA_PROC) @@ -1513,7 +1685,7 @@ dtrace_priv_proc_relaxed(dtrace_state_t *state) static int dtrace_priv_kernel(dtrace_state_t *state) { - if (dtrace_is_restricted() && !dtrace_is_running_apple_internal()) + if 
(dtrace_is_restricted() && !dtrace_are_restrictions_relaxed()) goto bad; if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL) @@ -3071,6 +3243,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, * APPLE NOTE: Account for introduction of __dtrace_probe() */ int aframes = mstate->dtms_probe->dtpr_aframes + 3; + dtrace_vstate_t *vstate = &state->dts_vstate; dtrace_provider_t *pv; uint64_t val; @@ -3085,7 +3258,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, } else - val = dtrace_getarg(ndx, aframes); + val = dtrace_getarg(ndx, aframes, mstate, vstate); /* * This is regrettably required to keep the compiler @@ -3358,27 +3531,47 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, case DIF_VAR_ZONENAME: - { - /* scratch_size is equal to length('global') + 1 for the null-terminator. */ - char *zname = (char *)mstate->dtms_scratch_ptr; - size_t scratch_size = 6 + 1; + { + /* scratch_size is equal to length('global') + 1 for the null-terminator. */ + char *zname = (char *)mstate->dtms_scratch_ptr; + size_t scratch_size = 6 + 1; if (!dtrace_priv_proc(state)) return (0); - /* The scratch allocation's lifetime is that of the clause. */ - if (!DTRACE_INSCRATCH(mstate, scratch_size)) { - DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - return 0; - } + /* The scratch allocation's lifetime is that of the clause. */ + if (!DTRACE_INSCRATCH(mstate, scratch_size)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); + return 0; + } + + mstate->dtms_scratch_ptr += scratch_size; + + /* The kernel does not provide zonename, it will always return 'global'. */ + strlcpy(zname, "global", scratch_size); + + return ((uint64_t)(uintptr_t)zname); + } + +#if MONOTONIC + case DIF_VAR_CPUINSTRS: + return mt_cur_cpu_instrs(); - mstate->dtms_scratch_ptr += scratch_size; + case DIF_VAR_CPUCYCLES: + return mt_cur_cpu_cycles(); - /* The kernel does not provide zonename, it will always return 'global'. 
*/ - strlcpy(zname, "global", scratch_size); + case DIF_VAR_VINSTRS: + return mt_cur_thread_instrs(); - return ((uint64_t)(uintptr_t)zname); - } + case DIF_VAR_VCYCLES: + return mt_cur_thread_cycles(); +#else /* MONOTONIC */ + case DIF_VAR_CPUINSTRS: /* FALLTHROUGH */ + case DIF_VAR_CPUCYCLES: /* FALLTHROUGH */ + case DIF_VAR_VINSTRS: /* FALLTHROUGH */ + case DIF_VAR_VCYCLES: /* FALLTHROUGH */ + return 0; +#endif /* !MONOTONIC */ case DIF_VAR_UID: if (!dtrace_priv_proc_relaxed(state)) @@ -3593,6 +3786,14 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, tupregs[subr == DIF_SUBR_ALLOCA ? 0 : 1].dttk_value; size_t scratch_size = (dest - mstate->dtms_scratch_ptr) + size; + /* + * Check whether the user can access kernel memory + */ + if (dtrace_priv_kernel(state) == 0) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV); + regs[rd] = 0; + break; + } /* * This action doesn't require any credential checks since * probes will not activate in user contexts to which the @@ -3735,30 +3936,30 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, uintptr_t kaddr = tupregs[0].dttk_value; user_addr_t uaddr = tupregs[1].dttk_value; uint64_t size = tupregs[2].dttk_value; + size_t lim; if (!dtrace_destructive_disallow && dtrace_priv_proc_control(state) && !dtrace_istoxic(kaddr, size) && - dtrace_strcanload(kaddr, size, mstate, vstate)) { + dtrace_strcanload(kaddr, size, &lim, mstate, vstate)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); - dtrace_copyoutstr(kaddr, uaddr, size, flags); + dtrace_copyoutstr(kaddr, uaddr, lim, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); } break; } case DIF_SUBR_STRLEN: { - size_t sz; + size_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t addr = (uintptr_t)tupregs[0].dttk_value; - sz = dtrace_strlen((char *)addr, - state->dts_options[DTRACEOPT_STRSIZE]); + size_t lim; - if (!dtrace_canload(addr, sz + 1, mstate, vstate)) { + if (!dtrace_strcanload(addr, size, &lim, mstate, vstate)) { regs[rd] = 0; break; } - regs[rd] = sz; + regs[rd] = 
dtrace_strlen((char *)addr, lim); break; } @@ -3772,12 +3973,19 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * is DIF_SUBR_STRRCHR, we will look for the last occurrence * of the specified character instead of the first. */ - uintptr_t saddr = tupregs[0].dttk_value; uintptr_t addr = tupregs[0].dttk_value; - uintptr_t limit = addr + state->dts_options[DTRACEOPT_STRSIZE]; + uintptr_t addr_limit; + uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; + size_t lim; char c, target = (char)tupregs[1].dttk_value; - for (regs[rd] = 0; addr < limit; addr++) { + if (!dtrace_strcanload(addr, size, &lim, mstate, vstate)) { + regs[rd] = 0; + break; + } + addr_limit = addr + lim; + + for (regs[rd] = 0; addr < addr_limit; addr++) { if ((c = dtrace_load8(addr)) == target) { regs[rd] = addr; @@ -3789,11 +3997,6 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; } - if (!dtrace_canload(saddr, addr - saddr, mstate, vstate)) { - regs[rd] = 0; - break; - } - break; } @@ -3951,7 +4154,8 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, uintptr_t addr = tupregs[0].dttk_value; uintptr_t tokaddr = tupregs[1].dttk_value; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; - uintptr_t limit, toklimit = tokaddr + size; + uintptr_t limit, toklimit; + size_t clim; char *dest = (char *)mstate->dtms_scratch_ptr; uint8_t c='\0', tokmap[32]; /* 256 / 8 */ uint64_t i = 0; @@ -3960,10 +4164,11 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * Check both the token buffer and (later) the input buffer, * since both could be non-scratch addresses. */ - if (!dtrace_strcanload(tokaddr, size, mstate, vstate)) { + if (!dtrace_strcanload(tokaddr, size, &clim, mstate, vstate)) { regs[rd] = 0; break; } + toklimit = tokaddr + clim; if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); @@ -3980,6 +4185,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * it behaves like an implicit clause-local variable. 
*/ addr = mstate->dtms_strtok; + limit = mstate->dtms_strtok_limit; } else { /* * If the user-specified address is non-NULL we must @@ -3989,10 +4195,12 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * (when we fetch addr from mstate->dtms_strtok) * would fail this access check. */ - if (!dtrace_strcanload(addr, size, mstate, vstate)) { + if (!dtrace_strcanload(addr, size, &clim, mstate, + vstate)) { regs[rd] = 0; break; } + limit = addr + clim; } /* @@ -4011,10 +4219,10 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, tokmap[c >> 3] |= (1 << (c & 0x7)); } - for (limit = addr + size; addr < limit; addr++) { + for (; addr < limit; addr++) { /* - * We're looking for a character that is _not_ contained - * in the token string. + * We're looking for a character that is _not_ + * contained in the token string. */ if ((c = dtrace_load8(addr)) == '\0') break; @@ -4032,6 +4240,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, */ regs[rd] = 0; mstate->dtms_strtok = 0; + mstate->dtms_strtok_limit = 0; break; } @@ -4054,6 +4263,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, regs[rd] = (uintptr_t)dest; mstate->dtms_scratch_ptr += size; mstate->dtms_strtok = addr; + mstate->dtms_strtok_limit = limit; break; } @@ -4129,10 +4339,12 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t s1 = tupregs[0].dttk_value; uintptr_t s2 = tupregs[1].dttk_value; - uint64_t i = 0; + uint64_t i = 0, j = 0; + size_t lim1, lim2; + char c; - if (!dtrace_strcanload(s1, size, mstate, vstate) || - !dtrace_strcanload(s2, size, mstate, vstate)) { + if (!dtrace_strcanload(s1, size, &lim1, mstate, vstate) || + !dtrace_strcanload(s2, size, &lim2, mstate, vstate)) { regs[rd] = 0; break; } @@ -4149,8 +4361,8 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, regs[rd] = 0; break; } - - if ((d[i++] = dtrace_load8(s1++)) == '\0') { + c = (i >= lim1) ? 
'\0' : dtrace_load8(s1++); + if ((d[i++] = c) == '\0') { i--; break; } @@ -4162,8 +4374,8 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, regs[rd] = 0; break; } - - if ((d[i++] = dtrace_load8(s2++)) == '\0') + c = (j++ >= lim2) ? '\0' : dtrace_load8(s2++); + if ((d[i++] = c) == '\0') break; } @@ -4177,9 +4389,20 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, case DIF_SUBR_LLTOSTR: { int64_t i = (int64_t)tupregs[0].dttk_value; - int64_t val = i < 0 ? i * -1 : i; - uint64_t size = 22; /* enough room for 2^64 in decimal */ + uint64_t val, digit; + uint64_t size = 65; /* enough room for 2^64 in binary */ char *end = (char *)mstate->dtms_scratch_ptr + size - 1; + int base = 10; + + if (nargs > 1) { + if ((base = tupregs[1].dttk_value) <= 1 || + base > ('z' - 'a' + 1) + ('9' - '0' + 1)) { + *flags |= CPU_DTRACE_ILLOP; + break; + } + } + + val = (base == 10 && i < 0) ? i * -1 : i; if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); @@ -4187,13 +4410,24 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; } - for (*end-- = '\0'; val; val /= 10) - *end-- = '0' + (val % 10); + for (*end-- = '\0'; val; val /= base) { + if ((digit = val % base) <= '9' - '0') { + *end-- = '0' + digit; + } else { + *end-- = 'a' + (digit - ('9' - '0') - 1); + } + } + + if (i == 0 && base == 16) + *end-- = '0'; + + if (base == 16) + *end-- = 'x'; - if (i == 0) + if (i == 0 || base == 8 || base == 16) *end-- = '0'; - if (i < 0) + if (i < 0 && base == 10) *end-- = '-'; regs[rd] = (uintptr_t)end + 1; @@ -4366,9 +4600,10 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, char *dest = (char *)mstate->dtms_scratch_ptr, c; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t src = tupregs[0].dttk_value; - int i = 0, j = 0; + size_t lim; + size_t i = 0, j = 0; - if (!dtrace_strcanload(src, size, mstate, vstate)) { + if (!dtrace_strcanload(src, size, &lim, mstate, vstate)) { regs[rd] = 0; break; } @@ -4383,7 +4618,7 @@ 
dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * Move forward, loading each character. */ do { - c = dtrace_load8(src + i++); + c = (i >= lim) ? '\0' : dtrace_load8(src + i++); next: if ((uint64_t)(j + 5) >= size) /* 5 = strlen("/..c\0") */ break; @@ -4393,7 +4628,7 @@ next: continue; } - c = dtrace_load8(src + i++); + c = (i >= lim) ? '\0' : dtrace_load8(src + i++); if (c == '/') { /* @@ -4414,7 +4649,7 @@ next: continue; } - c = dtrace_load8(src + i++); + c = (i >= lim) ? '\0' : dtrace_load8(src + i++); if (c == '/') { /* @@ -4437,7 +4672,7 @@ next: continue; } - c = dtrace_load8(src + i++); + c = (i >= lim) ? '\0' : dtrace_load8(src + i++); if (c != '/' && c != '\0') { /* @@ -4499,6 +4734,12 @@ next: #if !defined(__APPLE__) ip4 = dtrace_load32(tupregs[argi].dttk_value); #else + if (!dtrace_canload(tupregs[argi].dttk_value, sizeof(ip4), + mstate, vstate)) { + regs[rd] = 0; + break; + } + dtrace_bcopy( (void *)(uintptr_t)tupregs[argi].dttk_value, (void *)(uintptr_t)&ip4, sizeof (ip4)); @@ -4559,6 +4800,12 @@ next: * just the IPv4 string is returned for inet_ntoa6. */ + if (!dtrace_canload(tupregs[argi].dttk_value, + sizeof(struct in6_addr), mstate, vstate)) { + regs[rd] = 0; + break; + } + /* * Safely load the IPv6 address. 
*/ @@ -4736,6 +4983,7 @@ inetout: regs[rd] = (uintptr_t)end + 1; break; } +#if defined(__APPLE__) case DIF_SUBR_VM_KERNEL_ADDRPERM: { if (!dtrace_priv_kernel(state)) { regs[rd] = 0; @@ -4745,38 +4993,60 @@ inetout: regs[rd] = (uintptr_t)end + 1; break; } -/* - * APPLE NOTE: - * CoreProfile callback ('core_profile (uint64_t, [uint64_t], [uint64_t] ...)') - */ - case DIF_SUBR_COREPROFILE: { - uint64_t selector = tupregs[0].dttk_value; - uint64_t args[DIF_DTR_NREGS-1] = {0ULL}; - uint32_t ii; - uint32_t count = (uint32_t)nargs; - - if (count < 1) { - regs[rd] = KERN_FAILURE; - break; + + case DIF_SUBR_KDEBUG_TRACE: { + uint32_t debugid; + uintptr_t args[4] = {0}; + int i; + + if (nargs < 2 || nargs > 5) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + break; } - - if(count > DIF_DTR_NREGS) - count = DIF_DTR_NREGS; - /* copy in any variadic argument list, bounded by DIF_DTR_NREGS */ - for(ii = 0; ii < count-1; ii++) { - args[ii] = tupregs[ii+1].dttk_value; + if (dtrace_destructive_disallow) + return; + + debugid = tupregs[0].dttk_value; + for (i = 0; i < nargs - 1; i++) + args[i] = tupregs[i + 1].dttk_value; + + kernel_debug(debugid, args[0], args[1], args[2], args[3], 0); + + break; + } + + case DIF_SUBR_KDEBUG_TRACE_STRING: { + if (nargs != 3) { + break; } - kern_return_t ret = - chudxnu_dtrace_callback(selector, args, count-1); - if(KERN_SUCCESS != ret) { - /* error */ + if (dtrace_destructive_disallow) + return; + + uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; + uint32_t debugid = tupregs[0].dttk_value; + uint64_t str_id = tupregs[1].dttk_value; + uintptr_t src = tupregs[2].dttk_value; + size_t lim; + char buf[size]; + char* str = NULL; + + if (src != (uintptr_t)0) { + str = buf; + if (!dtrace_strcanload(src, size, &lim, mstate, vstate)) { + break; + } + dtrace_strcpy((void*)src, buf, size); } - regs[rd] = ret; + (void)kernel_debug_string(debugid, &str_id, str); + regs[rd] = str_id; + break; } +#endif + } } @@ -5072,15 +5342,17 @@ 
dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, size_t sz = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t s1 = regs[r1]; uintptr_t s2 = regs[r2]; + size_t lim1 = sz, lim2 = sz; if (s1 != 0 && - !dtrace_strcanload(s1, sz, mstate, vstate)) + !dtrace_strcanload(s1, sz, &lim1, mstate, vstate)) break; if (s2 != 0 && - !dtrace_strcanload(s2, sz, mstate, vstate)) + !dtrace_strcanload(s2, sz, &lim2, mstate, vstate)) break; - cc_r = dtrace_strncmp((char *)s1, (char *)s2, sz); + cc_r = dtrace_strncmp((char *)s1, (char *)s2, + MIN(lim1, lim2)); cc_n = cc_r < 0; cc_z = cc_r == 0; @@ -5132,12 +5404,14 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; + VERIFY(id < (uint_t)vstate->dtvs_nglobals); svar = vstate->dtvs_globals[id]; ASSERT(svar != NULL); v = &svar->dtsv_var; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { uintptr_t a = (uintptr_t)svar->dtsv_data; + size_t lim; ASSERT(a != 0); ASSERT(svar->dtsv_size != 0); @@ -5151,11 +5425,11 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, } if (!dtrace_vcanload( (void *)(uintptr_t)regs[rd], &v->dtdv_type, - mstate, vstate)) + &lim, mstate, vstate)) break; dtrace_vcopy((void *)(uintptr_t)regs[rd], - (void *)a, &v->dtdv_type); + (void *)a, &v->dtdv_type, lim); break; } @@ -5222,7 +5496,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; - ASSERT(id < (uint_t)vstate->dtvs_nlocals); + VERIFY(id < (uint_t)vstate->dtvs_nlocals); ASSERT(vstate->dtvs_locals != NULL); svar = vstate->dtvs_locals[id]; ASSERT(svar != NULL); @@ -5231,6 +5505,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { uintptr_t a = (uintptr_t)svar->dtsv_data; size_t sz = v->dtdv_type.dtdt_size; + size_t lim; sz += sizeof (uint64_t); ASSERT(svar->dtsv_size == (int)NCPU * sz); @@ -5246,11 +5521,11 @@ 
dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, if (!dtrace_vcanload( (void *)(uintptr_t)regs[rd], &v->dtdv_type, - mstate, vstate)) + &lim, mstate, vstate)) break; dtrace_vcopy((void *)(uintptr_t)regs[rd], - (void *)a, &v->dtdv_type); + (void *)a, &v->dtdv_type, lim); break; } @@ -5299,6 +5574,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, id = DIF_INSTR_VAR(instr); ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; + VERIFY(id < (uint_t)vstate->dtvs_ntlocals); key = &tupregs[DIF_DTR_NREGS]; key[0].dttk_value = (uint64_t)id; @@ -5323,13 +5599,15 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, break; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { + size_t lim; + if (!dtrace_vcanload( (void *)(uintptr_t)regs[rd], - &v->dtdv_type, mstate, vstate)) + &v->dtdv_type, &lim, mstate, vstate)) break; dtrace_vcopy((void *)(uintptr_t)regs[rd], - dvar->dtdv_data, &v->dtdv_type); + dvar->dtdv_data, &v->dtdv_type, lim); } else { *((uint64_t *)dvar->dtdv_data) = regs[rd]; } @@ -5411,8 +5689,10 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, if (DIF_INSTR_OP(instr) == DIF_OP_LDTAA) { DTRACE_TLS_THRKEY(key[nkeys].dttk_value); key[nkeys++].dttk_size = 0; + VERIFY(id < (uint_t)vstate->dtvs_ntlocals); v = &vstate->dtvs_tlocals[id]; } else { + VERIFY(id < (uint_t)vstate->dtvs_nglobals); v = &vstate->dtvs_globals[id]->dtsv_var; } @@ -5451,8 +5731,10 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) { DTRACE_TLS_THRKEY(key[nkeys].dttk_value); key[nkeys++].dttk_size = 0; + VERIFY(id < (uint_t)vstate->dtvs_ntlocals); v = &vstate->dtvs_tlocals[id]; } else { + VERIFY(id < (uint_t)vstate->dtvs_nglobals); v = &vstate->dtvs_globals[id]->dtsv_var; } @@ -5466,13 +5748,15 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, break; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { + size_t lim; + if (!dtrace_vcanload( (void 
*)(uintptr_t)regs[rd], &v->dtdv_type, - mstate, vstate)) + &lim, mstate, vstate)) break; dtrace_vcopy((void *)(uintptr_t)regs[rd], - dvar->dtdv_data, &v->dtdv_type); + dvar->dtdv_data, &v->dtdv_type, lim); } else { *((uint64_t *)dvar->dtdv_data) = regs[rd]; } @@ -6156,6 +6440,7 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, * not the case. */ if ((ecb->dte_cond & DTRACE_COND_USERMODE) && + prov->dtpv_pops.dtps_usermode && prov->dtpv_pops.dtps_usermode(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg) == 0) continue; @@ -6279,7 +6564,7 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, if (pred != NULL) { dtrace_difo_t *dp = pred->dtp_difo; - int rval; + uint64_t rval; rval = dtrace_dif_emulate(dp, &mstate, vstate, state); @@ -6439,7 +6724,7 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, tomax = buf->dtb_tomax; ASSERT(tomax != NULL); - if (ecb->dte_size != 0) + if (ecb->dte_size == 0) continue; ASSERT(ecb->dte_size >= sizeof(dtrace_rechdr_t)); @@ -6574,7 +6859,7 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF && !dtrace_vcanload((void *)(uintptr_t)val, - &dp->dtdo_rtype, &mstate, vstate)) + &dp->dtdo_rtype, NULL, &mstate, vstate)) { continue; } @@ -6747,12 +7032,33 @@ dtrace_hash_str(const char *p) return (hval); } +static const char* +dtrace_strkey_probe_provider(void *elm, uintptr_t offs) +{ +#pragma unused(offs) + dtrace_probe_t *probe = (dtrace_probe_t*)elm; + return probe->dtpr_provider->dtpv_name; +} + +static const char* +dtrace_strkey_offset(void *elm, uintptr_t offs) +{ + return ((char *)((uintptr_t)(elm) + offs)); +} + +static const char* +dtrace_strkey_deref_offset(void *elm, uintptr_t offs) +{ + return *((char **)((uintptr_t)(elm) + offs)); +} + static dtrace_hash_t * -dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs) +dtrace_hash_create(dtrace_strkey_f func, uintptr_t arg, uintptr_t nextoffs, uintptr_t prevoffs) { 
dtrace_hash_t *hash = kmem_zalloc(sizeof (dtrace_hash_t), KM_SLEEP); - hash->dth_stroffs = stroffs; + hash->dth_getstr = func; + hash->dth_stroffs = arg; hash->dth_nextoffs = nextoffs; hash->dth_prevoffs = prevoffs; @@ -6801,10 +7107,10 @@ dtrace_hash_resize(dtrace_hash_t *hash) for (i = 0; i < size; i++) { for (bucket = hash->dth_tab[i]; bucket != NULL; bucket = next) { - dtrace_probe_t *probe = bucket->dthb_chain; + void *elm = bucket->dthb_chain; - ASSERT(probe != NULL); - ndx = DTRACE_HASHSTR(hash, probe) & new_mask; + ASSERT(elm != NULL); + ndx = DTRACE_HASHSTR(hash, elm) & new_mask; next = bucket->dthb_next; bucket->dthb_next = new_tab[ndx]; @@ -6819,12 +7125,12 @@ dtrace_hash_resize(dtrace_hash_t *hash) } static void -dtrace_hash_add(dtrace_hash_t *hash, dtrace_probe_t *new) +dtrace_hash_add(dtrace_hash_t *hash, void *new) { int hashval = DTRACE_HASHSTR(hash, new); int ndx = hashval & hash->dth_mask; dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; - dtrace_probe_t **nextp, **prevp; + void **nextp, **prevp; for (; bucket != NULL; bucket = bucket->dthb_next) { if (DTRACE_HASHEQ(hash, bucket->dthb_chain, new)) @@ -6857,23 +7163,29 @@ add: bucket->dthb_len++; } -static dtrace_probe_t * -dtrace_hash_lookup(dtrace_hash_t *hash, dtrace_probe_t *template) +static void * +dtrace_hash_lookup_string(dtrace_hash_t *hash, const char *str) { - int hashval = DTRACE_HASHSTR(hash, template); + int hashval = dtrace_hash_str(str); int ndx = hashval & hash->dth_mask; dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; for (; bucket != NULL; bucket = bucket->dthb_next) { - if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template)) + if (strcmp(str, DTRACE_GETSTR(hash, bucket->dthb_chain)) == 0) return (bucket->dthb_chain); } return (NULL); } +static dtrace_probe_t * +dtrace_hash_lookup(dtrace_hash_t *hash, void *template) +{ + return dtrace_hash_lookup_string(hash, DTRACE_GETSTR(hash, template)); +} + static int -dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template) 
+dtrace_hash_collisions(dtrace_hash_t *hash, void *template) { int hashval = DTRACE_HASHSTR(hash, template); int ndx = hashval & hash->dth_mask; @@ -6888,19 +7200,19 @@ dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template) } static void -dtrace_hash_remove(dtrace_hash_t *hash, dtrace_probe_t *probe) +dtrace_hash_remove(dtrace_hash_t *hash, void *elm) { - int ndx = DTRACE_HASHSTR(hash, probe) & hash->dth_mask; + int ndx = DTRACE_HASHSTR(hash, elm) & hash->dth_mask; dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; - dtrace_probe_t **prevp = DTRACE_HASHPREV(hash, probe); - dtrace_probe_t **nextp = DTRACE_HASHNEXT(hash, probe); + void **prevp = DTRACE_HASHPREV(hash, elm); + void **nextp = DTRACE_HASHNEXT(hash, elm); /* - * Find the bucket that we're removing this probe from. + * Find the bucket that we're removing this elm from. */ for (; bucket != NULL; bucket = bucket->dthb_next) { - if (DTRACE_HASHEQ(hash, bucket->dthb_chain, probe)) + if (DTRACE_HASHEQ(hash, bucket->dthb_chain, elm)) break; } @@ -6909,12 +7221,12 @@ dtrace_hash_remove(dtrace_hash_t *hash, dtrace_probe_t *probe) if (*prevp == NULL) { if (*nextp == NULL) { /* - * The removed probe was the only probe on this + * The removed element was the only element on this * bucket; we need to remove the bucket. */ dtrace_hashbucket_t *b = hash->dth_tab[ndx]; - ASSERT(bucket->dthb_chain == probe); + ASSERT(bucket->dthb_chain == elm); ASSERT(b != NULL); if (b == bucket) { @@ -6954,20 +7266,63 @@ dtrace_badattr(const dtrace_attribute_t *a) } /* - * Return a duplicate copy of a string. If the specified string is NULL, - * this function returns a zero-length string. - * APPLE NOTE: Darwin employs size bounded string operation. + * Returns a dtrace-managed copy of a string, and will + * deduplicate copies of the same string. 
+ * If the specified string is NULL, returns an empty string */ static char * -dtrace_strdup(const char *str) +dtrace_strref(const char *str) { + dtrace_string_t *s = NULL; size_t bufsize = (str != NULL ? strlen(str) : 0) + 1; - char *new = kmem_zalloc(bufsize, KM_SLEEP); - if (str != NULL) - (void) strlcpy(new, str, bufsize); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - return (new); + if (str == NULL) + str = ""; + + for (s = dtrace_hash_lookup_string(dtrace_strings, str); s != NULL; + s = *(DTRACE_HASHNEXT(dtrace_strings, s))) { + if (strncmp(str, s->dtst_str, bufsize) != 0) { + continue; + } + ASSERT(s->dtst_refcount != UINT32_MAX); + s->dtst_refcount++; + return s->dtst_str; + } + + s = kmem_zalloc(sizeof(dtrace_string_t) + bufsize, KM_SLEEP); + s->dtst_refcount = 1; + (void) strlcpy(s->dtst_str, str, bufsize); + + dtrace_hash_add(dtrace_strings, s); + + return s->dtst_str; +} + +static void +dtrace_strunref(const char *str) +{ + ASSERT(str != NULL); + dtrace_string_t *s = NULL; + size_t bufsize = strlen(str) + 1; + + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + + for (s = dtrace_hash_lookup_string(dtrace_strings, str); s != NULL; + s = *(DTRACE_HASHNEXT(dtrace_strings, s))) { + if (strncmp(str, s->dtst_str, bufsize) != 0) { + continue; + } + ASSERT(s->dtst_refcount != 0); + s->dtst_refcount--; + if (s->dtst_refcount == 0) { + dtrace_hash_remove(dtrace_strings, s); + kmem_free(s, sizeof(dtrace_string_t) + bufsize); + } + return; + } + panic("attempt to unref non-existent string %s", str); } #define DTRACE_ISALPHA(c) \ @@ -6999,10 +7354,14 @@ dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp) uint32_t priv; if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) { - /* - * For DTRACE_PRIV_ALL, the uid and zoneid don't matter. 
- */ - priv = DTRACE_PRIV_ALL; + if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed()) { + priv = DTRACE_PRIV_USER | DTRACE_PRIV_PROC | DTRACE_PRIV_OWNER; + } + else { + priv = DTRACE_PRIV_ALL; + } + *uidp = 0; + *zoneidp = 0; } else { *uidp = crgetuid(cr); *zoneidp = crgetzoneid(cr); @@ -7260,9 +7619,27 @@ static int dtrace_match_string(const char *s, const char *p, int depth) { #pragma unused(depth) /* __APPLE__ */ + return (s != NULL && s == p); +} + +/*ARGSUSED*/ +static int +dtrace_match_module(const char *s, const char *p, int depth) +{ +#pragma unused(depth) /* __APPLE__ */ + size_t len; + if (s == NULL || p == NULL) + return (0); - /* APPLE NOTE: Darwin employs size bounded string operation. */ - return (s != NULL && strncmp(s, p, strlen(s) + 1) == 0); + len = strlen(p); + + if (strncmp(p, s, len) != 0) + return (0); + + if (s[len] == '.' || s[len] == '\0') + return (1); + + return (0); } /*ARGSUSED*/ @@ -7283,14 +7660,25 @@ dtrace_match_nonzero(const char *s, const char *p, int depth) static int dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, - zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *), void *arg) + zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *, void *), void *arg1, void *arg2) { - dtrace_probe_t template, *probe; + dtrace_probe_t *probe; + dtrace_provider_t prov_template = { + .dtpv_name = (char *)(uintptr_t)pkp->dtpk_prov + }; + + dtrace_probe_t template = { + .dtpr_provider = &prov_template, + .dtpr_mod = (char *)(uintptr_t)pkp->dtpk_mod, + .dtpr_func = (char *)(uintptr_t)pkp->dtpk_func, + .dtpr_name = (char *)(uintptr_t)pkp->dtpk_name + }; + dtrace_hash_t *hash = NULL; int len, rc, best = INT_MAX, nmatched = 0; dtrace_id_t i; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); /* * If the probe ID is specified in the key, just lookup by ID and @@ -7299,23 +7687,26 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, if 
(pkp->dtpk_id != DTRACE_IDNONE) { if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL && dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) { - if ((*matched)(probe, arg) == DTRACE_MATCH_FAIL) + if ((*matched)(probe, arg1, arg2) == DTRACE_MATCH_FAIL) return (DTRACE_MATCH_FAIL); nmatched++; } return (nmatched); } - template.dtpr_mod = (char *)(uintptr_t)pkp->dtpk_mod; - template.dtpr_func = (char *)(uintptr_t)pkp->dtpk_func; - template.dtpr_name = (char *)(uintptr_t)pkp->dtpk_name; - /* - * We want to find the most distinct of the module name, function - * name, and name. So for each one that is not a glob pattern or - * empty string, we perform a lookup in the corresponding hash and - * use the hash table with the fewest collisions to do our search. + * We want to find the most distinct of the provider name, module name, + * function name, and name. So for each one that is not a glob + * pattern or empty string, we perform a lookup in the corresponding + * hash and use the hash table with the fewest collisions to do our + * search. 
*/ + if (pkp->dtpk_pmatch == &dtrace_match_string && + (len = dtrace_hash_collisions(dtrace_byprov, &template)) < best) { + best = len; + hash = dtrace_byprov; + } + if (pkp->dtpk_mmatch == &dtrace_match_string && (len = dtrace_hash_collisions(dtrace_bymod, &template)) < best) { best = len; @@ -7347,7 +7738,7 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, nmatched++; - if ((rc = (*matched)(probe, arg)) != DTRACE_MATCH_NEXT) { + if ((rc = (*matched)(probe, arg1, arg2)) != DTRACE_MATCH_NEXT) { if (rc == DTRACE_MATCH_FAIL) return (DTRACE_MATCH_FAIL); break; @@ -7370,7 +7761,7 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, nmatched++; - if ((rc = (*matched)(probe, arg)) != DTRACE_MATCH_NEXT) { + if ((rc = (*matched)(probe, arg1, arg2)) != DTRACE_MATCH_NEXT) { if (rc == DTRACE_MATCH_FAIL) return (DTRACE_MATCH_FAIL); break; @@ -7402,8 +7793,26 @@ dtrace_probekey_func(const char *p) return (&dtrace_match_string); } -/* - * Build a probe comparison key for use with dtrace_match_probe() from the +static dtrace_probekey_f * +dtrace_probekey_module_func(const char *p) +{ + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + + dtrace_probekey_f *f = dtrace_probekey_func(p); + if (f == &dtrace_match_string) { + dtrace_probe_t template = { + .dtpr_mod = (char *)(uintptr_t)p, + }; + if (dtrace_hash_lookup(dtrace_bymod, &template) == NULL) { + return (&dtrace_match_module); + } + return (&dtrace_match_string); + } + return f; +} + +/* + * Build a probe comparison key for use with dtrace_match_probe() from the * given probe description. By convention, a null key only matches anchored * probes: if each field is the empty string, reset dtpk_fmatch to * dtrace_match_nonzero(). 
@@ -7411,16 +7820,17 @@ dtrace_probekey_func(const char *p) static void dtrace_probekey(const dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp) { - pkp->dtpk_prov = pdp->dtpd_provider; + + pkp->dtpk_prov = dtrace_strref(pdp->dtpd_provider); pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider); - pkp->dtpk_mod = pdp->dtpd_mod; - pkp->dtpk_mmatch = dtrace_probekey_func(pdp->dtpd_mod); + pkp->dtpk_mod = dtrace_strref(pdp->dtpd_mod); + pkp->dtpk_mmatch = dtrace_probekey_module_func(pdp->dtpd_mod); - pkp->dtpk_func = pdp->dtpd_func; + pkp->dtpk_func = dtrace_strref(pdp->dtpd_func); pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func); - pkp->dtpk_name = pdp->dtpd_name; + pkp->dtpk_name = dtrace_strref(pdp->dtpd_name); pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name); pkp->dtpk_id = pdp->dtpd_id; @@ -7433,6 +7843,26 @@ dtrace_probekey(const dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp) pkp->dtpk_fmatch = &dtrace_match_nonzero; } +static void +dtrace_probekey_release(dtrace_probekey_t *pkp) +{ + dtrace_strunref(pkp->dtpk_prov); + dtrace_strunref(pkp->dtpk_mod); + dtrace_strunref(pkp->dtpk_func); + dtrace_strunref(pkp->dtpk_name); +} + +static int +dtrace_cond_provider_match(dtrace_probedesc_t *desc, void *data) +{ + if (desc == NULL) + return 1; + + dtrace_probekey_f *func = dtrace_probekey_func(desc->dtpd_provider); + + return func((char*)data, desc->dtpd_provider, 0); +} + /* * DTrace Provider-to-Framework API Functions * @@ -7499,13 +7929,6 @@ dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv, provider = kmem_zalloc(sizeof (dtrace_provider_t), KM_SLEEP); - /* APPLE NOTE: Darwin employs size bounded string operation. 
*/ - { - size_t bufsize = strlen(name) + 1; - provider->dtpv_name = kmem_alloc(bufsize, KM_SLEEP); - (void) strlcpy(provider->dtpv_name, name, bufsize); - } - provider->dtpv_attr = *pap; provider->dtpv_priv.dtpp_flags = priv; if (cr != NULL) { @@ -7538,8 +7961,11 @@ dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv, *idp = (dtrace_provider_id_t)provider; if (pops == &dtrace_provider_ops) { - lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + + provider->dtpv_name = dtrace_strref(name); + ASSERT(dtrace_anon.dta_enabling == NULL); /* @@ -7554,6 +7980,8 @@ dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv, lck_mtx_lock(&dtrace_provider_lock); lck_mtx_lock(&dtrace_lock); + provider->dtpv_name = dtrace_strref(name); + /* * If there is at least one provider registered, we'll add this * provider after the first provider. @@ -7569,13 +7997,16 @@ dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv, dtrace_enabling_provide(provider); /* - * Now we need to call dtrace_enabling_matchall() -- which - * will acquire cpu_lock and dtrace_lock. We therefore need + * Now we need to call dtrace_enabling_matchall_with_cond() -- + * with a condition matching the provider name we just added, + * which will acquire cpu_lock and dtrace_lock. We therefore need * to drop all of our locks before calling into it... 
*/ lck_mtx_unlock(&dtrace_lock); lck_mtx_unlock(&dtrace_provider_lock); - dtrace_enabling_matchall(); + + dtrace_match_cond_t cond = {dtrace_cond_provider_match, provider->dtpv_name}; + dtrace_enabling_matchall_with_cond(&cond); return (0); } @@ -7595,8 +8026,11 @@ dtrace_unregister(dtrace_provider_id_t id) { dtrace_provider_t *old = (dtrace_provider_t *)id; dtrace_provider_t *prev = NULL; - int i, self = 0; - dtrace_probe_t *probe, *first = NULL; + int self = 0; + dtrace_probe_t *probe, *first = NULL, *next = NULL; + dtrace_probe_t template = { + .dtpr_provider = old + }; if (old->dtpv_pops.dtps_enable == (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop) { @@ -7606,8 +8040,8 @@ dtrace_unregister(dtrace_provider_id_t id) */ ASSERT(old == dtrace_provider); ASSERT(dtrace_devi != NULL); - lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); self = 1; if (dtrace_provider->dtpv_next != NULL) { @@ -7657,14 +8091,12 @@ dtrace_unregister(dtrace_provider_id_t id) * All of the probes for this provider are disabled; we can safely * remove all of them from their hash chains and from the probe array. 
*/ - for (i = 0; i < dtrace_nprobes && old->dtpv_probe_count!=0; i++) { - if ((probe = dtrace_probes[i]) == NULL) - continue; - + for (probe = dtrace_hash_lookup(dtrace_byprov, &template); probe != NULL; + probe = *(DTRACE_HASHNEXT(dtrace_byprov, probe))) { if (probe->dtpr_provider != old) continue; - dtrace_probes[i] = NULL; + dtrace_probes[probe->dtpr_id - 1] = NULL; old->dtpv_probe_count--; dtrace_hash_remove(dtrace_bymod, probe); @@ -7675,11 +8107,19 @@ dtrace_unregister(dtrace_provider_id_t id) first = probe; probe->dtpr_nextmod = NULL; } else { + /* + * Use nextmod as the chain of probes to remove + */ probe->dtpr_nextmod = first; first = probe; } } + for (probe = first; probe != NULL; probe = next) { + next = probe->dtpr_nextmod; + dtrace_hash_remove(dtrace_byprov, probe); + } + /* * The provider's probes have been removed from the hash chains and * from the probe array. Now issue a dtrace_sync() to be sure that @@ -7687,14 +8127,14 @@ dtrace_unregister(dtrace_provider_id_t id) */ dtrace_sync(); - for (probe = first; probe != NULL; probe = first) { - first = probe->dtpr_nextmod; + for (probe = first; probe != NULL; probe = next) { + next = probe->dtpr_nextmod; old->dtpv_pops.dtps_destroy(old->dtpv_arg, probe->dtpr_id, probe->dtpr_arg); - kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); - kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); - kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); + dtrace_strunref(probe->dtpr_mod); + dtrace_strunref(probe->dtpr_func); + dtrace_strunref(probe->dtpr_name); vmem_free(dtrace_arena, (void *)(uintptr_t)(probe->dtpr_id), 1); zfree(dtrace_probe_t_zone, probe); } @@ -7715,13 +8155,14 @@ dtrace_unregister(dtrace_provider_id_t id) prev->dtpv_next = old->dtpv_next; } + dtrace_strunref(old->dtpv_name); + if (!self) { lck_mtx_unlock(&dtrace_lock); lck_mtx_unlock(&mod_lock); lck_mtx_unlock(&dtrace_provider_lock); } - kmem_free(old->dtpv_name, strlen(old->dtpv_name) + 1); kmem_free(old, sizeof 
(dtrace_provider_t)); return (0); @@ -7771,8 +8212,10 @@ int dtrace_condense(dtrace_provider_id_t id) { dtrace_provider_t *prov = (dtrace_provider_t *)id; - int i; - dtrace_probe_t *probe; + dtrace_probe_t *probe, *first = NULL; + dtrace_probe_t template = { + .dtpr_provider = prov + }; /* * Make sure this isn't the dtrace provider itself. @@ -7786,9 +8229,8 @@ dtrace_condense(dtrace_provider_id_t id) /* * Attempt to destroy the probes associated with this provider. */ - for (i = 0; i < dtrace_nprobes; i++) { - if ((probe = dtrace_probes[i]) == NULL) - continue; + for (probe = dtrace_hash_lookup(dtrace_byprov, &template); probe != NULL; + probe = *(DTRACE_HASHNEXT(dtrace_byprov, probe))) { if (probe->dtpr_provider != prov) continue; @@ -7796,20 +8238,35 @@ dtrace_condense(dtrace_provider_id_t id) if (probe->dtpr_ecb != NULL) continue; - dtrace_probes[i] = NULL; + dtrace_probes[probe->dtpr_id - 1] = NULL; prov->dtpv_probe_count--; dtrace_hash_remove(dtrace_bymod, probe); dtrace_hash_remove(dtrace_byfunc, probe); dtrace_hash_remove(dtrace_byname, probe); - prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, i + 1, + prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg); - kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); - kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); - kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); + dtrace_strunref(probe->dtpr_mod); + dtrace_strunref(probe->dtpr_func); + dtrace_strunref(probe->dtpr_name); + if (first == NULL) { + first = probe; + probe->dtpr_nextmod = NULL; + } else { + /* + * Use nextmod as the chain of probes to remove + */ + probe->dtpr_nextmod = first; + first = probe; + } + } + + for (probe = first; probe != NULL; probe = first) { + first = probe->dtpr_nextmod; + dtrace_hash_remove(dtrace_byprov, probe); + vmem_free(dtrace_arena, (void *)((uintptr_t)probe->dtpr_id), 1); zfree(dtrace_probe_t_zone, probe); - vmem_free(dtrace_arena, (void *)((uintptr_t)i + 1), 1); } 
lck_mtx_unlock(&dtrace_lock); @@ -7840,7 +8297,7 @@ dtrace_probe_create(dtrace_provider_id_t prov, const char *mod, dtrace_id_t id; if (provider == dtrace_provider) { - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); } else { lck_mtx_lock(&dtrace_lock); } @@ -7853,13 +8310,14 @@ dtrace_probe_create(dtrace_provider_id_t prov, const char *mod, probe->dtpr_id = id; probe->dtpr_gen = dtrace_probegen++; - probe->dtpr_mod = dtrace_strdup(mod); - probe->dtpr_func = dtrace_strdup(func); - probe->dtpr_name = dtrace_strdup(name); + probe->dtpr_mod = dtrace_strref(mod); + probe->dtpr_func = dtrace_strref(func); + probe->dtpr_name = dtrace_strref(name); probe->dtpr_arg = arg; probe->dtpr_aframes = aframes; probe->dtpr_provider = provider; + dtrace_hash_add(dtrace_byprov, probe); dtrace_hash_add(dtrace_bymod, probe); dtrace_hash_add(dtrace_byfunc, probe); dtrace_hash_add(dtrace_byname, probe); @@ -7913,7 +8371,7 @@ dtrace_probe_create(dtrace_provider_id_t prov, const char *mod, static dtrace_probe_t * dtrace_probe_lookup_id(dtrace_id_t id) { - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if (id == 0 || id > (dtrace_id_t)dtrace_nprobes) return (NULL); @@ -7922,9 +8380,10 @@ dtrace_probe_lookup_id(dtrace_id_t id) } static int -dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg) +dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg1, void *arg2) { - *((dtrace_id_t *)arg) = probe->dtpr_id; +#pragma unused(arg2) + *((dtrace_id_t *)arg1) = probe->dtpr_id; return (DTRACE_MATCH_DONE); } @@ -7941,19 +8400,23 @@ dtrace_probe_lookup(dtrace_provider_id_t prid, const char *mod, dtrace_id_t id; int match; - pkey.dtpk_prov = ((dtrace_provider_t *)prid)->dtpv_name; + lck_mtx_lock(&dtrace_lock); + + pkey.dtpk_prov = dtrace_strref(((dtrace_provider_t *)prid)->dtpv_name); pkey.dtpk_pmatch = &dtrace_match_string; - pkey.dtpk_mod = mod; + pkey.dtpk_mod = 
dtrace_strref(mod); pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul; - pkey.dtpk_func = func; + pkey.dtpk_func = dtrace_strref(func); pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul; - pkey.dtpk_name = name; + pkey.dtpk_name = dtrace_strref(name); pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul; pkey.dtpk_id = DTRACE_IDNONE; - lck_mtx_lock(&dtrace_lock); match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0, - dtrace_probe_lookup_match, &id); + dtrace_probe_lookup_match, &id, NULL); + + dtrace_probekey_release(&pkey); + lck_mtx_unlock(&dtrace_lock); ASSERT(match == 1 || match == 0); @@ -8019,7 +8482,7 @@ dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv) struct modctl *ctl; int all = 0; - lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); if (prv == NULL) { all = 1; @@ -8092,14 +8555,15 @@ dtrace_probe_foreach(uintptr_t offs) } static int -dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab) +dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab, dtrace_ecbdesc_t *ep) { dtrace_probekey_t pkey; uint32_t priv; uid_t uid; zoneid_t zoneid; + int err; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); dtrace_ecb_create_cache = NULL; @@ -8108,7 +8572,7 @@ dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab) * If we're passed a NULL description, we're being asked to * create an ECB with a NULL probe. 
*/ - (void) dtrace_ecb_create_enable(NULL, enab); + (void) dtrace_ecb_create_enable(NULL, enab, ep); return (0); } @@ -8116,8 +8580,11 @@ dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab) dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred, &priv, &uid, &zoneid); - return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable, - enab)); + err = dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable, enab, ep); + + dtrace_probekey_release(&pkey); + + return err; } /* @@ -8149,7 +8616,7 @@ dtrace_dofprov2hprov(dtrace_helper_provdesc_t *hprov, } static void -dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) +dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, proc_t *p) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; @@ -8198,7 +8665,7 @@ dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) */ dtrace_dofprov2hprov(&dhpv, provider, strtab); - if ((parg = mops->dtms_provide_pid(meta->dtm_arg, &dhpv, pid)) == NULL) + if ((parg = mops->dtms_provide_proc(meta->dtm_arg, &dhpv, p)) == NULL) return; meta->dtm_count++; @@ -8235,16 +8702,27 @@ dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) mops->dtms_create_probe(meta->dtm_arg, parg, &dhpb); } + + /* + * Since we just created probes, we need to match our enablings + * against those, with a precondition knowing that we have only + * added probes from this provider + */ + char *prov_name = mops->dtms_provider_name(parg); + ASSERT(prov_name != NULL); + dtrace_match_cond_t cond = {dtrace_cond_provider_match, (void*)prov_name}; + + dtrace_enabling_matchall_with_cond(&cond); } static void -dtrace_helper_provide(dof_helper_t *dhp, pid_t pid) +dtrace_helper_provide(dof_helper_t *dhp, proc_t *p) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; uint32_t i; - lck_mtx_assert(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); + 
LCK_MTX_ASSERT(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); for (i = 0; i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + @@ -8253,21 +8731,12 @@ dtrace_helper_provide(dof_helper_t *dhp, pid_t pid) if (sec->dofs_type != DOF_SECT_PROVIDER) continue; - dtrace_helper_provide_one(dhp, sec, pid); + dtrace_helper_provide_one(dhp, sec, p); } - - /* - * We may have just created probes, so we must now rematch against - * any retained enablings. Note that this call will acquire both - * cpu_lock and dtrace_lock; the fact that we are holding - * dtrace_meta_lock now is what defines the ordering with respect to - * these three locks. - */ - dtrace_enabling_matchall(); } static void -dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) +dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, proc_t *p) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; @@ -8289,19 +8758,19 @@ dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) */ dtrace_dofprov2hprov(&dhpv, provider, strtab); - mops->dtms_remove_pid(meta->dtm_arg, &dhpv, pid); + mops->dtms_remove_proc(meta->dtm_arg, &dhpv, p); meta->dtm_count--; } static void -dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid) +dtrace_helper_provider_remove(dof_helper_t *dhp, proc_t *p) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; uint32_t i; - lck_mtx_assert(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); for (i = 0; i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + @@ -8310,7 +8779,7 @@ dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid) if (sec->dofs_type != DOF_SECT_PROVIDER) continue; - dtrace_helper_provider_remove_one(dhp, sec, pid); + dtrace_helper_provider_remove_one(dhp, sec, p); } } @@ -8342,8 +8811,8 @@ dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void 
*arg, if (mops == NULL || mops->dtms_create_probe == NULL || - mops->dtms_provide_pid == NULL || - mops->dtms_remove_pid == NULL) { + mops->dtms_provide_proc == NULL || + mops->dtms_remove_proc == NULL) { cmn_err(CE_WARN, "failed to register meta-register %s: " "invalid ops", name); return (EINVAL); @@ -8351,14 +8820,6 @@ dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg, meta = kmem_zalloc(sizeof (dtrace_meta_t), KM_SLEEP); meta->dtm_mops = *mops; - - /* APPLE NOTE: Darwin employs size bounded string operation. */ - { - size_t bufsize = strlen(name) + 1; - meta->dtm_name = kmem_alloc(bufsize, KM_SLEEP); - (void) strlcpy(meta->dtm_name, name, bufsize); - } - meta->dtm_arg = arg; lck_mtx_lock(&dtrace_meta_lock); @@ -8369,11 +8830,12 @@ dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg, lck_mtx_unlock(&dtrace_meta_lock); cmn_err(CE_WARN, "failed to register meta-register %s: " "user-land meta-provider exists", name); - kmem_free(meta->dtm_name, strlen(meta->dtm_name) + 1); kmem_free(meta, sizeof (dtrace_meta_t)); return (EINVAL); } + meta->dtm_name = dtrace_strref(name); + dtrace_meta_pid = meta; *idp = (dtrace_meta_provider_id_t)meta; @@ -8389,8 +8851,12 @@ dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg, while (help != NULL) { for (i = 0; i < help->dthps_nprovs; i++) { + proc_t *p = proc_find(help->dthps_pid); + if (p == PROC_NULL) + continue; dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov, - help->dthps_pid); + p); + proc_rele(p); } next = help->dthps_next; @@ -8428,10 +8894,11 @@ dtrace_meta_unregister(dtrace_meta_provider_id_t id) *pp = NULL; + dtrace_strunref(old->dtm_name); + lck_mtx_unlock(&dtrace_lock); lck_mtx_unlock(&dtrace_meta_lock); - kmem_free(old->dtm_name, strlen(old->dtm_name) + 1); kmem_free(old, sizeof (dtrace_meta_t)); return (0); @@ -8480,6 +8947,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, int (*efunc)(uint_t pc, const 
char *, ...) = dtrace_difo_err; int kcheckload; uint_t pc; + int maxglobal = -1, maxlocal = -1, maxtlocal = -1; kcheckload = cr == NULL || (vstate->dtvs_state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) == 0; @@ -8700,7 +9168,8 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, err += efunc(pc, "invalid register %u\n", rd); break; case DIF_OP_CALL: - if (subr > DIF_SUBR_MAX) + if (subr > DIF_SUBR_MAX && + !(subr >= DIF_SUBR_APPLE_MIN && subr <= DIF_SUBR_APPLE_MAX)) err += efunc(pc, "invalid subr %u\n", subr); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); @@ -8708,7 +9177,9 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, err += efunc(pc, "cannot write to %r0\n"); if (subr == DIF_SUBR_COPYOUT || - subr == DIF_SUBR_COPYOUTSTR) { + subr == DIF_SUBR_COPYOUTSTR || + subr == DIF_SUBR_KDEBUG_TRACE || + subr == DIF_SUBR_KDEBUG_TRACE_STRING) { dp->dtdo_destructive = 1; } break; @@ -8796,6 +9267,9 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, switch (v->dtdv_scope) { case DIFV_SCOPE_GLOBAL: + if (maxglobal == -1 || ndx > maxglobal) + maxglobal = ndx; + if (ndx < vstate->dtvs_nglobals) { dtrace_statvar_t *svar; @@ -8806,11 +9280,16 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, break; case DIFV_SCOPE_THREAD: + if (maxtlocal == -1 || ndx > maxtlocal) + maxtlocal = ndx; + if (ndx < vstate->dtvs_ntlocals) existing = &vstate->dtvs_tlocals[ndx]; break; case DIFV_SCOPE_LOCAL: + if (maxlocal == -1 || ndx > maxlocal) + maxlocal = ndx; if (ndx < vstate->dtvs_nlocals) { dtrace_statvar_t *svar; @@ -8859,6 +9338,37 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, } } + for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) { + dif_instr_t instr = dp->dtdo_buf[pc]; + + uint_t v = DIF_INSTR_VAR(instr); + uint_t op = DIF_INSTR_OP(instr); + + switch (op) { + case DIF_OP_LDGS: + case DIF_OP_LDGAA: + case 
DIF_OP_STGS: + case DIF_OP_STGAA: + if (v > (uint_t)(DIF_VAR_OTHER_UBASE + maxglobal)) + err += efunc(pc, "invalid variable %u\n", v); + break; + case DIF_OP_LDTS: + case DIF_OP_LDTAA: + case DIF_OP_STTS: + case DIF_OP_STTAA: + if (v > (uint_t)(DIF_VAR_OTHER_UBASE + maxtlocal)) + err += efunc(pc, "invalid variable %u\n", v); + break; + case DIF_OP_LDLS: + case DIF_OP_STLS: + if (v > (uint_t)(DIF_VAR_OTHER_UBASE + maxlocal)) + err += efunc(pc, "invalid variable %u\n", v); + break; + default: + break; + } + } + return (err); } @@ -8997,7 +9507,8 @@ dtrace_difo_validate_helper(dtrace_difo_t *dp) subr == DIF_SUBR_STRJOIN || subr == DIF_SUBR_STRRCHR || subr == DIF_SUBR_STRSTR || - subr == DIF_SUBR_COREPROFILE || + subr == DIF_SUBR_KDEBUG_TRACE || + subr == DIF_SUBR_KDEBUG_TRACE_STRING || subr == DIF_SUBR_HTONS || subr == DIF_SUBR_HTONL || subr == DIF_SUBR_HTONLL || @@ -9072,7 +9583,7 @@ dtrace_difo_hold(dtrace_difo_t *dp) { uint_t i; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); dp->dtdo_refcnt++; ASSERT(dp->dtdo_refcnt != 0); @@ -9256,7 +9767,7 @@ dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate) int oldsvars, osz, nsz, otlocals, ntlocals; uint_t i, id; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dp->dtdo_buf != NULL && dp->dtdo_len != 0); for (i = 0; i < dp->dtdo_varlen; i++) { @@ -9478,7 +9989,7 @@ dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { uint_t i; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dp->dtdo_refcnt != 0); for (i = 0; i < dp->dtdo_varlen; i++) { @@ -9591,7 +10102,7 @@ dtrace_predicate_create(dtrace_difo_t *dp) { dtrace_predicate_t *pred; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dp->dtdo_refcnt != 0); pred = kmem_zalloc(sizeof (dtrace_predicate_t), 
KM_SLEEP); @@ -9621,7 +10132,7 @@ dtrace_predicate_create(dtrace_difo_t *dp) static void dtrace_predicate_hold(dtrace_predicate_t *pred) { - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(pred->dtp_difo != NULL && pred->dtp_difo->dtdo_refcnt != 0); ASSERT(pred->dtp_refcnt > 0); @@ -9634,7 +10145,7 @@ dtrace_predicate_release(dtrace_predicate_t *pred, dtrace_vstate_t *vstate) dtrace_difo_t *dp = pred->dtp_difo; #pragma unused(dp) /* __APPLE__ */ - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dp != NULL && dp->dtdo_refcnt != 0); ASSERT(pred->dtp_refcnt > 0); @@ -9709,7 +10220,7 @@ dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe) dtrace_ecb_t *ecb; dtrace_epid_t epid; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ecb = kmem_zalloc(sizeof (dtrace_ecb_t), KM_SLEEP); ecb->dte_predicate = NULL; @@ -9775,8 +10286,8 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb) { dtrace_probe_t *probe = ecb->dte_probe; - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(ecb->dte_next == NULL); if (probe == NULL) { @@ -9816,7 +10327,7 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb) } } -static void +static int dtrace_ecb_resize(dtrace_ecb_t *ecb) { dtrace_action_t *act; @@ -9846,9 +10357,10 @@ dtrace_ecb_resize(dtrace_ecb_t *ecb) ASSERT(curneeded != UINT32_MAX); agg->dtag_base = aggbase; - curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment); rec->dtrd_offset = curneeded; + if (curneeded + rec->dtrd_size < curneeded) + return (EINVAL); curneeded += rec->dtrd_size; ecb->dte_needed = MAX(ecb->dte_needed, curneeded); @@ -9875,11 +10387,15 @@ dtrace_ecb_resize(dtrace_ecb_t *ecb) curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment); 
rec->dtrd_offset = curneeded; curneeded += rec->dtrd_size; + if (curneeded + rec->dtrd_size < curneeded) + return (EINVAL); } else { /* tuples must be followed by an aggregation */ ASSERT(act->dta_prev == NULL || !act->dta_prev->dta_intuple); ecb->dte_size = P2ROUNDUP(ecb->dte_size, rec->dtrd_alignment); rec->dtrd_offset = ecb->dte_size; + if (ecb->dte_size + rec->dtrd_size < ecb->dte_size) + return (EINVAL); ecb->dte_size += rec->dtrd_size; ecb->dte_needed = MAX(ecb->dte_needed, ecb->dte_size); } @@ -9898,6 +10414,7 @@ dtrace_ecb_resize(dtrace_ecb_t *ecb) ecb->dte_size = P2ROUNDUP(ecb->dte_size, sizeof (dtrace_epid_t)); ecb->dte_needed = P2ROUNDUP(ecb->dte_needed, (sizeof (dtrace_epid_t))); ecb->dte_state->dts_needed = MAX(ecb->dte_state->dts_needed, ecb->dte_needed); + return (0); } static dtrace_action_t * @@ -10115,7 +10632,7 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) dtrace_optval_t nframes=0, strsize; uint64_t arg = desc->dtad_arg; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(ecb->dte_action == NULL || ecb->dte_action->dta_refcnt == 1); if (DTRACEACT_ISAGG(desc->dtad_kind)) { @@ -10394,7 +10911,7 @@ dtrace_ecb_disable(dtrace_ecb_t *ecb) dtrace_ecb_t *pecb, *prev = NULL; dtrace_probe_t *probe = ecb->dte_probe; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if (probe == NULL) { /* @@ -10473,7 +10990,7 @@ dtrace_ecb_destroy(dtrace_ecb_t *ecb) dtrace_predicate_t *pred; dtrace_epid_t epid = ecb->dte_epid; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(ecb->dte_next == NULL); ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb); @@ -10498,7 +11015,7 @@ dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe, dtrace_provider_t *prov; dtrace_ecbdesc_t *desc = enab->dten_current; - lck_mtx_assert(&dtrace_lock, 
LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(state != NULL); ecb = dtrace_ecb_add(state, probe); @@ -10568,21 +11085,25 @@ dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe, } } - dtrace_ecb_resize(ecb); + if ((enab->dten_error = dtrace_ecb_resize(ecb)) != 0) { + dtrace_ecb_destroy(ecb); + return (NULL); + } return (dtrace_ecb_create_cache = ecb); } static int -dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg) +dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg1, void *arg2) { dtrace_ecb_t *ecb; - dtrace_enabling_t *enab = arg; + dtrace_enabling_t *enab = arg1; + dtrace_ecbdesc_t *ep = arg2; dtrace_state_t *state = enab->dten_vstate->dtvs_state; ASSERT(state != NULL); - if (probe != NULL && probe->dtpr_gen < enab->dten_probegen) { + if (probe != NULL && ep != NULL && probe->dtpr_gen < ep->dted_probegen) { /* * This probe was created in a generation for which this * enabling has previously created ECBs; we don't want to @@ -10606,7 +11127,7 @@ dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id) dtrace_ecb_t *ecb; #pragma unused(ecb) /* __APPLE__ */ - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if (id == 0 || id > (dtrace_epid_t)state->dts_necbs) return (NULL); @@ -10623,7 +11144,7 @@ dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id) dtrace_aggregation_t *agg; #pragma unused(agg) /* __APPLE__ */ - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if (id == 0 || id > (dtrace_aggid_t)state->dts_naggregations) return (NULL); @@ -10675,6 +11196,8 @@ dtrace_buffer_switch(dtrace_buffer_t *buf) buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED); buf->dtb_interval = now - buf->dtb_switched; buf->dtb_switched = now; + buf->dtb_cur_limit = buf->dtb_limit; + dtrace_interrupt_enable(cookie); } @@ -10717,15 +11240,15 @@ dtrace_buffer_canalloc(size_t size) } static int 
-dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, +dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t limit, size_t size, int flags, processorid_t cpu) { dtrace_cpu_t *cp; dtrace_buffer_t *buf; size_t size_before_alloc = dtrace_buffer_memory_inuse; - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if (size > (size_t)dtrace_nonroot_maxsize && !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE)) @@ -10758,6 +11281,10 @@ dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, goto err; dtrace_buffer_memory_inuse += size; + /* Ensure that limit is always lower than size */ + limit = limit == size ? limit - 1 : limit; + buf->dtb_cur_limit = limit; + buf->dtb_limit = limit; buf->dtb_size = size; buf->dtb_flags = flags; buf->dtb_offset = 0; @@ -10857,9 +11384,27 @@ dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align, offs += sizeof (uint32_t); } - if ((uint64_t)(soffs = offs + needed) > buf->dtb_size) { - dtrace_buffer_drop(buf); - return (-1); + if ((uint64_t)(soffs = offs + needed) > buf->dtb_cur_limit) { + if (buf->dtb_cur_limit == buf->dtb_limit) { + buf->dtb_cur_limit = buf->dtb_size; + + atomic_add_32(&state->dts_buf_over_limit, 1); + /** + * Set an AST on the current processor + * so that we can wake up the process + * outside of probe context, when we know + * it is safe to do so + */ + minor_t minor = getminor(state->dts_dev); + ASSERT(minor < 32); + + atomic_or_32(&dtrace_wake_clients, 1 << minor); + ast_dtrace_on(); + } + if ((uint64_t)soffs > buf->dtb_size) { + dtrace_buffer_drop(buf); + return (-1); + } } if (mstate == NULL) @@ -11065,7 +11610,7 @@ static void dtrace_buffer_polish(dtrace_buffer_t *buf) { ASSERT(buf->dtb_flags & DTRACEBUF_RING); - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if
(!(buf->dtb_flags & DTRACEBUF_WRAPPED)) return; @@ -11242,7 +11787,7 @@ dtrace_enabling_destroy(dtrace_enabling_t *enab) dtrace_ecbdesc_t *ep; dtrace_vstate_t *vstate = enab->dten_vstate; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); for (i = 0; i < enab->dten_ndesc; i++) { dtrace_actdesc_t *act, *next; @@ -11302,7 +11847,7 @@ dtrace_enabling_retain(dtrace_enabling_t *enab) { dtrace_state_t *state; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL); ASSERT(enab->dten_vstate != NULL); @@ -11337,7 +11882,7 @@ dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match, dtrace_enabling_t *new, *enab; int found = 0, err = ENOENT; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(strlen(match->dtpd_provider) < DTRACE_PROVNAMELEN); ASSERT(strlen(match->dtpd_mod) < DTRACE_MODNAMELEN); ASSERT(strlen(match->dtpd_func) < DTRACE_FUNCNAMELEN); @@ -11404,7 +11949,7 @@ dtrace_enabling_retract(dtrace_state_t *state) { dtrace_enabling_t *enab, *next; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); /* * Iterate over all retained enablings, destroy the enablings retained @@ -11429,13 +11974,13 @@ dtrace_enabling_retract(dtrace_state_t *state) } static int -dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched) +dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched, dtrace_match_cond_t *cond) { int i = 0; int total_matched = 0, matched = 0; - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); for (i = 0; i < enab->dten_ndesc; i++) { dtrace_ecbdesc_t *ep = enab->dten_desc[i]; @@ -11443,11 +11988,19 @@ 
dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched) enab->dten_current = ep; enab->dten_error = 0; + /** + * Before doing a dtrace_probe_enable, which is really + * expensive, check that this enabling matches the matching precondition + * if we have one + */ + if (cond && (cond->dmc_func(&ep->dted_probe, cond->dmc_data) == 0)) { + continue; + } /* * If a provider failed to enable a probe then get out and * let the consumer know we failed. */ - if ((matched = dtrace_probe_enable(&ep->dted_probe, enab)) < 0) + if ((matched = dtrace_probe_enable(&ep->dted_probe, enab, ep)) < 0) return (EBUSY); total_matched += matched; @@ -11474,9 +12027,10 @@ dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched) return (enab->dten_error); } + + ep->dted_probegen = dtrace_probegen; } - enab->dten_probegen = dtrace_probegen; if (nmatched != NULL) *nmatched = total_matched; @@ -11484,7 +12038,7 @@ dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched) } static void -dtrace_enabling_matchall(void) +dtrace_enabling_matchall_with_cond(dtrace_match_cond_t *cond) { dtrace_enabling_t *enab; @@ -11507,13 +12061,22 @@ dtrace_enabling_matchall(void) * Behave as if always in "global" zone." 
*/ for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { - (void) dtrace_enabling_match(enab, NULL); + (void) dtrace_enabling_match(enab, NULL, cond); } lck_mtx_unlock(&dtrace_lock); lck_mtx_unlock(&cpu_lock); + +} + +static void +dtrace_enabling_matchall(void) +{ + dtrace_enabling_matchall_with_cond(NULL); } + + /* * If an enabling is to be enabled without having matched probes (that is, if * dtrace_state_go() is to be called on the underlying dtrace_state_t), the @@ -11548,7 +12111,7 @@ dtrace_enabling_prime(dtrace_state_t *state) for (i = 0; i < enab->dten_ndesc; i++) { enab->dten_current = enab->dten_desc[i]; - (void) dtrace_probe_enable(NULL, enab); + (void) dtrace_probe_enable(NULL, enab, NULL); } enab->dten_primed = 1; @@ -11568,8 +12131,8 @@ dtrace_enabling_provide(dtrace_provider_t *prv) dtrace_probedesc_t desc; dtrace_genid_t gen; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); if (prv == NULL) { all = 1; @@ -11636,9 +12199,9 @@ dtrace_dof_create(dtrace_state_t *state) roundup(sizeof (dof_sec_t), sizeof (uint64_t)) + sizeof (dof_optdesc_t) * DTRACEOPT_MAX; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - dof = dt_kmem_zalloc_aligned(len, 8, KM_SLEEP); + dof = kmem_zalloc_aligned(len, 8, KM_SLEEP); dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0; dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1; dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2; @@ -11689,7 +12252,7 @@ dtrace_dof_copyin(user_addr_t uarg, int *errp) { dof_hdr_t hdr, *dof; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); /* * First, we're going to copyin() the sizeof (dof_hdr_t). 
@@ -11716,11 +12279,11 @@ dtrace_dof_copyin(user_addr_t uarg, int *errp) return (NULL); } - dof = dt_kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP); + dof = kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP); if (copyin(uarg, dof, hdr.dofh_loadsz) != 0 || dof->dofh_loadsz != hdr.dofh_loadsz) { - dt_kmem_free_aligned(dof, hdr.dofh_loadsz); + kmem_free_aligned(dof, hdr.dofh_loadsz); *errp = EFAULT; return (NULL); } @@ -11733,7 +12296,7 @@ dtrace_dof_copyin_from_proc(proc_t* p, user_addr_t uarg, int *errp) { dof_hdr_t hdr, *dof; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); /* * First, we're going to copyin() the sizeof (dof_hdr_t). @@ -11760,10 +12323,10 @@ dtrace_dof_copyin_from_proc(proc_t* p, user_addr_t uarg, int *errp) return (NULL); } - dof = dt_kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP); + dof = kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP); if (uread(p, dof, hdr.dofh_loadsz, uarg) != KERN_SUCCESS) { - dt_kmem_free_aligned(dof, hdr.dofh_loadsz); + kmem_free_aligned(dof, hdr.dofh_loadsz); *errp = EFAULT; return (NULL); } @@ -11771,57 +12334,61 @@ dtrace_dof_copyin_from_proc(proc_t* p, user_addr_t uarg, int *errp) return (dof); } +static void +dtrace_dof_destroy(dof_hdr_t *dof) +{ + kmem_free_aligned(dof, dof->dofh_loadsz); +} + static dof_hdr_t * dtrace_dof_property(const char *name) { - uchar_t *buf; - uint64_t loadsz; - unsigned int len, i; + unsigned int len = 0; dof_hdr_t *dof; - /* - * Unfortunately, array of values in .conf files are always (and - * only) interpreted to be integer arrays. We must read our DOF - * as an integer array, and then squeeze it into a byte array. 
- */ - if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0, - name, (int **)&buf, &len) != DDI_PROP_SUCCESS) - return (NULL); + if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed()) { + return NULL; + } + + if (!PEReadNVRAMProperty(name, NULL, &len)) { + return NULL; + } + + dof = kmem_alloc_aligned(len, 8, KM_SLEEP); - for (i = 0; i < len; i++) - buf[i] = (uchar_t)(((int *)buf)[i]); + if (!PEReadNVRAMProperty(name, dof, &len)) { + dtrace_dof_destroy(dof); + dtrace_dof_error(NULL, "unreadable DOF"); + return NULL; + } if (len < sizeof (dof_hdr_t)) { - ddi_prop_free(buf); + dtrace_dof_destroy(dof); dtrace_dof_error(NULL, "truncated header"); return (NULL); } - if (len < (loadsz = ((dof_hdr_t *)buf)->dofh_loadsz)) { - ddi_prop_free(buf); + if (len < dof->dofh_loadsz) { + dtrace_dof_destroy(dof); dtrace_dof_error(NULL, "truncated DOF"); return (NULL); } - if (loadsz >= (uint64_t)dtrace_dof_maxsize) { - ddi_prop_free(buf); - dtrace_dof_error(NULL, "oversized DOF"); + if (len != dof->dofh_loadsz) { + dtrace_dof_destroy(dof); + dtrace_dof_error(NULL, "invalid DOF size"); return (NULL); } - dof = dt_kmem_alloc_aligned(loadsz, 8, KM_SLEEP); - bcopy(buf, dof, loadsz); - ddi_prop_free(buf); + if (dof->dofh_loadsz >= (uint64_t)dtrace_dof_maxsize) { + dtrace_dof_destroy(dof); + dtrace_dof_error(NULL, "oversized DOF"); + return (NULL); + } return (dof); } -static void -dtrace_dof_destroy(dof_hdr_t *dof) -{ - dt_kmem_free_aligned(dof, dof->dofh_loadsz); -} - /* * Return the dof_sec_t pointer corresponding to a given section index. If the * index is not valid, dtrace_dof_error() is called and NULL is returned. 
If @@ -12339,7 +12906,7 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, dtrace_enabling_t *enab; uint_t i; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dof->dofh_loadsz >= sizeof (dof_hdr_t)); /* @@ -12399,8 +12966,8 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, return (-1); } - if (dof->dofh_secsize == 0) { - dtrace_dof_error(dof, "zero section header size"); + if (dof->dofh_secsize < sizeof(dof_sec_t)) { + dtrace_dof_error(dof, "invalid section header size"); return (-1); } @@ -12581,7 +13148,7 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) dtrace_dynvar_t *dvar, *next, *start; size_t i; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL); bzero(dstate, sizeof (dtrace_dstate_t)); @@ -12680,7 +13247,7 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) static void dtrace_dstate_fini(dtrace_dstate_t *dstate) { - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); if (dstate->dtds_base == NULL) return; @@ -12762,42 +13329,25 @@ dtrace_state_create(dev_t *devp, cred_t *cr, dtrace_state_t **new_state) dtrace_optval_t *opt; int bufsize = (int)NCPU * sizeof (dtrace_buffer_t), i; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); /* Cause restart */ *new_state = NULL; - /* - * Darwin's DEVFS layer acquired the minor number for this "device" when it called - * dtrace_devfs_clone_func(). At that time, dtrace_devfs_clone_func() proposed a minor number - * (next unused according to vmem_alloc()) and then immediately put the number back in play - * (by calling vmem_free()). 
Now that minor number is being used for an open, so committing it - * to use. The following vmem_alloc() must deliver that same minor number. FIXME. - */ - - minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1, - VM_BESTFIT | VM_SLEEP); - - if (NULL != devp) { - ASSERT(getminor(*devp) == minor); - if (getminor(*devp) != minor) { - printf("dtrace_open: couldn't re-acquire vended minor number %d. Instead got %d\n", - getminor(*devp), minor); - vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); - return (ERESTART); /* can't reacquire */ - } - } else { - /* NULL==devp iff "Anonymous state" (see dtrace_anon_property), - * so just vend the minor device number here de novo since no "open" has occurred. */ + if (devp != NULL) { + minor = getminor(*devp); + } + else { + minor = DTRACE_NCLIENTS - 1; } - if (ddi_soft_state_zalloc(dtrace_softstate, minor) != DDI_SUCCESS) { - vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); - return (EAGAIN); /* temporary resource shortage */ + state = dtrace_state_allocate(minor); + if (NULL == state) { + printf("dtrace_open: couldn't acquire minor number %d. 
This usually means that too many DTrace clients are in use at the moment", minor); + return (ERESTART); /* can't reacquire */ } - state = ddi_get_soft_state(dtrace_softstate, minor); state->dts_epid = DTRACE_EPIDNONE + 1; (void) snprintf(c, sizeof (c), "dtrace_aggid_%d", minor); @@ -12810,7 +13360,7 @@ dtrace_state_create(dev_t *devp, cred_t *cr, dtrace_state_t **new_state) major = ddi_driver_major(dtrace_devi); } - state->dts_dev = makedevice(major, minor); + state->dts_dev = makedev(major, minor); if (devp != NULL) *devp = state->dts_dev; @@ -12823,6 +13373,7 @@ dtrace_state_create(dev_t *devp, cred_t *cr, dtrace_state_t **new_state) */ state->dts_buffer = kmem_zalloc(bufsize, KM_SLEEP); state->dts_aggbuffer = kmem_zalloc(bufsize, KM_SLEEP); + state->dts_buf_over_limit = 0; state->dts_cleaner = CYCLIC_NONE; state->dts_deadman = CYCLIC_NONE; state->dts_vstate.dtvs_state = state; @@ -12848,8 +13399,7 @@ dtrace_state_create(dev_t *devp, cred_t *cr, dtrace_state_t **new_state) opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_default; opt[DTRACEOPT_JSTACKFRAMES] = dtrace_jstackframes_default; opt[DTRACEOPT_JSTACKSTRSIZE] = dtrace_jstackstrsize_default; - - state->dts_activity = DTRACE_ACTIVITY_INACTIVE; + opt[DTRACEOPT_BUFLIMIT] = dtrace_buflimit_default; /* * Depending on the user credentials, we set flag bits which alter probe @@ -12857,10 +13407,32 @@ dtrace_state_create(dev_t *devp, cred_t *cr, dtrace_state_t **new_state) * actual anonymous tracing, or the possession of all privileges, all of * the normal checks are bypassed. 
*/ +#if defined(__APPLE__) + if (cr != NULL) { + kauth_cred_ref(cr); + state->dts_cred.dcr_cred = cr; + } + if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) { + if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed()) { + /* + * Allow only proc credentials when DTrace is + * restricted by the current security policy + */ + state->dts_cred.dcr_visible = DTRACE_CRV_ALLPROC; + state->dts_cred.dcr_action = DTRACE_CRA_PROC | DTRACE_CRA_PROC_CONTROL | DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER; + } + else { + state->dts_cred.dcr_visible = DTRACE_CRV_ALL; + state->dts_cred.dcr_action = DTRACE_CRA_ALL; + } + } + +#else if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) { state->dts_cred.dcr_visible = DTRACE_CRV_ALL; state->dts_cred.dcr_action = DTRACE_CRA_ALL; - } else { + } + else { /* * Set up the credentials for this instantiation. We take a * hold on the credential to prevent it from disappearing on @@ -12977,6 +13549,7 @@ dtrace_state_create(dev_t *devp, cred_t *cr, dtrace_state_t **new_state) DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE; } } +#endif *new_state = state; return(0); /* Success */ @@ -12987,10 +13560,11 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) { dtrace_optval_t *opt = state->dts_options, size; processorid_t cpu = 0; + size_t limit = buf->dtb_size; int flags = 0, rval; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); ASSERT(which < DTRACEOPT_MAX); ASSERT(state->dts_activity == DTRACE_ACTIVITY_INACTIVE || (state == dtrace_anon.dta_state && @@ -13034,8 +13608,8 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) */ return (E2BIG); } - - rval = dtrace_buffer_alloc(buf, size, flags, cpu); + limit = opt[DTRACEOPT_BUFLIMIT] * size / 100; + rval = dtrace_buffer_alloc(buf, limit, size, flags, cpu); if (rval != ENOMEM) { opt[which] 
= size; @@ -13283,6 +13857,18 @@ dtrace_state_go(dtrace_state_t *state, processorid_t *cpu) if (opt[DTRACEOPT_CLEANRATE] > dtrace_cleanrate_max) opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max; + if (opt[DTRACEOPT_STRSIZE] > dtrace_strsize_max) + opt[DTRACEOPT_STRSIZE] = dtrace_strsize_max; + + if (opt[DTRACEOPT_STRSIZE] < dtrace_strsize_min) + opt[DTRACEOPT_STRSIZE] = dtrace_strsize_min; + + if (opt[DTRACEOPT_BUFLIMIT] > dtrace_buflimit_max) + opt[DTRACEOPT_BUFLIMIT] = dtrace_buflimit_max; + + if (opt[DTRACEOPT_BUFLIMIT] < dtrace_buflimit_min) + opt[DTRACEOPT_BUFLIMIT] = dtrace_buflimit_min; + hdlr.cyh_func = (cyc_func_t)dtrace_state_clean; hdlr.cyh_arg = state; hdlr.cyh_level = CY_LOW_LEVEL; @@ -13376,7 +13962,7 @@ dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu) { dtrace_icookie_t cookie; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE && state->dts_activity != DTRACE_ACTIVITY_DRAINING) @@ -13427,7 +14013,7 @@ static int dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option, dtrace_optval_t val) { - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) return (EBUSY); @@ -13494,8 +14080,8 @@ dtrace_state_destroy(dtrace_state_t *state) int nspec = state->dts_nspeculations; uint32_t match; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); /* * First, retract any retained enablings for this state. @@ -13522,7 +14108,7 @@ dtrace_state_destroy(dtrace_state_t *state) * Release the credential hold we took in dtrace_state_create(). 
*/ if (state->dts_cred.dcr_cred != NULL) - crfree(state->dts_cred.dcr_cred); + kauth_cred_unref(&state->dts_cred.dcr_cred); /* * Now we can safely disable and destroy any enabled probes. Because @@ -13595,19 +14181,32 @@ dtrace_state_destroy(dtrace_state_t *state) dtrace_format_destroy(state); vmem_destroy(state->dts_aggid_arena); - ddi_soft_state_free(dtrace_softstate, minor); - vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); + dtrace_state_free(minor); } /* * DTrace Anonymous Enabling Functions */ + +int +dtrace_keep_kernel_symbols(void) +{ + if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed()) { + return 0; + } + + if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL) + return 1; + + return 0; +} + static dtrace_state_t * dtrace_anon_grab(void) { dtrace_state_t *state; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if ((state = dtrace_anon.dta_state) == NULL) { ASSERT(dtrace_anon.dta_enabling == NULL); @@ -13632,8 +14231,8 @@ dtrace_anon_property(void) dof_hdr_t *dof; char c[32]; /* enough for "dof-data-" + digits */ - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); for (i = 0; ; i++) { (void) snprintf(c, sizeof (c), "dof-data-%d", i); @@ -13645,6 +14244,7 @@ dtrace_anon_property(void) break; } +#ifdef illumos /* * We want to create anonymous state, so we need to transition * the kernel debugger to indicate that DTrace is active. If @@ -13657,6 +14257,7 @@ dtrace_anon_property(void) dtrace_dof_destroy(dof); break; } +#endif /* * If we haven't allocated an anonymous state, we'll do so now. 
@@ -13904,7 +14505,8 @@ dtrace_helper_destroygen(proc_t* p, int gen) dtrace_vstate_t *vstate; uint_t i; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if (help == NULL || gen > help->dthps_generation) return (EINVAL); @@ -13969,13 +14571,11 @@ dtrace_helper_destroygen(proc_t* p, int gen) /* * If we have a meta provider, remove this helper provider. */ - lck_mtx_lock(&dtrace_meta_lock); if (dtrace_meta_pid != NULL) { ASSERT(dtrace_deferred_pid == NULL); dtrace_helper_provider_remove(&prov->dthp_prov, - p->p_pid); + p); } - lck_mtx_unlock(&dtrace_meta_lock); dtrace_helper_provider_destroy(prov); @@ -14081,9 +14681,9 @@ static void dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help, dof_helper_t *dofhp) { - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); + LCK_MTX_ASSERT(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); - lck_mtx_lock(&dtrace_meta_lock); lck_mtx_lock(&dtrace_lock); if (!dtrace_attached() || dtrace_meta_pid == NULL) { @@ -14116,7 +14716,7 @@ dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help, lck_mtx_unlock(&dtrace_lock); - dtrace_helper_provide(dofhp, p->p_pid); + dtrace_helper_provide(dofhp, p); } else { /* @@ -14129,11 +14729,9 @@ dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help, for (i = 0; i < help->dthps_nprovs; i++) { dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov, - p->p_pid); + p); } } - - lck_mtx_unlock(&dtrace_meta_lock); } static int @@ -14143,7 +14741,7 @@ dtrace_helper_provider_add(proc_t* p, dof_helper_t *dofhp, int gen) dtrace_helper_provider_t *hprov, **tmp_provs; uint_t tmp_maxprovs, i; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); help = p->p_dtrace_helpers; ASSERT(help != NULL); @@ -14439,7 +15037,8 @@ dtrace_helper_slurp(proc_t* p, dof_hdr_t 
*dof, dof_helper_t *dhp) int i, gen, rv, nhelpers = 0, nprovs = 0, destroy = 1; uintptr_t daddr = (uintptr_t)dof; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if ((help = p->p_dtrace_helpers) == NULL) help = dtrace_helpers_create(p); @@ -14597,10 +15196,6 @@ dtrace_lazy_dofs_add(proc_t *p, dof_ioctl_data_t* incoming_dofs, int *dofs_claim lck_rw_lock_shared(&dtrace_dof_mode_lock); - /* - * If we have lazy dof, dof mode better be LAZY_ON. - */ - ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON); ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL); ASSERT(dtrace_dof_mode != DTRACE_DOF_MODE_NEVER); @@ -14608,7 +15203,7 @@ dtrace_lazy_dofs_add(proc_t *p, dof_ioctl_data_t* incoming_dofs, int *dofs_claim * Any existing helpers force non-lazy behavior. */ if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON && (p->p_dtrace_helpers == NULL)) { - lck_mtx_lock(&p->p_dtrace_sprlock); + dtrace_sprlock(p); dof_ioctl_data_t* existing_dofs = p->p_dtrace_lazy_dofs; unsigned int existing_dofs_count = (existing_dofs) ? existing_dofs->dofiod_count : 0; @@ -14671,7 +15266,7 @@ dtrace_lazy_dofs_add(proc_t *p, dof_ioctl_data_t* incoming_dofs, int *dofs_claim #endif /* DEBUG */ unlock: - lck_mtx_unlock(&p->p_dtrace_sprlock); + dtrace_sprunlock(p); } else { rval = EACCES; } @@ -14694,10 +15289,6 @@ dtrace_lazy_dofs_remove(proc_t *p, int generation) lck_rw_lock_shared(&dtrace_dof_mode_lock); - /* - * If we have lazy dof, dof mode better be LAZY_ON. - */ - ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON); ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL); ASSERT(dtrace_dof_mode != DTRACE_DOF_MODE_NEVER); @@ -14705,7 +15296,7 @@ dtrace_lazy_dofs_remove(proc_t *p, int generation) * Any existing helpers force non-lazy behavior. 
*/ if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON && (p->p_dtrace_helpers == NULL)) { - lck_mtx_lock(&p->p_dtrace_sprlock); + dtrace_sprlock(p); dof_ioctl_data_t* existing_dofs = p->p_dtrace_lazy_dofs; @@ -14762,14 +15353,13 @@ dtrace_lazy_dofs_remove(proc_t *p, int generation) #endif } - - lck_mtx_unlock(&p->p_dtrace_sprlock); - } else { + dtrace_sprunlock(p); + } else { rval = EACCES; } lck_rw_unlock_shared(&dtrace_dof_mode_lock); - + return rval; } @@ -14777,20 +15367,14 @@ void dtrace_lazy_dofs_destroy(proc_t *p) { lck_rw_lock_shared(&dtrace_dof_mode_lock); - lck_mtx_lock(&p->p_dtrace_sprlock); + dtrace_sprlock(p); - /* - * If we have lazy dof, dof mode better be LAZY_ON, or we must be exiting. - * We cannot assert against DTRACE_DOF_MODE_NEVER here, because we are called from - * kern_exit.c and kern_exec.c. - */ - ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON || p->p_lflag & P_LEXIT); ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL); dof_ioctl_data_t* lazy_dofs = p->p_dtrace_lazy_dofs; p->p_dtrace_lazy_dofs = NULL; - lck_mtx_unlock(&p->p_dtrace_sprlock); + dtrace_sprunlock(p); lck_rw_unlock_shared(&dtrace_dof_mode_lock); if (lazy_dofs) { @@ -14798,80 +15382,34 @@ dtrace_lazy_dofs_destroy(proc_t *p) } } -void -dtrace_lazy_dofs_duplicate(proc_t *parent, proc_t *child) +static int +dtrace_lazy_dofs_proc_iterate_filter(proc_t *p, void* ignored) { - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); - lck_mtx_assert(&parent->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED); - lck_mtx_assert(&child->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED); - - lck_rw_lock_shared(&dtrace_dof_mode_lock); - lck_mtx_lock(&parent->p_dtrace_sprlock); - +#pragma unused(ignored) /* - * If we have lazy dof, dof mode better be LAZY_ON, or we must be exiting. - * We cannot assert against DTRACE_DOF_MODE_NEVER here, because we are called from - * kern_fork.c + * Okay to NULL test without taking the sprlock. 
*/ - ASSERT(parent->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON); - ASSERT(parent->p_dtrace_lazy_dofs == NULL || parent->p_dtrace_helpers == NULL); + return p->p_dtrace_lazy_dofs != NULL; +} + +static void +dtrace_lazy_dofs_process(proc_t *p) { /* - * In theory we should hold the child sprlock, but this is safe... + * It is possible this process may exit during our attempt to + * fault in the dof. We could fix this by holding locks longer, + * but the errors are benign. */ - ASSERT(child->p_dtrace_lazy_dofs == NULL && child->p_dtrace_helpers == NULL); + dtrace_sprlock(p); - dof_ioctl_data_t* parent_dofs = parent->p_dtrace_lazy_dofs; - dof_ioctl_data_t* child_dofs = NULL; - if (parent_dofs) { - size_t parent_dofs_size = DOF_IOCTL_DATA_T_SIZE(parent_dofs->dofiod_count); - child_dofs = kmem_alloc(parent_dofs_size, KM_SLEEP); - bcopy(parent_dofs, child_dofs, parent_dofs_size); - } - lck_mtx_unlock(&parent->p_dtrace_sprlock); - - if (child_dofs) { - lck_mtx_lock(&child->p_dtrace_sprlock); - child->p_dtrace_lazy_dofs = child_dofs; - lck_mtx_unlock(&child->p_dtrace_sprlock); - } - - lck_rw_unlock_shared(&dtrace_dof_mode_lock); -} - -static int -dtrace_lazy_dofs_proc_iterate_filter(proc_t *p, void* ignored) -{ -#pragma unused(ignored) - /* - * Okay to NULL test without taking the sprlock. - */ - return p->p_dtrace_lazy_dofs != NULL; -} - -static int -dtrace_lazy_dofs_proc_iterate_doit(proc_t *p, void* ignored) -{ -#pragma unused(ignored) - /* - * It is possible this process may exit during our attempt to - * fault in the dof. We could fix this by holding locks longer, - * but the errors are benign. 
- */ - lck_mtx_lock(&p->p_dtrace_sprlock); - - /* - * In this case only, it is okay to have lazy dof when dof mode is DTRACE_DOF_MODE_LAZY_OFF - */ ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL); ASSERT(dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF); - dof_ioctl_data_t* lazy_dofs = p->p_dtrace_lazy_dofs; p->p_dtrace_lazy_dofs = NULL; - lck_mtx_unlock(&p->p_dtrace_sprlock); - + dtrace_sprunlock(p); + lck_mtx_lock(&dtrace_meta_lock); /* * Process each dof_helper_t */ @@ -14894,7 +15432,7 @@ dtrace_lazy_dofs_proc_iterate_doit(proc_t *p, void* ignored) dhp->dofhp_dof = dhp->dofhp_addr; dof_hdr_t *dof = dtrace_dof_copyin_from_proc(p, dhp->dofhp_dof, &rval); - + if (dof != NULL) { dtrace_helpers_t *help; @@ -14926,19 +15464,84 @@ dtrace_lazy_dofs_proc_iterate_doit(proc_t *p, void* ignored) lck_mtx_unlock(&dtrace_lock); } } - + lck_mtx_unlock(&dtrace_meta_lock); kmem_free(lazy_dofs, DOF_IOCTL_DATA_T_SIZE(lazy_dofs->dofiod_count)); + } else { + lck_mtx_unlock(&dtrace_meta_lock); } +} + +static int +dtrace_lazy_dofs_proc_iterate_doit(proc_t *p, void* ignored) +{ +#pragma unused(ignored) + + dtrace_lazy_dofs_process(p); return PROC_RETURNED; } +#define DTRACE_LAZY_DOFS_DUPLICATED 1 + +static int +dtrace_lazy_dofs_duplicate(proc_t *parent, proc_t *child) +{ + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); + LCK_MTX_ASSERT(&parent->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED); + LCK_MTX_ASSERT(&child->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED); + + lck_rw_lock_shared(&dtrace_dof_mode_lock); + dtrace_sprlock(parent); + + /* + * We need to make sure that the transition to lazy dofs -> helpers + * was atomic for our parent + */ + ASSERT(parent->p_dtrace_lazy_dofs == NULL || parent->p_dtrace_helpers == NULL); + /* + * In theory we should hold the child sprlock, but this is safe... 
+ */ + ASSERT(child->p_dtrace_lazy_dofs == NULL && child->p_dtrace_helpers == NULL); + + dof_ioctl_data_t* parent_dofs = parent->p_dtrace_lazy_dofs; + dof_ioctl_data_t* child_dofs = NULL; + if (parent_dofs) { + size_t parent_dofs_size = DOF_IOCTL_DATA_T_SIZE(parent_dofs->dofiod_count); + child_dofs = kmem_alloc(parent_dofs_size, KM_SLEEP); + bcopy(parent_dofs, child_dofs, parent_dofs_size); + } + + dtrace_sprunlock(parent); + + if (child_dofs) { + dtrace_sprlock(child); + child->p_dtrace_lazy_dofs = child_dofs; + dtrace_sprunlock(child); + /** + * We process the DOF at this point if the mode is set to + * LAZY_OFF. This can happen if DTrace is still processing the + * DOF of other process (which can happen because the + * protected pager can have a huge latency) + * but has not processed our parent yet + */ + if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF) { + dtrace_lazy_dofs_process(child); + } + lck_rw_unlock_shared(&dtrace_dof_mode_lock); + + return DTRACE_LAZY_DOFS_DUPLICATED; + } + lck_rw_unlock_shared(&dtrace_dof_mode_lock); + + return 0; +} + static dtrace_helpers_t * dtrace_helpers_create(proc_t *p) { dtrace_helpers_t *help; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(p->p_dtrace_helpers == NULL); help = kmem_zalloc(sizeof (dtrace_helpers_t), KM_SLEEP); @@ -14958,6 +15561,7 @@ dtrace_helpers_destroy(proc_t* p) dtrace_vstate_t *vstate; uint_t i; + lck_mtx_lock(&dtrace_meta_lock); lck_mtx_lock(&dtrace_lock); ASSERT(p->p_dtrace_helpers != NULL); @@ -14991,13 +15595,12 @@ dtrace_helpers_destroy(proc_t* p) * Destroy the helper providers. 
*/ if (help->dthps_maxprovs > 0) { - lck_mtx_lock(&dtrace_meta_lock); if (dtrace_meta_pid != NULL) { ASSERT(dtrace_deferred_pid == NULL); for (i = 0; i < help->dthps_nprovs; i++) { dtrace_helper_provider_remove( - &help->dthps_provs[i]->dthp_prov, p->p_pid); + &help->dthps_provs[i]->dthp_prov, p); } } else { lck_mtx_lock(&dtrace_lock); @@ -15021,7 +15624,6 @@ dtrace_helpers_destroy(proc_t* p) lck_mtx_unlock(&dtrace_lock); } - lck_mtx_unlock(&dtrace_meta_lock); for (i = 0; i < help->dthps_nprovs; i++) { dtrace_helper_provider_destroy(help->dthps_provs[i]); @@ -15040,6 +15642,7 @@ dtrace_helpers_destroy(proc_t* p) --dtrace_helpers; lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&dtrace_meta_lock); } static void @@ -15052,6 +15655,7 @@ dtrace_helpers_duplicate(proc_t *from, proc_t *to) uint_t i; int j, sz, hasprovs = 0; + lck_mtx_lock(&dtrace_meta_lock); lck_mtx_lock(&dtrace_lock); ASSERT(from->p_dtrace_helpers != NULL); ASSERT(dtrace_helpers > 0); @@ -15123,6 +15727,150 @@ dtrace_helpers_duplicate(proc_t *from, proc_t *to) if (hasprovs) dtrace_helper_provider_register(to, newhelp, NULL); + + lck_mtx_unlock(&dtrace_meta_lock); +} + +/** + * DTrace Process functions + */ + +void +dtrace_proc_fork(proc_t *parent_proc, proc_t *child_proc, int spawn) +{ + /* + * This code applies to new processes who are copying the task + * and thread state and address spaces of their parent process. + */ + if (!spawn) { + /* + * APPLE NOTE: Solaris does a sprlock() and drops the + * proc_lock here. We're cheating a bit and only taking + * the p_dtrace_sprlock lock. A full sprlock would + * task_suspend the parent. + */ + dtrace_sprlock(parent_proc); + + /* + * Remove all DTrace tracepoints from the child process. We + * need to do this _before_ duplicating USDT providers since + * any associated probes may be immediately enabled. 
+ */ + if (parent_proc->p_dtrace_count > 0) { + dtrace_fasttrap_fork(parent_proc, child_proc); + } + + dtrace_sprunlock(parent_proc); + + /* + * Duplicate any lazy dof(s). This must be done while NOT + * holding the parent sprlock! Lock ordering is + * dtrace_dof_mode_lock, then sprlock. It is imperative we + * always call dtrace_lazy_dofs_duplicate, rather than null + * check and call if !NULL. If we NULL test, during lazy dof + * faulting we can race with the faulting code and proceed + * from here to beyond the helpers copy. The lazy dof + * faulting will then fail to copy the helpers to the child + * process. We return if we duplicated lazy dofs as a process + * can only have one at the same time to avoid a race between + * a dtrace client and dtrace_proc_fork where a process would + * end up with both lazy dofs and helpers. + */ + if (dtrace_lazy_dofs_duplicate(parent_proc, child_proc) == DTRACE_LAZY_DOFS_DUPLICATED) { + return; + } + + /* + * Duplicate any helper actions and providers if they haven't + * already. + */ +#if !defined(__APPLE__) + /* + * The SFORKING + * we set above informs the code to enable USDT probes that + * sprlock() may fail because the child is being forked. + */ +#endif + /* + * APPLE NOTE: As best I can tell, Apple's sprlock() equivalent + * never fails to find the child. We do not set SFORKING. + */ + if (parent_proc->p_dtrace_helpers != NULL && dtrace_helpers_fork) { + (*dtrace_helpers_fork)(parent_proc, child_proc); + } + } +} + +void +dtrace_proc_exec(proc_t *p) +{ + /* + * Invalidate any predicate evaluation already cached for this thread by DTrace. + * That's because we've just stored to p_comm and DTrace refers to that when it + * evaluates the "execname" special variable. uid and gid may have changed as well. + */ + dtrace_set_thread_predcache(current_thread(), 0); + + /* + * Free any outstanding lazy dof entries. It is imperative we + * always call dtrace_lazy_dofs_destroy, rather than null check + * and call if !NULL. 
If we NULL test, during lazy dof faulting + * we can race with the faulting code and proceed from here to + * beyond the helpers cleanup. The lazy dof faulting will then + * install new helpers which no longer belong to this process! + */ + dtrace_lazy_dofs_destroy(p); + + + /* + * Clean up any DTrace helpers for the process. + */ + if (p->p_dtrace_helpers != NULL && dtrace_helpers_cleanup) { + (*dtrace_helpers_cleanup)(p); + } + + /* + * Cleanup the DTrace provider associated with this process. + */ + proc_lock(p); + if (p->p_dtrace_probes && dtrace_fasttrap_exec_ptr) { + (*dtrace_fasttrap_exec_ptr)(p); + } + proc_unlock(p); +} + +void +dtrace_proc_exit(proc_t *p) +{ + /* + * Free any outstanding lazy dof entries. It is imperative we + * always call dtrace_lazy_dofs_destroy, rather than null check + * and call if !NULL. If we NULL test, during lazy dof faulting + * we can race with the faulting code and proceed from here to + * beyond the helpers cleanup. The lazy dof faulting will then + * install new helpers which will never be cleaned up, and leak. + */ + dtrace_lazy_dofs_destroy(p); + + /* + * Clean up any DTrace helper actions or probes for the process. + */ + if (p->p_dtrace_helpers != NULL) { + (*dtrace_helpers_cleanup)(p); + } + + /* + * Clean up any DTrace probes associated with this process. 
+ */ + /* + * APPLE NOTE: We release ptss pages/entries in dtrace_fasttrap_exit_ptr(), + * call this after dtrace_helpers_cleanup() + */ + proc_lock(p); + if (p->p_dtrace_probes && dtrace_fasttrap_exit_ptr) { + (*dtrace_fasttrap_exit_ptr)(p); + } + proc_unlock(p); } /* @@ -15142,7 +15890,7 @@ dtrace_modctl_add(struct modctl * newctl) struct modctl *nextp, *prevp; ASSERT(newctl != NULL); - lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&mod_lock, LCK_MTX_ASSERT_OWNED); // Insert new module at the front of the list, @@ -15188,7 +15936,7 @@ dtrace_modctl_add(struct modctl * newctl) static modctl_t * dtrace_modctl_lookup(struct kmod_info * kmod) { - lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&mod_lock, LCK_MTX_ASSERT_OWNED); struct modctl * ctl; @@ -15208,7 +15956,7 @@ static void dtrace_modctl_remove(struct modctl * ctl) { ASSERT(ctl != NULL); - lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&mod_lock, LCK_MTX_ASSERT_OWNED); modctl_t *prevp, *nextp, *curp; // Remove stale chain first @@ -15302,7 +16050,7 @@ dtrace_module_loaded(struct kmod_info *kmod, uint32_t flag) ctl->mod_loaded = 1; ctl->mod_flags = 0; ctl->mod_user_symbols = NULL; - + /* * Find the UUID for this module, if it has one */ @@ -15321,6 +16069,15 @@ dtrace_module_loaded(struct kmod_info *kmod, uint32_t flag) if (ctl->mod_address == g_kernel_kmod_info.address) { ctl->mod_flags |= MODCTL_IS_MACH_KERNEL; + memcpy(dtrace_kerneluuid, ctl->mod_uuid, sizeof(dtrace_kerneluuid)); + } + /* + * Static kexts have a UUID that is not used for symbolication, as all their + * symbols are in kernel + */ + else if ((flag & KMOD_DTRACE_STATIC_KEXT) == KMOD_DTRACE_STATIC_KEXT) { + memcpy(ctl->mod_uuid, dtrace_kerneluuid, sizeof(dtrace_kerneluuid)); + ctl->mod_flags |= MODCTL_IS_STATIC_KEXT; } } dtrace_modctl_add(ctl); @@ -15529,6 +16286,7 @@ syncloop: probe->dtpr_provider->dtpv_probe_count--; next = probe->dtpr_nextmod; + dtrace_hash_remove(dtrace_byprov, 
probe); dtrace_hash_remove(dtrace_bymod, probe); dtrace_hash_remove(dtrace_byfunc, probe); dtrace_hash_remove(dtrace_byname, probe); @@ -15554,9 +16312,9 @@ syncloop: prov = probe->dtpr_provider; prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg); - kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); - kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); - kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); + dtrace_strunref(probe->dtpr_mod); + dtrace_strunref(probe->dtpr_func); + dtrace_strunref(probe->dtpr_name); vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1); zfree(dtrace_probe_t_zone, probe); @@ -15589,7 +16347,7 @@ dtrace_resume(void) static int dtrace_cpu_setup(cpu_setup_t what, processorid_t cpu) { - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); lck_mtx_lock(&dtrace_lock); switch (what) { @@ -15693,9 +16451,8 @@ dtrace_toxrange_add(uintptr_t base, uintptr_t limit) */ /*ARGSUSED*/ static int -dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) +dtrace_attach(dev_info_t *devi) { -#pragma unused(cmd) /* __APPLE__ */ dtrace_provider_id_t id; dtrace_state_t *state = NULL; dtrace_enabling_t *enab; @@ -15704,18 +16461,7 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) lck_mtx_lock(&dtrace_provider_lock); lck_mtx_lock(&dtrace_lock); - if (ddi_soft_state_init(&dtrace_softstate, - sizeof (dtrace_state_t), 0) != 0) { - cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state"); - lck_mtx_unlock(&dtrace_lock); - lck_mtx_unlock(&dtrace_provider_lock); - lck_mtx_unlock(&cpu_lock); - return (DDI_FAILURE); - } - /* Darwin uses BSD cloning device driver to automagically obtain minor device number. 
*/ - - ddi_report_dev(devi); dtrace_devi = devi; dtrace_modload = dtrace_module_loaded; @@ -15730,30 +16476,34 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL); - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); dtrace_arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER); - dtrace_minor = vmem_create("dtrace_minor", (void *)DTRACEMNRN_CLONE, - UINT32_MAX - DTRACEMNRN_CLONE, 1, NULL, NULL, NULL, 0, - VM_SLEEP | VMC_IDENTIFIER); - dtrace_taskq = taskq_create("dtrace_taskq", 1, maxclsyspri, - 1, INT_MAX, 0); dtrace_state_cache = kmem_cache_create("dtrace_state_cache", sizeof (dtrace_dstate_percpu_t) * (int)NCPU, DTRACE_STATE_ALIGN, NULL, NULL, NULL, NULL, NULL, 0); - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); - dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod), + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); + + dtrace_byprov = dtrace_hash_create(dtrace_strkey_probe_provider, + 0, /* unused */ + offsetof(dtrace_probe_t, dtpr_nextprov), + offsetof(dtrace_probe_t, dtpr_prevprov)); + + dtrace_bymod = dtrace_hash_create(dtrace_strkey_deref_offset, + offsetof(dtrace_probe_t, dtpr_mod), offsetof(dtrace_probe_t, dtpr_nextmod), offsetof(dtrace_probe_t, dtpr_prevmod)); - dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func), + dtrace_byfunc = dtrace_hash_create(dtrace_strkey_deref_offset, + offsetof(dtrace_probe_t, dtpr_func), offsetof(dtrace_probe_t, dtpr_nextfunc), offsetof(dtrace_probe_t, dtpr_prevfunc)); - dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name), + dtrace_byname = dtrace_hash_create(dtrace_strkey_deref_offset, + offsetof(dtrace_probe_t, dtpr_name), offsetof(dtrace_probe_t, dtpr_nextname), offsetof(dtrace_probe_t, dtpr_prevname)); @@ -15788,6 +16538,13 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) dtrace_provider, 
NULL, NULL, "END", 0, NULL); dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t) dtrace_provider, NULL, NULL, "ERROR", 3, NULL); +#elif (defined(__arm__) || defined(__arm64__)) + dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t) + dtrace_provider, NULL, NULL, "BEGIN", 2, NULL); + dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t) + dtrace_provider, NULL, NULL, "END", 1, NULL); + dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t) + dtrace_provider, NULL, NULL, "ERROR", 4, NULL); #else #error Unknown Architecture #endif @@ -15841,7 +16598,7 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) lck_mtx_lock(&dtrace_lock); if ((enab = dtrace_anon.dta_enabling) != NULL) - (void) dtrace_enabling_match(enab, NULL); + (void) dtrace_enabling_match(enab, NULL, NULL); lck_mtx_unlock(&cpu_lock); } @@ -15898,6 +16655,7 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) dtrace_opens++; dtrace_membar_producer(); +#ifdef illumos /* * If the kernel debugger is active (that is, if the kernel debugger * modified text in some way), we won't allow the open. @@ -15908,13 +16666,17 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) lck_mtx_unlock(&cpu_lock); return (EBUSY); } +#endif rv = dtrace_state_create(devp, cred_p, &state); lck_mtx_unlock(&cpu_lock); if (rv != 0 || state == NULL) { - if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) + if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) { +#ifdef illumos (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); +#endif + } lck_mtx_unlock(&dtrace_lock); /* propagate EAGAIN or ERESTART */ return (rv); @@ -15933,7 +16695,16 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) */ if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON) { dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_OFF; - + /* + * We do not need to hold the exclusive lock while processing + * DOF on processes. 
We do need to make sure the mode does not get + * changed to DTRACE_DOF_MODE_LAZY_ON during that stage though + * (which should not happen anyway since it only happens in + * dtrace_close). There is no way imcomplete USDT probes can be + * activate by any DTrace clients here since they all have to + * call dtrace_open and be blocked on dtrace_dof_mode_lock + */ + lck_rw_lock_exclusive_to_shared(&dtrace_dof_mode_lock); /* * Iterate all existing processes and load lazy dofs. */ @@ -15942,9 +16713,13 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) NULL, dtrace_lazy_dofs_proc_iterate_filter, NULL); + + lck_rw_unlock_shared(&dtrace_dof_mode_lock); + } + else { + lck_rw_unlock_exclusive(&dtrace_dof_mode_lock); } - lck_rw_unlock_exclusive(&dtrace_dof_mode_lock); /* * Update kernel symbol state. @@ -15979,8 +16754,7 @@ dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p) dtrace_state_t *state; /* APPLE NOTE: Darwin puts Helper on its own major device. */ - - state = ddi_get_soft_state(dtrace_softstate, minor); + state = dtrace_state_get(minor); lck_mtx_lock(&cpu_lock); lck_mtx_lock(&dtrace_lock); @@ -16000,9 +16774,12 @@ dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p) * Only relinquish control of the kernel debugger interface when there * are no consumers and no anonymous enablings. 
*/ - if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) + if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) { +#ifdef illumos (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); - +#endif + } + lck_mtx_unlock(&dtrace_lock); lck_mtx_unlock(&cpu_lock); @@ -16056,7 +16833,12 @@ dtrace_ioctl_helper(u_long cmd, caddr_t arg, int *rv) return KERN_SUCCESS; switch (cmd) { +#if defined (__arm64__) + case DTRACEHIOC_ADDDOF_U32: + case DTRACEHIOC_ADDDOF_U64: +#else case DTRACEHIOC_ADDDOF: +#endif /* __arm64__*/ { dof_helper_t *dhp = NULL; size_t dof_ioctl_data_size; @@ -16068,6 +16850,16 @@ dtrace_ioctl_helper(u_long cmd, caddr_t arg, int *rv) int multi_dof_claimed = 0; proc_t* p = current_proc(); + /* + * If this is a restricted process and dtrace is restricted, + * do not allow DOFs to be registered + */ + if (dtrace_is_restricted() && + !dtrace_are_restrictions_relaxed() && + !dtrace_can_attach_to_proc(current_proc())) { + return (EACCES); + } + /* * Read the number of DOF sections being passed in. */ @@ -16077,7 +16869,7 @@ dtrace_ioctl_helper(u_long cmd, caddr_t arg, int *rv) dtrace_dof_error(NULL, "failed to copyin dofiod_count"); return (EFAULT); } - + /* * Range check the count. 
*/ @@ -16128,6 +16920,7 @@ dtrace_ioctl_helper(u_long cmd, caddr_t arg, int *rv) dof_hdr_t *dof = dtrace_dof_copyin(dhp->dofhp_dof, &rval); if (dof != NULL) { + lck_mtx_lock(&dtrace_meta_lock); lck_mtx_lock(&dtrace_lock); /* @@ -16139,6 +16932,7 @@ dtrace_ioctl_helper(u_long cmd, caddr_t arg, int *rv) } lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&dtrace_meta_lock); } } while (++i < multi_dof->dofiod_count && rval == 0); } @@ -16179,9 +16973,11 @@ dtrace_ioctl_helper(u_long cmd, caddr_t arg, int *rv) * EACCES means non-lazy */ if (rval == EACCES) { + lck_mtx_lock(&dtrace_meta_lock); lck_mtx_lock(&dtrace_lock); rval = dtrace_helper_destroygen(p, generation); lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&dtrace_meta_lock); } return (rval); @@ -16205,7 +17001,7 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv /* Darwin puts Helper on its own major device. */ - state = ddi_get_soft_state(dtrace_softstate, minor); + state = dtrace_state_get(minor); if (state->dts_anon) { ASSERT(dtrace_anon.dta_state == NULL); @@ -16465,7 +17261,7 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv return (rval); } - if ((err = dtrace_enabling_match(enab, rv)) == 0) { + if ((err = dtrace_enabling_match(enab, rv, NULL)) == 0) { err = dtrace_enabling_retain(enab); } else { dtrace_enabling_destroy(enab); @@ -16534,17 +17330,15 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv desc.dtpd_id++; } - if (cmd == DTRACEIOC_PROBEMATCH) { - dtrace_probekey(&desc, &pkey); - pkey.dtpk_id = DTRACE_IDNONE; - } - dtrace_cred2priv(cr, &priv, &uid, &zoneid); lck_mtx_lock(&dtrace_lock); - if (cmd == DTRACEIOC_PROBEMATCH) { - /* Quiet compiler warning */ + if (cmd == DTRACEIOC_PROBEMATCH) { + dtrace_probekey(&desc, &pkey); + pkey.dtpk_id = DTRACE_IDNONE; + + /* Quiet compiler warning */ for (i = desc.dtpd_id; i <= (dtrace_id_t)dtrace_nprobes; i++) { if ((probe = dtrace_probes[i - 1]) != NULL && (m = 
dtrace_match_probe(probe, &pkey, @@ -16556,6 +17350,7 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv lck_mtx_unlock(&dtrace_lock); return (EINVAL); } + dtrace_probekey_release(&pkey); } else { /* Quiet compiler warning */ @@ -16688,10 +17483,45 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv return (rval == 0 ? 0 : EFAULT); } + case DTRACEIOC_SLEEP: { + int64_t time; + uint64_t abstime; + uint64_t rvalue = DTRACE_WAKE_TIMEOUT; + + if (copyin(arg, &time, sizeof(time)) != 0) + return (EFAULT); + + nanoseconds_to_absolutetime((uint64_t)time, &abstime); + clock_absolutetime_interval_to_deadline(abstime, &abstime); + + if (assert_wait_deadline(state, THREAD_ABORTSAFE, abstime) == THREAD_WAITING) { + if (state->dts_buf_over_limit > 0) { + clear_wait(current_thread(), THREAD_INTERRUPTED); + rvalue = DTRACE_WAKE_BUF_LIMIT; + } else { + thread_block(THREAD_CONTINUE_NULL); + if (state->dts_buf_over_limit > 0) { + rvalue = DTRACE_WAKE_BUF_LIMIT; + } + } + } + + if (copyout(&rvalue, arg, sizeof(rvalue)) != 0) + return (EFAULT); + + return (0); + } + + case DTRACEIOC_SIGNAL: { + wakeup(state); + return (0); + } + case DTRACEIOC_AGGSNAP: case DTRACEIOC_BUFSNAP: { dtrace_bufdesc_t desc; caddr_t cached; + boolean_t over_limit; dtrace_buffer_t *buf; if (copyin(arg, &desc, sizeof (desc)) != 0) @@ -16773,6 +17603,8 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv } cached = buf->dtb_tomax; + over_limit = buf->dtb_cur_limit == buf->dtb_size; + ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); dtrace_xcall(desc.dtbd_cpu, @@ -16793,11 +17625,28 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv } ASSERT(cached == buf->dtb_xamot); + /* + * At this point we know the buffer have switched, so we + * can decrement the over limit count if the buffer was over + * its limit. 
The new buffer might already be over its limit + * yet, but we don't care since we're guaranteed not to be + * checking the buffer over limit count at this point. + */ + if (over_limit) { + uint32_t old = atomic_add_32(&state->dts_buf_over_limit, -1); + #pragma unused(old) + + /* + * Verify that we didn't underflow the value + */ + ASSERT(old != 0); + } /* * We have our snapshot; now copy it out. */ - if (copyout(buf->dtb_xamot, (user_addr_t)desc.dtbd_data, + if (dtrace_buffer_copyout(buf->dtb_xamot, + (user_addr_t)desc.dtbd_data, buf->dtb_xamot_offset) != 0) { lck_mtx_unlock(&dtrace_lock); return (EFAULT); @@ -17013,7 +17862,7 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv ctl->mod_flags |= MODCTL_FBT_PROVIDE_PRIVATE_PROBES; ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl)); - if (!MOD_SYMBOLS_DONE(ctl)) { + if (!MOD_SYMBOLS_DONE(ctl) && !MOD_IS_STATIC_KEXT(ctl)) { dtmul_count++; rval = EINVAL; } @@ -17069,7 +17918,7 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv * are available, add user syms if the module might use them. */ ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl)); - if (!MOD_SYMBOLS_DONE(ctl)) { + if (!MOD_SYMBOLS_DONE(ctl) && !MOD_IS_STATIC_KEXT(ctl)) { UUID* uuid = &uuids_list->dtmul_uuid[dtmul_count]; if (dtmul_count++ < uuids_list->dtmul_count) { memcpy(uuid, ctl->mod_uuid, sizeof(UUID)); @@ -17159,7 +18008,7 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv /* NOTE! 
We can no longer exit this method via return */ if (copyin(arg, module_symbols, module_symbols_size) != 0) { - cmn_err(CE_WARN, "failed copyin of dtrace_module_symbols_t, symbol count %llu", module_symbols->dtmodsyms_count); + cmn_err(CE_WARN, "failed copyin of dtrace_module_symbols_t"); rval = EFAULT; goto module_symbols_cleanup; } @@ -17185,32 +18034,24 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv ctl->mod_flags |= MODCTL_FBT_PROVIDE_PRIVATE_PROBES; ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl)); - if (MOD_HAS_UUID(ctl) && !MOD_SYMBOLS_DONE(ctl)) { - if (memcmp(module_symbols->dtmodsyms_uuid, ctl->mod_uuid, sizeof(UUID)) == 0) { - /* BINGO! */ - ctl->mod_user_symbols = module_symbols; - break; - } + if (MOD_HAS_UUID(ctl) && !MOD_SYMBOLS_DONE(ctl) && memcmp(module_symbols->dtmodsyms_uuid, ctl->mod_uuid, sizeof(UUID)) == 0) { + dtrace_provider_t *prv; + ctl->mod_user_symbols = module_symbols; + + /* + * We're going to call each providers per-module provide operation + * specifying only this module. + */ + for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next) + prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl); + /* + * We gave every provider a chance to provide with the user syms, go ahead and clear them + */ + ctl->mod_user_symbols = NULL; /* MUST reset this to clear HAS_USERSPACE_SYMBOLS */ } ctl = ctl->mod_next; } - if (ctl) { - dtrace_provider_t *prv; - - /* - * We're going to call each providers per-module provide operation - * specifying only this module. 
- */ - for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next) - prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl); - - /* - * We gave every provider a chance to provide with the user syms, go ahead and clear them - */ - ctl->mod_user_symbols = NULL; /* MUST reset this to clear HAS_USERSPACE_SYMBOLS */ - } - lck_mtx_unlock(&mod_lock); lck_mtx_unlock(&dtrace_provider_lock); @@ -17346,15 +18187,18 @@ dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) dtrace_probes = NULL; dtrace_nprobes = 0; + dtrace_hash_destroy(dtrace_strings); + dtrace_hash_destroy(dtrace_byprov); dtrace_hash_destroy(dtrace_bymod); dtrace_hash_destroy(dtrace_byfunc); dtrace_hash_destroy(dtrace_byname); + dtrace_strings = NULL; + dtrace_byprov = NULL; dtrace_bymod = NULL; dtrace_byfunc = NULL; dtrace_byname = NULL; kmem_cache_destroy(dtrace_state_cache); - vmem_destroy(dtrace_minor); vmem_destroy(dtrace_arena); if (dtrace_toxrange != NULL) { @@ -17377,6 +18221,7 @@ dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) lck_mtx_unlock(&dtrace_lock); lck_mtx_unlock(&dtrace_provider_lock); +#ifdef illumos /* * We don't destroy the task queue until after we have dropped our * locks (taskq_destroy() may block on running tasks). To prevent @@ -17387,6 +18232,7 @@ dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) */ taskq_destroy(dtrace_taskq); dtrace_taskq = NULL; +#endif return (DDI_SUCCESS); } @@ -17530,28 +18376,13 @@ helper_init( void ) #undef HELPER_MAJOR -/* - * Called with DEVFS_LOCK held, so vmem_alloc's underlying blist structures are protected. - */ static int dtrace_clone_func(dev_t dev, int action) { #pragma unused(dev) if (action == DEVFS_CLONE_ALLOC) { - if (NULL == dtrace_minor) /* Arena not created yet!?! */ - return 0; - else { - /* - * Propose a minor number, namely the next number that vmem_alloc() will return. - * Immediately put it back in play by calling vmem_free(). FIXME. 
- */ - int ret = (int)(uintptr_t)vmem_alloc(dtrace_minor, 1, VM_BESTFIT | VM_SLEEP); - - vmem_free(dtrace_minor, (void *)(uintptr_t)ret, 1); - - return ret; - } + return dtrace_state_reserve(); } else if (action == DEVFS_CLONE_FREE) { return 0; @@ -17559,6 +18390,34 @@ dtrace_clone_func(dev_t dev, int action) else return -1; } +void dtrace_ast(void); + +void +dtrace_ast(void) +{ + int i; + uint32_t clients = atomic_and_32(&dtrace_wake_clients, 0); + if (clients == 0) + return; + /** + * We disable preemption here to be sure that we won't get + * interrupted by a wakeup to a thread that is higher + * priority than us, so that we do issue all wakeups + */ + disable_preemption(); + for (i = 0; i < DTRACE_NCLIENTS; i++) { + if (clients & (1 << i)) { + dtrace_state_t *state = dtrace_state_get(i); + if (state) { + wakeup(state); + } + + } + } + enable_preemption(); +} + + #define DTRACE_MAJOR -24 /* let the kernel pick the device number */ static struct cdevsw dtrace_cdevsw = @@ -17585,6 +18444,19 @@ lck_grp_t* dtrace_lck_grp; static int gMajDevNo; +void dtrace_early_init (void) +{ + dtrace_restriction_policy_load(); + + /* + * See dtrace_impl.h for a description of kernel symbol modes. + * The default is to wait for symbols from userspace (lazy symbols). + */ + if (!PE_parse_boot_argn("dtrace_kernel_symbol_mode", &dtrace_kernel_symbol_mode, sizeof (dtrace_kernel_symbol_mode))) { + dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE; + } +} + void dtrace_init( void ) { @@ -17636,13 +18508,6 @@ dtrace_init( void ) return; } -#if defined(DTRACE_MEMORY_ZONES) - /* - * Initialize the dtrace kalloc-emulation zones. - */ - dtrace_alloc_init(); -#endif /* DTRACE_MEMORY_ZONES */ - /* * Allocate the dtrace_probe_t zone */ @@ -17703,12 +18568,17 @@ dtrace_init( void ) lck_mtx_lock(&cpu_lock); for (i = 0; i < ncpu; ++i) - /* FIXME: track CPU configuration a la CHUD Processor Pref Pane. 
*/ + /* FIXME: track CPU configuration */ dtrace_cpu_setup_initial( (processorid_t)i ); /* In lieu of register_cpu_setup_func() callback */ lck_mtx_unlock(&cpu_lock); (void)dtrace_abs_to_nano(0LL); /* Force once only call to clock_timebase_info (which can take a lock) */ + dtrace_strings = dtrace_hash_create(dtrace_strkey_offset, + offsetof(dtrace_string_t, dtst_str), + offsetof(dtrace_string_t, dtst_next), + offsetof(dtrace_string_t, dtst_prev)); + dtrace_isa_init(); /* * See dtrace_impl.h for a description of dof modes. @@ -17718,7 +18588,12 @@ dtrace_init( void ) * makes no sense... */ if (!PE_parse_boot_argn("dtrace_dof_mode", &dtrace_dof_mode, sizeof (dtrace_dof_mode))) { +#if CONFIG_EMBEDDED + /* Disable DOF mode by default for performance reasons */ + dtrace_dof_mode = DTRACE_DOF_MODE_NEVER; +#else dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON; +#endif } /* @@ -17743,16 +18618,6 @@ dtrace_init( void ) break; } - /* - * See dtrace_impl.h for a description of kernel symbol modes. - * The default is to wait for symbols from userspace (lazy symbols). - */ - if (!PE_parse_boot_argn("dtrace_kernel_symbol_mode", &dtrace_kernel_symbol_mode, sizeof (dtrace_kernel_symbol_mode))) { - dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE; - } - - dtrace_restriction_policy_load(); - gDTraceInited = 1; } else @@ -17767,7 +18632,7 @@ dtrace_postinit(void) * run. That way, anonymous DOF enabled under dtrace_attach() is safe * to go. 
*/ - dtrace_attach( (dev_info_t *)(uintptr_t)makedev(gMajDevNo, 0), 0 ); /* Punning a dev_t to a dev_info_t* */ + dtrace_attach( (dev_info_t *)(uintptr_t)makedev(gMajDevNo, 0)); /* Punning a dev_t to a dev_info_t* */ /* * Add the mach_kernel to the module list for lazy processing @@ -17783,6 +18648,10 @@ dtrace_postinit(void) if (dtrace_module_loaded(&fake_kernel_kmod, 0) != 0) { printf("dtrace_postinit: Could not register mach_kernel modctl\n"); } + + if (!PE_parse_boot_argn("dtrace_provide_private_probes", &dtrace_provide_private_probes, sizeof (dtrace_provide_private_probes))) { + dtrace_provide_private_probes = 0; + } (void)OSKextRegisterKextsWithDTrace(); }