X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/04b8595b18b1b41ac7a206e4b3d51a635f8413d7..a991bd8d3e7fe02dbca0644054bab73c5b75324a:/bsd/dev/dtrace/dtrace.c?ds=sidebyside

diff --git a/bsd/dev/dtrace/dtrace.c b/bsd/dev/dtrace/dtrace.c
index dd02ad502..36d4f8223 100644
--- a/bsd/dev/dtrace/dtrace.c
+++ b/bsd/dev/dtrace/dtrace.c
@@ -20,8 +20,8 @@
  */
 
 /*
- * Portions Copyright (c) 2011, Joyent, Inc. All rights reserved.
- * Portions Copyright (c) 2012 by Delphix. All rights reserved.
+ * Portions Copyright (c) 2013, 2016, Joyent, Inc. All rights reserved.
+ * Portions Copyright (c) 2013 by Delphix. All rights reserved.
  */
 
 /*
@@ -29,8 +29,6 @@
  * Use is subject to license terms.
  */
 
-/* #pragma ident "@(#)dtrace.c 1.65 08/07/02 SMI" */
-
 /*
  * DTrace - Dynamic Tracing for Solaris
  *
@@ -61,6 +59,7 @@
  *   - Enabling functions
  *   - DOF functions
  *   - Anonymous enabling functions
+ *   - Process functions
  *   - Consumer state functions
  *   - Helper functions
  *   - Hook functions
@@ -74,6 +73,7 @@
 #include
 #include
 #include
+#include
 #include
 #include
 #include
@@ -93,14 +93,30 @@
 #include
 #include
 #include
+#include
 #include
 #include
+#include
+#include
+
+#if MONOTONIC
+#include
+#include
+#endif /* MONOTONIC */
+
+#include "dtrace_xoroshiro128_plus.h"
+
+#include
 
 #include
+
+extern addr64_t kvtophys(vm_offset_t va);
+
 extern uint32_t pmap_find_phys(void *, uint64_t);
 extern boolean_t pmap_valid_page(uint32_t);
 extern void OSKextRegisterKextsWithDTrace(void);
 extern kmod_info_t g_kernel_kmod_info;
+extern void commpage_update_dof(boolean_t enabled);
 
 /* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. */
 #define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */
@@ -109,21 +125,19 @@ extern kmod_info_t g_kernel_kmod_info;
 
 extern void dtrace_suspend(void);
 extern void dtrace_resume(void);
+extern void dtrace_early_init(void);
+extern int dtrace_keep_kernel_symbols(void);
 extern void dtrace_init(void);
 extern void helper_init(void);
 extern void fasttrap_init(void);
 
-extern void dtrace_lazy_dofs_duplicate(proc_t *, proc_t *);
+
+static int dtrace_lazy_dofs_duplicate(proc_t *, proc_t *);
 extern void dtrace_lazy_dofs_destroy(proc_t *);
 extern void dtrace_postinit(void);
 
-#include "../../../osfmk/chud/chud_dtrace.h"
-
-extern kern_return_t chudxnu_dtrace_callback
-	(uint64_t selector, uint64_t *args, uint32_t count);
-
-/* Import this function to retrieve the physical memory.
*/ -extern int kernel_sysctlbyname(const char *name, void *oldp, - size_t *oldlenp, void *newp, size_t newlen); +extern void dtrace_proc_fork(proc_t*, proc_t*, int); +extern void dtrace_proc_exec(proc_t*); +extern void dtrace_proc_exit(proc_t*); /* * DTrace Tunable Variables @@ -143,17 +157,20 @@ extern int kernel_sysctlbyname(const char *name, void *oldp, */ uint64_t dtrace_buffer_memory_maxsize = 0; /* initialized in dtrace_init */ uint64_t dtrace_buffer_memory_inuse = 0; -int dtrace_destructive_disallow = 0; +int dtrace_destructive_disallow = 1; dtrace_optval_t dtrace_nonroot_maxsize = (16 * 1024 * 1024); size_t dtrace_difo_maxsize = (256 * 1024); -dtrace_optval_t dtrace_dof_maxsize = (384 * 1024); -size_t dtrace_global_maxsize = (16 * 1024); +dtrace_optval_t dtrace_dof_maxsize = (512 * 1024); +dtrace_optval_t dtrace_statvar_maxsize = (16 * 1024); +dtrace_optval_t dtrace_statvar_maxsize_max = (16 * 10 * 1024); size_t dtrace_actions_max = (16 * 1024); size_t dtrace_retain_max = 1024; dtrace_optval_t dtrace_helper_actions_max = 32; dtrace_optval_t dtrace_helper_providers_max = 64; dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024); size_t dtrace_strsize_default = 256; +dtrace_optval_t dtrace_strsize_min = 8; +dtrace_optval_t dtrace_strsize_max = 65536; dtrace_optval_t dtrace_cleanrate_default = 990099000; /* 1.1 hz */ dtrace_optval_t dtrace_cleanrate_min = 20000000; /* 50 hz */ dtrace_optval_t dtrace_cleanrate_max = (uint64_t)60 * NANOSEC; /* 1/minute */ @@ -167,12 +184,15 @@ dtrace_optval_t dtrace_stackframes_default = 20; dtrace_optval_t dtrace_ustackframes_default = 20; dtrace_optval_t dtrace_jstackframes_default = 50; dtrace_optval_t dtrace_jstackstrsize_default = 512; +dtrace_optval_t dtrace_buflimit_default = 75; +dtrace_optval_t dtrace_buflimit_min = 1; +dtrace_optval_t dtrace_buflimit_max = 99; +size_t dtrace_nprobes_default = 4; int dtrace_msgdsize_max = 128; hrtime_t dtrace_chill_max = 500 * (NANOSEC / MILLISEC); /* 500 ms */ hrtime_t dtrace_chill_interval = NANOSEC; /* 1000 ms */ int dtrace_devdepth_max = 32; int dtrace_err_verbose; -int dtrace_provide_private_probes = 0; hrtime_t dtrace_deadman_interval = NANOSEC; hrtime_t dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC; hrtime_t dtrace_deadman_user = (hrtime_t)30 * NANOSEC; @@ -193,15 +213,14 @@ unsigned int dtrace_max_cpus = 0; /* number of enabled cpus */ */ static dev_info_t *dtrace_devi; /* device info */ static vmem_t *dtrace_arena; /* probe ID arena */ -static vmem_t *dtrace_minor; /* minor number arena */ -static taskq_t *dtrace_taskq; /* task queue */ static dtrace_probe_t **dtrace_probes; /* array of all probes */ static int dtrace_nprobes; /* number of probes */ static dtrace_provider_t *dtrace_provider; /* provider list */ static dtrace_meta_t *dtrace_meta_pid; /* user-land meta provider */ static int dtrace_opens; /* number of opens */ static int dtrace_helpers; /* number of helpers */ -static void *dtrace_softstate; /* softstate pointer */ +static dtrace_hash_t *dtrace_strings; +static dtrace_hash_t *dtrace_byprov; /* probes hashed by provider */ static dtrace_hash_t *dtrace_bymod; /* probes hashed by module */ static dtrace_hash_t *dtrace_byfunc; /* probes hashed by function */ static dtrace_hash_t *dtrace_byname; /* probes hashed by name */ @@ -226,7 +245,8 @@ static int dtrace_dof_mode; /* See dtrace_impl.h for a description of Darwin's * fbt_provide and sdt_provide. Its clearly not a dtrace tunable variable either... 
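With dtrace_destructive_disallow now defaulting to 1 in the tunables above, destructive actions are refused until an administrator re-enables them. Every destructive code path in this file gates on the same two checks; a minimal sketch of the pattern, as it appears in the kdebug_trace cases later in this diff:

	if (dtrace_destructive_disallow ||
	    !dtrace_priv_kernel_destructive(state))
		return;	/* destructive tracing is disabled */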
*/ int dtrace_kernel_symbol_mode; /* See dtrace_impl.h for a description of Darwin's kernel symbol modes. */ - +static uint32_t dtrace_wake_clients; +static uint8_t dtrace_kerneluuid[16]; /* the 128-bit uuid */ /* * To save memory, some common memory allocations are given a @@ -235,7 +255,8 @@ int dtrace_kernel_symbol_mode; /* See dtrace_impl.h for a description of Darwi * 20k elements allocated, the space saved is substantial. */ -struct zone *dtrace_probe_t_zone; +static ZONE_DECLARE(dtrace_probe_t_zone, "dtrace.dtrace_probe_t", + sizeof(dtrace_probe_t), ZC_NONE); static int dtrace_module_unloaded(struct kmod_info *kmod); @@ -285,7 +306,7 @@ static int dtrace_module_unloaded(struct kmod_info *kmod); * * ASSERT(MUTEX_HELD(&cpu_lock)); * becomes: - * lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); + * LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); * */ static lck_mtx_t dtrace_lock; /* probe state lock */ @@ -308,26 +329,60 @@ static dtrace_pattr_t dtrace_provider_attr = { }; static void -dtrace_nullop(void) -{} +dtrace_provide_nullop(void *arg, const dtrace_probedesc_t *desc) +{ +#pragma unused(arg, desc) +} + +static void +dtrace_provide_module_nullop(void *arg, struct modctl *ctl) +{ +#pragma unused(arg, ctl) +} static int -dtrace_enable_nullop(void) +dtrace_enable_nullop(void *arg, dtrace_id_t id, void *parg) { +#pragma unused(arg, id, parg) return (0); } -static dtrace_pops_t dtrace_provider_ops = { - (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop, - (void (*)(void *, struct modctl *))dtrace_nullop, - (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop, - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, - NULL, - NULL, - NULL, - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop +static void +dtrace_disable_nullop(void *arg, dtrace_id_t id, void *parg) +{ +#pragma unused(arg, id, parg) +} + +static void +dtrace_suspend_nullop(void *arg, dtrace_id_t id, void *parg) +{ +#pragma unused(arg, id, parg) +} + +static void +dtrace_resume_nullop(void *arg, dtrace_id_t id, void *parg) +{ +#pragma unused(arg, id, parg) +} + +static void +dtrace_destroy_nullop(void *arg, dtrace_id_t id, void *parg) +{ +#pragma unused(arg, id, parg) +} + + +static dtrace_pops_t dtrace_provider_ops = { + .dtps_provide = dtrace_provide_nullop, + .dtps_provide_module = dtrace_provide_module_nullop, + .dtps_enable = dtrace_enable_nullop, + .dtps_disable = dtrace_disable_nullop, + .dtps_suspend = dtrace_suspend_nullop, + .dtps_resume = dtrace_resume_nullop, + .dtps_getargdesc = NULL, + .dtps_getargval = NULL, + .dtps_usermode = NULL, + .dtps_destroy = dtrace_destroy_nullop, }; static dtrace_id_t dtrace_probeid_begin; /* special BEGIN probe */ @@ -348,6 +403,15 @@ int dtrace_helptrace_enabled = 1; int dtrace_helptrace_enabled = 0; #endif +#if defined (__arm64__) +/* + * The ioctl for adding helper DOF is based on the + * size of a user_addr_t. We need to recognize both + * U32 and U64 as the same action. + */ +#define DTRACEHIOC_ADDDOF_U32 _IOW('h', 4, user32_addr_t) +#define DTRACEHIOC_ADDDOF_U64 _IOW('h', 4, user64_addr_t) +#endif /* __arm64__ */ /* * DTrace Error Hashing @@ -373,18 +437,22 @@ static lck_mtx_t dtrace_errlock; * outside of the implementation. There is no real structure to this cpp * mishmash -- but is there ever? 
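The two DTRACEHIOC_ADDDOF encodings above exist because _IOW() folds the size of its third argument into the command word, so 32-bit and 64-bit callers produce different values for the same 'h'/4 request. A hypothetical dispatch treating them as one action (this switch is illustrative, not taken from the change):

	switch (cmd) {
	case DTRACEHIOC_ADDDOF_U32:	/* helper DOF from a 32-bit process */
	case DTRACEHIOC_ADDDOF_U64:	/* helper DOF from a 64-bit process */
		/* same action: copy in a user32_addr_t or a user64_addr_t
		 * respectively, then add the helper DOF */
		break;
	}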
*/ -#define DTRACE_HASHSTR(hash, probe) \ - dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs))) -#define DTRACE_HASHNEXT(hash, probe) \ - (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs) +#define DTRACE_GETSTR(hash, elm) \ + (hash->dth_getstr(elm, hash->dth_stroffs)) + +#define DTRACE_HASHSTR(hash, elm) \ + dtrace_hash_str(DTRACE_GETSTR(hash, elm)) -#define DTRACE_HASHPREV(hash, probe) \ - (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs) +#define DTRACE_HASHNEXT(hash, elm) \ + (void**)((uintptr_t)(elm) + (hash)->dth_nextoffs) + +#define DTRACE_HASHPREV(hash, elm) \ + (void**)((uintptr_t)(elm) + (hash)->dth_prevoffs) #define DTRACE_HASHEQ(hash, lhs, rhs) \ - (strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \ - *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0) + (strcmp(DTRACE_GETSTR(hash, lhs), \ + DTRACE_GETSTR(hash, rhs)) == 0) #define DTRACE_AGGHASHSIZE_SLEW 17 @@ -414,6 +482,25 @@ static lck_mtx_t dtrace_errlock; (where) = ((thr + DIF_VARIABLE_MAX) & \ (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ } +#elif defined(__arm__) +/* FIXME: three function calls!!! */ +#define DTRACE_TLS_THRKEY(where) { \ + uint_t intr = ml_at_interrupt_context(); /* Note: just one measly bit */ \ + uint64_t thr = (uintptr_t)current_thread(); \ + uint_t pid = (uint_t)dtrace_proc_selfpid(); \ + ASSERT(intr < (1 << 3)); \ + (where) = (((thr << 32 | pid) + DIF_VARIABLE_MAX) & \ + (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ +} +#elif defined (__arm64__) +/* FIXME: two function calls!! */ +#define DTRACE_TLS_THRKEY(where) { \ + uint_t intr = ml_at_interrupt_context(); /* Note: just one measly bit */ \ + uint64_t thr = (uintptr_t)current_thread(); \ + ASSERT(intr < (1 << 3)); \ + (where) = ((thr + DIF_VARIABLE_MAX) & \ + (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ +} #else #error Unknown architecture #endif @@ -436,6 +523,14 @@ static lck_mtx_t dtrace_errlock; return (0); \ } +#define DTRACE_RANGE_REMAIN(remp, addr, baseaddr, basesz) \ +do { \ + if ((remp) != NULL) { \ + *(remp) = (uintptr_t)(baseaddr) + (basesz) - (addr); \ + } \ +} while (0) + + /* * Test whether a range of memory starting at testaddr of size testsz falls * within the range of memory described by addr, sz. 
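The containment test sketched below is the usual DTrace formulation of this check (the DTRACE_INRANGE macro body falls outside the hunk shown); rewriting the naive "testaddr >= addr && testaddr + testsz <= addr + sz" with subtractions keeps wrapped arithmetic from producing a false positive:

	/* does [testaddr, testaddr + testsz) lie inside [addr, addr + sz)? */
	static int
	inrange(uintptr_t testaddr, size_t testsz, uintptr_t addr, size_t sz)
	{
		return (testaddr - addr < sz &&
		    testaddr + testsz - addr <= sz &&
		    testsz != 0);
	}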
We take care to avoid @@ -460,7 +555,7 @@ static lck_mtx_t dtrace_errlock; #define RECOVER_LABEL(bits) dtraceLoadRecover##bits: -#if defined (__x86_64__) +#if defined (__x86_64__) || (defined (__arm__) || defined (__arm64__)) #define DTRACE_LOADFUNC(bits) \ /*CSTYLED*/ \ uint##bits##_t dtrace_load##bits(uintptr_t addr); \ @@ -495,7 +590,7 @@ dtrace_load##bits(uintptr_t addr) \ { \ volatile vm_offset_t recover = (vm_offset_t)&&dtraceLoadRecover##bits; \ *flags |= CPU_DTRACE_NOFAULT; \ - recover = dtrace_set_thread_recover(current_thread(), recover); \ + recover = dtrace_sign_and_set_thread_recover(current_thread(), recover); \ /*CSTYLED*/ \ /* \ * PR6394061 - avoid device memory that is unpredictably \ @@ -503,6 +598,12 @@ dtrace_load##bits(uintptr_t addr) \ */ \ if (pmap_valid_page(pmap_find_phys(kernel_pmap, addr))) \ rval = *((volatile uint##bits##_t *)addr); \ + else { \ + *flags |= CPU_DTRACE_BADADDR; \ + cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \ + return (0); \ + } \ + \ RECOVER_LABEL(bits); \ (void)dtrace_set_thread_recover(current_thread(), recover); \ *flags &= ~CPU_DTRACE_NOFAULT; \ @@ -550,7 +651,8 @@ dtrace_load##bits(uintptr_t addr) \ static size_t dtrace_strlen(const char *, size_t); static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id); static void dtrace_enabling_provide(dtrace_provider_t *); -static int dtrace_enabling_match(dtrace_enabling_t *, int *); +static int dtrace_enabling_match(dtrace_enabling_t *, int *, dtrace_match_cond_t *cond); +static void dtrace_enabling_matchall_with_cond(dtrace_match_cond_t *cond); static void dtrace_enabling_matchall(void); static dtrace_state_t *dtrace_anon_grab(void); static uint64_t dtrace_helper(int, dtrace_mstate_t *, @@ -561,8 +663,12 @@ static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t, dtrace_state_t *, dtrace_mstate_t *); static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t, dtrace_optval_t); -static int dtrace_ecb_create_enable(dtrace_probe_t *, void *); +static int dtrace_ecb_create_enable(dtrace_probe_t *, void *, void *); static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *); +static int dtrace_canload_remains(uint64_t, size_t, size_t *, + dtrace_mstate_t *, dtrace_vstate_t *); +static int dtrace_canstore_remains(uint64_t, size_t, size_t *, + dtrace_mstate_t *, dtrace_vstate_t *); /* @@ -698,6 +804,9 @@ sysctl_dtrace_dof_maxsize SYSCTL_HANDLER_ARGS if (value <= 0) return (ERANGE); + if (value >= dtrace_copy_maxsize()) + return (ERANGE); + lck_mtx_lock(&dtrace_lock); dtrace_dof_maxsize = value; lck_mtx_unlock(&dtrace_lock); @@ -718,7 +827,7 @@ SYSCTL_PROC(_kern_dtrace, OID_AUTO, dof_maxsize, sysctl_dtrace_dof_maxsize, "Q", "dtrace dof maxsize"); static int -sysctl_dtrace_global_maxsize SYSCTL_HANDLER_ARGS +sysctl_dtrace_statvar_maxsize SYSCTL_HANDLER_ARGS { #pragma unused(oidp, arg2, req) int changed, error; @@ -730,9 +839,11 @@ sysctl_dtrace_global_maxsize SYSCTL_HANDLER_ARGS if (value <= 0) return (ERANGE); + if (value > dtrace_statvar_maxsize_max) + return (ERANGE); lck_mtx_lock(&dtrace_lock); - dtrace_global_maxsize = value; + dtrace_statvar_maxsize = value; lck_mtx_unlock(&dtrace_lock); return (0); @@ -741,47 +852,34 @@ sysctl_dtrace_global_maxsize SYSCTL_HANDLER_ARGS /* * kern.dtrace.global_maxsize * - * Set the global variable max size in bytes, check the definition of - * dtrace_global_maxsize to get the default value. Attempting to set a null or - * negative size will result in a failure. 
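These kern.dtrace OIDs are plain CTLTYPE_QUAD values, so the limits can be inspected from userland with sysctlbyname(3). A hypothetical check of the limit documented here (the program and its output format are illustrative):

	#include <stdio.h>
	#include <sys/sysctl.h>

	int
	main(void)
	{
		uint64_t maxsize;
		size_t len = sizeof(maxsize);

		if (sysctlbyname("kern.dtrace.global_maxsize", &maxsize,
		    &len, NULL, 0) == 0)
			printf("statvar maxsize: %llu bytes\n",
			    (unsigned long long)maxsize);
		return (0);
	}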
+ * Set the variable max size in bytes, check the definition of + * dtrace_statvar_maxsize to get the default value. Attempting to set a null, + * too high or negative size will result in a failure. */ SYSCTL_PROC(_kern_dtrace, OID_AUTO, global_maxsize, CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, - &dtrace_global_maxsize, 0, - sysctl_dtrace_global_maxsize, "Q", "dtrace global maxsize"); - -static int -sysctl_dtrace_provide_private_probes SYSCTL_HANDLER_ARGS -{ -#pragma unused(oidp, arg2) - int error; - int value = *(int *) arg1; - - error = sysctl_io_number(req, value, sizeof(value), &value, NULL); - if (error) - return (error); - - if (value != 0 && value != 1) - return (ERANGE); - - lck_mtx_lock(&dtrace_lock); - dtrace_provide_private_probes = value; - lck_mtx_unlock(&dtrace_lock); + &dtrace_statvar_maxsize, 0, + sysctl_dtrace_statvar_maxsize, "Q", "dtrace statvar maxsize"); - return (0); -} /* * kern.dtrace.provide_private_probes * * Set whether the providers must provide the private probes. This is - * mainly used by the FBT provider to request probes for the private/static - * symbols. + * kept as compatibility as they are always provided. */ -SYSCTL_PROC(_kern_dtrace, OID_AUTO, provide_private_probes, - CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, - &dtrace_provide_private_probes, 0, - sysctl_dtrace_provide_private_probes, "I", "provider must provide the private probes"); +SYSCTL_INT(_kern_dtrace, OID_AUTO, provide_private_probes, + CTLFLAG_RD | CTLFLAG_LOCKED, + (int *)NULL, 1, "provider must provide the private probes"); + +/* + * kern.dtrace.dof_mode + * + * Returns the current DOF mode. + * This value is read-only. + */ +SYSCTL_INT(_kern_dtrace, OID_AUTO, dof_mode, CTLFLAG_RD | CTLFLAG_LOCKED, + &dtrace_dof_mode, 0, "dtrace dof mode"); /* * DTrace Probe Context Functions @@ -877,19 +975,43 @@ dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate) } static int -dtrace_canstore_statvar(uint64_t addr, size_t sz, +dtrace_canstore_statvar(uint64_t addr, size_t sz, size_t *remain, dtrace_statvar_t **svars, int nsvars) { int i; + size_t maxglobalsize, maxlocalsize; + + maxglobalsize = dtrace_statvar_maxsize + sizeof (uint64_t); + maxlocalsize = (maxglobalsize) * NCPU; + + if (nsvars == 0) + return (0); + for (i = 0; i < nsvars; i++) { dtrace_statvar_t *svar = svars[i]; + uint8_t scope; + size_t size; - if (svar == NULL || svar->dtsv_size == 0) + if (svar == NULL || (size = svar->dtsv_size) == 0) continue; - if (DTRACE_INRANGE(addr, sz, svar->dtsv_data, svar->dtsv_size)) + scope = svar->dtsv_var.dtdv_scope; + + /** + * We verify that our size is valid in the spirit of providing + * defense in depth: we want to prevent attackers from using + * DTrace to escalate an orthogonal kernel heap corruption bug + * into the ability to store to arbitrary locations in memory. + */ + VERIFY((scope == DIFV_SCOPE_GLOBAL && size <= maxglobalsize) || + (scope == DIFV_SCOPE_LOCAL && size <= maxlocalsize)); + + if (DTRACE_INRANGE(addr, sz, svar->dtsv_data, svar->dtsv_size)) { + DTRACE_RANGE_REMAIN(remain, addr, svar->dtsv_data, + svar->dtsv_size); return (1); + } } return (0); @@ -904,14 +1026,26 @@ dtrace_canstore_statvar(uint64_t addr, size_t sz, static int dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) +{ + return (dtrace_canstore_remains(addr, sz, NULL, mstate, vstate)); +} +/* + * Implementation of dtrace_canstore which communicates the upper bound of the + * allowed memory region. 
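The *remain out-parameter threaded through these new _remains variants reports how many validated bytes lie between the address and the end of its containing region (see DTRACE_RANGE_REMAIN above). A distilled sketch of the intended calling pattern, modeled on dtrace_strcanload() later in this diff (the helper name is hypothetical):

	static int
	can_read_string(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
	    dtrace_vstate_t *vstate)
	{
		size_t lim;

		if (!dtrace_canload_remains(addr, 0, &lim, mstate, vstate))
			return (0);
		/* bound all further scanning by the validated remainder */
		return (1 + dtrace_strlen((char *)(uintptr_t)addr,
		    MIN(sz, lim)) <= lim);
	}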
+ */ +static int +dtrace_canstore_remains(uint64_t addr, size_t sz, size_t *remain, + dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { /* * First, check to see if the address is in scratch space... */ if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base, - mstate->dtms_scratch_size)) + mstate->dtms_scratch_size)) { + DTRACE_RANGE_REMAIN(remain, addr, mstate->dtms_scratch_base, + mstate->dtms_scratch_size); return (1); - + } /* * Now check to see if it's a dynamic variable. This check will pick * up both thread-local variables and any global dynamically-allocated @@ -923,6 +1057,7 @@ dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, uintptr_t base = (uintptr_t)dstate->dtds_base + (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t)); uintptr_t chunkoffs; + dtrace_dynvar_t *dvar; /* * Before we assume that we can store here, we need to make @@ -939,6 +1074,8 @@ dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, * * (3) Not span a chunk boundary * + * (4) Not be in the tuple space of a dynamic variable + * */ if (addr < base) return (0); @@ -951,6 +1088,15 @@ dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, if (chunkoffs + sz > dstate->dtds_chunksize) return (0); + dvar = (dtrace_dynvar_t *)((uintptr_t)addr - chunkoffs); + + if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) + return (0); + + if (chunkoffs < sizeof (dtrace_dynvar_t) + + ((dvar->dtdv_tuple.dtt_nkeys - 1) * sizeof (dtrace_key_t))) + return (0); + return (1); } @@ -958,11 +1104,11 @@ dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, * Finally, check the static local and global variables. These checks * take the longest, so we perform them last. */ - if (dtrace_canstore_statvar(addr, sz, + if (dtrace_canstore_statvar(addr, sz, remain, vstate->dtvs_locals, vstate->dtvs_nlocals)) return (1); - if (dtrace_canstore_statvar(addr, sz, + if (dtrace_canstore_statvar(addr, sz, remain, vstate->dtvs_globals, vstate->dtvs_nglobals)) return (1); @@ -979,9 +1125,20 @@ dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, * DTrace subroutines (DIF_SUBR_*) should use this helper to implement * appropriate memory access protection. */ -static int +int dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) +{ + return (dtrace_canload_remains(addr, sz, NULL, mstate, vstate)); +} + +/* + * Implementation of dtrace_canload which communicates the upper bound of the + * allowed memory region. + */ +static int +dtrace_canload_remains(uint64_t addr, size_t sz, size_t *remain, + dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; @@ -989,21 +1146,27 @@ dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, * If we hold the privilege to read from kernel memory, then * everything is readable. */ - if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) + if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) { + DTRACE_RANGE_REMAIN(remain, addr, addr, sz); return (1); + } /* * You can obviously read that which you can store. */ - if (dtrace_canstore(addr, sz, mstate, vstate)) + if (dtrace_canstore_remains(addr, sz, remain, mstate, vstate)) return (1); /* * We're allowed to read from our own string table. 
*/ if (DTRACE_INRANGE(addr, sz, (uintptr_t)mstate->dtms_difo->dtdo_strtab, - mstate->dtms_difo->dtdo_strlen)) + mstate->dtms_difo->dtdo_strlen)) { + DTRACE_RANGE_REMAIN(remain, addr, + mstate->dtms_difo->dtdo_strtab, + mstate->dtms_difo->dtdo_strlen); return (1); + } DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV); *illval = addr; @@ -1017,21 +1180,41 @@ dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, * calls in the event that the user has all privileges. */ static int -dtrace_strcanload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, - dtrace_vstate_t *vstate) +dtrace_strcanload(uint64_t addr, size_t sz, size_t *remain, + dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { - size_t strsz; + size_t rsize; /* * If we hold the privilege to read from kernel memory, then * everything is readable. */ - if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) + if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) { + DTRACE_RANGE_REMAIN(remain, addr, addr, sz); return (1); + } - strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr, sz); - if (dtrace_canload(addr, strsz, mstate, vstate)) - return (1); + /* + * Even if the caller is uninterested in querying the remaining valid + * range, it is required to ensure that the access is allowed. + */ + if (remain == NULL) { + remain = &rsize; + } + if (dtrace_canload_remains(addr, 0, remain, mstate, vstate)) { + size_t strsz; + /* + * Perform the strlen after determining the length of the + * memory region which is accessible. This prevents timing + * information from being used to find NULs in memory which is + * not accessible to the caller. + */ + strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr, + MIN(sz, *remain)); + if (strsz <= *remain) { + return (1); + } + } return (0); } @@ -1041,33 +1224,117 @@ dtrace_strcanload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, * region in which a load may be issued given the user's privilege level. */ static int -dtrace_vcanload(void *src, dtrace_diftype_t *type, dtrace_mstate_t *mstate, - dtrace_vstate_t *vstate) +dtrace_vcanload(void *src, dtrace_diftype_t *type, size_t *remain, + dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { size_t sz; ASSERT(type->dtdt_flags & DIF_TF_BYREF); + /* + * Calculate the max size before performing any checks since even + * DTRACE_ACCESS_KERNEL-credentialed callers expect that this function + * return the max length via 'remain'. + */ + if (type->dtdt_kind == DIF_TYPE_STRING) { + dtrace_state_t *state = vstate->dtvs_state; + + if (state != NULL) { + sz = state->dts_options[DTRACEOPT_STRSIZE]; + } else { + /* + * In helper context, we have a NULL state; fall back + * to using the system-wide default for the string size + * in this case. + */ + sz = dtrace_strsize_default; + } + } else { + sz = type->dtdt_size; + } + /* * If we hold the privilege to read from kernel memory, then * everything is readable. 
*/ - if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) + if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) { + DTRACE_RANGE_REMAIN(remain, (uintptr_t)src, src, sz); return (1); + } - if (type->dtdt_kind == DIF_TYPE_STRING) - sz = dtrace_strlen(src, - vstate->dtvs_state->dts_options[DTRACEOPT_STRSIZE]) + 1; - else - sz = type->dtdt_size; + if (type->dtdt_kind == DIF_TYPE_STRING) { + return (dtrace_strcanload((uintptr_t)src, sz, remain, mstate, + vstate)); + } + return (dtrace_canload_remains((uintptr_t)src, sz, remain, mstate, + vstate)); +} + +#define isdigit(ch) ((ch) >= '0' && (ch) <= '9') +#define islower(ch) ((ch) >= 'a' && (ch) <= 'z') +#define isspace(ch) (((ch) == ' ') || ((ch) == '\r') || ((ch) == '\n') || \ + ((ch) == '\t') || ((ch) == '\f')) +#define isxdigit(ch) (isdigit(ch) || ((ch) >= 'a' && (ch) <= 'f') || \ + ((ch) >= 'A' && (ch) <= 'F')) +#define lisalnum(x) \ + (isdigit(x) || ((x) >= 'a' && (x) <= 'z') || ((x) >= 'A' && (x) <= 'Z')) + +#define DIGIT(x) \ + (isdigit(x) ? (x) - '0' : islower(x) ? (x) + 10 - 'a' : (x) + 10 - 'A') + +/* + * Convert a string to a signed integer using safe loads. + */ +static int64_t +dtrace_strtoll(char *input, int base, size_t limit) +{ + uintptr_t pos = (uintptr_t)input; + int64_t val = 0; + int x; + boolean_t neg = B_FALSE; + char c, cc, ccc; + uintptr_t end = pos + limit; + + /* + * Consume any whitespace preceding digits. + */ + while ((c = dtrace_load8(pos)) == ' ' || c == '\t') + pos++; + + /* + * Handle an explicit sign if one is present. + */ + if (c == '-' || c == '+') { + if (c == '-') + neg = B_TRUE; + c = dtrace_load8(++pos); + } + + /* + * Check for an explicit hexadecimal prefix ("0x" or "0X") and skip it + * if present. + */ + if (base == 16 && c == '0' && ((cc = dtrace_load8(pos + 1)) == 'x' || + cc == 'X') && isxdigit(ccc = dtrace_load8(pos + 2))) { + pos += 2; + c = ccc; + } + + /* + * Read in contiguous digits until the first non-digit character. + */ + for (; pos < end && c != '\0' && lisalnum(c) && (x = DIGIT(c)) < base; + c = dtrace_load8(++pos)) + val = val * base + x; - return (dtrace_canload((uintptr_t)src, sz, mstate, vstate)); + return (neg ? -val : val); } + /* * Compare two strings using safe loads. */ static int -dtrace_strncmp(char *s1, char *s2, size_t limit) +dtrace_strncmp(const char *s1, const char *s2, size_t limit) { uint8_t c1, c2; volatile uint16_t *flags; @@ -1198,15 +1465,15 @@ dtrace_strcpy(const void *src, void *dst, size_t len) * specified type; we assume that we can store to directly. 
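Illustrative results for the dtrace_strtoll() emulation above, using hypothetical inputs and a limit that covers each string:

	dtrace_strtoll((char *)" 42", 10, 8);	/* 42: blanks and tabs skipped */
	dtrace_strtoll((char *)"-0x1f", 16, 8);	/* -31: sign, then the 0x prefix */
	dtrace_strtoll((char *)"7fzz", 16, 8);	/* 127: stops at the first non-digit */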
*/ static void -dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type) +dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type, size_t limit) { ASSERT(type->dtdt_flags & DIF_TF_BYREF); if (type->dtdt_kind == DIF_TYPE_STRING) { - dtrace_strcpy(src, dst, type->dtdt_size); + dtrace_strcpy(src, dst, MIN(type->dtdt_size, limit)); } else { - dtrace_bcopy(src, dst, type->dtdt_size); -} + dtrace_bcopy(src, dst, MIN(type->dtdt_size, limit)); + } } /* @@ -1457,7 +1724,7 @@ dtrace_priv_proc(dtrace_state_t *state) if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) goto bad; - if (dtrace_is_restricted() && !dtrace_can_attach_to_proc(current_proc())) + if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed() && !dtrace_can_attach_to_proc(current_proc())) goto bad; if (state->dts_cred.dcr_action & DTRACE_CRA_PROC) @@ -1489,7 +1756,7 @@ dtrace_priv_proc_relaxed(dtrace_state_t *state) static int dtrace_priv_kernel(dtrace_state_t *state) { - if (dtrace_is_restricted()) + if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed()) goto bad; if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL) @@ -2277,6 +2544,7 @@ dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg) * failure; if there is no space in the aggregation buffer, the data will be * dropped, and a corresponding counter incremented. */ +__attribute__((noinline)) static void dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf, intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg) @@ -2639,7 +2907,7 @@ dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu, new = DTRACESPEC_COMMITTING; break; } - /*FALLTHROUGH*/ + OS_FALLTHROUGH; case DTRACESPEC_ACTIVEMANY: new = DTRACESPEC_COMMITTINGMANY; @@ -2749,6 +3017,7 @@ out: * do nothing. The state of the specified speculation is transitioned * according to the state transition diagram outlined in */ +__attribute__((noinline)) static void dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu, dtrace_specid_t which) @@ -2912,6 +3181,7 @@ dtrace_speculation_clean(dtrace_state_t *state) * the active CPU is not the specified CPU -- the speculation will be * atomically transitioned into the ACTIVEMANY state. 
*/ +__attribute__((noinline)) static dtrace_buffer_t * dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid, dtrace_specid_t which) @@ -3043,10 +3313,8 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS); if (ndx >= sizeof (mstate->dtms_arg) / sizeof (mstate->dtms_arg[0])) { - /* - * APPLE NOTE: Account for introduction of __dtrace_probe() - */ - int aframes = mstate->dtms_probe->dtpr_aframes + 3; + int aframes = mstate->dtms_probe->dtpr_aframes + 2; + dtrace_vstate_t *vstate = &state->dts_vstate; dtrace_provider_t *pv; uint64_t val; @@ -3061,7 +3329,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, } else - val = dtrace_getarg(ndx, aframes); + val = dtrace_getarg(ndx, aframes, mstate, vstate); /* * This is regrettably required to keep the compiler @@ -3095,6 +3363,20 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return (dtrace_getreg(find_user_regs(thread), ndx)); } + case DIF_VAR_VMREGS: { + uint64_t rval; + + if (!dtrace_priv_kernel(state)) + return (0); + + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + + rval = dtrace_getvmreg(ndx); + + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); + + return (rval); + } case DIF_VAR_CURTHREAD: if (!dtrace_priv_kernel(state)) @@ -3127,6 +3409,17 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, } return (mstate->dtms_machtimestamp); + case DIF_VAR_MACHCTIMESTAMP: + if (!(mstate->dtms_present & DTRACE_MSTATE_MACHCTIMESTAMP)) { + mstate->dtms_machctimestamp = mach_continuous_time(); + mstate->dtms_present |= DTRACE_MSTATE_MACHCTIMESTAMP; + } + return (mstate->dtms_machctimestamp); + + + case DIF_VAR_CPU: + return ((uint64_t) dtrace_get_thread_last_cpu_id(current_thread())); + case DIF_VAR_IPL: if (!dtrace_priv_kernel(state)) return (0); @@ -3148,10 +3441,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, if (!dtrace_priv_kernel(state)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_STACKDEPTH)) { - /* - * APPLE NOTE: Account for introduction of __dtrace_probe() - */ - int aframes = mstate->dtms_probe->dtpr_aframes + 3; + int aframes = mstate->dtms_probe->dtpr_aframes + 2; mstate->dtms_stackdepth = dtrace_getstackdepth(aframes); mstate->dtms_present |= DTRACE_MSTATE_STACKDEPTH; @@ -3182,10 +3472,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, if (!dtrace_priv_kernel(state)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_CALLER)) { - /* - * APPLE NOTE: Account for introduction of __dtrace_probe() - */ - int aframes = mstate->dtms_probe->dtpr_aframes + 3; + int aframes = mstate->dtms_probe->dtpr_aframes + 2; if (!DTRACE_ANCHORED(mstate->dtms_probe)) { /* @@ -3312,7 +3599,8 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, case DIF_VAR_EXECNAME: { char *xname = (char *)mstate->dtms_scratch_ptr; - size_t scratch_size = MAXCOMLEN+1; + char *pname = proc_best_name(curproc); + size_t scratch_size = sizeof(proc_name_t); /* The scratch allocation's lifetime is that of the clause. 
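The execname case above follows the scratch-allocation idiom used throughout this function: scratch is a clause-lifetime bump allocator, so a failed reservation raises CPU_DTRACE_NOSCRATCH rather than faulting. The pattern, distilled (names as in the surrounding code):

	char *buf = (char *)mstate->dtms_scratch_ptr;

	if (!DTRACE_INSCRATCH(mstate, scratch_size)) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
		return (0);
	}
	mstate->dtms_scratch_ptr += scratch_size;
	/* fill buf and return its address as the variable's value */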
*/ if (!DTRACE_INSCRATCH(mstate, scratch_size)) { @@ -3324,34 +3612,54 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return (0); mstate->dtms_scratch_ptr += scratch_size; - proc_selfname( xname, MAXCOMLEN ); + strlcpy(xname, pname, scratch_size); return ((uint64_t)(uintptr_t)xname); } case DIF_VAR_ZONENAME: - { - /* scratch_size is equal to length('global') + 1 for the null-terminator. */ - char *zname = (char *)mstate->dtms_scratch_ptr; - size_t scratch_size = 6 + 1; + { + /* scratch_size is equal to length('global') + 1 for the null-terminator. */ + char *zname = (char *)mstate->dtms_scratch_ptr; + size_t scratch_size = 6 + 1; if (!dtrace_priv_proc(state)) return (0); - /* The scratch allocation's lifetime is that of the clause. */ - if (!DTRACE_INSCRATCH(mstate, scratch_size)) { - DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - return 0; - } + /* The scratch allocation's lifetime is that of the clause. */ + if (!DTRACE_INSCRATCH(mstate, scratch_size)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); + return 0; + } + + mstate->dtms_scratch_ptr += scratch_size; + + /* The kernel does not provide zonename, it will always return 'global'. */ + strlcpy(zname, "global", scratch_size); + + return ((uint64_t)(uintptr_t)zname); + } + +#if MONOTONIC + case DIF_VAR_CPUINSTRS: + return mt_cur_cpu_instrs(); - mstate->dtms_scratch_ptr += scratch_size; + case DIF_VAR_CPUCYCLES: + return mt_cur_cpu_cycles(); - /* The kernel does not provide zonename, it will always return 'global'. */ - strlcpy(zname, "global", scratch_size); + case DIF_VAR_VINSTRS: + return mt_cur_thread_instrs(); - return ((uint64_t)(uintptr_t)zname); - } + case DIF_VAR_VCYCLES: + return mt_cur_thread_cycles(); +#else /* MONOTONIC */ + case DIF_VAR_CPUINSTRS: /* FALLTHROUGH */ + case DIF_VAR_CPUCYCLES: /* FALLTHROUGH */ + case DIF_VAR_VINSTRS: /* FALLTHROUGH */ + case DIF_VAR_VCYCLES: /* FALLTHROUGH */ + return 0; +#endif /* !MONOTONIC */ case DIF_VAR_UID: if (!dtrace_priv_proc_relaxed(state)) @@ -3409,40 +3717,493 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, } } +typedef enum dtrace_json_state { + DTRACE_JSON_REST = 1, + DTRACE_JSON_OBJECT, + DTRACE_JSON_STRING, + DTRACE_JSON_STRING_ESCAPE, + DTRACE_JSON_STRING_ESCAPE_UNICODE, + DTRACE_JSON_COLON, + DTRACE_JSON_COMMA, + DTRACE_JSON_VALUE, + DTRACE_JSON_IDENTIFIER, + DTRACE_JSON_NUMBER, + DTRACE_JSON_NUMBER_FRAC, + DTRACE_JSON_NUMBER_EXP, + DTRACE_JSON_COLLECT_OBJECT +} dtrace_json_state_t; + /* - * Emulate the execution of DTrace ID subroutines invoked by the call opcode. - * Notice that we don't bother validating the proper number of arguments or - * their types in the tuple stack. This isn't needed because all argument - * interpretation is safe because of our load safety -- the worst that can - * happen is that a bogus program can obtain bogus results. + * This function possesses just enough knowledge about JSON to extract a single + * value from a JSON string and store it in the scratch buffer. It is able + * to extract nested object values, and members of arrays by index. + * + * elemlist is a list of JSON keys, stored as packed NUL-terminated strings, to + * be looked up as we descend into the object tree. e.g. + * + * foo[0].bar.baz[32] --> "foo" NUL "0" NUL "bar" NUL "baz" NUL "32" NUL + * with nelems = 5. + * + * The run time of this function must be bounded above by strsize to limit the + * amount of work done in probe context. 
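The four counter variables above map directly onto the monotonic API when the kernel is built with MONOTONIC, and read as 0 otherwise. A minimal sketch of the thread-scoped pair, using the same calls as the switch above:

	#if MONOTONIC
	uint64_t vinstrs = mt_cur_thread_instrs();
	uint64_t vcycles = mt_cur_thread_cycles();
	#endif /* MONOTONIC */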
As such, it is implemented as a + * simple state machine, reading one character at a time using safe loads + * until we find the requested element, hit a parsing error or run off the + * end of the object or string. + * + * As there is no way for a subroutine to return an error without interrupting + * clause execution, we simply return NULL in the event of a missing key or any + * other error condition. Each NULL return in this function is commented with + * the error condition it represents -- parsing or otherwise. + * + * The set of states for the state machine closely matches the JSON + * specification (http://json.org/). Briefly: + * + * DTRACE_JSON_REST: + * Skip whitespace until we find either a top-level Object, moving + * to DTRACE_JSON_OBJECT; or an Array, moving to DTRACE_JSON_VALUE. + * + * DTRACE_JSON_OBJECT: + * Locate the next key String in an Object. Sets a flag to denote + * the next String as a key string and moves to DTRACE_JSON_STRING. + * + * DTRACE_JSON_COLON: + * Skip whitespace until we find the colon that separates key Strings + * from their values. Once found, move to DTRACE_JSON_VALUE. + * + * DTRACE_JSON_VALUE: + * Detects the type of the next value (String, Number, Identifier, Object + * or Array) and routes to the states that process that type. Here we also + * deal with the element selector list if we are requested to traverse down + * into the object tree. + * + * DTRACE_JSON_COMMA: + * Skip whitespace until we find the comma that separates key-value pairs + * in Objects (returning to DTRACE_JSON_OBJECT) or values in Arrays + * (similarly DTRACE_JSON_VALUE). All following literal value processing + * states return to this state at the end of their value, unless otherwise + * noted. + * + * DTRACE_JSON_NUMBER, DTRACE_JSON_NUMBER_FRAC, DTRACE_JSON_NUMBER_EXP: + * Processes a Number literal from the JSON, including any exponent + * component that may be present. Numbers are returned as strings, which + * may be passed to strtoll() if an integer is required. + * + * DTRACE_JSON_IDENTIFIER: + * Processes a "true", "false" or "null" literal in the JSON. + * + * DTRACE_JSON_STRING, DTRACE_JSON_STRING_ESCAPE, + * DTRACE_JSON_STRING_ESCAPE_UNICODE: + * Processes a String literal from the JSON, whether the String denotes + * a key, a value or part of a larger Object. Handles all escape sequences + * present in the specification, including four-digit unicode characters, + * but merely includes the escape sequence without converting it to the + * actual escaped character. If the String is flagged as a key, we + * move to DTRACE_JSON_COLON rather than DTRACE_JSON_COMMA. + * + * DTRACE_JSON_COLLECT_OBJECT: + * This state collects an entire Object (or Array), correctly handling + * embedded strings. If the full element selector list matches this nested + * object, we return the Object in full as a string. If not, we use this + * state to skip to the next value at this level and continue processing. 
*/ -static void -dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, - dtrace_key_t *tupregs, int nargs, - dtrace_mstate_t *mstate, dtrace_state_t *state) +static char * +dtrace_json(uint64_t size, uintptr_t json, char *elemlist, int nelems, + char *dest) { - volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; - volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; - dtrace_vstate_t *vstate = &state->dts_vstate; + dtrace_json_state_t state = DTRACE_JSON_REST; + int64_t array_elem = INT64_MIN; + int64_t array_pos = 0; + uint8_t escape_unicount = 0; + boolean_t string_is_key = B_FALSE; + boolean_t collect_object = B_FALSE; + boolean_t found_key = B_FALSE; + boolean_t in_array = B_FALSE; + uint32_t braces = 0, brackets = 0; + char *elem = elemlist; + char *dd = dest; + uintptr_t cur; + + for (cur = json; cur < json + size; cur++) { + char cc = dtrace_load8(cur); + if (cc == '\0') + return (NULL); -#if !defined(__APPLE__) - union { - mutex_impl_t mi; - uint64_t mx; - } m; + switch (state) { + case DTRACE_JSON_REST: + if (isspace(cc)) + break; - union { - krwlock_t ri; - uintptr_t rw; - } r; -#else -/* FIXME: awaits lock/mutex work */ -#endif /* __APPLE__ */ + if (cc == '{') { + state = DTRACE_JSON_OBJECT; + break; + } - switch (subr) { - case DIF_SUBR_RAND: - regs[rd] = (dtrace_gethrtime() * 2416 + 374441) % 1771875; - break; + if (cc == '[') { + in_array = B_TRUE; + array_pos = 0; + array_elem = dtrace_strtoll(elem, 10, size); + found_key = array_elem == 0 ? B_TRUE : B_FALSE; + state = DTRACE_JSON_VALUE; + break; + } + + /* + * ERROR: expected to find a top-level object or array. + */ + return (NULL); + case DTRACE_JSON_OBJECT: + if (isspace(cc)) + break; + + if (cc == '"') { + state = DTRACE_JSON_STRING; + string_is_key = B_TRUE; + break; + } + + /* + * ERROR: either the object did not start with a key + * string, or we've run off the end of the object + * without finding the requested key. + */ + return (NULL); + case DTRACE_JSON_STRING: + if (cc == '\\') { + *dd++ = '\\'; + state = DTRACE_JSON_STRING_ESCAPE; + break; + } + + if (cc == '"') { + if (collect_object) { + /* + * We don't reset the dest here, as + * the string is part of a larger + * object being collected. + */ + *dd++ = cc; + collect_object = B_FALSE; + state = DTRACE_JSON_COLLECT_OBJECT; + break; + } + *dd = '\0'; + dd = dest; /* reset string buffer */ + if (string_is_key) { + if (dtrace_strncmp(dest, elem, + size) == 0) + found_key = B_TRUE; + } else if (found_key) { + if (nelems > 1) { + /* + * We expected an object, not + * this string. + */ + return (NULL); + } + return (dest); + } + state = string_is_key ? DTRACE_JSON_COLON : + DTRACE_JSON_COMMA; + string_is_key = B_FALSE; + break; + } + + *dd++ = cc; + break; + case DTRACE_JSON_STRING_ESCAPE: + *dd++ = cc; + if (cc == 'u') { + escape_unicount = 0; + state = DTRACE_JSON_STRING_ESCAPE_UNICODE; + } else { + state = DTRACE_JSON_STRING; + } + break; + case DTRACE_JSON_STRING_ESCAPE_UNICODE: + if (!isxdigit(cc)) { + /* + * ERROR: invalid unicode escape, expected + * four valid hexidecimal digits. + */ + return (NULL); + } + + *dd++ = cc; + if (++escape_unicount == 4) + state = DTRACE_JSON_STRING; + break; + case DTRACE_JSON_COLON: + if (isspace(cc)) + break; + + if (cc == ':') { + state = DTRACE_JSON_VALUE; + break; + } + + /* + * ERROR: expected a colon. 
+ */ + return (NULL); + case DTRACE_JSON_COMMA: + if (isspace(cc)) + break; + + if (cc == ',') { + if (in_array) { + state = DTRACE_JSON_VALUE; + if (++array_pos == array_elem) + found_key = B_TRUE; + } else { + state = DTRACE_JSON_OBJECT; + } + break; + } + + /* + * ERROR: either we hit an unexpected character, or + * we reached the end of the object or array without + * finding the requested key. + */ + return (NULL); + case DTRACE_JSON_IDENTIFIER: + if (islower(cc)) { + *dd++ = cc; + break; + } + + *dd = '\0'; + dd = dest; /* reset string buffer */ + + if (dtrace_strncmp(dest, "true", 5) == 0 || + dtrace_strncmp(dest, "false", 6) == 0 || + dtrace_strncmp(dest, "null", 5) == 0) { + if (found_key) { + if (nelems > 1) { + /* + * ERROR: We expected an object, + * not this identifier. + */ + return (NULL); + } + return (dest); + } else { + cur--; + state = DTRACE_JSON_COMMA; + break; + } + } + + /* + * ERROR: we did not recognise the identifier as one + * of those in the JSON specification. + */ + return (NULL); + case DTRACE_JSON_NUMBER: + if (cc == '.') { + *dd++ = cc; + state = DTRACE_JSON_NUMBER_FRAC; + break; + } + + if (cc == 'x' || cc == 'X') { + /* + * ERROR: specification explicitly excludes + * hexidecimal or octal numbers. + */ + return (NULL); + } + + OS_FALLTHROUGH; + case DTRACE_JSON_NUMBER_FRAC: + if (cc == 'e' || cc == 'E') { + *dd++ = cc; + state = DTRACE_JSON_NUMBER_EXP; + break; + } + + if (cc == '+' || cc == '-') { + /* + * ERROR: expect sign as part of exponent only. + */ + return (NULL); + } + OS_FALLTHROUGH; + case DTRACE_JSON_NUMBER_EXP: + if (isdigit(cc) || cc == '+' || cc == '-') { + *dd++ = cc; + break; + } + + *dd = '\0'; + dd = dest; /* reset string buffer */ + if (found_key) { + if (nelems > 1) { + /* + * ERROR: We expected an object, not + * this number. + */ + return (NULL); + } + return (dest); + } + + cur--; + state = DTRACE_JSON_COMMA; + break; + case DTRACE_JSON_VALUE: + if (isspace(cc)) + break; + + if (cc == '{' || cc == '[') { + if (nelems > 1 && found_key) { + in_array = cc == '[' ? B_TRUE : B_FALSE; + /* + * If our element selector directs us + * to descend into this nested object, + * then move to the next selector + * element in the list and restart the + * state machine. + */ + while (*elem != '\0') + elem++; + elem++; /* skip the inter-element NUL */ + nelems--; + dd = dest; + if (in_array) { + state = DTRACE_JSON_VALUE; + array_pos = 0; + array_elem = dtrace_strtoll( + elem, 10, size); + found_key = array_elem == 0 ? + B_TRUE : B_FALSE; + } else { + found_key = B_FALSE; + state = DTRACE_JSON_OBJECT; + } + break; + } + + /* + * Otherwise, we wish to either skip this + * nested object or return it in full. + */ + if (cc == '[') + brackets = 1; + else + braces = 1; + *dd++ = cc; + state = DTRACE_JSON_COLLECT_OBJECT; + break; + } + + if (cc == '"') { + state = DTRACE_JSON_STRING; + break; + } + + if (islower(cc)) { + /* + * Here we deal with true, false and null. + */ + *dd++ = cc; + state = DTRACE_JSON_IDENTIFIER; + break; + } + + if (cc == '-' || isdigit(cc)) { + *dd++ = cc; + state = DTRACE_JSON_NUMBER; + break; + } + + /* + * ERROR: unexpected character at start of value. + */ + return (NULL); + case DTRACE_JSON_COLLECT_OBJECT: + if (cc == '\0') + /* + * ERROR: unexpected end of input. + */ + return (NULL); + + *dd++ = cc; + if (cc == '"') { + collect_object = B_TRUE; + state = DTRACE_JSON_STRING; + break; + } + + if (cc == ']') { + if (brackets-- == 0) { + /* + * ERROR: unbalanced brackets. 
+ */ + return (NULL); + } + } else if (cc == '}') { + if (braces-- == 0) { + /* + * ERROR: unbalanced braces. + */ + return (NULL); + } + } else if (cc == '{') { + braces++; + } else if (cc == '[') { + brackets++; + } + + if (brackets == 0 && braces == 0) { + if (found_key) { + *dd = '\0'; + return (dest); + } + dd = dest; /* reset string buffer */ + state = DTRACE_JSON_COMMA; + } + break; + } + } + return (NULL); +} + +/* + * Emulate the execution of DTrace ID subroutines invoked by the call opcode. + * Notice that we don't bother validating the proper number of arguments or + * their types in the tuple stack. This isn't needed because all argument + * interpretation is safe because of our load safety -- the worst that can + * happen is that a bogus program can obtain bogus results. + */ +static void +dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, + dtrace_key_t *tupregs, int nargs, + dtrace_mstate_t *mstate, dtrace_state_t *state) +{ + volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; + volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; + dtrace_vstate_t *vstate = &state->dts_vstate; + +#if !defined(__APPLE__) + union { + mutex_impl_t mi; + uint64_t mx; + } m; + + union { + krwlock_t ri; + uintptr_t rw; + } r; +#else +/* FIXME: awaits lock/mutex work */ +#endif /* __APPLE__ */ + + switch (subr) { + case DIF_SUBR_RAND: + regs[rd] = dtrace_xoroshiro128_plus_next( + state->dts_rstate[CPU->cpu_id]); + break; #if !defined(__APPLE__) case DIF_SUBR_MUTEX_OWNED: @@ -3566,6 +4327,14 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, tupregs[subr == DIF_SUBR_ALLOCA ? 0 : 1].dttk_value; size_t scratch_size = (dest - mstate->dtms_scratch_ptr) + size; + /* + * Check whether the user can access kernel memory + */ + if (dtrace_priv_kernel(state) == 0) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV); + regs[rd] = 0; + break; + } /* * This action doesn't require any credential checks since * probes will not activate in user contexts to which the @@ -3675,6 +4444,9 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; /* Can't climb process tree any further. 
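On arm64e, the parent-process pointer loaded in the progenyof loop just below may carry a pointer-authentication signature and is stripped before use. A minimal sketch of the idiom (the variable names are illustrative):

	#if __has_feature(ptrauth_calls)
	#include <ptrauth.h>

	/* discard the PAC bits, leaving the raw, comparable address */
	struct proc *parent = ptrauth_strip(loaded_parent,
	    ptrauth_key_process_independent_data);
	#endif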
*/ p = (struct proc *)dtrace_loadptr((uintptr_t)&(p->p_pptr)); +#if __has_feature(ptrauth_calls) + p = ptrauth_strip(p, ptrauth_key_process_independent_data); +#endif if (*flags & CPU_DTRACE_FAULT) break; } @@ -3695,7 +4467,8 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, if (!dtrace_destructive_disallow && dtrace_priv_proc_control(state) && - !dtrace_istoxic(kaddr, size)) { + !dtrace_istoxic(kaddr, size) && + dtrace_canload(kaddr, size, mstate, vstate)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_copyout(kaddr, uaddr, size, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); @@ -3707,29 +4480,30 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, uintptr_t kaddr = tupregs[0].dttk_value; user_addr_t uaddr = tupregs[1].dttk_value; uint64_t size = tupregs[2].dttk_value; + size_t lim; if (!dtrace_destructive_disallow && dtrace_priv_proc_control(state) && - !dtrace_istoxic(kaddr, size)) { + !dtrace_istoxic(kaddr, size) && + dtrace_strcanload(kaddr, size, &lim, mstate, vstate)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); - dtrace_copyoutstr(kaddr, uaddr, size, flags); + dtrace_copyoutstr(kaddr, uaddr, lim, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); } break; } case DIF_SUBR_STRLEN: { - size_t sz; + size_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t addr = (uintptr_t)tupregs[0].dttk_value; - sz = dtrace_strlen((char *)addr, - state->dts_options[DTRACEOPT_STRSIZE]); + size_t lim; - if (!dtrace_canload(addr, sz + 1, mstate, vstate)) { + if (!dtrace_strcanload(addr, size, &lim, mstate, vstate)) { regs[rd] = 0; break; } - regs[rd] = sz; + regs[rd] = dtrace_strlen((char *)addr, lim); break; } @@ -3743,12 +4517,19 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * is DIF_SUBR_STRRCHR, we will look for the last occurrence * of the specified character instead of the first. */ - uintptr_t saddr = tupregs[0].dttk_value; uintptr_t addr = tupregs[0].dttk_value; - uintptr_t limit = addr + state->dts_options[DTRACEOPT_STRSIZE]; + uintptr_t addr_limit; + uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; + size_t lim; char c, target = (char)tupregs[1].dttk_value; - for (regs[rd] = 0; addr < limit; addr++) { + if (!dtrace_strcanload(addr, size, &lim, mstate, vstate)) { + regs[rd] = 0; + break; + } + addr_limit = addr + lim; + + for (regs[rd] = 0; addr < addr_limit; addr++) { if ((c = dtrace_load8(addr)) == target) { regs[rd] = addr; @@ -3760,11 +4541,6 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; } - if (!dtrace_canload(saddr, addr - saddr, mstate, vstate)) { - regs[rd] = 0; - break; - } - break; } @@ -3922,7 +4698,8 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, uintptr_t addr = tupregs[0].dttk_value; uintptr_t tokaddr = tupregs[1].dttk_value; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; - uintptr_t limit, toklimit = tokaddr + size; + uintptr_t limit, toklimit; + size_t clim; char *dest = (char *)mstate->dtms_scratch_ptr; uint8_t c='\0', tokmap[32]; /* 256 / 8 */ uint64_t i = 0; @@ -3931,10 +4708,11 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * Check both the token buffer and (later) the input buffer, * since both could be non-scratch addresses. 
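The strtok emulation that follows builds a 256-bit membership set for the delimiter string: one bit per possible byte value, 32 bytes in all. Its two bit operations, distilled into helpers with hypothetical names:

	static inline void
	tokmap_add(uint8_t map[32], uint8_t c)
	{
		map[c >> 3] |= (1 << (c & 0x7));
	}

	static inline int
	tokmap_has(const uint8_t map[32], uint8_t c)
	{
		return ((map[c >> 3] & (1 << (c & 0x7))) != 0);
	}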
*/ - if (!dtrace_strcanload(tokaddr, size, mstate, vstate)) { + if (!dtrace_strcanload(tokaddr, size, &clim, mstate, vstate)) { regs[rd] = 0; break; } + toklimit = tokaddr + clim; if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); @@ -3951,6 +4729,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * it behaves like an implicit clause-local variable. */ addr = mstate->dtms_strtok; + limit = mstate->dtms_strtok_limit; } else { /* * If the user-specified address is non-NULL we must @@ -3960,10 +4739,12 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * (when we fetch addr from mstate->dtms_strtok) * would fail this access check. */ - if (!dtrace_strcanload(addr, size, mstate, vstate)) { + if (!dtrace_strcanload(addr, size, &clim, mstate, + vstate)) { regs[rd] = 0; break; } + limit = addr + clim; } /* @@ -3982,10 +4763,10 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, tokmap[c >> 3] |= (1 << (c & 0x7)); } - for (limit = addr + size; addr < limit; addr++) { + for (; addr < limit; addr++) { /* - * We're looking for a character that is _not_ contained - * in the token string. + * We're looking for a character that is _not_ + * contained in the token string. */ if ((c = dtrace_load8(addr)) == '\0') break; @@ -4003,6 +4784,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, */ regs[rd] = 0; mstate->dtms_strtok = 0; + mstate->dtms_strtok_limit = 0; break; } @@ -4025,6 +4807,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, regs[rd] = (uintptr_t)dest; mstate->dtms_scratch_ptr += size; mstate->dtms_strtok = addr; + mstate->dtms_strtok_limit = limit; break; } @@ -4100,10 +4883,12 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t s1 = tupregs[0].dttk_value; uintptr_t s2 = tupregs[1].dttk_value; - uint64_t i = 0; + uint64_t i = 0, j = 0; + size_t lim1, lim2; + char c; - if (!dtrace_strcanload(s1, size, mstate, vstate) || - !dtrace_strcanload(s2, size, mstate, vstate)) { + if (!dtrace_strcanload(s1, size, &lim1, mstate, vstate) || + !dtrace_strcanload(s2, size, &lim2, mstate, vstate)) { regs[rd] = 0; break; } @@ -4120,8 +4905,8 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, regs[rd] = 0; break; } - - if ((d[i++] = dtrace_load8(s1++)) == '\0') { + c = (i >= lim1) ? '\0' : dtrace_load8(s1++); + if ((d[i++] = c) == '\0') { i--; break; } @@ -4133,8 +4918,8 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, regs[rd] = 0; break; } - - if ((d[i++] = dtrace_load8(s2++)) == '\0') + c = (j++ >= lim2) ? '\0' : dtrace_load8(s2++); + if ((d[i++] = c) == '\0') break; } @@ -4146,11 +4931,45 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; } + case DIF_SUBR_STRTOLL: { + uintptr_t s = tupregs[0].dttk_value; + uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; + size_t lim; + int base = 10; + + if (nargs > 1) { + if ((base = tupregs[1].dttk_value) <= 1 || + base > ('z' - 'a' + 1) + ('9' - '0' + 1)) { + *flags |= CPU_DTRACE_ILLOP; + break; + } + } + + if (!dtrace_strcanload(s, size, &lim, mstate, vstate)) { + regs[rd] = INT64_MIN; + break; + } + + regs[rd] = dtrace_strtoll((char *)s, base, lim); + break; + } + case DIF_SUBR_LLTOSTR: { int64_t i = (int64_t)tupregs[0].dttk_value; - int64_t val = i < 0 ? 
i * -1 : i; - uint64_t size = 22; /* enough room for 2^64 in decimal */ + uint64_t val, digit; + uint64_t size = 65; /* enough room for 2^64 in binary */ char *end = (char *)mstate->dtms_scratch_ptr + size - 1; + int base = 10; + + if (nargs > 1) { + if ((base = tupregs[1].dttk_value) <= 1 || + base > ('z' - 'a' + 1) + ('9' - '0' + 1)) { + *flags |= CPU_DTRACE_ILLOP; + break; + } + } + + val = (base == 10 && i < 0) ? i * -1 : i; if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); @@ -4158,13 +4977,24 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; } - for (*end-- = '\0'; val; val /= 10) - *end-- = '0' + (val % 10); + for (*end-- = '\0'; val; val /= base) { + if ((digit = val % base) <= '9' - '0') { + *end-- = '0' + digit; + } else { + *end-- = 'a' + (digit - ('9' - '0') - 1); + } + } + + if (i == 0 && base == 16) + *end-- = '0'; + + if (base == 16) + *end-- = 'x'; - if (i == 0) + if (i == 0 || base == 8 || base == 16) *end-- = '0'; - if (i < 0) + if (i < 0 && base == 10) *end-- = '-'; regs[rd] = (uintptr_t)end + 1; @@ -4337,9 +5167,10 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, char *dest = (char *)mstate->dtms_scratch_ptr, c; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t src = tupregs[0].dttk_value; - int i = 0, j = 0; + size_t lim; + size_t i = 0, j = 0; - if (!dtrace_strcanload(src, size, mstate, vstate)) { + if (!dtrace_strcanload(src, size, &lim, mstate, vstate)) { regs[rd] = 0; break; } @@ -4354,7 +5185,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * Move forward, loading each character. */ do { - c = dtrace_load8(src + i++); + c = (i >= lim) ? '\0' : dtrace_load8(src + i++); next: if ((uint64_t)(j + 5) >= size) /* 5 = strlen("/..c\0") */ break; @@ -4364,7 +5195,7 @@ next: continue; } - c = dtrace_load8(src + i++); + c = (i >= lim) ? '\0' : dtrace_load8(src + i++); if (c == '/') { /* @@ -4385,7 +5216,7 @@ next: continue; } - c = dtrace_load8(src + i++); + c = (i >= lim) ? '\0' : dtrace_load8(src + i++); if (c == '/') { /* @@ -4408,7 +5239,7 @@ next: continue; } - c = dtrace_load8(src + i++); + c = (i >= lim) ? '\0' : dtrace_load8(src + i++); if (c != '/' && c != '\0') { /* @@ -4470,6 +5301,12 @@ next: #if !defined(__APPLE__) ip4 = dtrace_load32(tupregs[argi].dttk_value); #else + if (!dtrace_canload(tupregs[argi].dttk_value, sizeof(ip4), + mstate, vstate)) { + regs[rd] = 0; + break; + } + dtrace_bcopy( (void *)(uintptr_t)tupregs[argi].dttk_value, (void *)(uintptr_t)&ip4, sizeof (ip4)); @@ -4530,6 +5367,12 @@ next: * just the IPv4 string is returned for inet_ntoa6. */ + if (!dtrace_canload(tupregs[argi].dttk_value, + sizeof(struct in6_addr), mstate, vstate)) { + regs[rd] = 0; + break; + } + /* * Safely load the IPv6 address. 
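Illustrative outputs of the lltostr() emulation above, following the code path shown (the optional second argument is the base):

	lltostr(-10)      returns "-10"   (a sign is emitted only for base 10)
	lltostr(255, 16)  returns "0xff"  (the 0x prefix is always emitted for base 16)
	lltostr(8, 8)     returns "010"   (octal gains a leading zero)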
*/ @@ -4666,6 +5509,65 @@ inetout: regs[rd] = (uintptr_t)end + 1; break; } + case DIF_SUBR_JSON: { + uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; + uintptr_t json = tupregs[0].dttk_value; + size_t jsonlen = dtrace_strlen((char *)json, size); + uintptr_t elem = tupregs[1].dttk_value; + size_t elemlen = dtrace_strlen((char *)elem, size); + + char *dest = (char *)mstate->dtms_scratch_ptr; + char *elemlist = (char *)mstate->dtms_scratch_ptr + jsonlen + 1; + char *ee = elemlist; + int nelems = 1; + uintptr_t cur; + + if (!dtrace_canload(json, jsonlen + 1, mstate, vstate) || + !dtrace_canload(elem, elemlen + 1, mstate, vstate)) { + regs[rd] = 0; + break; + } + + if (!DTRACE_INSCRATCH(mstate, jsonlen + 1 + elemlen + 1)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); + regs[rd] = 0; + break; + } + + /* + * Read the element selector and split it up into a packed list + * of strings. + */ + for (cur = elem; cur < elem + elemlen; cur++) { + char cc = dtrace_load8(cur); + + if (cur == elem && cc == '[') { + /* + * If the first element selector key is + * actually an array index then ignore the + * bracket. + */ + continue; + } + + if (cc == ']') + continue; + + if (cc == '.' || cc == '[') { + nelems++; + cc = '\0'; + } + + *ee++ = cc; + } + *ee++ = '\0'; + + if ((regs[rd] = (uintptr_t)dtrace_json(size, json, elemlist, + nelems, dest)) != 0) + mstate->dtms_scratch_ptr += jsonlen + 1; + break; + } + case DIF_SUBR_TOUPPER: case DIF_SUBR_TOLOWER: { uintptr_t src = tupregs[0].dttk_value; @@ -4707,38 +5609,124 @@ inetout: regs[rd] = (uintptr_t)end + 1; break; } -/* - * APPLE NOTE: - * CoreProfile callback ('core_profile (uint64_t, [uint64_t], [uint64_t] ...)') - */ - case DIF_SUBR_COREPROFILE: { - uint64_t selector = tupregs[0].dttk_value; - uint64_t args[DIF_DTR_NREGS-1] = {0ULL}; - uint32_t ii; - uint32_t count = (uint32_t)nargs; - - if (count < 1) { - regs[rd] = KERN_FAILURE; - break; + case DIF_SUBR_STRIP: + if (!dtrace_is_valid_ptrauth_key(tupregs[1].dttk_value)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + break; } - - if(count > DIF_DTR_NREGS) - count = DIF_DTR_NREGS; + regs[rd] = (uint64_t)dtrace_ptrauth_strip( + (void*)tupregs[0].dttk_value, tupregs[1].dttk_value); + break; + +#if defined(__APPLE__) + case DIF_SUBR_VM_KERNEL_ADDRPERM: { + if (!dtrace_priv_kernel(state)) { + regs[rd] = 0; + } else { + regs[rd] = VM_KERNEL_ADDRPERM((vm_offset_t) tupregs[0].dttk_value); + } + + break; + } - /* copy in any variadic argument list, bounded by DIF_DTR_NREGS */ - for(ii = 0; ii < count-1; ii++) { - args[ii] = tupregs[ii+1].dttk_value; + case DIF_SUBR_KDEBUG_TRACE: { + uint32_t debugid; + uintptr_t args[4] = {0}; + int i; + + if (nargs < 2 || nargs > 5) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + break; } - kern_return_t ret = - chudxnu_dtrace_callback(selector, args, count-1); - if(KERN_SUCCESS != ret) { - /* error */ + if (dtrace_destructive_disallow || + !dtrace_priv_kernel_destructive(state)) { + return; } - regs[rd] = ret; + debugid = tupregs[0].dttk_value; + for (i = 0; i < nargs - 1; i++) + args[i] = tupregs[i + 1].dttk_value; + + kernel_debug(debugid, args[0], args[1], args[2], args[3], 0); + break; } + + case DIF_SUBR_KDEBUG_TRACE_STRING: { + if (nargs != 3) { + break; + } + + if (dtrace_destructive_disallow || + !dtrace_priv_kernel_destructive(state)) { + return; + } + + uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; + uint32_t debugid = tupregs[0].dttk_value; + uint64_t str_id = tupregs[1].dttk_value; + uintptr_t src = tupregs[2].dttk_value; + size_t lim; + char buf[size]; + char* 
str = NULL; + + if (src != (uintptr_t)0) { + str = buf; + if (!dtrace_strcanload(src, size, &lim, mstate, vstate)) { + break; + } + dtrace_strcpy((void*)src, buf, size); + } + + (void)kernel_debug_string(debugid, &str_id, str); + regs[rd] = str_id; + + break; + } + + case DIF_SUBR_MTONS: + absolutetime_to_nanoseconds(tupregs[0].dttk_value, ®s[rd]); + + break; + case DIF_SUBR_PHYSMEM_READ: { +#if DEBUG || DEVELOPMENT + if (dtrace_destructive_disallow || + !dtrace_priv_kernel_destructive(state)) { + return; + } + regs[rd] = dtrace_physmem_read(tupregs[0].dttk_value, + tupregs[1].dttk_value); +#else + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); +#endif /* DEBUG || DEVELOPMENT */ + break; + } + case DIF_SUBR_PHYSMEM_WRITE: { +#if DEBUG || DEVELOPMENT + if (dtrace_destructive_disallow || + !dtrace_priv_kernel_destructive(state)) { + return; + } + + dtrace_physmem_write(tupregs[0].dttk_value, + tupregs[1].dttk_value, (size_t)tupregs[2].dttk_value); +#else + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); +#endif /* DEBUG || DEVELOPMENT */ + break; + } + + case DIF_SUBR_KVTOPHYS: { +#if DEBUG || DEVELOPMENT + regs[rd] = kvtophys(tupregs[0].dttk_value); +#else + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); +#endif /* DEBUG || DEVELOPMENT */ + break; + } +#endif /* defined(__APPLE__) */ + } } @@ -4919,7 +5907,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, *illval = regs[r1]; break; } - /*FALLTHROUGH*/ + OS_FALLTHROUGH; case DIF_OP_LDSB: regs[rd] = (int8_t)dtrace_load8(regs[r1]); break; @@ -4929,7 +5917,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, *illval = regs[r1]; break; } - /*FALLTHROUGH*/ + OS_FALLTHROUGH; case DIF_OP_LDSH: regs[rd] = (int16_t)dtrace_load16(regs[r1]); break; @@ -4939,7 +5927,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, *illval = regs[r1]; break; } - /*FALLTHROUGH*/ + OS_FALLTHROUGH; case DIF_OP_LDSW: regs[rd] = (int32_t)dtrace_load32(regs[r1]); break; @@ -4949,7 +5937,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, *illval = regs[r1]; break; } - /*FALLTHROUGH*/ + OS_FALLTHROUGH; case DIF_OP_LDUB: regs[rd] = dtrace_load8(regs[r1]); break; @@ -4959,7 +5947,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, *illval = regs[r1]; break; } - /*FALLTHROUGH*/ + OS_FALLTHROUGH; case DIF_OP_LDUH: regs[rd] = dtrace_load16(regs[r1]); break; @@ -4969,7 +5957,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, *illval = regs[r1]; break; } - /*FALLTHROUGH*/ + OS_FALLTHROUGH; case DIF_OP_LDUW: regs[rd] = dtrace_load32(regs[r1]); break; @@ -4979,7 +5967,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, *illval = regs[r1]; break; } - /*FALLTHROUGH*/ + OS_FALLTHROUGH; case DIF_OP_LDX: regs[rd] = dtrace_load64(regs[r1]); break; @@ -5034,15 +6022,17 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, size_t sz = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t s1 = regs[r1]; uintptr_t s2 = regs[r2]; + size_t lim1 = sz, lim2 = sz; if (s1 != 0 && - !dtrace_strcanload(s1, sz, mstate, vstate)) + !dtrace_strcanload(s1, sz, &lim1, mstate, vstate)) break; if (s2 != 0 && - !dtrace_strcanload(s2, sz, mstate, vstate)) + !dtrace_strcanload(s2, sz, &lim2, mstate, vstate)) break; - cc_r = dtrace_strncmp((char *)s1, (char *)s2, sz); + cc_r = dtrace_strncmp((char *)s1, (char *)s2, + MIN(lim1, lim2)); cc_n = cc_r < 0; cc_z = cc_r == 0; @@ -5094,12 +6084,14 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= 
DIF_VAR_OTHER_UBASE; + VERIFY(id < (uint_t)vstate->dtvs_nglobals); svar = vstate->dtvs_globals[id]; ASSERT(svar != NULL); v = &svar->dtsv_var; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { uintptr_t a = (uintptr_t)svar->dtsv_data; + size_t lim; ASSERT(a != 0); ASSERT(svar->dtsv_size != 0); @@ -5113,11 +6105,11 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, } if (!dtrace_vcanload( (void *)(uintptr_t)regs[rd], &v->dtdv_type, - mstate, vstate)) + &lim, mstate, vstate)) break; dtrace_vcopy((void *)(uintptr_t)regs[rd], - (void *)a, &v->dtdv_type); + (void *)a, &v->dtdv_type, lim); break; } @@ -5184,7 +6176,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; - ASSERT(id < (uint_t)vstate->dtvs_nlocals); + VERIFY(id < (uint_t)vstate->dtvs_nlocals); ASSERT(vstate->dtvs_locals != NULL); svar = vstate->dtvs_locals[id]; ASSERT(svar != NULL); @@ -5193,6 +6185,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { uintptr_t a = (uintptr_t)svar->dtsv_data; size_t sz = v->dtdv_type.dtdt_size; + size_t lim; sz += sizeof (uint64_t); ASSERT(svar->dtsv_size == (int)NCPU * sz); @@ -5208,11 +6201,11 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, if (!dtrace_vcanload( (void *)(uintptr_t)regs[rd], &v->dtdv_type, - mstate, vstate)) + &lim, mstate, vstate)) break; dtrace_vcopy((void *)(uintptr_t)regs[rd], - (void *)a, &v->dtdv_type); + (void *)a, &v->dtdv_type, lim); break; } @@ -5261,6 +6254,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, id = DIF_INSTR_VAR(instr); ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; + VERIFY(id < (uint_t)vstate->dtvs_ntlocals); key = &tupregs[DIF_DTR_NREGS]; key[0].dttk_value = (uint64_t)id; @@ -5285,13 +6279,15 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, break; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { + size_t lim; + if (!dtrace_vcanload( (void *)(uintptr_t)regs[rd], - &v->dtdv_type, mstate, vstate)) + &v->dtdv_type, &lim, mstate, vstate)) break; dtrace_vcopy((void *)(uintptr_t)regs[rd], - dvar->dtdv_data, &v->dtdv_type); + dvar->dtdv_data, &v->dtdv_type, lim); } else { *((uint64_t *)dvar->dtdv_data) = regs[rd]; } @@ -5328,6 +6324,10 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, regs[r2] ? 
regs[r2] : dtrace_strsize_default) + 1; } else { + if (regs[r2] > LONG_MAX) { + *flags |= CPU_DTRACE_ILLOP; + break; + } tupregs[ttop].dttk_size = regs[r2]; } @@ -5369,8 +6369,10 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, if (DIF_INSTR_OP(instr) == DIF_OP_LDTAA) { DTRACE_TLS_THRKEY(key[nkeys].dttk_value); key[nkeys++].dttk_size = 0; + VERIFY(id < (uint_t)vstate->dtvs_ntlocals); v = &vstate->dtvs_tlocals[id]; } else { + VERIFY(id < (uint_t)vstate->dtvs_nglobals); v = &vstate->dtvs_globals[id]->dtsv_var; } @@ -5409,8 +6411,10 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) { DTRACE_TLS_THRKEY(key[nkeys].dttk_value); key[nkeys++].dttk_size = 0; + VERIFY(id < (uint_t)vstate->dtvs_ntlocals); v = &vstate->dtvs_tlocals[id]; } else { + VERIFY(id < (uint_t)vstate->dtvs_nglobals); v = &vstate->dtvs_globals[id]->dtsv_var; } @@ -5424,13 +6428,15 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, break; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { + size_t lim; + if (!dtrace_vcanload( (void *)(uintptr_t)regs[rd], &v->dtdv_type, - mstate, vstate)) + &lim, mstate, vstate)) break; dtrace_vcopy((void *)(uintptr_t)regs[rd], - dvar->dtdv_data, &v->dtdv_type); + dvar->dtdv_data, &v->dtdv_type, lim); } else { *((uint64_t *)dvar->dtdv_data) = regs[rd]; } @@ -5530,6 +6536,10 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, } *((uint64_t *)(uintptr_t)regs[rd]) = regs[r1]; break; + case DIF_OP_STRIP: + regs[rd] = (uint64_t)dtrace_ptrauth_strip( + (void*)regs[r1], r2); + break; } } @@ -5542,6 +6552,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, return (0); } +__attribute__((noinline)) static void dtrace_action_breakpoint(dtrace_ecb_t *ecb) { @@ -5604,6 +6615,7 @@ dtrace_action_breakpoint(dtrace_ecb_t *ecb) debug_enter(c); } +__attribute__((noinline)) static void dtrace_action_panic(dtrace_ecb_t *ecb) { @@ -5712,6 +6724,7 @@ dtrace_action_pidresume(uint64_t pid) } } +__attribute__((noinline)) static void dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val) { @@ -5757,6 +6770,7 @@ dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val) cpu->cpu_dtrace_chilled += val; } +__attribute__((noinline)) static void dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t *buf, uint64_t arg) @@ -5832,42 +6846,151 @@ dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state, continue; } - if (sym == NULL) { - str[offs++] = '\0'; - continue; + if (sym == NULL) { + str[offs++] = '\0'; + continue; + } + + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + + /* + * Now copy in the string that the helper returned to us. + */ + for (j = 0; offs + j < strsize; j++) { + if ((str[offs + j] = sym[j]) == '\0') + break; + } + + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); + + offs += j + 1; + } + + if (offs >= strsize) { + /* + * If we didn't have room for all of the strings, we don't + * abort processing -- this needn't be a fatal error -- but we + * still want to increment a counter (dts_stkstroverflows) to + * allow this condition to be warned about. (If this is from + * a jstack() action, it is easily tuned via jstackstrsize.) 
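Reduced to its essentials, the helper-string copy above appends each returned symbol within a hard strsize budget and, on overflow, keeps going but charges a counter so the truncation is observable. A self-contained sketch -- append_sym() and the counter are illustrative, and the kernel additionally zero-fills the tail and runs the loads under CPU_DTRACE_NOFAULT:

#include <stddef.h>

/* Append sym at *offs, never past strsize; returns 1 on truncation. */
static int
append_sym(char *buf, size_t strsize, size_t *offs, const char *sym,
    unsigned long *overflows)
{
	size_t j;

	for (j = 0; *offs + j < strsize; j++) {
		if ((buf[*offs + j] = sym[j]) == '\0')
			break;
	}
	*offs += j + 1;

	if (*offs >= strsize) {
		(*overflows)++;		/* dts_stkstroverflows analogue */
		return (1);		/* not fatal -- just accounted */
	}
	return (0);
}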
+ */ + dtrace_error(&state->dts_stkstroverflows); + } + + while (offs < strsize) + str[offs++] = '\0'; + +out: + mstate->dtms_scratch_ptr = old; +} + +__attribute__((noinline)) +static void +dtrace_store_by_ref(dtrace_difo_t *dp, caddr_t tomax, size_t size, + size_t *valoffsp, uint64_t *valp, uint64_t end, int intuple, int dtkind) +{ + volatile uint16_t *flags; + uint64_t val = *valp; + size_t valoffs = *valoffsp; + + flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags; + ASSERT(dtkind == DIF_TF_BYREF || dtkind == DIF_TF_BYUREF); + + /* + * If this is a string, we're going to only load until we find the zero + * byte -- after which we'll store zero bytes. + */ + if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) { + char c = '\0' + 1; + size_t s; + + for (s = 0; s < size; s++) { + if (c != '\0' && dtkind == DIF_TF_BYREF) { + c = dtrace_load8(val++); + } else if (c != '\0' && dtkind == DIF_TF_BYUREF) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + c = dtrace_fuword8((user_addr_t)(uintptr_t)val++); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); + if (*flags & CPU_DTRACE_FAULT) + break; + } + + DTRACE_STORE(uint8_t, tomax, valoffs++, c); + + if (c == '\0' && intuple) + break; + } + } else { + uint8_t c; + while (valoffs < end) { + if (dtkind == DIF_TF_BYREF) { + c = dtrace_load8(val++); + } else if (dtkind == DIF_TF_BYUREF) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + c = dtrace_fuword8((user_addr_t)(uintptr_t)val++); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); + if (*flags & CPU_DTRACE_FAULT) + break; + } + + DTRACE_STORE(uint8_t, tomax, + valoffs++, c); } + } - DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + *valp = val; + *valoffsp = valoffs; +} - /* - * Now copy in the string that the helper returned to us. - */ - for (j = 0; offs + j < strsize; j++) { - if ((str[offs + j] = sym[j]) == '\0') - break; - } +/* + * Disables interrupts and sets the per-thread inprobe flag. When DEBUG is + * defined, we also assert that we are not recursing unless the probe ID is an + * error probe. + */ +static dtrace_icookie_t +dtrace_probe_enter(dtrace_id_t id) +{ + thread_t thread = current_thread(); + uint16_t inprobe; - DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); + dtrace_icookie_t cookie; - offs += j + 1; - } + cookie = dtrace_interrupt_disable(); - if (offs >= strsize) { - /* - * If we didn't have room for all of the strings, we don't - * abort processing -- this needn't be a fatal error -- but we - * still want to increment a counter (dts_stkstroverflows) to - * allow this condition to be warned about. (If this is from - * a jstack() action, it is easily tuned via jstackstrsize.) - */ - dtrace_error(&state->dts_stkstroverflows); - } + /* + * Unless this is an ERROR probe, we are not allowed to recurse in + * dtrace_probe(). Recursing into DTrace probe usually means that a + * function is instrumented that should not have been instrumented or + * that the ordering guarantee of the records will be violated, + * resulting in unexpected output. If there is an exception to this + * assertion, a new case should be added. + */ + inprobe = dtrace_get_thread_inprobe(thread); + VERIFY(inprobe == 0 || + id == dtrace_probeid_error); + ASSERT(inprobe < UINT16_MAX); + dtrace_set_thread_inprobe(thread, inprobe + 1); - while (offs < strsize) - str[offs++] = '\0'; + return (cookie); +} -out: - mstate->dtms_scratch_ptr = old; +/* + * Clears the per-thread inprobe flag and enables interrupts. 
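The enter/exit bracket introduced here is a per-thread depth counter wrapped around an interrupt-disable window. A userland model of just the counter discipline (illustrative; the kernel version also disables interrupts, uses VERIFY rather than assert, and exempts only the error probe):

#include <assert.h>
#include <stdint.h>

static _Thread_local uint16_t inprobe;	/* per-thread, as in the diff */

static void
probe_enter(int is_error_probe)
{
	/* Recursion is only legal for the designated error path. */
	assert(inprobe == 0 || is_error_probe);
	assert(inprobe < UINT16_MAX);
	inprobe++;
}

static void
probe_exit(void)
{
	assert(inprobe > 0);
	inprobe--;
}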
+ */ +static void +dtrace_probe_exit(dtrace_icookie_t cookie) +{ + thread_t thread = current_thread(); + uint16_t inprobe = dtrace_get_thread_inprobe(thread); + + ASSERT(inprobe > 0); + dtrace_set_thread_inprobe(thread, inprobe - 1); + +#if INTERRUPT_MASKED_DEBUG + ml_spin_debug_reset(thread); +#endif /* INTERRUPT_MASKED_DEBUG */ + + dtrace_interrupt_enable(cookie); } /* @@ -5875,8 +6998,8 @@ out: * is the function called by the provider to fire a probe -- from which all * subsequent probe-context DTrace activity emanates. */ -static void -__dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, +void +dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4) { processorid_t cpuid; @@ -5891,8 +7014,20 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, volatile uint16_t *flags; hrtime_t now; - cookie = dtrace_interrupt_disable(); + cookie = dtrace_probe_enter(id); + + /* Ensure that probe id is valid. */ + if (id - 1 >= (dtrace_id_t)dtrace_nprobes) { + dtrace_probe_exit(cookie); + return; + } + probe = dtrace_probes[id - 1]; + if (probe == NULL) { + dtrace_probe_exit(cookie); + return; + } + cpuid = CPU->cpu_id; onintr = CPU_ON_INTR(CPU); @@ -5902,7 +7037,7 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, * We have hit in the predicate cache; we know that * this predicate would evaluate to be false. */ - dtrace_interrupt_enable(cookie); + dtrace_probe_exit(cookie); return; } @@ -5910,7 +7045,7 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, /* * We don't trace anything if we're panicking. */ - dtrace_interrupt_enable(cookie); + dtrace_probe_exit(cookie); return; } @@ -6057,6 +7192,7 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, * not the case. */ if ((ecb->dte_cond & DTRACE_COND_USERMODE) && + prov->dtpv_pops.dtps_usermode && prov->dtpv_pops.dtps_usermode(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg) == 0) continue; @@ -6180,7 +7316,7 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, if (pred != NULL) { dtrace_difo_t *dp = pred->dtp_difo; - int rval; + uint64_t rval; rval = dtrace_dif_emulate(dp, &mstate, vstate, state); @@ -6340,7 +7476,7 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, tomax = buf->dtb_tomax; ASSERT(tomax != NULL); - if (ecb->dte_size != 0) + if (ecb->dte_size == 0) continue; ASSERT(ecb->dte_size >= sizeof(dtrace_rechdr_t)); @@ -6463,7 +7599,7 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, ASSERT(0); } - if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF) { + if (dp->dtdo_rtype.dtdt_flags & (DIF_TF_BYREF | DIF_TF_BYUREF)) { uintptr_t end = valoffs + size; if (tracememsize != 0 && @@ -6473,39 +7609,17 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, tracememsize = 0; } - if (!dtrace_vcanload((void *)(uintptr_t)val, - &dp->dtdo_rtype, &mstate, vstate)) - continue; - - /* - * If this is a string, we're going to only - * load until we find the zero byte -- after - * which we'll store zero bytes. 
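Distilled, the string case that dtrace_store_by_ref() centralizes is: load bytes until the first NUL, then keep storing zeros so stale scratch never leaks into the record. The '\0' + 1 seed only guarantees that the first iteration performs a load. A sketch (illustrative; the by-user-ref fault handling and the in-tuple early exit are dropped):

#include <stddef.h>

/* Fixed-width string store into a record of exactly 'size' bytes. */
static void
store_string(char *rec, size_t size, const char *src)
{
	char c = '\0' + 1;	/* any nonzero value primes the loop */
	size_t s;

	for (s = 0; s < size; s++) {
		if (c != '\0')
			c = *src++;	/* dtrace_load8()/fuword8() stand-in */
		rec[s] = c;		/* after the NUL this stores zeros */
	}
}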
- */ - if (dp->dtdo_rtype.dtdt_kind == - DIF_TYPE_STRING) { - char c = '\0' + 1; - int intuple = act->dta_intuple; - size_t s; - - for (s = 0; s < size; s++) { - if (c != '\0') - c = dtrace_load8(val++); - - DTRACE_STORE(uint8_t, tomax, - valoffs++, c); - - if (c == '\0' && intuple) - break; - } - + if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF && + !dtrace_vcanload((void *)(uintptr_t)val, + &dp->dtdo_rtype, NULL, &mstate, vstate)) + { continue; } - while (valoffs < end) { - DTRACE_STORE(uint8_t, tomax, valoffs++, - dtrace_load8(val++)); - } + dtrace_store_by_ref(dp, tomax, size, &valoffs, + &val, end, act->dta_intuple, + dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF ? + DIF_TF_BYREF: DIF_TF_BYUREF); continue; } @@ -6603,45 +7717,16 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, thread_t thread = current_thread(); int64_t t = dtrace_get_thread_tracing(thread); - if (t >= 0) { + if (t >= 0) { /* Usual case, accumulate time spent here into t_dtrace_tracing */ dtrace_set_thread_tracing(thread, t + (dtrace_gethrtime() - now)); - } else { + } else { /* Return from error recursion. No accumulation, just clear the sign bit on t_dtrace_tracing. */ - dtrace_set_thread_tracing(thread, (~(1ULL<<63)) & t); + dtrace_set_thread_tracing(thread, (~(1ULL<<63)) & t); } } - dtrace_interrupt_enable(cookie); -} - -/* - * APPLE NOTE: Don't allow a thread to re-enter dtrace_probe(). - * This could occur if a probe is encountered on some function in the - * transitive closure of the call to dtrace_probe(). - * Solaris has some strong guarantees that this won't happen. - * The Darwin implementation is not so mature as to make those guarantees. - * Hence, the introduction of __dtrace_probe() on xnu. - */ - -void -dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, - uint64_t arg2, uint64_t arg3, uint64_t arg4) -{ - thread_t thread = current_thread(); - disable_preemption(); - if (id == dtrace_probeid_error) { - __dtrace_probe(id, arg0, arg1, arg2, arg3, arg4); - dtrace_getipl(); /* Defeat tail-call optimization of __dtrace_probe() */ - } else if (!dtrace_get_thread_reentering(thread)) { - dtrace_set_thread_reentering(thread, TRUE); - __dtrace_probe(id, arg0, arg1, arg2, arg3, arg4); - dtrace_set_thread_reentering(thread, FALSE); - } -#if DEBUG - else __dtrace_probe(dtrace_probeid_error, 0, id, 1, -1, DTRACEFLT_UNKNOWN); -#endif - enable_preemption(); + dtrace_probe_exit(cookie); } /* @@ -6670,12 +7755,33 @@ dtrace_hash_str(const char *p) return (hval); } +static const char* +dtrace_strkey_probe_provider(void *elm, uintptr_t offs) +{ +#pragma unused(offs) + dtrace_probe_t *probe = (dtrace_probe_t*)elm; + return probe->dtpr_provider->dtpv_name; +} + +static const char* +dtrace_strkey_offset(void *elm, uintptr_t offs) +{ + return ((char *)((uintptr_t)(elm) + offs)); +} + +static const char* +dtrace_strkey_deref_offset(void *elm, uintptr_t offs) +{ + return *((char **)((uintptr_t)(elm) + offs)); +} + static dtrace_hash_t * -dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs) +dtrace_hash_create(dtrace_strkey_f func, uintptr_t arg, uintptr_t nextoffs, uintptr_t prevoffs) { dtrace_hash_t *hash = kmem_zalloc(sizeof (dtrace_hash_t), KM_SLEEP); - hash->dth_stroffs = stroffs; + hash->dth_getstr = func; + hash->dth_stroffs = arg; hash->dth_nextoffs = nextoffs; hash->dth_prevoffs = prevoffs; @@ -6724,10 +7830,10 @@ dtrace_hash_resize(dtrace_hash_t *hash) for (i = 0; i < size; i++) { for (bucket = hash->dth_tab[i]; bucket != NULL; bucket = next) { - dtrace_probe_t *probe = 
bucket->dthb_chain; + void *elm = bucket->dthb_chain; - ASSERT(probe != NULL); - ndx = DTRACE_HASHSTR(hash, probe) & new_mask; + ASSERT(elm != NULL); + ndx = DTRACE_HASHSTR(hash, elm) & new_mask; next = bucket->dthb_next; bucket->dthb_next = new_tab[ndx]; @@ -6742,12 +7848,12 @@ dtrace_hash_resize(dtrace_hash_t *hash) } static void -dtrace_hash_add(dtrace_hash_t *hash, dtrace_probe_t *new) +dtrace_hash_add(dtrace_hash_t *hash, void *new) { int hashval = DTRACE_HASHSTR(hash, new); int ndx = hashval & hash->dth_mask; dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; - dtrace_probe_t **nextp, **prevp; + void **nextp, **prevp; for (; bucket != NULL; bucket = bucket->dthb_next) { if (DTRACE_HASHEQ(hash, bucket->dthb_chain, new)) @@ -6780,23 +7886,29 @@ add: bucket->dthb_len++; } -static dtrace_probe_t * -dtrace_hash_lookup(dtrace_hash_t *hash, dtrace_probe_t *template) +static void * +dtrace_hash_lookup_string(dtrace_hash_t *hash, const char *str) { - int hashval = DTRACE_HASHSTR(hash, template); + int hashval = dtrace_hash_str(str); int ndx = hashval & hash->dth_mask; dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; for (; bucket != NULL; bucket = bucket->dthb_next) { - if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template)) + if (strcmp(str, DTRACE_GETSTR(hash, bucket->dthb_chain)) == 0) return (bucket->dthb_chain); } return (NULL); } +static dtrace_probe_t * +dtrace_hash_lookup(dtrace_hash_t *hash, void *template) +{ + return dtrace_hash_lookup_string(hash, DTRACE_GETSTR(hash, template)); +} + static int -dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template) +dtrace_hash_collisions(dtrace_hash_t *hash, void *template) { int hashval = DTRACE_HASHSTR(hash, template); int ndx = hashval & hash->dth_mask; @@ -6811,19 +7923,19 @@ dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template) } static void -dtrace_hash_remove(dtrace_hash_t *hash, dtrace_probe_t *probe) +dtrace_hash_remove(dtrace_hash_t *hash, void *elm) { - int ndx = DTRACE_HASHSTR(hash, probe) & hash->dth_mask; + int ndx = DTRACE_HASHSTR(hash, elm) & hash->dth_mask; dtrace_hashbucket_t *bucket = hash->dth_tab[ndx]; - dtrace_probe_t **prevp = DTRACE_HASHPREV(hash, probe); - dtrace_probe_t **nextp = DTRACE_HASHNEXT(hash, probe); + void **prevp = DTRACE_HASHPREV(hash, elm); + void **nextp = DTRACE_HASHNEXT(hash, elm); /* - * Find the bucket that we're removing this probe from. + * Find the bucket that we're removing this elm from. */ for (; bucket != NULL; bucket = bucket->dthb_next) { - if (DTRACE_HASHEQ(hash, bucket->dthb_chain, probe)) + if (DTRACE_HASHEQ(hash, bucket->dthb_chain, elm)) break; } @@ -6832,12 +7944,12 @@ dtrace_hash_remove(dtrace_hash_t *hash, dtrace_probe_t *probe) if (*prevp == NULL) { if (*nextp == NULL) { /* - * The removed probe was the only probe on this + * The removed element was the only element on this * bucket; we need to remove the bucket. */ dtrace_hashbucket_t *b = hash->dth_tab[ndx]; - ASSERT(bucket->dthb_chain == probe); + ASSERT(bucket->dthb_chain == elm); ASSERT(b != NULL); if (b == bucket) { @@ -6877,20 +7989,63 @@ dtrace_badattr(const dtrace_attribute_t *a) } /* - * Return a duplicate copy of a string. If the specified string is NULL, - * this function returns a zero-length string. - * APPLE NOTE: Darwin employs size bounded string operation. + * Returns a dtrace-managed copy of a string, and will + * deduplicate copies of the same string. 
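In userland terms the interning scheme works like this; a toy model in which a linked list stands in for the dtrace_strings hash, and strunref() (not shown) decrements the count and frees the node at zero:

#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct istr {
	struct istr	*next;
	uint32_t	refcnt;
	char		str[];	/* flexible array, like dtst_str */
};
static struct istr *interned;

static const char *
strref(const char *s)
{
	struct istr *n;
	size_t len = strlen(s) + 1;

	for (n = interned; n != NULL; n = n->next) {
		if (strcmp(n->str, s) == 0) {
			n->refcnt++;	/* share the existing copy */
			return (n->str);
		}
	}
	if ((n = calloc(1, sizeof (*n) + len)) == NULL)
		return (NULL);	/* toy concession; the kernel sleeps */
	n->refcnt = 1;
	memcpy(n->str, s, len);
	n->next = interned;
	interned = n;
	return (n->str);
}

Because equal strings collapse to one node, pointer equality becomes string equality -- which is what lets dtrace_match_string() above be rewritten as a bare s == p comparison.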
+ * If the specified string is NULL, returns an empty string */ static char * -dtrace_strdup(const char *str) +dtrace_strref(const char *str) { + dtrace_string_t *s = NULL; size_t bufsize = (str != NULL ? strlen(str) : 0) + 1; - char *new = kmem_zalloc(bufsize, KM_SLEEP); - if (str != NULL) - (void) strlcpy(new, str, bufsize); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - return (new); + if (str == NULL) + str = ""; + + for (s = dtrace_hash_lookup_string(dtrace_strings, str); s != NULL; + s = *(DTRACE_HASHNEXT(dtrace_strings, s))) { + if (strncmp(str, s->dtst_str, bufsize) != 0) { + continue; + } + ASSERT(s->dtst_refcount != UINT32_MAX); + s->dtst_refcount++; + return s->dtst_str; + } + + s = kmem_zalloc(sizeof(dtrace_string_t) + bufsize, KM_SLEEP); + s->dtst_refcount = 1; + (void) strlcpy(s->dtst_str, str, bufsize); + + dtrace_hash_add(dtrace_strings, s); + + return s->dtst_str; +} + +static void +dtrace_strunref(const char *str) +{ + ASSERT(str != NULL); + dtrace_string_t *s = NULL; + size_t bufsize = strlen(str) + 1; + + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + + for (s = dtrace_hash_lookup_string(dtrace_strings, str); s != NULL; + s = *(DTRACE_HASHNEXT(dtrace_strings, s))) { + if (strncmp(str, s->dtst_str, bufsize) != 0) { + continue; + } + ASSERT(s->dtst_refcount != 0); + s->dtst_refcount--; + if (s->dtst_refcount == 0) { + dtrace_hash_remove(dtrace_strings, s); + kmem_free(s, sizeof(dtrace_string_t) + bufsize); + } + return; + } + panic("attempt to unref non-existent string %s", str); } #define DTRACE_ISALPHA(c) \ @@ -6922,10 +8077,14 @@ dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp) uint32_t priv; if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) { - /* - * For DTRACE_PRIV_ALL, the uid and zoneid don't matter. - */ - priv = DTRACE_PRIV_ALL; + if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed()) { + priv = DTRACE_PRIV_USER | DTRACE_PRIV_PROC | DTRACE_PRIV_OWNER; + } + else { + priv = DTRACE_PRIV_ALL; + } + *uidp = 0; + *zoneidp = 0; } else { *uidp = crgetuid(cr); *zoneidp = crgetzoneid(cr); @@ -7150,12 +8309,12 @@ top: case '\\': if ((c = *p++) == '\0') return (0); - /*FALLTHRU*/ + OS_FALLTHROUGH; default: if (c != s1) return (0); - /*FALLTHRU*/ + OS_FALLTHROUGH; case '?': if (s1 != '\0') @@ -7183,9 +8342,27 @@ static int dtrace_match_string(const char *s, const char *p, int depth) { #pragma unused(depth) /* __APPLE__ */ + return (s != NULL && s == p); +} - /* APPLE NOTE: Darwin employs size bounded string operation. */ - return (s != NULL && strncmp(s, p, strlen(s) + 1) == 0); +/*ARGSUSED*/ +static int +dtrace_match_module(const char *s, const char *p, int depth) +{ +#pragma unused(depth) /* __APPLE__ */ + size_t len; + if (s == NULL || p == NULL) + return (0); + + len = strlen(p); + + if (strncmp(p, s, len) != 0) + return (0); + + if (s[len] == '.' 
|| s[len] == '\0') + return (1); + + return (0); } /*ARGSUSED*/ @@ -7206,14 +8383,25 @@ dtrace_match_nonzero(const char *s, const char *p, int depth) static int dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, - zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *), void *arg) + zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *, void *), void *arg1, void *arg2) { - dtrace_probe_t template, *probe; + dtrace_probe_t *probe; + dtrace_provider_t prov_template = { + .dtpv_name = (char *)(uintptr_t)pkp->dtpk_prov + }; + + dtrace_probe_t template = { + .dtpr_provider = &prov_template, + .dtpr_mod = (char *)(uintptr_t)pkp->dtpk_mod, + .dtpr_func = (char *)(uintptr_t)pkp->dtpk_func, + .dtpr_name = (char *)(uintptr_t)pkp->dtpk_name + }; + dtrace_hash_t *hash = NULL; int len, rc, best = INT_MAX, nmatched = 0; dtrace_id_t i; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); /* * If the probe ID is specified in the key, just lookup by ID and @@ -7222,23 +8410,26 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, if (pkp->dtpk_id != DTRACE_IDNONE) { if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL && dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) { - if ((*matched)(probe, arg) == DTRACE_MATCH_FAIL) + if ((*matched)(probe, arg1, arg2) == DTRACE_MATCH_FAIL) return (DTRACE_MATCH_FAIL); nmatched++; } return (nmatched); } - template.dtpr_mod = (char *)(uintptr_t)pkp->dtpk_mod; - template.dtpr_func = (char *)(uintptr_t)pkp->dtpk_func; - template.dtpr_name = (char *)(uintptr_t)pkp->dtpk_name; - /* - * We want to find the most distinct of the module name, function - * name, and name. So for each one that is not a glob pattern or - * empty string, we perform a lookup in the corresponding hash and - * use the hash table with the fewest collisions to do our search. + * We want to find the most distinct of the provider name, module name, + * function name, and name. So for each one that is not a glob + * pattern or empty string, we perform a lookup in the corresponding + * hash and use the hash table with the fewest collisions to do our + * search. 
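The selection that follows is a running minimum over the candidate hashes: only components matched by dtrace_match_string (exact, non-glob, non-empty) can be looked up at all, and the chain with the fewest collisions wins. Schematically, with illustrative types:

#include <limits.h>
#include <stddef.h>

struct hash;			/* stand-in for dtrace_hash_t */

struct pick {
	struct hash	*h;	/* best hash so far, NULL if none */
	int		best;	/* start at INT_MAX, as in the driver */
};

static void
consider(struct pick *p, struct hash *h, int exact_key, int chain_len)
{
	/* Glob or empty components cannot use a hash lookup at all. */
	if (exact_key && chain_len < p->best) {
		p->best = chain_len;
		p->h = h;
	}
}

If no component qualifies, p->h stays NULL and the caller falls back to a linear scan of dtrace_probes[], much as the fallback loop in this function does.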
*/ + if (pkp->dtpk_pmatch == &dtrace_match_string && + (len = dtrace_hash_collisions(dtrace_byprov, &template)) < best) { + best = len; + hash = dtrace_byprov; + } + if (pkp->dtpk_mmatch == &dtrace_match_string && (len = dtrace_hash_collisions(dtrace_bymod, &template)) < best) { best = len; @@ -7270,7 +8461,7 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, nmatched++; - if ((rc = (*matched)(probe, arg)) != DTRACE_MATCH_NEXT) { + if ((rc = (*matched)(probe, arg1, arg2)) != DTRACE_MATCH_NEXT) { if (rc == DTRACE_MATCH_FAIL) return (DTRACE_MATCH_FAIL); break; @@ -7293,7 +8484,7 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, nmatched++; - if ((rc = (*matched)(probe, arg)) != DTRACE_MATCH_NEXT) { + if ((rc = (*matched)(probe, arg1, arg2)) != DTRACE_MATCH_NEXT) { if (rc == DTRACE_MATCH_FAIL) return (DTRACE_MATCH_FAIL); break; @@ -7325,6 +8516,24 @@ dtrace_probekey_func(const char *p) return (&dtrace_match_string); } +static dtrace_probekey_f * +dtrace_probekey_module_func(const char *p) +{ + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + + dtrace_probekey_f *f = dtrace_probekey_func(p); + if (f == &dtrace_match_string) { + dtrace_probe_t template = { + .dtpr_mod = (char *)(uintptr_t)p, + }; + if (dtrace_hash_lookup(dtrace_bymod, &template) == NULL) { + return (&dtrace_match_module); + } + return (&dtrace_match_string); + } + return f; +} + /* * Build a probe comparison key for use with dtrace_match_probe() from the * given probe description. By convention, a null key only matches anchored @@ -7334,16 +8543,17 @@ dtrace_probekey_func(const char *p) static void dtrace_probekey(const dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp) { - pkp->dtpk_prov = pdp->dtpd_provider; + + pkp->dtpk_prov = dtrace_strref(pdp->dtpd_provider); pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider); - pkp->dtpk_mod = pdp->dtpd_mod; - pkp->dtpk_mmatch = dtrace_probekey_func(pdp->dtpd_mod); + pkp->dtpk_mod = dtrace_strref(pdp->dtpd_mod); + pkp->dtpk_mmatch = dtrace_probekey_module_func(pdp->dtpd_mod); - pkp->dtpk_func = pdp->dtpd_func; + pkp->dtpk_func = dtrace_strref(pdp->dtpd_func); pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func); - pkp->dtpk_name = pdp->dtpd_name; + pkp->dtpk_name = dtrace_strref(pdp->dtpd_name); pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name); pkp->dtpk_id = pdp->dtpd_id; @@ -7356,6 +8566,26 @@ dtrace_probekey(const dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp) pkp->dtpk_fmatch = &dtrace_match_nonzero; } +static void +dtrace_probekey_release(dtrace_probekey_t *pkp) +{ + dtrace_strunref(pkp->dtpk_prov); + dtrace_strunref(pkp->dtpk_mod); + dtrace_strunref(pkp->dtpk_func); + dtrace_strunref(pkp->dtpk_name); +} + +static int +dtrace_cond_provider_match(dtrace_probedesc_t *desc, void *data) +{ + if (desc == NULL) + return 1; + + dtrace_probekey_f *func = dtrace_probekey_func(desc->dtpd_provider); + + return func((char*)data, desc->dtpd_provider, 0); +} + /* * DTrace Provider-to-Framework API Functions * @@ -7422,13 +8652,6 @@ dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv, provider = kmem_zalloc(sizeof (dtrace_provider_t), KM_SLEEP); - /* APPLE NOTE: Darwin employs size bounded string operation. 
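One subtlety from the matching changes above: dtrace_probekey_module_func() only downgrades an exact module key to prefix matching when no module hashes to that exact name, and dtrace_match_module() then accepts the bare name or a dotted extension of it. The predicate, restated as standalone C (it mirrors the kernel routine):

#include <stddef.h>
#include <string.h>

/* Accept module "p" itself, or "p.<anything>". */
static int
match_module(const char *s, const char *p)
{
	size_t len;

	if (s == NULL || p == NULL)
		return (0);

	len = strlen(p);
	if (strncmp(p, s, len) != 0)
		return (0);

	return (s[len] == '.' || s[len] == '\0');
}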
*/ - { - size_t bufsize = strlen(name) + 1; - provider->dtpv_name = kmem_alloc(bufsize, KM_SLEEP); - (void) strlcpy(provider->dtpv_name, name, bufsize); - } - provider->dtpv_attr = *pap; provider->dtpv_priv.dtpp_flags = priv; if (cr != NULL) { @@ -7439,30 +8662,30 @@ dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv, if (pops->dtps_provide == NULL) { ASSERT(pops->dtps_provide_module != NULL); - provider->dtpv_pops.dtps_provide = - (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop; + provider->dtpv_pops.dtps_provide = dtrace_provide_nullop; } if (pops->dtps_provide_module == NULL) { ASSERT(pops->dtps_provide != NULL); provider->dtpv_pops.dtps_provide_module = - (void (*)(void *, struct modctl *))dtrace_nullop; + dtrace_provide_module_nullop; } if (pops->dtps_suspend == NULL) { ASSERT(pops->dtps_resume == NULL); - provider->dtpv_pops.dtps_suspend = - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop; - provider->dtpv_pops.dtps_resume = - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop; + provider->dtpv_pops.dtps_suspend = dtrace_suspend_nullop; + provider->dtpv_pops.dtps_resume = dtrace_resume_nullop; } provider->dtpv_arg = arg; *idp = (dtrace_provider_id_t)provider; if (pops == &dtrace_provider_ops) { - lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + + provider->dtpv_name = dtrace_strref(name); + ASSERT(dtrace_anon.dta_enabling == NULL); /* @@ -7477,6 +8700,8 @@ dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv, lck_mtx_lock(&dtrace_provider_lock); lck_mtx_lock(&dtrace_lock); + provider->dtpv_name = dtrace_strref(name); + /* * If there is at least one provider registered, we'll add this * provider after the first provider. @@ -7492,13 +8717,16 @@ dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv, dtrace_enabling_provide(provider); /* - * Now we need to call dtrace_enabling_matchall() -- which - * will acquire cpu_lock and dtrace_lock. We therefore need + * Now we need to call dtrace_enabling_matchall_with_cond() -- + * with a condition matching the provider name we just added, + * which will acquire cpu_lock and dtrace_lock. We therefore need * to drop all of our locks before calling into it... 
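The condition handed to dtrace_enabling_matchall_with_cond() is a bare predicate/argument pair, so the re-match only pays for enablings whose provider field could name the provider just added. A sketch of the filter's shape -- strcmp() stands in for the glob-aware dtrace_probekey_func() dispatch the kernel actually uses:

#include <stddef.h>
#include <string.h>

struct probedesc {
	const char *provider;	/* pattern from the enabling */
};

/* Nonzero when 'desc' could match probes from provider 'data';
 * a NULL desc means "match everything". */
static int
cond_provider_match(struct probedesc *desc, void *data)
{
	if (desc == NULL)
		return (1);

	return (strcmp((const char *)data, desc->provider) == 0);
}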
*/ lck_mtx_unlock(&dtrace_lock); lck_mtx_unlock(&dtrace_provider_lock); - dtrace_enabling_matchall(); + + dtrace_match_cond_t cond = {dtrace_cond_provider_match, provider->dtpv_name}; + dtrace_enabling_matchall_with_cond(&cond); return (0); } @@ -7518,8 +8746,11 @@ dtrace_unregister(dtrace_provider_id_t id) { dtrace_provider_t *old = (dtrace_provider_t *)id; dtrace_provider_t *prev = NULL; - int i, self = 0; - dtrace_probe_t *probe, *first = NULL; + int self = 0; + dtrace_probe_t *probe, *first = NULL, *next = NULL; + dtrace_probe_t template = { + .dtpr_provider = old + }; if (old->dtpv_pops.dtps_enable == (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop) { @@ -7529,8 +8760,8 @@ dtrace_unregister(dtrace_provider_id_t id) */ ASSERT(old == dtrace_provider); ASSERT(dtrace_devi != NULL); - lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); self = 1; if (dtrace_provider->dtpv_next != NULL) { @@ -7580,14 +8811,12 @@ dtrace_unregister(dtrace_provider_id_t id) * All of the probes for this provider are disabled; we can safely * remove all of them from their hash chains and from the probe array. */ - for (i = 0; i < dtrace_nprobes && old->dtpv_probe_count!=0; i++) { - if ((probe = dtrace_probes[i]) == NULL) - continue; - + for (probe = dtrace_hash_lookup(dtrace_byprov, &template); probe != NULL; + probe = *(DTRACE_HASHNEXT(dtrace_byprov, probe))) { if (probe->dtpr_provider != old) continue; - dtrace_probes[i] = NULL; + dtrace_probes[probe->dtpr_id - 1] = NULL; old->dtpv_probe_count--; dtrace_hash_remove(dtrace_bymod, probe); @@ -7598,11 +8827,19 @@ dtrace_unregister(dtrace_provider_id_t id) first = probe; probe->dtpr_nextmod = NULL; } else { + /* + * Use nextmod as the chain of probes to remove + */ probe->dtpr_nextmod = first; first = probe; } } + for (probe = first; probe != NULL; probe = next) { + next = probe->dtpr_nextmod; + dtrace_hash_remove(dtrace_byprov, probe); + } + /* * The provider's probes have been removed from the hash chains and * from the probe array. 
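That ordering -- unlink from every lookup structure, dtrace_sync(), only then destroy and free -- is the entire safety argument: a CPU that loaded a probe pointer just before the unlink must leave probe context before the memory can go away. As a schematic (empty stubs, illustrative names):

#include <stddef.h>

typedef struct probe probe_t;

static void unlink_from_hashes(probe_t *p) { (void)p; }	/* hash_remove x4 */
static void wait_for_probe_context(void) { }		/* dtrace_sync() */
static void destroy_and_free(probe_t *p) { (void)p; }

static void
retire(probe_t *p)
{
	unlink_from_hashes(p);		/* new lookups can no longer find p */
	wait_for_probe_context();	/* drain CPUs mid-firing */
	destroy_and_free(p);		/* no reader can still hold p */
}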
Now issue a dtrace_sync() to be sure that @@ -7610,14 +8847,14 @@ dtrace_unregister(dtrace_provider_id_t id) */ dtrace_sync(); - for (probe = first; probe != NULL; probe = first) { - first = probe->dtpr_nextmod; + for (probe = first; probe != NULL; probe = next) { + next = probe->dtpr_nextmod; old->dtpv_pops.dtps_destroy(old->dtpv_arg, probe->dtpr_id, probe->dtpr_arg); - kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); - kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); - kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); + dtrace_strunref(probe->dtpr_mod); + dtrace_strunref(probe->dtpr_func); + dtrace_strunref(probe->dtpr_name); vmem_free(dtrace_arena, (void *)(uintptr_t)(probe->dtpr_id), 1); zfree(dtrace_probe_t_zone, probe); } @@ -7638,13 +8875,14 @@ dtrace_unregister(dtrace_provider_id_t id) prev->dtpv_next = old->dtpv_next; } + dtrace_strunref(old->dtpv_name); + if (!self) { lck_mtx_unlock(&dtrace_lock); lck_mtx_unlock(&mod_lock); lck_mtx_unlock(&dtrace_provider_lock); } - kmem_free(old->dtpv_name, strlen(old->dtpv_name) + 1); kmem_free(old, sizeof (dtrace_provider_t)); return (0); @@ -7694,8 +8932,10 @@ int dtrace_condense(dtrace_provider_id_t id) { dtrace_provider_t *prov = (dtrace_provider_t *)id; - int i; - dtrace_probe_t *probe; + dtrace_probe_t *probe, *first = NULL; + dtrace_probe_t template = { + .dtpr_provider = prov + }; /* * Make sure this isn't the dtrace provider itself. @@ -7709,9 +8949,8 @@ dtrace_condense(dtrace_provider_id_t id) /* * Attempt to destroy the probes associated with this provider. */ - for (i = 0; i < dtrace_nprobes; i++) { - if ((probe = dtrace_probes[i]) == NULL) - continue; + for (probe = dtrace_hash_lookup(dtrace_byprov, &template); probe != NULL; + probe = *(DTRACE_HASHNEXT(dtrace_byprov, probe))) { if (probe->dtpr_provider != prov) continue; @@ -7719,20 +8958,35 @@ dtrace_condense(dtrace_provider_id_t id) if (probe->dtpr_ecb != NULL) continue; - dtrace_probes[i] = NULL; + dtrace_probes[probe->dtpr_id - 1] = NULL; prov->dtpv_probe_count--; dtrace_hash_remove(dtrace_bymod, probe); dtrace_hash_remove(dtrace_byfunc, probe); dtrace_hash_remove(dtrace_byname, probe); - prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, i + 1, + prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg); - kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); - kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); - kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); + dtrace_strunref(probe->dtpr_mod); + dtrace_strunref(probe->dtpr_func); + dtrace_strunref(probe->dtpr_name); + if (first == NULL) { + first = probe; + probe->dtpr_nextmod = NULL; + } else { + /* + * Use nextmod as the chain of probes to remove + */ + probe->dtpr_nextmod = first; + first = probe; + } + } + + for (probe = first; probe != NULL; probe = first) { + first = probe->dtpr_nextmod; + dtrace_hash_remove(dtrace_byprov, probe); + vmem_free(dtrace_arena, (void *)((uintptr_t)probe->dtpr_id), 1); zfree(dtrace_probe_t_zone, probe); - vmem_free(dtrace_arena, (void *)((uintptr_t)i + 1), 1); } lck_mtx_unlock(&dtrace_lock); @@ -7763,7 +9017,7 @@ dtrace_probe_create(dtrace_provider_id_t prov, const char *mod, dtrace_id_t id; if (provider == dtrace_provider) { - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); } else { lck_mtx_lock(&dtrace_lock); } @@ -7776,49 +9030,38 @@ dtrace_probe_create(dtrace_provider_id_t prov, const char *mod, probe->dtpr_id = id; probe->dtpr_gen = dtrace_probegen++; - 
probe->dtpr_mod = dtrace_strdup(mod); - probe->dtpr_func = dtrace_strdup(func); - probe->dtpr_name = dtrace_strdup(name); + probe->dtpr_mod = dtrace_strref(mod); + probe->dtpr_func = dtrace_strref(func); + probe->dtpr_name = dtrace_strref(name); probe->dtpr_arg = arg; probe->dtpr_aframes = aframes; probe->dtpr_provider = provider; + dtrace_hash_add(dtrace_byprov, probe); dtrace_hash_add(dtrace_bymod, probe); dtrace_hash_add(dtrace_byfunc, probe); dtrace_hash_add(dtrace_byname, probe); if (id - 1 >= (dtrace_id_t)dtrace_nprobes) { size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *); - size_t nsize = osize << 1; - - if (nsize == 0) { - ASSERT(osize == 0); - ASSERT(dtrace_probes == NULL); - nsize = sizeof (dtrace_probe_t *); - } + size_t nsize = osize * 2; probes = kmem_zalloc(nsize, KM_SLEEP); - if (dtrace_probes == NULL) { - ASSERT(osize == 0); - dtrace_probes = probes; - dtrace_nprobes = 1; - } else { - dtrace_probe_t **oprobes = dtrace_probes; + dtrace_probe_t **oprobes = dtrace_probes; - bcopy(oprobes, probes, osize); - dtrace_membar_producer(); - dtrace_probes = probes; + bcopy(oprobes, probes, osize); + dtrace_membar_producer(); + dtrace_probes = probes; - dtrace_sync(); + dtrace_sync(); - /* - * All CPUs are now seeing the new probes array; we can - * safely free the old array. - */ - kmem_free(oprobes, osize); - dtrace_nprobes <<= 1; - } + /* + * All CPUs are now seeing the new probes array; we can + * safely free the old array. + */ + kmem_free(oprobes, osize); + dtrace_nprobes *= 2; ASSERT(id - 1 < (dtrace_id_t)dtrace_nprobes); } @@ -7836,7 +9079,7 @@ dtrace_probe_create(dtrace_provider_id_t prov, const char *mod, static dtrace_probe_t * dtrace_probe_lookup_id(dtrace_id_t id) { - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if (id == 0 || id > (dtrace_id_t)dtrace_nprobes) return (NULL); @@ -7845,9 +9088,10 @@ dtrace_probe_lookup_id(dtrace_id_t id) } static int -dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg) +dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg1, void *arg2) { - *((dtrace_id_t *)arg) = probe->dtpr_id; +#pragma unused(arg2) + *((dtrace_id_t *)arg1) = probe->dtpr_id; return (DTRACE_MATCH_DONE); } @@ -7864,19 +9108,23 @@ dtrace_probe_lookup(dtrace_provider_id_t prid, const char *mod, dtrace_id_t id; int match; - pkey.dtpk_prov = ((dtrace_provider_t *)prid)->dtpv_name; + lck_mtx_lock(&dtrace_lock); + + pkey.dtpk_prov = dtrace_strref(((dtrace_provider_t *)prid)->dtpv_name); pkey.dtpk_pmatch = &dtrace_match_string; - pkey.dtpk_mod = mod; + pkey.dtpk_mod = dtrace_strref(mod); pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul; - pkey.dtpk_func = func; + pkey.dtpk_func = dtrace_strref(func); pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul; - pkey.dtpk_name = name; + pkey.dtpk_name = dtrace_strref(name); pkey.dtpk_nmatch = name ? 
&dtrace_match_string : &dtrace_match_nul; pkey.dtpk_id = DTRACE_IDNONE; - lck_mtx_lock(&dtrace_lock); match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0, - dtrace_probe_lookup_match, &id); + dtrace_probe_lookup_match, &id, NULL); + + dtrace_probekey_release(&pkey); + lck_mtx_unlock(&dtrace_lock); ASSERT(match == 1 || match == 0); @@ -7942,7 +9190,7 @@ dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv) struct modctl *ctl; int all = 0; - lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); if (prv == NULL) { all = 1; @@ -8015,14 +9263,15 @@ dtrace_probe_foreach(uintptr_t offs) } static int -dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab) +dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab, dtrace_ecbdesc_t *ep) { dtrace_probekey_t pkey; uint32_t priv; uid_t uid; zoneid_t zoneid; + int err; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); dtrace_ecb_create_cache = NULL; @@ -8031,7 +9280,7 @@ dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab) * If we're passed a NULL description, we're being asked to * create an ECB with a NULL probe. */ - (void) dtrace_ecb_create_enable(NULL, enab); + (void) dtrace_ecb_create_enable(NULL, enab, ep); return (0); } @@ -8039,8 +9288,11 @@ dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab) dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred, &priv, &uid, &zoneid); - return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable, - enab)); + err = dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable, enab, ep); + + dtrace_probekey_release(&pkey); + + return err; } /* @@ -8072,7 +9324,7 @@ dtrace_dofprov2hprov(dtrace_helper_provdesc_t *hprov, } static void -dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) +dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, proc_t *p) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; @@ -8121,7 +9373,7 @@ dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) */ dtrace_dofprov2hprov(&dhpv, provider, strtab); - if ((parg = mops->dtms_provide_pid(meta->dtm_arg, &dhpv, pid)) == NULL) + if ((parg = mops->dtms_provide_proc(meta->dtm_arg, &dhpv, p)) == NULL) return; meta->dtm_count++; @@ -8158,16 +9410,27 @@ dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) mops->dtms_create_probe(meta->dtm_arg, parg, &dhpb); } + + /* + * Since we just created probes, we need to match our enablings + * against those, with a precondition knowing that we have only + * added probes from this provider + */ + char *prov_name = mops->dtms_provider_name(parg); + ASSERT(prov_name != NULL); + dtrace_match_cond_t cond = {dtrace_cond_provider_match, (void*)prov_name}; + + dtrace_enabling_matchall_with_cond(&cond); } static void -dtrace_helper_provide(dof_helper_t *dhp, pid_t pid) +dtrace_helper_provide(dof_helper_t *dhp, proc_t *p) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; uint32_t i; - lck_mtx_assert(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); for (i = 0; i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + @@ -8176,21 +9439,12 @@ dtrace_helper_provide(dof_helper_t *dhp, pid_t pid) if (sec->dofs_type != DOF_SECT_PROVIDER) continue; - 
dtrace_helper_provide_one(dhp, sec, pid); + dtrace_helper_provide_one(dhp, sec, p); } - - /* - * We may have just created probes, so we must now rematch against - * any retained enablings. Note that this call will acquire both - * cpu_lock and dtrace_lock; the fact that we are holding - * dtrace_meta_lock now is what defines the ordering with respect to - * these three locks. - */ - dtrace_enabling_matchall(); } static void -dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) +dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, proc_t *p) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; @@ -8212,19 +9466,19 @@ dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) */ dtrace_dofprov2hprov(&dhpv, provider, strtab); - mops->dtms_remove_pid(meta->dtm_arg, &dhpv, pid); + mops->dtms_remove_proc(meta->dtm_arg, &dhpv, p); meta->dtm_count--; } static void -dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid) +dtrace_helper_provider_remove(dof_helper_t *dhp, proc_t *p) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; uint32_t i; - lck_mtx_assert(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); for (i = 0; i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + @@ -8233,7 +9487,7 @@ dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid) if (sec->dofs_type != DOF_SECT_PROVIDER) continue; - dtrace_helper_provider_remove_one(dhp, sec, pid); + dtrace_helper_provider_remove_one(dhp, sec, p); } } @@ -8265,8 +9519,8 @@ dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg, if (mops == NULL || mops->dtms_create_probe == NULL || - mops->dtms_provide_pid == NULL || - mops->dtms_remove_pid == NULL) { + mops->dtms_provide_proc == NULL || + mops->dtms_remove_proc == NULL) { cmn_err(CE_WARN, "failed to register meta-register %s: " "invalid ops", name); return (EINVAL); @@ -8274,14 +9528,6 @@ dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg, meta = kmem_zalloc(sizeof (dtrace_meta_t), KM_SLEEP); meta->dtm_mops = *mops; - - /* APPLE NOTE: Darwin employs size bounded string operation. 
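Alongside the pid-to-proc_t migration above, dtrace_meta_register() now insists on the proc-based hooks up front, so probe context never has to NULL-check them. The guard amounts to this (illustrative types; the real check reports failures via cmn_err and returns EINVAL):

#include <stddef.h>

struct meta_ops {
	void	*(*provide_proc)(void *, void *, void *);
	void	(*remove_proc)(void *, void *, void *);
	void	(*create_probe)(void *, void *, void *);
};

static int
meta_ops_valid(const struct meta_ops *mops)
{
	return (mops != NULL &&
	    mops->create_probe != NULL &&
	    mops->provide_proc != NULL &&
	    mops->remove_proc != NULL);
}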
*/ - { - size_t bufsize = strlen(name) + 1; - meta->dtm_name = kmem_alloc(bufsize, KM_SLEEP); - (void) strlcpy(meta->dtm_name, name, bufsize); - } - meta->dtm_arg = arg; lck_mtx_lock(&dtrace_meta_lock); @@ -8292,11 +9538,12 @@ dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg, lck_mtx_unlock(&dtrace_meta_lock); cmn_err(CE_WARN, "failed to register meta-register %s: " "user-land meta-provider exists", name); - kmem_free(meta->dtm_name, strlen(meta->dtm_name) + 1); kmem_free(meta, sizeof (dtrace_meta_t)); return (EINVAL); } + meta->dtm_name = dtrace_strref(name); + dtrace_meta_pid = meta; *idp = (dtrace_meta_provider_id_t)meta; @@ -8312,8 +9559,12 @@ dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg, while (help != NULL) { for (i = 0; i < help->dthps_nprovs; i++) { + proc_t *p = proc_find(help->dthps_pid); + if (p == PROC_NULL) + continue; dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov, - help->dthps_pid); + p); + proc_rele(p); } next = help->dthps_next; @@ -8351,10 +9602,11 @@ dtrace_meta_unregister(dtrace_meta_provider_id_t id) *pp = NULL; + dtrace_strunref(old->dtm_name); + lck_mtx_unlock(&dtrace_lock); lck_mtx_unlock(&dtrace_meta_lock); - kmem_free(old->dtm_name, strlen(old->dtm_name) + 1); kmem_free(old, sizeof (dtrace_meta_t)); return (0); @@ -8403,6 +9655,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err; int kcheckload; uint_t pc; + int maxglobal = -1, maxlocal = -1, maxtlocal = -1; kcheckload = cr == NULL || (vstate->dtvs_state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) == 0; @@ -8444,7 +9697,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) - err += efunc(pc, "cannot write to %r0\n"); + err += efunc(pc, "cannot write to %%r0\n"); break; case DIF_OP_NOT: case DIF_OP_MOV: @@ -8456,7 +9709,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) - err += efunc(pc, "cannot write to %r0\n"); + err += efunc(pc, "cannot write to %%r0\n"); break; case DIF_OP_LDSB: case DIF_OP_LDSH: @@ -8472,7 +9725,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) - err += efunc(pc, "cannot write to %r0\n"); + err += efunc(pc, "cannot write to %%r0\n"); if (kcheckload) dp->dtdo_buf[pc] = DIF_INSTR_LOAD(op + DIF_OP_RLDSB - DIF_OP_LDSB, r1, rd); @@ -8491,7 +9744,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) - err += efunc(pc, "cannot write to %r0\n"); + err += efunc(pc, "cannot write to %%r0\n"); break; case DIF_OP_ULDSB: case DIF_OP_ULDSH: @@ -8507,7 +9760,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) - err += efunc(pc, "cannot write to %r0\n"); + err += efunc(pc, "cannot write to %%r0\n"); break; case DIF_OP_STB: case DIF_OP_STH: @@ -8577,7 +9830,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) - err += efunc(pc, "cannot write to %r0\n"); + err += efunc(pc, "cannot write to %%r0\n"); break; case 
DIF_OP_SETS: if (DIF_INSTR_STRING(instr) >= dp->dtdo_strlen) { @@ -8587,7 +9840,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) - err += efunc(pc, "cannot write to %r0\n"); + err += efunc(pc, "cannot write to %%r0\n"); break; case DIF_OP_LDGA: case DIF_OP_LDTA: @@ -8598,7 +9851,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) - err += efunc(pc, "cannot write to %r0\n"); + err += efunc(pc, "cannot write to %%r0\n"); break; case DIF_OP_LDGS: case DIF_OP_LDTS: @@ -8610,7 +9863,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) - err += efunc(pc, "cannot write to %r0\n"); + err += efunc(pc, "cannot write to %%r0\n"); break; case DIF_OP_STGS: case DIF_OP_STTS: @@ -8623,16 +9876,25 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, err += efunc(pc, "invalid register %u\n", rd); break; case DIF_OP_CALL: - if (subr > DIF_SUBR_MAX) + if (subr > DIF_SUBR_MAX && + !(subr >= DIF_SUBR_APPLE_MIN && subr <= DIF_SUBR_APPLE_MAX)) err += efunc(pc, "invalid subr %u\n", subr); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) - err += efunc(pc, "cannot write to %r0\n"); - - if (subr == DIF_SUBR_COPYOUT || - subr == DIF_SUBR_COPYOUTSTR) { + err += efunc(pc, "cannot write to %%r0\n"); + + switch (subr) { + case DIF_SUBR_COPYOUT: + case DIF_SUBR_COPYOUTSTR: + case DIF_SUBR_KDEBUG_TRACE: + case DIF_SUBR_KDEBUG_TRACE_STRING: + case DIF_SUBR_PHYSMEM_READ: + case DIF_SUBR_PHYSMEM_WRITE: dp->dtdo_destructive = 1; + break; + default: + break; } break; case DIF_OP_PUSHTR: @@ -8651,6 +9913,16 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, if (rs >= nregs) err += efunc(pc, "invalid register %u\n", rs); break; + case DIF_OP_STRIP: + if (r1 >= nregs) + err += efunc(pc, "invalid register %u\n", r1); + if (!dtrace_is_valid_ptrauth_key(r2)) + err += efunc(pc, "invalid key\n"); + if (rd >= nregs) + err += efunc(pc, "invalid register %u\n", rd); + if (rd == 0) + err += efunc(pc, "cannot write to %%r0\n"); + break; default: err += efunc(pc, "invalid opcode %u\n", DIF_INSTR_OP(instr)); @@ -8663,7 +9935,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, "expected 'ret' as last DIF instruction\n"); } - if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) { + if (!(dp->dtdo_rtype.dtdt_flags & (DIF_TF_BYREF | DIF_TF_BYUREF))) { /* * If we're not returning by reference, the size must be either * 0 or the size of one of the base types. 
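Two conventions recur through this validator: every diagnostic routes through a printf-style efunc() that returns 1, so errors accumulate rather than abort, and a write to r0 is always refused. The latter is also why this diff doubles the percent signs ("cannot write to %r0" became "%%r0"): the literal is fed to a format parser, so the old string was itself a format bug. A reduced skeleton (illustrative):

#include <stdarg.h>
#include <stdio.h>

typedef unsigned int uint_t;

/* Each diagnostic is worth exactly one error; never stop at the first. */
static int
efunc(uint_t pc, const char *fmt, ...)
{
	va_list ap;

	(void) fprintf(stderr, "dif: pc %u: ", pc);
	va_start(ap, fmt);
	(void) vfprintf(stderr, fmt, ap);
	va_end(ap);
	return (1);
}

/* The per-opcode pattern for any instruction that writes rd. */
static int
check_write(uint_t pc, uint_t rd, uint_t nregs)
{
	int err = 0;

	if (rd >= nregs)
		err += efunc(pc, "invalid register %u\n", rd);
	if (rd == 0)
		err += efunc(pc, "cannot write to %%r0\n");
	return (err);
}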
@@ -8719,6 +9991,9 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, switch (v->dtdv_scope) { case DIFV_SCOPE_GLOBAL: + if (maxglobal == -1 || ndx > maxglobal) + maxglobal = ndx; + if (ndx < vstate->dtvs_nglobals) { dtrace_statvar_t *svar; @@ -8729,11 +10004,16 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, break; case DIFV_SCOPE_THREAD: + if (maxtlocal == -1 || ndx > maxtlocal) + maxtlocal = ndx; + if (ndx < vstate->dtvs_ntlocals) existing = &vstate->dtvs_tlocals[ndx]; break; case DIFV_SCOPE_LOCAL: + if (maxlocal == -1 || ndx > maxlocal) + maxlocal = ndx; if (ndx < vstate->dtvs_nlocals) { dtrace_statvar_t *svar; @@ -8752,9 +10032,10 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, break; } - if (v->dtdv_scope == DIFV_SCOPE_GLOBAL && - vt->dtdt_size > dtrace_global_maxsize) { - err += efunc(i, "oversized by-ref global\n"); + if ((v->dtdv_scope == DIFV_SCOPE_GLOBAL || + v->dtdv_scope == DIFV_SCOPE_LOCAL) && + vt->dtdt_size > dtrace_statvar_maxsize) { + err += efunc(i, "oversized by-ref static\n"); break; } } @@ -8781,6 +10062,37 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, } } + for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) { + dif_instr_t instr = dp->dtdo_buf[pc]; + + uint_t v = DIF_INSTR_VAR(instr); + uint_t op = DIF_INSTR_OP(instr); + + switch (op) { + case DIF_OP_LDGS: + case DIF_OP_LDGAA: + case DIF_OP_STGS: + case DIF_OP_STGAA: + if (v > (uint_t)(DIF_VAR_OTHER_UBASE + maxglobal)) + err += efunc(pc, "invalid variable %u\n", v); + break; + case DIF_OP_LDTS: + case DIF_OP_LDTAA: + case DIF_OP_STTS: + case DIF_OP_STTAA: + if (v > (uint_t)(DIF_VAR_OTHER_UBASE + maxtlocal)) + err += efunc(pc, "invalid variable %u\n", v); + break; + case DIF_OP_LDLS: + case DIF_OP_STLS: + if (v > (uint_t)(DIF_VAR_OTHER_UBASE + maxlocal)) + err += efunc(pc, "invalid variable %u\n", v); + break; + default: + break; + } + } + return (err); } @@ -8904,31 +10216,34 @@ dtrace_difo_validate_helper(dtrace_difo_t *dp) break; case DIF_OP_CALL: - if (subr == DIF_SUBR_ALLOCA || - subr == DIF_SUBR_BCOPY || - subr == DIF_SUBR_COPYIN || - subr == DIF_SUBR_COPYINTO || - subr == DIF_SUBR_COPYINSTR || - subr == DIF_SUBR_INDEX || - subr == DIF_SUBR_INET_NTOA || - subr == DIF_SUBR_INET_NTOA6 || - subr == DIF_SUBR_INET_NTOP || - subr == DIF_SUBR_LLTOSTR || - subr == DIF_SUBR_RINDEX || - subr == DIF_SUBR_STRCHR || - subr == DIF_SUBR_STRJOIN || - subr == DIF_SUBR_STRRCHR || - subr == DIF_SUBR_STRSTR || - subr == DIF_SUBR_COREPROFILE || - subr == DIF_SUBR_HTONS || - subr == DIF_SUBR_HTONL || - subr == DIF_SUBR_HTONLL || - subr == DIF_SUBR_NTOHS || - subr == DIF_SUBR_NTOHL || - subr == DIF_SUBR_NTOHLL) + switch (subr) { + case DIF_SUBR_ALLOCA: + case DIF_SUBR_BCOPY: + case DIF_SUBR_COPYIN: + case DIF_SUBR_COPYINTO: + case DIF_SUBR_COPYINSTR: + case DIF_SUBR_HTONS: + case DIF_SUBR_HTONL: + case DIF_SUBR_HTONLL: + case DIF_SUBR_INDEX: + case DIF_SUBR_INET_NTOA: + case DIF_SUBR_INET_NTOA6: + case DIF_SUBR_INET_NTOP: + case DIF_SUBR_JSON: + case DIF_SUBR_LLTOSTR: + case DIF_SUBR_NTOHS: + case DIF_SUBR_NTOHL: + case DIF_SUBR_NTOHLL: + case DIF_SUBR_RINDEX: + case DIF_SUBR_STRCHR: + case DIF_SUBR_STRTOLL: + case DIF_SUBR_STRJOIN: + case DIF_SUBR_STRRCHR: + case DIF_SUBR_STRSTR: break; - - err += efunc(pc, "invalid subr %u\n", subr); + default: + err += efunc(pc, "invalid subr %u\n", subr); + } break; default: @@ -8994,7 +10309,7 @@ dtrace_difo_hold(dtrace_difo_t *dp) { uint_t i; - 
lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); dp->dtdo_refcnt++; ASSERT(dp->dtdo_refcnt != 0); @@ -9090,6 +10405,9 @@ dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate) if (srd == 0) return; + if (sval > LONG_MAX) + return; + tupregs[ttop++].dttk_size = sval; } @@ -9151,6 +10469,19 @@ dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate) */ size = P2ROUNDUP(size, sizeof (uint64_t)); + /* + * Before setting the chunk size, check that we're not going + * to set it to a negative value... + */ + if (size > LONG_MAX) + return; + + /* + * ...and make certain that we didn't badly overflow. + */ + if (size < ksize || size < sizeof (dtrace_dynvar_t)) + return; + if (size > vstate->dtvs_dynvars.dtds_chunksize) vstate->dtvs_dynvars.dtds_chunksize = size; } @@ -9162,7 +10493,7 @@ dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate) int oldsvars, osz, nsz, otlocals, ntlocals; uint_t i, id; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dp->dtdo_buf != NULL && dp->dtdo_len != 0); for (i = 0; i < dp->dtdo_varlen; i++) { @@ -9384,7 +10715,7 @@ dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { uint_t i; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dp->dtdo_refcnt != 0); for (i = 0; i < dp->dtdo_varlen; i++) { @@ -9405,18 +10736,35 @@ dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate) /* * DTrace Format Functions */ + +static dtrace_format_t* +dtrace_format_new(char *str) +{ + dtrace_format_t *fmt = NULL; + size_t bufsize = strlen(str) + 1; + + fmt = kmem_zalloc(sizeof(*fmt) + bufsize, KM_SLEEP); + + fmt->dtf_refcount = 1; + (void) strlcpy(fmt->dtf_str, str, bufsize); + + return fmt; +} + static uint16_t dtrace_format_add(dtrace_state_t *state, char *str) { - char *fmt, **new; - uint16_t ndx, len = strlen(str) + 1; - - fmt = kmem_zalloc(len, KM_SLEEP); - bcopy(str, fmt, len); + dtrace_format_t **new; + uint16_t ndx; for (ndx = 0; ndx < state->dts_nformats; ndx++) { if (state->dts_formats[ndx] == NULL) { - state->dts_formats[ndx] = fmt; + state->dts_formats[ndx] = dtrace_format_new(str); + return (ndx + 1); + } + else if (strcmp(state->dts_formats[ndx]->dtf_str, str) == 0) { + VERIFY(state->dts_formats[ndx]->dtf_refcount < UINT64_MAX); + state->dts_formats[ndx]->dtf_refcount++; return (ndx + 1); } } @@ -9426,7 +10774,6 @@ dtrace_format_add(dtrace_state_t *state, char *str) * This is only likely if a denial-of-service attack is being * attempted. As such, it's okay to fail silently here. */ - kmem_free(fmt, len); return (0); } @@ -9435,16 +10782,16 @@ dtrace_format_add(dtrace_state_t *state, char *str) * number of formats. 
*/ ndx = state->dts_nformats++; - new = kmem_alloc((ndx + 1) * sizeof (char *), KM_SLEEP); + new = kmem_alloc((ndx + 1) * sizeof (*state->dts_formats), KM_SLEEP); if (state->dts_formats != NULL) { ASSERT(ndx != 0); - bcopy(state->dts_formats, new, ndx * sizeof (char *)); - kmem_free(state->dts_formats, ndx * sizeof (char *)); + bcopy(state->dts_formats, new, ndx * sizeof (*state->dts_formats)); + kmem_free(state->dts_formats, ndx * sizeof (*state->dts_formats)); } state->dts_formats = new; - state->dts_formats[ndx] = fmt; + state->dts_formats[ndx] = dtrace_format_new(str); return (ndx + 1); } @@ -9452,15 +10799,22 @@ dtrace_format_add(dtrace_state_t *state, char *str) static void dtrace_format_remove(dtrace_state_t *state, uint16_t format) { - char *fmt; + dtrace_format_t *fmt; ASSERT(state->dts_formats != NULL); ASSERT(format <= state->dts_nformats); - ASSERT(state->dts_formats[format - 1] != NULL); fmt = state->dts_formats[format - 1]; - kmem_free(fmt, strlen(fmt) + 1); - state->dts_formats[format - 1] = NULL; + + ASSERT(fmt != NULL); + VERIFY(fmt->dtf_refcount > 0); + + fmt->dtf_refcount--; + + if (fmt->dtf_refcount == 0) { + kmem_free(fmt, DTRACE_FORMAT_SIZE(fmt)); + state->dts_formats[format - 1] = NULL; + } } static void @@ -9476,15 +10830,15 @@ dtrace_format_destroy(dtrace_state_t *state) ASSERT(state->dts_formats != NULL); for (i = 0; i < state->dts_nformats; i++) { - char *fmt = state->dts_formats[i]; + dtrace_format_t *fmt = state->dts_formats[i]; if (fmt == NULL) continue; - kmem_free(fmt, strlen(fmt) + 1); + kmem_free(fmt, DTRACE_FORMAT_SIZE(fmt)); } - kmem_free(state->dts_formats, state->dts_nformats * sizeof (char *)); + kmem_free(state->dts_formats, state->dts_nformats * sizeof (*state->dts_formats)); state->dts_nformats = 0; state->dts_formats = NULL; } @@ -9497,7 +10851,7 @@ dtrace_predicate_create(dtrace_difo_t *dp) { dtrace_predicate_t *pred; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dp->dtdo_refcnt != 0); pred = kmem_zalloc(sizeof (dtrace_predicate_t), KM_SLEEP); @@ -9527,7 +10881,7 @@ dtrace_predicate_create(dtrace_difo_t *dp) static void dtrace_predicate_hold(dtrace_predicate_t *pred) { - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(pred->dtp_difo != NULL && pred->dtp_difo->dtdo_refcnt != 0); ASSERT(pred->dtp_refcnt > 0); @@ -9540,7 +10894,7 @@ dtrace_predicate_release(dtrace_predicate_t *pred, dtrace_vstate_t *vstate) dtrace_difo_t *dp = pred->dtp_difo; #pragma unused(dp) /* __APPLE__ */ - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dp != NULL && dp->dtdo_refcnt != 0); ASSERT(pred->dtp_refcnt > 0); @@ -9615,7 +10969,7 @@ dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe) dtrace_ecb_t *ecb; dtrace_epid_t epid; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ecb = kmem_zalloc(sizeof (dtrace_ecb_t), KM_SLEEP); ecb->dte_predicate = NULL; @@ -9681,8 +11035,8 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb) { dtrace_probe_t *probe = ecb->dte_probe; - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(ecb->dte_next == NULL); if (probe == NULL) { @@ -9722,7 +11076,7 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb) } } -static void +static int 
dtrace_ecb_resize(dtrace_ecb_t *ecb) { dtrace_action_t *act; @@ -9752,9 +11106,10 @@ dtrace_ecb_resize(dtrace_ecb_t *ecb) ASSERT(curneeded != UINT32_MAX); agg->dtag_base = aggbase; - curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment); rec->dtrd_offset = curneeded; + if (curneeded + rec->dtrd_size < curneeded) + return (EINVAL); curneeded += rec->dtrd_size; ecb->dte_needed = MAX(ecb->dte_needed, curneeded); @@ -9781,11 +11136,15 @@ dtrace_ecb_resize(dtrace_ecb_t *ecb) curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment); rec->dtrd_offset = curneeded; curneeded += rec->dtrd_size; + if (curneeded + rec->dtrd_size < curneeded) + return (EINVAL); } else { /* tuples must be followed by an aggregation */ ASSERT(act->dta_prev == NULL || !act->dta_prev->dta_intuple); ecb->dte_size = P2ROUNDUP(ecb->dte_size, rec->dtrd_alignment); rec->dtrd_offset = ecb->dte_size; + if (ecb->dte_size + rec->dtrd_size < ecb->dte_size) + return (EINVAL); ecb->dte_size += rec->dtrd_size; ecb->dte_needed = MAX(ecb->dte_needed, ecb->dte_size); } @@ -9804,6 +11163,7 @@ dtrace_ecb_resize(dtrace_ecb_t *ecb) ecb->dte_size = P2ROUNDUP(ecb->dte_size, sizeof (dtrace_epid_t)); ecb->dte_needed = P2ROUNDUP(ecb->dte_needed, (sizeof (dtrace_epid_t))); ecb->dte_state->dts_needed = MAX(ecb->dte_state->dts_needed, ecb->dte_needed); + return (0); } static dtrace_action_t * @@ -10021,7 +11381,7 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) dtrace_optval_t nframes=0, strsize; uint64_t arg = desc->dtad_arg; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(ecb->dte_action == NULL || ecb->dte_action->dta_refcnt == 1); if (DTRACEACT_ISAGG(desc->dtad_kind)) { @@ -10055,12 +11415,14 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) case DTRACEACT_PRINTA: case DTRACEACT_SYSTEM: case DTRACEACT_FREOPEN: + case DTRACEACT_DIFEXPR: /* * We know that our arg is a string -- turn it into a * format. 
*/ if (arg == 0) { - ASSERT(desc->dtad_kind == DTRACEACT_PRINTA); + ASSERT(desc->dtad_kind == DTRACEACT_PRINTA || + desc->dtad_kind == DTRACEACT_DIFEXPR); format = 0; } else { ASSERT(arg != 0); @@ -10069,9 +11431,8 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) (char *)(uintptr_t)arg); } - /*FALLTHROUGH*/ + OS_FALLTHROUGH; case DTRACEACT_LIBACT: - case DTRACEACT_DIFEXPR: case DTRACEACT_TRACEMEM: case DTRACEACT_TRACEMEM_DYNSIZE: case DTRACEACT_APPLEBINARY: /* __APPLE__ */ @@ -10109,7 +11470,7 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) arg = DTRACE_USTACK_ARG(nframes, strsize); - /*FALLTHROUGH*/ + OS_FALLTHROUGH; case DTRACEACT_USTACK: if (desc->dtad_kind != DTRACEACT_JSTACK && (nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) { @@ -10299,7 +11660,7 @@ dtrace_ecb_disable(dtrace_ecb_t *ecb) dtrace_ecb_t *pecb, *prev = NULL; dtrace_probe_t *probe = ecb->dte_probe; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if (probe == NULL) { /* @@ -10378,7 +11739,7 @@ dtrace_ecb_destroy(dtrace_ecb_t *ecb) dtrace_predicate_t *pred; dtrace_epid_t epid = ecb->dte_epid; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(ecb->dte_next == NULL); ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb); @@ -10403,7 +11764,7 @@ dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe, dtrace_provider_t *prov; dtrace_ecbdesc_t *desc = enab->dten_current; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(state != NULL); ecb = dtrace_ecb_add(state, probe); @@ -10473,21 +11834,25 @@ dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe, } } - dtrace_ecb_resize(ecb); + if ((enab->dten_error = dtrace_ecb_resize(ecb)) != 0) { + dtrace_ecb_destroy(ecb); + return (NULL); + } return (dtrace_ecb_create_cache = ecb); } static int -dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg) +dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg1, void *arg2) { dtrace_ecb_t *ecb; - dtrace_enabling_t *enab = arg; + dtrace_enabling_t *enab = arg1; + dtrace_ecbdesc_t *ep = arg2; dtrace_state_t *state = enab->dten_vstate->dtvs_state; ASSERT(state != NULL); - if (probe != NULL && probe->dtpr_gen < enab->dten_probegen) { + if (probe != NULL && ep != NULL && probe->dtpr_gen < ep->dted_probegen) { /* * This probe was created in a generation for which this * enabling has previously created ECBs; we don't want to @@ -10511,7 +11876,7 @@ dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id) dtrace_ecb_t *ecb; #pragma unused(ecb) /* __APPLE__ */ - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if (id == 0 || id > (dtrace_epid_t)state->dts_necbs) return (NULL); @@ -10528,7 +11893,7 @@ dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id) dtrace_aggregation_t *agg; #pragma unused(agg) /* __APPLE__ */ - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if (id == 0 || id > (dtrace_aggid_t)state->dts_naggregations) return (NULL); @@ -10580,6 +11945,8 @@ dtrace_buffer_switch(dtrace_buffer_t *buf) buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED); buf->dtb_interval = now - buf->dtb_switched; buf->dtb_switched = now; + buf->dtb_cur_limit = buf->dtb_limit; + dtrace_interrupt_enable(cookie); } @@ -10622,15 +11989,15 @@ dtrace_buffer_canalloc(size_t size) } static int 
-dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, +dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t limit, size_t size, int flags, processorid_t cpu) { dtrace_cpu_t *cp; dtrace_buffer_t *buf; size_t size_before_alloc = dtrace_buffer_memory_inuse; - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if (size > (size_t)dtrace_nonroot_maxsize && !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE)) return (EFBIG); @@ -10663,6 +12030,10 @@ dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, goto err; dtrace_buffer_memory_inuse += size; + /* Ensure that limit is always lower than size */ + limit = limit == size ? limit - 1 : limit; + buf->dtb_cur_limit = limit; + buf->dtb_limit = limit; buf->dtb_size = size; buf->dtb_flags = flags; buf->dtb_offset = 0; @@ -10762,9 +12133,27 @@ dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align, offs += sizeof (uint32_t); } - if ((uint64_t)(soffs = offs + needed) > buf->dtb_size) { - dtrace_buffer_drop(buf); - return (-1); + if ((uint64_t)(soffs = offs + needed) > buf->dtb_cur_limit) { + if (buf->dtb_cur_limit == buf->dtb_limit) { + buf->dtb_cur_limit = buf->dtb_size; + + os_atomic_inc(&state->dts_buf_over_limit, relaxed); + /** + * Set an AST on the current processor + * so that we can wake up the process + * outside of probe context, when we know + * it is safe to do so + */ + minor_t minor = getminor(state->dts_dev); + ASSERT(minor < 32); + + os_atomic_or(&dtrace_wake_clients, 1 << minor, relaxed); + ast_dtrace_on(); + } + if ((uint64_t)soffs > buf->dtb_size) { + dtrace_buffer_drop(buf); + return (-1); + } } if (mstate == NULL) @@ -10970,7 +12359,7 @@ static void dtrace_buffer_polish(dtrace_buffer_t *buf) { ASSERT(buf->dtb_flags & DTRACEBUF_RING); - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if (!(buf->dtb_flags & DTRACEBUF_WRAPPED)) return; @@ -11147,7 +12536,7 @@ dtrace_enabling_destroy(dtrace_enabling_t *enab) dtrace_ecbdesc_t *ep; dtrace_vstate_t *vstate = enab->dten_vstate; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); for (i = 0; i < enab->dten_ndesc; i++) { dtrace_actdesc_t *act, *next; @@ -11207,7 +12596,7 @@ dtrace_enabling_retain(dtrace_enabling_t *enab) { dtrace_state_t *state; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL); ASSERT(enab->dten_vstate != NULL); @@ -11242,7 +12631,7 @@ dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match, dtrace_enabling_t *new, *enab; int found = 0, err = ENOENT; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(strlen(match->dtpd_provider) < DTRACE_PROVNAMELEN); ASSERT(strlen(match->dtpd_mod) < DTRACE_MODNAMELEN); ASSERT(strlen(match->dtpd_func) < DTRACE_FUNCNAMELEN); @@ -11309,7 +12698,7 @@ dtrace_enabling_retract(dtrace_state_t *state) { dtrace_enabling_t *enab, *next; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); /* * Iterate over all retained enablings, destroy the enablings retained @@ -11334,13 +12723,13 @@ dtrace_enabling_retract(dtrace_state_t *state) } static int -dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched) 
+dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched, dtrace_match_cond_t *cond) { int i = 0; int total_matched = 0, matched = 0; - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); for (i = 0; i < enab->dten_ndesc; i++) { dtrace_ecbdesc_t *ep = enab->dten_desc[i]; @@ -11348,11 +12737,19 @@ dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched) enab->dten_current = ep; enab->dten_error = 0; + /** + * Before doing a dtrace_probe_enable, which is really + * expensive, check that this enabling matches the matching precondition + * if we have one + */ + if (cond && (cond->dmc_func(&ep->dted_probe, cond->dmc_data) == 0)) { + continue; + } /* * If a provider failed to enable a probe then get out and * let the consumer know we failed. */ - if ((matched = dtrace_probe_enable(&ep->dted_probe, enab)) < 0) + if ((matched = dtrace_probe_enable(&ep->dted_probe, enab, ep)) < 0) return (EBUSY); total_matched += matched; @@ -11379,9 +12776,10 @@ dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched) return (enab->dten_error); } + + ep->dted_probegen = dtrace_probegen; } - enab->dten_probegen = dtrace_probegen; if (nmatched != NULL) *nmatched = total_matched; @@ -11389,7 +12787,7 @@ dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched) } static void -dtrace_enabling_matchall(void) +dtrace_enabling_matchall_with_cond(dtrace_match_cond_t *cond) { dtrace_enabling_t *enab; @@ -11412,13 +12810,22 @@ dtrace_enabling_matchall(void) * Behave as if always in "global" zone." */ for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { - (void) dtrace_enabling_match(enab, NULL); + (void) dtrace_enabling_match(enab, NULL, cond); } lck_mtx_unlock(&dtrace_lock); lck_mtx_unlock(&cpu_lock); + +} + +static void +dtrace_enabling_matchall(void) +{ + dtrace_enabling_matchall_with_cond(NULL); } + + /* * If an enabling is to be enabled without having matched probes (that is, if * dtrace_state_go() is to be called on the underlying dtrace_state_t), the @@ -11453,7 +12860,7 @@ dtrace_enabling_prime(dtrace_state_t *state) for (i = 0; i < enab->dten_ndesc; i++) { enab->dten_current = enab->dten_desc[i]; - (void) dtrace_probe_enable(NULL, enab); + (void) dtrace_probe_enable(NULL, enab, NULL); } enab->dten_primed = 1; @@ -11473,8 +12880,8 @@ dtrace_enabling_provide(dtrace_provider_t *prv) dtrace_probedesc_t desc; dtrace_genid_t gen; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); if (prv == NULL) { all = 1; @@ -11541,9 +12948,9 @@ dtrace_dof_create(dtrace_state_t *state) roundup(sizeof (dof_sec_t), sizeof (uint64_t)) + sizeof (dof_optdesc_t) * DTRACEOPT_MAX; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - dof = dt_kmem_zalloc_aligned(len, 8, KM_SLEEP); + dof = kmem_zalloc_aligned(len, 8, KM_SLEEP); dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0; dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1; dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2; @@ -11594,7 +13001,7 @@ dtrace_dof_copyin(user_addr_t uarg, int *errp) { dof_hdr_t hdr, *dof; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); /* * First, we're going to copyin() the sizeof (dof_hdr_t). 
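The comment ending the hunk above introduces the pattern that both copyin paths below follow: read only the fixed-size header first, bound dofh_loadsz against dtrace_dof_maxsize, then allocate and copy the full DOF and re-check the declared size, closing the window in which user space could have changed it underneath us. A condensed sketch of that sequence using the same helpers as the surrounding code (dof_copyin_sketch is an illustrative name, and the error handling is abbreviated):

static dof_hdr_t *
dof_copyin_sketch(user_addr_t uarg, int *errp)
{
	dof_hdr_t hdr, *dof;

	/* Step 1: copy in the header alone to learn the full load size. */
	if (copyin(uarg, &hdr, sizeof (hdr)) != 0) {
		*errp = EFAULT;
		return (NULL);
	}

	/* Step 2: bound the size before trusting it. */
	if (hdr.dofh_loadsz < sizeof (hdr) ||
	    hdr.dofh_loadsz >= (uint64_t)dtrace_dof_maxsize) {
		*errp = E2BIG;
		return (NULL);
	}

	/* Step 3: copy the whole DOF, then re-check the declared size. */
	dof = kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP);
	if (copyin(uarg, dof, hdr.dofh_loadsz) != 0 ||
	    dof->dofh_loadsz != hdr.dofh_loadsz) {
		kmem_free_aligned(dof, hdr.dofh_loadsz);
		*errp = EFAULT;
		return (NULL);
	}

	return (dof);
}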
@@ -11621,11 +13028,11 @@ dtrace_dof_copyin(user_addr_t uarg, int *errp) return (NULL); } - dof = dt_kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP); + dof = kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP); if (copyin(uarg, dof, hdr.dofh_loadsz) != 0 || dof->dofh_loadsz != hdr.dofh_loadsz) { - dt_kmem_free_aligned(dof, hdr.dofh_loadsz); + kmem_free_aligned(dof, hdr.dofh_loadsz); *errp = EFAULT; return (NULL); } @@ -11638,7 +13045,7 @@ dtrace_dof_copyin_from_proc(proc_t* p, user_addr_t uarg, int *errp) { dof_hdr_t hdr, *dof; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); /* * First, we're going to copyin() the sizeof (dof_hdr_t). @@ -11665,10 +13072,10 @@ dtrace_dof_copyin_from_proc(proc_t* p, user_addr_t uarg, int *errp) return (NULL); } - dof = dt_kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP); + dof = kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP); if (uread(p, dof, hdr.dofh_loadsz, uarg) != KERN_SUCCESS) { - dt_kmem_free_aligned(dof, hdr.dofh_loadsz); + kmem_free_aligned(dof, hdr.dofh_loadsz); *errp = EFAULT; return (NULL); } @@ -11676,57 +13083,61 @@ dtrace_dof_copyin_from_proc(proc_t* p, user_addr_t uarg, int *errp) return (dof); } +static void +dtrace_dof_destroy(dof_hdr_t *dof) +{ + kmem_free_aligned(dof, dof->dofh_loadsz); +} + static dof_hdr_t * dtrace_dof_property(const char *name) { - uchar_t *buf; - uint64_t loadsz; - unsigned int len, i; + unsigned int len = 0; dof_hdr_t *dof; - /* - * Unfortunately, array of values in .conf files are always (and - * only) interpreted to be integer arrays. We must read our DOF - * as an integer array, and then squeeze it into a byte array. - */ - if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0, - name, (int **)&buf, &len) != DDI_PROP_SUCCESS) - return (NULL); + if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed()) { + return NULL; + } + + if (!PEReadNVRAMProperty(name, NULL, &len)) { + return NULL; + } - for (i = 0; i < len; i++) - buf[i] = (uchar_t)(((int *)buf)[i]); + dof = kmem_alloc_aligned(len, 8, KM_SLEEP); + + if (!PEReadNVRAMProperty(name, dof, &len)) { + dtrace_dof_destroy(dof); + dtrace_dof_error(NULL, "unreadable DOF"); + return NULL; + } if (len < sizeof (dof_hdr_t)) { - ddi_prop_free(buf); + dtrace_dof_destroy(dof); dtrace_dof_error(NULL, "truncated header"); return (NULL); } - if (len < (loadsz = ((dof_hdr_t *)buf)->dofh_loadsz)) { - ddi_prop_free(buf); + if (len < dof->dofh_loadsz) { + dtrace_dof_destroy(dof); dtrace_dof_error(NULL, "truncated DOF"); return (NULL); } - if (loadsz >= (uint64_t)dtrace_dof_maxsize) { - ddi_prop_free(buf); - dtrace_dof_error(NULL, "oversized DOF"); + if (len != dof->dofh_loadsz) { + dtrace_dof_destroy(dof); + dtrace_dof_error(NULL, "invalid DOF size"); return (NULL); } - dof = dt_kmem_alloc_aligned(loadsz, 8, KM_SLEEP); - bcopy(buf, dof, loadsz); - ddi_prop_free(buf); + if (dof->dofh_loadsz >= (uint64_t)dtrace_dof_maxsize) { + dtrace_dof_destroy(dof); + dtrace_dof_error(NULL, "oversized DOF"); + return (NULL); + } return (dof); } -static void -dtrace_dof_destroy(dof_hdr_t *dof) -{ - dt_kmem_free_aligned(dof, dof->dofh_loadsz); -} - /* * Return the dof_sec_t pointer corresponding to a given section index. If the * index is not valid, dtrace_dof_error() is called and NULL is returned. 
If @@ -12074,15 +13485,19 @@ dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, (uintptr_t)sec->dofs_offset + offs); kind = (dtrace_actkind_t)desc->dofa_kind; - if (DTRACEACT_ISPRINTFLIKE(kind) && - (kind != DTRACEACT_PRINTA || - desc->dofa_strtab != DOF_SECIDX_NONE)) { + if ((DTRACEACT_ISPRINTFLIKE(kind) && + (kind != DTRACEACT_PRINTA || desc->dofa_strtab != DOF_SECIDX_NONE)) || + (kind == DTRACEACT_DIFEXPR && desc->dofa_strtab != DOF_SECIDX_NONE)) + { dof_sec_t *strtab; char *str, *fmt; uint64_t i; /* - * printf()-like actions must have a format string. + * The argument to these actions is an index into the + * DOF string table. For printf()-like actions, this + * is the format string. For print(), this is the + * CTF type of the expression result. */ if ((strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL) @@ -12240,7 +13655,7 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, dtrace_enabling_t *enab; uint_t i; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dof->dofh_loadsz >= sizeof (dof_hdr_t)); /* @@ -12300,8 +13715,8 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, return (-1); } - if (dof->dofh_secsize == 0) { - dtrace_dof_error(dof, "zero section header size"); + if (dof->dofh_secsize < sizeof(dof_sec_t)) { + dtrace_dof_error(dof, "invalid section header size"); return (-1); } @@ -12482,7 +13897,7 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) dtrace_dynvar_t *dvar, *next, *start; size_t i; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL); bzero(dstate, sizeof (dtrace_dstate_t)); @@ -12490,6 +13905,8 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) if ((dstate->dtds_chunksize = chunksize) == 0) dstate->dtds_chunksize = DTRACE_DYNVAR_CHUNKSIZE; + VERIFY(dstate->dtds_chunksize < (LONG_MAX - sizeof (dtrace_dynhash_t))); + if (size < (min_size = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t))) size = min_size; @@ -12530,6 +13947,9 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) ((uintptr_t)base + hashsize * sizeof (dtrace_dynhash_t)); limit = (uintptr_t)base + size; + VERIFY((uintptr_t)start < limit); + VERIFY((uintptr_t)start >= (uintptr_t)base); + maxper = (limit - (uintptr_t)start) / (int)NCPU; maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize; @@ -12551,7 +13971,7 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) start = (dtrace_dynvar_t *)limit; } - ASSERT(limit <= (uintptr_t)base + size); + VERIFY(limit <= (uintptr_t)base + size); for (;;) { next = (dtrace_dynvar_t *)((uintptr_t)dvar + @@ -12560,6 +13980,8 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) if ((uintptr_t)next + dstate->dtds_chunksize >= limit) break; + VERIFY((uintptr_t)dvar >= (uintptr_t)base && + (uintptr_t)dvar <= (uintptr_t)base + size); dvar->dtdv_next = next; dvar = next; } @@ -12574,7 +13996,7 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) static void dtrace_dstate_fini(dtrace_dstate_t *dstate) { - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); if (dstate->dtds_base == NULL) return; @@ -12655,47 +14077,31 @@ dtrace_state_create(dev_t *devp, cred_t *cr, dtrace_state_t **new_state) dtrace_state_t *state; dtrace_optval_t *opt; int bufsize = (int)NCPU * sizeof (dtrace_buffer_t), i; + 
unsigned int cpu_it; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); /* Cause restart */ *new_state = NULL; - /* - * Darwin's DEVFS layer acquired the minor number for this "device" when it called - * dtrace_devfs_clone_func(). At that time, dtrace_devfs_clone_func() proposed a minor number - * (next unused according to vmem_alloc()) and then immediately put the number back in play - * (by calling vmem_free()). Now that minor number is being used for an open, so committing it - * to use. The following vmem_alloc() must deliver that same minor number. FIXME. - */ - - minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1, - VM_BESTFIT | VM_SLEEP); - - if (NULL != devp) { - ASSERT(getminor(*devp) == minor); - if (getminor(*devp) != minor) { - printf("dtrace_open: couldn't re-acquire vended minor number %d. Instead got %d\n", - getminor(*devp), minor); - vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); - return (ERESTART); /* can't reacquire */ - } - } else { - /* NULL==devp iff "Anonymous state" (see dtrace_anon_property), - * so just vend the minor device number here de novo since no "open" has occurred. */ + if (devp != NULL) { + minor = getminor(*devp); + } + else { + minor = DTRACE_NCLIENTS - 1; } - if (ddi_soft_state_zalloc(dtrace_softstate, minor) != DDI_SUCCESS) { - vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); - return (EAGAIN); /* temporary resource shortage */ + state = dtrace_state_allocate(minor); + if (NULL == state) { + printf("dtrace_open: couldn't acquire minor number %d. This usually means that too many DTrace clients are in use at the moment", minor); + return (ERESTART); /* can't reacquire */ } - state = ddi_get_soft_state(dtrace_softstate, minor); state->dts_epid = DTRACE_EPIDNONE + 1; (void) snprintf(c, sizeof (c), "dtrace_aggid_%d", minor); - state->dts_aggid_arena = vmem_create(c, (void *)1, UINT32_MAX, 1, + state->dts_aggid_arena = vmem_create(c, (void *)1, INT32_MAX, 1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER); if (devp != NULL) { @@ -12704,7 +14110,7 @@ dtrace_state_create(dev_t *devp, cred_t *cr, dtrace_state_t **new_state) major = ddi_driver_major(dtrace_devi); } - state->dts_dev = makedevice(major, minor); + state->dts_dev = makedev(major, minor); if (devp != NULL) *devp = state->dts_dev; @@ -12717,6 +14123,26 @@ dtrace_state_create(dev_t *devp, cred_t *cr, dtrace_state_t **new_state) */ state->dts_buffer = kmem_zalloc(bufsize, KM_SLEEP); state->dts_aggbuffer = kmem_zalloc(bufsize, KM_SLEEP); + state->dts_buf_over_limit = 0; + + /* + * Allocate and initialise the per-process per-CPU random state. + * SI_SUB_RANDOM < SI_SUB_DTRACE_ANON therefore entropy device is + * assumed to be seeded at this point (if from Fortuna seed file). + */ + state->dts_rstate = kmem_zalloc(NCPU * sizeof(uint64_t*), KM_SLEEP); + state->dts_rstate[0] = kmem_zalloc(2 * sizeof(uint64_t), KM_SLEEP); + (void) read_random(state->dts_rstate[0], 2 * sizeof(uint64_t)); + for (cpu_it = 1; cpu_it < NCPU; cpu_it++) { + state->dts_rstate[cpu_it] = kmem_zalloc(2 * sizeof(uint64_t), KM_SLEEP); + /* + * Each CPU is assigned a 2^64 period, non-overlapping + * subsequence. 
+ */ + dtrace_xoroshiro128_plus_jump(state->dts_rstate[cpu_it-1], + state->dts_rstate[cpu_it]); + } + state->dts_cleaner = CYCLIC_NONE; state->dts_deadman = CYCLIC_NONE; state->dts_vstate.dtvs_state = state; @@ -12742,8 +14168,7 @@ dtrace_state_create(dev_t *devp, cred_t *cr, dtrace_state_t **new_state) opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_default; opt[DTRACEOPT_JSTACKFRAMES] = dtrace_jstackframes_default; opt[DTRACEOPT_JSTACKSTRSIZE] = dtrace_jstackstrsize_default; - - state->dts_activity = DTRACE_ACTIVITY_INACTIVE; + opt[DTRACEOPT_BUFLIMIT] = dtrace_buflimit_default; /* * Depending on the user credentials, we set flag bits which alter probe @@ -12751,10 +14176,32 @@ dtrace_state_create(dev_t *devp, cred_t *cr, dtrace_state_t **new_state) * actual anonymous tracing, or the possession of all privileges, all of * the normal checks are bypassed. */ +#if defined(__APPLE__) + if (cr != NULL) { + kauth_cred_ref(cr); + state->dts_cred.dcr_cred = cr; + } + if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) { + if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed()) { + /* + * Allow only proc credentials when DTrace is + * restricted by the current security policy + */ + state->dts_cred.dcr_visible = DTRACE_CRV_ALLPROC; + state->dts_cred.dcr_action = DTRACE_CRA_PROC | DTRACE_CRA_PROC_CONTROL | DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER; + } + else { + state->dts_cred.dcr_visible = DTRACE_CRV_ALL; + state->dts_cred.dcr_action = DTRACE_CRA_ALL; + } + } + +#else if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) { state->dts_cred.dcr_visible = DTRACE_CRV_ALL; state->dts_cred.dcr_action = DTRACE_CRA_ALL; - } else { + } + else { /* * Set up the credentials for this instantiation. We take a * hold on the credential to prevent it from disappearing on @@ -12871,6 +14318,7 @@ dtrace_state_create(dev_t *devp, cred_t *cr, dtrace_state_t **new_state) DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE; } } +#endif *new_state = state; return(0); /* Success */ @@ -12881,10 +14329,11 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) { dtrace_optval_t *opt = state->dts_options, size; processorid_t cpu = 0; + size_t limit = buf->dtb_size; int flags = 0, rval; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); ASSERT(which < DTRACEOPT_MAX); ASSERT(state->dts_activity == DTRACE_ACTIVITY_INACTIVE || (state == dtrace_anon.dta_state && @@ -12928,8 +14377,8 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) */ return (E2BIG); } - - rval = dtrace_buffer_alloc(buf, size, flags, cpu); + limit = opt[DTRACEOPT_BUFLIMIT] * size / 100; + rval = dtrace_buffer_alloc(buf, limit, size, flags, cpu); if (rval != ENOMEM) { opt[which] = size; @@ -13177,6 +14626,18 @@ dtrace_state_go(dtrace_state_t *state, processorid_t *cpu) if (opt[DTRACEOPT_CLEANRATE] > dtrace_cleanrate_max) opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max; + if (opt[DTRACEOPT_STRSIZE] > dtrace_strsize_max) + opt[DTRACEOPT_STRSIZE] = dtrace_strsize_max; + + if (opt[DTRACEOPT_STRSIZE] < dtrace_strsize_min) + opt[DTRACEOPT_STRSIZE] = dtrace_strsize_min; + + if (opt[DTRACEOPT_BUFLIMIT] > dtrace_buflimit_max) + opt[DTRACEOPT_BUFLIMIT] = dtrace_buflimit_max; + + if (opt[DTRACEOPT_BUFLIMIT] < dtrace_buflimit_min) + opt[DTRACEOPT_BUFLIMIT] = dtrace_buflimit_min; + hdlr.cyh_func = (cyc_func_t)dtrace_state_clean; hdlr.cyh_arg = state; hdlr.cyh_level = 
CY_LOW_LEVEL; @@ -13270,7 +14731,7 @@ dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu) { dtrace_icookie_t cookie; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE && state->dts_activity != DTRACE_ACTIVITY_DRAINING) @@ -13321,7 +14782,7 @@ static int dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option, dtrace_optval_t val) { - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) return (EBUSY); @@ -13334,12 +14795,7 @@ dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option, switch (option) { case DTRACEOPT_DESTRUCTIVE: - /* - * Prevent consumers from enabling destructive actions if DTrace - * is running in a restricted environment, or if actions are - * disallowed. - */ - if (dtrace_is_restricted() || dtrace_destructive_disallow) + if (dtrace_destructive_disallow) return (EACCES); state->dts_cred.dcr_destructive = 1; @@ -13388,8 +14844,8 @@ dtrace_state_destroy(dtrace_state_t *state) int nspec = state->dts_nspeculations; uint32_t match; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); /* * First, retract any retained enablings for this state. @@ -13416,7 +14872,7 @@ dtrace_state_destroy(dtrace_state_t *state) * Release the credential hold we took in dtrace_state_create(). */ if (state->dts_cred.dcr_cred != NULL) - crfree(state->dts_cred.dcr_cred); + kauth_cred_unref(&state->dts_cred.dcr_cred); /* * Now we can safely disable and destroy any enabled probes. 
Because @@ -13455,6 +14911,11 @@ dtrace_state_destroy(dtrace_state_t *state) dtrace_buffer_free(state->dts_buffer); dtrace_buffer_free(state->dts_aggbuffer); + for (i = 0; i < (int)NCPU; i++) { + kmem_free(state->dts_rstate[i], 2 * sizeof(uint64_t)); + } + kmem_free(state->dts_rstate, NCPU * sizeof(uint64_t*)); + for (i = 0; i < nspec; i++) dtrace_buffer_free(spec[i].dtsp_buffer); @@ -13489,19 +14950,32 @@ dtrace_state_destroy(dtrace_state_t *state) dtrace_format_destroy(state); vmem_destroy(state->dts_aggid_arena); - ddi_soft_state_free(dtrace_softstate, minor); - vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); + dtrace_state_free(minor); } /* * DTrace Anonymous Enabling Functions */ + +int +dtrace_keep_kernel_symbols(void) +{ + if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed()) { + return 0; + } + + if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL) + return 1; + + return 0; +} + static dtrace_state_t * dtrace_anon_grab(void) { dtrace_state_t *state; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if ((state = dtrace_anon.dta_state) == NULL) { ASSERT(dtrace_anon.dta_enabling == NULL); @@ -13526,8 +15000,8 @@ dtrace_anon_property(void) dof_hdr_t *dof; char c[32]; /* enough for "dof-data-" + digits */ - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); for (i = 0; ; i++) { (void) snprintf(c, sizeof (c), "dof-data-%d", i); @@ -13539,6 +15013,7 @@ dtrace_anon_property(void) break; } +#ifdef illumos /* * We want to create anonymous state, so we need to transition * the kernel debugger to indicate that DTrace is active. If @@ -13551,6 +15026,7 @@ dtrace_anon_property(void) dtrace_dof_destroy(dof); break; } +#endif /* * If we haven't allocated an anonymous state, we'll do so now. @@ -13681,6 +15157,7 @@ dtrace_helper_trace(dtrace_helper_action_t *helper, } } +__attribute__((noinline)) static uint64_t dtrace_helper(int which, dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t arg0, uint64_t arg1) @@ -13798,7 +15275,8 @@ dtrace_helper_destroygen(proc_t* p, int gen) dtrace_vstate_t *vstate; uint_t i; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if (help == NULL || gen > help->dthps_generation) return (EINVAL); @@ -13863,13 +15341,11 @@ dtrace_helper_destroygen(proc_t* p, int gen) /* * If we have a meta provider, remove this helper provider. 
*/ - lck_mtx_lock(&dtrace_meta_lock); if (dtrace_meta_pid != NULL) { ASSERT(dtrace_deferred_pid == NULL); dtrace_helper_provider_remove(&prov->dthp_prov, - p->p_pid); + p); } - lck_mtx_unlock(&dtrace_meta_lock); dtrace_helper_provider_destroy(prov); @@ -13975,9 +15451,9 @@ static void dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help, dof_helper_t *dofhp) { - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); + LCK_MTX_ASSERT(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); - lck_mtx_lock(&dtrace_meta_lock); lck_mtx_lock(&dtrace_lock); if (!dtrace_attached() || dtrace_meta_pid == NULL) { @@ -14010,7 +15486,7 @@ dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help, lck_mtx_unlock(&dtrace_lock); - dtrace_helper_provide(dofhp, p->p_pid); + dtrace_helper_provide(dofhp, p); } else { /* @@ -14023,11 +15499,9 @@ dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help, for (i = 0; i < help->dthps_nprovs; i++) { dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov, - p->p_pid); + p); } } - - lck_mtx_unlock(&dtrace_meta_lock); } static int @@ -14037,7 +15511,7 @@ dtrace_helper_provider_add(proc_t* p, dof_helper_t *dofhp, int gen) dtrace_helper_provider_t *hprov, **tmp_provs; uint_t tmp_maxprovs, i; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); help = p->p_dtrace_helpers; ASSERT(help != NULL); @@ -14333,7 +15807,8 @@ dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp) int i, gen, rv, nhelpers = 0, nprovs = 0, destroy = 1; uintptr_t daddr = (uintptr_t)dof; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); if ((help = p->p_dtrace_helpers) == NULL) help = dtrace_helpers_create(p); @@ -14491,10 +15966,6 @@ dtrace_lazy_dofs_add(proc_t *p, dof_ioctl_data_t* incoming_dofs, int *dofs_claim lck_rw_lock_shared(&dtrace_dof_mode_lock); - /* - * If we have lazy dof, dof mode better be LAZY_ON. - */ - ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON); ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL); ASSERT(dtrace_dof_mode != DTRACE_DOF_MODE_NEVER); @@ -14502,7 +15973,7 @@ dtrace_lazy_dofs_add(proc_t *p, dof_ioctl_data_t* incoming_dofs, int *dofs_claim * Any existing helpers force non-lazy behavior. */ if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON && (p->p_dtrace_helpers == NULL)) { - lck_mtx_lock(&p->p_dtrace_sprlock); + dtrace_sprlock(p); dof_ioctl_data_t* existing_dofs = p->p_dtrace_lazy_dofs; unsigned int existing_dofs_count = (existing_dofs) ? existing_dofs->dofiod_count : 0; @@ -14565,7 +16036,7 @@ dtrace_lazy_dofs_add(proc_t *p, dof_ioctl_data_t* incoming_dofs, int *dofs_claim #endif /* DEBUG */ unlock: - lck_mtx_unlock(&p->p_dtrace_sprlock); + dtrace_sprunlock(p); } else { rval = EACCES; } @@ -14588,10 +16059,6 @@ dtrace_lazy_dofs_remove(proc_t *p, int generation) lck_rw_lock_shared(&dtrace_dof_mode_lock); - /* - * If we have lazy dof, dof mode better be LAZY_ON. - */ - ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON); ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL); ASSERT(dtrace_dof_mode != DTRACE_DOF_MODE_NEVER); @@ -14599,7 +16066,7 @@ dtrace_lazy_dofs_remove(proc_t *p, int generation) * Any existing helpers force non-lazy behavior. 
*/ if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON && (p->p_dtrace_helpers == NULL)) { - lck_mtx_lock(&p->p_dtrace_sprlock); + dtrace_sprlock(p); dof_ioctl_data_t* existing_dofs = p->p_dtrace_lazy_dofs; @@ -14656,14 +16123,13 @@ dtrace_lazy_dofs_remove(proc_t *p, int generation) #endif } - - lck_mtx_unlock(&p->p_dtrace_sprlock); - } else { + dtrace_sprunlock(p); + } else { rval = EACCES; } lck_rw_unlock_shared(&dtrace_dof_mode_lock); - + return rval; } @@ -14671,20 +16137,14 @@ void dtrace_lazy_dofs_destroy(proc_t *p) { lck_rw_lock_shared(&dtrace_dof_mode_lock); - lck_mtx_lock(&p->p_dtrace_sprlock); + dtrace_sprlock(p); - /* - * If we have lazy dof, dof mode better be LAZY_ON, or we must be exiting. - * We cannot assert against DTRACE_DOF_MODE_NEVER here, because we are called from - * kern_exit.c and kern_exec.c. - */ - ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON || p->p_lflag & P_LEXIT); ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL); dof_ioctl_data_t* lazy_dofs = p->p_dtrace_lazy_dofs; p->p_dtrace_lazy_dofs = NULL; - lck_mtx_unlock(&p->p_dtrace_sprlock); + dtrace_sprunlock(p); lck_rw_unlock_shared(&dtrace_dof_mode_lock); if (lazy_dofs) { @@ -14692,47 +16152,6 @@ dtrace_lazy_dofs_destroy(proc_t *p) } } -void -dtrace_lazy_dofs_duplicate(proc_t *parent, proc_t *child) -{ - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); - lck_mtx_assert(&parent->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED); - lck_mtx_assert(&child->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED); - - lck_rw_lock_shared(&dtrace_dof_mode_lock); - lck_mtx_lock(&parent->p_dtrace_sprlock); - - /* - * If we have lazy dof, dof mode better be LAZY_ON, or we must be exiting. - * We cannot assert against DTRACE_DOF_MODE_NEVER here, because we are called from - * kern_fork.c - */ - ASSERT(parent->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON); - ASSERT(parent->p_dtrace_lazy_dofs == NULL || parent->p_dtrace_helpers == NULL); - /* - * In theory we should hold the child sprlock, but this is safe... - */ - ASSERT(child->p_dtrace_lazy_dofs == NULL && child->p_dtrace_helpers == NULL); - - dof_ioctl_data_t* parent_dofs = parent->p_dtrace_lazy_dofs; - dof_ioctl_data_t* child_dofs = NULL; - if (parent_dofs) { - size_t parent_dofs_size = DOF_IOCTL_DATA_T_SIZE(parent_dofs->dofiod_count); - child_dofs = kmem_alloc(parent_dofs_size, KM_SLEEP); - bcopy(parent_dofs, child_dofs, parent_dofs_size); - } - - lck_mtx_unlock(&parent->p_dtrace_sprlock); - - if (child_dofs) { - lck_mtx_lock(&child->p_dtrace_sprlock); - child->p_dtrace_lazy_dofs = child_dofs; - lck_mtx_unlock(&child->p_dtrace_sprlock); - } - - lck_rw_unlock_shared(&dtrace_dof_mode_lock); -} - static int dtrace_lazy_dofs_proc_iterate_filter(proc_t *p, void* ignored) { @@ -14743,29 +16162,24 @@ dtrace_lazy_dofs_proc_iterate_filter(proc_t *p, void* ignored) return p->p_dtrace_lazy_dofs != NULL; } -static int -dtrace_lazy_dofs_proc_iterate_doit(proc_t *p, void* ignored) -{ -#pragma unused(ignored) +static void +dtrace_lazy_dofs_process(proc_t *p) { /* * It is possible this process may exit during our attempt to * fault in the dof. We could fix this by holding locks longer, * but the errors are benign. 
*/ - lck_mtx_lock(&p->p_dtrace_sprlock); + dtrace_sprlock(p); + - /* - * In this case only, it is okay to have lazy dof when dof mode is DTRACE_DOF_MODE_LAZY_OFF - */ ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL); ASSERT(dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF); - dof_ioctl_data_t* lazy_dofs = p->p_dtrace_lazy_dofs; p->p_dtrace_lazy_dofs = NULL; - lck_mtx_unlock(&p->p_dtrace_sprlock); - + dtrace_sprunlock(p); + lck_mtx_lock(&dtrace_meta_lock); /* * Process each dof_helper_t */ @@ -14788,7 +16202,7 @@ dtrace_lazy_dofs_proc_iterate_doit(proc_t *p, void* ignored) dhp->dofhp_dof = dhp->dofhp_addr; dof_hdr_t *dof = dtrace_dof_copyin_from_proc(p, dhp->dofhp_dof, &rval); - + if (dof != NULL) { dtrace_helpers_t *help; @@ -14820,11 +16234,76 @@ dtrace_lazy_dofs_proc_iterate_doit(proc_t *p, void* ignored) lck_mtx_unlock(&dtrace_lock); } } + lck_mtx_unlock(&dtrace_meta_lock); + kmem_free(lazy_dofs, DOF_IOCTL_DATA_T_SIZE(lazy_dofs->dofiod_count)); + } else { + lck_mtx_unlock(&dtrace_meta_lock); + } +} + +static int +dtrace_lazy_dofs_proc_iterate_doit(proc_t *p, void* ignored) +{ +#pragma unused(ignored) + + dtrace_lazy_dofs_process(p); + + return PROC_RETURNED; +} + +#define DTRACE_LAZY_DOFS_DUPLICATED 1 + +static int +dtrace_lazy_dofs_duplicate(proc_t *parent, proc_t *child) +{ + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); + LCK_MTX_ASSERT(&parent->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED); + LCK_MTX_ASSERT(&child->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED); + + lck_rw_lock_shared(&dtrace_dof_mode_lock); + dtrace_sprlock(parent); + + /* + * We need to make sure that the transition to lazy dofs -> helpers + * was atomic for our parent + */ + ASSERT(parent->p_dtrace_lazy_dofs == NULL || parent->p_dtrace_helpers == NULL); + /* + * In theory we should hold the child sprlock, but this is safe... + */ + ASSERT(child->p_dtrace_lazy_dofs == NULL && child->p_dtrace_helpers == NULL); + + dof_ioctl_data_t* parent_dofs = parent->p_dtrace_lazy_dofs; + dof_ioctl_data_t* child_dofs = NULL; + if (parent_dofs) { + size_t parent_dofs_size = DOF_IOCTL_DATA_T_SIZE(parent_dofs->dofiod_count); + child_dofs = kmem_alloc(parent_dofs_size, KM_SLEEP); + bcopy(parent_dofs, child_dofs, parent_dofs_size); + } + + dtrace_sprunlock(parent); + + if (child_dofs) { + dtrace_sprlock(child); + child->p_dtrace_lazy_dofs = child_dofs; + dtrace_sprunlock(child); + /** + * We process the DOF at this point if the mode is set to + * LAZY_OFF. 
This can happen if DTrace is still processing the + * DOF of another process (which can happen because the + * protected pager can have a huge latency) + * but has not processed our parent yet + */ + if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF) { + dtrace_lazy_dofs_process(child); + } + lck_rw_unlock_shared(&dtrace_dof_mode_lock); - kmem_free(lazy_dofs, DOF_IOCTL_DATA_T_SIZE(lazy_dofs->dofiod_count)); + return DTRACE_LAZY_DOFS_DUPLICATED; } + lck_rw_unlock_shared(&dtrace_dof_mode_lock); - return PROC_RETURNED; + return 0; } static dtrace_helpers_t * @@ -14832,7 +16311,7 @@ dtrace_helpers_create(proc_t *p) { dtrace_helpers_t *help; - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(p->p_dtrace_helpers == NULL); help = kmem_zalloc(sizeof (dtrace_helpers_t), KM_SLEEP); @@ -14852,6 +16331,7 @@ dtrace_helpers_destroy(proc_t* p) dtrace_vstate_t *vstate; uint_t i; + lck_mtx_lock(&dtrace_meta_lock); lck_mtx_lock(&dtrace_lock); ASSERT(p->p_dtrace_helpers != NULL); @@ -14885,13 +16365,12 @@ * Destroy the helper providers. */ if (help->dthps_maxprovs > 0) { - lck_mtx_lock(&dtrace_meta_lock); if (dtrace_meta_pid != NULL) { ASSERT(dtrace_deferred_pid == NULL); for (i = 0; i < help->dthps_nprovs; i++) { dtrace_helper_provider_remove( - &help->dthps_provs[i]->dthp_prov, p->p_pid); + &help->dthps_provs[i]->dthp_prov, p); } } else { lck_mtx_lock(&dtrace_lock); @@ -14915,7 +16394,6 @@ dtrace_helpers_destroy(proc_t* p) lck_mtx_unlock(&dtrace_lock); } - lck_mtx_unlock(&dtrace_meta_lock); for (i = 0; i < help->dthps_nprovs; i++) { dtrace_helper_provider_destroy(help->dthps_provs[i]); @@ -14934,6 +16412,7 @@ dtrace_helpers_destroy(proc_t* p) --dtrace_helpers; lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&dtrace_meta_lock); } static void @@ -14946,6 +16425,7 @@ dtrace_helpers_duplicate(proc_t *from, proc_t *to) uint_t i; int j, sz, hasprovs = 0; + lck_mtx_lock(&dtrace_meta_lock); lck_mtx_lock(&dtrace_lock); ASSERT(from->p_dtrace_helpers != NULL); ASSERT(dtrace_helpers > 0); @@ -15017,6 +16497,150 @@ dtrace_helpers_duplicate(proc_t *from, proc_t *to) if (hasprovs) dtrace_helper_provider_register(to, newhelp, NULL); + + lck_mtx_unlock(&dtrace_meta_lock); +} + +/** + * DTrace Process functions + */ + +void +dtrace_proc_fork(proc_t *parent_proc, proc_t *child_proc, int spawn) +{ + /* + * This code applies to new processes that are copying the task + * and thread state and address spaces of their parent process. + */ + if (!spawn) { + /* + * APPLE NOTE: Solaris does a sprlock() and drops the + * proc_lock here. We're cheating a bit and only taking + * the p_dtrace_sprlock lock. A full sprlock would + * task_suspend the parent. + */ + dtrace_sprlock(parent_proc); + + /* + * Remove all DTrace tracepoints from the child process. We + * need to do this _before_ duplicating USDT providers since + * any associated probes may be immediately enabled. + */ + if (parent_proc->p_dtrace_count > 0) { + dtrace_fasttrap_fork(parent_proc, child_proc); + } + + dtrace_sprunlock(parent_proc); + + /* + * Duplicate any lazy dof(s). This must be done while NOT + * holding the parent sprlock! Lock ordering is + * dtrace_dof_mode_lock, then sprlock. It is imperative we + * always call dtrace_lazy_dofs_duplicate, rather than null + * check and call if !NULL. If we NULL test, during lazy dof + * faulting we can race with the faulting code and proceed + * from here to beyond the helpers copy. 
The lazy dof + * faulting will then fail to copy the helpers to the child + * process. We return if we duplicated lazy dofs as a process + * can only have one at the same time to avoid a race between + * a dtrace client and dtrace_proc_fork where a process would + * end up with both lazy dofs and helpers. + */ + if (dtrace_lazy_dofs_duplicate(parent_proc, child_proc) == DTRACE_LAZY_DOFS_DUPLICATED) { + return; + } + + /* + * Duplicate any helper actions and providers if they haven't + * already. + */ +#if !defined(__APPLE__) + /* + * The SFORKING + * we set above informs the code to enable USDT probes that + * sprlock() may fail because the child is being forked. + */ +#endif + /* + * APPLE NOTE: As best I can tell, Apple's sprlock() equivalent + * never fails to find the child. We do not set SFORKING. + */ + if (parent_proc->p_dtrace_helpers != NULL && dtrace_helpers_fork) { + (*dtrace_helpers_fork)(parent_proc, child_proc); + } + } +} + +void +dtrace_proc_exec(proc_t *p) +{ + /* + * Invalidate any predicate evaluation already cached for this thread by DTrace. + * That's because we've just stored to p_comm and DTrace refers to that when it + * evaluates the "execname" special variable. uid and gid may have changed as well. + */ + dtrace_set_thread_predcache(current_thread(), 0); + + /* + * Free any outstanding lazy dof entries. It is imperative we + * always call dtrace_lazy_dofs_destroy, rather than null check + * and call if !NULL. If we NULL test, during lazy dof faulting + * we can race with the faulting code and proceed from here to + * beyond the helpers cleanup. The lazy dof faulting will then + * install new helpers which no longer belong to this process! + */ + dtrace_lazy_dofs_destroy(p); + + + /* + * Clean up any DTrace helpers for the process. + */ + if (p->p_dtrace_helpers != NULL && dtrace_helpers_cleanup) { + (*dtrace_helpers_cleanup)(p); + } + + /* + * Cleanup the DTrace provider associated with this process. + */ + proc_lock(p); + if (p->p_dtrace_probes && dtrace_fasttrap_exec_ptr) { + (*dtrace_fasttrap_exec_ptr)(p); + } + proc_unlock(p); +} + +void +dtrace_proc_exit(proc_t *p) +{ + /* + * Free any outstanding lazy dof entries. It is imperative we + * always call dtrace_lazy_dofs_destroy, rather than null check + * and call if !NULL. If we NULL test, during lazy dof faulting + * we can race with the faulting code and proceed from here to + * beyond the helpers cleanup. The lazy dof faulting will then + * install new helpers which will never be cleaned up, and leak. + */ + dtrace_lazy_dofs_destroy(p); + + /* + * Clean up any DTrace helper actions or probes for the process. + */ + if (p->p_dtrace_helpers != NULL) { + (*dtrace_helpers_cleanup)(p); + } + + /* + * Clean up any DTrace probes associated with this process. 
+ */ + /* + * APPLE NOTE: We release ptss pages/entries in dtrace_fasttrap_exit_ptr(), + * call this after dtrace_helpers_cleanup() + */ + proc_lock(p); + if (p->p_dtrace_probes && dtrace_fasttrap_exit_ptr) { + (*dtrace_fasttrap_exit_ptr)(p); + } + proc_unlock(p); } /* @@ -15036,7 +16660,7 @@ dtrace_modctl_add(struct modctl * newctl) struct modctl *nextp, *prevp; ASSERT(newctl != NULL); - lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&mod_lock, LCK_MTX_ASSERT_OWNED); // Insert new module at the front of the list, @@ -15082,7 +16706,7 @@ dtrace_modctl_add(struct modctl * newctl) static modctl_t * dtrace_modctl_lookup(struct kmod_info * kmod) { - lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&mod_lock, LCK_MTX_ASSERT_OWNED); struct modctl * ctl; @@ -15102,7 +16726,7 @@ static void dtrace_modctl_remove(struct modctl * ctl) { ASSERT(ctl != NULL); - lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&mod_lock, LCK_MTX_ASSERT_OWNED); modctl_t *prevp, *nextp, *curp; // Remove stale chain first @@ -15196,7 +16820,7 @@ dtrace_module_loaded(struct kmod_info *kmod, uint32_t flag) ctl->mod_loaded = 1; ctl->mod_flags = 0; ctl->mod_user_symbols = NULL; - + /* * Find the UUID for this module, if it has one */ @@ -15215,6 +16839,15 @@ dtrace_module_loaded(struct kmod_info *kmod, uint32_t flag) if (ctl->mod_address == g_kernel_kmod_info.address) { ctl->mod_flags |= MODCTL_IS_MACH_KERNEL; + memcpy(dtrace_kerneluuid, ctl->mod_uuid, sizeof(dtrace_kerneluuid)); + } + /* + * Static kexts have a UUID that is not used for symbolication, as all their + * symbols are in kernel + */ + else if ((flag & KMOD_DTRACE_STATIC_KEXT) == KMOD_DTRACE_STATIC_KEXT) { + memcpy(ctl->mod_uuid, dtrace_kerneluuid, sizeof(dtrace_kerneluuid)); + ctl->mod_flags |= MODCTL_IS_STATIC_KEXT; } } dtrace_modctl_add(ctl); @@ -15255,7 +16888,9 @@ dtrace_module_loaded(struct kmod_info *kmod, uint32_t flag) } /* We will instrument the module immediately using kernel symbols */ - ctl->mod_flags |= MODCTL_HAS_KERNEL_SYMBOLS; + if (!(flag & KMOD_DTRACE_NO_KERNEL_SYMS)) { + ctl->mod_flags |= MODCTL_HAS_KERNEL_SYMBOLS; + } lck_mtx_unlock(&dtrace_lock); @@ -15423,6 +17058,7 @@ syncloop: probe->dtpr_provider->dtpv_probe_count--; next = probe->dtpr_nextmod; + dtrace_hash_remove(dtrace_byprov, probe); dtrace_hash_remove(dtrace_bymod, probe); dtrace_hash_remove(dtrace_byfunc, probe); dtrace_hash_remove(dtrace_byname, probe); @@ -15448,9 +17084,9 @@ syncloop: prov = probe->dtpr_provider; prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg); - kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); - kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); - kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); + dtrace_strunref(probe->dtpr_mod); + dtrace_strunref(probe->dtpr_func); + dtrace_strunref(probe->dtpr_name); vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1); zfree(dtrace_probe_t_zone, probe); @@ -15483,7 +17119,7 @@ dtrace_resume(void) static int dtrace_cpu_setup(cpu_setup_t what, processorid_t cpu) { - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); lck_mtx_lock(&dtrace_lock); switch (what) { @@ -15587,9 +17223,8 @@ dtrace_toxrange_add(uintptr_t base, uintptr_t limit) */ /*ARGSUSED*/ static int -dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) +dtrace_attach(dev_info_t *devi) { -#pragma unused(cmd) /* __APPLE__ */ dtrace_provider_id_t id; dtrace_state_t *state = NULL; dtrace_enabling_t *enab; 
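Condensing the dtrace_module_loaded() changes in the hunks above: the mach kernel entry publishes its UUID into dtrace_kerneluuid, static kexts inherit that UUID because their symbols live in the kernel proper, and per-kext kernel-symbol instrumentation can be opted out. A sketch of just those decisions (modctl_uuid_sketch is an illustrative name; the flags and fields are the ones used by the surrounding code):

static void
modctl_uuid_sketch(modctl_t *ctl, uint32_t flag)
{
	if (ctl->mod_address == g_kernel_kmod_info.address) {
		/* the mach_kernel entry: publish its UUID for symbolication */
		ctl->mod_flags |= MODCTL_IS_MACH_KERNEL;
		memcpy(dtrace_kerneluuid, ctl->mod_uuid,
		    sizeof (dtrace_kerneluuid));
	} else if ((flag & KMOD_DTRACE_STATIC_KEXT) == KMOD_DTRACE_STATIC_KEXT) {
		/* static kexts symbolicate through the kernel's UUID */
		memcpy(ctl->mod_uuid, dtrace_kerneluuid,
		    sizeof (dtrace_kerneluuid));
		ctl->mod_flags |= MODCTL_IS_STATIC_KEXT;
	}

	if (!(flag & KMOD_DTRACE_NO_KERNEL_SYMS)) {
		/* instrument immediately using kernel symbols */
		ctl->mod_flags |= MODCTL_HAS_KERNEL_SYMBOLS;
	}
}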
@@ -15598,18 +17233,7 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) lck_mtx_lock(&dtrace_provider_lock); lck_mtx_lock(&dtrace_lock); - if (ddi_soft_state_init(&dtrace_softstate, - sizeof (dtrace_state_t), 0) != 0) { - cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state"); - lck_mtx_unlock(&dtrace_lock); - lck_mtx_unlock(&dtrace_provider_lock); - lck_mtx_unlock(&cpu_lock); - return (DDI_FAILURE); - } - /* Darwin uses BSD cloning device driver to automagically obtain minor device number. */ - - ddi_report_dev(devi); dtrace_devi = devi; dtrace_modload = dtrace_module_loaded; @@ -15624,30 +17248,38 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL); - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); - dtrace_arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1, + dtrace_arena = vmem_create("dtrace", (void *)1, INT32_MAX, 1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER); - dtrace_minor = vmem_create("dtrace_minor", (void *)DTRACEMNRN_CLONE, - UINT32_MAX - DTRACEMNRN_CLONE, 1, NULL, NULL, NULL, 0, - VM_SLEEP | VMC_IDENTIFIER); - dtrace_taskq = taskq_create("dtrace_taskq", 1, maxclsyspri, - 1, INT_MAX, 0); dtrace_state_cache = kmem_cache_create("dtrace_state_cache", sizeof (dtrace_dstate_percpu_t) * (int)NCPU, DTRACE_STATE_ALIGN, NULL, NULL, NULL, NULL, NULL, 0); - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); - dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod), + LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED); + + dtrace_nprobes = dtrace_nprobes_default; + dtrace_probes = kmem_zalloc(sizeof(dtrace_probe_t*) * dtrace_nprobes, + KM_SLEEP); + + dtrace_byprov = dtrace_hash_create(dtrace_strkey_probe_provider, + 0, /* unused */ + offsetof(dtrace_probe_t, dtpr_nextprov), + offsetof(dtrace_probe_t, dtpr_prevprov)); + + dtrace_bymod = dtrace_hash_create(dtrace_strkey_deref_offset, + offsetof(dtrace_probe_t, dtpr_mod), offsetof(dtrace_probe_t, dtpr_nextmod), offsetof(dtrace_probe_t, dtpr_prevmod)); - dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func), + dtrace_byfunc = dtrace_hash_create(dtrace_strkey_deref_offset, + offsetof(dtrace_probe_t, dtpr_func), offsetof(dtrace_probe_t, dtpr_nextfunc), offsetof(dtrace_probe_t, dtpr_prevfunc)); - dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name), + dtrace_byname = dtrace_hash_create(dtrace_strkey_deref_offset, + offsetof(dtrace_probe_t, dtpr_name), offsetof(dtrace_probe_t, dtpr_nextname), offsetof(dtrace_probe_t, dtpr_prevname)); @@ -15682,6 +17314,13 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) dtrace_provider, NULL, NULL, "END", 0, NULL); dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t) dtrace_provider, NULL, NULL, "ERROR", 3, NULL); +#elif (defined(__arm__) || defined(__arm64__)) + dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t) + dtrace_provider, NULL, NULL, "BEGIN", 2, NULL); + dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t) + dtrace_provider, NULL, NULL, "END", 1, NULL); + dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t) + dtrace_provider, NULL, NULL, "ERROR", 4, NULL); #else #error Unknown Architecture #endif @@ -15735,7 +17374,7 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) lck_mtx_lock(&dtrace_lock); if ((enab = dtrace_anon.dta_enabling) != NULL) - (void) dtrace_enabling_match(enab, NULL); + (void) dtrace_enabling_match(enab, NULL, NULL); 
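
The probe hash construction above changes shape: dtrace_hash_create() now takes a key function plus an argument instead of assuming a string pointer at a fixed offset, which is how one hash implementation can serve dtrace_byprov (keyed by provider) and dtrace_bymod/byfunc/byname (keyed by interned strings). A rough user-space sketch of that signature; tiny_hash, struct probe, and the hash function are hypothetical stand-ins:

#include <stddef.h>
#include <stdint.h>

typedef uintptr_t (*key_fn_t)(const void *elm, size_t arg);

struct tiny_hash {
	key_fn_t key;      /* how to derive a hash key from an element */
	size_t   arg;      /* key-function argument (e.g. a field offset) */
	size_t   next_off; /* intrusive chain links, as in dtrace_hash_t */
	size_t   prev_off;
};

struct probe {
	char         *mod; /* stands in for dtpr_mod */
	struct probe *nextmod, *prevmod;
};

/*
 * In the style of dtrace_strkey_deref_offset: the element holds a char*
 * at byte offset 'arg'; the key is derived from that string.
 */
static uintptr_t
key_deref_offset(const void *elm, size_t arg)
{
	const char *s = *(const char *const *)((const char *)elm + arg);
	uintptr_t h = 0;

	while (*s != '\0')
		h = h * 31 + (uintptr_t)(unsigned char)*s++;
	return h;
}

static struct tiny_hash
tiny_hash_create(key_fn_t key, size_t arg, size_t next_off, size_t prev_off)
{
	struct tiny_hash h = { key, arg, next_off, prev_off };
	return h;
}

/* Parallels the dtrace_bymod construction above. */
static struct tiny_hash
make_bymod_table(void)
{
	return tiny_hash_create(key_deref_offset,
	    offsetof(struct probe, mod),
	    offsetof(struct probe, nextmod),
	    offsetof(struct probe, prevmod));
}
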
	lck_mtx_unlock(&cpu_lock);
 }
 
@@ -15792,6 +17431,7 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
 	dtrace_opens++;
 	dtrace_membar_producer();
 
+#ifdef illumos
 	/*
	 * If the kernel debugger is active (that is, if the kernel debugger
	 * modified text in some way), we won't allow the open.
@@ -15802,13 +17442,17 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
 		lck_mtx_unlock(&cpu_lock);
 		return (EBUSY);
 	}
+#endif
 
 	rv = dtrace_state_create(devp, cred_p, &state);
 	lck_mtx_unlock(&cpu_lock);
 
 	if (rv != 0 || state == NULL) {
-		if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
+		if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) {
+#ifdef illumos
 			(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
+#endif
+		}
 		lck_mtx_unlock(&dtrace_lock);
 		/* propagate EAGAIN or ERESTART */
 		return (rv);
@@ -15827,7 +17471,16 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
	 */
 	if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON) {
 		dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_OFF;
-
+		/*
+		 * We do not need to hold the exclusive lock while processing
+		 * DOF on processes. We do need to make sure the mode does not get
+		 * changed to DTRACE_DOF_MODE_LAZY_ON during that stage though
+		 * (which should not happen anyway since it only happens in
+		 * dtrace_close). There is no way incomplete USDT probes can be
+		 * activated by any DTrace clients here since they all have to
+		 * call dtrace_open and be blocked on dtrace_dof_mode_lock.
+		 */
+		lck_rw_lock_exclusive_to_shared(&dtrace_dof_mode_lock);
 		/*
		 * Iterate all existing processes and load lazy dofs.
		 */
@@ -15836,9 +17489,13 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
			       NULL,
			       dtrace_lazy_dofs_proc_iterate_filter,
			       NULL);
+
+		lck_rw_unlock_shared(&dtrace_dof_mode_lock);
+	}
+	else {
+		lck_rw_unlock_exclusive(&dtrace_dof_mode_lock);
 	}
 
-	lck_rw_unlock_exclusive(&dtrace_dof_mode_lock);
 
 	/*
	 * Update kernel symbol state.
@@ -15873,8 +17530,7 @@ dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
	dtrace_state_t *state;
 
	/* APPLE NOTE: Darwin puts Helper on its own major device. */
-
-	state = ddi_get_soft_state(dtrace_softstate, minor);
+	state = dtrace_state_get(minor);
 
	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_lock);
@@ -15894,9 +17550,12 @@ dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
	 * Only relinquish control of the kernel debugger interface when there
	 * are no consumers and no anonymous enablings.
	 */
-	if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
+	if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) {
+#ifdef illumos
		(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
-
+#endif
+	}
+
	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&cpu_lock);
 
@@ -15950,7 +17609,12 @@ dtrace_ioctl_helper(u_long cmd, caddr_t arg, int *rv)
		return KERN_SUCCESS;
 
	switch (cmd) {
+#if defined (__arm64__)
+	case DTRACEHIOC_ADDDOF_U32:
+	case DTRACEHIOC_ADDDOF_U64:
+#else
	case DTRACEHIOC_ADDDOF:
+#endif /* __arm64__*/
	{
		dof_helper_t *dhp = NULL;
		size_t dof_ioctl_data_size;
@@ -15962,6 +17626,16 @@ dtrace_ioctl_helper(u_long cmd, caddr_t arg, int *rv)
		int multi_dof_claimed = 0;
		proc_t* p = current_proc();
 
+		/*
+		 * If this is a restricted process and dtrace is restricted,
+		 * do not allow DOFs to be registered
+		 */
+		if (dtrace_is_restricted() &&
+		    !dtrace_are_restrictions_relaxed() &&
+		    !dtrace_can_attach_to_proc(current_proc())) {
+			return (EACCES);
+		}
+
		/*
		 * Read the number of DOF sections being passed in.
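		 */

The downgrade above (lck_rw_lock_exclusive_to_shared) is the load-bearing detail: the open path flips the DOF mode under the exclusive lock, then keeps continuous ownership in shared mode while iterating processes, so dtrace_close cannot flip the mode back mid-iteration. POSIX read-write locks have no atomic downgrade, so this toy lock (a hedged sketch: mutex plus condvar, no writer preference) makes the property explicit:

#include <assert.h>
#include <pthread.h>

struct toy_rw {
	pthread_mutex_t m;
	pthread_cond_t  cv;
	int             holders; /* >0: shared holders; -1: exclusive */
};

#define TOY_RW_INIT { PTHREAD_MUTEX_INITIALIZER, PTHREAD_COND_INITIALIZER, 0 }

static void
toy_lock_exclusive(struct toy_rw *l)
{
	pthread_mutex_lock(&l->m);
	while (l->holders != 0)
		pthread_cond_wait(&l->cv, &l->m);
	l->holders = -1;
	pthread_mutex_unlock(&l->m);
}

/*
 * The point of the downgrade: ownership is never dropped, so no writer
 * can slip in between the exclusive phase (flipping the DOF mode) and
 * the shared phase (iterating processes).
 */
static void
toy_lock_exclusive_to_shared(struct toy_rw *l)
{
	pthread_mutex_lock(&l->m);
	assert(l->holders == -1);
	l->holders = 1;
	pthread_cond_broadcast(&l->cv);
	pthread_mutex_unlock(&l->m);
}

static void
toy_unlock_shared(struct toy_rw *l)
{
	pthread_mutex_lock(&l->m);
	if (--l->holders == 0)
		pthread_cond_broadcast(&l->cv);
	pthread_mutex_unlock(&l->m);
}

		/*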
*/ @@ -15971,7 +17645,7 @@ dtrace_ioctl_helper(u_long cmd, caddr_t arg, int *rv) dtrace_dof_error(NULL, "failed to copyin dofiod_count"); return (EFAULT); } - + /* * Range check the count. */ @@ -16022,6 +17696,7 @@ dtrace_ioctl_helper(u_long cmd, caddr_t arg, int *rv) dof_hdr_t *dof = dtrace_dof_copyin(dhp->dofhp_dof, &rval); if (dof != NULL) { + lck_mtx_lock(&dtrace_meta_lock); lck_mtx_lock(&dtrace_lock); /* @@ -16033,6 +17708,7 @@ dtrace_ioctl_helper(u_long cmd, caddr_t arg, int *rv) } lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&dtrace_meta_lock); } } while (++i < multi_dof->dofiod_count && rval == 0); } @@ -16073,9 +17749,11 @@ dtrace_ioctl_helper(u_long cmd, caddr_t arg, int *rv) * EACCES means non-lazy */ if (rval == EACCES) { + lck_mtx_lock(&dtrace_meta_lock); lck_mtx_lock(&dtrace_lock); rval = dtrace_helper_destroygen(p, generation); lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&dtrace_meta_lock); } return (rval); @@ -16099,7 +17777,7 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv /* Darwin puts Helper on its own major device. */ - state = ddi_get_soft_state(dtrace_softstate, minor); + state = dtrace_state_get(minor); if (state->dts_anon) { ASSERT(dtrace_anon.dta_state == NULL); @@ -16359,7 +18037,7 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv return (rval); } - if ((err = dtrace_enabling_match(enab, rv)) == 0) { + if ((err = dtrace_enabling_match(enab, rv, NULL)) == 0) { err = dtrace_enabling_retain(enab); } else { dtrace_enabling_destroy(enab); @@ -16428,17 +18106,15 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv desc.dtpd_id++; } - if (cmd == DTRACEIOC_PROBEMATCH) { - dtrace_probekey(&desc, &pkey); - pkey.dtpk_id = DTRACE_IDNONE; - } - dtrace_cred2priv(cr, &priv, &uid, &zoneid); lck_mtx_lock(&dtrace_lock); - if (cmd == DTRACEIOC_PROBEMATCH) { - /* Quiet compiler warning */ + if (cmd == DTRACEIOC_PROBEMATCH) { + dtrace_probekey(&desc, &pkey); + pkey.dtpk_id = DTRACE_IDNONE; + + /* Quiet compiler warning */ for (i = desc.dtpd_id; i <= (dtrace_id_t)dtrace_nprobes; i++) { if ((probe = dtrace_probes[i - 1]) != NULL && (m = dtrace_match_probe(probe, &pkey, @@ -16450,6 +18126,7 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv lck_mtx_unlock(&dtrace_lock); return (EINVAL); } + dtrace_probekey_release(&pkey); } else { /* Quiet compiler warning */ @@ -16582,10 +18259,45 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv return (rval == 0 ? 
0 : EFAULT);
 	}
 
+	case DTRACEIOC_SLEEP: {
+		int64_t time;
+		uint64_t abstime;
+		uint64_t rvalue = DTRACE_WAKE_TIMEOUT;
+
+		if (copyin(arg, &time, sizeof(time)) != 0)
+			return (EFAULT);
+
+		nanoseconds_to_absolutetime((uint64_t)time, &abstime);
+		clock_absolutetime_interval_to_deadline(abstime, &abstime);
+
+		if (assert_wait_deadline(state, THREAD_ABORTSAFE, abstime) == THREAD_WAITING) {
+			if (state->dts_buf_over_limit > 0) {
+				clear_wait(current_thread(), THREAD_INTERRUPTED);
+				rvalue = DTRACE_WAKE_BUF_LIMIT;
+			} else {
+				thread_block(THREAD_CONTINUE_NULL);
+				if (state->dts_buf_over_limit > 0) {
+					rvalue = DTRACE_WAKE_BUF_LIMIT;
+				}
+			}
+		}
+
+		if (copyout(&rvalue, arg, sizeof(rvalue)) != 0)
+			return (EFAULT);
+
+		return (0);
+	}
+
+	case DTRACEIOC_SIGNAL: {
+		wakeup(state);
+		return (0);
+	}
+
 	case DTRACEIOC_AGGSNAP:
 	case DTRACEIOC_BUFSNAP: {
 		dtrace_bufdesc_t desc;
 		caddr_t cached;
+		boolean_t over_limit;
 		dtrace_buffer_t *buf;
 
 		if (copyin(arg, &desc, sizeof (desc)) != 0)
@@ -16667,6 +18379,8 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv
 		}
 
 		cached = buf->dtb_tomax;
+		over_limit = buf->dtb_cur_limit == buf->dtb_size;
+
 		ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
 
 		dtrace_xcall(desc.dtbd_cpu,
@@ -16687,11 +18401,28 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv
 		}
 
 		ASSERT(cached == buf->dtb_xamot);
+		/*
+		 * At this point we know the buffers have switched, so we
+		 * can decrement the over limit count if the old buffer was
+		 * over its limit. The new buffer might already be over its
+		 * limit, but we don't care since we're guaranteed not to be
+		 * checking the buffer over limit count at this point.
+		 */
+		if (over_limit) {
+			uint32_t old = os_atomic_dec_orig(&state->dts_buf_over_limit, relaxed);
+			#pragma unused(old)
+
+			/*
+			 * Verify that we didn't underflow the value
+			 */
+			ASSERT(old != 0);
+		}
 
 		/*
		 * We have our snapshot; now copy it out.
		 */
-		if (copyout(buf->dtb_xamot, (user_addr_t)desc.dtbd_data,
+		if (dtrace_buffer_copyout(buf->dtb_xamot,
+		    (user_addr_t)desc.dtbd_data,
 		    buf->dtb_xamot_offset) != 0) {
 			lck_mtx_unlock(&dtrace_lock);
 			return (EFAULT);
@@ -16820,7 +18551,7 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv
	 * and that the format for the specified index is non-NULL.
	 */
 	ASSERT(state->dts_formats != NULL);
-	str = state->dts_formats[fmt.dtfd_format - 1];
+	str = state->dts_formats[fmt.dtfd_format - 1]->dtf_str;
 	ASSERT(str != NULL);
 
 	len = strlen(str) + 1;
@@ -16852,7 +18583,7 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv
	 * Security restrictions make this operation illegal, if this is enabled DTrace
	 * must refuse to provide any fbt probes.
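	 */

The DTRACEIOC_SLEEP handler above follows the classic register-then-recheck discipline: it registers the wait first (assert_wait_deadline), re-checks dts_buf_over_limit, and aborts the wait if the condition already holds, so a limit crossing can never be lost between the check and the block. A condition-variable analogue of the same protocol, with hypothetical names, assuming a single wakeup ends the wait just as thread_block() does in the kernel:

#include <pthread.h>
#include <time.h>

struct tracer_state {                /* stand-in for dtrace_state_t */
	pthread_mutex_t m;
	pthread_cond_t  cv;
	int             buf_over_limit; /* cf. dts_buf_over_limit */
};

enum wake_reason { WAKE_TIMEOUT, WAKE_BUF_LIMIT };

/*
 * cf. DTRACEIOC_SLEEP: block until the deadline, a signal, or a buffer
 * crossing its limit, and report why we woke. The pre-block re-check is
 * what clear_wait(..., THREAD_INTERRUPTED) accomplishes in the kernel.
 */
static enum wake_reason
tracer_sleep(struct tracer_state *s, const struct timespec *deadline)
{
	enum wake_reason r = WAKE_TIMEOUT;

	pthread_mutex_lock(&s->m);
	if (s->buf_over_limit == 0)
		(void)pthread_cond_timedwait(&s->cv, &s->m, deadline);
	if (s->buf_over_limit > 0)
		r = WAKE_BUF_LIMIT;
	pthread_mutex_unlock(&s->m);
	return r;
}

/* cf. DTRACEIOC_SIGNAL, which simply calls wakeup(state). */
static void
tracer_signal(struct tracer_state *s)
{
	pthread_mutex_lock(&s->m);
	pthread_cond_broadcast(&s->cv);
	pthread_mutex_unlock(&s->m);
}

	/*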
*/ - if (dtrace_is_restricted()) { + if (dtrace_fbt_probes_restricted()) { cmn_err(CE_WARN, "security restrictions disallow DTRACEIOC_MODUUIDSLIST"); return (EPERM); } @@ -16902,12 +18633,8 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv lck_mtx_lock(&mod_lock); struct modctl* ctl = dtrace_modctl_list; while (ctl) { - /* Update the private probes bit */ - if (dtrace_provide_private_probes) - ctl->mod_flags |= MODCTL_FBT_PROVIDE_PRIVATE_PROBES; - ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl)); - if (!MOD_SYMBOLS_DONE(ctl)) { + if (!MOD_SYMBOLS_DONE(ctl) && !MOD_IS_STATIC_KEXT(ctl)) { dtmul_count++; rval = EINVAL; } @@ -16953,17 +18680,13 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv struct modctl* ctl = dtrace_modctl_list; while (ctl) { - /* Update the private probes bit */ - if (dtrace_provide_private_probes) - ctl->mod_flags |= MODCTL_FBT_PROVIDE_PRIVATE_PROBES; - /* * We assume that userspace symbols will be "better" than kernel level symbols, * as userspace can search for dSYM(s) and symbol'd binaries. Even if kernel syms * are available, add user syms if the module might use them. */ ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl)); - if (!MOD_SYMBOLS_DONE(ctl)) { + if (!MOD_SYMBOLS_DONE(ctl) && !MOD_IS_STATIC_KEXT(ctl)) { UUID* uuid = &uuids_list->dtmul_uuid[dtmul_count]; if (dtmul_count++ < uuids_list->dtmul_count) { memcpy(uuid, ctl->mod_uuid, sizeof(UUID)); @@ -17007,7 +18730,7 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv * Security restrictions make this operation illegal, if this is enabled DTrace * must refuse to provide any fbt probes. */ - if (dtrace_is_restricted()) { + if (dtrace_fbt_probes_restricted()) { cmn_err(CE_WARN, "security restrictions disallow DTRACEIOC_MODUUIDSLIST"); return (EPERM); } @@ -17037,7 +18760,7 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv * Range check the count. How much data can we pass around? * FIX ME! */ - if (dtmodsyms_count == 0 || (dtmodsyms_count > 100 * 1024)) { + if (dtmodsyms_count == 0) { cmn_err(CE_WARN, "dtmodsyms_count is not valid"); return (EINVAL); } @@ -17046,6 +18769,12 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv * Allocate a correctly sized structure and copyin the data. */ module_symbols_size = DTRACE_MODULE_SYMBOLS_SIZE(dtmodsyms_count); + if (module_symbols_size > (size_t)dtrace_copy_maxsize()) { + size_t dtmodsyms_max = DTRACE_MODULE_SYMBOLS_COUNT(dtrace_copy_maxsize()); + cmn_err(CE_WARN, "dtmodsyms_count %ld is too high, maximum is %ld", dtmodsyms_count, dtmodsyms_max); + return (ENOBUFS); + } + if ((module_symbols = kmem_alloc(module_symbols_size, KM_SLEEP)) == NULL) return (ENOMEM); @@ -17053,7 +18782,7 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv /* NOTE! 
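 */

The revised validation above drops the old magic 100K cap in favor of a limit derived from dtrace_copy_maxsize(). The essential steps generalize to any user-supplied count: reject zero, compute the byte size without overflow, and clamp against the transport maximum before allocating. A sketch with hypothetical per-entry sizes (the real code computes the size with DTRACE_MODULE_SYMBOLS_SIZE):

#include <errno.h>
#include <stddef.h>
#include <stdint.h>

#define SYM_ENTRY_SIZE 64u  /* stand-in per-entry size */
#define HDR_SIZE       16u  /* stand-in header size */

static int
symbols_size_for_count(uint64_t count, size_t copy_max, size_t *out)
{
	if (count == 0)
		return EINVAL;
	if (count > (SIZE_MAX - HDR_SIZE) / SYM_ENTRY_SIZE)
		return EINVAL;   /* the multiply would overflow size_t */

	size_t size = HDR_SIZE + (size_t)count * SYM_ENTRY_SIZE;
	if (size > copy_max)
		return ENOBUFS;  /* mirrors the dtrace_copy_maxsize() check */

	*out = size;
	return 0;
}

 /*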
We can no longer exit this method via return */
 	if (copyin(arg, module_symbols, module_symbols_size) != 0) {
-		cmn_err(CE_WARN, "failed copyin of dtrace_module_symbols_t, symbol count %llu", module_symbols->dtmodsyms_count);
+		cmn_err(CE_WARN, "failed copyin of dtrace_module_symbols_t");
 		rval = EFAULT;
 		goto module_symbols_cleanup;
 	}
@@ -17074,37 +18803,25 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv
 
 	struct modctl* ctl = dtrace_modctl_list;
 	while (ctl) {
-		/* Update the private probes bit */
-		if (dtrace_provide_private_probes)
-			ctl->mod_flags |= MODCTL_FBT_PROVIDE_PRIVATE_PROBES;
-
 		ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl));
-		if (MOD_HAS_UUID(ctl) && !MOD_SYMBOLS_DONE(ctl)) {
-			if (memcmp(module_symbols->dtmodsyms_uuid, ctl->mod_uuid, sizeof(UUID)) == 0) {
-				/* BINGO! */
-				ctl->mod_user_symbols = module_symbols;
-				break;
-			}
+		if (MOD_HAS_UUID(ctl) && !MOD_SYMBOLS_DONE(ctl) && memcmp(module_symbols->dtmodsyms_uuid, ctl->mod_uuid, sizeof(UUID)) == 0) {
+			dtrace_provider_t *prv;
+			ctl->mod_user_symbols = module_symbols;
+
+			/*
+			 * We're going to call each provider's per-module provide operation
+			 * specifying only this module.
+			 */
+			for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
+				prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);
+			/*
+			 * We gave every provider a chance to provide with the user syms; go ahead and clear them
+			 */
+			ctl->mod_user_symbols = NULL; /* MUST reset this to clear HAS_USERSPACE_SYMBOLS */
 		}
 		ctl = ctl->mod_next;
 	}
 
-	if (ctl) {
-		dtrace_provider_t *prv;
-
-		/*
-		 * We're going to call each providers per-module provide operation
-		 * specifying only this module.
-		 */
-		for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
-			prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);
-
-		/*
-		 * We gave every provider a chance to provide with the user syms, go ahead and clear them
-		 */
-		ctl->mod_user_symbols = NULL; /* MUST reset this to clear HAS_USERSPACE_SYMBOLS */
-	}
-
 	lck_mtx_unlock(&mod_lock);
 	lck_mtx_unlock(&dtrace_provider_lock);
 
@@ -17121,7 +18838,7 @@ dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv
 
 	case DTRACEIOC_PROCWAITFOR: {
 		dtrace_procdesc_t pdesc = {
-			.p_comm = {0},
+			.p_name = {0},
 			.p_pid  = -1
 		};
 
@@ -17240,15 +18957,18 @@ dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 	dtrace_probes = NULL;
 	dtrace_nprobes = 0;
 
+	dtrace_hash_destroy(dtrace_strings);
+	dtrace_hash_destroy(dtrace_byprov);
 	dtrace_hash_destroy(dtrace_bymod);
 	dtrace_hash_destroy(dtrace_byfunc);
 	dtrace_hash_destroy(dtrace_byname);
+	dtrace_strings = NULL;
+	dtrace_byprov = NULL;
 	dtrace_bymod = NULL;
 	dtrace_byfunc = NULL;
 	dtrace_byname = NULL;
 
 	kmem_cache_destroy(dtrace_state_cache);
-	vmem_destroy(dtrace_minor);
 	vmem_destroy(dtrace_arena);
 
 	if (dtrace_toxrange != NULL) {
@@ -17271,6 +18991,7 @@ dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
 	lck_mtx_unlock(&dtrace_lock);
 	lck_mtx_unlock(&dtrace_provider_lock);
 
+#ifdef illumos
 	/*
	 * We don't destroy the task queue until after we have dropped our
	 * locks (taskq_destroy() may block on running tasks).  To prevent
@@ -17281,6 +19002,7 @@ dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
	 */
 	taskq_destroy(dtrace_taskq);
 	dtrace_taskq = NULL;
+#endif
 
 	return (DDI_SUCCESS);
 }
@@ -17365,26 +19087,20 @@ helper_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p)
 
 #define HELPER_MAJOR  -24 /* let the kernel pick the device number */
 
-/*
- * A struct describing which functions will get invoked for certain
- * actions.
- */ -static struct cdevsw helper_cdevsw = -{ - helper_open, /* open */ - helper_close, /* close */ - eno_rdwrt, /* read */ - eno_rdwrt, /* write */ - helper_ioctl, /* ioctl */ - (stop_fcn_t *)nulldev, /* stop */ - (reset_fcn_t *)nulldev, /* reset */ - NULL, /* tty's */ - eno_select, /* select */ - eno_mmap, /* mmap */ - eno_strat, /* strategy */ - eno_getc, /* getc */ - eno_putc, /* putc */ - 0 /* type */ +const static struct cdevsw helper_cdevsw = +{ + .d_open = helper_open, + .d_close = helper_close, + .d_read = eno_rdwrt, + .d_write = eno_rdwrt, + .d_ioctl = helper_ioctl, + .d_stop = (stop_fcn_t *)nulldev, + .d_reset = (reset_fcn_t *)nulldev, + .d_select = eno_select, + .d_mmap = eno_mmap, + .d_strategy = eno_strat, + .d_reserved_1 = eno_getc, + .d_reserved_2 = eno_putc, }; static int helper_majdevno = 0; @@ -17424,28 +19140,13 @@ helper_init( void ) #undef HELPER_MAJOR -/* - * Called with DEVFS_LOCK held, so vmem_alloc's underlying blist structures are protected. - */ static int dtrace_clone_func(dev_t dev, int action) { #pragma unused(dev) if (action == DEVFS_CLONE_ALLOC) { - if (NULL == dtrace_minor) /* Arena not created yet!?! */ - return 0; - else { - /* - * Propose a minor number, namely the next number that vmem_alloc() will return. - * Immediately put it back in play by calling vmem_free(). FIXME. - */ - int ret = (int)(uintptr_t)vmem_alloc(dtrace_minor, 1, VM_BESTFIT | VM_SLEEP); - - vmem_free(dtrace_minor, (void *)(uintptr_t)ret, 1); - - return ret; - } + return dtrace_state_reserve(); } else if (action == DEVFS_CLONE_FREE) { return 0; @@ -17453,24 +19154,50 @@ dtrace_clone_func(dev_t dev, int action) else return -1; } +void dtrace_ast(void); + +void +dtrace_ast(void) +{ + int i; + uint32_t clients = os_atomic_xchg(&dtrace_wake_clients, 0, relaxed); + if (clients == 0) + return; + /** + * We disable preemption here to be sure that we won't get + * interrupted by a wakeup to a thread that is higher + * priority than us, so that we do issue all wakeups + */ + disable_preemption(); + for (i = 0; i < DTRACE_NCLIENTS; i++) { + if (clients & (1 << i)) { + dtrace_state_t *state = dtrace_state_get(i); + if (state) { + wakeup(state); + } + + } + } + enable_preemption(); +} + + #define DTRACE_MAJOR -24 /* let the kernel pick the device number */ -static struct cdevsw dtrace_cdevsw = -{ - _dtrace_open, /* open */ - _dtrace_close, /* close */ - eno_rdwrt, /* read */ - eno_rdwrt, /* write */ - _dtrace_ioctl, /* ioctl */ - (stop_fcn_t *)nulldev, /* stop */ - (reset_fcn_t *)nulldev, /* reset */ - NULL, /* tty's */ - eno_select, /* select */ - eno_mmap, /* mmap */ - eno_strat, /* strategy */ - eno_getc, /* getc */ - eno_putc, /* putc */ - 0 /* type */ +static const struct cdevsw dtrace_cdevsw = +{ + .d_open = _dtrace_open, + .d_close = _dtrace_close, + .d_read = eno_rdwrt, + .d_write = eno_rdwrt, + .d_ioctl = _dtrace_ioctl, + .d_stop = (stop_fcn_t *)nulldev, + .d_reset = (reset_fcn_t *)nulldev, + .d_select = eno_select, + .d_mmap = eno_mmap, + .d_strategy = eno_strat, + .d_reserved_1 = eno_getc, + .d_reserved_2 = eno_putc, }; lck_attr_t* dtrace_lck_attr; @@ -17479,20 +19206,40 @@ lck_grp_t* dtrace_lck_grp; static int gMajDevNo; +void dtrace_early_init (void) +{ + dtrace_restriction_policy_load(); + + /* + * See dtrace_impl.h for a description of kernel symbol modes. + * The default is to wait for symbols from userspace (lazy symbols). 
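	 */

dtrace_ast() above is a claim-and-drain loop: a single atomic exchange claims every pending wakeup bit at once, so each client's request is observed exactly once, and any bit that races in afterward is simply left for the next AST. The same pattern in C11 atomics, with hypothetical names standing in for dtrace_wake_clients and wakeup(state):

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

#define NCLIENTS 32
static _Atomic uint32_t wake_clients;

/* cf. the path that sets a bit for a consumer's minor number. */
static void
request_wakeup(int client)
{
	atomic_fetch_or_explicit(&wake_clients, 1u << client,
	    memory_order_relaxed);
}

/*
 * cf. dtrace_ast(): exchange claims all pending bits atomically; bits set
 * after the exchange are handled by the next drain, never lost.
 */
static void
drain_wakeups(void)
{
	uint32_t clients = atomic_exchange_explicit(&wake_clients, 0,
	    memory_order_relaxed);

	for (int i = 0; i < NCLIENTS; i++) {
		if (clients & (1u << i))
			printf("wake client %d\n", i); /* kernel: wakeup(state) */
	}
}

The kernel version additionally disables preemption around the wakeup loop so a woken higher-priority thread cannot preempt it before every pending wakeup has been issued.

	/*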
+ */ + if (!PE_parse_boot_argn("dtrace_kernel_symbol_mode", &dtrace_kernel_symbol_mode, sizeof (dtrace_kernel_symbol_mode))) { + dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE; + } +} + void dtrace_init( void ) { if (0 == gDTraceInited) { - int i, ncpu; + unsigned int i, ncpu; size_t size = sizeof(dtrace_buffer_memory_maxsize); + /* + * Disable destructive actions when dtrace is running + * in a restricted environment + */ + dtrace_destructive_disallow = dtrace_is_restricted() && + !dtrace_are_restrictions_relaxed(); + /* * DTrace allocates buffers based on the maximum number * of enabled cpus. This call avoids any race when finding * that count. */ ASSERT(dtrace_max_cpus == 0); - ncpu = dtrace_max_cpus = ml_get_max_cpus(); + ncpu = dtrace_max_cpus = ml_wait_max_cpus(); /* * Retrieve the size of the physical memory in order to define @@ -17530,26 +19277,11 @@ dtrace_init( void ) return; } -#if defined(DTRACE_MEMORY_ZONES) - /* - * Initialize the dtrace kalloc-emulation zones. - */ - dtrace_alloc_init(); -#endif /* DTRACE_MEMORY_ZONES */ - - /* - * Allocate the dtrace_probe_t zone - */ - dtrace_probe_t_zone = zinit(sizeof(dtrace_probe_t), - 1024 * sizeof(dtrace_probe_t), - sizeof(dtrace_probe_t), - "dtrace.dtrace_probe_t"); - /* * Create the dtrace lock group and attrs. */ dtrace_lck_attr = lck_attr_alloc_init(); - dtrace_lck_grp_attr= lck_grp_attr_alloc_init(); + dtrace_lck_grp_attr= lck_grp_attr_alloc_init(); dtrace_lck_grp = lck_grp_alloc_init("dtrace", dtrace_lck_grp_attr); /* @@ -17597,14 +19329,18 @@ dtrace_init( void ) lck_mtx_lock(&cpu_lock); for (i = 0; i < ncpu; ++i) - /* FIXME: track CPU configuration a la CHUD Processor Pref Pane. */ + /* FIXME: track CPU configuration */ dtrace_cpu_setup_initial( (processorid_t)i ); /* In lieu of register_cpu_setup_func() callback */ lck_mtx_unlock(&cpu_lock); (void)dtrace_abs_to_nano(0LL); /* Force once only call to clock_timebase_info (which can take a lock) */ + dtrace_strings = dtrace_hash_create(dtrace_strkey_offset, + offsetof(dtrace_string_t, dtst_str), + offsetof(dtrace_string_t, dtst_next), + offsetof(dtrace_string_t, dtst_prev)); + dtrace_isa_init(); - /* * See dtrace_impl.h for a description of dof modes. * The default is lazy dof. @@ -17613,7 +19349,11 @@ dtrace_init( void ) * makes no sense... */ if (!PE_parse_boot_argn("dtrace_dof_mode", &dtrace_dof_mode, sizeof (dtrace_dof_mode))) { +#if defined(XNU_TARGET_OS_OSX) dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON; +#else + dtrace_dof_mode = DTRACE_DOF_MODE_NEVER; +#endif } /* @@ -17638,14 +19378,11 @@ dtrace_init( void ) break; } - /* - * See dtrace_impl.h for a description of kernel symbol modes. - * The default is to wait for symbols from userspace (lazy symbols). - */ - if (!PE_parse_boot_argn("dtrace_kernel_symbol_mode", &dtrace_kernel_symbol_mode, sizeof (dtrace_kernel_symbol_mode))) { - dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE; - } - +#if CONFIG_DTRACE + if (dtrace_dof_mode != DTRACE_DOF_MODE_NEVER) + commpage_update_dof(true); +#endif + gDTraceInited = 1; } else @@ -17660,7 +19397,7 @@ dtrace_postinit(void) * run. That way, anonymous DOF enabled under dtrace_attach() is safe * to go. */ - dtrace_attach( (dev_info_t *)(uintptr_t)makedev(gMajDevNo, 0), 0 ); /* Punning a dev_t to a dev_info_t* */ + dtrace_attach( (dev_info_t *)(uintptr_t)makedev(gMajDevNo, 0)); /* Punning a dev_t to a dev_info_t* */ /* * Add the mach_kernel to the module list for lazy processing