X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/935ed37a5c468c8a1c07408573c08b8b7ef80e8b..d190cdc3f5544636abb56dc1874be391d3e1b148:/bsd/dev/dtrace/dtrace.c diff --git a/bsd/dev/dtrace/dtrace.c b/bsd/dev/dtrace/dtrace.c index c28ae6f0b..30d41336c 100644 --- a/bsd/dev/dtrace/dtrace.c +++ b/bsd/dev/dtrace/dtrace.c @@ -20,11 +20,16 @@ */ /* - * Copyright 2006 Sun Microsystems, Inc. All rights reserved. + * Portions Copyright (c) 2013, 2016, Joyent, Inc. All rights reserved. + * Portions Copyright (c) 2013 by Delphix. All rights reserved. + */ + +/* + * Copyright 2009 Sun Microsystems, Inc. All rights reserved. * Use is subject to license terms. */ -/* #pragma ident "@(#)dtrace.c 1.49 06/08/11 SMI" */ +/* #pragma ident "@(#)dtrace.c 1.65 08/07/02 SMI" */ /* * DTrace - Dynamic Tracing for Solaris @@ -56,6 +61,7 @@ * - Enabling functions * - DOF functions * - Anonymous enabling functions + * - Process functions * - Consumer state functions * - Helper functions * - Hook functions @@ -65,9 +71,6 @@ * [Group] Functions", allowing one to find each block by searching forward * on capital-f functions. */ - -#define _DTRACE_WANT_PROC_GLUE_ 1 - #include #include #include @@ -75,6 +78,7 @@ #include #include #include +#include #include #include #include @@ -87,7 +91,23 @@ #include #include #include +#include #include +#include +#include +#include +#include +#include +#include + +#include +extern uint32_t pmap_find_phys(void *, uint64_t); +extern boolean_t pmap_valid_page(uint32_t); +extern void OSKextRegisterKextsWithDTrace(void); +extern kmod_info_t g_kernel_kmod_info; + +/* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. */ +#define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */ #define t_predcache t_dtrace_predcache /* Cosmetic. Helps readability of thread.h */ @@ -95,23 +115,21 @@ extern void dtrace_suspend(void); extern void dtrace_resume(void); extern void dtrace_init(void); extern void helper_init(void); +extern void fasttrap_init(void); -#if defined(__APPLE__) - -#include "../../../osfmk/chud/chud_dtrace.h" - -extern kern_return_t chudxnu_dtrace_callback - (uint64_t selector, uint64_t *args, uint32_t count); -#endif +static int dtrace_lazy_dofs_duplicate(proc_t *, proc_t *); +extern void dtrace_lazy_dofs_destroy(proc_t *); +extern void dtrace_postinit(void); +extern void dtrace_proc_fork(proc_t*, proc_t*, int); +extern void dtrace_proc_exec(proc_t*); +extern void dtrace_proc_exit(proc_t*); /* * DTrace Tunable Variables * - * The following variables may be tuned by adding a line to /etc/system that - * includes both the name of the DTrace module ("dtrace") and the name of the - * variable. For example: - * - * set dtrace:dtrace_destructive_disallow = 1 + * The following variables may be dynamically tuned by using sysctl(8), the + * variables being stored in the kern.dtrace namespace. For example: + * sysctl kern.dtrace.dof_maxsize = 1048575 # 1M * * In general, the only variables that one should be tuning this way are those * that affect system-wide DTrace behavior, and for which the default behavior @@ -120,26 +138,26 @@ extern kern_return_t chudxnu_dtrace_callback * When tuning these variables, avoid pathological values; while some attempt * is made to verify the integrity of these variables, they are not considered * part of the supported interface to DTrace, and they are therefore not - * checked comprehensively. 
Further, these variables should not be tuned - * dynamically via "mdb -kw" or other means; they should only be tuned via - * /etc/system. + * checked comprehensively. */ +uint64_t dtrace_buffer_memory_maxsize = 0; /* initialized in dtrace_init */ +uint64_t dtrace_buffer_memory_inuse = 0; int dtrace_destructive_disallow = 0; -#if defined(__APPLE__) -#define proc_t struct proc -#endif /* __APPLE__ */ dtrace_optval_t dtrace_nonroot_maxsize = (16 * 1024 * 1024); size_t dtrace_difo_maxsize = (256 * 1024); -dtrace_optval_t dtrace_dof_maxsize = (256 * 1024); -size_t dtrace_global_maxsize = (16 * 1024); +dtrace_optval_t dtrace_dof_maxsize = (384 * 1024); +dtrace_optval_t dtrace_statvar_maxsize = (16 * 1024); +dtrace_optval_t dtrace_statvar_maxsize_max = (16 * 10 * 1024); size_t dtrace_actions_max = (16 * 1024); size_t dtrace_retain_max = 1024; dtrace_optval_t dtrace_helper_actions_max = 32; -dtrace_optval_t dtrace_helper_providers_max = 32; +dtrace_optval_t dtrace_helper_providers_max = 64; dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024); size_t dtrace_strsize_default = 256; -dtrace_optval_t dtrace_cleanrate_default = 9900990; /* 101 hz */ -dtrace_optval_t dtrace_cleanrate_min = 200000; /* 5000 hz */ +dtrace_optval_t dtrace_strsize_min = 8; +dtrace_optval_t dtrace_strsize_max = 65536; +dtrace_optval_t dtrace_cleanrate_default = 990099000; /* 1.1 hz */ +dtrace_optval_t dtrace_cleanrate_min = 20000000; /* 50 hz */ dtrace_optval_t dtrace_cleanrate_max = (uint64_t)60 * NANOSEC; /* 1/minute */ dtrace_optval_t dtrace_aggrate_default = NANOSEC; /* 1 hz */ dtrace_optval_t dtrace_statusrate_default = NANOSEC; /* 1 hz */ @@ -151,11 +169,15 @@ dtrace_optval_t dtrace_stackframes_default = 20; dtrace_optval_t dtrace_ustackframes_default = 20; dtrace_optval_t dtrace_jstackframes_default = 50; dtrace_optval_t dtrace_jstackstrsize_default = 512; +dtrace_optval_t dtrace_buflimit_default = 75; +dtrace_optval_t dtrace_buflimit_min = 1; +dtrace_optval_t dtrace_buflimit_max = 99; int dtrace_msgdsize_max = 128; hrtime_t dtrace_chill_max = 500 * (NANOSEC / MILLISEC); /* 500 ms */ hrtime_t dtrace_chill_interval = NANOSEC; /* 1000 ms */ int dtrace_devdepth_max = 32; int dtrace_err_verbose; +int dtrace_provide_private_probes = 0; hrtime_t dtrace_deadman_interval = NANOSEC; hrtime_t dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC; hrtime_t dtrace_deadman_user = (hrtime_t)30 * NANOSEC; @@ -170,13 +192,12 @@ hrtime_t dtrace_deadman_user = (hrtime_t)30 * NANOSEC; * it is used by some translators as an implementation detail. 
 */
const char dtrace_zero[256] = { 0 }; /* zero-filled memory */
-
+unsigned int dtrace_max_cpus = 0; /* number of enabled cpus */
 /*
  * DTrace Internal Variables
  */
 static dev_info_t *dtrace_devi; /* device info */
 static vmem_t *dtrace_arena; /* probe ID arena */
-static vmem_t *dtrace_minor; /* minor number arena */
 static taskq_t *dtrace_taskq; /* task queue */
 static dtrace_probe_t **dtrace_probes; /* array of all probes */
 static int dtrace_nprobes; /* number of probes */
@@ -184,7 +205,6 @@ static dtrace_provider_t *dtrace_provider; /* provider list */
 static dtrace_meta_t *dtrace_meta_pid; /* user-land meta provider */
 static int dtrace_opens; /* number of opens */
 static int dtrace_helpers; /* number of helpers */
-static void *dtrace_softstate; /* softstate pointer */
 static dtrace_hash_t *dtrace_bymod; /* probes hashed by module */
 static dtrace_hash_t *dtrace_byfunc; /* probes hashed by function */
 static dtrace_hash_t *dtrace_byname; /* probes hashed by name */
@@ -199,23 +219,29 @@ static dtrace_ecb_t *dtrace_ecb_create_cache; /* cached created ECB */
 static dtrace_genid_t dtrace_probegen; /* current probe generation */
 static dtrace_helpers_t *dtrace_deferred_pid; /* deferred helper list */
 static dtrace_enabling_t *dtrace_retained; /* list of retained enablings */
+static dtrace_genid_t dtrace_retained_gen; /* current retained enab gen */
 static dtrace_dynvar_t dtrace_dynhash_sink; /* end of dynamic hash chains */
-#if defined(__APPLE__)
-static int dtrace_dof_mode; /* dof mode */
-#endif
-#if defined(__APPLE__)
+static int dtrace_dof_mode; /* See dtrace_impl.h for a description of Darwin's dof modes. */
+
+ /*
+ * This doesn't quite fit as an internal variable, as it must be accessed in
+ * fbt_provide and sdt_provide. It's clearly not a dtrace tunable variable either...
+ */
+int dtrace_kernel_symbol_mode; /* See dtrace_impl.h for a description of Darwin's kernel symbol modes. */
+static uint32_t dtrace_wake_clients;
+
 /*
  * To save memory, some common memory allocations are given a
- * unique zone. In example, dtrace_probe_t is 72 bytes in size,
+ * unique zone. For example, dtrace_probe_t is 72 bytes in size,
  * which means it would fall into the kalloc.128 bucket. With
  * 20k elements allocated, the space saved is substantial.
  */
 struct zone *dtrace_probe_t_zone;
-#endif
+static int dtrace_module_unloaded(struct kmod_info *kmod);
 
 /*
  * DTrace Locking
@@ -248,12 +274,14 @@ struct zone *dtrace_probe_t_zone;
  * acquired _between_ dtrace_provider_lock and dtrace_lock.
  */
+
 /*
  * APPLE NOTE:
  *
- * All kmutex_t vars have been changed to lck_mtx_t.
- * Note that lck_mtx_t's require explicit initialization.
+ * For porting purposes, all kmutex_t vars have been changed
+ * to lck_mtx_t, which require explicit initialization.
+ *
+ * kmutex_t becomes lck_mtx_t
  * mutex_enter() becomes lck_mtx_lock()
  * mutex_exit() becomes lck_mtx_unlock()
  *
@@ -263,14 +291,11 @@ struct zone *dtrace_probe_t_zone;
  * becomes:
  * lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
  *
- * Due to the number of these changes, they are not called out explicitly.
*/ static lck_mtx_t dtrace_lock; /* probe state lock */ static lck_mtx_t dtrace_provider_lock; /* provider state lock */ static lck_mtx_t dtrace_meta_lock; /* meta-provider state lock */ -#if defined(__APPLE__) static lck_rw_t dtrace_dof_mode_lock; /* dof mode lock */ -#endif /* * DTrace Provider Variables @@ -290,10 +315,16 @@ static void dtrace_nullop(void) {} +static int +dtrace_enable_nullop(void) +{ + return (0); +} + static dtrace_pops_t dtrace_provider_ops = { (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop, (void (*)(void *, struct modctl *))dtrace_nullop, - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, + (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop, (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, (void (*)(void *, dtrace_id_t, void *))dtrace_nullop, @@ -313,14 +344,15 @@ dtrace_id_t dtrace_probeid_error; /* special ERROR probe */ uint32_t dtrace_helptrace_next = 0; uint32_t dtrace_helptrace_nlocals; char *dtrace_helptrace_buffer; -int dtrace_helptrace_bufsize = 512 * 1024; +size_t dtrace_helptrace_bufsize = 512 * 1024; -#ifdef DEBUG +#if DEBUG int dtrace_helptrace_enabled = 1; #else int dtrace_helptrace_enabled = 0; #endif + /* * DTrace Error Hashing * @@ -330,7 +362,7 @@ int dtrace_helptrace_enabled = 0; * debugging problems in the DIF code generator or in DOF generation . The * error hash may be examined with the ::dtrace_errhash MDB dcmd. */ -#ifdef DEBUG +#if DEBUG static dtrace_errhash_t dtrace_errhash[DTRACE_ERRHASHSZ]; static const char *dtrace_errlast; static kthread_t *dtrace_errthread; @@ -360,9 +392,11 @@ static lck_mtx_t dtrace_errlock; #define DTRACE_AGGHASHSIZE_SLEW 17 +#define DTRACE_V4MAPPED_OFFSET (sizeof (uint32_t) * 3) + /* * The key for a thread-local variable consists of the lower 61 bits of the - * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL. + * current_thread(), plus the 3 bits of the highest active interrupt above LOCK_LEVEL. * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never * equal to a variable identifier. This is necessary (but not sufficient) to * assure that global associative arrays never collide with thread-local @@ -375,79 +409,30 @@ static lck_mtx_t dtrace_errlock; * no way for a global variable key signature to match a thread-local key * signature. */ -#if !defined(__APPLE__) +#if defined (__x86_64__) +/* FIXME: two function calls!! 
*/ #define DTRACE_TLS_THRKEY(where) { \ - uint_t intr = 0; \ - uint_t actv = CPU->cpu_intr_actv >> (LOCK_LEVEL + 1); \ - for (; actv; actv >>= 1) \ - intr++; \ + uint_t intr = ml_at_interrupt_context(); /* Note: just one measly bit */ \ + uint64_t thr = (uintptr_t)current_thread(); \ ASSERT(intr < (1 << 3)); \ - (where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \ + (where) = ((thr + DIF_VARIABLE_MAX) & \ (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ } #else -#define DTRACE_TLS_THRKEY(where) { \ - uint_t intr = ml_at_interrupt_context(); /* XXX just one measely bit */ \ - uint_t thr = (uint_t)current_thread(); \ - uint_t pid = (uint_t)proc_selfpid(); \ - ASSERT(intr < (1 << 3)); \ - (where) = ((((uint64_t)thr << 32 | pid) + DIF_VARIABLE_MAX) & \ - (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \ -} -#endif /* __APPLE__ */ +#error Unknown architecture +#endif + +#define DT_BSWAP_8(x) ((x) & 0xff) +#define DT_BSWAP_16(x) ((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8)) +#define DT_BSWAP_32(x) ((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((x) >> 16)) +#define DT_BSWAP_64(x) ((DT_BSWAP_32(x) << 32) | DT_BSWAP_32((x) >> 32)) + +#define DT_MASK_LO 0x00000000FFFFFFFFULL #define DTRACE_STORE(type, tomax, offset, what) \ *((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what); -#if !defined(__APPLE__) -#if !(defined(__i386__) || defined (__x86_64__)) -#define DTRACE_ALIGNCHECK(addr, size, flags) \ - if (addr & (size - 1)) { \ - *flags |= CPU_DTRACE_BADALIGN; \ - cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \ - return (0); \ - } -#else -#define DTRACE_ALIGNCHECK(addr, size, flags) -#endif -#define DTRACE_LOADFUNC(bits) \ -/*CSTYLED*/ \ -uint##bits##_t \ -dtrace_load##bits(uintptr_t addr) \ -{ \ - size_t size = bits / NBBY; \ - /*CSTYLED*/ \ - uint##bits##_t rval; \ - int i; \ - volatile uint16_t *flags = (volatile uint16_t *) \ - &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; \ - \ - DTRACE_ALIGNCHECK(addr, size, flags); \ - \ - for (i = 0; i < dtrace_toxranges; i++) { \ - if (addr >= dtrace_toxrange[i].dtt_limit) \ - continue; \ - \ - if (addr + size <= dtrace_toxrange[i].dtt_base) \ - continue; \ - \ - /* \ - * This address falls within a toxic region; return 0. \ - */ \ - *flags |= CPU_DTRACE_BADADDR; \ - cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \ - return (0); \ - } \ - \ - *flags |= CPU_DTRACE_NOFAULT; \ - /*CSTYLED*/ \ - rval = *((volatile uint##bits##_t *)addr); \ - *flags &= ~CPU_DTRACE_NOFAULT; \ - \ - return (rval); \ -} -#else #define DTRACE_ALIGNCHECK(addr, size, flags) \ if (addr & (MIN(size,4) - 1)) { \ *flags |= CPU_DTRACE_BADALIGN; \ @@ -455,11 +440,41 @@ dtrace_load##bits(uintptr_t addr) \ return (0); \ } -#define RECOVER_LABEL(bits) __asm__ volatile("_dtraceLoadRecover" #bits ":" ); +#define DTRACE_RANGE_REMAIN(remp, addr, baseaddr, basesz) \ +do { \ + if ((remp) != NULL) { \ + *(remp) = (uintptr_t)(baseaddr) + (basesz) - (addr); \ + } \ +} while (0) + + +/* + * Test whether a range of memory starting at testaddr of size testsz falls + * within the range of memory described by addr, sz. We take care to avoid + * problems with overflow and underflow of the unsigned quantities, and + * disallow all negative sizes. Ranges of size 0 are allowed. + */ +#define DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \ + ((testaddr) - (baseaddr) < (basesz) && \ + (testaddr) + (testsz) - (baseaddr) <= (basesz) && \ + (testaddr) + (testsz) >= (testaddr)) + +/* + * Test whether alloc_sz bytes will fit in the scratch region. 
We isolate + * alloc_sz on the righthand side of the comparison in order to avoid overflow + * or underflow in the comparison with it. This is simpler than the INRANGE + * check above, because we know that the dtms_scratch_ptr is valid in the + * range. Allocations of size zero are allowed. + */ +#define DTRACE_INSCRATCH(mstate, alloc_sz) \ + ((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \ + (mstate)->dtms_scratch_ptr >= (alloc_sz)) + +#define RECOVER_LABEL(bits) dtraceLoadRecover##bits: +#if defined (__x86_64__) || (defined (__arm__) || defined (__arm64__)) #define DTRACE_LOADFUNC(bits) \ /*CSTYLED*/ \ -extern vm_offset_t dtraceLoadRecover##bits; \ uint##bits##_t dtrace_load##bits(uintptr_t addr); \ \ uint##bits##_t \ @@ -469,7 +484,6 @@ dtrace_load##bits(uintptr_t addr) \ /*CSTYLED*/ \ uint##bits##_t rval = 0; \ int i; \ - ppnum_t pp; \ volatile uint16_t *flags = (volatile uint16_t *) \ &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; \ \ @@ -490,21 +504,23 @@ dtrace_load##bits(uintptr_t addr) \ return (0); \ } \ \ - pp = pmap_find_phys(kernel_pmap, addr); \ - \ - if (0 == pp || /* pmap_find_phys failed ? */ \ - !dtxnu_is_RAM_page(pp) /* Backed by RAM? */ ) { \ + { \ + volatile vm_offset_t recover = (vm_offset_t)&&dtraceLoadRecover##bits; \ + *flags |= CPU_DTRACE_NOFAULT; \ + recover = dtrace_set_thread_recover(current_thread(), recover); \ + /*CSTYLED*/ \ + /* \ + * PR6394061 - avoid device memory that is unpredictably \ + * mapped and unmapped \ + */ \ + if (pmap_valid_page(pmap_find_phys(kernel_pmap, addr))) \ + rval = *((volatile uint##bits##_t *)addr); \ + else { \ *flags |= CPU_DTRACE_BADADDR; \ cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \ return (0); \ } \ \ - { \ - volatile vm_offset_t recover = (vm_offset_t)&dtraceLoadRecover##bits; \ - *flags |= CPU_DTRACE_NOFAULT; \ - recover = dtrace_set_thread_recover(current_thread(), recover); \ - /*CSTYLED*/ \ - rval = *((volatile uint##bits##_t *)addr); \ RECOVER_LABEL(bits); \ (void)dtrace_set_thread_recover(current_thread(), recover); \ *flags &= ~CPU_DTRACE_NOFAULT; \ @@ -512,8 +528,9 @@ dtrace_load##bits(uintptr_t addr) \ \ return (rval); \ } -#endif /* __APPLE__ */ - +#else /* all other architectures */ +#error Unknown Architecture +#endif #ifdef __LP64__ #define dtrace_loadptr dtrace_load64 @@ -525,6 +542,7 @@ dtrace_load##bits(uintptr_t addr) \ #define DTRACE_DYNHASH_SINK 1 #define DTRACE_DYNHASH_VALID 2 +#define DTRACE_MATCH_FAIL -1 #define DTRACE_MATCH_NEXT 0 #define DTRACE_MATCH_DONE 1 #define DTRACE_ANCHORED(probe) ((probe)->dtpr_func[0] != '\0') @@ -539,15 +557,19 @@ dtrace_load##bits(uintptr_t addr) \ ((flags) & CPU_DTRACE_TUPOFLOW) ? DTRACEFLT_TUPOFLOW : \ ((flags) & CPU_DTRACE_BADALIGN) ? DTRACEFLT_BADALIGN : \ ((flags) & CPU_DTRACE_NOSCRATCH) ? DTRACEFLT_NOSCRATCH : \ + ((flags) & CPU_DTRACE_BADSTACK) ? 
DTRACEFLT_BADSTACK : \
	DTRACEFLT_UNKNOWN)

 #define DTRACEACT_ISSTRING(act) \
	((act)->dta_kind == DTRACEACT_DIFEXPR && \
	(act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING)
+
+static size_t dtrace_strlen(const char *, size_t);
 static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
 static void dtrace_enabling_provide(dtrace_provider_t *);
-static int dtrace_enabling_match(dtrace_enabling_t *, int *);
+static int dtrace_enabling_match(dtrace_enabling_t *, int *, dtrace_match_cond_t *cond);
+static void dtrace_enabling_matchall_with_cond(dtrace_match_cond_t *cond);
 static void dtrace_enabling_matchall(void);
 static dtrace_state_t *dtrace_anon_grab(void);
 static uint64_t dtrace_helper(int, dtrace_mstate_t *,
@@ -558,8 +580,233 @@ static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
     dtrace_state_t *, dtrace_mstate_t *);
 static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
     dtrace_optval_t);
-static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
+static int dtrace_ecb_create_enable(dtrace_probe_t *, void *, void *);
 static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
+static int dtrace_canload_remains(uint64_t, size_t, size_t *,
+	dtrace_mstate_t *, dtrace_vstate_t *);
+static int dtrace_canstore_remains(uint64_t, size_t, size_t *,
+	dtrace_mstate_t *, dtrace_vstate_t *);
+
+
+/*
+ * DTrace sysctl handlers
+ *
+ * These declarations and functions are used for advanced DTrace configuration.
+ * Most of them are not set on a per-consumer basis and may impact other DTrace
+ * consumers. Correctness is not guaranteed for all of these variables, so be
+ * careful about the values you use.
+ */
+
+SYSCTL_DECL(_kern_dtrace);
+SYSCTL_NODE(_kern, OID_AUTO, dtrace, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "dtrace");
+
+static int
+sysctl_dtrace_err_verbose SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg2)
+	int changed, error;
+	int value = *(int *) arg1;
+
+	error = sysctl_io_number(req, value, sizeof(value), &value, &changed);
+	if (error || !changed)
+		return (error);
+
+	if (value != 0 && value != 1)
+		return (ERANGE);
+
+	lck_mtx_lock(&dtrace_lock);
+	dtrace_err_verbose = value;
+	lck_mtx_unlock(&dtrace_lock);
+
+	return (0);
+}
+
+/*
+ * kern.dtrace.err_verbose
+ *
+ * Set DTrace verbosity when an error occurs (0 = disabled, 1 = enabled).
+ * Errors are reported when a DIFO or a DOF has been rejected by the kernel.
+ */
+SYSCTL_PROC(_kern_dtrace, OID_AUTO, err_verbose,
+	CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
+	&dtrace_err_verbose, 0,
+	sysctl_dtrace_err_verbose, "I", "dtrace error verbose");
+
+static int
+sysctl_dtrace_buffer_memory_maxsize SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg2, req)
+	int changed, error;
+	uint64_t value = *(uint64_t *) arg1;
+
+	error = sysctl_io_number(req, value, sizeof(value), &value, &changed);
+	if (error || !changed)
+		return (error);
+
+	if (value <= dtrace_buffer_memory_inuse)
+		return (ERANGE);
+
+	lck_mtx_lock(&dtrace_lock);
+	dtrace_buffer_memory_maxsize = value;
+	lck_mtx_unlock(&dtrace_lock);
+
+	return (0);
+}
+
+/*
+ * kern.dtrace.buffer_memory_maxsize
+ *
+ * Set the maximum size, in bytes, of memory used by all the consumers' state
+ * buffers. By default the limit is PHYS_MEM / 3 for *all* consumers. Attempting
+ * to set a null or negative value, or a value less than or equal to
+ * dtrace_buffer_memory_inuse, will result in a failure.
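+ *
+ * For example, following the convention used above (illustrative value,
+ * not a default):
+ *	sysctl kern.dtrace.buffer_memory_maxsize = 4294967296	# 4G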
+ */
+SYSCTL_PROC(_kern_dtrace, OID_AUTO, buffer_memory_maxsize,
+	CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
+	&dtrace_buffer_memory_maxsize, 0,
+	sysctl_dtrace_buffer_memory_maxsize, "Q", "dtrace state buffer memory maxsize");
+
+/*
+ * kern.dtrace.buffer_memory_inuse
+ *
+ * Current state buffer memory used, in bytes, by all the DTrace consumers.
+ * This value is read-only.
+ */
+SYSCTL_QUAD(_kern_dtrace, OID_AUTO, buffer_memory_inuse, CTLFLAG_RD | CTLFLAG_LOCKED,
+	&dtrace_buffer_memory_inuse, "dtrace state buffer memory in-use");
+
+static int
+sysctl_dtrace_difo_maxsize SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg2, req)
+	int changed, error;
+	size_t value = *(size_t*) arg1;
+
+	error = sysctl_io_number(req, value, sizeof(value), &value, &changed);
+	if (error || !changed)
+		return (error);
+
+	if (value <= 0)
+		return (ERANGE);
+
+	lck_mtx_lock(&dtrace_lock);
+	dtrace_difo_maxsize = value;
+	lck_mtx_unlock(&dtrace_lock);
+
+	return (0);
+}
+
+/*
+ * kern.dtrace.difo_maxsize
+ *
+ * Set the DIFO max size in bytes; see the definition of dtrace_difo_maxsize
+ * for the default value. Attempting to set a null or negative size will
+ * result in a failure.
+ */
+SYSCTL_PROC(_kern_dtrace, OID_AUTO, difo_maxsize,
+	CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
+	&dtrace_difo_maxsize, 0,
+	sysctl_dtrace_difo_maxsize, "Q", "dtrace difo maxsize");
+
+static int
+sysctl_dtrace_dof_maxsize SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg2, req)
+	int changed, error;
+	dtrace_optval_t value = *(dtrace_optval_t *) arg1;
+
+	error = sysctl_io_number(req, value, sizeof(value), &value, &changed);
+	if (error || !changed)
+		return (error);
+
+	if (value <= 0)
+		return (ERANGE);
+
+	lck_mtx_lock(&dtrace_lock);
+	dtrace_dof_maxsize = value;
+	lck_mtx_unlock(&dtrace_lock);
+
+	return (0);
+}
+
+/*
+ * kern.dtrace.dof_maxsize
+ *
+ * Set the DOF max size in bytes; see the definition of dtrace_dof_maxsize
+ * for the default value. Attempting to set a null or negative size will
+ * result in a failure.
+ */
+SYSCTL_PROC(_kern_dtrace, OID_AUTO, dof_maxsize,
+	CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
+	&dtrace_dof_maxsize, 0,
+	sysctl_dtrace_dof_maxsize, "Q", "dtrace dof maxsize");
+
+static int
+sysctl_dtrace_statvar_maxsize SYSCTL_HANDLER_ARGS
+{
+#pragma unused(oidp, arg2, req)
+	int changed, error;
+	dtrace_optval_t value = *(dtrace_optval_t*) arg1;
+
+	error = sysctl_io_number(req, value, sizeof(value), &value, &changed);
+	if (error || !changed)
+		return (error);
+
+	if (value <= 0)
+		return (ERANGE);
+	if (value > dtrace_statvar_maxsize_max)
+		return (ERANGE);
+
+	lck_mtx_lock(&dtrace_lock);
+	dtrace_statvar_maxsize = value;
+	lck_mtx_unlock(&dtrace_lock);
+
+	return (0);
+}
+
+/*
+ * kern.dtrace.global_maxsize
+ *
+ * Set the variable max size in bytes; see the definition of
+ * dtrace_statvar_maxsize for the default value. Attempting to set a null,
+ * negative, or too-large size will result in a failure.
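+ *
+ * Note that this sysctl is named global_maxsize but is backed by
+ * dtrace_statvar_maxsize, which bounds both global and local static
+ * variables and is clamped above by dtrace_statvar_maxsize_max.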
+ */ +SYSCTL_PROC(_kern_dtrace, OID_AUTO, global_maxsize, + CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED, + &dtrace_statvar_maxsize, 0, + sysctl_dtrace_statvar_maxsize, "Q", "dtrace statvar maxsize"); + +static int +sysctl_dtrace_provide_private_probes SYSCTL_HANDLER_ARGS +{ +#pragma unused(oidp, arg2) + int error; + int value = *(int *) arg1; + + error = sysctl_io_number(req, value, sizeof(value), &value, NULL); + if (error) + return (error); + + if (value != 0 && value != 1) + return (ERANGE); + + lck_mtx_lock(&dtrace_lock); + dtrace_provide_private_probes = value; + lck_mtx_unlock(&dtrace_lock); + + return (0); +} + +/* + * kern.dtrace.provide_private_probes + * + * Set whether the providers must provide the private probes. This is + * mainly used by the FBT provider to request probes for the private/static + * symbols. + */ +SYSCTL_PROC(_kern_dtrace, OID_AUTO, provide_private_probes, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, + &dtrace_provide_private_probes, 0, + sysctl_dtrace_provide_private_probes, "I", "provider must provide the private probes"); /* * DTrace Probe Context Functions @@ -578,20 +825,11 @@ static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *); * for these functions, there will be a comment above the function reading * "Note: not called from probe context." */ -void -dtrace_panic(const char *format, ...) -{ - va_list alist; - - va_start(alist, format); - dtrace_vpanic(format, alist); - va_end(alist); -} int dtrace_assfail(const char *a, const char *f, int l) { - dtrace_panic("assertion failed: %s, file: %s, line: %d", a, f, l); + panic("dtrace: assertion failed: %s, file: %s, line: %d", a, f, l); /* * We just need something here that even the most clever compiler @@ -664,20 +902,43 @@ dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate) } static int -dtrace_canstore_statvar(uint64_t addr, size_t sz, +dtrace_canstore_statvar(uint64_t addr, size_t sz, size_t *remain, dtrace_statvar_t **svars, int nsvars) { int i; + size_t maxglobalsize, maxlocalsize; + + maxglobalsize = dtrace_statvar_maxsize + sizeof (uint64_t); + maxlocalsize = (maxglobalsize) * NCPU; + + if (nsvars == 0) + return (0); + for (i = 0; i < nsvars; i++) { dtrace_statvar_t *svar = svars[i]; + uint8_t scope; + size_t size; - if (svar == NULL || svar->dtsv_size == 0) + if (svar == NULL || (size = svar->dtsv_size) == 0) continue; - if (addr - svar->dtsv_data < svar->dtsv_size && - addr + sz <= svar->dtsv_data + svar->dtsv_size) + scope = svar->dtsv_var.dtdv_scope; + + /** + * We verify that our size is valid in the spirit of providing + * defense in depth: we want to prevent attackers from using + * DTrace to escalate an orthogonal kernel heap corruption bug + * into the ability to store to arbitrary locations in memory. + */ + VERIFY((scope == DIFV_SCOPE_GLOBAL && size <= maxglobalsize) || + (scope == DIFV_SCOPE_LOCAL && size <= maxlocalsize)); + + if (DTRACE_INRANGE(addr, sz, svar->dtsv_data, svar->dtsv_size)) { + DTRACE_RANGE_REMAIN(remain, addr, svar->dtsv_data, + svar->dtsv_size); return (1); + } } return (0); @@ -693,45 +954,250 @@ static int dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { - uintptr_t a; - size_t s; - + return (dtrace_canstore_remains(addr, sz, NULL, mstate, vstate)); +} +/* + * Implementation of dtrace_canstore which communicates the upper bound of the + * allowed memory region. 
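+ * When the access is permitted and remain is non-NULL, *remain is set
+ * (via DTRACE_RANGE_REMAIN) to the number of bytes addressable from addr
+ * within the containing region.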
+ */ +static int +dtrace_canstore_remains(uint64_t addr, size_t sz, size_t *remain, + dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) +{ /* * First, check to see if the address is in scratch space... */ - a = mstate->dtms_scratch_base; - s = mstate->dtms_scratch_size; - - if (addr - a < s && addr + sz <= a + s) + if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base, + mstate->dtms_scratch_size)) { + DTRACE_RANGE_REMAIN(remain, addr, mstate->dtms_scratch_base, + mstate->dtms_scratch_size); return (1); - + } /* * Now check to see if it's a dynamic variable. This check will pick * up both thread-local variables and any global dynamically-allocated * variables. */ - a = (uintptr_t)vstate->dtvs_dynvars.dtds_base; - s = vstate->dtvs_dynvars.dtds_size; - if (addr - a < s && addr + sz <= a + s) + if (DTRACE_INRANGE(addr, sz, (uintptr_t)vstate->dtvs_dynvars.dtds_base, + vstate->dtvs_dynvars.dtds_size)) { + dtrace_dstate_t *dstate = &vstate->dtvs_dynvars; + uintptr_t base = (uintptr_t)dstate->dtds_base + + (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t)); + uintptr_t chunkoffs; + dtrace_dynvar_t *dvar; + + /* + * Before we assume that we can store here, we need to make + * sure that it isn't in our metadata -- storing to our + * dynamic variable metadata would corrupt our state. For + * the range to not include any dynamic variable metadata, + * it must: + * + * (1) Start above the hash table that is at the base of + * the dynamic variable space + * + * (2) Have a starting chunk offset that is beyond the + * dtrace_dynvar_t that is at the base of every chunk + * + * (3) Not span a chunk boundary + * + * (4) Not be in the tuple space of a dynamic variable + * + */ + if (addr < base) + return (0); + + chunkoffs = (addr - base) % dstate->dtds_chunksize; + + if (chunkoffs < sizeof (dtrace_dynvar_t)) + return (0); + + if (chunkoffs + sz > dstate->dtds_chunksize) + return (0); + + dvar = (dtrace_dynvar_t *)((uintptr_t)addr - chunkoffs); + + if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) + return (0); + + if (chunkoffs < sizeof (dtrace_dynvar_t) + + ((dvar->dtdv_tuple.dtt_nkeys - 1) * sizeof (dtrace_key_t))) + return (0); + return (1); + } /* * Finally, check the static local and global variables. These checks * take the longest, so we perform them last. */ - if (dtrace_canstore_statvar(addr, sz, + if (dtrace_canstore_statvar(addr, sz, remain, vstate->dtvs_locals, vstate->dtvs_nlocals)) return (1); - if (dtrace_canstore_statvar(addr, sz, + if (dtrace_canstore_statvar(addr, sz, remain, vstate->dtvs_globals, vstate->dtvs_nglobals)) return (1); return (0); } + /* - * Compare two strings using safe loads. + * Convenience routine to check to see if the address is within a memory + * region in which a load may be issued given the user's privilege level; + * if not, it sets the appropriate error flags and loads 'addr' into the + * illegal value slot. + * + * DTrace subroutines (DIF_SUBR_*) should use this helper to implement + * appropriate memory access protection. + */ +static int +dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate, + dtrace_vstate_t *vstate) +{ + return (dtrace_canload_remains(addr, sz, NULL, mstate, vstate)); +} + +/* + * Implementation of dtrace_canload which communicates the upper bound of the + * allowed memory region. 
+ */ +static int +dtrace_canload_remains(uint64_t addr, size_t sz, size_t *remain, + dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) +{ + volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; + + /* + * If we hold the privilege to read from kernel memory, then + * everything is readable. + */ + if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) { + DTRACE_RANGE_REMAIN(remain, addr, addr, sz); + return (1); + } + + /* + * You can obviously read that which you can store. + */ + if (dtrace_canstore_remains(addr, sz, remain, mstate, vstate)) + return (1); + + /* + * We're allowed to read from our own string table. + */ + if (DTRACE_INRANGE(addr, sz, (uintptr_t)mstate->dtms_difo->dtdo_strtab, + mstate->dtms_difo->dtdo_strlen)) { + DTRACE_RANGE_REMAIN(remain, addr, + mstate->dtms_difo->dtdo_strtab, + mstate->dtms_difo->dtdo_strlen); + return (1); + } + + DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV); + *illval = addr; + return (0); +} + +/* + * Convenience routine to check to see if a given string is within a memory + * region in which a load may be issued given the user's privilege level; + * this exists so that we don't need to issue unnecessary dtrace_strlen() + * calls in the event that the user has all privileges. + */ +static int +dtrace_strcanload(uint64_t addr, size_t sz, size_t *remain, + dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) +{ + size_t rsize; + + /* + * If we hold the privilege to read from kernel memory, then + * everything is readable. + */ + if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) { + DTRACE_RANGE_REMAIN(remain, addr, addr, sz); + return (1); + } + + /* + * Even if the caller is uninterested in querying the remaining valid + * range, it is required to ensure that the access is allowed. + */ + if (remain == NULL) { + remain = &rsize; + } + if (dtrace_canload_remains(addr, 0, remain, mstate, vstate)) { + size_t strsz; + /* + * Perform the strlen after determining the length of the + * memory region which is accessible. This prevents timing + * information from being used to find NULs in memory which is + * not accessible to the caller. + */ + strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr, + MIN(sz, *remain)); + if (strsz <= *remain) { + return (1); + } + } + + return (0); +} + +/* + * Convenience routine to check to see if a given variable is within a memory + * region in which a load may be issued given the user's privilege level. + */ +static int +dtrace_vcanload(void *src, dtrace_diftype_t *type, size_t *remain, + dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) +{ + size_t sz; + ASSERT(type->dtdt_flags & DIF_TF_BYREF); + + /* + * Calculate the max size before performing any checks since even + * DTRACE_ACCESS_KERNEL-credentialed callers expect that this function + * return the max length via 'remain'. + */ + if (type->dtdt_kind == DIF_TYPE_STRING) { + dtrace_state_t *state = vstate->dtvs_state; + + if (state != NULL) { + sz = state->dts_options[DTRACEOPT_STRSIZE]; + } else { + /* + * In helper context, we have a NULL state; fall back + * to using the system-wide default for the string size + * in this case. + */ + sz = dtrace_strsize_default; + } + } else { + sz = type->dtdt_size; + } + + /* + * If we hold the privilege to read from kernel memory, then + * everything is readable. 
+ */ + if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) { + DTRACE_RANGE_REMAIN(remain, (uintptr_t)src, src, sz); + return (1); + } + + if (type->dtdt_kind == DIF_TYPE_STRING) { + return (dtrace_strcanload((uintptr_t)src, sz, remain, mstate, + vstate)); + } + return (dtrace_canload_remains((uintptr_t)src, sz, remain, mstate, + vstate)); +} + +/* + * Compare two strings using safe loads. */ static int dtrace_strncmp(char *s1, char *s2, size_t limit) @@ -745,15 +1211,17 @@ dtrace_strncmp(char *s1, char *s2, size_t limit) flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags; do { - if (s1 == NULL) + if (s1 == NULL) { c1 = '\0'; - else + } else { c1 = dtrace_load8((uintptr_t)s1++); + } - if (s2 == NULL) + if (s2 == NULL) { c2 = '\0'; - else + } else { c2 = dtrace_load8((uintptr_t)s2++); + } if (c1 != c2) return (c1 - c2); @@ -771,9 +1239,10 @@ dtrace_strlen(const char *s, size_t lim) { uint_t len; - for (len = 0; len != lim; len++) + for (len = 0; len != lim; len++) { if (dtrace_load8((uintptr_t)s++) == '\0') break; + } return (len); } @@ -862,14 +1331,15 @@ dtrace_strcpy(const void *src, void *dst, size_t len) * specified type; we assume that we can store to directly. */ static void -dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type) +dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type, size_t limit) { ASSERT(type->dtdt_flags & DIF_TF_BYREF); - if (type->dtdt_kind == DIF_TYPE_STRING) - dtrace_strcpy(src, dst, type->dtdt_size); - else - dtrace_bcopy(src, dst, type->dtdt_size); + if (type->dtdt_kind == DIF_TYPE_STRING) { + dtrace_strcpy(src, dst, MIN(type->dtdt_size, limit)); + } else { + dtrace_bcopy(src, dst, MIN(type->dtdt_size, limit)); + } } /* @@ -915,6 +1385,93 @@ dtrace_bzero(void *dst, size_t len) *cp++ = 0; } +static void +dtrace_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum) +{ + uint64_t result[2]; + + result[0] = addend1[0] + addend2[0]; + result[1] = addend1[1] + addend2[1] + + (result[0] < addend1[0] || result[0] < addend2[0] ? 1 : 0); + + sum[0] = result[0]; + sum[1] = result[1]; +} + +/* + * Shift the 128-bit value in a by b. If b is positive, shift left. + * If b is negative, shift right. + */ +static void +dtrace_shift_128(uint64_t *a, int b) +{ + uint64_t mask; + + if (b == 0) + return; + + if (b < 0) { + b = -b; + if (b >= 64) { + a[0] = a[1] >> (b - 64); + a[1] = 0; + } else { + a[0] >>= b; + mask = 1LL << (64 - b); + mask -= 1; + a[0] |= ((a[1] & mask) << (64 - b)); + a[1] >>= b; + } + } else { + if (b >= 64) { + a[1] = a[0] << (b - 64); + a[0] = 0; + } else { + a[1] <<= b; + mask = a[0] >> (64 - b); + a[1] |= mask; + a[0] <<= b; + } + } +} + +/* + * The basic idea is to break the 2 64-bit values into 4 32-bit values, + * use native multiplication on those, and then re-combine into the + * resulting 128-bit value. 
+ * + * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) = + * hi1 * hi2 << 64 + + * hi1 * lo2 << 32 + + * hi2 * lo1 << 32 + + * lo1 * lo2 + */ +static void +dtrace_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product) +{ + uint64_t hi1, hi2, lo1, lo2; + uint64_t tmp[2]; + + hi1 = factor1 >> 32; + hi2 = factor2 >> 32; + + lo1 = factor1 & DT_MASK_LO; + lo2 = factor2 & DT_MASK_LO; + + product[0] = lo1 * lo2; + product[1] = hi1 * hi2; + + tmp[0] = hi1 * lo2; + tmp[1] = 0; + dtrace_shift_128(tmp, 32); + dtrace_add_128(product, tmp, product); + + tmp[0] = hi2 * lo1; + tmp[1] = 0; + dtrace_shift_128(tmp, 32); + dtrace_add_128(product, tmp, product); +} + /* * This privilege check should be used by actions and subroutines to * verify that the user credentials of the process that enabled the @@ -931,17 +1488,13 @@ dtrace_priv_proc_common_user(dtrace_state_t *state) */ ASSERT(s_cr != NULL); -#if !defined(__APPLE__) - if ((cr = CRED()) != NULL && -#else if ((cr = dtrace_CRED()) != NULL && -#endif /* __APPLE__ */ - s_cr->cr_uid == cr->cr_uid && - s_cr->cr_uid == cr->cr_ruid && - s_cr->cr_uid == cr->cr_suid && - s_cr->cr_gid == cr->cr_gid && - s_cr->cr_gid == cr->cr_rgid && - s_cr->cr_gid == cr->cr_sgid) + posix_cred_get(s_cr)->cr_uid == posix_cred_get(cr)->cr_uid && + posix_cred_get(s_cr)->cr_uid == posix_cred_get(cr)->cr_ruid && + posix_cred_get(s_cr)->cr_uid == posix_cred_get(cr)->cr_suid && + posix_cred_get(s_cr)->cr_gid == posix_cred_get(cr)->cr_gid && + posix_cred_get(s_cr)->cr_gid == posix_cred_get(cr)->cr_rgid && + posix_cred_get(s_cr)->cr_gid == posix_cred_get(cr)->cr_sgid) return (1); return (0); @@ -956,6 +1509,7 @@ static int dtrace_priv_proc_common_zone(dtrace_state_t *state) { cred_t *cr, *s_cr = state->dts_cred.dcr_cred; +#pragma unused(cr, s_cr, state) /* __APPLE__ */ /* * We should always have a non-NULL state cred here, since if cred @@ -963,50 +1517,29 @@ dtrace_priv_proc_common_zone(dtrace_state_t *state) */ ASSERT(s_cr != NULL); -#if !defined(__APPLE__) - if ((cr = CRED()) != NULL && - s_cr->cr_zone == cr->cr_zone) - return (1); - - return (0); -#else - return 1; /* Darwin doesn't do zones. */ -#endif /* __APPLE__ */ + return 1; /* APPLE NOTE: Darwin doesn't do zones. */ } /* * This privilege check should be used by actions and subroutines to * verify that the process has not setuid or changed credentials. */ -#if !defined(__APPLE__) -static int -dtrace_priv_proc_common_nocd() -{ - proc_t *proc; - - if ((proc = ttoproc(curthread)) != NULL && - !(proc->p_flag & SNOCD)) - return (1); - - return (0); -} -#else static int dtrace_priv_proc_common_nocd(void) { return 1; /* Darwin omits "No Core Dump" flag. 
*/ } -#endif /* __APPLE__ */ static int dtrace_priv_proc_destructive(dtrace_state_t *state) { int action = state->dts_cred.dcr_action; -#if defined(__APPLE__) if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) goto bad; -#endif /* __APPLE__ */ + + if (dtrace_is_restricted() && !dtrace_can_attach_to_proc(current_proc())) + goto bad; if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) && dtrace_priv_proc_common_zone(state) == 0) @@ -1031,10 +1564,11 @@ bad: static int dtrace_priv_proc_control(dtrace_state_t *state) { -#if defined(__APPLE__) if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) goto bad; -#endif /* __APPLE__ */ + + if (dtrace_is_restricted() && !dtrace_can_attach_to_proc(current_proc())) + goto bad; if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL) return (1); @@ -1044,9 +1578,7 @@ dtrace_priv_proc_control(dtrace_state_t *state) dtrace_priv_proc_common_nocd()) return (1); -#if defined(__APPLE__) bad: -#endif /* __APPLE__ */ cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; return (0); @@ -1055,24 +1587,26 @@ bad: static int dtrace_priv_proc(dtrace_state_t *state) { -#if defined(__APPLE__) if (ISSET(current_proc()->p_lflag, P_LNOATTACH)) goto bad; -#endif /* __APPLE__ */ + + if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed() && !dtrace_can_attach_to_proc(current_proc())) + goto bad; if (state->dts_cred.dcr_action & DTRACE_CRA_PROC) return (1); -#if defined(__APPLE__) bad: -#endif /* __APPLE__ */ cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV; return (0); } -#if defined(__APPLE__) -/* dtrace_priv_proc() omitting the P_LNOATTACH check. For PID and EXECNAME accesses. */ +/* + * The P_LNOATTACH check is an Apple specific check. + * We need a version of dtrace_priv_proc() that omits + * that check for PID and EXECNAME accesses + */ static int dtrace_priv_proc_relaxed(dtrace_state_t *state) { @@ -1084,14 +1618,17 @@ dtrace_priv_proc_relaxed(dtrace_state_t *state) return (0); } -#endif /* __APPLE__ */ static int dtrace_priv_kernel(dtrace_state_t *state) { + if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed()) + goto bad; + if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL) return (1); +bad: cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV; return (0); @@ -1100,9 +1637,13 @@ dtrace_priv_kernel(dtrace_state_t *state) static int dtrace_priv_kernel_destructive(dtrace_state_t *state) { + if (dtrace_is_restricted()) + goto bad; + if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL_DESTRUCTIVE) return (1); +bad: cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV; return (0); @@ -1114,17 +1655,14 @@ dtrace_priv_kernel_destructive(dtrace_state_t *state) * clean the dirty dynamic variable lists on all CPUs. Dynamic variable * cleaning is explained in detail in . */ -#if defined(__APPLE__) -static -#endif /* __APPLE__ */ -void +static void dtrace_dynvar_clean(dtrace_dstate_t *dstate) { dtrace_dynvar_t *dirty; dtrace_dstate_percpu_t *dcpu; int i, work = 0; - for (i = 0; i < NCPU; i++) { + for (i = 0; i < (int)NCPU; i++) { dcpu = &dstate->dtds_percpu[i]; ASSERT(dcpu->dtdsc_rinsing == NULL); @@ -1174,7 +1712,7 @@ dtrace_dynvar_clean(dtrace_dstate_t *dstate) dtrace_sync(); - for (i = 0; i < NCPU; i++) { + for (i = 0; i < (int)NCPU; i++) { dcpu = &dstate->dtds_percpu[i]; if (dcpu->dtdsc_rinsing == NULL) @@ -1209,12 +1747,10 @@ dtrace_dynvar_clean(dtrace_dstate_t *dstate) * variable can be allocated. If NULL is returned, the appropriate counter * will be incremented. 
*/ -#if defined(__APPLE__) -static -#endif /* __APPLE__ */ -dtrace_dynvar_t * +static dtrace_dynvar_t * dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys, - dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op) + dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op, + dtrace_mstate_t *mstate, dtrace_vstate_t *vstate) { uint64_t hashval = DTRACE_DYNHASH_VALID; dtrace_dynhash_t *hash = dstate->dtds_hash; @@ -1266,6 +1802,9 @@ dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys, uint64_t j, size = key[i].dttk_size; uintptr_t base = (uintptr_t)key[i].dttk_value; + if (!dtrace_canload(base, size, mstate, vstate)) + break; + for (j = 0; j < size; j++) { hashval += dtrace_load8(base + j); hashval += (hashval << 10); @@ -1274,6 +1813,9 @@ dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys, } } + if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT)) + return (NULL); + hashval += (hashval << 3); hashval ^= (hashval >> 11); hashval += (hashval << 15); @@ -1304,7 +1846,7 @@ dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys, while ((lock = *lockp) & 1) continue; - if (dtrace_casptr((void *)lockp, + if (dtrace_casptr((void *)(uintptr_t)lockp, (void *)lock, (void *)(lock + 1)) == (void *)lock) break; } @@ -1519,7 +2061,7 @@ retry: case DTRACE_DSTATE_CLEAN: { void *sp = &dstate->dtds_state; - if (++cpu >= NCPU) + if (++cpu >= (int)NCPU) cpu = 0; if (dcpu->dtdsc_dirty != NULL && @@ -1660,14 +2202,15 @@ retry: dvar->dtdv_next = free; } while (dtrace_casptr(&dcpu->dtdsc_dirty, free, dvar) != free); - return (dtrace_dynvar(dstate, nkeys, key, dsize, op)); + return (dtrace_dynvar(dstate, nkeys, key, dsize, op, mstate, vstate)); } /*ARGSUSED*/ static void dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg) { - if (nval < *oval) +#pragma unused(arg) /* __APPLE__ */ + if ((int64_t)nval < (int64_t)*oval) *oval = nval; } @@ -1675,7 +2218,8 @@ dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg) static void dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg) { - if (nval > *oval) +#pragma unused(arg) /* __APPLE__ */ + if ((int64_t)nval > (int64_t)*oval) *oval = nval; } @@ -1740,18 +2284,114 @@ dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr) lquanta[levels + 1] += incr; } +static int +dtrace_aggregate_llquantize_bucket(int16_t factor, int16_t low, int16_t high, + int16_t nsteps, int64_t value) +{ + int64_t this = 1, last, next; + int base = 1, order; + + for (order = 0; order < low; ++order) + this *= factor; + + /* + * If our value is less than our factor taken to the power of the + * low order of magnitude, it goes into the zeroth bucket. + */ + if (value < this) + return 0; + else + last = this; + + for (this *= factor; order <= high; ++order) { + int nbuckets = this > nsteps ? nsteps : this; + + /* + * We should not generally get log/linear quantizations + * with a high magnitude that allows 64-bits to + * overflow, but we nonetheless protect against this + * by explicitly checking for overflow, and clamping + * our value accordingly. + */ + next = this * factor; + if (next < this) { + value = this - 1; + } + + /* + * If our value lies within this order of magnitude, + * determine its position by taking the offset within + * the order of magnitude, dividing by the bucket + * width, and adding to our (accumulated) base. 
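+	 *
+	 * For instance (illustrative): with factor 10, low 0, high 2, and
+	 * nsteps 10, the value 57 falls in the 10..99 order of magnitude
+	 * and maps to base 10 plus (57 - 10) / 10, i.e. bucket 14.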
+ */ + if (value < this) { + return (base + (value - last) / (this / nbuckets)); + } + + base += nbuckets - (nbuckets / factor); + last = this; + this = next; + } + + /* + * Our value is greater than or equal to our factor taken to the + * power of one plus the high magnitude -- return the top bucket. + */ + return base; +} + +static void +dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr) +{ + uint64_t arg = *llquanta++; + uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg); + uint16_t low = DTRACE_LLQUANTIZE_LOW(arg); + uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg); + uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg); + + llquanta[dtrace_aggregate_llquantize_bucket(factor, low, high, nsteps, nval)] += incr; +} + /*ARGSUSED*/ static void dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg) { +#pragma unused(arg) /* __APPLE__ */ data[0]++; data[1] += nval; } +/*ARGSUSED*/ +static void +dtrace_aggregate_stddev(uint64_t *data, uint64_t nval, uint64_t arg) +{ +#pragma unused(arg) /* __APPLE__ */ + int64_t snval = (int64_t)nval; + uint64_t tmp[2]; + + data[0]++; + data[1] += nval; + + /* + * What we want to say here is: + * + * data[2] += nval * nval; + * + * But given that nval is 64-bit, we could easily overflow, so + * we do this as 128-bit arithmetic. + */ + if (snval < 0) + snval = -snval; + + dtrace_multiply_128((uint64_t)snval, (uint64_t)snval, tmp); + dtrace_add_128(data + 2, tmp, data + 2); +} + /*ARGSUSED*/ static void dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg) { +#pragma unused(nval, arg) /* __APPLE__ */ *oval = *oval + 1; } @@ -1759,6 +2399,7 @@ dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg) static void dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg) { +#pragma unused(arg) /* __APPLE__ */ *oval += nval; } @@ -1773,6 +2414,7 @@ static void dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf, intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg) { +#pragma unused(arg) dtrace_recdesc_t *rec = &agg->dtag_action.dta_rec; uint32_t i, ndx, size, fsize; uint32_t align = sizeof (uint64_t) - 1; @@ -2075,18 +2717,19 @@ dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu, { dtrace_speculation_t *spec; dtrace_buffer_t *src, *dest; - uintptr_t daddr, saddr, dlimit; - dtrace_speculation_state_t current, new; + uintptr_t daddr, saddr, dlimit, slimit; + dtrace_speculation_state_t current, new = DTRACESPEC_INACTIVE; intptr_t offs; + uint64_t timestamp; if (which == 0) return; - if (which > state->dts_nspeculations) { + if (which > (dtrace_specid_t)state->dts_nspeculations) { cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; return; } - + spec = &state->dts_speculations[which - 1]; src = &spec->dtsp_buffer[cpu]; dest = &state->dts_buffer[cpu]; @@ -2153,7 +2796,38 @@ dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu, } /* - * We have the space; copy the buffer across. (Note that this is a + * We have sufficient space to copy the speculative buffer into the + * primary buffer. First, modify the speculative buffer, filling + * in the timestamp of all entries with the current time. The data + * must have the commit() time rather than the time it was traced, + * so that all entries in the primary buffer are in timestamp order. 
+ */ + timestamp = dtrace_gethrtime(); + saddr = (uintptr_t)src->dtb_tomax; + slimit = saddr + src->dtb_offset; + while (saddr < slimit) { + size_t size; + dtrace_rechdr_t *dtrh = (dtrace_rechdr_t *)saddr; + + if (dtrh->dtrh_epid == DTRACE_EPIDNONE) { + saddr += sizeof (dtrace_epid_t); + continue; + } + + ASSERT(dtrh->dtrh_epid <= ((dtrace_epid_t) state->dts_necbs)); + size = state->dts_ecbs[dtrh->dtrh_epid - 1]->dte_size; + + ASSERT(saddr + size <= slimit); + ASSERT(size >= sizeof(dtrace_rechdr_t)); + ASSERT(DTRACE_RECORD_LOAD_TIMESTAMP(dtrh) == UINT64_MAX); + + DTRACE_RECORD_STORE_TIMESTAMP(dtrh, timestamp); + + saddr += size; + } + + /* + * Copy the buffer across. (Note that this is a * highly subobtimal bcopy(); in the unlikely event that this becomes * a serious performance issue, a high-performance DTrace-specific * bcopy() should obviously be invented.) @@ -2192,6 +2866,7 @@ out: (current == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) { uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state, DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE); +#pragma unused(rval) /* __APPLE__ */ ASSERT(rval == DTRACESPEC_COMMITTING); } @@ -2212,13 +2887,13 @@ dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu, dtrace_specid_t which) { dtrace_speculation_t *spec; - dtrace_speculation_state_t current, new; + dtrace_speculation_state_t current, new = DTRACESPEC_INACTIVE; dtrace_buffer_t *buf; if (which == 0) return; - if (which > state->dts_nspeculations) { + if (which > (dtrace_specid_t)state->dts_nspeculations) { cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; return; } @@ -2281,7 +2956,7 @@ dtrace_speculation_clean_here(dtrace_state_t *state) return; } - for (i = 0; i < state->dts_nspeculations; i++) { + for (i = 0; i < (dtrace_specid_t)state->dts_nspeculations; i++) { dtrace_speculation_t *spec = &state->dts_speculations[i]; dtrace_buffer_t *src = &spec->dtsp_buffer[cpu]; @@ -2316,10 +2991,11 @@ dtrace_speculation_clean_here(dtrace_state_t *state) static void dtrace_speculation_clean(dtrace_state_t *state) { - int work = 0, rv; + int work = 0; + uint32_t rv; dtrace_specid_t i; - for (i = 0; i < state->dts_nspeculations; i++) { + for (i = 0; i < (dtrace_specid_t)state->dts_nspeculations; i++) { dtrace_speculation_t *spec = &state->dts_speculations[i]; ASSERT(!spec->dtsp_cleaning); @@ -2343,7 +3019,7 @@ dtrace_speculation_clean(dtrace_state_t *state) * speculation buffers, as appropriate. We can now set the state * to inactive. */ - for (i = 0; i < state->dts_nspeculations; i++) { + for (i = 0; i < (dtrace_specid_t)state->dts_nspeculations; i++) { dtrace_speculation_t *spec = &state->dts_speculations[i]; dtrace_speculation_state_t current, new; @@ -2374,13 +3050,13 @@ dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid, dtrace_specid_t which) { dtrace_speculation_t *spec; - dtrace_speculation_state_t current, new; + dtrace_speculation_state_t current, new = DTRACESPEC_INACTIVE; dtrace_buffer_t *buf; if (which == 0) return (NULL); - if (which > state->dts_nspeculations) { + if (which > (dtrace_specid_t)state->dts_nspeculations) { cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP; return (NULL); } @@ -2434,33 +3110,76 @@ dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid, } /* - * This function implements the DIF emulator's variable lookups. The emulator - * passes a reserved variable identifier and optional built-in array index. + * Return a string. 
In the event that the user lacks the privilege to access + * arbitrary kernel memory, we copy the string out to scratch memory so that we + * don't fail access checking. + * + * dtrace_dif_variable() uses this routine as a helper for various + * builtin values such as 'execname' and 'probefunc.' */ -static uint64_t -dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, - uint64_t ndx) +static +uintptr_t +dtrace_dif_varstr(uintptr_t addr, dtrace_state_t *state, + dtrace_mstate_t *mstate) { + uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; + uintptr_t ret; + size_t strsz; + /* - * If we're accessing one of the uncached arguments, we'll turn this - * into a reference in the args array. + * The easy case: this probe is allowed to read all of memory, so + * we can just return this as a vanilla pointer. */ - if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) { - ndx = v - DIF_VAR_ARG0; - v = DIF_VAR_ARGS; - } + if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) + return (addr); - switch (v) { - case DIF_VAR_ARGS: - ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS); + /* + * This is the tougher case: we copy the string in question from + * kernel memory into scratch memory and return it that way: this + * ensures that we won't trip up when access checking tests the + * BYREF return value. + */ + strsz = dtrace_strlen((char *)addr, size) + 1; + + if (mstate->dtms_scratch_ptr + strsz > + mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); + return (0); + } + + dtrace_strcpy((const void *)addr, (void *)mstate->dtms_scratch_ptr, + strsz); + ret = mstate->dtms_scratch_ptr; + mstate->dtms_scratch_ptr += strsz; + return (ret); +} + +/* + * This function implements the DIF emulator's variable lookups. The emulator + * passes a reserved variable identifier and optional built-in array index. + */ +static uint64_t +dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, + uint64_t ndx) +{ + /* + * If we're accessing one of the uncached arguments, we'll turn this + * into a reference in the args array. + */ + if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) { + ndx = v - DIF_VAR_ARG0; + v = DIF_VAR_ARGS; + } + + switch (v) { + case DIF_VAR_ARGS: + ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS); if (ndx >= sizeof (mstate->dtms_arg) / sizeof (mstate->dtms_arg[0])) { -#if !defined(__APPLE__) - int aframes = mstate->dtms_probe->dtpr_aframes + 2; -#else - /* Account for introduction of __dtrace_probe() on xnu. */ + /* + * APPLE NOTE: Account for introduction of __dtrace_probe() + */ int aframes = mstate->dtms_probe->dtpr_aframes + 3; -#endif /* __APPLE__ */ dtrace_provider_t *pv; uint64_t val; @@ -2469,12 +3188,11 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, val = pv->dtpv_pops.dtps_getargval(pv->dtpv_arg, mstate->dtms_probe->dtpr_id, mstate->dtms_probe->dtpr_arg, ndx, aframes); -#if defined(__APPLE__) - /* Special case access of arg5 as passed to dtrace_probeid_error (which see.) */ + /* Special case access of arg5 as passed to dtrace_probe_error() (which see.) 
*/ else if (mstate->dtms_probe->dtpr_id == dtrace_probeid_error && ndx == 5) { - return ((dtrace_state_t *)(mstate->dtms_arg[0]))->dts_arg_error_illval; + return ((dtrace_state_t *)(uintptr_t)(mstate->dtms_arg[0]))->dts_arg_error_illval; } -#endif /* __APPLE__ */ + else val = dtrace_getarg(ndx, aframes); @@ -2495,22 +3213,6 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return (mstate->dtms_arg[ndx]); -#if !defined(__APPLE__) - case DIF_VAR_UREGS: { - klwp_t *lwp; - - if (!dtrace_priv_proc(state)) - return (0); - - if ((lwp = curthread->t_lwp) == NULL) { - DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR); - cpu_core[CPU->cpu_id].cpuc_dtrace_illval = NULL; - return (0); - } - - return (dtrace_getreg(lwp->lwp_regs, ndx)); - } -#else case DIF_VAR_UREGS: { thread_t thread; @@ -2525,20 +3227,13 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return (dtrace_getreg(find_user_regs(thread), ndx)); } -#endif /* __APPLE__ */ -#if !defined(__APPLE__) - case DIF_VAR_CURTHREAD: - if (!dtrace_priv_kernel(state)) - return (0); - return ((uint64_t)(uintptr_t)curthread); -#else + case DIF_VAR_CURTHREAD: if (!dtrace_priv_kernel(state)) return (0); return ((uint64_t)(uintptr_t)current_thread()); -#endif /* __APPLE__ */ case DIF_VAR_TIMESTAMP: if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) { @@ -2547,15 +3242,9 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, } return (mstate->dtms_timestamp); -#if !defined(__APPLE__) - case DIF_VAR_VTIMESTAMP: - ASSERT(dtrace_vtime_references != 0); - return (curthread->t_dtrace_vtime); -#else case DIF_VAR_VTIMESTAMP: ASSERT(dtrace_vtime_references != 0); return (dtrace_get_thread_vtime(current_thread())); -#endif /* __APPLE__ */ case DIF_VAR_WALLTIMESTAMP: if (!(mstate->dtms_present & DTRACE_MSTATE_WALLTIMESTAMP)) { @@ -2564,6 +3253,16 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, } return (mstate->dtms_walltimestamp); + case DIF_VAR_MACHTIMESTAMP: + if (!(mstate->dtms_present & DTRACE_MSTATE_MACHTIMESTAMP)) { + mstate->dtms_machtimestamp = mach_absolute_time(); + mstate->dtms_present |= DTRACE_MSTATE_MACHTIMESTAMP; + } + return (mstate->dtms_machtimestamp); + + case DIF_VAR_CPU: + return ((uint64_t) dtrace_get_thread_last_cpu_id(current_thread())); + case DIF_VAR_IPL: if (!dtrace_priv_kernel(state)) return (0); @@ -2585,12 +3284,10 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, if (!dtrace_priv_kernel(state)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_STACKDEPTH)) { -#if !defined(__APPLE__) - int aframes = mstate->dtms_probe->dtpr_aframes + 2; -#else - /* Account for introduction of __dtrace_probe() on xnu. */ + /* + * APPLE NOTE: Account for introduction of __dtrace_probe() + */ int aframes = mstate->dtms_probe->dtpr_aframes + 3; -#endif /* __APPLE__ */ mstate->dtms_stackdepth = dtrace_getstackdepth(aframes); mstate->dtms_present |= DTRACE_MSTATE_STACKDEPTH; @@ -2621,12 +3318,10 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, if (!dtrace_priv_kernel(state)) return (0); if (!(mstate->dtms_present & DTRACE_MSTATE_CALLER)) { -#if !defined(__APPLE__) - int aframes = mstate->dtms_probe->dtpr_aframes + 2; -#else - /* Account for introduction of __dtrace_probe() on xnu. 
*/ + /* + * APPLE NOTE: Account for introduction of __dtrace_probe() + */ int aframes = mstate->dtms_probe->dtpr_aframes + 3; -#endif /* __APPLE__ */ if (!DTRACE_ANCHORED(mstate->dtms_probe)) { /* @@ -2641,7 +3336,7 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, (uint32_t *)(uintptr_t)mstate->dtms_arg[0]); mstate->dtms_caller = caller[1]; } else if ((mstate->dtms_caller = - dtrace_caller(aframes)) == -1) { + dtrace_caller(aframes)) == (uintptr_t)-1) { /* * We have failed to do this the quick way; * we must resort to the slower approach of @@ -2671,8 +3366,10 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, * uint64_t will contain the caller, which is what * we're after. */ - ustack[2] = NULL; + ustack[2] = 0; + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); dtrace_getupcstack(ustack, 3); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); mstate->dtms_ucaller = ustack[2]; mstate->dtms_present |= DTRACE_MSTATE_UCALLER; } @@ -2681,48 +3378,28 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, case DIF_VAR_PROBEPROV: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); - return ((uint64_t)(uintptr_t) - mstate->dtms_probe->dtpr_provider->dtpv_name); + return (dtrace_dif_varstr( + (uintptr_t)mstate->dtms_probe->dtpr_provider->dtpv_name, + state, mstate)); case DIF_VAR_PROBEMOD: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); - return ((uint64_t)(uintptr_t) - mstate->dtms_probe->dtpr_mod); + return (dtrace_dif_varstr( + (uintptr_t)mstate->dtms_probe->dtpr_mod, + state, mstate)); case DIF_VAR_PROBEFUNC: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); - return ((uint64_t)(uintptr_t) - mstate->dtms_probe->dtpr_func); + return (dtrace_dif_varstr( + (uintptr_t)mstate->dtms_probe->dtpr_func, + state, mstate)); case DIF_VAR_PROBENAME: ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE); - return ((uint64_t)(uintptr_t) - mstate->dtms_probe->dtpr_name); - -#if !defined(__APPLE__) - case DIF_VAR_PID: - if (!dtrace_priv_proc(state)) - return (0); - - /* - * Note that we are assuming that an unanchored probe is - * always due to a high-level interrupt. (And we're assuming - * that there is only a single high level interrupt.) - */ - if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) - return (pid0.pid_id); - - /* - * It is always safe to dereference one's own t_procp pointer: - * it always points to a valid, allocated proc structure. - * Further, it is always safe to dereference the p_pidp member - * of one's own proc structure. (These are truisms becuase - * threads and processes don't clean up their own state -- - * they leave that task to whomever reaps them.) - */ - return ((uint64_t)curthread->t_procp->p_pidp->pid_id); + return (dtrace_dif_varstr( + (uintptr_t)mstate->dtms_probe->dtpr_name, + state, mstate)); -#else case DIF_VAR_PID: if (!dtrace_priv_proc_relaxed(state)) return (0); @@ -2736,22 +3413,8 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, /* Anchored probe that fires while on an interrupt accrues to process 0 */ return 0; - return ((uint64_t)proc_selfpid()); -#endif /* __APPLE__ */ - -#if !defined(__APPLE__) - case DIF_VAR_PPID: - if (!dtrace_priv_proc(state)) - return (0); - - /* - * See comment in DIF_VAR_PID. 
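[Editor's note] This is also where the diff starts replacing the open-coded bounds test (dtms_scratch_ptr + size > dtms_scratch_base + dtms_scratch_size) with DTRACE_INSCRATCH(). Presumably the macro is the same test rephrased as a subtraction, along these lines (a hedged reconstruction, not quoted from the header):

    /*
     * Remaining scratch >= size?  Subtracting rather than adding the
     * size avoids wrapping dtms_scratch_ptr past the address space.
     */
    #define DTRACE_INSCRATCH(mstate, size) \
        ((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \
         (mstate)->dtms_scratch_ptr >= (size))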
- */ - if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) - return (pid0.pid_id); + return ((uint64_t)dtrace_proc_selfpid()); - return ((uint64_t)curthread->t_procp->p_ppid); -#else case DIF_VAR_PPID: if (!dtrace_priv_proc_relaxed(state)) return (0); @@ -2762,138 +3425,82 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return (0); - return ((uint64_t)(uintptr_t)(current_proc()->p_ppid)); -#endif /* __APPLE__ */ + return ((uint64_t)dtrace_proc_selfppid()); -#if !defined(__APPLE__) case DIF_VAR_TID: - /* - * See comment in DIF_VAR_PID. - */ - if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) - return (0); + /* We do not need to check for null current_thread() */ + return thread_tid(current_thread()); /* globally unique */ - return ((uint64_t)curthread->t_tid); -#else - case DIF_VAR_TID: - /* - * See comment in DIF_VAR_PID. - */ - if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) + case DIF_VAR_PTHREAD_SELF: + if (!dtrace_priv_proc(state)) return (0); - return ((uint64_t)(uintptr_t)current_thread()); /* Is user's (pthread_t)t->kernel_thread */ -#endif /* __APPLE__ */ + /* Not currently supported, but we should be able to delta the dispatchqaddr and dispatchqoffset to get pthread_self */ + return 0; -#if !defined(__APPLE__) - case DIF_VAR_EXECNAME: + case DIF_VAR_DISPATCHQADDR: if (!dtrace_priv_proc(state)) return (0); - /* - * See comment in DIF_VAR_PID. - */ - if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) - return ((uint64_t)(uintptr_t)p0.p_user.u_comm); + /* We do not need to check for null current_thread() */ + return thread_dispatchqaddr(current_thread()); - /* - * It is always safe to dereference one's own t_procp pointer: - * it always points to a valid, allocated proc structure. - * (This is true because threads don't clean up their own - * state -- they leave that task to whomever reaps them.) - */ - return ((uint64_t)(uintptr_t) - curthread->t_procp->p_user.u_comm); -#else case DIF_VAR_EXECNAME: { char *xname = (char *)mstate->dtms_scratch_ptr; size_t scratch_size = MAXCOMLEN+1; /* The scratch allocation's lifetime is that of the clause. */ - if (mstate->dtms_scratch_ptr + scratch_size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) + if (!DTRACE_INSCRATCH(mstate, scratch_size)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); return 0; + } if (!dtrace_priv_proc_relaxed(state)) return (0); mstate->dtms_scratch_ptr += scratch_size; - proc_selfname( xname, MAXCOMLEN ); + proc_selfname( xname, scratch_size ); return ((uint64_t)(uintptr_t)xname); } -#endif /* __APPLE__ */ -#if !defined(__APPLE__) - case DIF_VAR_ZONENAME: - if (!dtrace_priv_proc(state)) - return (0); - - /* - * See comment in DIF_VAR_PID. - */ - if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) - return ((uint64_t)(uintptr_t)p0.p_zone->zone_name); - /* - * It is always safe to dereference one's own t_procp pointer: - * it always points to a valid, allocated proc structure. - * (This is true because threads don't clean up their own - * state -- they leave that task to whomever reaps them.) - */ - return ((uint64_t)(uintptr_t) - curthread->t_procp->p_zone->zone_name); -#else case DIF_VAR_ZONENAME: - if (!dtrace_priv_proc(state)) - return (0); - - return ((uint64_t)(uintptr_t)NULL); /* Darwin doesn't do "zones" */ -#endif /* __APPLE__ */ + { + /* scratch_size is equal to length('global') + 1 for the null-terminator. 
*/ + char *zname = (char *)mstate->dtms_scratch_ptr; + size_t scratch_size = 6 + 1; -#if !defined(__APPLE__) - case DIF_VAR_UID: if (!dtrace_priv_proc(state)) return (0); - /* - * See comment in DIF_VAR_PID. - */ - if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) - return ((uint64_t)p0.p_cred->cr_uid); + /* The scratch allocation's lifetime is that of the clause. */ + if (!DTRACE_INSCRATCH(mstate, scratch_size)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); + return 0; + } - return ((uint64_t)curthread->t_cred->cr_uid); -#else - case DIF_VAR_UID: - if (!dtrace_priv_proc(state)) - return (0); + mstate->dtms_scratch_ptr += scratch_size; - /* - * See comment in DIF_VAR_PID. - */ - if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) - return (0); + /* The kernel does not provide zonename, it will always return 'global'. */ + strlcpy(zname, "global", scratch_size); - if (dtrace_CRED() != NULL) - return ((uint64_t)kauth_getuid()); - else - return -1LL; -#endif /* __APPLE__ */ + return ((uint64_t)(uintptr_t)zname); + } -#if !defined(__APPLE__) - case DIF_VAR_GID: - if (!dtrace_priv_proc(state)) + case DIF_VAR_UID: + if (!dtrace_priv_proc_relaxed(state)) return (0); /* * See comment in DIF_VAR_PID. */ if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) - return ((uint64_t)p0.p_cred->cr_gid); + return (0); + + return ((uint64_t) dtrace_proc_selfruid()); - return ((uint64_t)curthread->t_cred->cr_gid); -#else case DIF_VAR_GID: if (!dtrace_priv_proc(state)) return (0); @@ -2905,29 +3512,14 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, return (0); if (dtrace_CRED() != NULL) + /* Credential does not require lazy initialization. */ return ((uint64_t)kauth_getgid()); - else - return -1LL; -#endif /* __APPLE__ */ - -#if !defined(__APPLE__) - case DIF_VAR_ERRNO: { - klwp_t *lwp; - if (!dtrace_priv_proc(state)) - return (0); - - /* - * See comment in DIF_VAR_PID. - */ - if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) - return (0); - - if ((lwp = curthread->t_lwp) == NULL) - return (0); + else { + /* proc_lock would be taken under kauth_cred_proc_ref() in kauth_cred_get(). */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return -1ULL; + } - return ((uint64_t)lwp->lwp_errno); - } -#else case DIF_VAR_ERRNO: { uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); if (!dtrace_priv_proc(state)) @@ -2939,9 +3531,13 @@ dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v, if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU)) return (0); - return (uthread ? 
uthread->t_dtrace_errno : -1); + if (uthread) + return (uint64_t)uthread->t_dtrace_errno; + else { + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return -1ULL; + } } -#endif /* __APPLE__ */ default: DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); @@ -2962,11 +3558,8 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, dtrace_mstate_t *mstate, dtrace_state_t *state) { volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; -#if !defined(__APPLE__) - volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; -#else volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; -#endif /* __APPLE__ */ + dtrace_vstate_t *vstate = &state->dts_vstate; #if !defined(__APPLE__) union { @@ -2979,7 +3572,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, uintptr_t rw; } r; #else -/* XXX awaits lock/mutex work */ +/* FIXME: awaits lock/mutex work */ #endif /* __APPLE__ */ switch (subr) { @@ -2989,6 +3582,12 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, #if !defined(__APPLE__) case DIF_SUBR_MUTEX_OWNED: + if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), + mstate, vstate)) { + regs[rd] = 0; + break; + } + m.mx = dtrace_load64(tupregs[0].dttk_value); if (MUTEX_TYPE_ADAPTIVE(&m.mi)) regs[rd] = MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER; @@ -2997,6 +3596,12 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; case DIF_SUBR_MUTEX_OWNER: + if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), + mstate, vstate)) { + regs[rd] = 0; + break; + } + m.mx = dtrace_load64(tupregs[0].dttk_value); if (MUTEX_TYPE_ADAPTIVE(&m.mi) && MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER) @@ -3006,11 +3611,23 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; case DIF_SUBR_MUTEX_TYPE_ADAPTIVE: + if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), + mstate, vstate)) { + regs[rd] = 0; + break; + } + m.mx = dtrace_load64(tupregs[0].dttk_value); regs[rd] = MUTEX_TYPE_ADAPTIVE(&m.mi); break; case DIF_SUBR_MUTEX_TYPE_SPIN: + if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t), + mstate, vstate)) { + regs[rd] = 0; + break; + } + m.mx = dtrace_load64(tupregs[0].dttk_value); regs[rd] = MUTEX_TYPE_SPIN(&m.mi); break; @@ -3018,22 +3635,40 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, case DIF_SUBR_RW_READ_HELD: { uintptr_t tmp; + if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t), + mstate, vstate)) { + regs[rd] = 0; + break; + } + r.rw = dtrace_loadptr(tupregs[0].dttk_value); regs[rd] = _RW_READ_HELD(&r.ri, tmp); break; } case DIF_SUBR_RW_WRITE_HELD: + if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t), + mstate, vstate)) { + regs[rd] = 0; + break; + } + r.rw = dtrace_loadptr(tupregs[0].dttk_value); regs[rd] = _RW_WRITE_HELD(&r.ri); break; case DIF_SUBR_RW_ISWRITER: + if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t), + mstate, vstate)) { + regs[rd] = 0; + break; + } + r.rw = dtrace_loadptr(tupregs[0].dttk_value); regs[rd] = _RW_ISWRITER(&r.ri); break; #else -/* XXX awaits lock/mutex work */ +/* FIXME: awaits lock/mutex work */ #endif /* __APPLE__ */ case DIF_SUBR_BCOPY: { @@ -3051,6 +3686,11 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; } + if (!dtrace_canload(src, size, mstate, vstate)) { + regs[rd] = 0; + break; + } + dtrace_bcopy((void *)src, (void *)dest, size); break; } @@ -3062,26 +3702,35 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, tupregs[subr == DIF_SUBR_ALLOCA ? 
0 : 1].dttk_value; size_t scratch_size = (dest - mstate->dtms_scratch_ptr) + size; + /* + * Check whether the user can access kernel memory + */ + if (dtrace_priv_kernel(state) == 0) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV); + regs[rd] = 0; + break; + } /* * This action doesn't require any credential checks since * probes will not activate in user contexts to which the * enabling user does not have permissions. */ - if (mstate->dtms_scratch_ptr + scratch_size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + + /* + * Rounding up the user allocation size could have overflowed + * a large, bogus allocation (like -1ULL) to 0. + */ + if (scratch_size < size || + !DTRACE_INSCRATCH(mstate, scratch_size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } if (subr == DIF_SUBR_COPYIN) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); -#if !defined(__APPLE__) - dtrace_copyin(tupregs[0].dttk_value, dest, size); -#else if (dtrace_priv_proc(state)) - dtrace_copyin(tupregs[0].dttk_value, dest, size); -#endif /* __APPLE__ */ + dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); } @@ -3106,12 +3755,8 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, } DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); -#if !defined(__APPLE__) - dtrace_copyin(tupregs[0].dttk_value, dest, size); -#else if (dtrace_priv_proc(state)) - dtrace_copyin(tupregs[0].dttk_value, dest, size); -#endif /* __APPLE__ */ + dtrace_copyin(tupregs[0].dttk_value, dest, size, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); break; } @@ -3128,20 +3773,15 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * probes will not activate in user contexts to which the * enabling user does not have permissions. */ - if (mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); -#if !defined(__APPLE__) - dtrace_copyinstr(tupregs[0].dttk_value, dest, size); -#else if (dtrace_priv_proc(state)) - dtrace_copyinstr(tupregs[0].dttk_value, dest, size); -#endif /* __APPLE__ */ + dtrace_copyinstr(tupregs[0].dttk_value, dest, size, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); ((char *)dest)[size - 1] = '\0'; @@ -3150,87 +3790,14 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; } -#if !defined(__APPLE__) - case DIF_SUBR_MSGSIZE: - case DIF_SUBR_MSGDSIZE: { - uintptr_t baddr = tupregs[0].dttk_value, daddr; - uintptr_t wptr, rptr; - size_t count = 0; - int cont = 0; - - while (baddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { - wptr = dtrace_loadptr(baddr + - offsetof(mblk_t, b_wptr)); - - rptr = dtrace_loadptr(baddr + - offsetof(mblk_t, b_rptr)); - - if (wptr < rptr) { - *flags |= CPU_DTRACE_BADADDR; - *illval = tupregs[0].dttk_value; - break; - } - - daddr = dtrace_loadptr(baddr + - offsetof(mblk_t, b_datap)); - - baddr = dtrace_loadptr(baddr + - offsetof(mblk_t, b_cont)); - - /* - * We want to prevent against denial-of-service here, - * so we're only going to search the list for - * dtrace_msgdsize_max mblks. 
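[Editor's note] The "scratch_size < size" guard added to the copyin()/alloca() sizing earlier in this hunk exists because the size is attacker-controlled and the alignment padding is added before the bounds check; with a bogus request the sum wraps and a naive test passes. A small demonstration of the wrap:

    #include <stdint.h>

    int
    wraps(void)
    {
        uint64_t pad  = 7;           /* alignment slop before the buffer */
        uint64_t size = ~0ULL;       /* bogus user request: (uint64_t)-1 */
        uint64_t need = pad + size;  /* wraps around to 6 */

        return (need < size);        /* 1: the wrap is detected */
    }

Without that comparison, the wrapped value would sail through the scratch bounds check and the subsequent copy would run far past the region.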
- */ - if (cont++ > dtrace_msgdsize_max) { - *flags |= CPU_DTRACE_ILLOP; - break; - } - - if (subr == DIF_SUBR_MSGDSIZE) { - if (dtrace_load8(daddr + - offsetof(dblk_t, db_type)) != M_DATA) - continue; - } - - count += wptr - rptr; - } - - if (!(*flags & CPU_DTRACE_FAULT)) - regs[rd] = count; - - break; - } -#else case DIF_SUBR_MSGSIZE: case DIF_SUBR_MSGDSIZE: { /* Darwin does not implement SysV streams messages */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); regs[rd] = 0; break; } -#endif /* __APPLE__ */ - -#if !defined(__APPLE__) - case DIF_SUBR_PROGENYOF: { - pid_t pid = tupregs[0].dttk_value; - proc_t *p; - int rval = 0; - - DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); - for (p = curthread->t_procp; p != NULL; p = p->p_parent) { - if (p->p_pidp->pid_id == pid) { - rval = 1; - break; - } - } - - DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); - - regs[rd] = rval; - break; - } -#else case DIF_SUBR_PROGENYOF: { pid_t pid = tupregs[0].dttk_value; struct proc *p = current_proc(); @@ -3259,79 +3826,59 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, regs[rd] = rval; break; } -#endif /* __APPLE__ */ case DIF_SUBR_SPECULATION: regs[rd] = dtrace_speculation(state); break; -#if !defined(__APPLE__) + case DIF_SUBR_COPYOUT: { uintptr_t kaddr = tupregs[0].dttk_value; - uintptr_t uaddr = tupregs[1].dttk_value; + user_addr_t uaddr = tupregs[1].dttk_value; uint64_t size = tupregs[2].dttk_value; if (!dtrace_destructive_disallow && dtrace_priv_proc_control(state) && - !dtrace_istoxic(kaddr, size)) { + !dtrace_istoxic(kaddr, size) && + dtrace_canload(kaddr, size, mstate, vstate)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); - dtrace_copyout(kaddr, uaddr, size); + dtrace_copyout(kaddr, uaddr, size, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); } break; } case DIF_SUBR_COPYOUTSTR: { - uintptr_t kaddr = tupregs[0].dttk_value; - uintptr_t uaddr = tupregs[1].dttk_value; - uint64_t size = tupregs[2].dttk_value; - - if (!dtrace_destructive_disallow && - dtrace_priv_proc_control(state) && - !dtrace_istoxic(kaddr, size)) { - DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); - dtrace_copyoutstr(kaddr, uaddr, size); - DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); - } - break; - } -#else - case DIF_SUBR_COPYOUT: { uintptr_t kaddr = tupregs[0].dttk_value; user_addr_t uaddr = tupregs[1].dttk_value; uint64_t size = tupregs[2].dttk_value; + size_t lim; if (!dtrace_destructive_disallow && dtrace_priv_proc_control(state) && - !dtrace_istoxic(kaddr, size)) { + !dtrace_istoxic(kaddr, size) && + dtrace_strcanload(kaddr, size, &lim, mstate, vstate)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); - dtrace_copyout(kaddr, uaddr, size); + dtrace_copyoutstr(kaddr, uaddr, lim, flags); DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); } break; } - case DIF_SUBR_COPYOUTSTR: { - uintptr_t kaddr = tupregs[0].dttk_value; - user_addr_t uaddr = tupregs[1].dttk_value; - uint64_t size = tupregs[2].dttk_value; + case DIF_SUBR_STRLEN: { + size_t size = state->dts_options[DTRACEOPT_STRSIZE]; + uintptr_t addr = (uintptr_t)tupregs[0].dttk_value; + size_t lim; - if (!dtrace_destructive_disallow && - dtrace_priv_proc_control(state) && - !dtrace_istoxic(kaddr, size)) { - DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); - dtrace_copyoutstr(kaddr, uaddr, size); - DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); + if (!dtrace_strcanload(addr, size, &lim, mstate, vstate)) { + regs[rd] = 0; + break; } - break; - } -#endif /* __APPLE__ */ - case DIF_SUBR_STRLEN: - regs[rd] = dtrace_strlen((char *)(uintptr_t) - tupregs[0].dttk_value, - state->dts_options[DTRACEOPT_STRSIZE]); + regs[rd] = 
dtrace_strlen((char *)addr, lim); + break; + } case DIF_SUBR_STRCHR: case DIF_SUBR_STRRCHR: { @@ -3343,10 +3890,18 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * of the specified character instead of the first. */ uintptr_t addr = tupregs[0].dttk_value; - uintptr_t limit = addr + state->dts_options[DTRACEOPT_STRSIZE]; + uintptr_t addr_limit; + uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; + size_t lim; char c, target = (char)tupregs[1].dttk_value; - for (regs[rd] = NULL; addr < limit; addr++) { + if (!dtrace_strcanload(addr, size, &lim, mstate, vstate)) { + regs[rd] = NULL; + break; + } + addr_limit = addr + lim; + + for (regs[rd] = 0; addr < addr_limit; addr++) { if ((c = dtrace_load8(addr)) == target) { regs[rd] = addr; @@ -3384,6 +3939,17 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, regs[rd] = notfound; + if (!dtrace_canload((uintptr_t)addr, len + 1, mstate, vstate)) { + regs[rd] = 0; + break; + } + + if (!dtrace_canload((uintptr_t)substr, sublen + 1, mstate, + vstate)) { + regs[rd] = 0; + break; + } + /* * strstr() and index()/rindex() have similar semantics if * both strings are the empty string: strstr() returns a @@ -3458,13 +4024,13 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; } - if (pos > len) + if ((size_t)pos > len) pos = len; } else { if (pos < 0) pos = 0; - if (pos >= len) { + if ((size_t)pos >= len) { if (sublen == 0) regs[rd] = len; break; @@ -3504,19 +4070,29 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, uintptr_t addr = tupregs[0].dttk_value; uintptr_t tokaddr = tupregs[1].dttk_value; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; - uintptr_t limit, toklimit = tokaddr + size; - uint8_t c, tokmap[32]; /* 256 / 8 */ + uintptr_t limit, toklimit; + size_t clim; char *dest = (char *)mstate->dtms_scratch_ptr; - int i; + uint8_t c='\0', tokmap[32]; /* 256 / 8 */ + uint64_t i = 0; + + /* + * Check both the token buffer and (later) the input buffer, + * since both could be non-scratch addresses. + */ + if (!dtrace_strcanload(tokaddr, size, &clim, mstate, vstate)) { + regs[rd] = 0; + break; + } + toklimit = tokaddr + clim; - if (mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } - if (addr == NULL) { + if (addr == 0) { /* * If the address specified is NULL, we use our saved * strtok pointer from the mstate. Note that this @@ -3525,6 +4101,22 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * it behaves like an implicit clause-local variable. */ addr = mstate->dtms_strtok; + limit = mstate->dtms_strtok_limit; + } else { + /* + * If the user-specified address is non-NULL we must + * access check it. This is the only time we have + * a chance to do so, since this address may reside + * in the string table of this clause-- future calls + * (when we fetch addr from mstate->dtms_strtok) + * would fail this access check. + */ + if (!dtrace_strcanload(addr, size, &clim, mstate, + vstate)) { + regs[rd] = 0; + break; + } + limit = addr + clim; } /* @@ -3532,7 +4124,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * string -- setting a bit in the map for every character * found in the token string. 
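[Editor's note] That map is a 256-bit membership set: one bit per possible byte value, so testing whether a character belongs to the token set costs two shifts and a mask. The same structure in isolation:

    #include <stdint.h>

    static uint8_t tokmap[32];      /* 256 bits, one per byte value */

    static void
    tok_set(uint8_t c)
    {
        tokmap[c >> 3] |= (uint8_t)(1 << (c & 0x7));
    }

    static int
    tok_test(uint8_t c)
    {
        return ((tokmap[c >> 3] & (1 << (c & 0x7))) != 0);
    }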
*/ - for (i = 0; i < sizeof (tokmap); i++) + for (i = 0; i < (int)sizeof (tokmap); i++) tokmap[i] = 0; for (; tokaddr < toklimit; tokaddr++) { @@ -3543,10 +4135,10 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, tokmap[c >> 3] |= (1 << (c & 0x7)); } - for (limit = addr + size; addr < limit; addr++) { + for (; addr < limit; addr++) { /* - * We're looking for a character that is _not_ contained - * in the token string. + * We're looking for a character that is _not_ + * contained in the token string. */ if ((c = dtrace_load8(addr)) == '\0') break; @@ -3562,8 +4154,9 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * We return NULL in this case, and we set the saved * address to NULL as well. */ - regs[rd] = NULL; - mstate->dtms_strtok = NULL; + regs[rd] = 0; + mstate->dtms_strtok = 0; + mstate->dtms_strtok_limit = NULL; break; } @@ -3586,6 +4179,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, regs[rd] = (uintptr_t)dest; mstate->dtms_scratch_ptr += size; mstate->dtms_strtok = addr; + mstate->dtms_strtok_limit = limit; break; } @@ -3598,16 +4192,20 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, size_t len = dtrace_strlen((char *)s, size); int64_t i = 0; - if (nargs <= 2) - remaining = (int64_t)size; + if (!dtrace_canload(s, len + 1, mstate, vstate)) { + regs[rd] = 0; + break; + } - if (mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } + if (nargs <= 2) + remaining = (int64_t)size; + if (index < 0) { index += len; @@ -3617,267 +4215,70 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, } } - if (index >= len || index < 0) - index = len; - - for (d[0] = '\0'; remaining > 0; remaining--) { - if ((d[i++] = dtrace_load8(s++ + index)) == '\0') - break; + if ((size_t)index >= len || index < 0) { + remaining = 0; + } else if (remaining < 0) { + remaining += len - index; + } else if ((uint64_t)index + (uint64_t)remaining > size) { + remaining = size - index; + } - if (i == size) { - d[i - 1] = '\0'; + for (i = 0; i < remaining; i++) { + if ((d[i] = dtrace_load8(s + index + i)) == '\0') break; } - } + + d[i] = '\0'; mstate->dtms_scratch_ptr += size; regs[rd] = (uintptr_t)d; break; } -#if !defined(__APPLE__) - case DIF_SUBR_GETMAJOR: -#ifdef __LP64__ - regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64; -#else - regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR) & MAXMAJ; -#endif - break; - -#else /* __APPLE__ */ case DIF_SUBR_GETMAJOR: regs[rd] = (uintptr_t)major( (dev_t)tupregs[0].dttk_value ); break; -#endif /* __APPLE__ */ -#if !defined(__APPLE__) - case DIF_SUBR_GETMINOR: -#ifdef __LP64__ - regs[rd] = tupregs[0].dttk_value & MAXMIN64; -#else - regs[rd] = tupregs[0].dttk_value & MAXMIN; -#endif - break; - -#else /* __APPLE__ */ case DIF_SUBR_GETMINOR: regs[rd] = (uintptr_t)minor( (dev_t)tupregs[0].dttk_value ); break; -#endif /* __APPLE__ */ - -#if !defined(__APPLE__) - case DIF_SUBR_DDI_PATHNAME: { - /* - * This one is a galactic mess. We are going to roughly - * emulate ddi_pathname(), but it's made more complicated - * by the fact that we (a) want to include the minor name and - * (b) must proceed iteratively instead of recursively. 
- */ - uintptr_t dest = mstate->dtms_scratch_ptr; - uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; - char *start = (char *)dest, *end = start + size - 1; - uintptr_t daddr = tupregs[0].dttk_value; - int64_t minor = (int64_t)tupregs[1].dttk_value; - char *s; - int i, len, depth = 0; - - if (size == 0 || mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { - DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; - break; - } - - *end = '\0'; - - /* - * We want to have a name for the minor. In order to do this, - * we need to walk the minor list from the devinfo. We want - * to be sure that we don't infinitely walk a circular list, - * so we check for circularity by sending a scout pointer - * ahead two elements for every element that we iterate over; - * if the list is circular, these will ultimately point to the - * same element. You may recognize this little trick as the - * answer to a stupid interview question -- one that always - * seems to be asked by those who had to have it laboriously - * explained to them, and who can't even concisely describe - * the conditions under which one would be forced to resort to - * this technique. Needless to say, those conditions are - * found here -- and probably only here. Is this is the only - * use of this infamous trick in shipping, production code? - * If it isn't, it probably should be... - */ - if (minor != -1) { - uintptr_t maddr = dtrace_loadptr(daddr + - offsetof(struct dev_info, devi_minor)); - - uintptr_t next = offsetof(struct ddi_minor_data, next); - uintptr_t name = offsetof(struct ddi_minor_data, - d_minor) + offsetof(struct ddi_minor, name); - uintptr_t dev = offsetof(struct ddi_minor_data, - d_minor) + offsetof(struct ddi_minor, dev); - uintptr_t scout; - - if (maddr != NULL) - scout = dtrace_loadptr(maddr + next); - - while (maddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { - uint64_t m; -#ifdef __LP64__ - m = dtrace_load64(maddr + dev) & MAXMIN64; -#else - m = dtrace_load32(maddr + dev) & MAXMIN; -#endif - if (m != minor) { - maddr = dtrace_loadptr(maddr + next); - - if (scout == NULL) - continue; - - scout = dtrace_loadptr(scout + next); - - if (scout == NULL) - continue; - - scout = dtrace_loadptr(scout + next); - - if (scout == NULL) - continue; - - if (scout == maddr) { - *flags |= CPU_DTRACE_ILLOP; - break; - } - - continue; - } - - /* - * We have the minor data. Now we need to - * copy the minor's name into the end of the - * pathname. - */ - s = (char *)dtrace_loadptr(maddr + name); - len = dtrace_strlen(s, size); - - if (*flags & CPU_DTRACE_FAULT) - break; - - if (len != 0) { - if ((end -= (len + 1)) < start) - break; - - *end = ':'; - } - - for (i = 1; i <= len; i++) - end[i] = dtrace_load8((uintptr_t)s++); - break; - } - } - while (daddr != NULL && !(*flags & CPU_DTRACE_FAULT)) { - ddi_node_state_t devi_state; - - devi_state = dtrace_load32(daddr + - offsetof(struct dev_info, devi_node_state)); - - if (*flags & CPU_DTRACE_FAULT) - break; - - if (devi_state >= DS_INITIALIZED) { - s = (char *)dtrace_loadptr(daddr + - offsetof(struct dev_info, devi_addr)); - len = dtrace_strlen(s, size); - - if (*flags & CPU_DTRACE_FAULT) - break; - - if (len != 0) { - if ((end -= (len + 1)) < start) - break; - - *end = '@'; - } - - for (i = 1; i <= len; i++) - end[i] = dtrace_load8((uintptr_t)s++); - } - - /* - * Now for the node name... 
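[Editor's note] The removed ddi_pathname() emulation above is also where the "scout pointer" circularity check lived: walk one pointer while a scout runs ahead two links per step, and if the scout ever lands on the walker the list is circular. It is Floyd's tortoise-and-hare in kernel clothing; a standalone sketch:

    struct node {
        struct node *next;
    };

    /* Returns 1 if the list starting at head contains a cycle. */
    static int
    list_is_circular(struct node *head)
    {
        struct node *walk = head;
        struct node *scout = (head != NULL) ? head->next : NULL;

        while (scout != NULL && scout->next != NULL) {
            if (scout == walk || scout->next == walk)
                return (1);      /* the scout lapped the walker */
            walk = walk->next;
            scout = scout->next->next;
        }
        return (0);              /* reached NULL: the list terminates */
    }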
- */ - s = (char *)dtrace_loadptr(daddr + - offsetof(struct dev_info, devi_node_name)); - - daddr = dtrace_loadptr(daddr + - offsetof(struct dev_info, devi_parent)); - - /* - * If our parent is NULL (that is, if we're the root - * node), we're going to use the special path - * "devices". - */ - if (daddr == NULL) - s = "devices"; - - len = dtrace_strlen(s, size); - if (*flags & CPU_DTRACE_FAULT) - break; - - if ((end -= (len + 1)) < start) - break; - - for (i = 1; i <= len; i++) - end[i] = dtrace_load8((uintptr_t)s++); - *end = '/'; - - if (depth++ > dtrace_devdepth_max) { - *flags |= CPU_DTRACE_ILLOP; - break; - } - } - - if (end < start) - DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - - if (daddr == NULL) { - regs[rd] = (uintptr_t)end; - mstate->dtms_scratch_ptr += size; - } - - break; - } -#else case DIF_SUBR_DDI_PATHNAME: { - /* XXX awaits galactic disentanglement ;-} */ - regs[rd] = NULL; + /* APPLE NOTE: currently unsupported on Darwin */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + regs[rd] = 0; break; } -#endif /* __APPLE__ */ case DIF_SUBR_STRJOIN: { char *d = (char *)mstate->dtms_scratch_ptr; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t s1 = tupregs[0].dttk_value; uintptr_t s2 = tupregs[1].dttk_value; - int i = 0; + uint64_t i = 0, j = 0; + size_t lim1, lim2; + char c; + + if (!dtrace_strcanload(s1, size, &lim1, mstate, vstate) || + !dtrace_strcanload(s2, size, &lim2, mstate, vstate)) { + regs[rd] = 0; + break; + } - if (mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } for (;;) { if (i >= size) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } - - if ((d[i++] = dtrace_load8(s1++)) == '\0') { + c = (i >= lim1) ? '\0' : dtrace_load8(s1++); + if ((d[i++] = c) == '\0') { i--; break; } @@ -3886,11 +4287,11 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, for (;;) { if (i >= size) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } - - if ((d[i++] = dtrace_load8(s2++)) == '\0') + c = (j++ >= lim2) ? 
'\0' : dtrace_load8(s2++); + if ((d[i++] = c) == '\0') break; } @@ -3908,10 +4309,9 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, uint64_t size = 22; /* enough room for 2^64 in decimal */ char *end = (char *)mstate->dtms_scratch_ptr + size - 1; - if (mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } @@ -3929,6 +4329,36 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, break; } + case DIF_SUBR_HTONS: + case DIF_SUBR_NTOHS: +#ifdef _BIG_ENDIAN + regs[rd] = (uint16_t)tupregs[0].dttk_value; +#else + regs[rd] = DT_BSWAP_16((uint16_t)tupregs[0].dttk_value); +#endif + break; + + + case DIF_SUBR_HTONL: + case DIF_SUBR_NTOHL: +#ifdef _BIG_ENDIAN + regs[rd] = (uint32_t)tupregs[0].dttk_value; +#else + regs[rd] = DT_BSWAP_32((uint32_t)tupregs[0].dttk_value); +#endif + break; + + + case DIF_SUBR_HTONLL: + case DIF_SUBR_NTOHLL: +#ifdef _BIG_ENDIAN + regs[rd] = (uint64_t)tupregs[0].dttk_value; +#else + regs[rd] = DT_BSWAP_64((uint64_t)tupregs[0].dttk_value); +#endif + break; + + case DIF_SUBR_DIRNAME: case DIF_SUBR_BASENAME: { char *dest = (char *)mstate->dtms_scratch_ptr; @@ -3938,10 +4368,14 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, int lastbase = -1, firstbase = -1, lastdir = -1; int start, end; - if (mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + if (!dtrace_canload(src, len + 1, mstate, vstate)) { + regs[rd] = 0; + break; + } + + if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } @@ -4047,7 +4481,7 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, end = lastbase; } - for (i = start, j = 0; i <= end && j < size - 1; i++, j++) + for (i = start, j = 0; i <= end && (uint64_t)j < size - 1; i++, j++) dest[j] = dtrace_load8(src + i); dest[j] = '\0'; @@ -4060,12 +4494,17 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, char *dest = (char *)mstate->dtms_scratch_ptr, c; uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; uintptr_t src = tupregs[0].dttk_value; - int i = 0, j = 0; + size_t lim; + size_t i = 0, j = 0; - if (mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + if (!dtrace_strcanload(src, size, &lim, mstate, vstate)) { + regs[rd] = 0; + break; + } + + if (!DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; + regs[rd] = 0; break; } @@ -4073,9 +4512,9 @@ dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs, * Move forward, loading each character. */ do { - c = dtrace_load8(src + i++); + c = (i >= lim) ? '\0' : dtrace_load8(src + i++); next: - if (j + 5 >= size) /* 5 = strlen("/..c\0") */ + if ((uint64_t)(j + 5) >= size) /* 5 = strlen("/..c\0") */ break; if (c != '/') { @@ -4083,7 +4522,7 @@ next: continue; } - c = dtrace_load8(src + i++); + c = (i >= lim) ? '\0' : dtrace_load8(src + i++); if (c == '/') { /* @@ -4104,7 +4543,7 @@ next: continue; } - c = dtrace_load8(src + i++); + c = (i >= lim) ? '\0' : dtrace_load8(src + i++); if (c == '/') { /* @@ -4127,60 +4566,380 @@ next: continue; } - c = dtrace_load8(src + i++); + c = (i >= lim) ? '\0' : dtrace_load8(src + i++); + + if (c != '/' && c != '\0') { + /* + * This is not ".." -- it's "..[mumble]". + * We'll store the "/.." and this character + * and continue processing. 
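[Editor's note] cleanpath()'s "/../" handling just below works by backing the output cursor up to the previous '/', discarding the component it had already emitted. A toy demonstration of that backtrack step in ordinary C (no DTrace loads):

    #include <stdio.h>
    #include <string.h>

    int
    main(void)
    {
        char dest[] = "/a/b";        /* output built so far */
        size_t j = strlen(dest);     /* next write position */

        /* We just consumed "/../": pop one component off dest. */
        while (j != 0 && dest[--j] != '/')
            continue;
        dest[j] = '\0';

        printf("%s\n", dest);        /* prints "/a" */
        return (0);
    }

So "/a/b/../c" comes out as "/a/c": the backtrack erases "b", and the characters after the "/../" are appended as usual.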
+ */ + dest[j++] = '/'; + dest[j++] = '.'; + dest[j++] = '.'; + dest[j++] = c; + continue; + } + + /* + * This is "/../" or "/..\0". We need to back up + * our destination pointer until we find a "/". + */ + i--; + while (j != 0 && dest[--j] != '/') + continue; + + if (c == '\0') + dest[++j] = '/'; + } while (c != '\0'); + + dest[j] = '\0'; + regs[rd] = (uintptr_t)dest; + mstate->dtms_scratch_ptr += size; + break; + } + + case DIF_SUBR_INET_NTOA: + case DIF_SUBR_INET_NTOA6: + case DIF_SUBR_INET_NTOP: { + size_t size; + int af, argi, i; + char *base, *end; + + if (subr == DIF_SUBR_INET_NTOP) { + af = (int)tupregs[0].dttk_value; + argi = 1; + } else { + af = subr == DIF_SUBR_INET_NTOA ? AF_INET: AF_INET6; + argi = 0; + } + + if (af == AF_INET) { +#if !defined(__APPLE__) + ipaddr_t ip4; +#else + uint32_t ip4; +#endif /* __APPLE__ */ + uint8_t *ptr8, val; + + /* + * Safely load the IPv4 address. + */ +#if !defined(__APPLE__) + ip4 = dtrace_load32(tupregs[argi].dttk_value); +#else + if (!dtrace_canload(tupregs[argi].dttk_value, sizeof(ip4), + mstate, vstate)) { + regs[rd] = 0; + break; + } + + dtrace_bcopy( + (void *)(uintptr_t)tupregs[argi].dttk_value, + (void *)(uintptr_t)&ip4, sizeof (ip4)); +#endif /* __APPLE__ */ + /* + * Check an IPv4 string will fit in scratch. + */ +#if !defined(__APPLE__) + size = INET_ADDRSTRLEN; +#else + size = MAX_IPv4_STR_LEN; +#endif /* __APPLE__ */ + if (!DTRACE_INSCRATCH(mstate, size)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); + regs[rd] = 0; + break; + } + base = (char *)mstate->dtms_scratch_ptr; + end = (char *)mstate->dtms_scratch_ptr + size - 1; + + /* + * Stringify as a dotted decimal quad. + */ + *end-- = '\0'; + ptr8 = (uint8_t *)&ip4; + for (i = 3; i >= 0; i--) { + val = ptr8[i]; + + if (val == 0) { + *end-- = '0'; + } else { + for (; val; val /= 10) { + *end-- = '0' + (val % 10); + } + } + + if (i > 0) + *end-- = '.'; + } + ASSERT(end + 1 >= base); + + } else if (af == AF_INET6) { +#if defined(__APPLE__) +#define _S6_un __u6_addr +#define _S6_u8 __u6_addr8 +#endif /* __APPLE__ */ + struct in6_addr ip6; + int firstzero, tryzero, numzero, v6end; + uint16_t val; + const char digits[] = "0123456789abcdef"; + + /* + * Stringify using RFC 1884 convention 2 - 16 bit + * hexadecimal values with a zero-run compression. + * Lower case hexadecimal digits are used. + * eg, fe80::214:4fff:fe0b:76c8. + * The IPv4 embedded form is returned for inet_ntop, + * just the IPv4 string is returned for inet_ntoa6. + */ + + if (!dtrace_canload(tupregs[argi].dttk_value, + sizeof(struct in6_addr), mstate, vstate)) { + regs[rd] = 0; + break; + } + + /* + * Safely load the IPv6 address. + */ + dtrace_bcopy( + (void *)(uintptr_t)tupregs[argi].dttk_value, + (void *)(uintptr_t)&ip6, sizeof (struct in6_addr)); + + /* + * Check an IPv6 string will fit in scratch. + */ + size = INET6_ADDRSTRLEN; + if (!DTRACE_INSCRATCH(mstate, size)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); + regs[rd] = 0; + break; + } + base = (char *)mstate->dtms_scratch_ptr; + end = (char *)mstate->dtms_scratch_ptr + size - 1; + *end-- = '\0'; + + /* + * Find the longest run of 16 bit zero values + * for the single allowed zero compression - "::". 
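[Editor's note] Both the IPv4 and IPv6 stringifiers build their output backwards from the end of the scratch buffer, emitting least-significant digits first, which avoids a second reversal pass. The digit loop in isolation:

    #include <stdio.h>

    int
    main(void)
    {
        char buf[16];
        char *end = buf + sizeof (buf) - 1;
        unsigned int val = 198;

        *end-- = '\0';
        if (val == 0) {
            *end-- = '0';
        } else {
            for (; val; val /= 10)
                *end-- = '0' + (val % 10);
        }

        printf("%s\n", end + 1);     /* prints "198" */
        return (0);
    }

The zero-run compression described above then only has to remember where the longest run of zero 16-bit groups starts (firstzero) and how long it is (numzero); when the reverse walk reaches that range it emits "::" once and skips the run.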
+ */ + firstzero = -1; + tryzero = -1; + numzero = 1; + for (i = 0; i < (int)sizeof (struct in6_addr); i++) { + if (ip6._S6_un._S6_u8[i] == 0 && + tryzero == -1 && i % 2 == 0) { + tryzero = i; + continue; + } + + if (tryzero != -1 && + (ip6._S6_un._S6_u8[i] != 0 || + i == sizeof (struct in6_addr) - 1)) { + + if (i - tryzero <= numzero) { + tryzero = -1; + continue; + } + + firstzero = tryzero; + numzero = i - i % 2 - tryzero; + tryzero = -1; + + if (ip6._S6_un._S6_u8[i] == 0 && + i == sizeof (struct in6_addr) - 1) + numzero += 2; + } + } + ASSERT(firstzero + numzero <= (int)sizeof (struct in6_addr)); + + /* + * Check for an IPv4 embedded address. + */ + v6end = sizeof (struct in6_addr) - 2; + if (IN6_IS_ADDR_V4MAPPED(&ip6) || + IN6_IS_ADDR_V4COMPAT(&ip6)) { + for (i = sizeof (struct in6_addr) - 1; + i >= (int)DTRACE_V4MAPPED_OFFSET; i--) { + ASSERT(end >= base); + + val = ip6._S6_un._S6_u8[i]; + + if (val == 0) { + *end-- = '0'; + } else { + for (; val; val /= 10) { + *end-- = '0' + val % 10; + } + } + + if (i > (int)DTRACE_V4MAPPED_OFFSET) + *end-- = '.'; + } + + if (subr == DIF_SUBR_INET_NTOA6) + goto inetout; - if (c != '/' && c != '\0') { /* - * This is not ".." -- it's "..[mumble]". - * We'll store the "/.." and this character - * and continue processing. + * Set v6end to skip the IPv4 address that + * we have already stringified. */ - dest[j++] = '/'; - dest[j++] = '.'; - dest[j++] = '.'; - dest[j++] = c; - continue; + v6end = 10; } /* - * This is "/../" or "/..\0". We need to back up - * our destination pointer until we find a "/". + * Build the IPv6 string by working through the + * address in reverse. */ - i--; - while (j != 0 && dest[--j] != '/') - continue; + for (i = v6end; i >= 0; i -= 2) { + ASSERT(end >= base); - if (c == '\0') - dest[++j] = '/'; - } while (c != '\0'); + if (i == firstzero + numzero - 2) { + *end-- = ':'; + *end-- = ':'; + i -= numzero - 2; + continue; + } - dest[j] = '\0'; - regs[rd] = (uintptr_t)dest; + if (i < 14 && i != firstzero - 2) + *end-- = ':'; + + val = (ip6._S6_un._S6_u8[i] << 8) + + ip6._S6_un._S6_u8[i + 1]; + + if (val == 0) { + *end-- = '0'; + } else { + for (; val; val /= 16) { + *end-- = digits[val % 16]; + } + } + } + ASSERT(end + 1 >= base); + +#if defined(__APPLE__) +#undef _S6_un +#undef _S6_u8 +#endif /* __APPLE__ */ + } else { + /* + * The user didn't use AH_INET or AH_INET6. + */ + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + regs[rd] = 0; + break; + } + +inetout: regs[rd] = (uintptr_t)end + 1; mstate->dtms_scratch_ptr += size; break; } -#ifdef __APPLE__ - /* CHUD callback ('chud(uint64_t, [uint64_t], [uint64_t] ...)') */ - case DIF_SUBR_CHUD: { - uint64_t selector = tupregs[0].dttk_value; - uint64_t args[DIF_DTR_NREGS-1] = {0ULL}; - uint32_t ii; + case DIF_SUBR_TOUPPER: + case DIF_SUBR_TOLOWER: { + uintptr_t src = tupregs[0].dttk_value; + char *dest = (char *)mstate->dtms_scratch_ptr; + char lower, upper, base, c; + uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; + size_t len = dtrace_strlen((char*) src, size); + size_t i = 0; + + lower = (subr == DIF_SUBR_TOUPPER) ? 'a' : 'A'; + upper = (subr == DIF_SUBR_TOUPPER) ? 'z' : 'Z'; + base = (subr == DIF_SUBR_TOUPPER) ? 
'A' : 'a'; - /* copy in any variadic argument list */ - for(ii = 0; ii < DIF_DTR_NREGS-1; ii++) { - args[ii] = tupregs[ii+1].dttk_value; + if (!dtrace_canload(src, len + 1, mstate, vstate)) { + regs[rd] = 0; + break; + } + + if (!DTRACE_INSCRATCH(mstate, size)) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); + regs[rd] = 0; + break; } - kern_return_t ret = - chudxnu_dtrace_callback(selector, args, DIF_DTR_NREGS-1); - if(KERN_SUCCESS != ret) { - /* error */ + for (i = 0; i < size - 1; ++i) { + if ((c = dtrace_load8(src + i)) == '\0') + break; + if (c >= lower && c <= upper) + c = base + (c - lower); + dest[i] = c; } + + ASSERT(i < size); + + dest[i] = '\0'; + regs[rd] = (uintptr_t) dest; + mstate->dtms_scratch_ptr += size; + break; } -#endif /* __APPLE__ */ +#if defined(__APPLE__) + case DIF_SUBR_VM_KERNEL_ADDRPERM: { + if (!dtrace_priv_kernel(state)) { + regs[rd] = 0; + } else { + regs[rd] = VM_KERNEL_ADDRPERM((vm_offset_t) tupregs[0].dttk_value); + } + + break; + } + + case DIF_SUBR_KDEBUG_TRACE: { + uint32_t debugid; + uintptr_t args[4] = {0}; + int i; + + if (nargs < 2 || nargs > 5) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + break; + } + + if (dtrace_destructive_disallow) + return; + + debugid = tupregs[0].dttk_value; + for (i = 0; i < nargs - 1; i++) + args[i] = tupregs[i + 1].dttk_value; + + kernel_debug(debugid, args[0], args[1], args[2], args[3], 0); + + break; + } + + case DIF_SUBR_KDEBUG_TRACE_STRING: { + if (nargs != 3) { + break; + } + + if (dtrace_destructive_disallow) + return; + + uint64_t size = state->dts_options[DTRACEOPT_STRSIZE]; + uint32_t debugid = tupregs[0].dttk_value; + uint64_t str_id = tupregs[1].dttk_value; + uintptr_t src = tupregs[2].dttk_value; + size_t lim; + char buf[size]; + char* str = NULL; + + if (src != (uintptr_t)0) { + str = buf; + if (!dtrace_strcanload(src, size, &lim, mstate, vstate)) { + break; + } + dtrace_strcpy((void*)src, buf, size); + } + + (void)kernel_debug_string(debugid, &str_id, str); + regs[rd] = str_id; + + break; + } +#endif } } @@ -4204,11 +4963,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, dtrace_dstate_t *dstate = &vstate->dtvs_dynvars; dtrace_difv_t *v; volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; -#if !defined(__APPLE__) - volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; -#else volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval; -#endif /* __APPLE__ */ dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */ uint64_t regs[DIF_DIR_NREGS]; @@ -4216,11 +4971,17 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0; int64_t cc_r; - uint_t pc = 0, id, opc; + uint_t pc = 0, id, opc = 0; uint8_t ttop = 0; dif_instr_t instr; uint_t r1, r2, rd; + /* + * We stash the current DIF object into the machine state: we need it + * for subsequent access checking. + */ + mstate->dtms_difo = difo; + regs[DIF_REG_R0] = 0; /* %r0 is fixed at zero */ while (pc < textlen && !(*flags & CPU_DTRACE_FAULT)) { @@ -4424,6 +5185,12 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, case DIF_OP_LDX: regs[rd] = dtrace_load64(regs[r1]); break; +/* + * Darwin 32-bit kernel may fetch from 64-bit user. 
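[Editor's note] The caveat this comment spells out matters in practice: on a 32-bit Darwin kernel uintptr_t is 32 bits, but the traced process may be 64-bit, so a user address must travel as a full 64-bit value. A sketch of the distinction, where user_addr_t is XNU's always-64-bit user pointer type and dtrace_fuword8() is assumed here to take one:

    #include <stdint.h>

    typedef uint64_t user_addr_t;          /* 64-bit on every kernel */

    extern uint8_t dtrace_fuword8(user_addr_t);   /* kernel-provided */

    uint8_t
    fetch_user_byte(uint64_t reg)
    {
        /*
         * Wrong on a 32-bit kernel: (uintptr_t)reg would truncate a
         * 64-bit user address to its low 32 bits.
         */
        user_addr_t uaddr = reg;           /* keep all 64 bits */
        return (dtrace_fuword8(uaddr));
    }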
+ * Do not cast regs to uintptr_t + * DIF_OP_ULDSB,DIF_OP_ULDSH, DIF_OP_ULDSW, DIF_OP_ULDUB + * DIF_OP_ULDUH, DIF_OP_ULDUW, DIF_OP_ULDX + */ case DIF_OP_ULDSB: regs[rd] = (int8_t) dtrace_fuword8(regs[r1]); @@ -4454,6 +5221,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, break; case DIF_OP_RET: rval = regs[rd]; + pc = textlen; break; case DIF_OP_NOP: break; @@ -4464,15 +5232,27 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, regs[rd] = (uint64_t)(uintptr_t) (strtab + DIF_INSTR_STRING(instr)); break; - case DIF_OP_SCMP: - cc_r = dtrace_strncmp((char *)(uintptr_t)regs[r1], - (char *)(uintptr_t)regs[r2], - state->dts_options[DTRACEOPT_STRSIZE]); + case DIF_OP_SCMP: { + size_t sz = state->dts_options[DTRACEOPT_STRSIZE]; + uintptr_t s1 = regs[r1]; + uintptr_t s2 = regs[r2]; + size_t lim1 = sz, lim2 = sz; + + if (s1 != 0 && + !dtrace_strcanload(s1, sz, &lim1, mstate, vstate)) + break; + if (s2 != 0 && + !dtrace_strcanload(s2, sz, &lim2, mstate, vstate)) + break; + + cc_r = dtrace_strncmp((char *)s1, (char *)s2, + MIN(lim1, lim2)); cc_n = cc_r < 0; cc_z = cc_r == 0; cc_v = cc_c = 0; break; + } case DIF_OP_LDGA: regs[rd] = dtrace_dif_variable(mstate, state, r1, regs[r2]); @@ -4501,7 +5281,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, * then this is to be treated as a * reference to a NULL variable. */ - regs[rd] = NULL; + regs[rd] = 0; } else { regs[rd] = a + sizeof (uint64_t); } @@ -4518,26 +5298,32 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; + VERIFY(id < (uint_t)vstate->dtvs_nglobals); svar = vstate->dtvs_globals[id]; ASSERT(svar != NULL); v = &svar->dtsv_var; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { uintptr_t a = (uintptr_t)svar->dtsv_data; + size_t lim; - ASSERT(a != NULL); + ASSERT(a != 0); ASSERT(svar->dtsv_size != 0); - if (regs[rd] == NULL) { + if (regs[rd] == 0) { *(uint8_t *)a = UINT8_MAX; break; } else { *(uint8_t *)a = 0; a += sizeof (uint64_t); } + if (!dtrace_vcanload( + (void *)(uintptr_t)regs[rd], &v->dtdv_type, + &lim, mstate, vstate)) + break; dtrace_vcopy((void *)(uintptr_t)regs[rd], - (void *)a, &v->dtdv_type); + (void *)a, &v->dtdv_type, lim); break; } @@ -4566,9 +5352,8 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, id -= DIF_VAR_OTHER_UBASE; - ASSERT(id < vstate->dtvs_nlocals); + ASSERT(id < (uint_t)vstate->dtvs_nlocals); ASSERT(vstate->dtvs_locals != NULL); - svar = vstate->dtvs_locals[id]; ASSERT(svar != NULL); v = &svar->dtsv_var; @@ -4578,7 +5363,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, size_t sz = v->dtdv_type.dtdt_size; sz += sizeof (uint64_t); - ASSERT(svar->dtsv_size == NCPU * sz); + ASSERT(svar->dtsv_size == (int)NCPU * sz); a += CPU->cpu_id * sz; if (*(uint8_t *)a == UINT8_MAX) { @@ -4587,7 +5372,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, * then this is to be treated as a * reference to a NULL variable. 
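[Editor's note] By-reference static variables use an in-band NULL marker: each slot begins with an 8-byte header whose first byte is UINT8_MAX when the variable was assigned NULL, and the payload starts after the header. The encoding, reduced to its two operations:

    #include <stdint.h>
    #include <string.h>

    /* Store: flag byte in an 8-byte header, payload after it. */
    static void
    var_store(uint8_t *slot, const void *src, size_t len)
    {
        if (src == NULL) {
            slot[0] = UINT8_MAX;    /* tombstone: variable is NULL */
            return;
        }
        slot[0] = 0;
        memcpy(slot + sizeof (uint64_t), src, len);
    }

    /* Load: the tombstone reads back as NULL. */
    static const void *
    var_load(uint8_t *slot)
    {
        return ((slot[0] == UINT8_MAX) ? NULL : slot + sizeof (uint64_t));
    }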
*/ - regs[rd] = NULL; + regs[rd] = 0; } else { regs[rd] = a + sizeof (uint64_t); } @@ -4595,7 +5380,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, break; } - ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t)); + ASSERT(svar->dtsv_size == (int)NCPU * sizeof (uint64_t)); tmp = (uint64_t *)(uintptr_t)svar->dtsv_data; regs[rd] = tmp[CPU->cpu_id]; break; @@ -4605,8 +5390,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; - ASSERT(id < vstate->dtvs_nlocals); - + VERIFY(id < (uint_t)vstate->dtvs_nlocals); ASSERT(vstate->dtvs_locals != NULL); svar = vstate->dtvs_locals[id]; ASSERT(svar != NULL); @@ -4615,12 +5399,13 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { uintptr_t a = (uintptr_t)svar->dtsv_data; size_t sz = v->dtdv_type.dtdt_size; + size_t lim; sz += sizeof (uint64_t); - ASSERT(svar->dtsv_size == NCPU * sz); + ASSERT(svar->dtsv_size == (int)NCPU * sz); a += CPU->cpu_id * sz; - if (regs[rd] == NULL) { + if (regs[rd] == 0) { *(uint8_t *)a = UINT8_MAX; break; } else { @@ -4628,12 +5413,17 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, a += sizeof (uint64_t); } + if (!dtrace_vcanload( + (void *)(uintptr_t)regs[rd], &v->dtdv_type, + &lim, mstate, vstate)) + break; + dtrace_vcopy((void *)(uintptr_t)regs[rd], - (void *)a, &v->dtdv_type); + (void *)a, &v->dtdv_type, lim); break; } - ASSERT(svar->dtsv_size == NCPU * sizeof (uint64_t)); + ASSERT(svar->dtsv_size == (int)NCPU * sizeof (uint64_t)); tmp = (uint64_t *)(uintptr_t)svar->dtsv_data; tmp[CPU->cpu_id] = regs[rd]; break; @@ -4654,7 +5444,8 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, key[1].dttk_size = 0; dvar = dtrace_dynvar(dstate, 2, key, - sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC); + sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC, + mstate, vstate); if (dvar == NULL) { regs[rd] = 0; @@ -4677,6 +5468,7 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, id = DIF_INSTR_VAR(instr); ASSERT(id >= DIF_VAR_OTHER_UBASE); id -= DIF_VAR_OTHER_UBASE; + VERIFY(id < (uint_t)vstate->dtvs_ntlocals); key = &tupregs[DIF_DTR_NREGS]; key[0].dttk_value = (uint64_t)id; @@ -4689,25 +5481,27 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, v->dtdv_type.dtdt_size > sizeof (uint64_t) ? v->dtdv_type.dtdt_size : sizeof (uint64_t), regs[rd] ? DTRACE_DYNVAR_ALLOC : - DTRACE_DYNVAR_DEALLOC); + DTRACE_DYNVAR_DEALLOC, mstate, vstate); /* * Given that we're storing to thread-local data, * we need to flush our predicate cache. */ -#if !defined(__APPLE__) - curthread->t_predcache = NULL; -#else dtrace_set_thread_predcache(current_thread(), 0); -#endif /* __APPLE__ */ - if (dvar == NULL) break; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { + size_t lim; + + if (!dtrace_vcanload( + (void *)(uintptr_t)regs[rd], + &v->dtdv_type, &lim, mstate, vstate)) + break; + dtrace_vcopy((void *)(uintptr_t)regs[rd], - dvar->dtdv_data, &v->dtdv_type); + dvar->dtdv_data, &v->dtdv_type, lim); } else { *((uint64_t *)dvar->dtdv_data) = regs[rd]; } @@ -4744,6 +5538,10 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, regs[r2] ? 
regs[r2] : dtrace_strsize_default) + 1; } else { + if (regs[r2] > LONG_MAX) { + *flags |= CPU_DTRACE_ILLOP; + break; + } tupregs[ttop].dttk_size = regs[r2]; } @@ -4785,15 +5583,17 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, if (DIF_INSTR_OP(instr) == DIF_OP_LDTAA) { DTRACE_TLS_THRKEY(key[nkeys].dttk_value); key[nkeys++].dttk_size = 0; + VERIFY(id < (uint_t)vstate->dtvs_ntlocals); v = &vstate->dtvs_tlocals[id]; } else { + VERIFY(id < (uint_t)vstate->dtvs_nglobals); v = &vstate->dtvs_globals[id]->dtsv_var; } dvar = dtrace_dynvar(dstate, nkeys, key, v->dtdv_type.dtdt_size > sizeof (uint64_t) ? v->dtdv_type.dtdt_size : sizeof (uint64_t), - DTRACE_DYNVAR_NOALLOC); + DTRACE_DYNVAR_NOALLOC, mstate, vstate); if (dvar == NULL) { regs[rd] = 0; @@ -4825,8 +5625,10 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) { DTRACE_TLS_THRKEY(key[nkeys].dttk_value); key[nkeys++].dttk_size = 0; + VERIFY(id < (uint_t)vstate->dtvs_ntlocals); v = &vstate->dtvs_tlocals[id]; } else { + VERIFY(id < (uint_t)vstate->dtvs_nglobals); v = &vstate->dtvs_globals[id]->dtsv_var; } @@ -4834,14 +5636,21 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, v->dtdv_type.dtdt_size > sizeof (uint64_t) ? v->dtdv_type.dtdt_size : sizeof (uint64_t), regs[rd] ? DTRACE_DYNVAR_ALLOC : - DTRACE_DYNVAR_DEALLOC); + DTRACE_DYNVAR_DEALLOC, mstate, vstate); if (dvar == NULL) break; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) { + size_t lim; + + if (!dtrace_vcanload( + (void *)(uintptr_t)regs[rd], &v->dtdv_type, + &lim, mstate, vstate)) + break; + dtrace_vcopy((void *)(uintptr_t)regs[rd], - dvar->dtdv_data, &v->dtdv_type); + dvar->dtdv_data, &v->dtdv_type, lim); } else { *((uint64_t *)dvar->dtdv_data) = regs[rd]; } @@ -4853,17 +5662,21 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, uintptr_t ptr = P2ROUNDUP(mstate->dtms_scratch_ptr, 8); size_t size = ptr - mstate->dtms_scratch_ptr + regs[r1]; - if (mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + - mstate->dtms_scratch_size) { + /* + * Rounding up the user allocation size could have + * overflowed large, bogus allocations (like -1ULL) to + * 0. + */ + if (size < regs[r1] || + !DTRACE_INSCRATCH(mstate, size)) { DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH); - regs[rd] = NULL; - } else { - dtrace_bzero((void *) - mstate->dtms_scratch_ptr, size); + regs[rd] = 0; + break; + } + + dtrace_bzero((void *) mstate->dtms_scratch_ptr, size); mstate->dtms_scratch_ptr += size; regs[rd] = ptr; - } break; } @@ -4875,6 +5688,9 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, break; } + if (!dtrace_canload(regs[r1], regs[r2], mstate, vstate)) + break; + dtrace_bcopy((void *)(uintptr_t)regs[r1], (void *)(uintptr_t)regs[rd], (size_t)regs[r2]); break; @@ -4922,11 +5738,12 @@ dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate, *illval = regs[rd]; break; } -#if !defined(__APPLE__) - if (regs[rd] & 7) { -#else - if (regs[rd] & 3) { /* Darwin kmem_zalloc() called from dtrace_difo_init() is 4-byte aligned. */ -#endif /* __APPLE__ */ + + /* + * Darwin kmem_zalloc() called from + * dtrace_difo_init() is 4-byte aligned. 
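[Editor's note] The relaxed alignment test here reflects a Darwin allocator detail: kmem_zalloc() guarantees only 4-byte alignment for the DIF variable storage, so a store is rejected when either of the two low address bits is set, where the removed Solaris code checked three bits for 8-byte alignment. The test itself:

    #include <stdint.h>

    /* Nonzero when addr is not 4-byte aligned. */
    static int
    misaligned4(uint64_t addr)
    {
        return ((addr & 3) != 0);
    }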
+ */ + if (regs[rd] & 3) { *flags |= CPU_DTRACE_BADALIGN; *illval = regs[rd]; break; @@ -4951,8 +5768,8 @@ dtrace_action_breakpoint(dtrace_ecb_t *ecb) dtrace_probe_t *probe = ecb->dte_probe; dtrace_provider_t *prov = probe->dtpr_provider; char c[DTRACE_FULLNAMELEN + 80], *str; - char *msg = "dtrace: breakpoint action at probe "; - char *ecbmsg = " (ecb "; + const char *msg = "dtrace: breakpoint action at probe "; + const char *ecbmsg = " (ecb "; uintptr_t mask = (0xf << (sizeof (uintptr_t) * NBBY / 4)); uintptr_t val = (uintptr_t)ecb; int shift = (sizeof (uintptr_t) * NBBY) - 4, i = 0; @@ -5023,27 +5840,23 @@ dtrace_action_panic(dtrace_ecb_t *ecb) if (dtrace_panicked != NULL) return; -#if !defined(__APPLE__) - if (dtrace_casptr(&dtrace_panicked, NULL, curthread) != NULL) - return; -#else if (dtrace_casptr(&dtrace_panicked, NULL, current_thread()) != NULL) return; -#endif /* __APPLE__ */ /* * We won the right to panic. (We want to be sure that only one * thread calls panic() from dtrace_probe(), and that panic() is * called exactly once.) */ - dtrace_panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)", + panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)", probe->dtpr_provider->dtpv_name, probe->dtpr_mod, probe->dtpr_func, probe->dtpr_name, (void *)ecb); -#if defined(__APPLE__) - /* Mac OS X debug feature -- can return from panic() */ + /* + * APPLE NOTE: this was for an old Mac OS X debug feature + * allowing a return from panic(). Revisit someday. + */ dtrace_panicked = NULL; -#endif /* __APPLE__ */ } static void @@ -5057,24 +5870,17 @@ dtrace_action_raise(uint64_t sig) return; } -#if !defined(__APPLE__) /* * raise() has a queue depth of 1 -- we ignore all subsequent * invocations of the raise() action. */ - if (curthread->t_dtrace_sig == 0) - curthread->t_dtrace_sig = (uint8_t)sig; - curthread->t_sig_check = 1; - aston(curthread); -#else uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); if (uthread && uthread->t_dtrace_sig == 0) { uthread->t_dtrace_sig = sig; - psignal(current_proc(), (int)sig); + act_set_astbsd(current_thread()); } -#endif /* __APPLE__ */ } static void @@ -5083,15 +5889,47 @@ dtrace_action_stop(void) if (dtrace_destructive_disallow) return; -#if !defined(__APPLE__) - if (!curthread->t_dtrace_stop) { - curthread->t_dtrace_stop = 1; - curthread->t_sig_check = 1; - aston(curthread); + uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); + if (uthread) { + /* + * The currently running process will be set to task_suspend + * when it next leaves the kernel. + */ + uthread->t_dtrace_stop = 1; + act_set_astbsd(current_thread()); + } +} + + +/* + * APPLE NOTE: pidresume works in conjunction with the dtrace stop action. + * Both activate only when the currently running process next leaves the + * kernel. + */ +static void +dtrace_action_pidresume(uint64_t pid) +{ + if (dtrace_destructive_disallow) + return; + + if (kauth_cred_issuser(kauth_cred_get()) == 0) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP); + return; + } + uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread()); + + /* + * When the currently running process leaves the kernel, it attempts to + * task_resume the process (denoted by pid), if that pid appears to have + * been stopped by dtrace_action_stop(). + * The currently running process has a pidresume() queue depth of 1 -- + * subsequent invocations of the pidresume() action are ignored. 
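[Editor's note] Both stop() and the Apple pidresume() action defer their real work to the trip back out of the kernel: probe context only records the request on the current uthread and posts an AST. The queue-depth-of-1 rule falls out of the recording step; a sketch:

    #include <stdint.h>

    struct uthread_sk {
        uint64_t t_dtrace_resumepid;   /* 0 = no pending request */
    };

    /*
     * First request wins; later pidresume() calls in the same window
     * are ignored until the AST handler consumes the pending pid.
     */
    static void
    pidresume_request(struct uthread_sk *ut, uint64_t pid)
    {
        if (pid != 0 && ut->t_dtrace_resumepid == 0)
            ut->t_dtrace_resumepid = pid;
        /* followed by act_set_astbsd(current_thread()) in the kernel */
    }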
+ */ + + if (pid != 0 && uthread && uthread->t_dtrace_resumepid == 0) { + uthread->t_dtrace_resumepid = pid; + act_set_astbsd(current_thread()); } -#else - psignal(current_proc(), SIGSTOP); -#endif /* __APPLE__ */ } static void @@ -5099,7 +5937,7 @@ dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val) { hrtime_t now; volatile uint16_t *flags; - cpu_t *cpu = CPU; + dtrace_cpu_t *cpu = CPU; if (dtrace_destructive_disallow) return; @@ -5165,8 +6003,7 @@ dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state, size = (uintptr_t)fps - mstate->dtms_scratch_ptr + (nframes * sizeof (uint64_t)); - if (mstate->dtms_scratch_ptr + size > - mstate->dtms_scratch_base + mstate->dtms_scratch_size) { + if (!DTRACE_INSCRATCH(mstate, (uintptr_t)size)) { /* * Not enough room for our frame pointers -- need to indicate * that we ran out of scratch space. @@ -5253,20 +6090,71 @@ out: mstate->dtms_scratch_ptr = old; } +static void +dtrace_store_by_ref(dtrace_difo_t *dp, caddr_t tomax, size_t size, + size_t *valoffsp, uint64_t *valp, uint64_t end, int intuple, int dtkind) +{ + volatile uint16_t *flags; + uint64_t val = *valp; + size_t valoffs = *valoffsp; + + flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags; + ASSERT(dtkind == DIF_TF_BYREF || dtkind == DIF_TF_BYUREF); + + /* + * If this is a string, we're going to only load until we find the zero + * byte -- after which we'll store zero bytes. + */ + if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) { + char c = '\0' + 1; + size_t s; + + for (s = 0; s < size; s++) { + if (c != '\0' && dtkind == DIF_TF_BYREF) { + c = dtrace_load8(val++); + } else if (c != '\0' && dtkind == DIF_TF_BYUREF) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + c = dtrace_fuword8((user_addr_t)(uintptr_t)val++); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); + if (*flags & CPU_DTRACE_FAULT) + break; + } + + DTRACE_STORE(uint8_t, tomax, valoffs++, c); + + if (c == '\0' && intuple) + break; + } + } else { + uint8_t c; + while (valoffs < end) { + if (dtkind == DIF_TF_BYREF) { + c = dtrace_load8(val++); + } else if (dtkind == DIF_TF_BYUREF) { + DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT); + c = dtrace_fuword8((user_addr_t)(uintptr_t)val++); + DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT); + if (*flags & CPU_DTRACE_FAULT) + break; + } + + DTRACE_STORE(uint8_t, tomax, + valoffs++, c); + } + } + + *valp = val; + *valoffsp = valoffs; +} + /* * If you're looking for the epicenter of DTrace, you just found it. This * is the function called by the provider to fire a probe -- from which all * subsequent probe-context DTrace activity emanates. 
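 * In rough outline: a provider calls dtrace_probe(); the Darwin wrapper
 * below guards against re-entry and disables preemption; __dtrace_probe()
 * then walks each ECB enabled on the probe, evaluates its predicate, and
 * dispatches its actions into the consumer's per-CPU buffer.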
*/ -#if !defined(__APPLE__) -void -dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1, - uintptr_t arg2, uintptr_t arg3, uintptr_t arg4) -#else static void __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4) -#endif /* __APPLE__ */ { processorid_t cpuid; dtrace_icookie_t cookie; @@ -5280,28 +6168,13 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, volatile uint16_t *flags; hrtime_t now; -#if !defined(__APPLE__) - /* - * Kick out immediately if this CPU is still being born (in which case - * curthread will be set to -1) - */ - if ((uintptr_t)curthread & 1) - return; -#else -#endif /* __APPLE__ */ - cookie = dtrace_interrupt_disable(); probe = dtrace_probes[id - 1]; cpuid = CPU->cpu_id; onintr = CPU_ON_INTR(CPU); -#if !defined(__APPLE__) - if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE && - probe->dtpr_predcache == curthread->t_predcache) { -#else if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE && probe->dtpr_predcache == dtrace_get_thread_predcache(current_thread())) { -#endif /* __APPLE__ */ /* * We have hit in the predicate cache; we know that * this predicate would evaluate to be false. @@ -5325,6 +6198,11 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, if (vtime && curthread->t_dtrace_start) curthread->t_dtrace_vtime += now - curthread->t_dtrace_start; #else + /* + * APPLE NOTE: The time spent entering DTrace and arriving + * to this point, is attributed to the current thread. + * Instead it should accrue to DTrace. FIXME + */ vtime = dtrace_vtime_references != 0; if (vtime) @@ -5346,14 +6224,14 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, now = dtrace_gethrtime(); /* must not precede dtrace_calc_thread_recent_vtime() call! */ #endif /* __APPLE__ */ -#if defined(__APPLE__) /* - * A provider may call dtrace_probe_error() in lieu of dtrace_probe() in some circumstances. - * See, e.g. fasttrap_isa.c. However the provider has no access to ECB context, so passes - * NULL through "arg0" and the probe_id of the ovedrriden probe as arg1. Detect that here - * and cons up a viable state (from the probe_id). + * APPLE NOTE: A provider may call dtrace_probe_error() in lieu of + * dtrace_probe() in some circumstances. See, e.g. fasttrap_isa.c. + * However the provider has no access to ECB context, so passes + * 0 through "arg0" and the probe_id of the overridden probe as arg1. + * Detect that here and cons up a viable state (from the probe_id). 
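+	 * Concretely: arg0 == 0 marks the overridden call, arg1 carries the
+	 * original probe id, and the state is recovered below through
+	 * dtrace_probes[ftp_id - 1]->dtpr_ecb->dte_state.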
*/ - if (dtrace_probeid_error == id && NULL == arg0) { + if (dtrace_probeid_error == id && 0 == arg0) { dtrace_id_t ftp_id = (dtrace_id_t)arg1; dtrace_probe_t *ftp_probe = dtrace_probes[ftp_id - 1]; dtrace_ecb_t *ftp_ecb = ftp_probe->dtpr_ecb; @@ -5369,9 +6247,10 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, ftp_state->dts_arg_error_illval = -1; /* arg5 */ } } -#endif /* __APPLE__ */ + mstate.dtms_difo = NULL; mstate.dtms_probe = probe; + mstate.dtms_strtok = 0; mstate.dtms_arg[0] = arg0; mstate.dtms_arg[1] = arg1; mstate.dtms_arg[2] = arg2; @@ -5387,6 +6266,7 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid]; dtrace_vstate_t *vstate = &state->dts_vstate; dtrace_provider_t *prov = probe->dtpr_provider; + uint64_t tracememsize = 0; int committed = 0; caddr_t tomax; @@ -5403,7 +6283,7 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, #ifdef lint uint64_t val = 0; #else - uint64_t val; + uint64_t val = 0; #endif mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE; @@ -5454,6 +6334,7 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, * not the case. */ if ((ecb->dte_cond & DTRACE_COND_USERMODE) && + prov->dtpv_pops.dtps_usermode && prov->dtpv_pops.dtps_usermode(prov->dtpv_arg, probe->dtpr_id, probe->dtpr_arg) == 0) continue; @@ -5475,25 +6356,26 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, cred_t *s_cr = ecb->dte_state->dts_cred.dcr_cred; proc_t *proc; +#pragma unused(proc) /* __APPLE__ */ ASSERT(s_cr != NULL); -#if !defined(__APPLE__) - if ((cr = CRED()) == NULL || -#else + /* + * XXX this is hackish, but so is setting a variable + * XXX in a McCarthy OR... + */ if ((cr = dtrace_CRED()) == NULL || -#endif /* __APPLE__ */ - s_cr->cr_uid != cr->cr_uid || - s_cr->cr_uid != cr->cr_ruid || - s_cr->cr_uid != cr->cr_suid || - s_cr->cr_gid != cr->cr_gid || - s_cr->cr_gid != cr->cr_rgid || - s_cr->cr_gid != cr->cr_sgid || + posix_cred_get(s_cr)->cr_uid != posix_cred_get(cr)->cr_uid || + posix_cred_get(s_cr)->cr_uid != posix_cred_get(cr)->cr_ruid || + posix_cred_get(s_cr)->cr_uid != posix_cred_get(cr)->cr_suid || + posix_cred_get(s_cr)->cr_gid != posix_cred_get(cr)->cr_gid || + posix_cred_get(s_cr)->cr_gid != posix_cred_get(cr)->cr_rgid || + posix_cred_get(s_cr)->cr_gid != posix_cred_get(cr)->cr_sgid || #if !defined(__APPLE__) (proc = ttoproc(curthread)) == NULL || (proc->p_flag & SNOCD)) #else - 1) /* Darwin omits "No Core Dump" flag. */ + 1) /* APPLE NOTE: Darwin omits "No Core Dump" flag */ #endif /* __APPLE__ */ continue; } @@ -5502,14 +6384,17 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, cred_t *cr; cred_t *s_cr = ecb->dte_state->dts_cred.dcr_cred; +#pragma unused(cr, s_cr) /* __APPLE__ */ ASSERT(s_cr != NULL); -#if !defined(__APPLE__) /* Darwin doesn't do zones. */ +#if !defined(__APPLE__) if ((cr = CRED()) == NULL || s_cr->cr_zone->zone_id != cr->cr_zone->zone_id) continue; +#else + /* APPLE NOTE: Darwin doesn't do zones. */ #endif /* __APPLE__ */ } } @@ -5545,12 +6430,32 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, tomax = buf->dtb_tomax; ASSERT(tomax != NULL); - if (ecb->dte_size != 0) - DTRACE_STORE(uint32_t, tomax, offs, ecb->dte_epid); + /* + * Build and store the record header corresponding to the ECB. 
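+		 * The header is a dtrace_rechdr_t: the EPID plus the probe
+		 * firing timestamp, the latter packed in by
+		 * DTRACE_RECORD_STORE_TIMESTAMP() so that consumers can
+		 * order records without a separate timestamp() action.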
+ */ + if (ecb->dte_size != 0) { + dtrace_rechdr_t dtrh; + + if (!(mstate.dtms_present & DTRACE_MSTATE_TIMESTAMP)) { + mstate.dtms_timestamp = dtrace_gethrtime(); + mstate.dtms_present |= DTRACE_MSTATE_TIMESTAMP; + } + + ASSERT(ecb->dte_size >= sizeof(dtrace_rechdr_t)); + + dtrh.dtrh_epid = ecb->dte_epid; + DTRACE_RECORD_STORE_TIMESTAMP(&dtrh, mstate.dtms_timestamp); + DTRACE_STORE(dtrace_rechdr_t, tomax, offs, dtrh); + } mstate.dtms_epid = ecb->dte_epid; mstate.dtms_present |= DTRACE_MSTATE_EPID; + if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) + mstate.dtms_access = DTRACE_ACCESS_KERNEL; + else + mstate.dtms_access = 0; + if (pred != NULL) { dtrace_difo_t *dp = pred->dtp_difo; int rval; @@ -5565,11 +6470,8 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, * Update the predicate cache... */ ASSERT(cid == pred->dtp_cacheid); -#if !defined(__APPLE__) - curthread->t_predcache = cid; -#else + dtrace_set_thread_predcache(current_thread(), cid); -#endif /* __APPLE__ */ } continue; @@ -5634,8 +6536,7 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, dtrace_getpcstack((pc_t *)(tomax + valoffs), size / sizeof (pc_t), probe->dtpr_aframes, DTRACE_ANCHORED(probe) ? NULL : - (uint32_t *)arg0); - + (uint32_t *)(uintptr_t)arg0); continue; case DTRACEACT_JSTACK: @@ -5693,7 +6594,9 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, continue; switch (act->dta_kind) { - case DTRACEACT_SPECULATE: + case DTRACEACT_SPECULATE: { + dtrace_rechdr_t *dtrh = NULL; + ASSERT(buf == &state->dts_buffer[cpuid]); buf = dtrace_speculation_buffer(state, cpuid, val); @@ -5712,13 +6615,27 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, continue; } - tomax = buf->dtb_tomax; - ASSERT(tomax != NULL); + tomax = buf->dtb_tomax; + ASSERT(tomax != NULL); + + if (ecb->dte_size == 0) + continue; + + ASSERT(ecb->dte_size >= sizeof(dtrace_rechdr_t)); + dtrh = ((void *)(tomax + offs)); + dtrh->dtrh_epid = ecb->dte_epid; + + /* + * When the speculation is committed, all of + * the records in the speculative buffer will + * have their timestamps set to the commit + * time. Until then, it is set to a sentinel + * value, for debugability. 
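+					 * (The sentinel written below is
+					 * UINT64_MAX, a value no real
+					 * timestamp can take.)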
+ */ + DTRACE_RECORD_STORE_TIMESTAMP(dtrh, UINT64_MAX); - if (ecb->dte_size != 0) - DTRACE_STORE(uint32_t, tomax, offs, - ecb->dte_epid); - continue; + continue; + } case DTRACEACT_CHILL: if (dtrace_priv_kernel_destructive(state)) @@ -5730,6 +6647,11 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, dtrace_action_raise(val); continue; + case DTRACEACT_PIDRESUME: /* __APPLE__ */ + if (dtrace_priv_proc_destructive(state)) + dtrace_action_pidresume(val); + continue; + case DTRACEACT_COMMIT: ASSERT(!committed); @@ -5753,6 +6675,12 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, case DTRACEACT_PRINTA: case DTRACEACT_SYSTEM: case DTRACEACT_FREOPEN: + case DTRACEACT_APPLEBINARY: /* __APPLE__ */ + case DTRACEACT_TRACEMEM: + break; + + case DTRACEACT_TRACEMEM_DYNSIZE: + tracememsize = val; break; case DTRACEACT_SYM: @@ -5761,23 +6689,6 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, continue; break; -#if !defined(__APPLE__) - case DTRACEACT_USYM: - case DTRACEACT_UMOD: - case DTRACEACT_UADDR: { - struct pid *pid = curthread->t_procp->p_pidp; - - if (!dtrace_priv_proc(state)) - continue; - - DTRACE_STORE(uint64_t, tomax, - valoffs, (uint64_t)pid->pid_id); - DTRACE_STORE(uint64_t, tomax, - valoffs + sizeof (uint64_t), val); - - continue; - } -#else case DTRACEACT_USYM: case DTRACEACT_UMOD: case DTRACEACT_UADDR: { @@ -5785,13 +6696,12 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, continue; DTRACE_STORE(uint64_t, tomax, - valoffs, (uint64_t)proc_selfpid()); + valoffs, (uint64_t)dtrace_proc_selfpid()); DTRACE_STORE(uint64_t, tomax, valoffs + sizeof (uint64_t), val); continue; } -#endif /* __APPLE__ */ case DTRACEACT_EXIT: { /* @@ -5831,38 +6741,27 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, ASSERT(0); } - if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF) { + if (dp->dtdo_rtype.dtdt_flags & (DIF_TF_BYREF | DIF_TF_BYUREF)) { uintptr_t end = valoffs + size; - /* - * If this is a string, we're going to only - * load until we find the zero byte -- after - * which we'll store zero bytes. - */ - if (dp->dtdo_rtype.dtdt_kind == - DIF_TYPE_STRING) { - char c = '\0' + 1; - int intuple = act->dta_intuple; - size_t s; - - for (s = 0; s < size; s++) { - if (c != '\0') - c = dtrace_load8(val++); - - DTRACE_STORE(uint8_t, tomax, - valoffs++, c); - - if (c == '\0' && intuple) - break; - } - + if (tracememsize != 0 && + valoffs + tracememsize < end) + { + end = valoffs + tracememsize; + tracememsize = 0; + } + + if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF && + !dtrace_vcanload((void *)(uintptr_t)val, + &dp->dtdo_rtype, NULL, &mstate, vstate)) + { continue; } - while (valoffs < end) { - DTRACE_STORE(uint8_t, tomax, valoffs++, - dtrace_load8(val++)); - } + dtrace_store_by_ref(dp, tomax, size, &valoffs, + &val, end, act->dta_intuple, + dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF ? + DIF_TF_BYREF: DIF_TF_BYUREF); continue; } @@ -5920,13 +6819,14 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, * time to prevent it from being accumulated * into t_dtrace_vtime. */ -#if !defined(__APPLE__) - curthread->t_dtrace_start = 0; -#else - /* Set the sign bit on t_dtrace_tracing to suspend accumulation to it. */ + + /* + * Darwin sets the sign bit on t_dtrace_tracing + * to suspend accumulation to it. 
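+			 * Concretely, the suspend below is t |= (1ULL << 63);
+			 * the resume path further down clears the bit with
+			 * t &= ~(1ULL << 63) after folding the elapsed probe
+			 * time back in.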
+ */ dtrace_set_thread_tracing(current_thread(), - (1ULL<<63) | dtrace_get_thread_tracing(current_thread())); -#endif /* __APPLE__ */ + (1ULL<<63) | dtrace_get_thread_tracing(current_thread())); + } /* @@ -5953,10 +6853,8 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, buf->dtb_offset = offs + ecb->dte_size; } -#if !defined(__APPLE__) - if (vtime) - curthread->t_dtrace_start = dtrace_gethrtime(); -#else + /* FIXME: On Darwin the time spent leaving DTrace from this point to the rti is attributed + to the current thread. Instead it should accrue to DTrace. */ if (vtime) { thread_t thread = current_thread(); int64_t t = dtrace_get_thread_tracing(thread); @@ -5969,29 +6867,38 @@ __dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, dtrace_set_thread_tracing(thread, (~(1ULL<<63)) & t); } } -#endif /* __APPLE__ */ dtrace_interrupt_enable(cookie); } -#if defined(__APPLE__) -/* Don't allow a thread to re-enter dtrace_probe() */ +/* + * APPLE NOTE: Don't allow a thread to re-enter dtrace_probe(). + * This could occur if a probe is encountered on some function in the + * transitive closure of the call to dtrace_probe(). + * Solaris has some strong guarantees that this won't happen. + * The Darwin implementation is not so mature as to make those guarantees. + * Hence, the introduction of __dtrace_probe() on xnu. + */ + void dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, uint64_t arg2, uint64_t arg3, uint64_t arg4) { thread_t thread = current_thread(); - + disable_preemption(); if (id == dtrace_probeid_error) { __dtrace_probe(id, arg0, arg1, arg2, arg3, arg4); - dtrace_getfp(); /* Defeat tail-call optimization of __dtrace_probe() */ + dtrace_getipl(); /* Defeat tail-call optimization of __dtrace_probe() */ } else if (!dtrace_get_thread_reentering(thread)) { dtrace_set_thread_reentering(thread, TRUE); __dtrace_probe(id, arg0, arg1, arg2, arg3, arg4); dtrace_set_thread_reentering(thread, FALSE); } +#if DEBUG + else __dtrace_probe(dtrace_probeid_error, 0, id, 1, -1, DTRACEFLT_UNKNOWN); +#endif + enable_preemption(); } -#endif /* __APPLE__ */ /* * DTrace Probe Hashing Functions @@ -6005,7 +6912,7 @@ dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1, * specified.) */ static uint_t -dtrace_hash_str(char *p) +dtrace_hash_str(const char *p) { unsigned int g; uint_t hval = 0; @@ -6037,11 +6944,16 @@ dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs) return (hash); } -#if !defined(__APPLE__) /* Quiet compiler warning */ +/* + * APPLE NOTE: dtrace_hash_destroy is not used. + * It is called by dtrace_detach which is not + * currently implemented. Revisit someday. + */ +#if !defined(__APPLE__) static void dtrace_hash_destroy(dtrace_hash_t *hash) { -#ifdef DEBUG +#if DEBUG int i; for (i = 0; i < hash->dth_size; i++) @@ -6151,7 +7063,7 @@ dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template) return (bucket->dthb_len); } - return (NULL); + return (0); } static void @@ -6223,14 +7135,16 @@ dtrace_badattr(const dtrace_attribute_t *a) /* * Return a duplicate copy of a string. If the specified string is NULL, * this function returns a zero-length string. + * APPLE NOTE: Darwin employs size bounded string operation. */ static char * dtrace_strdup(const char *str) { - char *new = kmem_zalloc((str != NULL ? strlen(str) : 0) + 1, KM_SLEEP); + size_t bufsize = (str != NULL ? 
strlen(str) : 0) + 1; + char *new = kmem_zalloc(bufsize, KM_SLEEP); if (str != NULL) - (void) strcpy(new, str); + (void) strlcpy(new, str, bufsize); return (new); } @@ -6264,10 +7178,12 @@ dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp) uint32_t priv; if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) { - /* - * For DTRACE_PRIV_ALL, the uid and zoneid don't matter. - */ - priv = DTRACE_PRIV_ALL; + if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed()) { + priv = DTRACE_PRIV_USER | DTRACE_PRIV_PROC; + } + else { + priv = DTRACE_PRIV_ALL; + } } else { *uidp = crgetuid(cr); *zoneidp = crgetzoneid(cr); @@ -6292,16 +7208,12 @@ dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp) static void dtrace_errdebug(const char *str) { - int hval = dtrace_hash_str((char *)str) % DTRACE_ERRHASHSZ; + int hval = dtrace_hash_str(str) % DTRACE_ERRHASHSZ; int occupied = 0; lck_mtx_lock(&dtrace_errlock); dtrace_errlast = str; -#if !defined(__APPLE__) - dtrace_errthread = curthread; -#else - dtrace_errthread = current_thread(); -#endif /* __APPLE__ */ + dtrace_errthread = (kthread_t *)current_thread(); while (occupied++ < DTRACE_ERRHASHSZ) { if (dtrace_errhash[hval].dter_msg == str) { @@ -6528,13 +7440,17 @@ top: static int dtrace_match_string(const char *s, const char *p, int depth) { - return (s != NULL && strcmp(s, p) == 0); +#pragma unused(depth) /* __APPLE__ */ + + /* APPLE NOTE: Darwin employs size bounded string operation. */ + return (s != NULL && strncmp(s, p, strlen(s) + 1) == 0); } /*ARGSUSED*/ static int dtrace_match_nul(const char *s, const char *p, int depth) { +#pragma unused(s, p, depth) /* __APPLE__ */ return (1); /* always match the empty pattern */ } @@ -6542,16 +7458,17 @@ dtrace_match_nul(const char *s, const char *p, int depth) static int dtrace_match_nonzero(const char *s, const char *p, int depth) { +#pragma unused(p, depth) /* __APPLE__ */ return (s != NULL && s[0] != '\0'); } static int dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, - zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *), void *arg) + zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *, void *), void *arg1, void *arg2) { dtrace_probe_t template, *probe; dtrace_hash_t *hash = NULL; - int len, best = INT_MAX, nmatched = 0; + int len, rc, best = INT_MAX, nmatched = 0; dtrace_id_t i; lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); @@ -6563,15 +7480,16 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, if (pkp->dtpk_id != DTRACE_IDNONE) { if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL && dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) { - (void) (*matched)(probe, arg); + if ((*matched)(probe, arg1, arg2) == DTRACE_MATCH_FAIL) + return (DTRACE_MATCH_FAIL); nmatched++; } return (nmatched); } - template.dtpr_mod = (char *)pkp->dtpk_mod; - template.dtpr_func = (char *)pkp->dtpk_func; - template.dtpr_name = (char *)pkp->dtpk_name; + template.dtpr_mod = (char *)(uintptr_t)pkp->dtpk_mod; + template.dtpr_func = (char *)(uintptr_t)pkp->dtpk_func; + template.dtpr_name = (char *)(uintptr_t)pkp->dtpk_name; /* * We want to find the most distinct of the module name, function @@ -6602,7 +7520,7 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, * invoke our callback for each one that matches our input probe key. 
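 * For instance, a fully specified key such as fbt:mach_kernel:vm_fault:entry
 * is looked up through whichever of the module, function, and name hashes
 * shows the fewest collisions for its element, instead of walking all of
 * dtrace_probes[].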
*/ if (hash == NULL) { - for (i = 0; i < dtrace_nprobes; i++) { + for (i = 0; i < (dtrace_id_t)dtrace_nprobes; i++) { if ((probe = dtrace_probes[i]) == NULL || dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0) @@ -6610,8 +7528,11 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, nmatched++; - if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT) - break; + if ((rc = (*matched)(probe, arg1, arg2)) != DTRACE_MATCH_NEXT) { + if (rc == DTRACE_MATCH_FAIL) + return (DTRACE_MATCH_FAIL); + break; + } } return (nmatched); @@ -6630,8 +7551,11 @@ dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid, nmatched++; - if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT) - break; + if ((rc = (*matched)(probe, arg1, arg2)) != DTRACE_MATCH_NEXT) { + if (rc == DTRACE_MATCH_FAIL) + return (DTRACE_MATCH_FAIL); + break; + } } return (nmatched); @@ -6690,6 +7614,17 @@ dtrace_probekey(const dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp) pkp->dtpk_fmatch = &dtrace_match_nonzero; } +static int +dtrace_cond_provider_match(dtrace_probedesc_t *desc, void *data) +{ + if (desc == NULL) + return 1; + + dtrace_probekey_f *func = dtrace_probekey_func(desc->dtpd_provider); + + return func(desc->dtpd_provider, (char*)data, 0); +} + /* * DTrace Provider-to-Framework API Functions * @@ -6755,8 +7690,13 @@ dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv, } provider = kmem_zalloc(sizeof (dtrace_provider_t), KM_SLEEP); - provider->dtpv_name = kmem_alloc(strlen(name) + 1, KM_SLEEP); - (void) strcpy(provider->dtpv_name, name); + + /* APPLE NOTE: Darwin employs size bounded string operation. */ + { + size_t bufsize = strlen(name) + 1; + provider->dtpv_name = kmem_alloc(bufsize, KM_SLEEP); + (void) strlcpy(provider->dtpv_name, name, bufsize); + } provider->dtpv_attr = *pap; provider->dtpv_priv.dtpp_flags = priv; @@ -6821,13 +7761,16 @@ dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv, dtrace_enabling_provide(provider); /* - * Now we need to call dtrace_enabling_matchall() -- which - * will acquire cpu_lock and dtrace_lock. We therefore need + * Now we need to call dtrace_enabling_matchall_with_cond() -- + * with a condition matching the provider name we just added, + * which will acquire cpu_lock and dtrace_lock. We therefore need * to drop all of our locks before calling into it... */ lck_mtx_unlock(&dtrace_lock); lck_mtx_unlock(&dtrace_provider_lock); - dtrace_enabling_matchall(); + + dtrace_match_cond_t cond = {dtrace_cond_provider_match, provider->dtpv_name}; + dtrace_enabling_matchall_with_cond(&cond); return (0); } @@ -6851,7 +7794,7 @@ dtrace_unregister(dtrace_provider_id_t id) dtrace_probe_t *probe, *first = NULL; if (old->dtpv_pops.dtps_enable == - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop) { + (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop) { /* * If DTrace itself is the provider, we're called with locks * already held. @@ -6860,7 +7803,6 @@ dtrace_unregister(dtrace_provider_id_t id) ASSERT(dtrace_devi != NULL); lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - self = 1; if (dtrace_provider->dtpv_next != NULL) { @@ -6894,16 +7836,7 @@ dtrace_unregister(dtrace_provider_id_t id) /* * Attempt to destroy the probes associated with this provider. 
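 * The probe-by-probe walk the older code performed here is replaced by a
 * check of the provider's dtpv_ecb_count, which the ECB enable and disable
 * paths keep current, so a single compare detects an active enabling.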
*/ - for (i = 0; i < dtrace_nprobes; i++) { - if ((probe = dtrace_probes[i]) == NULL) - continue; - - if (probe->dtpr_provider != old) - continue; - - if (probe->dtpr_ecb == NULL) - continue; - + if (old->dtpv_ecb_count!=0) { /* * We have at least one ECB; we can't remove this provider. */ @@ -6919,7 +7852,7 @@ dtrace_unregister(dtrace_provider_id_t id) * All of the probes for this provider are disabled; we can safely * remove all of them from their hash chains and from the probe array. */ - for (i = 0; i < dtrace_nprobes; i++) { + for (i = 0; i < dtrace_nprobes && old->dtpv_probe_count!=0; i++) { if ((probe = dtrace_probes[i]) == NULL) continue; @@ -6927,6 +7860,7 @@ dtrace_unregister(dtrace_provider_id_t id) continue; dtrace_probes[i] = NULL; + old->dtpv_probe_count--; dtrace_hash_remove(dtrace_bymod, probe); dtrace_hash_remove(dtrace_byfunc, probe); @@ -6957,11 +7891,7 @@ dtrace_unregister(dtrace_provider_id_t id) kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); vmem_free(dtrace_arena, (void *)(uintptr_t)(probe->dtpr_id), 1); -#if !defined(__APPLE__) - kmem_free(probe, sizeof (dtrace_probe_t)); -#else zfree(dtrace_probe_t_zone, probe); -#endif } if ((prev = dtrace_provider) == old) { @@ -7002,7 +7932,7 @@ dtrace_invalidate(dtrace_provider_id_t id) dtrace_provider_t *pvp = (dtrace_provider_t *)id; ASSERT(pvp->dtpv_pops.dtps_enable != - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop); + (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop); lck_mtx_lock(&dtrace_provider_lock); lck_mtx_lock(&dtrace_lock); @@ -7043,7 +7973,7 @@ dtrace_condense(dtrace_provider_id_t id) * Make sure this isn't the dtrace provider itself. */ ASSERT(prov->dtpv_pops.dtps_enable != - (void (*)(void *, dtrace_id_t, void *))dtrace_nullop); + (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop); lck_mtx_lock(&dtrace_provider_lock); lck_mtx_lock(&dtrace_lock); @@ -7062,6 +7992,7 @@ dtrace_condense(dtrace_provider_id_t id) continue; dtrace_probes[i] = NULL; + prov->dtpv_probe_count--; dtrace_hash_remove(dtrace_bymod, probe); dtrace_hash_remove(dtrace_byfunc, probe); @@ -7072,11 +8003,7 @@ dtrace_condense(dtrace_provider_id_t id) kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1); kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); -#if !defined(__APPLE__) - kmem_free(probe, sizeof (dtrace_probe_t)); -#else zfree(dtrace_probe_t_zone, probe); -#endif vmem_free(dtrace_arena, (void *)((uintptr_t)i + 1), 1); } @@ -7115,12 +8042,9 @@ dtrace_probe_create(dtrace_provider_id_t prov, const char *mod, id = (dtrace_id_t)(uintptr_t)vmem_alloc(dtrace_arena, 1, VM_BESTFIT | VM_SLEEP); -#if !defined(__APPLE__) - probe = kmem_zalloc(sizeof (dtrace_probe_t), KM_SLEEP); -#else + probe = zalloc(dtrace_probe_t_zone); bzero(probe, sizeof (dtrace_probe_t)); -#endif probe->dtpr_id = id; probe->dtpr_gen = dtrace_probegen++; @@ -7135,7 +8059,7 @@ dtrace_probe_create(dtrace_provider_id_t prov, const char *mod, dtrace_hash_add(dtrace_byfunc, probe); dtrace_hash_add(dtrace_byname, probe); - if (id - 1 >= dtrace_nprobes) { + if (id - 1 >= (dtrace_id_t)dtrace_nprobes) { size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *); size_t nsize = osize << 1; @@ -7168,11 +8092,12 @@ dtrace_probe_create(dtrace_provider_id_t prov, const char *mod, dtrace_nprobes <<= 1; } - ASSERT(id - 1 < dtrace_nprobes); + ASSERT(id - 1 < (dtrace_id_t)dtrace_nprobes); } ASSERT(dtrace_probes[id - 1] == NULL); 
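	/*
	 * The slot is known to be free (asserted above); publish the probe
	 * and bump the provider's probe count, which the unregister path
	 * uses to cut its scan short.
	 */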
dtrace_probes[id - 1] = probe; + provider->dtpv_probe_count++; if (provider != dtrace_provider) lck_mtx_unlock(&dtrace_lock); @@ -7185,16 +8110,17 @@ dtrace_probe_lookup_id(dtrace_id_t id) { lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - if (id == 0 || id > dtrace_nprobes) + if (id == 0 || id > (dtrace_id_t)dtrace_nprobes) return (NULL); return (dtrace_probes[id - 1]); } static int -dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg) +dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg1, void *arg2) { - *((dtrace_id_t *)arg) = probe->dtpr_id; +#pragma unused(arg2) + *((dtrace_id_t *)arg1) = probe->dtpr_id; return (DTRACE_MATCH_DONE); } @@ -7223,7 +8149,7 @@ dtrace_probe_lookup(dtrace_provider_id_t prid, const char *mod, lck_mtx_lock(&dtrace_lock); match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0, - dtrace_probe_lookup_match, &id); + dtrace_probe_lookup_match, &id, NULL); lck_mtx_unlock(&dtrace_lock); ASSERT(match == 1 || match == 0); @@ -7259,6 +8185,7 @@ dtrace_probe_description(const dtrace_probe_t *prp, dtrace_probedesc_t *pdp) bzero(pdp, sizeof (dtrace_probedesc_t)); pdp->dtpd_id = prp->dtpr_id; + /* APPLE NOTE: Darwin employs size bounded string operation. */ (void) strlcpy(pdp->dtpd_provider, prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN); @@ -7294,15 +8221,13 @@ dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv) all = 1; prv = dtrace_provider; } - + do { - kmod_info_t *ktl; /* * First, call the blanket provide operation. */ prv->dtpv_pops.dtps_provide(prv->dtpv_arg, desc); - -#if !defined(__APPLE__) + /* * Now call the per-module provide operation. We will grab * mod_lock to prevent the list from being modified. Note @@ -7310,37 +8235,14 @@ dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv) * (mod_busy can only be changed with mod_lock held.) */ lck_mtx_lock(&mod_lock); - - ctl = &modules; - do { - if (ctl->mod_busy || ctl->mod_mp == NULL) - continue; - - prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl); - - } while ((ctl = ctl->mod_next) != &modules); - - lck_mtx_unlock(&mod_lock); -#else -#if 0 /* XXX Workaround for PR_4643546 XXX */ - simple_lock(&kmod_lock); - ktl = kmod; - while (ktl) { - prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ktl); - ktl = ktl->next; + ctl = dtrace_modctl_list; + while (ctl) { + prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl); + ctl = ctl->mod_next; } - simple_unlock(&kmod_lock); -#else - /* - * Don't bother to iterate over the kmod list. At present only fbt - * offers a provide_module in its dtpv_pops, and then it ignores the - * module anyway. - */ - prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, NULL); -#endif -#endif /* __APPLE__ */ + lck_mtx_unlock(&mod_lock); } while (all && (prv = prv->dtpv_next) != NULL); } @@ -7386,7 +8288,7 @@ dtrace_probe_foreach(uintptr_t offs) } static int -dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab) +dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab, dtrace_ecbdesc_t *ep) { dtrace_probekey_t pkey; uint32_t priv; @@ -7402,7 +8304,7 @@ dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab) * If we're passed a NULL description, we're being asked to * create an ECB with a NULL probe. 
*/ - (void) dtrace_ecb_create_enable(NULL, enab); + (void) dtrace_ecb_create_enable(NULL, enab, ep); return (0); } @@ -7411,7 +8313,7 @@ dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab) &priv, &uid, &zoneid); return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable, - enab)); + enab, ep)); } /* @@ -7443,7 +8345,7 @@ dtrace_dofprov2hprov(dtrace_helper_provdesc_t *hprov, } static void -dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) +dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, proc_t *p) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; @@ -7492,7 +8394,7 @@ dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) */ dtrace_dofprov2hprov(&dhpv, provider, strtab); - if ((parg = mops->dtms_provide_pid(meta->dtm_arg, &dhpv, pid)) == NULL) + if ((parg = mops->dtms_provide_proc(meta->dtm_arg, &dhpv, p)) == NULL) return; meta->dtm_count++; @@ -7507,15 +8409,15 @@ dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) dhpb.dthpb_mod = dhp->dofhp_mod; dhpb.dthpb_func = strtab + probe->dofpr_func; dhpb.dthpb_name = strtab + probe->dofpr_name; -#if defined(__APPLE__) - dhpb.dthpb_base = dhp->dofhp_addr; -#else +#if !defined(__APPLE__) dhpb.dthpb_base = probe->dofpr_addr; +#else + dhpb.dthpb_base = dhp->dofhp_addr; /* FIXME: James, why? */ #endif - dhpb.dthpb_offs = off + probe->dofpr_offidx; + dhpb.dthpb_offs = (int32_t *)(off + probe->dofpr_offidx); dhpb.dthpb_noffs = probe->dofpr_noffs; if (enoff != NULL) { - dhpb.dthpb_enoffs = enoff + probe->dofpr_enoffidx; + dhpb.dthpb_enoffs = (int32_t *)(enoff + probe->dofpr_enoffidx); dhpb.dthpb_nenoffs = probe->dofpr_nenoffs; } else { dhpb.dthpb_enoffs = NULL; @@ -7529,14 +8431,25 @@ dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) mops->dtms_create_probe(meta->dtm_arg, parg, &dhpb); } + + /* + * Since we just created probes, we need to match our enablings + * against those, with a precondition knowing that we have only + * added probes from this provider + */ + char *prov_name = mops->dtms_provider_name(parg); + ASSERT(prov_name != NULL); + dtrace_match_cond_t cond = {dtrace_cond_provider_match, (void*)prov_name}; + + dtrace_enabling_matchall_with_cond(&cond); } static void -dtrace_helper_provide(dof_helper_t *dhp, pid_t pid) +dtrace_helper_provide(dof_helper_t *dhp, proc_t *p) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; - int i; + uint32_t i; lck_mtx_assert(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); @@ -7547,21 +8460,12 @@ dtrace_helper_provide(dof_helper_t *dhp, pid_t pid) if (sec->dofs_type != DOF_SECT_PROVIDER) continue; - dtrace_helper_provide_one(dhp, sec, pid); + dtrace_helper_provide_one(dhp, sec, p); } - - /* - * We may have just created probes, so we must now rematch against - * any retained enablings. Note that this call will acquire both - * cpu_lock and dtrace_lock; the fact that we are holding - * dtrace_meta_lock now is what defines the ordering with respect to - * these three locks. 
- */ - dtrace_enabling_matchall(); } static void -dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) +dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, proc_t *p) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; @@ -7583,17 +8487,17 @@ dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid) */ dtrace_dofprov2hprov(&dhpv, provider, strtab); - mops->dtms_remove_pid(meta->dtm_arg, &dhpv, pid); + mops->dtms_remove_proc(meta->dtm_arg, &dhpv, p); meta->dtm_count--; } static void -dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid) +dtrace_helper_provider_remove(dof_helper_t *dhp, proc_t *p) { uintptr_t daddr = (uintptr_t)dhp->dofhp_dof; dof_hdr_t *dof = (dof_hdr_t *)daddr; - int i; + uint32_t i; lck_mtx_assert(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED); @@ -7604,7 +8508,7 @@ dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid) if (sec->dofs_type != DOF_SECT_PROVIDER) continue; - dtrace_helper_provider_remove_one(dhp, sec, pid); + dtrace_helper_provider_remove_one(dhp, sec, p); } } @@ -7620,7 +8524,7 @@ dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg, { dtrace_meta_t *meta; dtrace_helpers_t *help, *next; - int i; + uint_t i; *idp = DTRACE_METAPROVNONE; @@ -7636,8 +8540,8 @@ dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg, if (mops == NULL || mops->dtms_create_probe == NULL || - mops->dtms_provide_pid == NULL || - mops->dtms_remove_pid == NULL) { + mops->dtms_provide_proc == NULL || + mops->dtms_remove_proc == NULL) { cmn_err(CE_WARN, "failed to register meta-register %s: " "invalid ops", name); return (EINVAL); @@ -7645,8 +8549,14 @@ dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg, meta = kmem_zalloc(sizeof (dtrace_meta_t), KM_SLEEP); meta->dtm_mops = *mops; - meta->dtm_name = kmem_alloc(strlen(name) + 1, KM_SLEEP); - (void) strcpy(meta->dtm_name, name); + + /* APPLE NOTE: Darwin employs size bounded string operation. */ + { + size_t bufsize = strlen(name) + 1; + meta->dtm_name = kmem_alloc(bufsize, KM_SLEEP); + (void) strlcpy(meta->dtm_name, name, bufsize); + } + meta->dtm_arg = arg; lck_mtx_lock(&dtrace_meta_lock); @@ -7677,8 +8587,12 @@ dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg, while (help != NULL) { for (i = 0; i < help->dthps_nprovs; i++) { + proc_t *p = proc_find(help->dthps_pid); + if (p == PROC_NULL) + continue; dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov, - help->dthps_pid); + p); + proc_rele(p); } next = help->dthps_next; @@ -7762,13 +8676,16 @@ static int dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, cred_t *cr) { - int err = 0, i; + int err = 0; + uint_t i; + int (*efunc)(uint_t pc, const char *, ...) 
= dtrace_difo_err; - int kcheck; + int kcheckload; uint_t pc; + int maxglobal = -1, maxlocal = -1, maxtlocal = -1; - kcheck = cr == NULL || - PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE) == 0; + kcheckload = cr == NULL || + (vstate->dtvs_state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) == 0; dp->dtdo_destructive = 0; @@ -7836,7 +8753,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, err += efunc(pc, "invalid register %u\n", rd); if (rd == 0) err += efunc(pc, "cannot write to %r0\n"); - if (kcheck) + if (kcheckload) dp->dtdo_buf[pc] = DIF_INSTR_LOAD(op + DIF_OP_RLDSB - DIF_OP_LDSB, r1, rd); break; @@ -7986,7 +8903,8 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, err += efunc(pc, "invalid register %u\n", rd); break; case DIF_OP_CALL: - if (subr > DIF_SUBR_MAX) + if (subr > DIF_SUBR_MAX && + !(subr >= DIF_SUBR_APPLE_MIN && subr <= DIF_SUBR_APPLE_MAX)) err += efunc(pc, "invalid subr %u\n", subr); if (rd >= nregs) err += efunc(pc, "invalid register %u\n", rd); @@ -7994,7 +8912,9 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, err += efunc(pc, "cannot write to %r0\n"); if (subr == DIF_SUBR_COPYOUT || - subr == DIF_SUBR_COPYOUTSTR) { + subr == DIF_SUBR_COPYOUTSTR || + subr == DIF_SUBR_KDEBUG_TRACE || + subr == DIF_SUBR_KDEBUG_TRACE_STRING) { dp->dtdo_destructive = 1; } break; @@ -8026,7 +8946,7 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, "expected 'ret' as last DIF instruction\n"); } - if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF)) { + if (!(dp->dtdo_rtype.dtdt_flags & (DIF_TF_BYREF | DIF_TF_BYUREF))) { /* * If we're not returning by reference, the size must be either * 0 or the size of one of the base types. @@ -8040,14 +8960,15 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, break; default: - err += efunc(dp->dtdo_len - 1, "bad return size"); + err += efunc(dp->dtdo_len - 1, "bad return size\n"); } } for (i = 0; i < dp->dtdo_varlen && err == 0; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i], *existing = NULL; dtrace_diftype_t *vt, *et; - uint_t id, ndx; + uint_t id; + int ndx; if (v->dtdv_scope != DIFV_SCOPE_GLOBAL && v->dtdv_scope != DIFV_SCOPE_THREAD && @@ -8081,6 +9002,9 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, switch (v->dtdv_scope) { case DIFV_SCOPE_GLOBAL: + if (maxglobal == -1 || ndx > maxglobal) + maxglobal = ndx; + if (ndx < vstate->dtvs_nglobals) { dtrace_statvar_t *svar; @@ -8091,11 +9015,16 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, break; case DIFV_SCOPE_THREAD: + if (maxtlocal == -1 || ndx > maxtlocal) + maxtlocal = ndx; + if (ndx < vstate->dtvs_ntlocals) existing = &vstate->dtvs_tlocals[ndx]; break; case DIFV_SCOPE_LOCAL: + if (maxlocal == -1 || ndx > maxlocal) + maxlocal = ndx; if (ndx < vstate->dtvs_nlocals) { dtrace_statvar_t *svar; @@ -8114,9 +9043,10 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, break; } - if (v->dtdv_scope == DIFV_SCOPE_GLOBAL && - vt->dtdt_size > dtrace_global_maxsize) { - err += efunc(i, "oversized by-ref global\n"); + if ((v->dtdv_scope == DIFV_SCOPE_GLOBAL || + v->dtdv_scope == DIFV_SCOPE_LOCAL) && + vt->dtdt_size > dtrace_statvar_maxsize) { + err += efunc(i, "oversized by-ref static\n"); break; } } @@ -8143,6 +9073,37 @@ dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs, } } + for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) { + 
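+		/*
+		 * Second pass: with the largest declared index for each
+		 * variable scope in hand, reject any load or store whose
+		 * variable id exceeds what the variable table declared.
+		 */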
dif_instr_t instr = dp->dtdo_buf[pc]; + + uint_t v = DIF_INSTR_VAR(instr); + uint_t op = DIF_INSTR_OP(instr); + + switch (op) { + case DIF_OP_LDGS: + case DIF_OP_LDGAA: + case DIF_OP_STGS: + case DIF_OP_STGAA: + if (v > (uint_t)(DIF_VAR_OTHER_UBASE + maxglobal)) + err += efunc(pc, "invalid variable %u\n", v); + break; + case DIF_OP_LDTS: + case DIF_OP_LDTAA: + case DIF_OP_STTS: + case DIF_OP_STTAA: + if (v > (uint_t)(DIF_VAR_OTHER_UBASE + maxtlocal)) + err += efunc(pc, "invalid variable %u\n", v); + break; + case DIF_OP_LDLS: + case DIF_OP_STLS: + if (v > (uint_t)(DIF_VAR_OTHER_UBASE + maxlocal)) + err += efunc(pc, "invalid variable %u\n", v); + break; + default: + break; + } + } + return (err); } @@ -8272,13 +9233,23 @@ dtrace_difo_validate_helper(dtrace_difo_t *dp) subr == DIF_SUBR_COPYINTO || subr == DIF_SUBR_COPYINSTR || subr == DIF_SUBR_INDEX || + subr == DIF_SUBR_INET_NTOA || + subr == DIF_SUBR_INET_NTOA6 || + subr == DIF_SUBR_INET_NTOP || subr == DIF_SUBR_LLTOSTR || subr == DIF_SUBR_RINDEX || subr == DIF_SUBR_STRCHR || subr == DIF_SUBR_STRJOIN || subr == DIF_SUBR_STRRCHR || subr == DIF_SUBR_STRSTR || - subr == DIF_SUBR_CHUD) + subr == DIF_SUBR_KDEBUG_TRACE || + subr == DIF_SUBR_KDEBUG_TRACE_STRING || + subr == DIF_SUBR_HTONS || + subr == DIF_SUBR_HTONL || + subr == DIF_SUBR_HTONLL || + subr == DIF_SUBR_NTOHS || + subr == DIF_SUBR_NTOHL || + subr == DIF_SUBR_NTOHLL) break; err += efunc(pc, "invalid subr %u\n", subr); @@ -8300,7 +9271,7 @@ dtrace_difo_validate_helper(dtrace_difo_t *dp) static int dtrace_difo_cacheable(dtrace_difo_t *dp) { - int i; + uint_t i; if (dp == NULL) return (0); @@ -8345,7 +9316,7 @@ dtrace_difo_cacheable(dtrace_difo_t *dp) static void dtrace_difo_hold(dtrace_difo_t *dp) { - int i; + uint_t i; lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); @@ -8377,7 +9348,7 @@ dtrace_difo_hold(dtrace_difo_t *dp) static void dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { - uint64_t sval; + uint64_t sval = 0; dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */ const dif_instr_t *text = dp->dtdo_buf; uint_t pc, srd = 0; @@ -8443,6 +9414,9 @@ dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate) if (srd == 0) return; + if (sval > LONG_MAX) + return; + tupregs[ttop++].dttk_size = sval; } @@ -8504,6 +9478,19 @@ dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate) */ size = P2ROUNDUP(size, sizeof (uint64_t)); + /* + * Before setting the chunk size, check that we're not going + * to set it to a negative value... + */ + if (size > LONG_MAX) + return; + + /* + * ...and make certain that we didn't badly overflow. 
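+		 * For example, a corrupt key size within a few bytes of
+		 * UINT64_MAX wraps to a small value under P2ROUNDUP(); the
+		 * comparisons below against ksize and sizeof (dtrace_dynvar_t)
+		 * catch both that wrap and overflow in the earlier additions.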
+ */ + if (size < ksize || size < sizeof (dtrace_dynvar_t)) + return; + if (size > vstate->dtvs_dynvars.dtds_chunksize) vstate->dtvs_dynvars.dtds_chunksize = size; } @@ -8512,18 +9499,19 @@ dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate) static void dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { - int i, oldsvars, osz, nsz, otlocals, ntlocals; - uint_t id; + int oldsvars, osz, nsz, otlocals, ntlocals; + uint_t i, id; lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dp->dtdo_buf != NULL && dp->dtdo_len != 0); for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; - dtrace_statvar_t *svar, ***svarp; + dtrace_statvar_t *svar; + dtrace_statvar_t ***svarp = NULL; size_t dsize = 0; uint8_t scope = v->dtdv_scope; - int *np; + int *np = (int *)NULL; if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE) continue; @@ -8532,7 +9520,7 @@ dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate) switch (scope) { case DIFV_SCOPE_THREAD: - while (id >= (otlocals = vstate->dtvs_ntlocals)) { + while (id >= (uint_t)(otlocals = vstate->dtvs_ntlocals)) { dtrace_difv_t *tlocals; if ((ntlocals = (otlocals << 1)) == 0) @@ -8561,10 +9549,10 @@ dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate) svarp = &vstate->dtvs_locals; if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) - dsize = NCPU * (v->dtdv_type.dtdt_size + + dsize = (int)NCPU * (v->dtdv_type.dtdt_size + sizeof (uint64_t)); else - dsize = NCPU * sizeof (uint64_t); + dsize = (int)NCPU * sizeof (uint64_t); break; @@ -8582,7 +9570,7 @@ dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate) ASSERT(0); } - while (id >= (oldsvars = *np)) { + while (id >= (uint_t)(oldsvars = *np)) { dtrace_statvar_t **statics; int newsvars, oldsize, newsize; @@ -8669,16 +9657,17 @@ dtrace_difo_duplicate(dtrace_difo_t *dp, dtrace_vstate_t *vstate) static void dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { - int i; + uint_t i; ASSERT(dp->dtdo_refcnt == 0); for (i = 0; i < dp->dtdo_varlen; i++) { dtrace_difv_t *v = &dp->dtdo_vartab[i]; - dtrace_statvar_t *svar, **svarp; + dtrace_statvar_t *svar; + dtrace_statvar_t **svarp = NULL; uint_t id; uint8_t scope = v->dtdv_scope; - int *np; + int *np = NULL; switch (scope) { case DIFV_SCOPE_THREAD: @@ -8702,7 +9691,8 @@ dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate) continue; id -= DIF_VAR_OTHER_UBASE; - ASSERT(id < *np); + + ASSERT(id < (uint_t)*np); svar = svarp[id]; ASSERT(svar != NULL); @@ -8712,7 +9702,7 @@ dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate) continue; if (svar->dtsv_size != 0) { - ASSERT(svar->dtsv_data != NULL); + ASSERT(svar->dtsv_data != 0); kmem_free((void *)(uintptr_t)svar->dtsv_data, svar->dtsv_size); } @@ -8732,7 +9722,7 @@ dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate) static void dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate) { - int i; + uint_t i; lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dp->dtdo_refcnt != 0); @@ -8888,6 +9878,7 @@ static void dtrace_predicate_release(dtrace_predicate_t *pred, dtrace_vstate_t *vstate) { dtrace_difo_t *dp = pred->dtp_difo; +#pragma unused(dp) /* __APPLE__ */ lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dp != NULL && dp->dtdo_refcnt != 0); @@ -8908,8 +9899,8 @@ dtrace_actdesc_create(dtrace_actkind_t kind, uint32_t ntuple, { dtrace_actdesc_t *act; -/* ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != NULL && - arg >= KERNELBASE) || (arg == NULL && kind == DTRACEACT_PRINTA));*/ + 
ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != 0 && + arg >= KERNELBASE) || (arg == 0 && kind == DTRACEACT_PRINTA)); act = kmem_zalloc(sizeof (dtrace_actdesc_t), KM_SLEEP); act->dtad_kind = kind; @@ -8945,8 +9936,8 @@ dtrace_actdesc_release(dtrace_actdesc_t *act, dtrace_vstate_t *vstate) if (DTRACEACT_ISPRINTFLIKE(kind)) { char *str = (char *)(uintptr_t)act->dtad_arg; -/* ASSERT((str != NULL && (uintptr_t)str >= KERNELBASE) || - (str == NULL && act->dtad_kind == DTRACEACT_PRINTA));*/ + ASSERT((str != NULL && (uintptr_t)str >= KERNELBASE) || + (str == NULL && act->dtad_kind == DTRACEACT_PRINTA)); if (str != NULL) kmem_free(str, strlen(str) + 1); @@ -8972,18 +9963,18 @@ dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe) /* * The default size is the size of the default action: recording - * the epid. + * the header. */ - ecb->dte_size = ecb->dte_needed = sizeof (dtrace_epid_t); + ecb->dte_size = ecb->dte_needed = sizeof (dtrace_rechdr_t); ecb->dte_alignment = sizeof (dtrace_epid_t); epid = state->dts_epid++; - if (epid - 1 >= state->dts_necbs) { + if (epid - 1 >= (dtrace_epid_t)state->dts_necbs) { dtrace_ecb_t **oecbs = state->dts_ecbs, **ecbs; int necbs = state->dts_necbs << 1; - ASSERT(epid == state->dts_necbs + 1); + ASSERT(epid == (dtrace_epid_t)state->dts_necbs + 1); if (necbs == 0) { ASSERT(oecbs == NULL); @@ -9025,7 +10016,7 @@ dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe) return (ecb); } -static void +static int dtrace_ecb_enable(dtrace_ecb_t *ecb) { dtrace_probe_t *probe = ecb->dte_probe; @@ -9038,9 +10029,10 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb) /* * This is the NULL probe -- there's nothing to do. */ - return; + return(0); } + probe->dtpr_provider->dtpv_ecb_count++; if (probe->dtpr_ecb == NULL) { dtrace_provider_t *prov = probe->dtpr_provider; @@ -9052,8 +10044,8 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb) if (ecb->dte_predicate != NULL) probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid; - prov->dtpv_pops.dtps_enable(prov->dtpv_arg, - probe->dtpr_id, probe->dtpr_arg); + return (prov->dtpv_pops.dtps_enable(prov->dtpv_arg, + probe->dtpr_id, probe->dtpr_arg)); } else { /* * This probe is already active. Swing the last pointer to @@ -9066,128 +10058,98 @@ dtrace_ecb_enable(dtrace_ecb_t *ecb) probe->dtpr_predcache = 0; dtrace_sync(); + return(0); } } -static void +static int dtrace_ecb_resize(dtrace_ecb_t *ecb) { - uint32_t maxalign = sizeof (dtrace_epid_t); - uint32_t align = sizeof (uint8_t), offs, diff; dtrace_action_t *act; - int wastuple = 0; + uint32_t curneeded = UINT32_MAX; uint32_t aggbase = UINT32_MAX; - dtrace_state_t *state = ecb->dte_state; /* - * If we record anything, we always record the epid. (And we always - * record it first.) + * If we record anything, we always record the dtrace_rechdr_t. (And + * we always record it first.) */ - offs = sizeof (dtrace_epid_t); - ecb->dte_size = ecb->dte_needed = sizeof (dtrace_epid_t); + ecb->dte_size = sizeof (dtrace_rechdr_t); + ecb->dte_alignment = sizeof (dtrace_epid_t); for (act = ecb->dte_action; act != NULL; act = act->dta_next) { dtrace_recdesc_t *rec = &act->dta_rec; + ASSERT(rec->dtrd_size > 0 || rec->dtrd_alignment == 1); - if ((align = rec->dtrd_alignment) > maxalign) - maxalign = align; - - if (!wastuple && act->dta_intuple) { - /* - * This is the first record in a tuple. Align the - * offset to be at offset 4 in an 8-byte aligned - * block. 
- */ - diff = offs + sizeof (dtrace_aggid_t); - - if (diff = (diff & (sizeof (uint64_t) - 1))) - offs += sizeof (uint64_t) - diff; - - aggbase = offs - sizeof (dtrace_aggid_t); - ASSERT(!(aggbase & (sizeof (uint64_t) - 1))); - } - - /*LINTED*/ - if (rec->dtrd_size != 0 && (diff = (offs & (align - 1)))) { - /* - * The current offset is not properly aligned; align it. - */ - offs += align - diff; - } - - rec->dtrd_offset = offs; - - if (offs + rec->dtrd_size > ecb->dte_needed) { - ecb->dte_needed = offs + rec->dtrd_size; - - if (ecb->dte_needed > state->dts_needed) - state->dts_needed = ecb->dte_needed; - } + ecb->dte_alignment = MAX(ecb->dte_alignment, rec->dtrd_alignment); if (DTRACEACT_ISAGG(act->dta_kind)) { dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act; - dtrace_action_t *first = agg->dtag_first, *prev; - ASSERT(rec->dtrd_size != 0 && first != NULL); - ASSERT(wastuple); + ASSERT(rec->dtrd_size != 0); + ASSERT(agg->dtag_first != NULL); + ASSERT(act->dta_prev->dta_intuple); ASSERT(aggbase != UINT32_MAX); + ASSERT(curneeded != UINT32_MAX); agg->dtag_base = aggbase; + curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment); + rec->dtrd_offset = curneeded; + if (curneeded + rec->dtrd_size < curneeded) + return (EINVAL); + curneeded += rec->dtrd_size; + ecb->dte_needed = MAX(ecb->dte_needed, curneeded); - while ((prev = first->dta_prev) != NULL && - DTRACEACT_ISAGG(prev->dta_kind)) { - agg = (dtrace_aggregation_t *)prev; - first = agg->dtag_first; - } + aggbase = UINT32_MAX; + curneeded = UINT32_MAX; + } else if (act->dta_intuple) { + if (curneeded == UINT32_MAX) { + /* + * This is the first record in a tuple. Align + * curneeded to be at offset 4 in an 8-byte + * aligned block. + */ + ASSERT(act->dta_prev == NULL || !act->dta_prev->dta_intuple); + ASSERT(aggbase == UINT32_MAX); - if (prev != NULL) { - offs = prev->dta_rec.dtrd_offset + - prev->dta_rec.dtrd_size; - } else { - offs = sizeof (dtrace_epid_t); + curneeded = P2PHASEUP(ecb->dte_size, + sizeof (uint64_t), sizeof (dtrace_aggid_t)); + + aggbase = curneeded - sizeof (dtrace_aggid_t); + ASSERT(IS_P2ALIGNED(aggbase, + sizeof (uint64_t))); } - wastuple = 0; - } else { - if (!act->dta_intuple) - ecb->dte_size = offs + rec->dtrd_size; - offs += rec->dtrd_size; + curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment); + rec->dtrd_offset = curneeded; + curneeded += rec->dtrd_size; + if (curneeded + rec->dtrd_size < curneeded) + return (EINVAL); + } else { + /* tuples must be followed by an aggregation */ + ASSERT(act->dta_prev == NULL || !act->dta_prev->dta_intuple); + ecb->dte_size = P2ROUNDUP(ecb->dte_size, rec->dtrd_alignment); + rec->dtrd_offset = ecb->dte_size; + if (ecb->dte_size + rec->dtrd_size < ecb->dte_size) + return (EINVAL); + ecb->dte_size += rec->dtrd_size; + ecb->dte_needed = MAX(ecb->dte_needed, ecb->dte_size); } - - wastuple = act->dta_intuple; } if ((act = ecb->dte_action) != NULL && !(act->dta_kind == DTRACEACT_SPECULATE && act->dta_next == NULL) && - ecb->dte_size == sizeof (dtrace_epid_t)) { + ecb->dte_size == sizeof (dtrace_rechdr_t)) { /* - * If the size is still sizeof (dtrace_epid_t), then all + * If the size is still sizeof (dtrace_rechdr_t), then all * actions store no data; set the size to 0. */ - ecb->dte_alignment = maxalign; ecb->dte_size = 0; - - /* - * If the needed space is still sizeof (dtrace_epid_t), then - * all actions need no additional space; set the needed - * size to 0. 
- */ - if (ecb->dte_needed == sizeof (dtrace_epid_t)) - ecb->dte_needed = 0; - - return; } - /* - * Set our alignment, and make sure that the dte_size and dte_needed - * are aligned to the size of an EPID. - */ - ecb->dte_alignment = maxalign; - ecb->dte_size = (ecb->dte_size + (sizeof (dtrace_epid_t) - 1)) & - ~(sizeof (dtrace_epid_t) - 1); - ecb->dte_needed = (ecb->dte_needed + (sizeof (dtrace_epid_t) - 1)) & - ~(sizeof (dtrace_epid_t) - 1); - ASSERT(ecb->dte_size <= ecb->dte_needed); + ecb->dte_size = P2ROUNDUP(ecb->dte_size, sizeof (dtrace_epid_t)); + ecb->dte_needed = P2ROUNDUP(ecb->dte_needed, (sizeof (dtrace_epid_t))); + ecb->dte_state->dts_needed = MAX(ecb->dte_state->dts_needed, ecb->dte_needed); + return (0); } static dtrace_action_t * @@ -9208,11 +10170,12 @@ dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) switch (desc->dtad_kind) { case DTRACEAGG_MIN: - agg->dtag_initial = UINT64_MAX; + agg->dtag_initial = INT64_MAX; agg->dtag_aggregate = dtrace_aggregate_min; break; case DTRACEAGG_MAX: + agg->dtag_initial = INT64_MIN; agg->dtag_aggregate = dtrace_aggregate_max; break; @@ -9240,11 +10203,44 @@ dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) break; } + case DTRACEAGG_LLQUANTIZE: { + uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg); + uint16_t low = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg); + uint16_t high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg); + uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg); + int64_t v; + + agg->dtag_initial = desc->dtad_arg; + agg->dtag_aggregate = dtrace_aggregate_llquantize; + + if (factor < 2 || low >= high || nsteps < factor) + goto err; + + /* + * Now check that the number of steps evenly divides a power + * of the factor. (This assures both integer bucket size and + * linearity within each magnitude.) 
+ */ + for (v = factor; v < nsteps; v *= factor) + continue; + + if ((v % nsteps) || (nsteps % factor)) + goto err; + + size = (dtrace_aggregate_llquantize_bucket(factor, low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t); + break; + } + case DTRACEAGG_AVG: agg->dtag_aggregate = dtrace_aggregate_avg; size = sizeof (uint64_t) * 2; break; + case DTRACEAGG_STDDEV: + agg->dtag_aggregate = dtrace_aggregate_stddev; + size = sizeof (uint64_t) * 4; + break; + case DTRACEAGG_SUM: agg->dtag_aggregate = dtrace_aggregate_sum; break; @@ -9303,13 +10299,13 @@ success: aggid = (dtrace_aggid_t)(uintptr_t)vmem_alloc(state->dts_aggid_arena, 1, VM_BESTFIT | VM_SLEEP); - if (aggid - 1 >= state->dts_naggregations) { + if (aggid - 1 >= (dtrace_aggid_t)state->dts_naggregations) { dtrace_aggregation_t **oaggs = state->dts_aggregations; dtrace_aggregation_t **aggs; int naggs = state->dts_naggregations << 1; int onaggs = state->dts_naggregations; - ASSERT(aggid == state->dts_naggregations + 1); + ASSERT(aggid == (dtrace_aggid_t)state->dts_naggregations + 1); if (naggs == 0) { ASSERT(oaggs == NULL); @@ -9367,7 +10363,8 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) uint16_t format = 0; dtrace_recdesc_t *rec; dtrace_state_t *state = ecb->dte_state; - dtrace_optval_t *opt = state->dts_options, nframes, strsize; + dtrace_optval_t *opt = state->dts_options; + dtrace_optval_t nframes=0, strsize; uint64_t arg = desc->dtad_arg; lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); @@ -9404,23 +10401,27 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) case DTRACEACT_PRINTA: case DTRACEACT_SYSTEM: case DTRACEACT_FREOPEN: + case DTRACEACT_DIFEXPR: /* * We know that our arg is a string -- turn it into a * format. */ - if (arg == NULL) { - ASSERT(desc->dtad_kind == DTRACEACT_PRINTA); + if (arg == 0) { + ASSERT(desc->dtad_kind == DTRACEACT_PRINTA || + desc->dtad_kind == DTRACEACT_DIFEXPR); format = 0; } else { - ASSERT(arg != NULL); - /* ASSERT(arg > KERNELBASE); */ + ASSERT(arg != 0); + ASSERT(arg > KERNELBASE); format = dtrace_format_add(state, (char *)(uintptr_t)arg); } /*FALLTHROUGH*/ case DTRACEACT_LIBACT: - case DTRACEACT_DIFEXPR: + case DTRACEACT_TRACEMEM: + case DTRACEACT_TRACEMEM_DYNSIZE: + case DTRACEACT_APPLEBINARY: /* __APPLE__ */ if (dp == NULL) return (EINVAL); @@ -9507,6 +10508,7 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) case DTRACEACT_CHILL: case DTRACEACT_DISCARD: case DTRACEACT_RAISE: + case DTRACEACT_PIDRESUME: /* __APPLE__ */ if (dp == NULL) return (EINVAL); break; @@ -9519,7 +10521,7 @@ dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc) break; case DTRACEACT_SPECULATE: - if (ecb->dte_size > sizeof (dtrace_epid_t)) + if (ecb->dte_size > sizeof (dtrace_rechdr_t)) return (EINVAL); if (dp == NULL) @@ -9632,7 +10634,7 @@ dtrace_ecb_action_remove(dtrace_ecb_t *ecb) ecb->dte_action = NULL; ecb->dte_action_last = NULL; - ecb->dte_size = sizeof (dtrace_epid_t); + ecb->dte_size = 0; } static void @@ -9672,6 +10674,7 @@ dtrace_ecb_disable(dtrace_ecb_t *ecb) probe->dtpr_ecb_last = prev; } + probe->dtpr_provider->dtpv_ecb_count--; /* * The ECB has been disconnected from the probe; now sync to assure * that all CPUs have seen the change before returning. @@ -9795,12 +10798,12 @@ dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe, * of creating our own (saving both time and space). 
*/ dtrace_ecb_t *cached = dtrace_ecb_create_cache; - dtrace_action_t *act = cached->dte_action; + dtrace_action_t *act_if = cached->dte_action; - if (act != NULL) { - ASSERT(act->dta_refcnt > 0); - act->dta_refcnt++; - ecb->dte_action = act; + if (act_if != NULL) { + ASSERT(act_if->dta_refcnt > 0); + act_if->dta_refcnt++; + ecb->dte_action = act_if; ecb->dte_action_last = cached->dte_action_last; ecb->dte_needed = cached->dte_needed; ecb->dte_size = cached->dte_size; @@ -9817,21 +10820,25 @@ dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe, } } - dtrace_ecb_resize(ecb); + if ((enab->dten_error = dtrace_ecb_resize(ecb)) != 0) { + dtrace_ecb_destroy(ecb); + return (NULL); + } return (dtrace_ecb_create_cache = ecb); } static int -dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg) +dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg1, void *arg2) { dtrace_ecb_t *ecb; - dtrace_enabling_t *enab = arg; + dtrace_enabling_t *enab = arg1; + dtrace_ecbdesc_t *ep = arg2; dtrace_state_t *state = enab->dten_vstate->dtvs_state; ASSERT(state != NULL); - if (probe != NULL && probe->dtpr_gen < enab->dten_probegen) { + if (probe != NULL && ep != NULL && probe->dtpr_gen < ep->dted_probegen) { /* * This probe was created in a generation for which this * enabling has previously created ECBs; we don't want to @@ -9843,7 +10850,9 @@ dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg) if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL) return (DTRACE_MATCH_DONE); - dtrace_ecb_enable(ecb); + if (dtrace_ecb_enable(ecb) < 0) + return (DTRACE_MATCH_FAIL); + return (DTRACE_MATCH_NEXT); } @@ -9851,10 +10860,11 @@ static dtrace_ecb_t * dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id) { dtrace_ecb_t *ecb; +#pragma unused(ecb) /* __APPLE__ */ lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - if (id == 0 || id > state->dts_necbs) + if (id == 0 || id > (dtrace_epid_t)state->dts_necbs) return (NULL); ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL); @@ -9867,10 +10877,11 @@ static dtrace_aggregation_t * dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id) { dtrace_aggregation_t *agg; +#pragma unused(agg) /* __APPLE__ */ lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - if (id == 0 || id > state->dts_naggregations) + if (id == 0 || id > (dtrace_aggid_t)state->dts_naggregations) return (NULL); ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL); @@ -9901,11 +10912,13 @@ dtrace_buffer_switch(dtrace_buffer_t *buf) caddr_t tomax = buf->dtb_tomax; caddr_t xamot = buf->dtb_xamot; dtrace_icookie_t cookie; + hrtime_t now; ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); ASSERT(!(buf->dtb_flags & DTRACEBUF_RING)); cookie = dtrace_interrupt_disable(); + now = dtrace_gethrtime(); buf->dtb_tomax = xamot; buf->dtb_xamot = tomax; buf->dtb_xamot_drops = buf->dtb_drops; @@ -9916,6 +10929,10 @@ dtrace_buffer_switch(dtrace_buffer_t *buf) buf->dtb_drops = 0; buf->dtb_errors = 0; buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED); + buf->dtb_interval = now - buf->dtb_switched; + buf->dtb_switched = now; + buf->dtb_cur_limit = buf->dtb_limit; + dtrace_interrupt_enable(cookie); } @@ -9947,24 +10964,31 @@ dtrace_buffer_activate(dtrace_state_t *state) } static int -dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags, +dtrace_buffer_canalloc(size_t size) +{ + if (size > (UINT64_MAX - dtrace_buffer_memory_inuse)) + return (B_FALSE); + if ((size + dtrace_buffer_memory_inuse) > dtrace_buffer_memory_maxsize) + return (B_FALSE); + + return 
(B_TRUE);
+}
+
+static int
+dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t limit, size_t size, int flags,
     processorid_t cpu)
 {
-	cpu_t *cp;
+	dtrace_cpu_t *cp;
 	dtrace_buffer_t *buf;
+	size_t size_before_alloc = dtrace_buffer_memory_inuse;
 
 	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
 	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
 
-	if (size > dtrace_nonroot_maxsize &&
+	if (size > (size_t)dtrace_nonroot_maxsize &&
 	    !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE))
 		return (EFBIG);
 
-#if defined(__APPLE__)
-	if (size > (sane_size / 8) / NCPU) /* As in kdbg_set_nkdbufs(), roughly. */
-		return (ENOMEM);
-#endif /* __APPLE__ */
-
 	cp = cpu_list;
 
 	do {
@@ -9985,9 +11009,18 @@ dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,
 
 		ASSERT(buf->dtb_xamot == NULL);
 
+
+		/* DTrace, please do not eat all the memory. */
+		if (dtrace_buffer_canalloc(size) == B_FALSE)
+			goto err;
 		if ((buf->dtb_tomax = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
 			goto err;
+		dtrace_buffer_memory_inuse += size;
 
+		/* Ensure that limit is always lower than size */
+		limit = limit == size ? limit - 1 : limit;
+		buf->dtb_cur_limit = limit;
+		buf->dtb_limit = limit;
 		buf->dtb_size = size;
 		buf->dtb_flags = flags;
 		buf->dtb_offset = 0;
@@ -9996,10 +11029,16 @@ dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,
 		if (flags & DTRACEBUF_NOSWITCH)
 			continue;
 
+		/* DTrace, please do not eat all the memory. */
+		if (dtrace_buffer_canalloc(size) == B_FALSE)
+			goto err;
 		if ((buf->dtb_xamot = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
 			goto err;
+		dtrace_buffer_memory_inuse += size;
 	} while ((cp = cp->cpu_next) != cpu_list);
 
+	ASSERT(dtrace_buffer_memory_inuse <= dtrace_buffer_memory_maxsize);
+
 	return (0);
 
 err:
@@ -10027,6 +11066,9 @@ err:
 		buf->dtb_size = 0;
 	} while ((cp = cp->cpu_next) != cpu_list);
 
+	/* Restore the size saved before allocating memory */
+	dtrace_buffer_memory_inuse = size_before_alloc;
+
 	return (ENOMEM);
 }
 
@@ -10056,7 +11098,7 @@ dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align,
 	intptr_t offs = buf->dtb_offset, soffs;
 	intptr_t woffs;
 	caddr_t tomax;
-	size_t total;
+	size_t total_off;
 
 	if (buf->dtb_flags & DTRACEBUF_INACTIVE)
 		return (-1);
@@ -10078,9 +11120,27 @@ dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align,
 		offs += sizeof (uint32_t);
 	}
 
-	if ((soffs = offs + needed) > buf->dtb_size) {
-		dtrace_buffer_drop(buf);
-		return (-1);
+	if ((uint64_t)(soffs = offs + needed) > buf->dtb_cur_limit) {
+		if (buf->dtb_cur_limit == buf->dtb_limit) {
+			buf->dtb_cur_limit = buf->dtb_size;
+
+			atomic_add_32(&state->dts_buf_over_limit, 1);
+			/**
+			 * Set an AST on the current processor
+			 * so that we can wake up the process
+			 * outside of probe context, when we know
+			 * it is safe to do so
+			 */
+			minor_t minor = getminor(state->dts_dev);
+			ASSERT(minor < 32);
+
+			atomic_or_32(&dtrace_wake_clients, 1 << minor);
+			ast_dtrace_on();
+		}
+		if ((uint64_t)soffs > buf->dtb_size) {
+			dtrace_buffer_drop(buf);
+			return (-1);
+		}
 	}
 
 	if (mstate == NULL)
@@ -10100,7 +11160,7 @@ dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align,
 		goto out;
 	}
 
-	total = needed + (offs & (align - 1));
+	total_off = needed + (offs & (align - 1));
 
 	/*
	 * For a ring buffer, life is quite a bit more complicated.  Before
@@ -10109,15 +11169,15 @@ dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align,
 	 * is required.)
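
The accounting above is worth isolating. Here is a minimal sketch of the dtrace_buffer_canalloc() logic, with the globals renamed for a stand-alone build: the first comparison guards the addition itself against 64-bit wraparound, and the second enforces the global cap.

#include <stdint.h>

static uint64_t inuse;			/* models dtrace_buffer_memory_inuse */
static uint64_t cap = 1ULL << 24;	/* models dtrace_buffer_memory_maxsize */

/* Overflow-safe capacity check, mirroring dtrace_buffer_canalloc(). */
static int
can_alloc(uint64_t size)
{
	if (size > UINT64_MAX - inuse)
		return (0);		/* inuse + size would wrap */
	if (size + inuse > cap)
		return (0);		/* would exceed the global cap */
	return (1);
}
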
*/ if ((buf->dtb_flags & DTRACEBUF_WRAPPED) || - offs + total > buf->dtb_size) { + offs + total_off > buf->dtb_size) { woffs = buf->dtb_xamot_offset; - if (offs + total > buf->dtb_size) { + if (offs + total_off > buf->dtb_size) { /* * We can't fit in the end of the buffer. First, a * sanity check that we can fit in the buffer at all. */ - if (total > buf->dtb_size) { + if (total_off > buf->dtb_size) { dtrace_buffer_drop(buf); return (-1); } @@ -10149,7 +11209,7 @@ dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align, * there. We need to clear the buffer from the current * offset to the end (there may be old gunk there). */ - while (offs < buf->dtb_size) + while ((uint64_t)offs < buf->dtb_size) tomax[offs++] = 0; /* @@ -10160,7 +11220,7 @@ dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align, * that the top of the buffer is aligned. */ offs = 0; - total = needed; + total_off = needed; buf->dtb_flags |= DTRACEBUF_WRAPPED; } else { /* @@ -10186,14 +11246,14 @@ dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align, } } - while (offs + total > woffs) { + while (offs + total_off > (size_t)woffs) { dtrace_epid_t epid = *(uint32_t *)(tomax + woffs); size_t size; if (epid == DTRACE_EPIDNONE) { size = sizeof (uint32_t); } else { - ASSERT(epid <= state->dts_necbs); + ASSERT(epid <= (dtrace_epid_t)state->dts_necbs); ASSERT(state->dts_ecbs[epid - 1] != NULL); size = state->dts_ecbs[epid - 1]->dte_size; @@ -10226,9 +11286,9 @@ dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align, if (offs == 0) { buf->dtb_flags &= ~DTRACEBUF_WRAPPED; buf->dtb_offset = 0; - woffs = total; + woffs = total_off; - while (woffs < buf->dtb_size) + while ((uint64_t)woffs < buf->dtb_size) tomax[woffs++] = 0; } @@ -10333,7 +11393,7 @@ dtrace_buffer_free(dtrace_buffer_t *bufs) { int i; - for (i = 0; i < NCPU; i++) { + for (i = 0; i < (int)NCPU; i++) { dtrace_buffer_t *buf = &bufs[i]; if (buf->dtb_tomax == NULL) { @@ -10345,9 +11405,15 @@ dtrace_buffer_free(dtrace_buffer_t *bufs) if (buf->dtb_xamot != NULL) { ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); kmem_free(buf->dtb_xamot, buf->dtb_size); + + ASSERT(dtrace_buffer_memory_inuse >= buf->dtb_size); + dtrace_buffer_memory_inuse -= buf->dtb_size; } kmem_free(buf->dtb_tomax, buf->dtb_size); + ASSERT(dtrace_buffer_memory_inuse >= buf->dtb_size); + dtrace_buffer_memory_inuse -= buf->dtb_size; + buf->dtb_size = 0; buf->dtb_tomax = NULL; buf->dtb_xamot = NULL; @@ -10381,9 +11447,8 @@ dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb) ASSERT(enab->dten_probegen == 0); ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL); -#if defined(__APPLE__) - if (ecb == NULL) return; /* XXX protection against gcc 4.0 botch on x86 */ -#endif /* __APPLE__ */ + /* APPLE NOTE: this protects against gcc 4.0 botch on x86 */ + if (ecb == NULL) return; if (enab->dten_ndesc < enab->dten_maxdesc) { enab->dten_desc[enab->dten_ndesc++] = ecb; @@ -10489,6 +11554,7 @@ dtrace_enabling_destroy(dtrace_enabling_t *enab) ASSERT(enab->dten_vstate->dtvs_state != NULL); ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0); enab->dten_vstate->dtvs_state->dts_nretained--; + dtrace_retained_gen++; } if (enab->dten_prev == NULL) { @@ -10531,6 +11597,7 @@ dtrace_enabling_retain(dtrace_enabling_t *enab) return (ENOSPC); state->dts_nretained++; + dtrace_retained_gen++; if (dtrace_retained == NULL) { dtrace_retained = enab; @@ -10583,16 +11650,17 @@ dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match, 
dtrace_ecbdesc_t *ep = enab->dten_desc[i]; dtrace_probedesc_t *pd = &ep->dted_probe; - if (strcmp(pd->dtpd_provider, match->dtpd_provider)) + /* APPLE NOTE: Darwin employs size bounded string operation. */ + if (strncmp(pd->dtpd_provider, match->dtpd_provider, DTRACE_PROVNAMELEN)) continue; - if (strcmp(pd->dtpd_mod, match->dtpd_mod)) + if (strncmp(pd->dtpd_mod, match->dtpd_mod, DTRACE_MODNAMELEN)) continue; - if (strcmp(pd->dtpd_func, match->dtpd_func)) + if (strncmp(pd->dtpd_func, match->dtpd_func, DTRACE_FUNCNAMELEN)) continue; - if (strcmp(pd->dtpd_name, match->dtpd_name)) + if (strncmp(pd->dtpd_name, match->dtpd_name, DTRACE_NAMELEN)) continue; /* @@ -10642,10 +11710,10 @@ dtrace_enabling_retract(dtrace_state_t *state) } static int -dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched) +dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched, dtrace_match_cond_t *cond) { int i = 0; - int matched = 0; + int total_matched = 0, matched = 0; lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); @@ -10656,7 +11724,22 @@ dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched) enab->dten_current = ep; enab->dten_error = 0; - matched += dtrace_probe_enable(&ep->dted_probe, enab); + /** + * Before doing a dtrace_probe_enable, which is really + * expensive, check that this enabling matches the matching precondition + * if we have one + */ + if (cond && (cond->dmc_func(&ep->dted_probe, cond->dmc_data) == 0)) { + continue; + } + /* + * If a provider failed to enable a probe then get out and + * let the consumer know we failed. + */ + if ((matched = dtrace_probe_enable(&ep->dted_probe, enab, ep)) < 0) + return (EBUSY); + + total_matched += matched; if (enab->dten_error != 0) { /* @@ -10680,17 +11763,18 @@ dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched) return (enab->dten_error); } + + ep->dted_probegen = dtrace_probegen; } - enab->dten_probegen = dtrace_probegen; if (nmatched != NULL) - *nmatched = matched; + *nmatched = total_matched; return (0); } static void -dtrace_enabling_matchall(void) +dtrace_enabling_matchall_with_cond(dtrace_match_cond_t *cond) { dtrace_enabling_t *enab; @@ -10698,44 +11782,36 @@ dtrace_enabling_matchall(void) lck_mtx_lock(&dtrace_lock); /* - * Because we can be called after dtrace_detach() has been called, we - * cannot assert that there are retained enablings. We can safely - * load from dtrace_retained, however: the taskq_destroy() at the - * end of dtrace_detach() will block pending our completion. + * Iterate over all retained enablings to see if any probes match + * against them. We only perform this operation on enablings for which + * we have sufficient permissions by virtue of being in the global zone + * or in the same zone as the DTrace client. Because we can be called + * after dtrace_detach() has been called, we cannot assert that there + * are retained enablings. We can safely load from dtrace_retained, + * however: the taskq_destroy() at the end of dtrace_detach() will + * block pending our completion. */ - for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) - (void) dtrace_enabling_match(enab, NULL); + + /* + * Darwin doesn't do zones. + * Behave as if always in "global" zone." 
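
The dmc_func/dmc_data pair consulted in dtrace_enabling_match() above is simply a predicate plus closure data. A hypothetical condition, assuming the dtrace_probedesc_t layout used throughout this file (the function name and any provider string passed through dmc_data are illustrative):

/*
 * Hypothetical matching precondition: accept only enablings whose
 * provider field equals the name passed through dmc_data.  A nonzero
 * return lets dtrace_enabling_match() proceed to the expensive
 * dtrace_probe_enable() call; zero skips the enabling.
 */
static int
match_only_provider(dtrace_probedesc_t *desc, void *data)
{
	return (strncmp(desc->dtpd_provider, (const char *)data,
	    DTRACE_PROVNAMELEN) == 0);
}
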
+ */ + for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { + (void) dtrace_enabling_match(enab, NULL, cond); + } lck_mtx_unlock(&dtrace_lock); lck_mtx_unlock(&cpu_lock); + } -static int -dtrace_enabling_matchstate(dtrace_state_t *state, int *nmatched) +static void +dtrace_enabling_matchall(void) { - dtrace_enabling_t *enab; - int matched, total = 0, err; - - lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - - for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) { - ASSERT(enab->dten_vstate->dtvs_state != NULL); - - if (enab->dten_vstate->dtvs_state != state) - continue; - - if ((err = dtrace_enabling_match(enab, &matched)) != 0) - return (err); - - total += matched; - } + dtrace_enabling_matchall_with_cond(NULL); +} - if (nmatched != NULL) - *nmatched = total; - return (0); -} /* * If an enabling is to be enabled without having matched probes (that is, if @@ -10771,7 +11847,7 @@ dtrace_enabling_prime(dtrace_state_t *state) for (i = 0; i < enab->dten_ndesc; i++) { enab->dten_current = enab->dten_desc[i]; - (void) dtrace_probe_enable(NULL, enab); + (void) dtrace_probe_enable(NULL, enab, NULL); } enab->dten_primed = 1; @@ -10789,6 +11865,7 @@ dtrace_enabling_provide(dtrace_provider_t *prv) { int i, all = 0; dtrace_probedesc_t desc; + dtrace_genid_t gen; lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED); @@ -10799,15 +11876,25 @@ dtrace_enabling_provide(dtrace_provider_t *prv) } do { - dtrace_enabling_t *enab = dtrace_retained; + dtrace_enabling_t *enab; void *parg = prv->dtpv_arg; - for (; enab != NULL; enab = enab->dten_next) { +retry: + gen = dtrace_retained_gen; + for (enab = dtrace_retained; enab != NULL; + enab = enab->dten_next) { for (i = 0; i < enab->dten_ndesc; i++) { desc = enab->dten_desc[i]->dted_probe; lck_mtx_unlock(&dtrace_lock); prv->dtpv_pops.dtps_provide(parg, &desc); lck_mtx_lock(&dtrace_lock); + /* + * Process the retained enablings again if + * they have changed while we weren't holding + * dtrace_lock. + */ + if (gen != dtrace_retained_gen) + goto retry; } } } while (all && (prv = prv->dtpv_next) != NULL); @@ -10824,6 +11911,7 @@ dtrace_enabling_provide(dtrace_provider_t *prv) static void dtrace_dof_error(dof_hdr_t *dof, const char *str) { +#pragma unused(dof) /* __APPLE__ */ if (dtrace_err_verbose) cmn_err(CE_WARN, "failed to process DOF: %s", str); @@ -10896,11 +11984,7 @@ dtrace_dof_create(dtrace_state_t *state) } static dof_hdr_t * -#if defined(__APPLE__) dtrace_dof_copyin(user_addr_t uarg, int *errp) -#else -dtrace_dof_copyin(uintptr_t uarg, int *errp) -#endif { dof_hdr_t hdr, *dof; @@ -10909,11 +11993,7 @@ dtrace_dof_copyin(uintptr_t uarg, int *errp) /* * First, we're going to copyin() the sizeof (dof_hdr_t). */ -#if defined(__APPLE__) if (copyin(uarg, &hdr, sizeof (hdr)) != 0) { -#else - if (copyin((void *)uarg, &hdr, sizeof (hdr)) != 0) { -#endif dtrace_dof_error(NULL, "failed to copyin DOF header"); *errp = EFAULT; return (NULL); @@ -10923,7 +12003,7 @@ dtrace_dof_copyin(uintptr_t uarg, int *errp) * Now we'll allocate the entire DOF and copy it in -- provided * that the length isn't outrageous. 
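
The hunk that follows adds a detail worth calling out: after the full copyin(), the header's load size is compared against the value read the first time. User memory can change between the two copies, so the sketch below (a hypothetical condensation of dtrace_dof_copyin(), with some error handling trimmed) re-validates before trusting the buffer:

static dof_hdr_t *
copyin_dof(user_addr_t uarg, int *errp)
{
	dof_hdr_t hdr, *dof;

	if (copyin(uarg, &hdr, sizeof (hdr)) != 0) {
		*errp = EFAULT;
		return (NULL);
	}
	if (hdr.dofh_loadsz >= (uint64_t)dtrace_dof_maxsize) {
		*errp = E2BIG;
		return (NULL);
	}
	dof = dt_kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP);
	if (copyin(uarg, dof, hdr.dofh_loadsz) != 0 ||
	    dof->dofh_loadsz != hdr.dofh_loadsz) {	/* re-validate */
		dt_kmem_free_aligned(dof, hdr.dofh_loadsz);
		*errp = EFAULT;
		return (NULL);
	}
	return (dof);
}
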
*/ - if (hdr.dofh_loadsz >= dtrace_dof_maxsize) { + if (hdr.dofh_loadsz >= (uint64_t)dtrace_dof_maxsize) { dtrace_dof_error(&hdr, "load size exceeds maximum"); *errp = E2BIG; return (NULL); @@ -10937,21 +12017,16 @@ dtrace_dof_copyin(uintptr_t uarg, int *errp) dof = dt_kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP); -#if defined(__APPLE__) - if (copyin(uarg, dof, hdr.dofh_loadsz) != 0) { -#else - if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0) { -#endif - dt_kmem_free_aligned(dof, hdr.dofh_loadsz); - *errp = EFAULT; - return (NULL); - } + if (copyin(uarg, dof, hdr.dofh_loadsz) != 0 || + dof->dofh_loadsz != hdr.dofh_loadsz) { + dt_kmem_free_aligned(dof, hdr.dofh_loadsz); + *errp = EFAULT; + return (NULL); + } return (dof); } -#if defined(__APPLE__) - static dof_hdr_t * dtrace_dof_copyin_from_proc(proc_t* p, user_addr_t uarg, int *errp) { @@ -10972,7 +12047,7 @@ dtrace_dof_copyin_from_proc(proc_t* p, user_addr_t uarg, int *errp) * Now we'll allocate the entire DOF and copy it in -- provided * that the length isn't outrageous. */ - if (hdr.dofh_loadsz >= dtrace_dof_maxsize) { + if (hdr.dofh_loadsz >= (uint64_t)dtrace_dof_maxsize) { dtrace_dof_error(&hdr, "load size exceeds maximum"); *errp = E2BIG; return (NULL); @@ -10995,8 +12070,6 @@ dtrace_dof_copyin_from_proc(proc_t* p, user_addr_t uarg, int *errp) return (dof); } -#endif /* __APPLE__ */ - static dof_hdr_t * dtrace_dof_property(const char *name) { @@ -11011,7 +12084,7 @@ dtrace_dof_property(const char *name) * as an integer array, and then squeeze it into a byte array. */ if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0, - (char *)name, (int **)&buf, &len) != DDI_PROP_SUCCESS) + name, (int **)&buf, &len) != DDI_PROP_SUCCESS) return (NULL); for (i = 0; i < len; i++) @@ -11029,7 +12102,7 @@ dtrace_dof_property(const char *name) return (NULL); } - if (loadsz >= dtrace_dof_maxsize) { + if (loadsz >= (uint64_t)dtrace_dof_maxsize) { ddi_prop_free(buf); dtrace_dof_error(NULL, "oversized DOF"); return (NULL); @@ -11120,6 +12193,9 @@ dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc) (char *)(str + probe->dofp_provider), MIN(DTRACE_PROVNAMELEN - 1, size - probe->dofp_provider)); + /* APPLE NOTE: Darwin employs size bounded string operation. */ + desc->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; + if (probe->dofp_mod >= strtab->dofs_size) { dtrace_dof_error(dof, "corrupt probe module"); return (NULL); @@ -11128,6 +12204,9 @@ dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc) (void) strncpy(desc->dtpd_mod, (char *)(str + probe->dofp_mod), MIN(DTRACE_MODNAMELEN - 1, size - probe->dofp_mod)); + /* APPLE NOTE: Darwin employs size bounded string operation. */ + desc->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0'; + if (probe->dofp_func >= strtab->dofs_size) { dtrace_dof_error(dof, "corrupt probe function"); return (NULL); @@ -11136,6 +12215,9 @@ dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc) (void) strncpy(desc->dtpd_func, (char *)(str + probe->dofp_func), MIN(DTRACE_FUNCNAMELEN - 1, size - probe->dofp_func)); + /* APPLE NOTE: Darwin employs size bounded string operation. 
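
The explicit terminations added above exist because strncpy() does not NUL-terminate the destination when the source fills it. A minimal sketch of the copy-then-terminate idiom (the helper name is illustrative; MIN() as used elsewhere in this file):

static void
bounded_copy(char *dst, size_t dstlen, const char *src, size_t srclen)
{
	(void) strncpy(dst, src, MIN(dstlen - 1, srclen));
	dst[dstlen - 1] = '\0';	/* strncpy may not have terminated */
}
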
*/ + desc->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0'; + if (probe->dofp_name >= strtab->dofs_size) { dtrace_dof_error(dof, "corrupt probe name"); return (NULL); @@ -11144,6 +12226,9 @@ dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc) (void) strncpy(desc->dtpd_name, (char *)(str + probe->dofp_name), MIN(DTRACE_NAMELEN - 1, size - probe->dofp_name)); + /* APPLE NOTE: Darwin employs size bounded string operation. */ + desc->dtpd_name[DTRACE_NAMELEN - 1] = '\0'; + return (desc); } @@ -11155,8 +12240,10 @@ dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, size_t ttl = 0; dof_difohdr_t *dofd; uintptr_t daddr = (uintptr_t)dof; - size_t max = dtrace_difo_maxsize; - int i, l, n; + size_t max_size = dtrace_difo_maxsize; + uint_t i; + int l, n; + static const struct { int section; @@ -11182,11 +12269,7 @@ dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, offsetof(dtrace_difo_t, dtdo_varlen), sizeof (dtrace_difv_t), sizeof (uint_t), "multiple variable tables" }, -#if !defined(__APPLE__) - { DOF_SECT_NONE, 0, 0, 0, NULL } -#else { DOF_SECT_NONE, 0, 0, 0, 0, NULL } -#endif /* __APPLE__ */ }; if (sec->dofs_type != DOF_SECT_DIFOHDR) { @@ -11220,7 +12303,7 @@ dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, dofd->dofd_links[l])) == NULL) goto err; /* invalid section link */ - if (ttl + subsec->dofs_size > max) { + if (ttl + subsec->dofs_size > max_size) { dtrace_dof_error(dof, "exceeds maximum size"); goto err; } @@ -11228,7 +12311,8 @@ dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, ttl += subsec->dofs_size; for (i = 0; difo[i].section != DOF_SECT_NONE; i++) { - if (subsec->dofs_type != difo[i].section) + + if (subsec->dofs_type != (uint32_t)difo[i].section) continue; if (!(subsec->dofs_flags & DOF_SECF_LOAD)) { @@ -11236,7 +12320,7 @@ dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, goto err; } - if (subsec->dofs_align != difo[i].align) { + if (subsec->dofs_align != (uint32_t)difo[i].align) { dtrace_dof_error(dof, "bad alignment"); goto err; } @@ -11249,7 +12333,7 @@ dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, goto err; } - if (difo[i].entsize != subsec->dofs_entsize) { + if ((uint32_t)difo[i].entsize != subsec->dofs_entsize) { dtrace_dof_error(dof, "entry size mismatch"); goto err; } @@ -11281,7 +12365,7 @@ dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, goto err; } } - + if (dp->dtdo_buf == NULL) { /* * We can't have a DIF object without DIF text. @@ -11384,15 +12468,19 @@ dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate, (uintptr_t)sec->dofs_offset + offs); kind = (dtrace_actkind_t)desc->dofa_kind; - if (DTRACEACT_ISPRINTFLIKE(kind) && - (kind != DTRACEACT_PRINTA || - desc->dofa_strtab != DOF_SECIDX_NONE)) { + if ((DTRACEACT_ISPRINTFLIKE(kind) && + (kind != DTRACEACT_PRINTA || desc->dofa_strtab != DOF_SECIDX_NONE)) || + (kind == DTRACEACT_DIFEXPR && desc->dofa_strtab != DOF_SECIDX_NONE)) + { dof_sec_t *strtab; char *str, *fmt; uint64_t i; /* - * printf()-like actions must have a format string. + * The argument to these actions is an index into the + * DOF string table. For printf()-like actions, this + * is the format string. For print(), this is the + * CTF type of the expression result. 
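
Both consumers of dofa_strtab reduce to the same defensive lookup: the index must fall inside the string table, and the referenced string must terminate before the table ends. A hypothetical helper capturing that pattern (not a function in this file):

static const char *
dof_string(const char *strtab, uint64_t tabsize, uint64_t offset)
{
	uint64_t i;

	if (offset >= tabsize)
		return (NULL);			/* index out of bounds */

	for (i = offset; i < tabsize; i++) {
		if (strtab[i] == '\0')
			return (strtab + offset);
	}

	return (NULL);				/* unterminated string */
}
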
*/ if ((strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL) @@ -11528,76 +12616,10 @@ err: return (NULL); } -#if !defined(__APPLE__) /* APPLE dyld has already done this for us */ /* - * Apply the relocations from the specified 'sec' (a DOF_SECT_URELHDR) to the - * specified DOF. At present, this amounts to simply adding 'ubase' to the - * site of any user SETX relocations to account for load object base address. - * In the future, if we need other relocations, this function can be extended. + * APPLE NOTE: dyld handles dof relocation. + * Darwin does not need dtrace_dof_relocate() */ -static int -dtrace_dof_relocate(dof_hdr_t *dof, dof_sec_t *sec, uint64_t ubase) -{ - uintptr_t daddr = (uintptr_t)dof; - dof_relohdr_t *dofr = - (dof_relohdr_t *)(uintptr_t)(daddr + sec->dofs_offset); - dof_sec_t *ss, *rs, *ts; - dof_relodesc_t *r; - uint_t i, n; - - if (sec->dofs_size < sizeof (dof_relohdr_t) || - sec->dofs_align != sizeof (dof_secidx_t)) { - dtrace_dof_error(dof, "invalid relocation header"); - return (-1); - } - - ss = dtrace_dof_sect(dof, DOF_SECT_STRTAB, dofr->dofr_strtab); - rs = dtrace_dof_sect(dof, DOF_SECT_RELTAB, dofr->dofr_relsec); - ts = dtrace_dof_sect(dof, DOF_SECT_NONE, dofr->dofr_tgtsec); - - if (ss == NULL || rs == NULL || ts == NULL) - return (-1); /* dtrace_dof_error() has been called already */ - - if (rs->dofs_entsize < sizeof (dof_relodesc_t) || - rs->dofs_align != sizeof (uint64_t)) { - dtrace_dof_error(dof, "invalid relocation section"); - return (-1); - } - - r = (dof_relodesc_t *)(uintptr_t)(daddr + rs->dofs_offset); - n = rs->dofs_size / rs->dofs_entsize; - - for (i = 0; i < n; i++) { - uintptr_t taddr = daddr + ts->dofs_offset + r->dofr_offset; - - switch (r->dofr_type) { - case DOF_RELO_NONE: - break; - case DOF_RELO_SETX: - if (r->dofr_offset >= ts->dofs_size || r->dofr_offset + - sizeof (uint64_t) > ts->dofs_size) { - dtrace_dof_error(dof, "bad relocation offset"); - return (-1); - } - - if (!IS_P2ALIGNED(taddr, sizeof (uint64_t))) { - dtrace_dof_error(dof, "misaligned setx relo"); - return (-1); - } - - *(uint64_t *)taddr += ubase; - break; - default: - dtrace_dof_error(dof, "invalid relocation type"); - return (-1); - } - - r = (dof_relodesc_t *)((uintptr_t)r + rs->dofs_entsize); - } - - return (0); -} -#endif /* __APPLE__ */ /* * The dof_hdr_t passed to dtrace_dof_slurp() should be a partially validated @@ -11609,6 +12631,7 @@ static int dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, dtrace_enabling_t **enabp, uint64_t ubase, int noprobes) { +#pragma unused(ubase) /* __APPLE__ */ uint64_t len = dof->dofh_loadsz, seclen; uintptr_t daddr = (uintptr_t)dof; dtrace_ecbdesc_t *ep; @@ -11640,21 +12663,13 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, return (-1); } -#if !defined(__APPLE__) - if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 && - dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_2) { - dtrace_dof_error(dof, "DOF version mismatch"); - return (-1); - } -#else /* - * We only support DOF_VERSION_3 for now. + * APPLE NOTE: Darwin only supports DOF_VERSION_3 for now. 
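
Condensed, the Darwin version gate is two checks against the DOF identification bytes; a sketch (the helper name is an invention, the constants are the ones used below):

static int
dof_version_ok(const dof_hdr_t *dof)
{
	if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_3)
		return (0);	/* only DOF version 3 is accepted */
	if (dof->dofh_ident[DOF_ID_DIFVERS] != DIF_VERSION_2)
		return (0);	/* only DIF instruction set v2 */
	return (1);
}
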
*/ if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_3) { dtrace_dof_error(dof, "DOF version mismatch"); return (-1); } -#endif if (dof->dofh_ident[DOF_ID_DIFVERS] != DIF_VERSION_2) { dtrace_dof_error(dof, "DOF uses unsupported instruction set"); @@ -11758,32 +12773,10 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, } } -#if !defined(__APPLE__) - /* - * APPLE NOTE: We have no relocation to perform. All dof values are - * relative offsets. - */ - /* - * Take a second pass through the sections and locate and perform any - * relocations that are present. We do this after the first pass to - * be sure that all sections have had their headers validated. + * APPLE NOTE: We have no further relocation to perform. + * All dof values are relative offsets. */ - for (i = 0; i < dof->dofh_secnum; i++) { - dof_sec_t *sec = (dof_sec_t *)(daddr + - (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize); - - if (!(sec->dofs_flags & DOF_SECF_LOAD)) - continue; /* skip sections that are not loadable */ - - switch (sec->dofs_type) { - case DOF_SECT_URELHDR: - if (dtrace_dof_relocate(dof, sec, ubase) != 0) - return (-1); - break; - } - } -#endif /* __APPLE__ */ if ((enab = *enabp) == NULL) enab = *enabp = dtrace_enabling_create(vstate); @@ -11795,22 +12788,18 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, if (sec->dofs_type != DOF_SECT_ECBDESC) continue; -#if !defined(__APPLE__) - if ((ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr)) == NULL) { - dtrace_enabling_destroy(enab); - *enabp = NULL; - return (-1); - } -#else - /* XXX Defend against gcc 4.0 botch on x86 (not all paths out of inlined dtrace_dof_ecbdesc - are checked for the NULL return value.) */ + /* + * APPLE NOTE: Defend against gcc 4.0 botch on x86. + * not all paths out of inlined dtrace_dof_ecbdesc + * are checked for the NULL return value. + * Check for NULL explicitly here. 
+ */ ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr); if (ep == NULL) { dtrace_enabling_destroy(enab); *enabp = NULL; return (-1); } -#endif /* __APPLE__ */ dtrace_enabling_add(enab, ep); } @@ -11825,7 +12814,8 @@ dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr, static int dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state) { - int i, rval; + uint_t i; + int rval; uint32_t entsize; size_t offs; dof_optdesc_t *desc; @@ -11862,7 +12852,7 @@ dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state) return (EINVAL); } - if (desc->dofo_value == DTRACEOPT_UNSET) { + if (desc->dofo_value == (uint64_t)DTRACEOPT_UNSET) { dtrace_dof_error(dof, "unset option"); return (EINVAL); } @@ -11881,17 +12871,14 @@ dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state) /* * DTrace Consumer State Functions */ -#if defined(__APPLE__) -static -#endif /* __APPLE__ */ -int +static int dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) { - size_t hashsize, maxper, min, chunksize = dstate->dtds_chunksize; + size_t hashsize, maxper, min_size, chunksize = dstate->dtds_chunksize; void *base; uintptr_t limit; dtrace_dynvar_t *dvar, *next, *start; - int i; + size_t i; lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL); @@ -11901,8 +12888,10 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) if ((dstate->dtds_chunksize = chunksize) == 0) dstate->dtds_chunksize = DTRACE_DYNVAR_CHUNKSIZE; - if (size < (min = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t))) - size = min; + VERIFY(dstate->dtds_chunksize < (LONG_MAX - sizeof (dtrace_dynhash_t))); + + if (size < (min_size = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t))) + size = min_size; if ((base = kmem_zalloc(size, KM_NOSLEEP)) == NULL) return (ENOMEM); @@ -11910,7 +12899,7 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) dstate->dtds_size = size; dstate->dtds_base = base; dstate->dtds_percpu = kmem_cache_alloc(dtrace_state_cache, KM_SLEEP); - bzero(dstate->dtds_percpu, NCPU * sizeof (dtrace_dstate_percpu_t)); + bzero(dstate->dtds_percpu, (int)NCPU * sizeof (dtrace_dstate_percpu_t)); hashsize = size / (dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)); @@ -11941,7 +12930,10 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) ((uintptr_t)base + hashsize * sizeof (dtrace_dynhash_t)); limit = (uintptr_t)base + size; - maxper = (limit - (uintptr_t)start) / NCPU; + VERIFY((uintptr_t)start < limit); + VERIFY((uintptr_t)start >= (uintptr_t)base); + + maxper = (limit - (uintptr_t)start) / (int)NCPU; maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize; for (i = 0; i < NCPU; i++) { @@ -11962,7 +12954,7 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) start = (dtrace_dynvar_t *)limit; } - ASSERT(limit <= (uintptr_t)base + size); + VERIFY(limit <= (uintptr_t)base + size); for (;;) { next = (dtrace_dynvar_t *)((uintptr_t)dvar + @@ -11971,6 +12963,8 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) if ((uintptr_t)next + dstate->dtds_chunksize >= limit) break; + VERIFY((uintptr_t)dvar >= (uintptr_t)base && + (uintptr_t)dvar <= (uintptr_t)base + size); dvar->dtdv_next = next; dvar = next; } @@ -11982,10 +12976,7 @@ dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size) return (0); } -#if defined(__APPLE__) -static -#endif /* __APPLE__ */ -void +static void dtrace_dstate_fini(dtrace_dstate_t *dstate) { lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); @@ -12060,58 +13051,30 @@ dtrace_state_deadman(dtrace_state_t 
*state) state->dts_alive = now; } -#if defined(__APPLE__) -static -#endif /* __APPLE__ */ -dtrace_state_t * -dtrace_state_create(dev_t *devp, cred_t *cr) +static int +dtrace_state_create(dev_t *devp, cred_t *cr, dtrace_state_t **new_state) { minor_t minor; major_t major; char c[30]; dtrace_state_t *state; dtrace_optval_t *opt; - int bufsize = NCPU * sizeof (dtrace_buffer_t), i; + int bufsize = (int)NCPU * sizeof (dtrace_buffer_t), i; lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); -#if !defined(__APPLE__) - minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1, - VM_BESTFIT | VM_SLEEP); -#else - /* - * Darwin's DEVFS layer acquired the minor number for this "device" when it called - * dtrace_devfs_clone_func(). At that time, dtrace_devfs_clone_func() proposed a minor number - * (next unused according to vmem_alloc()) and then immediately put the number back in play - * (by calling vmem_free()). Now that minor number is being used for an open, so committing it - * to use. The following vmem_alloc() must deliver that same minor number. - */ - - minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1, - VM_BESTFIT | VM_SLEEP); - - if (NULL != devp) { - ASSERT(getminor(*devp) == minor); - if (getminor(*devp) != minor) { - printf("dtrace_open: couldn't re-acquire vended minor number %d. Instead got %d\n", - getminor(*devp), minor); - vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); - return NULL; - } - } else { - /* NULL==devp iff "Anonymous state" (see dtrace_anon_property), - * so just vend the minor device number here de novo since no "open" has occurred. */ - } - -#endif /* __APPLE__ */ + /* Cause restart */ + *new_state = NULL; + + minor = getminor(*devp); - if (ddi_soft_state_zalloc(dtrace_softstate, minor) != DDI_SUCCESS) { - vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); - return (NULL); + state = dtrace_state_allocate(minor); + if (NULL == state) { + printf("dtrace_open: couldn't acquire minor number %d. This usually means that too many DTrace clients are in use at the moment", minor); + return (ERESTART); /* can't reacquire */ } - state = ddi_get_soft_state(dtrace_softstate, minor); state->dts_epid = DTRACE_EPIDNONE + 1; (void) snprintf(c, sizeof (c), "dtrace_aggid_%d", minor); @@ -12137,6 +13100,7 @@ dtrace_state_create(dev_t *devp, cred_t *cr) */ state->dts_buffer = kmem_zalloc(bufsize, KM_SLEEP); state->dts_aggbuffer = kmem_zalloc(bufsize, KM_SLEEP); + state->dts_buf_over_limit = 0; state->dts_cleaner = CYCLIC_NONE; state->dts_deadman = CYCLIC_NONE; state->dts_vstate.dtvs_state = state; @@ -12162,8 +13126,7 @@ dtrace_state_create(dev_t *devp, cred_t *cr) opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_default; opt[DTRACEOPT_JSTACKFRAMES] = dtrace_jstackframes_default; opt[DTRACEOPT_JSTACKSTRSIZE] = dtrace_jstackstrsize_default; - - state->dts_activity = DTRACE_ACTIVITY_INACTIVE; + opt[DTRACEOPT_BUFLIMIT] = dtrace_buflimit_default; /* * Depending on the user credentials, we set flag bits which alter probe @@ -12171,10 +13134,28 @@ dtrace_state_create(dev_t *devp, cred_t *cr) * actual anonymous tracing, or the possession of all privileges, all of * the normal checks are bypassed. 
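
The Apple-specific branch that follows collapses to a two-way policy decision for fully privileged opens. A hypothetical distillation (the helper function is not in the source; the flag combinations are taken from the block below):

static void
set_privileged_cred(dtrace_state_t *state)
{
	if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed()) {
		/* Restricted platform policy: proc-scoped visibility only. */
		state->dts_cred.dcr_visible = DTRACE_CRV_ALLPROC;
		state->dts_cred.dcr_action = DTRACE_CRA_PROC |
		    DTRACE_CRA_PROC_CONTROL |
		    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
	} else {
		state->dts_cred.dcr_visible = DTRACE_CRV_ALL;
		state->dts_cred.dcr_action = DTRACE_CRA_ALL;
	}
}
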
*/ +#if defined(__APPLE__) + if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) { + if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed()) { + /* + * Allow only proc credentials when DTrace is + * restricted by the current security policy + */ + state->dts_cred.dcr_visible = DTRACE_CRV_ALLPROC; + state->dts_cred.dcr_action = DTRACE_CRA_PROC | DTRACE_CRA_PROC_CONTROL | DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER; + } + else { + state->dts_cred.dcr_visible = DTRACE_CRV_ALL; + state->dts_cred.dcr_action = DTRACE_CRA_ALL; + } + } + +#else if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) { state->dts_cred.dcr_visible = DTRACE_CRV_ALL; state->dts_cred.dcr_action = DTRACE_CRA_ALL; - } else { + } + else { /* * Set up the credentials for this instantiation. We take a * hold on the credential to prevent it from disappearing on @@ -12221,18 +13202,13 @@ dtrace_state_create(dev_t *devp, cred_t *cr) * If we have all privs in whatever zone this is, * we can do destructive things to processes which * have altered credentials. + * + * APPLE NOTE: Darwin doesn't do zones. + * Behave as if zone always has destructive privs. */ -#if !defined(__APPLE__) - if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE), - cr->cr_zone->zone_privset)) { - state->dts_cred.dcr_action |= - DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG; - } -#else - /* Darwin doesn't do zones. */ + state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG; -#endif /* __APPLE__ */ } /* @@ -12272,18 +13248,12 @@ dtrace_state_create(dev_t *devp, cred_t *cr) * If we have all privs in whatever zone this is, * we can do destructive things to processes which * have altered credentials. - */ -#if !defined(__APPLE__) - if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE), - cr->cr_zone->zone_privset)) { - state->dts_cred.dcr_action |= - DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG; - } -#else - /* Darwin doesn't do zones. */ + * + * APPLE NOTE: Darwin doesn't do zones. + * Behave as if zone always has destructive privs. + */ state->dts_cred.dcr_action |= DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG; -#endif /* __APPLE__ */ } /* @@ -12302,15 +13272,18 @@ dtrace_state_create(dev_t *devp, cred_t *cr) DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE; } } +#endif - return (state); + *new_state = state; + return(0); /* Success */ } static int dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) { dtrace_optval_t *opt = state->dts_options, size; - processorid_t cpu; + processorid_t cpu = 0; + size_t limit = buf->dtb_size; int flags = 0, rval; lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); @@ -12341,7 +13314,7 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) flags |= DTRACEBUF_INACTIVE; } - for (size = opt[which]; size >= sizeof (uint64_t); size >>= 1) { + for (size = opt[which]; (size_t)size >= sizeof (uint64_t); size >>= 1) { /* * The size must be 8-byte aligned. If the size is not 8-byte * aligned, drop it down by the difference. 
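
The sizing loop above implements a back-off allocator: start at the requested size, keep it 8-byte aligned, and halve on failure until the minimum record size is reached. A hypothetical stand-alone version (try_alloc() is an invented stand-in for the dtrace_buffer_alloc() call):

static int
alloc_with_backoff(size_t requested, size_t *granted)
{
	size_t size;

	for (size = requested; size >= sizeof (uint64_t); size >>= 1) {
		size_t misalign = size & (sizeof (uint64_t) - 1);

		if (misalign != 0)
			size -= misalign;	/* drop to 8-byte alignment */

		if (try_alloc(size) == 0) {	/* hypothetical allocator */
			*granted = size;
			return (0);
		}
	}

	return (ENOMEM);	/* even the smallest size failed */
}
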
@@ -12358,8 +13331,8 @@ dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which) */ return (E2BIG); } - - rval = dtrace_buffer_alloc(buf, size, flags, cpu); + limit = opt[DTRACEOPT_BUFLIMIT] * size / 100; + rval = dtrace_buffer_alloc(buf, limit, size, flags, cpu); if (rval != ENOMEM) { opt[which] = size; @@ -12430,7 +13403,7 @@ dtrace_state_go(dtrace_state_t *state, processorid_t *cpu) dtrace_buffer_t *buf; cyc_handler_t hdlr; cyc_time_t when; - int rval = 0, i, bufsize = NCPU * sizeof (dtrace_buffer_t); + int rval = 0, i, bufsize = (int)NCPU * sizeof (dtrace_buffer_t); dtrace_icookie_t cookie; lck_mtx_lock(&cpu_lock); @@ -12536,7 +13509,7 @@ dtrace_state_go(dtrace_state_t *state, processorid_t *cpu) * a buffer to use as scratch. */ if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET || - opt[DTRACEOPT_BUFSIZE] < state->dts_needed) { + (size_t)opt[DTRACEOPT_BUFSIZE] < state->dts_needed) { opt[DTRACEOPT_BUFSIZE] = state->dts_needed; } } @@ -12607,6 +13580,18 @@ dtrace_state_go(dtrace_state_t *state, processorid_t *cpu) if (opt[DTRACEOPT_CLEANRATE] > dtrace_cleanrate_max) opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max; + if (opt[DTRACEOPT_STRSIZE] > dtrace_strsize_max) + opt[DTRACEOPT_STRSIZE] = dtrace_strsize_max; + + if (opt[DTRACEOPT_STRSIZE] < dtrace_strsize_min) + opt[DTRACEOPT_STRSIZE] = dtrace_strsize_min; + + if (opt[DTRACEOPT_BUFLIMIT] > dtrace_buflimit_max) + opt[DTRACEOPT_BUFLIMIT] = dtrace_buflimit_max; + + if (opt[DTRACEOPT_BUFLIMIT] < dtrace_buflimit_min) + opt[DTRACEOPT_BUFLIMIT] = dtrace_buflimit_min; + hdlr.cyh_func = (cyc_func_t)dtrace_state_clean; hdlr.cyh_arg = state; hdlr.cyh_level = CY_LOW_LEVEL; @@ -12764,7 +13749,12 @@ dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option, switch (option) { case DTRACEOPT_DESTRUCTIVE: - if (dtrace_destructive_disallow) + /* + * Prevent consumers from enabling destructive actions if DTrace + * is running in a restricted environment, or if actions are + * disallowed. + */ + if (dtrace_is_restricted() || dtrace_destructive_disallow) return (EACCES); state->dts_cred.dcr_destructive = 1; @@ -12808,7 +13798,7 @@ dtrace_state_destroy(dtrace_state_t *state) dtrace_ecb_t *ecb; dtrace_vstate_t *vstate = &state->dts_vstate; minor_t minor = getminor(state->dts_dev); - int i, bufsize = NCPU * sizeof (dtrace_buffer_t); + int i, bufsize = (int)NCPU * sizeof (dtrace_buffer_t); dtrace_speculation_t *spec = state->dts_speculations; int nspec = state->dts_nspeculations; uint32_t match; @@ -12894,7 +13884,7 @@ dtrace_state_destroy(dtrace_state_t *state) kmem_free(state->dts_ecbs, state->dts_necbs * sizeof (dtrace_ecb_t *)); if (state->dts_aggregations != NULL) { -#ifdef DEBUG +#if DEBUG for (i = 0; i < state->dts_naggregations; i++) ASSERT(state->dts_aggregations[i] == NULL); #endif @@ -12914,8 +13904,7 @@ dtrace_state_destroy(dtrace_state_t *state) dtrace_format_destroy(state); vmem_destroy(state->dts_aggid_arena); - ddi_soft_state_free(dtrace_softstate, minor); - vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1); + dtrace_state_free(minor); } /* @@ -12981,10 +13970,9 @@ dtrace_anon_property(void) * If we haven't allocated an anonymous state, we'll do so now. 
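
The min/max pinning added above for cleanrate, strsize and buflimit repeats one small pattern; a hypothetical helper expressing it, with a usage line for illustration:

static dtrace_optval_t
clamp_opt(dtrace_optval_t val, dtrace_optval_t lo, dtrace_optval_t hi)
{
	if (val < lo)
		return (lo);
	if (val > hi)
		return (hi);
	return (val);
}

/*
 * e.g. opt[DTRACEOPT_BUFLIMIT] = clamp_opt(opt[DTRACEOPT_BUFLIMIT],
 *     dtrace_buflimit_min, dtrace_buflimit_max);
 */
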
*/ if ((state = dtrace_anon.dta_state) == NULL) { - state = dtrace_state_create(NULL, NULL); + rv = dtrace_state_create(NULL, NULL, &state); dtrace_anon.dta_state = state; - - if (state == NULL) { + if (rv != 0 || state == NULL) { /* * This basically shouldn't happen: the only * failure mode from dtrace_state_create() is a @@ -13049,14 +14037,15 @@ static void dtrace_helper_trace(dtrace_helper_action_t *helper, dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, int where) { - uint32_t size, next, nnext, i; + uint32_t size, next, nnext; + int i; dtrace_helptrace_t *ent; uint16_t flags = cpu_core[CPU->cpu_id].cpuc_dtrace_flags; if (!dtrace_helptrace_enabled) return; - ASSERT(vstate->dtvs_nlocals <= dtrace_helptrace_nlocals); + ASSERT((uint32_t)vstate->dtvs_nlocals <= dtrace_helptrace_nlocals); /* * What would a tracing framework be without its own tracing @@ -13100,7 +14089,7 @@ dtrace_helper_trace(dtrace_helper_action_t *helper, if ((svar = vstate->dtvs_locals[i]) == NULL) continue; - ASSERT(svar->dtsv_size >= NCPU * sizeof (uint64_t)); + ASSERT(svar->dtsv_size >= (int)NCPU * sizeof (uint64_t)); ent->dtht_locals[i] = ((uint64_t *)(uintptr_t)svar->dtsv_data)[CPU->cpu_id]; } @@ -13113,7 +14102,7 @@ dtrace_helper(int which, dtrace_mstate_t *mstate, uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; uint64_t sarg0 = mstate->dtms_arg[0]; uint64_t sarg1 = mstate->dtms_arg[1]; - uint64_t rval; + uint64_t rval = 0; dtrace_helpers_t *helpers = curproc->p_dtrace_helpers; dtrace_helper_action_t *helper; dtrace_vstate_t *vstate; @@ -13194,7 +14183,7 @@ err: mstate->dtms_arg[0] = sarg0; mstate->dtms_arg[1] = sarg1; - return (NULL); + return (0); } static void @@ -13216,19 +14205,12 @@ dtrace_helper_action_destroy(dtrace_helper_action_t *helper, kmem_free(helper, sizeof (dtrace_helper_action_t)); } -#if !defined(__APPLE__) -static int -dtrace_helper_destroygen(int gen) -{ - proc_t *p = curproc; -#else static int dtrace_helper_destroygen(proc_t* p, int gen) { -#endif dtrace_helpers_t *help = p->p_dtrace_helpers; dtrace_vstate_t *vstate; - int i; + uint_t i; lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); @@ -13262,7 +14244,7 @@ dtrace_helper_destroygen(proc_t* p, int gen) * given generation number. */ for (;;) { - dtrace_helper_provider_t *prov; + dtrace_helper_provider_t *prov = NULL; /* * Look for a helper provider with the right generation. 
We @@ -13299,7 +14281,7 @@ dtrace_helper_destroygen(proc_t* p, int gen) if (dtrace_meta_pid != NULL) { ASSERT(dtrace_deferred_pid == NULL); dtrace_helper_provider_remove(&prov->dthp_prov, - p->p_pid); + p); } lck_mtx_unlock(&dtrace_meta_lock); @@ -13326,13 +14308,8 @@ dtrace_helper_validate(dtrace_helper_action_t *helper) return (err == 0); } -#if !defined(__APPLE__) -static int -dtrace_helper_action_add(int which, dtrace_ecbdesc_t *ep) -#else static int dtrace_helper_action_add(proc_t* p, int which, dtrace_ecbdesc_t *ep) -#endif { dtrace_helpers_t *help; dtrace_helper_action_t *helper, *last; @@ -13344,11 +14321,7 @@ dtrace_helper_action_add(proc_t* p, int which, dtrace_ecbdesc_t *ep) if (which < 0 || which >= DTRACE_NHELPER_ACTIONS) return (EINVAL); -#if !defined(__APPLE__) - help = curproc->p_dtrace_helpers; -#else help = p->p_dtrace_helpers; -#endif last = help->dthps_actions[which]; vstate = &help->dthps_vstate; @@ -13401,7 +14374,7 @@ dtrace_helper_action_add(proc_t* p, int which, dtrace_ecbdesc_t *ep) last->dtha_next = helper; } - if (vstate->dtvs_nlocals > dtrace_helptrace_nlocals) { + if ((uint32_t)vstate->dtvs_nlocals > dtrace_helptrace_nlocals) { dtrace_helptrace_nlocals = vstate->dtvs_nlocals; dtrace_helptrace_next = 0; } @@ -13451,7 +14424,7 @@ dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help, lck_mtx_unlock(&dtrace_lock); - dtrace_helper_provide(dofhp, p->p_pid); + dtrace_helper_provide(dofhp, p); } else { /* @@ -13459,37 +14432,27 @@ dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help, * off to the meta provider. */ - int i; + uint_t i; lck_mtx_unlock(&dtrace_lock); for (i = 0; i < help->dthps_nprovs; i++) { dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov, - p->p_pid); + p); } } lck_mtx_unlock(&dtrace_meta_lock); } -#if !defined(__APPLE__) -static int -dtrace_helper_provider_add(dof_helper_t *dofhp, int gen) -#else static int dtrace_helper_provider_add(proc_t* p, dof_helper_t *dofhp, int gen) -#endif { dtrace_helpers_t *help; dtrace_helper_provider_t *hprov, **tmp_provs; uint_t tmp_maxprovs, i; lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); - -#if !defined(__APPLE__) - help = curproc->p_dtrace_helpers; -#else help = p->p_dtrace_helpers; -#endif ASSERT(help != NULL); /* @@ -13775,13 +14738,8 @@ dtrace_helper_provider_validate(dof_hdr_t *dof, dof_sec_t *sec) return (0); } -#if !defined(__APPLE__) -static int -dtrace_helper_slurp(dof_hdr_t *dof, dof_helper_t *dhp) -#else static int dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp) -#endif { dtrace_helpers_t *help; dtrace_vstate_t *vstate; @@ -13791,13 +14749,8 @@ dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp) lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED); -#if !defined(__APPLE__) - if ((help = curproc->p_dtrace_helpers) == NULL) - help = dtrace_helpers_create(curproc); -#else if ((help = p->p_dtrace_helpers) == NULL) help = dtrace_helpers_create(p); -#endif vstate = &help->dthps_vstate; @@ -13811,7 +14764,7 @@ dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp) * Look for helper providers and validate their descriptions. 
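
The scan that follows visits every section header and validates each DOF_SECT_PROVIDER before any helper state is committed, so one bad provider rejects the whole DOF. A hypothetical condensation of that walk (the function name is illustrative):

static int
count_valid_providers(dof_hdr_t *dof)
{
	uintptr_t daddr = (uintptr_t)dof;
	uint32_t i;
	int nprovs = 0;

	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
		    dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_PROVIDER)
			continue;

		if (dtrace_helper_provider_validate(dof, sec) != 0)
			return (-1);	/* one bad provider rejects all */

		nprovs++;
	}

	return (nprovs);
}
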
*/ if (dhp != NULL) { - for (i = 0; i < dof->dofh_secnum; i++) { + for (i = 0; (uint32_t)i < dof->dofh_secnum; i++) { dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff + i * dof->dofh_secsize); @@ -13835,30 +14788,23 @@ dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp) dtrace_ecbdesc_t *ep = enab->dten_desc[i]; dtrace_probedesc_t *desc = &ep->dted_probe; - if (strcmp(desc->dtpd_provider, "dtrace") != 0) + /* APPLE NOTE: Darwin employs size bounded string operation. */ + if (!LIT_STRNEQL(desc->dtpd_provider, "dtrace")) continue; - if (strcmp(desc->dtpd_mod, "helper") != 0) + if (!LIT_STRNEQL(desc->dtpd_mod, "helper")) continue; - if (strcmp(desc->dtpd_func, "ustack") != 0) + if (!LIT_STRNEQL(desc->dtpd_func, "ustack")) continue; -#if !defined(__APPLE__) - if ((rv = dtrace_helper_action_add(DTRACE_HELPER_ACTION_USTACK, ep)) != 0) -#else - if ((rv = dtrace_helper_action_add(p, DTRACE_HELPER_ACTION_USTACK, ep)) != 0) -#endif - { + if ((rv = dtrace_helper_action_add(p, DTRACE_HELPER_ACTION_USTACK, + ep)) != 0) { /* * Adding this helper action failed -- we are now going * to rip out the entire generation and return failure. */ -#if !defined(__APPLE__) - (void) dtrace_helper_destroygen(help->dthps_generation); -#else (void) dtrace_helper_destroygen(p, help->dthps_generation); -#endif dtrace_enabling_destroy(enab); dtrace_dof_destroy(dof); return (-1); @@ -13875,17 +14821,9 @@ dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp) if (dhp != NULL && nprovs > 0) { dhp->dofhp_dof = (uint64_t)(uintptr_t)dof; -#if !defined(__APPLE__) - if (dtrace_helper_provider_add(dhp, gen) == 0) { -#else if (dtrace_helper_provider_add(p, dhp, gen) == 0) { -#endif lck_mtx_unlock(&dtrace_lock); -#if !defined(__APPLE__) - dtrace_helper_provider_register(curproc, help, dhp); -#else dtrace_helper_provider_register(p, help, dhp); -#endif lck_mtx_lock(&dtrace_lock); destroy = 0; @@ -13898,10 +14836,8 @@ dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp) return (gen); } -#if defined(__APPLE__) - /* - * DTrace lazy dof + * APPLE NOTE: DTrace lazy dof implementation * * DTrace user static probes (USDT probes) and helper actions are loaded * in a process by proccessing dof sections. The dof sections are passed @@ -13958,7 +14894,7 @@ dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp) * If the dofs data is claimed by this method, dofs_claimed will be set. * Callers should not free claimed dofs. */ -int +static int dtrace_lazy_dofs_add(proc_t *p, dof_ioctl_data_t* incoming_dofs, int *dofs_claimed) { ASSERT(p); @@ -13969,10 +14905,6 @@ dtrace_lazy_dofs_add(proc_t *p, dof_ioctl_data_t* incoming_dofs, int *dofs_claim lck_rw_lock_shared(&dtrace_dof_mode_lock); - /* - * If we have lazy dof, dof mode better be LAZY_ON. - */ - ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON); ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL); ASSERT(dtrace_dof_mode != DTRACE_DOF_MODE_NEVER); @@ -14040,7 +14972,7 @@ dtrace_lazy_dofs_add(proc_t *p, dof_ioctl_data_t* incoming_dofs, int *dofs_claim for (i=0; idofiod_count-1; i++) { ASSERT(all_dofs->dofiod_helpers[i].dofhp_dof < all_dofs->dofiod_helpers[i+1].dofhp_dof); } -#endif DEBUG +#endif /* DEBUG */ unlock: lck_mtx_unlock(&p->p_dtrace_sprlock); @@ -14059,17 +14991,13 @@ unlock: * EINVAL: lazy dof is enabled, but the requested generation was not found. * EACCES: This removal needs to be handled non-lazily. 
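
For context, a hypothetical caller honoring that contract (the real dispatch lives in the DTrace ioctl path; the wrapper name is illustrative): a lazy removal that returns EACCES is retried through the non-lazy helper teardown.

static int
remove_dofs(proc_t *p, int generation)
{
	int rval = dtrace_lazy_dofs_remove(p, generation);

	if (rval == EACCES) {
		/* Lazy removal declined; take the non-lazy path. */
		lck_mtx_lock(&dtrace_lock);
		rval = dtrace_helper_destroygen(p, generation);
		lck_mtx_unlock(&dtrace_lock);
	}

	return (rval);
}
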
*/ -int +static int dtrace_lazy_dofs_remove(proc_t *p, int generation) { int rval = EINVAL; lck_rw_lock_shared(&dtrace_dof_mode_lock); - /* - * If we have lazy dof, dof mode better be LAZY_ON. - */ - ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON); ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL); ASSERT(dtrace_dof_mode != DTRACE_DOF_MODE_NEVER); @@ -14141,7 +15069,7 @@ dtrace_lazy_dofs_remove(proc_t *p, int generation) } lck_rw_unlock_shared(&dtrace_dof_mode_lock); - + return rval; } @@ -14151,12 +15079,6 @@ dtrace_lazy_dofs_destroy(proc_t *p) lck_rw_lock_shared(&dtrace_dof_mode_lock); lck_mtx_lock(&p->p_dtrace_sprlock); - /* - * If we have lazy dof, dof mode better be LAZY_ON, or we must be exiting. - * We cannot assert against DTRACE_DOF_MODE_NEVER here, because we are called from - * kern_exit.c and kern_exec.c. - */ - ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON || p->p_lflag & P_LEXIT); ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL); dof_ioctl_data_t* lazy_dofs = p->p_dtrace_lazy_dofs; @@ -14170,47 +15092,6 @@ dtrace_lazy_dofs_destroy(proc_t *p) } } -void -dtrace_lazy_dofs_duplicate(proc_t *parent, proc_t *child) -{ - lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); - lck_mtx_assert(&parent->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED); - lck_mtx_assert(&child->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED); - - lck_rw_lock_shared(&dtrace_dof_mode_lock); - lck_mtx_lock(&parent->p_dtrace_sprlock); - - /* - * If we have lazy dof, dof mode better be LAZY_ON, or we must be exiting. - * We cannot assert against DTRACE_DOF_MODE_NEVER here, because we are called from - * kern_fork.c - */ - ASSERT(parent->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON); - ASSERT(parent->p_dtrace_lazy_dofs == NULL || parent->p_dtrace_helpers == NULL); - /* - * In theory we should hold the child sprlock, but this is safe... - */ - ASSERT(child->p_dtrace_lazy_dofs == NULL && child->p_dtrace_helpers == NULL); - - dof_ioctl_data_t* parent_dofs = parent->p_dtrace_lazy_dofs; - dof_ioctl_data_t* child_dofs = NULL; - if (parent_dofs) { - size_t parent_dofs_size = DOF_IOCTL_DATA_T_SIZE(parent_dofs->dofiod_count); - child_dofs = kmem_alloc(parent_dofs_size, KM_SLEEP); - bcopy(parent_dofs, child_dofs, parent_dofs_size); - } - - lck_mtx_unlock(&parent->p_dtrace_sprlock); - - if (child_dofs) { - lck_mtx_lock(&child->p_dtrace_sprlock); - child->p_dtrace_lazy_dofs = child_dofs; - lck_mtx_unlock(&child->p_dtrace_sprlock); - } - - lck_rw_unlock_shared(&dtrace_dof_mode_lock); -} - static int dtrace_lazy_dofs_proc_iterate_filter(proc_t *p, void* ignored) { @@ -14221,10 +15102,8 @@ dtrace_lazy_dofs_proc_iterate_filter(proc_t *p, void* ignored) return p->p_dtrace_lazy_dofs != NULL; } -static int -dtrace_lazy_dofs_proc_iterate_doit(proc_t *p, void* ignored) -{ -#pragma unused(ignored) +static void +dtrace_lazy_dofs_process(proc_t *p) { /* * It is possible this process may exit during our attempt to * fault in the dof. 
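
As an aside, the filter/doit pair defined below is meant to be handed to the kernel's process iterator; a hypothetical usage sketch (the proc_iterate() call shape is assumed from its use elsewhere in xnu):

/*
 * Hypothetical: walk all processes, selecting candidates cheaply with
 * the filter and doing the real work in the callout.
 */
static void
process_all_lazy_dofs(void)
{
	proc_iterate(PROC_ALLPROCLIST,
	    dtrace_lazy_dofs_proc_iterate_doit, NULL,
	    dtrace_lazy_dofs_proc_iterate_filter, NULL);
}
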
We could fix this by holding locks longer, @@ -14232,13 +15111,10 @@ dtrace_lazy_dofs_proc_iterate_doit(proc_t *p, void* ignored) */ lck_mtx_lock(&p->p_dtrace_sprlock); - /* - * In this case only, it is okay to have lazy dof when dof mode is DTRACE_DOF_MODE_LAZY_OFF - */ + ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL); ASSERT(dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF); - dof_ioctl_data_t* lazy_dofs = p->p_dtrace_lazy_dofs; p->p_dtrace_lazy_dofs = NULL; @@ -14266,7 +15142,7 @@ dtrace_lazy_dofs_proc_iterate_doit(proc_t *p, void* ignored) dhp->dofhp_dof = dhp->dofhp_addr; dof_hdr_t *dof = dtrace_dof_copyin_from_proc(p, dhp->dofhp_dof, &rval); - + if (dof != NULL) { dtrace_helpers_t *help; @@ -14301,11 +15177,72 @@ dtrace_lazy_dofs_proc_iterate_doit(proc_t *p, void* ignored) kmem_free(lazy_dofs, DOF_IOCTL_DATA_T_SIZE(lazy_dofs->dofiod_count)); } +} + +static int +dtrace_lazy_dofs_proc_iterate_doit(proc_t *p, void* ignored) +{ +#pragma unused(ignored) + + dtrace_lazy_dofs_process(p); return PROC_RETURNED; } -#endif /* __APPLE__ */ +#define DTRACE_LAZY_DOFS_DUPLICATED 1 + +static int +dtrace_lazy_dofs_duplicate(proc_t *parent, proc_t *child) +{ + lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_assert(&parent->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED); + lck_mtx_assert(&child->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED); + + lck_rw_lock_shared(&dtrace_dof_mode_lock); + lck_mtx_lock(&parent->p_dtrace_sprlock); + + /* + * We need to make sure that the transition to lazy dofs -> helpers + * was atomic for our parent + */ + ASSERT(parent->p_dtrace_lazy_dofs == NULL || parent->p_dtrace_helpers == NULL); + /* + * In theory we should hold the child sprlock, but this is safe... + */ + ASSERT(child->p_dtrace_lazy_dofs == NULL && child->p_dtrace_helpers == NULL); + + dof_ioctl_data_t* parent_dofs = parent->p_dtrace_lazy_dofs; + dof_ioctl_data_t* child_dofs = NULL; + if (parent_dofs) { + size_t parent_dofs_size = DOF_IOCTL_DATA_T_SIZE(parent_dofs->dofiod_count); + child_dofs = kmem_alloc(parent_dofs_size, KM_SLEEP); + bcopy(parent_dofs, child_dofs, parent_dofs_size); + } + + lck_mtx_unlock(&parent->p_dtrace_sprlock); + + if (child_dofs) { + lck_mtx_lock(&child->p_dtrace_sprlock); + child->p_dtrace_lazy_dofs = child_dofs; + lck_mtx_unlock(&child->p_dtrace_sprlock); + /** + * We process the DOF at this point if the mode is set to + * LAZY_OFF. 
This can happen if DTrace is still processing the + * DOF of other process (which can happen because the + * protected pager can have a huge latency) + * but has not processed our parent yet + */ + if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF) { + dtrace_lazy_dofs_process(child); + } + lck_rw_unlock_shared(&dtrace_dof_mode_lock); + + return DTRACE_LAZY_DOFS_DUPLICATED; + } + lck_rw_unlock_shared(&dtrace_dof_mode_lock); + + return 0; +} static dtrace_helpers_t * dtrace_helpers_create(proc_t *p) @@ -14325,19 +15262,12 @@ dtrace_helpers_create(proc_t *p) return (help); } -#if !defined(__APPLE__) -static void -dtrace_helpers_destroy(void) -{ - proc_t *p = curproc; -#else static void dtrace_helpers_destroy(proc_t* p) { -#endif dtrace_helpers_t *help; dtrace_vstate_t *vstate; - int i; + uint_t i; lck_mtx_lock(&dtrace_lock); @@ -14378,7 +15308,7 @@ dtrace_helpers_destroy(proc_t* p) for (i = 0; i < help->dthps_nprovs; i++) { dtrace_helper_provider_remove( - &help->dthps_provs[i]->dthp_prov, p->p_pid); + &help->dthps_provs[i]->dthp_prov, p); } } else { lck_mtx_lock(&dtrace_lock); @@ -14430,7 +15360,8 @@ dtrace_helpers_duplicate(proc_t *from, proc_t *to) dtrace_helper_action_t *helper, *new, *last; dtrace_difo_t *dp; dtrace_vstate_t *vstate; - int i, j, sz, hasprovs = 0; + uint_t i; + int j, sz, hasprovs = 0; lck_mtx_lock(&dtrace_lock); ASSERT(from->p_dtrace_helpers != NULL); @@ -14465,11 +15396,11 @@ dtrace_helpers_duplicate(proc_t *from, proc_t *to) new->dtha_actions = kmem_alloc(sz, KM_SLEEP); for (j = 0; j < new->dtha_nactions; j++) { - dtrace_difo_t *dp = helper->dtha_actions[j]; + dtrace_difo_t *dpj = helper->dtha_actions[j]; - ASSERT(dp != NULL); - dp = dtrace_difo_duplicate(dp, vstate); - new->dtha_actions[j] = dp; + ASSERT(dpj != NULL); + dpj = dtrace_difo_duplicate(dpj, vstate); + new->dtha_actions[j] = dpj; } if (last != NULL) { @@ -14505,29 +15436,405 @@ dtrace_helpers_duplicate(proc_t *from, proc_t *to) dtrace_helper_provider_register(to, newhelp, NULL); } +/** + * DTrace Process functions + */ + +void +dtrace_proc_fork(proc_t *parent_proc, proc_t *child_proc, int spawn) +{ + /* + * This code applies to new processes who are copying the task + * and thread state and address spaces of their parent process. + */ + if (!spawn) { + /* + * APPLE NOTE: Solaris does a sprlock() and drops the + * proc_lock here. We're cheating a bit and only taking + * the p_dtrace_sprlock lock. A full sprlock would + * task_suspend the parent. + */ + lck_mtx_lock(&parent_proc->p_dtrace_sprlock); + + /* + * Remove all DTrace tracepoints from the child process. We + * need to do this _before_ duplicating USDT providers since + * any associated probes may be immediately enabled. + */ + if (parent_proc->p_dtrace_count > 0) { + dtrace_fasttrap_fork(parent_proc, child_proc); + } + + lck_mtx_unlock(&parent_proc->p_dtrace_sprlock); + + /* + * Duplicate any lazy dof(s). This must be done while NOT + * holding the parent sprlock! Lock ordering is + * dtrace_dof_mode_lock, then sprlock. It is imperative we + * always call dtrace_lazy_dofs_duplicate, rather than null + * check and call if !NULL. If we NULL test, during lazy dof + * faulting we can race with the faulting code and proceed + * from here to beyond the helpers copy. The lazy dof + * faulting will then fail to copy the helpers to the child + * process. 
We return if we duplicated lazy dofs, as a process + * can only have one set at a time, to avoid a race between + * a dtrace client and dtrace_proc_fork where a process would + * end up with both lazy dofs and helpers. + */ + if (dtrace_lazy_dofs_duplicate(parent_proc, child_proc) == DTRACE_LAZY_DOFS_DUPLICATED) { + return; + } + + /* + * Duplicate any helper actions and providers if they haven't + * already. + */ +#if !defined(__APPLE__) + /* + * The SFORKING flag + * we set above informs the code that enables USDT probes + * that sprlock() may fail because the child is being forked. + */ +#endif + /* + * APPLE NOTE: As best I can tell, Apple's sprlock() equivalent + * never fails to find the child. We do not set SFORKING. + */ + if (parent_proc->p_dtrace_helpers != NULL && dtrace_helpers_fork) { + (*dtrace_helpers_fork)(parent_proc, child_proc); + } + } +} + +void +dtrace_proc_exec(proc_t *p) +{ + /* + * Invalidate any predicate evaluation already cached for this thread by DTrace. + * That's because we've just stored to p_comm and DTrace refers to that when it + * evaluates the "execname" special variable. uid and gid may have changed as well. + */ + dtrace_set_thread_predcache(current_thread(), 0); + + /* + * Free any outstanding lazy dof entries. It is imperative we + * always call dtrace_lazy_dofs_destroy, rather than null check + * and call if !NULL. If we NULL test, during lazy dof faulting + * we can race with the faulting code and proceed from here to + * beyond the helpers cleanup. The lazy dof faulting will then + * install new helpers which no longer belong to this process! + */ + dtrace_lazy_dofs_destroy(p); + + + /* + * Clean up any DTrace helpers for the process. + */ + if (p->p_dtrace_helpers != NULL && dtrace_helpers_cleanup) { + (*dtrace_helpers_cleanup)(p); + } + + /* + * Clean up the DTrace provider associated with this process. + */ + proc_lock(p); + if (p->p_dtrace_probes && dtrace_fasttrap_exec_ptr) { + (*dtrace_fasttrap_exec_ptr)(p); + } + proc_unlock(p); +} + +void +dtrace_proc_exit(proc_t *p) +{ + /* + * Free any outstanding lazy dof entries. It is imperative we + * always call dtrace_lazy_dofs_destroy, rather than null check + * and call if !NULL. If we NULL test, during lazy dof faulting + * we can race with the faulting code and proceed from here to + * beyond the helpers cleanup. The lazy dof faulting will then + * install new helpers which will never be cleaned up, and leak. + */ + dtrace_lazy_dofs_destroy(p); + + /* + * Clean up any DTrace helper actions or probes for the process. + */ + if (p->p_dtrace_helpers != NULL) { + (*dtrace_helpers_cleanup)(p); + } + + /* + * Clean up any DTrace probes associated with this process. + */ + /* + * APPLE NOTE: We release ptss pages/entries in dtrace_fasttrap_exit_ptr(), + * so call this after dtrace_helpers_cleanup() + */ + proc_lock(p); + if (p->p_dtrace_probes && dtrace_fasttrap_exit_ptr) { + (*dtrace_fasttrap_exit_ptr)(p); + } + proc_unlock(p); +} + +/* + * DTrace Hook Functions + */ + +/* + * APPLE NOTE: dtrace_modctl_* routines for kext support. + * Used to manipulate the modctl list within dtrace xnu. 
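The modctl helpers that follow keep at most one live entry per kext name and park an unloaded same-named entry on the new entry's stale chain. A reduced sketch of that move in dtrace_modctl_add(), using stub node types (the kernel version compares with strncmp over KMOD_MAX_NAME and runs under mod_lock):

    #include <string.h>

    struct mod_sketch {
        struct mod_sketch *next;    /* mod_next */
        struct mod_sketch *stale;   /* mod_stale */
        int loaded;                 /* mod_loaded */
        char name[64];              /* mod_modname */
    };

    static void
    mod_sketch_add(struct mod_sketch **head, struct mod_sketch *newm)
    {
        /* New module goes at the front of the list. */
        newm->next = *head;
        *head = newm;

        /*
         * An unloaded entry with the same name must have gone away with
         * enabled probes; unlink it, park it on the new entry's stale
         * chain, and stop (there can be at most one such entry).
         */
        struct mod_sketch *prev = newm, *cur = newm->next;
        while (cur != NULL) {
            if (!cur->loaded && strcmp(cur->name, newm->name) == 0) {
                prev->next = cur->next;
                newm->stale = cur;
                cur->next = NULL;
                break;
            }
            prev = cur;
            cur = cur->next;
        }
    }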
+ */ + +modctl_t *dtrace_modctl_list; + +static void +dtrace_modctl_add(struct modctl * newctl) +{ + struct modctl *nextp, *prevp; + + ASSERT(newctl != NULL); + lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED); + + // Insert new module at the front of the list, + + newctl->mod_next = dtrace_modctl_list; + dtrace_modctl_list = newctl; + + /* + * If a module exists with the same name, then that module + * must have been unloaded with enabled probes. We will move + * the unloaded module to the new module's stale chain and + * then stop traversing the list. + */ + + prevp = newctl; + nextp = newctl->mod_next; + + while (nextp != NULL) { + if (nextp->mod_loaded) { + /* This is a loaded module. Keep traversing. */ + prevp = nextp; + nextp = nextp->mod_next; + continue; + } + else { + /* Found an unloaded module */ + if (strncmp (newctl->mod_modname, nextp->mod_modname, KMOD_MAX_NAME)) { + /* Names don't match. Keep traversing. */ + prevp = nextp; + nextp = nextp->mod_next; + continue; + } + else { + /* We found a stale entry, move it. We're done. */ + prevp->mod_next = nextp->mod_next; + newctl->mod_stale = nextp; + nextp->mod_next = NULL; + break; + } + } + } +} + +static modctl_t * +dtrace_modctl_lookup(struct kmod_info * kmod) +{ + lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED); + + struct modctl * ctl; + + for (ctl = dtrace_modctl_list; ctl; ctl=ctl->mod_next) { + if (ctl->mod_id == kmod->id) + return(ctl); + } + return (NULL); +} + +/* + * This routine is called from dtrace_module_unloaded(). + * It removes a modctl structure and its stale chain + * from the kext shadow list. + */ +static void +dtrace_modctl_remove(struct modctl * ctl) +{ + ASSERT(ctl != NULL); + lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED); + modctl_t *prevp, *nextp, *curp; + + // Remove stale chain first + for (curp=ctl->mod_stale; curp != NULL; curp=nextp) { + nextp = curp->mod_stale; + /* There should NEVER be user symbols allocated at this point */ + ASSERT(curp->mod_user_symbols == NULL); + kmem_free(curp, sizeof(modctl_t)); + } + + prevp = NULL; + curp = dtrace_modctl_list; + + while (curp != ctl) { + prevp = curp; + curp = curp->mod_next; + } + + if (prevp != NULL) { + prevp->mod_next = ctl->mod_next; + } + else { + dtrace_modctl_list = ctl->mod_next; + } + + /* There should NEVER be user symbols allocated at this point */ + ASSERT(ctl->mod_user_symbols == NULL); + + kmem_free (ctl, sizeof(modctl_t)); +} + /* - * DTrace Hook Functions + * APPLE NOTE: The kext loader will call dtrace_module_loaded + * when the kext is loaded in memory, but before calling the + * kext's start routine. + * + * Return 0 on success + * Return -1 on failure */ -static void -dtrace_module_loaded(struct modctl *ctl) + +static int +dtrace_module_loaded(struct kmod_info *kmod, uint32_t flag) { dtrace_provider_t *prv; + /* + * If kernel symbols have been disabled, return immediately + * DTRACE_KERNEL_SYMBOLS_NEVER is a permanent mode, it is safe to test without holding locks + */ + if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER) + return 0; + + struct modctl *ctl = NULL; + if (!kmod || kmod->address == 0 || kmod->size == 0) + return(-1); + lck_mtx_lock(&dtrace_provider_lock); - lck_mtx_lock(&mod_lock); + lck_mtx_lock(&mod_lock); + + /* + * Have we seen this kext before? + */ + + ctl = dtrace_modctl_lookup(kmod); - // ASSERT(ctl->mod_busy); + if (ctl != NULL) { + /* bail... 
we already have this kext in the modctl list */ + lck_mtx_unlock(&mod_lock); + lck_mtx_unlock(&dtrace_provider_lock); + if (dtrace_err_verbose) + cmn_err(CE_WARN, "dtrace load module already exists '%s %u' is failing against '%s %u'", kmod->name, (uint_t)kmod->id, ctl->mod_modname, ctl->mod_id); + return(-1); + } + else { + ctl = kmem_alloc(sizeof(struct modctl), KM_SLEEP); + if (ctl == NULL) { + if (dtrace_err_verbose) + cmn_err(CE_WARN, "dtrace module load '%s %u' is failing ", kmod->name, (uint_t)kmod->id); + lck_mtx_unlock(&mod_lock); + lck_mtx_unlock(&dtrace_provider_lock); + return (-1); + } + ctl->mod_next = NULL; + ctl->mod_stale = NULL; + strlcpy (ctl->mod_modname, kmod->name, sizeof(ctl->mod_modname)); + ctl->mod_loadcnt = kmod->id; + ctl->mod_nenabled = 0; + ctl->mod_address = kmod->address; + ctl->mod_size = kmod->size; + ctl->mod_id = kmod->id; + ctl->mod_loaded = 1; + ctl->mod_flags = 0; + ctl->mod_user_symbols = NULL; + + /* + * Find the UUID for this module, if it has one + */ + kernel_mach_header_t* header = (kernel_mach_header_t *)ctl->mod_address; + struct load_command* load_cmd = (struct load_command *)&header[1]; + uint32_t i; + for (i = 0; i < header->ncmds; i++) { + if (load_cmd->cmd == LC_UUID) { + struct uuid_command* uuid_cmd = (struct uuid_command *)load_cmd; + memcpy(ctl->mod_uuid, uuid_cmd->uuid, sizeof(uuid_cmd->uuid)); + ctl->mod_flags |= MODCTL_HAS_UUID; + break; + } + load_cmd = (struct load_command *)((caddr_t)load_cmd + load_cmd->cmdsize); + } + + if (ctl->mod_address == g_kernel_kmod_info.address) { + ctl->mod_flags |= MODCTL_IS_MACH_KERNEL; + } + } + dtrace_modctl_add(ctl); + + /* + * We must hold the dtrace_lock to safely test non permanent dtrace_fbt_symbol_mode(s) + */ + lck_mtx_lock(&dtrace_lock); + + /* + * DTrace must decide if it will instrument modules lazily via + * userspace symbols (default mode), or instrument immediately via + * kernel symbols (non-default mode) + * + * When in default/lazy mode, DTrace will only support modules + * built with a valid UUID. + * + * Overriding the default can be done explicitly in one of + * the following two ways. + * + * A module can force symbols from kernel space using the plist key, + * OSBundleForceDTraceInit (see kmod.h). If this per kext state is set, + * we fall through and instrument this module now. + * + * Or, the boot-arg, dtrace_kernel_symbol_mode, can be set to force symbols + * from kernel space (see dtrace_impl.h). If this system state is set + * to a non-userspace mode, we fall through and instrument the module now. + */ + if ((dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) && + (!(flag & KMOD_DTRACE_FORCE_INIT))) + { + /* We will instrument the module lazily -- this is the default */ + lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&mod_lock); + lck_mtx_unlock(&dtrace_provider_lock); + return 0; + } + + /* We will instrument the module immediately using kernel symbols */ + ctl->mod_flags |= MODCTL_HAS_KERNEL_SYMBOLS; + + lck_mtx_unlock(&dtrace_lock); + /* * We're going to call each providers per-module provide operation * specifying only this module. */ for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next) - prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl); - + prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl); + + /* + * APPLE NOTE: The contract with the kext loader is that once this function + * has completed, it may delete kernel symbols at will. + * We must set this while still holding the mod_lock. 
+ */ + ctl->mod_flags &= ~MODCTL_HAS_KERNEL_SYMBOLS; + lck_mtx_unlock(&mod_lock); lck_mtx_unlock(&dtrace_provider_lock); - + /* * If we have any retained enablings, we need to match against them. * Enabling probes requires that cpu_lock be held, and we cannot hold @@ -14537,60 +15844,106 @@ dtrace_module_loaded(struct modctl *ctl) * our task queue to do the match for us. */ lck_mtx_lock(&dtrace_lock); - + if (dtrace_retained == NULL) { lck_mtx_unlock(&dtrace_lock); - return; + return 0; } - - (void) taskq_dispatch(dtrace_taskq, - (task_func_t *)dtrace_enabling_matchall, NULL, TQ_SLEEP); - - lck_mtx_unlock(&dtrace_lock); - - /* - * And now, for a little heuristic sleaze: in general, we want to - * match modules as soon as they load. However, we cannot guarantee - * this, because it would lead us to the lock ordering violation - * outlined above. The common case, of course, is that cpu_lock is - * _not_ held -- so we delay here for a clock tick, hoping that that's - * long enough for the task queue to do its work. If it's not, it's - * not a serious problem -- it just means that the module that we - * just loaded may not be immediately instrumentable. + + /* APPLE NOTE! + * + * The cpu_lock mentioned above is only held by dtrace code, Apple's xnu never actually + * holds it for any reason. Thus the comment above is invalid, we can directly invoke + * dtrace_enabling_matchall without jumping through all the hoops, and we can avoid + * the delay call as well. */ - delay(1); + lck_mtx_unlock(&dtrace_lock); + + dtrace_enabling_matchall(); + + return 0; } -static void -dtrace_module_unloaded(struct modctl *ctl) +/* + * Return 0 on success + * Return -1 on failure + */ +static int +dtrace_module_unloaded(struct kmod_info *kmod) { dtrace_probe_t template, *probe, *first, *next; dtrace_provider_t *prov; - - template.dtpr_mod = ctl->mod_modname; - - lck_mtx_lock(&dtrace_provider_lock); + struct modctl *ctl = NULL; + struct modctl *syncctl = NULL; + struct modctl *nextsyncctl = NULL; + int syncmode = 0; + + lck_mtx_lock(&dtrace_provider_lock); lck_mtx_lock(&mod_lock); lck_mtx_lock(&dtrace_lock); + if (kmod == NULL) { + syncmode = 1; + } + else { + ctl = dtrace_modctl_lookup(kmod); + if (ctl == NULL) + { + lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&mod_lock); + lck_mtx_unlock(&dtrace_provider_lock); + return (-1); + } + ctl->mod_loaded = 0; + ctl->mod_address = 0; + ctl->mod_size = 0; + } + if (dtrace_bymod == NULL) { /* * The DTrace module is loaded (obviously) but not attached; * we don't have any work to do. */ - lck_mtx_unlock(&dtrace_provider_lock); - lck_mtx_unlock(&mod_lock); + if (ctl != NULL) + (void)dtrace_modctl_remove(ctl); + lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&mod_lock); + lck_mtx_unlock(&dtrace_provider_lock); + return(0); + } + + /* Syncmode set means we target and traverse entire modctl list. 
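A note on the syncmode path that follows: dtrace_module_unloaded() does double duty. Called with a kmod_info it retires that one kext; called with NULL (as dtrace_close() does later in this diff) it sweeps the entire modctl list for stale entries whose probes can finally be freed. A reduced userland model of the retire-versus-sweep split, with stub types (the real code also tears down probes, hash entries, and each entry's stale chain):

    #include <stdlib.h>

    struct modctl_sketch {
        struct modctl_sketch *next;
        unsigned long address;      /* 0 once the kext is unloaded */
        int probes_enabled;         /* stand-in for "has enabled probes" */
    };

    static void
    mod_sketch_unload(struct modctl_sketch **head, struct modctl_sketch *dying)
    {
        if (dying != NULL)
            dying->address = 0;     /* retire: the kext's memory is gone */

        /*
         * Free every stale entry that no longer has enabled probes. A
         * stale entry with live probes keeps its shell until a later
         * sweep -- dtrace_close() passes dying == NULL for exactly that.
         */
        for (struct modctl_sketch **pp = head; *pp != NULL; ) {
            struct modctl_sketch *m = *pp;
            if (m->address == 0 && !m->probes_enabled) {
                *pp = m->next;
                free(m);
            } else {
                pp = &m->next;
            }
        }
    }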
*/ + if (syncmode) + nextsyncctl = dtrace_modctl_list; + +syncloop: + if (syncmode) + { + /* find a stale modctl struct */ + for (syncctl = nextsyncctl; syncctl != NULL; syncctl=syncctl->mod_next) { + if (syncctl->mod_address == 0) + break; + } + if (syncctl==NULL) + { + /* We have no more work to do */ lck_mtx_unlock(&dtrace_lock); - return; + lck_mtx_unlock(&mod_lock); + lck_mtx_unlock(&dtrace_provider_lock); + return(0); + } + else { + /* keep track of next syncctl in case this one is removed */ + nextsyncctl = syncctl->mod_next; + ctl = syncctl; + } } + template.dtpr_mod = ctl->mod_modname; + for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template); probe != NULL; probe = probe->dtpr_nextmod) { - if (probe->dtpr_ecb != NULL) { - lck_mtx_unlock(&dtrace_provider_lock); - lck_mtx_unlock(&mod_lock); - lck_mtx_unlock(&dtrace_lock); - + if (probe->dtpr_ecb != NULL) { /* * This shouldn't _actually_ be possible -- we're * unloading a module that has an enabled probe in it. @@ -14601,12 +15954,22 @@ dtrace_module_unloaded(struct modctl *ctl) * assert, but we're not going to disable the * probe, either. */ + + + if (syncmode) { + /* We're syncing, let's look at next in list */ + goto syncloop; + } + + lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&mod_lock); + lck_mtx_unlock(&dtrace_provider_lock); + if (dtrace_err_verbose) { cmn_err(CE_WARN, "unloaded module '%s' had " "enabled probes", ctl->mod_modname); } - - return; + return(-1); } } @@ -14616,6 +15979,7 @@ dtrace_module_unloaded(struct modctl *ctl) ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe); dtrace_probes[probe->dtpr_id - 1] = NULL; + probe->dtpr_provider->dtpv_probe_count--; next = probe->dtpr_nextmod; dtrace_hash_remove(dtrace_bymod, probe); @@ -14647,16 +16011,20 @@ dtrace_module_unloaded(struct modctl *ctl) kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1); kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1); vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1); -#if !defined(__APPLE__) - kmem_free(probe, sizeof (dtrace_probe_t)); -#else + zfree(dtrace_probe_t_zone, probe); -#endif } + dtrace_modctl_remove(ctl); + + if (syncmode) + goto syncloop; + lck_mtx_unlock(&dtrace_lock); lck_mtx_unlock(&mod_lock); lck_mtx_unlock(&dtrace_provider_lock); + + return(0); } void @@ -14765,8 +16133,8 @@ dtrace_toxrange_add(uintptr_t base, uintptr_t limit) dtrace_toxrange = range; } - ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == NULL); - ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == NULL); + ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == 0); + ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == 0); dtrace_toxrange[dtrace_toxranges].dtt_base = base; dtrace_toxrange[dtrace_toxranges].dtt_limit = limit; @@ -14780,6 +16148,7 @@ dtrace_toxrange_add(uintptr_t base, uintptr_t limit) static int dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) { +#pragma unused(cmd) /* __APPLE__ */ dtrace_provider_id_t id; dtrace_state_t *state = NULL; dtrace_enabling_t *enab; @@ -14788,29 +16157,7 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) lck_mtx_lock(&dtrace_provider_lock); lck_mtx_lock(&dtrace_lock); - if (ddi_soft_state_init(&dtrace_softstate, - sizeof (dtrace_state_t), 0) != 0) { - cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state"); - lck_mtx_unlock(&cpu_lock); - lck_mtx_unlock(&dtrace_provider_lock); - lck_mtx_unlock(&dtrace_lock); - return (DDI_FAILURE); - } - -#if !defined(__APPLE__) - if (ddi_create_minor_node(devi, DTRACEMNR_DTRACE, S_IFCHR, - DTRACEMNRN_DTRACE, DDI_PSEUDO, 
NULL) == DDI_FAILURE || - ddi_create_minor_node(devi, DTRACEMNR_HELPER, S_IFCHR, - DTRACEMNRN_HELPER, DDI_PSEUDO, NULL) == DDI_FAILURE) { - cmn_err(CE_NOTE, "/dev/dtrace couldn't create minor nodes"); - ddi_remove_minor_node(devi, NULL); - ddi_soft_state_fini(&dtrace_softstate); - lck_mtx_unlock(&cpu_lock); - lck_mtx_unlock(&dtrace_provider_lock); - lck_mtx_unlock(&dtrace_lock); - return (DDI_FAILURE); - } -#endif /* __APPLE__ */ + /* Darwin uses BSD cloning device driver to automagically obtain minor device number. */ ddi_report_dev(devi); dtrace_devi = devi; @@ -14824,8 +16171,6 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) dtrace_cpustart_fini = dtrace_resume; dtrace_debugger_init = dtrace_suspend; dtrace_debugger_fini = dtrace_resume; - dtrace_kreloc_init = dtrace_suspend; - dtrace_kreloc_fini = dtrace_resume; register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL); @@ -14833,14 +16178,11 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) dtrace_arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1, NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER); - dtrace_minor = vmem_create("dtrace_minor", (void *)DTRACEMNRN_CLONE, - UINT32_MAX - DTRACEMNRN_CLONE, 1, NULL, NULL, NULL, 0, - VM_SLEEP | VMC_IDENTIFIER); dtrace_taskq = taskq_create("dtrace_taskq", 1, maxclsyspri, 1, INT_MAX, 0); dtrace_state_cache = kmem_cache_create("dtrace_state_cache", - sizeof (dtrace_dstate_percpu_t) * NCPU, DTRACE_STATE_ALIGN, + sizeof (dtrace_dstate_percpu_t) * (int)NCPU, DTRACE_STATE_ALIGN, NULL, NULL, NULL, NULL, NULL, 0); lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED); @@ -14881,21 +16223,7 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) ASSERT(dtrace_provider != NULL); ASSERT((dtrace_provider_id_t)dtrace_provider == id); -#if !defined(__APPLE__) - dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t) - dtrace_provider, NULL, NULL, "BEGIN", 0, NULL); - dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t) - dtrace_provider, NULL, NULL, "END", 0, NULL); - dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t) - dtrace_provider, NULL, NULL, "ERROR", 1, NULL); -#elif defined(__ppc__) || defined(__ppc64__) - dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t) - dtrace_provider, NULL, NULL, "BEGIN", 2, NULL); - dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t) - dtrace_provider, NULL, NULL, "END", 1, NULL); - dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t) - dtrace_provider, NULL, NULL, "ERROR", 4, NULL); -#elif (defined(__i386__) || defined (__x86_64__)) +#if defined (__x86_64__) dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t) dtrace_provider, NULL, NULL, "BEGIN", 1, NULL); dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t) @@ -14904,7 +16232,7 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) dtrace_provider, NULL, NULL, "ERROR", 3, NULL); #else #error Unknown Architecture -#endif /* __APPLE__ */ +#endif dtrace_anon_property(); lck_mtx_unlock(&cpu_lock); @@ -14930,6 +16258,13 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) if (dtrace_anon.dta_enabling != NULL) { ASSERT(dtrace_retained == dtrace_anon.dta_enabling); + /* + * APPLE NOTE: if handling anonymous dof, switch symbol modes. 
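The kernel-symbol mode changes hands in several hunks of this diff. A condensed model of the transitions, assuming only the four DTRACE_KERNEL_SYMBOLS_* states named here (the real constants and policy live in dtrace_impl.h):

    enum sym_mode {
        SYMBOLS_NEVER,              /* permanent: never instrument */
        SYMBOLS_FROM_USERSPACE,     /* lazy: wait for dtrace(1) to push symbols */
        SYMBOLS_FROM_KERNEL,        /* active: use in-kernel symbol tables */
        SYMBOLS_ALWAYS_FROM_KERNEL  /* permanent: always use kernel symbols */
    };

    static enum sym_mode
    on_first_consumer(enum sym_mode m)
    {
        /* dtrace_open() and anonymous DOF escalate lazy mode. */
        return (m == SYMBOLS_FROM_USERSPACE) ? SYMBOLS_FROM_KERNEL : m;
    }

    static enum sym_mode
    on_last_close(enum sym_mode m)
    {
        /* The last dtrace_close() drops back to lazy symbols. */
        return (m == SYMBOLS_FROM_KERNEL) ? SYMBOLS_FROM_USERSPACE : m;
    }

The two permanent modes never transition, which is why the code below may test them without holding dtrace_lock.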
+ */ + if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) { + dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_KERNEL; + } + dtrace_enabling_provide(NULL); state = dtrace_anon.dta_state; @@ -14948,7 +16283,7 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) lck_mtx_lock(&dtrace_lock); if ((enab = dtrace_anon.dta_enabling) != NULL) - (void) dtrace_enabling_match(enab, NULL); + (void) dtrace_enabling_match(enab, NULL, NULL); lck_mtx_unlock(&cpu_lock); } @@ -14966,8 +16301,6 @@ dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd) return (DDI_SUCCESS); } -extern void fasttrap_init(void); - /*ARGSUSED*/ static int dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) @@ -14977,19 +16310,9 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) uint32_t priv; uid_t uid; zoneid_t zoneid; + int rv; -#if !defined(__APPLE__) - if (getminor(*devp) == DTRACEMNRN_HELPER) - return (0); - - /* - * If this wasn't an open with the "helper" minor, then it must be - * the "dtrace" minor. - */ - ASSERT(getminor(*devp) == DTRACEMNRN_DTRACE); -#else - /* Darwin puts Helper on its own major device. */ -#endif /* __APPLE__ */ + /* APPLE: Darwin puts Helper on its own major device. */ /* * If no DTRACE_PRIV_* bits are set in the credential, then the @@ -14999,13 +16322,11 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) if (priv == DTRACE_PRIV_NONE) return (EACCES); -#if defined(__APPLE__) /* - * We delay the initialization of fasttrap as late as possible. + * APPLE NOTE: We delay the initialization of fasttrap as late as possible. * It certainly can't be later than now! */ fasttrap_init(); -#endif /* __APPLE__ */ /* * Ask all providers to provide all their probes. @@ -15025,24 +16346,24 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) */ if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) { dtrace_opens--; - lck_mtx_unlock(&cpu_lock); lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&cpu_lock); return (EBUSY); } - state = dtrace_state_create(devp, cred_p); + rv = dtrace_state_create(devp, cred_p, &state); lck_mtx_unlock(&cpu_lock); - if (state == NULL) { - if (--dtrace_opens == 0) + if (rv != 0 || state == NULL) { + if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); lck_mtx_unlock(&dtrace_lock); - return (EAGAIN); + /* propagate EAGAIN or ERESTART */ + return (rv); } - + lck_mtx_unlock(&dtrace_lock); lck_rw_lock_exclusive(&dtrace_dof_mode_lock); /* @@ -15054,7 +16375,16 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) */ if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON) { dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_OFF; - + /* + * We do not need to hold the exclusive lock while processing + * DOF on processes. We do need to make sure the mode does not get + * changed to DTRACE_DOF_MODE_LAZY_ON during that stage though + * (which should not happen anyway since it only happens in + * dtrace_close). There is no way incomplete USDT probes can be + * activated by any DTrace clients here since they all have to + * call dtrace_open and be blocked on dtrace_dof_mode_lock + */ + lck_rw_lock_exclusive_to_shared(&dtrace_dof_mode_lock); /* * Iterate all existing processes and load lazy dofs. 
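Two things are worth noting in the hunk above: the exclusive-to-shared downgrade keeps any other consumer from flipping the mode back to LAZY_ON during the walk while still admitting lazy-DOF faults, and the walk itself uses proc_iterate's filter/doit callback pair. A reduced userland model of that callback shape (hypothetical names, not the xnu proc_iterate API):

    typedef int (*proc_filter_sketch_t)(int pid, void *ctx);
    typedef int (*proc_doit_sketch_t)(int pid, void *ctx);

    /* Visit every pid that survives the filter, as proc_iterate() does
     * with dtrace_lazy_dofs_proc_iterate_filter/_doit above. */
    static void
    iterate_pids_sketch(const int *pids, int npids,
        proc_filter_sketch_t filter, proc_doit_sketch_t doit, void *ctx)
    {
        for (int i = 0; i < npids; i++) {
            if (filter == NULL || filter(pids[i], ctx))
                (void)doit(pids[i], ctx);
        }
    }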
*/ @@ -15063,10 +16393,34 @@ dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p) NULL, dtrace_lazy_dofs_proc_iterate_filter, NULL); + + lck_rw_unlock_shared(&dtrace_dof_mode_lock); + } + else { + lck_rw_unlock_exclusive(&dtrace_dof_mode_lock); } - lck_rw_unlock_exclusive(&dtrace_dof_mode_lock); -#endif + + /* + * Update kernel symbol state. + * + * We must own the provider and dtrace locks. + * + * NOTE! It may appear there is a race by setting this value so late + * after dtrace_probe_provide. However, any kext loaded after the + * call to probe provide and before we set LAZY_OFF will be marked as + * eligible for symbols from userspace. The same dtrace client that is currently + * calling dtrace_open() (this call!) will get a list of kexts needing + * symbols and fill them in, thus closing the race window. + * + * We want to set this value only after it is certain it will succeed, as + * this significantly reduces the complexity of error exits. + */ + lck_mtx_lock(&dtrace_lock); + if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) { + dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_KERNEL; + } + lck_mtx_unlock(&dtrace_lock); return (0); } @@ -15075,17 +16429,12 @@ static int dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p) { +#pragma unused(flag, otyp, cred_p) /* __APPLE__ */ minor_t minor = getminor(dev); dtrace_state_t *state; -#if !defined(__APPLE__) - if (minor == DTRACEMNRN_HELPER) - return (0); -#else - /* Darwin puts Helper on its own major device. */ -#endif /* __APPLE__ */ - - state = ddi_get_soft_state(dtrace_softstate, minor); + /* APPLE NOTE: Darwin puts Helper on its own major device. */ + state = dtrace_state_get(minor); lck_mtx_lock(&cpu_lock); lck_mtx_lock(&dtrace_lock); @@ -15100,49 +16449,58 @@ dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p) dtrace_state_destroy(state); ASSERT(dtrace_opens > 0); - if (--dtrace_opens == 0) - (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); + /* + * Only relinquish control of the kernel debugger interface when there + * are no consumers and no anonymous enablings. + */ + if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) + (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE); + lck_mtx_unlock(&dtrace_lock); lck_mtx_unlock(&cpu_lock); -#if defined(__APPLE__) - /* * Lock ordering requires the dof mode lock be taken before * the dtrace_lock. */ lck_rw_lock_exclusive(&dtrace_dof_mode_lock); lck_mtx_lock(&dtrace_lock); + + if (dtrace_opens == 0) { + /* + * If we are currently lazy-off, and this is the last close, transition to + * lazy state. + */ + if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF) { + dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON; + } - /* - * If we are currently lazy-off, and this is the last close, transition to - * lazy state. - */ - if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF && dtrace_opens == 0) { - dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON; + /* + * If we are the last dtrace client, switch back to lazy (from userspace) symbols + */ + if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_KERNEL) { + dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE; + } } - + lck_mtx_unlock(&dtrace_lock); lck_rw_unlock_exclusive(&dtrace_dof_mode_lock); -#endif + + /* + * Kext probes may be retained past the end of the kext's lifespan. The + * probes are kept until the last reference to them has been removed. 
+ * Since closing an active dtrace context is likely to drop that last reference, + * let's take a shot at cleaning out the orphaned probes now. + */ + dtrace_module_unloaded(NULL); return (0); } -#if defined(__APPLE__) -/* - * Introduce cast to quiet warnings. - * XXX: This hides a lot of brokenness. - */ -#define copyin(src, dst, len) copyin( (user_addr_t)(src), (dst), (len) ) -#define copyout(src, dst, len) copyout( (src), (user_addr_t)(dst), (len) ) -#endif /* __APPLE__ */ - -#if defined(__APPLE__) /*ARGSUSED*/ static int -dtrace_ioctl_helper(int cmd, caddr_t arg, int *rv) +dtrace_ioctl_helper(u_long cmd, caddr_t arg, int *rv) { #pragma unused(rv) /* @@ -15152,7 +16510,8 @@ dtrace_ioctl_helper(int cmd, caddr_t arg, int *rv) return KERN_SUCCESS; switch (cmd) { - case DTRACEHIOC_ADDDOF: { + case DTRACEHIOC_ADDDOF: + { dof_helper_t *dhp = NULL; size_t dof_ioctl_data_size; dof_ioctl_data_t* multi_dof; @@ -15288,28 +16647,23 @@ dtrace_ioctl_helper(int cmd, caddr_t arg, int *rv) return ENOTTY; } -#endif /* __APPLE__ */ /*ARGSUSED*/ static int -dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) +dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv) { +#pragma unused(md) minor_t minor = getminor(dev); dtrace_state_t *state; int rval; -#if !defined(__APPLE__) - if (minor == DTRACEMNRN_HELPER) - return (dtrace_ioctl_helper(cmd, arg, rv)); -#else /* Darwin puts Helper on its own major device. */ -#endif /* __APPLE__ */ - state = ddi_get_soft_state(dtrace_softstate, minor); + state = dtrace_state_get(minor); if (state->dts_anon) { - ASSERT(dtrace_anon.dta_state == NULL); - state = state->dts_anon; + ASSERT(dtrace_anon.dta_state == NULL); + state = state->dts_anon; } switch (cmd) { @@ -15317,14 +16671,14 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) dtrace_providerdesc_t pvd; dtrace_provider_t *pvp; - if (copyin((void *)arg, &pvd, sizeof (pvd)) != 0) + if (copyin(arg, &pvd, sizeof (pvd)) != 0) return (EFAULT); pvd.dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0'; lck_mtx_lock(&dtrace_provider_lock); for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) { - if (strcmp(pvp->dtpv_name, pvd.dtvd_name) == 0) + if (strncmp(pvp->dtpv_name, pvd.dtvd_name, DTRACE_PROVNAMELEN) == 0) break; } @@ -15335,7 +16689,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) bcopy(&pvp->dtpv_priv, &pvd.dtvd_priv, sizeof (dtrace_ppriv_t)); bcopy(&pvp->dtpv_attr, &pvd.dtvd_attr, sizeof (dtrace_pattr_t)); - if (copyout(&pvd, (void *)arg, sizeof (pvd)) != 0) + if (copyout(&pvd, arg, sizeof (pvd)) != 0) return (EFAULT); return (0); @@ -15350,7 +16704,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) uintptr_t dest; int nrecs; - if (copyin((void *)arg, &epdesc, sizeof (epdesc)) != 0) + if (copyin(arg, &epdesc, sizeof (epdesc)) != 0) return (EFAULT); lck_mtx_lock(&dtrace_lock); @@ -15385,7 +16739,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) * across the copyout(), below. 
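The EPROBE and AGGDESC handlers below share a pattern: snapshot variable-length records into a temporary kernel buffer while dtrace_lock is held, drop the lock, and only then copyout, since copyout can fault and block. A minimal userland sketch of the pattern (the lock and copyout here are stand-ins, not kernel APIs):

    #include <stdlib.h>
    #include <string.h>

    static void lock(void) { }      /* lck_mtx_lock(&dtrace_lock) */
    static void unlock(void) { }    /* lck_mtx_unlock(&dtrace_lock) */

    static int copyout_stub(const void *k, void *u, size_t len)
    {
        memcpy(u, k, len);          /* the real copyout can fault and sleep */
        return 0;
    }

    static int
    snapshot_and_copyout(const char *src, size_t len, void *uaddr)
    {
        lock();
        char *staging = malloc(len);    /* kmem_alloc(size, KM_SLEEP) */
        if (staging == NULL) {
            unlock();
            return -1;
        }
        memcpy(staging, src, len);      /* snapshot while the data is consistent */
        unlock();

        /* No locks held across the (possibly blocking) copy to userspace. */
        int err = copyout_stub(staging, uaddr, len);
        free(staging);
        return err;
    }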
*/ size = sizeof (dtrace_eprobedesc_t) + - (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t)); + (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t)); buf = kmem_alloc(size, KM_SLEEP); dest = (uintptr_t)buf; @@ -15401,13 +16755,13 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) break; bcopy(&act->dta_rec, (void *)dest, - sizeof (dtrace_recdesc_t)); + sizeof (dtrace_recdesc_t)); dest += sizeof (dtrace_recdesc_t); } lck_mtx_unlock(&dtrace_lock); - if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) { + if (copyout(buf, arg, dest - (uintptr_t)buf) != 0) { kmem_free(buf, size); return (EFAULT); } @@ -15427,7 +16781,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) size_t size; uintptr_t dest; - if (copyin((void *)arg, &aggdesc, sizeof (aggdesc)) != 0) + if (copyin(arg, &aggdesc, sizeof (aggdesc)) != 0) return (EFAULT); lck_mtx_lock(&dtrace_lock); @@ -15448,7 +16802,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) for (act = agg->dtag_first; ; act = act->dta_next) { ASSERT(act->dta_intuple || - DTRACEACT_ISAGG(act->dta_kind)); + DTRACEACT_ISAGG(act->dta_kind)); /* * If this action has a record size of zero, it @@ -15476,7 +16830,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) * across the copyout(), below. */ size = sizeof (dtrace_aggdesc_t) + - (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t)); + (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t)); buf = kmem_alloc(size, KM_SLEEP); dest = (uintptr_t)buf; @@ -15509,7 +16863,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) lck_mtx_unlock(&dtrace_lock); - if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) { + if (copyout(buf, arg, dest - (uintptr_t)buf) != 0) { kmem_free(buf, size); return (EFAULT); } @@ -15530,14 +16884,10 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) * If a NULL argument has been passed, we take this as our * cue to reevaluate our enablings. 
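From the consumer side, the null-argument convention handled just below means a plain ioctl with no DOF re-runs the matching of all retained enablings. A hypothetical userspace sketch (dtrace(1) normally reaches this through libdtrace; the DTRACEIOC_ENABLE value comes from <sys/dtrace.h>, and the calling convention is simplified here):

    #include <sys/ioctl.h>

    static int
    reevaluate_enablings(int dtrace_fd, unsigned long dtraceioc_enable)
    {
        /* A zero DOF address means "re-match all retained enablings". */
        return ioctl(dtrace_fd, dtraceioc_enable, (void *)0);
    }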
*/ - if (arg == NULL) { - lck_mtx_lock(&cpu_lock); - lck_mtx_lock(&dtrace_lock); - err = dtrace_enabling_matchstate(state, rv); - lck_mtx_unlock(&dtrace_lock); - lck_mtx_unlock(&cpu_lock); + if (arg == 0) { + dtrace_enabling_matchall(); - return (err); + return (0); } if ((dof = dtrace_dof_copyin(arg, &rval)) == NULL) @@ -15569,14 +16919,14 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) return (rval); } - if ((err = dtrace_enabling_match(enab, rv)) == 0) { + if ((err = dtrace_enabling_match(enab, rv, NULL)) == 0) { err = dtrace_enabling_retain(enab); } else { dtrace_enabling_destroy(enab); } - lck_mtx_unlock(&cpu_lock); lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&cpu_lock); dtrace_dof_destroy(dof); return (err); @@ -15588,7 +16938,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) dtrace_probedesc_t *create = &desc.dtrpd_create; int err; - if (copyin((void *)arg, &desc, sizeof (desc)) != 0) + if (copyin(arg, &desc, sizeof (desc)) != 0) return (EFAULT); match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; @@ -15619,7 +16969,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) uid_t uid; zoneid_t zoneid; - if (copyin((void *)arg, &desc, sizeof (desc)) != 0) + if (copyin(arg, &desc, sizeof (desc)) != 0) return (EFAULT); desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0'; @@ -15648,10 +16998,11 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) lck_mtx_lock(&dtrace_lock); if (cmd == DTRACEIOC_PROBEMATCH) { - for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) { + /* Quiet compiler warning */ + for (i = desc.dtpd_id; i <= (dtrace_id_t)dtrace_nprobes; i++) { if ((probe = dtrace_probes[i - 1]) != NULL && - (m = dtrace_match_probe(probe, &pkey, - priv, uid, zoneid)) != 0) + (m = dtrace_match_probe(probe, &pkey, + priv, uid, zoneid)) != 0) break; } @@ -15661,9 +17012,10 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) } } else { - for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) { + /* Quiet compiler warning */ + for (i = desc.dtpd_id; i <= (dtrace_id_t)dtrace_nprobes; i++) { if ((probe = dtrace_probes[i - 1]) != NULL && - dtrace_match_priv(probe, priv, uid, zoneid)) + dtrace_match_priv(probe, priv, uid, zoneid)) break; } } @@ -15676,7 +17028,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) dtrace_probe_description(probe, &desc); lck_mtx_unlock(&dtrace_lock); - if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) + if (copyout(&desc, arg, sizeof (desc)) != 0) return (EFAULT); return (0); @@ -15687,7 +17039,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) dtrace_probe_t *probe; dtrace_provider_t *prov; - if (copyin((void *)arg, &desc, sizeof (desc)) != 0) + if (copyin(arg, &desc, sizeof (desc)) != 0) return (EFAULT); if (desc.dtargd_id == DTRACE_IDNONE) @@ -15700,7 +17052,8 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) lck_mtx_lock(&mod_lock); lck_mtx_lock(&dtrace_lock); - if (desc.dtargd_id > dtrace_nprobes) { + /* Quiet compiler warning */ + if (desc.dtargd_id > (dtrace_id_t)dtrace_nprobes) { lck_mtx_unlock(&dtrace_lock); lck_mtx_unlock(&mod_lock); lck_mtx_unlock(&dtrace_provider_lock); @@ -15719,10 +17072,10 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) prov = probe->dtpr_provider; if (prov->dtpv_pops.dtps_getargdesc == NULL) { - /* - * There isn't any typed information for this probe. 
- * Set the argument number to DTRACE_ARGNONE. - */ + /* + * There isn't any typed information for this probe. + * Set the argument number to DTRACE_ARGNONE. + */ desc.dtargd_ndx = DTRACE_ARGNONE; } else { desc.dtargd_native[0] = '\0'; @@ -15730,13 +17083,13 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) desc.dtargd_mapping = desc.dtargd_ndx; prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg, - probe->dtpr_id, probe->dtpr_arg, &desc); + probe->dtpr_id, probe->dtpr_arg, &desc); } lck_mtx_unlock(&mod_lock); lck_mtx_unlock(&dtrace_provider_lock); - if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) + if (copyout(&desc, arg, sizeof (desc)) != 0) return (EFAULT); return (0); @@ -15749,7 +17102,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) if (rval != 0) return (rval); - if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0) + if (copyout(&cpuid, arg, sizeof (cpuid)) != 0) return (EFAULT); return (0); @@ -15765,7 +17118,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) if (rval != 0) return (rval); - if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0) + if (copyout(&cpuid, arg, sizeof (cpuid)) != 0) return (EFAULT); return (0); @@ -15775,30 +17128,65 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) dof_hdr_t hdr, *dof; uint64_t len; - if (copyin((void *)arg, &hdr, sizeof (hdr)) != 0) + if (copyin(arg, &hdr, sizeof (hdr)) != 0) + return (EFAULT); + + lck_mtx_lock(&dtrace_lock); + dof = dtrace_dof_create(state); + lck_mtx_unlock(&dtrace_lock); + + len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz); + rval = copyout(dof, arg, len); + dtrace_dof_destroy(dof); + + return (rval == 0 ? 0 : EFAULT); + } + + case DTRACEIOC_SLEEP: { + int64_t time; + uint64_t abstime; + uint64_t rvalue = DTRACE_WAKE_TIMEOUT; + + if (copyin(arg, &time, sizeof(time)) != 0) + return (EFAULT); + + nanoseconds_to_absolutetime((uint64_t)time, &abstime); + clock_absolutetime_interval_to_deadline(abstime, &abstime); + + if (assert_wait_deadline(state, THREAD_ABORTSAFE, abstime) == THREAD_WAITING) { + if (state->dts_buf_over_limit > 0) { + clear_wait(current_thread(), THREAD_INTERRUPTED); + rvalue = DTRACE_WAKE_BUF_LIMIT; + } else { + thread_block(THREAD_CONTINUE_NULL); + if (state->dts_buf_over_limit > 0) { + rvalue = DTRACE_WAKE_BUF_LIMIT; + } + } + } + + if (copyout(&rvalue, arg, sizeof(rvalue)) != 0) return (EFAULT); - lck_mtx_lock(&dtrace_lock); - dof = dtrace_dof_create(state); - lck_mtx_unlock(&dtrace_lock); - - len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz); - rval = copyout(dof, (void *)arg, len); - dtrace_dof_destroy(dof); + return (0); + } - return (rval == 0 ? 
0 : EFAULT); + case DTRACEIOC_SIGNAL: { + wakeup(state); + return (0); } case DTRACEIOC_AGGSNAP: case DTRACEIOC_BUFSNAP: { dtrace_bufdesc_t desc; caddr_t cached; + boolean_t over_limit; dtrace_buffer_t *buf; - if (copyin((void *)arg, &desc, sizeof (desc)) != 0) + if (copyin(arg, &desc, sizeof (desc)) != 0) return (EFAULT); - if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU) + if ((int)desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU) return (EINVAL); lck_mtx_lock(&dtrace_lock); @@ -15831,7 +17219,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) desc.dtbd_oldest = 0; sz = sizeof (desc); - if (copyout(&desc, (void *)arg, sz) != 0) + if (copyout(&desc, arg, sz) != 0) return (EFAULT); return (0); @@ -15846,7 +17234,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) sz = buf->dtb_size; } - if (copyout(buf->dtb_tomax, desc.dtbd_data, sz) != 0) { + if (copyout(buf->dtb_tomax, (user_addr_t)desc.dtbd_data, sz) != 0) { lck_mtx_unlock(&dtrace_lock); return (EFAULT); } @@ -15855,10 +17243,11 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) desc.dtbd_drops = buf->dtb_drops; desc.dtbd_errors = buf->dtb_errors; desc.dtbd_oldest = buf->dtb_xamot_offset; + desc.dtbd_timestamp = dtrace_gethrtime(); lck_mtx_unlock(&dtrace_lock); - if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) + if (copyout(&desc, arg, sizeof (desc)) != 0) return (EFAULT); buf->dtb_flags |= DTRACEBUF_CONSUMED; @@ -15873,19 +17262,21 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) } cached = buf->dtb_tomax; + over_limit = buf->dtb_cur_limit == buf->dtb_size; + ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH)); dtrace_xcall(desc.dtbd_cpu, - (dtrace_xcall_t)dtrace_buffer_switch, buf); + (dtrace_xcall_t)dtrace_buffer_switch, buf); state->dts_errors += buf->dtb_xamot_errors; /* - * If the buffers did not actually switch, then the cross call - * did not take place -- presumably because the given CPU is - * not in the ready set. If this is the case, we'll return - * ENOENT. - */ + * If the buffers did not actually switch, then the cross call + * did not take place -- presumably because the given CPU is + * not in the ready set. If this is the case, we'll return + * ENOENT. + */ if (buf->dtb_tomax == cached) { ASSERT(buf->dtb_xamot != cached); lck_mtx_unlock(&dtrace_lock); return (ENOENT); } ASSERT(cached == buf->dtb_xamot); - /* - * We have our snapshot; now copy it out. + * At this point we know the buffers have switched, so we + * can decrement the over limit count if the buffer was over + * its limit. The new buffer might already be over its limit + * too, but we don't care since we're guaranteed not to be + * checking the buffer over limit count at this point. */ - if (copyout(buf->dtb_xamot, desc.dtbd_data, - buf->dtb_xamot_offset) != 0) { + if (over_limit) { + uint32_t old = atomic_add_32(&state->dts_buf_over_limit, -1); + #pragma unused(old) + + /* + * Verify that we didn't underflow the value + */ + ASSERT(old != 0); + } + + /* + * We have our snapshot; now copy it out. 
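BUFSNAP operates on a pair of per-CPU buffers: probes fill dtb_tomax while the consumer drains dtb_xamot, and dtrace_buffer_switch(), run via cross-call on the target CPU, swaps the two. What the swap amounts to, minus the cross-call and interrupt protection that make it safe on a live CPU (a sketch, not the real dtrace_buffer_switch):

    #include <stddef.h>

    struct dbuf_sketch {
        char *tomax;            /* live buffer, filled by probes */
        char *xamot;            /* inactive buffer, drained by the consumer */
        size_t tomax_offset;
        size_t xamot_offset;
    };

    static void
    buffer_switch_sketch(struct dbuf_sketch *b)
    {
        char *data = b->tomax;
        size_t off = b->tomax_offset;

        b->tomax = b->xamot;            /* drained buffer becomes live */
        b->tomax_offset = 0;
        b->xamot = data;                /* full buffer goes to the consumer */
        b->xamot_offset = off;
    }

This is why the handler checks whether dtb_tomax still equals the cached pointer: an unchanged pointer means the cross-call never ran and there is nothing new to copy out.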
+ */ + if (copyout(buf->dtb_xamot, (user_addr_t)desc.dtbd_data, + buf->dtb_xamot_offset) != 0) { lck_mtx_unlock(&dtrace_lock); return (EFAULT); } @@ -15907,13 +17314,14 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) desc.dtbd_drops = buf->dtb_xamot_drops; desc.dtbd_errors = buf->dtb_xamot_errors; desc.dtbd_oldest = 0; + desc.dtbd_timestamp = buf->dtb_switched; lck_mtx_unlock(&dtrace_lock); /* * Finally, copy out the buffer description. */ - if (copyout(&desc, (void *)arg, sizeof (desc)) != 0) + if (copyout(&desc, arg, sizeof (desc)) != 0) return (EFAULT); return (0); } @@ -15928,7 +17336,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) conf.dtc_diftupregs = DIF_DTR_NREGS; conf.dtc_ctfmodel = CTF_MODEL_NATIVE; - if (copyout(&conf, (void *)arg, sizeof (conf)) != 0) + if (copyout(&conf, arg, sizeof (conf)) != 0) return (EFAULT); return (0); @@ -15941,10 +17349,10 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) uint64_t nerrs; /* - * See the comment in dtrace_state_deadman() for the reason - * for setting dts_laststatus to INT64_MAX before setting - * it to the correct value. - */ + * See the comment in dtrace_state_deadman() for the reason + * for setting dts_laststatus to INT64_MAX before setting + * it to the correct value. + */ state->dts_laststatus = INT64_MAX; dtrace_membar_producer(); state->dts_laststatus = dtrace_gethrtime(); @@ -15964,7 +17372,7 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) nerrs = state->dts_errors; dstate = &state->dts_vstate.dtvs_dynvars; - for (i = 0; i < NCPU; i++) { + for (i = 0; i < (int)NCPU; i++) { dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i]; stat.dtst_dyndrops += dcpu->dtdsc_drops; @@ -15991,12 +17399,12 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) stat.dtst_stkstroverflows = state->dts_stkstroverflows; stat.dtst_dblerrors = state->dts_dblerrors; stat.dtst_killed = - (state->dts_activity == DTRACE_ACTIVITY_KILLED); + (state->dts_activity == DTRACE_ACTIVITY_KILLED); stat.dtst_errors = nerrs; lck_mtx_unlock(&dtrace_lock); - if (copyout(&stat, (void *)arg, sizeof (stat)) != 0) + if (copyout(&stat, arg, sizeof (stat)) != 0) return (EFAULT); return (0); } @@ -16007,13 +17415,13 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) char *str; int len; - if (copyin((void *)arg, &fmt, sizeof (fmt)) != 0) + if (copyin(arg, &fmt, sizeof (fmt)) != 0) return (EFAULT); lck_mtx_lock(&dtrace_lock); if (fmt.dtfd_format == 0 || - fmt.dtfd_format > state->dts_nformats) { + fmt.dtfd_format > state->dts_nformats) { lck_mtx_unlock(&dtrace_lock); return (EINVAL); } @@ -16033,12 +17441,12 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) if (len > fmt.dtfd_length) { fmt.dtfd_length = len; - if (copyout(&fmt, (void *)arg, sizeof (fmt)) != 0) { + if (copyout(&fmt, arg, sizeof (fmt)) != 0) { lck_mtx_unlock(&dtrace_lock); return (EINVAL); } } else { - if (copyout(str, fmt.dtfd_string, len) != 0) { + if (copyout(str, (user_addr_t)fmt.dtfd_string, len) != 0) { lck_mtx_unlock(&dtrace_lock); return (EINVAL); } @@ -16048,6 +17456,312 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) return (0); } + case DTRACEIOC_MODUUIDSLIST: { + size_t module_uuids_list_size; + dtrace_module_uuids_list_t* uuids_list; + uint64_t dtmul_count; + + /* + * Security restrictions make this operation illegal; if this is enabled, DTrace + * must refuse to provide 
any fbt probes. + */ + if (dtrace_fbt_probes_restricted()) { + cmn_err(CE_WARN, "security restrictions disallow DTRACEIOC_MODUUIDSLIST"); + return (EPERM); + } + + /* + * Fail if the kernel symbol mode makes this operation illegal. + * Both NEVER & ALWAYS_FROM_KERNEL are permanent states; it is legal to check + * for them without holding the dtrace_lock. + */ + if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER || + dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL) { + cmn_err(CE_WARN, "dtrace_kernel_symbol_mode of %u disallows DTRACEIOC_MODUUIDSLIST", dtrace_kernel_symbol_mode); + return (EPERM); + } + + /* + * Read the number of module UUIDs being passed in. + */ + if (copyin(arg + offsetof(dtrace_module_uuids_list_t, dtmul_count), + &dtmul_count, + sizeof(dtmul_count))) { + cmn_err(CE_WARN, "failed to copyin dtmul_count"); + return (EFAULT); + } + + /* + * Range check the count. More than 2k kexts is probably an error. + */ + if (dtmul_count > 2048) { + cmn_err(CE_WARN, "dtmul_count is not valid"); + return (EINVAL); + } + + /* + * For all queries, we return EINVAL when the user specified + * count does not match the actual number of modules we find + * available. + * + * If the user specified count is zero, then this serves as a + * simple query to count the available modules in need of symbols. + */ + + rval = 0; + + if (dtmul_count == 0) + { + lck_mtx_lock(&mod_lock); + struct modctl* ctl = dtrace_modctl_list; + while (ctl) { + /* Update the private probes bit */ + if (dtrace_provide_private_probes) + ctl->mod_flags |= MODCTL_FBT_PROVIDE_PRIVATE_PROBES; + + ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl)); + if (!MOD_SYMBOLS_DONE(ctl)) { + dtmul_count++; + rval = EINVAL; + } + ctl = ctl->mod_next; + } + lck_mtx_unlock(&mod_lock); + + if (copyout(&dtmul_count, arg, sizeof (dtmul_count)) != 0) + return (EFAULT); + else + return (rval); + } + + /* + * If we reach this point, then we have a request for full list data. + * Allocate a correctly sized structure and copyin the data. + */ + module_uuids_list_size = DTRACE_MODULE_UUIDS_LIST_SIZE(dtmul_count); + if ((uuids_list = kmem_alloc(module_uuids_list_size, KM_SLEEP)) == NULL) + return (ENOMEM); + + /* NOTE! We can no longer exit this method via return */ + if (copyin(arg, uuids_list, module_uuids_list_size) != 0) { + cmn_err(CE_WARN, "failed copyin of dtrace_module_uuids_list_t"); + rval = EFAULT; + goto moduuidslist_cleanup; + } + + /* + * Check that the count didn't change between the first copyin and the second. + */ + if (uuids_list->dtmul_count != dtmul_count) { + rval = EINVAL; + goto moduuidslist_cleanup; + } + + /* + * Build the list of UUIDs that need symbols + */ + lck_mtx_lock(&mod_lock); + + dtmul_count = 0; + + struct modctl* ctl = dtrace_modctl_list; + while (ctl) { + /* Update the private probes bit */ + if (dtrace_provide_private_probes) + ctl->mod_flags |= MODCTL_FBT_PROVIDE_PRIVATE_PROBES; + + /* + * We assume that userspace symbols will be "better" than kernel level symbols, + * as userspace can search for dSYM(s) and symbol'd binaries. Even if kernel syms + * are available, add user syms if the module might use them. 
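DTRACEIOC_MODUUIDSLIST is a two-pass protocol: a call with dtmul_count == 0 answers with the number of kexts still needing symbols (plus EINVAL when that number is nonzero), and a second, correctly sized call fetches the UUIDs. A hypothetical userspace sketch of the handshake, with the struct reduced to the fields this diff shows and the ioctl calling convention simplified (dtrace(1) really drives this through libdtrace):

    #include <errno.h>
    #include <stdint.h>
    #include <stdlib.h>
    #include <sys/ioctl.h>

    typedef unsigned char uuid_sketch_t[16];

    typedef struct {
        uint64_t dtmul_count;
        uuid_sketch_t dtmul_uuid[];     /* flexible array member */
    } uuids_list_sketch_t;

    #define UUIDS_LIST_SKETCH_SIZE(n) \
        (sizeof(uuids_list_sketch_t) + (n) * sizeof(uuid_sketch_t))

    static uuids_list_sketch_t *
    fetch_uuids_needing_symbols(int fd, unsigned long moduuidslist_cmd)
    {
        uint64_t count = 0;

        /* Pass 1: count query; EINVAL signals "count filled in, call again". */
        if (ioctl(fd, moduuidslist_cmd, &count) == -1 && errno != EINVAL)
            return NULL;
        if (count == 0)
            return NULL;    /* no kext currently needs symbols */

        /* Pass 2: correctly sized request. */
        uuids_list_sketch_t *list = malloc(UUIDS_LIST_SKETCH_SIZE(count));
        if (list == NULL)
            return NULL;
        list->dtmul_count = count;
        if (ioctl(fd, moduuidslist_cmd, list) == -1) {
            free(list);
            return NULL;
        }
        return list;
    }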
+ */ + ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl)); + if (!MOD_SYMBOLS_DONE(ctl)) { + UUID* uuid = &uuids_list->dtmul_uuid[dtmul_count]; + if (dtmul_count++ < uuids_list->dtmul_count) { + memcpy(uuid, ctl->mod_uuid, sizeof(UUID)); + } + } + ctl = ctl->mod_next; + } + + lck_mtx_unlock(&mod_lock); + + if (uuids_list->dtmul_count < dtmul_count) + rval = EINVAL; + + uuids_list->dtmul_count = dtmul_count; + + /* + * Copyout the symbols list (or at least the count!) + */ + if (copyout(uuids_list, arg, module_uuids_list_size) != 0) { + cmn_err(CE_WARN, "failed copyout of dtrace_module_uuids_list_t"); + rval = EFAULT; + } + + moduuidslist_cleanup: + /* + * If we had to allocate struct memory, free it. + */ + if (uuids_list != NULL) { + kmem_free(uuids_list, module_uuids_list_size); + } + + return rval; + } + + case DTRACEIOC_PROVMODSYMS: { + size_t module_symbols_size; + dtrace_module_symbols_t* module_symbols; + uint64_t dtmodsyms_count; + + /* + * Security restrictions make this operation illegal; if this is enabled, DTrace + * must refuse to provide any fbt probes. + */ + if (dtrace_fbt_probes_restricted()) { + cmn_err(CE_WARN, "security restrictions disallow DTRACEIOC_PROVMODSYMS"); + return (EPERM); + } + + /* + * Fail if the kernel symbol mode makes this operation illegal. + * Both NEVER & ALWAYS_FROM_KERNEL are permanent states; it is legal to check + * for them without holding the dtrace_lock. + */ + if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER || + dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL) { + cmn_err(CE_WARN, "dtrace_kernel_symbol_mode of %u disallows DTRACEIOC_PROVMODSYMS", dtrace_kernel_symbol_mode); + return (EPERM); + } + + /* + * Read the number of module symbols structs being passed in. + */ + if (copyin(arg + offsetof(dtrace_module_symbols_t, dtmodsyms_count), + &dtmodsyms_count, + sizeof(dtmodsyms_count))) { + cmn_err(CE_WARN, "failed to copyin dtmodsyms_count"); + return (EFAULT); + } + + /* + * Range check the count. How much data can we pass around? + * FIX ME! + */ + if (dtmodsyms_count == 0 || (dtmodsyms_count > 100 * 1024)) { + cmn_err(CE_WARN, "dtmodsyms_count is not valid"); + return (EINVAL); + } + + /* + * Allocate a correctly sized structure and copyin the data. + */ + module_symbols_size = DTRACE_MODULE_SYMBOLS_SIZE(dtmodsyms_count); + if ((module_symbols = kmem_alloc(module_symbols_size, KM_SLEEP)) == NULL) + return (ENOMEM); + + rval = 0; + + /* NOTE! We can no longer exit this method via return */ + if (copyin(arg, module_symbols, module_symbols_size) != 0) { + cmn_err(CE_WARN, "failed copyin of dtrace_module_symbols_t"); + rval = EFAULT; + goto module_symbols_cleanup; + } + + /* + * Check that the count didn't change between the first copyin and the second. + */ + if (module_symbols->dtmodsyms_count != dtmodsyms_count) { + rval = EINVAL; + goto module_symbols_cleanup; + } + + /* + * Find the modctl to add symbols to. + */ + lck_mtx_lock(&dtrace_provider_lock); + lck_mtx_lock(&mod_lock); + + struct modctl* ctl = dtrace_modctl_list; + while (ctl) { + /* Update the private probes bit */ + if (dtrace_provide_private_probes) + ctl->mod_flags |= MODCTL_FBT_PROVIDE_PRIVATE_PROBES; + + ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl)); + if (MOD_HAS_UUID(ctl) && !MOD_SYMBOLS_DONE(ctl)) { + if (memcmp(module_symbols->dtmodsyms_uuid, ctl->mod_uuid, sizeof(UUID)) == 0) { + /* BINGO! 
*/ + ctl->mod_user_symbols = module_symbols; + break; + } + } + ctl = ctl->mod_next; + } + + if (ctl) { + dtrace_provider_t *prv; + + /* + * We're going to call each providers per-module provide operation + * specifying only this module. + */ + for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next) + prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl); + + /* + * We gave every provider a chance to provide with the user syms, go ahead and clear them + */ + ctl->mod_user_symbols = NULL; /* MUST reset this to clear HAS_USERSPACE_SYMBOLS */ + } + + lck_mtx_unlock(&mod_lock); + lck_mtx_unlock(&dtrace_provider_lock); + + module_symbols_cleanup: + /* + * If we had to allocate struct memory, free it. + */ + if (module_symbols != NULL) { + kmem_free(module_symbols, module_symbols_size); + } + + return rval; + } + + case DTRACEIOC_PROCWAITFOR: { + dtrace_procdesc_t pdesc = { + .p_name = {0}, + .p_pid = -1 + }; + + if ((rval = copyin(arg, &pdesc, sizeof(pdesc))) != 0) + goto proc_waitfor_error; + + if ((rval = dtrace_proc_waitfor(&pdesc)) != 0) + goto proc_waitfor_error; + + if ((rval = copyout(&pdesc, arg, sizeof(pdesc))) != 0) + goto proc_waitfor_error; + + return 0; + + proc_waitfor_error: + /* The process was suspended, revert this since the client will not do it. */ + if (pdesc.p_pid != -1) { + proc_t *proc = proc_find(pdesc.p_pid); + if (proc != PROC_NULL) { + task_pidresume(proc->task); + proc_rele(proc); + } + } + + return rval; + } + default: break; } @@ -16055,11 +17769,9 @@ dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv) return (ENOTTY); } -#if defined(__APPLE__) -#undef copyin -#undef copyout -#endif /* __APPLE__ */ - +/* + * APPLE NOTE: dtrace_detach not implemented + */ #if !defined(__APPLE__) /*ARGSUSED*/ static int @@ -16085,15 +17797,15 @@ dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) ASSERT(dtrace_opens == 0); if (dtrace_helpers > 0) { - lck_mtx_unlock(&dtrace_provider_lock); lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&dtrace_provider_lock); lck_mtx_unlock(&cpu_lock); return (DDI_FAILURE); } if (dtrace_unregister((dtrace_provider_id_t)dtrace_provider) != 0) { - lck_mtx_unlock(&dtrace_provider_lock); lck_mtx_unlock(&dtrace_lock); + lck_mtx_unlock(&dtrace_provider_lock); lck_mtx_unlock(&cpu_lock); return (DDI_FAILURE); } @@ -16149,7 +17861,6 @@ dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) dtrace_byname = NULL; kmem_cache_destroy(dtrace_state_cache); - vmem_destroy(dtrace_minor); vmem_destroy(dtrace_arena); if (dtrace_toxrange != NULL) { @@ -16185,90 +17896,7 @@ dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd) return (DDI_SUCCESS); } - -/*ARGSUSED*/ -static int -dtrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result) -{ - int error; - - switch (infocmd) { - case DDI_INFO_DEVT2DEVINFO: - *result = (void *)dtrace_devi; - error = DDI_SUCCESS; - break; - case DDI_INFO_DEVT2INSTANCE: - *result = (void *)0; - error = DDI_SUCCESS; - break; - default: - error = DDI_FAILURE; - } - return (error); -} - -static struct cb_ops dtrace_cb_ops = { - dtrace_open, /* open */ - dtrace_close, /* close */ - nulldev, /* strategy */ - nulldev, /* print */ - nodev, /* dump */ - nodev, /* read */ - nodev, /* write */ - dtrace_ioctl, /* ioctl */ - nodev, /* devmap */ - nodev, /* mmap */ - nodev, /* segmap */ - nochpoll, /* poll */ - ddi_prop_op, /* cb_prop_op */ - 0, /* streamtab */ - D_NEW | D_MP /* Driver compatibility flag */ -}; - -static struct dev_ops dtrace_ops = { - DEVO_REV, /* devo_rev */ - 0, /* refcnt */ - 
dtrace_info, /* get_dev_info */ - nulldev, /* identify */ - nulldev, /* probe */ - dtrace_attach, /* attach */ - dtrace_detach, /* detach */ - nodev, /* reset */ - &dtrace_cb_ops, /* driver operations */ - NULL, /* bus operations */ - nodev /* dev power */ -}; - -static struct modldrv modldrv = { - &mod_driverops, /* module type (this is a pseudo driver) */ - "Dynamic Tracing", /* name of module */ - &dtrace_ops, /* driver ops */ -}; - -static struct modlinkage modlinkage = { - MODREV_1, - (void *)&modldrv, - NULL -}; - -int -_init(void) -{ - return (mod_install(&modlinkage)); -} - -int -_info(struct modinfo *modinfop) -{ - return (mod_info(&modlinkage, modinfop)); -} - -int -_fini(void) -{ - return (mod_remove(&modlinkage)); -} -#else +#endif /* __APPLE__ */ d_open_t _dtrace_open, helper_open; d_close_t _dtrace_close, helper_close; @@ -16309,16 +17937,22 @@ _dtrace_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p) { #pragma unused(p) int err, rv = 0; + user_addr_t uaddrp; + + if (proc_is64bit(p)) + uaddrp = *(user_addr_t *)data; + else + uaddrp = (user_addr_t) *(uint32_t *)data; - err = dtrace_ioctl(dev, (int)cmd, *(intptr_t *)data, fflag, CRED(), &rv); + err = dtrace_ioctl(dev, cmd, uaddrp, fflag, CRED(), &rv); - /* XXX Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */ + /* Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */ if (err != 0) { ASSERT( (err & 0xfffff000) == 0 ); - return (err & 0xfff); /* ioctl returns -1 and errno set to an error code < 4096 */ + return (err & 0xfff); /* ioctl will return -1 and will set errno to an error code < 4096 */ } else if (rv != 0) { ASSERT( (rv & 0xfff00000) == 0 ); - return (((rv & 0xfffff) << 12)); /* ioctl returns -1 and errno set to a return value >= 4096 */ + return (((rv & 0xfffff) << 12)); /* ioctl will return -1 and will set errno to a value >= 4096 */ } else return 0; } @@ -16329,14 +17963,14 @@ helper_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p) #pragma unused(dev,fflag,p) int err, rv = 0; - err = dtrace_ioctl_helper((int)cmd, data, &rv); - /* XXX Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */ + err = dtrace_ioctl_helper(cmd, data, &rv); + /* Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */ if (err != 0) { ASSERT( (err & 0xfffff000) == 0 ); - return (err & 0xfff); /* ioctl returns -1 and errno set to an error code < 4096 */ + return (err & 0xfff); /* ioctl will return -1 and will set errno to an error code < 4096 */ } else if (rv != 0) { ASSERT( (rv & 0xfff00000) == 0 ); - return (((rv & 0xfffff) << 20)); /* ioctl returns -1 and errno set to a return value >= 4096 */ + return (((rv & 0xfffff) << 12)); /* ioctl will return -1 and will set errno to a value >= 4096 */ } else return 0; } @@ -16402,28 +18036,13 @@ helper_init( void ) #undef HELPER_MAJOR -/* - * Called with DEVFS_LOCK held, so vmem_alloc's underlying blist structures are protected. - */ static int dtrace_clone_func(dev_t dev, int action) { #pragma unused(dev) if (action == DEVFS_CLONE_ALLOC) { - if (NULL == dtrace_minor) /* Arena not created yet!?! */ - return 0; - else { - /* - * Propose a minor number, namely the next number that vmem_alloc() will return. - * Immediately put it back in play by calling vmem_free(). 
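Stepping back to the errno overloading in _dtrace_ioctl() and helper_ioctl() a few hunks above: a real error travels in the low 12 bits (values below 4096), while a positive ioctl result is shifted past the errno range into the upper 20 bits. A worked example of the packing arithmetic (macro names here are illustrative, not from the tree):

    #include <assert.h>

    #define PACK_ERRNO(err)  ((err) & 0xfff)            /* real errno: 0..4095 */
    #define PACK_RV(rv)      (((rv) & 0xfffff) << 12)   /* result: 4096 and up */

    static void
    errno_packing_example(void)
    {
        /* A real error such as EBUSY (16) passes through untouched. */
        assert(PACK_ERRNO(16) == 16);

        /* A return value of 3 reaches userspace as errno 12288 ... */
        int packed = PACK_RV(3);
        assert(packed == 12288);

        /* ... and the consumer recovers it with a shift. */
        assert((packed >> 12) == 3);
    }

Since a BSD ioctl can only report -1-plus-errno, this keeps Solaris-style positive return values intact across the syscall boundary.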
@@ -16402,28 +18036,13 @@ helper_init( void )
 
 #undef HELPER_MAJOR
 
-/*
- * Called with DEVFS_LOCK held, so vmem_alloc's underlying blist structures are protected.
- */
 static int
 dtrace_clone_func(dev_t dev, int action)
 {
 #pragma unused(dev)
 
 	if (action == DEVFS_CLONE_ALLOC) {
-		if (NULL == dtrace_minor) /* Arena not created yet!?! */
-			return 0;
-		else {
-			/*
-			 * Propose a minor number, namely the next number that vmem_alloc() will return.
-			 * Immediately put it back in play by calling vmem_free().
-			 */
-			int ret = (int)(uintptr_t)vmem_alloc(dtrace_minor, 1, VM_BESTFIT | VM_SLEEP);
-
-			vmem_free(dtrace_minor, (void *)(uintptr_t)ret, 1);
-
-			return ret;
-		}
+		return dtrace_state_reserve();
 	}
 	else if (action == DEVFS_CLONE_FREE) {
 		return 0;
@@ -16431,6 +18050,34 @@ dtrace_clone_func(dev_t dev, int action)
 	else
 		return -1;
 }
+void dtrace_ast(void);
+
+void
+dtrace_ast(void)
+{
+	int i;
+	uint32_t clients = atomic_and_32(&dtrace_wake_clients, 0);
+	if (clients == 0)
+		return;
+	/*
+	 * We disable preemption here to be sure that we won't be
+	 * preempted by a higher-priority thread that we wake before
+	 * we have issued all of the pending wakeups.
+	 */
+	disable_preemption();
+	for (i = 0; i < DTRACE_NCLIENTS; i++) {
+		if (clients & (1 << i)) {
+			dtrace_state_t *state = dtrace_state_get(i);
+			if (state) {
+				wakeup(state);
+			}
+		}
+	}
+	enable_preemption();
+}
+
 #define DTRACE_MAJOR  -24 /* let the kernel pick the device number */
 
 static struct cdevsw dtrace_cdevsw =
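dtrace_ast() above drains wakeups with a claim-and-drain idiom: the atomic and-with-zero returns the previously pending client bits while clearing them, so every posted bit is serviced exactly once, and bits posted concurrently simply wait for the next pass. A self-contained C11 userspace sketch of the same idiom with illustrative names (only DTRACE_NCLIENTS and dtrace_wake_clients correspond to the real identifiers it mirrors):

	#include <stdatomic.h>
	#include <stdint.h>

	#define NCLIENTS 32                     /* stands in for DTRACE_NCLIENTS */

	static _Atomic uint32_t pending;        /* stands in for dtrace_wake_clients */

	/* Producer side: mark client i as needing a wakeup. */
	static void
	post_wakeup(int i)
	{
		atomic_fetch_or(&pending, 1u << i);
	}

	/*
	 * Consumer side: atomically claim every pending bit, then service
	 * each one. Bits set after the exchange are left for the next pass
	 * rather than lost.
	 */
	static void
	drain_wakeups(void (*service)(int))
	{
		uint32_t claimed = atomic_exchange(&pending, 0);

		for (int i = 0; i < NCLIENTS; i++) {
			if (claimed & (1u << i))
				service(i);
		}
	}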
@@ -16461,7 +18108,37 @@ void
 dtrace_init( void )
 {
 	if (0 == gDTraceInited) {
-		int i, ncpu = NCPU;
+		int i, ncpu;
+		size_t size = sizeof(dtrace_buffer_memory_maxsize);
+
+		/*
+		 * DTrace allocates buffers based on the maximum number
+		 * of enabled cpus. This call avoids any race when finding
+		 * that count.
+		 */
+		ASSERT(dtrace_max_cpus == 0);
+		ncpu = dtrace_max_cpus = ml_get_max_cpus();
+
+		/*
+		 * Retrieve the size of physical memory in order to define
+		 * the maximum size of the state buffer memory. If we cannot
+		 * retrieve this value, we'll assume 1GB of memory per CPU;
+		 * that's still better than raising a kernel panic.
+		 */
+		if (0 != kernel_sysctlbyname("hw.memsize", &dtrace_buffer_memory_maxsize,
+		                             &size, NULL, 0))
+		{
+			/* Widen ncpu before multiplying to avoid 32-bit overflow. */
+			dtrace_buffer_memory_maxsize = (uint64_t)ncpu * 1024 * 1024 * 1024;
+			printf("dtrace_init: failed to retrieve hw.memsize, defaulted to %lld bytes\n",
+			       dtrace_buffer_memory_maxsize);
+		}
+
+		/*
+		 * Finally, divide by three to prevent DTrace from eating too
+		 * much memory.
+		 */
+		dtrace_buffer_memory_maxsize /= 3;
+		ASSERT(dtrace_buffer_memory_maxsize > 0);
 
 		gMajDevNo = cdevsw_add(DTRACE_MAJOR, &dtrace_cdevsw);
@@ -16479,12 +18156,10 @@ dtrace_init( void )
 		}
 
#if defined(DTRACE_MEMORY_ZONES)
-
		/*
		 * Initialize the dtrace kalloc-emulation zones.
		 */
		dtrace_alloc_init();
-
#endif /* DTRACE_MEMORY_ZONES */
 
		/*
@@ -16508,7 +18183,8 @@ dtrace_init( void )
		lck_mtx_init(&dtrace_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&dtrace_provider_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&dtrace_meta_lock, dtrace_lck_grp, dtrace_lck_attr);
-#ifdef DEBUG
+		lck_mtx_init(&dtrace_procwaitfor_lock, dtrace_lck_grp, dtrace_lck_attr);
+#if DEBUG
		lck_mtx_init(&dtrace_errlock, dtrace_lck_grp, dtrace_lck_attr);
#endif
		lck_rw_init(&dtrace_dof_mode_lock, dtrace_lck_grp, dtrace_lck_attr);
@@ -16521,35 +18197,46 @@
		 * the structure is sized to avoid false sharing.
		 */
		lck_mtx_init(&cpu_lock, dtrace_lck_grp, dtrace_lck_attr);
+		lck_mtx_init(&cyc_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&mod_lock, dtrace_lck_grp, dtrace_lck_attr);
 
+		/*
+		 * Initialize the CPU offline/online hooks.
+		 */
+		dtrace_install_cpu_hooks();
+
+		dtrace_modctl_list = NULL;
+
		cpu_core = (cpu_core_t *)kmem_zalloc( ncpu * sizeof(cpu_core_t), KM_SLEEP );
		for (i = 0; i < ncpu; ++i) {
			lck_mtx_init(&cpu_core[i].cpuc_pid_lock, dtrace_lck_grp, dtrace_lck_attr);
		}
 
-		cpu_list = (cpu_t *)kmem_zalloc( ncpu * sizeof(cpu_t), KM_SLEEP );
+		cpu_list = (dtrace_cpu_t *)kmem_zalloc( ncpu * sizeof(dtrace_cpu_t), KM_SLEEP );
		for (i = 0; i < ncpu; ++i) {
			cpu_list[i].cpu_id = (processorid_t)i;
			cpu_list[i].cpu_next = &(cpu_list[(i+1) % ncpu]);
+			LIST_INIT(&cpu_list[i].cpu_cyc_list);
			lck_rw_init(&cpu_list[i].cpu_ft_lock, dtrace_lck_grp, dtrace_lck_attr);
		}
 
		lck_mtx_lock(&cpu_lock);
		for (i = 0; i < ncpu; ++i)
+			/* FIXME: track CPU configuration */
			dtrace_cpu_setup_initial( (processorid_t)i ); /* In lieu of register_cpu_setup_func() callback */
		lck_mtx_unlock(&cpu_lock);
 
		(void)dtrace_abs_to_nano(0LL); /* Force once only call to clock_timebase_info (which can take a lock) */
 
+		dtrace_isa_init();
+
		/*
		 * See dtrace_impl.h for a description of dof modes.
		 * The default is lazy dof.
		 *
-		 * XXX Warn if state is LAZY_OFF? It won't break anything, but
+		 * FIXME: Warn if state is LAZY_OFF? It won't break anything, but
		 * makes no sense...
		 */
-		if (!PE_parse_boot_arg("dtrace_dof_mode", &dtrace_dof_mode)) {
+		if (!PE_parse_boot_argn("dtrace_dof_mode", &dtrace_dof_mode, sizeof (dtrace_dof_mode))) {
			dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
		}
@@ -16575,6 +18262,16 @@ dtrace_init( void )
			break;
		}
 
+		/*
+		 * See dtrace_impl.h for a description of kernel symbol modes.
+		 * The default is to wait for symbols from userspace (lazy symbols).
+		 */
+		if (!PE_parse_boot_argn("dtrace_kernel_symbol_mode", &dtrace_kernel_symbol_mode, sizeof (dtrace_kernel_symbol_mode))) {
+			dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE;
+		}
+
+		dtrace_restriction_policy_load();
+
		gDTraceInited = 1;
 
	} else
@@ -16584,7 +18281,29 @@ void
 dtrace_postinit(void)
 {
-	dtrace_attach( (dev_info_t *)makedev(gMajDevNo, 0), 0 );
+	/*
+	 * Called from bsd_init after all providers' *_init() routines have been
+	 * run. That way, anonymous DOF enabled during dtrace_attach() is safe
+	 * to go.
+	 */
+	dtrace_attach( (dev_info_t *)(uintptr_t)makedev(gMajDevNo, 0), 0 ); /* Punning a dev_t to a dev_info_t* */
+
+	/*
+	 * Add the mach_kernel to the module list for lazy processing.
+	 */
+	struct kmod_info fake_kernel_kmod;
+	memset(&fake_kernel_kmod, 0, sizeof(fake_kernel_kmod));
+
+	strlcpy(fake_kernel_kmod.name, "mach_kernel", sizeof(fake_kernel_kmod.name));
+	fake_kernel_kmod.id = 1;
+	fake_kernel_kmod.address = g_kernel_kmod_info.address;
+	fake_kernel_kmod.size = g_kernel_kmod_info.size;
+
+	if (dtrace_module_loaded(&fake_kernel_kmod, 0) != 0) {
+		printf("dtrace_postinit: Could not register mach_kernel modctl\n");
+	}
+
+	(void)OSKextRegisterKextsWithDTrace();
 }
 
 #undef DTRACE_MAJOR
@@ -16603,4 +18322,3 @@ unregister_cpu_setup_func(cpu_setup_func_t *ignore1, void *ignore2)
 {
 #pragma unused(ignore1,ignore2)
 }
-#endif /* __APPLE__ */
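A final note on the boot-arg changes above: PE_parse_boot_arg() took no destination size, while its replacement PE_parse_boot_argn() is told how large the destination is and will not write past it. A sketch of the idiom under illustrative names; only the "dtrace_dof_mode" boot-arg string and the PE_parse_boot_argn() KPI come from the diff, and the stand-in constant's real definition lives in dtrace_impl.h:

	#include <stdint.h>
	#include <pexpert/pexpert.h>	/* PE_parse_boot_argn() */

	#define EXAMPLE_DOF_MODE_LAZY_ON 1	/* stand-in for DTRACE_DOF_MODE_LAZY_ON */

	static uint32_t example_dof_mode;

	static void
	example_read_dof_mode(void)
	{
		/*
		 * The _argn variant writes at most sizeof(example_dof_mode)
		 * bytes and returns FALSE when the argument is absent, in
		 * which case we fall back to the documented default.
		 */
		if (!PE_parse_boot_argn("dtrace_dof_mode", &example_dof_mode,
		    sizeof(example_dof_mode)))
			example_dof_mode = EXAMPLE_DOF_MODE_LAZY_ON;
	}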