*/
/*
- * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
+ * Portions copyright (c) 2011, Joyent, Inc. All rights reserved.
+ */
+
+/*
+ * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
*/
#include <sys/systm.h>
#include <sys/dtrace_impl.h>
#include <sys/param.h>
+#include <sys/proc_internal.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <miscfs/devfs/devfs.h>
#include <sys/user.h>
#include <mach/exception_types.h>
#include <sys/signalvar.h>
+#include <mach/task.h>
#include <kern/zalloc.h>
#include <kern/ast.h>
#include <netinet/in.h>
#if defined(__APPLE__)
+#include <kern/cpu_data.h>
extern uint32_t pmap_find_phys(void *, uint64_t);
extern boolean_t pmap_valid_page(uint32_t);
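+/* Invoked from dtrace_postinit() to hand the set of already-loaded kexts to dtrace. */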
+extern void OSKextRegisterKextsWithDTrace(void);
+extern kmod_info_t g_kernel_kmod_info;
#endif /* __APPLE__ */
extern kern_return_t chudxnu_dtrace_callback
(uint64_t selector, uint64_t *args, uint32_t count);
+
#endif /* __APPLE__ */
/*
size_t dtrace_actions_max = (16 * 1024);
size_t dtrace_retain_max = 1024;
dtrace_optval_t dtrace_helper_actions_max = 32;
-dtrace_optval_t dtrace_helper_providers_max = 32;
+dtrace_optval_t dtrace_helper_providers_max = 64;
dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024);
size_t dtrace_strsize_default = 256;
-dtrace_optval_t dtrace_cleanrate_default = 9900990; /* 101 hz */
-dtrace_optval_t dtrace_cleanrate_min = 200000; /* 5000 hz */
+dtrace_optval_t dtrace_cleanrate_default = 990099000; /* 1.01 hz */
+dtrace_optval_t dtrace_cleanrate_min = 20000000; /* 50 hz */
dtrace_optval_t dtrace_cleanrate_max = (uint64_t)60 * NANOSEC; /* 1/minute */
dtrace_optval_t dtrace_aggrate_default = NANOSEC; /* 1 hz */
dtrace_optval_t dtrace_statusrate_default = NANOSEC; /* 1 hz */
* it is used by some translators as an implementation detail.
*/
const char dtrace_zero[256] = { 0 }; /* zero-filled memory */
-
+unsigned int dtrace_max_cpus = 0; /* number of enabled cpus */
/*
* DTrace Internal Variables
*/
static dtrace_dynvar_t dtrace_dynhash_sink; /* end of dynamic hash chains */
#if defined(__APPLE__)
static int dtrace_dof_mode; /* See dtrace_impl.h for a description of Darwin's dof modes. */
+
+ /*
+ * This doesn't quite fit as an internal variable, as it must be accessed in
+ * fbt_provide and sdt_provide. It's clearly not a dtrace tunable variable either...
+ */
+int dtrace_kernel_symbol_mode; /* See dtrace_impl.h for a description of Darwin's kernel symbol modes. */
#endif
#if defined(__APPLE__)
*/
struct zone *dtrace_probe_t_zone;
+
+static int dtrace_module_unloaded(struct kmod_info *kmod);
#endif /* __APPLE__ */
/*
dtrace_nullop(void)
{}
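+
+/*
+ * The enable entry point now returns an int so that a provider can report
+ * failure to enable a probe; this no-op variant simply reports success.
+ */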
+static int
+dtrace_enable_nullop(void)
+{
+ return (0);
+}
+
static dtrace_pops_t dtrace_provider_ops = {
(void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop,
(void (*)(void *, struct modctl *))dtrace_nullop,
- (void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
+ (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop,
(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
(where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \
(((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
}
-#else
-#if (defined(__x86_64__) || defined(__ppc64__))
+#else
+#if defined (__x86_64__)
/* FIXME: two function calls!! */
#define DTRACE_TLS_THRKEY(where) { \
uint_t intr = ml_at_interrupt_context(); /* Note: just one measly bit */ \
(((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
}
#else
-/* FIXME: three function calls!!! */
-#define DTRACE_TLS_THRKEY(where) { \
- uint_t intr = ml_at_interrupt_context(); /* Note: just one measly bit */ \
- uint64_t thr = (uintptr_t)current_thread(); \
- uint_t pid = (uint_t)proc_selfpid(); \
- ASSERT(intr < (1 << 3)); \
- (where) = (((thr << 32 | pid) + DIF_VARIABLE_MAX) & \
- (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
-}
+#error Unknown architecture
#endif
#endif /* __APPLE__ */
#define DTRACE_STORE(type, tomax, offset, what) \
*((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what);
-#if !defined(__APPLE__)
-#ifndef __i386
-#define DTRACE_ALIGNCHECK(addr, size, flags) \
- if (addr & (size - 1)) { \
- *flags |= CPU_DTRACE_BADALIGN; \
- cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \
- return (0); \
- }
-#else
-#define DTRACE_ALIGNCHECK(addr, size, flags)
-#endif
-#else /* __APPLE__ */
+
#define DTRACE_ALIGNCHECK(addr, size, flags) \
if (addr & (MIN(size,4) - 1)) { \
*flags |= CPU_DTRACE_BADALIGN; \
cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \
return (0); \
}
-#endif /* __APPLE__ */
/*
* Test whether a range of memory starting at testaddr of size testsz falls
return (!(*flags & CPU_DTRACE_FAULT) ? rval : 0); \
}
#else /* __APPLE__ */
-#define RECOVER_LABEL(bits) __asm__ volatile("_dtraceLoadRecover" #bits ":" );
+#define RECOVER_LABEL(bits) dtraceLoadRecover##bits:
-#if (defined(__i386__) || defined (__x86_64__))
+#if defined (__x86_64__)
#define DTRACE_LOADFUNC(bits) \
/*CSTYLED*/ \
-extern vm_offset_t dtraceLoadRecover##bits; \
uint##bits##_t dtrace_load##bits(uintptr_t addr); \
\
uint##bits##_t \
} \
\
{ \
- volatile vm_offset_t recover = (vm_offset_t)&dtraceLoadRecover##bits; \
+ volatile vm_offset_t recover = (vm_offset_t)&&dtraceLoadRecover##bits; \
*flags |= CPU_DTRACE_NOFAULT; \
recover = dtrace_set_thread_recover(current_thread(), recover); \
/*CSTYLED*/ \
return (rval); \
}
#else /* all other architectures */
-#define DTRACE_LOADFUNC(bits) \
-/*CSTYLED*/ \
-extern vm_offset_t dtraceLoadRecover##bits; \
-uint##bits##_t dtrace_load##bits(uintptr_t addr); \
- \
-uint##bits##_t \
-dtrace_load##bits(uintptr_t addr) \
-{ \
- size_t size = bits / NBBY; \
- /*CSTYLED*/ \
- uint##bits##_t rval = 0; \
- int i; \
- volatile uint16_t *flags = (volatile uint16_t *) \
- &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; \
- \
- DTRACE_ALIGNCHECK(addr, size, flags); \
- \
- for (i = 0; i < dtrace_toxranges; i++) { \
- if (addr >= dtrace_toxrange[i].dtt_limit) \
- continue; \
- \
- if (addr + size <= dtrace_toxrange[i].dtt_base) \
- continue; \
- \
- /* \
- * This address falls within a toxic region; return 0. \
- */ \
- *flags |= CPU_DTRACE_BADADDR; \
- cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \
- return (0); \
- } \
- \
- { \
- volatile vm_offset_t recover = (vm_offset_t)&dtraceLoadRecover##bits; \
- *flags |= CPU_DTRACE_NOFAULT; \
- recover = dtrace_set_thread_recover(current_thread(), recover); \
- /*CSTYLED*/ \
- rval = *((volatile uint##bits##_t *)addr); \
- RECOVER_LABEL(bits); \
- (void)dtrace_set_thread_recover(current_thread(), recover); \
- *flags &= ~CPU_DTRACE_NOFAULT; \
- } \
- \
- return (rval); \
-}
+#error Unknown architecture
#endif
#endif /* __APPLE__ */
#define DTRACE_DYNHASH_SINK 1
#define DTRACE_DYNHASH_VALID 2
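+/* Returned by a match callback when a provider fails to enable a probe. */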
+#define DTRACE_MATCH_FAIL -1
#define DTRACE_MATCH_NEXT 0
#define DTRACE_MATCH_DONE 1
#define DTRACE_ANCHORED(probe) ((probe)->dtpr_func[0] != '\0')
* for these functions, there will be a comment above the function reading
* "Note: not called from probe context."
*/
-void
-dtrace_panic(const char *format, ...)
-{
- va_list alist;
-
- va_start(alist, format);
- dtrace_vpanic(format, alist);
- va_end(alist);
-}
int
dtrace_assfail(const char *a, const char *f, int l)
{
- dtrace_panic("assertion failed: %s, file: %s, line: %d", a, f, l);
+ panic("dtrace: assertion failed: %s, file: %s, line: %d", a, f, l);
/*
* We just need something here that even the most clever compiler
#else
if ((cr = dtrace_CRED()) != NULL &&
#endif /* __APPLE__ */
- s_cr->cr_uid == cr->cr_uid &&
- s_cr->cr_uid == cr->cr_ruid &&
- s_cr->cr_uid == cr->cr_suid &&
- s_cr->cr_gid == cr->cr_gid &&
- s_cr->cr_gid == cr->cr_rgid &&
- s_cr->cr_gid == cr->cr_sgid)
+ posix_cred_get(s_cr)->cr_uid == posix_cred_get(cr)->cr_uid &&
+ posix_cred_get(s_cr)->cr_uid == posix_cred_get(cr)->cr_ruid &&
+ posix_cred_get(s_cr)->cr_uid == posix_cred_get(cr)->cr_suid &&
+ posix_cred_get(s_cr)->cr_gid == posix_cred_get(cr)->cr_gid &&
+ posix_cred_get(s_cr)->cr_gid == posix_cred_get(cr)->cr_rgid &&
+ posix_cred_get(s_cr)->cr_gid == posix_cred_get(cr)->cr_sgid)
return (1);
return (0);
lquanta[levels + 1] += incr;
}
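+
+/*
+ * Map a value to its log/linear quantization bucket. For example, with
+ * factor = 10, low = 0, high = 2 and nsteps = 10: values below 1 fall in
+ * bucket 0, [1, 10) maps linearly to buckets 1-9, [10, 100) to buckets
+ * 10-18, [100, 1000) to buckets 19-27, and values >= 1000 land in the
+ * top bucket, 28.
+ */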
+static int
+dtrace_aggregate_llquantize_bucket(int16_t factor, int16_t low, int16_t high,
+ int16_t nsteps, int64_t value)
+{
+ int64_t this = 1, last, next;
+ int base = 1, order;
+
+ for (order = 0; order < low; ++order)
+ this *= factor;
+
+ /*
+ * If our value is less than our factor taken to the power of the
+ * low order of magnitude, it goes into the zeroth bucket.
+ */
+ if (value < this)
+ return 0;
+ else
+ last = this;
+
+ for (this *= factor; order <= high; ++order) {
+ int nbuckets = this > nsteps ? nsteps : this;
+
+ /*
+ * We should not generally get log/linear quantizations
+ * with a high magnitude that allows 64-bits to
+ * overflow, but we nonetheless protect against this
+ * by explicitly checking for overflow, and clamping
+ * our value accordingly.
+ */
+ next = this * factor;
+ if (next < this) {
+ value = this - 1;
+ }
+
+ /*
+ * If our value lies within this order of magnitude,
+ * determine its position by taking the offset within
+ * the order of magnitude, dividing by the bucket
+ * width, and adding to our (accumulated) base.
+ */
+ if (value < this) {
+ return (base + (value - last) / (this / nbuckets));
+ }
+
+ base += nbuckets - (nbuckets / factor);
+ last = this;
+ this = next;
+ }
+
+ /*
+ * Our value is greater than or equal to our factor taken to the
+ * power of one plus the high magnitude -- return the top bucket.
+ */
+ return base;
+}
+
+static void
+dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr)
+{
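+	/* The first 64-bit word holds the encoded factor/low/high/nsteps; the buckets follow. */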
+ uint64_t arg = *llquanta++;
+ uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg);
+ uint16_t low = DTRACE_LLQUANTIZE_LOW(arg);
+ uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg);
+ uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg);
+
+ llquanta[dtrace_aggregate_llquantize_bucket(factor, low, high, nsteps, nval)] += incr;
+}
+
/*ARGSUSED*/
static void
dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg)
/* Anchored probe that fires while on an interrupt accrues to process 0 */
return 0;
- return ((uint64_t)proc_selfpid());
+ return ((uint64_t)dtrace_proc_selfpid());
#endif /* __APPLE__ */
#if !defined(__APPLE__)
if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
return (0);
- return ((uint64_t)proc_selfppid());
+ return ((uint64_t)dtrace_proc_selfppid());
#endif /* __APPLE__ */
#if !defined(__APPLE__)
#else
case DIF_VAR_ZONENAME:
+ {
+ /* scratch_size is equal to length('global') + 1 for the null-terminator. */
+ char *zname = (char *)mstate->dtms_scratch_ptr;
+ size_t scratch_size = 6 + 1;
+
if (!dtrace_priv_proc(state))
return (0);
-
- /* FIXME: return e.g. "global" allocated from scratch a la execname. */
- return ((uint64_t)(uintptr_t)NULL); /* Darwin doesn't do "zones" */
+
+ /* The scratch allocation's lifetime is that of the clause. */
+ if (!DTRACE_INSCRATCH(mstate, scratch_size)) {
+ DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
+ return 0;
+ }
+
+ mstate->dtms_scratch_ptr += scratch_size;
+
+	/* The kernel does not provide zonename; it will always return 'global'. */
+ strlcpy(zname, "global", scratch_size);
+
+ return ((uint64_t)(uintptr_t)zname);
+ }
#endif /* __APPLE__ */
#if !defined(__APPLE__)
return ((uint64_t)curthread->t_procp->p_cred->cr_uid);
#else
case DIF_VAR_UID:
- if (!dtrace_priv_proc(state))
+ if (!dtrace_priv_proc_relaxed(state))
return (0);
/*
if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
return (0);
- if (dtrace_CRED() != NULL)
- /* Credential does not require lazy initialization. */
- return ((uint64_t)kauth_getuid());
- else {
- /* proc_lock would be taken under kauth_cred_proc_ref() in kauth_cred_get(). */
- DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
- return -1ULL;
- }
+ return ((uint64_t) dtrace_proc_selfruid());
#endif /* __APPLE__ */
#if !defined(__APPLE__)
#if !defined(__APPLE__)
ipaddr_t ip4;
#else
- in_addr_t ip4;
+ uint32_t ip4;
#endif /* __APPLE__ */
uint8_t *ptr8, val;
/*
* Safely load the IPv4 address.
*/
+#if !defined(__APPLE__)
ip4 = dtrace_load32(tupregs[argi].dttk_value);
-
+#else
+ dtrace_bcopy(
+ (void *)(uintptr_t)tupregs[argi].dttk_value,
+ (void *)(uintptr_t)&ip4, sizeof (ip4));
+#endif /* __APPLE__ */
/*
* Check an IPv4 string will fit in scratch.
*/
* thread calls panic() from dtrace_probe(), and that panic() is
* called exactly once.)
*/
- dtrace_panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)",
+ panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)",
probe->dtpr_provider->dtpv_name, probe->dtpr_mod,
probe->dtpr_func, probe->dtpr_name, (void *)ecb);
if (uthread && uthread->t_dtrace_sig == 0) {
uthread->t_dtrace_sig = sig;
- astbsd_on();
+ act_set_astbsd(current_thread());
}
#endif /* __APPLE__ */
}
aston(curthread);
}
#else
- uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
-
- if (uthread && uthread->t_dtrace_stop == 0) {
+ uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
+ if (uthread) {
+ /*
+ * The currently running process will be set to task_suspend
+ * when it next leaves the kernel.
+ */
uthread->t_dtrace_stop = 1;
- astbsd_on();
+ act_set_astbsd(current_thread());
}
#endif /* __APPLE__ */
}
+#if defined(__APPLE__)
+static void
+dtrace_action_pidresume(uint64_t pid)
+{
+ if (dtrace_destructive_disallow)
+ return;
+
+ if (kauth_cred_issuser(kauth_cred_get()) == 0) {
+ DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
+ return;
+ }
+ uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
+
+ /*
+ * When the currently running process leaves the kernel, it attempts to
+ * task_resume the process (denoted by pid), if that pid appears to have
+ * been stopped by dtrace_action_stop().
+ * The currently running process has a pidresume() queue depth of 1 --
+ * subsequent invocations of the pidresume() action are ignored.
+ */
+
+ if (pid != 0 && uthread && uthread->t_dtrace_resumepid == 0) {
+ uthread->t_dtrace_resumepid = pid;
+ act_set_astbsd(current_thread());
+ }
+}
+#endif /* __APPLE__ */
+
+
static void
dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val)
{
hrtime_t now;
volatile uint16_t *flags;
- cpu_t *cpu = CPU;
+ dtrace_cpu_t *cpu = CPU;
if (dtrace_destructive_disallow)
return;
ASSERT(s_cr != NULL);
+ /*
+ * XXX this is hackish, but so is setting a variable
+ * XXX in a McCarthy OR...
+ */
#if !defined(__APPLE__)
if ((cr = CRED()) == NULL ||
#else
if ((cr = dtrace_CRED()) == NULL ||
#endif /* __APPLE__ */
- s_cr->cr_uid != cr->cr_uid ||
- s_cr->cr_uid != cr->cr_ruid ||
- s_cr->cr_uid != cr->cr_suid ||
- s_cr->cr_gid != cr->cr_gid ||
- s_cr->cr_gid != cr->cr_rgid ||
- s_cr->cr_gid != cr->cr_sgid ||
+ posix_cred_get(s_cr)->cr_uid != posix_cred_get(cr)->cr_uid ||
+ posix_cred_get(s_cr)->cr_uid != posix_cred_get(cr)->cr_ruid ||
+ posix_cred_get(s_cr)->cr_uid != posix_cred_get(cr)->cr_suid ||
+ posix_cred_get(s_cr)->cr_gid != posix_cred_get(cr)->cr_gid ||
+ posix_cred_get(s_cr)->cr_gid != posix_cred_get(cr)->cr_rgid ||
+ posix_cred_get(s_cr)->cr_gid != posix_cred_get(cr)->cr_sgid ||
#if !defined(__APPLE__)
(proc = ttoproc(curthread)) == NULL ||
(proc->p_flag & SNOCD))
dtrace_action_raise(val);
continue;
+#if defined(__APPLE__)
+ case DTRACEACT_PIDRESUME:
+ if (dtrace_priv_proc_destructive(state))
+ dtrace_action_pidresume(val);
+ continue;
+#endif /* __APPLE__ */
+
case DTRACEACT_COMMIT:
ASSERT(!committed);
continue;
DTRACE_STORE(uint64_t, tomax,
- valoffs, (uint64_t)proc_selfpid());
+ valoffs, (uint64_t)dtrace_proc_selfpid());
DTRACE_STORE(uint64_t, tomax,
valoffs + sizeof (uint64_t), val);
on some function in the transitive closure of the call to dtrace_probe(). Solaris has some
strong guarantees that this won't happen, the Darwin implementation is not so mature as to
make those guarantees. */
+
void
dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
uint64_t arg2, uint64_t arg3, uint64_t arg4)
{
thread_t thread = current_thread();
-
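+	/* Disable preemption so the probe fires entirely on one CPU. */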
+ disable_preemption();
if (id == dtrace_probeid_error) {
__dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
dtrace_getipl(); /* Defeat tail-call optimization of __dtrace_probe() */
#if DEBUG
else __dtrace_probe(dtrace_probeid_error, 0, id, 1, -1, DTRACEFLT_UNKNOWN);
#endif
+ enable_preemption();
}
#endif /* __APPLE__ */
{
dtrace_probe_t template, *probe;
dtrace_hash_t *hash = NULL;
- int len, best = INT_MAX, nmatched = 0;
+ int len, rc, best = INT_MAX, nmatched = 0;
dtrace_id_t i;
lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
if (pkp->dtpk_id != DTRACE_IDNONE) {
if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL &&
dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) {
- (void) (*matched)(probe, arg);
+ if ((*matched)(probe, arg) == DTRACE_MATCH_FAIL)
+ return (DTRACE_MATCH_FAIL);
nmatched++;
}
return (nmatched);
nmatched++;
- if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT)
- break;
+ if ((rc = (*matched)(probe, arg)) != DTRACE_MATCH_NEXT) {
+ if (rc == DTRACE_MATCH_FAIL)
+ return (DTRACE_MATCH_FAIL);
+ break;
+ }
}
return (nmatched);
nmatched++;
- if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT)
- break;
+ if ((rc = (*matched)(probe, arg)) != DTRACE_MATCH_NEXT) {
+ if (rc == DTRACE_MATCH_FAIL)
+ return (DTRACE_MATCH_FAIL);
+ break;
+ }
}
return (nmatched);
dtrace_probe_t *probe, *first = NULL;
if (old->dtpv_pops.dtps_enable ==
- (void (*)(void *, dtrace_id_t, void *))dtrace_nullop) {
+ (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop) {
/*
* If DTrace itself is the provider, we're called with locks
* already held.
/*
* Attempt to destroy the probes associated with this provider.
*/
- for (i = 0; i < dtrace_nprobes; i++) {
- if ((probe = dtrace_probes[i]) == NULL)
- continue;
-
- if (probe->dtpr_provider != old)
- continue;
-
- if (probe->dtpr_ecb == NULL)
- continue;
-
+	if (old->ecb_count != 0) {
/*
* We have at least one ECB; we can't remove this provider.
*/
* All of the probes for this provider are disabled; we can safely
* remove all of them from their hash chains and from the probe array.
*/
- for (i = 0; i < dtrace_nprobes; i++) {
+	for (i = 0; i < dtrace_nprobes && old->probe_count != 0; i++) {
if ((probe = dtrace_probes[i]) == NULL)
continue;
continue;
dtrace_probes[i] = NULL;
+ old->probe_count--;
dtrace_hash_remove(dtrace_bymod, probe);
dtrace_hash_remove(dtrace_byfunc, probe);
dtrace_provider_t *pvp = (dtrace_provider_t *)id;
ASSERT(pvp->dtpv_pops.dtps_enable !=
- (void (*)(void *, dtrace_id_t, void *))dtrace_nullop);
+ (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);
lck_mtx_lock(&dtrace_provider_lock);
lck_mtx_lock(&dtrace_lock);
* Make sure this isn't the dtrace provider itself.
*/
ASSERT(prov->dtpv_pops.dtps_enable !=
- (void (*)(void *, dtrace_id_t, void *))dtrace_nullop);
+ (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);
lck_mtx_lock(&dtrace_provider_lock);
lck_mtx_lock(&dtrace_lock);
continue;
dtrace_probes[i] = NULL;
+ prov->probe_count--;
dtrace_hash_remove(dtrace_bymod, probe);
dtrace_hash_remove(dtrace_byfunc, probe);
ASSERT(dtrace_probes[id - 1] == NULL);
dtrace_probes[id - 1] = probe;
+ provider->probe_count++;
if (provider != dtrace_provider)
lck_mtx_unlock(&dtrace_lock);
{
struct modctl *ctl;
int all = 0;
-#pragma unused(ctl) /* __APPLE__ */
lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED);
all = 1;
prv = dtrace_provider;
}
-
+
do {
/*
* First, call the blanket provide operation.
*/
prv->dtpv_pops.dtps_provide(prv->dtpv_arg, desc);
-
-#if !defined(__APPLE__)
+
/*
* Now call the per-module provide operation. We will grab
* mod_lock to prevent the list from being modified. Note
* that this also prevents the mod_busy bits from changing.
* (mod_busy can only be changed with mod_lock held.)
*/
- mutex_enter(&mod_lock);
-
+ lck_mtx_lock(&mod_lock);
+
+#if !defined(__APPLE__)
ctl = &modules;
do {
if (ctl->mod_busy || ctl->mod_mp == NULL)
prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);
} while ((ctl = ctl->mod_next) != &modules);
-
- mutex_exit(&mod_lock);
#else
-#if 0 /* FIXME: Workaround for PR_4643546 */
- /* NOTE: kmod_lock has been removed. */
- simple_lock(&kmod_lock);
-
- kmod_info_t *ktl = kmod;
- while (ktl) {
- prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ktl);
- ktl = ktl->next;
+ ctl = dtrace_modctl_list;
+ while (ctl) {
+ prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);
+ ctl = ctl->mod_next;
}
-
- simple_unlock(&kmod_lock);
-#else
- /*
- * Don't bother to iterate over the kmod list. At present only fbt
- * offers a provide_module in its dtpv_pops, and then it ignores the
- * module anyway.
- */
- prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, NULL);
#endif
-#endif /* __APPLE__ */
+
+ lck_mtx_unlock(&mod_lock);
} while (all && (prv = prv->dtpv_next) != NULL);
}
break;
default:
- err += efunc(dp->dtdo_len - 1, "bad return size");
+ err += efunc(dp->dtdo_len - 1, "bad return size\n");
}
}
return (ecb);
}
-static void
+static int
dtrace_ecb_enable(dtrace_ecb_t *ecb)
{
dtrace_probe_t *probe = ecb->dte_probe;
/*
* This is the NULL probe -- there's nothing to do.
*/
- return;
+ return(0);
}
+ probe->dtpr_provider->ecb_count++;
if (probe->dtpr_ecb == NULL) {
dtrace_provider_t *prov = probe->dtpr_provider;
if (ecb->dte_predicate != NULL)
probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid;
- prov->dtpv_pops.dtps_enable(prov->dtpv_arg,
- probe->dtpr_id, probe->dtpr_arg);
+ return (prov->dtpv_pops.dtps_enable(prov->dtpv_arg,
+ probe->dtpr_id, probe->dtpr_arg));
} else {
/*
* This probe is already active. Swing the last pointer to
probe->dtpr_predcache = 0;
dtrace_sync();
+ return(0);
}
}
break;
}
+ case DTRACEAGG_LLQUANTIZE: {
+ uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg);
+ uint16_t low = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg);
+ uint16_t high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg);
+ uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg);
+ int64_t v;
+
+ agg->dtag_initial = desc->dtad_arg;
+ agg->dtag_aggregate = dtrace_aggregate_llquantize;
+
+ if (factor < 2 || low >= high || nsteps < factor)
+ goto err;
+
+ /*
+ * Now check that the number of steps evenly divides a power
+ * of the factor. (This assures both integer bucket size and
+ * linearity within each magnitude.)
+ */
+ for (v = factor; v < nsteps; v *= factor)
+ continue;
+
+ if ((v % nsteps) || (nsteps % factor))
+ goto err;
+
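+		/* One word per bucket (0 through the top bucket) plus a leading word for the encoded argument. */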
+ size = (dtrace_aggregate_llquantize_bucket(factor, low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t);
+ break;
+ }
+
case DTRACEAGG_AVG:
agg->dtag_aggregate = dtrace_aggregate_avg;
size = sizeof (uint64_t) * 2;
case DTRACEACT_CHILL:
case DTRACEACT_DISCARD:
case DTRACEACT_RAISE:
+#if defined(__APPLE__)
+ case DTRACEACT_PIDRESUME:
+#endif /* __APPLE__ */
if (dp == NULL)
return (EINVAL);
break;
probe->dtpr_ecb_last = prev;
}
+ probe->dtpr_provider->ecb_count--;
/*
* The ECB has been disconnected from the probe; now sync to assure
* that all CPUs have seen the change before returning.
if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL)
return (DTRACE_MATCH_DONE);
- dtrace_ecb_enable(ecb);
+ if (dtrace_ecb_enable(ecb) < 0)
+ return (DTRACE_MATCH_FAIL);
+
return (DTRACE_MATCH_NEXT);
}
dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,
processorid_t cpu)
{
- cpu_t *cp;
+ dtrace_cpu_t *cp;
dtrace_buffer_t *buf;
lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)
{
int i = 0;
- int matched = 0;
+ int total_matched = 0, matched = 0;
lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
enab->dten_current = ep;
enab->dten_error = 0;
- matched += dtrace_probe_enable(&ep->dted_probe, enab);
+ /*
+ * If a provider failed to enable a probe then get out and
+ * let the consumer know we failed.
+ */
+ if ((matched = dtrace_probe_enable(&ep->dted_probe, enab)) < 0)
+ return (EBUSY);
+
+ total_matched += matched;
if (enab->dten_error != 0) {
/*
enab->dten_probegen = dtrace_probegen;
if (nmatched != NULL)
- *nmatched = matched;
+ *nmatched = total_matched;
return (0);
}
#if !defined(__APPLE__)
dof = kmem_alloc(hdr.dofh_loadsz, KM_SLEEP);
- if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0) {
+ if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0 ||
+ dof->dofh_loadsz != hdr.dofh_loadsz) {
+ kmem_free(dof, hdr.dofh_loadsz);
+ *errp = EFAULT;
+ return (NULL);
+ }
#else
dof = dt_kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP);
- if (copyin(uarg, dof, hdr.dofh_loadsz) != 0) {
+ if (copyin(uarg, dof, hdr.dofh_loadsz) != 0 ||
+ dof->dofh_loadsz != hdr.dofh_loadsz) {
+ dt_kmem_free_aligned(dof, hdr.dofh_loadsz);
+ *errp = EFAULT;
+ return (NULL);
+ }
#endif
- dt_kmem_free_aligned(dof, hdr.dofh_loadsz);
- *errp = EFAULT;
- return (NULL);
- }
return (dof);
}
/*
* DTrace Hook Functions
*/
+
+#if defined(__APPLE__)
+/*
+ * Routines to manipulate the modctl list within dtrace
+ */
+
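+/* Head of dtrace's shadow list of kext modctls; manipulated with mod_lock held. */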
+modctl_t *dtrace_modctl_list;
+
static void
-dtrace_module_loaded(struct modctl *ctl)
+dtrace_modctl_add(struct modctl * newctl)
{
- dtrace_provider_t *prv;
+ struct modctl *nextp, *prevp;
- lck_mtx_lock(&dtrace_provider_lock);
- lck_mtx_lock(&mod_lock);
+ ASSERT(newctl != NULL);
+ lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED);
-#if !defined(__APPLE__)
- ASSERT(ctl->mod_busy);
-#else
- /* FIXME: awaits kmod awareness PR_4648477. */
-#endif /* __APPLE__ */
+	// Insert new module at the front of the list.
+
+ newctl->mod_next = dtrace_modctl_list;
+ dtrace_modctl_list = newctl;
/*
- * We're going to call each providers per-module provide operation
- * specifying only this module.
+ * If a module exists with the same name, then that module
+ * must have been unloaded with enabled probes. We will move
+ * the unloaded module to the new module's stale chain and
+ * then stop traversing the list.
*/
- for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
- prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);
- lck_mtx_unlock(&mod_lock);
- lck_mtx_unlock(&dtrace_provider_lock);
+ prevp = newctl;
+ nextp = newctl->mod_next;
+
+ while (nextp != NULL) {
+ if (nextp->mod_loaded) {
+ /* This is a loaded module. Keep traversing. */
+ prevp = nextp;
+ nextp = nextp->mod_next;
+ continue;
+ }
+ else {
+ /* Found an unloaded module */
+ if (strncmp (newctl->mod_modname, nextp->mod_modname, KMOD_MAX_NAME)) {
+ /* Names don't match. Keep traversing. */
+ prevp = nextp;
+ nextp = nextp->mod_next;
+ continue;
+ }
+ else {
+ /* We found a stale entry, move it. We're done. */
+ prevp->mod_next = nextp->mod_next;
+ newctl->mod_stale = nextp;
+ nextp->mod_next = NULL;
+ break;
+ }
+ }
+ }
+}
- /*
- * If we have any retained enablings, we need to match against them.
- * Enabling probes requires that cpu_lock be held, and we cannot hold
- * cpu_lock here -- it is legal for cpu_lock to be held when loading a
- * module. (In particular, this happens when loading scheduling
- * classes.) So if we have any retained enablings, we need to dispatch
- * our task queue to do the match for us.
- */
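+
+/* Look up the shadow modctl entry for a kext, keyed by its kmod id. */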
+static modctl_t *
+dtrace_modctl_lookup(struct kmod_info * kmod)
+{
+ lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED);
+
+ struct modctl * ctl;
+
+ for (ctl = dtrace_modctl_list; ctl; ctl=ctl->mod_next) {
+ if (ctl->mod_id == kmod->id)
+ return(ctl);
+ }
+ return (NULL);
+}
+
+/*
+ * This routine is called from dtrace_module_unloaded().
+ * It removes a modctl structure and its stale chain
+ * from the kext shadow list.
+ */
+static void
+dtrace_modctl_remove(struct modctl * ctl)
+{
+ ASSERT(ctl != NULL);
+ lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED);
+ modctl_t *prevp, *nextp, *curp;
+
+ // Remove stale chain first
+ for (curp=ctl->mod_stale; curp != NULL; curp=nextp) {
+ nextp = curp->mod_stale;
+ /* There should NEVER be user symbols allocated at this point */
+ ASSERT(curp->mod_user_symbols == NULL);
+ kmem_free(curp, sizeof(modctl_t));
+ }
+
+ prevp = NULL;
+ curp = dtrace_modctl_list;
+
+ while (curp != ctl) {
+ prevp = curp;
+ curp = curp->mod_next;
+ }
+
+ if (prevp != NULL) {
+ prevp->mod_next = ctl->mod_next;
+ }
+ else {
+ dtrace_modctl_list = ctl->mod_next;
+ }
+
+ /* There should NEVER be user symbols allocated at this point */
+ ASSERT(ctl->mod_user_symbols == NULL);
+
+ kmem_free (ctl, sizeof(modctl_t));
+}
+
+#endif /* __APPLE__ */
+
+/*
+ * APPLE NOTE: The kext loader will call dtrace_module_loaded
+ * when the kext is loaded in memory, but before calling the
+ * kext's start routine.
+ *
+ * Return 0 on success
+ * Return -1 on failure
+ */
+
+#if !defined (__APPLE__)
+static void
+dtrace_module_loaded(struct modctl *ctl)
+#else
+static int
+dtrace_module_loaded(struct kmod_info *kmod, uint32_t flag)
+#endif /* __APPLE__ */
+{
+ dtrace_provider_t *prv;
+
+#if !defined(__APPLE__)
+ mutex_enter(&dtrace_provider_lock);
+ mutex_enter(&mod_lock);
+
+ ASSERT(ctl->mod_busy);
+#else
+
+ /*
+	 * If kernel symbols have been disabled, return immediately.
+	 * DTRACE_KERNEL_SYMBOLS_NEVER is a permanent mode; it is safe to test without holding locks.
+ */
+ if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER)
+ return 0;
+
+ struct modctl *ctl = NULL;
+ if (!kmod || kmod->address == 0 || kmod->size == 0)
+ return(-1);
+
+ lck_mtx_lock(&dtrace_provider_lock);
+ lck_mtx_lock(&mod_lock);
+
+ /*
+ * Have we seen this kext before?
+ */
+
+ ctl = dtrace_modctl_lookup(kmod);
+
+ if (ctl != NULL) {
+ /* bail... we already have this kext in the modctl list */
+ lck_mtx_unlock(&mod_lock);
+ lck_mtx_unlock(&dtrace_provider_lock);
+ if (dtrace_err_verbose)
+ cmn_err(CE_WARN, "dtrace load module already exists '%s %u' is failing against '%s %u'", kmod->name, (uint_t)kmod->id, ctl->mod_modname, ctl->mod_id);
+ return(-1);
+ }
+ else {
+ ctl = kmem_alloc(sizeof(struct modctl), KM_SLEEP);
+ if (ctl == NULL) {
+ if (dtrace_err_verbose)
+ cmn_err(CE_WARN, "dtrace module load '%s %u' is failing ", kmod->name, (uint_t)kmod->id);
+ lck_mtx_unlock(&mod_lock);
+ lck_mtx_unlock(&dtrace_provider_lock);
+ return (-1);
+ }
+ ctl->mod_next = NULL;
+ ctl->mod_stale = NULL;
+ strlcpy (ctl->mod_modname, kmod->name, sizeof(ctl->mod_modname));
+ ctl->mod_loadcnt = kmod->id;
+ ctl->mod_nenabled = 0;
+ ctl->mod_address = kmod->address;
+ ctl->mod_size = kmod->size;
+ ctl->mod_id = kmod->id;
+ ctl->mod_loaded = 1;
+ ctl->mod_flags = 0;
+ ctl->mod_user_symbols = NULL;
+
+ /*
+ * Find the UUID for this module, if it has one
+ */
+ kernel_mach_header_t* header = (kernel_mach_header_t *)ctl->mod_address;
+ struct load_command* load_cmd = (struct load_command *)&header[1];
+ uint32_t i;
+ for (i = 0; i < header->ncmds; i++) {
+ if (load_cmd->cmd == LC_UUID) {
+ struct uuid_command* uuid_cmd = (struct uuid_command *)load_cmd;
+ memcpy(ctl->mod_uuid, uuid_cmd->uuid, sizeof(uuid_cmd->uuid));
+ ctl->mod_flags |= MODCTL_HAS_UUID;
+ break;
+ }
+ load_cmd = (struct load_command *)((caddr_t)load_cmd + load_cmd->cmdsize);
+ }
+
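+		/* Tag the entry that represents the mach_kernel itself. */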
+ if (ctl->mod_address == g_kernel_kmod_info.address) {
+ ctl->mod_flags |= MODCTL_IS_MACH_KERNEL;
+ }
+ }
+ dtrace_modctl_add(ctl);
+
+ /*
+	 * We must hold the dtrace_lock to safely test the non-permanent dtrace_kernel_symbol_mode values.
+ */
lck_mtx_lock(&dtrace_lock);
+
+ /*
+ * DTrace must decide if it will instrument modules lazily via
+ * userspace symbols (default mode), or instrument immediately via
+ * kernel symbols (non-default mode)
+ *
+ * When in default/lazy mode, DTrace will only support modules
+ * built with a valid UUID.
+ *
+ * Overriding the default can be done explicitly in one of
+ * the following two ways.
+ *
+ * A module can force symbols from kernel space using the plist key,
+ * OSBundleForceDTraceInit (see kmod.h). If this per kext state is set,
+ * we fall through and instrument this module now.
+ *
+ * Or, the boot-arg, dtrace_kernel_symbol_mode, can be set to force symbols
+ * from kernel space (see dtrace_impl.h). If this system state is set
+ * to a non-userspace mode, we fall through and instrument the module now.
+ */
+ if ((dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) &&
+ (!(flag & KMOD_DTRACE_FORCE_INIT)))
+ {
+ /* We will instrument the module lazily -- this is the default */
+ lck_mtx_unlock(&dtrace_lock);
+ lck_mtx_unlock(&mod_lock);
+ lck_mtx_unlock(&dtrace_provider_lock);
+ return 0;
+ }
+
+ /* We will instrument the module immediately using kernel symbols */
+ ctl->mod_flags |= MODCTL_HAS_KERNEL_SYMBOLS;
+
+ lck_mtx_unlock(&dtrace_lock);
+#endif /* __APPLE__ */
+
+ /*
+ * We're going to call each providers per-module provide operation
+ * specifying only this module.
+ */
+ for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
+ prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);
+
+#if defined(__APPLE__)
+ /*
+ * The contract with the kext loader is that once this function has completed,
+ * it may delete kernel symbols at will. We must set this while still holding
+ * the mod_lock.
+ */
+ ctl->mod_flags &= ~MODCTL_HAS_KERNEL_SYMBOLS;
+#endif
+
+ lck_mtx_unlock(&mod_lock);
+ lck_mtx_unlock(&dtrace_provider_lock);
+
+ /*
+ * If we have any retained enablings, we need to match against them.
+ * Enabling probes requires that cpu_lock be held, and we cannot hold
+ * cpu_lock here -- it is legal for cpu_lock to be held when loading a
+ * module. (In particular, this happens when loading scheduling
+ * classes.) So if we have any retained enablings, we need to dispatch
+ * our task queue to do the match for us.
+ */
+ lck_mtx_lock(&dtrace_lock);
+
if (dtrace_retained == NULL) {
lck_mtx_unlock(&dtrace_lock);
+#if !defined(__APPLE__)
return;
+#else
+ return 0;
+#endif
}
-
+
+#if !defined(__APPLE__)
(void) taskq_dispatch(dtrace_taskq,
- (task_func_t *)dtrace_enabling_matchall, NULL, TQ_SLEEP);
-
- lck_mtx_unlock(&dtrace_lock);
-
+ (task_func_t *)dtrace_enabling_matchall, NULL, TQ_SLEEP);
+
+ mutex_exit(&dtrace_lock);
+
/*
* And now, for a little heuristic sleaze: in general, we want to
* match modules as soon as they load. However, we cannot guarantee
* just loaded may not be immediately instrumentable.
*/
delay(1);
+#else
+ /* APPLE NOTE!
+ *
+	 * The cpu_lock mentioned above is only held by dtrace code; Apple's xnu never actually
+	 * holds it for any reason. Thus the comment above is invalid: we can directly invoke
+ * dtrace_enabling_matchall without jumping through all the hoops, and we can avoid
+ * the delay call as well.
+ */
+ lck_mtx_unlock(&dtrace_lock);
+
+ dtrace_enabling_matchall();
+
+ return 0;
+#endif /* __APPLE__ */
}
-
+
+#if !defined(__APPLE__)
static void
dtrace_module_unloaded(struct modctl *ctl)
{
template.dtpr_mod = ctl->mod_modname;
- lck_mtx_lock(&dtrace_provider_lock);
- lck_mtx_lock(&mod_lock);
- lck_mtx_lock(&dtrace_lock);
+ mutex_enter(&dtrace_provider_lock);
+ mutex_enter(&mod_lock);
+ mutex_enter(&dtrace_lock);
if (dtrace_bymod == NULL) {
/*
* The DTrace module is loaded (obviously) but not attached;
* we don't have any work to do.
*/
- lck_mtx_unlock(&dtrace_provider_lock);
- lck_mtx_unlock(&mod_lock);
- lck_mtx_unlock(&dtrace_lock);
+ mutex_exit(&dtrace_provider_lock);
+ mutex_exit(&mod_lock);
+ mutex_exit(&dtrace_lock);
return;
}
for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template);
probe != NULL; probe = probe->dtpr_nextmod) {
if (probe->dtpr_ecb != NULL) {
- lck_mtx_unlock(&dtrace_provider_lock);
- lck_mtx_unlock(&mod_lock);
- lck_mtx_unlock(&dtrace_lock);
+ mutex_exit(&dtrace_provider_lock);
+ mutex_exit(&mod_lock);
+ mutex_exit(&dtrace_lock);
/*
* This shouldn't _actually_ be possible -- we're
ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe);
dtrace_probes[probe->dtpr_id - 1] = NULL;
+ probe->dtpr_provider->probe_count--;
next = probe->dtpr_nextmod;
dtrace_hash_remove(dtrace_bymod, probe);
kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1);
-#if !defined(__APPLE__)
kmem_free(probe, sizeof (dtrace_probe_t));
-#else
+ }
+
+ mutex_exit(&dtrace_lock);
+ mutex_exit(&mod_lock);
+ mutex_exit(&dtrace_provider_lock);
+}
+#else /* __APPLE__ */
+
+/*
+ * Return 0 on success
+ * Return -1 on failure
+ */
+static int
+dtrace_module_unloaded(struct kmod_info *kmod)
+{
+ dtrace_probe_t template, *probe, *first, *next;
+ dtrace_provider_t *prov;
+ struct modctl *ctl = NULL;
+ struct modctl *syncctl = NULL;
+ struct modctl *nextsyncctl = NULL;
+ int syncmode = 0;
+
+ lck_mtx_lock(&dtrace_provider_lock);
+ lck_mtx_lock(&mod_lock);
+ lck_mtx_lock(&dtrace_lock);
+
+ if (kmod == NULL) {
+ syncmode = 1;
+ }
+ else {
+ ctl = dtrace_modctl_lookup(kmod);
+ if (ctl == NULL)
+ {
+ lck_mtx_unlock(&dtrace_lock);
+ lck_mtx_unlock(&mod_lock);
+ lck_mtx_unlock(&dtrace_provider_lock);
+ return (-1);
+ }
+ ctl->mod_loaded = 0;
+ ctl->mod_address = 0;
+ ctl->mod_size = 0;
+ }
+
+ if (dtrace_bymod == NULL) {
+ /*
+ * The DTrace module is loaded (obviously) but not attached;
+ * we don't have any work to do.
+ */
+ if (ctl != NULL)
+ (void)dtrace_modctl_remove(ctl);
+ lck_mtx_unlock(&dtrace_provider_lock);
+ lck_mtx_unlock(&mod_lock);
+ lck_mtx_unlock(&dtrace_lock);
+ return(0);
+ }
+
+ /* Syncmode set means we target and traverse entire modctl list. */
+ if (syncmode)
+ nextsyncctl = dtrace_modctl_list;
+
+syncloop:
+ if (syncmode)
+ {
+ /* find a stale modctl struct */
+ for (syncctl = nextsyncctl; syncctl != NULL; syncctl=syncctl->mod_next) {
+ if (syncctl->mod_address == 0)
+ break;
+ }
+	if (syncctl == NULL)
+ {
+ /* We have no more work to do */
+ lck_mtx_unlock(&dtrace_provider_lock);
+ lck_mtx_unlock(&mod_lock);
+ lck_mtx_unlock(&dtrace_lock);
+ return(0);
+ }
+ else {
+ /* keep track of next syncctl in case this one is removed */
+ nextsyncctl = syncctl->mod_next;
+ ctl = syncctl;
+ }
+ }
+
+ template.dtpr_mod = ctl->mod_modname;
+
+ for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template);
+ probe != NULL; probe = probe->dtpr_nextmod) {
+ if (probe->dtpr_ecb != NULL) {
+ /*
+ * This shouldn't _actually_ be possible -- we're
+ * unloading a module that has an enabled probe in it.
+ * (It's normally up to the provider to make sure that
+ * this can't happen.) However, because dtps_enable()
+ * doesn't have a failure mode, there can be an
+ * enable/unload race. Upshot: we don't want to
+ * assert, but we're not going to disable the
+ * probe, either.
+ */
+
+
+ if (syncmode) {
+ /* We're syncing, let's look at next in list */
+ goto syncloop;
+ }
+
+ lck_mtx_unlock(&dtrace_provider_lock);
+ lck_mtx_unlock(&mod_lock);
+ lck_mtx_unlock(&dtrace_lock);
+
+ if (dtrace_err_verbose) {
+ cmn_err(CE_WARN, "unloaded module '%s' had "
+ "enabled probes", ctl->mod_modname);
+ }
+ return(-1);
+ }
+ }
+
+ probe = first;
+
+ for (first = NULL; probe != NULL; probe = next) {
+ ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe);
+
+ dtrace_probes[probe->dtpr_id - 1] = NULL;
+ probe->dtpr_provider->probe_count--;
+
+ next = probe->dtpr_nextmod;
+ dtrace_hash_remove(dtrace_bymod, probe);
+ dtrace_hash_remove(dtrace_byfunc, probe);
+ dtrace_hash_remove(dtrace_byname, probe);
+
+ if (first == NULL) {
+ first = probe;
+ probe->dtpr_nextmod = NULL;
+ } else {
+ probe->dtpr_nextmod = first;
+ first = probe;
+ }
+ }
+
+ /*
+ * We've removed all of the module's probes from the hash chains and
+ * from the probe array. Now issue a dtrace_sync() to be sure that
+ * everyone has cleared out from any probe array processing.
+ */
+ dtrace_sync();
+
+ for (probe = first; probe != NULL; probe = first) {
+ first = probe->dtpr_nextmod;
+ prov = probe->dtpr_provider;
+ prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id,
+ probe->dtpr_arg);
+ kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
+ kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
+ kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
+ vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1);
+
zfree(dtrace_probe_t_zone, probe);
-#endif /* __APPLE__ */
}
+ dtrace_modctl_remove(ctl);
+
+ if (syncmode)
+ goto syncloop;
+
lck_mtx_unlock(&dtrace_lock);
lck_mtx_unlock(&mod_lock);
lck_mtx_unlock(&dtrace_provider_lock);
+
+ return(0);
}
+#endif /* __APPLE__ */
void
dtrace_suspend(void)
dtrace_provider, NULL, NULL, "END", 0, NULL);
dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
dtrace_provider, NULL, NULL, "ERROR", 1, NULL);
-#elif defined(__ppc__) || defined(__ppc64__)
- dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
- dtrace_provider, NULL, NULL, "BEGIN", 2, NULL);
- dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
- dtrace_provider, NULL, NULL, "END", 1, NULL);
- dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
- dtrace_provider, NULL, NULL, "ERROR", 4, NULL);
-#elif (defined(__i386__) || defined (__x86_64__))
+#elif defined (__x86_64__)
dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
dtrace_provider, NULL, NULL, "BEGIN", 1, NULL);
dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
if (dtrace_anon.dta_enabling != NULL) {
ASSERT(dtrace_retained == dtrace_anon.dta_enabling);
+#if defined(__APPLE__)
+ /*
+ * If there is anonymous dof, we should switch symbol modes.
+ */
+ if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) {
+ dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_KERNEL;
+ }
+#endif
+
dtrace_enabling_provide(NULL);
state = dtrace_anon.dta_state;
lck_mtx_unlock(&cpu_lock);
if (state == NULL) {
- if (--dtrace_opens == 0)
+ if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
lck_mtx_unlock(&dtrace_lock);
return (EAGAIN);
lck_mtx_unlock(&cpu_lock);
if (rv != 0 || state == NULL) {
- if (--dtrace_opens == 0)
+ if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
lck_mtx_unlock(&dtrace_lock);
/* propagate EAGAIN or ERESTART */
}
lck_rw_unlock_exclusive(&dtrace_dof_mode_lock);
+
+ /*
+ * Update kernel symbol state.
+ *
+ * We must own the provider and dtrace locks.
+ *
+ * NOTE! It may appear there is a race by setting this value so late
+ * after dtrace_probe_provide. However, any kext loaded after the
+ * call to probe provide and before we set LAZY_OFF will be marked as
+ * eligible for symbols from userspace. The same dtrace that is currently
+ * calling dtrace_open() (this call!) will get a list of kexts needing
+ * symbols and fill them in, thus closing the race window.
+ *
+	 * We want to set this value only after it is certain it will succeed, as
+ * this significantly reduces the complexity of error exits.
+ */
+ lck_mtx_lock(&dtrace_lock);
+ if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) {
+ dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_KERNEL;
+ }
+ lck_mtx_unlock(&dtrace_lock);
#endif /* __APPLE__ */
return (0);
dtrace_state_destroy(state);
ASSERT(dtrace_opens > 0);
- if (--dtrace_opens == 0)
- (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
+ /*
+ * Only relinquish control of the kernel debugger interface when there
+ * are no consumers and no anonymous enablings.
+ */
+ if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
+ (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
+
lck_mtx_unlock(&dtrace_lock);
lck_mtx_unlock(&cpu_lock);
#if defined(__APPLE__)
-
/*
* Lock ordering requires the dof mode lock be taken before
* the dtrace_lock.
*/
lck_rw_lock_exclusive(&dtrace_dof_mode_lock);
lck_mtx_lock(&dtrace_lock);
+
+ if (dtrace_opens == 0) {
+ /*
+ * If we are currently lazy-off, and this is the last close, transition to
+ * lazy state.
+ */
+ if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF) {
+ dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
+ }
- /*
- * If we are currently lazy-off, and this is the last close, transition to
- * lazy state.
- */
- if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF && dtrace_opens == 0) {
- dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
+ /*
+ * If we are the last dtrace client, switch back to lazy (from userspace) symbols
+ */
+ if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_KERNEL) {
+ dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE;
+ }
}
-
+
lck_mtx_unlock(&dtrace_lock);
lck_rw_unlock_exclusive(&dtrace_dof_mode_lock);
+
+ /*
+ * Kext probes may be retained past the end of the kext's lifespan. The
+ * probes are kept until the last reference to them has been removed.
+ * Since closing an active dtrace context is likely to drop that last reference,
+ * lets take a shot at cleaning out the orphaned probes now.
+	 * let's take a shot at cleaning out the orphaned probes now.
+ dtrace_module_unloaded(NULL);
#endif /* __APPLE__ */
return (0);
return KERN_SUCCESS;
switch (cmd) {
- case DTRACEHIOC_ADDDOF: {
+ case DTRACEHIOC_ADDDOF:
+ {
dof_helper_t *dhp = NULL;
size_t dof_ioctl_data_size;
dof_ioctl_data_t* multi_dof;
return (0);
}
- default:
- break;
+ case DTRACEIOC_MODUUIDSLIST: {
+ size_t module_uuids_list_size;
+ dtrace_module_uuids_list_t* uuids_list;
+ uint64_t dtmul_count;
+
+ /*
+ * Fail if the kernel symbol mode makes this operation illegal.
+	 * Both NEVER & ALWAYS_FROM_KERNEL are permanent states; it is legal to check
+ * for them without holding the dtrace_lock.
+ */
+ if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER ||
+ dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL) {
+ cmn_err(CE_WARN, "dtrace_kernel_symbol_mode of %u disallows DTRACEIOC_MODUUIDSLIST", dtrace_kernel_symbol_mode);
+ return (EPERM);
+ }
+
+ /*
+	 * Read the number of module UUIDs being passed in.
+ */
+ if (copyin(arg + offsetof(dtrace_module_uuids_list_t, dtmul_count),
+ &dtmul_count,
+ sizeof(dtmul_count))) {
+ cmn_err(CE_WARN, "failed to copyin dtmul_count");
+ return (EFAULT);
+ }
+
+ /*
+ * Range check the count. More than 2k kexts is probably an error.
+ */
+ if (dtmul_count > 2048) {
+ cmn_err(CE_WARN, "dtmul_count is not valid");
+ return (EINVAL);
+ }
+
+ /*
+ * For all queries, we return EINVAL when the user specified
+ * count does not match the actual number of modules we find
+ * available.
+ *
+ * If the user specified count is zero, then this serves as a
+ * simple query to count the available modules in need of symbols.
+ */
+
+ rval = 0;
+
+ if (dtmul_count == 0)
+ {
+ lck_mtx_lock(&mod_lock);
+ struct modctl* ctl = dtrace_modctl_list;
+ while (ctl) {
+ ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl));
+ if (!MOD_SYMBOLS_DONE(ctl)) {
+ dtmul_count++;
+ rval = EINVAL;
+ }
+ ctl = ctl->mod_next;
+ }
+ lck_mtx_unlock(&mod_lock);
+
+ if (copyout(&dtmul_count, arg, sizeof (dtmul_count)) != 0)
+ return (EFAULT);
+ else
+ return (rval);
+ }
+
+ /*
+ * If we reach this point, then we have a request for full list data.
+ * Allocate a correctly sized structure and copyin the data.
+ */
+ module_uuids_list_size = DTRACE_MODULE_UUIDS_LIST_SIZE(dtmul_count);
+ if ((uuids_list = kmem_alloc(module_uuids_list_size, KM_SLEEP)) == NULL)
+ return (ENOMEM);
+
+ /* NOTE! We can no longer exit this method via return */
+ if (copyin(arg, uuids_list, module_uuids_list_size) != 0) {
+ cmn_err(CE_WARN, "failed copyin of dtrace_module_uuids_list_t");
+ rval = EFAULT;
+ goto moduuidslist_cleanup;
+ }
+
+ /*
+ * Check that the count didn't change between the first copyin and the second.
+ */
+ if (uuids_list->dtmul_count != dtmul_count) {
+ rval = EINVAL;
+ goto moduuidslist_cleanup;
+ }
+
+ /*
+ * Build the list of UUID's that need symbols
+ */
+ lck_mtx_lock(&mod_lock);
+
+ dtmul_count = 0;
+
+ struct modctl* ctl = dtrace_modctl_list;
+ while (ctl) {
+ /*
+ * We assume that userspace symbols will be "better" than kernel level symbols,
+ * as userspace can search for dSYM(s) and symbol'd binaries. Even if kernel syms
+ * are available, add user syms if the module might use them.
+ */
+ ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl));
+ if (!MOD_SYMBOLS_DONE(ctl)) {
+ UUID* uuid = &uuids_list->dtmul_uuid[dtmul_count];
+ if (dtmul_count++ < uuids_list->dtmul_count) {
+ memcpy(uuid, ctl->mod_uuid, sizeof(UUID));
+ }
+ }
+ ctl = ctl->mod_next;
+ }
+
+ lck_mtx_unlock(&mod_lock);
+
+ if (uuids_list->dtmul_count < dtmul_count)
+ rval = EINVAL;
+
+ uuids_list->dtmul_count = dtmul_count;
+
+ /*
+ * Copyout the symbols list (or at least the count!)
+ */
+ if (copyout(uuids_list, arg, module_uuids_list_size) != 0) {
+			cmn_err(CE_WARN, "failed copyout of dtrace_module_uuids_list_t");
+ rval = EFAULT;
+ }
+
+ moduuidslist_cleanup:
+ /*
+ * If we had to allocate struct memory, free it.
+ */
+ if (uuids_list != NULL) {
+ kmem_free(uuids_list, module_uuids_list_size);
+ }
+
+ return rval;
+ }
+
+ case DTRACEIOC_PROVMODSYMS: {
+ size_t module_symbols_size;
+ dtrace_module_symbols_t* module_symbols;
+ uint64_t dtmodsyms_count;
+
+ /*
+ * Fail if the kernel symbol mode makes this operation illegal.
+	 * Both NEVER & ALWAYS_FROM_KERNEL are permanent states; it is legal to check
+ * for them without holding the dtrace_lock.
+ */
+ if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER ||
+ dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL) {
+ cmn_err(CE_WARN, "dtrace_kernel_symbol_mode of %u disallows DTRACEIOC_PROVMODSYMS", dtrace_kernel_symbol_mode);
+ return (EPERM);
+ }
+
+ /*
+ * Read the number of module symbols structs being passed in.
+ */
+ if (copyin(arg + offsetof(dtrace_module_symbols_t, dtmodsyms_count),
+ &dtmodsyms_count,
+ sizeof(dtmodsyms_count))) {
+ cmn_err(CE_WARN, "failed to copyin dtmodsyms_count");
+ return (EFAULT);
+ }
+
+ /*
+ * Range check the count. How much data can we pass around?
+ * FIX ME!
+ */
+ if (dtmodsyms_count == 0 || (dtmodsyms_count > 100 * 1024)) {
+ cmn_err(CE_WARN, "dtmodsyms_count is not valid");
+ return (EINVAL);
+ }
+
+ /*
+ * Allocate a correctly sized structure and copyin the data.
+ */
+ module_symbols_size = DTRACE_MODULE_SYMBOLS_SIZE(dtmodsyms_count);
+ if ((module_symbols = kmem_alloc(module_symbols_size, KM_SLEEP)) == NULL)
+ return (ENOMEM);
+
+ rval = 0;
+
+ /* NOTE! We can no longer exit this method via return */
+ if (copyin(arg, module_symbols, module_symbols_size) != 0) {
+ cmn_err(CE_WARN, "failed copyin of dtrace_module_symbols_t, symbol count %llu", module_symbols->dtmodsyms_count);
+ rval = EFAULT;
+ goto module_symbols_cleanup;
+ }
+
+ /*
+ * Check that the count didn't change between the first copyin and the second.
+ */
+ if (module_symbols->dtmodsyms_count != dtmodsyms_count) {
+ rval = EINVAL;
+ goto module_symbols_cleanup;
+ }
+
+ /*
+ * Find the modctl to add symbols to.
+ */
+ lck_mtx_lock(&dtrace_provider_lock);
+ lck_mtx_lock(&mod_lock);
+
+ struct modctl* ctl = dtrace_modctl_list;
+ while (ctl) {
+ ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl));
+ if (MOD_HAS_UUID(ctl) && !MOD_SYMBOLS_DONE(ctl)) {
+ if (memcmp(module_symbols->dtmodsyms_uuid, ctl->mod_uuid, sizeof(UUID)) == 0) {
+ /* BINGO! */
+ ctl->mod_user_symbols = module_symbols;
+ break;
+ }
+ }
+ ctl = ctl->mod_next;
+ }
+
+ if (ctl) {
+ dtrace_provider_t *prv;
+
+ /*
+ * We're going to call each providers per-module provide operation
+ * specifying only this module.
+ */
+ for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
+ prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);
+
+ /*
+			 * We gave every provider a chance to provide with the user syms; go ahead and clear them.
+ */
+ ctl->mod_user_symbols = NULL; /* MUST reset this to clear HAS_USERSPACE_SYMBOLS */
+ }
+
+ lck_mtx_unlock(&mod_lock);
+ lck_mtx_unlock(&dtrace_provider_lock);
+
+ module_symbols_cleanup:
+ /*
+ * If we had to allocate struct memory, free it.
+ */
+ if (module_symbols != NULL) {
+ kmem_free(module_symbols, module_symbols_size);
+ }
+
+ return rval;
+ }
+
+ default:
+ break;
}
return (ENOTTY);
dtrace_init( void )
{
if (0 == gDTraceInited) {
- int i, ncpu = NCPU;
+ int i, ncpu;
+ /*
+ * DTrace allocates buffers based on the maximum number
+ * of enabled cpus. This call avoids any race when finding
+ * that count.
+ */
+ ASSERT(dtrace_max_cpus == 0);
+ ncpu = dtrace_max_cpus = ml_get_max_cpus();
+
gMajDevNo = cdevsw_add(DTRACE_MAJOR, &dtrace_cdevsw);
if (gMajDevNo < 0) {
lck_mtx_init(&cpu_lock, dtrace_lck_grp, dtrace_lck_attr);
lck_mtx_init(&mod_lock, dtrace_lck_grp, dtrace_lck_attr);
+ dtrace_modctl_list = NULL;
+
cpu_core = (cpu_core_t *)kmem_zalloc( ncpu * sizeof(cpu_core_t), KM_SLEEP );
for (i = 0; i < ncpu; ++i) {
lck_mtx_init(&cpu_core[i].cpuc_pid_lock, dtrace_lck_grp, dtrace_lck_attr);
}
- cpu_list = (cpu_t *)kmem_zalloc( ncpu * sizeof(cpu_t), KM_SLEEP );
+ cpu_list = (dtrace_cpu_t *)kmem_zalloc( ncpu * sizeof(dtrace_cpu_t), KM_SLEEP );
for (i = 0; i < ncpu; ++i) {
cpu_list[i].cpu_id = (processorid_t)i;
cpu_list[i].cpu_next = &(cpu_list[(i+1) % ncpu]);
(void)dtrace_abs_to_nano(0LL); /* Force once only call to clock_timebase_info (which can take a lock) */
+ dtrace_isa_init();
+
/*
* See dtrace_impl.h for a description of dof modes.
* The default is lazy dof.
break;
}
+ /*
+ * See dtrace_impl.h for a description of kernel symbol modes.
+ * The default is to wait for symbols from userspace (lazy symbols).
+ */
+ if (!PE_parse_boot_argn("dtrace_kernel_symbol_mode", &dtrace_kernel_symbol_mode, sizeof (dtrace_kernel_symbol_mode))) {
+ dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE;
+ }
+
gDTraceInited = 1;
} else
void
dtrace_postinit(void)
{
- /*
- * Called from bsd_init after all provider's *_init() routines have been
- * run. That way, anonymous DOF enabled under dtrace_attach() is safe
- * to go.
- */
- dtrace_attach( (dev_info_t *)(uintptr_t)makedev(gMajDevNo, 0), 0 ); /* Punning a dev_t to a dev_info_t* */
+ /*
+ * Called from bsd_init after all provider's *_init() routines have been
+ * run. That way, anonymous DOF enabled under dtrace_attach() is safe
+ * to go.
+ */
+ dtrace_attach( (dev_info_t *)(uintptr_t)makedev(gMajDevNo, 0), 0 ); /* Punning a dev_t to a dev_info_t* */
+
+ /*
+ * Add the mach_kernel to the module list for lazy processing
+ */
+ struct kmod_info fake_kernel_kmod;
+ memset(&fake_kernel_kmod, 0, sizeof(fake_kernel_kmod));
+
+ strlcpy(fake_kernel_kmod.name, "mach_kernel", sizeof(fake_kernel_kmod.name));
+ fake_kernel_kmod.id = 1;
+ fake_kernel_kmod.address = g_kernel_kmod_info.address;
+ fake_kernel_kmod.size = g_kernel_kmod_info.size;
+
+ if (dtrace_module_loaded(&fake_kernel_kmod, 0) != 0) {
+ printf("dtrace_postinit: Could not register mach_kernel modctl\n");
+ }
+
+ (void)OSKextRegisterKextsWithDTrace();
}
#undef DTRACE_MAJOR