/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/* #pragma ident	"@(#)dtrace.c	1.49	06/08/11 SMI" */
/*
 * DTrace - Dynamic Tracing for Solaris
 *
 * This is the implementation of the Solaris Dynamic Tracing framework
 * (DTrace).  The user-visible interface to DTrace is described at length in
 * the "Solaris Dynamic Tracing Guide".  The interfaces between the libdtrace
 * library, the in-kernel DTrace framework, and the DTrace providers are
 * described in the block comments in the <sys/dtrace.h> header file.  The
 * internal architecture of DTrace is described in the block comments in the
 * <sys/dtrace_impl.h> header file.  The comments contained within the DTrace
 * implementation very much assume mastery of all of these sources; if one has
 * an unanswered question about the implementation, one should consult them
 * first.
 *
 * The functions here are ordered roughly as follows:
 *
 *   - Probe context functions
 *   - Probe hashing functions
 *   - Non-probe context utility functions
 *   - Matching functions
 *   - Provider-to-Framework API functions
 *   - Probe management functions
 *   - DIF object functions
 *   - Format functions
 *   - Predicate functions
 *   - ECB functions
 *   - Buffer functions
 *   - Enabling functions
 *   - DOF functions
 *   - Anonymous enabling functions
 *   - Consumer state functions
 *   - Helper functions
 *   - Hook functions
 *   - Driver cookbook functions
 *
 * Each group of functions begins with a block comment labelled the "DTrace
 * [Group] Functions", allowing one to find each block by searching forward
 * on capital-f functions.
 */
#define _DTRACE_WANT_PROC_GLUE_ 1

#include <sys/errno.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/dtrace_impl.h>
#include <sys/param.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <miscfs/devfs/devfs.h>
#include <sys/malloc.h>
#include <sys/kernel_types.h>
#include <sys/proc_internal.h>
#include <sys/uio_internal.h>
#include <sys/kauth.h>
#include <mach/exception_types.h>
#include <sys/signalvar.h>
#include <kern/zalloc.h>
#define t_predcache t_dtrace_predcache /* Cosmetic. Helps readability of thread.h */

extern void dtrace_suspend(void);
extern void dtrace_resume(void);
extern void dtrace_init(void);
extern void helper_init(void);
#if defined(__APPLE__)

#include "../../../osfmk/chud/chud_dtrace.h"

extern kern_return_t chudxnu_dtrace_callback
	(uint64_t selector, uint64_t *args, uint32_t count);
#endif /* __APPLE__ */
/*
 * DTrace Tunable Variables
 *
 * The following variables may be tuned by adding a line to /etc/system that
 * includes both the name of the DTrace module ("dtrace") and the name of the
 * variable.  For example:
 *
 *   set dtrace:dtrace_destructive_disallow = 1
 *
 * In general, the only variables that one should be tuning this way are those
 * that affect system-wide DTrace behavior, and for which the default behavior
 * is undesirable.  Most of these variables are tunable on a per-consumer
 * basis using DTrace options, and need not be tuned on a system-wide basis.
 * When tuning these variables, avoid pathological values; while some attempt
 * is made to verify the integrity of these variables, they are not considered
 * part of the supported interface to DTrace, and they are therefore not
 * checked comprehensively.  Further, these variables should not be tuned
 * dynamically via "mdb -kw" or other means; they should only be tuned via
 * /etc/system.
 */
int		dtrace_destructive_disallow = 0;
#if defined(__APPLE__)
#define proc_t struct proc
#endif /* __APPLE__ */
dtrace_optval_t	dtrace_nonroot_maxsize = (16 * 1024 * 1024);
size_t		dtrace_difo_maxsize = (256 * 1024);
dtrace_optval_t	dtrace_dof_maxsize = (256 * 1024);
size_t		dtrace_global_maxsize = (16 * 1024);
size_t		dtrace_actions_max = (16 * 1024);
size_t		dtrace_retain_max = 1024;
dtrace_optval_t	dtrace_helper_actions_max = 32;
dtrace_optval_t	dtrace_helper_providers_max = 32;
dtrace_optval_t	dtrace_dstate_defsize = (1 * 1024 * 1024);
size_t		dtrace_strsize_default = 256;
dtrace_optval_t	dtrace_cleanrate_default = 9900990;		/* 101 hz */
dtrace_optval_t	dtrace_cleanrate_min = 200000;			/* 5000 hz */
dtrace_optval_t	dtrace_cleanrate_max = (uint64_t)60 * NANOSEC;	/* 1/minute */
dtrace_optval_t	dtrace_aggrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_statusrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_statusrate_max = (hrtime_t)10 * NANOSEC;	/* 6/minute */
dtrace_optval_t	dtrace_switchrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_nspec_default = 1;
dtrace_optval_t	dtrace_specsize_default = 32 * 1024;
dtrace_optval_t	dtrace_stackframes_default = 20;
dtrace_optval_t	dtrace_ustackframes_default = 20;
dtrace_optval_t	dtrace_jstackframes_default = 50;
dtrace_optval_t	dtrace_jstackstrsize_default = 512;
int		dtrace_msgdsize_max = 128;
hrtime_t	dtrace_chill_max = 500 * (NANOSEC / MILLISEC);	/* 500 ms */
hrtime_t	dtrace_chill_interval = NANOSEC;		/* 1000 ms */
int		dtrace_devdepth_max = 32;
int		dtrace_err_verbose;
hrtime_t	dtrace_deadman_interval = NANOSEC;
hrtime_t	dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC;
hrtime_t	dtrace_deadman_user = (hrtime_t)30 * NANOSEC;
/*
 * DTrace External Variables
 *
 * As dtrace(7D) is a kernel module, any DTrace variables are obviously
 * available to DTrace consumers via the backtick (`) syntax.  One of these,
 * dtrace_zero, is made deliberately so: it is provided as a source of
 * well-known, zero-filled memory.  While this variable is not documented,
 * it is used by some translators as an implementation detail.
 */
const char	dtrace_zero[256] = { 0 };	/* zero-filled memory */
/*
 * DTrace Internal Variables
 */
static dev_info_t	*dtrace_devi;		/* device info */
static vmem_t		*dtrace_arena;		/* probe ID arena */
static vmem_t		*dtrace_minor;		/* minor number arena */
static taskq_t		*dtrace_taskq;		/* task queue */
static dtrace_probe_t	**dtrace_probes;	/* array of all probes */
static int		dtrace_nprobes;		/* number of probes */
static dtrace_provider_t *dtrace_provider;	/* provider list */
static dtrace_meta_t	*dtrace_meta_pid;	/* user-land meta provider */
static int		dtrace_opens;		/* number of opens */
static int		dtrace_helpers;		/* number of helpers */
static void		*dtrace_softstate;	/* softstate pointer */
static dtrace_hash_t	*dtrace_bymod;		/* probes hashed by module */
static dtrace_hash_t	*dtrace_byfunc;		/* probes hashed by function */
static dtrace_hash_t	*dtrace_byname;		/* probes hashed by name */
static dtrace_toxrange_t *dtrace_toxrange;	/* toxic range array */
static int		dtrace_toxranges;	/* number of toxic ranges */
static int		dtrace_toxranges_max;	/* size of toxic range array */
static dtrace_anon_t	dtrace_anon;		/* anonymous enabling */
static kmem_cache_t	*dtrace_state_cache;	/* cache for dynamic state */
static uint64_t		dtrace_vtime_references; /* number of vtimestamp refs */
static kthread_t	*dtrace_panicked;	/* panicking thread */
static dtrace_ecb_t	*dtrace_ecb_create_cache; /* cached created ECB */
static dtrace_genid_t	dtrace_probegen;	/* current probe generation */
static dtrace_helpers_t	*dtrace_deferred_pid;	/* deferred helper list */
static dtrace_enabling_t *dtrace_retained;	/* list of retained enablings */
static dtrace_dynvar_t	dtrace_dynhash_sink;	/* end of dynamic hash chains */
#if defined(__APPLE__)
static int		dtrace_dof_mode;	/* dof mode */
#endif
#if defined(__APPLE__)
/*
 * To save memory, some common memory allocations are given a
 * unique zone. For example, dtrace_probe_t is 72 bytes in size,
 * which means it would fall into the kalloc.128 bucket. With
 * 20k elements allocated, the space saved is substantial.
 */
struct zone *dtrace_probe_t_zone;
#endif /* __APPLE__ */
/*
 * DTrace Locking
 * DTrace is protected by three (relatively coarse-grained) locks:
 *
 * (1) dtrace_lock is required to manipulate essentially any DTrace state,
 *     including enabling state, probes, ECBs, consumer state, helper state,
 *     etc.  Importantly, dtrace_lock is _not_ required when in probe context;
 *     probe context is lock-free -- synchronization is handled via the
 *     dtrace_sync() cross call mechanism.
 *
 * (2) dtrace_provider_lock is required when manipulating provider state, or
 *     when provider state must be held constant.
 *
 * (3) dtrace_meta_lock is required when manipulating meta provider state, or
 *     when meta provider state must be held constant.
 *
 * The lock ordering between these three locks is dtrace_meta_lock before
 * dtrace_provider_lock before dtrace_lock.  (In particular, there are
 * several places where dtrace_provider_lock is held by the framework as it
 * calls into the providers -- which then call back into the framework,
 * grabbing dtrace_lock.)
 *
 * There are two other locks in the mix:  mod_lock and cpu_lock.  With respect
 * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical
 * role as a coarse-grained lock; it is acquired before both of these locks.
 * With respect to dtrace_meta_lock, its behavior is stranger:  cpu_lock must
 * be acquired _between_ dtrace_meta_lock and any other DTrace locks.
 * mod_lock is similar with respect to dtrace_provider_lock in that it must be
 * acquired _between_ dtrace_provider_lock and dtrace_lock.
 */

/*
 * APPLE NOTE:
 *
 * All kmutex_t vars have been changed to lck_mtx_t.
 * Note that lck_mtx_t's require explicit initialization.
 *
 * mutex_enter() becomes lck_mtx_lock()
 * mutex_exit() becomes lck_mtx_unlock()
 *
 * Lock asserts are changed like this:
 *
 * ASSERT(MUTEX_HELD(&cpu_lock));
 *	becomes:
 * lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
 *
 * Due to the number of these changes, they are not called out explicitly.
 */
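
/*
 * Illustrative sketch (not a code path in this file): a caller that needed
 * all three DTrace locks would take and release them in the order described
 * above, e.g.:
 *
 *	lck_mtx_lock(&dtrace_meta_lock);
 *	lck_mtx_lock(&dtrace_provider_lock);
 *	lck_mtx_lock(&dtrace_lock);
 *	... manipulate framework, provider, and meta provider state ...
 *	lck_mtx_unlock(&dtrace_lock);
 *	lck_mtx_unlock(&dtrace_provider_lock);
 *	lck_mtx_unlock(&dtrace_meta_lock);
 */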
static lck_mtx_t	dtrace_lock;		/* probe state lock */
static lck_mtx_t	dtrace_provider_lock;	/* provider state lock */
static lck_mtx_t	dtrace_meta_lock;	/* meta-provider state lock */
#if defined(__APPLE__)
static lck_rw_t		dtrace_dof_mode_lock;	/* dof mode lock */
#endif
/*
 * DTrace Provider Variables
 *
 * These are the variables relating to DTrace as a provider (that is, the
 * provider of the BEGIN, END, and ERROR probes).
 */
static dtrace_pattr_t	dtrace_provider_attr = {
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
};
static dtrace_pops_t	dtrace_provider_ops = {
	(void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop,
	(void (*)(void *, struct modctl *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	NULL,
	NULL,
	NULL,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop
};
static dtrace_id_t	dtrace_probeid_begin;	/* special BEGIN probe */
static dtrace_id_t	dtrace_probeid_end;	/* special END probe */
dtrace_id_t		dtrace_probeid_error;	/* special ERROR probe */
/*
 * DTrace Helper Tracing Variables
 */
uint32_t dtrace_helptrace_next = 0;
uint32_t dtrace_helptrace_nlocals;
char	*dtrace_helptrace_buffer;
int	dtrace_helptrace_bufsize = 512 * 1024;

#ifdef DEBUG
int	dtrace_helptrace_enabled = 1;
#else
int	dtrace_helptrace_enabled = 0;
#endif
/*
 * DTrace Error Hashing
 *
 * On DEBUG kernels, DTrace will track the errors that it has seen in a hash
 * table.  This is very useful for checking coverage of tests that are
 * expected to induce DIF or DOF processing errors, and may be useful for
 * debugging problems in the DIF code generator or in DOF generation.  The
 * error hash may be examined with the ::dtrace_errhash MDB dcmd.
 */
#ifdef DEBUG
static dtrace_errhash_t	dtrace_errhash[DTRACE_ERRHASHSZ];
static const char *dtrace_errlast;
static kthread_t *dtrace_errthread;
static lck_mtx_t dtrace_errlock;
#endif
/*
 * DTrace Macros and Constants
 *
 * These are various macros that are useful in various spots in the
 * implementation, along with a few random constants that have no meaning
 * outside of the implementation.  There is no real structure to this cpp
 * mishmash -- but is there ever?
 */
#define	DTRACE_HASHSTR(hash, probe)	\
	dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs)))

#define	DTRACE_HASHNEXT(hash, probe)	\
	(dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs)

#define	DTRACE_HASHPREV(hash, probe)	\
	(dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs)

#define	DTRACE_HASHEQ(hash, lhs, rhs)	\
	(strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \
	    *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0)

#define	DTRACE_AGGHASHSIZE_SLEW	17
/*
 * The key for a thread-local variable consists of the lower 61 bits of the
 * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL.
 * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never
 * equal to a variable identifier.  This is necessary (but not sufficient) to
 * assure that global associative arrays never collide with thread-local
 * variables.  To guarantee that they cannot collide, we must also define the
 * order for keying dynamic variables.  That order is:
 *
 *   [ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ]
 *
 * Because the variable-key and the tls-key are in orthogonal spaces, there is
 * no way for a global variable key signature to match a thread-local key
 * signature.
 */
#if !defined(__APPLE__)
#define	DTRACE_TLS_THRKEY(where) { \
	uint_t intr = 0; \
	uint_t actv = CPU->cpu_intr_actv >> (LOCK_LEVEL + 1); \
	for (; actv; actv >>= 1) \
		intr++; \
	ASSERT(intr < (1 << 3)); \
	(where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \
	    (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
}
#else
#define	DTRACE_TLS_THRKEY(where) { \
	uint_t intr = ml_at_interrupt_context(); /* XXX just one measly bit */ \
	uint_t thr = (uint_t)current_thread(); \
	uint_t pid = (uint_t)proc_selfpid(); \
	ASSERT(intr < (1 << 3)); \
	(where) = ((((uint64_t)thr << 32 | pid) + DIF_VARIABLE_MAX) & \
	    (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
}
#endif /* __APPLE__ */
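
/*
 * Illustrative sketch of the key layout produced above (the values are
 * assumptions for the sake of the example, not values computed by DTrace):
 * with an interrupt indicator of 1 and a thread/pid pair T/P, the Darwin
 * variant evaluates to
 *
 *	((((uint64_t)T << 32 | P) + DIF_VARIABLE_MAX) &
 *	    (((uint64_t)1 << 61) - 1)) | ((uint64_t)1 << 61)
 *
 * i.e. the low 61 bits identify the thread and the high 3 bits carry the
 * interrupt state, so a thread-local key can never equal a variable
 * identifier.
 */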
#define	DTRACE_STORE(type, tomax, offset, what) \
	*((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what);
#if !defined(__APPLE__)
#if !(defined(__i386__) || defined (__x86_64__))
#define	DTRACE_ALIGNCHECK(addr, size, flags) \
	if (addr & (size - 1)) { \
		*flags |= CPU_DTRACE_BADALIGN; \
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \
		return (0); \
	}
#else
#define	DTRACE_ALIGNCHECK(addr, size, flags)
#endif
#define	DTRACE_LOADFUNC(bits) \
uint##bits##_t \
dtrace_load##bits(uintptr_t addr) \
{ \
	size_t size = bits / NBBY; \
	uint##bits##_t rval; \
	int i; \
	volatile uint16_t *flags = (volatile uint16_t *) \
	    &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; \
 \
	DTRACE_ALIGNCHECK(addr, size, flags); \
 \
	for (i = 0; i < dtrace_toxranges; i++) { \
		if (addr >= dtrace_toxrange[i].dtt_limit) \
			continue; \
		if (addr + size <= dtrace_toxrange[i].dtt_base) \
			continue; \
		/* \
		 * This address falls within a toxic region; return 0. \
		 */ \
		*flags |= CPU_DTRACE_BADADDR; \
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \
		return (0); \
	} \
 \
	*flags |= CPU_DTRACE_NOFAULT; \
	rval = *((volatile uint##bits##_t *)addr); \
	*flags &= ~CPU_DTRACE_NOFAULT; \
 \
	return (rval); \
}
#else /* __APPLE__ */
#define	DTRACE_ALIGNCHECK(addr, size, flags) \
	if (addr & (MIN(size,4) - 1)) { \
		*flags |= CPU_DTRACE_BADALIGN; \
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \
		return (0); \
	}

#define RECOVER_LABEL(bits) __asm__ volatile("_dtraceLoadRecover" #bits ":" );
#define	DTRACE_LOADFUNC(bits) \
extern vm_offset_t dtraceLoadRecover##bits; \
uint##bits##_t dtrace_load##bits(uintptr_t addr); \
 \
uint##bits##_t \
dtrace_load##bits(uintptr_t addr) \
{ \
	size_t size = bits / NBBY; \
	uint##bits##_t rval = 0; \
	int i; \
	ppnum_t pp; \
	volatile uint16_t *flags = (volatile uint16_t *) \
	    &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; \
 \
	DTRACE_ALIGNCHECK(addr, size, flags); \
 \
	for (i = 0; i < dtrace_toxranges; i++) { \
		if (addr >= dtrace_toxrange[i].dtt_limit) \
			continue; \
		if (addr + size <= dtrace_toxrange[i].dtt_base) \
			continue; \
		/* \
		 * This address falls within a toxic region; return 0. \
		 */ \
		*flags |= CPU_DTRACE_BADADDR; \
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \
		return (0); \
	} \
 \
	pp = pmap_find_phys(kernel_pmap, addr); \
 \
	if (0 == pp || /* pmap_find_phys failed ? */ \
	    !dtxnu_is_RAM_page(pp) /* Backed by RAM? */ ) { \
		*flags |= CPU_DTRACE_BADADDR; \
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \
		return (0); \
	} \
 \
	{ \
	volatile vm_offset_t recover = (vm_offset_t)&dtraceLoadRecover##bits; \
	*flags |= CPU_DTRACE_NOFAULT; \
	recover = dtrace_set_thread_recover(current_thread(), recover); \
	rval = *((volatile uint##bits##_t *)addr); \
	RECOVER_LABEL(bits); \
	(void)dtrace_set_thread_recover(current_thread(), recover); \
	*flags &= ~CPU_DTRACE_NOFAULT; \
	} \
 \
	return (rval); \
}
#endif /* __APPLE__ */
#ifdef _LP64
#define	dtrace_loadptr	dtrace_load64
#else
#define	dtrace_loadptr	dtrace_load32
#endif

#define	DTRACE_DYNHASH_FREE	0
#define	DTRACE_DYNHASH_SINK	1
#define	DTRACE_DYNHASH_VALID	2

#define	DTRACE_MATCH_NEXT	0
#define	DTRACE_MATCH_DONE	1
#define	DTRACE_ANCHORED(probe)	((probe)->dtpr_func[0] != '\0')
#define	DTRACE_STATE_ALIGN	64

#define	DTRACE_FLAGS2FLT(flags) \
	(((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR : \
	((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP : \
	((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO : \
	((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV : \
	((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV : \
	((flags) & CPU_DTRACE_TUPOFLOW) ? DTRACEFLT_TUPOFLOW : \
	((flags) & CPU_DTRACE_BADALIGN) ? DTRACEFLT_BADALIGN : \
	((flags) & CPU_DTRACE_NOSCRATCH) ? DTRACEFLT_NOSCRATCH : \
	DTRACEFLT_UNKNOWN)

#define	DTRACEACT_ISSTRING(act) \
	((act)->dta_kind == DTRACEACT_DIFEXPR && \
	(act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING)
static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
static void dtrace_enabling_provide(dtrace_provider_t *);
static int dtrace_enabling_match(dtrace_enabling_t *, int *);
static void dtrace_enabling_matchall(void);
static dtrace_state_t *dtrace_anon_grab(void);
static uint64_t dtrace_helper(int, dtrace_mstate_t *,
    dtrace_state_t *, uint64_t, uint64_t);
static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
static void dtrace_buffer_drop(dtrace_buffer_t *);
static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
    dtrace_state_t *, dtrace_mstate_t *);
static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
    dtrace_optval_t);
static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
/*
 * DTrace Probe Context Functions
 *
 * These functions are called from probe context.  Because probe context is
 * any context in which C may be called, arbitrary locks may be held,
 * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
 * As a result, functions called from probe context may only call other DTrace
 * support functions -- they may not interact at all with the system at large.
 * (Note that the ASSERT macro is made probe-context safe by redefining it in
 * terms of dtrace_assfail(), a probe-context safe function.)  If arbitrary
 * loads are to be performed from probe context, they _must_ be in terms of
 * the safe dtrace_load*() variants.
 *
 * Some functions in this block are not actually called from probe context;
 * for these functions, there will be a comment above the function reading
 * "Note:  not called from probe context."
 */
static void
dtrace_panic(const char *format, ...)
{
	va_list alist;

	va_start(alist, format);
	dtrace_vpanic(format, alist);
	va_end(alist);
}
int
dtrace_assfail(const char *a, const char *f, int l)
{
	dtrace_panic("assertion failed: %s, file: %s, line: %d", a, f, l);

	/*
	 * We just need something here that even the most clever compiler
	 * cannot optimize away.
	 */
	return (a[(uintptr_t)f]);
}
/*
 * Atomically increment a specified error counter from probe context.
 */
static void
dtrace_error(uint32_t *counter)
{
	/*
	 * Most counters stored to in probe context are per-CPU counters.
	 * However, there are some error conditions that are sufficiently
	 * arcane that they don't merit per-CPU storage.  If these counters
	 * are incremented concurrently on different CPUs, scalability will be
	 * adversely affected -- but we don't expect them to be white-hot in a
	 * correctly constructed enabling...
	 */
	uint32_t oval, nval;

	do {
		oval = *counter;

		if ((nval = oval + 1) == 0) {
			/*
			 * If the counter would wrap, set it to 1 -- assuring
			 * that the counter is never zero when we have seen
			 * errors.  (The counter must be 32-bits because we
			 * aren't guaranteed a 64-bit compare&swap operation.)
			 * To save this code both the infamy of being fingered
			 * by a priggish news story and the indignity of being
			 * the target of a neo-puritan witch trial, we're
			 * carefully avoiding any colorful description of the
			 * likelihood of this condition -- but suffice it to
			 * say that it is only slightly more likely than the
			 * overflow of predicate cache IDs, as discussed in
			 * dtrace_predicate_create().
			 */
			nval = 1;
		}
	} while (dtrace_cas32(counter, oval, nval) != oval);
}
/*
 * Use the DTRACE_LOADFUNC macro to define functions for each of loading a
 * uint8_t, a uint16_t, a uint32_t and a uint64_t.
 */
static int
dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate)
{
	if (dest < mstate->dtms_scratch_base)
		return (0);

	if (dest + size < dest)
		return (0);

	if (dest + size > mstate->dtms_scratch_ptr)
		return (0);

	return (1);
}
static int
dtrace_canstore_statvar(uint64_t addr, size_t sz,
    dtrace_statvar_t **svars, int nsvars)
{
	int i;

	for (i = 0; i < nsvars; i++) {
		dtrace_statvar_t *svar = svars[i];

		if (svar == NULL || svar->dtsv_size == 0)
			continue;

		if (addr - svar->dtsv_data < svar->dtsv_size &&
		    addr + sz <= svar->dtsv_data + svar->dtsv_size)
			return (1);
	}

	return (0);
}
/*
 * Check to see if the address is within a memory region to which a store may
 * be issued.  This includes the DTrace scratch areas, and any DTrace variable
 * region.  The caller of dtrace_canstore() is responsible for performing any
 * alignment checks that are needed before stores are actually executed.
 */
static int
dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate)
{
	uintptr_t a;
	size_t s;

	/*
	 * First, check to see if the address is in scratch space...
	 */
	a = mstate->dtms_scratch_base;
	s = mstate->dtms_scratch_size;

	if (addr - a < s && addr + sz <= a + s)
		return (1);

	/*
	 * Now check to see if it's a dynamic variable.  This check will pick
	 * up both thread-local variables and any global dynamically-allocated
	 * variables.
	 */
	a = (uintptr_t)vstate->dtvs_dynvars.dtds_base;
	s = vstate->dtvs_dynvars.dtds_size;
	if (addr - a < s && addr + sz <= a + s)
		return (1);

	/*
	 * Finally, check the static local and global variables.  These checks
	 * take the longest, so we perform them last.
	 */
	if (dtrace_canstore_statvar(addr, sz,
	    vstate->dtvs_locals, vstate->dtvs_nlocals))
		return (1);

	if (dtrace_canstore_statvar(addr, sz,
	    vstate->dtvs_globals, vstate->dtvs_nglobals))
		return (1);

	return (0);
}
/*
 * Compare two strings using safe loads.
 */
static int
dtrace_strncmp(char *s1, char *s2, size_t limit)
{
	uint8_t c1, c2;
	volatile uint16_t *flags;

	if (s1 == s2 || limit == 0)
		return (0);

	flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

	do {
		c1 = dtrace_load8((uintptr_t)s1++);
		c2 = dtrace_load8((uintptr_t)s2++);

		if (c1 != c2)
			return (c1 - c2);
	} while (--limit && c1 != '\0' && !(*flags & CPU_DTRACE_FAULT));

	return (0);
}
/*
 * Compute strlen(s) for a string using safe memory accesses.  The additional
 * len parameter is used to specify a maximum length to ensure completion.
 */
static size_t
dtrace_strlen(const char *s, size_t lim)
{
	uint_t len;

	for (len = 0; len != lim; len++)
		if (dtrace_load8((uintptr_t)s++) == '\0')
			break;

	return (len);
}
/*
 * Check if an address falls within a toxic region.
 */
static int
dtrace_istoxic(uintptr_t kaddr, size_t size)
{
	uintptr_t taddr, tsize;
	int i;

	for (i = 0; i < dtrace_toxranges; i++) {
		taddr = dtrace_toxrange[i].dtt_base;
		tsize = dtrace_toxrange[i].dtt_limit - taddr;

		if (kaddr - taddr < tsize) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = kaddr;
			return (1);
		}

		if (taddr - kaddr < size) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = taddr;
			return (1);
		}
	}

	return (0);
}
/*
 * Copy src to dst using safe memory accesses.  The src is assumed to be unsafe
 * memory specified by the DIF program.  The dst is assumed to be safe memory
 * that we can store to directly because it is managed by DTrace.  As with
 * standard bcopy, overlapping copies are handled properly.
 */
static void
dtrace_bcopy(const void *src, void *dst, size_t len)
{
	if (len != 0) {
		uint8_t *s1 = dst;
		const uint8_t *s2 = src;

		if (s1 <= s2) {
			do {
				*s1++ = dtrace_load8((uintptr_t)s2++);
			} while (--len != 0);
		} else {
			s2 += len;
			s1 += len;

			do {
				*--s1 = dtrace_load8((uintptr_t)--s2);
			} while (--len != 0);
		}
	}
}
/*
 * Copy src to dst using safe memory accesses, up to either the specified
 * length, or the point that a nul byte is encountered.  The src is assumed to
 * be unsafe memory specified by the DIF program.  The dst is assumed to be
 * safe memory that we can store to directly because it is managed by DTrace.
 * Unlike dtrace_bcopy(), overlapping regions are not handled.
 */
static void
dtrace_strcpy(const void *src, void *dst, size_t len)
{
	if (len != 0) {
		uint8_t *s1 = dst, c;
		const uint8_t *s2 = src;

		do {
			*s1++ = c = dtrace_load8((uintptr_t)s2++);
		} while (--len != 0 && c != '\0');
	}
}
/*
 * Copy src to dst, deriving the size and type from the specified (BYREF)
 * variable type.  The src is assumed to be unsafe memory specified by the DIF
 * program.  The dst is assumed to be DTrace variable memory that is of the
 * specified type; we assume that we can store to directly.
 */
static void
dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type)
{
	ASSERT(type->dtdt_flags & DIF_TF_BYREF);

	if (type->dtdt_kind == DIF_TYPE_STRING)
		dtrace_strcpy(src, dst, type->dtdt_size);
	else
		dtrace_bcopy(src, dst, type->dtdt_size);
}
/*
 * Compare s1 to s2 using safe memory accesses.  The s1 data is assumed to be
 * unsafe memory specified by the DIF program.  The s2 data is assumed to be
 * safe memory that we can access directly because it is managed by DTrace.
 */
static int
dtrace_bcmp(const void *s1, const void *s2, size_t len)
{
	volatile uint16_t *flags;

	flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

	if (s1 == s2)
		return (0);

	if (s1 == NULL || s2 == NULL)
		return (1);

	if (s1 != s2 && len != 0) {
		const uint8_t *ps1 = s1;
		const uint8_t *ps2 = s2;

		do {
			if (dtrace_load8((uintptr_t)ps1++) != *ps2++)
				return (1);
		} while (--len != 0 && !(*flags & CPU_DTRACE_FAULT));
	}
	return (0);
}
/*
 * Zero the specified region using a simple byte-by-byte loop.  Note that this
 * is for safe DTrace-managed memory only.
 */
static void
dtrace_bzero(void *dst, size_t len)
{
	uchar_t *cp;

	for (cp = dst; len != 0; len--)
		*cp++ = 0;
}
919 * This privilege check should be used by actions and subroutines to
920 * verify that the user credentials of the process that enabled the
921 * invoking ECB match the target credentials
924 dtrace_priv_proc_common_user(dtrace_state_t
*state
)
926 cred_t
*cr
, *s_cr
= state
->dts_cred
.dcr_cred
;
929 * We should always have a non-NULL state cred here, since if cred
930 * is null (anonymous tracing), we fast-path bypass this routine.
932 ASSERT(s_cr
!= NULL
);
934 #if !defined(__APPLE__)
935 if ((cr
= CRED()) != NULL
&&
937 if ((cr
= dtrace_CRED()) != NULL
&&
938 #endif /* __APPLE__ */
939 s_cr
->cr_uid
== cr
->cr_uid
&&
940 s_cr
->cr_uid
== cr
->cr_ruid
&&
941 s_cr
->cr_uid
== cr
->cr_suid
&&
942 s_cr
->cr_gid
== cr
->cr_gid
&&
943 s_cr
->cr_gid
== cr
->cr_rgid
&&
944 s_cr
->cr_gid
== cr
->cr_sgid
)
951 * This privilege check should be used by actions and subroutines to
952 * verify that the zone of the process that enabled the invoking ECB
953 * matches the target credentials
956 dtrace_priv_proc_common_zone(dtrace_state_t
*state
)
958 cred_t
*cr
, *s_cr
= state
->dts_cred
.dcr_cred
;
961 * We should always have a non-NULL state cred here, since if cred
962 * is null (anonymous tracing), we fast-path bypass this routine.
964 ASSERT(s_cr
!= NULL
);
966 #if !defined(__APPLE__)
967 if ((cr
= CRED()) != NULL
&&
968 s_cr
->cr_zone
== cr
->cr_zone
)
973 #pragma unused(state)
975 return 1; /* Darwin doesn't do zones. */
976 #endif /* __APPLE__ */
980 * This privilege check should be used by actions and subroutines to
981 * verify that the process has not setuid or changed credentials.
983 #if !defined(__APPLE__)
985 dtrace_priv_proc_common_nocd()
989 if ((proc
= ttoproc(curthread
)) != NULL
&&
990 !(proc
->p_flag
& SNOCD
))
997 dtrace_priv_proc_common_nocd(void)
999 return 1; /* Darwin omits "No Core Dump" flag. */
1001 #endif /* __APPLE__ */
1004 dtrace_priv_proc_destructive(dtrace_state_t
*state
)
1006 int action
= state
->dts_cred
.dcr_action
;
1008 #if defined(__APPLE__)
1009 if (ISSET(current_proc()->p_lflag
, P_LNOATTACH
))
1011 #endif /* __APPLE__ */
1013 if (((action
& DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE
) == 0) &&
1014 dtrace_priv_proc_common_zone(state
) == 0)
1017 if (((action
& DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER
) == 0) &&
1018 dtrace_priv_proc_common_user(state
) == 0)
1021 if (((action
& DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG
) == 0) &&
1022 dtrace_priv_proc_common_nocd() == 0)
1028 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
|= CPU_DTRACE_UPRIV
;
1034 dtrace_priv_proc_control(dtrace_state_t
*state
)
1036 #if defined(__APPLE__)
1037 if (ISSET(current_proc()->p_lflag
, P_LNOATTACH
))
1039 #endif /* __APPLE__ */
1041 if (state
->dts_cred
.dcr_action
& DTRACE_CRA_PROC_CONTROL
)
1044 if (dtrace_priv_proc_common_zone(state
) &&
1045 dtrace_priv_proc_common_user(state
) &&
1046 dtrace_priv_proc_common_nocd())
1049 #if defined(__APPLE__)
1051 #endif /* __APPLE__ */
1052 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
|= CPU_DTRACE_UPRIV
;
1058 dtrace_priv_proc(dtrace_state_t
*state
)
1060 #if defined(__APPLE__)
1061 if (ISSET(current_proc()->p_lflag
, P_LNOATTACH
))
1063 #endif /* __APPLE__ */
1065 if (state
->dts_cred
.dcr_action
& DTRACE_CRA_PROC
)
1068 #if defined(__APPLE__)
1070 #endif /* __APPLE__ */
1071 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
|= CPU_DTRACE_UPRIV
;
1076 #if defined(__APPLE__)
1077 /* dtrace_priv_proc() omitting the P_LNOATTACH check. For PID and EXECNAME accesses. */
1079 dtrace_priv_proc_relaxed(dtrace_state_t
*state
)
1082 if (state
->dts_cred
.dcr_action
& DTRACE_CRA_PROC
)
1085 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
|= CPU_DTRACE_UPRIV
;
1089 #endif /* __APPLE__ */
1092 dtrace_priv_kernel(dtrace_state_t
*state
)
1094 if (state
->dts_cred
.dcr_action
& DTRACE_CRA_KERNEL
)
1097 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
|= CPU_DTRACE_KPRIV
;
1103 dtrace_priv_kernel_destructive(dtrace_state_t
*state
)
1105 if (state
->dts_cred
.dcr_action
& DTRACE_CRA_KERNEL_DESTRUCTIVE
)
1108 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
|= CPU_DTRACE_KPRIV
;
1114 * Note: not called from probe context. This function is called
1115 * asynchronously (and at a regular interval) from outside of probe context to
1116 * clean the dirty dynamic variable lists on all CPUs. Dynamic variable
1117 * cleaning is explained in detail in <sys/dtrace_impl.h>.
1119 #if defined(__APPLE__)
1121 #endif /* __APPLE__ */
1123 dtrace_dynvar_clean(dtrace_dstate_t
*dstate
)
1125 dtrace_dynvar_t
*dirty
;
1126 dtrace_dstate_percpu_t
*dcpu
;
1129 for (i
= 0; i
< (int)NCPU
; i
++) {
1130 dcpu
= &dstate
->dtds_percpu
[i
];
1132 ASSERT(dcpu
->dtdsc_rinsing
== NULL
);
1135 * If the dirty list is NULL, there is no dirty work to do.
1137 if (dcpu
->dtdsc_dirty
== NULL
)
1141 * If the clean list is non-NULL, then we're not going to do
1142 * any work for this CPU -- it means that there has not been
1143 * a dtrace_dynvar() allocation on this CPU (or from this CPU)
1144 * since the last time we cleaned house.
1146 if (dcpu
->dtdsc_clean
!= NULL
)
1152 * Atomically move the dirty list aside.
1155 dirty
= dcpu
->dtdsc_dirty
;
1158 * Before we zap the dirty list, set the rinsing list.
1159 * (This allows for a potential assertion in
1160 * dtrace_dynvar(): if a free dynamic variable appears
1161 * on a hash chain, either the dirty list or the
1162 * rinsing list for some CPU must be non-NULL.)
1164 dcpu
->dtdsc_rinsing
= dirty
;
1165 dtrace_membar_producer();
1166 } while (dtrace_casptr(&dcpu
->dtdsc_dirty
,
1167 dirty
, NULL
) != dirty
);
1172 * We have no work to do; we can simply return.
1179 for (i
= 0; i
< (int)NCPU
; i
++) {
1180 dcpu
= &dstate
->dtds_percpu
[i
];
1182 if (dcpu
->dtdsc_rinsing
== NULL
)
1186 * We are now guaranteed that no hash chain contains a pointer
1187 * into this dirty list; we can make it clean.
1189 ASSERT(dcpu
->dtdsc_clean
== NULL
);
1190 dcpu
->dtdsc_clean
= dcpu
->dtdsc_rinsing
;
1191 dcpu
->dtdsc_rinsing
= NULL
;
1195 * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make
1196 * sure that all CPUs have seen all of the dtdsc_clean pointers.
1197 * This prevents a race whereby a CPU incorrectly decides that
1198 * the state should be something other than DTRACE_DSTATE_CLEAN
1199 * after dtrace_dynvar_clean() has completed.
1203 dstate
->dtds_state
= DTRACE_DSTATE_CLEAN
;
* Depending on the value of the op parameter, this function looks up,
1208 * allocates or deallocates an arbitrarily-keyed dynamic variable. If an
1209 * allocation is requested, this function will return a pointer to a
1210 * dtrace_dynvar_t corresponding to the allocated variable -- or NULL if no
1211 * variable can be allocated. If NULL is returned, the appropriate counter
1212 * will be incremented.
1214 #if defined(__APPLE__)
1216 #endif /* __APPLE__ */
1218 dtrace_dynvar(dtrace_dstate_t
*dstate
, uint_t nkeys
,
1219 dtrace_key_t
*key
, size_t dsize
, dtrace_dynvar_op_t op
)
1221 uint64_t hashval
= DTRACE_DYNHASH_VALID
;
1222 dtrace_dynhash_t
*hash
= dstate
->dtds_hash
;
1223 dtrace_dynvar_t
*free
, *new_free
, *next
, *dvar
, *start
, *prev
= NULL
;
1224 processorid_t me
= CPU
->cpu_id
, cpu
= me
;
1225 dtrace_dstate_percpu_t
*dcpu
= &dstate
->dtds_percpu
[me
];
1226 size_t bucket
, ksize
;
1227 size_t chunksize
= dstate
->dtds_chunksize
;
1228 uintptr_t kdata
, lock
, nstate
;
1234 * Hash the key. As with aggregations, we use Jenkins' "One-at-a-time"
1235 * algorithm. For the by-value portions, we perform the algorithm in
1236 * 16-bit chunks (as opposed to 8-bit chunks). This speeds things up a
1237 * bit, and seems to have only a minute effect on distribution. For
1238 * the by-reference data, we perform "One-at-a-time" iterating (safely)
1239 * over each referenced byte. It's painful to do this, but it's much
1240 * better than pathological hash distribution. The efficacy of the
1241 * hashing algorithm (and a comparison with other algorithms) may be
1242 * found by running the ::dtrace_dynstat MDB dcmd.
1244 for (i
= 0; i
< nkeys
; i
++) {
1245 if (key
[i
].dttk_size
== 0) {
1246 uint64_t val
= key
[i
].dttk_value
;
1248 hashval
+= (val
>> 48) & 0xffff;
1249 hashval
+= (hashval
<< 10);
1250 hashval
^= (hashval
>> 6);
1252 hashval
+= (val
>> 32) & 0xffff;
1253 hashval
+= (hashval
<< 10);
1254 hashval
^= (hashval
>> 6);
1256 hashval
+= (val
>> 16) & 0xffff;
1257 hashval
+= (hashval
<< 10);
1258 hashval
^= (hashval
>> 6);
1260 hashval
+= val
& 0xffff;
1261 hashval
+= (hashval
<< 10);
1262 hashval
^= (hashval
>> 6);
1265 * This is incredibly painful, but it beats the hell
1266 * out of the alternative.
1268 uint64_t j
, size
= key
[i
].dttk_size
;
1269 uintptr_t base
= (uintptr_t)key
[i
].dttk_value
;
1271 for (j
= 0; j
< size
; j
++) {
1272 hashval
+= dtrace_load8(base
+ j
);
1273 hashval
+= (hashval
<< 10);
1274 hashval
^= (hashval
>> 6);
1279 hashval
+= (hashval
<< 3);
1280 hashval
^= (hashval
>> 11);
1281 hashval
+= (hashval
<< 15);
1284 * There is a remote chance (ideally, 1 in 2^31) that our hashval
1285 * comes out to be one of our two sentinel hash values. If this
1286 * actually happens, we set the hashval to be a value known to be a
1287 * non-sentinel value.
1289 if (hashval
== DTRACE_DYNHASH_FREE
|| hashval
== DTRACE_DYNHASH_SINK
)
1290 hashval
= DTRACE_DYNHASH_VALID
;
1293 * Yes, it's painful to do a divide here. If the cycle count becomes
1294 * important here, tricks can be pulled to reduce it. (However, it's
1295 * critical that hash collisions be kept to an absolute minimum;
1296 * they're much more painful than a divide.) It's better to have a
1297 * solution that generates few collisions and still keeps things
1298 * relatively simple.
1300 bucket
= hashval
% dstate
->dtds_hashsize
;
1302 if (op
== DTRACE_DYNVAR_DEALLOC
) {
1303 volatile uintptr_t *lockp
= &hash
[bucket
].dtdh_lock
;
1306 while ((lock
= *lockp
) & 1)
1309 if (dtrace_casptr((void *)lockp
,
1310 (void *)lock
, (void *)(lock
+ 1)) == (void *)lock
)
1314 dtrace_membar_producer();
1319 lock
= hash
[bucket
].dtdh_lock
;
1321 dtrace_membar_consumer();
1323 start
= hash
[bucket
].dtdh_chain
;
1324 ASSERT(start
!= NULL
&& (start
->dtdv_hashval
== DTRACE_DYNHASH_SINK
||
1325 start
->dtdv_hashval
!= DTRACE_DYNHASH_FREE
||
1326 op
!= DTRACE_DYNVAR_DEALLOC
));
1328 for (dvar
= start
; dvar
!= NULL
; dvar
= dvar
->dtdv_next
) {
1329 dtrace_tuple_t
*dtuple
= &dvar
->dtdv_tuple
;
1330 dtrace_key_t
*dkey
= &dtuple
->dtt_key
[0];
1332 if (dvar
->dtdv_hashval
!= hashval
) {
1333 if (dvar
->dtdv_hashval
== DTRACE_DYNHASH_SINK
) {
1335 * We've reached the sink, and therefore the
1336 * end of the hash chain; we can kick out of
1337 * the loop knowing that we have seen a valid
1338 * snapshot of state.
1340 ASSERT(dvar
->dtdv_next
== NULL
);
1341 ASSERT(dvar
== &dtrace_dynhash_sink
);
1345 if (dvar
->dtdv_hashval
== DTRACE_DYNHASH_FREE
) {
1347 * We've gone off the rails: somewhere along
1348 * the line, one of the members of this hash
1349 * chain was deleted. Note that we could also
1350 * detect this by simply letting this loop run
1351 * to completion, as we would eventually hit
1352 * the end of the dirty list. However, we
1353 * want to avoid running the length of the
1354 * dirty list unnecessarily (it might be quite
1355 * long), so we catch this as early as
1356 * possible by detecting the hash marker. In
1357 * this case, we simply set dvar to NULL and
1358 * break; the conditional after the loop will
1359 * send us back to top.
1368 if (dtuple
->dtt_nkeys
!= nkeys
)
1371 for (i
= 0; i
< nkeys
; i
++, dkey
++) {
1372 if (dkey
->dttk_size
!= key
[i
].dttk_size
)
1373 goto next
; /* size or type mismatch */
1375 if (dkey
->dttk_size
!= 0) {
1377 (void *)(uintptr_t)key
[i
].dttk_value
,
1378 (void *)(uintptr_t)dkey
->dttk_value
,
1382 if (dkey
->dttk_value
!= key
[i
].dttk_value
)
1387 if (op
!= DTRACE_DYNVAR_DEALLOC
)
1390 ASSERT(dvar
->dtdv_next
== NULL
||
1391 dvar
->dtdv_next
->dtdv_hashval
!= DTRACE_DYNHASH_FREE
);
1394 ASSERT(hash
[bucket
].dtdh_chain
!= dvar
);
1395 ASSERT(start
!= dvar
);
1396 ASSERT(prev
->dtdv_next
== dvar
);
1397 prev
->dtdv_next
= dvar
->dtdv_next
;
1399 if (dtrace_casptr(&hash
[bucket
].dtdh_chain
,
1400 start
, dvar
->dtdv_next
) != start
) {
1402 * We have failed to atomically swing the
1403 * hash table head pointer, presumably because
1404 * of a conflicting allocation on another CPU.
1405 * We need to reread the hash chain and try
1412 dtrace_membar_producer();
1415 * Now set the hash value to indicate that it's free.
1417 ASSERT(hash
[bucket
].dtdh_chain
!= dvar
);
1418 dvar
->dtdv_hashval
= DTRACE_DYNHASH_FREE
;
1420 dtrace_membar_producer();
1423 * Set the next pointer to point at the dirty list, and
1424 * atomically swing the dirty pointer to the newly freed dvar.
1427 next
= dcpu
->dtdsc_dirty
;
1428 dvar
->dtdv_next
= next
;
1429 } while (dtrace_casptr(&dcpu
->dtdsc_dirty
, next
, dvar
) != next
);
1432 * Finally, unlock this hash bucket.
1434 ASSERT(hash
[bucket
].dtdh_lock
== lock
);
1436 hash
[bucket
].dtdh_lock
++;
1446 * If dvar is NULL, it is because we went off the rails:
1447 * one of the elements that we traversed in the hash chain
1448 * was deleted while we were traversing it. In this case,
1449 * we assert that we aren't doing a dealloc (deallocs lock
1450 * the hash bucket to prevent themselves from racing with
1451 * one another), and retry the hash chain traversal.
1453 ASSERT(op
!= DTRACE_DYNVAR_DEALLOC
);
1457 if (op
!= DTRACE_DYNVAR_ALLOC
) {
1459 * If we are not to allocate a new variable, we want to
1460 * return NULL now. Before we return, check that the value
1461 * of the lock word hasn't changed. If it has, we may have
1462 * seen an inconsistent snapshot.
1464 if (op
== DTRACE_DYNVAR_NOALLOC
) {
1465 if (hash
[bucket
].dtdh_lock
!= lock
)
1468 ASSERT(op
== DTRACE_DYNVAR_DEALLOC
);
1469 ASSERT(hash
[bucket
].dtdh_lock
== lock
);
1471 hash
[bucket
].dtdh_lock
++;
1478 * We need to allocate a new dynamic variable. The size we need is the
1479 * size of dtrace_dynvar plus the size of nkeys dtrace_key_t's plus the
1480 * size of any auxiliary key data (rounded up to 8-byte alignment) plus
1481 * the size of any referred-to data (dsize). We then round the final
1482 * size up to the chunksize for allocation.
1484 for (ksize
= 0, i
= 0; i
< nkeys
; i
++)
1485 ksize
+= P2ROUNDUP(key
[i
].dttk_size
, sizeof (uint64_t));
1488 * This should be pretty much impossible, but could happen if, say,
1489 * strange DIF specified the tuple. Ideally, this should be an
1490 * assertion and not an error condition -- but that requires that the
1491 * chunksize calculation in dtrace_difo_chunksize() be absolutely
1492 * bullet-proof. (That is, it must not be able to be fooled by
1493 * malicious DIF.) Given the lack of backwards branches in DIF,
1494 * solving this would presumably not amount to solving the Halting
1495 * Problem -- but it still seems awfully hard.
1497 if (sizeof (dtrace_dynvar_t
) + sizeof (dtrace_key_t
) * (nkeys
- 1) +
1498 ksize
+ dsize
> chunksize
) {
1499 dcpu
->dtdsc_drops
++;
1503 nstate
= DTRACE_DSTATE_EMPTY
;
1507 free
= dcpu
->dtdsc_free
;
1510 dtrace_dynvar_t
*clean
= dcpu
->dtdsc_clean
;
1513 if (clean
== NULL
) {
1515 * We're out of dynamic variable space on
1516 * this CPU. Unless we have tried all CPUs,
1517 * we'll try to allocate from a different
1520 switch (dstate
->dtds_state
) {
1521 case DTRACE_DSTATE_CLEAN
: {
1522 void *sp
= &dstate
->dtds_state
;
1524 if (++cpu
>= (int)NCPU
)
1527 if (dcpu
->dtdsc_dirty
!= NULL
&&
1528 nstate
== DTRACE_DSTATE_EMPTY
)
1529 nstate
= DTRACE_DSTATE_DIRTY
;
1531 if (dcpu
->dtdsc_rinsing
!= NULL
)
1532 nstate
= DTRACE_DSTATE_RINSING
;
1534 dcpu
= &dstate
->dtds_percpu
[cpu
];
1539 (void) dtrace_cas32(sp
,
1540 DTRACE_DSTATE_CLEAN
, nstate
);
1543 * To increment the correct bean
1544 * counter, take another lap.
1549 case DTRACE_DSTATE_DIRTY
:
1550 dcpu
->dtdsc_dirty_drops
++;
1553 case DTRACE_DSTATE_RINSING
:
1554 dcpu
->dtdsc_rinsing_drops
++;
1557 case DTRACE_DSTATE_EMPTY
:
1558 dcpu
->dtdsc_drops
++;
1562 DTRACE_CPUFLAG_SET(CPU_DTRACE_DROP
);
1567 * The clean list appears to be non-empty. We want to
1568 * move the clean list to the free list; we start by
1569 * moving the clean pointer aside.
1571 if (dtrace_casptr(&dcpu
->dtdsc_clean
,
1572 clean
, NULL
) != clean
) {
1574 * We are in one of two situations:
1576 * (a) The clean list was switched to the
1577 * free list by another CPU.
1579 * (b) The clean list was added to by the
1582 * In either of these situations, we can
1583 * just reattempt the free list allocation.
1588 ASSERT(clean
->dtdv_hashval
== DTRACE_DYNHASH_FREE
);
1591 * Now we'll move the clean list to the free list.
1592 * It's impossible for this to fail: the only way
1593 * the free list can be updated is through this
1594 * code path, and only one CPU can own the clean list.
1595 * Thus, it would only be possible for this to fail if
1596 * this code were racing with dtrace_dynvar_clean().
1597 * (That is, if dtrace_dynvar_clean() updated the clean
1598 * list, and we ended up racing to update the free
1599 * list.) This race is prevented by the dtrace_sync()
1600 * in dtrace_dynvar_clean() -- which flushes the
1601 * owners of the clean lists out before resetting
1604 rval
= dtrace_casptr(&dcpu
->dtdsc_free
, NULL
, clean
);
1605 ASSERT(rval
== NULL
);
1610 new_free
= dvar
->dtdv_next
;
1611 } while (dtrace_casptr(&dcpu
->dtdsc_free
, free
, new_free
) != free
);
1614 * We have now allocated a new chunk. We copy the tuple keys into the
1615 * tuple array and copy any referenced key data into the data space
1616 * following the tuple array. As we do this, we relocate dttk_value
1617 * in the final tuple to point to the key data address in the chunk.
1619 kdata
= (uintptr_t)&dvar
->dtdv_tuple
.dtt_key
[nkeys
];
1620 dvar
->dtdv_data
= (void *)(kdata
+ ksize
);
1621 dvar
->dtdv_tuple
.dtt_nkeys
= nkeys
;
1623 for (i
= 0; i
< nkeys
; i
++) {
1624 dtrace_key_t
*dkey
= &dvar
->dtdv_tuple
.dtt_key
[i
];
1625 size_t kesize
= key
[i
].dttk_size
;
1629 (const void *)(uintptr_t)key
[i
].dttk_value
,
1630 (void *)kdata
, kesize
);
1631 dkey
->dttk_value
= kdata
;
1632 kdata
+= P2ROUNDUP(kesize
, sizeof (uint64_t));
1634 dkey
->dttk_value
= key
[i
].dttk_value
;
1637 dkey
->dttk_size
= kesize
;
1640 ASSERT(dvar
->dtdv_hashval
== DTRACE_DYNHASH_FREE
);
1641 dvar
->dtdv_hashval
= hashval
;
1642 dvar
->dtdv_next
= start
;
1644 if (dtrace_casptr(&hash
[bucket
].dtdh_chain
, start
, dvar
) == start
)
1648 * The cas has failed. Either another CPU is adding an element to
1649 * this hash chain, or another CPU is deleting an element from this
1650 * hash chain. The simplest way to deal with both of these cases
1651 * (though not necessarily the most efficient) is to free our
1652 * allocated block and tail-call ourselves. Note that the free is
1653 * to the dirty list and _not_ to the free list. This is to prevent
1654 * races with allocators, above.
1656 dvar
->dtdv_hashval
= DTRACE_DYNHASH_FREE
;
1658 dtrace_membar_producer();
1661 free
= dcpu
->dtdsc_dirty
;
1662 dvar
->dtdv_next
= free
;
1663 } while (dtrace_casptr(&dcpu
->dtdsc_dirty
, free
, dvar
) != free
);
1665 return (dtrace_dynvar(dstate
, nkeys
, key
, dsize
, op
));
1670 dtrace_aggregate_min(uint64_t *oval
, uint64_t nval
, uint64_t arg
)
1679 dtrace_aggregate_max(uint64_t *oval
, uint64_t nval
, uint64_t arg
)
1687 dtrace_aggregate_quantize(uint64_t *quanta
, uint64_t nval
, uint64_t incr
)
1689 int i
, zero
= DTRACE_QUANTIZE_ZEROBUCKET
;
1690 int64_t val
= (int64_t)nval
;
1693 for (i
= 0; i
< zero
; i
++) {
1694 if (val
<= DTRACE_QUANTIZE_BUCKETVAL(i
)) {
1700 for (i
= zero
+ 1; i
< DTRACE_QUANTIZE_NBUCKETS
; i
++) {
1701 if (val
< DTRACE_QUANTIZE_BUCKETVAL(i
)) {
1702 quanta
[i
- 1] += incr
;
1707 quanta
[DTRACE_QUANTIZE_NBUCKETS
- 1] += incr
;
1715 dtrace_aggregate_lquantize(uint64_t *lquanta
, uint64_t nval
, uint64_t incr
)
1717 uint64_t arg
= *lquanta
++;
1718 int32_t base
= DTRACE_LQUANTIZE_BASE(arg
);
1719 uint16_t step
= DTRACE_LQUANTIZE_STEP(arg
);
1720 uint16_t levels
= DTRACE_LQUANTIZE_LEVELS(arg
);
1721 int32_t val
= (int32_t)nval
, level
;
1724 ASSERT(levels
!= 0);
1728 * This is an underflow.
1734 level
= (val
- base
) / step
;
1736 if (level
< levels
) {
1737 lquanta
[level
+ 1] += incr
;
1742 * This is an overflow.
1744 lquanta
[levels
+ 1] += incr
;
1749 dtrace_aggregate_avg(uint64_t *data
, uint64_t nval
, uint64_t arg
)
1758 dtrace_aggregate_count(uint64_t *oval
, uint64_t nval
, uint64_t arg
)
1760 #pragma unused(nval,arg)
1766 dtrace_aggregate_sum(uint64_t *oval
, uint64_t nval
, uint64_t arg
)
1773 * Aggregate given the tuple in the principal data buffer, and the aggregating
1774 * action denoted by the specified dtrace_aggregation_t. The aggregation
1775 * buffer is specified as the buf parameter. This routine does not return
1776 * failure; if there is no space in the aggregation buffer, the data will be
1777 * dropped, and a corresponding counter incremented.
1780 dtrace_aggregate(dtrace_aggregation_t
*agg
, dtrace_buffer_t
*dbuf
,
1781 intptr_t offset
, dtrace_buffer_t
*buf
, uint64_t expr
, uint64_t arg
)
1784 dtrace_recdesc_t
*rec
= &agg
->dtag_action
.dta_rec
;
1785 uint32_t i
, ndx
, size
, fsize
;
1786 uint32_t align
= sizeof (uint64_t) - 1;
1787 dtrace_aggbuffer_t
*agb
;
1788 dtrace_aggkey_t
*key
;
1789 uint32_t hashval
= 0, limit
, isstr
;
1790 caddr_t tomax
, data
, kdata
;
1791 dtrace_actkind_t action
;
1792 dtrace_action_t
*act
;
1798 if (!agg
->dtag_hasarg
) {
1800 * Currently, only quantize() and lquantize() take additional
1801 * arguments, and they have the same semantics: an increment
1802 * value that defaults to 1 when not present. If additional
1803 * aggregating actions take arguments, the setting of the
1804 * default argument value will presumably have to become more
1810 action
= agg
->dtag_action
.dta_kind
- DTRACEACT_AGGREGATION
;
1811 size
= rec
->dtrd_offset
- agg
->dtag_base
;
1812 fsize
= size
+ rec
->dtrd_size
;
1814 ASSERT(dbuf
->dtb_tomax
!= NULL
);
1815 data
= dbuf
->dtb_tomax
+ offset
+ agg
->dtag_base
;
1817 if ((tomax
= buf
->dtb_tomax
) == NULL
) {
1818 dtrace_buffer_drop(buf
);
1823 * The metastructure is always at the bottom of the buffer.
1825 agb
= (dtrace_aggbuffer_t
*)(tomax
+ buf
->dtb_size
-
1826 sizeof (dtrace_aggbuffer_t
));
1828 if (buf
->dtb_offset
== 0) {
1830 * We just kludge up approximately 1/8th of the size to be
1831 * buckets. If this guess ends up being routinely
1832 * off-the-mark, we may need to dynamically readjust this
1833 * based on past performance.
1835 uintptr_t hashsize
= (buf
->dtb_size
>> 3) / sizeof (uintptr_t);
1837 if ((uintptr_t)agb
- hashsize
* sizeof (dtrace_aggkey_t
*) <
1838 (uintptr_t)tomax
|| hashsize
== 0) {
1840 * We've been given a ludicrously small buffer;
1841 * increment our drop count and leave.
1843 dtrace_buffer_drop(buf
);
* And now, a pathetic attempt to try to get an odd (or
1849 * perchance, a prime) hash size for better hash distribution.
1851 if (hashsize
> (DTRACE_AGGHASHSIZE_SLEW
<< 3))
1852 hashsize
-= DTRACE_AGGHASHSIZE_SLEW
;
1854 agb
->dtagb_hashsize
= hashsize
;
1855 agb
->dtagb_hash
= (dtrace_aggkey_t
**)((uintptr_t)agb
-
1856 agb
->dtagb_hashsize
* sizeof (dtrace_aggkey_t
*));
1857 agb
->dtagb_free
= (uintptr_t)agb
->dtagb_hash
;
1859 for (i
= 0; i
< agb
->dtagb_hashsize
; i
++)
1860 agb
->dtagb_hash
[i
] = NULL
;
1863 ASSERT(agg
->dtag_first
!= NULL
);
1864 ASSERT(agg
->dtag_first
->dta_intuple
);
1867 * Calculate the hash value based on the key. Note that we _don't_
1868 * include the aggid in the hashing (but we will store it as part of
1869 * the key). The hashing algorithm is Bob Jenkins' "One-at-a-time"
1870 * algorithm: a simple, quick algorithm that has no known funnels, and
1871 * gets good distribution in practice. The efficacy of the hashing
1872 * algorithm (and a comparison with other algorithms) may be found by
1873 * running the ::dtrace_aggstat MDB dcmd.
1875 for (act
= agg
->dtag_first
; act
->dta_intuple
; act
= act
->dta_next
) {
1876 i
= act
->dta_rec
.dtrd_offset
- agg
->dtag_base
;
1877 limit
= i
+ act
->dta_rec
.dtrd_size
;
1878 ASSERT(limit
<= size
);
1879 isstr
= DTRACEACT_ISSTRING(act
);
1881 for (; i
< limit
; i
++) {
1883 hashval
+= (hashval
<< 10);
1884 hashval
^= (hashval
>> 6);
1886 if (isstr
&& data
[i
] == '\0')
1891 hashval
+= (hashval
<< 3);
1892 hashval
^= (hashval
>> 11);
1893 hashval
+= (hashval
<< 15);
1896 * Yes, the divide here is expensive -- but it's generally the least
1897 * of the performance issues given the amount of data that we iterate
1898 * over to compute hash values, compare data, etc.
1900 ndx
= hashval
% agb
->dtagb_hashsize
;
1902 for (key
= agb
->dtagb_hash
[ndx
]; key
!= NULL
; key
= key
->dtak_next
) {
1903 ASSERT((caddr_t
)key
>= tomax
);
1904 ASSERT((caddr_t
)key
< tomax
+ buf
->dtb_size
);
1906 if (hashval
!= key
->dtak_hashval
|| key
->dtak_size
!= size
)
1909 kdata
= key
->dtak_data
;
1910 ASSERT(kdata
>= tomax
&& kdata
< tomax
+ buf
->dtb_size
);
1912 for (act
= agg
->dtag_first
; act
->dta_intuple
;
1913 act
= act
->dta_next
) {
1914 i
= act
->dta_rec
.dtrd_offset
- agg
->dtag_base
;
1915 limit
= i
+ act
->dta_rec
.dtrd_size
;
1916 ASSERT(limit
<= size
);
1917 isstr
= DTRACEACT_ISSTRING(act
);
1919 for (; i
< limit
; i
++) {
1920 if (kdata
[i
] != data
[i
])
1923 if (isstr
&& data
[i
] == '\0')
1928 if (action
!= key
->dtak_action
) {
1930 * We are aggregating on the same value in the same
1931 * aggregation with two different aggregating actions.
1932 * (This should have been picked up in the compiler,
1933 * so we may be dealing with errant or devious DIF.)
1934 * This is an error condition; we indicate as much,
1937 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP
);
1942 * This is a hit: we need to apply the aggregator to
1943 * the value at this key.
1945 agg
->dtag_aggregate((uint64_t *)(kdata
+ size
), expr
, arg
);
1952 * We didn't find it. We need to allocate some zero-filled space,
1953 * link it into the hash table appropriately, and apply the aggregator
1954 * to the (zero-filled) value.
1956 offs
= buf
->dtb_offset
;
1957 while (offs
& (align
- 1))
1958 offs
+= sizeof (uint32_t);
1961 * If we don't have enough room to both allocate a new key _and_
1962 * its associated data, increment the drop count and return.
1964 if ((uintptr_t)tomax
+ offs
+ fsize
>
1965 agb
->dtagb_free
- sizeof (dtrace_aggkey_t
)) {
1966 dtrace_buffer_drop(buf
);
1971 ASSERT(!(sizeof (dtrace_aggkey_t
) & (sizeof (uintptr_t) - 1)));
1972 key
= (dtrace_aggkey_t
*)(agb
->dtagb_free
- sizeof (dtrace_aggkey_t
));
1973 agb
->dtagb_free
-= sizeof (dtrace_aggkey_t
);
1975 key
->dtak_data
= kdata
= tomax
+ offs
;
1976 buf
->dtb_offset
= offs
+ fsize
;
1979 * Now copy the data across.
1981 *((dtrace_aggid_t
*)kdata
) = agg
->dtag_id
;
1983 for (i
= sizeof (dtrace_aggid_t
); i
< size
; i
++)
1987 * Because strings are not zeroed out by default, we need to iterate
1988 * looking for actions that store strings, and we need to explicitly
1989 * pad these strings out with zeroes.
1991 for (act
= agg
->dtag_first
; act
->dta_intuple
; act
= act
->dta_next
) {
1994 if (!DTRACEACT_ISSTRING(act
))
1997 i
= act
->dta_rec
.dtrd_offset
- agg
->dtag_base
;
1998 limit
= i
+ act
->dta_rec
.dtrd_size
;
1999 ASSERT(limit
<= size
);
2001 for (nul
= 0; i
< limit
; i
++) {
2007 if (data
[i
] != '\0')
2014 for (i
= size
; i
< fsize
; i
++)
2017 key
->dtak_hashval
= hashval
;
2018 key
->dtak_size
= size
;
2019 key
->dtak_action
= action
;
2020 key
->dtak_next
= agb
->dtagb_hash
[ndx
];
2021 agb
->dtagb_hash
[ndx
] = key
;
2024 * Finally, apply the aggregator.
2026 *((uint64_t *)(key
->dtak_data
+ size
)) = agg
->dtag_initial
;
2027 agg
->dtag_aggregate((uint64_t *)(key
->dtak_data
+ size
), expr
, arg
);
/*
 * Given consumer state, this routine finds a speculation in the INACTIVE
 * state and transitions it into the ACTIVE state.  If there is no speculation
 * in the INACTIVE state, 0 is returned.  In this case, no error counter is
 * incremented -- it is up to the caller to take appropriate action.
 */
static int
dtrace_speculation(dtrace_state_t *state)
{
	int i = 0;
	dtrace_speculation_state_t current;
	uint32_t *stat = &state->dts_speculations_unavail, count;

	while (i < state->dts_nspeculations) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];

		current = spec->dtsp_state;

		if (current != DTRACESPEC_INACTIVE) {
			if (current == DTRACESPEC_COMMITTINGMANY ||
			    current == DTRACESPEC_COMMITTING ||
			    current == DTRACESPEC_DISCARDING)
				stat = &state->dts_speculations_busy;

			i++;
			continue;
		}

		if (dtrace_cas32((uint32_t *)&spec->dtsp_state,
		    current, DTRACESPEC_ACTIVE) == current)
			return (i + 1);
	}

	/*
	 * We couldn't find a speculation.  If we found as much as a single
	 * busy speculation buffer, we'll attribute this failure as "busy"
	 * instead of "unavail".
	 */
	do {
		count = *stat;
	} while (dtrace_cas32(stat, count, count + 1) != count);

	return (0);
}
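/*
 * The function above relies on dtrace_cas32() to claim a slot without
 * taking a lock: read the current state, then install the new state only
 * if the slot still holds the value that was read.  A minimal stand-alone
 * sketch of that pattern (hypothetical names; not part of the framework):
 *
 *	static int
 *	example_claim(volatile uint32_t *slot, uint32_t busy)
 *	{
 *		uint32_t cur;
 *
 *		do {
 *			cur = *slot;
 *			if (cur != 0)
 *				return (0);	 someone else owns it
 *		} while (dtrace_cas32((uint32_t *)slot, cur, busy) != cur);
 *
 *		return (1);	 transition 0 -> busy succeeded
 *	}
 *
 * dtrace_speculation() uses the same idea twice: once to move a slot from
 * INACTIVE to ACTIVE, and once to bump the "busy"/"unavail" counter.
 */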
/*
 * This routine commits an active speculation.  If the specified speculation
 * is not in a valid state to perform a commit(), this routine will silently do
 * nothing.  The state of the specified speculation is transitioned according
 * to the state transition diagram outlined in <sys/dtrace_impl.h>
 */
static void
dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_buffer_t *src, *dest;
	uintptr_t daddr, saddr, dlimit;
	dtrace_speculation_state_t current, new;
	intptr_t offs;

	if (which == 0)
		return;

	if (which > state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}

	spec = &state->dts_speculations[which - 1];
	src = &spec->dtsp_buffer[cpu];
	dest = &state->dts_buffer[cpu];

	do {
		current = spec->dtsp_state;

		if (current == DTRACESPEC_COMMITTINGMANY)
			break;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_DISCARDING:
			return;

		case DTRACESPEC_COMMITTING:
			/*
			 * This is only possible if we are (a) commit()'ing
			 * without having done a prior speculate() on this CPU
			 * and (b) racing with another commit() on a different
			 * CPU.  There's nothing to do -- we just assert that
			 * our offset is 0.
			 */
			ASSERT(src->dtb_offset == 0);
			return;

		case DTRACESPEC_ACTIVE:
			new = DTRACESPEC_COMMITTING;
			break;

		case DTRACESPEC_ACTIVEONE:
			/*
			 * This speculation is active on one CPU.  If our
			 * buffer offset is non-zero, we know that the one CPU
			 * must be us.  Otherwise, we are committing on a
			 * different CPU from the speculate(), and we must
			 * rely on being asynchronously cleaned.
			 */
			if (src->dtb_offset != 0) {
				new = DTRACESPEC_COMMITTING;
				break;
			}
			/*FALLTHROUGH*/

		case DTRACESPEC_ACTIVEMANY:
			new = DTRACESPEC_COMMITTINGMANY;
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	/*
	 * We have set the state to indicate that we are committing this
	 * speculation.  Now reserve the necessary space in the destination
	 * buffer.
	 */
	if ((offs = dtrace_buffer_reserve(dest, src->dtb_offset,
	    sizeof (uint64_t), state, NULL)) < 0) {
		dtrace_buffer_drop(dest);
		goto out;
	}

	/*
	 * We have the space; copy the buffer across.  (Note that this is a
	 * highly suboptimal bcopy(); in the unlikely event that this becomes
	 * a serious performance issue, a high-performance DTrace-specific
	 * bcopy() should obviously be invented.)
	 */
	daddr = (uintptr_t)dest->dtb_tomax + offs;
	dlimit = daddr + src->dtb_offset;
	saddr = (uintptr_t)src->dtb_tomax;

	/*
	 * First, the aligned portion.
	 */
	while (dlimit - daddr >= sizeof (uint64_t)) {
		*((uint64_t *)daddr) = *((uint64_t *)saddr);

		daddr += sizeof (uint64_t);
		saddr += sizeof (uint64_t);
	}

	/*
	 * Now any left-over bit...
	 */
	while (dlimit - daddr)
		*((uint8_t *)daddr++) = *((uint8_t *)saddr++);

	/*
	 * Finally, commit the reserved space in the destination buffer.
	 */
	dest->dtb_offset = offs + src->dtb_offset;

out:
	/*
	 * If we're lucky enough to be the only active CPU on this speculation
	 * buffer, we can just set the state back to DTRACESPEC_INACTIVE.
	 */
	if (current == DTRACESPEC_ACTIVE ||
	    (current == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) {
		uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state,
		    DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE);

		ASSERT(rval == DTRACESPEC_COMMITTING);
	}

	src->dtb_offset = 0;
	src->dtb_xamot_drops += src->dtb_drops;
	src->dtb_drops = 0;
}
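/*
 * The copy loop in dtrace_speculation_commit() above moves the speculative
 * data in uint64_t chunks and then finishes byte-by-byte.  As a stand-alone
 * sketch (hypothetical helper, not part of the framework), the same idea is:
 *
 *	static void
 *	example_copy64(uintptr_t daddr, uintptr_t saddr, size_t len)
 *	{
 *		uintptr_t dlimit = daddr + len;
 *
 *		while (dlimit - daddr >= sizeof (uint64_t)) {
 *			*((uint64_t *)daddr) = *((uint64_t *)saddr);
 *			daddr += sizeof (uint64_t);
 *			saddr += sizeof (uint64_t);
 *		}
 *
 *		while (dlimit - daddr)		 any left-over bytes
 *			*((uint8_t *)daddr++) = *((uint8_t *)saddr++);
 *	}
 *
 * This assumes the destination offset is 64-bit aligned, which the
 * dtrace_buffer_reserve() call above guarantees here.
 */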
/*
 * This routine discards an active speculation.  If the specified speculation
 * is not in a valid state to perform a discard(), this routine will silently
 * do nothing.  The state of the specified speculation is transitioned
 * according to the state transition diagram outlined in <sys/dtrace_impl.h>
 */
static void
dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_speculation_state_t current, new;
	dtrace_buffer_t *buf;

	if (which == 0)
		return;

	if (which > state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}

	spec = &state->dts_speculations[which - 1];
	buf = &spec->dtsp_buffer[cpu];

	do {
		current = spec->dtsp_state;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_COMMITTINGMANY:
		case DTRACESPEC_COMMITTING:
		case DTRACESPEC_DISCARDING:
			return;

		case DTRACESPEC_ACTIVE:
		case DTRACESPEC_ACTIVEMANY:
			new = DTRACESPEC_DISCARDING;
			break;

		case DTRACESPEC_ACTIVEONE:
			if (buf->dtb_offset != 0) {
				new = DTRACESPEC_INACTIVE;
			} else {
				new = DTRACESPEC_DISCARDING;
			}
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	buf->dtb_offset = 0;
	buf->dtb_drops = 0;
}
/*
 * Note:  not called from probe context.  This function is called
 * asynchronously from cross call context to clean any speculations that are
 * in the COMMITTINGMANY or DISCARDING states.  These speculations may not be
 * transitioned back to the INACTIVE state until all CPUs have cleaned the
 * speculation.
 */
static void
dtrace_speculation_clean_here(dtrace_state_t *state)
{
	dtrace_icookie_t cookie;
	processorid_t cpu = CPU->cpu_id;
	dtrace_buffer_t *dest = &state->dts_buffer[cpu];
	dtrace_specid_t i;

	cookie = dtrace_interrupt_disable();

	if (dest->dtb_tomax == NULL) {
		dtrace_interrupt_enable(cookie);
		return;
	}

	for (i = 0; i < state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];
		dtrace_buffer_t *src = &spec->dtsp_buffer[cpu];

		if (src->dtb_tomax == NULL)
			continue;

		if (spec->dtsp_state == DTRACESPEC_DISCARDING) {
			src->dtb_offset = 0;
			continue;
		}

		if (spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
			continue;

		if (src->dtb_offset == 0)
			continue;

		dtrace_speculation_commit(state, cpu, i + 1);
	}

	dtrace_interrupt_enable(cookie);
}
/*
 * Note:  not called from probe context.  This function is called
 * asynchronously (and at a regular interval) to clean any speculations that
 * are in the COMMITTINGMANY or DISCARDING states.  If it discovers that there
 * is work to be done, it cross calls all CPUs to perform that work;
 * COMMITTINGMANY and DISCARDING speculations may not be transitioned back to
 * the INACTIVE state until they have been cleaned by all CPUs.
 */
static void
dtrace_speculation_clean(dtrace_state_t *state)
{
	int work = 0, rv;
	dtrace_specid_t i;

	for (i = 0; i < state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];

		ASSERT(!spec->dtsp_cleaning);

		if (spec->dtsp_state != DTRACESPEC_DISCARDING &&
		    spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
			continue;

		work++;
		spec->dtsp_cleaning = 1;
	}

	if (!work)
		return;

	dtrace_xcall(DTRACE_CPUALL,
	    (dtrace_xcall_t)dtrace_speculation_clean_here, state);

	/*
	 * We now know that all CPUs have committed or discarded their
	 * speculation buffers, as appropriate.  We can now set the state
	 * to inactive.
	 */
	for (i = 0; i < state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];
		dtrace_speculation_state_t current, new;

		if (!spec->dtsp_cleaning)
			continue;

		current = spec->dtsp_state;
		ASSERT(current == DTRACESPEC_DISCARDING ||
		    current == DTRACESPEC_COMMITTINGMANY);

		new = DTRACESPEC_INACTIVE;

		rv = dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new);
		ASSERT(rv == current);
		spec->dtsp_cleaning = 0;
	}
}
/*
 * Called as part of a speculate() to get the speculative buffer associated
 * with a given speculation.  Returns NULL if the specified speculation is not
 * in an ACTIVE state.  If the speculation is in the ACTIVEONE state -- and
 * the active CPU is not the specified CPU -- the speculation will be
 * atomically transitioned into the ACTIVEMANY state.
 */
static dtrace_buffer_t *
dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_speculation_state_t current, new;
	dtrace_buffer_t *buf;

	if (which == 0)
		return (NULL);

	if (which > state->dts_nspeculations) {
		cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return (NULL);
	}

	spec = &state->dts_speculations[which - 1];
	buf = &spec->dtsp_buffer[cpuid];

	do {
		current = spec->dtsp_state;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_COMMITTINGMANY:
		case DTRACESPEC_DISCARDING:
			return (NULL);

		case DTRACESPEC_COMMITTING:
			ASSERT(buf->dtb_offset == 0);
			return (NULL);

		case DTRACESPEC_ACTIVEONE:
			/*
			 * This speculation is currently active on one CPU.
			 * Check the offset in the buffer; if it's non-zero,
			 * that CPU must be us (and we leave the state alone).
			 * If it's zero, assume that we're starting on a new
			 * CPU -- and change the state to indicate that the
			 * speculation is active on more than one CPU.
			 */
			if (buf->dtb_offset != 0)
				return (buf);

			new = DTRACESPEC_ACTIVEMANY;
			break;

		case DTRACESPEC_ACTIVEMANY:
			return (buf);

		case DTRACESPEC_ACTIVE:
			new = DTRACESPEC_ACTIVEONE;
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	ASSERT(new == DTRACESPEC_ACTIVEONE || new == DTRACESPEC_ACTIVEMANY);
	return (buf);
}
/*
 * This function implements the DIF emulator's variable lookups.  The emulator
 * passes a reserved variable identifier and optional built-in array index.
 */
static uint64_t
dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
    uint64_t ndx)
{
	/*
	 * If we're accessing one of the uncached arguments, we'll turn this
	 * into a reference in the args array.
	 */
	if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) {
		ndx = v - DIF_VAR_ARG0;
		v = DIF_VAR_ARGS;
	}

	switch (v) {
	case DIF_VAR_ARGS:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS);
		if (ndx >= sizeof (mstate->dtms_arg) /
		    sizeof (mstate->dtms_arg[0])) {
#if !defined(__APPLE__)
			int aframes = mstate->dtms_probe->dtpr_aframes + 2;
#else
			/* Account for introduction of __dtrace_probe() on xnu. */
			int aframes = mstate->dtms_probe->dtpr_aframes + 3;
#endif /* __APPLE__ */
			dtrace_provider_t *pv;
			uint64_t val;

			pv = mstate->dtms_probe->dtpr_provider;
			if (pv->dtpv_pops.dtps_getargval != NULL)
				val = pv->dtpv_pops.dtps_getargval(pv->dtpv_arg,
				    mstate->dtms_probe->dtpr_id,
				    mstate->dtms_probe->dtpr_arg, ndx, aframes);
#if defined(__APPLE__)
			/* Special case access of arg5 as passed to dtrace_probeid_error (which see.) */
			else if (mstate->dtms_probe->dtpr_id == dtrace_probeid_error && ndx == 5) {
				return ((dtrace_state_t *)(mstate->dtms_arg[0]))->dts_arg_error_illval;
			}
#endif /* __APPLE__ */
			else
				val = dtrace_getarg(ndx, aframes);

			/*
			 * This is regrettably required to keep the compiler
			 * from tail-optimizing the call to dtrace_getarg().
			 * The condition always evaluates to true, but the
			 * compiler has no way of figuring that out a priori.
			 * (None of this would be necessary if the compiler
			 * could be relied upon to _always_ tail-optimize
			 * the call to dtrace_getarg() -- but it can't.)
			 */
			if (mstate->dtms_probe != NULL)
				return (val);

			ASSERT(0);
		}

		return (mstate->dtms_arg[ndx]);
#if !defined(__APPLE__)
	case DIF_VAR_UREGS: {
		klwp_t *lwp;

		if (!dtrace_priv_proc(state))
			return (0);

		if ((lwp = curthread->t_lwp) == NULL) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = NULL;
			return (0);
		}

		return (dtrace_getreg(lwp->lwp_regs, ndx));
	}
#else
	case DIF_VAR_UREGS: {
		thread_t thread;

		if (!dtrace_priv_proc(state))
			return (0);

		if ((thread = current_thread()) == NULL) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = 0;
			return (0);
		}

		return (dtrace_getreg(find_user_regs(thread), ndx));
	}
#endif /* __APPLE__ */

#if !defined(__APPLE__)
	case DIF_VAR_CURTHREAD:
		if (!dtrace_priv_kernel(state))
			return (0);
		return ((uint64_t)(uintptr_t)curthread);
#else
	case DIF_VAR_CURTHREAD:
		if (!dtrace_priv_kernel(state))
			return (0);
		return ((uint64_t)(uintptr_t)current_thread());
#endif /* __APPLE__ */

	case DIF_VAR_TIMESTAMP:
		if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) {
			mstate->dtms_timestamp = dtrace_gethrtime();
			mstate->dtms_present |= DTRACE_MSTATE_TIMESTAMP;
		}
		return (mstate->dtms_timestamp);

#if !defined(__APPLE__)
	case DIF_VAR_VTIMESTAMP:
		ASSERT(dtrace_vtime_references != 0);
		return (curthread->t_dtrace_vtime);
#else
	case DIF_VAR_VTIMESTAMP:
		ASSERT(dtrace_vtime_references != 0);
		return (dtrace_get_thread_vtime(current_thread()));
#endif /* __APPLE__ */

	case DIF_VAR_WALLTIMESTAMP:
		if (!(mstate->dtms_present & DTRACE_MSTATE_WALLTIMESTAMP)) {
			mstate->dtms_walltimestamp = dtrace_gethrestime();
			mstate->dtms_present |= DTRACE_MSTATE_WALLTIMESTAMP;
		}
		return (mstate->dtms_walltimestamp);
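	/*
	 * The timestamp cases above illustrate a pattern used throughout
	 * this switch: a value is computed at most once per probe firing and
	 * cached in the mstate, with a bit in dtms_present recording that
	 * the cache is valid.  A stand-alone sketch of the pattern
	 * (hypothetical names; not part of the framework):
	 *
	 *	if (!(mstate->dtms_present & EXAMPLE_PRESENT_BIT)) {
	 *		mstate->dtms_example = example_compute();
	 *		mstate->dtms_present |= EXAMPLE_PRESENT_BIT;
	 *	}
	 *	return (mstate->dtms_example);
	 *
	 * Later references within the same firing then cost only a bit test,
	 * which matters in probe context.
	 */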
	case DIF_VAR_IPL:
		if (!dtrace_priv_kernel(state))
			return (0);
		if (!(mstate->dtms_present & DTRACE_MSTATE_IPL)) {
			mstate->dtms_ipl = dtrace_getipl();
			mstate->dtms_present |= DTRACE_MSTATE_IPL;
		}
		return (mstate->dtms_ipl);

	case DIF_VAR_EPID:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_EPID);
		return (mstate->dtms_epid);

	case DIF_VAR_ID:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
		return (mstate->dtms_probe->dtpr_id);

	case DIF_VAR_STACKDEPTH:
		if (!dtrace_priv_kernel(state))
			return (0);
		if (!(mstate->dtms_present & DTRACE_MSTATE_STACKDEPTH)) {
#if !defined(__APPLE__)
			int aframes = mstate->dtms_probe->dtpr_aframes + 2;
#else
			/* Account for introduction of __dtrace_probe() on xnu. */
			int aframes = mstate->dtms_probe->dtpr_aframes + 3;
#endif /* __APPLE__ */

			mstate->dtms_stackdepth = dtrace_getstackdepth(aframes);
			mstate->dtms_present |= DTRACE_MSTATE_STACKDEPTH;
		}
		return (mstate->dtms_stackdepth);

	case DIF_VAR_USTACKDEPTH:
		if (!dtrace_priv_proc(state))
			return (0);
		if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) {
			/*
			 * See comment in DIF_VAR_PID.
			 */
			if (DTRACE_ANCHORED(mstate->dtms_probe) &&
			    CPU_ON_INTR(CPU)) {
				mstate->dtms_ustackdepth = 0;
			} else {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
				mstate->dtms_ustackdepth =
				    dtrace_getustackdepth();
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
			}
			mstate->dtms_present |= DTRACE_MSTATE_USTACKDEPTH;
		}
		return (mstate->dtms_ustackdepth);
2628 case DIF_VAR_CALLER
:
2629 if (!dtrace_priv_kernel(state
))
2631 if (!(mstate
->dtms_present
& DTRACE_MSTATE_CALLER
)) {
2632 #if !defined(__APPLE__)
2633 int aframes
= mstate
->dtms_probe
->dtpr_aframes
+ 2;
2635 /* Account for introduction of __dtrace_probe() on xnu. */
2636 int aframes
= mstate
->dtms_probe
->dtpr_aframes
+ 3;
2637 #endif /* __APPLE__ */
2639 if (!DTRACE_ANCHORED(mstate
->dtms_probe
)) {
2641 * If this is an unanchored probe, we are
2642 * required to go through the slow path:
2643 * dtrace_caller() only guarantees correct
2644 * results for anchored probes.
2648 dtrace_getpcstack(caller
, 2, aframes
,
2649 (uint32_t *)(uintptr_t)mstate
->dtms_arg
[0]);
2650 mstate
->dtms_caller
= caller
[1];
2651 } else if ((mstate
->dtms_caller
=
2652 dtrace_caller(aframes
)) == -1) {
2654 * We have failed to do this the quick way;
2655 * we must resort to the slower approach of
2656 * calling dtrace_getpcstack().
2660 dtrace_getpcstack(&caller
, 1, aframes
, NULL
);
2661 mstate
->dtms_caller
= caller
;
2664 mstate
->dtms_present
|= DTRACE_MSTATE_CALLER
;
2666 return (mstate
->dtms_caller
);
2668 case DIF_VAR_UCALLER
:
2669 if (!dtrace_priv_proc(state
))
2672 if (!(mstate
->dtms_present
& DTRACE_MSTATE_UCALLER
)) {
2676 * dtrace_getupcstack() fills in the first uint64_t
2677 * with the current PID. The second uint64_t will
2678 * be the program counter at user-level. The third
2679 * uint64_t will contain the caller, which is what
2683 dtrace_getupcstack(ustack
, 3);
2684 mstate
->dtms_ucaller
= ustack
[2];
2685 mstate
->dtms_present
|= DTRACE_MSTATE_UCALLER
;
2688 return (mstate
->dtms_ucaller
);
2690 case DIF_VAR_PROBEPROV
:
2691 ASSERT(mstate
->dtms_present
& DTRACE_MSTATE_PROBE
);
2692 return ((uint64_t)(uintptr_t)
2693 mstate
->dtms_probe
->dtpr_provider
->dtpv_name
);
2695 case DIF_VAR_PROBEMOD
:
2696 ASSERT(mstate
->dtms_present
& DTRACE_MSTATE_PROBE
);
2697 return ((uint64_t)(uintptr_t)
2698 mstate
->dtms_probe
->dtpr_mod
);
2700 case DIF_VAR_PROBEFUNC
:
2701 ASSERT(mstate
->dtms_present
& DTRACE_MSTATE_PROBE
);
2702 return ((uint64_t)(uintptr_t)
2703 mstate
->dtms_probe
->dtpr_func
);
2705 case DIF_VAR_PROBENAME
:
2706 ASSERT(mstate
->dtms_present
& DTRACE_MSTATE_PROBE
);
2707 return ((uint64_t)(uintptr_t)
2708 mstate
->dtms_probe
->dtpr_name
);
2710 #if !defined(__APPLE__)
2712 if (!dtrace_priv_proc(state
))
2716 * Note that we are assuming that an unanchored probe is
2717 * always due to a high-level interrupt. (And we're assuming
2718 * that there is only a single high level interrupt.)
2720 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2721 return (pid0
.pid_id
);
2724 * It is always safe to dereference one's own t_procp pointer:
2725 * it always points to a valid, allocated proc structure.
2726 * Further, it is always safe to dereference the p_pidp member
2727 * of one's own proc structure. (These are truisms becuase
2728 * threads and processes don't clean up their own state --
2729 * they leave that task to whomever reaps them.)
2731 return ((uint64_t)curthread
->t_procp
->p_pidp
->pid_id
);
2735 if (!dtrace_priv_proc_relaxed(state
))
2739 * Note that we are assuming that an unanchored probe is
2740 * always due to a high-level interrupt. (And we're assuming
2741 * that there is only a single high level interrupt.)
2743 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2744 /* Anchored probe that fires while on an interrupt accrues to process 0 */
2747 return ((uint64_t)proc_selfpid());
2748 #endif /* __APPLE__ */
2750 #if !defined(__APPLE__)
2752 if (!dtrace_priv_proc(state
))
2756 * See comment in DIF_VAR_PID.
2758 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2759 return (pid0
.pid_id
);
2761 return ((uint64_t)curthread
->t_procp
->p_ppid
);
2764 if (!dtrace_priv_proc_relaxed(state
))
2768 * See comment in DIF_VAR_PID.
2770 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2773 return ((uint64_t)(uintptr_t)(current_proc()->p_ppid
));
2774 #endif /* __APPLE__ */
2776 #if !defined(__APPLE__)
2779 * See comment in DIF_VAR_PID.
2781 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2784 return ((uint64_t)curthread
->t_tid
);
2788 * See comment in DIF_VAR_PID.
2790 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2793 return ((uint64_t)(uintptr_t)current_thread()); /* Is user's (pthread_t)t->kernel_thread */
2794 #endif /* __APPLE__ */
2796 #if !defined(__APPLE__)
2797 case DIF_VAR_EXECNAME
:
2798 if (!dtrace_priv_proc(state
))
2802 * See comment in DIF_VAR_PID.
2804 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2805 return ((uint64_t)(uintptr_t)p0
.p_user
.u_comm
);
2808 * It is always safe to dereference one's own t_procp pointer:
2809 * it always points to a valid, allocated proc structure.
2810 * (This is true because threads don't clean up their own
2811 * state -- they leave that task to whomever reaps them.)
2813 return ((uint64_t)(uintptr_t)
2814 curthread
->t_procp
->p_user
.u_comm
);
2816 case DIF_VAR_EXECNAME
:
2818 char *xname
= (char *)mstate
->dtms_scratch_ptr
;
2819 size_t scratch_size
= MAXCOMLEN
+1;
2821 /* The scratch allocation's lifetime is that of the clause. */
2822 if (mstate
->dtms_scratch_ptr
+ scratch_size
>
2823 mstate
->dtms_scratch_base
+ mstate
->dtms_scratch_size
)
2826 if (!dtrace_priv_proc_relaxed(state
))
2829 mstate
->dtms_scratch_ptr
+= scratch_size
;
2830 proc_selfname( xname
, MAXCOMLEN
);
2832 return ((uint64_t)(uintptr_t)xname
);
2834 #endif /* __APPLE__ */
2835 #if !defined(__APPLE__)
2836 case DIF_VAR_ZONENAME
:
2837 if (!dtrace_priv_proc(state
))
2841 * See comment in DIF_VAR_PID.
2843 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2844 return ((uint64_t)(uintptr_t)p0
.p_zone
->zone_name
);
2847 * It is always safe to dereference one's own t_procp pointer:
2848 * it always points to a valid, allocated proc structure.
2849 * (This is true because threads don't clean up their own
2850 * state -- they leave that task to whomever reaps them.)
2852 return ((uint64_t)(uintptr_t)
2853 curthread
->t_procp
->p_zone
->zone_name
);
2856 case DIF_VAR_ZONENAME
:
2857 if (!dtrace_priv_proc(state
))
2860 return ((uint64_t)(uintptr_t)NULL
); /* Darwin doesn't do "zones" */
2861 #endif /* __APPLE__ */
2863 #if !defined(__APPLE__)
2865 if (!dtrace_priv_proc(state
))
2869 * See comment in DIF_VAR_PID.
2871 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2872 return ((uint64_t)p0
.p_cred
->cr_uid
);
2874 return ((uint64_t)curthread
->t_cred
->cr_uid
);
2877 if (!dtrace_priv_proc(state
))
2881 * See comment in DIF_VAR_PID.
2883 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2886 if (dtrace_CRED() != NULL
)
2887 return ((uint64_t)kauth_getuid());
2890 #endif /* __APPLE__ */
2892 #if !defined(__APPLE__)
2894 if (!dtrace_priv_proc(state
))
2898 * See comment in DIF_VAR_PID.
2900 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2901 return ((uint64_t)p0
.p_cred
->cr_gid
);
2903 return ((uint64_t)curthread
->t_cred
->cr_gid
);
2906 if (!dtrace_priv_proc(state
))
2910 * See comment in DIF_VAR_PID.
2912 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2915 if (dtrace_CRED() != NULL
)
2916 return ((uint64_t)kauth_getgid());
2919 #endif /* __APPLE__ */
2921 #if !defined(__APPLE__)
2922 case DIF_VAR_ERRNO
: {
2924 if (!dtrace_priv_proc(state
))
2928 * See comment in DIF_VAR_PID.
2930 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2933 if ((lwp
= curthread
->t_lwp
) == NULL
)
2936 return ((uint64_t)lwp
->lwp_errno
);
2939 case DIF_VAR_ERRNO
: {
2940 uthread_t uthread
= (uthread_t
)get_bsdthread_info(current_thread());
2941 if (!dtrace_priv_proc(state
))
2945 * See comment in DIF_VAR_PID.
2947 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2950 return (uthread
? uthread
->t_dtrace_errno
: -1);
2952 #endif /* __APPLE__ */
2955 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP
);
/*
 * Emulate the execution of DTrace ID subroutines invoked by the call opcode.
 * Notice that we don't bother validating the proper number of arguments or
 * their types in the tuple stack.  This isn't needed because all argument
 * interpretation is safe because of our load safety -- the worst that can
 * happen is that a bogus program can obtain bogus results.
 */
static void
dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
    dtrace_key_t *tupregs, int nargs,
    dtrace_mstate_t *mstate, dtrace_state_t *state)
{
	volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
#if !defined(__APPLE__)
	volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
#else
	volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
#endif /* __APPLE__ */

#if !defined(__APPLE__)
	union {
		mutex_impl_t mi;
		uint64_t mx;
	} m;

	union {
		krwlock_t ri;
		uintptr_t rw;
	} r;
#else
/* XXX awaits lock/mutex work */
#endif /* __APPLE__ */

	switch (subr) {
	case DIF_SUBR_RAND:
		regs[rd] = (dtrace_gethrtime() * 2416 + 374441) % 1771875;
		break;
2998 #if !defined(__APPLE__)
2999 case DIF_SUBR_MUTEX_OWNED
:
3000 m
.mx
= dtrace_load64(tupregs
[0].dttk_value
);
3001 if (MUTEX_TYPE_ADAPTIVE(&m
.mi
))
3002 regs
[rd
] = MUTEX_OWNER(&m
.mi
) != MUTEX_NO_OWNER
;
3004 regs
[rd
] = LOCK_HELD(&m
.mi
.m_spin
.m_spinlock
);
3007 case DIF_SUBR_MUTEX_OWNER
:
3008 m
.mx
= dtrace_load64(tupregs
[0].dttk_value
);
3009 if (MUTEX_TYPE_ADAPTIVE(&m
.mi
) &&
3010 MUTEX_OWNER(&m
.mi
) != MUTEX_NO_OWNER
)
3011 regs
[rd
] = (uintptr_t)MUTEX_OWNER(&m
.mi
);
3016 case DIF_SUBR_MUTEX_TYPE_ADAPTIVE
:
3017 m
.mx
= dtrace_load64(tupregs
[0].dttk_value
);
3018 regs
[rd
] = MUTEX_TYPE_ADAPTIVE(&m
.mi
);
3021 case DIF_SUBR_MUTEX_TYPE_SPIN
:
3022 m
.mx
= dtrace_load64(tupregs
[0].dttk_value
);
3023 regs
[rd
] = MUTEX_TYPE_SPIN(&m
.mi
);
3026 case DIF_SUBR_RW_READ_HELD
: {
3029 r
.rw
= dtrace_loadptr(tupregs
[0].dttk_value
);
3030 regs
[rd
] = _RW_READ_HELD(&r
.ri
, tmp
);
3034 case DIF_SUBR_RW_WRITE_HELD
:
3035 r
.rw
= dtrace_loadptr(tupregs
[0].dttk_value
);
3036 regs
[rd
] = _RW_WRITE_HELD(&r
.ri
);
3039 case DIF_SUBR_RW_ISWRITER
:
3040 r
.rw
= dtrace_loadptr(tupregs
[0].dttk_value
);
3041 regs
[rd
] = _RW_ISWRITER(&r
.ri
);
3044 /* XXX awaits lock/mutex work */
3045 #endif /* __APPLE__ */
	case DIF_SUBR_BCOPY: {
		/*
		 * We need to be sure that the destination is in the scratch
		 * region -- no other region is allowed.
		 */
		uintptr_t src = tupregs[0].dttk_value;
		uintptr_t dest = tupregs[1].dttk_value;
		size_t size = tupregs[2].dttk_value;

		if (!dtrace_inscratch(dest, size, mstate)) {
			*flags |= CPU_DTRACE_BADADDR;
			*illval = regs[rd];
			break;
		}

		dtrace_bcopy((void *)src, (void *)dest, size);
		break;
	}

	case DIF_SUBR_ALLOCA:
	case DIF_SUBR_COPYIN: {
		uintptr_t dest = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
		uint64_t size =
		    tupregs[subr == DIF_SUBR_ALLOCA ? 0 : 1].dttk_value;
		size_t scratch_size = (dest - mstate->dtms_scratch_ptr) + size;

		/*
		 * This action doesn't require any credential checks since
		 * probes will not activate in user contexts to which the
		 * enabling user does not have permissions.
		 */
		if (mstate->dtms_scratch_ptr + scratch_size >
		    mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = NULL;
			break;
		}

		if (subr == DIF_SUBR_COPYIN) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
#if !defined(__APPLE__)
			dtrace_copyin(tupregs[0].dttk_value, dest, size);
#else
			if (dtrace_priv_proc(state))
				dtrace_copyin(tupregs[0].dttk_value, dest, size);
#endif /* __APPLE__ */
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
		}

		mstate->dtms_scratch_ptr += scratch_size;
		regs[rd] = dest;
		break;
	}
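	/*
	 * The bounds check above is the standard way per-clause scratch is
	 * allocated in probe context: compare the running scratch pointer
	 * against the base plus size, flag NOSCRATCH on failure, and bump
	 * the pointer on success.  A stand-alone sketch (hypothetical
	 * helper, not part of the framework):
	 *
	 *	static uintptr_t
	 *	example_scratch_alloc(dtrace_mstate_t *mstate, size_t size)
	 *	{
	 *		uintptr_t ptr = mstate->dtms_scratch_ptr;
	 *
	 *		if (ptr + size > mstate->dtms_scratch_base +
	 *		    mstate->dtms_scratch_size) {
	 *			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
	 *			return (0);
	 *		}
	 *
	 *		mstate->dtms_scratch_ptr = ptr + size;
	 *		return (ptr);
	 *	}
	 *
	 * The scratch allocation's lifetime is that of the clause, so nothing
	 * allocated this way needs to be freed explicitly.
	 */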
	case DIF_SUBR_COPYINTO: {
		uint64_t size = tupregs[1].dttk_value;
		uintptr_t dest = tupregs[2].dttk_value;

		/*
		 * This action doesn't require any credential checks since
		 * probes will not activate in user contexts to which the
		 * enabling user does not have permissions.
		 */
		if (!dtrace_inscratch(dest, size, mstate)) {
			*flags |= CPU_DTRACE_BADADDR;
			*illval = regs[rd];
			break;
		}

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
#if !defined(__APPLE__)
		dtrace_copyin(tupregs[0].dttk_value, dest, size);
#else
		if (dtrace_priv_proc(state))
			dtrace_copyin(tupregs[0].dttk_value, dest, size);
#endif /* __APPLE__ */
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
		break;
	}
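	/*
	 * copyin() and copyinto() above both bracket the user copy with
	 * CPU_DTRACE_NOFAULT so that a bad user address turns into a flagged
	 * fault rather than a panic.  The shape of that pattern, as a
	 * stand-alone sketch (not a definitive implementation):
	 *
	 *	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	 *	dtrace_copyin(uaddr, kdest, size);
	 *	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
	 *	if (*flags & CPU_DTRACE_FAULT) {
	 *		 the copy faulted; the fault is reported to the
	 *		 consumer and emulation of this clause stops
	 *	}
	 *
	 * On xnu the copy is additionally gated on dtrace_priv_proc(), since
	 * the calling thread's credentials decide whether user memory may be
	 * examined at all.
	 */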
	case DIF_SUBR_COPYINSTR: {
		uintptr_t dest = mstate->dtms_scratch_ptr;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];

		if (nargs > 1 && tupregs[1].dttk_value < size)
			size = tupregs[1].dttk_value + 1;

		/*
		 * This action doesn't require any credential checks since
		 * probes will not activate in user contexts to which the
		 * enabling user does not have permissions.
		 */
		if (mstate->dtms_scratch_ptr + size >
		    mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = NULL;
			break;
		}

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
#if !defined(__APPLE__)
		dtrace_copyinstr(tupregs[0].dttk_value, dest, size);
#else
		if (dtrace_priv_proc(state))
			dtrace_copyinstr(tupregs[0].dttk_value, dest, size);
#endif /* __APPLE__ */
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

		((char *)dest)[size - 1] = '\0';
		mstate->dtms_scratch_ptr += size;
		regs[rd] = dest;
		break;
	}
3161 #if !defined(__APPLE__)
3162 case DIF_SUBR_MSGSIZE
:
3163 case DIF_SUBR_MSGDSIZE
: {
3164 uintptr_t baddr
= tupregs
[0].dttk_value
, daddr
;
3165 uintptr_t wptr
, rptr
;
3169 while (baddr
!= NULL
&& !(*flags
& CPU_DTRACE_FAULT
)) {
3170 wptr
= dtrace_loadptr(baddr
+
3171 offsetof(mblk_t
, b_wptr
));
3173 rptr
= dtrace_loadptr(baddr
+
3174 offsetof(mblk_t
, b_rptr
));
3177 *flags
|= CPU_DTRACE_BADADDR
;
3178 *illval
= tupregs
[0].dttk_value
;
3182 daddr
= dtrace_loadptr(baddr
+
3183 offsetof(mblk_t
, b_datap
));
3185 baddr
= dtrace_loadptr(baddr
+
3186 offsetof(mblk_t
, b_cont
));
3189 * We want to prevent against denial-of-service here,
3190 * so we're only going to search the list for
3191 * dtrace_msgdsize_max mblks.
3193 if (cont
++ > dtrace_msgdsize_max
) {
3194 *flags
|= CPU_DTRACE_ILLOP
;
3198 if (subr
== DIF_SUBR_MSGDSIZE
) {
3199 if (dtrace_load8(daddr
+
3200 offsetof(dblk_t
, db_type
)) != M_DATA
)
3204 count
+= wptr
- rptr
;
3207 if (!(*flags
& CPU_DTRACE_FAULT
))
3213 case DIF_SUBR_MSGSIZE
:
3214 case DIF_SUBR_MSGDSIZE
: {
3215 /* Darwin does not implement SysV streams messages */
3219 #endif /* __APPLE__ */
3221 #if !defined(__APPLE__)
3222 case DIF_SUBR_PROGENYOF
: {
3223 pid_t pid
= tupregs
[0].dttk_value
;
3227 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3229 for (p
= curthread
->t_procp
; p
!= NULL
; p
= p
->p_parent
) {
3230 if (p
->p_pidp
->pid_id
== pid
) {
3236 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3242 case DIF_SUBR_PROGENYOF
: {
3243 pid_t pid
= tupregs
[0].dttk_value
;
3244 struct proc
*p
= current_proc();
3245 int rval
= 0, lim
= nprocs
;
3247 while(p
&& (lim
-- > 0)) {
3250 ppid
= (pid_t
)dtrace_load32((uintptr_t)&(p
->p_pid
));
3251 if (*flags
& CPU_DTRACE_FAULT
)
3260 break; /* Can't climb process tree any further. */
3262 p
= (struct proc
*)dtrace_loadptr((uintptr_t)&(p
->p_pptr
));
3263 if (*flags
& CPU_DTRACE_FAULT
)
3270 #endif /* __APPLE__ */
3272 case DIF_SUBR_SPECULATION
:
3273 regs
[rd
] = dtrace_speculation(state
);
3276 #if !defined(__APPLE__)
3277 case DIF_SUBR_COPYOUT
: {
3278 uintptr_t kaddr
= tupregs
[0].dttk_value
;
3279 uintptr_t uaddr
= tupregs
[1].dttk_value
;
3280 uint64_t size
= tupregs
[2].dttk_value
;
3282 if (!dtrace_destructive_disallow
&&
3283 dtrace_priv_proc_control(state
) &&
3284 !dtrace_istoxic(kaddr
, size
)) {
3285 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3286 dtrace_copyout(kaddr
, uaddr
, size
);
3287 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3292 case DIF_SUBR_COPYOUTSTR
: {
3293 uintptr_t kaddr
= tupregs
[0].dttk_value
;
3294 uintptr_t uaddr
= tupregs
[1].dttk_value
;
3295 uint64_t size
= tupregs
[2].dttk_value
;
3297 if (!dtrace_destructive_disallow
&&
3298 dtrace_priv_proc_control(state
) &&
3299 !dtrace_istoxic(kaddr
, size
)) {
3300 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3301 dtrace_copyoutstr(kaddr
, uaddr
, size
);
3302 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3307 case DIF_SUBR_COPYOUT
: {
3308 uintptr_t kaddr
= tupregs
[0].dttk_value
;
3309 user_addr_t uaddr
= tupregs
[1].dttk_value
;
3310 uint64_t size
= tupregs
[2].dttk_value
;
3312 if (!dtrace_destructive_disallow
&&
3313 dtrace_priv_proc_control(state
) &&
3314 !dtrace_istoxic(kaddr
, size
)) {
3315 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3316 dtrace_copyout(kaddr
, uaddr
, size
);
3317 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3322 case DIF_SUBR_COPYOUTSTR
: {
3323 uintptr_t kaddr
= tupregs
[0].dttk_value
;
3324 user_addr_t uaddr
= tupregs
[1].dttk_value
;
3325 uint64_t size
= tupregs
[2].dttk_value
;
3327 if (!dtrace_destructive_disallow
&&
3328 dtrace_priv_proc_control(state
) &&
3329 !dtrace_istoxic(kaddr
, size
)) {
3330 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3331 dtrace_copyoutstr(kaddr
, uaddr
, size
);
3332 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3336 #endif /* __APPLE__ */
3338 case DIF_SUBR_STRLEN
:
3339 regs
[rd
] = dtrace_strlen((char *)(uintptr_t)
3340 tupregs
[0].dttk_value
,
3341 state
->dts_options
[DTRACEOPT_STRSIZE
]);
3344 case DIF_SUBR_STRCHR
:
3345 case DIF_SUBR_STRRCHR
: {
3347 * We're going to iterate over the string looking for the
3348 * specified character. We will iterate until we have reached
3349 * the string length or we have found the character. If this
3350 * is DIF_SUBR_STRRCHR, we will look for the last occurrence
3351 * of the specified character instead of the first.
3353 uintptr_t addr
= tupregs
[0].dttk_value
;
3354 uintptr_t limit
= addr
+ state
->dts_options
[DTRACEOPT_STRSIZE
];
3355 char c
, target
= (char)tupregs
[1].dttk_value
;
3357 for (regs
[rd
] = NULL
; addr
< limit
; addr
++) {
3358 if ((c
= dtrace_load8(addr
)) == target
) {
3361 if (subr
== DIF_SUBR_STRCHR
)
3372 case DIF_SUBR_STRSTR
:
3373 case DIF_SUBR_INDEX
:
3374 case DIF_SUBR_RINDEX
: {
3376 * We're going to iterate over the string looking for the
3377 * specified string. We will iterate until we have reached
3378 * the string length or we have found the string. (Yes, this
3379 * is done in the most naive way possible -- but considering
3380 * that the string we're searching for is likely to be
3381 * relatively short, the complexity of Rabin-Karp or similar
3382 * hardly seems merited.)
3384 char *addr
= (char *)(uintptr_t)tupregs
[0].dttk_value
;
3385 char *substr
= (char *)(uintptr_t)tupregs
[1].dttk_value
;
3386 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
3387 size_t len
= dtrace_strlen(addr
, size
);
3388 size_t sublen
= dtrace_strlen(substr
, size
);
3389 char *limit
= addr
+ len
, *orig
= addr
;
3390 int notfound
= subr
== DIF_SUBR_STRSTR
? 0 : -1;
3393 regs
[rd
] = notfound
;
3396 * strstr() and index()/rindex() have similar semantics if
3397 * both strings are the empty string: strstr() returns a
3398 * pointer to the (empty) string, and index() and rindex()
3399 * both return index 0 (regardless of any position argument).
3401 if (sublen
== 0 && len
== 0) {
3402 if (subr
== DIF_SUBR_STRSTR
)
3403 regs
[rd
] = (uintptr_t)addr
;
3409 if (subr
!= DIF_SUBR_STRSTR
) {
3410 if (subr
== DIF_SUBR_RINDEX
) {
3417 * Both index() and rindex() take an optional position
3418 * argument that denotes the starting position.
3421 int64_t pos
= (int64_t)tupregs
[2].dttk_value
;
3424 * If the position argument to index() is
3425 * negative, Perl implicitly clamps it at
3426 * zero. This semantic is a little surprising
3427 * given the special meaning of negative
3428 * positions to similar Perl functions like
3429 * substr(), but it appears to reflect a
3430 * notion that index() can start from a
3431 * negative index and increment its way up to
3432 * the string. Given this notion, Perl's
3433 * rindex() is at least self-consistent in
3434 * that it implicitly clamps positions greater
3435 * than the string length to be the string
3436 * length. Where Perl completely loses
3437 * coherence, however, is when the specified
3438 * substring is the empty string (""). In
3439 * this case, even if the position is
3440 * negative, rindex() returns 0 -- and even if
3441 * the position is greater than the length,
3442 * index() returns the string length. These
3443 * semantics violate the notion that index()
3444 * should never return a value less than the
3445 * specified position and that rindex() should
3446 * never return a value greater than the
3447 * specified position. (One assumes that
3448 * these semantics are artifacts of Perl's
3449 * implementation and not the results of
3450 * deliberate design -- it beggars belief that
3451 * even Larry Wall could desire such oddness.)
3452 * While in the abstract one would wish for
3453 * consistent position semantics across
3454 * substr(), index() and rindex() -- or at the
3455 * very least self-consistent position
3456 * semantics for index() and rindex() -- we
3457 * instead opt to keep with the extant Perl
3458 * semantics, in all their broken glory. (Do
3459 * we have more desire to maintain Perl's
3460 * semantics than Perl does? Probably.)
3462 if (subr
== DIF_SUBR_RINDEX
) {
3486 for (regs
[rd
] = notfound
; addr
!= limit
; addr
+= inc
) {
3487 if (dtrace_strncmp(addr
, substr
, sublen
) == 0) {
3488 if (subr
!= DIF_SUBR_STRSTR
) {
3490 * As D index() and rindex() are
3491 * modeled on Perl (and not on awk),
3492 * we return a zero-based (and not a
3493 * one-based) index. (For you Perl
3494 * weenies: no, we're not going to add
3495 * $[ -- and shouldn't you be at a con
3498 regs
[rd
] = (uintptr_t)(addr
- orig
);
3502 ASSERT(subr
== DIF_SUBR_STRSTR
);
3503 regs
[rd
] = (uintptr_t)addr
;
3511 case DIF_SUBR_STRTOK
: {
3512 uintptr_t addr
= tupregs
[0].dttk_value
;
3513 uintptr_t tokaddr
= tupregs
[1].dttk_value
;
3514 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
3515 uintptr_t limit
, toklimit
= tokaddr
+ size
;
3516 uint8_t c
, tokmap
[32]; /* 256 / 8 */
3517 char *dest
= (char *)mstate
->dtms_scratch_ptr
;
3520 if (mstate
->dtms_scratch_ptr
+ size
>
3521 mstate
->dtms_scratch_base
+ mstate
->dtms_scratch_size
) {
3522 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3529 * If the address specified is NULL, we use our saved
3530 * strtok pointer from the mstate. Note that this
3531 * means that the saved strtok pointer is _only_
3532 * valid within multiple enablings of the same probe --
3533 * it behaves like an implicit clause-local variable.
3535 addr
= mstate
->dtms_strtok
;
3539 * First, zero the token map, and then process the token
3540 * string -- setting a bit in the map for every character
3541 * found in the token string.
3543 for (i
= 0; i
< (int)sizeof (tokmap
); i
++)
3546 for (; tokaddr
< toklimit
; tokaddr
++) {
3547 if ((c
= dtrace_load8(tokaddr
)) == '\0')
3550 ASSERT((c
>> 3) < sizeof (tokmap
));
3551 tokmap
[c
>> 3] |= (1 << (c
& 0x7));
3554 for (limit
= addr
+ size
; addr
< limit
; addr
++) {
3556 * We're looking for a character that is _not_ contained
3557 * in the token string.
3559 if ((c
= dtrace_load8(addr
)) == '\0')
3562 if (!(tokmap
[c
>> 3] & (1 << (c
& 0x7))))
3568 * We reached the end of the string without finding
3569 * any character that was not in the token string.
3570 * We return NULL in this case, and we set the saved
3571 * address to NULL as well.
3574 mstate
->dtms_strtok
= NULL
;
3579 * From here on, we're copying into the destination string.
3581 for (i
= 0; addr
< limit
&& i
< size
- 1; addr
++) {
3582 if ((c
= dtrace_load8(addr
)) == '\0')
3585 if (tokmap
[c
>> 3] & (1 << (c
& 0x7)))
3594 regs
[rd
] = (uintptr_t)dest
;
3595 mstate
->dtms_scratch_ptr
+= size
;
3596 mstate
->dtms_strtok
= addr
;
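	/*
	 * The strtok() emulation above builds a 256-bit map (tokmap[32]) of
	 * the token characters so that membership can be tested with a shift
	 * and a mask instead of a nested scan.  As a stand-alone sketch
	 * (hypothetical names; not part of the framework):
	 *
	 *	uint8_t map[32] = { 0 };		 256 bits
	 *	unsigned char c;
	 *
	 *	for (c = 'a'; c <= 'c'; c++)		 add 'a', 'b', 'c'
	 *		map[c >> 3] |= (1 << (c & 0x7));
	 *
	 *	int is_token = (map['b' >> 3] & (1 << ('b' & 0x7))) != 0;
	 *
	 * The same map is then consulted both to skip leading token
	 * characters and to find the end of the returned token.
	 */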
3600 case DIF_SUBR_SUBSTR
: {
3601 uintptr_t s
= tupregs
[0].dttk_value
;
3602 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
3603 char *d
= (char *)mstate
->dtms_scratch_ptr
;
3604 int64_t index
= (int64_t)tupregs
[1].dttk_value
;
3605 int64_t remaining
= (int64_t)tupregs
[2].dttk_value
;
3606 size_t len
= dtrace_strlen((char *)s
, size
);
3610 remaining
= (int64_t)size
;
3612 if (mstate
->dtms_scratch_ptr
+ size
>
3613 mstate
->dtms_scratch_base
+ mstate
->dtms_scratch_size
) {
3614 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3622 if (index
< 0 && index
+ remaining
> 0) {
3628 if (index
>= len
|| index
< 0)
3631 for (d
[0] = '\0'; remaining
> 0; remaining
--) {
3632 if ((d
[i
++] = dtrace_load8(s
++ + index
)) == '\0')
3641 mstate
->dtms_scratch_ptr
+= size
;
3642 regs
[rd
] = (uintptr_t)d
;
3646 #if !defined(__APPLE__)
3647 case DIF_SUBR_GETMAJOR
:
3649 regs
[rd
] = (tupregs
[0].dttk_value
>> NBITSMINOR64
) & MAXMAJ64
;
3651 regs
[rd
] = (tupregs
[0].dttk_value
>> NBITSMINOR
) & MAXMAJ
;
3655 #else /* __APPLE__ */
3656 case DIF_SUBR_GETMAJOR
:
3657 regs
[rd
] = (uintptr_t)major( (dev_t
)tupregs
[0].dttk_value
);
3659 #endif /* __APPLE__ */
3661 #if !defined(__APPLE__)
3662 case DIF_SUBR_GETMINOR
:
3664 regs
[rd
] = tupregs
[0].dttk_value
& MAXMIN64
;
3666 regs
[rd
] = tupregs
[0].dttk_value
& MAXMIN
;
3670 #else /* __APPLE__ */
3671 case DIF_SUBR_GETMINOR
:
3672 regs
[rd
] = (uintptr_t)minor( (dev_t
)tupregs
[0].dttk_value
);
3674 #endif /* __APPLE__ */
3676 #if !defined(__APPLE__)
3677 case DIF_SUBR_DDI_PATHNAME
: {
3679 * This one is a galactic mess. We are going to roughly
3680 * emulate ddi_pathname(), but it's made more complicated
3681 * by the fact that we (a) want to include the minor name and
3682 * (b) must proceed iteratively instead of recursively.
3684 uintptr_t dest
= mstate
->dtms_scratch_ptr
;
3685 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
3686 char *start
= (char *)dest
, *end
= start
+ size
- 1;
3687 uintptr_t daddr
= tupregs
[0].dttk_value
;
3688 int64_t minor
= (int64_t)tupregs
[1].dttk_value
;
3690 int i
, len
, depth
= 0;
3692 if (size
== 0 || mstate
->dtms_scratch_ptr
+ size
>
3693 mstate
->dtms_scratch_base
+ mstate
->dtms_scratch_size
) {
3694 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
		/*
		 * We want to have a name for the minor.  In order to do this,
		 * we need to walk the minor list from the devinfo.  We want
		 * to be sure that we don't infinitely walk a circular list,
		 * so we check for circularity by sending a scout pointer
		 * ahead two elements for every element that we iterate over;
		 * if the list is circular, these will ultimately point to the
		 * same element.  You may recognize this little trick as the
		 * answer to a stupid interview question -- one that always
		 * seems to be asked by those who had to have it laboriously
		 * explained to them, and who can't even concisely describe
		 * the conditions under which one would be forced to resort to
		 * this technique.  Needless to say, those conditions are
		 * found here -- and probably only here.  Is this the only
		 * use of this infamous trick in shipping, production code?
		 * If it isn't, it probably should be...
		 */
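		/*
		 * The "scout pointer" described above is the classic
		 * two-speed (tortoise-and-hare) cycle check: advance one
		 * pointer by one element and another by two, and if they
		 * ever meet the list is circular.  A stand-alone sketch on a
		 * hypothetical singly-linked list type (not part of the
		 * framework):
		 *
		 *	struct example_node { struct example_node *next; };
		 *
		 *	static int
		 *	example_has_cycle(struct example_node *head)
		 *	{
		 *		struct example_node *slow = head, *fast = head;
		 *
		 *		while (fast != NULL && fast->next != NULL) {
		 *			slow = slow->next;
		 *			fast = fast->next->next;
		 *			if (slow == fast)
		 *				return (1);
		 *		}
		 *		return (0);
		 *	}
		 *
		 * The code below does the same thing with dtrace_loadptr()
		 * so that a corrupt minor list cannot fault probe context.
		 */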
3719 uintptr_t maddr
= dtrace_loadptr(daddr
+
3720 offsetof(struct dev_info
, devi_minor
));
3722 uintptr_t next
= offsetof(struct ddi_minor_data
, next
);
3723 uintptr_t name
= offsetof(struct ddi_minor_data
,
3724 d_minor
) + offsetof(struct ddi_minor
, name
);
3725 uintptr_t dev
= offsetof(struct ddi_minor_data
,
3726 d_minor
) + offsetof(struct ddi_minor
, dev
);
3730 scout
= dtrace_loadptr(maddr
+ next
);
3732 while (maddr
!= NULL
&& !(*flags
& CPU_DTRACE_FAULT
)) {
3735 m
= dtrace_load64(maddr
+ dev
) & MAXMIN64
;
3737 m
= dtrace_load32(maddr
+ dev
) & MAXMIN
;
3740 maddr
= dtrace_loadptr(maddr
+ next
);
3745 scout
= dtrace_loadptr(scout
+ next
);
3750 scout
= dtrace_loadptr(scout
+ next
);
3755 if (scout
== maddr
) {
3756 *flags
|= CPU_DTRACE_ILLOP
;
3764 * We have the minor data. Now we need to
3765 * copy the minor's name into the end of the
3768 s
= (char *)dtrace_loadptr(maddr
+ name
);
3769 len
= dtrace_strlen(s
, size
);
3771 if (*flags
& CPU_DTRACE_FAULT
)
3775 if ((end
-= (len
+ 1)) < start
)
3781 for (i
= 1; i
<= len
; i
++)
3782 end
[i
] = dtrace_load8((uintptr_t)s
++);
3787 while (daddr
!= NULL
&& !(*flags
& CPU_DTRACE_FAULT
)) {
3788 ddi_node_state_t devi_state
;
3790 devi_state
= dtrace_load32(daddr
+
3791 offsetof(struct dev_info
, devi_node_state
));
3793 if (*flags
& CPU_DTRACE_FAULT
)
3796 if (devi_state
>= DS_INITIALIZED
) {
3797 s
= (char *)dtrace_loadptr(daddr
+
3798 offsetof(struct dev_info
, devi_addr
));
3799 len
= dtrace_strlen(s
, size
);
3801 if (*flags
& CPU_DTRACE_FAULT
)
3805 if ((end
-= (len
+ 1)) < start
)
3811 for (i
= 1; i
<= len
; i
++)
3812 end
[i
] = dtrace_load8((uintptr_t)s
++);
3816 * Now for the node name...
3818 s
= (char *)dtrace_loadptr(daddr
+
3819 offsetof(struct dev_info
, devi_node_name
));
3821 daddr
= dtrace_loadptr(daddr
+
3822 offsetof(struct dev_info
, devi_parent
));
3825 * If our parent is NULL (that is, if we're the root
3826 * node), we're going to use the special path
3832 len
= dtrace_strlen(s
, size
);
3833 if (*flags
& CPU_DTRACE_FAULT
)
3836 if ((end
-= (len
+ 1)) < start
)
3839 for (i
= 1; i
<= len
; i
++)
3840 end
[i
] = dtrace_load8((uintptr_t)s
++);
3843 if (depth
++ > dtrace_devdepth_max
) {
3844 *flags
|= CPU_DTRACE_ILLOP
;
3850 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3852 if (daddr
== NULL
) {
3853 regs
[rd
] = (uintptr_t)end
;
3854 mstate
->dtms_scratch_ptr
+= size
;
3860 case DIF_SUBR_DDI_PATHNAME
: {
3861 /* XXX awaits galactic disentanglement ;-} */
3865 #endif /* __APPLE__ */
3867 case DIF_SUBR_STRJOIN
: {
3868 char *d
= (char *)mstate
->dtms_scratch_ptr
;
3869 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
3870 uintptr_t s1
= tupregs
[0].dttk_value
;
3871 uintptr_t s2
= tupregs
[1].dttk_value
;
3874 if (mstate
->dtms_scratch_ptr
+ size
>
3875 mstate
->dtms_scratch_base
+ mstate
->dtms_scratch_size
) {
3876 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3883 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3888 if ((d
[i
++] = dtrace_load8(s1
++)) == '\0') {
3896 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3901 if ((d
[i
++] = dtrace_load8(s2
++)) == '\0')
3906 mstate
->dtms_scratch_ptr
+= i
;
3907 regs
[rd
] = (uintptr_t)d
;
	case DIF_SUBR_LLTOSTR: {
		int64_t i = (int64_t)tupregs[0].dttk_value;
		int64_t val = i < 0 ? i * -1 : i;
		uint64_t size = 22;	/* enough room for 2^64 in decimal */
		char *end = (char *)mstate->dtms_scratch_ptr + size - 1;

		if (mstate->dtms_scratch_ptr + size >
		    mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = NULL;
			break;
		}

		for (*end-- = '\0'; val; val /= 10)
			*end-- = '0' + (val % 10);

		if (i == 0)
			*end-- = '0';

		if (i < 0)
			*end-- = '-';

		regs[rd] = (uintptr_t)end + 1;
		mstate->dtms_scratch_ptr += size;
		break;
	}
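	/*
	 * lltostr() above converts by writing digits from the end of a
	 * fixed 22-byte buffer backwards, which avoids having to know the
	 * digit count up front.  The same idea as a stand-alone sketch
	 * (hypothetical helper, not part of the framework):
	 *
	 *	static char *
	 *	example_u64tostr(uint64_t val, char *buf, size_t len)
	 *	{
	 *		char *end = buf + len - 1;
	 *
	 *		*end = '\0';
	 *		do {
	 *			*--end = '0' + (val % 10);
	 *			val /= 10;
	 *		} while (val != 0);
	 *
	 *		return (end);		 first digit, not buf itself
	 *	}
	 *
	 * 22 bytes suffice because 2^64 has 20 decimal digits, plus one for
	 * a possible '-' and one for the terminating NUL.
	 */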
3940 case DIF_SUBR_DIRNAME
:
3941 case DIF_SUBR_BASENAME
: {
3942 char *dest
= (char *)mstate
->dtms_scratch_ptr
;
3943 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
3944 uintptr_t src
= tupregs
[0].dttk_value
;
3945 int i
, j
, len
= dtrace_strlen((char *)src
, size
);
3946 int lastbase
= -1, firstbase
= -1, lastdir
= -1;
3949 if (mstate
->dtms_scratch_ptr
+ size
>
3950 mstate
->dtms_scratch_base
+ mstate
->dtms_scratch_size
) {
3951 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3957 * The basename and dirname for a zero-length string is
3962 src
= (uintptr_t)".";
3966 * Start from the back of the string, moving back toward the
3967 * front until we see a character that isn't a slash. That
3968 * character is the last character in the basename.
3970 for (i
= len
- 1; i
>= 0; i
--) {
3971 if (dtrace_load8(src
+ i
) != '/')
3979 * Starting from the last character in the basename, move
3980 * towards the front until we find a slash. The character
3981 * that we processed immediately before that is the first
3982 * character in the basename.
3984 for (; i
>= 0; i
--) {
3985 if (dtrace_load8(src
+ i
) == '/')
3993 * Now keep going until we find a non-slash character. That
3994 * character is the last character in the dirname.
3996 for (; i
>= 0; i
--) {
3997 if (dtrace_load8(src
+ i
) != '/')
4004 ASSERT(!(lastbase
== -1 && firstbase
!= -1));
4005 ASSERT(!(firstbase
== -1 && lastdir
!= -1));
4007 if (lastbase
== -1) {
4009 * We didn't find a non-slash character. We know that
4010 * the length is non-zero, so the whole string must be
4011 * slashes. In either the dirname or the basename
4012 * case, we return '/'.
4014 ASSERT(firstbase
== -1);
4015 firstbase
= lastbase
= lastdir
= 0;
4018 if (firstbase
== -1) {
4020 * The entire string consists only of a basename
4021 * component. If we're looking for dirname, we need
4022 * to change our string to be just "."; if we're
4023 * looking for a basename, we'll just set the first
4024 * character of the basename to be 0.
4026 if (subr
== DIF_SUBR_DIRNAME
) {
4027 ASSERT(lastdir
== -1);
4028 src
= (uintptr_t)".";
4035 if (subr
== DIF_SUBR_DIRNAME
) {
4036 if (lastdir
== -1) {
4038 * We know that we have a slash in the name --
4039 * or lastdir would be set to 0, above. And
4040 * because lastdir is -1, we know that this
4041 * slash must be the first character. (That
4042 * is, the full string must be of the form
4043 * "/basename".) In this case, the last
4044 * character of the directory name is 0.
4052 ASSERT(subr
== DIF_SUBR_BASENAME
);
4053 ASSERT(firstbase
!= -1 && lastbase
!= -1);
4058 for (i
= start
, j
= 0; i
<= end
&& j
< size
- 1; i
++, j
++)
4059 dest
[j
] = dtrace_load8(src
+ i
);
4062 regs
[rd
] = (uintptr_t)dest
;
4063 mstate
->dtms_scratch_ptr
+= size
;
4067 case DIF_SUBR_CLEANPATH
: {
4068 char *dest
= (char *)mstate
->dtms_scratch_ptr
, c
;
4069 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
4070 uintptr_t src
= tupregs
[0].dttk_value
;
4073 if (mstate
->dtms_scratch_ptr
+ size
>
4074 mstate
->dtms_scratch_base
+ mstate
->dtms_scratch_size
) {
4075 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
4081 * Move forward, loading each character.
4084 c
= dtrace_load8(src
+ i
++);
4086 if (j
+ 5 >= size
) /* 5 = strlen("/..c\0") */
4094 c
= dtrace_load8(src
+ i
++);
4098 * We have two slashes -- we can just advance
4099 * to the next character.
4106 * This is not "." and it's not ".." -- we can
4107 * just store the "/" and this character and
4115 c
= dtrace_load8(src
+ i
++);
4119 * This is a "/./" component. We're not going
4120 * to store anything in the destination buffer;
4121 * we're just going to go to the next component.
4128 * This is not ".." -- we can just store the
4129 * "/." and this character and continue
4138 c
= dtrace_load8(src
+ i
++);
4140 if (c
!= '/' && c
!= '\0') {
4142 * This is not ".." -- it's "..[mumble]".
4143 * We'll store the "/.." and this character
4144 * and continue processing.
4154 * This is "/../" or "/..\0". We need to back up
4155 * our destination pointer until we find a "/".
4158 while (j
!= 0 && dest
[--j
] != '/')
4163 } while (c
!= '\0');
4166 regs
[rd
] = (uintptr_t)dest
;
4167 mstate
->dtms_scratch_ptr
+= size
;
4172 /* CHUD callback ('chud(uint64_t, [uint64_t], [uint64_t] ...)') */
4173 case DIF_SUBR_CHUD
: {
4174 uint64_t selector
= tupregs
[0].dttk_value
;
4175 uint64_t args
[DIF_DTR_NREGS
-1] = {0ULL};
4178 /* copy in any variadic argument list */
4179 for(ii
= 0; ii
< DIF_DTR_NREGS
-1; ii
++) {
4180 args
[ii
] = tupregs
[ii
+1].dttk_value
;
4184 chudxnu_dtrace_callback(selector
, args
, DIF_DTR_NREGS
-1);
4185 if(KERN_SUCCESS
!= ret
) {
4191 #endif /* __APPLE__ */
/*
 * Emulate the execution of DTrace IR instructions specified by the given
 * DIF object.  This function is deliberately void of assertions as all of
 * the necessary checks are handled by a call to dtrace_difo_validate().
 */
static uint64_t
dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate, dtrace_state_t *state)
{
	const dif_instr_t *text = difo->dtdo_buf;
	const uint_t textlen = difo->dtdo_len;
	const char *strtab = difo->dtdo_strtab;
	const uint64_t *inttab = difo->dtdo_inttab;

	uint64_t rval = 0;
	dtrace_statvar_t *svar;
	dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
	dtrace_difv_t *v;
	volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
#if !defined(__APPLE__)
	volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
#else
	volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
#endif /* __APPLE__ */

	dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
	uint64_t regs[DIF_DIR_NREGS];
	uint64_t *tmp;

	uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0;
	int64_t cc_r;
	uint_t pc = 0, id, opc;
	uint8_t ttop = 0;
	dif_instr_t instr;
	uint_t r1, r2, rd;

	regs[DIF_REG_R0] = 0;		/* %r0 is fixed at zero */

	while (pc < textlen && !(*flags & CPU_DTRACE_FAULT)) {
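		/*
		 * Each iteration of this loop is a straightforward
		 * fetch/decode/execute step over the validated DIF text.  In
		 * outline (using the register-field macros from the DTrace
		 * headers), the body below does the equivalent of:
		 *
		 *	opc = pc;
		 *	instr = text[pc++];		 fetch
		 *	r1 = DIF_INSTR_R1(instr);	 decode
		 *	r2 = DIF_INSTR_R2(instr);
		 *	rd = DIF_INSTR_RD(instr);
		 *	switch (DIF_INSTR_OP(instr)) {	 execute
		 *	...
		 *	}
		 *
		 * with emulation stopping early if a fault flag is raised.
		 */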
4238 r1
= DIF_INSTR_R1(instr
);
4239 r2
= DIF_INSTR_R2(instr
);
4240 rd
= DIF_INSTR_RD(instr
);
4242 switch (DIF_INSTR_OP(instr
)) {
4244 regs
[rd
] = regs
[r1
] | regs
[r2
];
4247 regs
[rd
] = regs
[r1
] ^ regs
[r2
];
4250 regs
[rd
] = regs
[r1
] & regs
[r2
];
4253 regs
[rd
] = regs
[r1
] << regs
[r2
];
4256 regs
[rd
] = regs
[r1
] >> regs
[r2
];
4259 regs
[rd
] = regs
[r1
] - regs
[r2
];
4262 regs
[rd
] = regs
[r1
] + regs
[r2
];
4265 regs
[rd
] = regs
[r1
] * regs
[r2
];
4268 if (regs
[r2
] == 0) {
4270 *flags
|= CPU_DTRACE_DIVZERO
;
4272 regs
[rd
] = (int64_t)regs
[r1
] /
4278 if (regs
[r2
] == 0) {
4280 *flags
|= CPU_DTRACE_DIVZERO
;
4282 regs
[rd
] = regs
[r1
] / regs
[r2
];
4287 if (regs
[r2
] == 0) {
4289 *flags
|= CPU_DTRACE_DIVZERO
;
4291 regs
[rd
] = (int64_t)regs
[r1
] %
4297 if (regs
[r2
] == 0) {
4299 *flags
|= CPU_DTRACE_DIVZERO
;
4301 regs
[rd
] = regs
[r1
] % regs
[r2
];
4306 regs
[rd
] = ~regs
[r1
];
4309 regs
[rd
] = regs
[r1
];
4312 cc_r
= regs
[r1
] - regs
[r2
];
4316 cc_c
= regs
[r1
] < regs
[r2
];
4319 cc_n
= cc_v
= cc_c
= 0;
4320 cc_z
= regs
[r1
] == 0;
4323 pc
= DIF_INSTR_LABEL(instr
);
4327 pc
= DIF_INSTR_LABEL(instr
);
4331 pc
= DIF_INSTR_LABEL(instr
);
4334 if ((cc_z
| (cc_n
^ cc_v
)) == 0)
4335 pc
= DIF_INSTR_LABEL(instr
);
4338 if ((cc_c
| cc_z
) == 0)
4339 pc
= DIF_INSTR_LABEL(instr
);
4342 if ((cc_n
^ cc_v
) == 0)
4343 pc
= DIF_INSTR_LABEL(instr
);
4347 pc
= DIF_INSTR_LABEL(instr
);
4351 pc
= DIF_INSTR_LABEL(instr
);
4355 pc
= DIF_INSTR_LABEL(instr
);
4358 if (cc_z
| (cc_n
^ cc_v
))
4359 pc
= DIF_INSTR_LABEL(instr
);
4363 pc
= DIF_INSTR_LABEL(instr
);
4366 if (!dtrace_canstore(regs
[r1
], 1, mstate
, vstate
)) {
4367 *flags
|= CPU_DTRACE_KPRIV
;
4373 regs
[rd
] = (int8_t)dtrace_load8(regs
[r1
]);
4376 if (!dtrace_canstore(regs
[r1
], 2, mstate
, vstate
)) {
4377 *flags
|= CPU_DTRACE_KPRIV
;
4383 regs
[rd
] = (int16_t)dtrace_load16(regs
[r1
]);
4386 if (!dtrace_canstore(regs
[r1
], 4, mstate
, vstate
)) {
4387 *flags
|= CPU_DTRACE_KPRIV
;
4393 regs
[rd
] = (int32_t)dtrace_load32(regs
[r1
]);
4396 if (!dtrace_canstore(regs
[r1
], 1, mstate
, vstate
)) {
4397 *flags
|= CPU_DTRACE_KPRIV
;
4403 regs
[rd
] = dtrace_load8(regs
[r1
]);
4406 if (!dtrace_canstore(regs
[r1
], 2, mstate
, vstate
)) {
4407 *flags
|= CPU_DTRACE_KPRIV
;
4413 regs
[rd
] = dtrace_load16(regs
[r1
]);
4416 if (!dtrace_canstore(regs
[r1
], 4, mstate
, vstate
)) {
4417 *flags
|= CPU_DTRACE_KPRIV
;
4423 regs
[rd
] = dtrace_load32(regs
[r1
]);
4426 if (!dtrace_canstore(regs
[r1
], 8, mstate
, vstate
)) {
4427 *flags
|= CPU_DTRACE_KPRIV
;
4433 regs
[rd
] = dtrace_load64(regs
[r1
]);
4437 dtrace_fuword8(regs
[r1
]);
4440 regs
[rd
] = (int16_t)
4441 dtrace_fuword16(regs
[r1
]);
4444 regs
[rd
] = (int32_t)
4445 dtrace_fuword32(regs
[r1
]);
4449 dtrace_fuword8(regs
[r1
]);
4453 dtrace_fuword16(regs
[r1
]);
4457 dtrace_fuword32(regs
[r1
]);
4461 dtrace_fuword64(regs
[r1
]);
4469 regs
[rd
] = inttab
[DIF_INSTR_INTEGER(instr
)];
4472 regs
[rd
] = (uint64_t)(uintptr_t)
4473 (strtab
+ DIF_INSTR_STRING(instr
));
4476 cc_r
= dtrace_strncmp((char *)(uintptr_t)regs
[r1
],
4477 (char *)(uintptr_t)regs
[r2
],
4478 state
->dts_options
[DTRACEOPT_STRSIZE
]);
4485 regs
[rd
] = dtrace_dif_variable(mstate
, state
,
4489 id
= DIF_INSTR_VAR(instr
);
4491 if (id
>= DIF_VAR_OTHER_UBASE
) {
4494 id
-= DIF_VAR_OTHER_UBASE
;
4495 svar
= vstate
->dtvs_globals
[id
];
4496 ASSERT(svar
!= NULL
);
4497 v
= &svar
->dtsv_var
;
4499 if (!(v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
)) {
4500 regs
[rd
] = svar
->dtsv_data
;
4504 a
= (uintptr_t)svar
->dtsv_data
;
4506 if (*(uint8_t *)a
== UINT8_MAX
) {
4508 * If the 0th byte is set to UINT8_MAX
4509 * then this is to be treated as a
4510 * reference to a NULL variable.
4514 regs
[rd
] = a
+ sizeof (uint64_t);
4520 regs
[rd
] = dtrace_dif_variable(mstate
, state
, id
, 0);
4524 id
= DIF_INSTR_VAR(instr
);
4526 ASSERT(id
>= DIF_VAR_OTHER_UBASE
);
4527 id
-= DIF_VAR_OTHER_UBASE
;
4529 svar
= vstate
->dtvs_globals
[id
];
4530 ASSERT(svar
!= NULL
);
4531 v
= &svar
->dtsv_var
;
4533 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
4534 uintptr_t a
= (uintptr_t)svar
->dtsv_data
;
4537 ASSERT(svar
->dtsv_size
!= 0);
4539 if (regs
[rd
] == NULL
) {
4540 *(uint8_t *)a
= UINT8_MAX
;
4544 a
+= sizeof (uint64_t);
4547 dtrace_vcopy((void *)(uintptr_t)regs
[rd
],
4548 (void *)a
, &v
->dtdv_type
);
4552 svar
->dtsv_data
= regs
[rd
];
4557 * There are no DTrace built-in thread-local arrays at
4558 * present. This opcode is saved for future work.
4560 *flags
|= CPU_DTRACE_ILLOP
;
4565 id
= DIF_INSTR_VAR(instr
);
4567 if (id
< DIF_VAR_OTHER_UBASE
) {
4569 * For now, this has no meaning.
4575 id
-= DIF_VAR_OTHER_UBASE
;
4577 ASSERT(id
< vstate
->dtvs_nlocals
);
4578 ASSERT(vstate
->dtvs_locals
!= NULL
);
4580 svar
= vstate
->dtvs_locals
[id
];
4581 ASSERT(svar
!= NULL
);
4582 v
= &svar
->dtsv_var
;
4584 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
4585 uintptr_t a
= (uintptr_t)svar
->dtsv_data
;
4586 size_t sz
= v
->dtdv_type
.dtdt_size
;
4588 sz
+= sizeof (uint64_t);
4589 ASSERT(svar
->dtsv_size
== (int)NCPU
* sz
);
4590 a
+= CPU
->cpu_id
* sz
;
4592 if (*(uint8_t *)a
== UINT8_MAX
) {
4594 * If the 0th byte is set to UINT8_MAX
4595 * then this is to be treated as a
4596 * reference to a NULL variable.
4600 regs
[rd
] = a
+ sizeof (uint64_t);
4606 ASSERT(svar
->dtsv_size
== (int)NCPU
* sizeof (uint64_t));
4607 tmp
= (uint64_t *)(uintptr_t)svar
->dtsv_data
;
4608 regs
[rd
] = tmp
[CPU
->cpu_id
];
4612 id
= DIF_INSTR_VAR(instr
);
4614 ASSERT(id
>= DIF_VAR_OTHER_UBASE
);
4615 id
-= DIF_VAR_OTHER_UBASE
;
4616 ASSERT(id
< vstate
->dtvs_nlocals
);
4618 ASSERT(vstate
->dtvs_locals
!= NULL
);
4619 svar
= vstate
->dtvs_locals
[id
];
4620 ASSERT(svar
!= NULL
);
4621 v
= &svar
->dtsv_var
;
4623 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
4624 uintptr_t a
= (uintptr_t)svar
->dtsv_data
;
4625 size_t sz
= v
->dtdv_type
.dtdt_size
;
4627 sz
+= sizeof (uint64_t);
4628 ASSERT(svar
->dtsv_size
== (int)NCPU
* sz
);
4629 a
+= CPU
->cpu_id
* sz
;
4631 if (regs
[rd
] == NULL
) {
4632 *(uint8_t *)a
= UINT8_MAX
;
4636 a
+= sizeof (uint64_t);
4639 dtrace_vcopy((void *)(uintptr_t)regs
[rd
],
4640 (void *)a
, &v
->dtdv_type
);
4644 ASSERT(svar
->dtsv_size
== (int)NCPU
* sizeof (uint64_t));
4645 tmp
= (uint64_t *)(uintptr_t)svar
->dtsv_data
;
4646 tmp
[CPU
->cpu_id
] = regs
[rd
];
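			/*
			 * Note (inferred from the DIF_OP_LDLS/DIF_OP_STLS
			 * handling above; not in the original source):
			 * clause-local storage is allocated with one slot per
			 * CPU (dtsv_size == NCPU * sz), so each CPU indexes
			 * its own copy at dtsv_data + cpu_id * sz; by-ref
			 * locals carry the same leading uint64_t "NULL" flag
			 * word as by-ref globals.
			 */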
4650 dtrace_dynvar_t
*dvar
;
4653 id
= DIF_INSTR_VAR(instr
);
4654 ASSERT(id
>= DIF_VAR_OTHER_UBASE
);
4655 id
-= DIF_VAR_OTHER_UBASE
;
4656 v
= &vstate
->dtvs_tlocals
[id
];
4658 key
= &tupregs
[DIF_DTR_NREGS
];
4659 key
[0].dttk_value
= (uint64_t)id
;
4660 key
[0].dttk_size
= 0;
4661 DTRACE_TLS_THRKEY(key
[1].dttk_value
);
4662 key
[1].dttk_size
= 0;
4664 dvar
= dtrace_dynvar(dstate
, 2, key
,
4665 sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC
);
4672 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
4673 regs
[rd
] = (uint64_t)(uintptr_t)dvar
->dtdv_data
;
4675 regs
[rd
] = *((uint64_t *)dvar
->dtdv_data
);
4682 dtrace_dynvar_t
*dvar
;
4685 id
= DIF_INSTR_VAR(instr
);
4686 ASSERT(id
>= DIF_VAR_OTHER_UBASE
);
4687 id
-= DIF_VAR_OTHER_UBASE
;
4689 key
= &tupregs
[DIF_DTR_NREGS
];
4690 key
[0].dttk_value
= (uint64_t)id
;
4691 key
[0].dttk_size
= 0;
4692 DTRACE_TLS_THRKEY(key
[1].dttk_value
);
4693 key
[1].dttk_size
= 0;
4694 v
= &vstate
->dtvs_tlocals
[id
];
4696 dvar
= dtrace_dynvar(dstate
, 2, key
,
4697 v
->dtdv_type
.dtdt_size
> sizeof (uint64_t) ?
4698 v
->dtdv_type
.dtdt_size
: sizeof (uint64_t),
4699 regs
[rd
] ? DTRACE_DYNVAR_ALLOC
:
4700 DTRACE_DYNVAR_DEALLOC
);
4703 * Given that we're storing to thread-local data,
4704 * we need to flush our predicate cache.
4706 #if !defined(__APPLE__)
4707 curthread
->t_predcache
= NULL
;
4709 dtrace_set_thread_predcache(current_thread(), 0);
4710 #endif /* __APPLE__ */
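			/*
			 * Note (added for clarity; rationale inferred): the
			 * predicate cache records a predicate known to
			 * evaluate to false for this thread, and a predicate
			 * may depend on thread-local variables -- so any
			 * store to thread-local storage must invalidate the
			 * cache, as done above for both the Solaris and
			 * Darwin cases.
			 */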
4716 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
4717 dtrace_vcopy((void *)(uintptr_t)regs
[rd
],
4718 dvar
->dtdv_data
, &v
->dtdv_type
);
4720 *((uint64_t *)dvar
->dtdv_data
) = regs
[rd
];
4727 regs
[rd
] = (int64_t)regs
[r1
] >> regs
[r2
];
4731 dtrace_dif_subr(DIF_INSTR_SUBR(instr
), rd
,
4732 regs
, tupregs
, ttop
, mstate
, state
);
4736 if (ttop
== DIF_DTR_NREGS
) {
4737 *flags
|= CPU_DTRACE_TUPOFLOW
;
4741 if (r1
== DIF_TYPE_STRING
) {
4743 * If this is a string type and the size is 0,
4744 * we'll use the system-wide default string
4745 * size. Note that we are _not_ looking at
4746 * the value of the DTRACEOPT_STRSIZE option;
4747 * had this been set, we would expect to have
4748 * a non-zero size value in the "pushtr".
4750 tupregs
[ttop
].dttk_size
=
4751 dtrace_strlen((char *)(uintptr_t)regs
[rd
],
4752 regs
[r2
] ? regs
[r2
] :
4753 dtrace_strsize_default
) + 1;
4755 tupregs
[ttop
].dttk_size
= regs
[r2
];
4758 tupregs
[ttop
++].dttk_value
= regs
[rd
];
4762 if (ttop
== DIF_DTR_NREGS
) {
4763 *flags
|= CPU_DTRACE_TUPOFLOW
;
4767 tupregs
[ttop
].dttk_value
= regs
[rd
];
4768 tupregs
[ttop
++].dttk_size
= 0;
4776 case DIF_OP_FLUSHTS
:
4781 case DIF_OP_LDTAA
: {
4782 dtrace_dynvar_t
*dvar
;
4783 dtrace_key_t
*key
= tupregs
;
4784 uint_t nkeys
= ttop
;
4786 id
= DIF_INSTR_VAR(instr
);
4787 ASSERT(id
>= DIF_VAR_OTHER_UBASE
);
4788 id
-= DIF_VAR_OTHER_UBASE
;
4790 key
[nkeys
].dttk_value
= (uint64_t)id
;
4791 key
[nkeys
++].dttk_size
= 0;
4793 if (DIF_INSTR_OP(instr
) == DIF_OP_LDTAA
) {
4794 DTRACE_TLS_THRKEY(key
[nkeys
].dttk_value
);
4795 key
[nkeys
++].dttk_size
= 0;
4796 v
= &vstate
->dtvs_tlocals
[id
];
4798 v
= &vstate
->dtvs_globals
[id
]->dtsv_var
;
4801 dvar
= dtrace_dynvar(dstate
, nkeys
, key
,
4802 v
->dtdv_type
.dtdt_size
> sizeof (uint64_t) ?
4803 v
->dtdv_type
.dtdt_size
: sizeof (uint64_t),
4804 DTRACE_DYNVAR_NOALLOC
);
4811 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
4812 regs
[rd
] = (uint64_t)(uintptr_t)dvar
->dtdv_data
;
4814 regs
[rd
] = *((uint64_t *)dvar
->dtdv_data
);
4821 case DIF_OP_STTAA
: {
4822 dtrace_dynvar_t
*dvar
;
4823 dtrace_key_t
*key
= tupregs
;
4824 uint_t nkeys
= ttop
;
4826 id
= DIF_INSTR_VAR(instr
);
4827 ASSERT(id
>= DIF_VAR_OTHER_UBASE
);
4828 id
-= DIF_VAR_OTHER_UBASE
;
4830 key
[nkeys
].dttk_value
= (uint64_t)id
;
4831 key
[nkeys
++].dttk_size
= 0;
4833 if (DIF_INSTR_OP(instr
) == DIF_OP_STTAA
) {
4834 DTRACE_TLS_THRKEY(key
[nkeys
].dttk_value
);
4835 key
[nkeys
++].dttk_size
= 0;
4836 v
= &vstate
->dtvs_tlocals
[id
];
4838 v
= &vstate
->dtvs_globals
[id
]->dtsv_var
;
4841 dvar
= dtrace_dynvar(dstate
, nkeys
, key
,
4842 v
->dtdv_type
.dtdt_size
> sizeof (uint64_t) ?
4843 v
->dtdv_type
.dtdt_size
: sizeof (uint64_t),
4844 regs
[rd
] ? DTRACE_DYNVAR_ALLOC
:
4845 DTRACE_DYNVAR_DEALLOC
);
4850 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
4851 dtrace_vcopy((void *)(uintptr_t)regs
[rd
],
4852 dvar
->dtdv_data
, &v
->dtdv_type
);
4854 *((uint64_t *)dvar
->dtdv_data
) = regs
[rd
];
4860 case DIF_OP_ALLOCS
: {
4861 uintptr_t ptr
= P2ROUNDUP(mstate
->dtms_scratch_ptr
, 8);
4862 size_t size
= ptr
- mstate
->dtms_scratch_ptr
+ regs
[r1
];
4864 if (mstate
->dtms_scratch_ptr
+ size
>
4865 mstate
->dtms_scratch_base
+
4866 mstate
->dtms_scratch_size
) {
4867 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
4870 dtrace_bzero((void *)
4871 mstate
->dtms_scratch_ptr
, size
);
4872 mstate
->dtms_scratch_ptr
+= size
;
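			/*
			 * Illustrative sketch (not part of the original
			 * source): DIF_OP_ALLOCS above is a bump allocator
			 * over the per-CPU scratch region.  Assuming
			 * P2ROUNDUP(x, a) rounds x up to a multiple of a,
			 * the pattern is equivalent to the helper below; the
			 * helper name is hypothetical.
			 */
#if 0
static uintptr_t
dtrace_scratch_alloc_sketch(dtrace_mstate_t *mstate, size_t nbytes)
{
	/* Align the allocation base to 8 bytes... */
	uintptr_t ptr = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
	/* ...and charge both the alignment padding and the payload. */
	size_t size = ptr - mstate->dtms_scratch_ptr + nbytes;

	if (mstate->dtms_scratch_ptr + size >
	    mstate->dtms_scratch_base + mstate->dtms_scratch_size)
		return (0);	/* caller flags CPU_DTRACE_NOSCRATCH */

	mstate->dtms_scratch_ptr += size;
	return (ptr);
}
#endif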
4879 if (!dtrace_canstore(regs
[rd
], regs
[r2
],
4881 *flags
|= CPU_DTRACE_BADADDR
;
4886 dtrace_bcopy((void *)(uintptr_t)regs
[r1
],
4887 (void *)(uintptr_t)regs
[rd
], (size_t)regs
[r2
]);
4891 if (!dtrace_canstore(regs
[rd
], 1, mstate
, vstate
)) {
4892 *flags
|= CPU_DTRACE_BADADDR
;
4896 *((uint8_t *)(uintptr_t)regs
[rd
]) = (uint8_t)regs
[r1
];
4900 if (!dtrace_canstore(regs
[rd
], 2, mstate
, vstate
)) {
4901 *flags
|= CPU_DTRACE_BADADDR
;
4906 *flags
|= CPU_DTRACE_BADALIGN
;
4910 *((uint16_t *)(uintptr_t)regs
[rd
]) = (uint16_t)regs
[r1
];
4914 if (!dtrace_canstore(regs
[rd
], 4, mstate
, vstate
)) {
4915 *flags
|= CPU_DTRACE_BADADDR
;
4920 *flags
|= CPU_DTRACE_BADALIGN
;
4924 *((uint32_t *)(uintptr_t)regs
[rd
]) = (uint32_t)regs
[r1
];
4928 if (!dtrace_canstore(regs
[rd
], 8, mstate
, vstate
)) {
4929 *flags
|= CPU_DTRACE_BADADDR
;
4933 #if !defined(__APPLE__)
4936 if (regs
[rd
] & 3) { /* Darwin kmem_zalloc() called from dtrace_difo_init() is 4-byte aligned. */
4937 #endif /* __APPLE__ */
4938 *flags
|= CPU_DTRACE_BADALIGN
;
4942 *((uint64_t *)(uintptr_t)regs
[rd
]) = regs
[r1
];
4947 if (!(*flags
& CPU_DTRACE_FAULT
))
4950 mstate
->dtms_fltoffs
= opc
* sizeof (dif_instr_t
);
4951 mstate
->dtms_present
|= DTRACE_MSTATE_FLTOFFS
;
4957 dtrace_action_breakpoint(dtrace_ecb_t
*ecb
)
4959 dtrace_probe_t
*probe
= ecb
->dte_probe
;
4960 dtrace_provider_t
*prov
= probe
->dtpr_provider
;
4961 char c
[DTRACE_FULLNAMELEN
+ 80], *str
;
4962 char *msg
= "dtrace: breakpoint action at probe ";
4963 char *ecbmsg
= " (ecb ";
4964 uintptr_t mask
= (0xf << (sizeof (uintptr_t) * NBBY
/ 4));
4965 uintptr_t val
= (uintptr_t)ecb
;
4966 int shift
= (sizeof (uintptr_t) * NBBY
) - 4, i
= 0;
4968 if (dtrace_destructive_disallow
)
4972 * It's impossible to be taking action on the NULL probe.
4974 ASSERT(probe
!= NULL
);
4977 * This is a poor man's (destitute man's?) sprintf(): we want to
4978 * print the provider name, module name, function name and name of
4979 * the probe, along with the hex address of the ECB with the breakpoint
4980 * action -- all of which we must place in the character buffer by
4983 while (*msg
!= '\0')
4986 for (str
= prov
->dtpv_name
; *str
!= '\0'; str
++)
4990 for (str
= probe
->dtpr_mod
; *str
!= '\0'; str
++)
4994 for (str
= probe
->dtpr_func
; *str
!= '\0'; str
++)
4998 for (str
= probe
->dtpr_name
; *str
!= '\0'; str
++)
5001 while (*ecbmsg
!= '\0')
5004 while (shift
>= 0) {
5005 mask
= (uintptr_t)0xf << shift
;
5007 if (val
>= ((uintptr_t)1 << shift
))
5008 c
[i
++] = "0123456789abcdef"[(val
& mask
) >> shift
];
5019 dtrace_action_panic(dtrace_ecb_t
*ecb
)
5021 dtrace_probe_t
*probe
= ecb
->dte_probe
;
5024 * It's impossible to be taking action on the NULL probe.
5026 ASSERT(probe
!= NULL
);
5028 if (dtrace_destructive_disallow
)
5031 if (dtrace_panicked
!= NULL
)
5034 #if !defined(__APPLE__)
5035 if (dtrace_casptr(&dtrace_panicked
, NULL
, curthread
) != NULL
)
5038 if (dtrace_casptr(&dtrace_panicked
, NULL
, current_thread()) != NULL
)
5040 #endif /* __APPLE__ */
5043 * We won the right to panic. (We want to be sure that only one
5044 * thread calls panic() from dtrace_probe(), and that panic() is
5045 * called exactly once.)
5047 dtrace_panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)",
5048 probe
->dtpr_provider
->dtpv_name
, probe
->dtpr_mod
,
5049 probe
->dtpr_func
, probe
->dtpr_name
, (void *)ecb
);
5051 #if defined(__APPLE__)
5052 /* Mac OS X debug feature -- can return from panic() */
5053 dtrace_panicked
= NULL
;
5054 #endif /* __APPLE__ */
5058 dtrace_action_raise(uint64_t sig
)
5060 if (dtrace_destructive_disallow
)
5064 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP
);
5068 #if !defined(__APPLE__)
5070 * raise() has a queue depth of 1 -- we ignore all subsequent
5071 * invocations of the raise() action.
5073 if (curthread
->t_dtrace_sig
== 0)
5074 curthread
->t_dtrace_sig
= (uint8_t)sig
;
5076 curthread
->t_sig_check
= 1;
5079 uthread_t uthread
= (uthread_t
)get_bsdthread_info(current_thread());
5081 if (uthread
&& uthread
->t_dtrace_sig
== 0) {
5082 uthread
->t_dtrace_sig
= sig
;
5083 psignal(current_proc(), (int)sig
);
5085 #endif /* __APPLE__ */
5089 dtrace_action_stop(void)
5091 if (dtrace_destructive_disallow
)
5094 #if !defined(__APPLE__)
5095 if (!curthread
->t_dtrace_stop
) {
5096 curthread
->t_dtrace_stop
= 1;
5097 curthread
->t_sig_check
= 1;
5101 psignal(current_proc(), SIGSTOP
);
5102 #endif /* __APPLE__ */
5106 dtrace_action_chill(dtrace_mstate_t
*mstate
, hrtime_t val
)
5109 volatile uint16_t *flags
;
5112 if (dtrace_destructive_disallow
)
5115 flags
= (volatile uint16_t *)&cpu_core
[cpu
->cpu_id
].cpuc_dtrace_flags
;
5117 now
= dtrace_gethrtime();
5119 if (now
- cpu
->cpu_dtrace_chillmark
> dtrace_chill_interval
) {
5121 * We need to advance the mark to the current time.
5123 cpu
->cpu_dtrace_chillmark
= now
;
5124 cpu
->cpu_dtrace_chilled
= 0;
5128 * Now check to see if the requested chill time would take us over
5129 * the maximum amount of time allowed in the chill interval. (Or
5130 * worse, if the calculation itself induces overflow.)
5132 if (cpu
->cpu_dtrace_chilled
+ val
> dtrace_chill_max
||
5133 cpu
->cpu_dtrace_chilled
+ val
< cpu
->cpu_dtrace_chilled
) {
5134 *flags
|= CPU_DTRACE_ILLOP
;
5138 while (dtrace_gethrtime() - now
< val
)
5142 * Normally, we assure that the value of the variable "timestamp" does
5143 * not change within an ECB. The presence of chill() represents an
5144 * exception to this rule, however.
5146 mstate
->dtms_present
&= ~DTRACE_MSTATE_TIMESTAMP
;
5147 cpu
->cpu_dtrace_chilled
+= val
;
5151 dtrace_action_ustack(dtrace_mstate_t
*mstate
, dtrace_state_t
*state
,
5152 uint64_t *buf
, uint64_t arg
)
5154 int nframes
= DTRACE_USTACK_NFRAMES(arg
);
5155 int strsize
= DTRACE_USTACK_STRSIZE(arg
);
5156 uint64_t *pcs
= &buf
[1], *fps
;
5157 char *str
= (char *)&pcs
[nframes
];
5158 int size
, offs
= 0, i
, j
;
5159 uintptr_t old
= mstate
->dtms_scratch_ptr
, saved
;
5160 uint16_t *flags
= &cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
;
	 * Should be taking a faster path if string space has not been
	 * allocated.
	 */
	ASSERT(strsize != 0);
5170 * We will first allocate some temporary space for the frame pointers.
5172 fps
= (uint64_t *)P2ROUNDUP(mstate
->dtms_scratch_ptr
, 8);
5173 size
= (uintptr_t)fps
- mstate
->dtms_scratch_ptr
+
5174 (nframes
* sizeof (uint64_t));
5176 if (mstate
->dtms_scratch_ptr
+ size
>
5177 mstate
->dtms_scratch_base
+ mstate
->dtms_scratch_size
) {
5179 * Not enough room for our frame pointers -- need to indicate
5180 * that we ran out of scratch space.
5182 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
5186 mstate
->dtms_scratch_ptr
+= size
;
5187 saved
= mstate
->dtms_scratch_ptr
;
5190 * Now get a stack with both program counters and frame pointers.
5192 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
5193 dtrace_getufpstack(buf
, fps
, nframes
+ 1);
5194 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
5197 * If that faulted, we're cooked.
5199 if (*flags
& CPU_DTRACE_FAULT
)
5203 * Now we want to walk up the stack, calling the USTACK helper. For
5204 * each iteration, we restore the scratch pointer.
5206 for (i
= 0; i
< nframes
; i
++) {
5207 mstate
->dtms_scratch_ptr
= saved
;
5209 if (offs
>= strsize
)
5212 sym
= (char *)(uintptr_t)dtrace_helper(
5213 DTRACE_HELPER_ACTION_USTACK
,
5214 mstate
, state
, pcs
[i
], fps
[i
]);
5217 * If we faulted while running the helper, we're going to
5218 * clear the fault and null out the corresponding string.
5220 if (*flags
& CPU_DTRACE_FAULT
) {
5221 *flags
&= ~CPU_DTRACE_FAULT
;
5231 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
5234 * Now copy in the string that the helper returned to us.
5236 for (j
= 0; offs
+ j
< strsize
; j
++) {
5237 if ((str
[offs
+ j
] = sym
[j
]) == '\0')
5241 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
5246 if (offs
>= strsize
) {
5248 * If we didn't have room for all of the strings, we don't
5249 * abort processing -- this needn't be a fatal error -- but we
5250 * still want to increment a counter (dts_stkstroverflows) to
5251 * allow this condition to be warned about. (If this is from
5252 * a jstack() action, it is easily tuned via jstackstrsize.)
5254 dtrace_error(&state
->dts_stkstroverflows
);
5257 while (offs
< strsize
)
5261 mstate
->dtms_scratch_ptr
= old
;
5265 * If you're looking for the epicenter of DTrace, you just found it. This
5266 * is the function called by the provider to fire a probe -- from which all
5267 * subsequent probe-context DTrace activity emanates.
5269 #if !defined(__APPLE__)
5271 dtrace_probe(dtrace_id_t id
, uintptr_t arg0
, uintptr_t arg1
,
5272 uintptr_t arg2
, uintptr_t arg3
, uintptr_t arg4
)
5275 __dtrace_probe(dtrace_id_t id
, uint64_t arg0
, uint64_t arg1
,
5276 uint64_t arg2
, uint64_t arg3
, uint64_t arg4
)
5277 #endif /* __APPLE__ */
5279 processorid_t cpuid
;
5280 dtrace_icookie_t cookie
;
5281 dtrace_probe_t
*probe
;
5282 dtrace_mstate_t mstate
;
5284 dtrace_action_t
*act
;
5288 volatile uint16_t *flags
;
5291 #if !defined(__APPLE__)
5293 * Kick out immediately if this CPU is still being born (in which case
5294 * curthread will be set to -1)
5296 if ((uintptr_t)curthread
& 1)
5299 #endif /* __APPLE__ */
5301 cookie
= dtrace_interrupt_disable();
5302 probe
= dtrace_probes
[id
- 1];
5303 cpuid
= CPU
->cpu_id
;
5304 onintr
= CPU_ON_INTR(CPU
);
5306 #if !defined(__APPLE__)
5307 if (!onintr
&& probe
->dtpr_predcache
!= DTRACE_CACHEIDNONE
&&
5308 probe
->dtpr_predcache
== curthread
->t_predcache
) {
5310 if (!onintr
&& probe
->dtpr_predcache
!= DTRACE_CACHEIDNONE
&&
5311 probe
->dtpr_predcache
== dtrace_get_thread_predcache(current_thread())) {
5312 #endif /* __APPLE__ */
5314 * We have hit in the predicate cache; we know that
5315 * this predicate would evaluate to be false.
5317 dtrace_interrupt_enable(cookie
);
5321 if (panic_quiesce
) {
5323 * We don't trace anything if we're panicking.
5325 dtrace_interrupt_enable(cookie
);
5329 #if !defined(__APPLE__)
5330 now
= dtrace_gethrtime();
5331 vtime
= dtrace_vtime_references
!= 0;
5333 if (vtime
&& curthread
->t_dtrace_start
)
5334 curthread
->t_dtrace_vtime
+= now
- curthread
->t_dtrace_start
;
5336 vtime
= dtrace_vtime_references
!= 0;
5340 int64_t dtrace_accum_time
, recent_vtime
;
5341 thread_t thread
= current_thread();
5343 dtrace_accum_time
= dtrace_get_thread_tracing(thread
); /* Time spent inside DTrace so far (nanoseconds) */
5345 if (dtrace_accum_time
>= 0) {
5346 recent_vtime
= dtrace_abs_to_nano(dtrace_calc_thread_recent_vtime(thread
)); /* up to the moment thread vtime */
5348 recent_vtime
= recent_vtime
- dtrace_accum_time
; /* Time without DTrace contribution */
5350 dtrace_set_thread_vtime(thread
, recent_vtime
);
5354 now
= dtrace_gethrtime(); /* must not precede dtrace_calc_thread_recent_vtime() call! */
5355 #endif /* __APPLE__ */
5357 #if defined(__APPLE__)
5359 * A provider may call dtrace_probe_error() in lieu of dtrace_probe() in some circumstances.
5360 * See, e.g. fasttrap_isa.c. However the provider has no access to ECB context, so passes
 * NULL through "arg0" and the probe_id of the overridden probe as arg1.  Detect that here
5362 * and cons up a viable state (from the probe_id).
5364 if (dtrace_probeid_error
== id
&& NULL
== arg0
) {
5365 dtrace_id_t ftp_id
= (dtrace_id_t
)arg1
;
5366 dtrace_probe_t
*ftp_probe
= dtrace_probes
[ftp_id
- 1];
5367 dtrace_ecb_t
*ftp_ecb
= ftp_probe
->dtpr_ecb
;
5369 if (NULL
!= ftp_ecb
) {
5370 dtrace_state_t
*ftp_state
= ftp_ecb
->dte_state
;
5372 arg0
= (uint64_t)(uintptr_t)ftp_state
;
5373 arg1
= ftp_ecb
->dte_epid
;
5375 * args[2-4] established by caller.
5377 ftp_state
->dts_arg_error_illval
= -1; /* arg5 */
5380 #endif /* __APPLE__ */
5382 mstate
.dtms_probe
= probe
;
5383 mstate
.dtms_arg
[0] = arg0
;
5384 mstate
.dtms_arg
[1] = arg1
;
5385 mstate
.dtms_arg
[2] = arg2
;
5386 mstate
.dtms_arg
[3] = arg3
;
5387 mstate
.dtms_arg
[4] = arg4
;
5389 flags
= (volatile uint16_t *)&cpu_core
[cpuid
].cpuc_dtrace_flags
;
5391 for (ecb
= probe
->dtpr_ecb
; ecb
!= NULL
; ecb
= ecb
->dte_next
) {
5392 dtrace_predicate_t
*pred
= ecb
->dte_predicate
;
5393 dtrace_state_t
*state
= ecb
->dte_state
;
5394 dtrace_buffer_t
*buf
= &state
->dts_buffer
[cpuid
];
5395 dtrace_buffer_t
*aggbuf
= &state
->dts_aggbuffer
[cpuid
];
5396 dtrace_vstate_t
*vstate
= &state
->dts_vstate
;
5397 dtrace_provider_t
*prov
= probe
->dtpr_provider
;
5402 * A little subtlety with the following (seemingly innocuous)
5403 * declaration of the automatic 'val': by looking at the
5404 * code, you might think that it could be declared in the
5405 * action processing loop, below. (That is, it's only used in
5406 * the action processing loop.) However, it must be declared
5407 * out of that scope because in the case of DIF expression
5408 * arguments to aggregating actions, one iteration of the
5409 * action loop will use the last iteration's value.
5417 mstate
.dtms_present
= DTRACE_MSTATE_ARGS
| DTRACE_MSTATE_PROBE
;
5418 *flags
&= ~CPU_DTRACE_ERROR
;
5420 if (prov
== dtrace_provider
) {
5422 * If dtrace itself is the provider of this probe,
5423 * we're only going to continue processing the ECB if
5424 * arg0 (the dtrace_state_t) is equal to the ECB's
5425 * creating state. (This prevents disjoint consumers
5426 * from seeing one another's metaprobes.)
5428 if (arg0
!= (uint64_t)(uintptr_t)state
)
5432 if (state
->dts_activity
!= DTRACE_ACTIVITY_ACTIVE
) {
5434 * We're not currently active. If our provider isn't
5435 * the dtrace pseudo provider, we're not interested.
5437 if (prov
!= dtrace_provider
)
5441 * Now we must further check if we are in the BEGIN
5442 * probe. If we are, we will only continue processing
5443 * if we're still in WARMUP -- if one BEGIN enabling
5444 * has invoked the exit() action, we don't want to
5445 * evaluate subsequent BEGIN enablings.
5447 if (probe
->dtpr_id
== dtrace_probeid_begin
&&
5448 state
->dts_activity
!= DTRACE_ACTIVITY_WARMUP
) {
5449 ASSERT(state
->dts_activity
==
5450 DTRACE_ACTIVITY_DRAINING
);
5455 if (ecb
->dte_cond
) {
5457 * If the dte_cond bits indicate that this
5458 * consumer is only allowed to see user-mode firings
5459 * of this probe, call the provider's dtps_usermode()
5460 * entry point to check that the probe was fired
5461 * while in a user context. Skip this ECB if that's
5464 if ((ecb
->dte_cond
& DTRACE_COND_USERMODE
) &&
5465 prov
->dtpv_pops
.dtps_usermode(prov
->dtpv_arg
,
5466 probe
->dtpr_id
, probe
->dtpr_arg
) == 0)
5470 * This is more subtle than it looks. We have to be
5471 * absolutely certain that CRED() isn't going to
5472 * change out from under us so it's only legit to
5473 * examine that structure if we're in constrained
 * situations.  Currently, the only time we'll do this
 * check is if a non-super-user has enabled the
5476 * profile or syscall providers -- providers that
5477 * allow visibility of all processes. For the
5478 * profile case, the check above will ensure that
5479 * we're examining a user context.
5481 if (ecb
->dte_cond
& DTRACE_COND_OWNER
) {
5484 ecb
->dte_state
->dts_cred
.dcr_cred
;
5487 ASSERT(s_cr
!= NULL
);
5489 #if !defined(__APPLE__)
5490 if ((cr
= CRED()) == NULL
||
5492 if ((cr
= dtrace_CRED()) == NULL
||
5493 #endif /* __APPLE__ */
5494 s_cr
->cr_uid
!= cr
->cr_uid
||
5495 s_cr
->cr_uid
!= cr
->cr_ruid
||
5496 s_cr
->cr_uid
!= cr
->cr_suid
||
5497 s_cr
->cr_gid
!= cr
->cr_gid
||
5498 s_cr
->cr_gid
!= cr
->cr_rgid
||
5499 s_cr
->cr_gid
!= cr
->cr_sgid
||
5500 #if !defined(__APPLE__)
5501 (proc
= ttoproc(curthread
)) == NULL
||
5502 (proc
->p_flag
& SNOCD
))
5504 1) /* Darwin omits "No Core Dump" flag. */
5505 #endif /* __APPLE__ */
5509 if (ecb
->dte_cond
& DTRACE_COND_ZONEOWNER
) {
5512 ecb
->dte_state
->dts_cred
.dcr_cred
;
5514 ASSERT(s_cr
!= NULL
);
5516 #if !defined(__APPLE__) /* Darwin doesn't do zones. */
5517 if ((cr
= CRED()) == NULL
||
5518 s_cr
->cr_zone
->zone_id
!=
5519 cr
->cr_zone
->zone_id
)
5521 #endif /* __APPLE__ */
5525 if (now
- state
->dts_alive
> dtrace_deadman_timeout
) {
5527 * We seem to be dead. Unless we (a) have kernel
 * destructive permissions (b) have explicitly enabled
5529 * destructive actions and (c) destructive actions have
5530 * not been disabled, we're going to transition into
5531 * the KILLED state, from which no further processing
5532 * on this state will be performed.
5534 if (!dtrace_priv_kernel_destructive(state
) ||
5535 !state
->dts_cred
.dcr_destructive
||
5536 dtrace_destructive_disallow
) {
5537 void *activity
= &state
->dts_activity
;
5538 dtrace_activity_t current
;
5541 current
= state
->dts_activity
;
5542 } while (dtrace_cas32(activity
, current
,
5543 DTRACE_ACTIVITY_KILLED
) != current
);
5549 if ((offs
= dtrace_buffer_reserve(buf
, ecb
->dte_needed
,
5550 ecb
->dte_alignment
, state
, &mstate
)) < 0)
5553 tomax
= buf
->dtb_tomax
;
5554 ASSERT(tomax
!= NULL
);
5556 if (ecb
->dte_size
!= 0)
5557 DTRACE_STORE(uint32_t, tomax
, offs
, ecb
->dte_epid
);
5559 mstate
.dtms_epid
= ecb
->dte_epid
;
5560 mstate
.dtms_present
|= DTRACE_MSTATE_EPID
;
5563 dtrace_difo_t
*dp
= pred
->dtp_difo
;
5566 rval
= dtrace_dif_emulate(dp
, &mstate
, vstate
, state
);
5568 if (!(*flags
& CPU_DTRACE_ERROR
) && !rval
) {
5569 dtrace_cacheid_t cid
= probe
->dtpr_predcache
;
5571 if (cid
!= DTRACE_CACHEIDNONE
&& !onintr
) {
5573 * Update the predicate cache...
5575 ASSERT(cid
== pred
->dtp_cacheid
);
5576 #if !defined(__APPLE__)
5577 curthread
->t_predcache
= cid
;
5579 dtrace_set_thread_predcache(current_thread(), cid
);
5580 #endif /* __APPLE__ */
5587 for (act
= ecb
->dte_action
; !(*flags
& CPU_DTRACE_ERROR
) &&
5588 act
!= NULL
; act
= act
->dta_next
) {
5591 dtrace_recdesc_t
*rec
= &act
->dta_rec
;
5593 size
= rec
->dtrd_size
;
5594 valoffs
= offs
+ rec
->dtrd_offset
;
5596 if (DTRACEACT_ISAGG(act
->dta_kind
)) {
5598 dtrace_aggregation_t
*agg
;
5600 agg
= (dtrace_aggregation_t
*)act
;
5602 if ((dp
= act
->dta_difo
) != NULL
)
5603 v
= dtrace_dif_emulate(dp
,
5604 &mstate
, vstate
, state
);
5606 if (*flags
& CPU_DTRACE_ERROR
)
5610 * Note that we always pass the expression
5611 * value from the previous iteration of the
5612 * action loop. This value will only be used
5613 * if there is an expression argument to the
5614 * aggregating action, denoted by the
5615 * dtag_hasarg field.
5617 dtrace_aggregate(agg
, buf
,
5618 offs
, aggbuf
, v
, val
);
5622 switch (act
->dta_kind
) {
5623 case DTRACEACT_STOP
:
5624 if (dtrace_priv_proc_destructive(state
))
5625 dtrace_action_stop();
5628 case DTRACEACT_BREAKPOINT
:
5629 if (dtrace_priv_kernel_destructive(state
))
5630 dtrace_action_breakpoint(ecb
);
5633 case DTRACEACT_PANIC
:
5634 if (dtrace_priv_kernel_destructive(state
))
5635 dtrace_action_panic(ecb
);
5638 case DTRACEACT_STACK
:
5639 if (!dtrace_priv_kernel(state
))
5642 dtrace_getpcstack((pc_t
*)(tomax
+ valoffs
),
5643 size
/ sizeof (pc_t
), probe
->dtpr_aframes
,
5644 DTRACE_ANCHORED(probe
) ? NULL
:
5649 case DTRACEACT_JSTACK
:
5650 case DTRACEACT_USTACK
:
5651 if (!dtrace_priv_proc(state
))
5655 * See comment in DIF_VAR_PID.
5657 if (DTRACE_ANCHORED(mstate
.dtms_probe
) &&
5659 int depth
= DTRACE_USTACK_NFRAMES(
5662 dtrace_bzero((void *)(tomax
+ valoffs
),
5663 DTRACE_USTACK_STRSIZE(rec
->dtrd_arg
)
5664 + depth
* sizeof (uint64_t));
5669 if (DTRACE_USTACK_STRSIZE(rec
->dtrd_arg
) != 0 &&
5670 curproc
->p_dtrace_helpers
!= NULL
) {
5672 * This is the slow path -- we have
5673 * allocated string space, and we're
5674 * getting the stack of a process that
5675 * has helpers. Call into a separate
5676 * routine to perform this processing.
5678 dtrace_action_ustack(&mstate
, state
,
5679 (uint64_t *)(tomax
+ valoffs
),
5684 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
5685 dtrace_getupcstack((uint64_t *)
5687 DTRACE_USTACK_NFRAMES(rec
->dtrd_arg
) + 1);
5688 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
5698 val
= dtrace_dif_emulate(dp
, &mstate
, vstate
, state
);
5700 if (*flags
& CPU_DTRACE_ERROR
)
5703 switch (act
->dta_kind
) {
5704 case DTRACEACT_SPECULATE
:
5705 ASSERT(buf
== &state
->dts_buffer
[cpuid
]);
5706 buf
= dtrace_speculation_buffer(state
,
5710 *flags
|= CPU_DTRACE_DROP
;
5714 offs
= dtrace_buffer_reserve(buf
,
5715 ecb
->dte_needed
, ecb
->dte_alignment
,
5719 *flags
|= CPU_DTRACE_DROP
;
5723 tomax
= buf
->dtb_tomax
;
5724 ASSERT(tomax
!= NULL
);
5726 if (ecb
->dte_size
!= 0)
5727 DTRACE_STORE(uint32_t, tomax
, offs
,
5731 case DTRACEACT_CHILL
:
5732 if (dtrace_priv_kernel_destructive(state
))
5733 dtrace_action_chill(&mstate
, val
);
5736 case DTRACEACT_RAISE
:
5737 if (dtrace_priv_proc_destructive(state
))
5738 dtrace_action_raise(val
);
5741 case DTRACEACT_COMMIT
:
5745 * We need to commit our buffer state.
5748 buf
->dtb_offset
= offs
+ ecb
->dte_size
;
5749 buf
= &state
->dts_buffer
[cpuid
];
5750 dtrace_speculation_commit(state
, cpuid
, val
);
5754 case DTRACEACT_DISCARD
:
5755 dtrace_speculation_discard(state
, cpuid
, val
);
5758 case DTRACEACT_DIFEXPR
:
5759 case DTRACEACT_LIBACT
:
5760 case DTRACEACT_PRINTF
:
5761 case DTRACEACT_PRINTA
:
5762 case DTRACEACT_SYSTEM
:
5763 case DTRACEACT_FREOPEN
:
5768 if (!dtrace_priv_kernel(state
))
5772 #if !defined(__APPLE__)
5773 case DTRACEACT_USYM
:
5774 case DTRACEACT_UMOD
:
5775 case DTRACEACT_UADDR
: {
5776 struct pid
*pid
= curthread
->t_procp
->p_pidp
;
5778 if (!dtrace_priv_proc(state
))
5781 DTRACE_STORE(uint64_t, tomax
,
5782 valoffs
, (uint64_t)pid
->pid_id
);
5783 DTRACE_STORE(uint64_t, tomax
,
5784 valoffs
+ sizeof (uint64_t), val
);
5789 case DTRACEACT_USYM
:
5790 case DTRACEACT_UMOD
:
5791 case DTRACEACT_UADDR
: {
5792 if (!dtrace_priv_proc(state
))
5795 DTRACE_STORE(uint64_t, tomax
,
5796 valoffs
, (uint64_t)proc_selfpid());
5797 DTRACE_STORE(uint64_t, tomax
,
5798 valoffs
+ sizeof (uint64_t), val
);
5802 #endif /* __APPLE__ */
5804 case DTRACEACT_EXIT
: {
5806 * For the exit action, we are going to attempt
5807 * to atomically set our activity to be
5808 * draining. If this fails (either because
5809 * another CPU has beat us to the exit action,
5810 * or because our current activity is something
5811 * other than ACTIVE or WARMUP), we will
5812 * continue. This assures that the exit action
5813 * can be successfully recorded at most once
5814 * when we're in the ACTIVE state. If we're
5815 * encountering the exit() action while in
5816 * COOLDOWN, however, we want to honor the new
5817 * status code. (We know that we're the only
5818 * thread in COOLDOWN, so there is no race.)
5820 void *activity
= &state
->dts_activity
;
5821 dtrace_activity_t current
= state
->dts_activity
;
5823 if (current
== DTRACE_ACTIVITY_COOLDOWN
)
5826 if (current
!= DTRACE_ACTIVITY_WARMUP
)
5827 current
= DTRACE_ACTIVITY_ACTIVE
;
5829 if (dtrace_cas32(activity
, current
,
5830 DTRACE_ACTIVITY_DRAINING
) != current
) {
5831 *flags
|= CPU_DTRACE_DROP
;
5842 if (dp
->dtdo_rtype
.dtdt_flags
& DIF_TF_BYREF
) {
5843 uintptr_t end
= valoffs
+ size
;
5846 * If this is a string, we're going to only
5847 * load until we find the zero byte -- after
5848 * which we'll store zero bytes.
5850 if (dp
->dtdo_rtype
.dtdt_kind
==
5853 int intuple
= act
->dta_intuple
;
5856 for (s
= 0; s
< size
; s
++) {
5858 c
= dtrace_load8(val
++);
5860 DTRACE_STORE(uint8_t, tomax
,
5863 if (c
== '\0' && intuple
)
5870 while (valoffs
< end
) {
5871 DTRACE_STORE(uint8_t, tomax
, valoffs
++,
5872 dtrace_load8(val
++));
5882 case sizeof (uint8_t):
5883 DTRACE_STORE(uint8_t, tomax
, valoffs
, val
);
5885 case sizeof (uint16_t):
5886 DTRACE_STORE(uint16_t, tomax
, valoffs
, val
);
5888 case sizeof (uint32_t):
5889 DTRACE_STORE(uint32_t, tomax
, valoffs
, val
);
5891 case sizeof (uint64_t):
5892 DTRACE_STORE(uint64_t, tomax
, valoffs
, val
);
5896 * Any other size should have been returned by
5897 * reference, not by value.
5904 if (*flags
& CPU_DTRACE_DROP
)
5907 if (*flags
& CPU_DTRACE_FAULT
) {
5909 dtrace_action_t
*err
;
5913 if (probe
->dtpr_id
== dtrace_probeid_error
) {
5915 * There's nothing we can do -- we had an
5916 * error on the error probe. We bump an
5917 * error counter to at least indicate that
5918 * this condition happened.
5920 dtrace_error(&state
->dts_dblerrors
);
5926 * Before recursing on dtrace_probe(), we
5927 * need to explicitly clear out our start
5928 * time to prevent it from being accumulated
5929 * into t_dtrace_vtime.
5931 #if !defined(__APPLE__)
5932 curthread
->t_dtrace_start
= 0;
5934 /* Set the sign bit on t_dtrace_tracing to suspend accumulation to it. */
5935 dtrace_set_thread_tracing(current_thread(),
5936 (1ULL<<63) | dtrace_get_thread_tracing(current_thread()));
5937 #endif /* __APPLE__ */
5941 * Iterate over the actions to figure out which action
5942 * we were processing when we experienced the error.
5943 * Note that act points _past_ the faulting action; if
5944 * act is ecb->dte_action, the fault was in the
5945 * predicate, if it's ecb->dte_action->dta_next it's
5946 * in action #1, and so on.
5948 for (err
= ecb
->dte_action
, ndx
= 0;
5949 err
!= act
; err
= err
->dta_next
, ndx
++)
5952 dtrace_probe_error(state
, ecb
->dte_epid
, ndx
,
5953 (mstate
.dtms_present
& DTRACE_MSTATE_FLTOFFS
) ?
5954 mstate
.dtms_fltoffs
: -1, DTRACE_FLAGS2FLT(*flags
),
5955 cpu_core
[cpuid
].cpuc_dtrace_illval
);
5961 buf
->dtb_offset
= offs
+ ecb
->dte_size
;
5964 #if !defined(__APPLE__)
5966 curthread
->t_dtrace_start
= dtrace_gethrtime();
5969 thread_t thread
= current_thread();
5970 int64_t t
= dtrace_get_thread_tracing(thread
);
5973 /* Usual case, accumulate time spent here into t_dtrace_tracing */
5974 dtrace_set_thread_tracing(thread
, t
+ (dtrace_gethrtime() - now
));
5976 /* Return from error recursion. No accumulation, just clear the sign bit on t_dtrace_tracing. */
5977 dtrace_set_thread_tracing(thread
, (~(1ULL<<63)) & t
);
5980 #endif /* __APPLE__ */
5982 dtrace_interrupt_enable(cookie
);
#if defined(__APPLE__)
/* Don't allow a thread to re-enter dtrace_probe() */
void
dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
    uint64_t arg2, uint64_t arg3, uint64_t arg4)
{
	thread_t thread = current_thread();

	if (id == dtrace_probeid_error) {
		__dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
		dtrace_getfp(); /* Defeat tail-call optimization of __dtrace_probe() */
	} else if (!dtrace_get_thread_reentering(thread)) {
		dtrace_set_thread_reentering(thread, TRUE);
		__dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
		dtrace_set_thread_reentering(thread, FALSE);
	}
}
#endif /* __APPLE__ */
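/*
 * Illustrative usage sketch (not part of the original source): a provider
 * fires one of its probes from its instrumentation point by calling
 * dtrace_probe() with the probe ID it obtained from dtrace_probe_create()
 * and up to five argument words.  The names below are hypothetical.
 */
#if 0
static dtrace_id_t example_probe_id;	/* assumed to be set at creation time */

static void
example_instrumentation_point(uint64_t a0, uint64_t a1)
{
	/* Unused trailing arguments are conventionally passed as zero. */
	dtrace_probe(example_probe_id, a0, a1, 0, 0, 0);
}
#endif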
/*
 * DTrace Probe Hashing Functions
 *
 * The functions in this section (and indeed, the functions in remaining
 * sections) are not _called_ from probe context.  (Any exceptions to this are
 * marked with a "Note:".)  Rather, they are called from elsewhere in the
 * DTrace framework to look up probes in, add probes to and remove probes from
 * the DTrace probe hashes.  (Each probe is hashed by each element of the
 * probe tuple -- allowing for fast lookups, regardless of what was
 * specified.)
 */
static uint_t
dtrace_hash_str(char *p)
{
	unsigned int g;
	uint_t hval = 0;

	while (*p) {
		hval = (hval << 4) + *p++;
		if ((g = (hval & 0xf0000000)) != 0)
			hval ^= g >> 24;
		hval &= ~g;
	}
	return (hval);
}
static dtrace_hash_t *
dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs)
{
	dtrace_hash_t *hash = kmem_zalloc(sizeof (dtrace_hash_t), KM_SLEEP);

	hash->dth_stroffs = stroffs;
	hash->dth_nextoffs = nextoffs;
	hash->dth_prevoffs = prevoffs;

	hash->dth_size = 1;
	hash->dth_mask = hash->dth_size - 1;

	hash->dth_tab = kmem_zalloc(hash->dth_size *
	    sizeof (dtrace_hashbucket_t *), KM_SLEEP);

	return (hash);
}
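/*
 * Illustrative usage sketch (not part of the original source): the framework
 * builds one such hash per element of the probe tuple, keyed by the string at
 * the given offset within dtrace_probe_t and chained through the given
 * next/prev offsets -- along the lines of:
 */
#if 0
	dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod),
	    offsetof(dtrace_probe_t, dtpr_nextmod),
	    offsetof(dtrace_probe_t, dtpr_prevmod));
#endif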
#if !defined(__APPLE__) /* Quiet compiler warning */
static void
dtrace_hash_destroy(dtrace_hash_t *hash)
{
	int i;

	for (i = 0; i < hash->dth_size; i++)
		ASSERT(hash->dth_tab[i] == NULL);

	kmem_free(hash->dth_tab,
	    hash->dth_size * sizeof (dtrace_hashbucket_t *));
	kmem_free(hash, sizeof (dtrace_hash_t));
}
#endif /* __APPLE__ */
static void
dtrace_hash_resize(dtrace_hash_t *hash)
{
	int size = hash->dth_size, i, ndx;
	int new_size = hash->dth_size << 1;
	int new_mask = new_size - 1;
	dtrace_hashbucket_t **new_tab, *bucket, *next;

	ASSERT((new_size & new_mask) == 0);

	new_tab = kmem_zalloc(new_size * sizeof (void *), KM_SLEEP);

	for (i = 0; i < size; i++) {
		for (bucket = hash->dth_tab[i]; bucket != NULL; bucket = next) {
			dtrace_probe_t *probe = bucket->dthb_chain;

			ASSERT(probe != NULL);
			ndx = DTRACE_HASHSTR(hash, probe) & new_mask;

			next = bucket->dthb_next;
			bucket->dthb_next = new_tab[ndx];
			new_tab[ndx] = bucket;
		}
	}

	kmem_free(hash->dth_tab, hash->dth_size * sizeof (void *));
	hash->dth_tab = new_tab;
	hash->dth_size = new_size;
	hash->dth_mask = new_mask;
}
6097 dtrace_hash_add(dtrace_hash_t
*hash
, dtrace_probe_t
*new)
6099 int hashval
= DTRACE_HASHSTR(hash
, new);
6100 int ndx
= hashval
& hash
->dth_mask
;
6101 dtrace_hashbucket_t
*bucket
= hash
->dth_tab
[ndx
];
6102 dtrace_probe_t
**nextp
, **prevp
;
6104 for (; bucket
!= NULL
; bucket
= bucket
->dthb_next
) {
6105 if (DTRACE_HASHEQ(hash
, bucket
->dthb_chain
, new))
6109 if ((hash
->dth_nbuckets
>> 1) > hash
->dth_size
) {
6110 dtrace_hash_resize(hash
);
6111 dtrace_hash_add(hash
, new);
6115 bucket
= kmem_zalloc(sizeof (dtrace_hashbucket_t
), KM_SLEEP
);
6116 bucket
->dthb_next
= hash
->dth_tab
[ndx
];
6117 hash
->dth_tab
[ndx
] = bucket
;
6118 hash
->dth_nbuckets
++;
6121 nextp
= DTRACE_HASHNEXT(hash
, new);
6122 ASSERT(*nextp
== NULL
&& *(DTRACE_HASHPREV(hash
, new)) == NULL
);
6123 *nextp
= bucket
->dthb_chain
;
6125 if (bucket
->dthb_chain
!= NULL
) {
6126 prevp
= DTRACE_HASHPREV(hash
, bucket
->dthb_chain
);
6127 ASSERT(*prevp
== NULL
);
6131 bucket
->dthb_chain
= new;
static dtrace_probe_t *
dtrace_hash_lookup(dtrace_hash_t *hash, dtrace_probe_t *template)
{
	int hashval = DTRACE_HASHSTR(hash, template);
	int ndx = hashval & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
			return (bucket->dthb_chain);
	}

	return (NULL);
}

static int
dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template)
{
	int hashval = DTRACE_HASHSTR(hash, template);
	int ndx = hashval & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
			return (bucket->dthb_len);
	}

	return (0);
}
6166 dtrace_hash_remove(dtrace_hash_t
*hash
, dtrace_probe_t
*probe
)
6168 int ndx
= DTRACE_HASHSTR(hash
, probe
) & hash
->dth_mask
;
6169 dtrace_hashbucket_t
*bucket
= hash
->dth_tab
[ndx
];
6171 dtrace_probe_t
**prevp
= DTRACE_HASHPREV(hash
, probe
);
6172 dtrace_probe_t
**nextp
= DTRACE_HASHNEXT(hash
, probe
);
6175 * Find the bucket that we're removing this probe from.
6177 for (; bucket
!= NULL
; bucket
= bucket
->dthb_next
) {
6178 if (DTRACE_HASHEQ(hash
, bucket
->dthb_chain
, probe
))
6182 ASSERT(bucket
!= NULL
);
6184 if (*prevp
== NULL
) {
6185 if (*nextp
== NULL
) {
6187 * The removed probe was the only probe on this
6188 * bucket; we need to remove the bucket.
6190 dtrace_hashbucket_t
*b
= hash
->dth_tab
[ndx
];
6192 ASSERT(bucket
->dthb_chain
== probe
);
6196 hash
->dth_tab
[ndx
] = bucket
->dthb_next
;
6198 while (b
->dthb_next
!= bucket
)
6200 b
->dthb_next
= bucket
->dthb_next
;
6203 ASSERT(hash
->dth_nbuckets
> 0);
6204 hash
->dth_nbuckets
--;
6205 kmem_free(bucket
, sizeof (dtrace_hashbucket_t
));
6209 bucket
->dthb_chain
= *nextp
;
6211 *(DTRACE_HASHNEXT(hash
, *prevp
)) = *nextp
;
6215 *(DTRACE_HASHPREV(hash
, *nextp
)) = *prevp
;
/*
 * DTrace Utility Functions
 *
 * These are random utility functions that are _not_ called from probe
 * context.
 */
static int
dtrace_badattr(const dtrace_attribute_t *a)
{
	return (a->dtat_name > DTRACE_STABILITY_MAX ||
	    a->dtat_data > DTRACE_STABILITY_MAX ||
	    a->dtat_class > DTRACE_CLASS_MAX);
}
/*
 * Return a duplicate copy of a string.  If the specified string is NULL,
 * this function returns a zero-length string.
 */
static char *
dtrace_strdup(const char *str)
{
	char *new = kmem_zalloc((str != NULL ? strlen(str) : 0) + 1, KM_SLEEP);

	if (str != NULL)
		(void) strcpy(new, str);

	return (new);
}

#define	DTRACE_ISALPHA(c)	\
	(((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))

static int
dtrace_badname(const char *s)
{
	char c;

	if (s == NULL || (c = *s++) == '\0')
		return (0);

	if (!DTRACE_ISALPHA(c) && c != '-' && c != '_' && c != '.')
		return (1);

	while ((c = *s++) != '\0') {
		if (!DTRACE_ISALPHA(c) && (c < '0' || c > '9') &&
		    c != '-' && c != '_' && c != '.' && c != '`')
			return (1);
	}

	return (0);
}
static void
dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp)
{
	uint32_t priv;

	if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
		/*
		 * For DTRACE_PRIV_ALL, the uid and zoneid don't matter.
		 */
		priv = DTRACE_PRIV_ALL;
	} else {
		*uidp = crgetuid(cr);
		*zoneidp = crgetzoneid(cr);

		priv = 0;
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE))
			priv |= DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER;
		else if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE))
			priv |= DTRACE_PRIV_USER;
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE))
			priv |= DTRACE_PRIV_PROC;
		if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
			priv |= DTRACE_PRIV_OWNER;
		if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
			priv |= DTRACE_PRIV_ZONEOWNER;
	}

	*privp = priv;
}
#ifdef DTRACE_ERRDEBUG
static void
dtrace_errdebug(const char *str)
{
	int hval = dtrace_hash_str((char *)str) % DTRACE_ERRHASHSZ;
	int occupied = 0;

	lck_mtx_lock(&dtrace_errlock);
	dtrace_errlast = str;
#if !defined(__APPLE__)
	dtrace_errthread = curthread;
#else
	dtrace_errthread = current_thread();
#endif /* __APPLE__ */

	while (occupied++ < DTRACE_ERRHASHSZ) {
		if (dtrace_errhash[hval].dter_msg == str) {
			dtrace_errhash[hval].dter_count++;
			goto out;
		}

		if (dtrace_errhash[hval].dter_msg != NULL) {
			hval = (hval + 1) % DTRACE_ERRHASHSZ;
			continue;
		}

		dtrace_errhash[hval].dter_msg = str;
		dtrace_errhash[hval].dter_count = 1;
		goto out;
	}

	panic("dtrace: undersized error hash");
out:
	lck_mtx_unlock(&dtrace_errlock);
}
#endif
6337 * DTrace Matching Functions
6339 * These functions are used to match groups of probes, given some elements of
6340 * a probe tuple, or some globbed expressions for elements of a probe tuple.
6343 dtrace_match_priv(const dtrace_probe_t
*prp
, uint32_t priv
, uid_t uid
,
6346 if (priv
!= DTRACE_PRIV_ALL
) {
6347 uint32_t ppriv
= prp
->dtpr_provider
->dtpv_priv
.dtpp_flags
;
6348 uint32_t match
= priv
& ppriv
;
6351 * No PRIV_DTRACE_* privileges...
6353 if ((priv
& (DTRACE_PRIV_PROC
| DTRACE_PRIV_USER
|
6354 DTRACE_PRIV_KERNEL
)) == 0)
6358 * No matching bits, but there were bits to match...
6360 if (match
== 0 && ppriv
!= 0)
6364 * Need to have permissions to the process, but don't...
6366 if (((ppriv
& ~match
) & DTRACE_PRIV_OWNER
) != 0 &&
6367 uid
!= prp
->dtpr_provider
->dtpv_priv
.dtpp_uid
) {
6372 * Need to be in the same zone unless we possess the
6373 * privilege to examine all zones.
6375 if (((ppriv
& ~match
) & DTRACE_PRIV_ZONEOWNER
) != 0 &&
6376 zoneid
!= prp
->dtpr_provider
->dtpv_priv
.dtpp_zoneid
) {
6385 * dtrace_match_probe compares a dtrace_probe_t to a pre-compiled key, which
6386 * consists of input pattern strings and an ops-vector to evaluate them.
6387 * This function returns >0 for match, 0 for no match, and <0 for error.
6390 dtrace_match_probe(const dtrace_probe_t
*prp
, const dtrace_probekey_t
*pkp
,
6391 uint32_t priv
, uid_t uid
, zoneid_t zoneid
)
6393 dtrace_provider_t
*pvp
= prp
->dtpr_provider
;
6396 if (pvp
->dtpv_defunct
)
6399 if ((rv
= pkp
->dtpk_pmatch(pvp
->dtpv_name
, pkp
->dtpk_prov
, 0)) <= 0)
6402 if ((rv
= pkp
->dtpk_mmatch(prp
->dtpr_mod
, pkp
->dtpk_mod
, 0)) <= 0)
6405 if ((rv
= pkp
->dtpk_fmatch(prp
->dtpr_func
, pkp
->dtpk_func
, 0)) <= 0)
6408 if ((rv
= pkp
->dtpk_nmatch(prp
->dtpr_name
, pkp
->dtpk_name
, 0)) <= 0)
6411 if (dtrace_match_priv(prp
, priv
, uid
, zoneid
) == 0)
6418 * dtrace_match_glob() is a safe kernel implementation of the gmatch(3GEN)
6419 * interface for matching a glob pattern 'p' to an input string 's'. Unlike
6420 * libc's version, the kernel version only applies to 8-bit ASCII strings.
6421 * In addition, all of the recursion cases except for '*' matching have been
6422 * unwound. For '*', we still implement recursive evaluation, but a depth
6423 * counter is maintained and matching is aborted if we recurse too deep.
6424 * The function returns 0 if no match, >0 if match, and <0 if recursion error.
6427 dtrace_match_glob(const char *s
, const char *p
, int depth
)
6433 if (depth
> DTRACE_PROBEKEY_MAXDEPTH
)
6437 s
= ""; /* treat NULL as empty string */
6446 if ((c
= *p
++) == '\0')
6447 return (s1
== '\0');
6451 int ok
= 0, notflag
= 0;
6462 if ((c
= *p
++) == '\0')
6466 if (c
== '-' && lc
!= '\0' && *p
!= ']') {
6467 if ((c
= *p
++) == '\0')
6469 if (c
== '\\' && (c
= *p
++) == '\0')
6473 if (s1
< lc
|| s1
> c
)
6477 } else if (lc
<= s1
&& s1
<= c
)
6480 } else if (c
== '\\' && (c
= *p
++) == '\0')
6483 lc
= c
; /* save left-hand 'c' for next iteration */
6493 if ((c
= *p
++) == '\0')
6505 if ((c
= *p
++) == '\0')
6521 p
++; /* consecutive *'s are identical to a single one */
6526 for (s
= olds
; *s
!= '\0'; s
++) {
6527 if ((gs
= dtrace_match_glob(s
, p
, depth
+ 1)) != 0)
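/*
 * Examples of the matching rules above (added for illustration; not part of
 * the original source): "read*" matches "read" and "readv"; "read?" matches
 * "readv" but not "read"; "[Ff]oo" matches "foo" and "Foo"; a backslash
 * escapes the character that follows it.
 */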
/*ARGSUSED*/
static int
dtrace_match_string(const char *s, const char *p, int depth)
{
	return (s != NULL && strcmp(s, p) == 0);
}

/*ARGSUSED*/
static int
dtrace_match_nul(const char *s, const char *p, int depth)
{
#pragma unused(s,p,depth)
	return (1); /* always match the empty pattern */
}

/*ARGSUSED*/
static int
dtrace_match_nonzero(const char *s, const char *p, int depth)
{
#pragma unused(p,depth)
	return (s != NULL && s[0] != '\0');
}
6559 dtrace_match(const dtrace_probekey_t
*pkp
, uint32_t priv
, uid_t uid
,
6560 zoneid_t zoneid
, int (*matched
)(dtrace_probe_t
*, void *), void *arg
)
6562 dtrace_probe_t
template, *probe
;
6563 dtrace_hash_t
*hash
= NULL
;
6564 int len
, best
= INT_MAX
, nmatched
= 0;
6567 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
6570 * If the probe ID is specified in the key, just lookup by ID and
6571 * invoke the match callback once if a matching probe is found.
6573 if (pkp
->dtpk_id
!= DTRACE_IDNONE
) {
6574 if ((probe
= dtrace_probe_lookup_id(pkp
->dtpk_id
)) != NULL
&&
6575 dtrace_match_probe(probe
, pkp
, priv
, uid
, zoneid
) > 0) {
6576 (void) (*matched
)(probe
, arg
);
6582 template.dtpr_mod
= (char *)pkp
->dtpk_mod
;
6583 template.dtpr_func
= (char *)pkp
->dtpk_func
;
6584 template.dtpr_name
= (char *)pkp
->dtpk_name
;
6587 * We want to find the most distinct of the module name, function
6588 * name, and name. So for each one that is not a glob pattern or
6589 * empty string, we perform a lookup in the corresponding hash and
6590 * use the hash table with the fewest collisions to do our search.
6592 if (pkp
->dtpk_mmatch
== &dtrace_match_string
&&
6593 (len
= dtrace_hash_collisions(dtrace_bymod
, &template)) < best
) {
6595 hash
= dtrace_bymod
;
6598 if (pkp
->dtpk_fmatch
== &dtrace_match_string
&&
6599 (len
= dtrace_hash_collisions(dtrace_byfunc
, &template)) < best
) {
6601 hash
= dtrace_byfunc
;
6604 if (pkp
->dtpk_nmatch
== &dtrace_match_string
&&
6605 (len
= dtrace_hash_collisions(dtrace_byname
, &template)) < best
) {
6607 hash
= dtrace_byname
;
6611 * If we did not select a hash table, iterate over every probe and
6612 * invoke our callback for each one that matches our input probe key.
6615 for (i
= 0; i
< dtrace_nprobes
; i
++) {
6616 if ((probe
= dtrace_probes
[i
]) == NULL
||
6617 dtrace_match_probe(probe
, pkp
, priv
, uid
,
6623 if ((*matched
)(probe
, arg
) != DTRACE_MATCH_NEXT
)
6631 * If we selected a hash table, iterate over each probe of the same key
6632 * name and invoke the callback for every probe that matches the other
6633 * attributes of our input probe key.
6635 for (probe
= dtrace_hash_lookup(hash
, &template); probe
!= NULL
;
6636 probe
= *(DTRACE_HASHNEXT(hash
, probe
))) {
6638 if (dtrace_match_probe(probe
, pkp
, priv
, uid
, zoneid
) <= 0)
6643 if ((*matched
)(probe
, arg
) != DTRACE_MATCH_NEXT
)
/*
 * Return the function pointer dtrace_probecmp() should use to compare the
 * specified pattern with a string.  For NULL or empty patterns, we select
 * dtrace_match_nul().  For glob pattern strings, we use dtrace_match_glob().
 * For non-empty non-glob strings, we use dtrace_match_string().
 */
static dtrace_probekey_f *
dtrace_probekey_func(const char *p)
{
	char c;

	if (p == NULL || *p == '\0')
		return (&dtrace_match_nul);

	while ((c = *p++) != '\0') {
		if (c == '[' || c == '?' || c == '*' || c == '\\')
			return (&dtrace_match_glob);
	}

	return (&dtrace_match_string);
}
/*
 * Build a probe comparison key for use with dtrace_match_probe() from the
 * given probe description.  By convention, a null key only matches anchored
 * probes: if each field is the empty string, reset dtpk_fmatch to
 * dtrace_match_nonzero().
 */
static void
dtrace_probekey(const dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp)
{
	pkp->dtpk_prov = pdp->dtpd_provider;
	pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider);

	pkp->dtpk_mod = pdp->dtpd_mod;
	pkp->dtpk_mmatch = dtrace_probekey_func(pdp->dtpd_mod);

	pkp->dtpk_func = pdp->dtpd_func;
	pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func);

	pkp->dtpk_name = pdp->dtpd_name;
	pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name);

	pkp->dtpk_id = pdp->dtpd_id;

	if (pkp->dtpk_id == DTRACE_IDNONE &&
	    pkp->dtpk_pmatch == &dtrace_match_nul &&
	    pkp->dtpk_mmatch == &dtrace_match_nul &&
	    pkp->dtpk_fmatch == &dtrace_match_nul &&
	    pkp->dtpk_nmatch == &dtrace_match_nul)
		pkp->dtpk_fmatch = &dtrace_match_nonzero;
}
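/*
 * Illustrative usage sketch (not part of the original source): callers that
 * hold dtrace_lock typically turn a probe description into a key and hand it
 * to dtrace_match(), along these lines; the function name is hypothetical.
 */
#if 0
static int
example_match_desc(const dtrace_probedesc_t *desc, cred_t *cr,
    int (*matched)(dtrace_probe_t *, void *), void *arg)
{
	dtrace_probekey_t pkey;
	uint32_t priv;
	uid_t uid;
	zoneid_t zoneid;

	dtrace_probekey(desc, &pkey);
	dtrace_cred2priv(cr, &priv, &uid, &zoneid);

	return (dtrace_match(&pkey, priv, uid, zoneid, matched, arg));
}
#endif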
6704 * DTrace Provider-to-Framework API Functions
6706 * These functions implement much of the Provider-to-Framework API, as
6707 * described in <sys/dtrace.h>. The parts of the API not in this section are
6708 * the functions in the API for probe management (found below), and
6709 * dtrace_probe() itself (found above).
6713 * Register the calling provider with the DTrace framework. This should
6714 * generally be called by DTrace providers in their attach(9E) entry point.
6717 dtrace_register(const char *name
, const dtrace_pattr_t
*pap
, uint32_t priv
,
6718 cred_t
*cr
, const dtrace_pops_t
*pops
, void *arg
, dtrace_provider_id_t
*idp
)
6720 dtrace_provider_t
*provider
;
6722 if (name
== NULL
|| pap
== NULL
|| pops
== NULL
|| idp
== NULL
) {
6723 cmn_err(CE_WARN
, "failed to register provider '%s': invalid "
6724 "arguments", name
? name
: "<NULL>");
6728 if (name
[0] == '\0' || dtrace_badname(name
)) {
6729 cmn_err(CE_WARN
, "failed to register provider '%s': invalid "
6730 "provider name", name
);
6734 if ((pops
->dtps_provide
== NULL
&& pops
->dtps_provide_module
== NULL
) ||
6735 pops
->dtps_enable
== NULL
|| pops
->dtps_disable
== NULL
||
6736 pops
->dtps_destroy
== NULL
||
6737 ((pops
->dtps_resume
== NULL
) != (pops
->dtps_suspend
== NULL
))) {
6738 cmn_err(CE_WARN
, "failed to register provider '%s': invalid "
6739 "provider ops", name
);
6743 if (dtrace_badattr(&pap
->dtpa_provider
) ||
6744 dtrace_badattr(&pap
->dtpa_mod
) ||
6745 dtrace_badattr(&pap
->dtpa_func
) ||
6746 dtrace_badattr(&pap
->dtpa_name
) ||
6747 dtrace_badattr(&pap
->dtpa_args
)) {
6748 cmn_err(CE_WARN
, "failed to register provider '%s': invalid "
6749 "provider attributes", name
);
6753 if (priv
& ~DTRACE_PRIV_ALL
) {
6754 cmn_err(CE_WARN
, "failed to register provider '%s': invalid "
6755 "privilege attributes", name
);
6759 if ((priv
& DTRACE_PRIV_KERNEL
) &&
6760 (priv
& (DTRACE_PRIV_USER
| DTRACE_PRIV_OWNER
)) &&
6761 pops
->dtps_usermode
== NULL
) {
6762 cmn_err(CE_WARN
, "failed to register provider '%s': need "
6763 "dtps_usermode() op for given privilege attributes", name
);
6767 provider
= kmem_zalloc(sizeof (dtrace_provider_t
), KM_SLEEP
);
6768 provider
->dtpv_name
= kmem_alloc(strlen(name
) + 1, KM_SLEEP
);
6769 (void) strcpy(provider
->dtpv_name
, name
);
6771 provider
->dtpv_attr
= *pap
;
6772 provider
->dtpv_priv
.dtpp_flags
= priv
;
6774 provider
->dtpv_priv
.dtpp_uid
= crgetuid(cr
);
6775 provider
->dtpv_priv
.dtpp_zoneid
= crgetzoneid(cr
);
6777 provider
->dtpv_pops
= *pops
;
6779 if (pops
->dtps_provide
== NULL
) {
6780 ASSERT(pops
->dtps_provide_module
!= NULL
);
6781 provider
->dtpv_pops
.dtps_provide
=
6782 (void (*)(void *, const dtrace_probedesc_t
*))dtrace_nullop
;
6785 if (pops
->dtps_provide_module
== NULL
) {
6786 ASSERT(pops
->dtps_provide
!= NULL
);
6787 provider
->dtpv_pops
.dtps_provide_module
=
6788 (void (*)(void *, struct modctl
*))dtrace_nullop
;
6791 if (pops
->dtps_suspend
== NULL
) {
6792 ASSERT(pops
->dtps_resume
== NULL
);
6793 provider
->dtpv_pops
.dtps_suspend
=
6794 (void (*)(void *, dtrace_id_t
, void *))dtrace_nullop
;
6795 provider
->dtpv_pops
.dtps_resume
=
6796 (void (*)(void *, dtrace_id_t
, void *))dtrace_nullop
;
6799 provider
->dtpv_arg
= arg
;
6800 *idp
= (dtrace_provider_id_t
)provider
;
6802 if (pops
== &dtrace_provider_ops
) {
6803 lck_mtx_assert(&dtrace_provider_lock
, LCK_MTX_ASSERT_OWNED
);
6804 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
6805 ASSERT(dtrace_anon
.dta_enabling
== NULL
);
6808 * We make sure that the DTrace provider is at the head of
6809 * the provider chain.
6811 provider
->dtpv_next
= dtrace_provider
;
6812 dtrace_provider
= provider
;
6816 lck_mtx_lock(&dtrace_provider_lock
);
6817 lck_mtx_lock(&dtrace_lock
);
6820 * If there is at least one provider registered, we'll add this
6821 * provider after the first provider.
6823 if (dtrace_provider
!= NULL
) {
6824 provider
->dtpv_next
= dtrace_provider
->dtpv_next
;
6825 dtrace_provider
->dtpv_next
= provider
;
6827 dtrace_provider
= provider
;
6830 if (dtrace_retained
!= NULL
) {
6831 dtrace_enabling_provide(provider
);
6834 * Now we need to call dtrace_enabling_matchall() -- which
6835 * will acquire cpu_lock and dtrace_lock. We therefore need
6836 * to drop all of our locks before calling into it...
6838 lck_mtx_unlock(&dtrace_lock
);
6839 lck_mtx_unlock(&dtrace_provider_lock
);
6840 dtrace_enabling_matchall();
6845 lck_mtx_unlock(&dtrace_lock
);
6846 lck_mtx_unlock(&dtrace_provider_lock
);
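/*
 * Illustrative usage sketch (not part of the original source): a provider's
 * attach routine registers itself roughly as follows.  The "example_*" names
 * are hypothetical; example_attr and example_pops stand for a filled-in
 * dtrace_pattr_t and dtrace_pops_t supplying the entry points validated
 * above (provide or provide_module, enable, disable, destroy, and matched
 * suspend/resume).
 */
#if 0
static dtrace_pattr_t example_attr;		/* hypothetical, filled in elsewhere */
static dtrace_pops_t example_pops;		/* hypothetical, filled in elsewhere */
static dtrace_provider_id_t example_id;

static int
example_attach(void)
{
	if (dtrace_register("example", &example_attr, DTRACE_PRIV_KERNEL,
	    NULL, &example_pops, NULL, &example_id) != 0)
		return (-1);

	return (0);
}
#endif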
6852 * Unregister the specified provider from the DTrace framework. This should
6853 * generally be called by DTrace providers in their detach(9E) entry point.
6856 dtrace_unregister(dtrace_provider_id_t id
)
6858 dtrace_provider_t
*old
= (dtrace_provider_t
*)id
;
6859 dtrace_provider_t
*prev
= NULL
;
6861 dtrace_probe_t
*probe
, *first
= NULL
;
6863 if (old
->dtpv_pops
.dtps_enable
==
6864 (void (*)(void *, dtrace_id_t
, void *))dtrace_nullop
) {
6866 * If DTrace itself is the provider, we're called with locks
6869 ASSERT(old
== dtrace_provider
);
6870 ASSERT(dtrace_devi
!= NULL
);
6871 lck_mtx_assert(&dtrace_provider_lock
, LCK_MTX_ASSERT_OWNED
);
6872 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
6876 if (dtrace_provider
->dtpv_next
!= NULL
) {
6878 * There's another provider here; return failure.
6883 lck_mtx_lock(&dtrace_provider_lock
);
6884 lck_mtx_lock(&mod_lock
);
6885 lck_mtx_lock(&dtrace_lock
);
6889 * If anyone has /dev/dtrace open, or if there are anonymous enabled
6890 * probes, we refuse to let providers slither away, unless this
6891 * provider has already been explicitly invalidated.
6893 if (!old
->dtpv_defunct
&&
6894 (dtrace_opens
|| (dtrace_anon
.dta_state
!= NULL
&&
6895 dtrace_anon
.dta_state
->dts_necbs
> 0))) {
6897 lck_mtx_unlock(&dtrace_lock
);
6898 lck_mtx_unlock(&mod_lock
);
6899 lck_mtx_unlock(&dtrace_provider_lock
);
6905 * Attempt to destroy the probes associated with this provider.
6907 for (i
= 0; i
< dtrace_nprobes
; i
++) {
6908 if ((probe
= dtrace_probes
[i
]) == NULL
)
6911 if (probe
->dtpr_provider
!= old
)
6914 if (probe
->dtpr_ecb
== NULL
)
6918 * We have at least one ECB; we can't remove this provider.
6921 lck_mtx_unlock(&dtrace_lock
);
6922 lck_mtx_unlock(&mod_lock
);
6923 lck_mtx_unlock(&dtrace_provider_lock
);
6929 * All of the probes for this provider are disabled; we can safely
6930 * remove all of them from their hash chains and from the probe array.
6932 for (i
= 0; i
< dtrace_nprobes
; i
++) {
6933 if ((probe
= dtrace_probes
[i
]) == NULL
)
6936 if (probe
->dtpr_provider
!= old
)
6939 dtrace_probes
[i
] = NULL
;
6941 dtrace_hash_remove(dtrace_bymod
, probe
);
6942 dtrace_hash_remove(dtrace_byfunc
, probe
);
6943 dtrace_hash_remove(dtrace_byname
, probe
);
6945 if (first
== NULL
) {
6947 probe
->dtpr_nextmod
= NULL
;
6949 probe
->dtpr_nextmod
= first
;
6955 * The provider's probes have been removed from the hash chains and
6956 * from the probe array. Now issue a dtrace_sync() to be sure that
6957 * everyone has cleared out from any probe array processing.
6961 for (probe
= first
; probe
!= NULL
; probe
= first
) {
6962 first
= probe
->dtpr_nextmod
;
6964 old
->dtpv_pops
.dtps_destroy(old
->dtpv_arg
, probe
->dtpr_id
,
6966 kmem_free(probe
->dtpr_mod
, strlen(probe
->dtpr_mod
) + 1);
6967 kmem_free(probe
->dtpr_func
, strlen(probe
->dtpr_func
) + 1);
6968 kmem_free(probe
->dtpr_name
, strlen(probe
->dtpr_name
) + 1);
6969 vmem_free(dtrace_arena
, (void *)(uintptr_t)(probe
->dtpr_id
), 1);
6970 #if !defined(__APPLE__)
6971 kmem_free(probe
, sizeof (dtrace_probe_t
));
6973 zfree(dtrace_probe_t_zone
, probe
);
6977 if ((prev
= dtrace_provider
) == old
) {
6978 ASSERT(self
|| dtrace_devi
== NULL
);
6979 ASSERT(old
->dtpv_next
== NULL
|| dtrace_devi
== NULL
);
6980 dtrace_provider
= old
->dtpv_next
;
6982 while (prev
!= NULL
&& prev
->dtpv_next
!= old
)
6983 prev
= prev
->dtpv_next
;
6986 panic("attempt to unregister non-existent "
6987 "dtrace provider %p\n", (void *)id
);
6990 prev
->dtpv_next
= old
->dtpv_next
;
6994 lck_mtx_unlock(&dtrace_lock
);
6995 lck_mtx_unlock(&mod_lock
);
6996 lck_mtx_unlock(&dtrace_provider_lock
);
6999 kmem_free(old
->dtpv_name
, strlen(old
->dtpv_name
) + 1);
7000 kmem_free(old
, sizeof (dtrace_provider_t
));
/*
 * Invalidate the specified provider.  All subsequent probe lookups for the
 * specified provider will fail, but its probes will not be removed.
 */
void
dtrace_invalidate(dtrace_provider_id_t id)
{
	dtrace_provider_t *pvp = (dtrace_provider_t *)id;

	ASSERT(pvp->dtpv_pops.dtps_enable !=
	    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop);

	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&dtrace_lock);

	pvp->dtpv_defunct = 1;

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_provider_lock);
}

/*
 * Indicate whether or not DTrace has attached.
 */
int
dtrace_attached(void)
{
	/*
	 * dtrace_provider will be non-NULL iff the DTrace driver has
	 * attached.  (It's non-NULL because DTrace is always itself a
	 * provider.)
	 */
	return (dtrace_provider != NULL);
}
/*
 * Remove all the unenabled probes for the given provider.  This function is
 * not unlike dtrace_unregister(), except that it doesn't remove the provider
 * -- just as many of its associated probes as it can.
 */
int
dtrace_condense(dtrace_provider_id_t id)
{
	dtrace_provider_t *prov = (dtrace_provider_t *)id;
	int i;
	dtrace_probe_t *probe;

	/*
	 * Make sure this isn't the dtrace provider itself.
	 */
	ASSERT(prov->dtpv_pops.dtps_enable !=
	    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop);

	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&dtrace_lock);

	/*
	 * Attempt to destroy the probes associated with this provider.
	 */
	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_provider != prov)
			continue;

		if (probe->dtpr_ecb != NULL)
			continue;

		dtrace_probes[i] = NULL;

		dtrace_hash_remove(dtrace_bymod, probe);
		dtrace_hash_remove(dtrace_byfunc, probe);
		dtrace_hash_remove(dtrace_byname, probe);

		prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, i + 1,
		    probe->dtpr_arg);
		kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
		kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
		kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
#if !defined(__APPLE__)
		kmem_free(probe, sizeof (dtrace_probe_t));
#else
		zfree(dtrace_probe_t_zone, probe);
#endif
		vmem_free(dtrace_arena, (void *)((uintptr_t)i + 1), 1);
	}

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_provider_lock);

	return (0);
}
/*
 * DTrace Probe Management Functions
 *
 * The functions in this section perform the DTrace probe management,
 * including functions to create probes, look-up probes, and call into the
 * providers to request that probes be provided.  Some of these functions are
 * in the Provider-to-Framework API; these functions can be identified by the
 * fact that they are not declared "static".
 */

/*
 * Create a probe with the specified module name, function name, and name.
 */
dtrace_id_t
dtrace_probe_create(dtrace_provider_id_t prov, const char *mod,
    const char *func, const char *name, int aframes, void *arg)
{
	dtrace_probe_t *probe, **probes;
	dtrace_provider_t *provider = (dtrace_provider_t *)prov;
	dtrace_id_t id;

	if (provider == dtrace_provider) {
		lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	} else {
		lck_mtx_lock(&dtrace_lock);
	}

	id = (dtrace_id_t)(uintptr_t)vmem_alloc(dtrace_arena, 1,
	    VM_BESTFIT | VM_SLEEP);
#if !defined(__APPLE__)
	probe = kmem_zalloc(sizeof (dtrace_probe_t), KM_SLEEP);
#else
	probe = zalloc(dtrace_probe_t_zone);
	bzero(probe, sizeof (dtrace_probe_t));
#endif

	probe->dtpr_id = id;
	probe->dtpr_gen = dtrace_probegen++;
	probe->dtpr_mod = dtrace_strdup(mod);
	probe->dtpr_func = dtrace_strdup(func);
	probe->dtpr_name = dtrace_strdup(name);
	probe->dtpr_arg = arg;
	probe->dtpr_aframes = aframes;
	probe->dtpr_provider = provider;

	dtrace_hash_add(dtrace_bymod, probe);
	dtrace_hash_add(dtrace_byfunc, probe);
	dtrace_hash_add(dtrace_byname, probe);

	if (id - 1 >= dtrace_nprobes) {
		size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *);
		size_t nsize = osize << 1;

		if (nsize == 0) {
			ASSERT(osize == 0);
			ASSERT(dtrace_probes == NULL);
			nsize = sizeof (dtrace_probe_t *);
		}

		probes = kmem_zalloc(nsize, KM_SLEEP);

		if (dtrace_probes == NULL) {
			ASSERT(osize == 0);
			dtrace_probes = probes;
			dtrace_nprobes = 1;
		} else {
			dtrace_probe_t **oprobes = dtrace_probes;

			bcopy(oprobes, probes, osize);
			dtrace_membar_producer();
			dtrace_probes = probes;

			dtrace_sync();

			/*
			 * All CPUs are now seeing the new probes array; we can
			 * safely free the old array.
			 */
			kmem_free(oprobes, osize);
			dtrace_nprobes <<= 1;
		}

		ASSERT(id - 1 < dtrace_nprobes);
	}

	ASSERT(dtrace_probes[id - 1] == NULL);
	dtrace_probes[id - 1] = probe;

	if (provider != dtrace_provider)
		lck_mtx_unlock(&dtrace_lock);

	return (id);
}
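
/*
 * The dtrace_id_t returned above is 1-based: probe id N lives at
 * dtrace_probes[N - 1].  A provider retains this id so that it can later be
 * handed back through its dtps_enable() and dtps_disable() entry points and
 * through dtrace_probe_arg().
 */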
static dtrace_probe_t *
dtrace_probe_lookup_id(dtrace_id_t id)
{
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (id == 0 || id > dtrace_nprobes)
		return (NULL);

	return (dtrace_probes[id - 1]);
}

static int
dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg)
{
	*((dtrace_id_t *)arg) = probe->dtpr_id;

	return (DTRACE_MATCH_DONE);
}
/*
 * Look up a probe based on provider and one or more of module name, function
 * name and probe name.
 */
dtrace_id_t
dtrace_probe_lookup(dtrace_provider_id_t prid, const char *mod,
    const char *func, const char *name)
{
	dtrace_probekey_t pkey;
	dtrace_id_t id;
	int match;

	pkey.dtpk_prov = ((dtrace_provider_t *)prid)->dtpv_name;
	pkey.dtpk_pmatch = &dtrace_match_string;
	pkey.dtpk_mod = mod;
	pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_func = func;
	pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_name = name;
	pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_id = DTRACE_IDNONE;

	lck_mtx_lock(&dtrace_lock);
	match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0,
	    dtrace_probe_lookup_match, &id);
	lck_mtx_unlock(&dtrace_lock);

	ASSERT(match == 1 || match == 0);
	return (match ? id : 0);
}
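
/*
 * A minimal sketch of the usual lookup-before-create idiom in a provider's
 * dtps_provide() entry point; the provider id and the probe names below are
 * hypothetical placeholders:
 *
 *	if (dtrace_probe_lookup(example_id, "example_mod",
 *	    "example_func", "entry") == 0) {
 *		(void) dtrace_probe_create(example_id, "example_mod",
 *		    "example_func", "entry", 0, NULL);
 *	}
 *
 * A return value of 0 means that no existing probe matched.
 */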
/*
 * Returns the probe argument associated with the specified probe.
 */
void *
dtrace_probe_arg(dtrace_provider_id_t id, dtrace_id_t pid)
{
	dtrace_probe_t *probe;
	void *rval = NULL;

	lck_mtx_lock(&dtrace_lock);

	if ((probe = dtrace_probe_lookup_id(pid)) != NULL &&
	    probe->dtpr_provider == (dtrace_provider_t *)id)
		rval = probe->dtpr_arg;

	lck_mtx_unlock(&dtrace_lock);

	return (rval);
}
/*
 * Copy a probe into a probe description.
 */
static void
dtrace_probe_description(const dtrace_probe_t *prp, dtrace_probedesc_t *pdp)
{
	bzero(pdp, sizeof (dtrace_probedesc_t));
	pdp->dtpd_id = prp->dtpr_id;

	(void) strlcpy(pdp->dtpd_provider,
	    prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN);

	(void) strlcpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN);
	(void) strlcpy(pdp->dtpd_func, prp->dtpr_func, DTRACE_FUNCNAMELEN);
	(void) strlcpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN);
}
/*
 * Called to indicate that a probe -- or probes -- should be provided by a
 * specified provider.  If the specified description is NULL, the provider will
 * be told to provide all of its probes.  (This is done whenever a new
 * consumer comes along, or whenever a retained enabling is to be matched.)  If
 * the specified description is non-NULL, the provider is given the
 * opportunity to dynamically provide the specified probe, allowing providers
 * to support the creation of probes on-the-fly.  (So-called _autocreated_
 * probes.)  If the provider is NULL, the operations will be applied to all
 * providers; if the provider is non-NULL the operations will only be applied
 * to the specified provider.  The dtrace_provider_lock must be held, and the
 * dtrace_lock must _not_ be held -- the provider's dtps_provide() operation
 * will need to grab the dtrace_lock when it reenters the framework through
 * dtrace_probe_lookup(), dtrace_probe_create(), etc.
 */
static void
dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv)
{
	int all = 0;

	lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED);

	if (prv == NULL) {
		all = 1;
		prv = dtrace_provider;
	}

	do {
		/*
		 * First, call the blanket provide operation.
		 */
		prv->dtpv_pops.dtps_provide(prv->dtpv_arg, desc);

#if !defined(__APPLE__)
		/*
		 * Now call the per-module provide operation.  We will grab
		 * mod_lock to prevent the list from being modified.  Note
		 * that this also prevents the mod_busy bits from changing.
		 * (mod_busy can only be changed with mod_lock held.)
		 */
		lck_mtx_lock(&mod_lock);

		ctl = &modules;
		do {
			if (ctl->mod_busy || ctl->mod_mp == NULL)
				continue;

			prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);

		} while ((ctl = ctl->mod_next) != &modules);

		lck_mtx_unlock(&mod_lock);
#else
#if 0 /* FIXME: Workaround for PR_4643546 */
		simple_lock(&kmod_lock);

		kmod_info_t *ktl = kmod;
		while (ktl != NULL) {
			prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ktl);
			ktl = ktl->next;
		}

		simple_unlock(&kmod_lock);
#else
		/*
		 * Don't bother to iterate over the kmod list.  At present only
		 * fbt offers a provide_module in its dtpv_pops, and then it
		 * ignores the module argument anyway.
		 */
		prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, NULL);
#endif
#endif /* __APPLE__ */
	} while (all && (prv = prv->dtpv_next) != NULL);
}
/*
 * Iterate over each probe, and call the Framework-to-Provider API function
 * indicated by the specified offset into the provider operations structure.
 */
static void
dtrace_probe_foreach(uintptr_t offs)
{
	dtrace_provider_t *prov;
	void (*func)(void *, dtrace_id_t, void *);
	dtrace_probe_t *probe;
	dtrace_icookie_t cookie;
	int i;

	/*
	 * We disable interrupts to walk through the probe array.  This is
	 * safe -- the dtrace_sync() in dtrace_unregister() assures that we
	 * won't see stale data.
	 */
	cookie = dtrace_interrupt_disable();

	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_ecb == NULL) {
			/*
			 * This probe isn't enabled -- don't call the function.
			 */
			continue;
		}

		prov = probe->dtpr_provider;
		func = *((void(**)(void *, dtrace_id_t, void *))
		    ((uintptr_t)&prov->dtpv_pops + offs));

		func(prov->dtpv_arg, i + 1, probe->dtpr_arg);
	}

	dtrace_interrupt_enable(cookie);
}
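
/*
 * The offset passed to dtrace_probe_foreach() selects which member of the
 * provider ops vector is invoked for every enabled probe.  For example, a
 * suspend pass over all enabled probes can be expressed as:
 *
 *	dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend));
 */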
static int
dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab)
{
	dtrace_probekey_t pkey;
	uint32_t priv;
	uid_t uid;
	zoneid_t zoneid;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	dtrace_ecb_create_cache = NULL;

	if (desc == NULL) {
		/*
		 * If we're passed a NULL description, we're being asked to
		 * create an ECB with a NULL probe.
		 */
		(void) dtrace_ecb_create_enable(NULL, enab);
		return (0);
	}

	dtrace_probekey(desc, &pkey);
	dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred,
	    &priv, &uid, &zoneid);

	return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable,
	    enab));
}
/*
 * DTrace Helper Provider Functions
 */
static void
dtrace_dofattr2attr(dtrace_attribute_t *attr, const dof_attr_t dofattr)
{
	attr->dtat_name = DOF_ATTR_NAME(dofattr);
	attr->dtat_data = DOF_ATTR_DATA(dofattr);
	attr->dtat_class = DOF_ATTR_CLASS(dofattr);
}

static void
dtrace_dofprov2hprov(dtrace_helper_provdesc_t *hprov,
    const dof_provider_t *dofprov, char *strtab)
{
	hprov->dthpv_provname = strtab + dofprov->dofpv_name;
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_provider,
	    dofprov->dofpv_provattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_mod,
	    dofprov->dofpv_modattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_func,
	    dofprov->dofpv_funcattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_name,
	    dofprov->dofpv_nameattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_args,
	    dofprov->dofpv_argsattr);
}
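
/*
 * A dof_attr_t packs the three stability fields of a dtrace_attribute_t
 * (name, data and class) into a single 32-bit word; the DOF_ATTR_NAME(),
 * DOF_ATTR_DATA() and DOF_ATTR_CLASS() macros used above unpack them again
 * when a DOF provider description is translated into a helper provider
 * description.
 */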
static void
dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
	dof_provider_t *provider;
	dof_probe_t *probe;
	uint32_t *off, *enoff;
	uint8_t *arg;
	char *strtab;
	uint_t i, nprobes;
	dtrace_helper_provdesc_t dhpv;
	dtrace_helper_probedesc_t dhpb;
	dtrace_meta_t *meta = dtrace_meta_pid;
	dtrace_mops_t *mops = &meta->dtm_mops;
	void *parg;

	provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
	str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_strtab * dof->dofh_secsize);
	prb_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_probes * dof->dofh_secsize);
	arg_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_prargs * dof->dofh_secsize);
	off_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_proffs * dof->dofh_secsize);

	strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
	off = (uint32_t *)(uintptr_t)(daddr + off_sec->dofs_offset);
	arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);
	enoff = NULL;

	/*
	 * See dtrace_helper_provider_validate().
	 */
	if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
	    provider->dofpv_prenoffs != DOF_SECT_NONE) {
		enoff_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
		    provider->dofpv_prenoffs * dof->dofh_secsize);
		enoff = (uint32_t *)(uintptr_t)(daddr + enoff_sec->dofs_offset);
	}

	nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;

	/*
	 * Create the provider.
	 */
	dtrace_dofprov2hprov(&dhpv, provider, strtab);

	if ((parg = mops->dtms_provide_pid(meta->dtm_arg, &dhpv, pid)) == NULL)
		return;

	/*
	 * Create the probes.
	 */
	for (i = 0; i < nprobes; i++) {
		probe = (dof_probe_t *)(uintptr_t)(daddr +
		    prb_sec->dofs_offset + i * prb_sec->dofs_entsize);

		dhpb.dthpb_mod = dhp->dofhp_mod;
		dhpb.dthpb_func = strtab + probe->dofpr_func;
		dhpb.dthpb_name = strtab + probe->dofpr_name;
#if defined(__APPLE__)
		dhpb.dthpb_base = dhp->dofhp_addr;
#else
		dhpb.dthpb_base = probe->dofpr_addr;
#endif
		dhpb.dthpb_offs = off + probe->dofpr_offidx;
		dhpb.dthpb_noffs = probe->dofpr_noffs;
		if (enoff != NULL) {
			dhpb.dthpb_enoffs = enoff + probe->dofpr_enoffidx;
			dhpb.dthpb_nenoffs = probe->dofpr_nenoffs;
		} else {
			dhpb.dthpb_enoffs = NULL;
			dhpb.dthpb_nenoffs = 0;
		}
		dhpb.dthpb_args = arg + probe->dofpr_argidx;
		dhpb.dthpb_nargc = probe->dofpr_nargc;
		dhpb.dthpb_xargc = probe->dofpr_xargc;
		dhpb.dthpb_ntypes = strtab + probe->dofpr_nargv;
		dhpb.dthpb_xtypes = strtab + probe->dofpr_xargv;

		mops->dtms_create_probe(meta->dtm_arg, parg, &dhpb);
	}
}
static void
dtrace_helper_provide(dof_helper_t *dhp, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	int i;

	lck_mtx_assert(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED);

	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
		    dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_PROVIDER)
			continue;

		dtrace_helper_provide_one(dhp, sec, pid);
	}

	/*
	 * We may have just created probes, so we must now rematch against
	 * any retained enablings.  Note that this call will acquire both
	 * cpu_lock and dtrace_lock; the fact that we are holding
	 * dtrace_meta_lock now is what defines the ordering with respect to
	 * these three locks.
	 */
	dtrace_enabling_matchall();
}
static void
dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	dof_sec_t *str_sec;
	dof_provider_t *provider;
	char *strtab;
	dtrace_helper_provdesc_t dhpv;
	dtrace_meta_t *meta = dtrace_meta_pid;
	dtrace_mops_t *mops = &meta->dtm_mops;

	provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
	str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_strtab * dof->dofh_secsize);

	strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);

	/*
	 * Create the provider.
	 */
	dtrace_dofprov2hprov(&dhpv, provider, strtab);

	mops->dtms_remove_pid(meta->dtm_arg, &dhpv, pid);
}
static void
dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	int i;

	lck_mtx_assert(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED);

	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
		    dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_PROVIDER)
			continue;

		dtrace_helper_provider_remove_one(dhp, sec, pid);
	}
}
/*
 * DTrace Meta Provider-to-Framework API Functions
 *
 * These functions implement the Meta Provider-to-Framework API, as described
 * in <sys/dtrace.h>.
 */
int
dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg,
    dtrace_meta_provider_id_t *idp)
{
	dtrace_meta_t *meta;
	dtrace_helpers_t *help, *next;
	int i;

	*idp = DTRACE_METAPROVNONE;

	/*
	 * We strictly don't need the name, but we hold onto it for
	 * debuggability. All hail error queues!
	 */
	if (name == NULL) {
		cmn_err(CE_WARN, "failed to register meta-provider: "
		    "invalid name");
		return (EINVAL);
	}

	if (mops == NULL ||
	    mops->dtms_create_probe == NULL ||
	    mops->dtms_provide_pid == NULL ||
	    mops->dtms_remove_pid == NULL) {
		cmn_err(CE_WARN, "failed to register meta-register %s: "
		    "invalid ops", name);
		return (EINVAL);
	}

	meta = kmem_zalloc(sizeof (dtrace_meta_t), KM_SLEEP);
	meta->dtm_mops = *mops;
	meta->dtm_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
	(void) strcpy(meta->dtm_name, name);
	meta->dtm_arg = arg;

	lck_mtx_lock(&dtrace_meta_lock);
	lck_mtx_lock(&dtrace_lock);

	if (dtrace_meta_pid != NULL) {
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&dtrace_meta_lock);
		cmn_err(CE_WARN, "failed to register meta-register %s: "
		    "user-land meta-provider exists", name);
		kmem_free(meta->dtm_name, strlen(meta->dtm_name) + 1);
		kmem_free(meta, sizeof (dtrace_meta_t));
		return (EINVAL);
	}

	dtrace_meta_pid = meta;
	*idp = (dtrace_meta_provider_id_t)meta;

	/*
	 * If there are providers and probes ready to go, pass them
	 * off to the new meta provider now.
	 */
	help = dtrace_deferred_pid;
	dtrace_deferred_pid = NULL;

	lck_mtx_unlock(&dtrace_lock);

	while (help != NULL) {
		for (i = 0; i < help->dthps_nprovs; i++) {
			dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
			    help->dthps_pid);
		}

		next = help->dthps_next;
		help->dthps_next = NULL;
		help->dthps_prev = NULL;
		help->dthps_deferred = 0;
		help = next;
	}

	lck_mtx_unlock(&dtrace_meta_lock);

	return (0);
}
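
/*
 * A minimal sketch of meta-provider registration, assuming a hypothetical
 * meta-provider that supplies its own dtrace_mops_t (example_mops) and keeps
 * the returned id so that it can later be passed to dtrace_meta_unregister():
 *
 *	static dtrace_meta_provider_id_t example_meta_id;
 *
 *	if (dtrace_meta_register("example", &example_mops, NULL,
 *	    &example_meta_id) != 0)
 *		return;
 */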
int
dtrace_meta_unregister(dtrace_meta_provider_id_t id)
{
	dtrace_meta_t **pp, *old = (dtrace_meta_t *)id;

	lck_mtx_lock(&dtrace_meta_lock);
	lck_mtx_lock(&dtrace_lock);

	if (old == dtrace_meta_pid) {
		pp = &dtrace_meta_pid;
	} else {
		panic("attempt to unregister non-existent "
		    "dtrace meta-provider %p\n", (void *)old);
	}

	if (old->dtm_count != 0) {
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&dtrace_meta_lock);
		return (EBUSY);
	}

	*pp = NULL;

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_meta_lock);

	kmem_free(old->dtm_name, strlen(old->dtm_name) + 1);
	kmem_free(old, sizeof (dtrace_meta_t));

	return (0);
}
/*
 * DTrace DIF Object Functions
 */
static int
dtrace_difo_err(uint_t pc, const char *format, ...)
{
	if (dtrace_err_verbose) {
		va_list alist;

		(void) uprintf("dtrace DIF object error: [%u]: ", pc);
		va_start(alist, format);
		(void) vuprintf(format, alist);
		va_end(alist);
	}

#ifdef DTRACE_ERRDEBUG
	dtrace_errdebug(format);
#endif
	return (1);
}
/*
 * Validate a DTrace DIF object by checking the IR instructions.  The following
 * rules are currently enforced by dtrace_difo_validate():
 *
 * 1. Each instruction must have a valid opcode
 * 2. Each register, string, variable, or subroutine reference must be valid
 * 3. No instruction can modify register %r0 (must be zero)
 * 4. All instruction reserved bits must be set to zero
 * 5. The last instruction must be a "ret" instruction
 * 6. All branch targets must reference a valid instruction _after_ the branch
 */
7771 dtrace_difo_validate(dtrace_difo_t
*dp
, dtrace_vstate_t
*vstate
, uint_t nregs
,
7775 int (*efunc
)(uint_t pc
, const char *, ...) = dtrace_difo_err
;
7779 kcheck
= cr
== NULL
||
7780 PRIV_POLICY_ONLY(cr
, PRIV_DTRACE_KERNEL
, B_FALSE
) == 0;
7782 dp
->dtdo_destructive
= 0;
7784 for (pc
= 0; pc
< dp
->dtdo_len
&& err
== 0; pc
++) {
7785 dif_instr_t instr
= dp
->dtdo_buf
[pc
];
7787 uint_t r1
= DIF_INSTR_R1(instr
);
7788 uint_t r2
= DIF_INSTR_R2(instr
);
7789 uint_t rd
= DIF_INSTR_RD(instr
);
7790 uint_t rs
= DIF_INSTR_RS(instr
);
7791 uint_t label
= DIF_INSTR_LABEL(instr
);
7792 uint_t v
= DIF_INSTR_VAR(instr
);
7793 uint_t subr
= DIF_INSTR_SUBR(instr
);
7794 uint_t type
= DIF_INSTR_TYPE(instr
);
7795 uint_t op
= DIF_INSTR_OP(instr
);
7813 err
+= efunc(pc
, "invalid register %u\n", r1
);
7815 err
+= efunc(pc
, "invalid register %u\n", r2
);
7817 err
+= efunc(pc
, "invalid register %u\n", rd
);
7819 err
+= efunc(pc
, "cannot write to %r0\n");
7825 err
+= efunc(pc
, "invalid register %u\n", r1
);
7827 err
+= efunc(pc
, "non-zero reserved bits\n");
7829 err
+= efunc(pc
, "invalid register %u\n", rd
);
7831 err
+= efunc(pc
, "cannot write to %r0\n");
7841 err
+= efunc(pc
, "invalid register %u\n", r1
);
7843 err
+= efunc(pc
, "non-zero reserved bits\n");
7845 err
+= efunc(pc
, "invalid register %u\n", rd
);
7847 err
+= efunc(pc
, "cannot write to %r0\n");
7849 dp
->dtdo_buf
[pc
] = DIF_INSTR_LOAD(op
+
7850 DIF_OP_RLDSB
- DIF_OP_LDSB
, r1
, rd
);
7860 err
+= efunc(pc
, "invalid register %u\n", r1
);
7862 err
+= efunc(pc
, "non-zero reserved bits\n");
7864 err
+= efunc(pc
, "invalid register %u\n", rd
);
7866 err
+= efunc(pc
, "cannot write to %r0\n");
7876 err
+= efunc(pc
, "invalid register %u\n", r1
);
7878 err
+= efunc(pc
, "non-zero reserved bits\n");
7880 err
+= efunc(pc
, "invalid register %u\n", rd
);
7882 err
+= efunc(pc
, "cannot write to %r0\n");
7889 err
+= efunc(pc
, "invalid register %u\n", r1
);
7891 err
+= efunc(pc
, "non-zero reserved bits\n");
7893 err
+= efunc(pc
, "invalid register %u\n", rd
);
7895 err
+= efunc(pc
, "cannot write to 0 address\n");
7900 err
+= efunc(pc
, "invalid register %u\n", r1
);
7902 err
+= efunc(pc
, "invalid register %u\n", r2
);
7904 err
+= efunc(pc
, "non-zero reserved bits\n");
7908 err
+= efunc(pc
, "invalid register %u\n", r1
);
7909 if (r2
!= 0 || rd
!= 0)
7910 err
+= efunc(pc
, "non-zero reserved bits\n");
7923 if (label
>= dp
->dtdo_len
) {
7924 err
+= efunc(pc
, "invalid branch target %u\n",
7928 err
+= efunc(pc
, "backward branch to %u\n",
7933 if (r1
!= 0 || r2
!= 0)
7934 err
+= efunc(pc
, "non-zero reserved bits\n");
7936 err
+= efunc(pc
, "invalid register %u\n", rd
);
7940 case DIF_OP_FLUSHTS
:
7941 if (r1
!= 0 || r2
!= 0 || rd
!= 0)
7942 err
+= efunc(pc
, "non-zero reserved bits\n");
7945 if (DIF_INSTR_INTEGER(instr
) >= dp
->dtdo_intlen
) {
7946 err
+= efunc(pc
, "invalid integer ref %u\n",
7947 DIF_INSTR_INTEGER(instr
));
7950 err
+= efunc(pc
, "invalid register %u\n", rd
);
7952 err
+= efunc(pc
, "cannot write to %r0\n");
7955 if (DIF_INSTR_STRING(instr
) >= dp
->dtdo_strlen
) {
7956 err
+= efunc(pc
, "invalid string ref %u\n",
7957 DIF_INSTR_STRING(instr
));
7960 err
+= efunc(pc
, "invalid register %u\n", rd
);
7962 err
+= efunc(pc
, "cannot write to %r0\n");
7966 if (r1
> DIF_VAR_ARRAY_MAX
)
7967 err
+= efunc(pc
, "invalid array %u\n", r1
);
7969 err
+= efunc(pc
, "invalid register %u\n", r2
);
7971 err
+= efunc(pc
, "invalid register %u\n", rd
);
7973 err
+= efunc(pc
, "cannot write to %r0\n");
7980 if (v
< DIF_VAR_OTHER_MIN
|| v
> DIF_VAR_OTHER_MAX
)
7981 err
+= efunc(pc
, "invalid variable %u\n", v
);
7983 err
+= efunc(pc
, "invalid register %u\n", rd
);
7985 err
+= efunc(pc
, "cannot write to %r0\n");
7992 if (v
< DIF_VAR_OTHER_UBASE
|| v
> DIF_VAR_OTHER_MAX
)
7993 err
+= efunc(pc
, "invalid variable %u\n", v
);
7995 err
+= efunc(pc
, "invalid register %u\n", rd
);
7998 if (subr
> DIF_SUBR_MAX
)
7999 err
+= efunc(pc
, "invalid subr %u\n", subr
);
8001 err
+= efunc(pc
, "invalid register %u\n", rd
);
8003 err
+= efunc(pc
, "cannot write to %r0\n");
8005 if (subr
== DIF_SUBR_COPYOUT
||
8006 subr
== DIF_SUBR_COPYOUTSTR
) {
8007 dp
->dtdo_destructive
= 1;
8011 if (type
!= DIF_TYPE_STRING
&& type
!= DIF_TYPE_CTF
)
8012 err
+= efunc(pc
, "invalid ref type %u\n", type
);
8014 err
+= efunc(pc
, "invalid register %u\n", r2
);
8016 err
+= efunc(pc
, "invalid register %u\n", rs
);
8019 if (type
!= DIF_TYPE_CTF
)
8020 err
+= efunc(pc
, "invalid val type %u\n", type
);
8022 err
+= efunc(pc
, "invalid register %u\n", r2
);
8024 err
+= efunc(pc
, "invalid register %u\n", rs
);
8027 err
+= efunc(pc
, "invalid opcode %u\n",
8028 DIF_INSTR_OP(instr
));
8032 if (dp
->dtdo_len
!= 0 &&
8033 DIF_INSTR_OP(dp
->dtdo_buf
[dp
->dtdo_len
- 1]) != DIF_OP_RET
) {
8034 err
+= efunc(dp
->dtdo_len
- 1,
8035 "expected 'ret' as last DIF instruction\n");
8038 if (!(dp
->dtdo_rtype
.dtdt_flags
& DIF_TF_BYREF
)) {
8040 * If we're not returning by reference, the size must be either
8041 * 0 or the size of one of the base types.
8043 switch (dp
->dtdo_rtype
.dtdt_size
) {
8045 case sizeof (uint8_t):
8046 case sizeof (uint16_t):
8047 case sizeof (uint32_t):
8048 case sizeof (uint64_t):
8052 err
+= efunc(dp
->dtdo_len
- 1, "bad return size");
8056 for (i
= 0; i
< dp
->dtdo_varlen
&& err
== 0; i
++) {
8057 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
], *existing
= NULL
;
8058 dtrace_diftype_t
*vt
, *et
;
8061 if (v
->dtdv_scope
!= DIFV_SCOPE_GLOBAL
&&
8062 v
->dtdv_scope
!= DIFV_SCOPE_THREAD
&&
8063 v
->dtdv_scope
!= DIFV_SCOPE_LOCAL
) {
8064 err
+= efunc(i
, "unrecognized variable scope %d\n",
8069 if (v
->dtdv_kind
!= DIFV_KIND_ARRAY
&&
8070 v
->dtdv_kind
!= DIFV_KIND_SCALAR
) {
8071 err
+= efunc(i
, "unrecognized variable type %d\n",
8076 if ((id
= v
->dtdv_id
) > DIF_VARIABLE_MAX
) {
8077 err
+= efunc(i
, "%d exceeds variable id limit\n", id
);
8081 if (id
< DIF_VAR_OTHER_UBASE
)
8085 * For user-defined variables, we need to check that this
8086 * definition is identical to any previous definition that we
8089 ndx
= id
- DIF_VAR_OTHER_UBASE
;
8091 switch (v
->dtdv_scope
) {
8092 case DIFV_SCOPE_GLOBAL
:
8093 if (ndx
< vstate
->dtvs_nglobals
) {
8094 dtrace_statvar_t
*svar
;
8096 if ((svar
= vstate
->dtvs_globals
[ndx
]) != NULL
)
8097 existing
= &svar
->dtsv_var
;
8102 case DIFV_SCOPE_THREAD
:
8103 if (ndx
< vstate
->dtvs_ntlocals
)
8104 existing
= &vstate
->dtvs_tlocals
[ndx
];
8107 case DIFV_SCOPE_LOCAL
:
8108 if (ndx
< vstate
->dtvs_nlocals
) {
8109 dtrace_statvar_t
*svar
;
8111 if ((svar
= vstate
->dtvs_locals
[ndx
]) != NULL
)
8112 existing
= &svar
->dtsv_var
;
8120 if (vt
->dtdt_flags
& DIF_TF_BYREF
) {
8121 if (vt
->dtdt_size
== 0) {
8122 err
+= efunc(i
, "zero-sized variable\n");
8126 if (v
->dtdv_scope
== DIFV_SCOPE_GLOBAL
&&
8127 vt
->dtdt_size
> dtrace_global_maxsize
) {
8128 err
+= efunc(i
, "oversized by-ref global\n");
8133 if (existing
== NULL
|| existing
->dtdv_id
== 0)
8136 ASSERT(existing
->dtdv_id
== v
->dtdv_id
);
8137 ASSERT(existing
->dtdv_scope
== v
->dtdv_scope
);
8139 if (existing
->dtdv_kind
!= v
->dtdv_kind
)
8140 err
+= efunc(i
, "%d changed variable kind\n", id
);
8142 et
= &existing
->dtdv_type
;
8144 if (vt
->dtdt_flags
!= et
->dtdt_flags
) {
8145 err
+= efunc(i
, "%d changed variable type flags\n", id
);
8149 if (vt
->dtdt_size
!= 0 && vt
->dtdt_size
!= et
->dtdt_size
) {
8150 err
+= efunc(i
, "%d changed variable type size\n", id
);
/*
 * Validate a DTrace DIF object that is to be used as a helper.  Helpers
 * are much more constrained than normal DIFOs.  Specifically, they may
 * not:
 *
 * 1. Make calls to subroutines other than copyin(), copyinstr() or
 *    miscellaneous string routines
 * 2. Access DTrace variables other than the args[] array, and the
 *    curthread, pid, ppid, tid, execname, zonename, uid and gid variables.
 * 3. Have thread-local variables.
 * 4. Have dynamic variables.
 */
8171 dtrace_difo_validate_helper(dtrace_difo_t
*dp
)
8173 int (*efunc
)(uint_t pc
, const char *, ...) = dtrace_difo_err
;
8177 for (pc
= 0; pc
< dp
->dtdo_len
; pc
++) {
8178 dif_instr_t instr
= dp
->dtdo_buf
[pc
];
8180 uint_t v
= DIF_INSTR_VAR(instr
);
8181 uint_t subr
= DIF_INSTR_SUBR(instr
);
8182 uint_t op
= DIF_INSTR_OP(instr
);
8237 case DIF_OP_FLUSHTS
:
8249 if (v
>= DIF_VAR_OTHER_UBASE
)
8252 if (v
>= DIF_VAR_ARG0
&& v
<= DIF_VAR_ARG9
)
8255 if (v
== DIF_VAR_CURTHREAD
|| v
== DIF_VAR_PID
||
8256 v
== DIF_VAR_PPID
|| v
== DIF_VAR_TID
||
8257 v
== DIF_VAR_EXECNAME
|| v
== DIF_VAR_ZONENAME
||
8258 v
== DIF_VAR_UID
|| v
== DIF_VAR_GID
)
8261 err
+= efunc(pc
, "illegal variable %u\n", v
);
8268 err
+= efunc(pc
, "illegal dynamic variable load\n");
8274 err
+= efunc(pc
, "illegal dynamic variable store\n");
8278 if (subr
== DIF_SUBR_ALLOCA
||
8279 subr
== DIF_SUBR_BCOPY
||
8280 subr
== DIF_SUBR_COPYIN
||
8281 subr
== DIF_SUBR_COPYINTO
||
8282 subr
== DIF_SUBR_COPYINSTR
||
8283 subr
== DIF_SUBR_INDEX
||
8284 subr
== DIF_SUBR_LLTOSTR
||
8285 subr
== DIF_SUBR_RINDEX
||
8286 subr
== DIF_SUBR_STRCHR
||
8287 subr
== DIF_SUBR_STRJOIN
||
8288 subr
== DIF_SUBR_STRRCHR
||
8289 subr
== DIF_SUBR_STRSTR
||
8290 subr
== DIF_SUBR_CHUD
)
8293 err
+= efunc(pc
, "invalid subr %u\n", subr
);
8297 err
+= efunc(pc
, "invalid opcode %u\n",
8298 DIF_INSTR_OP(instr
));
8306 * Returns 1 if the expression in the DIF object can be cached on a per-thread
8310 dtrace_difo_cacheable(dtrace_difo_t
*dp
)
8317 for (i
= 0; i
< dp
->dtdo_varlen
; i
++) {
8318 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
];
8320 if (v
->dtdv_scope
!= DIFV_SCOPE_GLOBAL
)
8323 switch (v
->dtdv_id
) {
8324 case DIF_VAR_CURTHREAD
:
8327 case DIF_VAR_EXECNAME
:
8328 case DIF_VAR_ZONENAME
:
8337 * This DIF object may be cacheable. Now we need to look for any
8338 * array loading instructions, any memory loading instructions, or
8339 * any stores to thread-local variables.
8341 for (i
= 0; i
< dp
->dtdo_len
; i
++) {
8342 uint_t op
= DIF_INSTR_OP(dp
->dtdo_buf
[i
]);
8344 if ((op
>= DIF_OP_LDSB
&& op
<= DIF_OP_LDX
) ||
8345 (op
>= DIF_OP_ULDSB
&& op
<= DIF_OP_ULDX
) ||
8346 (op
>= DIF_OP_RLDSB
&& op
<= DIF_OP_RLDX
) ||
8347 op
== DIF_OP_LDGA
|| op
== DIF_OP_STTS
)
8355 dtrace_difo_hold(dtrace_difo_t
*dp
)
8359 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
8362 ASSERT(dp
->dtdo_refcnt
!= 0);
8365 * We need to check this DIF object for references to the variable
8366 * DIF_VAR_VTIMESTAMP.
8368 for (i
= 0; i
< dp
->dtdo_varlen
; i
++) {
8369 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
];
8371 if (v
->dtdv_id
!= DIF_VAR_VTIMESTAMP
)
8374 if (dtrace_vtime_references
++ == 0)
8375 dtrace_vtime_enable();
/*
 * This routine calculates the dynamic variable chunksize for a given DIF
 * object.  The calculation is not fool-proof, and can probably be tricked by
 * malicious DIF -- but it works for all compiler-generated DIF.  Because this
 * calculation is likely imperfect, dtrace_dynvar() is able to gracefully fail
 * if a dynamic variable size exceeds the chunksize.
 */
8387 dtrace_difo_chunksize(dtrace_difo_t
*dp
, dtrace_vstate_t
*vstate
)
8390 dtrace_key_t tupregs
[DIF_DTR_NREGS
+ 2]; /* +2 for thread and id */
8391 const dif_instr_t
*text
= dp
->dtdo_buf
;
8397 for (pc
= 0; pc
< dp
->dtdo_len
; pc
++) {
8398 dif_instr_t instr
= text
[pc
];
8399 uint_t op
= DIF_INSTR_OP(instr
);
8400 uint_t rd
= DIF_INSTR_RD(instr
);
8401 uint_t r1
= DIF_INSTR_R1(instr
);
8405 dtrace_key_t
*key
= tupregs
;
8409 sval
= dp
->dtdo_inttab
[DIF_INSTR_INTEGER(instr
)];
8414 key
= &tupregs
[DIF_DTR_NREGS
];
8415 key
[0].dttk_size
= 0;
8416 key
[1].dttk_size
= 0;
8418 scope
= DIFV_SCOPE_THREAD
;
8425 if (DIF_INSTR_OP(instr
) == DIF_OP_STTAA
)
8426 key
[nkeys
++].dttk_size
= 0;
8428 key
[nkeys
++].dttk_size
= 0;
8430 if (op
== DIF_OP_STTAA
) {
8431 scope
= DIFV_SCOPE_THREAD
;
8433 scope
= DIFV_SCOPE_GLOBAL
;
8439 if (ttop
== DIF_DTR_NREGS
)
8442 if ((srd
== 0 || sval
== 0) && r1
== DIF_TYPE_STRING
) {
8444 * If the register for the size of the "pushtr"
8445 * is %r0 (or the value is 0) and the type is
8446 * a string, we'll use the system-wide default
8449 tupregs
[ttop
++].dttk_size
=
8450 dtrace_strsize_default
;
8455 tupregs
[ttop
++].dttk_size
= sval
;
8461 if (ttop
== DIF_DTR_NREGS
)
8464 tupregs
[ttop
++].dttk_size
= 0;
8467 case DIF_OP_FLUSHTS
:
8484 * We have a dynamic variable allocation; calculate its size.
8486 for (ksize
= 0, i
= 0; i
< nkeys
; i
++)
8487 ksize
+= P2ROUNDUP(key
[i
].dttk_size
, sizeof (uint64_t));
8489 size
= sizeof (dtrace_dynvar_t
);
8490 size
+= sizeof (dtrace_key_t
) * (nkeys
- 1);
8494 * Now we need to determine the size of the stored data.
8496 id
= DIF_INSTR_VAR(instr
);
8498 for (i
= 0; i
< dp
->dtdo_varlen
; i
++) {
8499 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
];
8501 if (v
->dtdv_id
== id
&& v
->dtdv_scope
== scope
) {
8502 size
+= v
->dtdv_type
.dtdt_size
;
8507 if (i
== dp
->dtdo_varlen
)
8511 * We have the size. If this is larger than the chunk size
8512 * for our dynamic variable state, reset the chunk size.
8514 size
= P2ROUNDUP(size
, sizeof (uint64_t));
8516 if (size
> vstate
->dtvs_dynvars
.dtds_chunksize
)
8517 vstate
->dtvs_dynvars
.dtds_chunksize
= size
;
8522 dtrace_difo_init(dtrace_difo_t
*dp
, dtrace_vstate_t
*vstate
)
8524 int i
, oldsvars
, osz
, nsz
, otlocals
, ntlocals
;
8527 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
8528 ASSERT(dp
->dtdo_buf
!= NULL
&& dp
->dtdo_len
!= 0);
8530 for (i
= 0; i
< dp
->dtdo_varlen
; i
++) {
8531 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
];
8532 dtrace_statvar_t
*svar
, ***svarp
;
8534 uint8_t scope
= v
->dtdv_scope
;
8537 if ((id
= v
->dtdv_id
) < DIF_VAR_OTHER_UBASE
)
8540 id
-= DIF_VAR_OTHER_UBASE
;
8543 case DIFV_SCOPE_THREAD
:
8544 while (id
>= (otlocals
= vstate
->dtvs_ntlocals
)) {
8545 dtrace_difv_t
*tlocals
;
8547 if ((ntlocals
= (otlocals
<< 1)) == 0)
8550 osz
= otlocals
* sizeof (dtrace_difv_t
);
8551 nsz
= ntlocals
* sizeof (dtrace_difv_t
);
8553 tlocals
= kmem_zalloc(nsz
, KM_SLEEP
);
8556 bcopy(vstate
->dtvs_tlocals
,
8558 kmem_free(vstate
->dtvs_tlocals
, osz
);
8561 vstate
->dtvs_tlocals
= tlocals
;
8562 vstate
->dtvs_ntlocals
= ntlocals
;
8565 vstate
->dtvs_tlocals
[id
] = *v
;
8568 case DIFV_SCOPE_LOCAL
:
8569 np
= &vstate
->dtvs_nlocals
;
8570 svarp
= &vstate
->dtvs_locals
;
8572 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
)
8573 dsize
= (int)NCPU
* (v
->dtdv_type
.dtdt_size
+
8576 dsize
= (int)NCPU
* sizeof (uint64_t);
8580 case DIFV_SCOPE_GLOBAL
:
8581 np
= &vstate
->dtvs_nglobals
;
8582 svarp
= &vstate
->dtvs_globals
;
8584 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
)
8585 dsize
= v
->dtdv_type
.dtdt_size
+
8594 while (id
>= (oldsvars
= *np
)) {
8595 dtrace_statvar_t
**statics
;
8596 int newsvars
, oldsize
, newsize
;
8598 if ((newsvars
= (oldsvars
<< 1)) == 0)
8601 oldsize
= oldsvars
* sizeof (dtrace_statvar_t
*);
8602 newsize
= newsvars
* sizeof (dtrace_statvar_t
*);
8604 statics
= kmem_zalloc(newsize
, KM_SLEEP
);
8607 bcopy(*svarp
, statics
, oldsize
);
8608 kmem_free(*svarp
, oldsize
);
8615 if ((svar
= (*svarp
)[id
]) == NULL
) {
8616 svar
= kmem_zalloc(sizeof (dtrace_statvar_t
), KM_SLEEP
);
8617 svar
->dtsv_var
= *v
;
8619 if ((svar
->dtsv_size
= dsize
) != 0) {
8620 svar
->dtsv_data
= (uint64_t)(uintptr_t)
8621 kmem_zalloc(dsize
, KM_SLEEP
);
8624 (*svarp
)[id
] = svar
;
8627 svar
->dtsv_refcnt
++;
8630 dtrace_difo_chunksize(dp
, vstate
);
8631 dtrace_difo_hold(dp
);
8634 static dtrace_difo_t
*
8635 dtrace_difo_duplicate(dtrace_difo_t
*dp
, dtrace_vstate_t
*vstate
)
8640 ASSERT(dp
->dtdo_buf
!= NULL
);
8641 ASSERT(dp
->dtdo_refcnt
!= 0);
8643 new = kmem_zalloc(sizeof (dtrace_difo_t
), KM_SLEEP
);
8645 ASSERT(dp
->dtdo_buf
!= NULL
);
8646 sz
= dp
->dtdo_len
* sizeof (dif_instr_t
);
8647 new->dtdo_buf
= kmem_alloc(sz
, KM_SLEEP
);
8648 bcopy(dp
->dtdo_buf
, new->dtdo_buf
, sz
);
8649 new->dtdo_len
= dp
->dtdo_len
;
8651 if (dp
->dtdo_strtab
!= NULL
) {
8652 ASSERT(dp
->dtdo_strlen
!= 0);
8653 new->dtdo_strtab
= kmem_alloc(dp
->dtdo_strlen
, KM_SLEEP
);
8654 bcopy(dp
->dtdo_strtab
, new->dtdo_strtab
, dp
->dtdo_strlen
);
8655 new->dtdo_strlen
= dp
->dtdo_strlen
;
8658 if (dp
->dtdo_inttab
!= NULL
) {
8659 ASSERT(dp
->dtdo_intlen
!= 0);
8660 sz
= dp
->dtdo_intlen
* sizeof (uint64_t);
8661 new->dtdo_inttab
= kmem_alloc(sz
, KM_SLEEP
);
8662 bcopy(dp
->dtdo_inttab
, new->dtdo_inttab
, sz
);
8663 new->dtdo_intlen
= dp
->dtdo_intlen
;
8666 if (dp
->dtdo_vartab
!= NULL
) {
8667 ASSERT(dp
->dtdo_varlen
!= 0);
8668 sz
= dp
->dtdo_varlen
* sizeof (dtrace_difv_t
);
8669 new->dtdo_vartab
= kmem_alloc(sz
, KM_SLEEP
);
8670 bcopy(dp
->dtdo_vartab
, new->dtdo_vartab
, sz
);
8671 new->dtdo_varlen
= dp
->dtdo_varlen
;
8674 dtrace_difo_init(new, vstate
);
8679 dtrace_difo_destroy(dtrace_difo_t
*dp
, dtrace_vstate_t
*vstate
)
8683 ASSERT(dp
->dtdo_refcnt
== 0);
8685 for (i
= 0; i
< dp
->dtdo_varlen
; i
++) {
8686 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
];
8687 dtrace_statvar_t
*svar
, **svarp
;
8689 uint8_t scope
= v
->dtdv_scope
;
8693 case DIFV_SCOPE_THREAD
:
8696 case DIFV_SCOPE_LOCAL
:
8697 np
= &vstate
->dtvs_nlocals
;
8698 svarp
= vstate
->dtvs_locals
;
8701 case DIFV_SCOPE_GLOBAL
:
8702 np
= &vstate
->dtvs_nglobals
;
8703 svarp
= vstate
->dtvs_globals
;
8710 if ((id
= v
->dtdv_id
) < DIF_VAR_OTHER_UBASE
)
8713 id
-= DIF_VAR_OTHER_UBASE
;
8717 ASSERT(svar
!= NULL
);
8718 ASSERT(svar
->dtsv_refcnt
> 0);
8720 if (--svar
->dtsv_refcnt
> 0)
8723 if (svar
->dtsv_size
!= 0) {
8724 ASSERT(svar
->dtsv_data
!= NULL
);
8725 kmem_free((void *)(uintptr_t)svar
->dtsv_data
,
8729 kmem_free(svar
, sizeof (dtrace_statvar_t
));
8733 kmem_free(dp
->dtdo_buf
, dp
->dtdo_len
* sizeof (dif_instr_t
));
8734 kmem_free(dp
->dtdo_inttab
, dp
->dtdo_intlen
* sizeof (uint64_t));
8735 kmem_free(dp
->dtdo_strtab
, dp
->dtdo_strlen
);
8736 kmem_free(dp
->dtdo_vartab
, dp
->dtdo_varlen
* sizeof (dtrace_difv_t
));
8738 kmem_free(dp
, sizeof (dtrace_difo_t
));
8742 dtrace_difo_release(dtrace_difo_t
*dp
, dtrace_vstate_t
*vstate
)
8746 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
8747 ASSERT(dp
->dtdo_refcnt
!= 0);
8749 for (i
= 0; i
< dp
->dtdo_varlen
; i
++) {
8750 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
];
8752 if (v
->dtdv_id
!= DIF_VAR_VTIMESTAMP
)
8755 ASSERT(dtrace_vtime_references
> 0);
8756 if (--dtrace_vtime_references
== 0)
8757 dtrace_vtime_disable();
8760 if (--dp
->dtdo_refcnt
== 0)
8761 dtrace_difo_destroy(dp
, vstate
);
8765 * DTrace Format Functions
8768 dtrace_format_add(dtrace_state_t
*state
, char *str
)
8771 uint16_t ndx
, len
= strlen(str
) + 1;
8773 fmt
= kmem_zalloc(len
, KM_SLEEP
);
8774 bcopy(str
, fmt
, len
);
8776 for (ndx
= 0; ndx
< state
->dts_nformats
; ndx
++) {
8777 if (state
->dts_formats
[ndx
] == NULL
) {
8778 state
->dts_formats
[ndx
] = fmt
;
8783 if (state
->dts_nformats
== USHRT_MAX
) {
8785 * This is only likely if a denial-of-service attack is being
8786 * attempted. As such, it's okay to fail silently here.
8788 kmem_free(fmt
, len
);
8793 * For simplicity, we always resize the formats array to be exactly the
8794 * number of formats.
8796 ndx
= state
->dts_nformats
++;
8797 new = kmem_alloc((ndx
+ 1) * sizeof (char *), KM_SLEEP
);
8799 if (state
->dts_formats
!= NULL
) {
8801 bcopy(state
->dts_formats
, new, ndx
* sizeof (char *));
8802 kmem_free(state
->dts_formats
, ndx
* sizeof (char *));
8805 state
->dts_formats
= new;
8806 state
->dts_formats
[ndx
] = fmt
;
8812 dtrace_format_remove(dtrace_state_t
*state
, uint16_t format
)
8816 ASSERT(state
->dts_formats
!= NULL
);
8817 ASSERT(format
<= state
->dts_nformats
);
8818 ASSERT(state
->dts_formats
[format
- 1] != NULL
);
8820 fmt
= state
->dts_formats
[format
- 1];
8821 kmem_free(fmt
, strlen(fmt
) + 1);
8822 state
->dts_formats
[format
- 1] = NULL
;
8826 dtrace_format_destroy(dtrace_state_t
*state
)
8830 if (state
->dts_nformats
== 0) {
8831 ASSERT(state
->dts_formats
== NULL
);
8835 ASSERT(state
->dts_formats
!= NULL
);
8837 for (i
= 0; i
< state
->dts_nformats
; i
++) {
8838 char *fmt
= state
->dts_formats
[i
];
8843 kmem_free(fmt
, strlen(fmt
) + 1);
8846 kmem_free(state
->dts_formats
, state
->dts_nformats
* sizeof (char *));
8847 state
->dts_nformats
= 0;
8848 state
->dts_formats
= NULL
;
8852 * DTrace Predicate Functions
8854 static dtrace_predicate_t
*
8855 dtrace_predicate_create(dtrace_difo_t
*dp
)
8857 dtrace_predicate_t
*pred
;
8859 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
8860 ASSERT(dp
->dtdo_refcnt
!= 0);
8862 pred
= kmem_zalloc(sizeof (dtrace_predicate_t
), KM_SLEEP
);
8863 pred
->dtp_difo
= dp
;
8864 pred
->dtp_refcnt
= 1;
8866 if (!dtrace_difo_cacheable(dp
))
8869 if (dtrace_predcache_id
== DTRACE_CACHEIDNONE
) {
8871 * This is only theoretically possible -- we have had 2^32
8872 * cacheable predicates on this machine. We cannot allow any
8873 * more predicates to become cacheable: as unlikely as it is,
8874 * there may be a thread caching a (now stale) predicate cache
8875 * ID. (N.B.: the temptation is being successfully resisted to
8876 * have this cmn_err() "Holy shit -- we executed this code!")
8881 pred
->dtp_cacheid
= dtrace_predcache_id
++;
8887 dtrace_predicate_hold(dtrace_predicate_t
*pred
)
8889 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
8890 ASSERT(pred
->dtp_difo
!= NULL
&& pred
->dtp_difo
->dtdo_refcnt
!= 0);
8891 ASSERT(pred
->dtp_refcnt
> 0);
8897 dtrace_predicate_release(dtrace_predicate_t
*pred
, dtrace_vstate_t
*vstate
)
8899 dtrace_difo_t
*dp
= pred
->dtp_difo
;
8901 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
8902 ASSERT(dp
!= NULL
&& dp
->dtdo_refcnt
!= 0);
8903 ASSERT(pred
->dtp_refcnt
> 0);
8905 if (--pred
->dtp_refcnt
== 0) {
8906 dtrace_difo_release(pred
->dtp_difo
, vstate
);
8907 kmem_free(pred
, sizeof (dtrace_predicate_t
));
8912 * DTrace Action Description Functions
8914 static dtrace_actdesc_t
*
8915 dtrace_actdesc_create(dtrace_actkind_t kind
, uint32_t ntuple
,
8916 uint64_t uarg
, uint64_t arg
)
8918 dtrace_actdesc_t
*act
;
8920 /* ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != NULL &&
8921 arg >= KERNELBASE) || (arg == NULL && kind == DTRACEACT_PRINTA));*/
8923 act
= kmem_zalloc(sizeof (dtrace_actdesc_t
), KM_SLEEP
);
8924 act
->dtad_kind
= kind
;
8925 act
->dtad_ntuple
= ntuple
;
8926 act
->dtad_uarg
= uarg
;
8927 act
->dtad_arg
= arg
;
8928 act
->dtad_refcnt
= 1;
8934 dtrace_actdesc_hold(dtrace_actdesc_t
*act
)
8936 ASSERT(act
->dtad_refcnt
>= 1);
8941 dtrace_actdesc_release(dtrace_actdesc_t
*act
, dtrace_vstate_t
*vstate
)
8943 dtrace_actkind_t kind
= act
->dtad_kind
;
8946 ASSERT(act
->dtad_refcnt
>= 1);
8948 if (--act
->dtad_refcnt
!= 0)
8951 if ((dp
= act
->dtad_difo
) != NULL
)
8952 dtrace_difo_release(dp
, vstate
);
8954 if (DTRACEACT_ISPRINTFLIKE(kind
)) {
8955 char *str
= (char *)(uintptr_t)act
->dtad_arg
;
8957 /* ASSERT((str != NULL && (uintptr_t)str >= KERNELBASE) ||
8958 (str == NULL && act->dtad_kind == DTRACEACT_PRINTA));*/
8961 kmem_free(str
, strlen(str
) + 1);
8964 kmem_free(act
, sizeof (dtrace_actdesc_t
));
8968 * DTrace ECB Functions
8970 static dtrace_ecb_t
*
8971 dtrace_ecb_add(dtrace_state_t
*state
, dtrace_probe_t
*probe
)
8976 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
8978 ecb
= kmem_zalloc(sizeof (dtrace_ecb_t
), KM_SLEEP
);
8979 ecb
->dte_predicate
= NULL
;
8980 ecb
->dte_probe
= probe
;
8983 * The default size is the size of the default action: recording
8986 ecb
->dte_size
= ecb
->dte_needed
= sizeof (dtrace_epid_t
);
8987 ecb
->dte_alignment
= sizeof (dtrace_epid_t
);
8989 epid
= state
->dts_epid
++;
8991 if (epid
- 1 >= state
->dts_necbs
) {
8992 dtrace_ecb_t
**oecbs
= state
->dts_ecbs
, **ecbs
;
8993 int necbs
= state
->dts_necbs
<< 1;
8995 ASSERT(epid
== state
->dts_necbs
+ 1);
8998 ASSERT(oecbs
== NULL
);
9002 ecbs
= kmem_zalloc(necbs
* sizeof (*ecbs
), KM_SLEEP
);
9005 bcopy(oecbs
, ecbs
, state
->dts_necbs
* sizeof (*ecbs
));
9007 dtrace_membar_producer();
9008 state
->dts_ecbs
= ecbs
;
9010 if (oecbs
!= NULL
) {
9012 * If this state is active, we must dtrace_sync()
9013 * before we can free the old dts_ecbs array: we're
9014 * coming in hot, and there may be active ring
9015 * buffer processing (which indexes into the dts_ecbs
9016 * array) on another CPU.
9018 if (state
->dts_activity
!= DTRACE_ACTIVITY_INACTIVE
)
9021 kmem_free(oecbs
, state
->dts_necbs
* sizeof (*ecbs
));
9024 dtrace_membar_producer();
9025 state
->dts_necbs
= necbs
;
9028 ecb
->dte_state
= state
;
9030 ASSERT(state
->dts_ecbs
[epid
- 1] == NULL
);
9031 dtrace_membar_producer();
9032 state
->dts_ecbs
[(ecb
->dte_epid
= epid
) - 1] = ecb
;
9038 dtrace_ecb_enable(dtrace_ecb_t
*ecb
)
9040 dtrace_probe_t
*probe
= ecb
->dte_probe
;
9042 lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
);
9043 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
9044 ASSERT(ecb
->dte_next
== NULL
);
9046 if (probe
== NULL
) {
9048 * This is the NULL probe -- there's nothing to do.
9053 if (probe
->dtpr_ecb
== NULL
) {
9054 dtrace_provider_t
*prov
= probe
->dtpr_provider
;
9057 * We're the first ECB on this probe.
9059 probe
->dtpr_ecb
= probe
->dtpr_ecb_last
= ecb
;
9061 if (ecb
->dte_predicate
!= NULL
)
9062 probe
->dtpr_predcache
= ecb
->dte_predicate
->dtp_cacheid
;
9064 prov
->dtpv_pops
.dtps_enable(prov
->dtpv_arg
,
9065 probe
->dtpr_id
, probe
->dtpr_arg
);
9068 * This probe is already active. Swing the last pointer to
9069 * point to the new ECB, and issue a dtrace_sync() to assure
9070 * that all CPUs have seen the change.
9072 ASSERT(probe
->dtpr_ecb_last
!= NULL
);
9073 probe
->dtpr_ecb_last
->dte_next
= ecb
;
9074 probe
->dtpr_ecb_last
= ecb
;
9075 probe
->dtpr_predcache
= 0;
9082 dtrace_ecb_resize(dtrace_ecb_t
*ecb
)
9084 uint32_t maxalign
= sizeof (dtrace_epid_t
);
9085 uint32_t align
= sizeof (uint8_t), offs
, diff
;
9086 dtrace_action_t
*act
;
9088 uint32_t aggbase
= UINT32_MAX
;
9089 dtrace_state_t
*state
= ecb
->dte_state
;
9092 * If we record anything, we always record the epid. (And we always
9095 offs
= sizeof (dtrace_epid_t
);
9096 ecb
->dte_size
= ecb
->dte_needed
= sizeof (dtrace_epid_t
);
9098 for (act
= ecb
->dte_action
; act
!= NULL
; act
= act
->dta_next
) {
9099 dtrace_recdesc_t
*rec
= &act
->dta_rec
;
9101 if ((align
= rec
->dtrd_alignment
) > maxalign
)
9104 if (!wastuple
&& act
->dta_intuple
) {
9106 * This is the first record in a tuple. Align the
9107 * offset to be at offset 4 in an 8-byte aligned
9110 diff
= offs
+ sizeof (dtrace_aggid_t
);
9112 if ((diff
= (diff
& (sizeof (uint64_t) - 1))))
9113 offs
+= sizeof (uint64_t) - diff
;
9115 aggbase
= offs
- sizeof (dtrace_aggid_t
);
9116 ASSERT(!(aggbase
& (sizeof (uint64_t) - 1)));
9120 if (rec
->dtrd_size
!= 0 && (diff
= (offs
& (align
- 1)))) {
9122 * The current offset is not properly aligned; align it.
9124 offs
+= align
- diff
;
9127 rec
->dtrd_offset
= offs
;
9129 if (offs
+ rec
->dtrd_size
> ecb
->dte_needed
) {
9130 ecb
->dte_needed
= offs
+ rec
->dtrd_size
;
9132 if (ecb
->dte_needed
> state
->dts_needed
)
9133 state
->dts_needed
= ecb
->dte_needed
;
9136 if (DTRACEACT_ISAGG(act
->dta_kind
)) {
9137 dtrace_aggregation_t
*agg
= (dtrace_aggregation_t
*)act
;
9138 dtrace_action_t
*first
= agg
->dtag_first
, *prev
;
9140 ASSERT(rec
->dtrd_size
!= 0 && first
!= NULL
);
9142 ASSERT(aggbase
!= UINT32_MAX
);
9144 agg
->dtag_base
= aggbase
;
9146 while ((prev
= first
->dta_prev
) != NULL
&&
9147 DTRACEACT_ISAGG(prev
->dta_kind
)) {
9148 agg
= (dtrace_aggregation_t
*)prev
;
9149 first
= agg
->dtag_first
;
9153 offs
= prev
->dta_rec
.dtrd_offset
+
9154 prev
->dta_rec
.dtrd_size
;
9156 offs
= sizeof (dtrace_epid_t
);
9160 if (!act
->dta_intuple
)
9161 ecb
->dte_size
= offs
+ rec
->dtrd_size
;
9163 offs
+= rec
->dtrd_size
;
9166 wastuple
= act
->dta_intuple
;
9169 if ((act
= ecb
->dte_action
) != NULL
&&
9170 !(act
->dta_kind
== DTRACEACT_SPECULATE
&& act
->dta_next
== NULL
) &&
9171 ecb
->dte_size
== sizeof (dtrace_epid_t
)) {
9173 * If the size is still sizeof (dtrace_epid_t), then all
9174 * actions store no data; set the size to 0.
9176 ecb
->dte_alignment
= maxalign
;
9180 * If the needed space is still sizeof (dtrace_epid_t), then
9181 * all actions need no additional space; set the needed
9184 if (ecb
->dte_needed
== sizeof (dtrace_epid_t
))
9185 ecb
->dte_needed
= 0;
9191 * Set our alignment, and make sure that the dte_size and dte_needed
9192 * are aligned to the size of an EPID.
9194 ecb
->dte_alignment
= maxalign
;
9195 ecb
->dte_size
= (ecb
->dte_size
+ (sizeof (dtrace_epid_t
) - 1)) &
9196 ~(sizeof (dtrace_epid_t
) - 1);
9197 ecb
->dte_needed
= (ecb
->dte_needed
+ (sizeof (dtrace_epid_t
) - 1)) &
9198 ~(sizeof (dtrace_epid_t
) - 1);
9199 ASSERT(ecb
->dte_size
<= ecb
->dte_needed
);
9202 static dtrace_action_t
*
9203 dtrace_ecb_aggregation_create(dtrace_ecb_t
*ecb
, dtrace_actdesc_t
*desc
)
9205 dtrace_aggregation_t
*agg
;
9206 size_t size
= sizeof (uint64_t);
9207 int ntuple
= desc
->dtad_ntuple
;
9208 dtrace_action_t
*act
;
9209 dtrace_recdesc_t
*frec
;
9210 dtrace_aggid_t aggid
;
9211 dtrace_state_t
*state
= ecb
->dte_state
;
9213 agg
= kmem_zalloc(sizeof (dtrace_aggregation_t
), KM_SLEEP
);
9214 agg
->dtag_ecb
= ecb
;
9216 ASSERT(DTRACEACT_ISAGG(desc
->dtad_kind
));
9218 switch (desc
->dtad_kind
) {
9220 agg
->dtag_initial
= UINT64_MAX
;
9221 agg
->dtag_aggregate
= dtrace_aggregate_min
;
9225 agg
->dtag_aggregate
= dtrace_aggregate_max
;
9228 case DTRACEAGG_COUNT
:
9229 agg
->dtag_aggregate
= dtrace_aggregate_count
;
9232 case DTRACEAGG_QUANTIZE
:
9233 agg
->dtag_aggregate
= dtrace_aggregate_quantize
;
9234 size
= (((sizeof (uint64_t) * NBBY
) - 1) * 2 + 1) *
9238 case DTRACEAGG_LQUANTIZE
: {
9239 uint16_t step
= DTRACE_LQUANTIZE_STEP(desc
->dtad_arg
);
9240 uint16_t levels
= DTRACE_LQUANTIZE_LEVELS(desc
->dtad_arg
);
9242 agg
->dtag_initial
= desc
->dtad_arg
;
9243 agg
->dtag_aggregate
= dtrace_aggregate_lquantize
;
9245 if (step
== 0 || levels
== 0)
9248 size
= levels
* sizeof (uint64_t) + 3 * sizeof (uint64_t);
9253 agg
->dtag_aggregate
= dtrace_aggregate_avg
;
9254 size
= sizeof (uint64_t) * 2;
9258 agg
->dtag_aggregate
= dtrace_aggregate_sum
;
9265 agg
->dtag_action
.dta_rec
.dtrd_size
= size
;
9271 * We must make sure that we have enough actions for the n-tuple.
9273 for (act
= ecb
->dte_action_last
; act
!= NULL
; act
= act
->dta_prev
) {
9274 if (DTRACEACT_ISAGG(act
->dta_kind
))
9277 if (--ntuple
== 0) {
9279 * This is the action with which our n-tuple begins.
9281 agg
->dtag_first
= act
;
9287 * This n-tuple is short by ntuple elements. Return failure.
9289 ASSERT(ntuple
!= 0);
9291 kmem_free(agg
, sizeof (dtrace_aggregation_t
));
9296 * If the last action in the tuple has a size of zero, it's actually
9297 * an expression argument for the aggregating action.
9299 ASSERT(ecb
->dte_action_last
!= NULL
);
9300 act
= ecb
->dte_action_last
;
9302 if (act
->dta_kind
== DTRACEACT_DIFEXPR
) {
9303 ASSERT(act
->dta_difo
!= NULL
);
9305 if (act
->dta_difo
->dtdo_rtype
.dtdt_size
== 0)
9306 agg
->dtag_hasarg
= 1;
9310 * We need to allocate an id for this aggregation.
9312 aggid
= (dtrace_aggid_t
)(uintptr_t)vmem_alloc(state
->dts_aggid_arena
, 1,
9313 VM_BESTFIT
| VM_SLEEP
);
9315 if (aggid
- 1 >= state
->dts_naggregations
) {
9316 dtrace_aggregation_t
**oaggs
= state
->dts_aggregations
;
9317 dtrace_aggregation_t
**aggs
;
9318 int naggs
= state
->dts_naggregations
<< 1;
9319 int onaggs
= state
->dts_naggregations
;
9321 ASSERT(aggid
== state
->dts_naggregations
+ 1);
9324 ASSERT(oaggs
== NULL
);
9328 aggs
= kmem_zalloc(naggs
* sizeof (*aggs
), KM_SLEEP
);
9330 if (oaggs
!= NULL
) {
9331 bcopy(oaggs
, aggs
, onaggs
* sizeof (*aggs
));
9332 kmem_free(oaggs
, onaggs
* sizeof (*aggs
));
9335 state
->dts_aggregations
= aggs
;
9336 state
->dts_naggregations
= naggs
;
9339 ASSERT(state
->dts_aggregations
[aggid
- 1] == NULL
);
9340 state
->dts_aggregations
[(agg
->dtag_id
= aggid
) - 1] = agg
;
9342 frec
= &agg
->dtag_first
->dta_rec
;
9343 if (frec
->dtrd_alignment
< sizeof (dtrace_aggid_t
))
9344 frec
->dtrd_alignment
= sizeof (dtrace_aggid_t
);
9346 for (act
= agg
->dtag_first
; act
!= NULL
; act
= act
->dta_next
) {
9347 ASSERT(!act
->dta_intuple
);
9348 act
->dta_intuple
= 1;
9351 return (&agg
->dtag_action
);
9355 dtrace_ecb_aggregation_destroy(dtrace_ecb_t
*ecb
, dtrace_action_t
*act
)
9357 dtrace_aggregation_t
*agg
= (dtrace_aggregation_t
*)act
;
9358 dtrace_state_t
*state
= ecb
->dte_state
;
9359 dtrace_aggid_t aggid
= agg
->dtag_id
;
9361 ASSERT(DTRACEACT_ISAGG(act
->dta_kind
));
9362 vmem_free(state
->dts_aggid_arena
, (void *)(uintptr_t)aggid
, 1);
9364 ASSERT(state
->dts_aggregations
[aggid
- 1] == agg
);
9365 state
->dts_aggregations
[aggid
- 1] = NULL
;
9367 kmem_free(agg
, sizeof (dtrace_aggregation_t
));
9371 dtrace_ecb_action_add(dtrace_ecb_t
*ecb
, dtrace_actdesc_t
*desc
)
9373 dtrace_action_t
*action
, *last
;
9374 dtrace_difo_t
*dp
= desc
->dtad_difo
;
9375 uint32_t size
= 0, align
= sizeof (uint8_t), mask
;
9376 uint16_t format
= 0;
9377 dtrace_recdesc_t
*rec
;
9378 dtrace_state_t
*state
= ecb
->dte_state
;
9379 dtrace_optval_t
*opt
= state
->dts_options
, nframes
, strsize
;
9380 uint64_t arg
= desc
->dtad_arg
;
9382 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
9383 ASSERT(ecb
->dte_action
== NULL
|| ecb
->dte_action
->dta_refcnt
== 1);
9385 if (DTRACEACT_ISAGG(desc
->dtad_kind
)) {
9387 * If this is an aggregating action, there must be neither
9388 * a speculate nor a commit on the action chain.
9390 dtrace_action_t
*act
;
9392 for (act
= ecb
->dte_action
; act
!= NULL
; act
= act
->dta_next
) {
9393 if (act
->dta_kind
== DTRACEACT_COMMIT
)
9396 if (act
->dta_kind
== DTRACEACT_SPECULATE
)
9400 action
= dtrace_ecb_aggregation_create(ecb
, desc
);
9405 if (DTRACEACT_ISDESTRUCTIVE(desc
->dtad_kind
) ||
9406 (desc
->dtad_kind
== DTRACEACT_DIFEXPR
&&
9407 dp
!= NULL
&& dp
->dtdo_destructive
)) {
9408 state
->dts_destructive
= 1;
9411 switch (desc
->dtad_kind
) {
9412 case DTRACEACT_PRINTF
:
9413 case DTRACEACT_PRINTA
:
9414 case DTRACEACT_SYSTEM
:
9415 case DTRACEACT_FREOPEN
:
9417 * We know that our arg is a string -- turn it into a
9421 ASSERT(desc
->dtad_kind
== DTRACEACT_PRINTA
);
9424 ASSERT(arg
!= NULL
);
9425 /* ASSERT(arg > KERNELBASE); */
9426 format
= dtrace_format_add(state
,
9427 (char *)(uintptr_t)arg
);
9431 case DTRACEACT_LIBACT
:
9432 case DTRACEACT_DIFEXPR
:
9436 if ((size
= dp
->dtdo_rtype
.dtdt_size
) != 0)
9439 if (dp
->dtdo_rtype
.dtdt_kind
== DIF_TYPE_STRING
) {
9440 if (!(dp
->dtdo_rtype
.dtdt_flags
& DIF_TF_BYREF
))
9443 size
= opt
[DTRACEOPT_STRSIZE
];
9448 case DTRACEACT_STACK
:
9449 if ((nframes
= arg
) == 0) {
9450 nframes
= opt
[DTRACEOPT_STACKFRAMES
];
9451 ASSERT(nframes
> 0);
9455 size
= nframes
* sizeof (pc_t
);
9458 case DTRACEACT_JSTACK
:
9459 if ((strsize
= DTRACE_USTACK_STRSIZE(arg
)) == 0)
9460 strsize
= opt
[DTRACEOPT_JSTACKSTRSIZE
];
9462 if ((nframes
= DTRACE_USTACK_NFRAMES(arg
)) == 0)
9463 nframes
= opt
[DTRACEOPT_JSTACKFRAMES
];
9465 arg
= DTRACE_USTACK_ARG(nframes
, strsize
);
9468 case DTRACEACT_USTACK
:
9469 if (desc
->dtad_kind
!= DTRACEACT_JSTACK
&&
9470 (nframes
= DTRACE_USTACK_NFRAMES(arg
)) == 0) {
9471 strsize
= DTRACE_USTACK_STRSIZE(arg
);
9472 nframes
= opt
[DTRACEOPT_USTACKFRAMES
];
9473 ASSERT(nframes
> 0);
9474 arg
= DTRACE_USTACK_ARG(nframes
, strsize
);
9478 * Save a slot for the pid.
9480 size
= (nframes
+ 1) * sizeof (uint64_t);
9481 size
+= DTRACE_USTACK_STRSIZE(arg
);
9482 size
= P2ROUNDUP(size
, (uint32_t)(sizeof (uintptr_t)));
9488 if (dp
== NULL
|| ((size
= dp
->dtdo_rtype
.dtdt_size
) !=
9489 sizeof (uint64_t)) ||
9490 (dp
->dtdo_rtype
.dtdt_flags
& DIF_TF_BYREF
))
9494 case DTRACEACT_USYM
:
9495 case DTRACEACT_UMOD
:
9496 case DTRACEACT_UADDR
:
9498 (dp
->dtdo_rtype
.dtdt_size
!= sizeof (uint64_t)) ||
9499 (dp
->dtdo_rtype
.dtdt_flags
& DIF_TF_BYREF
))
9503 * We have a slot for the pid, plus a slot for the
9504 * argument. To keep things simple (aligned with
9505 * bitness-neutral sizing), we store each as a 64-bit
9508 size
= 2 * sizeof (uint64_t);
9511 case DTRACEACT_STOP
:
9512 case DTRACEACT_BREAKPOINT
:
9513 case DTRACEACT_PANIC
:
9516 case DTRACEACT_CHILL
:
9517 case DTRACEACT_DISCARD
:
9518 case DTRACEACT_RAISE
:
9523 case DTRACEACT_EXIT
:
9525 (size
= dp
->dtdo_rtype
.dtdt_size
) != sizeof (int) ||
9526 (dp
->dtdo_rtype
.dtdt_flags
& DIF_TF_BYREF
))
9530 case DTRACEACT_SPECULATE
:
9531 if (ecb
->dte_size
> sizeof (dtrace_epid_t
))
9537 state
->dts_speculates
= 1;
9540 case DTRACEACT_COMMIT
: {
9541 dtrace_action_t
*act
= ecb
->dte_action
;
9543 for (; act
!= NULL
; act
= act
->dta_next
) {
9544 if (act
->dta_kind
== DTRACEACT_COMMIT
)
9557 if (size
!= 0 || desc
->dtad_kind
== DTRACEACT_SPECULATE
) {
9559 * If this is a data-storing action or a speculate,
9560 * we must be sure that there isn't a commit on the
9563 dtrace_action_t
*act
= ecb
->dte_action
;
9565 for (; act
!= NULL
; act
= act
->dta_next
) {
9566 if (act
->dta_kind
== DTRACEACT_COMMIT
)
9571 action
= kmem_zalloc(sizeof (dtrace_action_t
), KM_SLEEP
);
9572 action
->dta_rec
.dtrd_size
= size
;
9575 action
->dta_refcnt
= 1;
9576 rec
= &action
->dta_rec
;
9577 size
= rec
->dtrd_size
;
9579 for (mask
= sizeof (uint64_t) - 1; size
!= 0 && mask
> 0; mask
>>= 1) {
9580 if (!(size
& mask
)) {
9586 action
->dta_kind
= desc
->dtad_kind
;
9588 if ((action
->dta_difo
= dp
) != NULL
)
9589 dtrace_difo_hold(dp
);
9591 rec
->dtrd_action
= action
->dta_kind
;
9592 rec
->dtrd_arg
= arg
;
9593 rec
->dtrd_uarg
= desc
->dtad_uarg
;
9594 rec
->dtrd_alignment
= (uint16_t)align
;
9595 rec
->dtrd_format
= format
;
9597 if ((last
= ecb
->dte_action_last
) != NULL
) {
9598 ASSERT(ecb
->dte_action
!= NULL
);
9599 action
->dta_prev
= last
;
9600 last
->dta_next
= action
;
9602 ASSERT(ecb
->dte_action
== NULL
);
9603 ecb
->dte_action
= action
;
9606 ecb
->dte_action_last
= action
;
9612 dtrace_ecb_action_remove(dtrace_ecb_t
*ecb
)
9614 dtrace_action_t
*act
= ecb
->dte_action
, *next
;
9615 dtrace_vstate_t
*vstate
= &ecb
->dte_state
->dts_vstate
;
9619 if (act
!= NULL
&& act
->dta_refcnt
> 1) {
9620 ASSERT(act
->dta_next
== NULL
|| act
->dta_next
->dta_refcnt
== 1);
9623 for (; act
!= NULL
; act
= next
) {
9624 next
= act
->dta_next
;
9625 ASSERT(next
!= NULL
|| act
== ecb
->dte_action_last
);
9626 ASSERT(act
->dta_refcnt
== 1);
9628 if ((format
= act
->dta_rec
.dtrd_format
) != 0)
9629 dtrace_format_remove(ecb
->dte_state
, format
);
9631 if ((dp
= act
->dta_difo
) != NULL
)
9632 dtrace_difo_release(dp
, vstate
);
9634 if (DTRACEACT_ISAGG(act
->dta_kind
)) {
9635 dtrace_ecb_aggregation_destroy(ecb
, act
);
9637 kmem_free(act
, sizeof (dtrace_action_t
));
9642 ecb
->dte_action
= NULL
;
9643 ecb
->dte_action_last
= NULL
;
9644 ecb
->dte_size
= sizeof (dtrace_epid_t
);
9648 dtrace_ecb_disable(dtrace_ecb_t
*ecb
)
9651 * We disable the ECB by removing it from its probe.
9653 dtrace_ecb_t
*pecb
, *prev
= NULL
;
9654 dtrace_probe_t
*probe
= ecb
->dte_probe
;
9656 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
9658 if (probe
== NULL
) {
9660 * This is the NULL probe; there is nothing to disable.
9665 for (pecb
= probe
->dtpr_ecb
; pecb
!= NULL
; pecb
= pecb
->dte_next
) {
9671 ASSERT(pecb
!= NULL
);
9674 probe
->dtpr_ecb
= ecb
->dte_next
;
9676 prev
->dte_next
= ecb
->dte_next
;
9679 if (ecb
== probe
->dtpr_ecb_last
) {
9680 ASSERT(ecb
->dte_next
== NULL
);
9681 probe
->dtpr_ecb_last
= prev
;
9685 * The ECB has been disconnected from the probe; now sync to assure
9686 * that all CPUs have seen the change before returning.
9690 if (probe
->dtpr_ecb
== NULL
) {
9692 * That was the last ECB on the probe; clear the predicate
9693 * cache ID for the probe, disable it and sync one more time
9694 * to assure that we'll never hit it again.
9696 dtrace_provider_t
*prov
= probe
->dtpr_provider
;
9698 ASSERT(ecb
->dte_next
== NULL
);
9699 ASSERT(probe
->dtpr_ecb_last
== NULL
);
9700 probe
->dtpr_predcache
= DTRACE_CACHEIDNONE
;
9701 prov
->dtpv_pops
.dtps_disable(prov
->dtpv_arg
,
9702 probe
->dtpr_id
, probe
->dtpr_arg
);
9706 * There is at least one ECB remaining on the probe. If there
9707 * is _exactly_ one, set the probe's predicate cache ID to be
9708 * the predicate cache ID of the remaining ECB.
9710 ASSERT(probe
->dtpr_ecb_last
!= NULL
);
9711 ASSERT(probe
->dtpr_predcache
== DTRACE_CACHEIDNONE
);
9713 if (probe
->dtpr_ecb
== probe
->dtpr_ecb_last
) {
9714 dtrace_predicate_t
*p
= probe
->dtpr_ecb
->dte_predicate
;
9716 ASSERT(probe
->dtpr_ecb
->dte_next
== NULL
);
9719 probe
->dtpr_predcache
= p
->dtp_cacheid
;
9722 ecb
->dte_next
= NULL
;
}

static void
dtrace_ecb_destroy(dtrace_ecb_t *ecb)
{
	dtrace_state_t *state = ecb->dte_state;
	dtrace_vstate_t *vstate = &state->dts_vstate;
	dtrace_predicate_t *pred;
	dtrace_epid_t epid = ecb->dte_epid;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(ecb->dte_next == NULL);
	ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb);

	if ((pred = ecb->dte_predicate) != NULL)
		dtrace_predicate_release(pred, vstate);

	dtrace_ecb_action_remove(ecb);

	ASSERT(state->dts_ecbs[epid - 1] == ecb);
	state->dts_ecbs[epid - 1] = NULL;

	kmem_free(ecb, sizeof (dtrace_ecb_t));
}
9749 static dtrace_ecb_t
*
9750 dtrace_ecb_create(dtrace_state_t
*state
, dtrace_probe_t
*probe
,
9751 dtrace_enabling_t
*enab
)
9754 dtrace_predicate_t
*pred
;
9755 dtrace_actdesc_t
*act
;
9756 dtrace_provider_t
*prov
;
9757 dtrace_ecbdesc_t
*desc
= enab
->dten_current
;
9759 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
9760 ASSERT(state
!= NULL
);
9762 ecb
= dtrace_ecb_add(state
, probe
);
9763 ecb
->dte_uarg
= desc
->dted_uarg
;
9765 if ((pred
= desc
->dted_pred
.dtpdd_predicate
) != NULL
) {
9766 dtrace_predicate_hold(pred
);
9767 ecb
->dte_predicate
= pred
;
9770 if (probe
!= NULL
) {
9772 * If the provider shows more leg than the consumer is old
9773 * enough to see, we need to enable the appropriate implicit
9774 * predicate bits to prevent the ecb from activating at
9777 * Providers specifying DTRACE_PRIV_USER at register time
9778 * are stating that they need the /proc-style privilege
9779 * model to be enforced, and this is what DTRACE_COND_OWNER
9780 * and DTRACE_COND_ZONEOWNER will then do at probe time.
9782 prov
= probe
->dtpr_provider
;
9783 if (!(state
->dts_cred
.dcr_visible
& DTRACE_CRV_ALLPROC
) &&
9784 (prov
->dtpv_priv
.dtpp_flags
& DTRACE_PRIV_USER
))
9785 ecb
->dte_cond
|= DTRACE_COND_OWNER
;
9787 if (!(state
->dts_cred
.dcr_visible
& DTRACE_CRV_ALLZONE
) &&
9788 (prov
->dtpv_priv
.dtpp_flags
& DTRACE_PRIV_USER
))
9789 ecb
->dte_cond
|= DTRACE_COND_ZONEOWNER
;
9792 * If the provider shows us kernel innards and the user
9793 * is lacking sufficient privilege, enable the
9794 * DTRACE_COND_USERMODE implicit predicate.
9796 if (!(state
->dts_cred
.dcr_visible
& DTRACE_CRV_KERNEL
) &&
9797 (prov
->dtpv_priv
.dtpp_flags
& DTRACE_PRIV_KERNEL
))
9798 ecb
->dte_cond
|= DTRACE_COND_USERMODE
;
9801 if (dtrace_ecb_create_cache
!= NULL
) {
9803 * If we have a cached ecb, we'll use its action list instead
9804 * of creating our own (saving both time and space).
9806 dtrace_ecb_t
*cached
= dtrace_ecb_create_cache
;
9807 dtrace_action_t
*act_if
= cached
->dte_action
;
9809 if (act_if
!= NULL
) {
9810 ASSERT(act_if
->dta_refcnt
> 0);
9811 act_if
->dta_refcnt
++;
9812 ecb
->dte_action
= act_if
;
9813 ecb
->dte_action_last
= cached
->dte_action_last
;
9814 ecb
->dte_needed
= cached
->dte_needed
;
9815 ecb
->dte_size
= cached
->dte_size
;
9816 ecb
->dte_alignment
= cached
->dte_alignment
;
9822 for (act
= desc
->dted_action
; act
!= NULL
; act
= act
->dtad_next
) {
9823 if ((enab
->dten_error
= dtrace_ecb_action_add(ecb
, act
)) != 0) {
9824 dtrace_ecb_destroy(ecb
);
9829 dtrace_ecb_resize(ecb
);
9831 return (dtrace_ecb_create_cache
= ecb
);
}

static int
dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg)
{
	dtrace_ecb_t *ecb;
	dtrace_enabling_t *enab = arg;
	dtrace_state_t *state = enab->dten_vstate->dtvs_state;

	ASSERT(state != NULL);

	if (probe != NULL && probe->dtpr_gen < enab->dten_probegen) {
		/*
		 * This probe was created in a generation for which this
		 * enabling has previously created ECBs; we don't want to
		 * enable it again, so just kick out.
		 */
		return (DTRACE_MATCH_NEXT);
	}

	if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL)
		return (DTRACE_MATCH_DONE);

	dtrace_ecb_enable(ecb);
	return (DTRACE_MATCH_NEXT);
}
static dtrace_ecb_t *
dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id)
{
	dtrace_ecb_t *ecb;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (id == 0 || id > state->dts_necbs)
		return (NULL);

	ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL);
	ASSERT((ecb = state->dts_ecbs[id - 1]) == NULL || ecb->dte_epid == id);

	return (state->dts_ecbs[id - 1]);
}
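/*
 * Worked example (illustrative): EPIDs are 1-based, so EPID 1 maps to
 * dts_ecbs[0] and EPID dts_necbs maps to dts_ecbs[dts_necbs - 1].  EPID 0
 * (DTRACE_EPIDNONE, the value used for alignment padding in data buffers)
 * and any out-of-range id yield NULL.
 */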
static dtrace_aggregation_t *
dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id)
{
	dtrace_aggregation_t *agg;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (id == 0 || id > state->dts_naggregations)
		return (NULL);

	ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL);
	ASSERT((agg = state->dts_aggregations[id - 1]) == NULL ||
	    agg->dtag_id == id);

	return (state->dts_aggregations[id - 1]);
}
/*
 * DTrace Buffer Functions
 *
 * The following functions manipulate DTrace buffers.  Most of these functions
 * are called in the context of establishing or processing consumer state;
 * exceptions are explicitly noted.
 */

/*
 * Note:  called from cross call context.  This function switches the two
 * buffers on a given CPU.  The atomicity of this operation is assured by
 * disabling interrupts while the actual switch takes place; the disabling of
 * interrupts serializes the execution with any execution of dtrace_probe()
 * on the same CPU.
 */
static void
dtrace_buffer_switch(dtrace_buffer_t *buf)
{
	caddr_t tomax = buf->dtb_tomax;
	caddr_t xamot = buf->dtb_xamot;
	dtrace_icookie_t cookie;

	ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
	ASSERT(!(buf->dtb_flags & DTRACEBUF_RING));

	cookie = dtrace_interrupt_disable();
	buf->dtb_tomax = xamot;
	buf->dtb_xamot = tomax;
	buf->dtb_xamot_drops = buf->dtb_drops;
	buf->dtb_xamot_offset = buf->dtb_offset;
	buf->dtb_xamot_errors = buf->dtb_errors;
	buf->dtb_xamot_flags = buf->dtb_flags;
	buf->dtb_offset = 0;
	buf->dtb_drops = 0;
	buf->dtb_errors = 0;
	buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED);
	dtrace_interrupt_enable(cookie);
}
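/*
 * Illustrative sketch (not part of the original source): the same
 * active/inactive double-buffering idea in miniature.  Every name below
 * (example_buf_t, example_switch(), example_irq_disable()/_enable()) is a
 * hypothetical stand-in for dtrace_buffer_t and the interrupt-cookie
 * routines used above; this is a sketch of the pattern, not kernel code.
 */
#if 0
typedef struct example_buf {
	char	*eb_active;		/* written by producers */
	char	*eb_inactive;		/* drained by the consumer */
	size_t	eb_offset;		/* producer offset into eb_active */
	size_t	eb_inactive_offset;	/* offset snapshot taken at switch time */
} example_buf_t;

static void
example_switch(example_buf_t *b)
{
	unsigned long cookie = example_irq_disable();	/* hypothetical */
	char *tmp = b->eb_active;

	/* Swap the buffers and snapshot the producer offset atomically. */
	b->eb_active = b->eb_inactive;
	b->eb_inactive = tmp;
	b->eb_inactive_offset = b->eb_offset;
	b->eb_offset = 0;

	example_irq_enable(cookie);			/* hypothetical */
}
#endif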
/*
 * Note:  called from cross call context.  This function activates a buffer
 * on a CPU.  As with dtrace_buffer_switch(), the atomicity of the operation
 * is guaranteed by the disabling of interrupts.
 */
static void
dtrace_buffer_activate(dtrace_state_t *state)
{
	dtrace_buffer_t *buf;
	dtrace_icookie_t cookie = dtrace_interrupt_disable();

	buf = &state->dts_buffer[CPU->cpu_id];

	if (buf->dtb_tomax != NULL) {
		/*
		 * We might like to assert that the buffer is marked inactive,
		 * but this isn't necessarily true:  the buffer for the CPU
		 * that processes the BEGIN probe has its buffer activated
		 * manually.  In this case, we take the (harmless) action of
		 * re-clearing the INACTIVE bit.
		 */
		buf->dtb_flags &= ~DTRACEBUF_INACTIVE;
	}

	dtrace_interrupt_enable(cookie);
}
9959 dtrace_buffer_alloc(dtrace_buffer_t
*bufs
, size_t size
, int flags
,
9963 dtrace_buffer_t
*buf
;
9965 lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
);
9966 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
9968 if (size
> dtrace_nonroot_maxsize
&&
9969 !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL
, B_FALSE
))
9972 #if defined(__APPLE__)
9973 if (size
> (sane_size
/ 8) / (int)NCPU
) /* As in kdbg_set_nkdbufs(), roughly. */
9975 #endif /* __APPLE__ */
9980 if (cpu
!= DTRACE_CPUALL
&& cpu
!= cp
->cpu_id
)
9983 buf
= &bufs
[cp
->cpu_id
];
9986 * If there is already a buffer allocated for this CPU, it
9987 * is only possible that this is a DR event. In this case,
9988 * the buffer size must match our specified size.
9990 if (buf
->dtb_tomax
!= NULL
) {
9991 ASSERT(buf
->dtb_size
== size
);
9995 ASSERT(buf
->dtb_xamot
== NULL
);
9997 if ((buf
->dtb_tomax
= kmem_zalloc(size
, KM_NOSLEEP
)) == NULL
)
10000 buf
->dtb_size
= size
;
10001 buf
->dtb_flags
= flags
;
10002 buf
->dtb_offset
= 0;
10003 buf
->dtb_drops
= 0;
10005 if (flags
& DTRACEBUF_NOSWITCH
)
10008 if ((buf
->dtb_xamot
= kmem_zalloc(size
, KM_NOSLEEP
)) == NULL
)
10010 } while ((cp
= cp
->cpu_next
) != cpu_list
);
10018 if (cpu
!= DTRACE_CPUALL
&& cpu
!= cp
->cpu_id
)
10021 buf
= &bufs
[cp
->cpu_id
];
10023 if (buf
->dtb_xamot
!= NULL
) {
10024 ASSERT(buf
->dtb_tomax
!= NULL
);
10025 ASSERT(buf
->dtb_size
== size
);
10026 kmem_free(buf
->dtb_xamot
, size
);
10029 if (buf
->dtb_tomax
!= NULL
) {
10030 ASSERT(buf
->dtb_size
== size
);
10031 kmem_free(buf
->dtb_tomax
, size
);
10034 buf
->dtb_tomax
= NULL
;
10035 buf
->dtb_xamot
= NULL
;
10037 } while ((cp
= cp
->cpu_next
) != cpu_list
);
10043 * Note: called from probe context. This function just increments the drop
10044 * count on a buffer. It has been made a function to allow for the
10045 * possibility of understanding the source of mysterious drop counts. (A
10046 * problem for which one may be particularly disappointed that DTrace cannot
10047 * be used to understand DTrace.)
10050 dtrace_buffer_drop(dtrace_buffer_t
*buf
)
10056 * Note: called from probe context. This function is called to reserve space
10057 * in a buffer. If mstate is non-NULL, sets the scratch base and size in the
10058 * mstate. Returns the new offset in the buffer, or a negative value if an
10059 * error has occurred.
10062 dtrace_buffer_reserve(dtrace_buffer_t
*buf
, size_t needed
, size_t align
,
10063 dtrace_state_t
*state
, dtrace_mstate_t
*mstate
)
10065 intptr_t offs
= buf
->dtb_offset
, soffs
;
10070 if (buf
->dtb_flags
& DTRACEBUF_INACTIVE
)
10073 if ((tomax
= buf
->dtb_tomax
) == NULL
) {
10074 dtrace_buffer_drop(buf
);
10078 if (!(buf
->dtb_flags
& (DTRACEBUF_RING
| DTRACEBUF_FILL
))) {
10079 while (offs
& (align
- 1)) {
10081 * Assert that our alignment is off by a number which
10082 * is itself sizeof (uint32_t) aligned.
10084 ASSERT(!((align
- (offs
& (align
- 1))) &
10085 (sizeof (uint32_t) - 1)));
10086 DTRACE_STORE(uint32_t, tomax
, offs
, DTRACE_EPIDNONE
);
10087 offs
+= sizeof (uint32_t);
10090 if ((soffs
= offs
+ needed
) > buf
->dtb_size
) {
10091 dtrace_buffer_drop(buf
);
10095 if (mstate
== NULL
)
10098 mstate
->dtms_scratch_base
= (uintptr_t)tomax
+ soffs
;
10099 mstate
->dtms_scratch_size
= buf
->dtb_size
- soffs
;
10100 mstate
->dtms_scratch_ptr
= mstate
->dtms_scratch_base
;
10105 if (buf
->dtb_flags
& DTRACEBUF_FILL
) {
10106 if (state
->dts_activity
!= DTRACE_ACTIVITY_COOLDOWN
&&
10107 (buf
->dtb_flags
& DTRACEBUF_FULL
))
10112 total_off
= needed
+ (offs
& (align
- 1));
10115 * For a ring buffer, life is quite a bit more complicated. Before
10116 * we can store any padding, we need to adjust our wrapping offset.
10117 * (If we've never before wrapped or we're not about to, no adjustment
10120 if ((buf
->dtb_flags
& DTRACEBUF_WRAPPED
) ||
10121 offs
+ total_off
> buf
->dtb_size
) {
10122 woffs
= buf
->dtb_xamot_offset
;
10124 if (offs
+ total_off
> buf
->dtb_size
) {
10126 * We can't fit in the end of the buffer. First, a
10127 * sanity check that we can fit in the buffer at all.
10129 if (total_off
> buf
->dtb_size
) {
10130 dtrace_buffer_drop(buf
);
10135 * We're going to be storing at the top of the buffer,
10136 * so now we need to deal with the wrapped offset. We
10137 * only reset our wrapped offset to 0 if it is
10138 * currently greater than the current offset. If it
10139 * is less than the current offset, it is because a
10140 * previous allocation induced a wrap -- but the
10141 * allocation didn't subsequently take the space due
10142 * to an error or false predicate evaluation. In this
10143 * case, we'll just leave the wrapped offset alone: if
10144 * the wrapped offset hasn't been advanced far enough
10145 * for this allocation, it will be adjusted in the
10148 if (buf
->dtb_flags
& DTRACEBUF_WRAPPED
) {
10156 * Now we know that we're going to be storing to the
10157 * top of the buffer and that there is room for us
10158 * there. We need to clear the buffer from the current
10159 * offset to the end (there may be old gunk there).
10161 while (offs
< buf
->dtb_size
)
10165 * We need to set our offset to zero. And because we
10166 * are wrapping, we need to set the bit indicating as
10167 * much. We can also adjust our needed space back
10168 * down to the space required by the ECB -- we know
10169 * that the top of the buffer is aligned.
10172 total_off
= needed
;
10173 buf
->dtb_flags
|= DTRACEBUF_WRAPPED
;
10176 * There is room for us in the buffer, so we simply
10177 * need to check the wrapped offset.
10179 if (woffs
< offs
) {
10181 * The wrapped offset is less than the offset.
10182 * This can happen if we allocated buffer space
10183 * that induced a wrap, but then we didn't
10184 * subsequently take the space due to an error
10185 * or false predicate evaluation. This is
10186 * okay; we know that _this_ allocation isn't
10187 * going to induce a wrap. We still can't
10188 * reset the wrapped offset to be zero,
10189 * however: the space may have been trashed in
10190 * the previous failed probe attempt. But at
10191 * least the wrapped offset doesn't need to
10192 * be adjusted at all...
10198 while (offs
+ total_off
> woffs
) {
10199 dtrace_epid_t epid
= *(uint32_t *)(tomax
+ woffs
);
10202 if (epid
== DTRACE_EPIDNONE
) {
10203 size
= sizeof (uint32_t);
10205 ASSERT(epid
<= state
->dts_necbs
);
10206 ASSERT(state
->dts_ecbs
[epid
- 1] != NULL
);
10208 size
= state
->dts_ecbs
[epid
- 1]->dte_size
;
10211 ASSERT(woffs
+ size
<= buf
->dtb_size
);
10214 if (woffs
+ size
== buf
->dtb_size
) {
10216 * We've reached the end of the buffer; we want
10217 * to set the wrapped offset to 0 and break
10218 * out. However, if the offs is 0, then we're
10219 * in a strange edge-condition: the amount of
10220 * space that we want to reserve plus the size
10221 * of the record that we're overwriting is
10222 * greater than the size of the buffer. This
10223 * is problematic because if we reserve the
10224 * space but subsequently don't consume it (due
10225 * to a failed predicate or error) the wrapped
10226 * offset will be 0 -- yet the EPID at offset 0
10227 * will not be committed. This situation is
10228 * relatively easy to deal with: if we're in
10229 * this case, the buffer is indistinguishable
10230 * from one that hasn't wrapped; we need only
10231 * finish the job by clearing the wrapped bit,
10232 * explicitly setting the offset to be 0, and
10233 * zero'ing out the old data in the buffer.
10236 buf
->dtb_flags
&= ~DTRACEBUF_WRAPPED
;
10237 buf
->dtb_offset
= 0;
10240 while (woffs
< buf
->dtb_size
)
10241 tomax
[woffs
++] = 0;
10252 * We have a wrapped offset. It may be that the wrapped offset
10253 * has become zero -- that's okay.
10255 buf
->dtb_xamot_offset
= woffs
;
10260 * Now we can plow the buffer with any necessary padding.
10262 while (offs
& (align
- 1)) {
10264 * Assert that our alignment is off by a number which
10265 * is itself sizeof (uint32_t) aligned.
10267 ASSERT(!((align
- (offs
& (align
- 1))) &
10268 (sizeof (uint32_t) - 1)));
10269 DTRACE_STORE(uint32_t, tomax
, offs
, DTRACE_EPIDNONE
);
10270 offs
+= sizeof (uint32_t);
10273 if (buf
->dtb_flags
& DTRACEBUF_FILL
) {
10274 if (offs
+ needed
> buf
->dtb_size
- state
->dts_reserve
) {
10275 buf
->dtb_flags
|= DTRACEBUF_FULL
;
10280 if (mstate
== NULL
)
10284 * For ring buffers and fill buffers, the scratch space is always
10285 * the inactive buffer.
10287 mstate
->dtms_scratch_base
= (uintptr_t)buf
->dtb_xamot
;
10288 mstate
->dtms_scratch_size
= buf
->dtb_size
;
10289 mstate
->dtms_scratch_ptr
= mstate
->dtms_scratch_base
;
static void
dtrace_buffer_polish(dtrace_buffer_t *buf)
{
	ASSERT(buf->dtb_flags & DTRACEBUF_RING);
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (!(buf->dtb_flags & DTRACEBUF_WRAPPED))
		return;

	/*
	 * We need to polish the ring buffer.  There are three cases:
	 *
	 * - The first (and presumably most common) is that there is no gap
	 *   between the buffer offset and the wrapped offset.  In this case,
	 *   there is nothing in the buffer that isn't valid data; we can
	 *   mark the buffer as polished and return.
	 *
	 * - The second (less common than the first but still more common
	 *   than the third) is that there is a gap between the buffer offset
	 *   and the wrapped offset, and the wrapped offset is larger than the
	 *   buffer offset.  This can happen because of an alignment issue, or
	 *   can happen because of a call to dtrace_buffer_reserve() that
	 *   didn't subsequently consume the buffer space.  In this case,
	 *   we need to zero the data from the buffer offset to the wrapped
	 *   offset.
	 *
	 * - The third (and least common) is that there is a gap between the
	 *   buffer offset and the wrapped offset, but the wrapped offset is
	 *   _less_ than the buffer offset.  This can only happen because a
	 *   call to dtrace_buffer_reserve() induced a wrap, but the space
	 *   was not subsequently consumed.  In this case, we need to zero the
	 *   space from the offset to the end of the buffer _and_ from the
	 *   top of the buffer to the wrapped offset.
	 */
	if (buf->dtb_offset < buf->dtb_xamot_offset) {
		bzero(buf->dtb_tomax + buf->dtb_offset,
		    buf->dtb_xamot_offset - buf->dtb_offset);
	}

	if (buf->dtb_offset > buf->dtb_xamot_offset) {
		bzero(buf->dtb_tomax + buf->dtb_offset,
		    buf->dtb_size - buf->dtb_offset);
		bzero(buf->dtb_tomax, buf->dtb_xamot_offset);
	}
}
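/*
 * Worked example (illustrative figures): in a 1024-byte ring buffer, if
 * dtb_offset == dtb_xamot_offset == 700, there is no gap and nothing needs
 * to be zeroed (the first case above).  If dtb_offset == 512 and the wrapped
 * offset is 640, the 128-byte gap [512, 640) is zeroed (the second case).
 * If dtb_offset == 900 and the wrapped offset is 100, both [900, 1024) and
 * [0, 100) are zeroed (the third case).
 */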
static void
dtrace_buffer_free(dtrace_buffer_t *bufs)
{
	int i;

	for (i = 0; i < (int)NCPU; i++) {
		dtrace_buffer_t *buf = &bufs[i];

		if (buf->dtb_tomax == NULL) {
			ASSERT(buf->dtb_xamot == NULL);
			ASSERT(buf->dtb_size == 0);
			continue;
		}

		if (buf->dtb_xamot != NULL) {
			ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
			kmem_free(buf->dtb_xamot, buf->dtb_size);
		}

		kmem_free(buf->dtb_tomax, buf->dtb_size);
		buf->dtb_size = 0;
		buf->dtb_tomax = NULL;
		buf->dtb_xamot = NULL;
	}
}
/*
 * DTrace Enabling Functions
 */
static dtrace_enabling_t *
dtrace_enabling_create(dtrace_vstate_t *vstate)
{
	dtrace_enabling_t *enab;

	enab = kmem_zalloc(sizeof (dtrace_enabling_t), KM_SLEEP);
	enab->dten_vstate = vstate;

	return (enab);
}
static void
dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb)
{
	dtrace_ecbdesc_t **ndesc;
	size_t osize, nsize;

	/*
	 * We can't add to enablings after we've enabled them, or after we've
	 * retained them.
	 */
	ASSERT(enab->dten_probegen == 0);
	ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);

#if defined(__APPLE__)
	if (ecb == NULL) return; /* XXX protection against gcc 4.0 botch on x86 */
#endif /* __APPLE__ */

	if (enab->dten_ndesc < enab->dten_maxdesc) {
		enab->dten_desc[enab->dten_ndesc++] = ecb;
		return;
	}

	osize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);

	if (enab->dten_maxdesc == 0) {
		enab->dten_maxdesc = 1;
	} else {
		enab->dten_maxdesc <<= 1;
	}

	ASSERT(enab->dten_ndesc < enab->dten_maxdesc);

	nsize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);
	ndesc = kmem_zalloc(nsize, KM_SLEEP);
	bcopy(enab->dten_desc, ndesc, osize);
	kmem_free(enab->dten_desc, osize);

	enab->dten_desc = ndesc;
	enab->dten_desc[enab->dten_ndesc++] = ecb;
}
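/*
 * Illustrative sketch (not part of the original source): the same
 * grow-by-doubling append used by dtrace_enabling_add(), written against the
 * standard C allocator.  All names here (example_vec_t, example_append()) are
 * hypothetical; the capacity progression is 0 -> 1 -> 2 -> 4 -> ...
 */
#if 0
#include <stdlib.h>
#include <string.h>

typedef struct example_vec {
	void	**ev_items;	/* item array; capacity ev_max, ev_n in use */
	size_t	ev_n;
	size_t	ev_max;
} example_vec_t;

static int
example_append(example_vec_t *v, void *item)
{
	size_t nmax;
	void **nitems;

	if (v->ev_n < v->ev_max) {
		v->ev_items[v->ev_n++] = item;	/* fast path: room available */
		return (0);
	}

	/* Double the capacity, copy the old contents, then free the old array. */
	nmax = (v->ev_max == 0) ? 1 : v->ev_max << 1;

	if ((nitems = calloc(nmax, sizeof (void *))) == NULL)
		return (-1);

	if (v->ev_max != 0)
		memcpy(nitems, v->ev_items, v->ev_max * sizeof (void *));
	free(v->ev_items);

	v->ev_items = nitems;
	v->ev_max = nmax;
	v->ev_items[v->ev_n++] = item;
	return (0);
}
#endif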
10422 dtrace_enabling_addlike(dtrace_enabling_t
*enab
, dtrace_ecbdesc_t
*ecb
,
10423 dtrace_probedesc_t
*pd
)
10425 dtrace_ecbdesc_t
*new;
10426 dtrace_predicate_t
*pred
;
10427 dtrace_actdesc_t
*act
;
10430 * We're going to create a new ECB description that matches the
10431 * specified ECB in every way, but has the specified probe description.
10433 new = kmem_zalloc(sizeof (dtrace_ecbdesc_t
), KM_SLEEP
);
10435 if ((pred
= ecb
->dted_pred
.dtpdd_predicate
) != NULL
)
10436 dtrace_predicate_hold(pred
);
10438 for (act
= ecb
->dted_action
; act
!= NULL
; act
= act
->dtad_next
)
10439 dtrace_actdesc_hold(act
);
10441 new->dted_action
= ecb
->dted_action
;
10442 new->dted_pred
= ecb
->dted_pred
;
10443 new->dted_probe
= *pd
;
10444 new->dted_uarg
= ecb
->dted_uarg
;
10446 dtrace_enabling_add(enab
, new);
10450 dtrace_enabling_dump(dtrace_enabling_t
*enab
)
10454 for (i
= 0; i
< enab
->dten_ndesc
; i
++) {
10455 dtrace_probedesc_t
*desc
= &enab
->dten_desc
[i
]->dted_probe
;
10457 cmn_err(CE_NOTE
, "enabling probe %d (%s:%s:%s:%s)", i
,
10458 desc
->dtpd_provider
, desc
->dtpd_mod
,
10459 desc
->dtpd_func
, desc
->dtpd_name
);
10464 dtrace_enabling_destroy(dtrace_enabling_t
*enab
)
10467 dtrace_ecbdesc_t
*ep
;
10468 dtrace_vstate_t
*vstate
= enab
->dten_vstate
;
10470 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
10472 for (i
= 0; i
< enab
->dten_ndesc
; i
++) {
10473 dtrace_actdesc_t
*act
, *next
;
10474 dtrace_predicate_t
*pred
;
10476 ep
= enab
->dten_desc
[i
];
10478 if ((pred
= ep
->dted_pred
.dtpdd_predicate
) != NULL
)
10479 dtrace_predicate_release(pred
, vstate
);
10481 for (act
= ep
->dted_action
; act
!= NULL
; act
= next
) {
10482 next
= act
->dtad_next
;
10483 dtrace_actdesc_release(act
, vstate
);
10486 kmem_free(ep
, sizeof (dtrace_ecbdesc_t
));
10489 kmem_free(enab
->dten_desc
,
10490 enab
->dten_maxdesc
* sizeof (dtrace_enabling_t
*));
10493 * If this was a retained enabling, decrement the dts_nretained count
10494 * and take it off of the dtrace_retained list.
10496 if (enab
->dten_prev
!= NULL
|| enab
->dten_next
!= NULL
||
10497 dtrace_retained
== enab
) {
10498 ASSERT(enab
->dten_vstate
->dtvs_state
!= NULL
);
10499 ASSERT(enab
->dten_vstate
->dtvs_state
->dts_nretained
> 0);
10500 enab
->dten_vstate
->dtvs_state
->dts_nretained
--;
10503 if (enab
->dten_prev
== NULL
) {
10504 if (dtrace_retained
== enab
) {
10505 dtrace_retained
= enab
->dten_next
;
10507 if (dtrace_retained
!= NULL
)
10508 dtrace_retained
->dten_prev
= NULL
;
10511 ASSERT(enab
!= dtrace_retained
);
10512 ASSERT(dtrace_retained
!= NULL
);
10513 enab
->dten_prev
->dten_next
= enab
->dten_next
;
10516 if (enab
->dten_next
!= NULL
) {
10517 ASSERT(dtrace_retained
!= NULL
);
10518 enab
->dten_next
->dten_prev
= enab
->dten_prev
;
10521 kmem_free(enab
, sizeof (dtrace_enabling_t
));
10525 dtrace_enabling_retain(dtrace_enabling_t
*enab
)
10527 dtrace_state_t
*state
;
10529 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
10530 ASSERT(enab
->dten_next
== NULL
&& enab
->dten_prev
== NULL
);
10531 ASSERT(enab
->dten_vstate
!= NULL
);
10533 state
= enab
->dten_vstate
->dtvs_state
;
10534 ASSERT(state
!= NULL
);
10537 * We only allow each state to retain dtrace_retain_max enablings.
10539 if (state
->dts_nretained
>= dtrace_retain_max
)
10542 state
->dts_nretained
++;
10544 if (dtrace_retained
== NULL
) {
10545 dtrace_retained
= enab
;
10549 enab
->dten_next
= dtrace_retained
;
10550 dtrace_retained
->dten_prev
= enab
;
10551 dtrace_retained
= enab
;
10557 dtrace_enabling_replicate(dtrace_state_t
*state
, dtrace_probedesc_t
*match
,
10558 dtrace_probedesc_t
*create
)
10560 dtrace_enabling_t
*new, *enab
;
10561 int found
= 0, err
= ENOENT
;
10563 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
10564 ASSERT(strlen(match
->dtpd_provider
) < DTRACE_PROVNAMELEN
);
10565 ASSERT(strlen(match
->dtpd_mod
) < DTRACE_MODNAMELEN
);
10566 ASSERT(strlen(match
->dtpd_func
) < DTRACE_FUNCNAMELEN
);
10567 ASSERT(strlen(match
->dtpd_name
) < DTRACE_NAMELEN
);
10569 new = dtrace_enabling_create(&state
->dts_vstate
);
10572 * Iterate over all retained enablings, looking for enablings that
10573 * match the specified state.
10575 for (enab
= dtrace_retained
; enab
!= NULL
; enab
= enab
->dten_next
) {
10579 * dtvs_state can only be NULL for helper enablings -- and
10580 * helper enablings can't be retained.
10582 ASSERT(enab
->dten_vstate
->dtvs_state
!= NULL
);
10584 if (enab
->dten_vstate
->dtvs_state
!= state
)
10588 * Now iterate over each probe description; we're looking for
10589 * an exact match to the specified probe description.
10591 for (i
= 0; i
< enab
->dten_ndesc
; i
++) {
10592 dtrace_ecbdesc_t
*ep
= enab
->dten_desc
[i
];
10593 dtrace_probedesc_t
*pd
= &ep
->dted_probe
;
10595 if (strcmp(pd
->dtpd_provider
, match
->dtpd_provider
))
10598 if (strcmp(pd
->dtpd_mod
, match
->dtpd_mod
))
10601 if (strcmp(pd
->dtpd_func
, match
->dtpd_func
))
10604 if (strcmp(pd
->dtpd_name
, match
->dtpd_name
))
10608 * We have a winning probe! Add it to our growing
10612 dtrace_enabling_addlike(new, ep
, create
);
10616 if (!found
|| (err
= dtrace_enabling_retain(new)) != 0) {
10617 dtrace_enabling_destroy(new);
10625 dtrace_enabling_retract(dtrace_state_t
*state
)
10627 dtrace_enabling_t
*enab
, *next
;
10629 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
10632 * Iterate over all retained enablings, destroy the enablings retained
10633 * for the specified state.
10635 for (enab
= dtrace_retained
; enab
!= NULL
; enab
= next
) {
10636 next
= enab
->dten_next
;
10639 * dtvs_state can only be NULL for helper enablings -- and
10640 * helper enablings can't be retained.
10642 ASSERT(enab
->dten_vstate
->dtvs_state
!= NULL
);
10644 if (enab
->dten_vstate
->dtvs_state
== state
) {
10645 ASSERT(state
->dts_nretained
> 0);
10646 dtrace_enabling_destroy(enab
);
10650 ASSERT(state
->dts_nretained
== 0);
10654 dtrace_enabling_match(dtrace_enabling_t
*enab
, int *nmatched
)
10659 lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
);
10660 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
10662 for (i
= 0; i
< enab
->dten_ndesc
; i
++) {
10663 dtrace_ecbdesc_t
*ep
= enab
->dten_desc
[i
];
10665 enab
->dten_current
= ep
;
10666 enab
->dten_error
= 0;
10668 matched
+= dtrace_probe_enable(&ep
->dted_probe
, enab
);
10670 if (enab
->dten_error
!= 0) {
10672 * If we get an error half-way through enabling the
10673 * probes, we kick out -- perhaps with some number of
10674 * them enabled. Leaving enabled probes enabled may
10675 * be slightly confusing for user-level, but we expect
10676 * that no one will attempt to actually drive on in
10677 * the face of such errors. If this is an anonymous
10678 * enabling (indicated with a NULL nmatched pointer),
10679 * we cmn_err() a message. We aren't expecting to
10680 * get such an error -- such as it can exist at all,
10681 * it would be a result of corrupted DOF in the driver
10684 if (nmatched
== NULL
) {
10685 cmn_err(CE_WARN
, "dtrace_enabling_match() "
10686 "error on %p: %d", (void *)ep
,
10690 return (enab
->dten_error
);
10694 enab
->dten_probegen
= dtrace_probegen
;
10695 if (nmatched
!= NULL
)
10696 *nmatched
= matched
;
10702 dtrace_enabling_matchall(void)
10704 dtrace_enabling_t
*enab
;
10706 lck_mtx_lock(&cpu_lock
);
10707 lck_mtx_lock(&dtrace_lock
);
10710 * Because we can be called after dtrace_detach() has been called, we
10711 * cannot assert that there are retained enablings. We can safely
10712 * load from dtrace_retained, however: the taskq_destroy() at the
10713 * end of dtrace_detach() will block pending our completion.
10715 for (enab
= dtrace_retained
; enab
!= NULL
; enab
= enab
->dten_next
)
10716 (void) dtrace_enabling_match(enab
, NULL
);
10718 lck_mtx_unlock(&dtrace_lock
);
10719 lck_mtx_unlock(&cpu_lock
);
10723 dtrace_enabling_matchstate(dtrace_state_t
*state
, int *nmatched
)
10725 dtrace_enabling_t
*enab
;
10726 int matched
, total_matched
= 0, err
;
10728 lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
);
10729 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
10731 for (enab
= dtrace_retained
; enab
!= NULL
; enab
= enab
->dten_next
) {
10732 ASSERT(enab
->dten_vstate
->dtvs_state
!= NULL
);
10734 if (enab
->dten_vstate
->dtvs_state
!= state
)
10737 if ((err
= dtrace_enabling_match(enab
, &matched
)) != 0)
10740 total_matched
+= matched
;
10743 if (nmatched
!= NULL
)
10744 *nmatched
= total_matched
;
10750 * If an enabling is to be enabled without having matched probes (that is, if
10751 * dtrace_state_go() is to be called on the underlying dtrace_state_t), the
10752 * enabling must be _primed_ by creating an ECB for every ECB description.
10753 * This must be done to assure that we know the number of speculations, the
10754 * number of aggregations, the minimum buffer size needed, etc. before we
10755 * transition out of DTRACE_ACTIVITY_INACTIVE. To do this without actually
10756 * enabling any probes, we create ECBs for every ECB decription, but with a
10757 * NULL probe -- which is exactly what this function does.
10760 dtrace_enabling_prime(dtrace_state_t
*state
)
10762 dtrace_enabling_t
*enab
;
10765 for (enab
= dtrace_retained
; enab
!= NULL
; enab
= enab
->dten_next
) {
10766 ASSERT(enab
->dten_vstate
->dtvs_state
!= NULL
);
10768 if (enab
->dten_vstate
->dtvs_state
!= state
)
10772 * We don't want to prime an enabling more than once, lest
10773 * we allow a malicious user to induce resource exhaustion.
10774 * (The ECBs that result from priming an enabling aren't
10775 * leaked -- but they also aren't deallocated until the
10776 * consumer state is destroyed.)
10778 if (enab
->dten_primed
)
10781 for (i
= 0; i
< enab
->dten_ndesc
; i
++) {
10782 enab
->dten_current
= enab
->dten_desc
[i
];
10783 (void) dtrace_probe_enable(NULL
, enab
);
10786 enab
->dten_primed
= 1;
10791 * Called to indicate that probes should be provided due to retained
10792 * enablings. This is implemented in terms of dtrace_probe_provide(), but it
10793 * must take an initial lap through the enabling calling the dtps_provide()
10794 * entry point explicitly to allow for autocreated probes.
10797 dtrace_enabling_provide(dtrace_provider_t
*prv
)
10800 dtrace_probedesc_t desc
;
10802 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
10803 lck_mtx_assert(&dtrace_provider_lock
, LCK_MTX_ASSERT_OWNED
);
10807 prv
= dtrace_provider
;
10811 dtrace_enabling_t
*enab
= dtrace_retained
;
10812 void *parg
= prv
->dtpv_arg
;
10814 for (; enab
!= NULL
; enab
= enab
->dten_next
) {
10815 for (i
= 0; i
< enab
->dten_ndesc
; i
++) {
10816 desc
= enab
->dten_desc
[i
]->dted_probe
;
10817 lck_mtx_unlock(&dtrace_lock
);
10818 prv
->dtpv_pops
.dtps_provide(parg
, &desc
);
10819 lck_mtx_lock(&dtrace_lock
);
10822 } while (all
&& (prv
= prv
->dtpv_next
) != NULL
);
10824 lck_mtx_unlock(&dtrace_lock
);
10825 dtrace_probe_provide(NULL
, all
? NULL
: prv
);
10826 lck_mtx_lock(&dtrace_lock
);
}

/*
 * DTrace DOF Functions
 */
static void
dtrace_dof_error(dof_hdr_t *dof, const char *str)
{
#pragma unused(dof)
	if (dtrace_err_verbose)
		cmn_err(CE_WARN, "failed to process DOF: %s", str);

#ifdef DTRACE_ERRDEBUG
	dtrace_errdebug(str);
#endif
}
/*
 * Create DOF out of a currently enabled state.  Right now, we only create
 * DOF containing the run-time options -- but this could be expanded to create
 * complete DOF representing the enabled state.
 */
static dof_hdr_t *
dtrace_dof_create(dtrace_state_t *state)
{
	dof_hdr_t *dof;
	dof_sec_t *sec;
	dof_optdesc_t *opt;
	int i, len = sizeof (dof_hdr_t) +
	    roundup(sizeof (dof_sec_t), sizeof (uint64_t)) +
	    sizeof (dof_optdesc_t) * DTRACEOPT_MAX;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	dof = dt_kmem_zalloc_aligned(len, 8, KM_SLEEP);
	dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0;
	dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1;
	dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2;
	dof->dofh_ident[DOF_ID_MAG3] = DOF_MAG_MAG3;

	dof->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_NATIVE;
	dof->dofh_ident[DOF_ID_ENCODING] = DOF_ENCODE_NATIVE;
	dof->dofh_ident[DOF_ID_VERSION] = DOF_VERSION;
	dof->dofh_ident[DOF_ID_DIFVERS] = DIF_VERSION;
	dof->dofh_ident[DOF_ID_DIFIREG] = DIF_DIR_NREGS;
	dof->dofh_ident[DOF_ID_DIFTREG] = DIF_DTR_NREGS;

	dof->dofh_flags = 0;
	dof->dofh_hdrsize = sizeof (dof_hdr_t);
	dof->dofh_secsize = sizeof (dof_sec_t);
	dof->dofh_secnum = 1;	/* only DOF_SECT_OPTDESC */
	dof->dofh_secoff = sizeof (dof_hdr_t);
	dof->dofh_loadsz = len;
	dof->dofh_filesz = len;

	/*
	 * Fill in the option section header...
	 */
	sec = (dof_sec_t *)((uintptr_t)dof + sizeof (dof_hdr_t));
	sec->dofs_type = DOF_SECT_OPTDESC;
	sec->dofs_align = sizeof (uint64_t);
	sec->dofs_flags = DOF_SECF_LOAD;
	sec->dofs_entsize = sizeof (dof_optdesc_t);

	opt = (dof_optdesc_t *)((uintptr_t)sec +
	    roundup(sizeof (dof_sec_t), sizeof (uint64_t)));

	sec->dofs_offset = (uintptr_t)opt - (uintptr_t)dof;
	sec->dofs_size = sizeof (dof_optdesc_t) * DTRACEOPT_MAX;

	for (i = 0; i < DTRACEOPT_MAX; i++) {
		opt[i].dofo_option = i;
		opt[i].dofo_strtab = DOF_SECIDX_NONE;
		opt[i].dofo_value = state->dts_options[i];
	}

	return (dof);
}
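/*
 * Worked example (illustrative figures, not from the original source): the
 * allocation above is one header, one section header rounded up to 8-byte
 * alignment, and one option descriptor per option.  Assuming, purely for
 * illustration, sizeof (dof_hdr_t) == 64, sizeof (dof_sec_t) == 64 and
 * sizeof (dof_optdesc_t) == 16, a DTRACEOPT_MAX of 32 would give
 * len = 64 + 64 + 16 * 32 = 640 bytes, with the option array starting at
 * byte offset 128 from the start of the DOF (which is what dofs_offset
 * records).
 */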
static dof_hdr_t *
#if defined(__APPLE__)
dtrace_dof_copyin(user_addr_t uarg, int *errp)
#else
dtrace_dof_copyin(uintptr_t uarg, int *errp)
#endif /* __APPLE__ */
{
	dof_hdr_t hdr, *dof;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED);

	/*
	 * First, we're going to copyin() the sizeof (dof_hdr_t).
	 */
#if defined(__APPLE__)
	if (copyin(uarg, &hdr, sizeof (hdr)) != 0) {
#else
	if (copyin((void *)uarg, &hdr, sizeof (hdr)) != 0) {
#endif /* __APPLE__ */
		dtrace_dof_error(NULL, "failed to copyin DOF header");
		*errp = EFAULT;
		return (NULL);
	}

	/*
	 * Now we'll allocate the entire DOF and copy it in -- provided
	 * that the length isn't outrageous.
	 */
	if (hdr.dofh_loadsz >= dtrace_dof_maxsize) {
		dtrace_dof_error(&hdr, "load size exceeds maximum");
		*errp = E2BIG;
		return (NULL);
	}

	if (hdr.dofh_loadsz < sizeof (hdr)) {
		dtrace_dof_error(&hdr, "invalid load size");
		*errp = EINVAL;
		return (NULL);
	}

	dof = dt_kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP);

#if defined(__APPLE__)
	if (copyin(uarg, dof, hdr.dofh_loadsz) != 0) {
#else
	if (copyin((void *)uarg, dof, hdr.dofh_loadsz) != 0) {
#endif /* __APPLE__ */
		dt_kmem_free_aligned(dof, hdr.dofh_loadsz);
		*errp = EFAULT;
		return (NULL);
	}

	return (dof);
}
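/*
 * Illustrative sketch (not part of the original source): the same two-phase
 * "copy the fixed-size header, validate the advertised total length, then
 * copy the whole object" pattern, written for an ordinary byte buffer.  All
 * names (example_hdr_t, example_copyin(), EXAMPLE_MAXSIZE) are hypothetical.
 */
#if 0
#include <stdlib.h>
#include <string.h>

typedef struct example_hdr {
	unsigned long	eh_loadsz;	/* total object size, including header */
} example_hdr_t;

#define	EXAMPLE_MAXSIZE	(8UL * 1024 * 1024)	/* arbitrary sanity limit */

static void *
example_copyin(const void *src, size_t srclen)
{
	example_hdr_t hdr;
	void *obj;

	if (srclen < sizeof (hdr))
		return (NULL);			/* can't even read the header */

	memcpy(&hdr, src, sizeof (hdr));	/* phase one: header only */

	if (hdr.eh_loadsz < sizeof (hdr) ||	/* shorter than its own header */
	    hdr.eh_loadsz >= EXAMPLE_MAXSIZE ||	/* outrageously large */
	    hdr.eh_loadsz > srclen)		/* larger than what we were given */
		return (NULL);

	if ((obj = malloc(hdr.eh_loadsz)) == NULL)
		return (NULL);

	memcpy(obj, src, hdr.eh_loadsz);	/* phase two: full-length copy */
	return (obj);
}
#endif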
10963 #if defined(__APPLE__)
10966 dtrace_dof_copyin_from_proc(proc_t
* p
, user_addr_t uarg
, int *errp
)
10968 dof_hdr_t hdr
, *dof
;
10970 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_NOTOWNED
);
10973 * First, we're going to copyin() the sizeof (dof_hdr_t).
10975 if (uread(p
, &hdr
, sizeof(hdr
), uarg
) != KERN_SUCCESS
) {
10976 dtrace_dof_error(NULL
, "failed to copyin DOF header");
10982 * Now we'll allocate the entire DOF and copy it in -- provided
10983 * that the length isn't outrageous.
10985 if (hdr
.dofh_loadsz
>= dtrace_dof_maxsize
) {
10986 dtrace_dof_error(&hdr
, "load size exceeds maximum");
10991 if (hdr
.dofh_loadsz
< sizeof (hdr
)) {
10992 dtrace_dof_error(&hdr
, "invalid load size");
10997 dof
= dt_kmem_alloc_aligned(hdr
.dofh_loadsz
, 8, KM_SLEEP
);
10999 if (uread(p
, dof
, hdr
.dofh_loadsz
, uarg
) != KERN_SUCCESS
) {
11000 dt_kmem_free_aligned(dof
, hdr
.dofh_loadsz
);
11008 #endif /* __APPLE__ */
11011 dtrace_dof_property(const char *name
)
11015 unsigned int len
, i
;
11019 * Unfortunately, array of values in .conf files are always (and
11020 * only) interpreted to be integer arrays. We must read our DOF
11021 * as an integer array, and then squeeze it into a byte array.
11023 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY
, dtrace_devi
, 0,
11024 (char *)name
, (int **)&buf
, &len
) != DDI_PROP_SUCCESS
)
11027 for (i
= 0; i
< len
; i
++)
11028 buf
[i
] = (uchar_t
)(((int *)buf
)[i
]);
11030 if (len
< sizeof (dof_hdr_t
)) {
11031 ddi_prop_free(buf
);
11032 dtrace_dof_error(NULL
, "truncated header");
11036 if (len
< (loadsz
= ((dof_hdr_t
*)buf
)->dofh_loadsz
)) {
11037 ddi_prop_free(buf
);
11038 dtrace_dof_error(NULL
, "truncated DOF");
11042 if (loadsz
>= dtrace_dof_maxsize
) {
11043 ddi_prop_free(buf
);
11044 dtrace_dof_error(NULL
, "oversized DOF");
11048 dof
= dt_kmem_alloc_aligned(loadsz
, 8, KM_SLEEP
);
11049 bcopy(buf
, dof
, loadsz
);
11050 ddi_prop_free(buf
);
static void
dtrace_dof_destroy(dof_hdr_t *dof)
{
	dt_kmem_free_aligned(dof, dof->dofh_loadsz);
}
/*
 * Return the dof_sec_t pointer corresponding to a given section index.  If the
 * index is not valid, dtrace_dof_error() is called and NULL is returned.  If
 * a type other than DOF_SECT_NONE is specified, the header is checked against
 * this type and NULL is returned if the types do not match.
 */
static dof_sec_t *
dtrace_dof_sect(dof_hdr_t *dof, uint32_t type, dof_secidx_t i)
{
	dof_sec_t *sec = (dof_sec_t *)(uintptr_t)
	    ((uintptr_t)dof + dof->dofh_secoff + i * dof->dofh_secsize);

	if (i >= dof->dofh_secnum) {
		dtrace_dof_error(dof, "referenced section index is invalid");
		return (NULL);
	}

	if (!(sec->dofs_flags & DOF_SECF_LOAD)) {
		dtrace_dof_error(dof, "referenced section is not loadable");
		return (NULL);
	}

	if (type != DOF_SECT_NONE && type != sec->dofs_type) {
		dtrace_dof_error(dof, "referenced section is the wrong type");
		return (NULL);
	}

	return (sec);
}
11091 static dtrace_probedesc_t
*
11092 dtrace_dof_probedesc(dof_hdr_t
*dof
, dof_sec_t
*sec
, dtrace_probedesc_t
*desc
)
11094 dof_probedesc_t
*probe
;
11096 uintptr_t daddr
= (uintptr_t)dof
;
11100 if (sec
->dofs_type
!= DOF_SECT_PROBEDESC
) {
11101 dtrace_dof_error(dof
, "invalid probe section");
11105 if (sec
->dofs_align
!= sizeof (dof_secidx_t
)) {
11106 dtrace_dof_error(dof
, "bad alignment in probe description");
11110 if (sec
->dofs_offset
+ sizeof (dof_probedesc_t
) > dof
->dofh_loadsz
) {
11111 dtrace_dof_error(dof
, "truncated probe description");
11115 probe
= (dof_probedesc_t
*)(uintptr_t)(daddr
+ sec
->dofs_offset
);
11116 strtab
= dtrace_dof_sect(dof
, DOF_SECT_STRTAB
, probe
->dofp_strtab
);
11118 if (strtab
== NULL
)
11121 str
= daddr
+ strtab
->dofs_offset
;
11122 size
= strtab
->dofs_size
;
11124 if (probe
->dofp_provider
>= strtab
->dofs_size
) {
11125 dtrace_dof_error(dof
, "corrupt probe provider");
11129 (void) strncpy(desc
->dtpd_provider
,
11130 (char *)(str
+ probe
->dofp_provider
),
11131 MIN(DTRACE_PROVNAMELEN
- 1, size
- probe
->dofp_provider
));
11133 if (probe
->dofp_mod
>= strtab
->dofs_size
) {
11134 dtrace_dof_error(dof
, "corrupt probe module");
11138 (void) strncpy(desc
->dtpd_mod
, (char *)(str
+ probe
->dofp_mod
),
11139 MIN(DTRACE_MODNAMELEN
- 1, size
- probe
->dofp_mod
));
11141 if (probe
->dofp_func
>= strtab
->dofs_size
) {
11142 dtrace_dof_error(dof
, "corrupt probe function");
11146 (void) strncpy(desc
->dtpd_func
, (char *)(str
+ probe
->dofp_func
),
11147 MIN(DTRACE_FUNCNAMELEN
- 1, size
- probe
->dofp_func
));
11149 if (probe
->dofp_name
>= strtab
->dofs_size
) {
11150 dtrace_dof_error(dof
, "corrupt probe name");
11154 (void) strncpy(desc
->dtpd_name
, (char *)(str
+ probe
->dofp_name
),
11155 MIN(DTRACE_NAMELEN
- 1, size
- probe
->dofp_name
));
11160 static dtrace_difo_t
*
11161 dtrace_dof_difo(dof_hdr_t
*dof
, dof_sec_t
*sec
, dtrace_vstate_t
*vstate
,
11166 dof_difohdr_t
*dofd
;
11167 uintptr_t daddr
= (uintptr_t)dof
;
11168 size_t max_size
= dtrace_difo_maxsize
;
11171 static const struct {
11179 { DOF_SECT_DIF
, offsetof(dtrace_difo_t
, dtdo_buf
),
11180 offsetof(dtrace_difo_t
, dtdo_len
), sizeof (dif_instr_t
),
11181 sizeof (dif_instr_t
), "multiple DIF sections" },
11183 { DOF_SECT_INTTAB
, offsetof(dtrace_difo_t
, dtdo_inttab
),
11184 offsetof(dtrace_difo_t
, dtdo_intlen
), sizeof (uint64_t),
11185 sizeof (uint64_t), "multiple integer tables" },
11187 { DOF_SECT_STRTAB
, offsetof(dtrace_difo_t
, dtdo_strtab
),
11188 offsetof(dtrace_difo_t
, dtdo_strlen
), 0,
11189 sizeof (char), "multiple string tables" },
11191 { DOF_SECT_VARTAB
, offsetof(dtrace_difo_t
, dtdo_vartab
),
11192 offsetof(dtrace_difo_t
, dtdo_varlen
), sizeof (dtrace_difv_t
),
11193 sizeof (uint_t
), "multiple variable tables" },
11195 #if !defined(__APPLE__)
11196 { DOF_SECT_NONE
, 0, 0, 0, NULL
}
11198 { DOF_SECT_NONE
, 0, 0, 0, 0, NULL
}
11199 #endif /* __APPLE__ */
11202 if (sec
->dofs_type
!= DOF_SECT_DIFOHDR
) {
11203 dtrace_dof_error(dof
, "invalid DIFO header section");
11207 if (sec
->dofs_align
!= sizeof (dof_secidx_t
)) {
11208 dtrace_dof_error(dof
, "bad alignment in DIFO header");
11212 if (sec
->dofs_size
< sizeof (dof_difohdr_t
) ||
11213 sec
->dofs_size
% sizeof (dof_secidx_t
)) {
11214 dtrace_dof_error(dof
, "bad size in DIFO header");
11218 dofd
= (dof_difohdr_t
*)(uintptr_t)(daddr
+ sec
->dofs_offset
);
11219 n
= (sec
->dofs_size
- sizeof (*dofd
)) / sizeof (dof_secidx_t
) + 1;
11221 dp
= kmem_zalloc(sizeof (dtrace_difo_t
), KM_SLEEP
);
11222 dp
->dtdo_rtype
= dofd
->dofd_rtype
;
11224 for (l
= 0; l
< n
; l
++) {
11229 if ((subsec
= dtrace_dof_sect(dof
, DOF_SECT_NONE
,
11230 dofd
->dofd_links
[l
])) == NULL
)
11231 goto err
; /* invalid section link */
11233 if (ttl
+ subsec
->dofs_size
> max_size
) {
11234 dtrace_dof_error(dof
, "exceeds maximum size");
11238 ttl
+= subsec
->dofs_size
;
11240 for (i
= 0; difo
[i
].section
!= DOF_SECT_NONE
; i
++) {
11241 if (subsec
->dofs_type
!= difo
[i
].section
)
11244 if (!(subsec
->dofs_flags
& DOF_SECF_LOAD
)) {
11245 dtrace_dof_error(dof
, "section not loaded");
11249 if (subsec
->dofs_align
!= difo
[i
].align
) {
11250 dtrace_dof_error(dof
, "bad alignment");
11254 bufp
= (void **)((uintptr_t)dp
+ difo
[i
].bufoffs
);
11255 lenp
= (uint32_t *)((uintptr_t)dp
+ difo
[i
].lenoffs
);
11257 if (*bufp
!= NULL
) {
11258 dtrace_dof_error(dof
, difo
[i
].msg
);
11262 if (difo
[i
].entsize
!= subsec
->dofs_entsize
) {
11263 dtrace_dof_error(dof
, "entry size mismatch");
11267 if (subsec
->dofs_entsize
!= 0 &&
11268 (subsec
->dofs_size
% subsec
->dofs_entsize
) != 0) {
11269 dtrace_dof_error(dof
, "corrupt entry size");
11273 *lenp
= subsec
->dofs_size
;
11274 *bufp
= kmem_alloc(subsec
->dofs_size
, KM_SLEEP
);
11275 bcopy((char *)(uintptr_t)(daddr
+ subsec
->dofs_offset
),
11276 *bufp
, subsec
->dofs_size
);
11278 if (subsec
->dofs_entsize
!= 0)
11279 *lenp
/= subsec
->dofs_entsize
;
11285 * If we encounter a loadable DIFO sub-section that is not
11286 * known to us, assume this is a broken program and fail.
11288 if (difo
[i
].section
== DOF_SECT_NONE
&&
11289 (subsec
->dofs_flags
& DOF_SECF_LOAD
)) {
11290 dtrace_dof_error(dof
, "unrecognized DIFO subsection");
11295 if (dp
->dtdo_buf
== NULL
) {
11297 * We can't have a DIF object without DIF text.
11299 dtrace_dof_error(dof
, "missing DIF text");
11304 * Before we validate the DIF object, run through the variable table
11305 * looking for the strings -- if any of their size are under, we'll set
11306 * their size to be the system-wide default string size. Note that
11307 * this should _not_ happen if the "strsize" option has been set --
11308 * in this case, the compiler should have set the size to reflect the
11309 * setting of the option.
11311 for (i
= 0; i
< dp
->dtdo_varlen
; i
++) {
11312 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
];
11313 dtrace_diftype_t
*t
= &v
->dtdv_type
;
11315 if (v
->dtdv_id
< DIF_VAR_OTHER_UBASE
)
11318 if (t
->dtdt_kind
== DIF_TYPE_STRING
&& t
->dtdt_size
== 0)
11319 t
->dtdt_size
= dtrace_strsize_default
;
11322 if (dtrace_difo_validate(dp
, vstate
, DIF_DIR_NREGS
, cr
) != 0)
11325 dtrace_difo_init(dp
, vstate
);
11329 kmem_free(dp
->dtdo_buf
, dp
->dtdo_len
* sizeof (dif_instr_t
));
11330 kmem_free(dp
->dtdo_inttab
, dp
->dtdo_intlen
* sizeof (uint64_t));
11331 kmem_free(dp
->dtdo_strtab
, dp
->dtdo_strlen
);
11332 kmem_free(dp
->dtdo_vartab
, dp
->dtdo_varlen
* sizeof (dtrace_difv_t
));
11334 kmem_free(dp
, sizeof (dtrace_difo_t
));
11338 static dtrace_predicate_t
*
11339 dtrace_dof_predicate(dof_hdr_t
*dof
, dof_sec_t
*sec
, dtrace_vstate_t
*vstate
,
11344 if ((dp
= dtrace_dof_difo(dof
, sec
, vstate
, cr
)) == NULL
)
11347 return (dtrace_predicate_create(dp
));
11350 static dtrace_actdesc_t
*
11351 dtrace_dof_actdesc(dof_hdr_t
*dof
, dof_sec_t
*sec
, dtrace_vstate_t
*vstate
,
11354 dtrace_actdesc_t
*act
, *first
= NULL
, *last
= NULL
, *next
;
11355 dof_actdesc_t
*desc
;
11356 dof_sec_t
*difosec
;
11358 uintptr_t daddr
= (uintptr_t)dof
;
11360 dtrace_actkind_t kind
;
11362 if (sec
->dofs_type
!= DOF_SECT_ACTDESC
) {
11363 dtrace_dof_error(dof
, "invalid action section");
11367 if (sec
->dofs_offset
+ sizeof (dof_actdesc_t
) > dof
->dofh_loadsz
) {
11368 dtrace_dof_error(dof
, "truncated action description");
11372 if (sec
->dofs_align
!= sizeof (uint64_t)) {
11373 dtrace_dof_error(dof
, "bad alignment in action description");
11377 if (sec
->dofs_size
< sec
->dofs_entsize
) {
11378 dtrace_dof_error(dof
, "section entry size exceeds total size");
11382 if (sec
->dofs_entsize
!= sizeof (dof_actdesc_t
)) {
11383 dtrace_dof_error(dof
, "bad entry size in action description");
11387 if (sec
->dofs_size
/ sec
->dofs_entsize
> dtrace_actions_max
) {
11388 dtrace_dof_error(dof
, "actions exceed dtrace_actions_max");
11392 for (offs
= 0; offs
< sec
->dofs_size
; offs
+= sec
->dofs_entsize
) {
11393 desc
= (dof_actdesc_t
*)(daddr
+
11394 (uintptr_t)sec
->dofs_offset
+ offs
);
11395 kind
= (dtrace_actkind_t
)desc
->dofa_kind
;
11397 if (DTRACEACT_ISPRINTFLIKE(kind
) &&
11398 (kind
!= DTRACEACT_PRINTA
||
11399 desc
->dofa_strtab
!= DOF_SECIDX_NONE
)) {
11405 * printf()-like actions must have a format string.
11407 if ((strtab
= dtrace_dof_sect(dof
,
11408 DOF_SECT_STRTAB
, desc
->dofa_strtab
)) == NULL
)
11411 str
= (char *)((uintptr_t)dof
+
11412 (uintptr_t)strtab
->dofs_offset
);
11414 for (i
= desc
->dofa_arg
; i
< strtab
->dofs_size
; i
++) {
11415 if (str
[i
] == '\0')
11419 if (i
>= strtab
->dofs_size
) {
11420 dtrace_dof_error(dof
, "bogus format string");
11424 if (i
== desc
->dofa_arg
) {
11425 dtrace_dof_error(dof
, "empty format string");
11429 i
-= desc
->dofa_arg
;
11430 fmt
= kmem_alloc(i
+ 1, KM_SLEEP
);
11431 bcopy(&str
[desc
->dofa_arg
], fmt
, i
+ 1);
11432 arg
= (uint64_t)(uintptr_t)fmt
;
11434 if (kind
== DTRACEACT_PRINTA
) {
11435 ASSERT(desc
->dofa_strtab
== DOF_SECIDX_NONE
);
11438 arg
= desc
->dofa_arg
;
11442 act
= dtrace_actdesc_create(kind
, desc
->dofa_ntuple
,
11443 desc
->dofa_uarg
, arg
);
11445 if (last
!= NULL
) {
11446 last
->dtad_next
= act
;
11453 if (desc
->dofa_difo
== DOF_SECIDX_NONE
)
11456 if ((difosec
= dtrace_dof_sect(dof
,
11457 DOF_SECT_DIFOHDR
, desc
->dofa_difo
)) == NULL
)
11460 act
->dtad_difo
= dtrace_dof_difo(dof
, difosec
, vstate
, cr
);
11462 if (act
->dtad_difo
== NULL
)
11466 ASSERT(first
!= NULL
);
11470 for (act
= first
; act
!= NULL
; act
= next
) {
11471 next
= act
->dtad_next
;
11472 dtrace_actdesc_release(act
, vstate
);
11478 static dtrace_ecbdesc_t
*
11479 dtrace_dof_ecbdesc(dof_hdr_t
*dof
, dof_sec_t
*sec
, dtrace_vstate_t
*vstate
,
11482 dtrace_ecbdesc_t
*ep
;
11483 dof_ecbdesc_t
*ecb
;
11484 dtrace_probedesc_t
*desc
;
11485 dtrace_predicate_t
*pred
= NULL
;
11487 if (sec
->dofs_size
< sizeof (dof_ecbdesc_t
)) {
11488 dtrace_dof_error(dof
, "truncated ECB description");
11492 if (sec
->dofs_align
!= sizeof (uint64_t)) {
11493 dtrace_dof_error(dof
, "bad alignment in ECB description");
11497 ecb
= (dof_ecbdesc_t
*)((uintptr_t)dof
+ (uintptr_t)sec
->dofs_offset
);
11498 sec
= dtrace_dof_sect(dof
, DOF_SECT_PROBEDESC
, ecb
->dofe_probes
);
11503 ep
= kmem_zalloc(sizeof (dtrace_ecbdesc_t
), KM_SLEEP
);
11504 ep
->dted_uarg
= ecb
->dofe_uarg
;
11505 desc
= &ep
->dted_probe
;
11507 if (dtrace_dof_probedesc(dof
, sec
, desc
) == NULL
)
11510 if (ecb
->dofe_pred
!= DOF_SECIDX_NONE
) {
11511 if ((sec
= dtrace_dof_sect(dof
,
11512 DOF_SECT_DIFOHDR
, ecb
->dofe_pred
)) == NULL
)
11515 if ((pred
= dtrace_dof_predicate(dof
, sec
, vstate
, cr
)) == NULL
)
11518 ep
->dted_pred
.dtpdd_predicate
= pred
;
11521 if (ecb
->dofe_actions
!= DOF_SECIDX_NONE
) {
11522 if ((sec
= dtrace_dof_sect(dof
,
11523 DOF_SECT_ACTDESC
, ecb
->dofe_actions
)) == NULL
)
11526 ep
->dted_action
= dtrace_dof_actdesc(dof
, sec
, vstate
, cr
);
11528 if (ep
->dted_action
== NULL
)
11536 dtrace_predicate_release(pred
, vstate
);
11537 kmem_free(ep
, sizeof (dtrace_ecbdesc_t
));
#if !defined(__APPLE__) /* APPLE dyld has already done this for us */
/*
 * Apply the relocations from the specified 'sec' (a DOF_SECT_URELHDR) to the
 * specified DOF.  At present, this amounts to simply adding 'ubase' to the
 * site of any user SETX relocations to account for load object base address.
 * In the future, if we need other relocations, this function can be extended.
 */
static int
dtrace_dof_relocate(dof_hdr_t *dof, dof_sec_t *sec, uint64_t ubase)
{
	uintptr_t daddr = (uintptr_t)dof;
	dof_relohdr_t *dofr =
	    (dof_relohdr_t *)(uintptr_t)(daddr + sec->dofs_offset);
	dof_sec_t *ss, *rs, *ts;
	dof_relodesc_t *r;
	uint_t i, n;

	if (sec->dofs_size < sizeof (dof_relohdr_t) ||
	    sec->dofs_align != sizeof (dof_secidx_t)) {
		dtrace_dof_error(dof, "invalid relocation header");
		return (-1);
	}

	ss = dtrace_dof_sect(dof, DOF_SECT_STRTAB, dofr->dofr_strtab);
	rs = dtrace_dof_sect(dof, DOF_SECT_RELTAB, dofr->dofr_relsec);
	ts = dtrace_dof_sect(dof, DOF_SECT_NONE, dofr->dofr_tgtsec);

	if (ss == NULL || rs == NULL || ts == NULL)
		return (-1);	/* dtrace_dof_error() has been called already */

	if (rs->dofs_entsize < sizeof (dof_relodesc_t) ||
	    rs->dofs_align != sizeof (uint64_t)) {
		dtrace_dof_error(dof, "invalid relocation section");
		return (-1);
	}

	r = (dof_relodesc_t *)(uintptr_t)(daddr + rs->dofs_offset);
	n = rs->dofs_size / rs->dofs_entsize;

	for (i = 0; i < n; i++) {
		uintptr_t taddr = daddr + ts->dofs_offset + r->dofr_offset;

		switch (r->dofr_type) {
		case DOF_RELO_NONE:
			break;
		case DOF_RELO_SETX:
			if (r->dofr_offset >= ts->dofs_size || r->dofr_offset +
			    sizeof (uint64_t) > ts->dofs_size) {
				dtrace_dof_error(dof, "bad relocation offset");
				return (-1);
			}

			if (!IS_P2ALIGNED(taddr, sizeof (uint64_t))) {
				dtrace_dof_error(dof, "misaligned setx relo");
				return (-1);
			}

			*(uint64_t *)taddr += ubase;
			break;
		default:
			dtrace_dof_error(dof, "invalid relocation type");
			return (-1);
		}

		r = (dof_relodesc_t *)((uintptr_t)r + rs->dofs_entsize);
	}

	return (0);
}
#endif /* __APPLE__ */
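/*
 * Worked example (illustrative): a DOF_RELO_SETX entry whose dofr_offset is
 * 0x40 names the 64-bit value stored 0x40 bytes into the target section.  If
 * the object was generated assuming a base address of 0 but is actually
 * mapped at ubase == 0x100000000, the relocation simply adds 0x100000000 to
 * that stored value in place.
 */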
11613 * The dof_hdr_t passed to dtrace_dof_slurp() should be a partially validated
11614 * header: it should be at the front of a memory region that is at least
11615 * sizeof (dof_hdr_t) in size -- and then at least dof_hdr.dofh_loadsz in
11616 * size. It need not be validated in any other way.
11619 dtrace_dof_slurp(dof_hdr_t
*dof
, dtrace_vstate_t
*vstate
, cred_t
*cr
,
11620 dtrace_enabling_t
**enabp
, uint64_t ubase
, int noprobes
)
11622 uint64_t len
= dof
->dofh_loadsz
, seclen
;
11623 uintptr_t daddr
= (uintptr_t)dof
;
11624 dtrace_ecbdesc_t
*ep
;
11625 dtrace_enabling_t
*enab
;
11628 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
11629 ASSERT(dof
->dofh_loadsz
>= sizeof (dof_hdr_t
));
11632 * Check the DOF header identification bytes. In addition to checking
11633 * valid settings, we also verify that unused bits/bytes are zeroed so
11634 * we can use them later without fear of regressing existing binaries.
11636 if (bcmp(&dof
->dofh_ident
[DOF_ID_MAG0
],
11637 DOF_MAG_STRING
, DOF_MAG_STRLEN
) != 0) {
11638 dtrace_dof_error(dof
, "DOF magic string mismatch");
11642 if (dof
->dofh_ident
[DOF_ID_MODEL
] != DOF_MODEL_ILP32
&&
11643 dof
->dofh_ident
[DOF_ID_MODEL
] != DOF_MODEL_LP64
) {
11644 dtrace_dof_error(dof
, "DOF has invalid data model");
11648 if (dof
->dofh_ident
[DOF_ID_ENCODING
] != DOF_ENCODE_NATIVE
) {
11649 dtrace_dof_error(dof
, "DOF encoding mismatch");
11653 #if !defined(__APPLE__)
11654 if (dof
->dofh_ident
[DOF_ID_VERSION
] != DOF_VERSION_1
&&
11655 dof
->dofh_ident
[DOF_ID_VERSION
] != DOF_VERSION_2
) {
11656 dtrace_dof_error(dof
, "DOF version mismatch");
11661 * We only support DOF_VERSION_3 for now.
11663 if (dof
->dofh_ident
[DOF_ID_VERSION
] != DOF_VERSION_3
) {
11664 dtrace_dof_error(dof
, "DOF version mismatch");
11669 if (dof
->dofh_ident
[DOF_ID_DIFVERS
] != DIF_VERSION_2
) {
11670 dtrace_dof_error(dof
, "DOF uses unsupported instruction set");
11674 if (dof
->dofh_ident
[DOF_ID_DIFIREG
] > DIF_DIR_NREGS
) {
11675 dtrace_dof_error(dof
, "DOF uses too many integer registers");
11679 if (dof
->dofh_ident
[DOF_ID_DIFTREG
] > DIF_DTR_NREGS
) {
11680 dtrace_dof_error(dof
, "DOF uses too many tuple registers");
11684 for (i
= DOF_ID_PAD
; i
< DOF_ID_SIZE
; i
++) {
11685 if (dof
->dofh_ident
[i
] != 0) {
11686 dtrace_dof_error(dof
, "DOF has invalid ident byte set");
11691 if (dof
->dofh_flags
& ~DOF_FL_VALID
) {
11692 dtrace_dof_error(dof
, "DOF has invalid flag bits set");
11696 if (dof
->dofh_secsize
== 0) {
11697 dtrace_dof_error(dof
, "zero section header size");
11702 * Check that the section headers don't exceed the amount of DOF
11703 * data. Note that we cast the section size and number of sections
11704 * to uint64_t's to prevent possible overflow in the multiplication.
11706 seclen
= (uint64_t)dof
->dofh_secnum
* (uint64_t)dof
->dofh_secsize
;
11708 if (dof
->dofh_secoff
> len
|| seclen
> len
||
11709 dof
->dofh_secoff
+ seclen
> len
) {
11710 dtrace_dof_error(dof
, "truncated section headers");
11714 if (!IS_P2ALIGNED(dof
->dofh_secoff
, sizeof (uint64_t))) {
11715 dtrace_dof_error(dof
, "misaligned section headers");
11719 if (!IS_P2ALIGNED(dof
->dofh_secsize
, sizeof (uint64_t))) {
11720 dtrace_dof_error(dof
, "misaligned section size");
11725 * Take an initial pass through the section headers to be sure that
11726 * the headers don't have stray offsets. If the 'noprobes' flag is
11727 * set, do not permit sections relating to providers, probes, or args.
11729 for (i
= 0; i
< dof
->dofh_secnum
; i
++) {
11730 dof_sec_t
*sec
= (dof_sec_t
*)(daddr
+
11731 (uintptr_t)dof
->dofh_secoff
+ i
* dof
->dofh_secsize
);
11734 switch (sec
->dofs_type
) {
11735 case DOF_SECT_PROVIDER
:
11736 case DOF_SECT_PROBES
:
11737 case DOF_SECT_PRARGS
:
11738 case DOF_SECT_PROFFS
:
11739 dtrace_dof_error(dof
, "illegal sections "
11745 if (!(sec
->dofs_flags
& DOF_SECF_LOAD
))
11746 continue; /* just ignore non-loadable sections */
11748 if (sec
->dofs_align
& (sec
->dofs_align
- 1)) {
11749 dtrace_dof_error(dof
, "bad section alignment");
11753 if (sec
->dofs_offset
& (sec
->dofs_align
- 1)) {
11754 dtrace_dof_error(dof
, "misaligned section");
11758 if (sec
->dofs_offset
> len
|| sec
->dofs_size
> len
||
11759 sec
->dofs_offset
+ sec
->dofs_size
> len
) {
11760 dtrace_dof_error(dof
, "corrupt section header");
11764 if (sec
->dofs_type
== DOF_SECT_STRTAB
&& *((char *)daddr
+
11765 sec
->dofs_offset
+ sec
->dofs_size
- 1) != '\0') {
11766 dtrace_dof_error(dof
, "non-terminating string table");
11771 #if !defined(__APPLE__)
11773 * APPLE NOTE: We have no relocation to perform. All dof values are
11774 * relative offsets.
11778 * Take a second pass through the sections and locate and perform any
11779 * relocations that are present. We do this after the first pass to
11780 * be sure that all sections have had their headers validated.
11782 for (i
= 0; i
< dof
->dofh_secnum
; i
++) {
11783 dof_sec_t
*sec
= (dof_sec_t
*)(daddr
+
11784 (uintptr_t)dof
->dofh_secoff
+ i
* dof
->dofh_secsize
);
11786 if (!(sec
->dofs_flags
& DOF_SECF_LOAD
))
11787 continue; /* skip sections that are not loadable */
11789 switch (sec
->dofs_type
) {
11790 case DOF_SECT_URELHDR
:
11791 if (dtrace_dof_relocate(dof
, sec
, ubase
) != 0)
11796 #endif /* __APPLE__ */
11798 if ((enab
= *enabp
) == NULL
)
11799 enab
= *enabp
= dtrace_enabling_create(vstate
);
11801 for (i
= 0; i
< dof
->dofh_secnum
; i
++) {
11802 dof_sec_t
*sec
= (dof_sec_t
*)(daddr
+
11803 (uintptr_t)dof
->dofh_secoff
+ i
* dof
->dofh_secsize
);
11805 if (sec
->dofs_type
!= DOF_SECT_ECBDESC
)
11808 #if !defined(__APPLE__)
11809 if ((ep
= dtrace_dof_ecbdesc(dof
, sec
, vstate
, cr
)) == NULL
) {
11810 dtrace_enabling_destroy(enab
);
11815 /* XXX Defend against gcc 4.0 botch on x86 (not all paths out of inlined dtrace_dof_ecbdesc
11816 are checked for the NULL return value.) */
11817 ep
= dtrace_dof_ecbdesc(dof
, sec
, vstate
, cr
);
11819 dtrace_enabling_destroy(enab
);
11823 #endif /* __APPLE__ */
11825 dtrace_enabling_add(enab
, ep
);
/*
 * Process DOF for any options.  This routine assumes that the DOF has been
 * at least processed by dtrace_dof_slurp().
 */
static int
dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state)
{
	int i, rval;
	uint32_t entsize;
	size_t offs;
	dof_optdesc_t *desc;

	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)((uintptr_t)dof +
		    (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_OPTDESC)
			continue;

		if (sec->dofs_align != sizeof (uint64_t)) {
			dtrace_dof_error(dof, "bad alignment in "
			    "option description");
			return (EINVAL);
		}

		if ((entsize = sec->dofs_entsize) == 0) {
			dtrace_dof_error(dof, "zeroed option entry size");
			return (EINVAL);
		}

		if (entsize < sizeof (dof_optdesc_t)) {
			dtrace_dof_error(dof, "bad option entry size");
			return (EINVAL);
		}

		for (offs = 0; offs < sec->dofs_size; offs += entsize) {
			desc = (dof_optdesc_t *)((uintptr_t)dof +
			    (uintptr_t)sec->dofs_offset + offs);

			if (desc->dofo_strtab != DOF_SECIDX_NONE) {
				dtrace_dof_error(dof, "non-zero option string");
				return (EINVAL);
			}

			if (desc->dofo_value == DTRACEOPT_UNSET) {
				dtrace_dof_error(dof, "unset option");
				return (EINVAL);
			}

			if ((rval = dtrace_state_option(state,
			    desc->dofo_option, desc->dofo_value)) != 0) {
				dtrace_dof_error(dof, "rejected option");
				return (rval);
			}
		}
	}

	return (0);
}
/*
 * DTrace Consumer State Functions
 */
#if defined(__APPLE__)
static
#endif /* __APPLE__ */
int
dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size)
{
	size_t hashsize, maxper, min_size, chunksize = dstate->dtds_chunksize;
	void *base;
	uintptr_t limit;
	dtrace_dynvar_t *dvar, *next, *start;
	int i;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL);

	bzero(dstate, sizeof (dtrace_dstate_t));

	if ((dstate->dtds_chunksize = chunksize) == 0)
		dstate->dtds_chunksize = DTRACE_DYNVAR_CHUNKSIZE;

	if (size < (min_size = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)))
		size = min_size;

	if ((base = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
		return (ENOMEM);

	dstate->dtds_size = size;
	dstate->dtds_base = base;
	dstate->dtds_percpu = kmem_cache_alloc(dtrace_state_cache, KM_SLEEP);
	bzero(dstate->dtds_percpu, (int)NCPU * sizeof (dtrace_dstate_percpu_t));

	hashsize = size / (dstate->dtds_chunksize + sizeof (dtrace_dynhash_t));

	if (hashsize != 1 && (hashsize & 1))
		hashsize--;

	dstate->dtds_hashsize = hashsize;
	dstate->dtds_hash = dstate->dtds_base;

	/*
	 * Set all of our hash buckets to point to the single sink, and (if
	 * it hasn't already been set), set the sink's hash value to be the
	 * sink sentinel value.  The sink is needed for dynamic variable
	 * lookups to know that they have iterated over an entire, valid hash
	 * chain.
	 */
	for (i = 0; i < hashsize; i++)
		dstate->dtds_hash[i].dtdh_chain = &dtrace_dynhash_sink;

	if (dtrace_dynhash_sink.dtdv_hashval != DTRACE_DYNHASH_SINK)
		dtrace_dynhash_sink.dtdv_hashval = DTRACE_DYNHASH_SINK;

	/*
	 * Determine number of active CPUs.  Divide free list evenly among
	 * active CPUs.
	 */
	start = (dtrace_dynvar_t *)
	    ((uintptr_t)base + hashsize * sizeof (dtrace_dynhash_t));
	limit = (uintptr_t)base + size;

	maxper = (limit - (uintptr_t)start) / (int)NCPU;
	maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize;

	for (i = 0; i < (int)NCPU; i++) {
		dstate->dtds_percpu[i].dtdsc_free = dvar = start;

		/*
		 * If we don't even have enough chunks to make it once through
		 * NCPUs, we're just going to allocate everything to the first
		 * CPU.  And if we're on the last CPU, we're going to allocate
		 * whatever is left over.  In either case, we set the limit to
		 * be the limit of the dynamic variable space.
		 */
		if (maxper == 0 || i == (int)NCPU - 1) {
			limit = (uintptr_t)base + size;
			start = NULL;
		} else {
			limit = (uintptr_t)start + maxper;
			start = (dtrace_dynvar_t *)limit;
		}

		ASSERT(limit <= (uintptr_t)base + size);

		for (;;) {
			next = (dtrace_dynvar_t *)((uintptr_t)dvar +
			    dstate->dtds_chunksize);

			if ((uintptr_t)next + dstate->dtds_chunksize >= limit)
				break;

			dvar->dtdv_next = next;
			dvar = next;
		}

		if (maxper == 0)
			break;
	}

	return (0);
}
#if defined(__APPLE__)
static
#endif /* __APPLE__ */
void
dtrace_dstate_fini(dtrace_dstate_t *dstate)
{
	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);

	if (dstate->dtds_base == NULL)
		return;

	kmem_free(dstate->dtds_base, dstate->dtds_size);
	kmem_cache_free(dtrace_state_cache, dstate->dtds_percpu);
}
static void
dtrace_vstate_fini(dtrace_vstate_t *vstate)
{
	/*
	 * Logical XOR, where are you?
	 */
	ASSERT((vstate->dtvs_nglobals == 0) ^ (vstate->dtvs_globals != NULL));

	if (vstate->dtvs_nglobals > 0) {
		kmem_free(vstate->dtvs_globals, vstate->dtvs_nglobals *
		    sizeof (dtrace_statvar_t *));
	}

	if (vstate->dtvs_ntlocals > 0) {
		kmem_free(vstate->dtvs_tlocals, vstate->dtvs_ntlocals *
		    sizeof (dtrace_difv_t));
	}

	ASSERT((vstate->dtvs_nlocals == 0) ^ (vstate->dtvs_locals != NULL));

	if (vstate->dtvs_nlocals > 0) {
		kmem_free(vstate->dtvs_locals, vstate->dtvs_nlocals *
		    sizeof (dtrace_statvar_t *));
	}
}
static void
dtrace_state_clean(dtrace_state_t *state)
{
	if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE)
		return;

	dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars);
	dtrace_speculation_clean(state);
}
static void
dtrace_state_deadman(dtrace_state_t *state)
{
	hrtime_t now;

	dtrace_sync();

	now = dtrace_gethrtime();

	if (state != dtrace_anon.dta_state &&
	    now - state->dts_laststatus >= dtrace_deadman_user)
		return;

	/*
	 * We must be sure that dts_alive never appears to be less than the
	 * value upon entry to dtrace_state_deadman(), and because we lack a
	 * dtrace_cas64(), we cannot store to it atomically.  We thus instead
	 * store INT64_MAX to it, followed by a memory barrier, followed by
	 * the new value.  This assures that dts_alive never appears to be
	 * less than its true value, regardless of the order in which the
	 * stores to the underlying storage are issued.
	 */
	state->dts_alive = INT64_MAX;
	dtrace_membar_producer();
	state->dts_alive = now;
}
#if defined(__APPLE__)
static
#endif /* __APPLE__ */
dtrace_state_t *
dtrace_state_create(dev_t *devp, cred_t *cr)
{
	minor_t minor;
	major_t major;
	char c[30];
	dtrace_state_t *state;
	dtrace_optval_t *opt;
	int bufsize = (int)NCPU * sizeof (dtrace_buffer_t), i;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);

#if !defined(__APPLE__)
	minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1,
	    VM_BESTFIT | VM_SLEEP);
#else
	/*
	 * Darwin's DEVFS layer acquired the minor number for this "device" when it called
	 * dtrace_devfs_clone_func(). At that time, dtrace_devfs_clone_func() proposed a minor number
	 * (next unused according to vmem_alloc()) and then immediately put the number back in play
	 * (by calling vmem_free()). Now that minor number is being used for an open, so committing it
	 * to use. The following vmem_alloc() must deliver that same minor number.
	 */
	minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1,
	    VM_BESTFIT | VM_SLEEP);

	if (NULL != devp) {
		ASSERT(getminor(*devp) == minor);
		if (getminor(*devp) != minor) {
			printf("dtrace_open: couldn't re-acquire vended minor number %d. Instead got %d\n",
			    getminor(*devp), minor);
			vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1);
			return NULL;
		}
	} else {
		/* NULL==devp iff "Anonymous state" (see dtrace_anon_property),
		 * so just vend the minor device number here de novo since no "open" has occurred. */
	}
#endif /* __APPLE__ */

	if (ddi_soft_state_zalloc(dtrace_softstate, minor) != DDI_SUCCESS) {
		vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1);
		return (NULL);
	}

	state = ddi_get_soft_state(dtrace_softstate, minor);
	state->dts_epid = DTRACE_EPIDNONE + 1;

	(void) snprintf(c, sizeof (c), "dtrace_aggid_%d", minor);
	state->dts_aggid_arena = vmem_create(c, (void *)1, UINT32_MAX, 1,
	    NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);

	if (devp != NULL) {
		major = getemajor(*devp);
	} else {
		major = ddi_driver_major(dtrace_devi);
	}

	state->dts_dev = makedevice(major, minor);

	if (devp != NULL)
		*devp = state->dts_dev;

	/*
	 * We allocate NCPU buffers.  On the one hand, this can be quite
	 * a bit of memory per instance (nearly 36K on a Starcat).  On the
	 * other hand, it saves an additional memory reference in the probe
	 * path.
	 */
	state->dts_buffer = kmem_zalloc(bufsize, KM_SLEEP);
	state->dts_aggbuffer = kmem_zalloc(bufsize, KM_SLEEP);
	state->dts_cleaner = CYCLIC_NONE;
	state->dts_deadman = CYCLIC_NONE;
	state->dts_vstate.dtvs_state = state;

	for (i = 0; i < DTRACEOPT_MAX; i++)
		state->dts_options[i] = DTRACEOPT_UNSET;

	/*
	 * Set the default options.
	 */
	opt = state->dts_options;
	opt[DTRACEOPT_BUFPOLICY] = DTRACEOPT_BUFPOLICY_SWITCH;
	opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_AUTO;
	opt[DTRACEOPT_NSPEC] = dtrace_nspec_default;
	opt[DTRACEOPT_SPECSIZE] = dtrace_specsize_default;
	opt[DTRACEOPT_CPU] = (dtrace_optval_t)DTRACE_CPUALL;
	opt[DTRACEOPT_STRSIZE] = dtrace_strsize_default;
	opt[DTRACEOPT_STACKFRAMES] = dtrace_stackframes_default;
	opt[DTRACEOPT_USTACKFRAMES] = dtrace_ustackframes_default;
	opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_default;
	opt[DTRACEOPT_AGGRATE] = dtrace_aggrate_default;
	opt[DTRACEOPT_SWITCHRATE] = dtrace_switchrate_default;
	opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_default;
	opt[DTRACEOPT_JSTACKFRAMES] = dtrace_jstackframes_default;
	opt[DTRACEOPT_JSTACKSTRSIZE] = dtrace_jstackstrsize_default;

	state->dts_activity = DTRACE_ACTIVITY_INACTIVE;

	/*
	 * Depending on the user credentials, we set flag bits which alter probe
	 * visibility or the amount of destructiveness allowed.  In the case of
	 * actual anonymous tracing, or the possession of all privileges, all of
	 * the normal checks are bypassed.
	 */
	if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
		state->dts_cred.dcr_visible = DTRACE_CRV_ALL;
		state->dts_cred.dcr_action = DTRACE_CRA_ALL;
	} else {
		/*
		 * Set up the credentials for this instantiation.  We take a
		 * hold on the credential to prevent it from disappearing on
		 * us; this in turn prevents the zone_t referenced by this
		 * credential from disappearing.  This means that we can
		 * examine the credential and the zone from probe context.
		 */
		crhold(cr);
		state->dts_cred.dcr_cred = cr;

		/*
		 * CRA_PROC means "we have *some* privilege for dtrace" and
		 * unlocks the use of variables like pid, zonename, etc.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE) ||
		    PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
			state->dts_cred.dcr_action |= DTRACE_CRA_PROC;
		}

		/*
		 * dtrace_user allows use of syscall and profile providers.
		 * If the user also has proc_owner and/or proc_zone, we
		 * extend the scope to include additional visibility and
		 * destructive power.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE)) {
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) {
				state->dts_cred.dcr_visible |=
				    DTRACE_CRV_ALLPROC;

				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
			}

			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) {
				state->dts_cred.dcr_visible |=
				    DTRACE_CRV_ALLZONE;

				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
			}

			/*
			 * If we have all privs in whatever zone this is,
			 * we can do destructive things to processes which
			 * have altered credentials.
			 */
#if !defined(__APPLE__)
			if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
			    cr->cr_zone->zone_privset)) {
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
			}
#else
			/* Darwin doesn't do zones. */
			state->dts_cred.dcr_action |=
			    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
#endif /* __APPLE__ */
		}

		/*
		 * Holding the dtrace_kernel privilege also implies that
		 * the user has the dtrace_user privilege from a visibility
		 * perspective.  But without further privileges, some
		 * destructive actions are not available.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE)) {
			/*
			 * Make all probes in all zones visible.  However,
			 * this doesn't mean that all actions become available
			 * to all zones.
			 */
			state->dts_cred.dcr_visible |= DTRACE_CRV_KERNEL |
			    DTRACE_CRV_ALLPROC | DTRACE_CRV_ALLZONE;

			state->dts_cred.dcr_action |= DTRACE_CRA_KERNEL |
			    DTRACE_CRA_PROC;

			/*
			 * Holding proc_owner means that destructive actions
			 * for *this* zone are allowed.
			 */
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;

			/*
			 * Holding proc_zone means that destructive actions
			 * for this user/group ID in all zones is allowed.
			 */
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;

			/*
			 * If we have all privs in whatever zone this is,
			 * we can do destructive things to processes which
			 * have altered credentials.
			 */
#if !defined(__APPLE__)
			if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
			    cr->cr_zone->zone_privset)) {
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
			}
#else
			/* Darwin doesn't do zones. */
			state->dts_cred.dcr_action |=
			    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
#endif /* __APPLE__ */
		}

		/*
		 * Holding the dtrace_proc privilege gives control over fasttrap
		 * and pid providers.  We need to grant wider destructive
		 * privileges in the event that the user has proc_owner and/or
		 * proc_zone.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;

			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
		}
	}

	return (state);
}
static int
dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
{
	dtrace_optval_t *opt = state->dts_options, size;
	processorid_t cpu = 0;
	int flags = 0, rval;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(which < DTRACEOPT_MAX);
	ASSERT(state->dts_activity == DTRACE_ACTIVITY_INACTIVE ||
	    (state == dtrace_anon.dta_state &&
	    state->dts_activity == DTRACE_ACTIVITY_ACTIVE));

	if (opt[which] == DTRACEOPT_UNSET || opt[which] == 0)
		return (0);

	if (opt[DTRACEOPT_CPU] != DTRACEOPT_UNSET)
		cpu = opt[DTRACEOPT_CPU];

	if (which == DTRACEOPT_SPECSIZE)
		flags |= DTRACEBUF_NOSWITCH;

	if (which == DTRACEOPT_BUFSIZE) {
		if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_RING)
			flags |= DTRACEBUF_RING;

		if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_FILL)
			flags |= DTRACEBUF_FILL;

		if (state != dtrace_anon.dta_state ||
		    state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
			flags |= DTRACEBUF_INACTIVE;
	}

	for (size = opt[which]; size >= sizeof (uint64_t); size >>= 1) {
		/*
		 * The size must be 8-byte aligned.  If the size is not 8-byte
		 * aligned, drop it down by the difference.
		 */
		if (size & (sizeof (uint64_t) - 1))
			size -= size & (sizeof (uint64_t) - 1);

		if (size < state->dts_reserve) {
			/*
			 * Buffers always must be large enough to accommodate
			 * their prereserved space.  We return E2BIG instead
			 * of ENOMEM in this case to allow for user-level
			 * software to differentiate the cases.
			 */
			return (E2BIG);
		}

		rval = dtrace_buffer_alloc(buf, size, flags, cpu);

		if (rval != ENOMEM) {
			opt[which] = size;
			return (rval);
		}

		if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
			return (rval);
	}

	return (ENOMEM);
}
static int
dtrace_state_buffers(dtrace_state_t *state)
{
	dtrace_speculation_t *spec = state->dts_speculations;
	int rval, i;

	if ((rval = dtrace_state_buffer(state, state->dts_buffer,
	    DTRACEOPT_BUFSIZE)) != 0)
		return (rval);

	if ((rval = dtrace_state_buffer(state, state->dts_aggbuffer,
	    DTRACEOPT_AGGSIZE)) != 0)
		return (rval);

	for (i = 0; i < state->dts_nspeculations; i++) {
		if ((rval = dtrace_state_buffer(state,
		    spec[i].dtsp_buffer, DTRACEOPT_SPECSIZE)) != 0)
			return (rval);
	}

	return (0);
}
static void
dtrace_state_prereserve(dtrace_state_t *state)
{
	dtrace_ecb_t *ecb;
	dtrace_probe_t *probe;

	state->dts_reserve = 0;

	if (state->dts_options[DTRACEOPT_BUFPOLICY] != DTRACEOPT_BUFPOLICY_FILL)
		return;

	/*
	 * If our buffer policy is a "fill" buffer policy, we need to set the
	 * prereserved space to be the space required by the END probes.
	 */
	probe = dtrace_probes[dtrace_probeid_end - 1];
	ASSERT(probe != NULL);

	for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
		if (ecb->dte_state != state)
			continue;

		state->dts_reserve += ecb->dte_needed + ecb->dte_alignment;
	}
}
static int
dtrace_state_go(dtrace_state_t *state, processorid_t *cpu)
{
	dtrace_optval_t *opt = state->dts_options, sz, nspec;
	dtrace_speculation_t *spec;
	dtrace_buffer_t *buf;
	cyc_handler_t hdlr;
	cyc_time_t when;
	int rval = 0, i, bufsize = (int)NCPU * sizeof (dtrace_buffer_t);
	dtrace_icookie_t cookie;

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_lock);

	if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
		rval = EBUSY;
		goto out;
	}

	/*
	 * Before we can perform any checks, we must prime all of the
	 * retained enablings that correspond to this state.
	 */
	dtrace_enabling_prime(state);

	if (state->dts_destructive && !state->dts_cred.dcr_destructive) {
		rval = EACCES;
		goto out;
	}

	dtrace_state_prereserve(state);

	/*
	 * Now we want to try to allocate our speculations.
	 * We do not automatically resize the number of speculations; if
	 * this fails, we will fail the operation.
	 */
	nspec = opt[DTRACEOPT_NSPEC];
	ASSERT(nspec != DTRACEOPT_UNSET);

	if (nspec > INT_MAX) {
		rval = ENOMEM;
		goto out;
	}

	spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t), KM_NOSLEEP);

	if (spec == NULL) {
		rval = ENOMEM;
		goto out;
	}

	state->dts_speculations = spec;
	state->dts_nspeculations = (int)nspec;

	for (i = 0; i < nspec; i++) {
		if ((buf = kmem_zalloc(bufsize, KM_NOSLEEP)) == NULL) {
			rval = ENOMEM;
			goto err;
		}

		spec[i].dtsp_buffer = buf;
	}

	if (opt[DTRACEOPT_GRABANON] != DTRACEOPT_UNSET) {
		if (dtrace_anon.dta_state == NULL) {
			rval = ENOENT;
			goto out;
		}

		if (state->dts_necbs != 0) {
			rval = EALREADY;
			goto out;
		}

		state->dts_anon = dtrace_anon_grab();
		ASSERT(state->dts_anon != NULL);
		state = state->dts_anon;

		/*
		 * We want "grabanon" to be set in the grabbed state, so we'll
		 * copy that option value from the grabbing state into the
		 * grabbed state.
		 */
		state->dts_options[DTRACEOPT_GRABANON] =
		    opt[DTRACEOPT_GRABANON];

		*cpu = dtrace_anon.dta_beganon;

		/*
		 * If the anonymous state is active (as it almost certainly
		 * is if the anonymous enabling ultimately matched anything),
		 * we don't allow any further option processing -- but we
		 * don't return failure.
		 */
		if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
			goto out;
	}

	if (opt[DTRACEOPT_AGGSIZE] != DTRACEOPT_UNSET &&
	    opt[DTRACEOPT_AGGSIZE] != 0) {
		if (state->dts_aggregations == NULL) {
			/*
			 * We're not going to create an aggregation buffer
			 * because we don't have any ECBs that contain
			 * aggregations -- set this option to 0.
			 */
			opt[DTRACEOPT_AGGSIZE] = 0;
		} else {
			/*
			 * If we have an aggregation buffer, we must also have
			 * a buffer to use as scratch.
			 */
			if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET ||
			    opt[DTRACEOPT_BUFSIZE] < state->dts_needed) {
				opt[DTRACEOPT_BUFSIZE] = state->dts_needed;
			}
		}
	}

	if (opt[DTRACEOPT_SPECSIZE] != DTRACEOPT_UNSET &&
	    opt[DTRACEOPT_SPECSIZE] != 0) {
		if (!state->dts_speculates) {
			/*
			 * We're not going to create speculation buffers
			 * because we don't have any ECBs that actually
			 * speculate -- set the speculation size to 0.
			 */
			opt[DTRACEOPT_SPECSIZE] = 0;
		}
	}

	/*
	 * The bare minimum size for any buffer that we're actually going to
	 * do anything to is sizeof (uint64_t).
	 */
	sz = sizeof (uint64_t);

	if ((state->dts_needed != 0 && opt[DTRACEOPT_BUFSIZE] < sz) ||
	    (state->dts_speculates && opt[DTRACEOPT_SPECSIZE] < sz) ||
	    (state->dts_aggregations != NULL && opt[DTRACEOPT_AGGSIZE] < sz)) {
		/*
		 * A buffer size has been explicitly set to 0 (or to a size
		 * that will be adjusted to 0) and we need the space -- we
		 * need to return failure.  We return ENOSPC to differentiate
		 * it from failing to allocate a buffer due to failure to meet
		 * the reserve (for which we return E2BIG).
		 */
		rval = ENOSPC;
		goto out;
	}

	if ((rval = dtrace_state_buffers(state)) != 0)
		goto err;

	if ((sz = opt[DTRACEOPT_DYNVARSIZE]) == DTRACEOPT_UNSET)
		sz = dtrace_dstate_defsize;

	do {
		rval = dtrace_dstate_init(&state->dts_vstate.dtvs_dynvars, sz);

		if (rval == 0)
			break;

		if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
			goto err;
	} while (sz >>= 1);

	opt[DTRACEOPT_DYNVARSIZE] = sz;

	if (rval != 0)
		goto err;

	if (opt[DTRACEOPT_STATUSRATE] > dtrace_statusrate_max)
		opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_max;

	if (opt[DTRACEOPT_CLEANRATE] == 0)
		opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;

	if (opt[DTRACEOPT_CLEANRATE] < dtrace_cleanrate_min)
		opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_min;

	if (opt[DTRACEOPT_CLEANRATE] > dtrace_cleanrate_max)
		opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;

	hdlr.cyh_func = (cyc_func_t)dtrace_state_clean;
	hdlr.cyh_arg = state;
	hdlr.cyh_level = CY_LOW_LEVEL;

	when.cyt_when = 0;
	when.cyt_interval = opt[DTRACEOPT_CLEANRATE];

	state->dts_cleaner = cyclic_add(&hdlr, &when);

	hdlr.cyh_func = (cyc_func_t)dtrace_state_deadman;
	hdlr.cyh_arg = state;
	hdlr.cyh_level = CY_LOW_LEVEL;

	when.cyt_when = 0;
	when.cyt_interval = dtrace_deadman_interval;

	state->dts_alive = state->dts_laststatus = dtrace_gethrtime();
	state->dts_deadman = cyclic_add(&hdlr, &when);

	state->dts_activity = DTRACE_ACTIVITY_WARMUP;

	/*
	 * Now it's time to actually fire the BEGIN probe.  We need to disable
	 * interrupts here both to record the CPU on which we fired the BEGIN
	 * probe (the data from this CPU will be processed first at user
	 * level) and to manually activate the buffer for this CPU.
	 */
	cookie = dtrace_interrupt_disable();
	*cpu = CPU->cpu_id;
	ASSERT(state->dts_buffer[*cpu].dtb_flags & DTRACEBUF_INACTIVE);
	state->dts_buffer[*cpu].dtb_flags &= ~DTRACEBUF_INACTIVE;

	dtrace_probe(dtrace_probeid_begin,
	    (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
	dtrace_interrupt_enable(cookie);

	/*
	 * We may have had an exit action from a BEGIN probe; only change our
	 * state to ACTIVE if we're still in WARMUP.
	 */
	ASSERT(state->dts_activity == DTRACE_ACTIVITY_WARMUP ||
	    state->dts_activity == DTRACE_ACTIVITY_DRAINING);

	if (state->dts_activity == DTRACE_ACTIVITY_WARMUP)
		state->dts_activity = DTRACE_ACTIVITY_ACTIVE;

	/*
	 * Regardless of whether or not now we're in ACTIVE or DRAINING, we
	 * want each CPU to transition its principal buffer out of the
	 * INACTIVE state.  Doing this assures that no CPU will suddenly begin
	 * processing an ECB halfway down a probe's ECB chain; all CPUs will
	 * atomically transition from processing none of a state's ECBs to
	 * processing all of them.
	 */
	dtrace_xcall(DTRACE_CPUALL,
	    (dtrace_xcall_t)dtrace_buffer_activate, state);
	goto out;

err:
	dtrace_buffer_free(state->dts_buffer);
	dtrace_buffer_free(state->dts_aggbuffer);

	if ((nspec = state->dts_nspeculations) == 0) {
		ASSERT(state->dts_speculations == NULL);
		goto out;
	}

	spec = state->dts_speculations;
	ASSERT(spec != NULL);

	for (i = 0; i < state->dts_nspeculations; i++) {
		if ((buf = spec[i].dtsp_buffer) == NULL)
			break;

		dtrace_buffer_free(buf);
		kmem_free(buf, bufsize);
	}

	kmem_free(spec, nspec * sizeof (dtrace_speculation_t));
	state->dts_nspeculations = 0;
	state->dts_speculations = NULL;

out:
	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&cpu_lock);

	return (rval);
}
static int
dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu)
{
	dtrace_icookie_t cookie;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE &&
	    state->dts_activity != DTRACE_ACTIVITY_DRAINING)
		return (EINVAL);

	/*
	 * We'll set the activity to DTRACE_ACTIVITY_DRAINING, and issue a sync
	 * to be sure that every CPU has seen it.  See below for the details
	 * on why this is done.
	 */
	state->dts_activity = DTRACE_ACTIVITY_DRAINING;
	dtrace_sync();

	/*
	 * By this point, it is impossible for any CPU to be still processing
	 * with DTRACE_ACTIVITY_ACTIVE.  We can thus set our activity to
	 * DTRACE_ACTIVITY_COOLDOWN and know that we're not racing with any
	 * other CPU in dtrace_buffer_reserve().  This allows dtrace_probe()
	 * and callees to know that the activity is DTRACE_ACTIVITY_COOLDOWN
	 * iff we're in the END probe.
	 */
	state->dts_activity = DTRACE_ACTIVITY_COOLDOWN;
	dtrace_sync();
	ASSERT(state->dts_activity == DTRACE_ACTIVITY_COOLDOWN);

	/*
	 * Finally, we can release the reserve and call the END probe.  We
	 * disable interrupts across calling the END probe to allow us to
	 * return the CPU on which we actually called the END probe.  This
	 * allows user-land to be sure that this CPU's principal buffer is
	 * processed last.
	 */
	state->dts_reserve = 0;

	cookie = dtrace_interrupt_disable();
	*cpu = CPU->cpu_id;
	dtrace_probe(dtrace_probeid_end,
	    (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
	dtrace_interrupt_enable(cookie);

	state->dts_activity = DTRACE_ACTIVITY_STOPPED;
	dtrace_sync();

	return (0);
}
static int
dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option,
    dtrace_optval_t val)
{
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
		return (EBUSY);

	if (option >= DTRACEOPT_MAX)
		return (EINVAL);

	if (option != DTRACEOPT_CPU && val < 0)
		return (EINVAL);

	switch (option) {
	case DTRACEOPT_DESTRUCTIVE:
		if (dtrace_destructive_disallow)
			return (EACCES);

		state->dts_cred.dcr_destructive = 1;
		break;

	case DTRACEOPT_BUFSIZE:
	case DTRACEOPT_DYNVARSIZE:
	case DTRACEOPT_AGGSIZE:
	case DTRACEOPT_SPECSIZE:
	case DTRACEOPT_STRSIZE:
		if (val < 0)
			return (EINVAL);

		if (val >= LONG_MAX) {
			/*
			 * If this is an otherwise negative value, set it to
			 * the highest multiple of 128m less than LONG_MAX.
			 * Technically, we're adjusting the size without
			 * regard to the buffer resizing policy, but in fact,
			 * this has no effect -- if we set the buffer size to
			 * ~LONG_MAX and the buffer policy is ultimately set to
			 * be "manual", the buffer allocation is guaranteed to
			 * fail, if only because the allocation requires two
			 * buffers.  (We set the size to the highest
			 * multiple of 128m because it ensures that the size
			 * will remain a multiple of a megabyte when
			 * repeatedly halved -- all the way down to 15m.)
			 */
			val = LONG_MAX - (1 << 27) + 1;
		}
	}

	state->dts_options[option] = val;

	return (0);
}
static void
dtrace_state_destroy(dtrace_state_t *state)
{
	dtrace_ecb_t *ecb;
	dtrace_vstate_t *vstate = &state->dts_vstate;
	minor_t minor = getminor(state->dts_dev);
	int i, bufsize = (int)NCPU * sizeof (dtrace_buffer_t);
	dtrace_speculation_t *spec = state->dts_speculations;
	int nspec = state->dts_nspeculations;
	uint32_t match;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);

	/*
	 * First, retract any retained enablings for this state.
	 */
	dtrace_enabling_retract(state);
	ASSERT(state->dts_nretained == 0);

	if (state->dts_activity == DTRACE_ACTIVITY_ACTIVE ||
	    state->dts_activity == DTRACE_ACTIVITY_DRAINING) {
		/*
		 * We have managed to come into dtrace_state_destroy() on a
		 * hot enabling -- almost certainly because of a disorderly
		 * shutdown of a consumer.  (That is, a consumer that is
		 * exiting without having called dtrace_stop().) In this case,
		 * we're going to set our activity to be KILLED, and then
		 * issue a sync to be sure that everyone is out of probe
		 * context before we start blowing away ECBs.
		 */
		state->dts_activity = DTRACE_ACTIVITY_KILLED;
		dtrace_sync();
	}

	/*
	 * Release the credential hold we took in dtrace_state_create().
	 */
	if (state->dts_cred.dcr_cred != NULL)
		crfree(state->dts_cred.dcr_cred);

	/*
	 * Now we can safely disable and destroy any enabled probes.  Because
	 * any DTRACE_PRIV_KERNEL probes may actually be slowing our progress
	 * (especially if they're all enabled), we take two passes through the
	 * ECBs:  in the first, we disable just DTRACE_PRIV_KERNEL probes, and
	 * in the second we disable whatever is left over.
	 */
	for (match = DTRACE_PRIV_KERNEL; ; match = 0) {
		for (i = 0; i < state->dts_necbs; i++) {
			if ((ecb = state->dts_ecbs[i]) == NULL)
				continue;

			if (match && ecb->dte_probe != NULL) {
				dtrace_probe_t *probe = ecb->dte_probe;
				dtrace_provider_t *prov = probe->dtpr_provider;

				if (!(prov->dtpv_priv.dtpp_flags & match))
					continue;
			}

			dtrace_ecb_disable(ecb);
			dtrace_ecb_destroy(ecb);
		}

		if (!match)
			break;
	}

	/*
	 * Before we free the buffers, perform one more sync to assure that
	 * every CPU is out of probe context.
	 */
	dtrace_sync();

	dtrace_buffer_free(state->dts_buffer);
	dtrace_buffer_free(state->dts_aggbuffer);

	for (i = 0; i < nspec; i++)
		dtrace_buffer_free(spec[i].dtsp_buffer);

	if (state->dts_cleaner != CYCLIC_NONE)
		cyclic_remove(state->dts_cleaner);

	if (state->dts_deadman != CYCLIC_NONE)
		cyclic_remove(state->dts_deadman);

	dtrace_dstate_fini(&vstate->dtvs_dynvars);
	dtrace_vstate_fini(vstate);
	kmem_free(state->dts_ecbs, state->dts_necbs * sizeof (dtrace_ecb_t *));

	if (state->dts_aggregations != NULL) {
		for (i = 0; i < state->dts_naggregations; i++)
			ASSERT(state->dts_aggregations[i] == NULL);

		ASSERT(state->dts_naggregations > 0);
		kmem_free(state->dts_aggregations,
		    state->dts_naggregations * sizeof (dtrace_aggregation_t *));
	}

	kmem_free(state->dts_buffer, bufsize);
	kmem_free(state->dts_aggbuffer, bufsize);

	for (i = 0; i < nspec; i++)
		kmem_free(spec[i].dtsp_buffer, bufsize);

	kmem_free(spec, nspec * sizeof (dtrace_speculation_t));

	dtrace_format_destroy(state);

	vmem_destroy(state->dts_aggid_arena);
	ddi_soft_state_free(dtrace_softstate, minor);
	vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1);
}
/*
 * DTrace Anonymous Enabling Functions
 */
static dtrace_state_t *
dtrace_anon_grab(void)
{
	dtrace_state_t *state;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if ((state = dtrace_anon.dta_state) == NULL) {
		ASSERT(dtrace_anon.dta_enabling == NULL);
		return (NULL);
	}

	ASSERT(dtrace_anon.dta_enabling != NULL);
	ASSERT(dtrace_retained != NULL);

	dtrace_enabling_destroy(dtrace_anon.dta_enabling);
	dtrace_anon.dta_enabling = NULL;
	dtrace_anon.dta_state = NULL;

	return (state);
}
static void
dtrace_anon_property(void)
{
	int i, rv;
	dtrace_state_t *state;
	dof_hdr_t *dof;
	char c[32];		/* enough for "dof-data-" + digits */

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);

	for (i = 0; ; i++) {
		(void) snprintf(c, sizeof (c), "dof-data-%d", i);

		dtrace_err_verbose = 1;

		if ((dof = dtrace_dof_property(c)) == NULL) {
			dtrace_err_verbose = 0;
			break;
		}

		/*
		 * We want to create anonymous state, so we need to transition
		 * the kernel debugger to indicate that DTrace is active.  If
		 * this fails (e.g. because the debugger has modified text in
		 * some way), we won't continue with the processing.
		 */
		if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
			cmn_err(CE_NOTE, "kernel debugger active; anonymous "
			    "enabling ignored.");
			dtrace_dof_destroy(dof);
			break;
		}

		/*
		 * If we haven't allocated an anonymous state, we'll do so now.
		 */
		if ((state = dtrace_anon.dta_state) == NULL) {
			state = dtrace_state_create(NULL, NULL);
			dtrace_anon.dta_state = state;

			if (state == NULL) {
				/*
				 * This basically shouldn't happen:  the only
				 * failure mode from dtrace_state_create() is a
				 * failure of ddi_soft_state_zalloc() that
				 * itself should never happen.  Still, the
				 * interface allows for a failure mode, and
				 * we want to fail as gracefully as possible:
				 * we'll emit an error message and cease
				 * processing anonymous state in this case.
				 */
				cmn_err(CE_WARN, "failed to create "
				    "anonymous state");
				dtrace_dof_destroy(dof);
				break;
			}
		}

		rv = dtrace_dof_slurp(dof, &state->dts_vstate, CRED(),
		    &dtrace_anon.dta_enabling, 0, B_TRUE);

		if (rv == 0)
			rv = dtrace_dof_options(dof, state);

		dtrace_err_verbose = 0;
		dtrace_dof_destroy(dof);

		if (rv != 0) {
			/*
			 * This is malformed DOF; chuck any anonymous state
			 * that we created.
			 */
			ASSERT(dtrace_anon.dta_enabling == NULL);
			dtrace_state_destroy(state);
			dtrace_anon.dta_state = NULL;
			break;
		}

		ASSERT(dtrace_anon.dta_enabling != NULL);
	}

	if (dtrace_anon.dta_enabling != NULL) {
		int rval;

		/*
		 * dtrace_enabling_retain() can only fail because we are
		 * trying to retain more enablings than are allowed -- but
		 * we only have one anonymous enabling, and we are guaranteed
		 * to be allowed at least one retained enabling; we assert
		 * that dtrace_enabling_retain() returns success.
		 */
		rval = dtrace_enabling_retain(dtrace_anon.dta_enabling);
		ASSERT(rval == 0);

		dtrace_enabling_dump(dtrace_anon.dta_enabling);
	}
}
/*
 * DTrace Helper Functions
 */
static void
dtrace_helper_trace(dtrace_helper_action_t *helper,
    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, int where)
{
	uint32_t size, next, nnext, i;
	dtrace_helptrace_t *ent;
	uint16_t flags = cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

	if (!dtrace_helptrace_enabled)
		return;

	ASSERT(vstate->dtvs_nlocals <= dtrace_helptrace_nlocals);

	/*
	 * What would a tracing framework be without its own tracing
	 * framework?  (Well, a hell of a lot simpler, for starters...)
	 */
	size = sizeof (dtrace_helptrace_t) + dtrace_helptrace_nlocals *
	    sizeof (uint64_t) - sizeof (uint64_t);

	/*
	 * Iterate until we can allocate a slot in the trace buffer.
	 */
	do {
		next = dtrace_helptrace_next;

		if (next + size < dtrace_helptrace_bufsize) {
			nnext = next + size;
		} else {
			nnext = size;
		}
	} while (dtrace_cas32(&dtrace_helptrace_next, next, nnext) != next);

	/*
	 * We have our slot; fill it in.
	 */
	if (nnext == size)
		next = 0;

	ent = (dtrace_helptrace_t *)&dtrace_helptrace_buffer[next];
	ent->dtht_helper = helper;
	ent->dtht_where = where;
	ent->dtht_nlocals = vstate->dtvs_nlocals;

	ent->dtht_fltoffs = (mstate->dtms_present & DTRACE_MSTATE_FLTOFFS) ?
	    mstate->dtms_fltoffs : -1;
	ent->dtht_fault = DTRACE_FLAGS2FLT(flags);
	ent->dtht_illval = cpu_core[CPU->cpu_id].cpuc_dtrace_illval;

	for (i = 0; i < vstate->dtvs_nlocals; i++) {
		dtrace_statvar_t *svar;

		if ((svar = vstate->dtvs_locals[i]) == NULL)
			continue;

		ASSERT(svar->dtsv_size >= (int)NCPU * sizeof (uint64_t));
		ent->dtht_locals[i] =
		    ((uint64_t *)(uintptr_t)svar->dtsv_data)[CPU->cpu_id];
	}
}
static uint64_t
dtrace_helper(int which, dtrace_mstate_t *mstate,
    dtrace_state_t *state, uint64_t arg0, uint64_t arg1)
{
	uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
	uint64_t sarg0 = mstate->dtms_arg[0];
	uint64_t sarg1 = mstate->dtms_arg[1];
	uint64_t rval = 0;
	dtrace_helpers_t *helpers = curproc->p_dtrace_helpers;
	dtrace_helper_action_t *helper;
	dtrace_vstate_t *vstate;
	dtrace_difo_t *pred;
	int i, trace = dtrace_helptrace_enabled;

	ASSERT(which >= 0 && which < DTRACE_NHELPER_ACTIONS);

	if (helpers == NULL)
		return (0);

	if ((helper = helpers->dthps_actions[which]) == NULL)
		return (0);

	vstate = &helpers->dthps_vstate;
	mstate->dtms_arg[0] = arg0;
	mstate->dtms_arg[1] = arg1;

	/*
	 * Now iterate over each helper.  If its predicate evaluates to 'true',
	 * we'll call the corresponding actions.  Note that the below calls
	 * to dtrace_dif_emulate() may set faults in machine state.  This is
	 * okay:  our caller (the outer dtrace_dif_emulate()) will simply plow
	 * the stored DIF offset with its own (which is the desired behavior).
	 * Also, note the calls to dtrace_dif_emulate() may allocate scratch
	 * from machine state; this is okay, too.
	 */
	for (; helper != NULL; helper = helper->dtha_next) {
		if ((pred = helper->dtha_predicate) != NULL) {
			if (trace)
				dtrace_helper_trace(helper, mstate, vstate, 0);

			if (!dtrace_dif_emulate(pred, mstate, vstate, state))
				goto next;

			if (*flags & CPU_DTRACE_FAULT)
				goto err;
		}

		for (i = 0; i < helper->dtha_nactions; i++) {
			if (trace)
				dtrace_helper_trace(helper,
				    mstate, vstate, i + 1);

			rval = dtrace_dif_emulate(helper->dtha_actions[i],
			    mstate, vstate, state);

			if (*flags & CPU_DTRACE_FAULT)
				goto err;
		}

next:
		if (trace)
			dtrace_helper_trace(helper, mstate, vstate,
			    DTRACE_HELPTRACE_NEXT);
	}

	if (trace)
		dtrace_helper_trace(helper, mstate, vstate,
		    DTRACE_HELPTRACE_DONE);

	/*
	 * Restore the arg0 that we saved upon entry.
	 */
	mstate->dtms_arg[0] = sarg0;
	mstate->dtms_arg[1] = sarg1;

	return (rval);

err:
	if (trace)
		dtrace_helper_trace(helper, mstate, vstate,
		    DTRACE_HELPTRACE_ERR);

	/*
	 * Restore the arg0 that we saved upon entry.
	 */
	mstate->dtms_arg[0] = sarg0;
	mstate->dtms_arg[1] = sarg1;

	return (0);
}
static void
dtrace_helper_action_destroy(dtrace_helper_action_t *helper,
    dtrace_vstate_t *vstate)
{
	int i;

	if (helper->dtha_predicate != NULL)
		dtrace_difo_release(helper->dtha_predicate, vstate);

	for (i = 0; i < helper->dtha_nactions; i++) {
		ASSERT(helper->dtha_actions[i] != NULL);
		dtrace_difo_release(helper->dtha_actions[i], vstate);
	}

	kmem_free(helper->dtha_actions,
	    helper->dtha_nactions * sizeof (dtrace_difo_t *));
	kmem_free(helper, sizeof (dtrace_helper_action_t));
}
#if !defined(__APPLE__)
static int
dtrace_helper_destroygen(int gen)
{
	proc_t *p = curproc;
#else
static int
dtrace_helper_destroygen(proc_t* p, int gen)
{
#endif
	dtrace_helpers_t *help = p->p_dtrace_helpers;
	dtrace_vstate_t *vstate;
	int i;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (help == NULL || gen > help->dthps_generation)
		return (EINVAL);

	vstate = &help->dthps_vstate;

	for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
		dtrace_helper_action_t *last = NULL, *h, *next;

		for (h = help->dthps_actions[i]; h != NULL; h = next) {
			next = h->dtha_next;

			if (h->dtha_generation == gen) {
				if (last != NULL) {
					last->dtha_next = next;
				} else {
					help->dthps_actions[i] = next;
				}

				dtrace_helper_action_destroy(h, vstate);
			} else {
				last = h;
			}
		}
	}

	/*
	 * Iterate until we've cleared out all helper providers with the
	 * given generation number.
	 */
	for (;;) {
		dtrace_helper_provider_t *prov = NULL;

		/*
		 * Look for a helper provider with the right generation.  We
		 * have to start back at the beginning of the list each time
		 * because we drop dtrace_lock.  It's unlikely that we'll make
		 * more than two passes.
		 */
		for (i = 0; i < help->dthps_nprovs; i++) {
			prov = help->dthps_provs[i];

			if (prov->dthp_generation == gen)
				break;
		}

		/*
		 * If there were no matches, we're done.
		 */
		if (i == help->dthps_nprovs)
			break;

		/*
		 * Move the last helper provider into this slot.
		 */
		help->dthps_nprovs--;
		help->dthps_provs[i] = help->dthps_provs[help->dthps_nprovs];
		help->dthps_provs[help->dthps_nprovs] = NULL;

		lck_mtx_unlock(&dtrace_lock);

		/*
		 * If we have a meta provider, remove this helper provider.
		 */
		lck_mtx_lock(&dtrace_meta_lock);
		if (dtrace_meta_pid != NULL) {
			ASSERT(dtrace_deferred_pid == NULL);
			dtrace_helper_provider_remove(&prov->dthp_prov,
			    p->p_pid);
		}
		lck_mtx_unlock(&dtrace_meta_lock);

		dtrace_helper_provider_destroy(prov);

		lck_mtx_lock(&dtrace_lock);
	}

	return (0);
}
static int
dtrace_helper_validate(dtrace_helper_action_t *helper)
{
	int err = 0, i;
	dtrace_difo_t *dp;

	if ((dp = helper->dtha_predicate) != NULL)
		err += dtrace_difo_validate_helper(dp);

	for (i = 0; i < helper->dtha_nactions; i++)
		err += dtrace_difo_validate_helper(helper->dtha_actions[i]);

	return (err == 0);
}
#if !defined(__APPLE__)
static int
dtrace_helper_action_add(int which, dtrace_ecbdesc_t *ep)
#else
static int
dtrace_helper_action_add(proc_t* p, int which, dtrace_ecbdesc_t *ep)
#endif
{
	dtrace_helpers_t *help;
	dtrace_helper_action_t *helper, *last;
	dtrace_actdesc_t *act;
	dtrace_vstate_t *vstate;
	dtrace_predicate_t *pred;
	int count = 0, nactions = 0, i;

	if (which < 0 || which >= DTRACE_NHELPER_ACTIONS)
		return (EINVAL);

#if !defined(__APPLE__)
	help = curproc->p_dtrace_helpers;
#else
	help = p->p_dtrace_helpers;
#endif
	last = help->dthps_actions[which];
	vstate = &help->dthps_vstate;

	for (count = 0; last != NULL; last = last->dtha_next) {
		count++;
		if (last->dtha_next == NULL)
			break;
	}

	/*
	 * If we already have dtrace_helper_actions_max helper actions for this
	 * helper action type, we'll refuse to add a new one.
	 */
	if (count >= dtrace_helper_actions_max)
		return (ENOSPC);

	helper = kmem_zalloc(sizeof (dtrace_helper_action_t), KM_SLEEP);
	helper->dtha_generation = help->dthps_generation;

	if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) {
		ASSERT(pred->dtp_difo != NULL);
		dtrace_difo_hold(pred->dtp_difo);
		helper->dtha_predicate = pred->dtp_difo;
	}

	for (act = ep->dted_action; act != NULL; act = act->dtad_next) {
		if (act->dtad_kind != DTRACEACT_DIFEXPR)
			goto err;

		if (act->dtad_difo == NULL)
			goto err;

		nactions++;
	}

	helper->dtha_actions = kmem_zalloc(sizeof (dtrace_difo_t *) *
	    (helper->dtha_nactions = nactions), KM_SLEEP);

	for (act = ep->dted_action, i = 0; act != NULL; act = act->dtad_next) {
		dtrace_difo_hold(act->dtad_difo);
		helper->dtha_actions[i++] = act->dtad_difo;
	}

	if (!dtrace_helper_validate(helper))
		goto err;

	if (last == NULL) {
		help->dthps_actions[which] = helper;
	} else {
		last->dtha_next = helper;
	}

	if (vstate->dtvs_nlocals > dtrace_helptrace_nlocals) {
		dtrace_helptrace_nlocals = vstate->dtvs_nlocals;
		dtrace_helptrace_next = 0;
	}

	return (0);
err:
	dtrace_helper_action_destroy(helper, vstate);
	return (EINVAL);
}
static void
dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help,
    dof_helper_t *dofhp)
{
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED);

	lck_mtx_lock(&dtrace_meta_lock);
	lck_mtx_lock(&dtrace_lock);

	if (!dtrace_attached() || dtrace_meta_pid == NULL) {
		/*
		 * If the dtrace module is loaded but not attached, or if
		 * there isn't a meta provider registered to deal with
		 * these provider descriptions, we need to postpone creating
		 * the actual providers until later.
		 */
		if (help->dthps_next == NULL && help->dthps_prev == NULL &&
		    dtrace_deferred_pid != help) {
			help->dthps_deferred = 1;
			help->dthps_pid = p->p_pid;
			help->dthps_next = dtrace_deferred_pid;
			help->dthps_prev = NULL;
			if (dtrace_deferred_pid != NULL)
				dtrace_deferred_pid->dthps_prev = help;
			dtrace_deferred_pid = help;
		}

		lck_mtx_unlock(&dtrace_lock);

	} else if (dofhp != NULL) {
		/*
		 * If the dtrace module is loaded and we have a particular
		 * helper provider description, pass that off to the
		 * meta provider.
		 */
		lck_mtx_unlock(&dtrace_lock);

		dtrace_helper_provide(dofhp, p->p_pid);

	} else {
		/*
		 * Otherwise, just pass all the helper provider descriptions
		 * off to the meta provider.
		 */
		uint_t i;

		lck_mtx_unlock(&dtrace_lock);

		for (i = 0; i < help->dthps_nprovs; i++) {
			dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
			    p->p_pid);
		}
	}

	lck_mtx_unlock(&dtrace_meta_lock);
}
#if !defined(__APPLE__)
static int
dtrace_helper_provider_add(dof_helper_t *dofhp, int gen)
#else
static int
dtrace_helper_provider_add(proc_t* p, dof_helper_t *dofhp, int gen)
#endif
{
	dtrace_helpers_t *help;
	dtrace_helper_provider_t *hprov, **tmp_provs;
	uint_t tmp_maxprovs, i;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

#if !defined(__APPLE__)
	help = curproc->p_dtrace_helpers;
#else
	help = p->p_dtrace_helpers;
#endif
	ASSERT(help != NULL);

	/*
	 * If we already have dtrace_helper_providers_max helper providers,
	 * we refuse to add a new one.
	 */
	if (help->dthps_nprovs >= dtrace_helper_providers_max)
		return (ENOSPC);

	/*
	 * Check to make sure this isn't a duplicate.
	 */
	for (i = 0; i < help->dthps_nprovs; i++) {
		if (dofhp->dofhp_addr ==
		    help->dthps_provs[i]->dthp_prov.dofhp_addr)
			return (EALREADY);
	}

	hprov = kmem_zalloc(sizeof (dtrace_helper_provider_t), KM_SLEEP);
	hprov->dthp_prov = *dofhp;
	hprov->dthp_ref = 1;
	hprov->dthp_generation = gen;

	/*
	 * Allocate a bigger table for helper providers if it's already full.
	 */
	if (help->dthps_maxprovs == help->dthps_nprovs) {
		tmp_maxprovs = help->dthps_maxprovs;
		tmp_provs = help->dthps_provs;

		if (help->dthps_maxprovs == 0)
			help->dthps_maxprovs = 2;
		else
			help->dthps_maxprovs *= 2;
		if (help->dthps_maxprovs > dtrace_helper_providers_max)
			help->dthps_maxprovs = dtrace_helper_providers_max;

		ASSERT(tmp_maxprovs < help->dthps_maxprovs);

		help->dthps_provs = kmem_zalloc(help->dthps_maxprovs *
		    sizeof (dtrace_helper_provider_t *), KM_SLEEP);

		if (tmp_provs != NULL) {
			bcopy(tmp_provs, help->dthps_provs, tmp_maxprovs *
			    sizeof (dtrace_helper_provider_t *));
			kmem_free(tmp_provs, tmp_maxprovs *
			    sizeof (dtrace_helper_provider_t *));
		}
	}

	help->dthps_provs[help->dthps_nprovs] = hprov;
	help->dthps_nprovs++;

	return (0);
}
static void
dtrace_helper_provider_destroy(dtrace_helper_provider_t *hprov)
{
	lck_mtx_lock(&dtrace_lock);

	if (--hprov->dthp_ref == 0) {
		dof_hdr_t *dof;
		lck_mtx_unlock(&dtrace_lock);
		dof = (dof_hdr_t *)(uintptr_t)hprov->dthp_prov.dofhp_dof;
		dtrace_dof_destroy(dof);
		kmem_free(hprov, sizeof (dtrace_helper_provider_t));
	} else {
		lck_mtx_unlock(&dtrace_lock);
	}
}
static int
dtrace_helper_provider_validate(dof_hdr_t *dof, dof_sec_t *sec)
{
	uintptr_t daddr = (uintptr_t)dof;
	dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
	dof_provider_t *provider;
	dof_probe_t *probe;
	uint8_t *arg;
	char *strtab, *typestr;
	dof_stridx_t typeidx;
	size_t typesz;
	uint_t nprobes, j, k;

	ASSERT(sec->dofs_type == DOF_SECT_PROVIDER);

	if (sec->dofs_offset & (sizeof (uint_t) - 1)) {
		dtrace_dof_error(dof, "misaligned section offset");
		return (-1);
	}

	/*
	 * The section needs to be large enough to contain the DOF provider
	 * structure appropriate for the given version.
	 */
	if (sec->dofs_size <
	    ((dof->dofh_ident[DOF_ID_VERSION] == DOF_VERSION_1) ?
	    offsetof(dof_provider_t, dofpv_prenoffs) :
	    sizeof (dof_provider_t))) {
		dtrace_dof_error(dof, "provider section too small");
		return (-1);
	}

	provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
	str_sec = dtrace_dof_sect(dof, DOF_SECT_STRTAB, provider->dofpv_strtab);
	prb_sec = dtrace_dof_sect(dof, DOF_SECT_PROBES, provider->dofpv_probes);
	arg_sec = dtrace_dof_sect(dof, DOF_SECT_PRARGS, provider->dofpv_prargs);
	off_sec = dtrace_dof_sect(dof, DOF_SECT_PROFFS, provider->dofpv_proffs);

	if (str_sec == NULL || prb_sec == NULL ||
	    arg_sec == NULL || off_sec == NULL)
		return (-1);

	enoff_sec = NULL;

	if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
	    provider->dofpv_prenoffs != DOF_SECT_NONE &&
	    (enoff_sec = dtrace_dof_sect(dof, DOF_SECT_PRENOFFS,
	    provider->dofpv_prenoffs)) == NULL)
		return (-1);

	strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);

	if (provider->dofpv_name >= str_sec->dofs_size ||
	    strlen(strtab + provider->dofpv_name) >= DTRACE_PROVNAMELEN) {
		dtrace_dof_error(dof, "invalid provider name");
		return (-1);
	}

	if (prb_sec->dofs_entsize == 0 ||
	    prb_sec->dofs_entsize > prb_sec->dofs_size) {
		dtrace_dof_error(dof, "invalid entry size");
		return (-1);
	}

	if (prb_sec->dofs_entsize & (sizeof (uintptr_t) - 1)) {
		dtrace_dof_error(dof, "misaligned entry size");
		return (-1);
	}

	if (off_sec->dofs_entsize != sizeof (uint32_t)) {
		dtrace_dof_error(dof, "invalid entry size");
		return (-1);
	}

	if (off_sec->dofs_offset & (sizeof (uint32_t) - 1)) {
		dtrace_dof_error(dof, "misaligned section offset");
		return (-1);
	}

	if (arg_sec->dofs_entsize != sizeof (uint8_t)) {
		dtrace_dof_error(dof, "invalid entry size");
		return (-1);
	}

	arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);

	nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;

	/*
	 * Take a pass through the probes to check for errors.
	 */
	for (j = 0; j < nprobes; j++) {
		probe = (dof_probe_t *)(uintptr_t)(daddr +
		    prb_sec->dofs_offset + j * prb_sec->dofs_entsize);

		if (probe->dofpr_func >= str_sec->dofs_size) {
			dtrace_dof_error(dof, "invalid function name");
			return (-1);
		}

		if (strlen(strtab + probe->dofpr_func) >= DTRACE_FUNCNAMELEN) {
			dtrace_dof_error(dof, "function name too long");
			return (-1);
		}

		if (probe->dofpr_name >= str_sec->dofs_size ||
		    strlen(strtab + probe->dofpr_name) >= DTRACE_NAMELEN) {
			dtrace_dof_error(dof, "invalid probe name");
			return (-1);
		}

		/*
		 * The offset count must not wrap the index, and the offsets
		 * must also not overflow the section's data.
		 */
		if (probe->dofpr_offidx + probe->dofpr_noffs <
		    probe->dofpr_offidx ||
		    (probe->dofpr_offidx + probe->dofpr_noffs) *
		    off_sec->dofs_entsize > off_sec->dofs_size) {
			dtrace_dof_error(dof, "invalid probe offset");
			return (-1);
		}

		if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1) {
			/*
			 * If there's no is-enabled offset section, make sure
			 * there aren't any is-enabled offsets.  Otherwise
			 * perform the same checks as for probe offsets
			 * (immediately above).
			 */
			if (enoff_sec == NULL) {
				if (probe->dofpr_enoffidx != 0 ||
				    probe->dofpr_nenoffs != 0) {
					dtrace_dof_error(dof, "is-enabled "
					    "offsets with null section");
					return (-1);
				}
			} else if (probe->dofpr_enoffidx +
			    probe->dofpr_nenoffs < probe->dofpr_enoffidx ||
			    (probe->dofpr_enoffidx + probe->dofpr_nenoffs) *
			    enoff_sec->dofs_entsize > enoff_sec->dofs_size) {
				dtrace_dof_error(dof, "invalid is-enabled "
				    "offset");
				return (-1);
			}

			if (probe->dofpr_noffs + probe->dofpr_nenoffs == 0) {
				dtrace_dof_error(dof, "zero probe and "
				    "is-enabled offsets");
				return (-1);
			}
		} else if (probe->dofpr_noffs == 0) {
			dtrace_dof_error(dof, "zero probe offsets");
			return (-1);
		}

		if (probe->dofpr_argidx + probe->dofpr_xargc <
		    probe->dofpr_argidx ||
		    (probe->dofpr_argidx + probe->dofpr_xargc) *
		    arg_sec->dofs_entsize > arg_sec->dofs_size) {
			dtrace_dof_error(dof, "invalid args");
			return (-1);
		}

		typeidx = probe->dofpr_nargv;
		typestr = strtab + probe->dofpr_nargv;
		for (k = 0; k < probe->dofpr_nargc; k++) {
			if (typeidx >= str_sec->dofs_size) {
				dtrace_dof_error(dof, "bad "
				    "native argument type");
				return (-1);
			}

			typesz = strlen(typestr) + 1;
			if (typesz > DTRACE_ARGTYPELEN) {
				dtrace_dof_error(dof, "native "
				    "argument type too long");
				return (-1);
			}

			typeidx += typesz;
			typestr += typesz;
		}

		typeidx = probe->dofpr_xargv;
		typestr = strtab + probe->dofpr_xargv;
		for (k = 0; k < probe->dofpr_xargc; k++) {
			if (arg[probe->dofpr_argidx + k] > probe->dofpr_nargc) {
				dtrace_dof_error(dof, "bad "
				    "native argument index");
				return (-1);
			}

			if (typeidx >= str_sec->dofs_size) {
				dtrace_dof_error(dof, "bad "
				    "translated argument type");
				return (-1);
			}

			typesz = strlen(typestr) + 1;
			if (typesz > DTRACE_ARGTYPELEN) {
				dtrace_dof_error(dof, "translated argument "
				    "type too long");
				return (-1);
			}

			typeidx += typesz;
			typestr += typesz;
		}
	}

	return (0);
}
#if !defined(__APPLE__)
static int
dtrace_helper_slurp(dof_hdr_t *dof, dof_helper_t *dhp)
#else
static int
dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp)
#endif
{
	dtrace_helpers_t *help;
	dtrace_vstate_t *vstate;
	dtrace_enabling_t *enab = NULL;
	int i, gen, rv, nhelpers = 0, nprovs = 0, destroy = 1;
	uintptr_t daddr = (uintptr_t)dof;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

#if !defined(__APPLE__)
	if ((help = curproc->p_dtrace_helpers) == NULL)
		help = dtrace_helpers_create(curproc);
#else
	if ((help = p->p_dtrace_helpers) == NULL)
		help = dtrace_helpers_create(p);
#endif

	vstate = &help->dthps_vstate;

	if ((rv = dtrace_dof_slurp(dof, vstate, NULL, &enab,
	    dhp != NULL ? dhp->dofhp_addr : 0, B_FALSE)) != 0) {
		dtrace_dof_destroy(dof);
		return (rv);
	}

	/*
	 * Look for helper providers and validate their descriptions.
	 */
	if (dhp != NULL) {
		for (i = 0; i < dof->dofh_secnum; i++) {
			dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
			    dof->dofh_secoff + i * dof->dofh_secsize);

			if (sec->dofs_type != DOF_SECT_PROVIDER)
				continue;

			if (dtrace_helper_provider_validate(dof, sec) != 0) {
				dtrace_enabling_destroy(enab);
				dtrace_dof_destroy(dof);
				return (-1);
			}

			nprovs++;
		}
	}

	/*
	 * Now we need to walk through the ECB descriptions in the enabling.
	 */
	for (i = 0; i < enab->dten_ndesc; i++) {
		dtrace_ecbdesc_t *ep = enab->dten_desc[i];
		dtrace_probedesc_t *desc = &ep->dted_probe;

		if (strcmp(desc->dtpd_provider, "dtrace") != 0)
			continue;

		if (strcmp(desc->dtpd_mod, "helper") != 0)
			continue;

		if (strcmp(desc->dtpd_func, "ustack") != 0)
			continue;

#if !defined(__APPLE__)
		if ((rv = dtrace_helper_action_add(DTRACE_HELPER_ACTION_USTACK, ep)) != 0)
#else
		if ((rv = dtrace_helper_action_add(p, DTRACE_HELPER_ACTION_USTACK, ep)) != 0)
#endif
		{
			/*
			 * Adding this helper action failed -- we are now going
			 * to rip out the entire generation and return failure.
			 */
#if !defined(__APPLE__)
			(void) dtrace_helper_destroygen(help->dthps_generation);
#else
			(void) dtrace_helper_destroygen(p, help->dthps_generation);
#endif
			dtrace_enabling_destroy(enab);
			dtrace_dof_destroy(dof);
			return (-1);
		}

		nhelpers++;
	}

	if (nhelpers < enab->dten_ndesc)
		dtrace_dof_error(dof, "unmatched helpers");

	gen = help->dthps_generation++;
	dtrace_enabling_destroy(enab);

	if (dhp != NULL && nprovs > 0) {
		dhp->dofhp_dof = (uint64_t)(uintptr_t)dof;
#if !defined(__APPLE__)
		if (dtrace_helper_provider_add(dhp, gen) == 0) {
#else
		if (dtrace_helper_provider_add(p, dhp, gen) == 0) {
#endif
			lck_mtx_unlock(&dtrace_lock);
#if !defined(__APPLE__)
			dtrace_helper_provider_register(curproc, help, dhp);
#else
			dtrace_helper_provider_register(p, help, dhp);
#endif
			lck_mtx_lock(&dtrace_lock);

			destroy = 0;
		}
	}

	if (destroy)
		dtrace_dof_destroy(dof);

	return (gen);
}
#if defined(__APPLE__)

/*
 * DTrace user static probes (USDT probes) and helper actions are loaded
 * in a process by processing dof sections. The dof sections are passed
 * into the kernel by dyld, in a dof_ioctl_data_t block. It is rather
 * expensive to process dof for a process that will never use it. There
 * is a memory cost (allocating the providers/probes), and a cpu cost
 * (creating the providers/probes).
 *
 * To reduce this cost, we use "lazy dof". The normal procedure for
 * dof processing is to copyin the dof(s) pointed to by the dof_ioctl_data_t
 * block, and invoke dof_slurp_helper() on them. When "lazy dof" is
 * used, each process retains the dof_ioctl_data_t block, instead of
 * copying in the data it points to.
 *
 * The dof_ioctl_data_t blocks are managed as if they were the actual
 * processed dof; on fork the block is copied to the child, on exec and
 * exit the block is freed.
 *
 * If the process loads library(s) containing additional dof, the
 * new dof_ioctl_data_t is merged with the existing block.
 *
 * There are a few catches that make this slightly more difficult.
 * When dyld registers dof_ioctl_data_t blocks, it expects a unique
 * identifier value for each dof in the block. In non-lazy dof terms,
 * this is the generation that dof was loaded in. If we hand back
 * a UID for a lazy dof, that same UID must be able to unload the
 * dof once it has become non-lazy. To meet this requirement, the
 * code that loads lazy dof requires that the UID's for dof(s) in
 * the lazy dof be sorted, and in ascending order. It is okay to skip
 * UID's, i.e., 1 -> 5 -> 6 is legal.
 *
 * Once a process has become non-lazy, it will stay non-lazy. All
 * future dof operations for that process will be non-lazy, even
 * if the dof mode transitions back to lazy.
 *
 * Always do lazy dof checks before non-lazy (i.e. in fork, exit, exec).
 * That way if the lazy check fails due to transitioning to non-lazy, the
 * right thing is done with the newly faulted in dof.
 */
13956 * This method is a bit squicky. It must handle:
13958 * dof should not be lazy.
13959 * dof should have been handled lazily, but there was an error
13960 * dof was handled lazily, and needs to be freed.
13961 * dof was handled lazily, and must not be freed.
13964 * Returns EACCESS if dof should be handled non-lazily.
13966 * KERN_SUCCESS and all other return codes indicate lazy handling of dof.
13968 * If the dofs data is claimed by this method, dofs_claimed will be set.
13969 * Callers should not free claimed dofs.
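/*
 * Added commentary (not part of the original source): a minimal sketch of
 * the ordering invariant described above. The actual checks live in
 * dtrace_lazy_dofs_add() and dtrace_lazy_dofs_remove() below:
 *
 *	for (i = 0; i < all_dofs->dofiod_count - 1; i++)
 *		ASSERT(all_dofs->dofiod_helpers[i].dofhp_dof <
 *		    all_dofs->dofiod_helpers[i + 1].dofhp_dof);
 *
 * Skipping values is fine; only ascending order matters, so that the
 * generation handed back for a lazy dof can still unload the same dof
 * after the process has gone non-lazy.
 */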
static int
dtrace_lazy_dofs_add(proc_t *p, dof_ioctl_data_t *incoming_dofs, int *dofs_claimed)
{
	int rval = 0;
	*dofs_claimed = 0;

	ASSERT(incoming_dofs && incoming_dofs->dofiod_count > 0);

	lck_rw_lock_shared(&dtrace_dof_mode_lock);

	/*
	 * If we have lazy dof, dof mode better be LAZY_ON.
	 */
	ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON);
	ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);
	ASSERT(dtrace_dof_mode != DTRACE_DOF_MODE_NEVER);

	/*
	 * Any existing helpers force non-lazy behavior.
	 */
	if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON && (p->p_dtrace_helpers == NULL)) {
		lck_mtx_lock(&p->p_dtrace_sprlock);

		dof_ioctl_data_t *existing_dofs = p->p_dtrace_lazy_dofs;
		unsigned int existing_dofs_count = (existing_dofs) ? existing_dofs->dofiod_count : 0;
		unsigned int i, merged_dofs_count = incoming_dofs->dofiod_count + existing_dofs_count;

		if (merged_dofs_count == 0 || merged_dofs_count > 1024) {
			dtrace_dof_error(NULL, "lazy_dofs_add merged_dofs_count out of range");
			rval = EINVAL;
		} else {
			/*
			 * Each dof being added must be assigned a unique generation.
			 */
			uint64_t generation = (existing_dofs) ?
			    existing_dofs->dofiod_helpers[existing_dofs_count - 1].dofhp_dof + 1 : 1;
			for (i = 0; i < incoming_dofs->dofiod_count; i++) {
				/*
				 * We rely on these being the same so we can overwrite dofhp_dof and not lose info.
				 */
				ASSERT(incoming_dofs->dofiod_helpers[i].dofhp_dof ==
				    incoming_dofs->dofiod_helpers[i].dofhp_addr);
				incoming_dofs->dofiod_helpers[i].dofhp_dof = generation++;
			}

			if (existing_dofs) {
				/*
				 * Merge the existing and incoming dofs
				 */
				size_t merged_dofs_size = DOF_IOCTL_DATA_T_SIZE(merged_dofs_count);
				dof_ioctl_data_t *merged_dofs = kmem_alloc(merged_dofs_size, KM_SLEEP);

				bcopy(&existing_dofs->dofiod_helpers[0],
				    &merged_dofs->dofiod_helpers[0],
				    sizeof(dof_helper_t) * existing_dofs_count);
				bcopy(&incoming_dofs->dofiod_helpers[0],
				    &merged_dofs->dofiod_helpers[existing_dofs_count],
				    sizeof(dof_helper_t) * incoming_dofs->dofiod_count);

				merged_dofs->dofiod_count = merged_dofs_count;

				kmem_free(existing_dofs, DOF_IOCTL_DATA_T_SIZE(existing_dofs_count));

				p->p_dtrace_lazy_dofs = merged_dofs;
			} else {
				/*
				 * Claim the incoming dofs
				 */
				*dofs_claimed = 1;
				p->p_dtrace_lazy_dofs = incoming_dofs;
			}

			/*
			 * Sanity check: lazy dof generations must be sorted in ascending order.
			 */
			dof_ioctl_data_t *all_dofs = p->p_dtrace_lazy_dofs;
			for (i = 0; i < all_dofs->dofiod_count - 1; i++) {
				ASSERT(all_dofs->dofiod_helpers[i].dofhp_dof <
				    all_dofs->dofiod_helpers[i + 1].dofhp_dof);
			}
		}

		lck_mtx_unlock(&p->p_dtrace_sprlock);
	} else {
		rval = EACCES;
	}

	lck_rw_unlock_shared(&dtrace_dof_mode_lock);

	return (rval);
}
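/*
 * Added commentary (not part of the original source): caller-side sketch of
 * the claimed-dofs contract above. The real caller is the DTRACEHIOC_ADDDOF
 * handler in dtrace_ioctl_helper() later in this file:
 *
 *	int claimed = 0;
 *	rval = dtrace_lazy_dofs_add(p, multi_dof, &claimed);
 *	if (rval == EACCES)
 *		... fall back to non-lazy dof processing ...
 *	if (multi_dof != NULL && !claimed)
 *		kmem_free(multi_dof, dof_ioctl_data_size);
 */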
/*
 * Returns:
 *
 *	EINVAL: lazy dof is enabled, but the requested generation was not found.
 *	EACCES: This removal needs to be handled non-lazily.
 */
static int
dtrace_lazy_dofs_remove(proc_t *p, int generation)
{
	int rval = EINVAL;

	lck_rw_lock_shared(&dtrace_dof_mode_lock);

	/*
	 * If we have lazy dof, dof mode better be LAZY_ON.
	 */
	ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON);
	ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);
	ASSERT(dtrace_dof_mode != DTRACE_DOF_MODE_NEVER);

	/*
	 * Any existing helpers force non-lazy behavior.
	 */
	if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON && (p->p_dtrace_helpers == NULL)) {
		lck_mtx_lock(&p->p_dtrace_sprlock);

		dof_ioctl_data_t *existing_dofs = p->p_dtrace_lazy_dofs;

		if (existing_dofs) {
			int index, existing_dofs_count = existing_dofs->dofiod_count;
			for (index = 0; index < existing_dofs_count; index++) {
				if ((int)existing_dofs->dofiod_helpers[index].dofhp_dof == generation) {
					dof_ioctl_data_t *removed_dofs = NULL;

					/*
					 * If there is only 1 dof, we'll delete it and swap in NULL.
					 */
					if (existing_dofs_count > 1) {
						int removed_dofs_count = existing_dofs_count - 1;
						size_t removed_dofs_size = DOF_IOCTL_DATA_T_SIZE(removed_dofs_count);

						removed_dofs = kmem_alloc(removed_dofs_size, KM_SLEEP);
						removed_dofs->dofiod_count = removed_dofs_count;

						/*
						 * copy the remaining data.
						 */
						bcopy(&existing_dofs->dofiod_helpers[0],
						    &removed_dofs->dofiod_helpers[0],
						    index * sizeof(dof_helper_t));

						if (index < existing_dofs_count - 1) {
							bcopy(&existing_dofs->dofiod_helpers[index + 1],
							    &removed_dofs->dofiod_helpers[index],
							    (existing_dofs_count - index - 1) * sizeof(dof_helper_t));
						}
					}

					kmem_free(existing_dofs, DOF_IOCTL_DATA_T_SIZE(existing_dofs_count));

					p->p_dtrace_lazy_dofs = removed_dofs;

					rval = KERN_SUCCESS;

					break;
				}
			}

			/*
			 * Sanity check: any remaining lazy dof generations must still be sorted.
			 */
			dof_ioctl_data_t *all_dofs = p->p_dtrace_lazy_dofs;
			if (all_dofs) {
				unsigned int i;
				for (i = 0; i < all_dofs->dofiod_count - 1; i++) {
					ASSERT(all_dofs->dofiod_helpers[i].dofhp_dof <
					    all_dofs->dofiod_helpers[i + 1].dofhp_dof);
				}
			}
		}

		lck_mtx_unlock(&p->p_dtrace_sprlock);
	} else {
		rval = EACCES;
	}

	lck_rw_unlock_shared(&dtrace_dof_mode_lock);

	return (rval);
}
void
dtrace_lazy_dofs_destroy(proc_t *p)
{
	lck_rw_lock_shared(&dtrace_dof_mode_lock);
	lck_mtx_lock(&p->p_dtrace_sprlock);

	/*
	 * If we have lazy dof, dof mode better be LAZY_ON, or we must be exiting.
	 * We cannot assert against DTRACE_DOF_MODE_NEVER here, because we are called from
	 * kern_exit.c and kern_exec.c.
	 */
	ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON ||
	    p->p_lflag & P_LEXIT);
	ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);

	dof_ioctl_data_t *lazy_dofs = p->p_dtrace_lazy_dofs;
	p->p_dtrace_lazy_dofs = NULL;

	lck_mtx_unlock(&p->p_dtrace_sprlock);
	lck_rw_unlock_shared(&dtrace_dof_mode_lock);

	if (lazy_dofs) {
		kmem_free(lazy_dofs, DOF_IOCTL_DATA_T_SIZE(lazy_dofs->dofiod_count));
	}
}
void
dtrace_lazy_dofs_duplicate(proc_t *parent, proc_t *child)
{
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_assert(&parent->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_assert(&child->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED);

	lck_rw_lock_shared(&dtrace_dof_mode_lock);
	lck_mtx_lock(&parent->p_dtrace_sprlock);

	/*
	 * If we have lazy dof, dof mode better be LAZY_ON, or we must be exiting.
	 * We cannot assert against DTRACE_DOF_MODE_NEVER here, because we are called from
	 * fork (kern_fork.c).
	 */
	ASSERT(parent->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON);
	ASSERT(parent->p_dtrace_lazy_dofs == NULL || parent->p_dtrace_helpers == NULL);

	/*
	 * In theory we should hold the child sprlock, but this is safe...
	 */
	ASSERT(child->p_dtrace_lazy_dofs == NULL && child->p_dtrace_helpers == NULL);

	dof_ioctl_data_t *parent_dofs = parent->p_dtrace_lazy_dofs;
	dof_ioctl_data_t *child_dofs = NULL;
	if (parent_dofs) {
		size_t parent_dofs_size = DOF_IOCTL_DATA_T_SIZE(parent_dofs->dofiod_count);
		child_dofs = kmem_alloc(parent_dofs_size, KM_SLEEP);
		bcopy(parent_dofs, child_dofs, parent_dofs_size);
	}

	lck_mtx_unlock(&parent->p_dtrace_sprlock);

	if (child_dofs) {
		lck_mtx_lock(&child->p_dtrace_sprlock);
		child->p_dtrace_lazy_dofs = child_dofs;
		lck_mtx_unlock(&child->p_dtrace_sprlock);
	}

	lck_rw_unlock_shared(&dtrace_dof_mode_lock);
}
static int
dtrace_lazy_dofs_proc_iterate_filter(proc_t *p, void *ignored)
{
#pragma unused(ignored)
	/*
	 * Okay to NULL test without taking the sprlock.
	 */
	return p->p_dtrace_lazy_dofs != NULL;
}
static int
dtrace_lazy_dofs_proc_iterate_doit(proc_t *p, void *ignored)
{
#pragma unused(ignored)
	int rval;

	/*
	 * It is possible this process may exit during our attempt to
	 * fault in the dof. We could fix this by holding locks longer,
	 * but the errors are benign.
	 */
	lck_mtx_lock(&p->p_dtrace_sprlock);

	/*
	 * In this case only, it is okay to have lazy dof when dof mode is DTRACE_DOF_MODE_LAZY_OFF
	 */
	ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);
	ASSERT(dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF);

	dof_ioctl_data_t *lazy_dofs = p->p_dtrace_lazy_dofs;
	p->p_dtrace_lazy_dofs = NULL;

	lck_mtx_unlock(&p->p_dtrace_sprlock);

	/*
	 * Process each dof_helper_t
	 */
	if (lazy_dofs != NULL) {
		unsigned int i;

		for (i = 0; i < lazy_dofs->dofiod_count; i++) {
			/*
			 * When loading lazy dof, we depend on the generations being sorted in ascending order.
			 */
			ASSERT(i >= (lazy_dofs->dofiod_count - 1) ||
			    lazy_dofs->dofiod_helpers[i].dofhp_dof <
			    lazy_dofs->dofiod_helpers[i + 1].dofhp_dof);

			dof_helper_t *dhp = &lazy_dofs->dofiod_helpers[i];

			/*
			 * We stored the generation in dofhp_dof. Save it, and restore the original value.
			 */
			int generation = dhp->dofhp_dof;
			dhp->dofhp_dof = dhp->dofhp_addr;

			dof_hdr_t *dof = dtrace_dof_copyin_from_proc(p, dhp->dofhp_dof, &rval);

			if (dof != NULL) {
				dtrace_helpers_t *help;

				lck_mtx_lock(&dtrace_lock);

				/*
				 * This must be done with the dtrace_lock held
				 */
				if ((help = p->p_dtrace_helpers) == NULL)
					help = dtrace_helpers_create(p);

				/*
				 * If the generation value has been bumped, someone snuck in
				 * when we released the dtrace lock. We have to dump this generation,
				 * there is no safe way to load it.
				 */
				if (help->dthps_generation <= generation) {
					help->dthps_generation = generation;

					/*
					 * dtrace_helper_slurp() takes responsibility for the dof --
					 * it may free it now or it may save it and free it later.
					 */
					if ((rval = dtrace_helper_slurp(p, dof, dhp)) != generation) {
						dtrace_dof_error(NULL, "returned value did not match expected generation");
					}
				}

				lck_mtx_unlock(&dtrace_lock);
			}
		}

		kmem_free(lazy_dofs, DOF_IOCTL_DATA_T_SIZE(lazy_dofs->dofiod_count));
	}

	return PROC_RETURNED;
}

#endif /* __APPLE__ */
static dtrace_helpers_t *
dtrace_helpers_create(proc_t *p)
{
	dtrace_helpers_t *help;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(p->p_dtrace_helpers == NULL);

	help = kmem_zalloc(sizeof (dtrace_helpers_t), KM_SLEEP);
	help->dthps_actions = kmem_zalloc(sizeof (dtrace_helper_action_t *) *
	    DTRACE_NHELPER_ACTIONS, KM_SLEEP);

	p->p_dtrace_helpers = help;
	dtrace_helpers++;

	return (help);
}
#if !defined(__APPLE__)
static void
dtrace_helpers_destroy(void)
{
	proc_t *p = curproc;
#else
static void
dtrace_helpers_destroy(proc_t *p)
{
#endif
	dtrace_helpers_t *help;
	dtrace_vstate_t *vstate;
	int i;

	lck_mtx_lock(&dtrace_lock);

	ASSERT(p->p_dtrace_helpers != NULL);
	ASSERT(dtrace_helpers > 0);

	help = p->p_dtrace_helpers;
	vstate = &help->dthps_vstate;

	/*
	 * We're now going to lose the help from this process.
	 */
	p->p_dtrace_helpers = NULL;

	/*
	 * Destroy the helper actions.
	 */
	for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
		dtrace_helper_action_t *h, *next;

		for (h = help->dthps_actions[i]; h != NULL; h = next) {
			next = h->dtha_next;
			dtrace_helper_action_destroy(h, vstate);
		}
	}

	lck_mtx_unlock(&dtrace_lock);

	/*
	 * Destroy the helper providers.
	 */
	if (help->dthps_maxprovs > 0) {
		lck_mtx_lock(&dtrace_meta_lock);
		if (dtrace_meta_pid != NULL) {
			ASSERT(dtrace_deferred_pid == NULL);

			for (i = 0; i < help->dthps_nprovs; i++) {
				dtrace_helper_provider_remove(
				    &help->dthps_provs[i]->dthp_prov, p->p_pid);
			}
		} else {
			lck_mtx_lock(&dtrace_lock);
			ASSERT(help->dthps_deferred == 0 ||
			    help->dthps_next != NULL ||
			    help->dthps_prev != NULL ||
			    help == dtrace_deferred_pid);

			/*
			 * Remove the helper from the deferred list.
			 */
			if (help->dthps_next != NULL)
				help->dthps_next->dthps_prev = help->dthps_prev;
			if (help->dthps_prev != NULL)
				help->dthps_prev->dthps_next = help->dthps_next;
			if (dtrace_deferred_pid == help) {
				dtrace_deferred_pid = help->dthps_next;
				ASSERT(help->dthps_prev == NULL);
			}

			lck_mtx_unlock(&dtrace_lock);
		}

		lck_mtx_unlock(&dtrace_meta_lock);

		for (i = 0; i < help->dthps_nprovs; i++) {
			dtrace_helper_provider_destroy(help->dthps_provs[i]);
		}

		kmem_free(help->dthps_provs, help->dthps_maxprovs *
		    sizeof (dtrace_helper_provider_t *));
	}

	lck_mtx_lock(&dtrace_lock);

	dtrace_vstate_fini(&help->dthps_vstate);
	kmem_free(help->dthps_actions,
	    sizeof (dtrace_helper_action_t *) * DTRACE_NHELPER_ACTIONS);
	kmem_free(help, sizeof (dtrace_helpers_t));

	--dtrace_helpers;
	lck_mtx_unlock(&dtrace_lock);
}
static void
dtrace_helpers_duplicate(proc_t *from, proc_t *to)
{
	dtrace_helpers_t *help, *newhelp;
	dtrace_helper_action_t *helper, *new, *last;
	dtrace_difo_t *dp;
	dtrace_vstate_t *vstate;
	int i, j, sz, hasprovs = 0;

	lck_mtx_lock(&dtrace_lock);
	ASSERT(from->p_dtrace_helpers != NULL);
	ASSERT(dtrace_helpers > 0);

	help = from->p_dtrace_helpers;
	newhelp = dtrace_helpers_create(to);
	ASSERT(to->p_dtrace_helpers != NULL);

	newhelp->dthps_generation = help->dthps_generation;
	vstate = &newhelp->dthps_vstate;

	/*
	 * Duplicate the helper actions.
	 */
	for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
		if ((helper = help->dthps_actions[i]) == NULL)
			continue;

		for (last = NULL; helper != NULL; helper = helper->dtha_next) {
			new = kmem_zalloc(sizeof (dtrace_helper_action_t),
			    KM_SLEEP);
			new->dtha_generation = helper->dtha_generation;

			if ((dp = helper->dtha_predicate) != NULL) {
				dp = dtrace_difo_duplicate(dp, vstate);
				new->dtha_predicate = dp;
			}

			new->dtha_nactions = helper->dtha_nactions;
			sz = sizeof (dtrace_difo_t *) * new->dtha_nactions;
			new->dtha_actions = kmem_alloc(sz, KM_SLEEP);

			for (j = 0; j < new->dtha_nactions; j++) {
				dtrace_difo_t *dp = helper->dtha_actions[j];

				ASSERT(dp != NULL);
				dp = dtrace_difo_duplicate(dp, vstate);
				new->dtha_actions[j] = dp;
			}

			if (last != NULL) {
				last->dtha_next = new;
			} else {
				newhelp->dthps_actions[i] = new;
			}

			last = new;
		}
	}

	/*
	 * Duplicate the helper providers and register them with the
	 * DTrace framework.
	 */
	if (help->dthps_nprovs > 0) {
		newhelp->dthps_nprovs = help->dthps_nprovs;
		newhelp->dthps_maxprovs = help->dthps_nprovs;
		newhelp->dthps_provs = kmem_alloc(newhelp->dthps_nprovs *
		    sizeof (dtrace_helper_provider_t *), KM_SLEEP);
		for (i = 0; i < newhelp->dthps_nprovs; i++) {
			newhelp->dthps_provs[i] = help->dthps_provs[i];
			newhelp->dthps_provs[i]->dthp_ref++;
		}

		hasprovs = 1;
	}

	lck_mtx_unlock(&dtrace_lock);

	if (hasprovs)
		dtrace_helper_provider_register(to, newhelp, NULL);
}
/*
 * DTrace Hook Functions
 */
static void
dtrace_module_loaded(struct modctl *ctl)
{
	dtrace_provider_t *prv;

	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&mod_lock);

	// ASSERT(ctl->mod_busy);

	/*
	 * We're going to call each provider's per-module provide operation
	 * specifying only this module.
	 */
	for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
		prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);

	lck_mtx_unlock(&mod_lock);
	lck_mtx_unlock(&dtrace_provider_lock);

	/*
	 * If we have any retained enablings, we need to match against them.
	 * Enabling probes requires that cpu_lock be held, and we cannot hold
	 * cpu_lock here -- it is legal for cpu_lock to be held when loading a
	 * module. (In particular, this happens when loading scheduling
	 * classes.) So if we have any retained enablings, we need to dispatch
	 * our task queue to do the match for us.
	 */
	lck_mtx_lock(&dtrace_lock);

	if (dtrace_retained == NULL) {
		lck_mtx_unlock(&dtrace_lock);
		return;
	}

	(void) taskq_dispatch(dtrace_taskq,
	    (task_func_t *)dtrace_enabling_matchall, NULL, TQ_SLEEP);

	lck_mtx_unlock(&dtrace_lock);

	/*
	 * And now, for a little heuristic sleaze: in general, we want to
	 * match modules as soon as they load. However, we cannot guarantee
	 * this, because it would lead us to the lock ordering violation
	 * outlined above. The common case, of course, is that cpu_lock is
	 * _not_ held -- so we delay here for a clock tick, hoping that that's
	 * long enough for the task queue to do its work. If it's not, it's
	 * not a serious problem -- it just means that the module that we
	 * just loaded may not be immediately instrumentable.
	 */
	delay(1);
}
static void
dtrace_module_unloaded(struct modctl *ctl)
{
	dtrace_probe_t template, *probe, *first, *next;
	dtrace_provider_t *prov;

	template.dtpr_mod = ctl->mod_modname;

	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&mod_lock);
	lck_mtx_lock(&dtrace_lock);

	if (dtrace_bymod == NULL) {
		/*
		 * The DTrace module is loaded (obviously) but not attached;
		 * we don't have any work to do.
		 */
		lck_mtx_unlock(&dtrace_provider_lock);
		lck_mtx_unlock(&mod_lock);
		lck_mtx_unlock(&dtrace_lock);
		return;
	}

	for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template);
	    probe != NULL; probe = probe->dtpr_nextmod) {
		if (probe->dtpr_ecb != NULL) {
			lck_mtx_unlock(&dtrace_provider_lock);
			lck_mtx_unlock(&mod_lock);
			lck_mtx_unlock(&dtrace_lock);

			/*
			 * This shouldn't _actually_ be possible -- we're
			 * unloading a module that has an enabled probe in it.
			 * (It's normally up to the provider to make sure that
			 * this can't happen.) However, because dtps_enable()
			 * doesn't have a failure mode, there can be an
			 * enable/unload race. Upshot: we don't want to
			 * assert, but we're not going to disable the
			 * probe, either.
			 */
			if (dtrace_err_verbose) {
				cmn_err(CE_WARN, "unloaded module '%s' had "
				    "enabled probes", ctl->mod_modname);
			}

			return;
		}
	}

	probe = first;

	for (first = NULL; probe != NULL; probe = next) {
		ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe);

		dtrace_probes[probe->dtpr_id - 1] = NULL;

		next = probe->dtpr_nextmod;
		dtrace_hash_remove(dtrace_bymod, probe);
		dtrace_hash_remove(dtrace_byfunc, probe);
		dtrace_hash_remove(dtrace_byname, probe);

		if (first == NULL) {
			first = probe;
			probe->dtpr_nextmod = NULL;
		} else {
			probe->dtpr_nextmod = first;
			first = probe;
		}
	}

	/*
	 * We've removed all of the module's probes from the hash chains and
	 * from the probe array. Now issue a dtrace_sync() to be sure that
	 * everyone has cleared out from any probe array processing.
	 */
	dtrace_sync();

	for (probe = first; probe != NULL; probe = first) {
		first = probe->dtpr_nextmod;
		prov = probe->dtpr_provider;
		prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id,
		    probe->dtpr_arg);
		kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
		kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
		kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
		vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1);
#if !defined(__APPLE__)
		kmem_free(probe, sizeof (dtrace_probe_t));
#else
		zfree(dtrace_probe_t_zone, probe);
#endif
	}

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&mod_lock);
	lck_mtx_unlock(&dtrace_provider_lock);
}
void
dtrace_suspend(void)
{
	dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend));
}

void
dtrace_resume(void)
{
	dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_resume));
}
static int
dtrace_cpu_setup(cpu_setup_t what, processorid_t cpu)
{
	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_lock(&dtrace_lock);

	switch (what) {
	case CPU_CONFIG: {
		dtrace_state_t *state;
		dtrace_optval_t *opt, rs, c;

		/*
		 * For now, we only allocate a new buffer for anonymous state.
		 */
		if ((state = dtrace_anon.dta_state) == NULL)
			break;

		if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
			break;

		opt = state->dts_options;
		c = opt[DTRACEOPT_CPU];

		if (c != DTRACE_CPUALL && c != DTRACEOPT_UNSET && c != cpu)
			break;

		/*
		 * Regardless of what the actual policy is, we're going to
		 * temporarily set our resize policy to be manual. We're
		 * also going to temporarily set our CPU option to denote
		 * the newly configured CPU.
		 */
		rs = opt[DTRACEOPT_BUFRESIZE];
		opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_MANUAL;
		opt[DTRACEOPT_CPU] = (dtrace_optval_t)cpu;

		(void) dtrace_state_buffers(state);

		opt[DTRACEOPT_BUFRESIZE] = rs;
		opt[DTRACEOPT_CPU] = c;

		break;
	}

	case CPU_UNCONFIG:
		/*
		 * We don't free the buffer in the CPU_UNCONFIG case. (The
		 * buffer will be freed when the consumer exits.)
		 */
		break;

	default:
		break;
	}

	lck_mtx_unlock(&dtrace_lock);
	return (0);
}
static void
dtrace_cpu_setup_initial(processorid_t cpu)
{
	(void) dtrace_cpu_setup(CPU_CONFIG, cpu);
}
static void
dtrace_toxrange_add(uintptr_t base, uintptr_t limit)
{
	if (dtrace_toxranges >= dtrace_toxranges_max) {
		int osize, nsize;
		dtrace_toxrange_t *range;

		osize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);

		if (osize == 0) {
			ASSERT(dtrace_toxrange == NULL);
			ASSERT(dtrace_toxranges_max == 0);
			dtrace_toxranges_max = 1;
		} else {
			dtrace_toxranges_max <<= 1;
		}

		nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
		range = kmem_zalloc(nsize, KM_SLEEP);

		if (dtrace_toxrange != NULL) {
			ASSERT(osize != 0);
			bcopy(dtrace_toxrange, range, osize);
			kmem_free(dtrace_toxrange, osize);
		}

		dtrace_toxrange = range;
	}

	ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == NULL);
	ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == NULL);

	dtrace_toxrange[dtrace_toxranges].dtt_base = base;
	dtrace_toxrange[dtrace_toxranges].dtt_limit = limit;
	dtrace_toxranges++;
}
/*
 * DTrace Driver Cookbook Functions
 */
static int
dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	dtrace_provider_id_t id;
	dtrace_state_t *state = NULL;
	dtrace_enabling_t *enab;

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&dtrace_lock);

	if (ddi_soft_state_init(&dtrace_softstate,
	    sizeof (dtrace_state_t), 0) != 0) {
		cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state");
		lck_mtx_unlock(&cpu_lock);
		lck_mtx_unlock(&dtrace_provider_lock);
		lck_mtx_unlock(&dtrace_lock);
		return (DDI_FAILURE);
	}

#if !defined(__APPLE__)
	if (ddi_create_minor_node(devi, DTRACEMNR_DTRACE, S_IFCHR,
	    DTRACEMNRN_DTRACE, DDI_PSEUDO, NULL) == DDI_FAILURE ||
	    ddi_create_minor_node(devi, DTRACEMNR_HELPER, S_IFCHR,
	    DTRACEMNRN_HELPER, DDI_PSEUDO, NULL) == DDI_FAILURE) {
		cmn_err(CE_NOTE, "/dev/dtrace couldn't create minor nodes");
		ddi_remove_minor_node(devi, NULL);
		ddi_soft_state_fini(&dtrace_softstate);
		lck_mtx_unlock(&cpu_lock);
		lck_mtx_unlock(&dtrace_provider_lock);
		lck_mtx_unlock(&dtrace_lock);
		return (DDI_FAILURE);
	}
#endif /* __APPLE__ */

	ddi_report_dev(devi);
	dtrace_devi = devi;

	dtrace_modload = dtrace_module_loaded;
	dtrace_modunload = dtrace_module_unloaded;
	dtrace_cpu_init = dtrace_cpu_setup_initial;
	dtrace_helpers_cleanup = dtrace_helpers_destroy;
	dtrace_helpers_fork = dtrace_helpers_duplicate;
	dtrace_cpustart_init = dtrace_suspend;
	dtrace_cpustart_fini = dtrace_resume;
	dtrace_debugger_init = dtrace_suspend;
	dtrace_debugger_fini = dtrace_resume;
	dtrace_kreloc_init = dtrace_suspend;
	dtrace_kreloc_fini = dtrace_resume;

	register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);

	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);

	dtrace_arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1,
	    NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
	dtrace_minor = vmem_create("dtrace_minor", (void *)DTRACEMNRN_CLONE,
	    UINT32_MAX - DTRACEMNRN_CLONE, 1, NULL, NULL, NULL, 0,
	    VM_SLEEP | VMC_IDENTIFIER);
	dtrace_taskq = taskq_create("dtrace_taskq", 1, maxclsyspri,
	    1, INT_MAX, 0);

	dtrace_state_cache = kmem_cache_create("dtrace_state_cache",
	    sizeof (dtrace_dstate_percpu_t) * (int)NCPU, DTRACE_STATE_ALIGN,
	    NULL, NULL, NULL, NULL, NULL, 0);

	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);

	dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod),
	    offsetof(dtrace_probe_t, dtpr_nextmod),
	    offsetof(dtrace_probe_t, dtpr_prevmod));

	dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func),
	    offsetof(dtrace_probe_t, dtpr_nextfunc),
	    offsetof(dtrace_probe_t, dtpr_prevfunc));

	dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name),
	    offsetof(dtrace_probe_t, dtpr_nextname),
	    offsetof(dtrace_probe_t, dtpr_prevname));

	if (dtrace_retain_max < 1) {
		cmn_err(CE_WARN, "illegal value (%lu) for dtrace_retain_max; "
		    "setting to 1", dtrace_retain_max);
		dtrace_retain_max = 1;
	}

	/*
	 * Now discover our toxic ranges.
	 */
	dtrace_toxic_ranges(dtrace_toxrange_add);

	/*
	 * Before we register ourselves as a provider to our own framework,
	 * we would like to assert that dtrace_provider is NULL -- but that's
	 * not true if we were loaded as a dependency of a DTrace provider.
	 * Once we've registered, we can assert that dtrace_provider is our
	 * pseudo provider.
	 */
	(void) dtrace_register("dtrace", &dtrace_provider_attr,
	    DTRACE_PRIV_NONE, 0, &dtrace_provider_ops, NULL, &id);

	ASSERT(dtrace_provider != NULL);
	ASSERT((dtrace_provider_id_t)dtrace_provider == id);

#if !defined(__APPLE__)
	dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "BEGIN", 0, NULL);
	dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "END", 0, NULL);
	dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "ERROR", 1, NULL);
#elif defined(__ppc__) || defined(__ppc64__)
	dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "BEGIN", 2, NULL);
	dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "END", 1, NULL);
	dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "ERROR", 4, NULL);
#elif (defined(__i386__) || defined (__x86_64__))
	dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "BEGIN", 1, NULL);
	dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "END", 0, NULL);
	dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "ERROR", 3, NULL);
#else
#error Unknown Architecture
#endif /* __APPLE__ */

	dtrace_anon_property();
	lck_mtx_unlock(&cpu_lock);

	/*
	 * If DTrace helper tracing is enabled, we need to allocate the
	 * trace buffer and initialize the values.
	 */
	if (dtrace_helptrace_enabled) {
		ASSERT(dtrace_helptrace_buffer == NULL);
		dtrace_helptrace_buffer =
		    kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP);
		dtrace_helptrace_next = 0;
	}

	/*
	 * If there are already providers, we must ask them to provide their
	 * probes, and then match any anonymous enabling against them. Note
	 * that there should be no other retained enablings at this time:
	 * the only retained enablings at this time should be the anonymous
	 * enabling.
	 */
	if (dtrace_anon.dta_enabling != NULL) {
		ASSERT(dtrace_retained == dtrace_anon.dta_enabling);

		dtrace_enabling_provide(NULL);
		state = dtrace_anon.dta_state;

		/*
		 * We couldn't hold cpu_lock across the above call to
		 * dtrace_enabling_provide(), but we must hold it to actually
		 * enable the probes. We have to drop all of our locks, pick
		 * up cpu_lock, and regain our locks before matching the
		 * retained anonymous enabling.
		 */
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&dtrace_provider_lock);

		lck_mtx_lock(&cpu_lock);
		lck_mtx_lock(&dtrace_provider_lock);
		lck_mtx_lock(&dtrace_lock);

		if ((enab = dtrace_anon.dta_enabling) != NULL)
			(void) dtrace_enabling_match(enab, NULL);

		lck_mtx_unlock(&cpu_lock);
	}

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_provider_lock);

	if (state != NULL) {
		/*
		 * If we created any anonymous state, set it going now.
		 */
		(void) dtrace_state_go(state, &dtrace_anon.dta_beganon);
	}

	return (DDI_SUCCESS);
}
extern void fasttrap_init(void);

static int
dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
#pragma unused(flag, otyp)
	dtrace_state_t *state;
	uint32_t priv;
	uid_t uid;
	zoneid_t zoneid;

#if !defined(__APPLE__)
	if (getminor(*devp) == DTRACEMNRN_HELPER)
		return (0);

	/*
	 * If this wasn't an open with the "helper" minor, then it must be
	 * the "dtrace" minor.
	 */
	ASSERT(getminor(*devp) == DTRACEMNRN_DTRACE);
#else
	/* Darwin puts Helper on its own major device. */
#endif /* __APPLE__ */

	/*
	 * If no DTRACE_PRIV_* bits are set in the credential, then the
	 * caller lacks sufficient permission to do anything with DTrace.
	 */
	dtrace_cred2priv(cred_p, &priv, &uid, &zoneid);
	if (priv == DTRACE_PRIV_NONE)
		return (EACCES);

#if defined(__APPLE__)
	/*
	 * We delay the initialization of fasttrap as late as possible.
	 * It certainly can't be later than now!
	 */
	fasttrap_init();
#endif /* __APPLE__ */

	/*
	 * Ask all providers to provide all their probes.
	 */
	lck_mtx_lock(&dtrace_provider_lock);
	dtrace_probe_provide(NULL, NULL);
	lck_mtx_unlock(&dtrace_provider_lock);

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_lock);
	dtrace_opens++;
	dtrace_membar_producer();

	/*
	 * If the kernel debugger is active (that is, if the kernel debugger
	 * modified text in some way), we won't allow the open.
	 */
	if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
		dtrace_opens--;
		lck_mtx_unlock(&cpu_lock);
		lck_mtx_unlock(&dtrace_lock);
		return (EBUSY);
	}

	state = dtrace_state_create(devp, cred_p);
	lck_mtx_unlock(&cpu_lock);

	if (state == NULL) {
		if (--dtrace_opens == 0)
			(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
		lck_mtx_unlock(&dtrace_lock);
		return (EAGAIN);
	}

	lck_mtx_unlock(&dtrace_lock);

#if defined(__APPLE__)
	lck_rw_lock_exclusive(&dtrace_dof_mode_lock);

	/*
	 * If we are currently lazy, transition states.
	 *
	 * Unlike dtrace_close, we do not need to check the
	 * value of dtrace_opens, as any positive value (and
	 * we count as 1) means we transition states.
	 */
	if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON) {
		dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_OFF;

		/*
		 * Iterate all existing processes and load lazy dofs.
		 */
		proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS,
		    dtrace_lazy_dofs_proc_iterate_doit,
		    NULL,
		    dtrace_lazy_dofs_proc_iterate_filter,
		    NULL);
	}

	lck_rw_unlock_exclusive(&dtrace_dof_mode_lock);
#endif /* __APPLE__ */

	return (0);
}
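/*
 * Added commentary (not part of the original source): dtrace_open() above and
 * dtrace_close() below form a small state machine around dtrace_dof_mode:
 *
 *	open:   LAZY_ON  -> LAZY_OFF  (and fault in lazy dofs for all procs)
 *	close:  LAZY_OFF -> LAZY_ON   (only when dtrace_opens drops to 0)
 *
 * Per the "lazy dof" block comment earlier in this file, a process whose lazy
 * dofs were faulted in stays non-lazy even after the mode flips back.
 */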
static int
dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
{
#pragma unused(flag,otyp,cred_p)
	minor_t minor = getminor(dev);
	dtrace_state_t *state;

#if !defined(__APPLE__)
	if (minor == DTRACEMNRN_HELPER)
		return (0);
#else
	/* Darwin puts Helper on its own major device. */
#endif /* __APPLE__ */

	state = ddi_get_soft_state(dtrace_softstate, minor);

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_lock);

	if (state->dts_anon) {
		/*
		 * There is anonymous state. Destroy that first.
		 */
		ASSERT(dtrace_anon.dta_state == NULL);
		dtrace_state_destroy(state->dts_anon);
	}

	dtrace_state_destroy(state);
	ASSERT(dtrace_opens > 0);
	if (--dtrace_opens == 0)
		(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&cpu_lock);

#if defined(__APPLE__)
	/*
	 * Lock ordering requires the dof mode lock be taken before
	 * the dtrace_lock.
	 */
	lck_rw_lock_exclusive(&dtrace_dof_mode_lock);
	lck_mtx_lock(&dtrace_lock);

	/*
	 * If we are currently lazy-off, and this is the last close, transition to
	 * lazy mode.
	 */
	if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF && dtrace_opens == 0) {
		dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
	}

	lck_mtx_unlock(&dtrace_lock);
	lck_rw_unlock_exclusive(&dtrace_dof_mode_lock);
#endif /* __APPLE__ */

	return (0);
}
#if defined(__APPLE__)
/*
 * Introduce cast to quiet warnings.
 * XXX: This hides a lot of brokenness.
 */
#define copyin(src, dst, len) copyin( (user_addr_t)(src), (dst), (len) )
#define copyout(src, dst, len) copyout( (src), (user_addr_t)(dst), (len) )
#endif /* __APPLE__ */
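/*
 * Added commentary (not part of the original source): with the wrappers
 * above, a call such as
 *
 *	copyin((void *)arg, &desc, sizeof (desc))
 *
 * expands to copyin((user_addr_t)(void *)arg, &desc, sizeof (desc)), so the
 * Solaris-style (void *) casts scattered through dtrace_ioctl() below compile
 * against Darwin's user_addr_t-based copyin()/copyout() prototypes.
 */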
#if defined(__APPLE__)
static int
dtrace_ioctl_helper(int cmd, caddr_t arg, int *rv)
{
#pragma unused(rv)
	/*
	 * Safe to check this outside the dof mode lock
	 */
	if (dtrace_dof_mode == DTRACE_DOF_MODE_NEVER)
		return KERN_SUCCESS;

	switch (cmd) {
	case DTRACEHIOC_ADDDOF: {
		dof_helper_t *dhp = NULL;
		size_t dof_ioctl_data_size;
		dof_ioctl_data_t *multi_dof;
		unsigned int i;
		int rval = 0;
		user_addr_t user_address = *(user_addr_t *)arg;
		uint64_t dof_count;
		int multi_dof_claimed = 0;
		proc_t *p = current_proc();

		/*
		 * Read the number of DOF sections being passed in.
		 */
		if (copyin(user_address + offsetof(dof_ioctl_data_t, dofiod_count),
		    &dof_count,
		    sizeof(dof_count))) {
			dtrace_dof_error(NULL, "failed to copyin dofiod_count");
			return (EFAULT);
		}

		/*
		 * Range check the count.
		 */
		if (dof_count == 0 || dof_count > 1024) {
			dtrace_dof_error(NULL, "dofiod_count is not valid");
			return (EINVAL);
		}

		/*
		 * Allocate a correctly sized structure and copyin the data.
		 */
		dof_ioctl_data_size = DOF_IOCTL_DATA_T_SIZE(dof_count);
		if ((multi_dof = kmem_alloc(dof_ioctl_data_size, KM_SLEEP)) == NULL)
			return (ENOMEM);

		/* NOTE! We can no longer exit this method via return */
		if (copyin(user_address, multi_dof, dof_ioctl_data_size) != 0) {
			dtrace_dof_error(NULL, "failed copyin of dof_ioctl_data_t");
			rval = EFAULT;
			goto cleanup;
		}

		/*
		 * Check that the count didn't change between the first copyin and the second.
		 */
		if (multi_dof->dofiod_count != dof_count) {
			rval = EINVAL;
			goto cleanup;
		}

		/*
		 * Try to process lazily first.
		 */
		rval = dtrace_lazy_dofs_add(p, multi_dof, &multi_dof_claimed);

		/*
		 * If rval is EACCES, we must be non-lazy.
		 */
		if (rval == EACCES) {
			rval = 0;

			/*
			 * Process each dof_helper_t
			 */
			i = 0;
			do {
				dhp = &multi_dof->dofiod_helpers[i];

				dof_hdr_t *dof = dtrace_dof_copyin(dhp->dofhp_dof, &rval);

				if (dof != NULL) {
					lck_mtx_lock(&dtrace_lock);

					/*
					 * dtrace_helper_slurp() takes responsibility for the dof --
					 * it may free it now or it may save it and free it later.
					 */
					if ((dhp->dofhp_dof = (uint64_t)dtrace_helper_slurp(p, dof, dhp)) == -1ULL) {
						rval = EINVAL;
					}

					lck_mtx_unlock(&dtrace_lock);
				}
			} while (++i < multi_dof->dofiod_count && rval == 0);
		}

		/*
		 * We need to copyout the multi_dof struct, because it contains
		 * the generation (unique id) values needed to call DTRACEHIOC_REMOVE
		 *
		 * This could certainly be better optimized.
		 */
		if (copyout(multi_dof, user_address, dof_ioctl_data_size) != 0) {
			dtrace_dof_error(NULL, "failed copyout of dof_ioctl_data_t");
			/* Don't overwrite pre-existing error code */
			if (rval == 0) rval = EFAULT;
		}

	cleanup:
		/*
		 * If we had to allocate struct memory, free it.
		 */
		if (multi_dof != NULL && !multi_dof_claimed) {
			kmem_free(multi_dof, dof_ioctl_data_size);
		}

		return rval;
	}

	case DTRACEHIOC_REMOVE: {
		int generation = *(int *)arg;
		proc_t *p = current_proc();

		/*
		 * Try lazy first.
		 */
		int rval = dtrace_lazy_dofs_remove(p, generation);

		/*
		 * EACCES means non-lazy
		 */
		if (rval == EACCES) {
			lck_mtx_lock(&dtrace_lock);
			rval = dtrace_helper_destroygen(p, generation);
			lck_mtx_unlock(&dtrace_lock);
		}

		return rval;
	}

	default:
		break;
	}

	return ENOTTY;
}
#endif /* __APPLE__ */
static int
dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
{
	minor_t minor = getminor(dev);
	dtrace_state_t *state;
	int rval;

#if !defined(__APPLE__)
	if (minor == DTRACEMNRN_HELPER)
		return (dtrace_ioctl_helper(cmd, arg, rv));
#else
	/* Darwin puts Helper on its own major device. */
#endif /* __APPLE__ */

	state = ddi_get_soft_state(dtrace_softstate, minor);

	if (state->dts_anon) {
		ASSERT(dtrace_anon.dta_state == NULL);
		state = state->dts_anon;
	}

	switch (cmd) {
	case DTRACEIOC_PROVIDER: {
		dtrace_providerdesc_t pvd;
		dtrace_provider_t *pvp;

		if (copyin((void *)arg, &pvd, sizeof (pvd)) != 0)
			return (EFAULT);

		pvd.dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0';
		lck_mtx_lock(&dtrace_provider_lock);

		for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) {
			if (strcmp(pvp->dtpv_name, pvd.dtvd_name) == 0)
				break;
		}

		lck_mtx_unlock(&dtrace_provider_lock);

		if (pvp == NULL)
			return (ESRCH);

		bcopy(&pvp->dtpv_priv, &pvd.dtvd_priv, sizeof (dtrace_ppriv_t));
		bcopy(&pvp->dtpv_attr, &pvd.dtvd_attr, sizeof (dtrace_pattr_t));
		if (copyout(&pvd, (void *)arg, sizeof (pvd)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_EPROBE: {
		dtrace_eprobedesc_t epdesc;
		dtrace_ecb_t *ecb;
		dtrace_action_t *act;
		void *buf;
		size_t size;
		uintptr_t dest;
		int nrecs;

		if (copyin((void *)arg, &epdesc, sizeof (epdesc)) != 0)
			return (EFAULT);

		lck_mtx_lock(&dtrace_lock);

		if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) {
			lck_mtx_unlock(&dtrace_lock);
			return (EINVAL);
		}

		if (ecb->dte_probe == NULL) {
			lck_mtx_unlock(&dtrace_lock);
			return (EINVAL);
		}

		epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id;
		epdesc.dtepd_uarg = ecb->dte_uarg;
		epdesc.dtepd_size = ecb->dte_size;

		nrecs = epdesc.dtepd_nrecs;
		epdesc.dtepd_nrecs = 0;
		for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
			if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
				continue;

			epdesc.dtepd_nrecs++;
		}

		/*
		 * Now that we have the size, we need to allocate a temporary
		 * buffer in which to store the complete description. We need
		 * the temporary buffer to be able to drop dtrace_lock()
		 * across the copyout(), below.
		 */
		size = sizeof (dtrace_eprobedesc_t) +
		    (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t));

		buf = kmem_alloc(size, KM_SLEEP);
		dest = (uintptr_t)buf;

		bcopy(&epdesc, (void *)dest, sizeof (epdesc));
		dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]);

		for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
			if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
				continue;

			if (nrecs-- == 0)
				break;

			bcopy(&act->dta_rec, (void *)dest,
			    sizeof (dtrace_recdesc_t));
			dest += sizeof (dtrace_recdesc_t);
		}

		lck_mtx_unlock(&dtrace_lock);

		if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) {
			kmem_free(buf, size);
			return (EFAULT);
		}

		kmem_free(buf, size);
		return (0);
	}
	case DTRACEIOC_AGGDESC: {
		dtrace_aggdesc_t aggdesc;
		dtrace_action_t *act;
		dtrace_aggregation_t *agg;
		int nrecs;
		uint32_t offs;
		dtrace_recdesc_t *lrec;
		void *buf;
		size_t size;
		uintptr_t dest;

		if (copyin((void *)arg, &aggdesc, sizeof (aggdesc)) != 0)
			return (EFAULT);

		lck_mtx_lock(&dtrace_lock);

		if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) {
			lck_mtx_unlock(&dtrace_lock);
			return (EINVAL);
		}

		aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid;

		nrecs = aggdesc.dtagd_nrecs;
		aggdesc.dtagd_nrecs = 0;

		offs = agg->dtag_base;
		lrec = &agg->dtag_action.dta_rec;
		aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs;

		for (act = agg->dtag_first; ; act = act->dta_next) {
			ASSERT(act->dta_intuple ||
			    DTRACEACT_ISAGG(act->dta_kind));

			/*
			 * If this action has a record size of zero, it
			 * denotes an argument to the aggregating action.
			 * Because the presence of this record doesn't (or
			 * shouldn't) affect the way the data is interpreted,
			 * we don't copy it out to save user-level the
			 * confusion of dealing with a zero-length record.
			 */
			if (act->dta_rec.dtrd_size == 0) {
				ASSERT(agg->dtag_hasarg);
				continue;
			}

			aggdesc.dtagd_nrecs++;

			if (act == &agg->dtag_action)
				break;
		}

		/*
		 * Now that we have the size, we need to allocate a temporary
		 * buffer in which to store the complete description. We need
		 * the temporary buffer to be able to drop dtrace_lock()
		 * across the copyout(), below.
		 */
		size = sizeof (dtrace_aggdesc_t) +
		    (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t));

		buf = kmem_alloc(size, KM_SLEEP);
		dest = (uintptr_t)buf;

		bcopy(&aggdesc, (void *)dest, sizeof (aggdesc));
		dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]);

		for (act = agg->dtag_first; ; act = act->dta_next) {
			dtrace_recdesc_t rec = act->dta_rec;

			/*
			 * See the comment in the above loop for why we pass
			 * over zero-length records.
			 */
			if (rec.dtrd_size == 0) {
				ASSERT(agg->dtag_hasarg);
				continue;
			}

			if (nrecs-- == 0)
				break;

			rec.dtrd_offset -= offs;
			bcopy(&rec, (void *)dest, sizeof (rec));
			dest += sizeof (dtrace_recdesc_t);

			if (act == &agg->dtag_action)
				break;
		}

		lck_mtx_unlock(&dtrace_lock);

		if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) {
			kmem_free(buf, size);
			return (EFAULT);
		}

		kmem_free(buf, size);
		return (0);
	}
	case DTRACEIOC_ENABLE: {
		dof_hdr_t *dof;
		dtrace_enabling_t *enab = NULL;
		dtrace_vstate_t *vstate;
		int err = 0;

		*rv = 0;

		/*
		 * If a NULL argument has been passed, we take this as our
		 * cue to reevaluate our enablings.
		 */
		if (arg == NULL) {
			lck_mtx_lock(&cpu_lock);
			lck_mtx_lock(&dtrace_lock);
			err = dtrace_enabling_matchstate(state, rv);
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&cpu_lock);

			return (err);
		}

		if ((dof = dtrace_dof_copyin(arg, &rval)) == NULL)
			return (rval);

		lck_mtx_lock(&cpu_lock);
		lck_mtx_lock(&dtrace_lock);
		vstate = &state->dts_vstate;

		if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&cpu_lock);
			dtrace_dof_destroy(dof);
			return (EBUSY);
		}

		if (dtrace_dof_slurp(dof, vstate, cr, &enab, 0, B_TRUE) != 0) {
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&cpu_lock);
			dtrace_dof_destroy(dof);
			return (EINVAL);
		}

		if ((rval = dtrace_dof_options(dof, state)) != 0) {
			dtrace_enabling_destroy(enab);
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&cpu_lock);
			dtrace_dof_destroy(dof);
			return (rval);
		}

		if ((err = dtrace_enabling_match(enab, rv)) == 0) {
			err = dtrace_enabling_retain(enab);
		} else {
			dtrace_enabling_destroy(enab);
		}

		lck_mtx_unlock(&cpu_lock);
		lck_mtx_unlock(&dtrace_lock);
		dtrace_dof_destroy(dof);

		return (err);
	}

	case DTRACEIOC_REPLICATE: {
		dtrace_repldesc_t desc;
		dtrace_probedesc_t *match = &desc.dtrpd_match;
		dtrace_probedesc_t *create = &desc.dtrpd_create;
		int err;

		if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		match->dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		create->dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		lck_mtx_lock(&dtrace_lock);
		err = dtrace_enabling_replicate(state, match, create);
		lck_mtx_unlock(&dtrace_lock);

		return (err);
	}
	case DTRACEIOC_PROBEMATCH:
	case DTRACEIOC_PROBES: {
		dtrace_probe_t *probe = NULL;
		dtrace_probedesc_t desc;
		dtrace_probekey_t pkey;
		dtrace_id_t i;
		int m = 0;
		uint32_t priv;
		uid_t uid;
		zoneid_t zoneid;

		if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		desc.dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		desc.dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		desc.dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		/*
		 * Before we attempt to match this probe, we want to give
		 * all providers the opportunity to provide it.
		 */
		if (desc.dtpd_id == DTRACE_IDNONE) {
			lck_mtx_lock(&dtrace_provider_lock);
			dtrace_probe_provide(&desc, NULL);
			lck_mtx_unlock(&dtrace_provider_lock);
			desc.dtpd_id++;
		}

		if (cmd == DTRACEIOC_PROBEMATCH) {
			dtrace_probekey(&desc, &pkey);
			pkey.dtpk_id = DTRACE_IDNONE;
		}

		dtrace_cred2priv(cr, &priv, &uid, &zoneid);

		lck_mtx_lock(&dtrace_lock);

		if (cmd == DTRACEIOC_PROBEMATCH) {
			for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) {
				if ((probe = dtrace_probes[i - 1]) != NULL &&
				    (m = dtrace_match_probe(probe, &pkey,
				    priv, uid, zoneid)) != 0)
					break;
			}

			if (m < 0) {
				lck_mtx_unlock(&dtrace_lock);
				return (EINVAL);
			}
		} else {
			for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) {
				if ((probe = dtrace_probes[i - 1]) != NULL &&
				    dtrace_match_priv(probe, priv, uid, zoneid))
					break;
			}
		}

		if (probe == NULL) {
			lck_mtx_unlock(&dtrace_lock);
			return (ESRCH);
		}

		dtrace_probe_description(probe, &desc);
		lck_mtx_unlock(&dtrace_lock);

		if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_PROBEARG: {
		dtrace_argdesc_t desc;
		dtrace_probe_t *probe;
		dtrace_provider_t *prov;

		if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		if (desc.dtargd_id == DTRACE_IDNONE)
			return (EINVAL);

		if (desc.dtargd_ndx == DTRACE_ARGNONE)
			return (EINVAL);

		lck_mtx_lock(&dtrace_provider_lock);
		lck_mtx_lock(&mod_lock);
		lck_mtx_lock(&dtrace_lock);

		if (desc.dtargd_id > dtrace_nprobes) {
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&mod_lock);
			lck_mtx_unlock(&dtrace_provider_lock);
			return (EINVAL);
		}

		if ((probe = dtrace_probes[desc.dtargd_id - 1]) == NULL) {
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&mod_lock);
			lck_mtx_unlock(&dtrace_provider_lock);
			return (EINVAL);
		}

		lck_mtx_unlock(&dtrace_lock);

		prov = probe->dtpr_provider;

		if (prov->dtpv_pops.dtps_getargdesc == NULL) {
			/*
			 * There isn't any typed information for this probe.
			 * Set the argument number to DTRACE_ARGNONE.
			 */
			desc.dtargd_ndx = DTRACE_ARGNONE;
		} else {
			desc.dtargd_native[0] = '\0';
			desc.dtargd_xlate[0] = '\0';
			desc.dtargd_mapping = desc.dtargd_ndx;

			prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg,
			    probe->dtpr_id, probe->dtpr_arg, &desc);
		}

		lck_mtx_unlock(&mod_lock);
		lck_mtx_unlock(&dtrace_provider_lock);

		if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_GO: {
		processorid_t cpuid;
		rval = dtrace_state_go(state, &cpuid);

		if (rval != 0)
			return (rval);

		if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_STOP: {
		processorid_t cpuid;

		lck_mtx_lock(&dtrace_lock);
		rval = dtrace_state_stop(state, &cpuid);
		lck_mtx_unlock(&dtrace_lock);

		if (rval != 0)
			return (rval);

		if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_DOFGET: {
		dof_hdr_t hdr, *dof;
		uint64_t len;

		if (copyin((void *)arg, &hdr, sizeof (hdr)) != 0)
			return (EFAULT);

		lck_mtx_lock(&dtrace_lock);
		dof = dtrace_dof_create(state);
		lck_mtx_unlock(&dtrace_lock);

		len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz);
		rval = copyout(dof, (void *)arg, len);
		dtrace_dof_destroy(dof);

		return (rval == 0 ? 0 : EFAULT);
	}

	case DTRACEIOC_AGGSNAP:
	case DTRACEIOC_BUFSNAP: {
		dtrace_bufdesc_t desc;
		caddr_t cached;
		dtrace_buffer_t *buf;

		if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= (int)NCPU)
			return (EINVAL);

		lck_mtx_lock(&dtrace_lock);

		if (cmd == DTRACEIOC_BUFSNAP) {
			buf = &state->dts_buffer[desc.dtbd_cpu];
		} else {
			buf = &state->dts_aggbuffer[desc.dtbd_cpu];
		}

		if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) {
			size_t sz = buf->dtb_offset;

			if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) {
				lck_mtx_unlock(&dtrace_lock);
				return (EBUSY);
			}

			/*
			 * If this buffer has already been consumed, we're
			 * going to indicate that there's nothing left here
			 * to consume.
			 */
			if (buf->dtb_flags & DTRACEBUF_CONSUMED) {
				lck_mtx_unlock(&dtrace_lock);

				desc.dtbd_size = 0;
				desc.dtbd_drops = 0;
				desc.dtbd_errors = 0;
				desc.dtbd_oldest = 0;
				sz = sizeof (desc);

				if (copyout(&desc, (void *)arg, sz) != 0)
					return (EFAULT);

				return (0);
			}

			/*
			 * If this is a ring buffer that has wrapped, we want
			 * to copy the whole thing out.
			 */
			if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
				dtrace_buffer_polish(buf);
				sz = buf->dtb_size;
			}

			if (copyout(buf->dtb_tomax, desc.dtbd_data, sz) != 0) {
				lck_mtx_unlock(&dtrace_lock);
				return (EFAULT);
			}

			desc.dtbd_size = sz;
			desc.dtbd_drops = buf->dtb_drops;
			desc.dtbd_errors = buf->dtb_errors;
			desc.dtbd_oldest = buf->dtb_xamot_offset;

			lck_mtx_unlock(&dtrace_lock);

			if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
				return (EFAULT);

			buf->dtb_flags |= DTRACEBUF_CONSUMED;

			return (0);
		}

		if (buf->dtb_tomax == NULL) {
			ASSERT(buf->dtb_xamot == NULL);
			lck_mtx_unlock(&dtrace_lock);
			return (ENOENT);
		}

		cached = buf->dtb_tomax;
		ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));

		dtrace_xcall(desc.dtbd_cpu,
		    (dtrace_xcall_t)dtrace_buffer_switch, buf);

		state->dts_errors += buf->dtb_xamot_errors;

		/*
		 * If the buffers did not actually switch, then the cross call
		 * did not take place -- presumably because the given CPU is
		 * not in the ready set. If this is the case, we'll return
		 * ENOENT.
		 */
		if (buf->dtb_tomax == cached) {
			ASSERT(buf->dtb_xamot != cached);
			lck_mtx_unlock(&dtrace_lock);
			return (ENOENT);
		}

		ASSERT(cached == buf->dtb_xamot);

		/*
		 * We have our snapshot; now copy it out.
		 */
		if (copyout(buf->dtb_xamot, desc.dtbd_data,
		    buf->dtb_xamot_offset) != 0) {
			lck_mtx_unlock(&dtrace_lock);
			return (EFAULT);
		}

		desc.dtbd_size = buf->dtb_xamot_offset;
		desc.dtbd_drops = buf->dtb_xamot_drops;
		desc.dtbd_errors = buf->dtb_xamot_errors;
		desc.dtbd_oldest = 0;

		lck_mtx_unlock(&dtrace_lock);

		/*
		 * Finally, copy out the buffer description.
		 */
		if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_CONF: {
		dtrace_conf_t conf;

		bzero(&conf, sizeof (conf));
		conf.dtc_difversion = DIF_VERSION;
		conf.dtc_difintregs = DIF_DIR_NREGS;
		conf.dtc_diftupregs = DIF_DTR_NREGS;
		conf.dtc_ctfmodel = CTF_MODEL_NATIVE;

		if (copyout(&conf, (void *)arg, sizeof (conf)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_STATUS: {
		dtrace_status_t stat;
		dtrace_dstate_t *dstate;
		int i, j;
		uint64_t nerrs;

		/*
		 * See the comment in dtrace_state_deadman() for the reason
		 * for setting dts_laststatus to INT64_MAX before setting
		 * it to the correct value.
		 */
		state->dts_laststatus = INT64_MAX;
		dtrace_membar_producer();
		state->dts_laststatus = dtrace_gethrtime();

		bzero(&stat, sizeof (stat));

		lck_mtx_lock(&dtrace_lock);

		if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) {
			lck_mtx_unlock(&dtrace_lock);
			return (ENOENT);
		}

		if (state->dts_activity == DTRACE_ACTIVITY_DRAINING)
			stat.dtst_exiting = 1;

		nerrs = state->dts_errors;
		dstate = &state->dts_vstate.dtvs_dynvars;

		for (i = 0; i < (int)NCPU; i++) {
			dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i];

			stat.dtst_dyndrops += dcpu->dtdsc_drops;
			stat.dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops;
			stat.dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops;

			if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL)
				stat.dtst_filled++;

			nerrs += state->dts_buffer[i].dtb_errors;

			for (j = 0; j < state->dts_nspeculations; j++) {
				dtrace_speculation_t *spec;
				dtrace_buffer_t *buf;

				spec = &state->dts_speculations[j];
				buf = &spec->dtsp_buffer[i];
				stat.dtst_specdrops += buf->dtb_xamot_drops;
			}
		}

		stat.dtst_specdrops_busy = state->dts_speculations_busy;
		stat.dtst_specdrops_unavail = state->dts_speculations_unavail;
		stat.dtst_stkstroverflows = state->dts_stkstroverflows;
		stat.dtst_dblerrors = state->dts_dblerrors;
		stat.dtst_killed =
		    (state->dts_activity == DTRACE_ACTIVITY_KILLED);
		stat.dtst_errors = nerrs;

		lck_mtx_unlock(&dtrace_lock);

		if (copyout(&stat, (void *)arg, sizeof (stat)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_FORMAT: {
		dtrace_fmtdesc_t fmt;
		char *str;
		int len;

		if (copyin((void *)arg, &fmt, sizeof (fmt)) != 0)
			return (EFAULT);

		lck_mtx_lock(&dtrace_lock);

		if (fmt.dtfd_format == 0 ||
		    fmt.dtfd_format > state->dts_nformats) {
			lck_mtx_unlock(&dtrace_lock);
			return (EINVAL);
		}

		/*
		 * Format strings are allocated contiguously and they are
		 * never freed; if a format index is less than the number
		 * of formats, we can assert that the format map is non-NULL
		 * and that the format for the specified index is non-NULL.
		 */
		ASSERT(state->dts_formats != NULL);
		str = state->dts_formats[fmt.dtfd_format - 1];
		ASSERT(str != NULL);

		len = strlen(str) + 1;

		if (len > fmt.dtfd_length) {
			fmt.dtfd_length = len;

			if (copyout(&fmt, (void *)arg, sizeof (fmt)) != 0) {
				lck_mtx_unlock(&dtrace_lock);
				return (EINVAL);
			}
		} else {
			if (copyout(str, fmt.dtfd_string, len) != 0) {
				lck_mtx_unlock(&dtrace_lock);
				return (EINVAL);
			}
		}

		lck_mtx_unlock(&dtrace_lock);
		return (0);
	}

	default:
		break;
	}

	return (ENOTTY);
}
16071 #if defined(__APPLE__)
16074 #endif /* __APPLE__ */
#if !defined(__APPLE__)
/*ARGSUSED*/
static int
dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	dtrace_state_t *state;

	switch (cmd) {
	case DDI_DETACH:
		break;

	case DDI_SUSPEND:
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&dtrace_lock);

	ASSERT(dtrace_opens == 0);

	if (dtrace_helpers > 0) {
		lck_mtx_unlock(&dtrace_provider_lock);
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&cpu_lock);
		return (DDI_FAILURE);
	}

	if (dtrace_unregister((dtrace_provider_id_t)dtrace_provider) != 0) {
		lck_mtx_unlock(&dtrace_provider_lock);
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&cpu_lock);
		return (DDI_FAILURE);
	}

	dtrace_provider = NULL;

	if ((state = dtrace_anon_grab()) != NULL) {
		/*
		 * If there were ECBs on this state, the provider should
		 * have not been allowed to detach; assert that there is
		 * none.
		 */
		ASSERT(state->dts_necbs == 0);
		dtrace_state_destroy(state);

		/*
		 * If we're being detached with anonymous state, we need to
		 * indicate to the kernel debugger that DTrace is now inactive.
		 */
		(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
	}

	bzero(&dtrace_anon, sizeof (dtrace_anon_t));
	unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);
	dtrace_cpu_init = NULL;
	dtrace_helpers_cleanup = NULL;
	dtrace_helpers_fork = NULL;
	dtrace_cpustart_init = NULL;
	dtrace_cpustart_fini = NULL;
	dtrace_debugger_init = NULL;
	dtrace_debugger_fini = NULL;
	dtrace_kreloc_init = NULL;
	dtrace_kreloc_fini = NULL;
	dtrace_modload = NULL;
	dtrace_modunload = NULL;

	lck_mtx_unlock(&cpu_lock);

	if (dtrace_helptrace_enabled) {
		kmem_free(dtrace_helptrace_buffer, dtrace_helptrace_bufsize);
		dtrace_helptrace_buffer = NULL;
	}

	kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *));
	dtrace_probes = NULL;
	dtrace_nprobes = 0;

	dtrace_hash_destroy(dtrace_bymod);
	dtrace_hash_destroy(dtrace_byfunc);
	dtrace_hash_destroy(dtrace_byname);
	dtrace_bymod = NULL;
	dtrace_byfunc = NULL;
	dtrace_byname = NULL;

	kmem_cache_destroy(dtrace_state_cache);
	vmem_destroy(dtrace_minor);
	vmem_destroy(dtrace_arena);

	if (dtrace_toxrange != NULL) {
		kmem_free(dtrace_toxrange,
		    dtrace_toxranges_max * sizeof (dtrace_toxrange_t));
		dtrace_toxrange = NULL;
		dtrace_toxranges = 0;
		dtrace_toxranges_max = 0;
	}

	ddi_remove_minor_node(dtrace_devi, NULL);
	dtrace_devi = NULL;

	ddi_soft_state_fini(&dtrace_softstate);

	ASSERT(dtrace_vtime_references == 0);
	ASSERT(dtrace_opens == 0);
	ASSERT(dtrace_retained == NULL);

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_provider_lock);

	/*
	 * We don't destroy the task queue until after we have dropped our
	 * locks (taskq_destroy() may block on running tasks).  To prevent
	 * attempting to do work after we have effectively detached but before
	 * the task queue has been destroyed, all tasks dispatched via the
	 * task queue must check that DTrace is still attached before
	 * performing any operation.
	 */
	taskq_destroy(dtrace_taskq);
	dtrace_taskq = NULL;

	return (DDI_SUCCESS);
}
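/*
 * Illustrative sketch (not part of the driver): the comment above requires
 * that every task dispatched via dtrace_taskq re-check that DTrace is still
 * attached before doing work, since a task may run after dtrace_detach() but
 * before taskq_destroy() drains the queue. The Solaris implementation keys
 * such a check off dtrace_devi being non-NULL; the dtrace_attached()
 * predicate and the example task below are assumptions for illustration.
 */
#if 0	/* example only; dtrace_attached() is assumed, not defined here */
static void
dtrace_example_task(void *arg)
{
#pragma unused(arg)
	lck_mtx_lock(&dtrace_lock);

	/* Bail out if the framework has already been torn down. */
	if (!dtrace_attached()) {
		lck_mtx_unlock(&dtrace_lock);
		return;
	}

	/* ... deferred work that touches framework state goes here ... */

	lck_mtx_unlock(&dtrace_lock);
}
#endif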
/*ARGSUSED*/
static int
dtrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)dtrace_devi;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}
static struct cb_ops dtrace_cb_ops = {
	dtrace_open,		/* open */
	dtrace_close,		/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	dtrace_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};

static struct dev_ops dtrace_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	dtrace_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	dtrace_attach,		/* attach */
	dtrace_detach,		/* detach */
	nodev,			/* reset */
	&dtrace_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	nodev			/* dev power */
};

static struct modldrv modldrv = {
	&mod_driverops,		/* module type (this is a pseudo driver) */
	"Dynamic Tracing",	/* name of module */
	&dtrace_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}
#else

d_open_t _dtrace_open, helper_open;
d_close_t _dtrace_close, helper_close;
d_ioctl_t _dtrace_ioctl, helper_ioctl;

int
_dtrace_open(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(p)
	dev_t locdev = dev;

	return dtrace_open( &locdev, flags, devtype, CRED());
}

int
helper_open(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(dev,flags,devtype,p)
	return 0;
}

int
_dtrace_close(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(p)
	return dtrace_close( dev, flags, devtype, CRED());
}

int
helper_close(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(dev,flags,devtype,p)
	return 0;
}

int
_dtrace_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p)
{
#pragma unused(p)
	int err, rv = 0;

	err = dtrace_ioctl(dev, (int)cmd, *(intptr_t *)data, fflag, CRED(), &rv);

	/* XXX Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */
	if (err != 0) {
		ASSERT( (err & 0xfffff000) == 0 );
		return (err & 0xfff); /* ioctl returns -1 and errno set to an error code < 4096 */
	} else if (rv != 0) {
		ASSERT( (rv & 0xfff00000) == 0 );
		return (((rv & 0xfffff) << 12)); /* ioctl returns -1 and errno set to a return value >= 4096 */
	} else
		return 0;
}

int
helper_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p)
{
#pragma unused(dev,fflag,p)
	int err, rv = 0;

	err = dtrace_ioctl_helper((int)cmd, data, &rv);
	/* XXX Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */
	if (err != 0) {
		ASSERT( (err & 0xfffff000) == 0 );
		return (err & 0xfff); /* ioctl returns -1 and errno set to an error code < 4096 */
	} else if (rv != 0) {
		ASSERT( (rv & 0xfff00000) == 0 );
		return (((rv & 0xfffff) << 20)); /* ioctl returns -1 and errno set to a return value >= 4096 */
	} else
		return 0;
}
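/*
 * Illustrative sketch (not part of the driver): a user-level caller that sees
 * ioctl(2) fail can undo the errno overloading performed by _dtrace_ioctl()
 * above. The decode_dtrace_errno() helper is hypothetical and assumes the
 * 12-bit split used in _dtrace_ioctl(); libdtrace performs an equivalent
 * translation on its side.
 */
#if 0	/* user-level example; never compiled into the kernel */
/*
 * Values below 4096 are ordinary error codes; anything larger carries the
 * Solaris-style ioctl return value shifted up by 12 bits.
 */
static int
decode_dtrace_errno(int saved_errno, int *rvp)
{
	if (saved_errno < 4096) {
		*rvp = 0;
		return (saved_errno);	/* a real error code */
	}

	*rvp = saved_errno >> 12;	/* the value the kernel placed in *rv */
	return (0);
}
#endif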
#define HELPER_MAJOR  -24 /* let the kernel pick the device number */

/*
 * A struct describing which functions will get invoked for certain
 * actions.
 */
static struct cdevsw helper_cdevsw =
{
	helper_open,		/* open */
	helper_close,		/* close */
	eno_rdwrt,		/* read */
	eno_rdwrt,		/* write */
	helper_ioctl,		/* ioctl */
	(stop_fcn_t *)nulldev,	/* stop */
	(reset_fcn_t *)nulldev,	/* reset */
	NULL,			/* tty's */
	eno_select,		/* select */
	eno_mmap,		/* mmap */
	eno_strat,		/* strategy */
	eno_getc,		/* getc */
	eno_putc,		/* putc */
	0			/* type */
};

static int helper_majdevno = 0;

static int gDTraceInited = 0;

void
helper_init( void )
{
	/*
	 * Once the "helper" is initialized, it can take ioctl calls that use locks
	 * and zones initialized in dtrace_init. Make certain dtrace_init was called
	 * before us.
	 */

	if (!gDTraceInited) {
		panic("helper_init before dtrace_init\n");
	}

	if (0 >= helper_majdevno)
	{
		helper_majdevno = cdevsw_add(HELPER_MAJOR, &helper_cdevsw);

		if (helper_majdevno < 0) {
			printf("helper_init: failed to allocate a major number!\n");
			return;
		}

		if (NULL == devfs_make_node( makedev(helper_majdevno, 0), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666,
					DTRACEMNR_HELPER, 0 )) {
			printf("dtrace_init: failed to devfs_make_node for helper!\n");
			return;
		}
	} else
		panic("helper_init: called twice!\n");
}

#undef HELPER_MAJOR
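/*
 * Illustrative sketch (not part of the driver): helper_init() panics unless
 * dtrace_init() has already run, because the helper device's ioctl path
 * relies on locks and zones created there. The call sites live in the BSD
 * startup path; the ordering shown below is an assumption for illustration,
 * not a quotation of that code.
 */
#if 0	/* assumed boot-time ordering */
	dtrace_init();		/* locks, zones, and the clonable dtrace device */
	helper_init();		/* helper device node (DTRACEMNR_HELPER); requires dtrace_init */
#endif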
/*
 * Called with DEVFS_LOCK held, so vmem_alloc's underlying blist structures are protected.
 */
static int
dtrace_clone_func(dev_t dev, int action)
{
#pragma unused(dev)

	if (action == DEVFS_CLONE_ALLOC) {
		if (NULL == dtrace_minor) /* Arena not created yet!?! */
			return 0;
		else {
			/*
			 * Propose a minor number, namely the next number that vmem_alloc() will return.
			 * Immediately put it back in play by calling vmem_free().
			 */
			int ret = (int)(uintptr_t)vmem_alloc(dtrace_minor, 1, VM_BESTFIT | VM_SLEEP);

			vmem_free(dtrace_minor, (void *)(uintptr_t)ret, 1);

			return ret;
		}
	}
	else if (action == DEVFS_CLONE_FREE) {
		return 0;
	}
	else return -1;
}
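/*
 * Illustrative sketch (not part of the driver): the propose/claim handshake
 * used above, restated step by step. The assumption is that nothing else
 * allocates from the dtrace_minor arena between the proposal here and the
 * real allocation performed later on the open path, which is what makes the
 * proposed number and the eventually allocated number agree.
 *
 *	// under DEVFS_LOCK, in dtrace_clone_func(DEVFS_CLONE_ALLOC):
 *	m = vmem_alloc(dtrace_minor, 1, VM_BESTFIT | VM_SLEEP);  // peek at the next free minor
 *	vmem_free(dtrace_minor, m, 1);                            // put it straight back
 *	return m;                                                 // devfs names the clone node with m
 *
 *	// later, the open path allocates its minor for real and, absent any
 *	// intervening allocation from the arena, receives the same value m.
 */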
#define DTRACE_MAJOR  -24 /* let the kernel pick the device number */

static struct cdevsw dtrace_cdevsw =
{
	_dtrace_open,		/* open */
	_dtrace_close,		/* close */
	eno_rdwrt,		/* read */
	eno_rdwrt,		/* write */
	_dtrace_ioctl,		/* ioctl */
	(stop_fcn_t *)nulldev,	/* stop */
	(reset_fcn_t *)nulldev,	/* reset */
	NULL,			/* tty's */
	eno_select,		/* select */
	eno_mmap,		/* mmap */
	eno_strat,		/* strategy */
	eno_getc,		/* getc */
	eno_putc,		/* putc */
	0			/* type */
};

lck_attr_t *dtrace_lck_attr;
lck_grp_attr_t *dtrace_lck_grp_attr;
lck_grp_t *dtrace_lck_grp;

static int gMajDevNo;
void
dtrace_init( void )
{
	if (0 == gDTraceInited) {
		int i, ncpu = NCPU;

		gMajDevNo = cdevsw_add(DTRACE_MAJOR, &dtrace_cdevsw);

		if (gMajDevNo < 0) {
			printf("dtrace_init: failed to allocate a major number!\n");
			return;
		}

		if (NULL == devfs_make_node_clone( makedev(gMajDevNo, 0), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666,
					dtrace_clone_func, DTRACEMNR_DTRACE, 0 )) {
			printf("dtrace_init: failed to devfs_make_node_clone for dtrace!\n");
			return;
		}

#if defined(DTRACE_MEMORY_ZONES)
		/*
		 * Initialize the dtrace kalloc-emulation zones.
		 */
		dtrace_alloc_init();
#endif /* DTRACE_MEMORY_ZONES */

		/*
		 * Allocate the dtrace_probe_t zone
		 */
		dtrace_probe_t_zone = zinit(sizeof(dtrace_probe_t),
					    1024 * sizeof(dtrace_probe_t),
					    sizeof(dtrace_probe_t),
					    "dtrace.dtrace_probe_t");

		/*
		 * Create the dtrace lock group and attrs.
		 */
		dtrace_lck_attr = lck_attr_alloc_init();
		dtrace_lck_grp_attr = lck_grp_attr_alloc_init();
		dtrace_lck_grp = lck_grp_alloc_init("dtrace", dtrace_lck_grp_attr);

		/*
		 * We have to initialize all locks explicitly
		 */
		lck_mtx_init(&dtrace_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&dtrace_provider_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&dtrace_meta_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&dtrace_errlock, dtrace_lck_grp, dtrace_lck_attr);
		lck_rw_init(&dtrace_dof_mode_lock, dtrace_lck_grp, dtrace_lck_attr);

		/*
		 * The cpu_core structure consists of per-CPU state available in any context.
		 * On some architectures, this may mean that the page(s) containing the
		 * NCPU-sized array of cpu_core structures must be locked in the TLB -- it
		 * is up to the platform to assure that this is performed properly. Note that
		 * the structure is sized to avoid false sharing.
		 */
		lck_mtx_init(&cpu_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&mod_lock, dtrace_lck_grp, dtrace_lck_attr);

		cpu_core = (cpu_core_t *)kmem_zalloc( ncpu * sizeof(cpu_core_t), KM_SLEEP );
		for (i = 0; i < ncpu; ++i) {
			lck_mtx_init(&cpu_core[i].cpuc_pid_lock, dtrace_lck_grp, dtrace_lck_attr);
		}

		cpu_list = (cpu_t *)kmem_zalloc( ncpu * sizeof(cpu_t), KM_SLEEP );
		for (i = 0; i < ncpu; ++i) {
			cpu_list[i].cpu_id = (processorid_t)i;
			cpu_list[i].cpu_next = &(cpu_list[(i+1) % ncpu]);
			lck_rw_init(&cpu_list[i].cpu_ft_lock, dtrace_lck_grp, dtrace_lck_attr);
		}

		lck_mtx_lock(&cpu_lock);
		for (i = 0; i < ncpu; ++i)
			dtrace_cpu_setup_initial( (processorid_t)i ); /* In lieu of register_cpu_setup_func() callback */
		lck_mtx_unlock(&cpu_lock);

		(void)dtrace_abs_to_nano(0LL); /* Force once only call to clock_timebase_info (which can take a lock) */

		/*
		 * See dtrace_impl.h for a description of dof modes.
		 * The default is lazy dof.
		 *
		 * XXX Warn if state is LAZY_OFF? It won't break anything, but
		 * makes no sense...
		 */
		if (!PE_parse_boot_argn("dtrace_dof_mode", &dtrace_dof_mode, sizeof (dtrace_dof_mode))) {
			dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
		}

		/*
		 * Sanity check of dof mode value.
		 */
		switch (dtrace_dof_mode) {
		case DTRACE_DOF_MODE_NEVER:
		case DTRACE_DOF_MODE_LAZY_ON:
			/* valid modes, but nothing else we need to do */
			break;

		case DTRACE_DOF_MODE_LAZY_OFF:
		case DTRACE_DOF_MODE_NON_LAZY:
			/* Cannot wait for a dtrace_open to init fasttrap */
			fasttrap_init();
			break;

		default:
			/* Invalid, clamp to non lazy */
			dtrace_dof_mode = DTRACE_DOF_MODE_NON_LAZY;
			break;
		}

		gDTraceInited = 1;

	} else
		panic("dtrace_init: called twice!\n");
}

void
dtrace_postinit(void)
{
	dtrace_attach( (dev_info_t *)makedev(gMajDevNo, 0), 0 );
}
#undef DTRACE_MAJOR
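/*
 * Illustrative note (not part of the driver): the dof mode consulted in
 * dtrace_init() above can be overridden with the "dtrace_dof_mode" boot-arg.
 * The example below shows only the mechanism; the numeric value must match
 * one of the DTRACE_DOF_MODE_* constants described in dtrace_impl.h, and no
 * particular mapping is asserted here.
 *
 *	nvram boot-args="dtrace_dof_mode=<n>"
 *
 * If the boot-arg is absent, dtrace_init() defaults to
 * DTRACE_DOF_MODE_LAZY_ON; an unrecognized value is clamped to
 * DTRACE_DOF_MODE_NON_LAZY.
 */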
/*
 * Routines used to register interest in cpu's being added to or removed
 * from the system.
 */
void
register_cpu_setup_func(cpu_setup_func_t *ignore1, void *ignore2)
{
#pragma unused(ignore1,ignore2)
}

void
unregister_cpu_setup_func(cpu_setup_func_t *ignore1, void *ignore2)
{
#pragma unused(ignore1,ignore2)
}
#endif /* __APPLE__ */