/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/* #pragma ident	"@(#)dtrace.c	1.49	06/08/11 SMI" */
/*
 * DTrace - Dynamic Tracing for Solaris
 *
 * This is the implementation of the Solaris Dynamic Tracing framework
 * (DTrace).  The user-visible interface to DTrace is described at length in
 * the "Solaris Dynamic Tracing Guide".  The interfaces between the libdtrace
 * library, the in-kernel DTrace framework, and the DTrace providers are
 * described in the block comments in the <sys/dtrace.h> header file.  The
 * internal architecture of DTrace is described in the block comments in the
 * <sys/dtrace_impl.h> header file.  The comments contained within the DTrace
 * implementation very much assume mastery of all of these sources; if one has
 * an unanswered question about the implementation, one should consult them
 * first.
 *
 * The functions here are ordered roughly as follows:
 *
 *   - Probe context functions
 *   - Probe hashing functions
 *   - Non-probe context utility functions
 *   - Matching functions
 *   - Provider-to-Framework API functions
 *   - Probe management functions
 *   - DIF object functions
 *   - Predicate functions
 *   - Enabling functions
 *   - Anonymous enabling functions
 *   - Consumer state functions
 *   - Driver cookbook functions
 *
 * Each group of functions begins with a block comment labelled the "DTrace
 * [Group] Functions", allowing one to find each block by searching forward
 * on capital-f functions.
 */
#define _DTRACE_WANT_PROC_GLUE_ 1

#include <sys/errno.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/dtrace_impl.h>
#include <sys/param.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <miscfs/devfs/devfs.h>
#include <sys/malloc.h>
#include <sys/kernel_types.h>
#include <sys/proc_internal.h>
#include <sys/uio_internal.h>
#include <sys/kauth.h>
#include <mach/exception_types.h>
#include <sys/signalvar.h>
#include <kern/zalloc.h>

#define t_predcache t_dtrace_predcache /* Cosmetic. Helps readability of thread.h */

extern void dtrace_suspend(void);
extern void dtrace_resume(void);
extern void dtrace_init(void);
extern void helper_init(void);
#if defined(__APPLE__)
#include "../../../osfmk/chud/chud_dtrace.h"

extern kern_return_t chudxnu_dtrace_callback
	(uint64_t selector, uint64_t *args, uint32_t count);
#endif /* __APPLE__ */
/*
 * DTrace Tunable Variables
 *
 * The following variables may be tuned by adding a line to /etc/system that
 * includes both the name of the DTrace module ("dtrace") and the name of the
 * variable.  For example:
 *
 *   set dtrace:dtrace_destructive_disallow = 1
 *
 * In general, the only variables that one should be tuning this way are those
 * that affect system-wide DTrace behavior, and for which the default behavior
 * is undesirable.  Most of these variables are tunable on a per-consumer
 * basis using DTrace options, and need not be tuned on a system-wide basis.
 * When tuning these variables, avoid pathological values; while some attempt
 * is made to verify the integrity of these variables, they are not considered
 * part of the supported interface to DTrace, and they are therefore not
 * checked comprehensively.  Further, these variables should not be tuned
 * dynamically via "mdb -kw" or other means; they should only be tuned via
 * /etc/system.
 */
int		dtrace_destructive_disallow = 0;
#if defined(__APPLE__)
#define proc_t struct proc
#endif /* __APPLE__ */
dtrace_optval_t	dtrace_nonroot_maxsize = (16 * 1024 * 1024);
size_t		dtrace_difo_maxsize = (256 * 1024);
dtrace_optval_t	dtrace_dof_maxsize = (256 * 1024);
size_t		dtrace_global_maxsize = (16 * 1024);
size_t		dtrace_actions_max = (16 * 1024);
size_t		dtrace_retain_max = 1024;
dtrace_optval_t	dtrace_helper_actions_max = 32;
dtrace_optval_t	dtrace_helper_providers_max = 32;
dtrace_optval_t	dtrace_dstate_defsize = (1 * 1024 * 1024);
size_t		dtrace_strsize_default = 256;
dtrace_optval_t	dtrace_cleanrate_default = 9900990;		/* 101 hz */
dtrace_optval_t	dtrace_cleanrate_min = 200000;			/* 5000 hz */
dtrace_optval_t	dtrace_cleanrate_max = (uint64_t)60 * NANOSEC;	/* 1/minute */
dtrace_optval_t	dtrace_aggrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_statusrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_statusrate_max = (hrtime_t)10 * NANOSEC;	/* 6/minute */
dtrace_optval_t	dtrace_switchrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_nspec_default = 1;
dtrace_optval_t	dtrace_specsize_default = 32 * 1024;
dtrace_optval_t	dtrace_stackframes_default = 20;
dtrace_optval_t	dtrace_ustackframes_default = 20;
dtrace_optval_t	dtrace_jstackframes_default = 50;
dtrace_optval_t	dtrace_jstackstrsize_default = 512;
int		dtrace_msgdsize_max = 128;
hrtime_t	dtrace_chill_max = 500 * (NANOSEC / MILLISEC);	/* 500 ms */
hrtime_t	dtrace_chill_interval = NANOSEC;		/* 1000 ms */
int		dtrace_devdepth_max = 32;
int		dtrace_err_verbose;
hrtime_t	dtrace_deadman_interval = NANOSEC;
hrtime_t	dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC;
hrtime_t	dtrace_deadman_user = (hrtime_t)30 * NANOSEC;
/*
 * DTrace External Variables
 *
 * As dtrace(7D) is a kernel module, any DTrace variables are obviously
 * available to DTrace consumers via the backtick (`) syntax.  One of these,
 * dtrace_zero, is made deliberately so:  it is provided as a source of
 * well-known, zero-filled memory.  While this variable is not documented,
 * it is used by some translators as an implementation detail.
 */
const char	dtrace_zero[256] = { 0 };	/* zero-filled memory */
/*
 * DTrace Internal Variables
 */
static dev_info_t	*dtrace_devi;		/* device info */
static vmem_t		*dtrace_arena;		/* probe ID arena */
static vmem_t		*dtrace_minor;		/* minor number arena */
static taskq_t		*dtrace_taskq;		/* task queue */
static dtrace_probe_t	**dtrace_probes;	/* array of all probes */
static int		dtrace_nprobes;		/* number of probes */
static dtrace_provider_t *dtrace_provider;	/* provider list */
static dtrace_meta_t	*dtrace_meta_pid;	/* user-land meta provider */
static int		dtrace_opens;		/* number of opens */
static int		dtrace_helpers;		/* number of helpers */
static void		*dtrace_softstate;	/* softstate pointer */
static dtrace_hash_t	*dtrace_bymod;		/* probes hashed by module */
static dtrace_hash_t	*dtrace_byfunc;		/* probes hashed by function */
static dtrace_hash_t	*dtrace_byname;		/* probes hashed by name */
static dtrace_toxrange_t *dtrace_toxrange;	/* toxic range array */
static int		dtrace_toxranges;	/* number of toxic ranges */
static int		dtrace_toxranges_max;	/* size of toxic range array */
static dtrace_anon_t	dtrace_anon;		/* anonymous enabling */
static kmem_cache_t	*dtrace_state_cache;	/* cache for dynamic state */
static uint64_t		dtrace_vtime_references; /* number of vtimestamp refs */
static kthread_t	*dtrace_panicked;	/* panicking thread */
static dtrace_ecb_t	*dtrace_ecb_create_cache; /* cached created ECB */
static dtrace_genid_t	dtrace_probegen;	/* current probe generation */
static dtrace_helpers_t	*dtrace_deferred_pid;	/* deferred helper list */
static dtrace_enabling_t *dtrace_retained;	/* list of retained enablings */
static dtrace_dynvar_t	dtrace_dynhash_sink;	/* end of dynamic hash chains */
#if defined(__APPLE__)
static int		dtrace_dof_mode;	/* dof mode */
#endif
#if defined(__APPLE__)
/*
 * To save memory, some common memory allocations are given a
 * unique zone. For example, dtrace_probe_t is 72 bytes in size,
 * which means it would fall into the kalloc.128 bucket. With
 * 20k elements allocated, the space saved is substantial.
 */
struct zone *dtrace_probe_t_zone;
#endif /* __APPLE__ */
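
/*
 * A minimal illustrative sketch: allocating a probe from the dedicated zone
 * rather than letting it fall into a generic kalloc bucket, as the comment
 * above describes.  The zone is assumed to have been created during
 * initialization (e.g. via zinit()); the helper name is hypothetical.
 */
#if 0
static dtrace_probe_t *
dtrace_probe_alloc_example(void)
{
	/* Allocate from the dedicated zone instead of kalloc.128. */
	dtrace_probe_t *probe = zalloc(dtrace_probe_t_zone);

	bzero(probe, sizeof (dtrace_probe_t));
	return (probe);
}
#endif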
/*
 * DTrace is protected by three (relatively coarse-grained) locks:
 *
 * (1) dtrace_lock is required to manipulate essentially any DTrace state,
 *     including enabling state, probes, ECBs, consumer state, helper state,
 *     etc.  Importantly, dtrace_lock is _not_ required when in probe context;
 *     probe context is lock-free -- synchronization is handled via the
 *     dtrace_sync() cross call mechanism.
 *
 * (2) dtrace_provider_lock is required when manipulating provider state, or
 *     when provider state must be held constant.
 *
 * (3) dtrace_meta_lock is required when manipulating meta provider state, or
 *     when meta provider state must be held constant.
 *
 * The lock ordering between these three locks is dtrace_meta_lock before
 * dtrace_provider_lock before dtrace_lock.  (In particular, there are
 * several places where dtrace_provider_lock is held by the framework as it
 * calls into the providers -- which then call back into the framework,
 * grabbing dtrace_lock.)
 *
 * There are two other locks in the mix:  mod_lock and cpu_lock.  With respect
 * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical
 * role as a coarse-grained lock; it is acquired before both of these locks.
 * With respect to dtrace_meta_lock, its behavior is stranger:  cpu_lock must
 * be acquired _between_ dtrace_meta_lock and any other DTrace locks.
 * mod_lock is similar with respect to dtrace_provider_lock in that it must be
 * acquired _between_ dtrace_provider_lock and dtrace_lock.
 */

/*
 * All kmutex_t vars have been changed to lck_mtx_t.
 * Note that lck_mtx_t's require explicit initialization.
 *
 * mutex_enter() becomes lck_mtx_lock()
 * mutex_exit() becomes lck_mtx_unlock()
 *
 * Lock asserts are changed like this:
 *
 *	ASSERT(MUTEX_HELD(&cpu_lock));
 * becomes:
 *	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
 *
 * Due to the number of these changes, they are not called out explicitly.
 */
static lck_mtx_t	dtrace_lock;		/* probe state lock */
static lck_mtx_t	dtrace_provider_lock;	/* provider state lock */
static lck_mtx_t	dtrace_meta_lock;	/* meta-provider state lock */
#if defined(__APPLE__)
static lck_rw_t		dtrace_dof_mode_lock;	/* dof mode lock */
#endif /* __APPLE__ */
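
/*
 * A minimal illustrative sketch: the lock ordering described above,
 * expressed as code.  A hypothetical framework path needing all three locks
 * would take them outermost-first and release them in the opposite order.
 */
#if 0
static void
dtrace_lock_ordering_example(void)
{
	lck_mtx_lock(&dtrace_meta_lock);	/* outermost */
	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&dtrace_lock);		/* innermost */

	/* ... manipulate provider/enabling/probe state here ... */

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_provider_lock);
	lck_mtx_unlock(&dtrace_meta_lock);
}
#endif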
/*
 * DTrace Provider Variables
 *
 * These are the variables relating to DTrace as a provider (that is, the
 * provider of the BEGIN, END, and ERROR probes).
 */
static dtrace_pattr_t	dtrace_provider_attr = {
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
};
static void
dtrace_nullop(void)
{}

static dtrace_pops_t	dtrace_provider_ops = {
	(void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop,
	(void (*)(void *, struct modctl *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	NULL,
	NULL,
	NULL,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop
};
static dtrace_id_t	dtrace_probeid_begin;	/* special BEGIN probe */
static dtrace_id_t	dtrace_probeid_end;	/* special END probe */
dtrace_id_t		dtrace_probeid_error;	/* special ERROR probe */
/*
 * DTrace Helper Tracing Variables
 */
uint32_t dtrace_helptrace_next = 0;
uint32_t dtrace_helptrace_nlocals;
char	*dtrace_helptrace_buffer;
int	dtrace_helptrace_bufsize = 512 * 1024;

#ifdef DEBUG
int	dtrace_helptrace_enabled = 1;
#else
int	dtrace_helptrace_enabled = 0;
#endif
/*
 * DTrace Error Hashing
 *
 * On DEBUG kernels, DTrace will track the errors that it has seen in a hash
 * table.  This is very useful for checking coverage of tests that are
 * expected to induce DIF or DOF processing errors, and may be useful for
 * debugging problems in the DIF code generator or in DOF generation.  The
 * error hash may be examined with the ::dtrace_errhash MDB dcmd.
 */
#ifdef DEBUG
static dtrace_errhash_t	dtrace_errhash[DTRACE_ERRHASHSZ];
static const char *dtrace_errlast;
static kthread_t *dtrace_errthread;
static lck_mtx_t dtrace_errlock;
#endif
/*
 * DTrace Macros and Constants
 *
 * These are various macros that are useful in various spots in the
 * implementation, along with a few random constants that have no meaning
 * outside of the implementation.  There is no real structure to this cpp
 * mishmash -- but is there ever?
 */
#define	DTRACE_HASHSTR(hash, probe)	\
	dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs)))

#define	DTRACE_HASHNEXT(hash, probe)	\
	(dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs)

#define	DTRACE_HASHPREV(hash, probe)	\
	(dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs)

#define	DTRACE_HASHEQ(hash, lhs, rhs)	\
	(strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \
	    *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0)

#define	DTRACE_AGGHASHSIZE_SLEW		17
/*
 * The key for a thread-local variable consists of the lower 61 bits of the
 * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL.
 * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never
 * equal to a variable identifier.  This is necessary (but not sufficient) to
 * assure that global associative arrays never collide with thread-local
 * variables.  To guarantee that they cannot collide, we must also define the
 * order for keying dynamic variables.  That order is:
 *
 *   [ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ]
 *
 * Because the variable-key and the tls-key are in orthogonal spaces, there is
 * no way for a global variable key signature to match a thread-local key
 * signature.
 */
#if !defined(__APPLE__)
#define	DTRACE_TLS_THRKEY(where) { \
	uint_t intr = 0; \
	uint_t actv = CPU->cpu_intr_actv >> (LOCK_LEVEL + 1); \
	for (; actv; actv >>= 1) \
		intr++; \
	ASSERT(intr < (1 << 3)); \
	(where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \
	    (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
}
#else
#define	DTRACE_TLS_THRKEY(where) { \
	uint_t intr = ml_at_interrupt_context(); /* XXX just one measly bit */ \
	uint_t thr = (uint_t)current_thread(); \
	uint_t pid = (uint_t)proc_selfpid(); \
	ASSERT(intr < (1 << 3)); \
	(where) = ((((uint64_t)thr << 32 | pid) + DIF_VARIABLE_MAX) & \
	    (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
}
#endif /* __APPLE__ */
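
/*
 * A minimal illustrative sketch: forming a thread-local key with the macro
 * above.  Per the preceding comment, the top three bits carry interrupt
 * state and the low 61 bits carry the thread identity offset by
 * DIF_VARIABLE_MAX.  The helper name is hypothetical.
 */
#if 0
static uint64_t
dtrace_tls_key_example(void)
{
	uint64_t key;

	DTRACE_TLS_THRKEY(key);
	return (key);
}
#endif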
#define	DTRACE_STORE(type, tomax, offset, what) \
	*((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what);
#if !defined(__APPLE__)
#if !(defined(__i386__) || defined (__x86_64__))
#define	DTRACE_ALIGNCHECK(addr, size, flags) \
	if (addr & (size - 1)) { \
		*flags |= CPU_DTRACE_BADALIGN; \
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \
		return (0); \
	}
#else
#define	DTRACE_ALIGNCHECK(addr, size, flags)
#endif

#define	DTRACE_LOADFUNC(bits) \
uint##bits##_t \
dtrace_load##bits(uintptr_t addr) \
{ \
	size_t size = bits / NBBY; \
	uint##bits##_t rval; \
	int i; \
	volatile uint16_t *flags = (volatile uint16_t *) \
	    &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; \
 \
	DTRACE_ALIGNCHECK(addr, size, flags); \
 \
	for (i = 0; i < dtrace_toxranges; i++) { \
		if (addr >= dtrace_toxrange[i].dtt_limit) \
			continue; \
 \
		if (addr + size <= dtrace_toxrange[i].dtt_base) \
			continue; \
 \
		/* \
		 * This address falls within a toxic region; return 0. \
		 */ \
		*flags |= CPU_DTRACE_BADADDR; \
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \
		return (0); \
	} \
 \
	*flags |= CPU_DTRACE_NOFAULT; \
	rval = *((volatile uint##bits##_t *)addr); \
	*flags &= ~CPU_DTRACE_NOFAULT; \
 \
	return (rval); \
}
#else /* __APPLE__ */
#define	DTRACE_ALIGNCHECK(addr, size, flags) \
	if (addr & (MIN(size,4) - 1)) { \
		*flags |= CPU_DTRACE_BADALIGN; \
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \
		return (0); \
	}

#define RECOVER_LABEL(bits) __asm__ volatile("_dtraceLoadRecover" #bits ":" );

#define	DTRACE_LOADFUNC(bits) \
extern vm_offset_t dtraceLoadRecover##bits; \
uint##bits##_t dtrace_load##bits(uintptr_t addr); \
 \
uint##bits##_t \
dtrace_load##bits(uintptr_t addr) \
{ \
	size_t size = bits / NBBY; \
	uint##bits##_t rval = 0; \
	int i; \
	ppnum_t pp; \
	volatile uint16_t *flags = (volatile uint16_t *) \
	    &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; \
 \
	DTRACE_ALIGNCHECK(addr, size, flags); \
 \
	for (i = 0; i < dtrace_toxranges; i++) { \
		if (addr >= dtrace_toxrange[i].dtt_limit) \
			continue; \
 \
		if (addr + size <= dtrace_toxrange[i].dtt_base) \
			continue; \
 \
		/* \
		 * This address falls within a toxic region; return 0. \
		 */ \
		*flags |= CPU_DTRACE_BADADDR; \
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \
		return (0); \
	} \
 \
	pp = pmap_find_phys(kernel_pmap, addr); \
 \
	if (0 == pp || /* pmap_find_phys failed ? */ \
	    !dtxnu_is_RAM_page(pp) /* Backed by RAM? */ ) { \
		*flags |= CPU_DTRACE_BADADDR; \
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \
		return (0); \
	} \
 \
	{ \
	volatile vm_offset_t recover = (vm_offset_t)&dtraceLoadRecover##bits; \
	*flags |= CPU_DTRACE_NOFAULT; \
	recover = dtrace_set_thread_recover(current_thread(), recover); \
	rval = *((volatile uint##bits##_t *)addr); \
	RECOVER_LABEL(bits); \
	(void)dtrace_set_thread_recover(current_thread(), recover); \
	*flags &= ~CPU_DTRACE_NOFAULT; \
	} \
 \
	return (rval); \
}
#endif /* __APPLE__ */
#ifdef _LP64
#define	dtrace_loadptr	dtrace_load64
#else
#define	dtrace_loadptr	dtrace_load32
#endif

#define	DTRACE_DYNHASH_FREE	0
#define	DTRACE_DYNHASH_SINK	1
#define	DTRACE_DYNHASH_VALID	2

#define	DTRACE_MATCH_NEXT	0
#define	DTRACE_MATCH_DONE	1
#define	DTRACE_ANCHORED(probe)	((probe)->dtpr_func[0] != '\0')
#define	DTRACE_STATE_ALIGN	64

#define	DTRACE_FLAGS2FLT(flags) \
	(((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR : \
	((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP : \
	((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO : \
	((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV : \
	((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV : \
	((flags) & CPU_DTRACE_TUPOFLOW) ? DTRACEFLT_TUPOFLOW : \
	((flags) & CPU_DTRACE_BADALIGN) ? DTRACEFLT_BADALIGN : \
	((flags) & CPU_DTRACE_NOSCRATCH) ? DTRACEFLT_NOSCRATCH : \
	DTRACEFLT_UNKNOWN)

#define	DTRACEACT_ISSTRING(act) \
	((act)->dta_kind == DTRACEACT_DIFEXPR && \
	(act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING)
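
/*
 * A minimal illustrative sketch: translating the per-CPU fault flags into a
 * DTrace fault code with DTRACE_FLAGS2FLT().  The helper name is
 * hypothetical.
 */
#if 0
static int
dtrace_fault_code_example(void)
{
	volatile uint16_t *flags =
	    &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

	if (*flags & CPU_DTRACE_FAULT)
		return (DTRACE_FLAGS2FLT(*flags));

	return (DTRACEFLT_UNKNOWN);
}
#endif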
static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
static void dtrace_enabling_provide(dtrace_provider_t *);
static int dtrace_enabling_match(dtrace_enabling_t *, int *);
static void dtrace_enabling_matchall(void);
static dtrace_state_t *dtrace_anon_grab(void);
static uint64_t dtrace_helper(int, dtrace_mstate_t *,
    dtrace_state_t *, uint64_t, uint64_t);
static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
static void dtrace_buffer_drop(dtrace_buffer_t *);
static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
    dtrace_state_t *, dtrace_mstate_t *);
static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
    dtrace_optval_t);
static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
/*
 * DTrace Probe Context Functions
 *
 * These functions are called from probe context.  Because probe context is
 * any context in which C may be called, arbitrarily locks may be held,
 * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
 * As a result, functions called from probe context may only call other DTrace
 * support functions -- they may not interact at all with the system at large.
 * (Note that the ASSERT macro is made probe-context safe by redefining it in
 * terms of dtrace_assfail(), a probe-context safe function.)  If arbitrary
 * loads are to be performed from probe context, they _must_ be in terms of
 * the safe dtrace_load*() variants.
 *
 * Some functions in this block are not actually called from probe context;
 * for these functions, there will be a comment above the function reading
 * "Note:  not called from probe context."
 */
static void
dtrace_panic(const char *format, ...)
{
	va_list alist;

	va_start(alist, format);
	dtrace_vpanic(format, alist);
	va_end(alist);
}

int
dtrace_assfail(const char *a, const char *f, int l)
{
	dtrace_panic("assertion failed: %s, file: %s, line: %d", a, f, l);

	/*
	 * We just need something here that even the most clever compiler
	 * cannot optimize away.
	 */
	return (a[(uintptr_t)f]);
}
/*
 * Atomically increment a specified error counter from probe context.
 */
static void
dtrace_error(uint32_t *counter)
{
	/*
	 * Most counters stored to in probe context are per-CPU counters.
	 * However, there are some error conditions that are sufficiently
	 * arcane that they don't merit per-CPU storage.  If these counters
	 * are incremented concurrently on different CPUs, scalability will be
	 * adversely affected -- but we don't expect them to be white-hot in a
	 * correctly constructed enabling...
	 */
	uint32_t oval, nval;

	do {
		oval = *counter;

		if ((nval = oval + 1) == 0) {
			/*
			 * If the counter would wrap, set it to 1 -- assuring
			 * that the counter is never zero when we have seen
			 * errors.  (The counter must be 32-bits because we
			 * aren't guaranteed a 64-bit compare&swap operation.)
			 * To save this code both the infamy of being fingered
			 * by a priggish news story and the indignity of being
			 * the target of a neo-puritan witch trial, we're
			 * carefully avoiding any colorful description of the
			 * likelihood of this condition -- but suffice it to
			 * say that it is only slightly more likely than the
			 * overflow of predicate cache IDs, as discussed in
			 * dtrace_predicate_create().
			 */
			nval = 1;
		}
	} while (dtrace_cas32(counter, oval, nval) != oval);
}
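
/*
 * A minimal illustrative sketch: bumping a global (non per-CPU) error
 * counter from probe context with dtrace_error().  The counter and helper
 * names here are hypothetical.
 */
#if 0
static uint32_t dtrace_example_errcount;	/* hypothetical counter */

static void
dtrace_error_example(void)
{
	dtrace_error(&dtrace_example_errcount);
}
#endif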
/*
 * Use the DTRACE_LOADFUNC macro to define functions for each of loading a
 * uint8_t, a uint16_t, a uint32_t and a uint64_t.
 */
DTRACE_LOADFUNC(8)
DTRACE_LOADFUNC(16)
DTRACE_LOADFUNC(32)
DTRACE_LOADFUNC(64)
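
/*
 * A minimal illustrative sketch: per the "DTrace Probe Context Functions"
 * comment above, loads performed from probe context must go through the
 * dtrace_load*() variants generated by DTRACE_LOADFUNC rather than
 * dereferencing the address directly.  The helper name is hypothetical.
 */
#if 0
static uint64_t
dtrace_safe_load_example(uintptr_t addr)
{
	/* Faults and toxic ranges are handled inside dtrace_load64(). */
	return (dtrace_load64(addr));
}
#endif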
static int
dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate)
{
	if (dest < mstate->dtms_scratch_base)
		return (0);

	if (dest + size < dest)
		return (0);

	if (dest + size > mstate->dtms_scratch_ptr)
		return (0);

	return (1);
}
static int
dtrace_canstore_statvar(uint64_t addr, size_t sz,
    dtrace_statvar_t **svars, int nsvars)
{
	int i;

	for (i = 0; i < nsvars; i++) {
		dtrace_statvar_t *svar = svars[i];

		if (svar == NULL || svar->dtsv_size == 0)
			continue;

		if (addr - svar->dtsv_data < svar->dtsv_size &&
		    addr + sz <= svar->dtsv_data + svar->dtsv_size)
			return (1);
	}

	return (0);
}
/*
 * Check to see if the address is within a memory region to which a store may
 * be issued.  This includes the DTrace scratch areas, and any DTrace variable
 * region.  The caller of dtrace_canstore() is responsible for performing any
 * alignment checks that are needed before stores are actually executed.
 */
static int
dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate)
{
	uintptr_t a;
	size_t s;

	/*
	 * First, check to see if the address is in scratch space...
	 */
	a = mstate->dtms_scratch_base;
	s = mstate->dtms_scratch_size;

	if (addr - a < s && addr + sz <= a + s)
		return (1);

	/*
	 * Now check to see if it's a dynamic variable.  This check will pick
	 * up both thread-local variables and any global dynamically-allocated
	 * variables.
	 */
	a = (uintptr_t)vstate->dtvs_dynvars.dtds_base;
	s = vstate->dtvs_dynvars.dtds_size;
	if (addr - a < s && addr + sz <= a + s)
		return (1);

	/*
	 * Finally, check the static local and global variables.  These checks
	 * take the longest, so we perform them last.
	 */
	if (dtrace_canstore_statvar(addr, sz,
	    vstate->dtvs_locals, vstate->dtvs_nlocals))
		return (1);

	if (dtrace_canstore_statvar(addr, sz,
	    vstate->dtvs_globals, vstate->dtvs_nglobals))
		return (1);

	return (0);
}
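
/*
 * A minimal illustrative sketch: a caller validates a candidate destination
 * with dtrace_canstore() before issuing the store; per the comment above,
 * any required alignment checking remains the caller's responsibility.  The
 * helper name is hypothetical.
 */
#if 0
static int
dtrace_store_example(uint64_t addr, uint64_t what,
    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
	if (!dtrace_canstore(addr, sizeof (uint64_t), mstate, vstate)) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
		return (0);
	}

	DTRACE_STORE(uint64_t, addr, 0, what);
	return (1);
}
#endif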
/*
 * Compare two strings using safe loads.
 */
static int
dtrace_strncmp(char *s1, char *s2, size_t limit)
{
	uint8_t c1, c2;
	volatile uint16_t *flags;

	if (s1 == s2 || limit == 0)
		return (0);

	flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

	do {
		if (s1 == NULL) {
			c1 = '\0';
		} else {
			c1 = dtrace_load8((uintptr_t)s1++);
		}

		if (s2 == NULL) {
			c2 = '\0';
		} else {
			c2 = dtrace_load8((uintptr_t)s2++);
		}

		if (c1 != c2)
			return (c1 - c2);
	} while (--limit && c1 != '\0' && !(*flags & CPU_DTRACE_FAULT));

	return (0);
}
/*
 * Compute strlen(s) for a string using safe memory accesses.  The additional
 * len parameter is used to specify a maximum length to ensure completion.
 */
static size_t
dtrace_strlen(const char *s, size_t lim)
{
	uint_t len;

	for (len = 0; len != lim; len++)
		if (dtrace_load8((uintptr_t)s++) == '\0')
			break;

	return (len);
}
/*
 * Check if an address falls within a toxic region.
 */
static int
dtrace_istoxic(uintptr_t kaddr, size_t size)
{
	uintptr_t taddr, tsize;
	int i;

	for (i = 0; i < dtrace_toxranges; i++) {
		taddr = dtrace_toxrange[i].dtt_base;
		tsize = dtrace_toxrange[i].dtt_limit - taddr;

		if (kaddr - taddr < tsize) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = kaddr;
			return (1);
		}

		if (taddr - kaddr < size) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = taddr;
			return (1);
		}
	}

	return (0);
}
/*
 * Copy src to dst using safe memory accesses.  The src is assumed to be unsafe
 * memory specified by the DIF program.  The dst is assumed to be safe memory
 * that we can store to directly because it is managed by DTrace.  As with
 * standard bcopy, overlapping copies are handled properly.
 */
static void
dtrace_bcopy(const void *src, void *dst, size_t len)
{
	if (len != 0) {
		uint8_t *s1 = dst;
		const uint8_t *s2 = src;

		if (s1 <= s2) {
			do {
				*s1++ = dtrace_load8((uintptr_t)s2++);
			} while (--len != 0);
		} else {
			s2 += len;
			s1 += len;

			do {
				*--s1 = dtrace_load8((uintptr_t)--s2);
			} while (--len != 0);
		}
	}
}
/*
 * Copy src to dst using safe memory accesses, up to either the specified
 * length, or the point that a nul byte is encountered.  The src is assumed to
 * be unsafe memory specified by the DIF program.  The dst is assumed to be
 * safe memory that we can store to directly because it is managed by DTrace.
 * Unlike dtrace_bcopy(), overlapping regions are not handled.
 */
static void
dtrace_strcpy(const void *src, void *dst, size_t len)
{
	if (len != 0) {
		uint8_t *s1 = dst, c;
		const uint8_t *s2 = src;

		do {
			*s1++ = c = dtrace_load8((uintptr_t)s2++);
		} while (--len != 0 && c != '\0');
	}
}
/*
 * Copy src to dst, deriving the size and type from the specified (BYREF)
 * variable type.  The src is assumed to be unsafe memory specified by the DIF
 * program.  The dst is assumed to be DTrace variable memory that is of the
 * specified type; we assume that we can store to directly.
 */
static void
dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type)
{
	ASSERT(type->dtdt_flags & DIF_TF_BYREF);

	if (type->dtdt_kind == DIF_TYPE_STRING)
		dtrace_strcpy(src, dst, type->dtdt_size);
	else
		dtrace_bcopy(src, dst, type->dtdt_size);
}
/*
 * Compare s1 to s2 using safe memory accesses.  The s1 data is assumed to be
 * unsafe memory specified by the DIF program.  The s2 data is assumed to be
 * safe memory that we can access directly because it is managed by DTrace.
 */
static int
dtrace_bcmp(const void *s1, const void *s2, size_t len)
{
	volatile uint16_t *flags;

	flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

	if (s1 == s2)
		return (0);

	if (s1 == NULL || s2 == NULL)
		return (1);

	if (s1 != s2 && len != 0) {
		const uint8_t *ps1 = s1;
		const uint8_t *ps2 = s2;

		do {
			if (dtrace_load8((uintptr_t)ps1++) != *ps2++)
				return (1);
		} while (--len != 0 && !(*flags & CPU_DTRACE_FAULT));
	}
	return (0);
}
/*
 * Zero the specified region using a simple byte-by-byte loop.  Note that this
 * is for safe DTrace-managed memory only.
 */
static void
dtrace_bzero(void *dst, size_t len)
{
	uchar_t *cp;

	for (cp = dst; len != 0; len--)
		*cp++ = 0;
}
/*
 * This privilege check should be used by actions and subroutines to
 * verify that the user credentials of the process that enabled the
 * invoking ECB match the target credentials
 */
static int
dtrace_priv_proc_common_user(dtrace_state_t *state)
{
	cred_t *cr, *s_cr = state->dts_cred.dcr_cred;

	/*
	 * We should always have a non-NULL state cred here, since if cred
	 * is null (anonymous tracing), we fast-path bypass this routine.
	 */
	ASSERT(s_cr != NULL);

#if !defined(__APPLE__)
	if ((cr = CRED()) != NULL &&
#else
	if ((cr = dtrace_CRED()) != NULL &&
#endif /* __APPLE__ */
	    s_cr->cr_uid == cr->cr_uid &&
	    s_cr->cr_uid == cr->cr_ruid &&
	    s_cr->cr_uid == cr->cr_suid &&
	    s_cr->cr_gid == cr->cr_gid &&
	    s_cr->cr_gid == cr->cr_rgid &&
	    s_cr->cr_gid == cr->cr_sgid)
		return (1);

	return (0);
}
/*
 * This privilege check should be used by actions and subroutines to
 * verify that the zone of the process that enabled the invoking ECB
 * matches the target credentials
 */
static int
dtrace_priv_proc_common_zone(dtrace_state_t *state)
{
	cred_t *cr, *s_cr = state->dts_cred.dcr_cred;

	/*
	 * We should always have a non-NULL state cred here, since if cred
	 * is null (anonymous tracing), we fast-path bypass this routine.
	 */
	ASSERT(s_cr != NULL);

#if !defined(__APPLE__)
	if ((cr = CRED()) != NULL &&
	    s_cr->cr_zone == cr->cr_zone)
		return (1);

	return (0);
#else
	return 1; /* Darwin doesn't do zones. */
#endif /* __APPLE__ */
}
/*
 * This privilege check should be used by actions and subroutines to
 * verify that the process has not setuid or changed credentials.
 */
#if !defined(__APPLE__)
static int
dtrace_priv_proc_common_nocd()
{
	proc_t *proc;

	if ((proc = ttoproc(curthread)) != NULL &&
	    !(proc->p_flag & SNOCD))
		return (1);

	return (0);
}
#else
static int
dtrace_priv_proc_common_nocd(void)
{
	return 1; /* Darwin omits "No Core Dump" flag. */
}
#endif /* __APPLE__ */
static int
dtrace_priv_proc_destructive(dtrace_state_t *state)
{
	int action = state->dts_cred.dcr_action;

#if defined(__APPLE__)
	if (ISSET(current_proc()->p_lflag, P_LNOATTACH))
		goto bad;
#endif /* __APPLE__ */

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) &&
	    dtrace_priv_proc_common_zone(state) == 0)
		goto bad;

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER) == 0) &&
	    dtrace_priv_proc_common_user(state) == 0)
		goto bad;

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG) == 0) &&
	    dtrace_priv_proc_common_nocd() == 0)
		goto bad;

	return (1);

bad:
	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}
static int
dtrace_priv_proc_control(dtrace_state_t *state)
{
#if defined(__APPLE__)
	if (ISSET(current_proc()->p_lflag, P_LNOATTACH))
		goto bad;
#endif /* __APPLE__ */

	if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL)
		return (1);

	if (dtrace_priv_proc_common_zone(state) &&
	    dtrace_priv_proc_common_user(state) &&
	    dtrace_priv_proc_common_nocd())
		return (1);

#if defined(__APPLE__)
bad:
#endif /* __APPLE__ */
	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}
static int
dtrace_priv_proc(dtrace_state_t *state)
{
#if defined(__APPLE__)
	if (ISSET(current_proc()->p_lflag, P_LNOATTACH))
		goto bad;
#endif /* __APPLE__ */

	if (state->dts_cred.dcr_action & DTRACE_CRA_PROC)
		return (1);

#if defined(__APPLE__)
bad:
#endif /* __APPLE__ */
	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}
#if defined(__APPLE__)
/* dtrace_priv_proc() omitting the P_LNOATTACH check. For PID and EXECNAME accesses. */
static int
dtrace_priv_proc_relaxed(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_PROC)
		return (1);

	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}
#endif /* __APPLE__ */
static int
dtrace_priv_kernel(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL)
		return (1);

	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;

	return (0);
}
static int
dtrace_priv_kernel_destructive(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL_DESTRUCTIVE)
		return (1);

	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;

	return (0);
}
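
/*
 * A minimal illustrative sketch: an action implementation gating destructive
 * kernel behavior on the check above.  On failure the check has already set
 * CPU_DTRACE_KPRIV, so the caller simply declines to act.  The helper name
 * is hypothetical.
 */
#if 0
static void
dtrace_destructive_action_example(dtrace_state_t *state)
{
	if (!dtrace_priv_kernel_destructive(state))
		return;

	/* ... perform the destructive action ... */
}
#endif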
/*
 * Note:  not called from probe context.  This function is called
 * asynchronously (and at a regular interval) from outside of probe context to
 * clean the dirty dynamic variable lists on all CPUs.  Dynamic variable
 * cleaning is explained in detail in <sys/dtrace_impl.h>.
 */
#if defined(__APPLE__)
static
#endif /* __APPLE__ */
void
dtrace_dynvar_clean(dtrace_dstate_t *dstate)
{
1123 dtrace_dynvar_t
*dirty
;
1124 dtrace_dstate_percpu_t
*dcpu
;
1127 for (i
= 0; i
< NCPU
; i
++) {
1128 dcpu
= &dstate
->dtds_percpu
[i
];
1130 ASSERT(dcpu
->dtdsc_rinsing
== NULL
);
1133 * If the dirty list is NULL, there is no dirty work to do.
1135 if (dcpu
->dtdsc_dirty
== NULL
)
1139 * If the clean list is non-NULL, then we're not going to do
1140 * any work for this CPU -- it means that there has not been
1141 * a dtrace_dynvar() allocation on this CPU (or from this CPU)
1142 * since the last time we cleaned house.
1144 if (dcpu
->dtdsc_clean
!= NULL
)
1150 * Atomically move the dirty list aside.
1153 dirty
= dcpu
->dtdsc_dirty
;
1156 * Before we zap the dirty list, set the rinsing list.
1157 * (This allows for a potential assertion in
1158 * dtrace_dynvar(): if a free dynamic variable appears
1159 * on a hash chain, either the dirty list or the
1160 * rinsing list for some CPU must be non-NULL.)
1162 dcpu
->dtdsc_rinsing
= dirty
;
1163 dtrace_membar_producer();
1164 } while (dtrace_casptr(&dcpu
->dtdsc_dirty
,
1165 dirty
, NULL
) != dirty
);
1170 * We have no work to do; we can simply return.
1177 for (i
= 0; i
< NCPU
; i
++) {
1178 dcpu
= &dstate
->dtds_percpu
[i
];
1180 if (dcpu
->dtdsc_rinsing
== NULL
)
1184 * We are now guaranteed that no hash chain contains a pointer
1185 * into this dirty list; we can make it clean.
1187 ASSERT(dcpu
->dtdsc_clean
== NULL
);
1188 dcpu
->dtdsc_clean
= dcpu
->dtdsc_rinsing
;
1189 dcpu
->dtdsc_rinsing
= NULL
;
1193 * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make
1194 * sure that all CPUs have seen all of the dtdsc_clean pointers.
1195 * This prevents a race whereby a CPU incorrectly decides that
1196 * the state should be something other than DTRACE_DSTATE_CLEAN
1197 * after dtrace_dynvar_clean() has completed.
1201 dstate
->dtds_state
= DTRACE_DSTATE_CLEAN
;
/*
 * Depending on the value of the op parameter, this function looks-up,
 * allocates or deallocates an arbitrarily-keyed dynamic variable.  If an
 * allocation is requested, this function will return a pointer to a
 * dtrace_dynvar_t corresponding to the allocated variable -- or NULL if no
 * variable can be allocated.  If NULL is returned, the appropriate counter
 * will be incremented.
 */
#if defined(__APPLE__)
static
#endif /* __APPLE__ */
dtrace_dynvar_t *
dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys,
    dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op)
{
1219 uint64_t hashval
= DTRACE_DYNHASH_VALID
;
1220 dtrace_dynhash_t
*hash
= dstate
->dtds_hash
;
1221 dtrace_dynvar_t
*free
, *new_free
, *next
, *dvar
, *start
, *prev
= NULL
;
1222 processorid_t me
= CPU
->cpu_id
, cpu
= me
;
1223 dtrace_dstate_percpu_t
*dcpu
= &dstate
->dtds_percpu
[me
];
1224 size_t bucket
, ksize
;
1225 size_t chunksize
= dstate
->dtds_chunksize
;
1226 uintptr_t kdata
, lock
, nstate
;
1232 * Hash the key. As with aggregations, we use Jenkins' "One-at-a-time"
1233 * algorithm. For the by-value portions, we perform the algorithm in
1234 * 16-bit chunks (as opposed to 8-bit chunks). This speeds things up a
1235 * bit, and seems to have only a minute effect on distribution. For
1236 * the by-reference data, we perform "One-at-a-time" iterating (safely)
1237 * over each referenced byte. It's painful to do this, but it's much
1238 * better than pathological hash distribution. The efficacy of the
1239 * hashing algorithm (and a comparison with other algorithms) may be
1240 * found by running the ::dtrace_dynstat MDB dcmd.
1242 for (i
= 0; i
< nkeys
; i
++) {
1243 if (key
[i
].dttk_size
== 0) {
1244 uint64_t val
= key
[i
].dttk_value
;
1246 hashval
+= (val
>> 48) & 0xffff;
1247 hashval
+= (hashval
<< 10);
1248 hashval
^= (hashval
>> 6);
1250 hashval
+= (val
>> 32) & 0xffff;
1251 hashval
+= (hashval
<< 10);
1252 hashval
^= (hashval
>> 6);
1254 hashval
+= (val
>> 16) & 0xffff;
1255 hashval
+= (hashval
<< 10);
1256 hashval
^= (hashval
>> 6);
1258 hashval
+= val
& 0xffff;
1259 hashval
+= (hashval
<< 10);
1260 hashval
^= (hashval
>> 6);
1263 * This is incredibly painful, but it beats the hell
1264 * out of the alternative.
1266 uint64_t j
, size
= key
[i
].dttk_size
;
1267 uintptr_t base
= (uintptr_t)key
[i
].dttk_value
;
1269 for (j
= 0; j
< size
; j
++) {
1270 hashval
+= dtrace_load8(base
+ j
);
1271 hashval
+= (hashval
<< 10);
1272 hashval
^= (hashval
>> 6);
1277 hashval
+= (hashval
<< 3);
1278 hashval
^= (hashval
>> 11);
1279 hashval
+= (hashval
<< 15);
1282 * There is a remote chance (ideally, 1 in 2^31) that our hashval
1283 * comes out to be one of our two sentinel hash values. If this
1284 * actually happens, we set the hashval to be a value known to be a
1285 * non-sentinel value.
1287 if (hashval
== DTRACE_DYNHASH_FREE
|| hashval
== DTRACE_DYNHASH_SINK
)
1288 hashval
= DTRACE_DYNHASH_VALID
;
1291 * Yes, it's painful to do a divide here. If the cycle count becomes
1292 * important here, tricks can be pulled to reduce it. (However, it's
1293 * critical that hash collisions be kept to an absolute minimum;
1294 * they're much more painful than a divide.) It's better to have a
1295 * solution that generates few collisions and still keeps things
1296 * relatively simple.
1298 bucket
= hashval
% dstate
->dtds_hashsize
;
1300 if (op
== DTRACE_DYNVAR_DEALLOC
) {
1301 volatile uintptr_t *lockp
= &hash
[bucket
].dtdh_lock
;
1304 while ((lock
= *lockp
) & 1)
1307 if (dtrace_casptr((void *)lockp
,
1308 (void *)lock
, (void *)(lock
+ 1)) == (void *)lock
)
1312 dtrace_membar_producer();
1317 lock
= hash
[bucket
].dtdh_lock
;
1319 dtrace_membar_consumer();
1321 start
= hash
[bucket
].dtdh_chain
;
1322 ASSERT(start
!= NULL
&& (start
->dtdv_hashval
== DTRACE_DYNHASH_SINK
||
1323 start
->dtdv_hashval
!= DTRACE_DYNHASH_FREE
||
1324 op
!= DTRACE_DYNVAR_DEALLOC
));
1326 for (dvar
= start
; dvar
!= NULL
; dvar
= dvar
->dtdv_next
) {
1327 dtrace_tuple_t
*dtuple
= &dvar
->dtdv_tuple
;
1328 dtrace_key_t
*dkey
= &dtuple
->dtt_key
[0];
1330 if (dvar
->dtdv_hashval
!= hashval
) {
1331 if (dvar
->dtdv_hashval
== DTRACE_DYNHASH_SINK
) {
1333 * We've reached the sink, and therefore the
1334 * end of the hash chain; we can kick out of
1335 * the loop knowing that we have seen a valid
1336 * snapshot of state.
1338 ASSERT(dvar
->dtdv_next
== NULL
);
1339 ASSERT(dvar
== &dtrace_dynhash_sink
);
1343 if (dvar
->dtdv_hashval
== DTRACE_DYNHASH_FREE
) {
1345 * We've gone off the rails: somewhere along
1346 * the line, one of the members of this hash
1347 * chain was deleted. Note that we could also
1348 * detect this by simply letting this loop run
1349 * to completion, as we would eventually hit
1350 * the end of the dirty list. However, we
1351 * want to avoid running the length of the
1352 * dirty list unnecessarily (it might be quite
1353 * long), so we catch this as early as
1354 * possible by detecting the hash marker. In
1355 * this case, we simply set dvar to NULL and
1356 * break; the conditional after the loop will
1357 * send us back to top.
1366 if (dtuple
->dtt_nkeys
!= nkeys
)
1369 for (i
= 0; i
< nkeys
; i
++, dkey
++) {
1370 if (dkey
->dttk_size
!= key
[i
].dttk_size
)
1371 goto next
; /* size or type mismatch */
1373 if (dkey
->dttk_size
!= 0) {
1375 (void *)(uintptr_t)key
[i
].dttk_value
,
1376 (void *)(uintptr_t)dkey
->dttk_value
,
1380 if (dkey
->dttk_value
!= key
[i
].dttk_value
)
1385 if (op
!= DTRACE_DYNVAR_DEALLOC
)
1388 ASSERT(dvar
->dtdv_next
== NULL
||
1389 dvar
->dtdv_next
->dtdv_hashval
!= DTRACE_DYNHASH_FREE
);
1392 ASSERT(hash
[bucket
].dtdh_chain
!= dvar
);
1393 ASSERT(start
!= dvar
);
1394 ASSERT(prev
->dtdv_next
== dvar
);
1395 prev
->dtdv_next
= dvar
->dtdv_next
;
1397 if (dtrace_casptr(&hash
[bucket
].dtdh_chain
,
1398 start
, dvar
->dtdv_next
) != start
) {
1400 * We have failed to atomically swing the
1401 * hash table head pointer, presumably because
1402 * of a conflicting allocation on another CPU.
1403 * We need to reread the hash chain and try
1410 dtrace_membar_producer();
1413 * Now set the hash value to indicate that it's free.
1415 ASSERT(hash
[bucket
].dtdh_chain
!= dvar
);
1416 dvar
->dtdv_hashval
= DTRACE_DYNHASH_FREE
;
1418 dtrace_membar_producer();
1421 * Set the next pointer to point at the dirty list, and
1422 * atomically swing the dirty pointer to the newly freed dvar.
1425 next
= dcpu
->dtdsc_dirty
;
1426 dvar
->dtdv_next
= next
;
1427 } while (dtrace_casptr(&dcpu
->dtdsc_dirty
, next
, dvar
) != next
);
1430 * Finally, unlock this hash bucket.
1432 ASSERT(hash
[bucket
].dtdh_lock
== lock
);
1434 hash
[bucket
].dtdh_lock
++;
1444 * If dvar is NULL, it is because we went off the rails:
1445 * one of the elements that we traversed in the hash chain
1446 * was deleted while we were traversing it. In this case,
1447 * we assert that we aren't doing a dealloc (deallocs lock
1448 * the hash bucket to prevent themselves from racing with
1449 * one another), and retry the hash chain traversal.
1451 ASSERT(op
!= DTRACE_DYNVAR_DEALLOC
);
1455 if (op
!= DTRACE_DYNVAR_ALLOC
) {
1457 * If we are not to allocate a new variable, we want to
1458 * return NULL now. Before we return, check that the value
1459 * of the lock word hasn't changed. If it has, we may have
1460 * seen an inconsistent snapshot.
1462 if (op
== DTRACE_DYNVAR_NOALLOC
) {
1463 if (hash
[bucket
].dtdh_lock
!= lock
)
1466 ASSERT(op
== DTRACE_DYNVAR_DEALLOC
);
1467 ASSERT(hash
[bucket
].dtdh_lock
== lock
);
1469 hash
[bucket
].dtdh_lock
++;
1476 * We need to allocate a new dynamic variable. The size we need is the
1477 * size of dtrace_dynvar plus the size of nkeys dtrace_key_t's plus the
1478 * size of any auxiliary key data (rounded up to 8-byte alignment) plus
1479 * the size of any referred-to data (dsize). We then round the final
1480 * size up to the chunksize for allocation.
1482 for (ksize
= 0, i
= 0; i
< nkeys
; i
++)
1483 ksize
+= P2ROUNDUP(key
[i
].dttk_size
, sizeof (uint64_t));
1486 * This should be pretty much impossible, but could happen if, say,
1487 * strange DIF specified the tuple. Ideally, this should be an
1488 * assertion and not an error condition -- but that requires that the
1489 * chunksize calculation in dtrace_difo_chunksize() be absolutely
1490 * bullet-proof. (That is, it must not be able to be fooled by
1491 * malicious DIF.) Given the lack of backwards branches in DIF,
1492 * solving this would presumably not amount to solving the Halting
1493 * Problem -- but it still seems awfully hard.
1495 if (sizeof (dtrace_dynvar_t
) + sizeof (dtrace_key_t
) * (nkeys
- 1) +
1496 ksize
+ dsize
> chunksize
) {
1497 dcpu
->dtdsc_drops
++;
1501 nstate
= DTRACE_DSTATE_EMPTY
;
1505 free
= dcpu
->dtdsc_free
;
1508 dtrace_dynvar_t
*clean
= dcpu
->dtdsc_clean
;
1511 if (clean
== NULL
) {
1513 * We're out of dynamic variable space on
1514 * this CPU. Unless we have tried all CPUs,
1515 * we'll try to allocate from a different
1518 switch (dstate
->dtds_state
) {
1519 case DTRACE_DSTATE_CLEAN
: {
1520 void *sp
= &dstate
->dtds_state
;
1525 if (dcpu
->dtdsc_dirty
!= NULL
&&
1526 nstate
== DTRACE_DSTATE_EMPTY
)
1527 nstate
= DTRACE_DSTATE_DIRTY
;
1529 if (dcpu
->dtdsc_rinsing
!= NULL
)
1530 nstate
= DTRACE_DSTATE_RINSING
;
1532 dcpu
= &dstate
->dtds_percpu
[cpu
];
1537 (void) dtrace_cas32(sp
,
1538 DTRACE_DSTATE_CLEAN
, nstate
);
1541 * To increment the correct bean
1542 * counter, take another lap.
1547 case DTRACE_DSTATE_DIRTY
:
1548 dcpu
->dtdsc_dirty_drops
++;
1551 case DTRACE_DSTATE_RINSING
:
1552 dcpu
->dtdsc_rinsing_drops
++;
1555 case DTRACE_DSTATE_EMPTY
:
1556 dcpu
->dtdsc_drops
++;
1560 DTRACE_CPUFLAG_SET(CPU_DTRACE_DROP
);
1565 * The clean list appears to be non-empty. We want to
1566 * move the clean list to the free list; we start by
1567 * moving the clean pointer aside.
1569 if (dtrace_casptr(&dcpu
->dtdsc_clean
,
1570 clean
, NULL
) != clean
) {
1572 * We are in one of two situations:
1574 * (a) The clean list was switched to the
1575 * free list by another CPU.
1577 * (b) The clean list was added to by the
1580 * In either of these situations, we can
1581 * just reattempt the free list allocation.
1586 ASSERT(clean
->dtdv_hashval
== DTRACE_DYNHASH_FREE
);
1589 * Now we'll move the clean list to the free list.
1590 * It's impossible for this to fail: the only way
1591 * the free list can be updated is through this
1592 * code path, and only one CPU can own the clean list.
1593 * Thus, it would only be possible for this to fail if
1594 * this code were racing with dtrace_dynvar_clean().
1595 * (That is, if dtrace_dynvar_clean() updated the clean
1596 * list, and we ended up racing to update the free
1597 * list.) This race is prevented by the dtrace_sync()
1598 * in dtrace_dynvar_clean() -- which flushes the
1599 * owners of the clean lists out before resetting
1602 rval
= dtrace_casptr(&dcpu
->dtdsc_free
, NULL
, clean
);
1603 ASSERT(rval
== NULL
);
1608 new_free
= dvar
->dtdv_next
;
1609 } while (dtrace_casptr(&dcpu
->dtdsc_free
, free
, new_free
) != free
);
1612 * We have now allocated a new chunk. We copy the tuple keys into the
1613 * tuple array and copy any referenced key data into the data space
1614 * following the tuple array. As we do this, we relocate dttk_value
1615 * in the final tuple to point to the key data address in the chunk.
1617 kdata
= (uintptr_t)&dvar
->dtdv_tuple
.dtt_key
[nkeys
];
1618 dvar
->dtdv_data
= (void *)(kdata
+ ksize
);
1619 dvar
->dtdv_tuple
.dtt_nkeys
= nkeys
;
1621 for (i
= 0; i
< nkeys
; i
++) {
1622 dtrace_key_t
*dkey
= &dvar
->dtdv_tuple
.dtt_key
[i
];
1623 size_t kesize
= key
[i
].dttk_size
;
1627 (const void *)(uintptr_t)key
[i
].dttk_value
,
1628 (void *)kdata
, kesize
);
1629 dkey
->dttk_value
= kdata
;
1630 kdata
+= P2ROUNDUP(kesize
, sizeof (uint64_t));
1632 dkey
->dttk_value
= key
[i
].dttk_value
;
1635 dkey
->dttk_size
= kesize
;
1638 ASSERT(dvar
->dtdv_hashval
== DTRACE_DYNHASH_FREE
);
1639 dvar
->dtdv_hashval
= hashval
;
1640 dvar
->dtdv_next
= start
;
1642 if (dtrace_casptr(&hash
[bucket
].dtdh_chain
, start
, dvar
) == start
)
1646 * The cas has failed. Either another CPU is adding an element to
1647 * this hash chain, or another CPU is deleting an element from this
1648 * hash chain. The simplest way to deal with both of these cases
1649 * (though not necessarily the most efficient) is to free our
1650 * allocated block and tail-call ourselves. Note that the free is
1651 * to the dirty list and _not_ to the free list. This is to prevent
1652 * races with allocators, above.
1654 dvar
->dtdv_hashval
= DTRACE_DYNHASH_FREE
;
1656 dtrace_membar_producer();
1659 free
= dcpu
->dtdsc_dirty
;
1660 dvar
->dtdv_next
= free
;
1661 } while (dtrace_casptr(&dcpu
->dtdsc_dirty
, free
, dvar
) != free
);
1663 return (dtrace_dynvar(dstate
, nkeys
, key
, dsize
, op
));
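
/*
 * A minimal illustrative sketch: looking up a dynamic variable keyed by a
 * single by-value key and falling back to an allocation when it does not yet
 * exist.  The key value and the helper name are hypothetical.
 */
#if 0
static uint64_t *
dtrace_dynvar_example(dtrace_dstate_t *dstate, uint64_t keyval)
{
	dtrace_key_t key[1];
	dtrace_dynvar_t *dvar;

	key[0].dttk_value = keyval;
	key[0].dttk_size = 0;		/* zero size denotes a by-value key */

	dvar = dtrace_dynvar(dstate, 1, key, sizeof (uint64_t),
	    DTRACE_DYNVAR_NOALLOC);

	if (dvar == NULL)
		dvar = dtrace_dynvar(dstate, 1, key, sizeof (uint64_t),
		    DTRACE_DYNVAR_ALLOC);

	return (dvar == NULL ? NULL : (uint64_t *)dvar->dtdv_data);
}
#endif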
static void
dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg)
{
	if (nval < *oval)
		*oval = nval;
}

static void
dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg)
{
	if (nval > *oval)
		*oval = nval;
}
1683 dtrace_aggregate_quantize(uint64_t *quanta
, uint64_t nval
, uint64_t incr
)
1685 int i
, zero
= DTRACE_QUANTIZE_ZEROBUCKET
;
1686 int64_t val
= (int64_t)nval
;
1689 for (i
= 0; i
< zero
; i
++) {
1690 if (val
<= DTRACE_QUANTIZE_BUCKETVAL(i
)) {
1696 for (i
= zero
+ 1; i
< DTRACE_QUANTIZE_NBUCKETS
; i
++) {
1697 if (val
< DTRACE_QUANTIZE_BUCKETVAL(i
)) {
1698 quanta
[i
- 1] += incr
;
1703 quanta
[DTRACE_QUANTIZE_NBUCKETS
- 1] += incr
;
1711 dtrace_aggregate_lquantize(uint64_t *lquanta
, uint64_t nval
, uint64_t incr
)
1713 uint64_t arg
= *lquanta
++;
1714 int32_t base
= DTRACE_LQUANTIZE_BASE(arg
);
1715 uint16_t step
= DTRACE_LQUANTIZE_STEP(arg
);
1716 uint16_t levels
= DTRACE_LQUANTIZE_LEVELS(arg
);
1717 int32_t val
= (int32_t)nval
, level
;
1720 ASSERT(levels
!= 0);
1724 * This is an underflow.
1730 level
= (val
- base
) / step
;
1732 if (level
< levels
) {
1733 lquanta
[level
+ 1] += incr
;
1738 * This is an overflow.
1740 lquanta
[levels
+ 1] += incr
;
1745 dtrace_aggregate_avg(uint64_t *data
, uint64_t nval
, uint64_t arg
)
1753 dtrace_aggregate_count(uint64_t *oval
, uint64_t nval
, uint64_t arg
)
1760 dtrace_aggregate_sum(uint64_t *oval
, uint64_t nval
, uint64_t arg
)
/*
 * Aggregate given the tuple in the principal data buffer, and the aggregating
 * action denoted by the specified dtrace_aggregation_t.  The aggregation
 * buffer is specified as the buf parameter.  This routine does not return
 * failure; if there is no space in the aggregation buffer, the data will be
 * dropped, and a corresponding counter incremented.
 */
static void
dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf,
    intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg)
{
1776 dtrace_recdesc_t
*rec
= &agg
->dtag_action
.dta_rec
;
1777 uint32_t i
, ndx
, size
, fsize
;
1778 uint32_t align
= sizeof (uint64_t) - 1;
1779 dtrace_aggbuffer_t
*agb
;
1780 dtrace_aggkey_t
*key
;
1781 uint32_t hashval
= 0, limit
, isstr
;
1782 caddr_t tomax
, data
, kdata
;
1783 dtrace_actkind_t action
;
1784 dtrace_action_t
*act
;
1790 if (!agg
->dtag_hasarg
) {
1792 * Currently, only quantize() and lquantize() take additional
1793 * arguments, and they have the same semantics: an increment
1794 * value that defaults to 1 when not present. If additional
1795 * aggregating actions take arguments, the setting of the
1796 * default argument value will presumably have to become more
1802 action
= agg
->dtag_action
.dta_kind
- DTRACEACT_AGGREGATION
;
1803 size
= rec
->dtrd_offset
- agg
->dtag_base
;
1804 fsize
= size
+ rec
->dtrd_size
;
1806 ASSERT(dbuf
->dtb_tomax
!= NULL
);
1807 data
= dbuf
->dtb_tomax
+ offset
+ agg
->dtag_base
;
1809 if ((tomax
= buf
->dtb_tomax
) == NULL
) {
1810 dtrace_buffer_drop(buf
);
1815 * The metastructure is always at the bottom of the buffer.
1817 agb
= (dtrace_aggbuffer_t
*)(tomax
+ buf
->dtb_size
-
1818 sizeof (dtrace_aggbuffer_t
));
1820 if (buf
->dtb_offset
== 0) {
1822 * We just kludge up approximately 1/8th of the size to be
1823 * buckets. If this guess ends up being routinely
1824 * off-the-mark, we may need to dynamically readjust this
1825 * based on past performance.
1827 uintptr_t hashsize
= (buf
->dtb_size
>> 3) / sizeof (uintptr_t);
1829 if ((uintptr_t)agb
- hashsize
* sizeof (dtrace_aggkey_t
*) <
1830 (uintptr_t)tomax
|| hashsize
== 0) {
1832 * We've been given a ludicrously small buffer;
1833 * increment our drop count and leave.
1835 dtrace_buffer_drop(buf
);
1840 * And now, a pathetic attempt to try to get a an odd (or
1841 * perchance, a prime) hash size for better hash distribution.
1843 if (hashsize
> (DTRACE_AGGHASHSIZE_SLEW
<< 3))
1844 hashsize
-= DTRACE_AGGHASHSIZE_SLEW
;
1846 agb
->dtagb_hashsize
= hashsize
;
1847 agb
->dtagb_hash
= (dtrace_aggkey_t
**)((uintptr_t)agb
-
1848 agb
->dtagb_hashsize
* sizeof (dtrace_aggkey_t
*));
1849 agb
->dtagb_free
= (uintptr_t)agb
->dtagb_hash
;
1851 for (i
= 0; i
< agb
->dtagb_hashsize
; i
++)
1852 agb
->dtagb_hash
[i
] = NULL
;
1855 ASSERT(agg
->dtag_first
!= NULL
);
1856 ASSERT(agg
->dtag_first
->dta_intuple
);
1859 * Calculate the hash value based on the key. Note that we _don't_
1860 * include the aggid in the hashing (but we will store it as part of
1861 * the key). The hashing algorithm is Bob Jenkins' "One-at-a-time"
1862 * algorithm: a simple, quick algorithm that has no known funnels, and
1863 * gets good distribution in practice. The efficacy of the hashing
1864 * algorithm (and a comparison with other algorithms) may be found by
1865 * running the ::dtrace_aggstat MDB dcmd.
1867 for (act
= agg
->dtag_first
; act
->dta_intuple
; act
= act
->dta_next
) {
1868 i
= act
->dta_rec
.dtrd_offset
- agg
->dtag_base
;
1869 limit
= i
+ act
->dta_rec
.dtrd_size
;
1870 ASSERT(limit
<= size
);
1871 isstr
= DTRACEACT_ISSTRING(act
);
1873 for (; i
< limit
; i
++) {
1875 hashval
+= (hashval
<< 10);
1876 hashval
^= (hashval
>> 6);
1878 if (isstr
&& data
[i
] == '\0')
1883 hashval
+= (hashval
<< 3);
1884 hashval
^= (hashval
>> 11);
1885 hashval
+= (hashval
<< 15);
1888 * Yes, the divide here is expensive -- but it's generally the least
1889 * of the performance issues given the amount of data that we iterate
1890 * over to compute hash values, compare data, etc.
1892 ndx
= hashval
% agb
->dtagb_hashsize
;
1894 for (key
= agb
->dtagb_hash
[ndx
]; key
!= NULL
; key
= key
->dtak_next
) {
1895 ASSERT((caddr_t
)key
>= tomax
);
1896 ASSERT((caddr_t
)key
< tomax
+ buf
->dtb_size
);
1898 if (hashval
!= key
->dtak_hashval
|| key
->dtak_size
!= size
)
1901 kdata
= key
->dtak_data
;
1902 ASSERT(kdata
>= tomax
&& kdata
< tomax
+ buf
->dtb_size
);
1904 for (act
= agg
->dtag_first
; act
->dta_intuple
;
1905 act
= act
->dta_next
) {
1906 i
= act
->dta_rec
.dtrd_offset
- agg
->dtag_base
;
1907 limit
= i
+ act
->dta_rec
.dtrd_size
;
1908 ASSERT(limit
<= size
);
1909 isstr
= DTRACEACT_ISSTRING(act
);
1911 for (; i
< limit
; i
++) {
1912 if (kdata
[i
] != data
[i
])
1915 if (isstr
&& data
[i
] == '\0')
1920 if (action
!= key
->dtak_action
) {
1922 * We are aggregating on the same value in the same
1923 * aggregation with two different aggregating actions.
1924 * (This should have been picked up in the compiler,
1925 * so we may be dealing with errant or devious DIF.)
1926 * This is an error condition; we indicate as much,
1929 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP
);
1934 * This is a hit: we need to apply the aggregator to
1935 * the value at this key.
1937 agg
->dtag_aggregate((uint64_t *)(kdata
+ size
), expr
, arg
);
1944 * We didn't find it. We need to allocate some zero-filled space,
1945 * link it into the hash table appropriately, and apply the aggregator
1946 * to the (zero-filled) value.
1948 offs
= buf
->dtb_offset
;
1949 while (offs
& (align
- 1))
1950 offs
+= sizeof (uint32_t);
1953 * If we don't have enough room to both allocate a new key _and_
1954 * its associated data, increment the drop count and return.
1956 if ((uintptr_t)tomax
+ offs
+ fsize
>
1957 agb
->dtagb_free
- sizeof (dtrace_aggkey_t
)) {
1958 dtrace_buffer_drop(buf
);
1963 ASSERT(!(sizeof (dtrace_aggkey_t
) & (sizeof (uintptr_t) - 1)));
1964 key
= (dtrace_aggkey_t
*)(agb
->dtagb_free
- sizeof (dtrace_aggkey_t
));
1965 agb
->dtagb_free
-= sizeof (dtrace_aggkey_t
);
1967 key
->dtak_data
= kdata
= tomax
+ offs
;
1968 buf
->dtb_offset
= offs
+ fsize
;
1971 * Now copy the data across.
1973 *((dtrace_aggid_t
*)kdata
) = agg
->dtag_id
;
1975 for (i
= sizeof (dtrace_aggid_t
); i
< size
; i
++)
1979 * Because strings are not zeroed out by default, we need to iterate
1980 * looking for actions that store strings, and we need to explicitly
1981 * pad these strings out with zeroes.
1983 for (act
= agg
->dtag_first
; act
->dta_intuple
; act
= act
->dta_next
) {
1986 if (!DTRACEACT_ISSTRING(act
))
1989 i
= act
->dta_rec
.dtrd_offset
- agg
->dtag_base
;
1990 limit
= i
+ act
->dta_rec
.dtrd_size
;
1991 ASSERT(limit
<= size
);
1993 for (nul
= 0; i
< limit
; i
++) {
1999 if (data
[i
] != '\0')
2006 for (i
= size
; i
< fsize
; i
++)
2009 key
->dtak_hashval
= hashval
;
2010 key
->dtak_size
= size
;
2011 key
->dtak_action
= action
;
2012 key
->dtak_next
= agb
->dtagb_hash
[ndx
];
2013 agb
->dtagb_hash
[ndx
] = key
;
2016 * Finally, apply the aggregator.
2018 *((uint64_t *)(key
->dtak_data
+ size
)) = agg
->dtag_initial
;
2019 agg
->dtag_aggregate((uint64_t *)(key
->dtak_data
+ size
), expr
, arg
);
/*
 * Given consumer state, this routine finds a speculation in the INACTIVE
 * state and transitions it into the ACTIVE state.  If there is no speculation
 * in the INACTIVE state, 0 is returned.  In this case, no error counter is
 * incremented -- it is up to the caller to take appropriate action.
 */
static int
dtrace_speculation(dtrace_state_t *state)
{
	int i = 0;
2032 dtrace_speculation_state_t current
;
2033 uint32_t *stat
= &state
->dts_speculations_unavail
, count
;
2035 while (i
< state
->dts_nspeculations
) {
2036 dtrace_speculation_t
*spec
= &state
->dts_speculations
[i
];
2038 current
= spec
->dtsp_state
;
2040 if (current
!= DTRACESPEC_INACTIVE
) {
2041 if (current
== DTRACESPEC_COMMITTINGMANY
||
2042 current
== DTRACESPEC_COMMITTING
||
2043 current
== DTRACESPEC_DISCARDING
)
2044 stat
= &state
->dts_speculations_busy
;
2049 if (dtrace_cas32((uint32_t *)&spec
->dtsp_state
,
2050 current
, DTRACESPEC_ACTIVE
) == current
)
2055 * We couldn't find a speculation. If we found as much as a single
2056 * busy speculation buffer, we'll attribute this failure as "busy"
2057 * instead of "unavail".
2061 } while (dtrace_cas32(stat
, count
, count
+ 1) != count
);
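
/*
 * For orientation, a minimal (illustrative, not taken from this file) D
 * sketch of the consumer-visible flow that the speculation routines in this
 * group implement:
 *
 *	syscall::open:entry	{ self->spec = speculation(); }
 *	syscall::open:return	{ speculate(self->spec); trace(errno); }
 *	syscall::open:return
 *	/errno != 0/		{ commit(self->spec); }
 *	syscall::open:return
 *	/errno == 0/		{ discard(self->spec); }
 *
 * speculation() is backed by dtrace_speculation() above; commit() and
 * discard() are backed by dtrace_speculation_commit() and
 * dtrace_speculation_discard() below.
 */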
/*
 * This routine commits an active speculation.  If the specified speculation
 * is not in a valid state to perform a commit(), this routine will silently do
 * nothing.  The state of the specified speculation is transitioned according
 * to the state transition diagram outlined in <sys/dtrace_impl.h>
 */
static void
dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_buffer_t *src, *dest;
	uintptr_t daddr, saddr, dlimit;
	dtrace_speculation_state_t current, new;
	intptr_t offs;

	if (which == 0)
		return;

	if (which > state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}

	spec = &state->dts_speculations[which - 1];
	src = &spec->dtsp_buffer[cpu];
	dest = &state->dts_buffer[cpu];

	do {
		current = spec->dtsp_state;

		if (current == DTRACESPEC_COMMITTINGMANY)
			break;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_DISCARDING:
			return;

		case DTRACESPEC_COMMITTING:
			/*
			 * This is only possible if we are (a) commit()'ing
			 * without having done a prior speculate() on this CPU
			 * and (b) racing with another commit() on a different
			 * CPU.  There's nothing to do -- we just assert that
			 * our offset is 0.
			 */
			ASSERT(src->dtb_offset == 0);
			return;

		case DTRACESPEC_ACTIVE:
			new = DTRACESPEC_COMMITTING;
			break;

		case DTRACESPEC_ACTIVEONE:
			/*
			 * This speculation is active on one CPU.  If our
			 * buffer offset is non-zero, we know that the one CPU
			 * must be us.  Otherwise, we are committing on a
			 * different CPU from the speculate(), and we must
			 * rely on being asynchronously cleaned.
			 */
			if (src->dtb_offset != 0) {
				new = DTRACESPEC_COMMITTING;
				break;
			}
			/*FALLTHROUGH*/

		case DTRACESPEC_ACTIVEMANY:
			new = DTRACESPEC_COMMITTINGMANY;
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	/*
	 * We have set the state to indicate that we are committing this
	 * speculation.  Now reserve the necessary space in the destination
	 * buffer.
	 */
	if ((offs = dtrace_buffer_reserve(dest, src->dtb_offset,
	    sizeof (uint64_t), state, NULL)) < 0) {
		dtrace_buffer_drop(dest);
		goto out;
	}

	/*
	 * We have the space; copy the buffer across.  (Note that this is a
	 * highly suboptimal bcopy(); in the unlikely event that this becomes
	 * a serious performance issue, a high-performance DTrace-specific
	 * bcopy() should obviously be invented.)
	 */
	daddr = (uintptr_t)dest->dtb_tomax + offs;
	dlimit = daddr + src->dtb_offset;
	saddr = (uintptr_t)src->dtb_tomax;

	/*
	 * First, the aligned portion.
	 */
	while (dlimit - daddr >= sizeof (uint64_t)) {
		*((uint64_t *)daddr) = *((uint64_t *)saddr);

		daddr += sizeof (uint64_t);
		saddr += sizeof (uint64_t);
	}

	/*
	 * Now any left-over bit...
	 */
	while (dlimit - daddr)
		*((uint8_t *)daddr++) = *((uint8_t *)saddr++);

	/*
	 * Finally, commit the reserved space in the destination buffer.
	 */
	dest->dtb_offset = offs + src->dtb_offset;

out:
	/*
	 * If we're lucky enough to be the only active CPU on this speculation
	 * buffer, we can just set the state back to DTRACESPEC_INACTIVE.
	 */
	if (current == DTRACESPEC_ACTIVE ||
	    (current == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) {
		uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state,
		    DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE);

		ASSERT(rval == DTRACESPEC_COMMITTING);
	}

	src->dtb_offset = 0;
	src->dtb_xamot_drops += src->dtb_drops;
	src->dtb_drops = 0;
}
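
/*
 * To summarize the transitions effected above: an ACTIVE speculation, or an
 * ACTIVEONE speculation committed from the CPU that speculated, passes
 * through COMMITTING and is returned to INACTIVE here once its buffer has
 * been copied.  An ACTIVEONE speculation committed from a different CPU and
 * any ACTIVEMANY speculation become COMMITTINGMANY, and are only returned to
 * INACTIVE by the asynchronous cleaner (dtrace_speculation_clean(), below)
 * after every CPU's speculative buffer has been drained.
 */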
/*
 * This routine discards an active speculation.  If the specified speculation
 * is not in a valid state to perform a discard(), this routine will silently
 * do nothing.  The state of the specified speculation is transitioned
 * according to the state transition diagram outlined in <sys/dtrace_impl.h>
 */
static void
dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_speculation_state_t current, new;
	dtrace_buffer_t *buf;

	if (which == 0)
		return;

	if (which > state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}

	spec = &state->dts_speculations[which - 1];
	buf = &spec->dtsp_buffer[cpu];

	do {
		current = spec->dtsp_state;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_COMMITTINGMANY:
		case DTRACESPEC_COMMITTING:
		case DTRACESPEC_DISCARDING:
			return;

		case DTRACESPEC_ACTIVE:
		case DTRACESPEC_ACTIVEMANY:
			new = DTRACESPEC_DISCARDING;
			break;

		case DTRACESPEC_ACTIVEONE:
			if (buf->dtb_offset != 0) {
				new = DTRACESPEC_INACTIVE;
			} else {
				new = DTRACESPEC_DISCARDING;
			}
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	buf->dtb_offset = 0;
}
/*
 * Note: not called from probe context.  This function is called
 * asynchronously from cross call context to clean any speculations that are
 * in the COMMITTINGMANY or DISCARDING states.  These speculations may not be
 * transitioned back to the INACTIVE state until all CPUs have cleaned the
 * speculation.
 */
static void
dtrace_speculation_clean_here(dtrace_state_t *state)
{
	dtrace_icookie_t cookie;
	processorid_t cpu = CPU->cpu_id;
	dtrace_buffer_t *dest = &state->dts_buffer[cpu];
	dtrace_specid_t i;

	cookie = dtrace_interrupt_disable();

	if (dest->dtb_tomax == NULL) {
		dtrace_interrupt_enable(cookie);
		return;
	}

	for (i = 0; i < state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];
		dtrace_buffer_t *src = &spec->dtsp_buffer[cpu];

		if (src->dtb_tomax == NULL)
			continue;

		if (spec->dtsp_state == DTRACESPEC_DISCARDING) {
			src->dtb_offset = 0;
			continue;
		}

		if (spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
			continue;

		if (src->dtb_offset == 0)
			continue;

		dtrace_speculation_commit(state, cpu, i + 1);
	}

	dtrace_interrupt_enable(cookie);
}
/*
 * Note: not called from probe context.  This function is called
 * asynchronously (and at a regular interval) to clean any speculations that
 * are in the COMMITTINGMANY or DISCARDING states.  If it discovers that there
 * is work to be done, it cross calls all CPUs to perform that work;
 * COMMITTINGMANY and DISCARDING speculations may not be transitioned back to
 * the INACTIVE state until they have been cleaned by all CPUs.
 */
static void
dtrace_speculation_clean(dtrace_state_t *state)
{
	int work = 0, rv;
	dtrace_specid_t i;

	for (i = 0; i < state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];

		ASSERT(!spec->dtsp_cleaning);

		if (spec->dtsp_state != DTRACESPEC_DISCARDING &&
		    spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
			continue;

		work++;
		spec->dtsp_cleaning = 1;
	}

	if (!work)
		return;

	dtrace_xcall(DTRACE_CPUALL,
	    (dtrace_xcall_t)dtrace_speculation_clean_here, state);

	/*
	 * We now know that all CPUs have committed or discarded their
	 * speculation buffers, as appropriate.  We can now set the state
	 * to inactive.
	 */
	for (i = 0; i < state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];
		dtrace_speculation_state_t current, new;

		if (!spec->dtsp_cleaning)
			continue;

		current = spec->dtsp_state;
		ASSERT(current == DTRACESPEC_DISCARDING ||
		    current == DTRACESPEC_COMMITTINGMANY);

		new = DTRACESPEC_INACTIVE;

		rv = dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new);
		ASSERT(rv == current);
		spec->dtsp_cleaning = 0;
	}
}
/*
 * Called as part of a speculate() to get the speculative buffer associated
 * with a given speculation.  Returns NULL if the specified speculation is not
 * in an ACTIVE state.  If the speculation is in the ACTIVEONE state -- and
 * the active CPU is not the specified CPU -- the speculation will be
 * atomically transitioned into the ACTIVEMANY state.
 */
static dtrace_buffer_t *
dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_speculation_state_t current, new;
	dtrace_buffer_t *buf;

	if (which == 0)
		return (NULL);

	if (which > state->dts_nspeculations) {
		cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return (NULL);
	}

	spec = &state->dts_speculations[which - 1];
	buf = &spec->dtsp_buffer[cpuid];

	do {
		current = spec->dtsp_state;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_COMMITTINGMANY:
		case DTRACESPEC_DISCARDING:
			return (NULL);

		case DTRACESPEC_COMMITTING:
			ASSERT(buf->dtb_offset == 0);
			return (NULL);

		case DTRACESPEC_ACTIVEONE:
			/*
			 * This speculation is currently active on one CPU.
			 * Check the offset in the buffer; if it's non-zero,
			 * that CPU must be us (and we leave the state alone).
			 * If it's zero, assume that we're starting on a new
			 * CPU -- and change the state to indicate that the
			 * speculation is active on more than one CPU.
			 */
			if (buf->dtb_offset != 0)
				return (buf);

			new = DTRACESPEC_ACTIVEMANY;
			break;

		case DTRACESPEC_ACTIVEMANY:
			return (buf);

		case DTRACESPEC_ACTIVE:
			new = DTRACESPEC_ACTIVEONE;
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	ASSERT(new == DTRACESPEC_ACTIVEONE || new == DTRACESPEC_ACTIVEMANY);
	return (buf);
}
/*
 * This function implements the DIF emulator's variable lookups.  The emulator
 * passes a reserved variable identifier and optional built-in array index.
 */
static uint64_t
dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
    uint64_t ndx)
{
	/*
	 * If we're accessing one of the uncached arguments, we'll turn this
	 * into a reference in the args array.
	 */
	if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) {
		ndx = v - DIF_VAR_ARG0;
		v = DIF_VAR_ARGS;
	}

	switch (v) {
	case DIF_VAR_ARGS:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS);
		if (ndx >= sizeof (mstate->dtms_arg) /
		    sizeof (mstate->dtms_arg[0])) {
#if !defined(__APPLE__)
			int aframes = mstate->dtms_probe->dtpr_aframes + 2;
#else
			/* Account for introduction of __dtrace_probe() on xnu. */
			int aframes = mstate->dtms_probe->dtpr_aframes + 3;
#endif /* __APPLE__ */
			dtrace_provider_t *pv;
			uint64_t val;

			pv = mstate->dtms_probe->dtpr_provider;
			if (pv->dtpv_pops.dtps_getargval != NULL)
				val = pv->dtpv_pops.dtps_getargval(pv->dtpv_arg,
				    mstate->dtms_probe->dtpr_id,
				    mstate->dtms_probe->dtpr_arg, ndx, aframes);
#if defined(__APPLE__)
			/* Special case access of arg5 as passed to dtrace_probeid_error (which see.) */
			else if (mstate->dtms_probe->dtpr_id == dtrace_probeid_error && ndx == 5) {
				return ((dtrace_state_t *)(mstate->dtms_arg[0]))->dts_arg_error_illval;
			}
#endif /* __APPLE__ */
			else
				val = dtrace_getarg(ndx, aframes);

			/*
			 * This is regrettably required to keep the compiler
			 * from tail-optimizing the call to dtrace_getarg().
			 * The condition always evaluates to true, but the
			 * compiler has no way of figuring that out a priori.
			 * (None of this would be necessary if the compiler
			 * could be relied upon to _always_ tail-optimize
			 * the call to dtrace_getarg() -- but it can't.)
			 */
			if (mstate->dtms_probe != NULL)
				return (val);

			ASSERT(0);
		}

		return (mstate->dtms_arg[ndx]);
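
		/*
		 * Note (summarizing the logic above): only the first few
		 * arguments are cached in dtms_arg[] when the probe fires;
		 * higher-numbered arguments are recomputed on demand, either
		 * through the provider's dtps_getargval() entry point or by
		 * walking the stack with dtrace_getarg() -- which is why the
		 * artificial-frame count (aframes) differs between Solaris
		 * and xnu above.
		 */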
#if !defined(__APPLE__)
	case DIF_VAR_UREGS: {
		klwp_t *lwp;

		if (!dtrace_priv_proc(state))
			return (0);

		if ((lwp = curthread->t_lwp) == NULL) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = NULL;
			return (0);
		}

		return (dtrace_getreg(lwp->lwp_regs, ndx));
	}
#else
	case DIF_VAR_UREGS: {
		thread_t thread;

		if (!dtrace_priv_proc(state))
			return (0);

		if ((thread = current_thread()) == NULL) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = 0;
			return (0);
		}

		return (dtrace_getreg(find_user_regs(thread), ndx));
	}
#endif /* __APPLE__ */

#if !defined(__APPLE__)
	case DIF_VAR_CURTHREAD:
		if (!dtrace_priv_kernel(state))
			return (0);
		return ((uint64_t)(uintptr_t)curthread);
#else
	case DIF_VAR_CURTHREAD:
		if (!dtrace_priv_kernel(state))
			return (0);
		return ((uint64_t)(uintptr_t)current_thread());
#endif /* __APPLE__ */

	case DIF_VAR_TIMESTAMP:
		if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) {
			mstate->dtms_timestamp = dtrace_gethrtime();
			mstate->dtms_present |= DTRACE_MSTATE_TIMESTAMP;
		}
		return (mstate->dtms_timestamp);

#if !defined(__APPLE__)
	case DIF_VAR_VTIMESTAMP:
		ASSERT(dtrace_vtime_references != 0);
		return (curthread->t_dtrace_vtime);
#else
	case DIF_VAR_VTIMESTAMP:
		ASSERT(dtrace_vtime_references != 0);
		return (dtrace_get_thread_vtime(current_thread()));
#endif /* __APPLE__ */

	case DIF_VAR_WALLTIMESTAMP:
		if (!(mstate->dtms_present & DTRACE_MSTATE_WALLTIMESTAMP)) {
			mstate->dtms_walltimestamp = dtrace_gethrestime();
			mstate->dtms_present |= DTRACE_MSTATE_WALLTIMESTAMP;
		}
		return (mstate->dtms_walltimestamp);

	case DIF_VAR_IPL:
		if (!dtrace_priv_kernel(state))
			return (0);
		if (!(mstate->dtms_present & DTRACE_MSTATE_IPL)) {
			mstate->dtms_ipl = dtrace_getipl();
			mstate->dtms_present |= DTRACE_MSTATE_IPL;
		}
		return (mstate->dtms_ipl);

	case DIF_VAR_EPID:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_EPID);
		return (mstate->dtms_epid);

	case DIF_VAR_ID:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
		return (mstate->dtms_probe->dtpr_id);
	case DIF_VAR_STACKDEPTH:
		if (!dtrace_priv_kernel(state))
			return (0);
		if (!(mstate->dtms_present & DTRACE_MSTATE_STACKDEPTH)) {
#if !defined(__APPLE__)
			int aframes = mstate->dtms_probe->dtpr_aframes + 2;
#else
			/* Account for introduction of __dtrace_probe() on xnu. */
			int aframes = mstate->dtms_probe->dtpr_aframes + 3;
#endif /* __APPLE__ */

			mstate->dtms_stackdepth = dtrace_getstackdepth(aframes);
			mstate->dtms_present |= DTRACE_MSTATE_STACKDEPTH;
		}
		return (mstate->dtms_stackdepth);

	case DIF_VAR_USTACKDEPTH:
		if (!dtrace_priv_proc(state))
			return (0);
		if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) {
			/*
			 * See comment in DIF_VAR_PID.
			 */
			if (DTRACE_ANCHORED(mstate->dtms_probe) &&
			    CPU_ON_INTR(CPU)) {
				mstate->dtms_ustackdepth = 0;
			} else {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
				mstate->dtms_ustackdepth =
				    dtrace_getustackdepth();
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
			}
			mstate->dtms_present |= DTRACE_MSTATE_USTACKDEPTH;
		}
		return (mstate->dtms_ustackdepth);

	case DIF_VAR_CALLER:
		if (!dtrace_priv_kernel(state))
			return (0);
		if (!(mstate->dtms_present & DTRACE_MSTATE_CALLER)) {
#if !defined(__APPLE__)
			int aframes = mstate->dtms_probe->dtpr_aframes + 2;
#else
			/* Account for introduction of __dtrace_probe() on xnu. */
			int aframes = mstate->dtms_probe->dtpr_aframes + 3;
#endif /* __APPLE__ */

			if (!DTRACE_ANCHORED(mstate->dtms_probe)) {
				/*
				 * If this is an unanchored probe, we are
				 * required to go through the slow path:
				 * dtrace_caller() only guarantees correct
				 * results for anchored probes.
				 */
				pc_t caller[2];

				dtrace_getpcstack(caller, 2, aframes,
				    (uint32_t *)(uintptr_t)mstate->dtms_arg[0]);
				mstate->dtms_caller = caller[1];
			} else if ((mstate->dtms_caller =
			    dtrace_caller(aframes)) == -1) {
				/*
				 * We have failed to do this the quick way;
				 * we must resort to the slower approach of
				 * calling dtrace_getpcstack().
				 */
				pc_t caller;

				dtrace_getpcstack(&caller, 1, aframes, NULL);
				mstate->dtms_caller = caller;
			}

			mstate->dtms_present |= DTRACE_MSTATE_CALLER;
		}
		return (mstate->dtms_caller);
	case DIF_VAR_UCALLER:
		if (!dtrace_priv_proc(state))
			return (0);
		if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) {
			uint64_t ustack[3];

			/*
			 * dtrace_getupcstack() fills in the first uint64_t
			 * with the current PID.  The second uint64_t will
			 * be the program counter at user-level.  The third
			 * uint64_t will contain the caller, which is what
			 * we're after.
			 */
			dtrace_getupcstack(ustack, 3);
			mstate->dtms_ucaller = ustack[2];
			mstate->dtms_present |= DTRACE_MSTATE_UCALLER;
		}
		return (mstate->dtms_ucaller);

	case DIF_VAR_PROBEPROV:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
		return ((uint64_t)(uintptr_t)
		    mstate->dtms_probe->dtpr_provider->dtpv_name);

	case DIF_VAR_PROBEMOD:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
		return ((uint64_t)(uintptr_t)
		    mstate->dtms_probe->dtpr_mod);

	case DIF_VAR_PROBEFUNC:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
		return ((uint64_t)(uintptr_t)
		    mstate->dtms_probe->dtpr_func);

	case DIF_VAR_PROBENAME:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
		return ((uint64_t)(uintptr_t)
		    mstate->dtms_probe->dtpr_name);
#if !defined(__APPLE__)
	case DIF_VAR_PID:
		if (!dtrace_priv_proc(state))
			return (0);

		/*
		 * Note that we are assuming that an unanchored probe is
		 * always due to a high-level interrupt.  (And we're assuming
		 * that there is only a single high level interrupt.)
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return (pid0.pid_id);

		/*
		 * It is always safe to dereference one's own t_procp pointer:
		 * it always points to a valid, allocated proc structure.
		 * Further, it is always safe to dereference the p_pidp member
		 * of one's own proc structure.  (These are truisms because
		 * threads and processes don't clean up their own state --
		 * they leave that task to whomever reaps them.)
		 */
		return ((uint64_t)curthread->t_procp->p_pidp->pid_id);
#else
	case DIF_VAR_PID:
		if (!dtrace_priv_proc_relaxed(state))
			return (0);

		/*
		 * Note that we are assuming that an unanchored probe is
		 * always due to a high-level interrupt.  (And we're assuming
		 * that there is only a single high level interrupt.)
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			/* Anchored probe that fires while on an interrupt accrues to process 0 */
			return (0);

		return ((uint64_t)proc_selfpid());
#endif /* __APPLE__ */

#if !defined(__APPLE__)
	case DIF_VAR_PPID:
		if (!dtrace_priv_proc(state))
			return (0);

		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return (pid0.pid_id);

		return ((uint64_t)curthread->t_procp->p_ppid);
#else
	case DIF_VAR_PPID:
		if (!dtrace_priv_proc_relaxed(state))
			return (0);

		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return (0);

		return ((uint64_t)(uintptr_t)(current_proc()->p_ppid));
#endif /* __APPLE__ */

#if !defined(__APPLE__)
	case DIF_VAR_TID:
		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return (0);

		return ((uint64_t)curthread->t_tid);
#else
	case DIF_VAR_TID:
		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return (0);

		return ((uint64_t)(uintptr_t)current_thread()); /* Is user's (pthread_t)t->kernel_thread */
#endif /* __APPLE__ */
#if !defined(__APPLE__)
	case DIF_VAR_EXECNAME:
		if (!dtrace_priv_proc(state))
			return (0);

		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return ((uint64_t)(uintptr_t)p0.p_user.u_comm);

		/*
		 * It is always safe to dereference one's own t_procp pointer:
		 * it always points to a valid, allocated proc structure.
		 * (This is true because threads don't clean up their own
		 * state -- they leave that task to whomever reaps them.)
		 */
		return ((uint64_t)(uintptr_t)
		    curthread->t_procp->p_user.u_comm);
#else
	case DIF_VAR_EXECNAME: {
		char *xname = (char *)mstate->dtms_scratch_ptr;
		size_t scratch_size = MAXCOMLEN+1;

		/* The scratch allocation's lifetime is that of the clause. */
		if (mstate->dtms_scratch_ptr + scratch_size >
		    mstate->dtms_scratch_base + mstate->dtms_scratch_size)
			return (0);

		if (!dtrace_priv_proc_relaxed(state))
			return (0);

		mstate->dtms_scratch_ptr += scratch_size;
		proc_selfname( xname, MAXCOMLEN );

		return ((uint64_t)(uintptr_t)xname);
	}
#endif /* __APPLE__ */

#if !defined(__APPLE__)
	case DIF_VAR_ZONENAME:
		if (!dtrace_priv_proc(state))
			return (0);

		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return ((uint64_t)(uintptr_t)p0.p_zone->zone_name);

		/*
		 * It is always safe to dereference one's own t_procp pointer:
		 * it always points to a valid, allocated proc structure.
		 * (This is true because threads don't clean up their own
		 * state -- they leave that task to whomever reaps them.)
		 */
		return ((uint64_t)(uintptr_t)
		    curthread->t_procp->p_zone->zone_name);
#else
	case DIF_VAR_ZONENAME:
		if (!dtrace_priv_proc(state))
			return (0);

		return ((uint64_t)(uintptr_t)NULL); /* Darwin doesn't do "zones" */
#endif /* __APPLE__ */
#if !defined(__APPLE__)
	case DIF_VAR_UID:
		if (!dtrace_priv_proc(state))
			return (0);

		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return ((uint64_t)p0.p_cred->cr_uid);

		return ((uint64_t)curthread->t_cred->cr_uid);
#else
	case DIF_VAR_UID:
		if (!dtrace_priv_proc(state))
			return (0);

		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return (0);

		if (dtrace_CRED() != NULL)
			return ((uint64_t)kauth_getuid());
#endif /* __APPLE__ */

#if !defined(__APPLE__)
	case DIF_VAR_GID:
		if (!dtrace_priv_proc(state))
			return (0);

		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return ((uint64_t)p0.p_cred->cr_gid);

		return ((uint64_t)curthread->t_cred->cr_gid);
#else
	case DIF_VAR_GID:
		if (!dtrace_priv_proc(state))
			return (0);

		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return (0);

		if (dtrace_CRED() != NULL)
			return ((uint64_t)kauth_getgid());
#endif /* __APPLE__ */

#if !defined(__APPLE__)
	case DIF_VAR_ERRNO: {
		klwp_t *lwp;

		if (!dtrace_priv_proc(state))
			return (0);

		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return (0);

		if ((lwp = curthread->t_lwp) == NULL)
			return (0);

		return ((uint64_t)lwp->lwp_errno);
	}
#else
	case DIF_VAR_ERRNO: {
		uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
		if (!dtrace_priv_proc(state))
			return (0);

		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return (0);

		return (uthread ? uthread->t_dtrace_errno : -1);
	}
#endif /* __APPLE__ */

	default:
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return (0);
	}
}
/*
 * Emulate the execution of DTrace ID subroutines invoked by the call opcode.
 * Notice that we don't bother validating the proper number of arguments or
 * their types in the tuple stack.  This isn't needed because all argument
 * interpretation is safe because of our load safety -- the worst that can
 * happen is that a bogus program can obtain bogus results.
 */
static void
dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
    dtrace_key_t *tupregs, int nargs,
    dtrace_mstate_t *mstate, dtrace_state_t *state)
{
	volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
#if !defined(__APPLE__)
	volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
#else
	volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
#endif /* __APPLE__ */

#if !defined(__APPLE__)
	union {
		mutex_impl_t mi;
		uint64_t mx;
	} m;

	union {
		krwlock_t ri;
		uintptr_t rw;
	} r;
#else
/* XXX awaits lock/mutex work */
#endif /* __APPLE__ */

	switch (subr) {
	case DIF_SUBR_RAND:
		regs[rd] = (dtrace_gethrtime() * 2416 + 374441) % 1771875;
		break;
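
	/*
	 * (The expression above is a simple linear-congruential-style
	 * scramble of the high-resolution timestamp; rand() is meant for
	 * cheap probabilistic sampling from D, not for anything that needs
	 * real randomness.)
	 */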
2990 #if !defined(__APPLE__)
2991 case DIF_SUBR_MUTEX_OWNED
:
2992 m
.mx
= dtrace_load64(tupregs
[0].dttk_value
);
2993 if (MUTEX_TYPE_ADAPTIVE(&m
.mi
))
2994 regs
[rd
] = MUTEX_OWNER(&m
.mi
) != MUTEX_NO_OWNER
;
2996 regs
[rd
] = LOCK_HELD(&m
.mi
.m_spin
.m_spinlock
);
2999 case DIF_SUBR_MUTEX_OWNER
:
3000 m
.mx
= dtrace_load64(tupregs
[0].dttk_value
);
3001 if (MUTEX_TYPE_ADAPTIVE(&m
.mi
) &&
3002 MUTEX_OWNER(&m
.mi
) != MUTEX_NO_OWNER
)
3003 regs
[rd
] = (uintptr_t)MUTEX_OWNER(&m
.mi
);
3008 case DIF_SUBR_MUTEX_TYPE_ADAPTIVE
:
3009 m
.mx
= dtrace_load64(tupregs
[0].dttk_value
);
3010 regs
[rd
] = MUTEX_TYPE_ADAPTIVE(&m
.mi
);
3013 case DIF_SUBR_MUTEX_TYPE_SPIN
:
3014 m
.mx
= dtrace_load64(tupregs
[0].dttk_value
);
3015 regs
[rd
] = MUTEX_TYPE_SPIN(&m
.mi
);
3018 case DIF_SUBR_RW_READ_HELD
: {
3021 r
.rw
= dtrace_loadptr(tupregs
[0].dttk_value
);
3022 regs
[rd
] = _RW_READ_HELD(&r
.ri
, tmp
);
3026 case DIF_SUBR_RW_WRITE_HELD
:
3027 r
.rw
= dtrace_loadptr(tupregs
[0].dttk_value
);
3028 regs
[rd
] = _RW_WRITE_HELD(&r
.ri
);
3031 case DIF_SUBR_RW_ISWRITER
:
3032 r
.rw
= dtrace_loadptr(tupregs
[0].dttk_value
);
3033 regs
[rd
] = _RW_ISWRITER(&r
.ri
);
3036 /* XXX awaits lock/mutex work */
3037 #endif /* __APPLE__ */
3039 case DIF_SUBR_BCOPY
: {
3041 * We need to be sure that the destination is in the scratch
3042 * region -- no other region is allowed.
3044 uintptr_t src
= tupregs
[0].dttk_value
;
3045 uintptr_t dest
= tupregs
[1].dttk_value
;
3046 size_t size
= tupregs
[2].dttk_value
;
3048 if (!dtrace_inscratch(dest
, size
, mstate
)) {
3049 *flags
|= CPU_DTRACE_BADADDR
;
3054 dtrace_bcopy((void *)src
, (void *)dest
, size
);
3058 case DIF_SUBR_ALLOCA
:
3059 case DIF_SUBR_COPYIN
: {
3060 uintptr_t dest
= P2ROUNDUP(mstate
->dtms_scratch_ptr
, 8);
3062 tupregs
[subr
== DIF_SUBR_ALLOCA
? 0 : 1].dttk_value
;
3063 size_t scratch_size
= (dest
- mstate
->dtms_scratch_ptr
) + size
;
3066 * This action doesn't require any credential checks since
3067 * probes will not activate in user contexts to which the
3068 * enabling user does not have permissions.
3070 if (mstate
->dtms_scratch_ptr
+ scratch_size
>
3071 mstate
->dtms_scratch_base
+ mstate
->dtms_scratch_size
) {
3072 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3077 if (subr
== DIF_SUBR_COPYIN
) {
3078 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3079 #if !defined(__APPLE__)
3080 dtrace_copyin(tupregs
[0].dttk_value
, dest
, size
);
3082 if (dtrace_priv_proc(state
))
3083 dtrace_copyin(tupregs
[0].dttk_value
, dest
, size
);
3084 #endif /* __APPLE__ */
3085 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3088 mstate
->dtms_scratch_ptr
+= scratch_size
;
3093 case DIF_SUBR_COPYINTO
: {
3094 uint64_t size
= tupregs
[1].dttk_value
;
3095 uintptr_t dest
= tupregs
[2].dttk_value
;
3098 * This action doesn't require any credential checks since
3099 * probes will not activate in user contexts to which the
3100 * enabling user does not have permissions.
3102 if (!dtrace_inscratch(dest
, size
, mstate
)) {
3103 *flags
|= CPU_DTRACE_BADADDR
;
3108 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3109 #if !defined(__APPLE__)
3110 dtrace_copyin(tupregs
[0].dttk_value
, dest
, size
);
3112 if (dtrace_priv_proc(state
))
3113 dtrace_copyin(tupregs
[0].dttk_value
, dest
, size
);
3114 #endif /* __APPLE__ */
3115 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3119 case DIF_SUBR_COPYINSTR
: {
3120 uintptr_t dest
= mstate
->dtms_scratch_ptr
;
3121 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
3123 if (nargs
> 1 && tupregs
[1].dttk_value
< size
)
3124 size
= tupregs
[1].dttk_value
+ 1;
3127 * This action doesn't require any credential checks since
3128 * probes will not activate in user contexts to which the
3129 * enabling user does not have permissions.
3131 if (mstate
->dtms_scratch_ptr
+ size
>
3132 mstate
->dtms_scratch_base
+ mstate
->dtms_scratch_size
) {
3133 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3138 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3139 #if !defined(__APPLE__)
3140 dtrace_copyinstr(tupregs
[0].dttk_value
, dest
, size
);
3142 if (dtrace_priv_proc(state
))
3143 dtrace_copyinstr(tupregs
[0].dttk_value
, dest
, size
);
3144 #endif /* __APPLE__ */
3145 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3147 ((char *)dest
)[size
- 1] = '\0';
3148 mstate
->dtms_scratch_ptr
+= size
;
3153 #if !defined(__APPLE__)
3154 case DIF_SUBR_MSGSIZE
:
3155 case DIF_SUBR_MSGDSIZE
: {
3156 uintptr_t baddr
= tupregs
[0].dttk_value
, daddr
;
3157 uintptr_t wptr
, rptr
;
3161 while (baddr
!= NULL
&& !(*flags
& CPU_DTRACE_FAULT
)) {
3162 wptr
= dtrace_loadptr(baddr
+
3163 offsetof(mblk_t
, b_wptr
));
3165 rptr
= dtrace_loadptr(baddr
+
3166 offsetof(mblk_t
, b_rptr
));
3169 *flags
|= CPU_DTRACE_BADADDR
;
3170 *illval
= tupregs
[0].dttk_value
;
3174 daddr
= dtrace_loadptr(baddr
+
3175 offsetof(mblk_t
, b_datap
));
3177 baddr
= dtrace_loadptr(baddr
+
3178 offsetof(mblk_t
, b_cont
));
3181 * We want to prevent against denial-of-service here,
3182 * so we're only going to search the list for
3183 * dtrace_msgdsize_max mblks.
3185 if (cont
++ > dtrace_msgdsize_max
) {
3186 *flags
|= CPU_DTRACE_ILLOP
;
3190 if (subr
== DIF_SUBR_MSGDSIZE
) {
3191 if (dtrace_load8(daddr
+
3192 offsetof(dblk_t
, db_type
)) != M_DATA
)
3196 count
+= wptr
- rptr
;
3199 if (!(*flags
& CPU_DTRACE_FAULT
))
3205 case DIF_SUBR_MSGSIZE
:
3206 case DIF_SUBR_MSGDSIZE
: {
3207 /* Darwin does not implement SysV streams messages */
3211 #endif /* __APPLE__ */
3213 #if !defined(__APPLE__)
3214 case DIF_SUBR_PROGENYOF
: {
3215 pid_t pid
= tupregs
[0].dttk_value
;
3219 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3221 for (p
= curthread
->t_procp
; p
!= NULL
; p
= p
->p_parent
) {
3222 if (p
->p_pidp
->pid_id
== pid
) {
3228 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3234 case DIF_SUBR_PROGENYOF
: {
3235 pid_t pid
= tupregs
[0].dttk_value
;
3236 struct proc
*p
= current_proc();
3237 int rval
= 0, lim
= nprocs
;
3239 while(p
&& (lim
-- > 0)) {
3242 ppid
= (pid_t
)dtrace_load32((uintptr_t)&(p
->p_pid
));
3243 if (*flags
& CPU_DTRACE_FAULT
)
3252 break; /* Can't climb process tree any further. */
3254 p
= (struct proc
*)dtrace_loadptr((uintptr_t)&(p
->p_pptr
));
3255 if (*flags
& CPU_DTRACE_FAULT
)
3262 #endif /* __APPLE__ */
3264 case DIF_SUBR_SPECULATION
:
3265 regs
[rd
] = dtrace_speculation(state
);
3268 #if !defined(__APPLE__)
3269 case DIF_SUBR_COPYOUT
: {
3270 uintptr_t kaddr
= tupregs
[0].dttk_value
;
3271 uintptr_t uaddr
= tupregs
[1].dttk_value
;
3272 uint64_t size
= tupregs
[2].dttk_value
;
3274 if (!dtrace_destructive_disallow
&&
3275 dtrace_priv_proc_control(state
) &&
3276 !dtrace_istoxic(kaddr
, size
)) {
3277 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3278 dtrace_copyout(kaddr
, uaddr
, size
);
3279 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3284 case DIF_SUBR_COPYOUTSTR
: {
3285 uintptr_t kaddr
= tupregs
[0].dttk_value
;
3286 uintptr_t uaddr
= tupregs
[1].dttk_value
;
3287 uint64_t size
= tupregs
[2].dttk_value
;
3289 if (!dtrace_destructive_disallow
&&
3290 dtrace_priv_proc_control(state
) &&
3291 !dtrace_istoxic(kaddr
, size
)) {
3292 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3293 dtrace_copyoutstr(kaddr
, uaddr
, size
);
3294 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3299 case DIF_SUBR_COPYOUT
: {
3300 uintptr_t kaddr
= tupregs
[0].dttk_value
;
3301 user_addr_t uaddr
= tupregs
[1].dttk_value
;
3302 uint64_t size
= tupregs
[2].dttk_value
;
3304 if (!dtrace_destructive_disallow
&&
3305 dtrace_priv_proc_control(state
) &&
3306 !dtrace_istoxic(kaddr
, size
)) {
3307 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3308 dtrace_copyout(kaddr
, uaddr
, size
);
3309 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3314 case DIF_SUBR_COPYOUTSTR
: {
3315 uintptr_t kaddr
= tupregs
[0].dttk_value
;
3316 user_addr_t uaddr
= tupregs
[1].dttk_value
;
3317 uint64_t size
= tupregs
[2].dttk_value
;
3319 if (!dtrace_destructive_disallow
&&
3320 dtrace_priv_proc_control(state
) &&
3321 !dtrace_istoxic(kaddr
, size
)) {
3322 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3323 dtrace_copyoutstr(kaddr
, uaddr
, size
);
3324 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3328 #endif /* __APPLE__ */
3330 case DIF_SUBR_STRLEN
:
3331 regs
[rd
] = dtrace_strlen((char *)(uintptr_t)
3332 tupregs
[0].dttk_value
,
3333 state
->dts_options
[DTRACEOPT_STRSIZE
]);
3336 case DIF_SUBR_STRCHR
:
3337 case DIF_SUBR_STRRCHR
: {
3339 * We're going to iterate over the string looking for the
3340 * specified character. We will iterate until we have reached
3341 * the string length or we have found the character. If this
3342 * is DIF_SUBR_STRRCHR, we will look for the last occurrence
3343 * of the specified character instead of the first.
3345 uintptr_t addr
= tupregs
[0].dttk_value
;
3346 uintptr_t limit
= addr
+ state
->dts_options
[DTRACEOPT_STRSIZE
];
3347 char c
, target
= (char)tupregs
[1].dttk_value
;
3349 for (regs
[rd
] = NULL
; addr
< limit
; addr
++) {
3350 if ((c
= dtrace_load8(addr
)) == target
) {
3353 if (subr
== DIF_SUBR_STRCHR
)
3364 case DIF_SUBR_STRSTR
:
3365 case DIF_SUBR_INDEX
:
3366 case DIF_SUBR_RINDEX
: {
3368 * We're going to iterate over the string looking for the
3369 * specified string. We will iterate until we have reached
3370 * the string length or we have found the string. (Yes, this
3371 * is done in the most naive way possible -- but considering
3372 * that the string we're searching for is likely to be
3373 * relatively short, the complexity of Rabin-Karp or similar
3374 * hardly seems merited.)
3376 char *addr
= (char *)(uintptr_t)tupregs
[0].dttk_value
;
3377 char *substr
= (char *)(uintptr_t)tupregs
[1].dttk_value
;
3378 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
3379 size_t len
= dtrace_strlen(addr
, size
);
3380 size_t sublen
= dtrace_strlen(substr
, size
);
3381 char *limit
= addr
+ len
, *orig
= addr
;
3382 int notfound
= subr
== DIF_SUBR_STRSTR
? 0 : -1;
3385 regs
[rd
] = notfound
;
3388 * strstr() and index()/rindex() have similar semantics if
3389 * both strings are the empty string: strstr() returns a
3390 * pointer to the (empty) string, and index() and rindex()
3391 * both return index 0 (regardless of any position argument).
3393 if (sublen
== 0 && len
== 0) {
3394 if (subr
== DIF_SUBR_STRSTR
)
3395 regs
[rd
] = (uintptr_t)addr
;
3401 if (subr
!= DIF_SUBR_STRSTR
) {
3402 if (subr
== DIF_SUBR_RINDEX
) {
3409 * Both index() and rindex() take an optional position
3410 * argument that denotes the starting position.
3413 int64_t pos
= (int64_t)tupregs
[2].dttk_value
;
3416 * If the position argument to index() is
3417 * negative, Perl implicitly clamps it at
3418 * zero. This semantic is a little surprising
3419 * given the special meaning of negative
3420 * positions to similar Perl functions like
3421 * substr(), but it appears to reflect a
3422 * notion that index() can start from a
3423 * negative index and increment its way up to
3424 * the string. Given this notion, Perl's
3425 * rindex() is at least self-consistent in
3426 * that it implicitly clamps positions greater
3427 * than the string length to be the string
3428 * length. Where Perl completely loses
3429 * coherence, however, is when the specified
3430 * substring is the empty string (""). In
3431 * this case, even if the position is
3432 * negative, rindex() returns 0 -- and even if
3433 * the position is greater than the length,
3434 * index() returns the string length. These
3435 * semantics violate the notion that index()
3436 * should never return a value less than the
3437 * specified position and that rindex() should
3438 * never return a value greater than the
3439 * specified position. (One assumes that
3440 * these semantics are artifacts of Perl's
3441 * implementation and not the results of
3442 * deliberate design -- it beggars belief that
3443 * even Larry Wall could desire such oddness.)
3444 * While in the abstract one would wish for
3445 * consistent position semantics across
3446 * substr(), index() and rindex() -- or at the
3447 * very least self-consistent position
3448 * semantics for index() and rindex() -- we
3449 * instead opt to keep with the extant Perl
3450 * semantics, in all their broken glory. (Do
3451 * we have more desire to maintain Perl's
3452 * semantics than Perl does? Probably.)
3454 if (subr
== DIF_SUBR_RINDEX
) {
3478 for (regs
[rd
] = notfound
; addr
!= limit
; addr
+= inc
) {
3479 if (dtrace_strncmp(addr
, substr
, sublen
) == 0) {
3480 if (subr
!= DIF_SUBR_STRSTR
) {
3482 * As D index() and rindex() are
3483 * modeled on Perl (and not on awk),
3484 * we return a zero-based (and not a
3485 * one-based) index. (For you Perl
3486 * weenies: no, we're not going to add
3487 * $[ -- and shouldn't you be at a con
3490 regs
[rd
] = (uintptr_t)(addr
- orig
);
3494 ASSERT(subr
== DIF_SUBR_STRSTR
);
3495 regs
[rd
] = (uintptr_t)addr
;
3503 case DIF_SUBR_STRTOK
: {
3504 uintptr_t addr
= tupregs
[0].dttk_value
;
3505 uintptr_t tokaddr
= tupregs
[1].dttk_value
;
3506 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
3507 uintptr_t limit
, toklimit
= tokaddr
+ size
;
3508 uint8_t c
, tokmap
[32]; /* 256 / 8 */
3509 char *dest
= (char *)mstate
->dtms_scratch_ptr
;
3512 if (mstate
->dtms_scratch_ptr
+ size
>
3513 mstate
->dtms_scratch_base
+ mstate
->dtms_scratch_size
) {
3514 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3521 * If the address specified is NULL, we use our saved
3522 * strtok pointer from the mstate. Note that this
3523 * means that the saved strtok pointer is _only_
3524 * valid within multiple enablings of the same probe --
3525 * it behaves like an implicit clause-local variable.
3527 addr
= mstate
->dtms_strtok
;
3531 * First, zero the token map, and then process the token
3532 * string -- setting a bit in the map for every character
3533 * found in the token string.
3535 for (i
= 0; i
< sizeof (tokmap
); i
++)
3538 for (; tokaddr
< toklimit
; tokaddr
++) {
3539 if ((c
= dtrace_load8(tokaddr
)) == '\0')
3542 ASSERT((c
>> 3) < sizeof (tokmap
));
3543 tokmap
[c
>> 3] |= (1 << (c
& 0x7));
3546 for (limit
= addr
+ size
; addr
< limit
; addr
++) {
3548 * We're looking for a character that is _not_ contained
3549 * in the token string.
3551 if ((c
= dtrace_load8(addr
)) == '\0')
3554 if (!(tokmap
[c
>> 3] & (1 << (c
& 0x7))))
3560 * We reached the end of the string without finding
3561 * any character that was not in the token string.
3562 * We return NULL in this case, and we set the saved
3563 * address to NULL as well.
3566 mstate
->dtms_strtok
= NULL
;
3571 * From here on, we're copying into the destination string.
3573 for (i
= 0; addr
< limit
&& i
< size
- 1; addr
++) {
3574 if ((c
= dtrace_load8(addr
)) == '\0')
3577 if (tokmap
[c
>> 3] & (1 << (c
& 0x7)))
3586 regs
[rd
] = (uintptr_t)dest
;
3587 mstate
->dtms_scratch_ptr
+= size
;
3588 mstate
->dtms_strtok
= addr
;
3592 case DIF_SUBR_SUBSTR
: {
3593 uintptr_t s
= tupregs
[0].dttk_value
;
3594 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
3595 char *d
= (char *)mstate
->dtms_scratch_ptr
;
3596 int64_t index
= (int64_t)tupregs
[1].dttk_value
;
3597 int64_t remaining
= (int64_t)tupregs
[2].dttk_value
;
3598 size_t len
= dtrace_strlen((char *)s
, size
);
3602 remaining
= (int64_t)size
;
3604 if (mstate
->dtms_scratch_ptr
+ size
>
3605 mstate
->dtms_scratch_base
+ mstate
->dtms_scratch_size
) {
3606 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3614 if (index
< 0 && index
+ remaining
> 0) {
3620 if (index
>= len
|| index
< 0)
3623 for (d
[0] = '\0'; remaining
> 0; remaining
--) {
3624 if ((d
[i
++] = dtrace_load8(s
++ + index
)) == '\0')
3633 mstate
->dtms_scratch_ptr
+= size
;
3634 regs
[rd
] = (uintptr_t)d
;
3638 #if !defined(__APPLE__)
3639 case DIF_SUBR_GETMAJOR
:
3641 regs
[rd
] = (tupregs
[0].dttk_value
>> NBITSMINOR64
) & MAXMAJ64
;
3643 regs
[rd
] = (tupregs
[0].dttk_value
>> NBITSMINOR
) & MAXMAJ
;
3647 #else /* __APPLE__ */
3648 case DIF_SUBR_GETMAJOR
:
3649 regs
[rd
] = (uintptr_t)major( (dev_t
)tupregs
[0].dttk_value
);
3651 #endif /* __APPLE__ */
3653 #if !defined(__APPLE__)
3654 case DIF_SUBR_GETMINOR
:
3656 regs
[rd
] = tupregs
[0].dttk_value
& MAXMIN64
;
3658 regs
[rd
] = tupregs
[0].dttk_value
& MAXMIN
;
3662 #else /* __APPLE__ */
3663 case DIF_SUBR_GETMINOR
:
3664 regs
[rd
] = (uintptr_t)minor( (dev_t
)tupregs
[0].dttk_value
);
3666 #endif /* __APPLE__ */
3668 #if !defined(__APPLE__)
3669 case DIF_SUBR_DDI_PATHNAME
: {
3671 * This one is a galactic mess. We are going to roughly
3672 * emulate ddi_pathname(), but it's made more complicated
3673 * by the fact that we (a) want to include the minor name and
3674 * (b) must proceed iteratively instead of recursively.
3676 uintptr_t dest
= mstate
->dtms_scratch_ptr
;
3677 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
3678 char *start
= (char *)dest
, *end
= start
+ size
- 1;
3679 uintptr_t daddr
= tupregs
[0].dttk_value
;
3680 int64_t minor
= (int64_t)tupregs
[1].dttk_value
;
3682 int i
, len
, depth
= 0;
3684 if (size
== 0 || mstate
->dtms_scratch_ptr
+ size
>
3685 mstate
->dtms_scratch_base
+ mstate
->dtms_scratch_size
) {
3686 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3694 * We want to have a name for the minor. In order to do this,
3695 * we need to walk the minor list from the devinfo. We want
3696 * to be sure that we don't infinitely walk a circular list,
3697 * so we check for circularity by sending a scout pointer
3698 * ahead two elements for every element that we iterate over;
3699 * if the list is circular, these will ultimately point to the
3700 * same element. You may recognize this little trick as the
3701 * answer to a stupid interview question -- one that always
3702 * seems to be asked by those who had to have it laboriously
3703 * explained to them, and who can't even concisely describe
3704 * the conditions under which one would be forced to resort to
3705 * this technique. Needless to say, those conditions are
3706 * found here -- and probably only here. Is this is the only
3707 * use of this infamous trick in shipping, production code?
3708 * If it isn't, it probably should be...
3711 uintptr_t maddr
= dtrace_loadptr(daddr
+
3712 offsetof(struct dev_info
, devi_minor
));
3714 uintptr_t next
= offsetof(struct ddi_minor_data
, next
);
3715 uintptr_t name
= offsetof(struct ddi_minor_data
,
3716 d_minor
) + offsetof(struct ddi_minor
, name
);
3717 uintptr_t dev
= offsetof(struct ddi_minor_data
,
3718 d_minor
) + offsetof(struct ddi_minor
, dev
);
3722 scout
= dtrace_loadptr(maddr
+ next
);
3724 while (maddr
!= NULL
&& !(*flags
& CPU_DTRACE_FAULT
)) {
3727 m
= dtrace_load64(maddr
+ dev
) & MAXMIN64
;
3729 m
= dtrace_load32(maddr
+ dev
) & MAXMIN
;
3732 maddr
= dtrace_loadptr(maddr
+ next
);
3737 scout
= dtrace_loadptr(scout
+ next
);
3742 scout
= dtrace_loadptr(scout
+ next
);
3747 if (scout
== maddr
) {
3748 *flags
|= CPU_DTRACE_ILLOP
;
3756 * We have the minor data. Now we need to
3757 * copy the minor's name into the end of the
3760 s
= (char *)dtrace_loadptr(maddr
+ name
);
3761 len
= dtrace_strlen(s
, size
);
3763 if (*flags
& CPU_DTRACE_FAULT
)
3767 if ((end
-= (len
+ 1)) < start
)
3773 for (i
= 1; i
<= len
; i
++)
3774 end
[i
] = dtrace_load8((uintptr_t)s
++);
3779 while (daddr
!= NULL
&& !(*flags
& CPU_DTRACE_FAULT
)) {
3780 ddi_node_state_t devi_state
;
3782 devi_state
= dtrace_load32(daddr
+
3783 offsetof(struct dev_info
, devi_node_state
));
3785 if (*flags
& CPU_DTRACE_FAULT
)
3788 if (devi_state
>= DS_INITIALIZED
) {
3789 s
= (char *)dtrace_loadptr(daddr
+
3790 offsetof(struct dev_info
, devi_addr
));
3791 len
= dtrace_strlen(s
, size
);
3793 if (*flags
& CPU_DTRACE_FAULT
)
3797 if ((end
-= (len
+ 1)) < start
)
3803 for (i
= 1; i
<= len
; i
++)
3804 end
[i
] = dtrace_load8((uintptr_t)s
++);
3808 * Now for the node name...
3810 s
= (char *)dtrace_loadptr(daddr
+
3811 offsetof(struct dev_info
, devi_node_name
));
3813 daddr
= dtrace_loadptr(daddr
+
3814 offsetof(struct dev_info
, devi_parent
));
3817 * If our parent is NULL (that is, if we're the root
3818 * node), we're going to use the special path
3824 len
= dtrace_strlen(s
, size
);
3825 if (*flags
& CPU_DTRACE_FAULT
)
3828 if ((end
-= (len
+ 1)) < start
)
3831 for (i
= 1; i
<= len
; i
++)
3832 end
[i
] = dtrace_load8((uintptr_t)s
++);
3835 if (depth
++ > dtrace_devdepth_max
) {
3836 *flags
|= CPU_DTRACE_ILLOP
;
3842 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3844 if (daddr
== NULL
) {
3845 regs
[rd
] = (uintptr_t)end
;
3846 mstate
->dtms_scratch_ptr
+= size
;
3852 case DIF_SUBR_DDI_PATHNAME
: {
3853 /* XXX awaits galactic disentanglement ;-} */
3857 #endif /* __APPLE__ */
3859 case DIF_SUBR_STRJOIN
: {
3860 char *d
= (char *)mstate
->dtms_scratch_ptr
;
3861 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
3862 uintptr_t s1
= tupregs
[0].dttk_value
;
3863 uintptr_t s2
= tupregs
[1].dttk_value
;
3866 if (mstate
->dtms_scratch_ptr
+ size
>
3867 mstate
->dtms_scratch_base
+ mstate
->dtms_scratch_size
) {
3868 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3875 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3880 if ((d
[i
++] = dtrace_load8(s1
++)) == '\0') {
3888 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3893 if ((d
[i
++] = dtrace_load8(s2
++)) == '\0')
3898 mstate
->dtms_scratch_ptr
+= i
;
3899 regs
[rd
] = (uintptr_t)d
;
3905 case DIF_SUBR_LLTOSTR
: {
3906 int64_t i
= (int64_t)tupregs
[0].dttk_value
;
3907 int64_t val
= i
< 0 ? i
* -1 : i
;
3908 uint64_t size
= 22; /* enough room for 2^64 in decimal */
3909 char *end
= (char *)mstate
->dtms_scratch_ptr
+ size
- 1;
3911 if (mstate
->dtms_scratch_ptr
+ size
>
3912 mstate
->dtms_scratch_base
+ mstate
->dtms_scratch_size
) {
3913 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3918 for (*end
-- = '\0'; val
; val
/= 10)
3919 *end
-- = '0' + (val
% 10);
3927 regs
[rd
] = (uintptr_t)end
+ 1;
3928 mstate
->dtms_scratch_ptr
+= size
;
3932 case DIF_SUBR_DIRNAME
:
3933 case DIF_SUBR_BASENAME
: {
3934 char *dest
= (char *)mstate
->dtms_scratch_ptr
;
3935 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
3936 uintptr_t src
= tupregs
[0].dttk_value
;
3937 int i
, j
, len
= dtrace_strlen((char *)src
, size
);
3938 int lastbase
= -1, firstbase
= -1, lastdir
= -1;
3941 if (mstate
->dtms_scratch_ptr
+ size
>
3942 mstate
->dtms_scratch_base
+ mstate
->dtms_scratch_size
) {
3943 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3949 * The basename and dirname for a zero-length string is
3954 src
= (uintptr_t)".";
3958 * Start from the back of the string, moving back toward the
3959 * front until we see a character that isn't a slash. That
3960 * character is the last character in the basename.
3962 for (i
= len
- 1; i
>= 0; i
--) {
3963 if (dtrace_load8(src
+ i
) != '/')
3971 * Starting from the last character in the basename, move
3972 * towards the front until we find a slash. The character
3973 * that we processed immediately before that is the first
3974 * character in the basename.
3976 for (; i
>= 0; i
--) {
3977 if (dtrace_load8(src
+ i
) == '/')
3985 * Now keep going until we find a non-slash character. That
3986 * character is the last character in the dirname.
3988 for (; i
>= 0; i
--) {
3989 if (dtrace_load8(src
+ i
) != '/')
3996 ASSERT(!(lastbase
== -1 && firstbase
!= -1));
3997 ASSERT(!(firstbase
== -1 && lastdir
!= -1));
3999 if (lastbase
== -1) {
4001 * We didn't find a non-slash character. We know that
4002 * the length is non-zero, so the whole string must be
4003 * slashes. In either the dirname or the basename
4004 * case, we return '/'.
4006 ASSERT(firstbase
== -1);
4007 firstbase
= lastbase
= lastdir
= 0;
4010 if (firstbase
== -1) {
4012 * The entire string consists only of a basename
4013 * component. If we're looking for dirname, we need
4014 * to change our string to be just "."; if we're
4015 * looking for a basename, we'll just set the first
4016 * character of the basename to be 0.
4018 if (subr
== DIF_SUBR_DIRNAME
) {
4019 ASSERT(lastdir
== -1);
4020 src
= (uintptr_t)".";
4027 if (subr
== DIF_SUBR_DIRNAME
) {
4028 if (lastdir
== -1) {
4030 * We know that we have a slash in the name --
4031 * or lastdir would be set to 0, above. And
4032 * because lastdir is -1, we know that this
4033 * slash must be the first character. (That
4034 * is, the full string must be of the form
4035 * "/basename".) In this case, the last
4036 * character of the directory name is 0.
4044 ASSERT(subr
== DIF_SUBR_BASENAME
);
4045 ASSERT(firstbase
!= -1 && lastbase
!= -1);
4050 for (i
= start
, j
= 0; i
<= end
&& j
< size
- 1; i
++, j
++)
4051 dest
[j
] = dtrace_load8(src
+ i
);
4054 regs
[rd
] = (uintptr_t)dest
;
4055 mstate
->dtms_scratch_ptr
+= size
;
4059 case DIF_SUBR_CLEANPATH
: {
4060 char *dest
= (char *)mstate
->dtms_scratch_ptr
, c
;
4061 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
4062 uintptr_t src
= tupregs
[0].dttk_value
;
4065 if (mstate
->dtms_scratch_ptr
+ size
>
4066 mstate
->dtms_scratch_base
+ mstate
->dtms_scratch_size
) {
4067 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
4073 * Move forward, loading each character.
4076 c
= dtrace_load8(src
+ i
++);
4078 if (j
+ 5 >= size
) /* 5 = strlen("/..c\0") */
4086 c
= dtrace_load8(src
+ i
++);
4090 * We have two slashes -- we can just advance
4091 * to the next character.
4098 * This is not "." and it's not ".." -- we can
4099 * just store the "/" and this character and
4107 c
= dtrace_load8(src
+ i
++);
4111 * This is a "/./" component. We're not going
4112 * to store anything in the destination buffer;
4113 * we're just going to go to the next component.
4120 * This is not ".." -- we can just store the
4121 * "/." and this character and continue
4130 c
= dtrace_load8(src
+ i
++);
4132 if (c
!= '/' && c
!= '\0') {
4134 * This is not ".." -- it's "..[mumble]".
4135 * We'll store the "/.." and this character
4136 * and continue processing.
4146 * This is "/../" or "/..\0". We need to back up
4147 * our destination pointer until we find a "/".
4150 while (j
!= 0 && dest
[--j
] != '/')
4155 } while (c
!= '\0');
4158 regs
[rd
] = (uintptr_t)dest
;
4159 mstate
->dtms_scratch_ptr
+= size
;
4164 /* CHUD callback ('chud(uint64_t, [uint64_t], [uint64_t] ...)') */
4165 case DIF_SUBR_CHUD
: {
4166 uint64_t selector
= tupregs
[0].dttk_value
;
4167 uint64_t args
[DIF_DTR_NREGS
-1] = {0ULL};
4170 /* copy in any variadic argument list */
4171 for(ii
= 0; ii
< DIF_DTR_NREGS
-1; ii
++) {
4172 args
[ii
] = tupregs
[ii
+1].dttk_value
;
4176 chudxnu_dtrace_callback(selector
, args
, DIF_DTR_NREGS
-1);
4177 if(KERN_SUCCESS
!= ret
) {
4183 #endif /* __APPLE__ */
/*
 * Emulate the execution of DTrace IR instructions specified by the given
 * DIF object.  This function is deliberately void of assertions as all of
 * the necessary checks are handled by a call to dtrace_difo_validate().
 */
static uint64_t
dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate, dtrace_state_t *state)
{
	const dif_instr_t *text = difo->dtdo_buf;
	const uint_t textlen = difo->dtdo_len;
	const char *strtab = difo->dtdo_strtab;
	const uint64_t *inttab = difo->dtdo_inttab;

	dtrace_statvar_t *svar;
	dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
	dtrace_difv_t *v;
	volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
#if !defined(__APPLE__)
	volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
#else
	volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
#endif /* __APPLE__ */

	dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
	uint64_t regs[DIF_DIR_NREGS];

	uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0;
	int64_t cc_r;
	uint_t pc = 0, id, opc;
	dif_instr_t instr;
	uint_t r1, r2, rd;

	regs[DIF_REG_R0] = 0;		/* %r0 is fixed at zero */

	while (pc < textlen && !(*flags & CPU_DTRACE_FAULT)) {
		opc = pc;

		instr = text[pc++];
		r1 = DIF_INSTR_R1(instr);
		r2 = DIF_INSTR_R2(instr);
		rd = DIF_INSTR_RD(instr);

		switch (DIF_INSTR_OP(instr)) {
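		/*
		 * (The cc_n/cc_z/cc_v/cc_c variables declared above mirror
		 * the usual negative/zero/overflow/carry condition codes:
		 * the compare and test opcodes set them, and the conditional
		 * branch opcodes consult them, much like a conventional ALU
		 * flags register.)
		 */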
4236 regs
[rd
] = regs
[r1
] | regs
[r2
];
4239 regs
[rd
] = regs
[r1
] ^ regs
[r2
];
4242 regs
[rd
] = regs
[r1
] & regs
[r2
];
4245 regs
[rd
] = regs
[r1
] << regs
[r2
];
4248 regs
[rd
] = regs
[r1
] >> regs
[r2
];
4251 regs
[rd
] = regs
[r1
] - regs
[r2
];
4254 regs
[rd
] = regs
[r1
] + regs
[r2
];
4257 regs
[rd
] = regs
[r1
] * regs
[r2
];
4260 if (regs
[r2
] == 0) {
4262 *flags
|= CPU_DTRACE_DIVZERO
;
4264 regs
[rd
] = (int64_t)regs
[r1
] /
4270 if (regs
[r2
] == 0) {
4272 *flags
|= CPU_DTRACE_DIVZERO
;
4274 regs
[rd
] = regs
[r1
] / regs
[r2
];
4279 if (regs
[r2
] == 0) {
4281 *flags
|= CPU_DTRACE_DIVZERO
;
4283 regs
[rd
] = (int64_t)regs
[r1
] %
4289 if (regs
[r2
] == 0) {
4291 *flags
|= CPU_DTRACE_DIVZERO
;
4293 regs
[rd
] = regs
[r1
] % regs
[r2
];
4298 regs
[rd
] = ~regs
[r1
];
4301 regs
[rd
] = regs
[r1
];
4304 cc_r
= regs
[r1
] - regs
[r2
];
4308 cc_c
= regs
[r1
] < regs
[r2
];
4311 cc_n
= cc_v
= cc_c
= 0;
4312 cc_z
= regs
[r1
] == 0;
4315 pc
= DIF_INSTR_LABEL(instr
);
4319 pc
= DIF_INSTR_LABEL(instr
);
4323 pc
= DIF_INSTR_LABEL(instr
);
4326 if ((cc_z
| (cc_n
^ cc_v
)) == 0)
4327 pc
= DIF_INSTR_LABEL(instr
);
4330 if ((cc_c
| cc_z
) == 0)
4331 pc
= DIF_INSTR_LABEL(instr
);
4334 if ((cc_n
^ cc_v
) == 0)
4335 pc
= DIF_INSTR_LABEL(instr
);
4339 pc
= DIF_INSTR_LABEL(instr
);
4343 pc
= DIF_INSTR_LABEL(instr
);
4347 pc
= DIF_INSTR_LABEL(instr
);
4350 if (cc_z
| (cc_n
^ cc_v
))
4351 pc
= DIF_INSTR_LABEL(instr
);
4355 pc
= DIF_INSTR_LABEL(instr
);
4358 if (!dtrace_canstore(regs
[r1
], 1, mstate
, vstate
)) {
4359 *flags
|= CPU_DTRACE_KPRIV
;
4365 regs
[rd
] = (int8_t)dtrace_load8(regs
[r1
]);
4368 if (!dtrace_canstore(regs
[r1
], 2, mstate
, vstate
)) {
4369 *flags
|= CPU_DTRACE_KPRIV
;
4375 regs
[rd
] = (int16_t)dtrace_load16(regs
[r1
]);
4378 if (!dtrace_canstore(regs
[r1
], 4, mstate
, vstate
)) {
4379 *flags
|= CPU_DTRACE_KPRIV
;
4385 regs
[rd
] = (int32_t)dtrace_load32(regs
[r1
]);
4388 if (!dtrace_canstore(regs
[r1
], 1, mstate
, vstate
)) {
4389 *flags
|= CPU_DTRACE_KPRIV
;
4395 regs
[rd
] = dtrace_load8(regs
[r1
]);
4398 if (!dtrace_canstore(regs
[r1
], 2, mstate
, vstate
)) {
4399 *flags
|= CPU_DTRACE_KPRIV
;
4405 regs
[rd
] = dtrace_load16(regs
[r1
]);
4408 if (!dtrace_canstore(regs
[r1
], 4, mstate
, vstate
)) {
4409 *flags
|= CPU_DTRACE_KPRIV
;
4415 regs
[rd
] = dtrace_load32(regs
[r1
]);
4418 if (!dtrace_canstore(regs
[r1
], 8, mstate
, vstate
)) {
4419 *flags
|= CPU_DTRACE_KPRIV
;
4425 regs
[rd
] = dtrace_load64(regs
[r1
]);
4429 dtrace_fuword8(regs
[r1
]);
4432 regs
[rd
] = (int16_t)
4433 dtrace_fuword16(regs
[r1
]);
4436 regs
[rd
] = (int32_t)
4437 dtrace_fuword32(regs
[r1
]);
4441 dtrace_fuword8(regs
[r1
]);
4445 dtrace_fuword16(regs
[r1
]);
4449 dtrace_fuword32(regs
[r1
]);
4453 dtrace_fuword64(regs
[r1
]);
4461 regs
[rd
] = inttab
[DIF_INSTR_INTEGER(instr
)];
4464 regs
[rd
] = (uint64_t)(uintptr_t)
4465 (strtab
+ DIF_INSTR_STRING(instr
));
4468 cc_r
= dtrace_strncmp((char *)(uintptr_t)regs
[r1
],
4469 (char *)(uintptr_t)regs
[r2
],
4470 state
->dts_options
[DTRACEOPT_STRSIZE
]);
4477 regs
[rd
] = dtrace_dif_variable(mstate
, state
,
4481 id
= DIF_INSTR_VAR(instr
);
4483 if (id
>= DIF_VAR_OTHER_UBASE
) {
4486 id
-= DIF_VAR_OTHER_UBASE
;
4487 svar
= vstate
->dtvs_globals
[id
];
4488 ASSERT(svar
!= NULL
);
4489 v
= &svar
->dtsv_var
;
4491 if (!(v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
)) {
4492 regs
[rd
] = svar
->dtsv_data
;
4496 a
= (uintptr_t)svar
->dtsv_data
;
4498 if (*(uint8_t *)a
== UINT8_MAX
) {
4500 * If the 0th byte is set to UINT8_MAX
4501 * then this is to be treated as a
4502 * reference to a NULL variable.
4506 regs
[rd
] = a
+ sizeof (uint64_t);
4512 regs
[rd
] = dtrace_dif_variable(mstate
, state
, id
, 0);
4516 id
= DIF_INSTR_VAR(instr
);
4518 ASSERT(id
>= DIF_VAR_OTHER_UBASE
);
4519 id
-= DIF_VAR_OTHER_UBASE
;
4521 svar
= vstate
->dtvs_globals
[id
];
4522 ASSERT(svar
!= NULL
);
4523 v
= &svar
->dtsv_var
;
4525 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
4526 uintptr_t a
= (uintptr_t)svar
->dtsv_data
;
4529 ASSERT(svar
->dtsv_size
!= 0);
4531 if (regs
[rd
] == NULL
) {
4532 *(uint8_t *)a
= UINT8_MAX
;
4536 a
+= sizeof (uint64_t);
4539 dtrace_vcopy((void *)(uintptr_t)regs
[rd
],
4540 (void *)a
, &v
->dtdv_type
);
4544 svar
->dtsv_data
= regs
[rd
];
4549 * There are no DTrace built-in thread-local arrays at
4550 * present. This opcode is saved for future work.
4552 *flags
|= CPU_DTRACE_ILLOP
;
4557 id
= DIF_INSTR_VAR(instr
);
4559 if (id
< DIF_VAR_OTHER_UBASE
) {
4561 * For now, this has no meaning.
4567 id
-= DIF_VAR_OTHER_UBASE
;
4569 ASSERT(id
< vstate
->dtvs_nlocals
);
4570 ASSERT(vstate
->dtvs_locals
!= NULL
);
4572 svar
= vstate
->dtvs_locals
[id
];
4573 ASSERT(svar
!= NULL
);
4574 v
= &svar
->dtsv_var
;
4576 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
4577 uintptr_t a
= (uintptr_t)svar
->dtsv_data
;
4578 size_t sz
= v
->dtdv_type
.dtdt_size
;
4580 sz
+= sizeof (uint64_t);
4581 ASSERT(svar
->dtsv_size
== NCPU
* sz
);
4582 a
+= CPU
->cpu_id
* sz
;
4584 if (*(uint8_t *)a
== UINT8_MAX
) {
4586 * If the 0th byte is set to UINT8_MAX
4587 * then this is to be treated as a
4588 * reference to a NULL variable.
4592 regs
[rd
] = a
+ sizeof (uint64_t);
4598 ASSERT(svar
->dtsv_size
== NCPU
* sizeof (uint64_t));
4599 tmp
= (uint64_t *)(uintptr_t)svar
->dtsv_data
;
4600 regs
[rd
] = tmp
[CPU
->cpu_id
];
4604 id
= DIF_INSTR_VAR(instr
);
4606 ASSERT(id
>= DIF_VAR_OTHER_UBASE
);
4607 id
-= DIF_VAR_OTHER_UBASE
;
4608 ASSERT(id
< vstate
->dtvs_nlocals
);
4610 ASSERT(vstate
->dtvs_locals
!= NULL
);
4611 svar
= vstate
->dtvs_locals
[id
];
4612 ASSERT(svar
!= NULL
);
4613 v
= &svar
->dtsv_var
;
4615 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
4616 uintptr_t a
= (uintptr_t)svar
->dtsv_data
;
4617 size_t sz
= v
->dtdv_type
.dtdt_size
;
4619 sz
+= sizeof (uint64_t);
4620 ASSERT(svar
->dtsv_size
== NCPU
* sz
);
4621 a
+= CPU
->cpu_id
* sz
;
4623 if (regs
[rd
] == NULL
) {
4624 *(uint8_t *)a
= UINT8_MAX
;
4628 a
+= sizeof (uint64_t);
4631 dtrace_vcopy((void *)(uintptr_t)regs
[rd
],
4632 (void *)a
, &v
->dtdv_type
);
4636 ASSERT(svar
->dtsv_size
== NCPU
* sizeof (uint64_t));
4637 tmp
= (uint64_t *)(uintptr_t)svar
->dtsv_data
;
4638 tmp
[CPU
->cpu_id
] = regs
[rd
];
4642 dtrace_dynvar_t
*dvar
;
4645 id
= DIF_INSTR_VAR(instr
);
4646 ASSERT(id
>= DIF_VAR_OTHER_UBASE
);
4647 id
-= DIF_VAR_OTHER_UBASE
;
4648 v
= &vstate
->dtvs_tlocals
[id
];
4650 key
= &tupregs
[DIF_DTR_NREGS
];
4651 key
[0].dttk_value
= (uint64_t)id
;
4652 key
[0].dttk_size
= 0;
4653 DTRACE_TLS_THRKEY(key
[1].dttk_value
);
4654 key
[1].dttk_size
= 0;
4656 dvar
= dtrace_dynvar(dstate
, 2, key
,
4657 sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC
);
4664 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
4665 regs
[rd
] = (uint64_t)(uintptr_t)dvar
->dtdv_data
;
4667 regs
[rd
] = *((uint64_t *)dvar
->dtdv_data
);
4674 dtrace_dynvar_t
*dvar
;
4677 id
= DIF_INSTR_VAR(instr
);
4678 ASSERT(id
>= DIF_VAR_OTHER_UBASE
);
4679 id
-= DIF_VAR_OTHER_UBASE
;
4681 key
= &tupregs
[DIF_DTR_NREGS
];
4682 key
[0].dttk_value
= (uint64_t)id
;
4683 key
[0].dttk_size
= 0;
4684 DTRACE_TLS_THRKEY(key
[1].dttk_value
);
4685 key
[1].dttk_size
= 0;
4686 v
= &vstate
->dtvs_tlocals
[id
];
4688 dvar
= dtrace_dynvar(dstate
, 2, key
,
4689 v
->dtdv_type
.dtdt_size
> sizeof (uint64_t) ?
4690 v
->dtdv_type
.dtdt_size
: sizeof (uint64_t),
4691 regs
[rd
] ? DTRACE_DYNVAR_ALLOC
:
4692 DTRACE_DYNVAR_DEALLOC
);
4695 * Given that we're storing to thread-local data,
4696 * we need to flush our predicate cache.
4698 #if !defined(__APPLE__)
4699 curthread
->t_predcache
= NULL
;
4701 dtrace_set_thread_predcache(current_thread(), 0);
4702 #endif /* __APPLE__ */
4708 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
4709 dtrace_vcopy((void *)(uintptr_t)regs
[rd
],
4710 dvar
->dtdv_data
, &v
->dtdv_type
);
4712 *((uint64_t *)dvar
->dtdv_data
) = regs
[rd
];
4719 regs
[rd
] = (int64_t)regs
[r1
] >> regs
[r2
];
4723 dtrace_dif_subr(DIF_INSTR_SUBR(instr
), rd
,
4724 regs
, tupregs
, ttop
, mstate
, state
);
4728 if (ttop
== DIF_DTR_NREGS
) {
4729 *flags
|= CPU_DTRACE_TUPOFLOW
;
4733 if (r1
== DIF_TYPE_STRING
) {
4735 * If this is a string type and the size is 0,
4736 * we'll use the system-wide default string
4737 * size. Note that we are _not_ looking at
4738 * the value of the DTRACEOPT_STRSIZE option;
4739 * had this been set, we would expect to have
4740 * a non-zero size value in the "pushtr".
4742 tupregs
[ttop
].dttk_size
=
4743 dtrace_strlen((char *)(uintptr_t)regs
[rd
],
4744 regs
[r2
] ? regs
[r2
] :
4745 dtrace_strsize_default
) + 1;
4747 tupregs
[ttop
].dttk_size
= regs
[r2
];
4750 tupregs
[ttop
++].dttk_value
= regs
[rd
];
4754 if (ttop
== DIF_DTR_NREGS
) {
4755 *flags
|= CPU_DTRACE_TUPOFLOW
;
4759 tupregs
[ttop
].dttk_value
= regs
[rd
];
4760 tupregs
[ttop
++].dttk_size
= 0;
4768 case DIF_OP_FLUSHTS
:
4773 case DIF_OP_LDTAA
: {
4774 dtrace_dynvar_t
*dvar
;
4775 dtrace_key_t
*key
= tupregs
;
4776 uint_t nkeys
= ttop
;
4778 id
= DIF_INSTR_VAR(instr
);
4779 ASSERT(id
>= DIF_VAR_OTHER_UBASE
);
4780 id
-= DIF_VAR_OTHER_UBASE
;
4782 key
[nkeys
].dttk_value
= (uint64_t)id
;
4783 key
[nkeys
++].dttk_size
= 0;
4785 if (DIF_INSTR_OP(instr
) == DIF_OP_LDTAA
) {
4786 DTRACE_TLS_THRKEY(key
[nkeys
].dttk_value
);
4787 key
[nkeys
++].dttk_size
= 0;
4788 v
= &vstate
->dtvs_tlocals
[id
];
4790 v
= &vstate
->dtvs_globals
[id
]->dtsv_var
;
4793 dvar
= dtrace_dynvar(dstate
, nkeys
, key
,
4794 v
->dtdv_type
.dtdt_size
> sizeof (uint64_t) ?
4795 v
->dtdv_type
.dtdt_size
: sizeof (uint64_t),
4796 DTRACE_DYNVAR_NOALLOC
);
4803 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
4804 regs
[rd
] = (uint64_t)(uintptr_t)dvar
->dtdv_data
;
4806 regs
[rd
] = *((uint64_t *)dvar
->dtdv_data
);
4813 case DIF_OP_STTAA
: {
4814 dtrace_dynvar_t
*dvar
;
4815 dtrace_key_t
*key
= tupregs
;
4816 uint_t nkeys
= ttop
;
4818 id
= DIF_INSTR_VAR(instr
);
4819 ASSERT(id
>= DIF_VAR_OTHER_UBASE
);
4820 id
-= DIF_VAR_OTHER_UBASE
;
4822 key
[nkeys
].dttk_value
= (uint64_t)id
;
4823 key
[nkeys
++].dttk_size
= 0;
4825 if (DIF_INSTR_OP(instr
) == DIF_OP_STTAA
) {
4826 DTRACE_TLS_THRKEY(key
[nkeys
].dttk_value
);
4827 key
[nkeys
++].dttk_size
= 0;
4828 v
= &vstate
->dtvs_tlocals
[id
];
4830 v
= &vstate
->dtvs_globals
[id
]->dtsv_var
;
4833 dvar
= dtrace_dynvar(dstate
, nkeys
, key
,
4834 v
->dtdv_type
.dtdt_size
> sizeof (uint64_t) ?
4835 v
->dtdv_type
.dtdt_size
: sizeof (uint64_t),
4836 regs
[rd
] ? DTRACE_DYNVAR_ALLOC
:
4837 DTRACE_DYNVAR_DEALLOC
);
4842 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
4843 dtrace_vcopy((void *)(uintptr_t)regs
[rd
],
4844 dvar
->dtdv_data
, &v
->dtdv_type
);
4846 *((uint64_t *)dvar
->dtdv_data
) = regs
[rd
];
4852 case DIF_OP_ALLOCS
: {
4853 uintptr_t ptr
= P2ROUNDUP(mstate
->dtms_scratch_ptr
, 8);
4854 size_t size
= ptr
- mstate
->dtms_scratch_ptr
+ regs
[r1
];
4856 if (mstate
->dtms_scratch_ptr
+ size
>
4857 mstate
->dtms_scratch_base
+
4858 mstate
->dtms_scratch_size
) {
4859 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
4862 dtrace_bzero((void *)
4863 mstate
->dtms_scratch_ptr
, size
);
4864 mstate
->dtms_scratch_ptr
+= size
;
4871 if (!dtrace_canstore(regs
[rd
], regs
[r2
],
4873 *flags
|= CPU_DTRACE_BADADDR
;
4878 dtrace_bcopy((void *)(uintptr_t)regs
[r1
],
4879 (void *)(uintptr_t)regs
[rd
], (size_t)regs
[r2
]);
4883 if (!dtrace_canstore(regs
[rd
], 1, mstate
, vstate
)) {
4884 *flags
|= CPU_DTRACE_BADADDR
;
4888 *((uint8_t *)(uintptr_t)regs
[rd
]) = (uint8_t)regs
[r1
];
4892 if (!dtrace_canstore(regs
[rd
], 2, mstate
, vstate
)) {
4893 *flags
|= CPU_DTRACE_BADADDR
;
4898 *flags
|= CPU_DTRACE_BADALIGN
;
4902 *((uint16_t *)(uintptr_t)regs
[rd
]) = (uint16_t)regs
[r1
];
4906 if (!dtrace_canstore(regs
[rd
], 4, mstate
, vstate
)) {
4907 *flags
|= CPU_DTRACE_BADADDR
;
4912 *flags
|= CPU_DTRACE_BADALIGN
;
4916 *((uint32_t *)(uintptr_t)regs
[rd
]) = (uint32_t)regs
[r1
];
4920 if (!dtrace_canstore(regs
[rd
], 8, mstate
, vstate
)) {
4921 *flags
|= CPU_DTRACE_BADADDR
;
4925 #if !defined(__APPLE__)
4928 if (regs
[rd
] & 3) { /* Darwin kmem_zalloc() called from dtrace_difo_init() is 4-byte aligned. */
4929 #endif /* __APPLE__ */
4930 *flags
|= CPU_DTRACE_BADALIGN
;
4934 *((uint64_t *)(uintptr_t)regs
[rd
]) = regs
[r1
];
4939 if (!(*flags
& CPU_DTRACE_FAULT
))
4942 mstate
->dtms_fltoffs
= opc
* sizeof (dif_instr_t
);
4943 mstate
->dtms_present
|= DTRACE_MSTATE_FLTOFFS
;
4949 dtrace_action_breakpoint(dtrace_ecb_t
*ecb
)
4951 dtrace_probe_t
*probe
= ecb
->dte_probe
;
4952 dtrace_provider_t
*prov
= probe
->dtpr_provider
;
4953 char c
[DTRACE_FULLNAMELEN
+ 80], *str
;
4954 char *msg
= "dtrace: breakpoint action at probe ";
4955 char *ecbmsg
= " (ecb ";
4956 uintptr_t mask
= (0xf << (sizeof (uintptr_t) * NBBY
/ 4));
4957 uintptr_t val
= (uintptr_t)ecb
;
4958 int shift
= (sizeof (uintptr_t) * NBBY
) - 4, i
= 0;
4960 if (dtrace_destructive_disallow
)
4964 * It's impossible to be taking action on the NULL probe.
4966 ASSERT(probe
!= NULL
);
4969 * This is a poor man's (destitute man's?) sprintf(): we want to
4970 * print the provider name, module name, function name and name of
4971 * the probe, along with the hex address of the ECB with the breakpoint
4972 * action -- all of which we must place in the character buffer by
4975 while (*msg
!= '\0')
4978 for (str
= prov
->dtpv_name
; *str
!= '\0'; str
++)
4982 for (str
= probe
->dtpr_mod
; *str
!= '\0'; str
++)
4986 for (str
= probe
->dtpr_func
; *str
!= '\0'; str
++)
4990 for (str
= probe
->dtpr_name
; *str
!= '\0'; str
++)
4993 while (*ecbmsg
!= '\0')
4996 while (shift
>= 0) {
4997 mask
= (uintptr_t)0xf << shift
;
4999 if (val
>= ((uintptr_t)1 << shift
))
5000 c
[i
++] = "0123456789abcdef"[(val
& mask
) >> shift
];
static void
dtrace_action_panic(dtrace_ecb_t *ecb)
{
	dtrace_probe_t *probe = ecb->dte_probe;

	/*
	 * It's impossible to be taking action on the NULL probe.
	 */
	ASSERT(probe != NULL);

	if (dtrace_destructive_disallow)
		return;

	if (dtrace_panicked != NULL)
		return;

#if !defined(__APPLE__)
	if (dtrace_casptr(&dtrace_panicked, NULL, curthread) != NULL)
		return;
#else
	if (dtrace_casptr(&dtrace_panicked, NULL, current_thread()) != NULL)
		return;
#endif /* __APPLE__ */

	/*
	 * We won the right to panic.  (We want to be sure that only one
	 * thread calls panic() from dtrace_probe(), and that panic() is
	 * called exactly once.)
	 */
	dtrace_panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)",
	    probe->dtpr_provider->dtpv_name, probe->dtpr_mod,
	    probe->dtpr_func, probe->dtpr_name, (void *)ecb);

#if defined(__APPLE__)
	/* Mac OS X debug feature -- can return from panic() */
	dtrace_panicked = NULL;
#endif /* __APPLE__ */
}
static void
dtrace_action_raise(uint64_t sig)
{
	if (dtrace_destructive_disallow)
		return;

	if (sig >= NSIG) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return;
	}

#if !defined(__APPLE__)
	/*
	 * raise() has a queue depth of 1 -- we ignore all subsequent
	 * invocations of the raise() action.
	 */
	if (curthread->t_dtrace_sig == 0)
		curthread->t_dtrace_sig = (uint8_t)sig;

	curthread->t_sig_check = 1;
	aston(curthread);
#else
	uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());

	if (uthread && uthread->t_dtrace_sig == 0) {
		uthread->t_dtrace_sig = sig;
		psignal(current_proc(), (int)sig);
	}
#endif /* __APPLE__ */
}
static void
dtrace_action_stop(void)
{
	if (dtrace_destructive_disallow)
		return;

#if !defined(__APPLE__)
	if (!curthread->t_dtrace_stop) {
		curthread->t_dtrace_stop = 1;
		curthread->t_sig_check = 1;
		aston(curthread);
	}
#else
	psignal(current_proc(), SIGSTOP);
#endif /* __APPLE__ */
}
static void
dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val)
{
	hrtime_t now;
	volatile uint16_t *flags;
	cpu_t *cpu = CPU;

	if (dtrace_destructive_disallow)
		return;

	flags = (volatile uint16_t *)&cpu_core[cpu->cpu_id].cpuc_dtrace_flags;

	now = dtrace_gethrtime();

	if (now - cpu->cpu_dtrace_chillmark > dtrace_chill_interval) {
		/*
		 * We need to advance the mark to the current time.
		 */
		cpu->cpu_dtrace_chillmark = now;
		cpu->cpu_dtrace_chilled = 0;
	}

	/*
	 * Now check to see if the requested chill time would take us over
	 * the maximum amount of time allowed in the chill interval.  (Or
	 * worse, if the calculation itself induces overflow.)
	 */
	if (cpu->cpu_dtrace_chilled + val > dtrace_chill_max ||
	    cpu->cpu_dtrace_chilled + val < cpu->cpu_dtrace_chilled) {
		*flags |= CPU_DTRACE_ILLOP;
		return;
	}

	while (dtrace_gethrtime() - now < val)
		continue;

	/*
	 * Normally, we assure that the value of the variable "timestamp" does
	 * not change within an ECB.  The presence of chill() represents an
	 * exception to this rule, however.
	 */
	mstate->dtms_present &= ~DTRACE_MSTATE_TIMESTAMP;
	cpu->cpu_dtrace_chilled += val;
}
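/*
 * Illustrative note (not from the original source): chill() accounting is
 * per-CPU and per-interval.  Assuming the stock tunables inherited from
 * Solaris (dtrace_chill_interval of one second, dtrace_chill_max of 500
 * milliseconds), an illustrative D clause such as
 *
 *	syscall:::entry { chill(100000000); }
 *
 * requests 100 ms per firing and can therefore spin at most about five times
 * per second on a given CPU; once cpu_dtrace_chilled + val would exceed
 * dtrace_chill_max, the CPU_DTRACE_ILLOP fault is raised instead, and the
 * counter is only reset after the chill mark is advanced past
 * dtrace_chill_interval.
 */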
5143 dtrace_action_ustack(dtrace_mstate_t
*mstate
, dtrace_state_t
*state
,
5144 uint64_t *buf
, uint64_t arg
)
5146 int nframes
= DTRACE_USTACK_NFRAMES(arg
);
5147 int strsize
= DTRACE_USTACK_STRSIZE(arg
);
5148 uint64_t *pcs
= &buf
[1], *fps
;
5149 char *str
= (char *)&pcs
[nframes
];
5150 int size
, offs
= 0, i
, j
;
5151 uintptr_t old
= mstate
->dtms_scratch_ptr
, saved
;
5152 uint16_t *flags
= &cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
;
5156 * Should be taking a faster path if string space has not been
5159 ASSERT(strsize
!= 0);
5162 * We will first allocate some temporary space for the frame pointers.
5164 fps
= (uint64_t *)P2ROUNDUP(mstate
->dtms_scratch_ptr
, 8);
5165 size
= (uintptr_t)fps
- mstate
->dtms_scratch_ptr
+
5166 (nframes
* sizeof (uint64_t));
5168 if (mstate
->dtms_scratch_ptr
+ size
>
5169 mstate
->dtms_scratch_base
+ mstate
->dtms_scratch_size
) {
5171 * Not enough room for our frame pointers -- need to indicate
5172 * that we ran out of scratch space.
5174 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
5178 mstate
->dtms_scratch_ptr
+= size
;
5179 saved
= mstate
->dtms_scratch_ptr
;
5182 * Now get a stack with both program counters and frame pointers.
5184 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
5185 dtrace_getufpstack(buf
, fps
, nframes
+ 1);
5186 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
5189 * If that faulted, we're cooked.
5191 if (*flags
& CPU_DTRACE_FAULT
)
5195 * Now we want to walk up the stack, calling the USTACK helper. For
5196 * each iteration, we restore the scratch pointer.
5198 for (i
= 0; i
< nframes
; i
++) {
5199 mstate
->dtms_scratch_ptr
= saved
;
5201 if (offs
>= strsize
)
5204 sym
= (char *)(uintptr_t)dtrace_helper(
5205 DTRACE_HELPER_ACTION_USTACK
,
5206 mstate
, state
, pcs
[i
], fps
[i
]);
5209 * If we faulted while running the helper, we're going to
5210 * clear the fault and null out the corresponding string.
5212 if (*flags
& CPU_DTRACE_FAULT
) {
5213 *flags
&= ~CPU_DTRACE_FAULT
;
5223 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
5226 * Now copy in the string that the helper returned to us.
5228 for (j
= 0; offs
+ j
< strsize
; j
++) {
5229 if ((str
[offs
+ j
] = sym
[j
]) == '\0')
5233 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
5238 if (offs
>= strsize
) {
5240 * If we didn't have room for all of the strings, we don't
5241 * abort processing -- this needn't be a fatal error -- but we
5242 * still want to increment a counter (dts_stkstroverflows) to
5243 * allow this condition to be warned about. (If this is from
5244 * a jstack() action, it is easily tuned via jstackstrsize.)
5246 dtrace_error(&state
->dts_stkstroverflows
);
5249 while (offs
< strsize
)
5253 mstate
->dtms_scratch_ptr
= old
;
5257 * If you're looking for the epicenter of DTrace, you just found it. This
5258 * is the function called by the provider to fire a probe -- from which all
5259 * subsequent probe-context DTrace activity emanates.
5261 #if !defined(__APPLE__)
5263 dtrace_probe(dtrace_id_t id
, uintptr_t arg0
, uintptr_t arg1
,
5264 uintptr_t arg2
, uintptr_t arg3
, uintptr_t arg4
)
5267 __dtrace_probe(dtrace_id_t id
, uint64_t arg0
, uint64_t arg1
,
5268 uint64_t arg2
, uint64_t arg3
, uint64_t arg4
)
5269 #endif /* __APPLE__ */
5271 processorid_t cpuid
;
5272 dtrace_icookie_t cookie
;
5273 dtrace_probe_t
*probe
;
5274 dtrace_mstate_t mstate
;
5276 dtrace_action_t
*act
;
5280 volatile uint16_t *flags
;
5283 #if !defined(__APPLE__)
5285 * Kick out immediately if this CPU is still being born (in which case
5286 * curthread will be set to -1)
5288 if ((uintptr_t)curthread
& 1)
5291 #endif /* __APPLE__ */
5293 cookie
= dtrace_interrupt_disable();
5294 probe
= dtrace_probes
[id
- 1];
5295 cpuid
= CPU
->cpu_id
;
5296 onintr
= CPU_ON_INTR(CPU
);
5298 #if !defined(__APPLE__)
5299 if (!onintr
&& probe
->dtpr_predcache
!= DTRACE_CACHEIDNONE
&&
5300 probe
->dtpr_predcache
== curthread
->t_predcache
) {
5302 if (!onintr
&& probe
->dtpr_predcache
!= DTRACE_CACHEIDNONE
&&
5303 probe
->dtpr_predcache
== dtrace_get_thread_predcache(current_thread())) {
5304 #endif /* __APPLE__ */
5306 * We have hit in the predicate cache; we know that
5307 * this predicate would evaluate to be false.
5309 dtrace_interrupt_enable(cookie
);
5313 if (panic_quiesce
) {
5315 * We don't trace anything if we're panicking.
5317 dtrace_interrupt_enable(cookie
);
5321 #if !defined(__APPLE__)
5322 now
= dtrace_gethrtime();
5323 vtime
= dtrace_vtime_references
!= 0;
5325 if (vtime
&& curthread
->t_dtrace_start
)
5326 curthread
->t_dtrace_vtime
+= now
- curthread
->t_dtrace_start
;
5328 vtime
= dtrace_vtime_references
!= 0;
5332 int64_t dtrace_accum_time
, recent_vtime
;
5333 thread_t thread
= current_thread();
5335 dtrace_accum_time
= dtrace_get_thread_tracing(thread
); /* Time spent inside DTrace so far (nanoseconds) */
5337 if (dtrace_accum_time
>= 0) {
5338 recent_vtime
= dtrace_abs_to_nano(dtrace_calc_thread_recent_vtime(thread
)); /* up to the moment thread vtime */
5340 recent_vtime
= recent_vtime
- dtrace_accum_time
; /* Time without DTrace contribution */
5342 dtrace_set_thread_vtime(thread
, recent_vtime
);
5346 now
= dtrace_gethrtime(); /* must not precede dtrace_calc_thread_recent_vtime() call! */
5347 #endif /* __APPLE__ */
5349 #if defined(__APPLE__)
5351 * A provider may call dtrace_probe_error() in lieu of dtrace_probe() in some circumstances.
5352 * See, e.g. fasttrap_isa.c. However the provider has no access to ECB context, so passes
5353 * NULL through "arg0" and the probe_id of the ovedrriden probe as arg1. Detect that here
5354 * and cons up a viable state (from the probe_id).
5356 if (dtrace_probeid_error
== id
&& NULL
== arg0
) {
5357 dtrace_id_t ftp_id
= (dtrace_id_t
)arg1
;
5358 dtrace_probe_t
*ftp_probe
= dtrace_probes
[ftp_id
- 1];
5359 dtrace_ecb_t
*ftp_ecb
= ftp_probe
->dtpr_ecb
;
5361 if (NULL
!= ftp_ecb
) {
5362 dtrace_state_t
*ftp_state
= ftp_ecb
->dte_state
;
5364 arg0
= (uint64_t)(uintptr_t)ftp_state
;
5365 arg1
= ftp_ecb
->dte_epid
;
5367 * args[2-4] established by caller.
5369 ftp_state
->dts_arg_error_illval
= -1; /* arg5 */
5372 #endif /* __APPLE__ */
5374 mstate
.dtms_probe
= probe
;
5375 mstate
.dtms_arg
[0] = arg0
;
5376 mstate
.dtms_arg
[1] = arg1
;
5377 mstate
.dtms_arg
[2] = arg2
;
5378 mstate
.dtms_arg
[3] = arg3
;
5379 mstate
.dtms_arg
[4] = arg4
;
5381 flags
= (volatile uint16_t *)&cpu_core
[cpuid
].cpuc_dtrace_flags
;
5383 for (ecb
= probe
->dtpr_ecb
; ecb
!= NULL
; ecb
= ecb
->dte_next
) {
5384 dtrace_predicate_t
*pred
= ecb
->dte_predicate
;
5385 dtrace_state_t
*state
= ecb
->dte_state
;
5386 dtrace_buffer_t
*buf
= &state
->dts_buffer
[cpuid
];
5387 dtrace_buffer_t
*aggbuf
= &state
->dts_aggbuffer
[cpuid
];
5388 dtrace_vstate_t
*vstate
= &state
->dts_vstate
;
5389 dtrace_provider_t
*prov
= probe
->dtpr_provider
;
5394 * A little subtlety with the following (seemingly innocuous)
5395 * declaration of the automatic 'val': by looking at the
5396 * code, you might think that it could be declared in the
5397 * action processing loop, below. (That is, it's only used in
5398 * the action processing loop.) However, it must be declared
5399 * out of that scope because in the case of DIF expression
5400 * arguments to aggregating actions, one iteration of the
5401 * action loop will use the last iteration's value.
5409 mstate
.dtms_present
= DTRACE_MSTATE_ARGS
| DTRACE_MSTATE_PROBE
;
5410 *flags
&= ~CPU_DTRACE_ERROR
;
5412 if (prov
== dtrace_provider
) {
5414 * If dtrace itself is the provider of this probe,
5415 * we're only going to continue processing the ECB if
5416 * arg0 (the dtrace_state_t) is equal to the ECB's
5417 * creating state. (This prevents disjoint consumers
5418 * from seeing one another's metaprobes.)
5420 if (arg0
!= (uint64_t)(uintptr_t)state
)
5424 if (state
->dts_activity
!= DTRACE_ACTIVITY_ACTIVE
) {
5426 * We're not currently active. If our provider isn't
5427 * the dtrace pseudo provider, we're not interested.
5429 if (prov
!= dtrace_provider
)
5433 * Now we must further check if we are in the BEGIN
5434 * probe. If we are, we will only continue processing
5435 * if we're still in WARMUP -- if one BEGIN enabling
5436 * has invoked the exit() action, we don't want to
5437 * evaluate subsequent BEGIN enablings.
5439 if (probe
->dtpr_id
== dtrace_probeid_begin
&&
5440 state
->dts_activity
!= DTRACE_ACTIVITY_WARMUP
) {
5441 ASSERT(state
->dts_activity
==
5442 DTRACE_ACTIVITY_DRAINING
);
5447 if (ecb
->dte_cond
) {
5449 * If the dte_cond bits indicate that this
5450 * consumer is only allowed to see user-mode firings
5451 * of this probe, call the provider's dtps_usermode()
5452 * entry point to check that the probe was fired
5453 * while in a user context. Skip this ECB if that's
5456 if ((ecb
->dte_cond
& DTRACE_COND_USERMODE
) &&
5457 prov
->dtpv_pops
.dtps_usermode(prov
->dtpv_arg
,
5458 probe
->dtpr_id
, probe
->dtpr_arg
) == 0)
5462 * This is more subtle than it looks. We have to be
5463 * absolutely certain that CRED() isn't going to
5464 * change out from under us so it's only legit to
5465 * examine that structure if we're in constrained
5466 * situations. Currently, the only times we'll this
5467 * check is if a non-super-user has enabled the
5468 * profile or syscall providers -- providers that
5469 * allow visibility of all processes. For the
5470 * profile case, the check above will ensure that
5471 * we're examining a user context.
5473 if (ecb
->dte_cond
& DTRACE_COND_OWNER
) {
5476 ecb
->dte_state
->dts_cred
.dcr_cred
;
5479 ASSERT(s_cr
!= NULL
);
5481 #if !defined(__APPLE__)
5482 if ((cr
= CRED()) == NULL
||
5484 if ((cr
= dtrace_CRED()) == NULL
||
5485 #endif /* __APPLE__ */
5486 s_cr
->cr_uid
!= cr
->cr_uid
||
5487 s_cr
->cr_uid
!= cr
->cr_ruid
||
5488 s_cr
->cr_uid
!= cr
->cr_suid
||
5489 s_cr
->cr_gid
!= cr
->cr_gid
||
5490 s_cr
->cr_gid
!= cr
->cr_rgid
||
5491 s_cr
->cr_gid
!= cr
->cr_sgid
||
5492 #if !defined(__APPLE__)
5493 (proc
= ttoproc(curthread
)) == NULL
||
5494 (proc
->p_flag
& SNOCD
))
5496 1) /* Darwin omits "No Core Dump" flag. */
5497 #endif /* __APPLE__ */
5501 if (ecb
->dte_cond
& DTRACE_COND_ZONEOWNER
) {
5504 ecb
->dte_state
->dts_cred
.dcr_cred
;
5506 ASSERT(s_cr
!= NULL
);
5508 #if !defined(__APPLE__) /* Darwin doesn't do zones. */
5509 if ((cr
= CRED()) == NULL
||
5510 s_cr
->cr_zone
->zone_id
!=
5511 cr
->cr_zone
->zone_id
)
5513 #endif /* __APPLE__ */
5517 if (now
- state
->dts_alive
> dtrace_deadman_timeout
) {
5519 * We seem to be dead. Unless we (a) have kernel
5520 * destructive permissions (b) have expicitly enabled
5521 * destructive actions and (c) destructive actions have
5522 * not been disabled, we're going to transition into
5523 * the KILLED state, from which no further processing
5524 * on this state will be performed.
5526 if (!dtrace_priv_kernel_destructive(state
) ||
5527 !state
->dts_cred
.dcr_destructive
||
5528 dtrace_destructive_disallow
) {
5529 void *activity
= &state
->dts_activity
;
5530 dtrace_activity_t current
;
5533 current
= state
->dts_activity
;
5534 } while (dtrace_cas32(activity
, current
,
5535 DTRACE_ACTIVITY_KILLED
) != current
);
5541 if ((offs
= dtrace_buffer_reserve(buf
, ecb
->dte_needed
,
5542 ecb
->dte_alignment
, state
, &mstate
)) < 0)
5545 tomax
= buf
->dtb_tomax
;
5546 ASSERT(tomax
!= NULL
);
5548 if (ecb
->dte_size
!= 0)
5549 DTRACE_STORE(uint32_t, tomax
, offs
, ecb
->dte_epid
);
5551 mstate
.dtms_epid
= ecb
->dte_epid
;
5552 mstate
.dtms_present
|= DTRACE_MSTATE_EPID
;
5555 dtrace_difo_t
*dp
= pred
->dtp_difo
;
5558 rval
= dtrace_dif_emulate(dp
, &mstate
, vstate
, state
);
5560 if (!(*flags
& CPU_DTRACE_ERROR
) && !rval
) {
5561 dtrace_cacheid_t cid
= probe
->dtpr_predcache
;
5563 if (cid
!= DTRACE_CACHEIDNONE
&& !onintr
) {
5565 * Update the predicate cache...
5567 ASSERT(cid
== pred
->dtp_cacheid
);
5568 #if !defined(__APPLE__)
5569 curthread
->t_predcache
= cid
;
5571 dtrace_set_thread_predcache(current_thread(), cid
);
5572 #endif /* __APPLE__ */
5579 for (act
= ecb
->dte_action
; !(*flags
& CPU_DTRACE_ERROR
) &&
5580 act
!= NULL
; act
= act
->dta_next
) {
5583 dtrace_recdesc_t
*rec
= &act
->dta_rec
;
5585 size
= rec
->dtrd_size
;
5586 valoffs
= offs
+ rec
->dtrd_offset
;
5588 if (DTRACEACT_ISAGG(act
->dta_kind
)) {
5590 dtrace_aggregation_t
*agg
;
5592 agg
= (dtrace_aggregation_t
*)act
;
5594 if ((dp
= act
->dta_difo
) != NULL
)
5595 v
= dtrace_dif_emulate(dp
,
5596 &mstate
, vstate
, state
);
5598 if (*flags
& CPU_DTRACE_ERROR
)
5602 * Note that we always pass the expression
5603 * value from the previous iteration of the
5604 * action loop. This value will only be used
5605 * if there is an expression argument to the
5606 * aggregating action, denoted by the
5607 * dtag_hasarg field.
5609 dtrace_aggregate(agg
, buf
,
5610 offs
, aggbuf
, v
, val
);
5614 switch (act
->dta_kind
) {
5615 case DTRACEACT_STOP
:
5616 if (dtrace_priv_proc_destructive(state
))
5617 dtrace_action_stop();
5620 case DTRACEACT_BREAKPOINT
:
5621 if (dtrace_priv_kernel_destructive(state
))
5622 dtrace_action_breakpoint(ecb
);
5625 case DTRACEACT_PANIC
:
5626 if (dtrace_priv_kernel_destructive(state
))
5627 dtrace_action_panic(ecb
);
5630 case DTRACEACT_STACK
:
5631 if (!dtrace_priv_kernel(state
))
5634 dtrace_getpcstack((pc_t
*)(tomax
+ valoffs
),
5635 size
/ sizeof (pc_t
), probe
->dtpr_aframes
,
5636 DTRACE_ANCHORED(probe
) ? NULL
:
5641 case DTRACEACT_JSTACK
:
5642 case DTRACEACT_USTACK
:
5643 if (!dtrace_priv_proc(state
))
5647 * See comment in DIF_VAR_PID.
5649 if (DTRACE_ANCHORED(mstate
.dtms_probe
) &&
5651 int depth
= DTRACE_USTACK_NFRAMES(
5654 dtrace_bzero((void *)(tomax
+ valoffs
),
5655 DTRACE_USTACK_STRSIZE(rec
->dtrd_arg
)
5656 + depth
* sizeof (uint64_t));
5661 if (DTRACE_USTACK_STRSIZE(rec
->dtrd_arg
) != 0 &&
5662 curproc
->p_dtrace_helpers
!= NULL
) {
5664 * This is the slow path -- we have
5665 * allocated string space, and we're
5666 * getting the stack of a process that
5667 * has helpers. Call into a separate
5668 * routine to perform this processing.
5670 dtrace_action_ustack(&mstate
, state
,
5671 (uint64_t *)(tomax
+ valoffs
),
5676 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
5677 dtrace_getupcstack((uint64_t *)
5679 DTRACE_USTACK_NFRAMES(rec
->dtrd_arg
) + 1);
5680 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
5690 val
= dtrace_dif_emulate(dp
, &mstate
, vstate
, state
);
5692 if (*flags
& CPU_DTRACE_ERROR
)
5695 switch (act
->dta_kind
) {
5696 case DTRACEACT_SPECULATE
:
5697 ASSERT(buf
== &state
->dts_buffer
[cpuid
]);
5698 buf
= dtrace_speculation_buffer(state
,
5702 *flags
|= CPU_DTRACE_DROP
;
5706 offs
= dtrace_buffer_reserve(buf
,
5707 ecb
->dte_needed
, ecb
->dte_alignment
,
5711 *flags
|= CPU_DTRACE_DROP
;
5715 tomax
= buf
->dtb_tomax
;
5716 ASSERT(tomax
!= NULL
);
5718 if (ecb
->dte_size
!= 0)
5719 DTRACE_STORE(uint32_t, tomax
, offs
,
5723 case DTRACEACT_CHILL
:
5724 if (dtrace_priv_kernel_destructive(state
))
5725 dtrace_action_chill(&mstate
, val
);
5728 case DTRACEACT_RAISE
:
5729 if (dtrace_priv_proc_destructive(state
))
5730 dtrace_action_raise(val
);
5733 case DTRACEACT_COMMIT
:
5737 * We need to commit our buffer state.
5740 buf
->dtb_offset
= offs
+ ecb
->dte_size
;
5741 buf
= &state
->dts_buffer
[cpuid
];
5742 dtrace_speculation_commit(state
, cpuid
, val
);
5746 case DTRACEACT_DISCARD
:
5747 dtrace_speculation_discard(state
, cpuid
, val
);
5750 case DTRACEACT_DIFEXPR
:
5751 case DTRACEACT_LIBACT
:
5752 case DTRACEACT_PRINTF
:
5753 case DTRACEACT_PRINTA
:
5754 case DTRACEACT_SYSTEM
:
5755 case DTRACEACT_FREOPEN
:
5760 if (!dtrace_priv_kernel(state
))
5764 #if !defined(__APPLE__)
5765 case DTRACEACT_USYM
:
5766 case DTRACEACT_UMOD
:
5767 case DTRACEACT_UADDR
: {
5768 struct pid
*pid
= curthread
->t_procp
->p_pidp
;
5770 if (!dtrace_priv_proc(state
))
5773 DTRACE_STORE(uint64_t, tomax
,
5774 valoffs
, (uint64_t)pid
->pid_id
);
5775 DTRACE_STORE(uint64_t, tomax
,
5776 valoffs
+ sizeof (uint64_t), val
);
5781 case DTRACEACT_USYM
:
5782 case DTRACEACT_UMOD
:
5783 case DTRACEACT_UADDR
: {
5784 if (!dtrace_priv_proc(state
))
5787 DTRACE_STORE(uint64_t, tomax
,
5788 valoffs
, (uint64_t)proc_selfpid());
5789 DTRACE_STORE(uint64_t, tomax
,
5790 valoffs
+ sizeof (uint64_t), val
);
5794 #endif /* __APPLE__ */
5796 case DTRACEACT_EXIT
: {
5798 * For the exit action, we are going to attempt
5799 * to atomically set our activity to be
5800 * draining. If this fails (either because
5801 * another CPU has beat us to the exit action,
5802 * or because our current activity is something
5803 * other than ACTIVE or WARMUP), we will
5804 * continue. This assures that the exit action
5805 * can be successfully recorded at most once
5806 * when we're in the ACTIVE state. If we're
5807 * encountering the exit() action while in
5808 * COOLDOWN, however, we want to honor the new
5809 * status code. (We know that we're the only
5810 * thread in COOLDOWN, so there is no race.)
5812 void *activity
= &state
->dts_activity
;
5813 dtrace_activity_t current
= state
->dts_activity
;
5815 if (current
== DTRACE_ACTIVITY_COOLDOWN
)
5818 if (current
!= DTRACE_ACTIVITY_WARMUP
)
5819 current
= DTRACE_ACTIVITY_ACTIVE
;
5821 if (dtrace_cas32(activity
, current
,
5822 DTRACE_ACTIVITY_DRAINING
) != current
) {
5823 *flags
|= CPU_DTRACE_DROP
;
5834 if (dp
->dtdo_rtype
.dtdt_flags
& DIF_TF_BYREF
) {
5835 uintptr_t end
= valoffs
+ size
;
5838 * If this is a string, we're going to only
5839 * load until we find the zero byte -- after
5840 * which we'll store zero bytes.
5842 if (dp
->dtdo_rtype
.dtdt_kind
==
5845 int intuple
= act
->dta_intuple
;
5848 for (s
= 0; s
< size
; s
++) {
5850 c
= dtrace_load8(val
++);
5852 DTRACE_STORE(uint8_t, tomax
,
5855 if (c
== '\0' && intuple
)
5862 while (valoffs
< end
) {
5863 DTRACE_STORE(uint8_t, tomax
, valoffs
++,
5864 dtrace_load8(val
++));
5874 case sizeof (uint8_t):
5875 DTRACE_STORE(uint8_t, tomax
, valoffs
, val
);
5877 case sizeof (uint16_t):
5878 DTRACE_STORE(uint16_t, tomax
, valoffs
, val
);
5880 case sizeof (uint32_t):
5881 DTRACE_STORE(uint32_t, tomax
, valoffs
, val
);
5883 case sizeof (uint64_t):
5884 DTRACE_STORE(uint64_t, tomax
, valoffs
, val
);
5888 * Any other size should have been returned by
5889 * reference, not by value.
5896 if (*flags
& CPU_DTRACE_DROP
)
5899 if (*flags
& CPU_DTRACE_FAULT
) {
5901 dtrace_action_t
*err
;
5905 if (probe
->dtpr_id
== dtrace_probeid_error
) {
5907 * There's nothing we can do -- we had an
5908 * error on the error probe. We bump an
5909 * error counter to at least indicate that
5910 * this condition happened.
5912 dtrace_error(&state
->dts_dblerrors
);
5918 * Before recursing on dtrace_probe(), we
5919 * need to explicitly clear out our start
5920 * time to prevent it from being accumulated
5921 * into t_dtrace_vtime.
5923 #if !defined(__APPLE__)
5924 curthread
->t_dtrace_start
= 0;
5926 /* Set the sign bit on t_dtrace_tracing to suspend accumulation to it. */
5927 dtrace_set_thread_tracing(current_thread(),
5928 (1ULL<<63) | dtrace_get_thread_tracing(current_thread()));
5929 #endif /* __APPLE__ */
5933 * Iterate over the actions to figure out which action
5934 * we were processing when we experienced the error.
5935 * Note that act points _past_ the faulting action; if
5936 * act is ecb->dte_action, the fault was in the
5937 * predicate, if it's ecb->dte_action->dta_next it's
5938 * in action #1, and so on.
5940 for (err
= ecb
->dte_action
, ndx
= 0;
5941 err
!= act
; err
= err
->dta_next
, ndx
++)
5944 dtrace_probe_error(state
, ecb
->dte_epid
, ndx
,
5945 (mstate
.dtms_present
& DTRACE_MSTATE_FLTOFFS
) ?
5946 mstate
.dtms_fltoffs
: -1, DTRACE_FLAGS2FLT(*flags
),
5947 cpu_core
[cpuid
].cpuc_dtrace_illval
);
5953 buf
->dtb_offset
= offs
+ ecb
->dte_size
;
5956 #if !defined(__APPLE__)
5958 curthread
->t_dtrace_start
= dtrace_gethrtime();
5961 thread_t thread
= current_thread();
5962 int64_t t
= dtrace_get_thread_tracing(thread
);
5965 /* Usual case, accumulate time spent here into t_dtrace_tracing */
5966 dtrace_set_thread_tracing(thread
, t
+ (dtrace_gethrtime() - now
));
5968 /* Return from error recursion. No accumulation, just clear the sign bit on t_dtrace_tracing. */
5969 dtrace_set_thread_tracing(thread
, (~(1ULL<<63)) & t
);
5972 #endif /* __APPLE__ */
5974 dtrace_interrupt_enable(cookie
);
#if defined(__APPLE__)
/* Don't allow a thread to re-enter dtrace_probe() */
void
dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
    uint64_t arg2, uint64_t arg3, uint64_t arg4)
{
	thread_t thread = current_thread();

	if (id == dtrace_probeid_error) {
		__dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
		dtrace_getfp(); /* Defeat tail-call optimization of __dtrace_probe() */
	} else if (!dtrace_get_thread_reentering(thread)) {
		dtrace_set_thread_reentering(thread, TRUE);
		__dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
		dtrace_set_thread_reentering(thread, FALSE);
	}
}
#endif /* __APPLE__ */
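/*
 * Illustrative note (not from the original source): the wrapper above keeps a
 * per-thread "re-entering" flag so that a probe that fires from code reached
 * while already inside __dtrace_probe() is silently dropped rather than
 * recursing.  The ERROR probe is the deliberate exception, since it is fired
 * from within probe processing itself; the dtrace_getfp() call appears to
 * exist only to keep the compiler from tail-calling __dtrace_probe(),
 * presumably so that artificial frame counts remain consistent.
 */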
/*
 * DTrace Probe Hashing Functions
 *
 * The functions in this section (and indeed, the functions in remaining
 * sections) are not _called_ from probe context.  (Any exceptions to this are
 * marked with a "Note:".)  Rather, they are called from elsewhere in the
 * DTrace framework to look-up probes in, add probes to and remove probes from
 * the DTrace probe hashes.  (Each probe is hashed by each element of the
 * probe tuple -- allowing for fast lookups, regardless of what was
 * specified.)
 */
static uint_t
dtrace_hash_str(char *p)
{
	unsigned int g;
	uint_t hval = 0;

	while (*p) {
		hval = (hval << 4) + *p++;
		if ((g = (hval & 0xf0000000)) != 0)
			hval ^= g >> 24;
		hval &= ~g;
	}
	return (hval);
}
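/*
 * Illustrative note (not from the original source): this is the classic
 * PJW/ELF string hash -- shift the accumulator left a nibble, add the next
 * character, and fold any bits that spill into the top nibble back down.
 * For short strings the fold never triggers; e.g. hashing "fbt" yields
 *
 *	((('f' << 4) + 'b') << 4) + 't' = 0x6c94
 *
 * The value is later masked with dth_mask (a power of two minus one) to
 * select a bucket.
 */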
static dtrace_hash_t *
dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs)
{
	dtrace_hash_t *hash = kmem_zalloc(sizeof (dtrace_hash_t), KM_SLEEP);

	hash->dth_stroffs = stroffs;
	hash->dth_nextoffs = nextoffs;
	hash->dth_prevoffs = prevoffs;

	hash->dth_size = 1;
	hash->dth_mask = hash->dth_size - 1;

	hash->dth_tab = kmem_zalloc(hash->dth_size *
	    sizeof (dtrace_hashbucket_t *), KM_SLEEP);

	return (hash);
}
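/*
 * Illustrative note (not from the original source): the byte offsets let one
 * hash implementation index dtrace_probe_t structures by any of their string
 * members.  The three probe hashes are created elsewhere in this file along
 * these lines (a sketch, assuming the dtpr_nextmod/dtpr_prevmod member
 * names):
 *
 *	dtrace_bymod = dtrace_hash_create(
 *	    offsetof(dtrace_probe_t, dtpr_mod),
 *	    offsetof(dtrace_probe_t, dtpr_nextmod),
 *	    offsetof(dtrace_probe_t, dtpr_prevmod));
 *
 * with analogous calls for the function-name and probe-name hashes.
 */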
#if !defined(__APPLE__) /* Quiet compiler warning */
static void
dtrace_hash_destroy(dtrace_hash_t *hash)
{
#ifdef DEBUG
	int i;

	for (i = 0; i < hash->dth_size; i++)
		ASSERT(hash->dth_tab[i] == NULL);
#endif

	kmem_free(hash->dth_tab,
	    hash->dth_size * sizeof (dtrace_hashbucket_t *));
	kmem_free(hash, sizeof (dtrace_hash_t));
}
#endif /* __APPLE__ */
static void
dtrace_hash_resize(dtrace_hash_t *hash)
{
	int size = hash->dth_size, i, ndx;
	int new_size = hash->dth_size << 1;
	int new_mask = new_size - 1;
	dtrace_hashbucket_t **new_tab, *bucket, *next;

	ASSERT((new_size & new_mask) == 0);

	new_tab = kmem_zalloc(new_size * sizeof (void *), KM_SLEEP);

	for (i = 0; i < size; i++) {
		for (bucket = hash->dth_tab[i]; bucket != NULL; bucket = next) {
			dtrace_probe_t *probe = bucket->dthb_chain;

			ASSERT(probe != NULL);
			ndx = DTRACE_HASHSTR(hash, probe) & new_mask;

			next = bucket->dthb_next;
			bucket->dthb_next = new_tab[ndx];
			new_tab[ndx] = bucket;
		}
	}

	kmem_free(hash->dth_tab, hash->dth_size * sizeof (void *));
	hash->dth_tab = new_tab;
	hash->dth_size = new_size;
	hash->dth_mask = new_mask;
}
static void
dtrace_hash_add(dtrace_hash_t *hash, dtrace_probe_t *new)
{
	int hashval = DTRACE_HASHSTR(hash, new);
	int ndx = hashval & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
	dtrace_probe_t **nextp, **prevp;

	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, new))
			goto add;
	}

	if ((hash->dth_nbuckets >> 1) > hash->dth_size) {
		dtrace_hash_resize(hash);
		dtrace_hash_add(hash, new);
		return;
	}

	bucket = kmem_zalloc(sizeof (dtrace_hashbucket_t), KM_SLEEP);
	bucket->dthb_next = hash->dth_tab[ndx];
	hash->dth_tab[ndx] = bucket;
	hash->dth_nbuckets++;

add:
	nextp = DTRACE_HASHNEXT(hash, new);
	ASSERT(*nextp == NULL && *(DTRACE_HASHPREV(hash, new)) == NULL);
	*nextp = bucket->dthb_chain;

	if (bucket->dthb_chain != NULL) {
		prevp = DTRACE_HASHPREV(hash, bucket->dthb_chain);
		ASSERT(*prevp == NULL);
		*prevp = new;
	}

	bucket->dthb_chain = new;
	bucket->dthb_len++;
}
static dtrace_probe_t *
dtrace_hash_lookup(dtrace_hash_t *hash, dtrace_probe_t *template)
{
	int hashval = DTRACE_HASHSTR(hash, template);
	int ndx = hashval & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
			return (bucket->dthb_chain);
	}

	return (NULL);
}

static int
dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template)
{
	int hashval = DTRACE_HASHSTR(hash, template);
	int ndx = hashval & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
			return (bucket->dthb_len);
	}

	return (0);
}
static void
dtrace_hash_remove(dtrace_hash_t *hash, dtrace_probe_t *probe)
{
	int ndx = DTRACE_HASHSTR(hash, probe) & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

	dtrace_probe_t **prevp = DTRACE_HASHPREV(hash, probe);
	dtrace_probe_t **nextp = DTRACE_HASHNEXT(hash, probe);

	/*
	 * Find the bucket that we're removing this probe from.
	 */
	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, probe))
			break;
	}

	ASSERT(bucket != NULL);

	if (*prevp == NULL) {
		if (*nextp == NULL) {
			/*
			 * The removed probe was the only probe on this
			 * bucket; we need to remove the bucket.
			 */
			dtrace_hashbucket_t *b = hash->dth_tab[ndx];

			ASSERT(bucket->dthb_chain == probe);
			ASSERT(b != NULL);

			if (b == bucket) {
				hash->dth_tab[ndx] = bucket->dthb_next;
			} else {
				while (b->dthb_next != bucket)
					b = b->dthb_next;
				b->dthb_next = bucket->dthb_next;
			}

			ASSERT(hash->dth_nbuckets > 0);
			hash->dth_nbuckets--;
			kmem_free(bucket, sizeof (dtrace_hashbucket_t));
			return;
		}

		bucket->dthb_chain = *nextp;
	} else {
		*(DTRACE_HASHNEXT(hash, *prevp)) = *nextp;
	}

	if (*nextp != NULL)
		*(DTRACE_HASHPREV(hash, *nextp)) = *prevp;
}
/*
 * DTrace Utility Functions
 *
 * These are random utility functions that are _not_ called from probe
 * context.
 */
static int
dtrace_badattr(const dtrace_attribute_t *a)
{
	return (a->dtat_name > DTRACE_STABILITY_MAX ||
	    a->dtat_data > DTRACE_STABILITY_MAX ||
	    a->dtat_class > DTRACE_CLASS_MAX);
}

/*
 * Return a duplicate copy of a string.  If the specified string is NULL,
 * this function returns a zero-length string.
 */
static char *
dtrace_strdup(const char *str)
{
	char *new = kmem_zalloc((str != NULL ? strlen(str) : 0) + 1, KM_SLEEP);

	if (str != NULL)
		(void) strcpy(new, str);

	return (new);
}

#define DTRACE_ISALPHA(c) \
	(((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
static int
dtrace_badname(const char *s)
{
	char c;

	if (s == NULL || (c = *s++) == '\0')
		return (0);

	if (!DTRACE_ISALPHA(c) && c != '-' && c != '_' && c != '.')
		return (1);

	while ((c = *s++) != '\0') {
		if (!DTRACE_ISALPHA(c) && (c < '0' || c > '9') &&
		    c != '-' && c != '_' && c != '.' && c != '`')
			return (1);
	}

	return (0);
}
static void
dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp)
{
	uint32_t priv;

	if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
		/*
		 * For DTRACE_PRIV_ALL, the uid and zoneid don't matter.
		 */
		priv = DTRACE_PRIV_ALL;
	} else {
		*uidp = crgetuid(cr);
		*zoneidp = crgetzoneid(cr);

		priv = 0;
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE))
			priv |= DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER;
		else if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE))
			priv |= DTRACE_PRIV_USER;
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE))
			priv |= DTRACE_PRIV_PROC;
		if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
			priv |= DTRACE_PRIV_OWNER;
		if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
			priv |= DTRACE_PRIV_ZONEOWNER;
	}

	*privp = priv;
}
#ifdef DTRACE_ERRDEBUG
static void
dtrace_errdebug(const char *str)
{
	int hval = dtrace_hash_str((char *)str) % DTRACE_ERRHASHSZ;
	int occupied = 0;

	lck_mtx_lock(&dtrace_errlock);
	dtrace_errlast = str;
#if !defined(__APPLE__)
	dtrace_errthread = curthread;
#else
	dtrace_errthread = current_thread();
#endif /* __APPLE__ */

	while (occupied++ < DTRACE_ERRHASHSZ) {
		if (dtrace_errhash[hval].dter_msg == str) {
			dtrace_errhash[hval].dter_count++;
			goto out;
		}

		if (dtrace_errhash[hval].dter_msg != NULL) {
			hval = (hval + 1) % DTRACE_ERRHASHSZ;
			continue;
		}

		dtrace_errhash[hval].dter_msg = str;
		dtrace_errhash[hval].dter_count = 1;
		goto out;
	}

	panic("dtrace: undersized error hash");

out:
	lck_mtx_unlock(&dtrace_errlock);
}
#endif /* DTRACE_ERRDEBUG */
/*
 * DTrace Matching Functions
 *
 * These functions are used to match groups of probes, given some elements of
 * a probe tuple, or some globbed expressions for elements of a probe tuple.
 */
static int
dtrace_match_priv(const dtrace_probe_t *prp, uint32_t priv, uid_t uid,
    zoneid_t zoneid)
{
	if (priv != DTRACE_PRIV_ALL) {
		uint32_t ppriv = prp->dtpr_provider->dtpv_priv.dtpp_flags;
		uint32_t match = priv & ppriv;

		/*
		 * No PRIV_DTRACE_* privileges...
		 */
		if ((priv & (DTRACE_PRIV_PROC | DTRACE_PRIV_USER |
		    DTRACE_PRIV_KERNEL)) == 0)
			return (0);

		/*
		 * No matching bits, but there were bits to match...
		 */
		if (match == 0 && ppriv != 0)
			return (0);

		/*
		 * Need to have permissions to the process, but don't...
		 */
		if (((ppriv & ~match) & DTRACE_PRIV_OWNER) != 0 &&
		    uid != prp->dtpr_provider->dtpv_priv.dtpp_uid) {
			return (0);
		}

		/*
		 * Need to be in the same zone unless we possess the
		 * privilege to examine all zones.
		 */
		if (((ppriv & ~match) & DTRACE_PRIV_ZONEOWNER) != 0 &&
		    zoneid != prp->dtpr_provider->dtpv_priv.dtpp_zoneid) {
			return (0);
		}
	}

	return (1);
}
/*
 * dtrace_match_probe compares a dtrace_probe_t to a pre-compiled key, which
 * consists of input pattern strings and an ops-vector to evaluate them.
 * This function returns >0 for match, 0 for no match, and <0 for error.
 */
static int
dtrace_match_probe(const dtrace_probe_t *prp, const dtrace_probekey_t *pkp,
    uint32_t priv, uid_t uid, zoneid_t zoneid)
{
	dtrace_provider_t *pvp = prp->dtpr_provider;
	int rv;

	if (pvp->dtpv_defunct)
		return (0);

	if ((rv = pkp->dtpk_pmatch(pvp->dtpv_name, pkp->dtpk_prov, 0)) <= 0)
		return (rv);

	if ((rv = pkp->dtpk_mmatch(prp->dtpr_mod, pkp->dtpk_mod, 0)) <= 0)
		return (rv);

	if ((rv = pkp->dtpk_fmatch(prp->dtpr_func, pkp->dtpk_func, 0)) <= 0)
		return (rv);

	if ((rv = pkp->dtpk_nmatch(prp->dtpr_name, pkp->dtpk_name, 0)) <= 0)
		return (rv);

	if (dtrace_match_priv(prp, priv, uid, zoneid) == 0)
		return (0);

	return (rv);
}
/*
 * dtrace_match_glob() is a safe kernel implementation of the gmatch(3GEN)
 * interface for matching a glob pattern 'p' to an input string 's'.  Unlike
 * libc's version, the kernel version only applies to 8-bit ASCII strings.
 * In addition, all of the recursion cases except for '*' matching have been
 * unwound.  For '*', we still implement recursive evaluation, but a depth
 * counter is maintained and matching is aborted if we recurse too deep.
 * The function returns 0 if no match, >0 if match, and <0 if recursion error.
 */
static int
dtrace_match_glob(const char *s, const char *p, int depth)
{
	const char *olds;
	char s1, c;
	int gs;

	if (depth > DTRACE_PROBEKEY_MAXDEPTH)
		return (-1);

	if (s == NULL)
		s = ""; /* treat NULL as empty string */

top:
	olds = s;
	s1 = *s++;

	if (p == NULL)
		return (0);

	if ((c = *p++) == '\0')
		return (s1 == '\0');

	switch (c) {
	case '[': {
		int ok = 0, notflag = 0;
		char lc = '\0';

		if (s1 == '\0')
			return (0);

		if (*p == '!') {
			notflag = 1;
			p++;
		}

		if ((c = *p++) == '\0')
			return (0);

		do {
			if (c == '-' && lc != '\0' && *p != ']') {
				if ((c = *p++) == '\0')
					return (0);
				if (c == '\\' && (c = *p++) == '\0')
					return (0);

				if (notflag) {
					if (s1 < lc || s1 > c)
						ok++;
					else
						return (0);
				} else if (lc <= s1 && s1 <= c)
					ok++;

			} else if (c == '\\' && (c = *p++) == '\0')
				return (0);

			lc = c; /* save left-hand 'c' for next iteration */

			if (notflag) {
				if (s1 != c)
					ok++;
				else
					return (0);
			} else if (s1 == c)
				ok++;

			if ((c = *p++) == '\0')
				return (0);

		} while (c != ']');

		if (ok)
			goto top;

		return (0);
	}

	case '\\':
		if ((c = *p++) == '\0')
			return (0);
		/*FALLTHRU*/

	default:
		if (c != s1)
			return (0);
		/*FALLTHRU*/

	case '?':
		if (s1 != '\0')
			goto top;
		return (0);

	case '*':
		while (*p == '*')
			p++; /* consecutive *'s are identical to a single one */

		if (*p == '\0')
			return (1);

		for (s = olds; *s != '\0'; s++) {
			if ((gs = dtrace_match_glob(s, p, depth + 1)) != 0)
				return (gs);
		}

		return (0);
	}
}
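/*
 * Illustrative note (not from the original source): the glob syntax accepted
 * here is the familiar sh(1)/gmatch(3GEN) subset, so at this layer
 *
 *	dtrace_match_glob("read", "re*", 0)       returns 1
 *	dtrace_match_glob("read", "r?a[a-f]", 0)  returns 1
 *	dtrace_match_glob("read", "write*", 0)    returns 0
 *
 * with recursion (used only for '*') capped at DTRACE_PROBEKEY_MAXDEPTH, in
 * which case a negative value is returned and treated as an error by the
 * caller.
 */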
/*ARGSUSED*/
static int
dtrace_match_string(const char *s, const char *p, int depth)
{
	return (s != NULL && strcmp(s, p) == 0);
}

/*ARGSUSED*/
static int
dtrace_match_nul(const char *s, const char *p, int depth)
{
	return (1); /* always match the empty pattern */
}

/*ARGSUSED*/
static int
dtrace_match_nonzero(const char *s, const char *p, int depth)
{
	return (s != NULL && s[0] != '\0');
}
static int
dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
    zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *), void *arg)
{
	dtrace_probe_t template, *probe;
	dtrace_hash_t *hash = NULL;
	int len, best = INT_MAX, nmatched = 0;
	dtrace_id_t i;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	/*
	 * If the probe ID is specified in the key, just lookup by ID and
	 * invoke the match callback once if a matching probe is found.
	 */
	if (pkp->dtpk_id != DTRACE_IDNONE) {
		if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL &&
		    dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) {
			(void) (*matched)(probe, arg);
			nmatched++;
		}
		return (nmatched);
	}

	template.dtpr_mod = (char *)pkp->dtpk_mod;
	template.dtpr_func = (char *)pkp->dtpk_func;
	template.dtpr_name = (char *)pkp->dtpk_name;

	/*
	 * We want to find the most distinct of the module name, function
	 * name, and name.  So for each one that is not a glob pattern or
	 * empty string, we perform a lookup in the corresponding hash and
	 * use the hash table with the fewest collisions to do our search.
	 */
	if (pkp->dtpk_mmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_bymod, &template)) < best) {
		best = len;
		hash = dtrace_bymod;
	}

	if (pkp->dtpk_fmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_byfunc, &template)) < best) {
		best = len;
		hash = dtrace_byfunc;
	}

	if (pkp->dtpk_nmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_byname, &template)) < best) {
		best = len;
		hash = dtrace_byname;
	}

	/*
	 * If we did not select a hash table, iterate over every probe and
	 * invoke our callback for each one that matches our input probe key.
	 */
	if (hash == NULL) {
		for (i = 0; i < dtrace_nprobes; i++) {
			if ((probe = dtrace_probes[i]) == NULL ||
			    dtrace_match_probe(probe, pkp, priv, uid,
			    zoneid) <= 0)
				continue;

			nmatched++;

			if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT)
				break;
		}

		return (nmatched);
	}

	/*
	 * If we selected a hash table, iterate over each probe of the same key
	 * name and invoke the callback for every probe that matches the other
	 * attributes of our input probe key.
	 */
	for (probe = dtrace_hash_lookup(hash, &template); probe != NULL;
	    probe = *(DTRACE_HASHNEXT(hash, probe))) {

		if (dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0)
			continue;

		nmatched++;

		if ((*matched)(probe, arg) != DTRACE_MATCH_NEXT)
			break;
	}

	return (nmatched);
}
/*
 * Return the function pointer dtrace_probecmp() should use to compare the
 * specified pattern with a string.  For NULL or empty patterns, we select
 * dtrace_match_nul().  For glob pattern strings, we use dtrace_match_glob().
 * For non-empty non-glob strings, we use dtrace_match_string().
 */
static dtrace_probekey_f *
dtrace_probekey_func(const char *p)
{
	char c;

	if (p == NULL || *p == '\0')
		return (&dtrace_match_nul);

	while ((c = *p++) != '\0') {
		if (c == '[' || c == '?' || c == '*' || c == '\\')
			return (&dtrace_match_glob);
	}

	return (&dtrace_match_string);
}
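/*
 * Illustrative note (not from the original source): given the rules above, a
 * probe description element such as "syscall" selects dtrace_match_string(),
 * "read*" or "[gs]etpid" selects dtrace_match_glob(), and an omitted (empty)
 * element selects dtrace_match_nul(), which matches anything.
 */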
/*
 * Build a probe comparison key for use with dtrace_match_probe() from the
 * given probe description.  By convention, a null key only matches anchored
 * probes: if each field is the empty string, reset dtpk_fmatch to
 * dtrace_match_nonzero().
 */
static void
dtrace_probekey(const dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp)
{
	pkp->dtpk_prov = pdp->dtpd_provider;
	pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider);

	pkp->dtpk_mod = pdp->dtpd_mod;
	pkp->dtpk_mmatch = dtrace_probekey_func(pdp->dtpd_mod);

	pkp->dtpk_func = pdp->dtpd_func;
	pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func);

	pkp->dtpk_name = pdp->dtpd_name;
	pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name);

	pkp->dtpk_id = pdp->dtpd_id;

	if (pkp->dtpk_id == DTRACE_IDNONE &&
	    pkp->dtpk_pmatch == &dtrace_match_nul &&
	    pkp->dtpk_mmatch == &dtrace_match_nul &&
	    pkp->dtpk_fmatch == &dtrace_match_nul &&
	    pkp->dtpk_nmatch == &dtrace_match_nul)
		pkp->dtpk_fmatch = &dtrace_match_nonzero;
}
/*
 * DTrace Provider-to-Framework API Functions
 *
 * These functions implement much of the Provider-to-Framework API, as
 * described in <sys/dtrace.h>.  The parts of the API not in this section are
 * the functions in the API for probe management (found below), and
 * dtrace_probe() itself (found above).
 */
6703 * Register the calling provider with the DTrace framework. This should
6704 * generally be called by DTrace providers in their attach(9E) entry point.
6707 dtrace_register(const char *name
, const dtrace_pattr_t
*pap
, uint32_t priv
,
6708 cred_t
*cr
, const dtrace_pops_t
*pops
, void *arg
, dtrace_provider_id_t
*idp
)
6710 dtrace_provider_t
*provider
;
6712 if (name
== NULL
|| pap
== NULL
|| pops
== NULL
|| idp
== NULL
) {
6713 cmn_err(CE_WARN
, "failed to register provider '%s': invalid "
6714 "arguments", name
? name
: "<NULL>");
6718 if (name
[0] == '\0' || dtrace_badname(name
)) {
6719 cmn_err(CE_WARN
, "failed to register provider '%s': invalid "
6720 "provider name", name
);
6724 if ((pops
->dtps_provide
== NULL
&& pops
->dtps_provide_module
== NULL
) ||
6725 pops
->dtps_enable
== NULL
|| pops
->dtps_disable
== NULL
||
6726 pops
->dtps_destroy
== NULL
||
6727 ((pops
->dtps_resume
== NULL
) != (pops
->dtps_suspend
== NULL
))) {
6728 cmn_err(CE_WARN
, "failed to register provider '%s': invalid "
6729 "provider ops", name
);
6733 if (dtrace_badattr(&pap
->dtpa_provider
) ||
6734 dtrace_badattr(&pap
->dtpa_mod
) ||
6735 dtrace_badattr(&pap
->dtpa_func
) ||
6736 dtrace_badattr(&pap
->dtpa_name
) ||
6737 dtrace_badattr(&pap
->dtpa_args
)) {
6738 cmn_err(CE_WARN
, "failed to register provider '%s': invalid "
6739 "provider attributes", name
);
6743 if (priv
& ~DTRACE_PRIV_ALL
) {
6744 cmn_err(CE_WARN
, "failed to register provider '%s': invalid "
6745 "privilege attributes", name
);
6749 if ((priv
& DTRACE_PRIV_KERNEL
) &&
6750 (priv
& (DTRACE_PRIV_USER
| DTRACE_PRIV_OWNER
)) &&
6751 pops
->dtps_usermode
== NULL
) {
6752 cmn_err(CE_WARN
, "failed to register provider '%s': need "
6753 "dtps_usermode() op for given privilege attributes", name
);
6757 provider
= kmem_zalloc(sizeof (dtrace_provider_t
), KM_SLEEP
);
6758 provider
->dtpv_name
= kmem_alloc(strlen(name
) + 1, KM_SLEEP
);
6759 (void) strcpy(provider
->dtpv_name
, name
);
6761 provider
->dtpv_attr
= *pap
;
6762 provider
->dtpv_priv
.dtpp_flags
= priv
;
6764 provider
->dtpv_priv
.dtpp_uid
= crgetuid(cr
);
6765 provider
->dtpv_priv
.dtpp_zoneid
= crgetzoneid(cr
);
6767 provider
->dtpv_pops
= *pops
;
6769 if (pops
->dtps_provide
== NULL
) {
6770 ASSERT(pops
->dtps_provide_module
!= NULL
);
6771 provider
->dtpv_pops
.dtps_provide
=
6772 (void (*)(void *, const dtrace_probedesc_t
*))dtrace_nullop
;
6775 if (pops
->dtps_provide_module
== NULL
) {
6776 ASSERT(pops
->dtps_provide
!= NULL
);
6777 provider
->dtpv_pops
.dtps_provide_module
=
6778 (void (*)(void *, struct modctl
*))dtrace_nullop
;
6781 if (pops
->dtps_suspend
== NULL
) {
6782 ASSERT(pops
->dtps_resume
== NULL
);
6783 provider
->dtpv_pops
.dtps_suspend
=
6784 (void (*)(void *, dtrace_id_t
, void *))dtrace_nullop
;
6785 provider
->dtpv_pops
.dtps_resume
=
6786 (void (*)(void *, dtrace_id_t
, void *))dtrace_nullop
;
6789 provider
->dtpv_arg
= arg
;
6790 *idp
= (dtrace_provider_id_t
)provider
;
6792 if (pops
== &dtrace_provider_ops
) {
6793 lck_mtx_assert(&dtrace_provider_lock
, LCK_MTX_ASSERT_OWNED
);
6794 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
6795 ASSERT(dtrace_anon
.dta_enabling
== NULL
);
6798 * We make sure that the DTrace provider is at the head of
6799 * the provider chain.
6801 provider
->dtpv_next
= dtrace_provider
;
6802 dtrace_provider
= provider
;
6806 lck_mtx_lock(&dtrace_provider_lock
);
6807 lck_mtx_lock(&dtrace_lock
);
6810 * If there is at least one provider registered, we'll add this
6811 * provider after the first provider.
6813 if (dtrace_provider
!= NULL
) {
6814 provider
->dtpv_next
= dtrace_provider
->dtpv_next
;
6815 dtrace_provider
->dtpv_next
= provider
;
6817 dtrace_provider
= provider
;
6820 if (dtrace_retained
!= NULL
) {
6821 dtrace_enabling_provide(provider
);
6824 * Now we need to call dtrace_enabling_matchall() -- which
6825 * will acquire cpu_lock and dtrace_lock. We therefore need
6826 * to drop all of our locks before calling into it...
6828 lck_mtx_unlock(&dtrace_lock
);
6829 lck_mtx_unlock(&dtrace_provider_lock
);
6830 dtrace_enabling_matchall();
6835 lck_mtx_unlock(&dtrace_lock
);
6836 lck_mtx_unlock(&dtrace_provider_lock
);
6842 * Unregister the specified provider from the DTrace framework. This should
6843 * generally be called by DTrace providers in their detach(9E) entry point.
6846 dtrace_unregister(dtrace_provider_id_t id
)
6848 dtrace_provider_t
*old
= (dtrace_provider_t
*)id
;
6849 dtrace_provider_t
*prev
= NULL
;
6851 dtrace_probe_t
*probe
, *first
= NULL
;
6853 if (old
->dtpv_pops
.dtps_enable
==
6854 (void (*)(void *, dtrace_id_t
, void *))dtrace_nullop
) {
6856 * If DTrace itself is the provider, we're called with locks
6859 ASSERT(old
== dtrace_provider
);
6860 ASSERT(dtrace_devi
!= NULL
);
6861 lck_mtx_assert(&dtrace_provider_lock
, LCK_MTX_ASSERT_OWNED
);
6862 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
6866 if (dtrace_provider
->dtpv_next
!= NULL
) {
6868 * There's another provider here; return failure.
6873 lck_mtx_lock(&dtrace_provider_lock
);
6874 lck_mtx_lock(&mod_lock
);
6875 lck_mtx_lock(&dtrace_lock
);
6879 * If anyone has /dev/dtrace open, or if there are anonymous enabled
6880 * probes, we refuse to let providers slither away, unless this
6881 * provider has already been explicitly invalidated.
6883 if (!old
->dtpv_defunct
&&
6884 (dtrace_opens
|| (dtrace_anon
.dta_state
!= NULL
&&
6885 dtrace_anon
.dta_state
->dts_necbs
> 0))) {
6887 lck_mtx_unlock(&dtrace_lock
);
6888 lck_mtx_unlock(&mod_lock
);
6889 lck_mtx_unlock(&dtrace_provider_lock
);
6895 * Attempt to destroy the probes associated with this provider.
6897 for (i
= 0; i
< dtrace_nprobes
; i
++) {
6898 if ((probe
= dtrace_probes
[i
]) == NULL
)
6901 if (probe
->dtpr_provider
!= old
)
6904 if (probe
->dtpr_ecb
== NULL
)
6908 * We have at least one ECB; we can't remove this provider.
6911 lck_mtx_unlock(&dtrace_lock
);
6912 lck_mtx_unlock(&mod_lock
);
6913 lck_mtx_unlock(&dtrace_provider_lock
);
6919 * All of the probes for this provider are disabled; we can safely
6920 * remove all of them from their hash chains and from the probe array.
6922 for (i
= 0; i
< dtrace_nprobes
; i
++) {
6923 if ((probe
= dtrace_probes
[i
]) == NULL
)
6926 if (probe
->dtpr_provider
!= old
)
6929 dtrace_probes
[i
] = NULL
;
6931 dtrace_hash_remove(dtrace_bymod
, probe
);
6932 dtrace_hash_remove(dtrace_byfunc
, probe
);
6933 dtrace_hash_remove(dtrace_byname
, probe
);
6935 if (first
== NULL
) {
6937 probe
->dtpr_nextmod
= NULL
;
6939 probe
->dtpr_nextmod
= first
;
6945 * The provider's probes have been removed from the hash chains and
6946 * from the probe array. Now issue a dtrace_sync() to be sure that
6947 * everyone has cleared out from any probe array processing.
6951 for (probe
= first
; probe
!= NULL
; probe
= first
) {
6952 first
= probe
->dtpr_nextmod
;
6954 old
->dtpv_pops
.dtps_destroy(old
->dtpv_arg
, probe
->dtpr_id
,
6956 kmem_free(probe
->dtpr_mod
, strlen(probe
->dtpr_mod
) + 1);
6957 kmem_free(probe
->dtpr_func
, strlen(probe
->dtpr_func
) + 1);
6958 kmem_free(probe
->dtpr_name
, strlen(probe
->dtpr_name
) + 1);
6959 vmem_free(dtrace_arena
, (void *)(uintptr_t)(probe
->dtpr_id
), 1);
6960 #if !defined(__APPLE__)
6961 kmem_free(probe
, sizeof (dtrace_probe_t
));
6963 zfree(dtrace_probe_t_zone
, probe
);
6967 if ((prev
= dtrace_provider
) == old
) {
6968 ASSERT(self
|| dtrace_devi
== NULL
);
6969 ASSERT(old
->dtpv_next
== NULL
|| dtrace_devi
== NULL
);
6970 dtrace_provider
= old
->dtpv_next
;
6972 while (prev
!= NULL
&& prev
->dtpv_next
!= old
)
6973 prev
= prev
->dtpv_next
;
6976 panic("attempt to unregister non-existent "
6977 "dtrace provider %p\n", (void *)id
);
6980 prev
->dtpv_next
= old
->dtpv_next
;
6984 lck_mtx_unlock(&dtrace_lock
);
6985 lck_mtx_unlock(&mod_lock
);
6986 lck_mtx_unlock(&dtrace_provider_lock
);
6989 kmem_free(old
->dtpv_name
, strlen(old
->dtpv_name
) + 1);
6990 kmem_free(old
, sizeof (dtrace_provider_t
));
6996 * Invalidate the specified provider. All subsequent probe lookups for the
6997 * specified provider will fail, but its probes will not be removed.
7000 dtrace_invalidate(dtrace_provider_id_t id
)
7002 dtrace_provider_t
*pvp
= (dtrace_provider_t
*)id
;
7004 ASSERT(pvp
->dtpv_pops
.dtps_enable
!=
7005 (void (*)(void *, dtrace_id_t
, void *))dtrace_nullop
);
7007 lck_mtx_lock(&dtrace_provider_lock
);
7008 lck_mtx_lock(&dtrace_lock
);
7010 pvp
->dtpv_defunct
= 1;
7012 lck_mtx_unlock(&dtrace_lock
);
7013 lck_mtx_unlock(&dtrace_provider_lock
);
7017 * Indicate whether or not DTrace has attached.
7020 dtrace_attached(void)
7023 * dtrace_provider will be non-NULL iff the DTrace driver has
7024 * attached. (It's non-NULL because DTrace is always itself a
7027 return (dtrace_provider
!= NULL
);

/*
 * Remove all the unenabled probes for the given provider.  This function is
 * not unlike dtrace_unregister(), except that it doesn't remove the provider
 * -- just as many of its associated probes as it can.
 */
int
dtrace_condense(dtrace_provider_id_t id)
{
	dtrace_provider_t *prov = (dtrace_provider_t *)id;
	int i;
	dtrace_probe_t *probe;

	/*
	 * Make sure this isn't the dtrace provider itself.
	 */
	ASSERT(prov->dtpv_pops.dtps_enable !=
	    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop);

	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&dtrace_lock);

	/*
	 * Attempt to destroy the probes associated with this provider.
	 */
	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_provider != prov)
			continue;

		if (probe->dtpr_ecb != NULL)
			continue;

		dtrace_probes[i] = NULL;

		dtrace_hash_remove(dtrace_bymod, probe);
		dtrace_hash_remove(dtrace_byfunc, probe);
		dtrace_hash_remove(dtrace_byname, probe);

		prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, i + 1,
		    probe->dtpr_arg);
		kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
		kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
		kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
#if !defined(__APPLE__)
		kmem_free(probe, sizeof (dtrace_probe_t));
#else
		zfree(dtrace_probe_t_zone, probe);
#endif
		vmem_free(dtrace_arena, (void *)((uintptr_t)i + 1), 1);
	}

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_provider_lock);

	return (0);
}

/*
 * DTrace Probe Management Functions
 *
 * The functions in this section perform the DTrace probe management,
 * including functions to create probes, look-up probes, and call into the
 * providers to request that probes be provided.  Some of these functions are
 * in the Provider-to-Framework API; these functions can be identified by the
 * fact that they are not declared "static".
 */

/*
 * Create a probe with the specified module name, function name, and name.
 */
dtrace_id_t
dtrace_probe_create(dtrace_provider_id_t prov, const char *mod,
    const char *func, const char *name, int aframes, void *arg)
{
	dtrace_probe_t *probe, **probes;
	dtrace_provider_t *provider = (dtrace_provider_t *)prov;
	dtrace_id_t id;

	if (provider == dtrace_provider) {
		lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	} else {
		lck_mtx_lock(&dtrace_lock);
	}

	id = (dtrace_id_t)(uintptr_t)vmem_alloc(dtrace_arena, 1,
	    VM_BESTFIT | VM_SLEEP);
#if !defined(__APPLE__)
	probe = kmem_zalloc(sizeof (dtrace_probe_t), KM_SLEEP);
#else
	probe = zalloc(dtrace_probe_t_zone);
	bzero(probe, sizeof (dtrace_probe_t));
#endif

	probe->dtpr_id = id;
	probe->dtpr_gen = dtrace_probegen++;
	probe->dtpr_mod = dtrace_strdup(mod);
	probe->dtpr_func = dtrace_strdup(func);
	probe->dtpr_name = dtrace_strdup(name);
	probe->dtpr_arg = arg;
	probe->dtpr_aframes = aframes;
	probe->dtpr_provider = provider;

	dtrace_hash_add(dtrace_bymod, probe);
	dtrace_hash_add(dtrace_byfunc, probe);
	dtrace_hash_add(dtrace_byname, probe);

	if (id - 1 >= dtrace_nprobes) {
		size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *);
		size_t nsize = osize << 1;

		if (nsize == 0) {
			ASSERT(osize == 0);
			ASSERT(dtrace_probes == NULL);
			nsize = sizeof (dtrace_probe_t *);
		}

		probes = kmem_zalloc(nsize, KM_SLEEP);

		if (dtrace_probes == NULL) {
			ASSERT(osize == 0);
			dtrace_probes = probes;
			dtrace_nprobes = 1;
		} else {
			dtrace_probe_t **oprobes = dtrace_probes;

			bcopy(oprobes, probes, osize);
			dtrace_membar_producer();
			dtrace_probes = probes;

			dtrace_sync();

			/*
			 * All CPUs are now seeing the new probes array; we can
			 * safely free the old array.
			 */
			kmem_free(oprobes, osize);
			dtrace_nprobes <<= 1;
		}

		ASSERT(id - 1 < dtrace_nprobes);
	}

	ASSERT(dtrace_probes[id - 1] == NULL);
	dtrace_probes[id - 1] = probe;

	if (provider != dtrace_provider)
		lck_mtx_unlock(&dtrace_lock);

	return (id);
}

static dtrace_probe_t *
dtrace_probe_lookup_id(dtrace_id_t id)
{
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (id == 0 || id > dtrace_nprobes)
		return (NULL);

	return (dtrace_probes[id - 1]);
}

static int
dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg)
{
	*((dtrace_id_t *)arg) = probe->dtpr_id;

	return (DTRACE_MATCH_DONE);
}

/*
 * Look up a probe based on provider and one or more of module name, function
 * name and probe name.
 */
dtrace_id_t
dtrace_probe_lookup(dtrace_provider_id_t prid, const char *mod,
    const char *func, const char *name)
{
	dtrace_probekey_t pkey;
	dtrace_id_t id;
	int match;

	pkey.dtpk_prov = ((dtrace_provider_t *)prid)->dtpv_name;
	pkey.dtpk_pmatch = &dtrace_match_string;
	pkey.dtpk_mod = mod;
	pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_func = func;
	pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_name = name;
	pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_id = DTRACE_IDNONE;

	lck_mtx_lock(&dtrace_lock);
	match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0,
	    dtrace_probe_lookup_match, &id);
	lck_mtx_unlock(&dtrace_lock);

	ASSERT(match == 1 || match == 0);
	return (match ? id : 0);
}
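
/*
 * Illustrative sketch (not part of the framework): a provider's dtps_provide
 * entry point typically pairs the two calls above -- it looks a probe up
 * before creating it so that repeated provide requests do not create
 * duplicates.  The provider id, module, function and probe names below are
 * hypothetical.
 *
 *	if (dtrace_probe_lookup(my_provider_id, "my_mod",
 *	    "my_func", "entry") == 0) {
 *		(void) dtrace_probe_create(my_provider_id, "my_mod",
 *		    "my_func", "entry", 0, NULL);
 *	}
 */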

/*
 * Returns the probe argument associated with the specified probe.
 */
void *
dtrace_probe_arg(dtrace_provider_id_t id, dtrace_id_t pid)
{
	dtrace_probe_t *probe;
	void *rval = NULL;

	lck_mtx_lock(&dtrace_lock);

	if ((probe = dtrace_probe_lookup_id(pid)) != NULL &&
	    probe->dtpr_provider == (dtrace_provider_t *)id)
		rval = probe->dtpr_arg;

	lck_mtx_unlock(&dtrace_lock);

	return (rval);
}

/*
 * Copy a probe into a probe description.
 */
static void
dtrace_probe_description(const dtrace_probe_t *prp, dtrace_probedesc_t *pdp)
{
	bzero(pdp, sizeof (dtrace_probedesc_t));
	pdp->dtpd_id = prp->dtpr_id;

	(void) strlcpy(pdp->dtpd_provider,
	    prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN);

	(void) strlcpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN);
	(void) strlcpy(pdp->dtpd_func, prp->dtpr_func, DTRACE_FUNCNAMELEN);
	(void) strlcpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN);
}

/*
 * Called to indicate that a probe -- or probes -- should be provided by a
 * specfied provider.  If the specified description is NULL, the provider will
 * be told to provide all of its probes.  (This is done whenever a new
 * consumer comes along, or whenever a retained enabling is to be matched.) If
 * the specified description is non-NULL, the provider is given the
 * opportunity to dynamically provide the specified probe, allowing providers
 * to support the creation of probes on-the-fly.  (So-called _autocreated_
 * probes.) If the provider is NULL, the operations will be applied to all
 * providers; if the provider is non-NULL the operations will only be applied
 * to the specified provider.  The dtrace_provider_lock must be held, and the
 * dtrace_lock must _not_ be held -- the provider's dtps_provide() operation
 * will need to grab the dtrace_lock when it reenters the framework through
 * dtrace_probe_lookup(), dtrace_probe_create(), etc.
 */
static void
dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv)
{
	int all = 0;

	lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED);

	if (prv == NULL) {
		all = 1;
		prv = dtrace_provider;
	}

	do {
		/*
		 * First, call the blanket provide operation.
		 */
		prv->dtpv_pops.dtps_provide(prv->dtpv_arg, desc);

#if !defined(__APPLE__)
		/*
		 * Now call the per-module provide operation.  We will grab
		 * mod_lock to prevent the list from being modified.  Note
		 * that this also prevents the mod_busy bits from changing.
		 * (mod_busy can only be changed with mod_lock held.)
		 */
		struct modctl *ctl;

		lck_mtx_lock(&mod_lock);

		ctl = &modules;
		do {
			if (ctl->mod_busy || ctl->mod_mp == NULL)
				continue;

			prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);

		} while ((ctl = ctl->mod_next) != &modules);

		lck_mtx_unlock(&mod_lock);
#else
#if 0 /* XXX Workaround for PR_4643546 XXX */
		simple_lock(&kmod_lock);

		kmod_info_t *ktl = kmod;
		while (ktl != NULL) {
			prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ktl);
			ktl = ktl->next;
		}

		simple_unlock(&kmod_lock);
#else
		/*
		 * Don't bother to iterate over the kmod list. At present only fbt
		 * offers a provide_module in its dtpv_pops, and then it ignores the
		 * module anyway.
		 */
		prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, NULL);
#endif
#endif /* __APPLE__ */
	} while (all && (prv = prv->dtpv_next) != NULL);
}

/*
 * Iterate over each probe, and call the Framework-to-Provider API function
 * indicated by offs.
 */
static void
dtrace_probe_foreach(uintptr_t offs)
{
	dtrace_provider_t *prov;
	void (*func)(void *, dtrace_id_t, void *);
	dtrace_probe_t *probe;
	dtrace_icookie_t cookie;
	int i;

	/*
	 * We disable interrupts to walk through the probe array.  This is
	 * safe -- the dtrace_sync() in dtrace_unregister() assures that we
	 * won't see stale data.
	 */
	cookie = dtrace_interrupt_disable();

	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_ecb == NULL) {
			/*
			 * This probe isn't enabled -- don't call the function.
			 */
			continue;
		}

		prov = probe->dtpr_provider;
		func = *((void(**)(void *, dtrace_id_t, void *))
		    ((uintptr_t)&prov->dtpv_pops + offs));

		func(prov->dtpv_arg, i + 1, probe->dtpr_arg);
	}

	dtrace_interrupt_enable(cookie);
}
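
/*
 * Usage note (illustrative, assuming the usual suspend/resume callers): the
 * offset passed in is an offset into the dtrace_pops_t structure, e.g.
 * something along the lines of
 *
 *	dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend));
 *
 * so that a single walker can invoke dtps_suspend or dtps_resume on every
 * enabled probe without duplicating the iteration logic.
 */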

static int
dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab)
{
	dtrace_probekey_t pkey;
	uint32_t priv;
	uid_t uid;
	zoneid_t zoneid;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	dtrace_ecb_create_cache = NULL;

	if (desc == NULL) {
		/*
		 * If we're passed a NULL description, we're being asked to
		 * create an ECB with a NULL probe.
		 */
		(void) dtrace_ecb_create_enable(NULL, enab);
		return (0);
	}

	dtrace_probekey(desc, &pkey);
	dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred,
	    &priv, &uid, &zoneid);

	return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable,
	    enab));
}

/*
 * DTrace Helper Provider Functions
 */
static void
dtrace_dofattr2attr(dtrace_attribute_t *attr, const dof_attr_t dofattr)
{
	attr->dtat_name = DOF_ATTR_NAME(dofattr);
	attr->dtat_data = DOF_ATTR_DATA(dofattr);
	attr->dtat_class = DOF_ATTR_CLASS(dofattr);
}

static void
dtrace_dofprov2hprov(dtrace_helper_provdesc_t *hprov,
    const dof_provider_t *dofprov, char *strtab)
{
	hprov->dthpv_provname = strtab + dofprov->dofpv_name;
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_provider,
	    dofprov->dofpv_provattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_mod,
	    dofprov->dofpv_modattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_func,
	    dofprov->dofpv_funcattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_name,
	    dofprov->dofpv_nameattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_args,
	    dofprov->dofpv_argsattr);
}
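
/*
 * For reference: a dof_attr_t packs the name, data and class stability values
 * of one attribute into a single 32-bit word; the DOF_ATTR_NAME(),
 * DOF_ATTR_DATA() and DOF_ATTR_CLASS() macros used above unpack those fields,
 * so the helper provider description is a straight expansion of the packed
 * attributes found in the DOF provider section.
 */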
7446 dtrace_helper_provide_one(dof_helper_t
*dhp
, dof_sec_t
*sec
, pid_t pid
)
7448 uintptr_t daddr
= (uintptr_t)dhp
->dofhp_dof
;
7449 dof_hdr_t
*dof
= (dof_hdr_t
*)daddr
;
7450 dof_sec_t
*str_sec
, *prb_sec
, *arg_sec
, *off_sec
, *enoff_sec
;
7451 dof_provider_t
*provider
;
7453 uint32_t *off
, *enoff
;
7457 dtrace_helper_provdesc_t dhpv
;
7458 dtrace_helper_probedesc_t dhpb
;
7459 dtrace_meta_t
*meta
= dtrace_meta_pid
;
7460 dtrace_mops_t
*mops
= &meta
->dtm_mops
;
7463 provider
= (dof_provider_t
*)(uintptr_t)(daddr
+ sec
->dofs_offset
);
7464 str_sec
= (dof_sec_t
*)(uintptr_t)(daddr
+ dof
->dofh_secoff
+
7465 provider
->dofpv_strtab
* dof
->dofh_secsize
);
7466 prb_sec
= (dof_sec_t
*)(uintptr_t)(daddr
+ dof
->dofh_secoff
+
7467 provider
->dofpv_probes
* dof
->dofh_secsize
);
7468 arg_sec
= (dof_sec_t
*)(uintptr_t)(daddr
+ dof
->dofh_secoff
+
7469 provider
->dofpv_prargs
* dof
->dofh_secsize
);
7470 off_sec
= (dof_sec_t
*)(uintptr_t)(daddr
+ dof
->dofh_secoff
+
7471 provider
->dofpv_proffs
* dof
->dofh_secsize
);
7473 strtab
= (char *)(uintptr_t)(daddr
+ str_sec
->dofs_offset
);
7474 off
= (uint32_t *)(uintptr_t)(daddr
+ off_sec
->dofs_offset
);
7475 arg
= (uint8_t *)(uintptr_t)(daddr
+ arg_sec
->dofs_offset
);
7479 * See dtrace_helper_provider_validate().
7481 if (dof
->dofh_ident
[DOF_ID_VERSION
] != DOF_VERSION_1
&&
7482 provider
->dofpv_prenoffs
!= DOF_SECT_NONE
) {
7483 enoff_sec
= (dof_sec_t
*)(uintptr_t)(daddr
+ dof
->dofh_secoff
+
7484 provider
->dofpv_prenoffs
* dof
->dofh_secsize
);
7485 enoff
= (uint32_t *)(uintptr_t)(daddr
+ enoff_sec
->dofs_offset
);
7488 nprobes
= prb_sec
->dofs_size
/ prb_sec
->dofs_entsize
;
7491 * Create the provider.
7493 dtrace_dofprov2hprov(&dhpv
, provider
, strtab
);
7495 if ((parg
= mops
->dtms_provide_pid(meta
->dtm_arg
, &dhpv
, pid
)) == NULL
)
7501 * Create the probes.
7503 for (i
= 0; i
< nprobes
; i
++) {
7504 probe
= (dof_probe_t
*)(uintptr_t)(daddr
+
7505 prb_sec
->dofs_offset
+ i
* prb_sec
->dofs_entsize
);
7507 dhpb
.dthpb_mod
= dhp
->dofhp_mod
;
7508 dhpb
.dthpb_func
= strtab
+ probe
->dofpr_func
;
7509 dhpb
.dthpb_name
= strtab
+ probe
->dofpr_name
;
7510 #if defined(__APPLE__)
7511 dhpb
.dthpb_base
= dhp
->dofhp_addr
;
7513 dhpb
.dthpb_base
= probe
->dofpr_addr
;
7515 dhpb
.dthpb_offs
= off
+ probe
->dofpr_offidx
;
7516 dhpb
.dthpb_noffs
= probe
->dofpr_noffs
;
7517 if (enoff
!= NULL
) {
7518 dhpb
.dthpb_enoffs
= enoff
+ probe
->dofpr_enoffidx
;
7519 dhpb
.dthpb_nenoffs
= probe
->dofpr_nenoffs
;
7521 dhpb
.dthpb_enoffs
= NULL
;
7522 dhpb
.dthpb_nenoffs
= 0;
7524 dhpb
.dthpb_args
= arg
+ probe
->dofpr_argidx
;
7525 dhpb
.dthpb_nargc
= probe
->dofpr_nargc
;
7526 dhpb
.dthpb_xargc
= probe
->dofpr_xargc
;
7527 dhpb
.dthpb_ntypes
= strtab
+ probe
->dofpr_nargv
;
7528 dhpb
.dthpb_xtypes
= strtab
+ probe
->dofpr_xargv
;
7530 mops
->dtms_create_probe(meta
->dtm_arg
, parg
, &dhpb
);
7535 dtrace_helper_provide(dof_helper_t
*dhp
, pid_t pid
)
7537 uintptr_t daddr
= (uintptr_t)dhp
->dofhp_dof
;
7538 dof_hdr_t
*dof
= (dof_hdr_t
*)daddr
;
7541 lck_mtx_assert(&dtrace_meta_lock
, LCK_MTX_ASSERT_OWNED
);
7543 for (i
= 0; i
< dof
->dofh_secnum
; i
++) {
7544 dof_sec_t
*sec
= (dof_sec_t
*)(uintptr_t)(daddr
+
7545 dof
->dofh_secoff
+ i
* dof
->dofh_secsize
);
7547 if (sec
->dofs_type
!= DOF_SECT_PROVIDER
)
7550 dtrace_helper_provide_one(dhp
, sec
, pid
);
7554 * We may have just created probes, so we must now rematch against
7555 * any retained enablings. Note that this call will acquire both
7556 * cpu_lock and dtrace_lock; the fact that we are holding
7557 * dtrace_meta_lock now is what defines the ordering with respect to
7558 * these three locks.
7560 dtrace_enabling_matchall();
7564 dtrace_helper_provider_remove_one(dof_helper_t
*dhp
, dof_sec_t
*sec
, pid_t pid
)
7566 uintptr_t daddr
= (uintptr_t)dhp
->dofhp_dof
;
7567 dof_hdr_t
*dof
= (dof_hdr_t
*)daddr
;
7569 dof_provider_t
*provider
;
7571 dtrace_helper_provdesc_t dhpv
;
7572 dtrace_meta_t
*meta
= dtrace_meta_pid
;
7573 dtrace_mops_t
*mops
= &meta
->dtm_mops
;
7575 provider
= (dof_provider_t
*)(uintptr_t)(daddr
+ sec
->dofs_offset
);
7576 str_sec
= (dof_sec_t
*)(uintptr_t)(daddr
+ dof
->dofh_secoff
+
7577 provider
->dofpv_strtab
* dof
->dofh_secsize
);
7579 strtab
= (char *)(uintptr_t)(daddr
+ str_sec
->dofs_offset
);
7582 * Create the provider.
7584 dtrace_dofprov2hprov(&dhpv
, provider
, strtab
);
7586 mops
->dtms_remove_pid(meta
->dtm_arg
, &dhpv
, pid
);
7592 dtrace_helper_provider_remove(dof_helper_t
*dhp
, pid_t pid
)
7594 uintptr_t daddr
= (uintptr_t)dhp
->dofhp_dof
;
7595 dof_hdr_t
*dof
= (dof_hdr_t
*)daddr
;
7598 lck_mtx_assert(&dtrace_meta_lock
, LCK_MTX_ASSERT_OWNED
);
7600 for (i
= 0; i
< dof
->dofh_secnum
; i
++) {
7601 dof_sec_t
*sec
= (dof_sec_t
*)(uintptr_t)(daddr
+
7602 dof
->dofh_secoff
+ i
* dof
->dofh_secsize
);
7604 if (sec
->dofs_type
!= DOF_SECT_PROVIDER
)
7607 dtrace_helper_provider_remove_one(dhp
, sec
, pid
);
7612 * DTrace Meta Provider-to-Framework API Functions
7614 * These functions implement the Meta Provider-to-Framework API, as described
7615 * in <sys/dtrace.h>.
7618 dtrace_meta_register(const char *name
, const dtrace_mops_t
*mops
, void *arg
,
7619 dtrace_meta_provider_id_t
*idp
)
7621 dtrace_meta_t
*meta
;
7622 dtrace_helpers_t
*help
, *next
;
7625 *idp
= DTRACE_METAPROVNONE
;
7628 * We strictly don't need the name, but we hold onto it for
7629 * debuggability. All hail error queues!
7632 cmn_err(CE_WARN
, "failed to register meta-provider: "
7638 mops
->dtms_create_probe
== NULL
||
7639 mops
->dtms_provide_pid
== NULL
||
7640 mops
->dtms_remove_pid
== NULL
) {
7641 cmn_err(CE_WARN
, "failed to register meta-register %s: "
7642 "invalid ops", name
);
7646 meta
= kmem_zalloc(sizeof (dtrace_meta_t
), KM_SLEEP
);
7647 meta
->dtm_mops
= *mops
;
7648 meta
->dtm_name
= kmem_alloc(strlen(name
) + 1, KM_SLEEP
);
7649 (void) strcpy(meta
->dtm_name
, name
);
7650 meta
->dtm_arg
= arg
;
7652 lck_mtx_lock(&dtrace_meta_lock
);
7653 lck_mtx_lock(&dtrace_lock
);
7655 if (dtrace_meta_pid
!= NULL
) {
7656 lck_mtx_unlock(&dtrace_lock
);
7657 lck_mtx_unlock(&dtrace_meta_lock
);
7658 cmn_err(CE_WARN
, "failed to register meta-register %s: "
7659 "user-land meta-provider exists", name
);
7660 kmem_free(meta
->dtm_name
, strlen(meta
->dtm_name
) + 1);
7661 kmem_free(meta
, sizeof (dtrace_meta_t
));
7665 dtrace_meta_pid
= meta
;
7666 *idp
= (dtrace_meta_provider_id_t
)meta
;
7669 * If there are providers and probes ready to go, pass them
7670 * off to the new meta provider now.
7673 help
= dtrace_deferred_pid
;
7674 dtrace_deferred_pid
= NULL
;
7676 lck_mtx_unlock(&dtrace_lock
);
7678 while (help
!= NULL
) {
7679 for (i
= 0; i
< help
->dthps_nprovs
; i
++) {
7680 dtrace_helper_provide(&help
->dthps_provs
[i
]->dthp_prov
,
7684 next
= help
->dthps_next
;
7685 help
->dthps_next
= NULL
;
7686 help
->dthps_prev
= NULL
;
7687 help
->dthps_deferred
= 0;
7691 lck_mtx_unlock(&dtrace_meta_lock
);

int
dtrace_meta_unregister(dtrace_meta_provider_id_t id)
{
	dtrace_meta_t **pp, *old = (dtrace_meta_t *)id;

	lck_mtx_lock(&dtrace_meta_lock);
	lck_mtx_lock(&dtrace_lock);

	if (old == dtrace_meta_pid) {
		pp = &dtrace_meta_pid;
	} else {
		panic("attempt to unregister non-existent "
		    "dtrace meta-provider %p\n", (void *)old);
	}

	if (old->dtm_count != 0) {
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&dtrace_meta_lock);
		return (EBUSY);
	}

	*pp = NULL;

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_meta_lock);

	kmem_free(old->dtm_name, strlen(old->dtm_name) + 1);
	kmem_free(old, sizeof (dtrace_meta_t));

	return (0);
}

/*
 * DTrace DIF Object Functions
 */
static int
dtrace_difo_err(uint_t pc, const char *format, ...)
{
	if (dtrace_err_verbose) {
		va_list alist;

		(void) uprintf("dtrace DIF object error: [%u]: ", pc);
		va_start(alist, format);
		(void) vuprintf(format, alist);
		va_end(alist);
	}

#ifdef DTRACE_ERRDEBUG
	dtrace_errdebug(format);
#endif

	return (1);
}
7751 * Validate a DTrace DIF object by checking the IR instructions. The following
7752 * rules are currently enforced by dtrace_difo_validate():
7754 * 1. Each instruction must have a valid opcode
7755 * 2. Each register, string, variable, or subroutine reference must be valid
7756 * 3. No instruction can modify register %r0 (must be zero)
7757 * 4. All instruction reserved bits must be set to zero
7758 * 5. The last instruction must be a "ret" instruction
7759 * 6. All branch targets must reference a valid instruction _after_ the branch
7762 dtrace_difo_validate(dtrace_difo_t
*dp
, dtrace_vstate_t
*vstate
, uint_t nregs
,
7766 int (*efunc
)(uint_t pc
, const char *, ...) = dtrace_difo_err
;
7770 kcheck
= cr
== NULL
||
7771 PRIV_POLICY_ONLY(cr
, PRIV_DTRACE_KERNEL
, B_FALSE
) == 0;
7773 dp
->dtdo_destructive
= 0;
7775 for (pc
= 0; pc
< dp
->dtdo_len
&& err
== 0; pc
++) {
7776 dif_instr_t instr
= dp
->dtdo_buf
[pc
];
7778 uint_t r1
= DIF_INSTR_R1(instr
);
7779 uint_t r2
= DIF_INSTR_R2(instr
);
7780 uint_t rd
= DIF_INSTR_RD(instr
);
7781 uint_t rs
= DIF_INSTR_RS(instr
);
7782 uint_t label
= DIF_INSTR_LABEL(instr
);
7783 uint_t v
= DIF_INSTR_VAR(instr
);
7784 uint_t subr
= DIF_INSTR_SUBR(instr
);
7785 uint_t type
= DIF_INSTR_TYPE(instr
);
7786 uint_t op
= DIF_INSTR_OP(instr
);
7804 err
+= efunc(pc
, "invalid register %u\n", r1
);
7806 err
+= efunc(pc
, "invalid register %u\n", r2
);
7808 err
+= efunc(pc
, "invalid register %u\n", rd
);
7810 err
+= efunc(pc
, "cannot write to %r0\n");
7816 err
+= efunc(pc
, "invalid register %u\n", r1
);
7818 err
+= efunc(pc
, "non-zero reserved bits\n");
7820 err
+= efunc(pc
, "invalid register %u\n", rd
);
7822 err
+= efunc(pc
, "cannot write to %r0\n");
7832 err
+= efunc(pc
, "invalid register %u\n", r1
);
7834 err
+= efunc(pc
, "non-zero reserved bits\n");
7836 err
+= efunc(pc
, "invalid register %u\n", rd
);
7838 err
+= efunc(pc
, "cannot write to %r0\n");
7840 dp
->dtdo_buf
[pc
] = DIF_INSTR_LOAD(op
+
7841 DIF_OP_RLDSB
- DIF_OP_LDSB
, r1
, rd
);
7851 err
+= efunc(pc
, "invalid register %u\n", r1
);
7853 err
+= efunc(pc
, "non-zero reserved bits\n");
7855 err
+= efunc(pc
, "invalid register %u\n", rd
);
7857 err
+= efunc(pc
, "cannot write to %r0\n");
7867 err
+= efunc(pc
, "invalid register %u\n", r1
);
7869 err
+= efunc(pc
, "non-zero reserved bits\n");
7871 err
+= efunc(pc
, "invalid register %u\n", rd
);
7873 err
+= efunc(pc
, "cannot write to %r0\n");
7880 err
+= efunc(pc
, "invalid register %u\n", r1
);
7882 err
+= efunc(pc
, "non-zero reserved bits\n");
7884 err
+= efunc(pc
, "invalid register %u\n", rd
);
7886 err
+= efunc(pc
, "cannot write to 0 address\n");
7891 err
+= efunc(pc
, "invalid register %u\n", r1
);
7893 err
+= efunc(pc
, "invalid register %u\n", r2
);
7895 err
+= efunc(pc
, "non-zero reserved bits\n");
7899 err
+= efunc(pc
, "invalid register %u\n", r1
);
7900 if (r2
!= 0 || rd
!= 0)
7901 err
+= efunc(pc
, "non-zero reserved bits\n");
7914 if (label
>= dp
->dtdo_len
) {
7915 err
+= efunc(pc
, "invalid branch target %u\n",
7919 err
+= efunc(pc
, "backward branch to %u\n",
7924 if (r1
!= 0 || r2
!= 0)
7925 err
+= efunc(pc
, "non-zero reserved bits\n");
7927 err
+= efunc(pc
, "invalid register %u\n", rd
);
7931 case DIF_OP_FLUSHTS
:
7932 if (r1
!= 0 || r2
!= 0 || rd
!= 0)
7933 err
+= efunc(pc
, "non-zero reserved bits\n");
7936 if (DIF_INSTR_INTEGER(instr
) >= dp
->dtdo_intlen
) {
7937 err
+= efunc(pc
, "invalid integer ref %u\n",
7938 DIF_INSTR_INTEGER(instr
));
7941 err
+= efunc(pc
, "invalid register %u\n", rd
);
7943 err
+= efunc(pc
, "cannot write to %r0\n");
7946 if (DIF_INSTR_STRING(instr
) >= dp
->dtdo_strlen
) {
7947 err
+= efunc(pc
, "invalid string ref %u\n",
7948 DIF_INSTR_STRING(instr
));
7951 err
+= efunc(pc
, "invalid register %u\n", rd
);
7953 err
+= efunc(pc
, "cannot write to %r0\n");
7957 if (r1
> DIF_VAR_ARRAY_MAX
)
7958 err
+= efunc(pc
, "invalid array %u\n", r1
);
7960 err
+= efunc(pc
, "invalid register %u\n", r2
);
7962 err
+= efunc(pc
, "invalid register %u\n", rd
);
7964 err
+= efunc(pc
, "cannot write to %r0\n");
7971 if (v
< DIF_VAR_OTHER_MIN
|| v
> DIF_VAR_OTHER_MAX
)
7972 err
+= efunc(pc
, "invalid variable %u\n", v
);
7974 err
+= efunc(pc
, "invalid register %u\n", rd
);
7976 err
+= efunc(pc
, "cannot write to %r0\n");
7983 if (v
< DIF_VAR_OTHER_UBASE
|| v
> DIF_VAR_OTHER_MAX
)
7984 err
+= efunc(pc
, "invalid variable %u\n", v
);
7986 err
+= efunc(pc
, "invalid register %u\n", rd
);
7989 if (subr
> DIF_SUBR_MAX
)
7990 err
+= efunc(pc
, "invalid subr %u\n", subr
);
7992 err
+= efunc(pc
, "invalid register %u\n", rd
);
7994 err
+= efunc(pc
, "cannot write to %r0\n");
7996 if (subr
== DIF_SUBR_COPYOUT
||
7997 subr
== DIF_SUBR_COPYOUTSTR
) {
7998 dp
->dtdo_destructive
= 1;
8002 if (type
!= DIF_TYPE_STRING
&& type
!= DIF_TYPE_CTF
)
8003 err
+= efunc(pc
, "invalid ref type %u\n", type
);
8005 err
+= efunc(pc
, "invalid register %u\n", r2
);
8007 err
+= efunc(pc
, "invalid register %u\n", rs
);
8010 if (type
!= DIF_TYPE_CTF
)
8011 err
+= efunc(pc
, "invalid val type %u\n", type
);
8013 err
+= efunc(pc
, "invalid register %u\n", r2
);
8015 err
+= efunc(pc
, "invalid register %u\n", rs
);
8018 err
+= efunc(pc
, "invalid opcode %u\n",
8019 DIF_INSTR_OP(instr
));
8023 if (dp
->dtdo_len
!= 0 &&
8024 DIF_INSTR_OP(dp
->dtdo_buf
[dp
->dtdo_len
- 1]) != DIF_OP_RET
) {
8025 err
+= efunc(dp
->dtdo_len
- 1,
8026 "expected 'ret' as last DIF instruction\n");
8029 if (!(dp
->dtdo_rtype
.dtdt_flags
& DIF_TF_BYREF
)) {
8031 * If we're not returning by reference, the size must be either
8032 * 0 or the size of one of the base types.
8034 switch (dp
->dtdo_rtype
.dtdt_size
) {
8036 case sizeof (uint8_t):
8037 case sizeof (uint16_t):
8038 case sizeof (uint32_t):
8039 case sizeof (uint64_t):
8043 err
+= efunc(dp
->dtdo_len
- 1, "bad return size");
8047 for (i
= 0; i
< dp
->dtdo_varlen
&& err
== 0; i
++) {
8048 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
], *existing
= NULL
;
8049 dtrace_diftype_t
*vt
, *et
;
8052 if (v
->dtdv_scope
!= DIFV_SCOPE_GLOBAL
&&
8053 v
->dtdv_scope
!= DIFV_SCOPE_THREAD
&&
8054 v
->dtdv_scope
!= DIFV_SCOPE_LOCAL
) {
8055 err
+= efunc(i
, "unrecognized variable scope %d\n",
8060 if (v
->dtdv_kind
!= DIFV_KIND_ARRAY
&&
8061 v
->dtdv_kind
!= DIFV_KIND_SCALAR
) {
8062 err
+= efunc(i
, "unrecognized variable type %d\n",
8067 if ((id
= v
->dtdv_id
) > DIF_VARIABLE_MAX
) {
8068 err
+= efunc(i
, "%d exceeds variable id limit\n", id
);
8072 if (id
< DIF_VAR_OTHER_UBASE
)
8076 * For user-defined variables, we need to check that this
8077 * definition is identical to any previous definition that we
8080 ndx
= id
- DIF_VAR_OTHER_UBASE
;
8082 switch (v
->dtdv_scope
) {
8083 case DIFV_SCOPE_GLOBAL
:
8084 if (ndx
< vstate
->dtvs_nglobals
) {
8085 dtrace_statvar_t
*svar
;
8087 if ((svar
= vstate
->dtvs_globals
[ndx
]) != NULL
)
8088 existing
= &svar
->dtsv_var
;
8093 case DIFV_SCOPE_THREAD
:
8094 if (ndx
< vstate
->dtvs_ntlocals
)
8095 existing
= &vstate
->dtvs_tlocals
[ndx
];
8098 case DIFV_SCOPE_LOCAL
:
8099 if (ndx
< vstate
->dtvs_nlocals
) {
8100 dtrace_statvar_t
*svar
;
8102 if ((svar
= vstate
->dtvs_locals
[ndx
]) != NULL
)
8103 existing
= &svar
->dtsv_var
;
8111 if (vt
->dtdt_flags
& DIF_TF_BYREF
) {
8112 if (vt
->dtdt_size
== 0) {
8113 err
+= efunc(i
, "zero-sized variable\n");
8117 if (v
->dtdv_scope
== DIFV_SCOPE_GLOBAL
&&
8118 vt
->dtdt_size
> dtrace_global_maxsize
) {
8119 err
+= efunc(i
, "oversized by-ref global\n");
8124 if (existing
== NULL
|| existing
->dtdv_id
== 0)
8127 ASSERT(existing
->dtdv_id
== v
->dtdv_id
);
8128 ASSERT(existing
->dtdv_scope
== v
->dtdv_scope
);
8130 if (existing
->dtdv_kind
!= v
->dtdv_kind
)
8131 err
+= efunc(i
, "%d changed variable kind\n", id
);
8133 et
= &existing
->dtdv_type
;
8135 if (vt
->dtdt_flags
!= et
->dtdt_flags
) {
8136 err
+= efunc(i
, "%d changed variable type flags\n", id
);
8140 if (vt
->dtdt_size
!= 0 && vt
->dtdt_size
!= et
->dtdt_size
) {
8141 err
+= efunc(i
, "%d changed variable type size\n", id
);
8150 * Validate a DTrace DIF object that it is to be used as a helper. Helpers
8151 * are much more constrained than normal DIFOs. Specifically, they may
8154 * 1. Make calls to subroutines other than copyin(), copyinstr() or
8155 * miscellaneous string routines
8156 * 2. Access DTrace variables other than the args[] array, and the
8157 * curthread, pid, ppid, tid, execname, zonename, uid and gid variables.
8158 * 3. Have thread-local variables.
8159 * 4. Have dynamic variables.
8162 dtrace_difo_validate_helper(dtrace_difo_t
*dp
)
8164 int (*efunc
)(uint_t pc
, const char *, ...) = dtrace_difo_err
;
8168 for (pc
= 0; pc
< dp
->dtdo_len
; pc
++) {
8169 dif_instr_t instr
= dp
->dtdo_buf
[pc
];
8171 uint_t v
= DIF_INSTR_VAR(instr
);
8172 uint_t subr
= DIF_INSTR_SUBR(instr
);
8173 uint_t op
= DIF_INSTR_OP(instr
);
8228 case DIF_OP_FLUSHTS
:
8240 if (v
>= DIF_VAR_OTHER_UBASE
)
8243 if (v
>= DIF_VAR_ARG0
&& v
<= DIF_VAR_ARG9
)
8246 if (v
== DIF_VAR_CURTHREAD
|| v
== DIF_VAR_PID
||
8247 v
== DIF_VAR_PPID
|| v
== DIF_VAR_TID
||
8248 v
== DIF_VAR_EXECNAME
|| v
== DIF_VAR_ZONENAME
||
8249 v
== DIF_VAR_UID
|| v
== DIF_VAR_GID
)
8252 err
+= efunc(pc
, "illegal variable %u\n", v
);
8259 err
+= efunc(pc
, "illegal dynamic variable load\n");
8265 err
+= efunc(pc
, "illegal dynamic variable store\n");
8269 if (subr
== DIF_SUBR_ALLOCA
||
8270 subr
== DIF_SUBR_BCOPY
||
8271 subr
== DIF_SUBR_COPYIN
||
8272 subr
== DIF_SUBR_COPYINTO
||
8273 subr
== DIF_SUBR_COPYINSTR
||
8274 subr
== DIF_SUBR_INDEX
||
8275 subr
== DIF_SUBR_LLTOSTR
||
8276 subr
== DIF_SUBR_RINDEX
||
8277 subr
== DIF_SUBR_STRCHR
||
8278 subr
== DIF_SUBR_STRJOIN
||
8279 subr
== DIF_SUBR_STRRCHR
||
8280 subr
== DIF_SUBR_STRSTR
||
8281 subr
== DIF_SUBR_CHUD
)
8284 err
+= efunc(pc
, "invalid subr %u\n", subr
);
8288 err
+= efunc(pc
, "invalid opcode %u\n",
8289 DIF_INSTR_OP(instr
));

/*
 * Returns 1 if the expression in the DIF object can be cached on a per-thread
 * basis; 0 if not.
 */
static int
dtrace_difo_cacheable(dtrace_difo_t *dp)
{
	int i;

	if (dp == NULL)
		return (0);

	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];

		if (v->dtdv_scope != DIFV_SCOPE_GLOBAL)
			continue;

		switch (v->dtdv_id) {
		case DIF_VAR_CURTHREAD:
		case DIF_VAR_PID:
		case DIF_VAR_TID:
		case DIF_VAR_EXECNAME:
		case DIF_VAR_ZONENAME:
			break;

		default:
			return (0);
		}
	}

	/*
	 * This DIF object may be cacheable.  Now we need to look for any
	 * array loading instructions, any memory loading instructions, or
	 * any stores to thread-local variables.
	 */
	for (i = 0; i < dp->dtdo_len; i++) {
		uint_t op = DIF_INSTR_OP(dp->dtdo_buf[i]);

		if ((op >= DIF_OP_LDSB && op <= DIF_OP_LDX) ||
		    (op >= DIF_OP_ULDSB && op <= DIF_OP_ULDX) ||
		    (op >= DIF_OP_RLDSB && op <= DIF_OP_RLDX) ||
		    op == DIF_OP_LDGA || op == DIF_OP_STTS)
			return (0);
	}

	return (1);
}

static void
dtrace_difo_hold(dtrace_difo_t *dp)
{
	int i;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	dp->dtdo_refcnt++;
	ASSERT(dp->dtdo_refcnt != 0);

	/*
	 * We need to check this DIF object for references to the variable
	 * DIF_VAR_VTIMESTAMP.
	 */
	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];

		if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
			continue;

		if (dtrace_vtime_references++ == 0)
			dtrace_vtime_enable();
	}
}
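
/*
 * Illustrative example (not part of the framework): a D predicate such as
 *
 *	syscall:::entry
 *	/execname == "Terminal"/
 *	{ ... }
 *
 * references only thread-invariant variables, so dtrace_difo_cacheable()
 * returns 1 for it and its result can be cached on a per-thread basis.  A
 * predicate that loads memory, indexes an array or touches a thread-local
 * variable defeats this optimization, as checked above.
 */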
8371 * This routine calculates the dynamic variable chunksize for a given DIF
8372 * object. The calculation is not fool-proof, and can probably be tricked by
8373 * malicious DIF -- but it works for all compiler-generated DIF. Because this
8374 * calculation is likely imperfect, dtrace_dynvar() is able to gracefully fail
8375 * if a dynamic variable size exceeds the chunksize.
8378 dtrace_difo_chunksize(dtrace_difo_t
*dp
, dtrace_vstate_t
*vstate
)
8381 dtrace_key_t tupregs
[DIF_DTR_NREGS
+ 2]; /* +2 for thread and id */
8382 const dif_instr_t
*text
= dp
->dtdo_buf
;
8388 for (pc
= 0; pc
< dp
->dtdo_len
; pc
++) {
8389 dif_instr_t instr
= text
[pc
];
8390 uint_t op
= DIF_INSTR_OP(instr
);
8391 uint_t rd
= DIF_INSTR_RD(instr
);
8392 uint_t r1
= DIF_INSTR_R1(instr
);
8396 dtrace_key_t
*key
= tupregs
;
8400 sval
= dp
->dtdo_inttab
[DIF_INSTR_INTEGER(instr
)];
8405 key
= &tupregs
[DIF_DTR_NREGS
];
8406 key
[0].dttk_size
= 0;
8407 key
[1].dttk_size
= 0;
8409 scope
= DIFV_SCOPE_THREAD
;
8416 if (DIF_INSTR_OP(instr
) == DIF_OP_STTAA
)
8417 key
[nkeys
++].dttk_size
= 0;
8419 key
[nkeys
++].dttk_size
= 0;
8421 if (op
== DIF_OP_STTAA
) {
8422 scope
= DIFV_SCOPE_THREAD
;
8424 scope
= DIFV_SCOPE_GLOBAL
;
8430 if (ttop
== DIF_DTR_NREGS
)
8433 if ((srd
== 0 || sval
== 0) && r1
== DIF_TYPE_STRING
) {
8435 * If the register for the size of the "pushtr"
8436 * is %r0 (or the value is 0) and the type is
8437 * a string, we'll use the system-wide default
8440 tupregs
[ttop
++].dttk_size
=
8441 dtrace_strsize_default
;
8446 tupregs
[ttop
++].dttk_size
= sval
;
8452 if (ttop
== DIF_DTR_NREGS
)
8455 tupregs
[ttop
++].dttk_size
= 0;
8458 case DIF_OP_FLUSHTS
:
8475 * We have a dynamic variable allocation; calculate its size.
8477 for (ksize
= 0, i
= 0; i
< nkeys
; i
++)
8478 ksize
+= P2ROUNDUP(key
[i
].dttk_size
, sizeof (uint64_t));
8480 size
= sizeof (dtrace_dynvar_t
);
8481 size
+= sizeof (dtrace_key_t
) * (nkeys
- 1);
8485 * Now we need to determine the size of the stored data.
8487 id
= DIF_INSTR_VAR(instr
);
8489 for (i
= 0; i
< dp
->dtdo_varlen
; i
++) {
8490 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
];
8492 if (v
->dtdv_id
== id
&& v
->dtdv_scope
== scope
) {
8493 size
+= v
->dtdv_type
.dtdt_size
;
8498 if (i
== dp
->dtdo_varlen
)
8502 * We have the size. If this is larger than the chunk size
8503 * for our dynamic variable state, reset the chunk size.
8505 size
= P2ROUNDUP(size
, sizeof (uint64_t));
8507 if (size
> vstate
->dtvs_dynvars
.dtds_chunksize
)
8508 vstate
->dtvs_dynvars
.dtds_chunksize
= size
;
8513 dtrace_difo_init(dtrace_difo_t
*dp
, dtrace_vstate_t
*vstate
)
8515 int i
, oldsvars
, osz
, nsz
, otlocals
, ntlocals
;
8518 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
8519 ASSERT(dp
->dtdo_buf
!= NULL
&& dp
->dtdo_len
!= 0);
8521 for (i
= 0; i
< dp
->dtdo_varlen
; i
++) {
8522 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
];
8523 dtrace_statvar_t
*svar
, ***svarp
;
8525 uint8_t scope
= v
->dtdv_scope
;
8528 if ((id
= v
->dtdv_id
) < DIF_VAR_OTHER_UBASE
)
8531 id
-= DIF_VAR_OTHER_UBASE
;
8534 case DIFV_SCOPE_THREAD
:
8535 while (id
>= (otlocals
= vstate
->dtvs_ntlocals
)) {
8536 dtrace_difv_t
*tlocals
;
8538 if ((ntlocals
= (otlocals
<< 1)) == 0)
8541 osz
= otlocals
* sizeof (dtrace_difv_t
);
8542 nsz
= ntlocals
* sizeof (dtrace_difv_t
);
8544 tlocals
= kmem_zalloc(nsz
, KM_SLEEP
);
8547 bcopy(vstate
->dtvs_tlocals
,
8549 kmem_free(vstate
->dtvs_tlocals
, osz
);
8552 vstate
->dtvs_tlocals
= tlocals
;
8553 vstate
->dtvs_ntlocals
= ntlocals
;
8556 vstate
->dtvs_tlocals
[id
] = *v
;
8559 case DIFV_SCOPE_LOCAL
:
8560 np
= &vstate
->dtvs_nlocals
;
8561 svarp
= &vstate
->dtvs_locals
;
8563 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
)
8564 dsize
= NCPU
* (v
->dtdv_type
.dtdt_size
+
8567 dsize
= NCPU
* sizeof (uint64_t);
8571 case DIFV_SCOPE_GLOBAL
:
8572 np
= &vstate
->dtvs_nglobals
;
8573 svarp
= &vstate
->dtvs_globals
;
8575 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
)
8576 dsize
= v
->dtdv_type
.dtdt_size
+
8585 while (id
>= (oldsvars
= *np
)) {
8586 dtrace_statvar_t
**statics
;
8587 int newsvars
, oldsize
, newsize
;
8589 if ((newsvars
= (oldsvars
<< 1)) == 0)
8592 oldsize
= oldsvars
* sizeof (dtrace_statvar_t
*);
8593 newsize
= newsvars
* sizeof (dtrace_statvar_t
*);
8595 statics
= kmem_zalloc(newsize
, KM_SLEEP
);
8598 bcopy(*svarp
, statics
, oldsize
);
8599 kmem_free(*svarp
, oldsize
);
8606 if ((svar
= (*svarp
)[id
]) == NULL
) {
8607 svar
= kmem_zalloc(sizeof (dtrace_statvar_t
), KM_SLEEP
);
8608 svar
->dtsv_var
= *v
;
8610 if ((svar
->dtsv_size
= dsize
) != 0) {
8611 svar
->dtsv_data
= (uint64_t)(uintptr_t)
8612 kmem_zalloc(dsize
, KM_SLEEP
);
8615 (*svarp
)[id
] = svar
;
8618 svar
->dtsv_refcnt
++;
8621 dtrace_difo_chunksize(dp
, vstate
);
8622 dtrace_difo_hold(dp
);
8625 static dtrace_difo_t
*
8626 dtrace_difo_duplicate(dtrace_difo_t
*dp
, dtrace_vstate_t
*vstate
)
8631 ASSERT(dp
->dtdo_buf
!= NULL
);
8632 ASSERT(dp
->dtdo_refcnt
!= 0);
8634 new = kmem_zalloc(sizeof (dtrace_difo_t
), KM_SLEEP
);
8636 ASSERT(dp
->dtdo_buf
!= NULL
);
8637 sz
= dp
->dtdo_len
* sizeof (dif_instr_t
);
8638 new->dtdo_buf
= kmem_alloc(sz
, KM_SLEEP
);
8639 bcopy(dp
->dtdo_buf
, new->dtdo_buf
, sz
);
8640 new->dtdo_len
= dp
->dtdo_len
;
8642 if (dp
->dtdo_strtab
!= NULL
) {
8643 ASSERT(dp
->dtdo_strlen
!= 0);
8644 new->dtdo_strtab
= kmem_alloc(dp
->dtdo_strlen
, KM_SLEEP
);
8645 bcopy(dp
->dtdo_strtab
, new->dtdo_strtab
, dp
->dtdo_strlen
);
8646 new->dtdo_strlen
= dp
->dtdo_strlen
;
8649 if (dp
->dtdo_inttab
!= NULL
) {
8650 ASSERT(dp
->dtdo_intlen
!= 0);
8651 sz
= dp
->dtdo_intlen
* sizeof (uint64_t);
8652 new->dtdo_inttab
= kmem_alloc(sz
, KM_SLEEP
);
8653 bcopy(dp
->dtdo_inttab
, new->dtdo_inttab
, sz
);
8654 new->dtdo_intlen
= dp
->dtdo_intlen
;
8657 if (dp
->dtdo_vartab
!= NULL
) {
8658 ASSERT(dp
->dtdo_varlen
!= 0);
8659 sz
= dp
->dtdo_varlen
* sizeof (dtrace_difv_t
);
8660 new->dtdo_vartab
= kmem_alloc(sz
, KM_SLEEP
);
8661 bcopy(dp
->dtdo_vartab
, new->dtdo_vartab
, sz
);
8662 new->dtdo_varlen
= dp
->dtdo_varlen
;
8665 dtrace_difo_init(new, vstate
);
8670 dtrace_difo_destroy(dtrace_difo_t
*dp
, dtrace_vstate_t
*vstate
)
8674 ASSERT(dp
->dtdo_refcnt
== 0);
8676 for (i
= 0; i
< dp
->dtdo_varlen
; i
++) {
8677 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
];
8678 dtrace_statvar_t
*svar
, **svarp
;
8680 uint8_t scope
= v
->dtdv_scope
;
8684 case DIFV_SCOPE_THREAD
:
8687 case DIFV_SCOPE_LOCAL
:
8688 np
= &vstate
->dtvs_nlocals
;
8689 svarp
= vstate
->dtvs_locals
;
8692 case DIFV_SCOPE_GLOBAL
:
8693 np
= &vstate
->dtvs_nglobals
;
8694 svarp
= vstate
->dtvs_globals
;
8701 if ((id
= v
->dtdv_id
) < DIF_VAR_OTHER_UBASE
)
8704 id
-= DIF_VAR_OTHER_UBASE
;
8708 ASSERT(svar
!= NULL
);
8709 ASSERT(svar
->dtsv_refcnt
> 0);
8711 if (--svar
->dtsv_refcnt
> 0)
8714 if (svar
->dtsv_size
!= 0) {
8715 ASSERT(svar
->dtsv_data
!= NULL
);
8716 kmem_free((void *)(uintptr_t)svar
->dtsv_data
,
8720 kmem_free(svar
, sizeof (dtrace_statvar_t
));
8724 kmem_free(dp
->dtdo_buf
, dp
->dtdo_len
* sizeof (dif_instr_t
));
8725 kmem_free(dp
->dtdo_inttab
, dp
->dtdo_intlen
* sizeof (uint64_t));
8726 kmem_free(dp
->dtdo_strtab
, dp
->dtdo_strlen
);
8727 kmem_free(dp
->dtdo_vartab
, dp
->dtdo_varlen
* sizeof (dtrace_difv_t
));
8729 kmem_free(dp
, sizeof (dtrace_difo_t
));
8733 dtrace_difo_release(dtrace_difo_t
*dp
, dtrace_vstate_t
*vstate
)
8737 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
8738 ASSERT(dp
->dtdo_refcnt
!= 0);
8740 for (i
= 0; i
< dp
->dtdo_varlen
; i
++) {
8741 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
];
8743 if (v
->dtdv_id
!= DIF_VAR_VTIMESTAMP
)
8746 ASSERT(dtrace_vtime_references
> 0);
8747 if (--dtrace_vtime_references
== 0)
8748 dtrace_vtime_disable();
8751 if (--dp
->dtdo_refcnt
== 0)
8752 dtrace_difo_destroy(dp
, vstate
);

/*
 * DTrace Format Functions
 */
static uint16_t
dtrace_format_add(dtrace_state_t *state, char *str)
{
	char *fmt, **new;
	uint16_t ndx, len = strlen(str) + 1;

	fmt = kmem_zalloc(len, KM_SLEEP);
	bcopy(str, fmt, len);

	for (ndx = 0; ndx < state->dts_nformats; ndx++) {
		if (state->dts_formats[ndx] == NULL) {
			state->dts_formats[ndx] = fmt;
			return (ndx + 1);
		}
	}

	if (state->dts_nformats == USHRT_MAX) {
		/*
		 * This is only likely if a denial-of-service attack is being
		 * attempted.  As such, it's okay to fail silently here.
		 */
		kmem_free(fmt, len);
		return (0);
	}

	/*
	 * For simplicity, we always resize the formats array to be exactly the
	 * number of formats.
	 */
	ndx = state->dts_nformats++;
	new = kmem_alloc((ndx + 1) * sizeof (char *), KM_SLEEP);

	if (state->dts_formats != NULL) {
		ASSERT(ndx != 0);
		bcopy(state->dts_formats, new, ndx * sizeof (char *));
		kmem_free(state->dts_formats, ndx * sizeof (char *));
	}

	state->dts_formats = new;
	state->dts_formats[ndx] = fmt;

	return (ndx + 1);
}

static void
dtrace_format_remove(dtrace_state_t *state, uint16_t format)
{
	char *fmt;

	ASSERT(state->dts_formats != NULL);
	ASSERT(format <= state->dts_nformats);
	ASSERT(state->dts_formats[format - 1] != NULL);

	fmt = state->dts_formats[format - 1];
	kmem_free(fmt, strlen(fmt) + 1);
	state->dts_formats[format - 1] = NULL;
}

static void
dtrace_format_destroy(dtrace_state_t *state)
{
	int i;

	if (state->dts_nformats == 0) {
		ASSERT(state->dts_formats == NULL);
		return;
	}

	ASSERT(state->dts_formats != NULL);

	for (i = 0; i < state->dts_nformats; i++) {
		char *fmt = state->dts_formats[i];

		if (fmt == NULL)
			continue;

		kmem_free(fmt, strlen(fmt) + 1);
	}

	kmem_free(state->dts_formats, state->dts_nformats * sizeof (char *));
	state->dts_nformats = 0;
	state->dts_formats = NULL;
}
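
/*
 * Note on format identifiers: the value handed back by dtrace_format_add()
 * is a 1-based index into dts_formats (zero effectively means "no format"),
 * which is why dtrace_format_remove() and the consumers of dtrd_format index
 * the array with (format - 1).
 */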

/*
 * DTrace Predicate Functions
 */
static dtrace_predicate_t *
dtrace_predicate_create(dtrace_difo_t *dp)
{
	dtrace_predicate_t *pred;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(dp->dtdo_refcnt != 0);

	pred = kmem_zalloc(sizeof (dtrace_predicate_t), KM_SLEEP);
	pred->dtp_difo = dp;
	pred->dtp_refcnt = 1;

	if (!dtrace_difo_cacheable(dp))
		return (pred);

	if (dtrace_predcache_id == DTRACE_CACHEIDNONE) {
		/*
		 * This is only theoretically possible -- we have had 2^32
		 * cacheable predicates on this machine.  We cannot allow any
		 * more predicates to become cacheable:  as unlikely as it is,
		 * there may be a thread caching a (now stale) predicate cache
		 * ID. (N.B.: the temptation is being successfully resisted to
		 * have this cmn_err() "Holy shit -- we executed this code!")
		 */
		return (pred);
	}

	pred->dtp_cacheid = dtrace_predcache_id++;

	return (pred);
}

static void
dtrace_predicate_hold(dtrace_predicate_t *pred)
{
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(pred->dtp_difo != NULL && pred->dtp_difo->dtdo_refcnt != 0);
	ASSERT(pred->dtp_refcnt > 0);

	pred->dtp_refcnt++;
}

static void
dtrace_predicate_release(dtrace_predicate_t *pred, dtrace_vstate_t *vstate)
{
	dtrace_difo_t *dp = pred->dtp_difo;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(dp != NULL && dp->dtdo_refcnt != 0);
	ASSERT(pred->dtp_refcnt > 0);

	if (--pred->dtp_refcnt == 0) {
		dtrace_difo_release(pred->dtp_difo, vstate);
		kmem_free(pred, sizeof (dtrace_predicate_t));
	}
}
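
/*
 * Illustrative note: the cache ID assigned in dtrace_predicate_create() is
 * what probe context consults to short-circuit predicate evaluation -- when a
 * thread has already evaluated a cacheable predicate and recorded its ID, the
 * ECB can be skipped without re-running the DIF.  See the probe context
 * functions earlier in this file for the consuming side of this optimization.
 */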

/*
 * DTrace Action Description Functions
 */
static dtrace_actdesc_t *
dtrace_actdesc_create(dtrace_actkind_t kind, uint32_t ntuple,
    uint64_t uarg, uint64_t arg)
{
	dtrace_actdesc_t *act;

	/* ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != NULL &&
	   arg >= KERNELBASE) || (arg == NULL && kind == DTRACEACT_PRINTA));*/

	act = kmem_zalloc(sizeof (dtrace_actdesc_t), KM_SLEEP);
	act->dtad_kind = kind;
	act->dtad_ntuple = ntuple;
	act->dtad_uarg = uarg;
	act->dtad_arg = arg;
	act->dtad_refcnt = 1;

	return (act);
}

static void
dtrace_actdesc_hold(dtrace_actdesc_t *act)
{
	ASSERT(act->dtad_refcnt >= 1);
	act->dtad_refcnt++;
}

static void
dtrace_actdesc_release(dtrace_actdesc_t *act, dtrace_vstate_t *vstate)
{
	dtrace_actkind_t kind = act->dtad_kind;
	dtrace_difo_t *dp;

	ASSERT(act->dtad_refcnt >= 1);

	if (--act->dtad_refcnt != 0)
		return;

	if ((dp = act->dtad_difo) != NULL)
		dtrace_difo_release(dp, vstate);

	if (DTRACEACT_ISPRINTFLIKE(kind)) {
		char *str = (char *)(uintptr_t)act->dtad_arg;

		/* ASSERT((str != NULL && (uintptr_t)str >= KERNELBASE) ||
		   (str == NULL && act->dtad_kind == DTRACEACT_PRINTA));*/

		if (str != NULL)
			kmem_free(str, strlen(str) + 1);
	}

	kmem_free(act, sizeof (dtrace_actdesc_t));
}
8959 * DTrace ECB Functions
8961 static dtrace_ecb_t
*
8962 dtrace_ecb_add(dtrace_state_t
*state
, dtrace_probe_t
*probe
)
8967 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
8969 ecb
= kmem_zalloc(sizeof (dtrace_ecb_t
), KM_SLEEP
);
8970 ecb
->dte_predicate
= NULL
;
8971 ecb
->dte_probe
= probe
;
8974 * The default size is the size of the default action: recording
8977 ecb
->dte_size
= ecb
->dte_needed
= sizeof (dtrace_epid_t
);
8978 ecb
->dte_alignment
= sizeof (dtrace_epid_t
);
8980 epid
= state
->dts_epid
++;
8982 if (epid
- 1 >= state
->dts_necbs
) {
8983 dtrace_ecb_t
**oecbs
= state
->dts_ecbs
, **ecbs
;
8984 int necbs
= state
->dts_necbs
<< 1;
8986 ASSERT(epid
== state
->dts_necbs
+ 1);
8989 ASSERT(oecbs
== NULL
);
8993 ecbs
= kmem_zalloc(necbs
* sizeof (*ecbs
), KM_SLEEP
);
8996 bcopy(oecbs
, ecbs
, state
->dts_necbs
* sizeof (*ecbs
));
8998 dtrace_membar_producer();
8999 state
->dts_ecbs
= ecbs
;
9001 if (oecbs
!= NULL
) {
9003 * If this state is active, we must dtrace_sync()
9004 * before we can free the old dts_ecbs array: we're
9005 * coming in hot, and there may be active ring
9006 * buffer processing (which indexes into the dts_ecbs
9007 * array) on another CPU.
9009 if (state
->dts_activity
!= DTRACE_ACTIVITY_INACTIVE
)
9012 kmem_free(oecbs
, state
->dts_necbs
* sizeof (*ecbs
));
9015 dtrace_membar_producer();
9016 state
->dts_necbs
= necbs
;
9019 ecb
->dte_state
= state
;
9021 ASSERT(state
->dts_ecbs
[epid
- 1] == NULL
);
9022 dtrace_membar_producer();
9023 state
->dts_ecbs
[(ecb
->dte_epid
= epid
) - 1] = ecb
;
9029 dtrace_ecb_enable(dtrace_ecb_t
*ecb
)
9031 dtrace_probe_t
*probe
= ecb
->dte_probe
;
9033 lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
);
9034 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
9035 ASSERT(ecb
->dte_next
== NULL
);
9037 if (probe
== NULL
) {
9039 * This is the NULL probe -- there's nothing to do.
9044 if (probe
->dtpr_ecb
== NULL
) {
9045 dtrace_provider_t
*prov
= probe
->dtpr_provider
;
9048 * We're the first ECB on this probe.
9050 probe
->dtpr_ecb
= probe
->dtpr_ecb_last
= ecb
;
9052 if (ecb
->dte_predicate
!= NULL
)
9053 probe
->dtpr_predcache
= ecb
->dte_predicate
->dtp_cacheid
;
9055 prov
->dtpv_pops
.dtps_enable(prov
->dtpv_arg
,
9056 probe
->dtpr_id
, probe
->dtpr_arg
);
9059 * This probe is already active. Swing the last pointer to
9060 * point to the new ECB, and issue a dtrace_sync() to assure
9061 * that all CPUs have seen the change.
9063 ASSERT(probe
->dtpr_ecb_last
!= NULL
);
9064 probe
->dtpr_ecb_last
->dte_next
= ecb
;
9065 probe
->dtpr_ecb_last
= ecb
;
9066 probe
->dtpr_predcache
= 0;
9073 dtrace_ecb_resize(dtrace_ecb_t
*ecb
)
9075 uint32_t maxalign
= sizeof (dtrace_epid_t
);
9076 uint32_t align
= sizeof (uint8_t), offs
, diff
;
9077 dtrace_action_t
*act
;
9079 uint32_t aggbase
= UINT32_MAX
;
9080 dtrace_state_t
*state
= ecb
->dte_state
;
9083 * If we record anything, we always record the epid. (And we always
9086 offs
= sizeof (dtrace_epid_t
);
9087 ecb
->dte_size
= ecb
->dte_needed
= sizeof (dtrace_epid_t
);
9089 for (act
= ecb
->dte_action
; act
!= NULL
; act
= act
->dta_next
) {
9090 dtrace_recdesc_t
*rec
= &act
->dta_rec
;
9092 if ((align
= rec
->dtrd_alignment
) > maxalign
)
9095 if (!wastuple
&& act
->dta_intuple
) {
9097 * This is the first record in a tuple. Align the
9098 * offset to be at offset 4 in an 8-byte aligned
9101 diff
= offs
+ sizeof (dtrace_aggid_t
);
9103 if (diff
= (diff
& (sizeof (uint64_t) - 1)))
9104 offs
+= sizeof (uint64_t) - diff
;
9106 aggbase
= offs
- sizeof (dtrace_aggid_t
);
9107 ASSERT(!(aggbase
& (sizeof (uint64_t) - 1)));
9111 if (rec
->dtrd_size
!= 0 && (diff
= (offs
& (align
- 1)))) {
9113 * The current offset is not properly aligned; align it.
9115 offs
+= align
- diff
;
9118 rec
->dtrd_offset
= offs
;
9120 if (offs
+ rec
->dtrd_size
> ecb
->dte_needed
) {
9121 ecb
->dte_needed
= offs
+ rec
->dtrd_size
;
9123 if (ecb
->dte_needed
> state
->dts_needed
)
9124 state
->dts_needed
= ecb
->dte_needed
;
9127 if (DTRACEACT_ISAGG(act
->dta_kind
)) {
9128 dtrace_aggregation_t
*agg
= (dtrace_aggregation_t
*)act
;
9129 dtrace_action_t
*first
= agg
->dtag_first
, *prev
;
9131 ASSERT(rec
->dtrd_size
!= 0 && first
!= NULL
);
9133 ASSERT(aggbase
!= UINT32_MAX
);
9135 agg
->dtag_base
= aggbase
;
9137 while ((prev
= first
->dta_prev
) != NULL
&&
9138 DTRACEACT_ISAGG(prev
->dta_kind
)) {
9139 agg
= (dtrace_aggregation_t
*)prev
;
9140 first
= agg
->dtag_first
;
9144 offs
= prev
->dta_rec
.dtrd_offset
+
9145 prev
->dta_rec
.dtrd_size
;
9147 offs
= sizeof (dtrace_epid_t
);
9151 if (!act
->dta_intuple
)
9152 ecb
->dte_size
= offs
+ rec
->dtrd_size
;
9154 offs
+= rec
->dtrd_size
;
9157 wastuple
= act
->dta_intuple
;
9160 if ((act
= ecb
->dte_action
) != NULL
&&
9161 !(act
->dta_kind
== DTRACEACT_SPECULATE
&& act
->dta_next
== NULL
) &&
9162 ecb
->dte_size
== sizeof (dtrace_epid_t
)) {
9164 * If the size is still sizeof (dtrace_epid_t), then all
9165 * actions store no data; set the size to 0.
9167 ecb
->dte_alignment
= maxalign
;
9171 * If the needed space is still sizeof (dtrace_epid_t), then
9172 * all actions need no additional space; set the needed
9175 if (ecb
->dte_needed
== sizeof (dtrace_epid_t
))
9176 ecb
->dte_needed
= 0;
9182 * Set our alignment, and make sure that the dte_size and dte_needed
9183 * are aligned to the size of an EPID.
9185 ecb
->dte_alignment
= maxalign
;
9186 ecb
->dte_size
= (ecb
->dte_size
+ (sizeof (dtrace_epid_t
) - 1)) &
9187 ~(sizeof (dtrace_epid_t
) - 1);
9188 ecb
->dte_needed
= (ecb
->dte_needed
+ (sizeof (dtrace_epid_t
) - 1)) &
9189 ~(sizeof (dtrace_epid_t
) - 1);
9190 ASSERT(ecb
->dte_size
<= ecb
->dte_needed
);
9193 static dtrace_action_t
*
9194 dtrace_ecb_aggregation_create(dtrace_ecb_t
*ecb
, dtrace_actdesc_t
*desc
)
9196 dtrace_aggregation_t
*agg
;
9197 size_t size
= sizeof (uint64_t);
9198 int ntuple
= desc
->dtad_ntuple
;
9199 dtrace_action_t
*act
;
9200 dtrace_recdesc_t
*frec
;
9201 dtrace_aggid_t aggid
;
9202 dtrace_state_t
*state
= ecb
->dte_state
;
9204 agg
= kmem_zalloc(sizeof (dtrace_aggregation_t
), KM_SLEEP
);
9205 agg
->dtag_ecb
= ecb
;
9207 ASSERT(DTRACEACT_ISAGG(desc
->dtad_kind
));
9209 switch (desc
->dtad_kind
) {
9211 agg
->dtag_initial
= UINT64_MAX
;
9212 agg
->dtag_aggregate
= dtrace_aggregate_min
;
9216 agg
->dtag_aggregate
= dtrace_aggregate_max
;
9219 case DTRACEAGG_COUNT
:
9220 agg
->dtag_aggregate
= dtrace_aggregate_count
;
9223 case DTRACEAGG_QUANTIZE
:
9224 agg
->dtag_aggregate
= dtrace_aggregate_quantize
;
9225 size
= (((sizeof (uint64_t) * NBBY
) - 1) * 2 + 1) *
9229 case DTRACEAGG_LQUANTIZE
: {
9230 uint16_t step
= DTRACE_LQUANTIZE_STEP(desc
->dtad_arg
);
9231 uint16_t levels
= DTRACE_LQUANTIZE_LEVELS(desc
->dtad_arg
);
9233 agg
->dtag_initial
= desc
->dtad_arg
;
9234 agg
->dtag_aggregate
= dtrace_aggregate_lquantize
;
9236 if (step
== 0 || levels
== 0)
9239 size
= levels
* sizeof (uint64_t) + 3 * sizeof (uint64_t);
9244 agg
->dtag_aggregate
= dtrace_aggregate_avg
;
9245 size
= sizeof (uint64_t) * 2;
9249 agg
->dtag_aggregate
= dtrace_aggregate_sum
;
9256 agg
->dtag_action
.dta_rec
.dtrd_size
= size
;
9262 * We must make sure that we have enough actions for the n-tuple.
9264 for (act
= ecb
->dte_action_last
; act
!= NULL
; act
= act
->dta_prev
) {
9265 if (DTRACEACT_ISAGG(act
->dta_kind
))
9268 if (--ntuple
== 0) {
9270 * This is the action with which our n-tuple begins.
9272 agg
->dtag_first
= act
;
9278 * This n-tuple is short by ntuple elements. Return failure.
9280 ASSERT(ntuple
!= 0);
9282 kmem_free(agg
, sizeof (dtrace_aggregation_t
));
9287 * If the last action in the tuple has a size of zero, it's actually
9288 * an expression argument for the aggregating action.
9290 ASSERT(ecb
->dte_action_last
!= NULL
);
9291 act
= ecb
->dte_action_last
;
9293 if (act
->dta_kind
== DTRACEACT_DIFEXPR
) {
9294 ASSERT(act
->dta_difo
!= NULL
);
9296 if (act
->dta_difo
->dtdo_rtype
.dtdt_size
== 0)
9297 agg
->dtag_hasarg
= 1;
9301 * We need to allocate an id for this aggregation.
9303 aggid
= (dtrace_aggid_t
)(uintptr_t)vmem_alloc(state
->dts_aggid_arena
, 1,
9304 VM_BESTFIT
| VM_SLEEP
);
9306 if (aggid
- 1 >= state
->dts_naggregations
) {
9307 dtrace_aggregation_t
**oaggs
= state
->dts_aggregations
;
9308 dtrace_aggregation_t
**aggs
;
9309 int naggs
= state
->dts_naggregations
<< 1;
9310 int onaggs
= state
->dts_naggregations
;
9312 ASSERT(aggid
== state
->dts_naggregations
+ 1);
9315 ASSERT(oaggs
== NULL
);
9319 aggs
= kmem_zalloc(naggs
* sizeof (*aggs
), KM_SLEEP
);
9321 if (oaggs
!= NULL
) {
9322 bcopy(oaggs
, aggs
, onaggs
* sizeof (*aggs
));
9323 kmem_free(oaggs
, onaggs
* sizeof (*aggs
));
9326 state
->dts_aggregations
= aggs
;
9327 state
->dts_naggregations
= naggs
;
9330 ASSERT(state
->dts_aggregations
[aggid
- 1] == NULL
);
9331 state
->dts_aggregations
[(agg
->dtag_id
= aggid
) - 1] = agg
;
9333 frec
= &agg
->dtag_first
->dta_rec
;
9334 if (frec
->dtrd_alignment
< sizeof (dtrace_aggid_t
))
9335 frec
->dtrd_alignment
= sizeof (dtrace_aggid_t
);
9337 for (act
= agg
->dtag_first
; act
!= NULL
; act
= act
->dta_next
) {
9338 ASSERT(!act
->dta_intuple
);
9339 act
->dta_intuple
= 1;
9342 return (&agg
->dtag_action
);
9346 dtrace_ecb_aggregation_destroy(dtrace_ecb_t
*ecb
, dtrace_action_t
*act
)
9348 dtrace_aggregation_t
*agg
= (dtrace_aggregation_t
*)act
;
9349 dtrace_state_t
*state
= ecb
->dte_state
;
9350 dtrace_aggid_t aggid
= agg
->dtag_id
;
9352 ASSERT(DTRACEACT_ISAGG(act
->dta_kind
));
9353 vmem_free(state
->dts_aggid_arena
, (void *)(uintptr_t)aggid
, 1);
9355 ASSERT(state
->dts_aggregations
[aggid
- 1] == agg
);
9356 state
->dts_aggregations
[aggid
- 1] = NULL
;
9358 kmem_free(agg
, sizeof (dtrace_aggregation_t
));
9362 dtrace_ecb_action_add(dtrace_ecb_t
*ecb
, dtrace_actdesc_t
*desc
)
9364 dtrace_action_t
*action
, *last
;
9365 dtrace_difo_t
*dp
= desc
->dtad_difo
;
9366 uint32_t size
= 0, align
= sizeof (uint8_t), mask
;
9367 uint16_t format
= 0;
9368 dtrace_recdesc_t
*rec
;
9369 dtrace_state_t
*state
= ecb
->dte_state
;
9370 dtrace_optval_t
*opt
= state
->dts_options
, nframes
, strsize
;
9371 uint64_t arg
= desc
->dtad_arg
;
9373 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
9374 ASSERT(ecb
->dte_action
== NULL
|| ecb
->dte_action
->dta_refcnt
== 1);
9376 if (DTRACEACT_ISAGG(desc
->dtad_kind
)) {
9378 * If this is an aggregating action, there must be neither
9379 * a speculate nor a commit on the action chain.
9381 dtrace_action_t
*act
;
9383 for (act
= ecb
->dte_action
; act
!= NULL
; act
= act
->dta_next
) {
9384 if (act
->dta_kind
== DTRACEACT_COMMIT
)
9387 if (act
->dta_kind
== DTRACEACT_SPECULATE
)
9391 action
= dtrace_ecb_aggregation_create(ecb
, desc
);
9396 if (DTRACEACT_ISDESTRUCTIVE(desc
->dtad_kind
) ||
9397 (desc
->dtad_kind
== DTRACEACT_DIFEXPR
&&
9398 dp
!= NULL
&& dp
->dtdo_destructive
)) {
9399 state
->dts_destructive
= 1;
9402 switch (desc
->dtad_kind
) {
9403 case DTRACEACT_PRINTF
:
9404 case DTRACEACT_PRINTA
:
9405 case DTRACEACT_SYSTEM
:
9406 case DTRACEACT_FREOPEN
:
9408 * We know that our arg is a string -- turn it into a
9412 ASSERT(desc
->dtad_kind
== DTRACEACT_PRINTA
);
9415 ASSERT(arg
!= NULL
);
9416 /* ASSERT(arg > KERNELBASE); */
9417 format
= dtrace_format_add(state
,
9418 (char *)(uintptr_t)arg
);
9422 case DTRACEACT_LIBACT
:
9423 case DTRACEACT_DIFEXPR
:
9427 if ((size
= dp
->dtdo_rtype
.dtdt_size
) != 0)
9430 if (dp
->dtdo_rtype
.dtdt_kind
== DIF_TYPE_STRING
) {
9431 if (!(dp
->dtdo_rtype
.dtdt_flags
& DIF_TF_BYREF
))
9434 size
= opt
[DTRACEOPT_STRSIZE
];
9439 case DTRACEACT_STACK
:
9440 if ((nframes
= arg
) == 0) {
9441 nframes
= opt
[DTRACEOPT_STACKFRAMES
];
9442 ASSERT(nframes
> 0);
9446 size
= nframes
* sizeof (pc_t
);
9449 case DTRACEACT_JSTACK
:
9450 if ((strsize
= DTRACE_USTACK_STRSIZE(arg
)) == 0)
9451 strsize
= opt
[DTRACEOPT_JSTACKSTRSIZE
];
9453 if ((nframes
= DTRACE_USTACK_NFRAMES(arg
)) == 0)
9454 nframes
= opt
[DTRACEOPT_JSTACKFRAMES
];
9456 arg
= DTRACE_USTACK_ARG(nframes
, strsize
);
9459 case DTRACEACT_USTACK
:
9460 if (desc
->dtad_kind
!= DTRACEACT_JSTACK
&&
9461 (nframes
= DTRACE_USTACK_NFRAMES(arg
)) == 0) {
9462 strsize
= DTRACE_USTACK_STRSIZE(arg
);
9463 nframes
= opt
[DTRACEOPT_USTACKFRAMES
];
9464 ASSERT(nframes
> 0);
9465 arg
= DTRACE_USTACK_ARG(nframes
, strsize
);
9469 * Save a slot for the pid.
9471 size
= (nframes
+ 1) * sizeof (uint64_t);
9472 size
+= DTRACE_USTACK_STRSIZE(arg
);
9473 size
= P2ROUNDUP(size
, (uint32_t)(sizeof (uintptr_t)));
9479 if (dp
== NULL
|| ((size
= dp
->dtdo_rtype
.dtdt_size
) !=
9480 sizeof (uint64_t)) ||
9481 (dp
->dtdo_rtype
.dtdt_flags
& DIF_TF_BYREF
))
9485 case DTRACEACT_USYM
:
9486 case DTRACEACT_UMOD
:
9487 case DTRACEACT_UADDR
:
9489 (dp
->dtdo_rtype
.dtdt_size
!= sizeof (uint64_t)) ||
9490 (dp
->dtdo_rtype
.dtdt_flags
& DIF_TF_BYREF
))
9494 * We have a slot for the pid, plus a slot for the
9495 * argument. To keep things simple (aligned with
9496 * bitness-neutral sizing), we store each as a 64-bit
9499 size
= 2 * sizeof (uint64_t);
9502 case DTRACEACT_STOP
:
9503 case DTRACEACT_BREAKPOINT
:
9504 case DTRACEACT_PANIC
:
9507 case DTRACEACT_CHILL
:
9508 case DTRACEACT_DISCARD
:
9509 case DTRACEACT_RAISE
:
9514 case DTRACEACT_EXIT
:
9516 (size
= dp
->dtdo_rtype
.dtdt_size
) != sizeof (int) ||
9517 (dp
->dtdo_rtype
.dtdt_flags
& DIF_TF_BYREF
))
9521 case DTRACEACT_SPECULATE
:
9522 if (ecb
->dte_size
> sizeof (dtrace_epid_t
))
9528 state
->dts_speculates
= 1;
9531 case DTRACEACT_COMMIT
: {
9532 dtrace_action_t
*act
= ecb
->dte_action
;
9534 for (; act
!= NULL
; act
= act
->dta_next
) {
9535 if (act
->dta_kind
== DTRACEACT_COMMIT
)
9548 if (size
!= 0 || desc
->dtad_kind
== DTRACEACT_SPECULATE
) {
9550 * If this is a data-storing action or a speculate,
9551 * we must be sure that there isn't a commit on the
9554 dtrace_action_t
*act
= ecb
->dte_action
;
9556 for (; act
!= NULL
; act
= act
->dta_next
) {
9557 if (act
->dta_kind
== DTRACEACT_COMMIT
)
9562 action
= kmem_zalloc(sizeof (dtrace_action_t
), KM_SLEEP
);
9563 action
->dta_rec
.dtrd_size
= size
;
9566 action
->dta_refcnt
= 1;
9567 rec
= &action
->dta_rec
;
9568 size
= rec
->dtrd_size
;
9570 for (mask
= sizeof (uint64_t) - 1; size
!= 0 && mask
> 0; mask
>>= 1) {
9571 if (!(size
& mask
)) {
9577 action
->dta_kind
= desc
->dtad_kind
;
9579 if ((action
->dta_difo
= dp
) != NULL
)
9580 dtrace_difo_hold(dp
);
9582 rec
->dtrd_action
= action
->dta_kind
;
9583 rec
->dtrd_arg
= arg
;
9584 rec
->dtrd_uarg
= desc
->dtad_uarg
;
9585 rec
->dtrd_alignment
= (uint16_t)align
;
9586 rec
->dtrd_format
= format
;
9588 if ((last
= ecb
->dte_action_last
) != NULL
) {
9589 ASSERT(ecb
->dte_action
!= NULL
);
9590 action
->dta_prev
= last
;
9591 last
->dta_next
= action
;
9593 ASSERT(ecb
->dte_action
== NULL
);
9594 ecb
->dte_action
= action
;
9597 ecb
->dte_action_last
= action
;
static void
dtrace_ecb_action_remove(dtrace_ecb_t *ecb)
{
	dtrace_action_t *act = ecb->dte_action, *next;
	dtrace_vstate_t *vstate = &ecb->dte_state->dts_vstate;
	dtrace_difo_t *dp;
	uint16_t format;

	if (act != NULL && act->dta_refcnt > 1) {
		ASSERT(act->dta_next == NULL || act->dta_next->dta_refcnt == 1);
		act->dta_refcnt--;
	} else {
		for (; act != NULL; act = next) {
			next = act->dta_next;
			ASSERT(next != NULL || act == ecb->dte_action_last);
			ASSERT(act->dta_refcnt == 1);

			if ((format = act->dta_rec.dtrd_format) != 0)
				dtrace_format_remove(ecb->dte_state, format);

			if ((dp = act->dta_difo) != NULL)
				dtrace_difo_release(dp, vstate);

			if (DTRACEACT_ISAGG(act->dta_kind)) {
				dtrace_ecb_aggregation_destroy(ecb, act);
			} else {
				kmem_free(act, sizeof (dtrace_action_t));
			}
		}
	}

	ecb->dte_action = NULL;
	ecb->dte_action_last = NULL;
	ecb->dte_size = sizeof (dtrace_epid_t);
}
static void
dtrace_ecb_disable(dtrace_ecb_t *ecb)
{
	/*
	 * We disable the ECB by removing it from its probe.
	 */
	dtrace_ecb_t *pecb, *prev = NULL;
	dtrace_probe_t *probe = ecb->dte_probe;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (probe == NULL) {
		/*
		 * This is the NULL probe; there is nothing to disable.
		 */
		return;
	}

	for (pecb = probe->dtpr_ecb; pecb != NULL; pecb = pecb->dte_next) {
		if (pecb == ecb)
			break;
		prev = pecb;
	}

	ASSERT(pecb != NULL);

	if (prev == NULL) {
		probe->dtpr_ecb = ecb->dte_next;
	} else {
		prev->dte_next = ecb->dte_next;
	}

	if (ecb == probe->dtpr_ecb_last) {
		ASSERT(ecb->dte_next == NULL);
		probe->dtpr_ecb_last = prev;
	}

	/*
	 * The ECB has been disconnected from the probe; now sync to assure
	 * that all CPUs have seen the change before returning.
	 */
	dtrace_sync();

	if (probe->dtpr_ecb == NULL) {
		/*
		 * That was the last ECB on the probe; clear the predicate
		 * cache ID for the probe, disable it and sync one more time
		 * to assure that we'll never hit it again.
		 */
		dtrace_provider_t *prov = probe->dtpr_provider;

		ASSERT(ecb->dte_next == NULL);
		ASSERT(probe->dtpr_ecb_last == NULL);
		probe->dtpr_predcache = DTRACE_CACHEIDNONE;
		prov->dtpv_pops.dtps_disable(prov->dtpv_arg,
		    probe->dtpr_id, probe->dtpr_arg);
		dtrace_sync();
	} else {
		/*
		 * There is at least one ECB remaining on the probe.  If there
		 * is _exactly_ one, set the probe's predicate cache ID to be
		 * the predicate cache ID of the remaining ECB.
		 */
		ASSERT(probe->dtpr_ecb_last != NULL);
		ASSERT(probe->dtpr_predcache == DTRACE_CACHEIDNONE);

		if (probe->dtpr_ecb == probe->dtpr_ecb_last) {
			dtrace_predicate_t *p = probe->dtpr_ecb->dte_predicate;

			ASSERT(probe->dtpr_ecb->dte_next == NULL);

			if (p != NULL)
				probe->dtpr_predcache = p->dtp_cacheid;
		}

		ecb->dte_next = NULL;
	}
}
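
/*
 * Illustrative sketch: the predicate cache ID manipulated above exists so
 * that probe context can skip an ECB cheaply when the current thread has
 * already evaluated the same predicate to false.  Conceptually the hot-path
 * test looks like the function below; the real check lives in
 * dtrace_probe() and compares against the thread's cached predicate ID.
 * dtrace_example_predcache_skip() is an invented name, shown for
 * explanation only.
 */
static int
dtrace_example_predcache_skip(dtrace_probe_t *probe, dtrace_cacheid_t thread_cache)
{
	/* Non-zero means: this thread already failed this probe's predicate. */
	return (probe->dtpr_predcache != DTRACE_CACHEIDNONE &&
	    probe->dtpr_predcache == thread_cache);
}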
static void
dtrace_ecb_destroy(dtrace_ecb_t *ecb)
{
	dtrace_state_t *state = ecb->dte_state;
	dtrace_vstate_t *vstate = &state->dts_vstate;
	dtrace_predicate_t *pred;
	dtrace_epid_t epid = ecb->dte_epid;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(ecb->dte_next == NULL);
	ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb);

	if ((pred = ecb->dte_predicate) != NULL)
		dtrace_predicate_release(pred, vstate);

	dtrace_ecb_action_remove(ecb);

	ASSERT(state->dts_ecbs[epid - 1] == ecb);
	state->dts_ecbs[epid - 1] = NULL;

	kmem_free(ecb, sizeof (dtrace_ecb_t));
}
static dtrace_ecb_t *
dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe,
    dtrace_enabling_t *enab)
{
	dtrace_ecb_t *ecb;
	dtrace_predicate_t *pred;
	dtrace_actdesc_t *act;
	dtrace_provider_t *prov;
	dtrace_ecbdesc_t *desc = enab->dten_current;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(state != NULL);

	ecb = dtrace_ecb_add(state, probe);
	ecb->dte_uarg = desc->dted_uarg;

	if ((pred = desc->dted_pred.dtpdd_predicate) != NULL) {
		dtrace_predicate_hold(pred);
		ecb->dte_predicate = pred;
	}

	if (probe != NULL) {
		/*
		 * If the provider shows more leg than the consumer is old
		 * enough to see, we need to enable the appropriate implicit
		 * predicate bits to prevent the ecb from activating at
		 * revealing times.
		 *
		 * Providers specifying DTRACE_PRIV_USER at register time
		 * are stating that they need the /proc-style privilege
		 * model to be enforced, and this is what DTRACE_COND_OWNER
		 * and DTRACE_COND_ZONEOWNER will then do at probe time.
		 */
		prov = probe->dtpr_provider;
		if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLPROC) &&
		    (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
			ecb->dte_cond |= DTRACE_COND_OWNER;

		if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLZONE) &&
		    (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
			ecb->dte_cond |= DTRACE_COND_ZONEOWNER;

		/*
		 * If the provider shows us kernel innards and the user
		 * is lacking sufficient privilege, enable the
		 * DTRACE_COND_USERMODE implicit predicate.
		 */
		if (!(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) &&
		    (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_KERNEL))
			ecb->dte_cond |= DTRACE_COND_USERMODE;
	}

	if (dtrace_ecb_create_cache != NULL) {
		/*
		 * If we have a cached ecb, we'll use its action list instead
		 * of creating our own (saving both time and space).
		 */
		dtrace_ecb_t *cached = dtrace_ecb_create_cache;
		dtrace_action_t *act = cached->dte_action;

		if (act != NULL) {
			ASSERT(act->dta_refcnt > 0);
			act->dta_refcnt++;
			ecb->dte_action = act;
			ecb->dte_action_last = cached->dte_action_last;
			ecb->dte_needed = cached->dte_needed;
			ecb->dte_size = cached->dte_size;
			ecb->dte_alignment = cached->dte_alignment;
		}

		return (ecb);
	}

	for (act = desc->dted_action; act != NULL; act = act->dtad_next) {
		if ((enab->dten_error = dtrace_ecb_action_add(ecb, act)) != 0) {
			dtrace_ecb_destroy(ecb);
			return (NULL);
		}
	}

	dtrace_ecb_resize(ecb);

	return (dtrace_ecb_create_cache = ecb);
}
static int
dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg)
{
	dtrace_ecb_t *ecb;
	dtrace_enabling_t *enab = arg;
	dtrace_state_t *state = enab->dten_vstate->dtvs_state;

	ASSERT(state != NULL);

	if (probe != NULL && probe->dtpr_gen < enab->dten_probegen) {
		/*
		 * This probe was created in a generation for which this
		 * enabling has previously created ECBs; we don't want to
		 * enable it again, so just kick out.
		 */
		return (DTRACE_MATCH_NEXT);
	}

	if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL)
		return (DTRACE_MATCH_DONE);

	dtrace_ecb_enable(ecb);
	return (DTRACE_MATCH_NEXT);
}
static dtrace_ecb_t *
dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id)
{
	dtrace_ecb_t *ecb;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (id == 0 || id > state->dts_necbs)
		return (NULL);

	ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL);
	ASSERT((ecb = state->dts_ecbs[id - 1]) == NULL || ecb->dte_epid == id);

	return (state->dts_ecbs[id - 1]);
}

static dtrace_aggregation_t *
dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id)
{
	dtrace_aggregation_t *agg;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (id == 0 || id > state->dts_naggregations)
		return (NULL);

	ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL);
	ASSERT((agg = state->dts_aggregations[id - 1]) == NULL ||
	    agg->dtag_id == id);

	return (state->dts_aggregations[id - 1]);
}
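
/*
 * Illustrative sketch: EPIDs and aggregation IDs are 1-based, so slot 0 of
 * dts_ecbs/dts_aggregations holds ID 1; that is why the two lookups above
 * reject ID 0 and subtract one.  The same bounds-checked mapping, written
 * generically (dtrace_example_id2slot() is an invented name):
 */
static void *
dtrace_example_id2slot(void **table, uint32_t nslots, uint32_t id)
{
	if (id == 0 || id > nslots)
		return (NULL);

	return (table[id - 1]);
}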
/*
 * DTrace Buffer Functions
 *
 * The following functions manipulate DTrace buffers.  Most of these functions
 * are called in the context of establishing or processing consumer state;
 * exceptions are explicitly noted.
 */

/*
 * Note:  called from cross call context.  This function switches the two
 * buffers on a given CPU.  The atomicity of this operation is assured by
 * disabling interrupts while the actual switch takes place; the disabling of
 * interrupts serializes the execution with any execution of dtrace_probe() on
 * the same CPU.
 */
static void
dtrace_buffer_switch(dtrace_buffer_t *buf)
{
	caddr_t tomax = buf->dtb_tomax;
	caddr_t xamot = buf->dtb_xamot;
	dtrace_icookie_t cookie;

	ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
	ASSERT(!(buf->dtb_flags & DTRACEBUF_RING));

	cookie = dtrace_interrupt_disable();
	buf->dtb_tomax = xamot;
	buf->dtb_xamot = tomax;
	buf->dtb_xamot_drops = buf->dtb_drops;
	buf->dtb_xamot_offset = buf->dtb_offset;
	buf->dtb_xamot_errors = buf->dtb_errors;
	buf->dtb_xamot_flags = buf->dtb_flags;
	buf->dtb_offset = 0;
	buf->dtb_drops = 0;
	buf->dtb_errors = 0;
	buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED);
	dtrace_interrupt_enable(cookie);
}
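
/*
 * Illustrative sketch: a consumer-driven snapshot conceptually invokes the
 * switch above by cross calling the target CPU, so the switch runs with
 * that CPU's interrupts disabled and cannot interleave with dtrace_probe().
 * This mirrors how the buffer-snapshot ioctl path uses dtrace_xcall(); the
 * function below is an example only and is never called by the framework.
 */
static void
dtrace_example_snapshot_cpu(dtrace_state_t *state, processorid_t cpu)
{
	dtrace_buffer_t *buf = &state->dts_buffer[cpu];

	if (!(buf->dtb_flags & DTRACEBUF_NOSWITCH))
		dtrace_xcall(cpu, (dtrace_xcall_t)dtrace_buffer_switch, buf);
}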
/*
 * Note:  called from cross call context.  This function activates a buffer
 * on a CPU.  As with dtrace_buffer_switch(), the atomicity of the operation
 * is guaranteed by the disabling of interrupts.
 */
static void
dtrace_buffer_activate(dtrace_state_t *state)
{
	dtrace_buffer_t *buf;
	dtrace_icookie_t cookie = dtrace_interrupt_disable();

	buf = &state->dts_buffer[CPU->cpu_id];

	if (buf->dtb_tomax != NULL) {
		/*
		 * We might like to assert that the buffer is marked inactive,
		 * but this isn't necessarily true: the buffer for the CPU
		 * that processes the BEGIN probe has its buffer activated
		 * manually.  In this case, we take the (harmless) action of
		 * re-clearing the INACTIVE bit.
		 */
		buf->dtb_flags &= ~DTRACEBUF_INACTIVE;
	}

	dtrace_interrupt_enable(cookie);
}
static int
dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,
    processorid_t cpu)
{
	cpu_t *cp;
	dtrace_buffer_t *buf;

	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (size > dtrace_nonroot_maxsize &&
	    !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE))
		return (EFBIG);

#if defined(__APPLE__)
	if (size > (sane_size / 8) / NCPU) /* As in kdbg_set_nkdbufs(), roughly. */
		return (ENOMEM);
#endif /* __APPLE__ */

	cp = cpu_list;

	do {
		if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
			continue;

		buf = &bufs[cp->cpu_id];

		/*
		 * If there is already a buffer allocated for this CPU, it
		 * is only possible that this is a DR event.  In this case,
		 * the buffer size must match our specified size.
		 */
		if (buf->dtb_tomax != NULL) {
			ASSERT(buf->dtb_size == size);
			continue;
		}

		ASSERT(buf->dtb_xamot == NULL);

		if ((buf->dtb_tomax = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
			goto err;

		buf->dtb_size = size;
		buf->dtb_flags = flags;
		buf->dtb_offset = 0;
		buf->dtb_drops = 0;

		if (flags & DTRACEBUF_NOSWITCH)
			continue;

		if ((buf->dtb_xamot = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
			goto err;
	} while ((cp = cp->cpu_next) != cpu_list);

	return (0);

err:
	cp = cpu_list;

	do {
		if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
			continue;

		buf = &bufs[cp->cpu_id];

		if (buf->dtb_xamot != NULL) {
			ASSERT(buf->dtb_tomax != NULL);
			ASSERT(buf->dtb_size == size);
			kmem_free(buf->dtb_xamot, size);
		}

		if (buf->dtb_tomax != NULL) {
			ASSERT(buf->dtb_size == size);
			kmem_free(buf->dtb_tomax, size);
		}

		buf->dtb_tomax = NULL;
		buf->dtb_xamot = NULL;
		buf->dtb_size = 0;
	} while ((cp = cp->cpu_next) != cpu_list);

	return (ENOMEM);
}
/*
 * Note:  called from probe context.  This function just increments the drop
 * count on a buffer.  It has been made a function to allow for the
 * possibility of understanding the source of mysterious drop counts.  (A
 * problem for which one may be particularly disappointed that DTrace cannot
 * be used to understand DTrace.)
 */
static void
dtrace_buffer_drop(dtrace_buffer_t *buf)
{
	buf->dtb_drops++;
}
10047 * Note: called from probe context. This function is called to reserve space
10048 * in a buffer. If mstate is non-NULL, sets the scratch base and size in the
10049 * mstate. Returns the new offset in the buffer, or a negative value if an
10050 * error has occurred.
10053 dtrace_buffer_reserve(dtrace_buffer_t
*buf
, size_t needed
, size_t align
,
10054 dtrace_state_t
*state
, dtrace_mstate_t
*mstate
)
10056 intptr_t offs
= buf
->dtb_offset
, soffs
;
10061 if (buf
->dtb_flags
& DTRACEBUF_INACTIVE
)
10064 if ((tomax
= buf
->dtb_tomax
) == NULL
) {
10065 dtrace_buffer_drop(buf
);
10069 if (!(buf
->dtb_flags
& (DTRACEBUF_RING
| DTRACEBUF_FILL
))) {
10070 while (offs
& (align
- 1)) {
10072 * Assert that our alignment is off by a number which
10073 * is itself sizeof (uint32_t) aligned.
10075 ASSERT(!((align
- (offs
& (align
- 1))) &
10076 (sizeof (uint32_t) - 1)));
10077 DTRACE_STORE(uint32_t, tomax
, offs
, DTRACE_EPIDNONE
);
10078 offs
+= sizeof (uint32_t);
10081 if ((soffs
= offs
+ needed
) > buf
->dtb_size
) {
10082 dtrace_buffer_drop(buf
);
10086 if (mstate
== NULL
)
10089 mstate
->dtms_scratch_base
= (uintptr_t)tomax
+ soffs
;
10090 mstate
->dtms_scratch_size
= buf
->dtb_size
- soffs
;
10091 mstate
->dtms_scratch_ptr
= mstate
->dtms_scratch_base
;
10096 if (buf
->dtb_flags
& DTRACEBUF_FILL
) {
10097 if (state
->dts_activity
!= DTRACE_ACTIVITY_COOLDOWN
&&
10098 (buf
->dtb_flags
& DTRACEBUF_FULL
))
10103 total
= needed
+ (offs
& (align
- 1));
10106 * For a ring buffer, life is quite a bit more complicated. Before
10107 * we can store any padding, we need to adjust our wrapping offset.
10108 * (If we've never before wrapped or we're not about to, no adjustment
10111 if ((buf
->dtb_flags
& DTRACEBUF_WRAPPED
) ||
10112 offs
+ total
> buf
->dtb_size
) {
10113 woffs
= buf
->dtb_xamot_offset
;
10115 if (offs
+ total
> buf
->dtb_size
) {
10117 * We can't fit in the end of the buffer. First, a
10118 * sanity check that we can fit in the buffer at all.
10120 if (total
> buf
->dtb_size
) {
10121 dtrace_buffer_drop(buf
);
10126 * We're going to be storing at the top of the buffer,
10127 * so now we need to deal with the wrapped offset. We
10128 * only reset our wrapped offset to 0 if it is
10129 * currently greater than the current offset. If it
10130 * is less than the current offset, it is because a
10131 * previous allocation induced a wrap -- but the
10132 * allocation didn't subsequently take the space due
10133 * to an error or false predicate evaluation. In this
10134 * case, we'll just leave the wrapped offset alone: if
10135 * the wrapped offset hasn't been advanced far enough
10136 * for this allocation, it will be adjusted in the
10139 if (buf
->dtb_flags
& DTRACEBUF_WRAPPED
) {
10147 * Now we know that we're going to be storing to the
10148 * top of the buffer and that there is room for us
10149 * there. We need to clear the buffer from the current
10150 * offset to the end (there may be old gunk there).
10152 while (offs
< buf
->dtb_size
)
10156 * We need to set our offset to zero. And because we
10157 * are wrapping, we need to set the bit indicating as
10158 * much. We can also adjust our needed space back
10159 * down to the space required by the ECB -- we know
10160 * that the top of the buffer is aligned.
10164 buf
->dtb_flags
|= DTRACEBUF_WRAPPED
;
10167 * There is room for us in the buffer, so we simply
10168 * need to check the wrapped offset.
10170 if (woffs
< offs
) {
10172 * The wrapped offset is less than the offset.
10173 * This can happen if we allocated buffer space
10174 * that induced a wrap, but then we didn't
10175 * subsequently take the space due to an error
10176 * or false predicate evaluation. This is
10177 * okay; we know that _this_ allocation isn't
10178 * going to induce a wrap. We still can't
10179 * reset the wrapped offset to be zero,
10180 * however: the space may have been trashed in
10181 * the previous failed probe attempt. But at
10182 * least the wrapped offset doesn't need to
10183 * be adjusted at all...
10189 while (offs
+ total
> woffs
) {
10190 dtrace_epid_t epid
= *(uint32_t *)(tomax
+ woffs
);
10193 if (epid
== DTRACE_EPIDNONE
) {
10194 size
= sizeof (uint32_t);
10196 ASSERT(epid
<= state
->dts_necbs
);
10197 ASSERT(state
->dts_ecbs
[epid
- 1] != NULL
);
10199 size
= state
->dts_ecbs
[epid
- 1]->dte_size
;
10202 ASSERT(woffs
+ size
<= buf
->dtb_size
);
10205 if (woffs
+ size
== buf
->dtb_size
) {
10207 * We've reached the end of the buffer; we want
10208 * to set the wrapped offset to 0 and break
10209 * out. However, if the offs is 0, then we're
10210 * in a strange edge-condition: the amount of
10211 * space that we want to reserve plus the size
10212 * of the record that we're overwriting is
10213 * greater than the size of the buffer. This
10214 * is problematic because if we reserve the
10215 * space but subsequently don't consume it (due
10216 * to a failed predicate or error) the wrapped
10217 * offset will be 0 -- yet the EPID at offset 0
10218 * will not be committed. This situation is
10219 * relatively easy to deal with: if we're in
10220 * this case, the buffer is indistinguishable
10221 * from one that hasn't wrapped; we need only
10222 * finish the job by clearing the wrapped bit,
10223 * explicitly setting the offset to be 0, and
10224 * zero'ing out the old data in the buffer.
10227 buf
->dtb_flags
&= ~DTRACEBUF_WRAPPED
;
10228 buf
->dtb_offset
= 0;
10231 while (woffs
< buf
->dtb_size
)
10232 tomax
[woffs
++] = 0;
10243 * We have a wrapped offset. It may be that the wrapped offset
10244 * has become zero -- that's okay.
10246 buf
->dtb_xamot_offset
= woffs
;
10251 * Now we can plow the buffer with any necessary padding.
10253 while (offs
& (align
- 1)) {
10255 * Assert that our alignment is off by a number which
10256 * is itself sizeof (uint32_t) aligned.
10258 ASSERT(!((align
- (offs
& (align
- 1))) &
10259 (sizeof (uint32_t) - 1)));
10260 DTRACE_STORE(uint32_t, tomax
, offs
, DTRACE_EPIDNONE
);
10261 offs
+= sizeof (uint32_t);
10264 if (buf
->dtb_flags
& DTRACEBUF_FILL
) {
10265 if (offs
+ needed
> buf
->dtb_size
- state
->dts_reserve
) {
10266 buf
->dtb_flags
|= DTRACEBUF_FULL
;
10271 if (mstate
== NULL
)
10275 * For ring buffers and fill buffers, the scratch space is always
10276 * the inactive buffer.
10278 mstate
->dtms_scratch_base
= (uintptr_t)buf
->dtb_xamot
;
10279 mstate
->dtms_scratch_size
= buf
->dtb_size
;
10280 mstate
->dtms_scratch_ptr
= mstate
->dtms_scratch_base
;
static void
dtrace_buffer_polish(dtrace_buffer_t *buf)
{
	ASSERT(buf->dtb_flags & DTRACEBUF_RING);
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (!(buf->dtb_flags & DTRACEBUF_WRAPPED))
		return;

	/*
	 * We need to polish the ring buffer.  There are three cases:
	 *
	 * - The first (and presumably most common) is that there is no gap
	 *   between the buffer offset and the wrapped offset.  In this case,
	 *   there is nothing in the buffer that isn't valid data; we can
	 *   mark the buffer as polished and return.
	 *
	 * - The second (less common than the first but still more common
	 *   than the third) is that there is a gap between the buffer offset
	 *   and the wrapped offset, and the wrapped offset is larger than the
	 *   buffer offset.  This can happen because of an alignment issue, or
	 *   can happen because of a call to dtrace_buffer_reserve() that
	 *   didn't subsequently consume the buffer space.  In this case,
	 *   we need to zero the data from the buffer offset to the wrapped
	 *   offset.
	 *
	 * - The third (and least common) is that there is a gap between the
	 *   buffer offset and the wrapped offset, but the wrapped offset is
	 *   _less_ than the buffer offset.  This can only happen because a
	 *   call to dtrace_buffer_reserve() induced a wrap, but the space
	 *   was not subsequently consumed.  In this case, we need to zero the
	 *   space from the offset to the end of the buffer _and_ from the
	 *   top of the buffer to the wrapped offset.
	 */
	if (buf->dtb_offset < buf->dtb_xamot_offset) {
		bzero(buf->dtb_tomax + buf->dtb_offset,
		    buf->dtb_xamot_offset - buf->dtb_offset);
	}

	if (buf->dtb_offset > buf->dtb_xamot_offset) {
		bzero(buf->dtb_tomax + buf->dtb_offset,
		    buf->dtb_size - buf->dtb_offset);
		bzero(buf->dtb_tomax, buf->dtb_xamot_offset);
	}
}
static void
dtrace_buffer_free(dtrace_buffer_t *bufs)
{
	int i;

	for (i = 0; i < NCPU; i++) {
		dtrace_buffer_t *buf = &bufs[i];

		if (buf->dtb_tomax == NULL) {
			ASSERT(buf->dtb_xamot == NULL);
			ASSERT(buf->dtb_size == 0);
			continue;
		}

		if (buf->dtb_xamot != NULL) {
			ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
			kmem_free(buf->dtb_xamot, buf->dtb_size);
		}

		kmem_free(buf->dtb_tomax, buf->dtb_size);
		buf->dtb_size = 0;
		buf->dtb_tomax = NULL;
		buf->dtb_xamot = NULL;
	}
}
/*
 * DTrace Enabling Functions
 */
static dtrace_enabling_t *
dtrace_enabling_create(dtrace_vstate_t *vstate)
{
	dtrace_enabling_t *enab;

	enab = kmem_zalloc(sizeof (dtrace_enabling_t), KM_SLEEP);
	enab->dten_vstate = vstate;

	return (enab);
}

static void
dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb)
{
	dtrace_ecbdesc_t **ndesc;
	size_t osize, nsize;

	/*
	 * We can't add to enablings after we've enabled them, or after we've
	 * retained them.
	 */
	ASSERT(enab->dten_probegen == 0);
	ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);

#if defined(__APPLE__)
	if (ecb == NULL) return; /* XXX protection against gcc 4.0 botch on x86 */
#endif /* __APPLE__ */

	if (enab->dten_ndesc < enab->dten_maxdesc) {
		enab->dten_desc[enab->dten_ndesc++] = ecb;
		return;
	}

	osize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);

	if (enab->dten_maxdesc == 0) {
		enab->dten_maxdesc = 1;
	} else {
		enab->dten_maxdesc <<= 1;
	}

	ASSERT(enab->dten_ndesc < enab->dten_maxdesc);

	nsize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);
	ndesc = kmem_zalloc(nsize, KM_SLEEP);
	bcopy(enab->dten_desc, ndesc, osize);
	kmem_free(enab->dten_desc, osize);

	enab->dten_desc = ndesc;
	enab->dten_desc[enab->dten_ndesc++] = ecb;
}
10413 dtrace_enabling_addlike(dtrace_enabling_t
*enab
, dtrace_ecbdesc_t
*ecb
,
10414 dtrace_probedesc_t
*pd
)
10416 dtrace_ecbdesc_t
*new;
10417 dtrace_predicate_t
*pred
;
10418 dtrace_actdesc_t
*act
;
10421 * We're going to create a new ECB description that matches the
10422 * specified ECB in every way, but has the specified probe description.
10424 new = kmem_zalloc(sizeof (dtrace_ecbdesc_t
), KM_SLEEP
);
10426 if ((pred
= ecb
->dted_pred
.dtpdd_predicate
) != NULL
)
10427 dtrace_predicate_hold(pred
);
10429 for (act
= ecb
->dted_action
; act
!= NULL
; act
= act
->dtad_next
)
10430 dtrace_actdesc_hold(act
);
10432 new->dted_action
= ecb
->dted_action
;
10433 new->dted_pred
= ecb
->dted_pred
;
10434 new->dted_probe
= *pd
;
10435 new->dted_uarg
= ecb
->dted_uarg
;
10437 dtrace_enabling_add(enab
, new);
10441 dtrace_enabling_dump(dtrace_enabling_t
*enab
)
10445 for (i
= 0; i
< enab
->dten_ndesc
; i
++) {
10446 dtrace_probedesc_t
*desc
= &enab
->dten_desc
[i
]->dted_probe
;
10448 cmn_err(CE_NOTE
, "enabling probe %d (%s:%s:%s:%s)", i
,
10449 desc
->dtpd_provider
, desc
->dtpd_mod
,
10450 desc
->dtpd_func
, desc
->dtpd_name
);
10455 dtrace_enabling_destroy(dtrace_enabling_t
*enab
)
10458 dtrace_ecbdesc_t
*ep
;
10459 dtrace_vstate_t
*vstate
= enab
->dten_vstate
;
10461 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
10463 for (i
= 0; i
< enab
->dten_ndesc
; i
++) {
10464 dtrace_actdesc_t
*act
, *next
;
10465 dtrace_predicate_t
*pred
;
10467 ep
= enab
->dten_desc
[i
];
10469 if ((pred
= ep
->dted_pred
.dtpdd_predicate
) != NULL
)
10470 dtrace_predicate_release(pred
, vstate
);
10472 for (act
= ep
->dted_action
; act
!= NULL
; act
= next
) {
10473 next
= act
->dtad_next
;
10474 dtrace_actdesc_release(act
, vstate
);
10477 kmem_free(ep
, sizeof (dtrace_ecbdesc_t
));
10480 kmem_free(enab
->dten_desc
,
10481 enab
->dten_maxdesc
* sizeof (dtrace_enabling_t
*));
10484 * If this was a retained enabling, decrement the dts_nretained count
10485 * and take it off of the dtrace_retained list.
10487 if (enab
->dten_prev
!= NULL
|| enab
->dten_next
!= NULL
||
10488 dtrace_retained
== enab
) {
10489 ASSERT(enab
->dten_vstate
->dtvs_state
!= NULL
);
10490 ASSERT(enab
->dten_vstate
->dtvs_state
->dts_nretained
> 0);
10491 enab
->dten_vstate
->dtvs_state
->dts_nretained
--;
10494 if (enab
->dten_prev
== NULL
) {
10495 if (dtrace_retained
== enab
) {
10496 dtrace_retained
= enab
->dten_next
;
10498 if (dtrace_retained
!= NULL
)
10499 dtrace_retained
->dten_prev
= NULL
;
10502 ASSERT(enab
!= dtrace_retained
);
10503 ASSERT(dtrace_retained
!= NULL
);
10504 enab
->dten_prev
->dten_next
= enab
->dten_next
;
10507 if (enab
->dten_next
!= NULL
) {
10508 ASSERT(dtrace_retained
!= NULL
);
10509 enab
->dten_next
->dten_prev
= enab
->dten_prev
;
10512 kmem_free(enab
, sizeof (dtrace_enabling_t
));
}

static int
dtrace_enabling_retain(dtrace_enabling_t *enab)
{
	dtrace_state_t *state;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);
	ASSERT(enab->dten_vstate != NULL);

	state = enab->dten_vstate->dtvs_state;
	ASSERT(state != NULL);

	/*
	 * We only allow each state to retain dtrace_retain_max enablings.
	 */
	if (state->dts_nretained >= dtrace_retain_max)
		return (ENOSPC);

	state->dts_nretained++;

	if (dtrace_retained == NULL) {
		dtrace_retained = enab;
		return (0);
	}

	enab->dten_next = dtrace_retained;
	dtrace_retained->dten_prev = enab;
	dtrace_retained = enab;

	return (0);
}
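
/*
 * Illustrative sketch: several of the routines that follow walk the global
 * dtrace_retained list and act only on enablings owned by one consumer
 * state.  The shared traversal pattern, reduced here to a simple count
 * (example only; callers hold dtrace_lock):
 */
static int
dtrace_example_count_retained(dtrace_state_t *state)
{
	dtrace_enabling_t *enab;
	int n = 0;

	for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
		if (enab->dten_vstate->dtvs_state == state)
			n++;
	}

	return (n);
}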
10548 dtrace_enabling_replicate(dtrace_state_t
*state
, dtrace_probedesc_t
*match
,
10549 dtrace_probedesc_t
*create
)
10551 dtrace_enabling_t
*new, *enab
;
10552 int found
= 0, err
= ENOENT
;
10554 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
10555 ASSERT(strlen(match
->dtpd_provider
) < DTRACE_PROVNAMELEN
);
10556 ASSERT(strlen(match
->dtpd_mod
) < DTRACE_MODNAMELEN
);
10557 ASSERT(strlen(match
->dtpd_func
) < DTRACE_FUNCNAMELEN
);
10558 ASSERT(strlen(match
->dtpd_name
) < DTRACE_NAMELEN
);
10560 new = dtrace_enabling_create(&state
->dts_vstate
);
10563 * Iterate over all retained enablings, looking for enablings that
10564 * match the specified state.
10566 for (enab
= dtrace_retained
; enab
!= NULL
; enab
= enab
->dten_next
) {
10570 * dtvs_state can only be NULL for helper enablings -- and
10571 * helper enablings can't be retained.
10573 ASSERT(enab
->dten_vstate
->dtvs_state
!= NULL
);
10575 if (enab
->dten_vstate
->dtvs_state
!= state
)
10579 * Now iterate over each probe description; we're looking for
10580 * an exact match to the specified probe description.
10582 for (i
= 0; i
< enab
->dten_ndesc
; i
++) {
10583 dtrace_ecbdesc_t
*ep
= enab
->dten_desc
[i
];
10584 dtrace_probedesc_t
*pd
= &ep
->dted_probe
;
10586 if (strcmp(pd
->dtpd_provider
, match
->dtpd_provider
))
10589 if (strcmp(pd
->dtpd_mod
, match
->dtpd_mod
))
10592 if (strcmp(pd
->dtpd_func
, match
->dtpd_func
))
10595 if (strcmp(pd
->dtpd_name
, match
->dtpd_name
))
10599 * We have a winning probe! Add it to our growing
10603 dtrace_enabling_addlike(new, ep
, create
);
10607 if (!found
|| (err
= dtrace_enabling_retain(new)) != 0) {
10608 dtrace_enabling_destroy(new);
10616 dtrace_enabling_retract(dtrace_state_t
*state
)
10618 dtrace_enabling_t
*enab
, *next
;
10620 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
10623 * Iterate over all retained enablings, destroy the enablings retained
10624 * for the specified state.
10626 for (enab
= dtrace_retained
; enab
!= NULL
; enab
= next
) {
10627 next
= enab
->dten_next
;
10630 * dtvs_state can only be NULL for helper enablings -- and
10631 * helper enablings can't be retained.
10633 ASSERT(enab
->dten_vstate
->dtvs_state
!= NULL
);
10635 if (enab
->dten_vstate
->dtvs_state
== state
) {
10636 ASSERT(state
->dts_nretained
> 0);
10637 dtrace_enabling_destroy(enab
);
10641 ASSERT(state
->dts_nretained
== 0);
10645 dtrace_enabling_match(dtrace_enabling_t
*enab
, int *nmatched
)
10650 lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
);
10651 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
10653 for (i
= 0; i
< enab
->dten_ndesc
; i
++) {
10654 dtrace_ecbdesc_t
*ep
= enab
->dten_desc
[i
];
10656 enab
->dten_current
= ep
;
10657 enab
->dten_error
= 0;
10659 matched
+= dtrace_probe_enable(&ep
->dted_probe
, enab
);
10661 if (enab
->dten_error
!= 0) {
10663 * If we get an error half-way through enabling the
10664 * probes, we kick out -- perhaps with some number of
10665 * them enabled. Leaving enabled probes enabled may
10666 * be slightly confusing for user-level, but we expect
10667 * that no one will attempt to actually drive on in
10668 * the face of such errors. If this is an anonymous
10669 * enabling (indicated with a NULL nmatched pointer),
10670 * we cmn_err() a message. We aren't expecting to
10671 * get such an error -- such as it can exist at all,
10672 * it would be a result of corrupted DOF in the driver
10675 if (nmatched
== NULL
) {
10676 cmn_err(CE_WARN
, "dtrace_enabling_match() "
10677 "error on %p: %d", (void *)ep
,
10681 return (enab
->dten_error
);
10685 enab
->dten_probegen
= dtrace_probegen
;
10686 if (nmatched
!= NULL
)
10687 *nmatched
= matched
;
10693 dtrace_enabling_matchall(void)
10695 dtrace_enabling_t
*enab
;
10697 lck_mtx_lock(&cpu_lock
);
10698 lck_mtx_lock(&dtrace_lock
);
10701 * Because we can be called after dtrace_detach() has been called, we
10702 * cannot assert that there are retained enablings. We can safely
10703 * load from dtrace_retained, however: the taskq_destroy() at the
10704 * end of dtrace_detach() will block pending our completion.
10706 for (enab
= dtrace_retained
; enab
!= NULL
; enab
= enab
->dten_next
)
10707 (void) dtrace_enabling_match(enab
, NULL
);
10709 lck_mtx_unlock(&dtrace_lock
);
10710 lck_mtx_unlock(&cpu_lock
);
10714 dtrace_enabling_matchstate(dtrace_state_t
*state
, int *nmatched
)
10716 dtrace_enabling_t
*enab
;
10717 int matched
, total
= 0, err
;
10719 lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
);
10720 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
10722 for (enab
= dtrace_retained
; enab
!= NULL
; enab
= enab
->dten_next
) {
10723 ASSERT(enab
->dten_vstate
->dtvs_state
!= NULL
);
10725 if (enab
->dten_vstate
->dtvs_state
!= state
)
10728 if ((err
= dtrace_enabling_match(enab
, &matched
)) != 0)
10734 if (nmatched
!= NULL
)
10741 * If an enabling is to be enabled without having matched probes (that is, if
10742 * dtrace_state_go() is to be called on the underlying dtrace_state_t), the
10743 * enabling must be _primed_ by creating an ECB for every ECB description.
10744 * This must be done to assure that we know the number of speculations, the
10745 * number of aggregations, the minimum buffer size needed, etc. before we
10746 * transition out of DTRACE_ACTIVITY_INACTIVE. To do this without actually
10747 * enabling any probes, we create ECBs for every ECB decription, but with a
10748 * NULL probe -- which is exactly what this function does.
10751 dtrace_enabling_prime(dtrace_state_t
*state
)
10753 dtrace_enabling_t
*enab
;
10756 for (enab
= dtrace_retained
; enab
!= NULL
; enab
= enab
->dten_next
) {
10757 ASSERT(enab
->dten_vstate
->dtvs_state
!= NULL
);
10759 if (enab
->dten_vstate
->dtvs_state
!= state
)
10763 * We don't want to prime an enabling more than once, lest
10764 * we allow a malicious user to induce resource exhaustion.
10765 * (The ECBs that result from priming an enabling aren't
10766 * leaked -- but they also aren't deallocated until the
10767 * consumer state is destroyed.)
10769 if (enab
->dten_primed
)
10772 for (i
= 0; i
< enab
->dten_ndesc
; i
++) {
10773 enab
->dten_current
= enab
->dten_desc
[i
];
10774 (void) dtrace_probe_enable(NULL
, enab
);
10777 enab
->dten_primed
= 1;
10782 * Called to indicate that probes should be provided due to retained
10783 * enablings. This is implemented in terms of dtrace_probe_provide(), but it
10784 * must take an initial lap through the enabling calling the dtps_provide()
10785 * entry point explicitly to allow for autocreated probes.
10788 dtrace_enabling_provide(dtrace_provider_t
*prv
)
10791 dtrace_probedesc_t desc
;
10793 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
10794 lck_mtx_assert(&dtrace_provider_lock
, LCK_MTX_ASSERT_OWNED
);
10798 prv
= dtrace_provider
;
10802 dtrace_enabling_t
*enab
= dtrace_retained
;
10803 void *parg
= prv
->dtpv_arg
;
10805 for (; enab
!= NULL
; enab
= enab
->dten_next
) {
10806 for (i
= 0; i
< enab
->dten_ndesc
; i
++) {
10807 desc
= enab
->dten_desc
[i
]->dted_probe
;
10808 lck_mtx_unlock(&dtrace_lock
);
10809 prv
->dtpv_pops
.dtps_provide(parg
, &desc
);
10810 lck_mtx_lock(&dtrace_lock
);
10813 } while (all
&& (prv
= prv
->dtpv_next
) != NULL
);
10815 lck_mtx_unlock(&dtrace_lock
);
10816 dtrace_probe_provide(NULL
, all
? NULL
: prv
);
10817 lck_mtx_lock(&dtrace_lock
);
}

/*
 * DTrace DOF Functions
 */
static void
dtrace_dof_error(dof_hdr_t *dof, const char *str)
{
	if (dtrace_err_verbose)
		cmn_err(CE_WARN, "failed to process DOF: %s", str);

#ifdef DTRACE_ERRDEBUG
	dtrace_errdebug(str);
#endif
}
10836 * Create DOF out of a currently enabled state. Right now, we only create
10837 * DOF containing the run-time options -- but this could be expanded to create
10838 * complete DOF representing the enabled state.
10841 dtrace_dof_create(dtrace_state_t
*state
)
10845 dof_optdesc_t
*opt
;
10846 int i
, len
= sizeof (dof_hdr_t
) +
10847 roundup(sizeof (dof_sec_t
), sizeof (uint64_t)) +
10848 sizeof (dof_optdesc_t
) * DTRACEOPT_MAX
;
10850 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
10852 dof
= dt_kmem_zalloc_aligned(len
, 8, KM_SLEEP
);
10853 dof
->dofh_ident
[DOF_ID_MAG0
] = DOF_MAG_MAG0
;
10854 dof
->dofh_ident
[DOF_ID_MAG1
] = DOF_MAG_MAG1
;
10855 dof
->dofh_ident
[DOF_ID_MAG2
] = DOF_MAG_MAG2
;
10856 dof
->dofh_ident
[DOF_ID_MAG3
] = DOF_MAG_MAG3
;
10858 dof
->dofh_ident
[DOF_ID_MODEL
] = DOF_MODEL_NATIVE
;
10859 dof
->dofh_ident
[DOF_ID_ENCODING
] = DOF_ENCODE_NATIVE
;
10860 dof
->dofh_ident
[DOF_ID_VERSION
] = DOF_VERSION
;
10861 dof
->dofh_ident
[DOF_ID_DIFVERS
] = DIF_VERSION
;
10862 dof
->dofh_ident
[DOF_ID_DIFIREG
] = DIF_DIR_NREGS
;
10863 dof
->dofh_ident
[DOF_ID_DIFTREG
] = DIF_DTR_NREGS
;
10865 dof
->dofh_flags
= 0;
10866 dof
->dofh_hdrsize
= sizeof (dof_hdr_t
);
10867 dof
->dofh_secsize
= sizeof (dof_sec_t
);
10868 dof
->dofh_secnum
= 1; /* only DOF_SECT_OPTDESC */
10869 dof
->dofh_secoff
= sizeof (dof_hdr_t
);
10870 dof
->dofh_loadsz
= len
;
10871 dof
->dofh_filesz
= len
;
10875 * Fill in the option section header...
10877 sec
= (dof_sec_t
*)((uintptr_t)dof
+ sizeof (dof_hdr_t
));
10878 sec
->dofs_type
= DOF_SECT_OPTDESC
;
10879 sec
->dofs_align
= sizeof (uint64_t);
10880 sec
->dofs_flags
= DOF_SECF_LOAD
;
10881 sec
->dofs_entsize
= sizeof (dof_optdesc_t
);
10883 opt
= (dof_optdesc_t
*)((uintptr_t)sec
+
10884 roundup(sizeof (dof_sec_t
), sizeof (uint64_t)));
10886 sec
->dofs_offset
= (uintptr_t)opt
- (uintptr_t)dof
;
10887 sec
->dofs_size
= sizeof (dof_optdesc_t
) * DTRACEOPT_MAX
;
10889 for (i
= 0; i
< DTRACEOPT_MAX
; i
++) {
10890 opt
[i
].dofo_option
= i
;
10891 opt
[i
].dofo_strtab
= DOF_SECIDX_NONE
;
10892 opt
[i
].dofo_value
= state
->dts_options
[i
];
10899 #if defined(__APPLE__)
10900 dtrace_dof_copyin(user_addr_t uarg
, int *errp
)
10902 dtrace_dof_copyin(uintptr_t uarg
, int *errp
)
10905 dof_hdr_t hdr
, *dof
;
10907 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_NOTOWNED
);
10910 * First, we're going to copyin() the sizeof (dof_hdr_t).
10912 #if defined(__APPLE__)
10913 if (copyin(uarg
, &hdr
, sizeof (hdr
)) != 0) {
10915 if (copyin((void *)uarg
, &hdr
, sizeof (hdr
)) != 0) {
10917 dtrace_dof_error(NULL
, "failed to copyin DOF header");
10923 * Now we'll allocate the entire DOF and copy it in -- provided
10924 * that the length isn't outrageous.
10926 if (hdr
.dofh_loadsz
>= dtrace_dof_maxsize
) {
10927 dtrace_dof_error(&hdr
, "load size exceeds maximum");
10932 if (hdr
.dofh_loadsz
< sizeof (hdr
)) {
10933 dtrace_dof_error(&hdr
, "invalid load size");
10938 dof
= dt_kmem_alloc_aligned(hdr
.dofh_loadsz
, 8, KM_SLEEP
);
10940 #if defined(__APPLE__)
10941 if (copyin(uarg
, dof
, hdr
.dofh_loadsz
) != 0) {
10943 if (copyin((void *)uarg
, dof
, hdr
.dofh_loadsz
) != 0) {
10945 dt_kmem_free_aligned(dof
, hdr
.dofh_loadsz
);
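
/*
 * Illustrative sketch: DOF comes in from user space in two stages -- copy
 * in just the fixed-size header, check dofh_loadsz against both the header
 * size and dtrace_dof_maxsize, and only then allocate and copy the whole
 * object.  A condensed rendering of that pattern using the same
 * copyin()/dt_kmem_*_aligned primitives (example only; the real routine
 * also reports failures through dtrace_dof_error() and *errp):
 */
static dof_hdr_t *
dtrace_example_dof_copyin(user_addr_t uarg)
{
	dof_hdr_t hdr, *dof;

	if (copyin(uarg, &hdr, sizeof (hdr)) != 0)
		return (NULL);

	if (hdr.dofh_loadsz >= dtrace_dof_maxsize || hdr.dofh_loadsz < sizeof (hdr))
		return (NULL);

	dof = dt_kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP);

	if (copyin(uarg, dof, hdr.dofh_loadsz) != 0) {
		dt_kmem_free_aligned(dof, hdr.dofh_loadsz);
		return (NULL);
	}

	return (dof);
}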
10953 #if defined(__APPLE__)
10956 dtrace_dof_copyin_from_proc(proc_t
* p
, user_addr_t uarg
, int *errp
)
10958 dof_hdr_t hdr
, *dof
;
10960 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_NOTOWNED
);
10963 * First, we're going to copyin() the sizeof (dof_hdr_t).
10965 if (uread(p
, &hdr
, sizeof(hdr
), uarg
) != KERN_SUCCESS
) {
10966 dtrace_dof_error(NULL
, "failed to copyin DOF header");
10972 * Now we'll allocate the entire DOF and copy it in -- provided
10973 * that the length isn't outrageous.
10975 if (hdr
.dofh_loadsz
>= dtrace_dof_maxsize
) {
10976 dtrace_dof_error(&hdr
, "load size exceeds maximum");
10981 if (hdr
.dofh_loadsz
< sizeof (hdr
)) {
10982 dtrace_dof_error(&hdr
, "invalid load size");
10987 dof
= dt_kmem_alloc_aligned(hdr
.dofh_loadsz
, 8, KM_SLEEP
);
10989 if (uread(p
, dof
, hdr
.dofh_loadsz
, uarg
) != KERN_SUCCESS
) {
10990 dt_kmem_free_aligned(dof
, hdr
.dofh_loadsz
);
10998 #endif /* __APPLE__ */
11001 dtrace_dof_property(const char *name
)
11005 unsigned int len
, i
;
11009 * Unfortunately, array of values in .conf files are always (and
11010 * only) interpreted to be integer arrays. We must read our DOF
11011 * as an integer array, and then squeeze it into a byte array.
11013 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY
, dtrace_devi
, 0,
11014 (char *)name
, (int **)&buf
, &len
) != DDI_PROP_SUCCESS
)
11017 for (i
= 0; i
< len
; i
++)
11018 buf
[i
] = (uchar_t
)(((int *)buf
)[i
]);
11020 if (len
< sizeof (dof_hdr_t
)) {
11021 ddi_prop_free(buf
);
11022 dtrace_dof_error(NULL
, "truncated header");
11026 if (len
< (loadsz
= ((dof_hdr_t
*)buf
)->dofh_loadsz
)) {
11027 ddi_prop_free(buf
);
11028 dtrace_dof_error(NULL
, "truncated DOF");
11032 if (loadsz
>= dtrace_dof_maxsize
) {
11033 ddi_prop_free(buf
);
11034 dtrace_dof_error(NULL
, "oversized DOF");
11038 dof
= dt_kmem_alloc_aligned(loadsz
, 8, KM_SLEEP
);
11039 bcopy(buf
, dof
, loadsz
);
11040 ddi_prop_free(buf
);
11046 dtrace_dof_destroy(dof_hdr_t
*dof
)
11048 dt_kmem_free_aligned(dof
, dof
->dofh_loadsz
);
11052 * Return the dof_sec_t pointer corresponding to a given section index. If the
11053 * index is not valid, dtrace_dof_error() is called and NULL is returned. If
11054 * a type other than DOF_SECT_NONE is specified, the header is checked against
11055 * this type and NULL is returned if the types do not match.
11058 dtrace_dof_sect(dof_hdr_t
*dof
, uint32_t type
, dof_secidx_t i
)
11060 dof_sec_t
*sec
= (dof_sec_t
*)(uintptr_t)
11061 ((uintptr_t)dof
+ dof
->dofh_secoff
+ i
* dof
->dofh_secsize
);
11063 if (i
>= dof
->dofh_secnum
) {
11064 dtrace_dof_error(dof
, "referenced section index is invalid");
11068 if (!(sec
->dofs_flags
& DOF_SECF_LOAD
)) {
11069 dtrace_dof_error(dof
, "referenced section is not loadable");
11073 if (type
!= DOF_SECT_NONE
&& type
!= sec
->dofs_type
) {
11074 dtrace_dof_error(dof
, "referenced section is the wrong type");
11081 static dtrace_probedesc_t
*
11082 dtrace_dof_probedesc(dof_hdr_t
*dof
, dof_sec_t
*sec
, dtrace_probedesc_t
*desc
)
11084 dof_probedesc_t
*probe
;
11086 uintptr_t daddr
= (uintptr_t)dof
;
11090 if (sec
->dofs_type
!= DOF_SECT_PROBEDESC
) {
11091 dtrace_dof_error(dof
, "invalid probe section");
11095 if (sec
->dofs_align
!= sizeof (dof_secidx_t
)) {
11096 dtrace_dof_error(dof
, "bad alignment in probe description");
11100 if (sec
->dofs_offset
+ sizeof (dof_probedesc_t
) > dof
->dofh_loadsz
) {
11101 dtrace_dof_error(dof
, "truncated probe description");
11105 probe
= (dof_probedesc_t
*)(uintptr_t)(daddr
+ sec
->dofs_offset
);
11106 strtab
= dtrace_dof_sect(dof
, DOF_SECT_STRTAB
, probe
->dofp_strtab
);
11108 if (strtab
== NULL
)
11111 str
= daddr
+ strtab
->dofs_offset
;
11112 size
= strtab
->dofs_size
;
11114 if (probe
->dofp_provider
>= strtab
->dofs_size
) {
11115 dtrace_dof_error(dof
, "corrupt probe provider");
11119 (void) strncpy(desc
->dtpd_provider
,
11120 (char *)(str
+ probe
->dofp_provider
),
11121 MIN(DTRACE_PROVNAMELEN
- 1, size
- probe
->dofp_provider
));
11123 if (probe
->dofp_mod
>= strtab
->dofs_size
) {
11124 dtrace_dof_error(dof
, "corrupt probe module");
11128 (void) strncpy(desc
->dtpd_mod
, (char *)(str
+ probe
->dofp_mod
),
11129 MIN(DTRACE_MODNAMELEN
- 1, size
- probe
->dofp_mod
));
11131 if (probe
->dofp_func
>= strtab
->dofs_size
) {
11132 dtrace_dof_error(dof
, "corrupt probe function");
11136 (void) strncpy(desc
->dtpd_func
, (char *)(str
+ probe
->dofp_func
),
11137 MIN(DTRACE_FUNCNAMELEN
- 1, size
- probe
->dofp_func
));
11139 if (probe
->dofp_name
>= strtab
->dofs_size
) {
11140 dtrace_dof_error(dof
, "corrupt probe name");
11144 (void) strncpy(desc
->dtpd_name
, (char *)(str
+ probe
->dofp_name
),
11145 MIN(DTRACE_NAMELEN
- 1, size
- probe
->dofp_name
));
11150 static dtrace_difo_t
*
11151 dtrace_dof_difo(dof_hdr_t
*dof
, dof_sec_t
*sec
, dtrace_vstate_t
*vstate
,
11156 dof_difohdr_t
*dofd
;
11157 uintptr_t daddr
= (uintptr_t)dof
;
11158 size_t max
= dtrace_difo_maxsize
;
11161 static const struct {
11169 { DOF_SECT_DIF
, offsetof(dtrace_difo_t
, dtdo_buf
),
11170 offsetof(dtrace_difo_t
, dtdo_len
), sizeof (dif_instr_t
),
11171 sizeof (dif_instr_t
), "multiple DIF sections" },
11173 { DOF_SECT_INTTAB
, offsetof(dtrace_difo_t
, dtdo_inttab
),
11174 offsetof(dtrace_difo_t
, dtdo_intlen
), sizeof (uint64_t),
11175 sizeof (uint64_t), "multiple integer tables" },
11177 { DOF_SECT_STRTAB
, offsetof(dtrace_difo_t
, dtdo_strtab
),
11178 offsetof(dtrace_difo_t
, dtdo_strlen
), 0,
11179 sizeof (char), "multiple string tables" },
11181 { DOF_SECT_VARTAB
, offsetof(dtrace_difo_t
, dtdo_vartab
),
11182 offsetof(dtrace_difo_t
, dtdo_varlen
), sizeof (dtrace_difv_t
),
11183 sizeof (uint_t
), "multiple variable tables" },
11185 #if !defined(__APPLE__)
11186 { DOF_SECT_NONE
, 0, 0, 0, NULL
}
11188 { DOF_SECT_NONE
, 0, 0, 0, 0, NULL
}
11189 #endif /* __APPLE__ */
11192 if (sec
->dofs_type
!= DOF_SECT_DIFOHDR
) {
11193 dtrace_dof_error(dof
, "invalid DIFO header section");
11197 if (sec
->dofs_align
!= sizeof (dof_secidx_t
)) {
11198 dtrace_dof_error(dof
, "bad alignment in DIFO header");
11202 if (sec
->dofs_size
< sizeof (dof_difohdr_t
) ||
11203 sec
->dofs_size
% sizeof (dof_secidx_t
)) {
11204 dtrace_dof_error(dof
, "bad size in DIFO header");
11208 dofd
= (dof_difohdr_t
*)(uintptr_t)(daddr
+ sec
->dofs_offset
);
11209 n
= (sec
->dofs_size
- sizeof (*dofd
)) / sizeof (dof_secidx_t
) + 1;
11211 dp
= kmem_zalloc(sizeof (dtrace_difo_t
), KM_SLEEP
);
11212 dp
->dtdo_rtype
= dofd
->dofd_rtype
;
11214 for (l
= 0; l
< n
; l
++) {
11219 if ((subsec
= dtrace_dof_sect(dof
, DOF_SECT_NONE
,
11220 dofd
->dofd_links
[l
])) == NULL
)
11221 goto err
; /* invalid section link */
11223 if (ttl
+ subsec
->dofs_size
> max
) {
11224 dtrace_dof_error(dof
, "exceeds maximum size");
11228 ttl
+= subsec
->dofs_size
;
11230 for (i
= 0; difo
[i
].section
!= DOF_SECT_NONE
; i
++) {
11231 if (subsec
->dofs_type
!= difo
[i
].section
)
11234 if (!(subsec
->dofs_flags
& DOF_SECF_LOAD
)) {
11235 dtrace_dof_error(dof
, "section not loaded");
11239 if (subsec
->dofs_align
!= difo
[i
].align
) {
11240 dtrace_dof_error(dof
, "bad alignment");
11244 bufp
= (void **)((uintptr_t)dp
+ difo
[i
].bufoffs
);
11245 lenp
= (uint32_t *)((uintptr_t)dp
+ difo
[i
].lenoffs
);
11247 if (*bufp
!= NULL
) {
11248 dtrace_dof_error(dof
, difo
[i
].msg
);
11252 if (difo
[i
].entsize
!= subsec
->dofs_entsize
) {
11253 dtrace_dof_error(dof
, "entry size mismatch");
11257 if (subsec
->dofs_entsize
!= 0 &&
11258 (subsec
->dofs_size
% subsec
->dofs_entsize
) != 0) {
11259 dtrace_dof_error(dof
, "corrupt entry size");
11263 *lenp
= subsec
->dofs_size
;
11264 *bufp
= kmem_alloc(subsec
->dofs_size
, KM_SLEEP
);
11265 bcopy((char *)(uintptr_t)(daddr
+ subsec
->dofs_offset
),
11266 *bufp
, subsec
->dofs_size
);
11268 if (subsec
->dofs_entsize
!= 0)
11269 *lenp
/= subsec
->dofs_entsize
;
11275 * If we encounter a loadable DIFO sub-section that is not
11276 * known to us, assume this is a broken program and fail.
11278 if (difo
[i
].section
== DOF_SECT_NONE
&&
11279 (subsec
->dofs_flags
& DOF_SECF_LOAD
)) {
11280 dtrace_dof_error(dof
, "unrecognized DIFO subsection");
11285 if (dp
->dtdo_buf
== NULL
) {
11287 * We can't have a DIF object without DIF text.
11289 dtrace_dof_error(dof
, "missing DIF text");
11294 * Before we validate the DIF object, run through the variable table
11295 * looking for the strings -- if any of their size are under, we'll set
11296 * their size to be the system-wide default string size. Note that
11297 * this should _not_ happen if the "strsize" option has been set --
11298 * in this case, the compiler should have set the size to reflect the
11299 * setting of the option.
11301 for (i
= 0; i
< dp
->dtdo_varlen
; i
++) {
11302 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
];
11303 dtrace_diftype_t
*t
= &v
->dtdv_type
;
11305 if (v
->dtdv_id
< DIF_VAR_OTHER_UBASE
)
11308 if (t
->dtdt_kind
== DIF_TYPE_STRING
&& t
->dtdt_size
== 0)
11309 t
->dtdt_size
= dtrace_strsize_default
;
11312 if (dtrace_difo_validate(dp
, vstate
, DIF_DIR_NREGS
, cr
) != 0)
11315 dtrace_difo_init(dp
, vstate
);
11319 kmem_free(dp
->dtdo_buf
, dp
->dtdo_len
* sizeof (dif_instr_t
));
11320 kmem_free(dp
->dtdo_inttab
, dp
->dtdo_intlen
* sizeof (uint64_t));
11321 kmem_free(dp
->dtdo_strtab
, dp
->dtdo_strlen
);
11322 kmem_free(dp
->dtdo_vartab
, dp
->dtdo_varlen
* sizeof (dtrace_difv_t
));
11324 kmem_free(dp
, sizeof (dtrace_difo_t
));
11328 static dtrace_predicate_t
*
11329 dtrace_dof_predicate(dof_hdr_t
*dof
, dof_sec_t
*sec
, dtrace_vstate_t
*vstate
,
11334 if ((dp
= dtrace_dof_difo(dof
, sec
, vstate
, cr
)) == NULL
)
11337 return (dtrace_predicate_create(dp
));
11340 static dtrace_actdesc_t
*
11341 dtrace_dof_actdesc(dof_hdr_t
*dof
, dof_sec_t
*sec
, dtrace_vstate_t
*vstate
,
11344 dtrace_actdesc_t
*act
, *first
= NULL
, *last
= NULL
, *next
;
11345 dof_actdesc_t
*desc
;
11346 dof_sec_t
*difosec
;
11348 uintptr_t daddr
= (uintptr_t)dof
;
11350 dtrace_actkind_t kind
;
11352 if (sec
->dofs_type
!= DOF_SECT_ACTDESC
) {
11353 dtrace_dof_error(dof
, "invalid action section");
11357 if (sec
->dofs_offset
+ sizeof (dof_actdesc_t
) > dof
->dofh_loadsz
) {
11358 dtrace_dof_error(dof
, "truncated action description");
11362 if (sec
->dofs_align
!= sizeof (uint64_t)) {
11363 dtrace_dof_error(dof
, "bad alignment in action description");
11367 if (sec
->dofs_size
< sec
->dofs_entsize
) {
11368 dtrace_dof_error(dof
, "section entry size exceeds total size");
11372 if (sec
->dofs_entsize
!= sizeof (dof_actdesc_t
)) {
11373 dtrace_dof_error(dof
, "bad entry size in action description");
11377 if (sec
->dofs_size
/ sec
->dofs_entsize
> dtrace_actions_max
) {
11378 dtrace_dof_error(dof
, "actions exceed dtrace_actions_max");
11382 for (offs
= 0; offs
< sec
->dofs_size
; offs
+= sec
->dofs_entsize
) {
11383 desc
= (dof_actdesc_t
*)(daddr
+
11384 (uintptr_t)sec
->dofs_offset
+ offs
);
11385 kind
= (dtrace_actkind_t
)desc
->dofa_kind
;
11387 if (DTRACEACT_ISPRINTFLIKE(kind
) &&
11388 (kind
!= DTRACEACT_PRINTA
||
11389 desc
->dofa_strtab
!= DOF_SECIDX_NONE
)) {
11395 * printf()-like actions must have a format string.
11397 if ((strtab
= dtrace_dof_sect(dof
,
11398 DOF_SECT_STRTAB
, desc
->dofa_strtab
)) == NULL
)
11401 str
= (char *)((uintptr_t)dof
+
11402 (uintptr_t)strtab
->dofs_offset
);
11404 for (i
= desc
->dofa_arg
; i
< strtab
->dofs_size
; i
++) {
11405 if (str
[i
] == '\0')
11409 if (i
>= strtab
->dofs_size
) {
11410 dtrace_dof_error(dof
, "bogus format string");
11414 if (i
== desc
->dofa_arg
) {
11415 dtrace_dof_error(dof
, "empty format string");
11419 i
-= desc
->dofa_arg
;
11420 fmt
= kmem_alloc(i
+ 1, KM_SLEEP
);
11421 bcopy(&str
[desc
->dofa_arg
], fmt
, i
+ 1);
11422 arg
= (uint64_t)(uintptr_t)fmt
;
11424 if (kind
== DTRACEACT_PRINTA
) {
11425 ASSERT(desc
->dofa_strtab
== DOF_SECIDX_NONE
);
11428 arg
= desc
->dofa_arg
;
11432 act
= dtrace_actdesc_create(kind
, desc
->dofa_ntuple
,
11433 desc
->dofa_uarg
, arg
);
11435 if (last
!= NULL
) {
11436 last
->dtad_next
= act
;
11443 if (desc
->dofa_difo
== DOF_SECIDX_NONE
)
11446 if ((difosec
= dtrace_dof_sect(dof
,
11447 DOF_SECT_DIFOHDR
, desc
->dofa_difo
)) == NULL
)
11450 act
->dtad_difo
= dtrace_dof_difo(dof
, difosec
, vstate
, cr
);
11452 if (act
->dtad_difo
== NULL
)
11456 ASSERT(first
!= NULL
);
11460 for (act
= first
; act
!= NULL
; act
= next
) {
11461 next
= act
->dtad_next
;
11462 dtrace_actdesc_release(act
, vstate
);
11468 static dtrace_ecbdesc_t
*
11469 dtrace_dof_ecbdesc(dof_hdr_t
*dof
, dof_sec_t
*sec
, dtrace_vstate_t
*vstate
,
11472 dtrace_ecbdesc_t
*ep
;
11473 dof_ecbdesc_t
*ecb
;
11474 dtrace_probedesc_t
*desc
;
11475 dtrace_predicate_t
*pred
= NULL
;
11477 if (sec
->dofs_size
< sizeof (dof_ecbdesc_t
)) {
11478 dtrace_dof_error(dof
, "truncated ECB description");
11482 if (sec
->dofs_align
!= sizeof (uint64_t)) {
11483 dtrace_dof_error(dof
, "bad alignment in ECB description");
11487 ecb
= (dof_ecbdesc_t
*)((uintptr_t)dof
+ (uintptr_t)sec
->dofs_offset
);
11488 sec
= dtrace_dof_sect(dof
, DOF_SECT_PROBEDESC
, ecb
->dofe_probes
);
11493 ep
= kmem_zalloc(sizeof (dtrace_ecbdesc_t
), KM_SLEEP
);
11494 ep
->dted_uarg
= ecb
->dofe_uarg
;
11495 desc
= &ep
->dted_probe
;
11497 if (dtrace_dof_probedesc(dof
, sec
, desc
) == NULL
)
11500 if (ecb
->dofe_pred
!= DOF_SECIDX_NONE
) {
11501 if ((sec
= dtrace_dof_sect(dof
,
11502 DOF_SECT_DIFOHDR
, ecb
->dofe_pred
)) == NULL
)
11505 if ((pred
= dtrace_dof_predicate(dof
, sec
, vstate
, cr
)) == NULL
)
11508 ep
->dted_pred
.dtpdd_predicate
= pred
;
11511 if (ecb
->dofe_actions
!= DOF_SECIDX_NONE
) {
11512 if ((sec
= dtrace_dof_sect(dof
,
11513 DOF_SECT_ACTDESC
, ecb
->dofe_actions
)) == NULL
)
11516 ep
->dted_action
= dtrace_dof_actdesc(dof
, sec
, vstate
, cr
);
11518 if (ep
->dted_action
== NULL
)
11526 dtrace_predicate_release(pred
, vstate
);
11527 kmem_free(ep
, sizeof (dtrace_ecbdesc_t
));
11531 #if !defined(__APPLE__) /* APPLE dyld has already done this for us */
11533 * Apply the relocations from the specified 'sec' (a DOF_SECT_URELHDR) to the
11534 * specified DOF. At present, this amounts to simply adding 'ubase' to the
11535 * site of any user SETX relocations to account for load object base address.
11536 * In the future, if we need other relocations, this function can be extended.
11539 dtrace_dof_relocate(dof_hdr_t
*dof
, dof_sec_t
*sec
, uint64_t ubase
)
11541 uintptr_t daddr
= (uintptr_t)dof
;
11542 dof_relohdr_t
*dofr
=
11543 (dof_relohdr_t
*)(uintptr_t)(daddr
+ sec
->dofs_offset
);
11544 dof_sec_t
*ss
, *rs
, *ts
;
11548 if (sec
->dofs_size
< sizeof (dof_relohdr_t
) ||
11549 sec
->dofs_align
!= sizeof (dof_secidx_t
)) {
11550 dtrace_dof_error(dof
, "invalid relocation header");
11554 ss
= dtrace_dof_sect(dof
, DOF_SECT_STRTAB
, dofr
->dofr_strtab
);
11555 rs
= dtrace_dof_sect(dof
, DOF_SECT_RELTAB
, dofr
->dofr_relsec
);
11556 ts
= dtrace_dof_sect(dof
, DOF_SECT_NONE
, dofr
->dofr_tgtsec
);
11558 if (ss
== NULL
|| rs
== NULL
|| ts
== NULL
)
11559 return (-1); /* dtrace_dof_error() has been called already */
11561 if (rs
->dofs_entsize
< sizeof (dof_relodesc_t
) ||
11562 rs
->dofs_align
!= sizeof (uint64_t)) {
11563 dtrace_dof_error(dof
, "invalid relocation section");
11567 r
= (dof_relodesc_t
*)(uintptr_t)(daddr
+ rs
->dofs_offset
);
11568 n
= rs
->dofs_size
/ rs
->dofs_entsize
;
11570 for (i
= 0; i
< n
; i
++) {
11571 uintptr_t taddr
= daddr
+ ts
->dofs_offset
+ r
->dofr_offset
;
11573 switch (r
->dofr_type
) {
11574 case DOF_RELO_NONE
:
11576 case DOF_RELO_SETX
:
11577 if (r
->dofr_offset
>= ts
->dofs_size
|| r
->dofr_offset
+
11578 sizeof (uint64_t) > ts
->dofs_size
) {
11579 dtrace_dof_error(dof
, "bad relocation offset");
11583 if (!IS_P2ALIGNED(taddr
, sizeof (uint64_t))) {
11584 dtrace_dof_error(dof
, "misaligned setx relo");
11588 *(uint64_t *)taddr
+= ubase
;
11591 dtrace_dof_error(dof
, "invalid relocation type");
11595 r
= (dof_relodesc_t
*)((uintptr_t)r
+ rs
->dofs_entsize
);
11600 #endif /* __APPLE__ */
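
/*
 * Illustrative sketch: a DOF_RELO_SETX relocation just adds the load
 * object's base address to a 64-bit slot inside the target section.  With
 * the bounds and alignment checks above already performed, the edit itself
 * is only the following (dtrace_example_apply_setx() is an invented name):
 */
static void
dtrace_example_apply_setx(dof_hdr_t *dof, dof_sec_t *ts, dof_relodesc_t *r,
    uint64_t ubase)
{
	uintptr_t taddr = (uintptr_t)dof + ts->dofs_offset + r->dofr_offset;

	*(uint64_t *)taddr += ubase;
}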
11603 * The dof_hdr_t passed to dtrace_dof_slurp() should be a partially validated
11604 * header: it should be at the front of a memory region that is at least
11605 * sizeof (dof_hdr_t) in size -- and then at least dof_hdr.dofh_loadsz in
11606 * size. It need not be validated in any other way.
11609 dtrace_dof_slurp(dof_hdr_t
*dof
, dtrace_vstate_t
*vstate
, cred_t
*cr
,
11610 dtrace_enabling_t
**enabp
, uint64_t ubase
, int noprobes
)
11612 uint64_t len
= dof
->dofh_loadsz
, seclen
;
11613 uintptr_t daddr
= (uintptr_t)dof
;
11614 dtrace_ecbdesc_t
*ep
;
11615 dtrace_enabling_t
*enab
;
11618 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
11619 ASSERT(dof
->dofh_loadsz
>= sizeof (dof_hdr_t
));
11622 * Check the DOF header identification bytes. In addition to checking
11623 * valid settings, we also verify that unused bits/bytes are zeroed so
11624 * we can use them later without fear of regressing existing binaries.
11626 if (bcmp(&dof
->dofh_ident
[DOF_ID_MAG0
],
11627 DOF_MAG_STRING
, DOF_MAG_STRLEN
) != 0) {
11628 dtrace_dof_error(dof
, "DOF magic string mismatch");
11632 if (dof
->dofh_ident
[DOF_ID_MODEL
] != DOF_MODEL_ILP32
&&
11633 dof
->dofh_ident
[DOF_ID_MODEL
] != DOF_MODEL_LP64
) {
11634 dtrace_dof_error(dof
, "DOF has invalid data model");
11638 if (dof
->dofh_ident
[DOF_ID_ENCODING
] != DOF_ENCODE_NATIVE
) {
11639 dtrace_dof_error(dof
, "DOF encoding mismatch");
11643 #if !defined(__APPLE__)
11644 if (dof
->dofh_ident
[DOF_ID_VERSION
] != DOF_VERSION_1
&&
11645 dof
->dofh_ident
[DOF_ID_VERSION
] != DOF_VERSION_2
) {
11646 dtrace_dof_error(dof
, "DOF version mismatch");
11651 * We only support DOF_VERSION_3 for now.
11653 if (dof
->dofh_ident
[DOF_ID_VERSION
] != DOF_VERSION_3
) {
11654 dtrace_dof_error(dof
, "DOF version mismatch");
11659 if (dof
->dofh_ident
[DOF_ID_DIFVERS
] != DIF_VERSION_2
) {
11660 dtrace_dof_error(dof
, "DOF uses unsupported instruction set");
11664 if (dof
->dofh_ident
[DOF_ID_DIFIREG
] > DIF_DIR_NREGS
) {
11665 dtrace_dof_error(dof
, "DOF uses too many integer registers");
11669 if (dof
->dofh_ident
[DOF_ID_DIFTREG
] > DIF_DTR_NREGS
) {
11670 dtrace_dof_error(dof
, "DOF uses too many tuple registers");
11674 for (i
= DOF_ID_PAD
; i
< DOF_ID_SIZE
; i
++) {
11675 if (dof
->dofh_ident
[i
] != 0) {
11676 dtrace_dof_error(dof
, "DOF has invalid ident byte set");
11681 if (dof
->dofh_flags
& ~DOF_FL_VALID
) {
11682 dtrace_dof_error(dof
, "DOF has invalid flag bits set");
11686 if (dof
->dofh_secsize
== 0) {
11687 dtrace_dof_error(dof
, "zero section header size");
11692 * Check that the section headers don't exceed the amount of DOF
11693 * data. Note that we cast the section size and number of sections
11694 * to uint64_t's to prevent possible overflow in the multiplication.
11696 seclen
= (uint64_t)dof
->dofh_secnum
* (uint64_t)dof
->dofh_secsize
;
11698 if (dof
->dofh_secoff
> len
|| seclen
> len
||
11699 dof
->dofh_secoff
+ seclen
> len
) {
11700 dtrace_dof_error(dof
, "truncated section headers");
11704 if (!IS_P2ALIGNED(dof
->dofh_secoff
, sizeof (uint64_t))) {
11705 dtrace_dof_error(dof
, "misaligned section headers");
11709 if (!IS_P2ALIGNED(dof
->dofh_secsize
, sizeof (uint64_t))) {
11710 dtrace_dof_error(dof
, "misaligned section size");
11715 * Take an initial pass through the section headers to be sure that
11716 * the headers don't have stray offsets. If the 'noprobes' flag is
11717 * set, do not permit sections relating to providers, probes, or args.
11719 for (i
= 0; i
< dof
->dofh_secnum
; i
++) {
11720 dof_sec_t
*sec
= (dof_sec_t
*)(daddr
+
11721 (uintptr_t)dof
->dofh_secoff
+ i
* dof
->dofh_secsize
);
11724 switch (sec
->dofs_type
) {
11725 case DOF_SECT_PROVIDER
:
11726 case DOF_SECT_PROBES
:
11727 case DOF_SECT_PRARGS
:
11728 case DOF_SECT_PROFFS
:
11729 dtrace_dof_error(dof
, "illegal sections "
11735 if (!(sec
->dofs_flags
& DOF_SECF_LOAD
))
11736 continue; /* just ignore non-loadable sections */
11738 if (sec
->dofs_align
& (sec
->dofs_align
- 1)) {
11739 dtrace_dof_error(dof
, "bad section alignment");
11743 if (sec
->dofs_offset
& (sec
->dofs_align
- 1)) {
11744 dtrace_dof_error(dof
, "misaligned section");
11748 if (sec
->dofs_offset
> len
|| sec
->dofs_size
> len
||
11749 sec
->dofs_offset
+ sec
->dofs_size
> len
) {
11750 dtrace_dof_error(dof
, "corrupt section header");
11754 if (sec
->dofs_type
== DOF_SECT_STRTAB
&& *((char *)daddr
+
11755 sec
->dofs_offset
+ sec
->dofs_size
- 1) != '\0') {
11756 dtrace_dof_error(dof
, "non-terminating string table");
11761 #if !defined(__APPLE__)
11763 * APPLE NOTE: We have no relocation to perform. All dof values are
11764 * relative offsets.
11768 * Take a second pass through the sections and locate and perform any
11769 * relocations that are present. We do this after the first pass to
11770 * be sure that all sections have had their headers validated.
11772 for (i
= 0; i
< dof
->dofh_secnum
; i
++) {
11773 dof_sec_t
*sec
= (dof_sec_t
*)(daddr
+
11774 (uintptr_t)dof
->dofh_secoff
+ i
* dof
->dofh_secsize
);
11776 if (!(sec
->dofs_flags
& DOF_SECF_LOAD
))
11777 continue; /* skip sections that are not loadable */
11779 switch (sec
->dofs_type
) {
11780 case DOF_SECT_URELHDR
:
11781 if (dtrace_dof_relocate(dof
, sec
, ubase
) != 0)
11786 #endif /* __APPLE__ */
11788 if ((enab
= *enabp
) == NULL
)
11789 enab
= *enabp
= dtrace_enabling_create(vstate
);
11791 for (i
= 0; i
< dof
->dofh_secnum
; i
++) {
11792 dof_sec_t
*sec
= (dof_sec_t
*)(daddr
+
11793 (uintptr_t)dof
->dofh_secoff
+ i
* dof
->dofh_secsize
);
11795 if (sec
->dofs_type
!= DOF_SECT_ECBDESC
)
11798 #if !defined(__APPLE__)
11799 if ((ep
= dtrace_dof_ecbdesc(dof
, sec
, vstate
, cr
)) == NULL
) {
11800 dtrace_enabling_destroy(enab
);
11805 /* XXX Defend against gcc 4.0 botch on x86 (not all paths out of inlined dtrace_dof_ecbdesc
11806 are checked for the NULL return value.) */
11807 ep
= dtrace_dof_ecbdesc(dof
, sec
, vstate
, cr
);
11809 dtrace_enabling_destroy(enab
);
11813 #endif /* __APPLE__ */
11815 dtrace_enabling_add(enab
, ep
);
11822 * Process DOF for any options. This routine assumes that the DOF has been
11823 * at least processed by dtrace_dof_slurp().
11826 dtrace_dof_options(dof_hdr_t
*dof
, dtrace_state_t
*state
)
11831 dof_optdesc_t
*desc
;
11833 for (i
= 0; i
< dof
->dofh_secnum
; i
++) {
11834 dof_sec_t
*sec
= (dof_sec_t
*)((uintptr_t)dof
+
11835 (uintptr_t)dof
->dofh_secoff
+ i
* dof
->dofh_secsize
);
11837 if (sec
->dofs_type
!= DOF_SECT_OPTDESC
)
11840 if (sec
->dofs_align
!= sizeof (uint64_t)) {
11841 dtrace_dof_error(dof
, "bad alignment in "
11842 "option description");
11846 if ((entsize
= sec
->dofs_entsize
) == 0) {
11847 dtrace_dof_error(dof
, "zeroed option entry size");
11851 if (entsize
< sizeof (dof_optdesc_t
)) {
11852 dtrace_dof_error(dof
, "bad option entry size");
11856 for (offs
= 0; offs
< sec
->dofs_size
; offs
+= entsize
) {
11857 desc
= (dof_optdesc_t
*)((uintptr_t)dof
+
11858 (uintptr_t)sec
->dofs_offset
+ offs
);
11860 if (desc
->dofo_strtab
!= DOF_SECIDX_NONE
) {
11861 dtrace_dof_error(dof
, "non-zero option string");
11865 if (desc
->dofo_value
== DTRACEOPT_UNSET
) {
11866 dtrace_dof_error(dof
, "unset option");
11870 if ((rval
= dtrace_state_option(state
,
11871 desc
->dofo_option
, desc
->dofo_value
)) != 0) {
11872 dtrace_dof_error(dof
, "rejected option");
11882 * DTrace Consumer State Functions
11884 #if defined(__APPLE__)
11886 #endif /* __APPLE__ */
11888 dtrace_dstate_init(dtrace_dstate_t
*dstate
, size_t size
)
11890 size_t hashsize
, maxper
, min
, chunksize
= dstate
->dtds_chunksize
;
11893 dtrace_dynvar_t
*dvar
, *next
, *start
;
11896 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
11897 ASSERT(dstate
->dtds_base
== NULL
&& dstate
->dtds_percpu
== NULL
);
11899 bzero(dstate
, sizeof (dtrace_dstate_t
));
11901 if ((dstate
->dtds_chunksize
= chunksize
) == 0)
11902 dstate
->dtds_chunksize
= DTRACE_DYNVAR_CHUNKSIZE
;
11904 if (size
< (min
= dstate
->dtds_chunksize
+ sizeof (dtrace_dynhash_t
)))
11907 if ((base
= kmem_zalloc(size
, KM_NOSLEEP
)) == NULL
)
11910 dstate
->dtds_size
= size
;
11911 dstate
->dtds_base
= base
;
11912 dstate
->dtds_percpu
= kmem_cache_alloc(dtrace_state_cache
, KM_SLEEP
);
11913 bzero(dstate
->dtds_percpu
, NCPU
* sizeof (dtrace_dstate_percpu_t
));
11915 hashsize
= size
/ (dstate
->dtds_chunksize
+ sizeof (dtrace_dynhash_t
));
11917 if (hashsize
!= 1 && (hashsize
& 1))
11920 dstate
->dtds_hashsize
= hashsize
;
11921 dstate
->dtds_hash
= dstate
->dtds_base
;
11924 * Set all of our hash buckets to point to the single sink, and (if
11925 * it hasn't already been set), set the sink's hash value to be the
11926 * sink sentinel value. The sink is needed for dynamic variable
11927 * lookups to know that they have iterated over an entire, valid hash
11930 for (i
= 0; i
< hashsize
; i
++)
11931 dstate
->dtds_hash
[i
].dtdh_chain
= &dtrace_dynhash_sink
;
11933 if (dtrace_dynhash_sink
.dtdv_hashval
!= DTRACE_DYNHASH_SINK
)
11934 dtrace_dynhash_sink
.dtdv_hashval
= DTRACE_DYNHASH_SINK
;
11937 * Determine number of active CPUs. Divide free list evenly among
11940 start
= (dtrace_dynvar_t
*)
11941 ((uintptr_t)base
+ hashsize
* sizeof (dtrace_dynhash_t
));
11942 limit
= (uintptr_t)base
+ size
;
11944 maxper
= (limit
- (uintptr_t)start
) / NCPU
;
11945 maxper
= (maxper
/ dstate
->dtds_chunksize
) * dstate
->dtds_chunksize
;
11947 for (i
= 0; i
< NCPU
; i
++) {
11948 dstate
->dtds_percpu
[i
].dtdsc_free
= dvar
= start
;
11951 * If we don't even have enough chunks to make it once through
11952 * NCPUs, we're just going to allocate everything to the first
11953 * CPU. And if we're on the last CPU, we're going to allocate
11954 * whatever is left over. In either case, we set the limit to
11955 * be the limit of the dynamic variable space.
11957 if (maxper
== 0 || i
== NCPU
- 1) {
11958 limit
= (uintptr_t)base
+ size
;
11961 limit
= (uintptr_t)start
+ maxper
;
11962 start
= (dtrace_dynvar_t
*)limit
;
11965 ASSERT(limit
<= (uintptr_t)base
+ size
);
11968 next
= (dtrace_dynvar_t
*)((uintptr_t)dvar
+
11969 dstate
->dtds_chunksize
);
11971 if ((uintptr_t)next
+ dstate
->dtds_chunksize
>= limit
)
11974 dvar
->dtdv_next
= next
;
11985 #if defined(__APPLE__)
11987 #endif /* __APPLE__ */
11989 dtrace_dstate_fini(dtrace_dstate_t
*dstate
)
11991 lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
);
11993 if (dstate
->dtds_base
== NULL
)
11996 kmem_free(dstate
->dtds_base
, dstate
->dtds_size
);
11997 kmem_cache_free(dtrace_state_cache
, dstate
->dtds_percpu
);
12001 dtrace_vstate_fini(dtrace_vstate_t
*vstate
)
12004 * Logical XOR, where are you?
12006 ASSERT((vstate
->dtvs_nglobals
== 0) ^ (vstate
->dtvs_globals
!= NULL
));
12008 if (vstate
->dtvs_nglobals
> 0) {
12009 kmem_free(vstate
->dtvs_globals
, vstate
->dtvs_nglobals
*
12010 sizeof (dtrace_statvar_t
*));
12013 if (vstate
->dtvs_ntlocals
> 0) {
12014 kmem_free(vstate
->dtvs_tlocals
, vstate
->dtvs_ntlocals
*
12015 sizeof (dtrace_difv_t
));
12018 ASSERT((vstate
->dtvs_nlocals
== 0) ^ (vstate
->dtvs_locals
!= NULL
));
12020 if (vstate
->dtvs_nlocals
> 0) {
12021 kmem_free(vstate
->dtvs_locals
, vstate
->dtvs_nlocals
*
12022 sizeof (dtrace_statvar_t
*));
12027 dtrace_state_clean(dtrace_state_t
*state
)
12029 if (state
->dts_activity
== DTRACE_ACTIVITY_INACTIVE
)
12032 dtrace_dynvar_clean(&state
->dts_vstate
.dtvs_dynvars
);
12033 dtrace_speculation_clean(state
);
12037 dtrace_state_deadman(dtrace_state_t
*state
)
12043 now
= dtrace_gethrtime();
12045 if (state
!= dtrace_anon
.dta_state
&&
12046 now
- state
->dts_laststatus
>= dtrace_deadman_user
)
12050 * We must be sure that dts_alive never appears to be less than the
12051 * value upon entry to dtrace_state_deadman(), and because we lack a
12052 * dtrace_cas64(), we cannot store to it atomically. We thus instead
12053 * store INT64_MAX to it, followed by a memory barrier, followed by
12054 * the new value. This assures that dts_alive never appears to be
12055 * less than its true value, regardless of the order in which the
12056 * stores to the underlying storage are issued.
12058 state
->dts_alive
= INT64_MAX
;
12059 dtrace_membar_producer();
12060 state
->dts_alive
= now
;
12063 #if defined(__APPLE__)
12065 #endif /* __APPLE__ */
12067 dtrace_state_create(dev_t
*devp
, cred_t
*cr
)
12072 dtrace_state_t
*state
;
12073 dtrace_optval_t
*opt
;
12074 int bufsize
= NCPU
* sizeof (dtrace_buffer_t
), i
;
12076 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
12077 lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
);
12079 #if !defined(__APPLE__)
12080 minor
= (minor_t
)(uintptr_t)vmem_alloc(dtrace_minor
, 1,
12081 VM_BESTFIT
| VM_SLEEP
);
12084 * Darwin's DEVFS layer acquired the minor number for this "device" when it called
12085 * dtrace_devfs_clone_func(). At that time, dtrace_devfs_clone_func() proposed a minor number
12086 * (next unused according to vmem_alloc()) and then immediately put the number back in play
12087 * (by calling vmem_free()). Now that minor number is being used for an open, so committing it
12088 * to use. The following vmem_alloc() must deliver that same minor number.
12091 minor
= (minor_t
)(uintptr_t)vmem_alloc(dtrace_minor
, 1,
12092 VM_BESTFIT
| VM_SLEEP
);
12094 if (NULL
!= devp
) {
12095 ASSERT(getminor(*devp
) == minor
);
12096 if (getminor(*devp
) != minor
) {
12097 printf("dtrace_open: couldn't re-acquire vended minor number %d. Instead got %d\n",
12098 getminor(*devp
), minor
);
12099 vmem_free(dtrace_minor
, (void *)(uintptr_t)minor
, 1);
12103 /* NULL==devp iff "Anonymous state" (see dtrace_anon_property),
12104 * so just vend the minor device number here de novo since no "open" has occurred. */
12107 #endif /* __APPLE__ */
12109 if (ddi_soft_state_zalloc(dtrace_softstate
, minor
) != DDI_SUCCESS
) {
12110 vmem_free(dtrace_minor
, (void *)(uintptr_t)minor
, 1);
12114 state
= ddi_get_soft_state(dtrace_softstate
, minor
);
12115 state
->dts_epid
= DTRACE_EPIDNONE
+ 1;
12117 (void) snprintf(c
, sizeof (c
), "dtrace_aggid_%d", minor
);
12118 state
->dts_aggid_arena
= vmem_create(c
, (void *)1, UINT32_MAX
, 1,
12119 NULL
, NULL
, NULL
, 0, VM_SLEEP
| VMC_IDENTIFIER
);
12121 if (devp
!= NULL
) {
12122 major
= getemajor(*devp
);
12124 major
= ddi_driver_major(dtrace_devi
);
12127 state
->dts_dev
= makedevice(major
, minor
);
12130 *devp
= state
->dts_dev
;
12133 * We allocate NCPU buffers. On the one hand, this can be quite
12134 * a bit of memory per instance (nearly 36K on a Starcat). On the
12135 * other hand, it saves an additional memory reference in the probe
12138 state
->dts_buffer
= kmem_zalloc(bufsize
, KM_SLEEP
);
12139 state
->dts_aggbuffer
= kmem_zalloc(bufsize
, KM_SLEEP
);
12140 state
->dts_cleaner
= CYCLIC_NONE
;
12141 state
->dts_deadman
= CYCLIC_NONE
;
12142 state
->dts_vstate
.dtvs_state
= state
;
12144 for (i
= 0; i
< DTRACEOPT_MAX
; i
++)
12145 state
->dts_options
[i
] = DTRACEOPT_UNSET
;
12148 * Set the default options.
12150 opt
= state
->dts_options
;
12151 opt
[DTRACEOPT_BUFPOLICY
] = DTRACEOPT_BUFPOLICY_SWITCH
;
12152 opt
[DTRACEOPT_BUFRESIZE
] = DTRACEOPT_BUFRESIZE_AUTO
;
12153 opt
[DTRACEOPT_NSPEC
] = dtrace_nspec_default
;
12154 opt
[DTRACEOPT_SPECSIZE
] = dtrace_specsize_default
;
12155 opt
[DTRACEOPT_CPU
] = (dtrace_optval_t
)DTRACE_CPUALL
;
12156 opt
[DTRACEOPT_STRSIZE
] = dtrace_strsize_default
;
12157 opt
[DTRACEOPT_STACKFRAMES
] = dtrace_stackframes_default
;
12158 opt
[DTRACEOPT_USTACKFRAMES
] = dtrace_ustackframes_default
;
12159 opt
[DTRACEOPT_CLEANRATE
] = dtrace_cleanrate_default
;
12160 opt
[DTRACEOPT_AGGRATE
] = dtrace_aggrate_default
;
12161 opt
[DTRACEOPT_SWITCHRATE
] = dtrace_switchrate_default
;
12162 opt
[DTRACEOPT_STATUSRATE
] = dtrace_statusrate_default
;
12163 opt
[DTRACEOPT_JSTACKFRAMES
] = dtrace_jstackframes_default
;
12164 opt
[DTRACEOPT_JSTACKSTRSIZE
] = dtrace_jstackstrsize_default
;
12166 state
->dts_activity
= DTRACE_ACTIVITY_INACTIVE
;
12169 * Depending on the user credentials, we set flag bits which alter probe
12170 * visibility or the amount of destructiveness allowed. In the case of
12171 * actual anonymous tracing, or the possession of all privileges, all of
12172 * the normal checks are bypassed.
12174 if (cr
== NULL
|| PRIV_POLICY_ONLY(cr
, PRIV_ALL
, B_FALSE
)) {
12175 state
->dts_cred
.dcr_visible
= DTRACE_CRV_ALL
;
12176 state
->dts_cred
.dcr_action
= DTRACE_CRA_ALL
;
12179 * Set up the credentials for this instantiation. We take a
12180 * hold on the credential to prevent it from disappearing on
12181 * us; this in turn prevents the zone_t referenced by this
12182 * credential from disappearing. This means that we can
12183 * examine the credential and the zone from probe context.
12186 state
->dts_cred
.dcr_cred
= cr
;
12189 * CRA_PROC means "we have *some* privilege for dtrace" and
12190 * unlocks the use of variables like pid, zonename, etc.
12192 if (PRIV_POLICY_ONLY(cr
, PRIV_DTRACE_USER
, B_FALSE
) ||
12193 PRIV_POLICY_ONLY(cr
, PRIV_DTRACE_PROC
, B_FALSE
)) {
12194 state
->dts_cred
.dcr_action
|= DTRACE_CRA_PROC
;
12198 * dtrace_user allows use of syscall and profile providers.
12199 * If the user also has proc_owner and/or proc_zone, we
12200 * extend the scope to include additional visibility and
12201 * destructive power.
12203 if (PRIV_POLICY_ONLY(cr
, PRIV_DTRACE_USER
, B_FALSE
)) {
12204 if (PRIV_POLICY_ONLY(cr
, PRIV_PROC_OWNER
, B_FALSE
)) {
12205 state
->dts_cred
.dcr_visible
|=
12206 DTRACE_CRV_ALLPROC
;
12208 state
->dts_cred
.dcr_action
|=
12209 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER
;
12212 if (PRIV_POLICY_ONLY(cr
, PRIV_PROC_ZONE
, B_FALSE
)) {
12213 state
->dts_cred
.dcr_visible
|=
12214 DTRACE_CRV_ALLZONE
;
12216 state
->dts_cred
.dcr_action
|=
12217 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE
;
12221 * If we have all privs in whatever zone this is,
12222 * we can do destructive things to processes which
12223 * have altered credentials.
12225 #if !defined(__APPLE__)
12226 if (priv_isequalset(priv_getset(cr
, PRIV_EFFECTIVE
),
12227 cr
->cr_zone
->zone_privset
)) {
12228 state
->dts_cred
.dcr_action
|=
12229 DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG
;
12232 /* Darwin doesn't do zones. */
12233 state
->dts_cred
.dcr_action
|=
12234 DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG
;
12235 #endif /* __APPLE__ */
12239 * Holding the dtrace_kernel privilege also implies that
12240 * the user has the dtrace_user privilege from a visibility
12241 * perspective. But without further privileges, some
12242 * destructive actions are not available.
12244 if (PRIV_POLICY_ONLY(cr
, PRIV_DTRACE_KERNEL
, B_FALSE
)) {
12246 * Make all probes in all zones visible. However,
12247 * this doesn't mean that all actions become available
12250 state
->dts_cred
.dcr_visible
|= DTRACE_CRV_KERNEL
|
12251 DTRACE_CRV_ALLPROC
| DTRACE_CRV_ALLZONE
;
12253 state
->dts_cred
.dcr_action
|= DTRACE_CRA_KERNEL
|
12256 * Holding proc_owner means that destructive actions
12257 * for *this* zone are allowed.
12259 if (PRIV_POLICY_ONLY(cr
, PRIV_PROC_OWNER
, B_FALSE
))
12260 state
->dts_cred
.dcr_action
|=
12261 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER
;
12264 * Holding proc_zone means that destructive actions
12265 * for this user/group ID in all zones is allowed.
12267 if (PRIV_POLICY_ONLY(cr
, PRIV_PROC_ZONE
, B_FALSE
))
12268 state
->dts_cred
.dcr_action
|=
12269 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE
;
12272 * If we have all privs in whatever zone this is,
12273 * we can do destructive things to processes which
12274 * have altered credentials.
12276 #if !defined(__APPLE__)
12277 if (priv_isequalset(priv_getset(cr
, PRIV_EFFECTIVE
),
12278 cr
->cr_zone
->zone_privset
)) {
12279 state
->dts_cred
.dcr_action
|=
12280 DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG
;
12283 /* Darwin doesn't do zones. */
12284 state
->dts_cred
.dcr_action
|=
12285 DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG
;
12286 #endif /* __APPLE__ */
12290 * Holding the dtrace_proc privilege gives control over fasttrap
12291 * and pid providers. We need to grant wider destructive
12292 * privileges in the event that the user has proc_owner and/or
12295 if (PRIV_POLICY_ONLY(cr
, PRIV_DTRACE_PROC
, B_FALSE
)) {
12296 if (PRIV_POLICY_ONLY(cr
, PRIV_PROC_OWNER
, B_FALSE
))
12297 state
->dts_cred
.dcr_action
|=
12298 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER
;
12300 if (PRIV_POLICY_ONLY(cr
, PRIV_PROC_ZONE
, B_FALSE
))
12301 state
->dts_cred
.dcr_action
|=
12302 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE
;
12310 dtrace_state_buffer(dtrace_state_t
*state
, dtrace_buffer_t
*buf
, int which
)
12312 dtrace_optval_t
*opt
= state
->dts_options
, size
;
12314 int flags
= 0, rval
;
12316 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
12317 lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
);
12318 ASSERT(which
< DTRACEOPT_MAX
);
12319 ASSERT(state
->dts_activity
== DTRACE_ACTIVITY_INACTIVE
||
12320 (state
== dtrace_anon
.dta_state
&&
12321 state
->dts_activity
== DTRACE_ACTIVITY_ACTIVE
));
12323 if (opt
[which
] == DTRACEOPT_UNSET
|| opt
[which
] == 0)
12326 if (opt
[DTRACEOPT_CPU
] != DTRACEOPT_UNSET
)
12327 cpu
= opt
[DTRACEOPT_CPU
];
12329 if (which
== DTRACEOPT_SPECSIZE
)
12330 flags
|= DTRACEBUF_NOSWITCH
;
12332 if (which
== DTRACEOPT_BUFSIZE
) {
12333 if (opt
[DTRACEOPT_BUFPOLICY
] == DTRACEOPT_BUFPOLICY_RING
)
12334 flags
|= DTRACEBUF_RING
;
12336 if (opt
[DTRACEOPT_BUFPOLICY
] == DTRACEOPT_BUFPOLICY_FILL
)
12337 flags
|= DTRACEBUF_FILL
;
12339 if (state
!= dtrace_anon
.dta_state
||
12340 state
->dts_activity
!= DTRACE_ACTIVITY_ACTIVE
)
12341 flags
|= DTRACEBUF_INACTIVE
;
12344 for (size
= opt
[which
]; size
>= sizeof (uint64_t); size
>>= 1) {
12346 * The size must be 8-byte aligned. If the size is not 8-byte
12347 * aligned, drop it down by the difference.
12349 if (size
& (sizeof (uint64_t) - 1))
12350 size
-= size
& (sizeof (uint64_t) - 1);
12352 if (size
< state
->dts_reserve
) {
12354 * Buffers always must be large enough to accommodate
12355 * their prereserved space. We return E2BIG instead
12356 * of ENOMEM in this case to allow for user-level
12357 * software to differentiate the cases.
12362 rval
= dtrace_buffer_alloc(buf
, size
, flags
, cpu
);
12364 if (rval
!= ENOMEM
) {
12369 if (opt
[DTRACEOPT_BUFRESIZE
] == DTRACEOPT_BUFRESIZE_MANUAL
)
12377 dtrace_state_buffers(dtrace_state_t
*state
)
12379 dtrace_speculation_t
*spec
= state
->dts_speculations
;
12382 if ((rval
= dtrace_state_buffer(state
, state
->dts_buffer
,
12383 DTRACEOPT_BUFSIZE
)) != 0)
12386 if ((rval
= dtrace_state_buffer(state
, state
->dts_aggbuffer
,
12387 DTRACEOPT_AGGSIZE
)) != 0)
12390 for (i
= 0; i
< state
->dts_nspeculations
; i
++) {
12391 if ((rval
= dtrace_state_buffer(state
,
12392 spec
[i
].dtsp_buffer
, DTRACEOPT_SPECSIZE
)) != 0)
12400 dtrace_state_prereserve(dtrace_state_t
*state
)
12403 dtrace_probe_t
*probe
;
12405 state
->dts_reserve
= 0;
12407 if (state
->dts_options
[DTRACEOPT_BUFPOLICY
] != DTRACEOPT_BUFPOLICY_FILL
)
12411 * If our buffer policy is a "fill" buffer policy, we need to set the
12412 * prereserved space to be the space required by the END probes.
12414 probe
= dtrace_probes
[dtrace_probeid_end
- 1];
12415 ASSERT(probe
!= NULL
);
12417 for (ecb
= probe
->dtpr_ecb
; ecb
!= NULL
; ecb
= ecb
->dte_next
) {
12418 if (ecb
->dte_state
!= state
)
12421 state
->dts_reserve
+= ecb
->dte_needed
+ ecb
->dte_alignment
;
12426 dtrace_state_go(dtrace_state_t
*state
, processorid_t
*cpu
)
12428 dtrace_optval_t
*opt
= state
->dts_options
, sz
, nspec
;
12429 dtrace_speculation_t
*spec
;
12430 dtrace_buffer_t
*buf
;
12431 cyc_handler_t hdlr
;
12433 int rval
= 0, i
, bufsize
= NCPU
* sizeof (dtrace_buffer_t
);
12434 dtrace_icookie_t cookie
;
12436 lck_mtx_lock(&cpu_lock
);
12437 lck_mtx_lock(&dtrace_lock
);
12439 if (state
->dts_activity
!= DTRACE_ACTIVITY_INACTIVE
) {
12445 * Before we can perform any checks, we must prime all of the
12446 * retained enablings that correspond to this state.
12448 dtrace_enabling_prime(state
);
12450 if (state
->dts_destructive
&& !state
->dts_cred
.dcr_destructive
) {
12455 dtrace_state_prereserve(state
);
12458 * Now we want to do is try to allocate our speculations.
12459 * We do not automatically resize the number of speculations; if
12460 * this fails, we will fail the operation.
12462 nspec
= opt
[DTRACEOPT_NSPEC
];
12463 ASSERT(nspec
!= DTRACEOPT_UNSET
);
12465 if (nspec
> INT_MAX
) {
12470 spec
= kmem_zalloc(nspec
* sizeof (dtrace_speculation_t
), KM_NOSLEEP
);
12472 if (spec
== NULL
) {
12477 state
->dts_speculations
= spec
;
12478 state
->dts_nspeculations
= (int)nspec
;
12480 for (i
= 0; i
< nspec
; i
++) {
12481 if ((buf
= kmem_zalloc(bufsize
, KM_NOSLEEP
)) == NULL
) {
12486 spec
[i
].dtsp_buffer
= buf
;
12489 if (opt
[DTRACEOPT_GRABANON
] != DTRACEOPT_UNSET
) {
12490 if (dtrace_anon
.dta_state
== NULL
) {
12495 if (state
->dts_necbs
!= 0) {
12500 state
->dts_anon
= dtrace_anon_grab();
12501 ASSERT(state
->dts_anon
!= NULL
);
12502 state
= state
->dts_anon
;
12505 * We want "grabanon" to be set in the grabbed state, so we'll
12506 * copy that option value from the grabbing state into the
12509 state
->dts_options
[DTRACEOPT_GRABANON
] =
12510 opt
[DTRACEOPT_GRABANON
];
12512 *cpu
= dtrace_anon
.dta_beganon
;
12515 * If the anonymous state is active (as it almost certainly
12516 * is if the anonymous enabling ultimately matched anything),
12517 * we don't allow any further option processing -- but we
12518 * don't return failure.
12520 if (state
->dts_activity
!= DTRACE_ACTIVITY_INACTIVE
)
12524 if (opt
[DTRACEOPT_AGGSIZE
] != DTRACEOPT_UNSET
&&
12525 opt
[DTRACEOPT_AGGSIZE
] != 0) {
12526 if (state
->dts_aggregations
== NULL
) {
12528 * We're not going to create an aggregation buffer
12529 * because we don't have any ECBs that contain
12530 * aggregations -- set this option to 0.
12532 opt
[DTRACEOPT_AGGSIZE
] = 0;
12535 * If we have an aggregation buffer, we must also have
12536 * a buffer to use as scratch.
12538 if (opt
[DTRACEOPT_BUFSIZE
] == DTRACEOPT_UNSET
||
12539 opt
[DTRACEOPT_BUFSIZE
] < state
->dts_needed
) {
12540 opt
[DTRACEOPT_BUFSIZE
] = state
->dts_needed
;
12545 if (opt
[DTRACEOPT_SPECSIZE
] != DTRACEOPT_UNSET
&&
12546 opt
[DTRACEOPT_SPECSIZE
] != 0) {
12547 if (!state
->dts_speculates
) {
12549 * We're not going to create speculation buffers
12550 * because we don't have any ECBs that actually
12551 * speculate -- set the speculation size to 0.
12553 opt
[DTRACEOPT_SPECSIZE
] = 0;
12558 * The bare minimum size for any buffer that we're actually going to
12559 * do anything to is sizeof (uint64_t).
12561 sz
= sizeof (uint64_t);
12563 if ((state
->dts_needed
!= 0 && opt
[DTRACEOPT_BUFSIZE
] < sz
) ||
12564 (state
->dts_speculates
&& opt
[DTRACEOPT_SPECSIZE
] < sz
) ||
12565 (state
->dts_aggregations
!= NULL
&& opt
[DTRACEOPT_AGGSIZE
] < sz
)) {
12567 * A buffer size has been explicitly set to 0 (or to a size
12568 * that will be adjusted to 0) and we need the space -- we
12569 * need to return failure. We return ENOSPC to differentiate
12570 * it from failing to allocate a buffer due to failure to meet
12571 * the reserve (for which we return E2BIG).
12577 if ((rval
= dtrace_state_buffers(state
)) != 0)
12580 if ((sz
= opt
[DTRACEOPT_DYNVARSIZE
]) == DTRACEOPT_UNSET
)
12581 sz
= dtrace_dstate_defsize
;
12584 rval
= dtrace_dstate_init(&state
->dts_vstate
.dtvs_dynvars
, sz
);
12589 if (opt
[DTRACEOPT_BUFRESIZE
] == DTRACEOPT_BUFRESIZE_MANUAL
)
12591 } while (sz
>>= 1);
12593 opt
[DTRACEOPT_DYNVARSIZE
] = sz
;
12598 if (opt
[DTRACEOPT_STATUSRATE
] > dtrace_statusrate_max
)
12599 opt
[DTRACEOPT_STATUSRATE
] = dtrace_statusrate_max
;
12601 if (opt
[DTRACEOPT_CLEANRATE
] == 0)
12602 opt
[DTRACEOPT_CLEANRATE
] = dtrace_cleanrate_max
;
12604 if (opt
[DTRACEOPT_CLEANRATE
] < dtrace_cleanrate_min
)
12605 opt
[DTRACEOPT_CLEANRATE
] = dtrace_cleanrate_min
;
12607 if (opt
[DTRACEOPT_CLEANRATE
] > dtrace_cleanrate_max
)
12608 opt
[DTRACEOPT_CLEANRATE
] = dtrace_cleanrate_max
;
12610 hdlr
.cyh_func
= (cyc_func_t
)dtrace_state_clean
;
12611 hdlr
.cyh_arg
= state
;
12612 hdlr
.cyh_level
= CY_LOW_LEVEL
;
12615 when
.cyt_interval
= opt
[DTRACEOPT_CLEANRATE
];
12617 state
->dts_cleaner
= cyclic_add(&hdlr
, &when
);
12619 hdlr
.cyh_func
= (cyc_func_t
)dtrace_state_deadman
;
12620 hdlr
.cyh_arg
= state
;
12621 hdlr
.cyh_level
= CY_LOW_LEVEL
;
12624 when
.cyt_interval
= dtrace_deadman_interval
;
12626 state
->dts_alive
= state
->dts_laststatus
= dtrace_gethrtime();
12627 state
->dts_deadman
= cyclic_add(&hdlr
, &when
);
12629 state
->dts_activity
= DTRACE_ACTIVITY_WARMUP
;
12632 * Now it's time to actually fire the BEGIN probe. We need to disable
12633 * interrupts here both to record the CPU on which we fired the BEGIN
12634 * probe (the data from this CPU will be processed first at user
12635 * level) and to manually activate the buffer for this CPU.
12637 cookie
= dtrace_interrupt_disable();
12638 *cpu
= CPU
->cpu_id
;
12639 ASSERT(state
->dts_buffer
[*cpu
].dtb_flags
& DTRACEBUF_INACTIVE
);
12640 state
->dts_buffer
[*cpu
].dtb_flags
&= ~DTRACEBUF_INACTIVE
;
12642 dtrace_probe(dtrace_probeid_begin
,
12643 (uint64_t)(uintptr_t)state
, 0, 0, 0, 0);
12644 dtrace_interrupt_enable(cookie
);
12646 * We may have had an exit action from a BEGIN probe; only change our
12647 * state to ACTIVE if we're still in WARMUP.
12649 ASSERT(state
->dts_activity
== DTRACE_ACTIVITY_WARMUP
||
12650 state
->dts_activity
== DTRACE_ACTIVITY_DRAINING
);
12652 if (state
->dts_activity
== DTRACE_ACTIVITY_WARMUP
)
12653 state
->dts_activity
= DTRACE_ACTIVITY_ACTIVE
;
12656 * Regardless of whether or not now we're in ACTIVE or DRAINING, we
12657 * want each CPU to transition its principal buffer out of the
12658 * INACTIVE state. Doing this assures that no CPU will suddenly begin
12659 * processing an ECB halfway down a probe's ECB chain; all CPUs will
12660 * atomically transition from processing none of a state's ECBs to
12661 * processing all of them.
12663 dtrace_xcall(DTRACE_CPUALL
,
12664 (dtrace_xcall_t
)dtrace_buffer_activate
, state
);
12668 dtrace_buffer_free(state
->dts_buffer
);
12669 dtrace_buffer_free(state
->dts_aggbuffer
);
12671 if ((nspec
= state
->dts_nspeculations
) == 0) {
12672 ASSERT(state
->dts_speculations
== NULL
);
12676 spec
= state
->dts_speculations
;
12677 ASSERT(spec
!= NULL
);
12679 for (i
= 0; i
< state
->dts_nspeculations
; i
++) {
12680 if ((buf
= spec
[i
].dtsp_buffer
) == NULL
)
12683 dtrace_buffer_free(buf
);
12684 kmem_free(buf
, bufsize
);
12687 kmem_free(spec
, nspec
* sizeof (dtrace_speculation_t
));
12688 state
->dts_nspeculations
= 0;
12689 state
->dts_speculations
= NULL
;
12692 lck_mtx_unlock(&dtrace_lock
);
12693 lck_mtx_unlock(&cpu_lock
);
12699 dtrace_state_stop(dtrace_state_t
*state
, processorid_t
*cpu
)
12701 dtrace_icookie_t cookie
;
12703 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
12705 if (state
->dts_activity
!= DTRACE_ACTIVITY_ACTIVE
&&
12706 state
->dts_activity
!= DTRACE_ACTIVITY_DRAINING
)
12710 * We'll set the activity to DTRACE_ACTIVITY_DRAINING, and issue a sync
12711 * to be sure that every CPU has seen it. See below for the details
12712 * on why this is done.
12714 state
->dts_activity
= DTRACE_ACTIVITY_DRAINING
;
12718 * By this point, it is impossible for any CPU to be still processing
12719 * with DTRACE_ACTIVITY_ACTIVE. We can thus set our activity to
12720 * DTRACE_ACTIVITY_COOLDOWN and know that we're not racing with any
12721 * other CPU in dtrace_buffer_reserve(). This allows dtrace_probe()
12722 * and callees to know that the activity is DTRACE_ACTIVITY_COOLDOWN
12723 * iff we're in the END probe.
12725 state
->dts_activity
= DTRACE_ACTIVITY_COOLDOWN
;
12727 ASSERT(state
->dts_activity
== DTRACE_ACTIVITY_COOLDOWN
);
12730 * Finally, we can release the reserve and call the END probe. We
12731 * disable interrupts across calling the END probe to allow us to
12732 * return the CPU on which we actually called the END probe. This
12733 * allows user-land to be sure that this CPU's principal buffer is
12736 state
->dts_reserve
= 0;
12738 cookie
= dtrace_interrupt_disable();
12739 *cpu
= CPU
->cpu_id
;
12740 dtrace_probe(dtrace_probeid_end
,
12741 (uint64_t)(uintptr_t)state
, 0, 0, 0, 0);
12742 dtrace_interrupt_enable(cookie
);
12744 state
->dts_activity
= DTRACE_ACTIVITY_STOPPED
;
12751 dtrace_state_option(dtrace_state_t
*state
, dtrace_optid_t option
,
12752 dtrace_optval_t val
)
12754 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
12756 if (state
->dts_activity
!= DTRACE_ACTIVITY_INACTIVE
)
12759 if (option
>= DTRACEOPT_MAX
)
12762 if (option
!= DTRACEOPT_CPU
&& val
< 0)
12766 case DTRACEOPT_DESTRUCTIVE
:
12767 if (dtrace_destructive_disallow
)
12770 state
->dts_cred
.dcr_destructive
= 1;
12773 case DTRACEOPT_BUFSIZE
:
12774 case DTRACEOPT_DYNVARSIZE
:
12775 case DTRACEOPT_AGGSIZE
:
12776 case DTRACEOPT_SPECSIZE
:
12777 case DTRACEOPT_STRSIZE
:
12781 if (val
>= LONG_MAX
) {
12783 * If this is an otherwise negative value, set it to
12784 * the highest multiple of 128m less than LONG_MAX.
12785 * Technically, we're adjusting the size without
12786 * regard to the buffer resizing policy, but in fact,
12787 * this has no effect -- if we set the buffer size to
12788 * ~LONG_MAX and the buffer policy is ultimately set to
12789 * be "manual", the buffer allocation is guaranteed to
12790 * fail, if only because the allocation requires two
12791 * buffers. (We set the the size to the highest
12792 * multiple of 128m because it ensures that the size
12793 * will remain a multiple of a megabyte when
12794 * repeatedly halved -- all the way down to 15m.)
12796 val
= LONG_MAX
- (1 << 27) + 1;
12800 state
->dts_options
[option
] = val
;
12806 dtrace_state_destroy(dtrace_state_t
*state
)
12809 dtrace_vstate_t
*vstate
= &state
->dts_vstate
;
12810 minor_t minor
= getminor(state
->dts_dev
);
12811 int i
, bufsize
= NCPU
* sizeof (dtrace_buffer_t
);
12812 dtrace_speculation_t
*spec
= state
->dts_speculations
;
12813 int nspec
= state
->dts_nspeculations
;
12816 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
12817 lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
);
12820 * First, retract any retained enablings for this state.
12822 dtrace_enabling_retract(state
);
12823 ASSERT(state
->dts_nretained
== 0);
12825 if (state
->dts_activity
== DTRACE_ACTIVITY_ACTIVE
||
12826 state
->dts_activity
== DTRACE_ACTIVITY_DRAINING
) {
12828 * We have managed to come into dtrace_state_destroy() on a
12829 * hot enabling -- almost certainly because of a disorderly
12830 * shutdown of a consumer. (That is, a consumer that is
12831 * exiting without having called dtrace_stop().) In this case,
12832 * we're going to set our activity to be KILLED, and then
12833 * issue a sync to be sure that everyone is out of probe
12834 * context before we start blowing away ECBs.
12836 state
->dts_activity
= DTRACE_ACTIVITY_KILLED
;
12841 * Release the credential hold we took in dtrace_state_create().
12843 if (state
->dts_cred
.dcr_cred
!= NULL
)
12844 crfree(state
->dts_cred
.dcr_cred
);
12847 * Now we can safely disable and destroy any enabled probes. Because
12848 * any DTRACE_PRIV_KERNEL probes may actually be slowing our progress
12849 * (especially if they're all enabled), we take two passes through the
12850 * ECBs: in the first, we disable just DTRACE_PRIV_KERNEL probes, and
12851 * in the second we disable whatever is left over.
12853 for (match
= DTRACE_PRIV_KERNEL
; ; match
= 0) {
12854 for (i
= 0; i
< state
->dts_necbs
; i
++) {
12855 if ((ecb
= state
->dts_ecbs
[i
]) == NULL
)
12858 if (match
&& ecb
->dte_probe
!= NULL
) {
12859 dtrace_probe_t
*probe
= ecb
->dte_probe
;
12860 dtrace_provider_t
*prov
= probe
->dtpr_provider
;
12862 if (!(prov
->dtpv_priv
.dtpp_flags
& match
))
12866 dtrace_ecb_disable(ecb
);
12867 dtrace_ecb_destroy(ecb
);
12875 * Before we free the buffers, perform one more sync to assure that
12876 * every CPU is out of probe context.
12880 dtrace_buffer_free(state
->dts_buffer
);
12881 dtrace_buffer_free(state
->dts_aggbuffer
);
12883 for (i
= 0; i
< nspec
; i
++)
12884 dtrace_buffer_free(spec
[i
].dtsp_buffer
);
12886 if (state
->dts_cleaner
!= CYCLIC_NONE
)
12887 cyclic_remove(state
->dts_cleaner
);
12889 if (state
->dts_deadman
!= CYCLIC_NONE
)
12890 cyclic_remove(state
->dts_deadman
);
12892 dtrace_dstate_fini(&vstate
->dtvs_dynvars
);
12893 dtrace_vstate_fini(vstate
);
12894 kmem_free(state
->dts_ecbs
, state
->dts_necbs
* sizeof (dtrace_ecb_t
*));
12896 if (state
->dts_aggregations
!= NULL
) {
12898 for (i
= 0; i
< state
->dts_naggregations
; i
++)
12899 ASSERT(state
->dts_aggregations
[i
] == NULL
);
12901 ASSERT(state
->dts_naggregations
> 0);
12902 kmem_free(state
->dts_aggregations
,
12903 state
->dts_naggregations
* sizeof (dtrace_aggregation_t
*));
12906 kmem_free(state
->dts_buffer
, bufsize
);
12907 kmem_free(state
->dts_aggbuffer
, bufsize
);
12909 for (i
= 0; i
< nspec
; i
++)
12910 kmem_free(spec
[i
].dtsp_buffer
, bufsize
);
12912 kmem_free(spec
, nspec
* sizeof (dtrace_speculation_t
));
12914 dtrace_format_destroy(state
);
12916 vmem_destroy(state
->dts_aggid_arena
);
12917 ddi_soft_state_free(dtrace_softstate
, minor
);
12918 vmem_free(dtrace_minor
, (void *)(uintptr_t)minor
, 1);
12922 * DTrace Anonymous Enabling Functions
12924 static dtrace_state_t
*
12925 dtrace_anon_grab(void)
12927 dtrace_state_t
*state
;
12929 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
12931 if ((state
= dtrace_anon
.dta_state
) == NULL
) {
12932 ASSERT(dtrace_anon
.dta_enabling
== NULL
);
12936 ASSERT(dtrace_anon
.dta_enabling
!= NULL
);
12937 ASSERT(dtrace_retained
!= NULL
);
12939 dtrace_enabling_destroy(dtrace_anon
.dta_enabling
);
12940 dtrace_anon
.dta_enabling
= NULL
;
12941 dtrace_anon
.dta_state
= NULL
;
12947 dtrace_anon_property(void)
12950 dtrace_state_t
*state
;
12952 char c
[32]; /* enough for "dof-data-" + digits */
12954 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
12955 lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
);
12957 for (i
= 0; ; i
++) {
12958 (void) snprintf(c
, sizeof (c
), "dof-data-%d", i
);
12960 dtrace_err_verbose
= 1;
12962 if ((dof
= dtrace_dof_property(c
)) == NULL
) {
12963 dtrace_err_verbose
= 0;
12968 * We want to create anonymous state, so we need to transition
12969 * the kernel debugger to indicate that DTrace is active. If
12970 * this fails (e.g. because the debugger has modified text in
12971 * some way), we won't continue with the processing.
12973 if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE
) != 0) {
12974 cmn_err(CE_NOTE
, "kernel debugger active; anonymous "
12975 "enabling ignored.");
12976 dtrace_dof_destroy(dof
);
12981 * If we haven't allocated an anonymous state, we'll do so now.
12983 if ((state
= dtrace_anon
.dta_state
) == NULL
) {
12984 state
= dtrace_state_create(NULL
, NULL
);
12985 dtrace_anon
.dta_state
= state
;
12987 if (state
== NULL
) {
12989 * This basically shouldn't happen: the only
12990 * failure mode from dtrace_state_create() is a
12991 * failure of ddi_soft_state_zalloc() that
12992 * itself should never happen. Still, the
12993 * interface allows for a failure mode, and
12994 * we want to fail as gracefully as possible:
12995 * we'll emit an error message and cease
12996 * processing anonymous state in this case.
12998 cmn_err(CE_WARN
, "failed to create "
12999 "anonymous state");
13000 dtrace_dof_destroy(dof
);
13005 rv
= dtrace_dof_slurp(dof
, &state
->dts_vstate
, CRED(),
13006 &dtrace_anon
.dta_enabling
, 0, B_TRUE
);
13009 rv
= dtrace_dof_options(dof
, state
);
13011 dtrace_err_verbose
= 0;
13012 dtrace_dof_destroy(dof
);
13016 * This is malformed DOF; chuck any anonymous state
13019 ASSERT(dtrace_anon
.dta_enabling
== NULL
);
13020 dtrace_state_destroy(state
);
13021 dtrace_anon
.dta_state
= NULL
;
13025 ASSERT(dtrace_anon
.dta_enabling
!= NULL
);
13028 if (dtrace_anon
.dta_enabling
!= NULL
) {
13032 * dtrace_enabling_retain() can only fail because we are
13033 * trying to retain more enablings than are allowed -- but
13034 * we only have one anonymous enabling, and we are guaranteed
13035 * to be allowed at least one retained enabling; we assert
13036 * that dtrace_enabling_retain() returns success.
13038 rval
= dtrace_enabling_retain(dtrace_anon
.dta_enabling
);
13041 dtrace_enabling_dump(dtrace_anon
.dta_enabling
);
13046 * DTrace Helper Functions
13049 dtrace_helper_trace(dtrace_helper_action_t
*helper
,
13050 dtrace_mstate_t
*mstate
, dtrace_vstate_t
*vstate
, int where
)
13052 uint32_t size
, next
, nnext
, i
;
13053 dtrace_helptrace_t
*ent
;
13054 uint16_t flags
= cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
;
13056 if (!dtrace_helptrace_enabled
)
13059 ASSERT(vstate
->dtvs_nlocals
<= dtrace_helptrace_nlocals
);
13062 * What would a tracing framework be without its own tracing
13063 * framework? (Well, a hell of a lot simpler, for starters...)
13065 size
= sizeof (dtrace_helptrace_t
) + dtrace_helptrace_nlocals
*
13066 sizeof (uint64_t) - sizeof (uint64_t);
13069 * Iterate until we can allocate a slot in the trace buffer.
13072 next
= dtrace_helptrace_next
;
13074 if (next
+ size
< dtrace_helptrace_bufsize
) {
13075 nnext
= next
+ size
;
13079 } while (dtrace_cas32(&dtrace_helptrace_next
, next
, nnext
) != next
);
13082 * We have our slot; fill it in.
13087 ent
= (dtrace_helptrace_t
*)&dtrace_helptrace_buffer
[next
];
13088 ent
->dtht_helper
= helper
;
13089 ent
->dtht_where
= where
;
13090 ent
->dtht_nlocals
= vstate
->dtvs_nlocals
;
13092 ent
->dtht_fltoffs
= (mstate
->dtms_present
& DTRACE_MSTATE_FLTOFFS
) ?
13093 mstate
->dtms_fltoffs
: -1;
13094 ent
->dtht_fault
= DTRACE_FLAGS2FLT(flags
);
13095 ent
->dtht_illval
= cpu_core
[CPU
->cpu_id
].cpuc_dtrace_illval
;
13097 for (i
= 0; i
< vstate
->dtvs_nlocals
; i
++) {
13098 dtrace_statvar_t
*svar
;
13100 if ((svar
= vstate
->dtvs_locals
[i
]) == NULL
)
13103 ASSERT(svar
->dtsv_size
>= NCPU
* sizeof (uint64_t));
13104 ent
->dtht_locals
[i
] =
13105 ((uint64_t *)(uintptr_t)svar
->dtsv_data
)[CPU
->cpu_id
];
13110 dtrace_helper(int which
, dtrace_mstate_t
*mstate
,
13111 dtrace_state_t
*state
, uint64_t arg0
, uint64_t arg1
)
13113 uint16_t *flags
= &cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
;
13114 uint64_t sarg0
= mstate
->dtms_arg
[0];
13115 uint64_t sarg1
= mstate
->dtms_arg
[1];
13117 dtrace_helpers_t
*helpers
= curproc
->p_dtrace_helpers
;
13118 dtrace_helper_action_t
*helper
;
13119 dtrace_vstate_t
*vstate
;
13120 dtrace_difo_t
*pred
;
13121 int i
, trace
= dtrace_helptrace_enabled
;
13123 ASSERT(which
>= 0 && which
< DTRACE_NHELPER_ACTIONS
);
13125 if (helpers
== NULL
)
13128 if ((helper
= helpers
->dthps_actions
[which
]) == NULL
)
13131 vstate
= &helpers
->dthps_vstate
;
13132 mstate
->dtms_arg
[0] = arg0
;
13133 mstate
->dtms_arg
[1] = arg1
;
13136 * Now iterate over each helper. If its predicate evaluates to 'true',
13137 * we'll call the corresponding actions. Note that the below calls
13138 * to dtrace_dif_emulate() may set faults in machine state. This is
13139 * okay: our caller (the outer dtrace_dif_emulate()) will simply plow
13140 * the stored DIF offset with its own (which is the desired behavior).
13141 * Also, note the calls to dtrace_dif_emulate() may allocate scratch
13142 * from machine state; this is okay, too.
13144 for (; helper
!= NULL
; helper
= helper
->dtha_next
) {
13145 if ((pred
= helper
->dtha_predicate
) != NULL
) {
13147 dtrace_helper_trace(helper
, mstate
, vstate
, 0);
13149 if (!dtrace_dif_emulate(pred
, mstate
, vstate
, state
))
13152 if (*flags
& CPU_DTRACE_FAULT
)
13156 for (i
= 0; i
< helper
->dtha_nactions
; i
++) {
13158 dtrace_helper_trace(helper
,
13159 mstate
, vstate
, i
+ 1);
13161 rval
= dtrace_dif_emulate(helper
->dtha_actions
[i
],
13162 mstate
, vstate
, state
);
13164 if (*flags
& CPU_DTRACE_FAULT
)
13170 dtrace_helper_trace(helper
, mstate
, vstate
,
13171 DTRACE_HELPTRACE_NEXT
);
13175 dtrace_helper_trace(helper
, mstate
, vstate
,
13176 DTRACE_HELPTRACE_DONE
);
13179 * Restore the arg0 that we saved upon entry.
13181 mstate
->dtms_arg
[0] = sarg0
;
13182 mstate
->dtms_arg
[1] = sarg1
;
13188 dtrace_helper_trace(helper
, mstate
, vstate
,
13189 DTRACE_HELPTRACE_ERR
);
13192 * Restore the arg0 that we saved upon entry.
13194 mstate
->dtms_arg
[0] = sarg0
;
13195 mstate
->dtms_arg
[1] = sarg1
;
13201 dtrace_helper_action_destroy(dtrace_helper_action_t
*helper
,
13202 dtrace_vstate_t
*vstate
)
13206 if (helper
->dtha_predicate
!= NULL
)
13207 dtrace_difo_release(helper
->dtha_predicate
, vstate
);
13209 for (i
= 0; i
< helper
->dtha_nactions
; i
++) {
13210 ASSERT(helper
->dtha_actions
[i
] != NULL
);
13211 dtrace_difo_release(helper
->dtha_actions
[i
], vstate
);
13214 kmem_free(helper
->dtha_actions
,
13215 helper
->dtha_nactions
* sizeof (dtrace_difo_t
*));
13216 kmem_free(helper
, sizeof (dtrace_helper_action_t
));
13219 #if !defined(__APPLE__)
13221 dtrace_helper_destroygen(int gen
)
13223 proc_t
*p
= curproc
;
13226 dtrace_helper_destroygen(proc_t
* p
, int gen
)
13229 dtrace_helpers_t
*help
= p
->p_dtrace_helpers
;
13230 dtrace_vstate_t
*vstate
;
13233 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
13235 if (help
== NULL
|| gen
> help
->dthps_generation
)
13238 vstate
= &help
->dthps_vstate
;
13240 for (i
= 0; i
< DTRACE_NHELPER_ACTIONS
; i
++) {
13241 dtrace_helper_action_t
*last
= NULL
, *h
, *next
;
13243 for (h
= help
->dthps_actions
[i
]; h
!= NULL
; h
= next
) {
13244 next
= h
->dtha_next
;
13246 if (h
->dtha_generation
== gen
) {
13247 if (last
!= NULL
) {
13248 last
->dtha_next
= next
;
13250 help
->dthps_actions
[i
] = next
;
13253 dtrace_helper_action_destroy(h
, vstate
);
13261 * Interate until we've cleared out all helper providers with the
13262 * given generation number.
13265 dtrace_helper_provider_t
*prov
;
13268 * Look for a helper provider with the right generation. We
13269 * have to start back at the beginning of the list each time
13270 * because we drop dtrace_lock. It's unlikely that we'll make
13271 * more than two passes.
13273 for (i
= 0; i
< help
->dthps_nprovs
; i
++) {
13274 prov
= help
->dthps_provs
[i
];
13276 if (prov
->dthp_generation
== gen
)
13281 * If there were no matches, we're done.
13283 if (i
== help
->dthps_nprovs
)
13287 * Move the last helper provider into this slot.
13289 help
->dthps_nprovs
--;
13290 help
->dthps_provs
[i
] = help
->dthps_provs
[help
->dthps_nprovs
];
13291 help
->dthps_provs
[help
->dthps_nprovs
] = NULL
;
13293 lck_mtx_unlock(&dtrace_lock
);
13296 * If we have a meta provider, remove this helper provider.
13298 lck_mtx_lock(&dtrace_meta_lock
);
13299 if (dtrace_meta_pid
!= NULL
) {
13300 ASSERT(dtrace_deferred_pid
== NULL
);
13301 dtrace_helper_provider_remove(&prov
->dthp_prov
,
13304 lck_mtx_unlock(&dtrace_meta_lock
);
13306 dtrace_helper_provider_destroy(prov
);
13308 lck_mtx_lock(&dtrace_lock
);
13315 dtrace_helper_validate(dtrace_helper_action_t
*helper
)
13320 if ((dp
= helper
->dtha_predicate
) != NULL
)
13321 err
+= dtrace_difo_validate_helper(dp
);
13323 for (i
= 0; i
< helper
->dtha_nactions
; i
++)
13324 err
+= dtrace_difo_validate_helper(helper
->dtha_actions
[i
]);
13329 #if !defined(__APPLE__)
13331 dtrace_helper_action_add(int which
, dtrace_ecbdesc_t
*ep
)
13334 dtrace_helper_action_add(proc_t
* p
, int which
, dtrace_ecbdesc_t
*ep
)
13337 dtrace_helpers_t
*help
;
13338 dtrace_helper_action_t
*helper
, *last
;
13339 dtrace_actdesc_t
*act
;
13340 dtrace_vstate_t
*vstate
;
13341 dtrace_predicate_t
*pred
;
13342 int count
= 0, nactions
= 0, i
;
13344 if (which
< 0 || which
>= DTRACE_NHELPER_ACTIONS
)
13347 #if !defined(__APPLE__)
13348 help
= curproc
->p_dtrace_helpers
;
13350 help
= p
->p_dtrace_helpers
;
13352 last
= help
->dthps_actions
[which
];
13353 vstate
= &help
->dthps_vstate
;
13355 for (count
= 0; last
!= NULL
; last
= last
->dtha_next
) {
13357 if (last
->dtha_next
== NULL
)
13362 * If we already have dtrace_helper_actions_max helper actions for this
13363 * helper action type, we'll refuse to add a new one.
13365 if (count
>= dtrace_helper_actions_max
)
13368 helper
= kmem_zalloc(sizeof (dtrace_helper_action_t
), KM_SLEEP
);
13369 helper
->dtha_generation
= help
->dthps_generation
;
13371 if ((pred
= ep
->dted_pred
.dtpdd_predicate
) != NULL
) {
13372 ASSERT(pred
->dtp_difo
!= NULL
);
13373 dtrace_difo_hold(pred
->dtp_difo
);
13374 helper
->dtha_predicate
= pred
->dtp_difo
;
13377 for (act
= ep
->dted_action
; act
!= NULL
; act
= act
->dtad_next
) {
13378 if (act
->dtad_kind
!= DTRACEACT_DIFEXPR
)
13381 if (act
->dtad_difo
== NULL
)
13387 helper
->dtha_actions
= kmem_zalloc(sizeof (dtrace_difo_t
*) *
13388 (helper
->dtha_nactions
= nactions
), KM_SLEEP
);
13390 for (act
= ep
->dted_action
, i
= 0; act
!= NULL
; act
= act
->dtad_next
) {
13391 dtrace_difo_hold(act
->dtad_difo
);
13392 helper
->dtha_actions
[i
++] = act
->dtad_difo
;
13395 if (!dtrace_helper_validate(helper
))
13398 if (last
== NULL
) {
13399 help
->dthps_actions
[which
] = helper
;
13401 last
->dtha_next
= helper
;
13404 if (vstate
->dtvs_nlocals
> dtrace_helptrace_nlocals
) {
13405 dtrace_helptrace_nlocals
= vstate
->dtvs_nlocals
;
13406 dtrace_helptrace_next
= 0;
13411 dtrace_helper_action_destroy(helper
, vstate
);
13416 dtrace_helper_provider_register(proc_t
*p
, dtrace_helpers_t
*help
,
13417 dof_helper_t
*dofhp
)
13419 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_NOTOWNED
);
13421 lck_mtx_lock(&dtrace_meta_lock
);
13422 lck_mtx_lock(&dtrace_lock
);
13424 if (!dtrace_attached() || dtrace_meta_pid
== NULL
) {
13426 * If the dtrace module is loaded but not attached, or if
13427 * there aren't isn't a meta provider registered to deal with
13428 * these provider descriptions, we need to postpone creating
13429 * the actual providers until later.
13432 if (help
->dthps_next
== NULL
&& help
->dthps_prev
== NULL
&&
13433 dtrace_deferred_pid
!= help
) {
13434 help
->dthps_deferred
= 1;
13435 help
->dthps_pid
= p
->p_pid
;
13436 help
->dthps_next
= dtrace_deferred_pid
;
13437 help
->dthps_prev
= NULL
;
13438 if (dtrace_deferred_pid
!= NULL
)
13439 dtrace_deferred_pid
->dthps_prev
= help
;
13440 dtrace_deferred_pid
= help
;
13443 lck_mtx_unlock(&dtrace_lock
);
13445 } else if (dofhp
!= NULL
) {
13447 * If the dtrace module is loaded and we have a particular
13448 * helper provider description, pass that off to the
13452 lck_mtx_unlock(&dtrace_lock
);
13454 dtrace_helper_provide(dofhp
, p
->p_pid
);
13458 * Otherwise, just pass all the helper provider descriptions
13459 * off to the meta provider.
13463 lck_mtx_unlock(&dtrace_lock
);
13465 for (i
= 0; i
< help
->dthps_nprovs
; i
++) {
13466 dtrace_helper_provide(&help
->dthps_provs
[i
]->dthp_prov
,
13471 lck_mtx_unlock(&dtrace_meta_lock
);
13474 #if !defined(__APPLE__)
13476 dtrace_helper_provider_add(dof_helper_t
*dofhp
, int gen
)
13479 dtrace_helper_provider_add(proc_t
* p
, dof_helper_t
*dofhp
, int gen
)
13482 dtrace_helpers_t
*help
;
13483 dtrace_helper_provider_t
*hprov
, **tmp_provs
;
13484 uint_t tmp_maxprovs
, i
;
13486 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
13488 #if !defined(__APPLE__)
13489 help
= curproc
->p_dtrace_helpers
;
13491 help
= p
->p_dtrace_helpers
;
13493 ASSERT(help
!= NULL
);
13496 * If we already have dtrace_helper_providers_max helper providers,
13497 * we're refuse to add a new one.
13499 if (help
->dthps_nprovs
>= dtrace_helper_providers_max
)
13503 * Check to make sure this isn't a duplicate.
13505 for (i
= 0; i
< help
->dthps_nprovs
; i
++) {
13506 if (dofhp
->dofhp_addr
==
13507 help
->dthps_provs
[i
]->dthp_prov
.dofhp_addr
)
13511 hprov
= kmem_zalloc(sizeof (dtrace_helper_provider_t
), KM_SLEEP
);
13512 hprov
->dthp_prov
= *dofhp
;
13513 hprov
->dthp_ref
= 1;
13514 hprov
->dthp_generation
= gen
;
13517 * Allocate a bigger table for helper providers if it's already full.
13519 if (help
->dthps_maxprovs
== help
->dthps_nprovs
) {
13520 tmp_maxprovs
= help
->dthps_maxprovs
;
13521 tmp_provs
= help
->dthps_provs
;
13523 if (help
->dthps_maxprovs
== 0)
13524 help
->dthps_maxprovs
= 2;
13526 help
->dthps_maxprovs
*= 2;
13527 if (help
->dthps_maxprovs
> dtrace_helper_providers_max
)
13528 help
->dthps_maxprovs
= dtrace_helper_providers_max
;
13530 ASSERT(tmp_maxprovs
< help
->dthps_maxprovs
);
13532 help
->dthps_provs
= kmem_zalloc(help
->dthps_maxprovs
*
13533 sizeof (dtrace_helper_provider_t
*), KM_SLEEP
);
13535 if (tmp_provs
!= NULL
) {
13536 bcopy(tmp_provs
, help
->dthps_provs
, tmp_maxprovs
*
13537 sizeof (dtrace_helper_provider_t
*));
13538 kmem_free(tmp_provs
, tmp_maxprovs
*
13539 sizeof (dtrace_helper_provider_t
*));
13543 help
->dthps_provs
[help
->dthps_nprovs
] = hprov
;
13544 help
->dthps_nprovs
++;
13550 dtrace_helper_provider_destroy(dtrace_helper_provider_t
*hprov
)
13552 lck_mtx_lock(&dtrace_lock
);
13554 if (--hprov
->dthp_ref
== 0) {
13556 lck_mtx_unlock(&dtrace_lock
);
13557 dof
= (dof_hdr_t
*)(uintptr_t)hprov
->dthp_prov
.dofhp_dof
;
13558 dtrace_dof_destroy(dof
);
13559 kmem_free(hprov
, sizeof (dtrace_helper_provider_t
));
13561 lck_mtx_unlock(&dtrace_lock
);
13566 dtrace_helper_provider_validate(dof_hdr_t
*dof
, dof_sec_t
*sec
)
13568 uintptr_t daddr
= (uintptr_t)dof
;
13569 dof_sec_t
*str_sec
, *prb_sec
, *arg_sec
, *off_sec
, *enoff_sec
;
13570 dof_provider_t
*provider
;
13571 dof_probe_t
*probe
;
13573 char *strtab
, *typestr
;
13574 dof_stridx_t typeidx
;
13576 uint_t nprobes
, j
, k
;
13578 ASSERT(sec
->dofs_type
== DOF_SECT_PROVIDER
);
13580 if (sec
->dofs_offset
& (sizeof (uint_t
) - 1)) {
13581 dtrace_dof_error(dof
, "misaligned section offset");
13586 * The section needs to be large enough to contain the DOF provider
13587 * structure appropriate for the given version.
13589 if (sec
->dofs_size
<
13590 ((dof
->dofh_ident
[DOF_ID_VERSION
] == DOF_VERSION_1
) ?
13591 offsetof(dof_provider_t
, dofpv_prenoffs
) :
13592 sizeof (dof_provider_t
))) {
13593 dtrace_dof_error(dof
, "provider section too small");
13597 provider
= (dof_provider_t
*)(uintptr_t)(daddr
+ sec
->dofs_offset
);
13598 str_sec
= dtrace_dof_sect(dof
, DOF_SECT_STRTAB
, provider
->dofpv_strtab
);
13599 prb_sec
= dtrace_dof_sect(dof
, DOF_SECT_PROBES
, provider
->dofpv_probes
);
13600 arg_sec
= dtrace_dof_sect(dof
, DOF_SECT_PRARGS
, provider
->dofpv_prargs
);
13601 off_sec
= dtrace_dof_sect(dof
, DOF_SECT_PROFFS
, provider
->dofpv_proffs
);
13603 if (str_sec
== NULL
|| prb_sec
== NULL
||
13604 arg_sec
== NULL
|| off_sec
== NULL
)
13609 if (dof
->dofh_ident
[DOF_ID_VERSION
] != DOF_VERSION_1
&&
13610 provider
->dofpv_prenoffs
!= DOF_SECT_NONE
&&
13611 (enoff_sec
= dtrace_dof_sect(dof
, DOF_SECT_PRENOFFS
,
13612 provider
->dofpv_prenoffs
)) == NULL
)
13615 strtab
= (char *)(uintptr_t)(daddr
+ str_sec
->dofs_offset
);
13617 if (provider
->dofpv_name
>= str_sec
->dofs_size
||
13618 strlen(strtab
+ provider
->dofpv_name
) >= DTRACE_PROVNAMELEN
) {
13619 dtrace_dof_error(dof
, "invalid provider name");
13623 if (prb_sec
->dofs_entsize
== 0 ||
13624 prb_sec
->dofs_entsize
> prb_sec
->dofs_size
) {
13625 dtrace_dof_error(dof
, "invalid entry size");
13629 if (prb_sec
->dofs_entsize
& (sizeof (uintptr_t) - 1)) {
13630 dtrace_dof_error(dof
, "misaligned entry size");
13634 if (off_sec
->dofs_entsize
!= sizeof (uint32_t)) {
13635 dtrace_dof_error(dof
, "invalid entry size");
13639 if (off_sec
->dofs_offset
& (sizeof (uint32_t) - 1)) {
13640 dtrace_dof_error(dof
, "misaligned section offset");
13644 if (arg_sec
->dofs_entsize
!= sizeof (uint8_t)) {
13645 dtrace_dof_error(dof
, "invalid entry size");
13649 arg
= (uint8_t *)(uintptr_t)(daddr
+ arg_sec
->dofs_offset
);
13651 nprobes
= prb_sec
->dofs_size
/ prb_sec
->dofs_entsize
;
13654 * Take a pass through the probes to check for errors.
13656 for (j
= 0; j
< nprobes
; j
++) {
13657 probe
= (dof_probe_t
*)(uintptr_t)(daddr
+
13658 prb_sec
->dofs_offset
+ j
* prb_sec
->dofs_entsize
);
13660 if (probe
->dofpr_func
>= str_sec
->dofs_size
) {
13661 dtrace_dof_error(dof
, "invalid function name");
13665 if (strlen(strtab
+ probe
->dofpr_func
) >= DTRACE_FUNCNAMELEN
) {
13666 dtrace_dof_error(dof
, "function name too long");
13670 if (probe
->dofpr_name
>= str_sec
->dofs_size
||
13671 strlen(strtab
+ probe
->dofpr_name
) >= DTRACE_NAMELEN
) {
13672 dtrace_dof_error(dof
, "invalid probe name");
13677 * The offset count must not wrap the index, and the offsets
13678 * must also not overflow the section's data.
13680 if (probe
->dofpr_offidx
+ probe
->dofpr_noffs
<
13681 probe
->dofpr_offidx
||
13682 (probe
->dofpr_offidx
+ probe
->dofpr_noffs
) *
13683 off_sec
->dofs_entsize
> off_sec
->dofs_size
) {
13684 dtrace_dof_error(dof
, "invalid probe offset");
13688 if (dof
->dofh_ident
[DOF_ID_VERSION
] != DOF_VERSION_1
) {
13690 * If there's no is-enabled offset section, make sure
13691 * there aren't any is-enabled offsets. Otherwise
13692 * perform the same checks as for probe offsets
13693 * (immediately above).
13695 if (enoff_sec
== NULL
) {
13696 if (probe
->dofpr_enoffidx
!= 0 ||
13697 probe
->dofpr_nenoffs
!= 0) {
13698 dtrace_dof_error(dof
, "is-enabled "
13699 "offsets with null section");
13702 } else if (probe
->dofpr_enoffidx
+
13703 probe
->dofpr_nenoffs
< probe
->dofpr_enoffidx
||
13704 (probe
->dofpr_enoffidx
+ probe
->dofpr_nenoffs
) *
13705 enoff_sec
->dofs_entsize
> enoff_sec
->dofs_size
) {
13706 dtrace_dof_error(dof
, "invalid is-enabled "
13711 if (probe
->dofpr_noffs
+ probe
->dofpr_nenoffs
== 0) {
13712 dtrace_dof_error(dof
, "zero probe and "
13713 "is-enabled offsets");
13716 } else if (probe
->dofpr_noffs
== 0) {
13717 dtrace_dof_error(dof
, "zero probe offsets");
13721 if (probe
->dofpr_argidx
+ probe
->dofpr_xargc
<
13722 probe
->dofpr_argidx
||
13723 (probe
->dofpr_argidx
+ probe
->dofpr_xargc
) *
13724 arg_sec
->dofs_entsize
> arg_sec
->dofs_size
) {
13725 dtrace_dof_error(dof
, "invalid args");
13729 typeidx
= probe
->dofpr_nargv
;
13730 typestr
= strtab
+ probe
->dofpr_nargv
;
13731 for (k
= 0; k
< probe
->dofpr_nargc
; k
++) {
13732 if (typeidx
>= str_sec
->dofs_size
) {
13733 dtrace_dof_error(dof
, "bad "
13734 "native argument type");
13738 typesz
= strlen(typestr
) + 1;
13739 if (typesz
> DTRACE_ARGTYPELEN
) {
13740 dtrace_dof_error(dof
, "native "
13741 "argument type too long");
13748 typeidx
= probe
->dofpr_xargv
;
13749 typestr
= strtab
+ probe
->dofpr_xargv
;
13750 for (k
= 0; k
< probe
->dofpr_xargc
; k
++) {
13751 if (arg
[probe
->dofpr_argidx
+ k
] > probe
->dofpr_nargc
) {
13752 dtrace_dof_error(dof
, "bad "
13753 "native argument index");
13757 if (typeidx
>= str_sec
->dofs_size
) {
13758 dtrace_dof_error(dof
, "bad "
13759 "translated argument type");
13763 typesz
= strlen(typestr
) + 1;
13764 if (typesz
> DTRACE_ARGTYPELEN
) {
13765 dtrace_dof_error(dof
, "translated argument "
13778 #if !defined(__APPLE__)
13780 dtrace_helper_slurp(dof_hdr_t
*dof
, dof_helper_t
*dhp
)
13783 dtrace_helper_slurp(proc_t
* p
, dof_hdr_t
*dof
, dof_helper_t
*dhp
)
13786 dtrace_helpers_t
*help
;
13787 dtrace_vstate_t
*vstate
;
13788 dtrace_enabling_t
*enab
= NULL
;
13789 int i
, gen
, rv
, nhelpers
= 0, nprovs
= 0, destroy
= 1;
13790 uintptr_t daddr
= (uintptr_t)dof
;
13792 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
13794 #if !defined(__APPLE__)
13795 if ((help
= curproc
->p_dtrace_helpers
) == NULL
)
13796 help
= dtrace_helpers_create(curproc
);
13798 if ((help
= p
->p_dtrace_helpers
) == NULL
)
13799 help
= dtrace_helpers_create(p
);
13802 vstate
= &help
->dthps_vstate
;
13804 if ((rv
= dtrace_dof_slurp(dof
, vstate
, NULL
, &enab
,
13805 dhp
!= NULL
? dhp
->dofhp_addr
: 0, B_FALSE
)) != 0) {
13806 dtrace_dof_destroy(dof
);
13811 * Look for helper providers and validate their descriptions.
13814 for (i
= 0; i
< dof
->dofh_secnum
; i
++) {
13815 dof_sec_t
*sec
= (dof_sec_t
*)(uintptr_t)(daddr
+
13816 dof
->dofh_secoff
+ i
* dof
->dofh_secsize
);
13818 if (sec
->dofs_type
!= DOF_SECT_PROVIDER
)
13821 if (dtrace_helper_provider_validate(dof
, sec
) != 0) {
13822 dtrace_enabling_destroy(enab
);
13823 dtrace_dof_destroy(dof
);
13832 * Now we need to walk through the ECB descriptions in the enabling.
13834 for (i
= 0; i
< enab
->dten_ndesc
; i
++) {
13835 dtrace_ecbdesc_t
*ep
= enab
->dten_desc
[i
];
13836 dtrace_probedesc_t
*desc
= &ep
->dted_probe
;
13838 if (strcmp(desc
->dtpd_provider
, "dtrace") != 0)
13841 if (strcmp(desc
->dtpd_mod
, "helper") != 0)
13844 if (strcmp(desc
->dtpd_func
, "ustack") != 0)
13847 #if !defined(__APPLE__)
13848 if ((rv
= dtrace_helper_action_add(DTRACE_HELPER_ACTION_USTACK
, ep
)) != 0)
13850 if ((rv
= dtrace_helper_action_add(p
, DTRACE_HELPER_ACTION_USTACK
, ep
)) != 0)
13854 * Adding this helper action failed -- we are now going
13855 * to rip out the entire generation and return failure.
13857 #if !defined(__APPLE__)
13858 (void) dtrace_helper_destroygen(help
->dthps_generation
);
13860 (void) dtrace_helper_destroygen(p
, help
->dthps_generation
);
13862 dtrace_enabling_destroy(enab
);
13863 dtrace_dof_destroy(dof
);
13870 if (nhelpers
< enab
->dten_ndesc
)
13871 dtrace_dof_error(dof
, "unmatched helpers");
13873 gen
= help
->dthps_generation
++;
13874 dtrace_enabling_destroy(enab
);
13876 if (dhp
!= NULL
&& nprovs
> 0) {
13877 dhp
->dofhp_dof
= (uint64_t)(uintptr_t)dof
;
13878 #if !defined(__APPLE__)
13879 if (dtrace_helper_provider_add(dhp
, gen
) == 0) {
13881 if (dtrace_helper_provider_add(p
, dhp
, gen
) == 0) {
13883 lck_mtx_unlock(&dtrace_lock
);
13884 #if !defined(__APPLE__)
13885 dtrace_helper_provider_register(curproc
, help
, dhp
);
13887 dtrace_helper_provider_register(p
, help
, dhp
);
13889 lck_mtx_lock(&dtrace_lock
);
13896 dtrace_dof_destroy(dof
);
#if defined(__APPLE__)
/*
 * DTrace user static probes (USDT probes) and helper actions are loaded
 * in a process by processing dof sections. The dof sections are passed
 * into the kernel by dyld, in a dof_ioctl_data_t block. It is rather
 * expensive to process dof for a process that will never use it. There
 * is a memory cost (allocating the providers/probes), and a cpu cost
 * (creating the providers/probes).
 *
 * To reduce this cost, we use "lazy dof". The normal procedure for
 * dof processing is to copyin the dof(s) pointed to by the dof_ioctl_data_t
 * block, and invoke dtrace_helper_slurp() on them. When "lazy dof" is
 * used, each process retains the dof_ioctl_data_t block, instead of
 * copying in the data it points to.
 *
 * The dof_ioctl_data_t blocks are managed as if they were the actual
 * processed dof; on fork the block is copied to the child, on exec and
 * exit the block is freed.
 *
 * If the process loads library(s) containing additional dof, the
 * new dof_ioctl_data_t is merged with the existing block.
 *
 * There are a few catches that make this slightly more difficult.
 * When dyld registers dof_ioctl_data_t blocks, it expects a unique
 * identifier value for each dof in the block. In non-lazy dof terms,
 * this is the generation that dof was loaded in. If we hand back
 * a UID for a lazy dof, that same UID must be able to unload the
 * dof once it has become non-lazy. To meet this requirement, the
 * code that loads lazy dof requires that the UID's for dof(s) in
 * the lazy dof be sorted, in ascending order. It is okay to skip
 * UID's, i.e. 1 -> 5 -> 6 is legal.
 *
 * Once a process has become non-lazy, it will stay non-lazy. All
 * future dof operations for that process will be non-lazy, even
 * if the dof mode transitions back to lazy.
 *
 * Always do lazy dof checks before non-lazy (i.e. in fork, exit, exec).
 * That way, if the lazy check fails due to transitioning to non-lazy, the
 * right thing is done with the newly faulted-in dof.
 */
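
/*
 * Illustrative sketch only (user-space, not part of the driver): shows the
 * append-and-renumber step described above -- incoming entries are appended
 * to the existing block while being assigned strictly ascending generation
 * identifiers that continue where the existing block left off. The names
 * helper_t, block_t and merge_blocks() are hypothetical stand-ins, not
 * DTrace interfaces; the block is compiled out.
 */
#if 0
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

typedef struct { uint64_t dofhp_addr; uint64_t dofhp_dof; } helper_t;
typedef struct { uint64_t count; helper_t helpers[]; } block_t;

/* Merge 'incoming' after 'existing' (which may be NULL). Caller frees result. */
static block_t *
merge_blocks(const block_t *existing, const block_t *incoming)
{
	uint64_t old = existing ? existing->count : 0;
	uint64_t total = old + incoming->count;
	block_t *merged = malloc(sizeof(*merged) + total * sizeof(helper_t));
	uint64_t gen = old ? existing->helpers[old - 1].dofhp_dof + 1 : 1;
	uint64_t i;

	merged->count = total;
	if (old)
		memcpy(merged->helpers, existing->helpers, old * sizeof(helper_t));
	memcpy(&merged->helpers[old], incoming->helpers,
	    incoming->count * sizeof(helper_t));

	/* Overwrite dofhp_dof with the generation, as the lazy-dof path does. */
	for (i = old; i < total; i++)
		merged->helpers[i].dofhp_dof = gen++;

	return (merged);
}
#endif
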
/*
 * This method is a bit tricky. It must handle:
 *
 * dof should not be lazy.
 * dof should have been handled lazily, but there was an error.
 * dof was handled lazily, and needs to be freed.
 * dof was handled lazily, and must not be freed.
 *
 * Returns EACCES if dof should be handled non-lazily.
 *
 * Any other return code (including KERN_SUCCESS) indicates lazy handling of dof.
 *
 * If the dofs data is claimed by this method, dofs_claimed will be set.
 * Callers should not free claimed dofs.
 */
int
dtrace_lazy_dofs_add(proc_t *p, dof_ioctl_data_t *incoming_dofs, int *dofs_claimed)
{
	int rval = 0;
	*dofs_claimed = 0;

	ASSERT(incoming_dofs && incoming_dofs->dofiod_count > 0);

	lck_rw_lock_shared(&dtrace_dof_mode_lock);

	/*
	 * If we have lazy dof, dof mode better be LAZY_ON.
	 */
	ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON);
	ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);
	ASSERT(dtrace_dof_mode != DTRACE_DOF_MODE_NEVER);

	/*
	 * Any existing helpers force non-lazy behavior.
	 */
	if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON && (p->p_dtrace_helpers == NULL)) {
		lck_mtx_lock(&p->p_dtrace_sprlock);

		dof_ioctl_data_t *existing_dofs = p->p_dtrace_lazy_dofs;
		unsigned int existing_dofs_count = (existing_dofs) ? existing_dofs->dofiod_count : 0;
		unsigned int i, merged_dofs_count = incoming_dofs->dofiod_count + existing_dofs_count;

		if (merged_dofs_count == 0 || merged_dofs_count > 1024) {
			dtrace_dof_error(NULL, "lazy_dofs_add merged_dofs_count out of range");
			lck_mtx_unlock(&p->p_dtrace_sprlock);
			rval = EINVAL;
			goto unlock;
		}

		/*
		 * Each dof being added must be assigned a unique generation.
		 */
		uint64_t generation = (existing_dofs) ?
		    existing_dofs->dofiod_helpers[existing_dofs_count - 1].dofhp_dof + 1 : 1;
		for (i = 0; i < incoming_dofs->dofiod_count; i++) {
			/*
			 * We rely on these being the same so we can overwrite dofhp_dof and not lose info.
			 */
			ASSERT(incoming_dofs->dofiod_helpers[i].dofhp_dof ==
			    incoming_dofs->dofiod_helpers[i].dofhp_addr);
			incoming_dofs->dofiod_helpers[i].dofhp_dof = generation++;
		}

		if (existing_dofs) {
			/*
			 * Merge the existing and incoming dofs.
			 */
			size_t merged_dofs_size = DOF_IOCTL_DATA_T_SIZE(merged_dofs_count);
			dof_ioctl_data_t *merged_dofs = kmem_alloc(merged_dofs_size, KM_SLEEP);

			bcopy(&existing_dofs->dofiod_helpers[0],
			    &merged_dofs->dofiod_helpers[0],
			    sizeof(dof_helper_t) * existing_dofs_count);
			bcopy(&incoming_dofs->dofiod_helpers[0],
			    &merged_dofs->dofiod_helpers[existing_dofs_count],
			    sizeof(dof_helper_t) * incoming_dofs->dofiod_count);

			merged_dofs->dofiod_count = merged_dofs_count;

			kmem_free(existing_dofs, DOF_IOCTL_DATA_T_SIZE(existing_dofs_count));

			p->p_dtrace_lazy_dofs = merged_dofs;
		} else {
			/*
			 * Claim the incoming dofs.
			 */
			*dofs_claimed = 1;
			p->p_dtrace_lazy_dofs = incoming_dofs;
		}

		/*
		 * The merged list must remain sorted by generation, in ascending order.
		 */
		dof_ioctl_data_t *all_dofs = p->p_dtrace_lazy_dofs;
		for (i = 0; i < all_dofs->dofiod_count - 1; i++) {
			ASSERT(all_dofs->dofiod_helpers[i].dofhp_dof <
			    all_dofs->dofiod_helpers[i + 1].dofhp_dof);
		}

		lck_mtx_unlock(&p->p_dtrace_sprlock);
	} else {
		rval = EACCES;
	}

unlock:
	lck_rw_unlock_shared(&dtrace_dof_mode_lock);

	return rval;
}
14059 * EINVAL: lazy dof is enabled, but the requested generation was not found.
14060 * EACCES: This removal needs to be handled non-lazily.
14063 dtrace_lazy_dofs_remove(proc_t
*p
, int generation
)
14067 lck_rw_lock_shared(&dtrace_dof_mode_lock
);
14070 * If we have lazy dof, dof mode better be LAZY_ON.
14072 ASSERT(p
->p_dtrace_lazy_dofs
== NULL
|| dtrace_dof_mode
== DTRACE_DOF_MODE_LAZY_ON
);
14073 ASSERT(p
->p_dtrace_lazy_dofs
== NULL
|| p
->p_dtrace_helpers
== NULL
);
14074 ASSERT(dtrace_dof_mode
!= DTRACE_DOF_MODE_NEVER
);
14077 * Any existing helpers force non-lazy behavior.
14079 if (dtrace_dof_mode
== DTRACE_DOF_MODE_LAZY_ON
&& (p
->p_dtrace_helpers
== NULL
)) {
14080 lck_mtx_lock(&p
->p_dtrace_sprlock
);
14082 dof_ioctl_data_t
* existing_dofs
= p
->p_dtrace_lazy_dofs
;
14084 if (existing_dofs
) {
14085 int index
, existing_dofs_count
= existing_dofs
->dofiod_count
;
14086 for (index
=0; index
<existing_dofs_count
; index
++) {
14087 if ((int)existing_dofs
->dofiod_helpers
[index
].dofhp_dof
== generation
) {
14088 dof_ioctl_data_t
* removed_dofs
= NULL
;
14091 * If there is only 1 dof, we'll delete it and swap in NULL.
14093 if (existing_dofs_count
> 1) {
14094 int removed_dofs_count
= existing_dofs_count
- 1;
14095 size_t removed_dofs_size
= DOF_IOCTL_DATA_T_SIZE(removed_dofs_count
);
14097 removed_dofs
= kmem_alloc(removed_dofs_size
, KM_SLEEP
);
14098 removed_dofs
->dofiod_count
= removed_dofs_count
;
14101 * copy the remaining data.
14104 bcopy(&existing_dofs
->dofiod_helpers
[0],
14105 &removed_dofs
->dofiod_helpers
[0],
14106 index
* sizeof(dof_helper_t
));
14109 if (index
< existing_dofs_count
-1) {
14110 bcopy(&existing_dofs
->dofiod_helpers
[index
+1],
14111 &removed_dofs
->dofiod_helpers
[index
],
14112 (existing_dofs_count
- index
- 1) * sizeof(dof_helper_t
));
14116 kmem_free(existing_dofs
, DOF_IOCTL_DATA_T_SIZE(existing_dofs_count
));
14118 p
->p_dtrace_lazy_dofs
= removed_dofs
;
14120 rval
= KERN_SUCCESS
;
14127 dof_ioctl_data_t
* all_dofs
= p
->p_dtrace_lazy_dofs
;
14130 for (i
=0; i
<all_dofs
->dofiod_count
-1; i
++) {
14131 ASSERT(all_dofs
->dofiod_helpers
[i
].dofhp_dof
< all_dofs
->dofiod_helpers
[i
+1].dofhp_dof
);
14138 lck_mtx_unlock(&p
->p_dtrace_sprlock
);
14143 lck_rw_unlock_shared(&dtrace_dof_mode_lock
);
void
dtrace_lazy_dofs_destroy(proc_t *p)
{
	lck_rw_lock_shared(&dtrace_dof_mode_lock);
	lck_mtx_lock(&p->p_dtrace_sprlock);

	/*
	 * If we have lazy dof, dof mode better be LAZY_ON, or we must be exiting.
	 * We cannot assert against DTRACE_DOF_MODE_NEVER here, because we are called from
	 * kern_exit.c and kern_exec.c.
	 */
	ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON ||
	    p->p_lflag & P_LEXIT);
	ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);

	dof_ioctl_data_t *lazy_dofs = p->p_dtrace_lazy_dofs;
	p->p_dtrace_lazy_dofs = NULL;

	lck_mtx_unlock(&p->p_dtrace_sprlock);
	lck_rw_unlock_shared(&dtrace_dof_mode_lock);

	if (lazy_dofs) {
		kmem_free(lazy_dofs, DOF_IOCTL_DATA_T_SIZE(lazy_dofs->dofiod_count));
	}
}
void
dtrace_lazy_dofs_duplicate(proc_t *parent, proc_t *child)
{
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_assert(&parent->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_assert(&child->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED);

	lck_rw_lock_shared(&dtrace_dof_mode_lock);
	lck_mtx_lock(&parent->p_dtrace_sprlock);

	/*
	 * If we have lazy dof, dof mode better be LAZY_ON, or we must be exiting.
	 * We cannot assert against DTRACE_DOF_MODE_NEVER here, because we are called from
	 * the fork path.
	 */
	ASSERT(parent->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON);
	ASSERT(parent->p_dtrace_lazy_dofs == NULL || parent->p_dtrace_helpers == NULL);

	/*
	 * In theory we should hold the child sprlock, but this is safe...
	 */
	ASSERT(child->p_dtrace_lazy_dofs == NULL && child->p_dtrace_helpers == NULL);

	dof_ioctl_data_t *parent_dofs = parent->p_dtrace_lazy_dofs;
	dof_ioctl_data_t *child_dofs = NULL;
	if (parent_dofs) {
		size_t parent_dofs_size = DOF_IOCTL_DATA_T_SIZE(parent_dofs->dofiod_count);
		child_dofs = kmem_alloc(parent_dofs_size, KM_SLEEP);
		bcopy(parent_dofs, child_dofs, parent_dofs_size);
	}

	lck_mtx_unlock(&parent->p_dtrace_sprlock);

	if (child_dofs) {
		lck_mtx_lock(&child->p_dtrace_sprlock);
		child->p_dtrace_lazy_dofs = child_dofs;
		lck_mtx_unlock(&child->p_dtrace_sprlock);
	}

	lck_rw_unlock_shared(&dtrace_dof_mode_lock);
}
static int
dtrace_lazy_dofs_proc_iterate_filter(proc_t *p, void *ignored)
{
#pragma unused(ignored)
	/*
	 * Okay to NULL test without taking the sprlock.
	 */
	return p->p_dtrace_lazy_dofs != NULL;
}

static int
dtrace_lazy_dofs_proc_iterate_doit(proc_t *p, void *ignored)
{
#pragma unused(ignored)
	/*
	 * It is possible this process may exit during our attempt to
	 * fault in the dof. We could fix this by holding locks longer,
	 * but the errors are benign.
	 */
	lck_mtx_lock(&p->p_dtrace_sprlock);

	/*
	 * In this case only, it is okay to have lazy dof when dof mode is DTRACE_DOF_MODE_LAZY_OFF.
	 */
	ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);
	ASSERT(dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF);

	dof_ioctl_data_t *lazy_dofs = p->p_dtrace_lazy_dofs;
	p->p_dtrace_lazy_dofs = NULL;

	lck_mtx_unlock(&p->p_dtrace_sprlock);

	/*
	 * Process each dof_helper_t.
	 */
	if (lazy_dofs != NULL) {
		unsigned int i;
		int rval;

		for (i = 0; i < lazy_dofs->dofiod_count; i++) {
			/*
			 * When loading lazy dof, we depend on the generations being sorted in ascending order.
			 */
			ASSERT(i >= (lazy_dofs->dofiod_count - 1) ||
			    lazy_dofs->dofiod_helpers[i].dofhp_dof <
			    lazy_dofs->dofiod_helpers[i + 1].dofhp_dof);

			dof_helper_t *dhp = &lazy_dofs->dofiod_helpers[i];

			/*
			 * We stored the generation in dofhp_dof. Save it, and restore the original value.
			 */
			int generation = dhp->dofhp_dof;
			dhp->dofhp_dof = dhp->dofhp_addr;

			dof_hdr_t *dof = dtrace_dof_copyin_from_proc(p, dhp->dofhp_dof, &rval);

			if (dof != NULL) {
				dtrace_helpers_t *help;

				lck_mtx_lock(&dtrace_lock);

				/*
				 * This must be done with the dtrace_lock held.
				 */
				if ((help = p->p_dtrace_helpers) == NULL)
					help = dtrace_helpers_create(p);

				/*
				 * If the generation value has been bumped, someone snuck in
				 * when we released the dtrace lock. We have to dump this generation,
				 * there is no safe way to load it.
				 */
				if (help->dthps_generation <= generation) {
					help->dthps_generation = generation;

					/*
					 * dtrace_helper_slurp() takes responsibility for the dof --
					 * it may free it now or it may save it and free it later.
					 */
					if ((rval = dtrace_helper_slurp(p, dof, dhp)) != generation) {
						dtrace_dof_error(NULL, "returned value did not match expected generation");
					}
				}

				lck_mtx_unlock(&dtrace_lock);
			}
		}

		kmem_free(lazy_dofs, DOF_IOCTL_DATA_T_SIZE(lazy_dofs->dofiod_count));
	}

	return PROC_RETURNED;
}
#endif /* __APPLE__ */
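
/*
 * Illustrative sketch only (user-space, not part of the driver): the
 * filter/doit pair above follows a common iterate-with-predicate pattern --
 * a cheap lock-free filter decides which processes are worth visiting, and
 * the doit callback does the real work. The names process_t, for_each_proc,
 * has_lazy_dofs and fault_in_dofs are hypothetical; the block is compiled out.
 */
#if 0
#include <stdbool.h>
#include <stdio.h>

typedef struct { int pid; void *lazy_dofs; } process_t;

typedef bool (*filter_fn)(process_t *, void *);
typedef void (*doit_fn)(process_t *, void *);

/* Call 'doit' only on the entries that pass 'filter'. */
static void
for_each_proc(process_t *procs, int n, doit_fn doit, void *doit_arg,
    filter_fn filter, void *filter_arg)
{
	for (int i = 0; i < n; i++) {
		if (filter(&procs[i], filter_arg))
			doit(&procs[i], doit_arg);
	}
}

static bool
has_lazy_dofs(process_t *p, void *ignored)
{
	(void)ignored;
	return p->lazy_dofs != NULL;	/* cheap check, no locks needed */
}

static void
fault_in_dofs(process_t *p, void *ignored)
{
	(void)ignored;
	printf("pid %d: faulting in lazy dof\n", p->pid);
	p->lazy_dofs = NULL;		/* ownership transferred, as in the doit above */
}

int
main(void)
{
	int dummy = 1;
	process_t procs[] = { { 100, NULL }, { 200, &dummy }, { 300, &dummy } };

	for_each_proc(procs, 3, fault_in_dofs, NULL, has_lazy_dofs, NULL);
	return 0;
}
#endif
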
static dtrace_helpers_t *
dtrace_helpers_create(proc_t *p)
{
	dtrace_helpers_t *help;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(p->p_dtrace_helpers == NULL);

	help = kmem_zalloc(sizeof (dtrace_helpers_t), KM_SLEEP);
	help->dthps_actions = kmem_zalloc(sizeof (dtrace_helper_action_t *) *
	    DTRACE_NHELPER_ACTIONS, KM_SLEEP);

	p->p_dtrace_helpers = help;
	dtrace_helpers++;

	return (help);
}
14328 #if !defined(__APPLE__)
14330 dtrace_helpers_destroy(void)
14332 proc_t
*p
= curproc
;
14335 dtrace_helpers_destroy(proc_t
* p
)
14338 dtrace_helpers_t
*help
;
14339 dtrace_vstate_t
*vstate
;
14342 lck_mtx_lock(&dtrace_lock
);
14344 ASSERT(p
->p_dtrace_helpers
!= NULL
);
14345 ASSERT(dtrace_helpers
> 0);
14347 help
= p
->p_dtrace_helpers
;
14348 vstate
= &help
->dthps_vstate
;
14351 * We're now going to lose the help from this process.
14353 p
->p_dtrace_helpers
= NULL
;
	 * Destroy the helper actions.
14359 for (i
= 0; i
< DTRACE_NHELPER_ACTIONS
; i
++) {
14360 dtrace_helper_action_t
*h
, *next
;
14362 for (h
= help
->dthps_actions
[i
]; h
!= NULL
; h
= next
) {
14363 next
= h
->dtha_next
;
14364 dtrace_helper_action_destroy(h
, vstate
);
14369 lck_mtx_unlock(&dtrace_lock
);
14372 * Destroy the helper providers.
14374 if (help
->dthps_maxprovs
> 0) {
14375 lck_mtx_lock(&dtrace_meta_lock
);
14376 if (dtrace_meta_pid
!= NULL
) {
14377 ASSERT(dtrace_deferred_pid
== NULL
);
14379 for (i
= 0; i
< help
->dthps_nprovs
; i
++) {
14380 dtrace_helper_provider_remove(
14381 &help
->dthps_provs
[i
]->dthp_prov
, p
->p_pid
);
14384 lck_mtx_lock(&dtrace_lock
);
14385 ASSERT(help
->dthps_deferred
== 0 ||
14386 help
->dthps_next
!= NULL
||
14387 help
->dthps_prev
!= NULL
||
14388 help
== dtrace_deferred_pid
);
14391 * Remove the helper from the deferred list.
14393 if (help
->dthps_next
!= NULL
)
14394 help
->dthps_next
->dthps_prev
= help
->dthps_prev
;
14395 if (help
->dthps_prev
!= NULL
)
14396 help
->dthps_prev
->dthps_next
= help
->dthps_next
;
14397 if (dtrace_deferred_pid
== help
) {
14398 dtrace_deferred_pid
= help
->dthps_next
;
14399 ASSERT(help
->dthps_prev
== NULL
);
14402 lck_mtx_unlock(&dtrace_lock
);
14405 lck_mtx_unlock(&dtrace_meta_lock
);
14407 for (i
= 0; i
< help
->dthps_nprovs
; i
++) {
14408 dtrace_helper_provider_destroy(help
->dthps_provs
[i
]);
14411 kmem_free(help
->dthps_provs
, help
->dthps_maxprovs
*
14412 sizeof (dtrace_helper_provider_t
*));
14415 lck_mtx_lock(&dtrace_lock
);
14417 dtrace_vstate_fini(&help
->dthps_vstate
);
14418 kmem_free(help
->dthps_actions
,
14419 sizeof (dtrace_helper_action_t
*) * DTRACE_NHELPER_ACTIONS
);
14420 kmem_free(help
, sizeof (dtrace_helpers_t
));
14423 lck_mtx_unlock(&dtrace_lock
);
14427 dtrace_helpers_duplicate(proc_t
*from
, proc_t
*to
)
14429 dtrace_helpers_t
*help
, *newhelp
;
14430 dtrace_helper_action_t
*helper
, *new, *last
;
14432 dtrace_vstate_t
*vstate
;
14433 int i
, j
, sz
, hasprovs
= 0;
14435 lck_mtx_lock(&dtrace_lock
);
14436 ASSERT(from
->p_dtrace_helpers
!= NULL
);
14437 ASSERT(dtrace_helpers
> 0);
14439 help
= from
->p_dtrace_helpers
;
14440 newhelp
= dtrace_helpers_create(to
);
14441 ASSERT(to
->p_dtrace_helpers
!= NULL
);
14443 newhelp
->dthps_generation
= help
->dthps_generation
;
14444 vstate
= &newhelp
->dthps_vstate
;
14447 * Duplicate the helper actions.
14449 for (i
= 0; i
< DTRACE_NHELPER_ACTIONS
; i
++) {
14450 if ((helper
= help
->dthps_actions
[i
]) == NULL
)
14453 for (last
= NULL
; helper
!= NULL
; helper
= helper
->dtha_next
) {
14454 new = kmem_zalloc(sizeof (dtrace_helper_action_t
),
14456 new->dtha_generation
= helper
->dtha_generation
;
14458 if ((dp
= helper
->dtha_predicate
) != NULL
) {
14459 dp
= dtrace_difo_duplicate(dp
, vstate
);
14460 new->dtha_predicate
= dp
;
14463 new->dtha_nactions
= helper
->dtha_nactions
;
14464 sz
= sizeof (dtrace_difo_t
*) * new->dtha_nactions
;
14465 new->dtha_actions
= kmem_alloc(sz
, KM_SLEEP
);
14467 for (j
= 0; j
< new->dtha_nactions
; j
++) {
14468 dtrace_difo_t
*dp
= helper
->dtha_actions
[j
];
14470 ASSERT(dp
!= NULL
);
14471 dp
= dtrace_difo_duplicate(dp
, vstate
);
14472 new->dtha_actions
[j
] = dp
;
14475 if (last
!= NULL
) {
14476 last
->dtha_next
= new;
14478 newhelp
->dthps_actions
[i
] = new;
14486 * Duplicate the helper providers and register them with the
14487 * DTrace framework.
14489 if (help
->dthps_nprovs
> 0) {
14490 newhelp
->dthps_nprovs
= help
->dthps_nprovs
;
14491 newhelp
->dthps_maxprovs
= help
->dthps_nprovs
;
14492 newhelp
->dthps_provs
= kmem_alloc(newhelp
->dthps_nprovs
*
14493 sizeof (dtrace_helper_provider_t
*), KM_SLEEP
);
14494 for (i
= 0; i
< newhelp
->dthps_nprovs
; i
++) {
14495 newhelp
->dthps_provs
[i
] = help
->dthps_provs
[i
];
14496 newhelp
->dthps_provs
[i
]->dthp_ref
++;
14502 lck_mtx_unlock(&dtrace_lock
);
14505 dtrace_helper_provider_register(to
, newhelp
, NULL
);
14509 * DTrace Hook Functions
14512 dtrace_module_loaded(struct modctl
*ctl
)
14514 dtrace_provider_t
*prv
;
14516 lck_mtx_lock(&dtrace_provider_lock
);
14517 lck_mtx_lock(&mod_lock
);
14519 // ASSERT(ctl->mod_busy);
14522 * We're going to call each providers per-module provide operation
14523 * specifying only this module.
14525 for (prv
= dtrace_provider
; prv
!= NULL
; prv
= prv
->dtpv_next
)
14526 prv
->dtpv_pops
.dtps_provide_module(prv
->dtpv_arg
, ctl
);
14528 lck_mtx_unlock(&mod_lock
);
14529 lck_mtx_unlock(&dtrace_provider_lock
);
14532 * If we have any retained enablings, we need to match against them.
14533 * Enabling probes requires that cpu_lock be held, and we cannot hold
14534 * cpu_lock here -- it is legal for cpu_lock to be held when loading a
14535 * module. (In particular, this happens when loading scheduling
14536 * classes.) So if we have any retained enablings, we need to dispatch
14537 * our task queue to do the match for us.
14539 lck_mtx_lock(&dtrace_lock
);
14541 if (dtrace_retained
== NULL
) {
14542 lck_mtx_unlock(&dtrace_lock
);
14546 (void) taskq_dispatch(dtrace_taskq
,
14547 (task_func_t
*)dtrace_enabling_matchall
, NULL
, TQ_SLEEP
);
14549 lck_mtx_unlock(&dtrace_lock
);
14552 * And now, for a little heuristic sleaze: in general, we want to
14553 * match modules as soon as they load. However, we cannot guarantee
14554 * this, because it would lead us to the lock ordering violation
14555 * outlined above. The common case, of course, is that cpu_lock is
14556 * _not_ held -- so we delay here for a clock tick, hoping that that's
14557 * long enough for the task queue to do its work. If it's not, it's
14558 * not a serious problem -- it just means that the module that we
14559 * just loaded may not be immediately instrumentable.
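
/*
 * Illustrative sketch only (user-space, not part of the driver): the comment
 * above describes deferring the probe match to a task queue because the lock
 * the match needs may already be held by the caller. A minimal version of
 * that defer-then-drain shape is sketched below; pending, module_loaded and
 * run_pending are hypothetical names, and the block is compiled out.
 */
#if 0
#include <stdio.h>

typedef void (*task_fn)(void);

static task_fn pending;		/* one-slot stand-in for a task queue */

static void
match_new_module(void)
{
	printf("matching retained enablings against the new module\n");
}

static void
module_loaded(void)
{
	/*
	 * We may be called with the lock the match needs already held, so
	 * never match inline: hand the work to the queue instead.
	 */
	pending = match_new_module;
}

static void
run_pending(void)
{
	if (pending != NULL) {
		task_fn fn = pending;
		pending = NULL;
		fn();		/* runs later, outside the caller's locks */
	}
}

int
main(void)
{
	module_loaded();
	run_pending();
	return 0;
}
#endif
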
14565 dtrace_module_unloaded(struct modctl
*ctl
)
14567 dtrace_probe_t
template, *probe
, *first
, *next
;
14568 dtrace_provider_t
*prov
;
14570 template.dtpr_mod
= ctl
->mod_modname
;
14572 lck_mtx_lock(&dtrace_provider_lock
);
14573 lck_mtx_lock(&mod_lock
);
14574 lck_mtx_lock(&dtrace_lock
);
14576 if (dtrace_bymod
== NULL
) {
14578 * The DTrace module is loaded (obviously) but not attached;
14579 * we don't have any work to do.
14581 lck_mtx_unlock(&dtrace_provider_lock
);
14582 lck_mtx_unlock(&mod_lock
);
14583 lck_mtx_unlock(&dtrace_lock
);
14587 for (probe
= first
= dtrace_hash_lookup(dtrace_bymod
, &template);
14588 probe
!= NULL
; probe
= probe
->dtpr_nextmod
) {
14589 if (probe
->dtpr_ecb
!= NULL
) {
14590 lck_mtx_unlock(&dtrace_provider_lock
);
14591 lck_mtx_unlock(&mod_lock
);
14592 lck_mtx_unlock(&dtrace_lock
);
14595 * This shouldn't _actually_ be possible -- we're
14596 * unloading a module that has an enabled probe in it.
14597 * (It's normally up to the provider to make sure that
14598 * this can't happen.) However, because dtps_enable()
14599 * doesn't have a failure mode, there can be an
14600 * enable/unload race. Upshot: we don't want to
14601 * assert, but we're not going to disable the
14604 if (dtrace_err_verbose
) {
14605 cmn_err(CE_WARN
, "unloaded module '%s' had "
14606 "enabled probes", ctl
->mod_modname
);
14615 for (first
= NULL
; probe
!= NULL
; probe
= next
) {
14616 ASSERT(dtrace_probes
[probe
->dtpr_id
- 1] == probe
);
14618 dtrace_probes
[probe
->dtpr_id
- 1] = NULL
;
14620 next
= probe
->dtpr_nextmod
;
14621 dtrace_hash_remove(dtrace_bymod
, probe
);
14622 dtrace_hash_remove(dtrace_byfunc
, probe
);
14623 dtrace_hash_remove(dtrace_byname
, probe
);
14625 if (first
== NULL
) {
14627 probe
->dtpr_nextmod
= NULL
;
14629 probe
->dtpr_nextmod
= first
;
14635 * We've removed all of the module's probes from the hash chains and
14636 * from the probe array. Now issue a dtrace_sync() to be sure that
14637 * everyone has cleared out from any probe array processing.
14641 for (probe
= first
; probe
!= NULL
; probe
= first
) {
14642 first
= probe
->dtpr_nextmod
;
14643 prov
= probe
->dtpr_provider
;
14644 prov
->dtpv_pops
.dtps_destroy(prov
->dtpv_arg
, probe
->dtpr_id
,
14646 kmem_free(probe
->dtpr_mod
, strlen(probe
->dtpr_mod
) + 1);
14647 kmem_free(probe
->dtpr_func
, strlen(probe
->dtpr_func
) + 1);
14648 kmem_free(probe
->dtpr_name
, strlen(probe
->dtpr_name
) + 1);
14649 vmem_free(dtrace_arena
, (void *)(uintptr_t)probe
->dtpr_id
, 1);
14650 #if !defined(__APPLE__)
14651 kmem_free(probe
, sizeof (dtrace_probe_t
));
14653 zfree(dtrace_probe_t_zone
, probe
);
14657 lck_mtx_unlock(&dtrace_lock
);
14658 lck_mtx_unlock(&mod_lock
);
14659 lck_mtx_unlock(&dtrace_provider_lock
);
void
dtrace_suspend(void)
{
	dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend));
}

void
dtrace_resume(void)
{
	dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_resume));
}
14675 dtrace_cpu_setup(cpu_setup_t what
, processorid_t cpu
)
14677 lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
);
14678 lck_mtx_lock(&dtrace_lock
);
14682 dtrace_state_t
*state
;
14683 dtrace_optval_t
*opt
, rs
, c
;
14686 * For now, we only allocate a new buffer for anonymous state.
14688 if ((state
= dtrace_anon
.dta_state
) == NULL
)
14691 if (state
->dts_activity
!= DTRACE_ACTIVITY_ACTIVE
)
14694 opt
= state
->dts_options
;
14695 c
= opt
[DTRACEOPT_CPU
];
14697 if (c
!= DTRACE_CPUALL
&& c
!= DTRACEOPT_UNSET
&& c
!= cpu
)
14701 * Regardless of what the actual policy is, we're going to
14702 * temporarily set our resize policy to be manual. We're
14703 * also going to temporarily set our CPU option to denote
14704 * the newly configured CPU.
14706 rs
= opt
[DTRACEOPT_BUFRESIZE
];
14707 opt
[DTRACEOPT_BUFRESIZE
] = DTRACEOPT_BUFRESIZE_MANUAL
;
14708 opt
[DTRACEOPT_CPU
] = (dtrace_optval_t
)cpu
;
14710 (void) dtrace_state_buffers(state
);
14712 opt
[DTRACEOPT_BUFRESIZE
] = rs
;
14713 opt
[DTRACEOPT_CPU
] = c
;
14720 * We don't free the buffer in the CPU_UNCONFIG case. (The
14721 * buffer will be freed when the consumer exits.)
14729 lck_mtx_unlock(&dtrace_lock
);
14734 dtrace_cpu_setup_initial(processorid_t cpu
)
14736 (void) dtrace_cpu_setup(CPU_CONFIG
, cpu
);
static void
dtrace_toxrange_add(uintptr_t base, uintptr_t limit)
{
	if (dtrace_toxranges >= dtrace_toxranges_max) {
		size_t osize, nsize;
		dtrace_toxrange_t *range;

		osize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);

		if (osize == 0) {
			ASSERT(dtrace_toxrange == NULL);
			ASSERT(dtrace_toxranges_max == 0);
			dtrace_toxranges_max = 1;
		} else {
			dtrace_toxranges_max <<= 1;
		}

		nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
		range = kmem_zalloc(nsize, KM_SLEEP);

		if (dtrace_toxrange != NULL) {
			ASSERT(osize != 0);
			bcopy(dtrace_toxrange, range, osize);
			kmem_free(dtrace_toxrange, osize);
		}

		dtrace_toxrange = range;
	}

	ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == NULL);
	ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == NULL);

	dtrace_toxrange[dtrace_toxranges].dtt_base = base;
	dtrace_toxrange[dtrace_toxranges].dtt_limit = limit;
	dtrace_toxranges++;
}
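
/*
 * Illustrative sketch only (user-space, not part of the driver): the toxic
 * range table above grows by the standard amortized-doubling pattern -- start
 * with one slot, double the capacity whenever the table fills, and copy the
 * old contents over. The names range_t and range_add are hypothetical; the
 * block is compiled out.
 */
#if 0
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct { unsigned long base, limit; } range_t;

static range_t *ranges;
static size_t nranges, maxranges;

static void
range_add(unsigned long base, unsigned long limit)
{
	if (nranges >= maxranges) {
		size_t osize = maxranges * sizeof(range_t);

		/* One slot to start, then double each time the array fills. */
		maxranges = (maxranges == 0) ? 1 : maxranges << 1;

		range_t *grown = calloc(maxranges, sizeof(range_t));
		if (ranges != NULL) {
			memcpy(grown, ranges, osize);
			free(ranges);
		}
		ranges = grown;
	}

	ranges[nranges].base = base;
	ranges[nranges].limit = limit;
	nranges++;
}

int
main(void)
{
	for (unsigned long i = 0; i < 5; i++)
		range_add(i * 0x1000, i * 0x1000 + 0xfff);
	printf("%zu ranges, capacity %zu\n", nranges, maxranges);
	free(ranges);
	return 0;
}
#endif
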
14777 * DTrace Driver Cookbook Functions
14781 dtrace_attach(dev_info_t
*devi
, ddi_attach_cmd_t cmd
)
14783 dtrace_provider_id_t id
;
14784 dtrace_state_t
*state
= NULL
;
14785 dtrace_enabling_t
*enab
;
14787 lck_mtx_lock(&cpu_lock
);
14788 lck_mtx_lock(&dtrace_provider_lock
);
14789 lck_mtx_lock(&dtrace_lock
);
14791 if (ddi_soft_state_init(&dtrace_softstate
,
14792 sizeof (dtrace_state_t
), 0) != 0) {
14793 cmn_err(CE_NOTE
, "/dev/dtrace failed to initialize soft state");
14794 lck_mtx_unlock(&cpu_lock
);
14795 lck_mtx_unlock(&dtrace_provider_lock
);
14796 lck_mtx_unlock(&dtrace_lock
);
14797 return (DDI_FAILURE
);
14800 #if !defined(__APPLE__)
14801 if (ddi_create_minor_node(devi
, DTRACEMNR_DTRACE
, S_IFCHR
,
14802 DTRACEMNRN_DTRACE
, DDI_PSEUDO
, NULL
) == DDI_FAILURE
||
14803 ddi_create_minor_node(devi
, DTRACEMNR_HELPER
, S_IFCHR
,
14804 DTRACEMNRN_HELPER
, DDI_PSEUDO
, NULL
) == DDI_FAILURE
) {
14805 cmn_err(CE_NOTE
, "/dev/dtrace couldn't create minor nodes");
14806 ddi_remove_minor_node(devi
, NULL
);
14807 ddi_soft_state_fini(&dtrace_softstate
);
14808 lck_mtx_unlock(&cpu_lock
);
14809 lck_mtx_unlock(&dtrace_provider_lock
);
14810 lck_mtx_unlock(&dtrace_lock
);
14811 return (DDI_FAILURE
);
14813 #endif /* __APPLE__ */
14815 ddi_report_dev(devi
);
14816 dtrace_devi
= devi
;
14818 dtrace_modload
= dtrace_module_loaded
;
14819 dtrace_modunload
= dtrace_module_unloaded
;
14820 dtrace_cpu_init
= dtrace_cpu_setup_initial
;
14821 dtrace_helpers_cleanup
= dtrace_helpers_destroy
;
14822 dtrace_helpers_fork
= dtrace_helpers_duplicate
;
14823 dtrace_cpustart_init
= dtrace_suspend
;
14824 dtrace_cpustart_fini
= dtrace_resume
;
14825 dtrace_debugger_init
= dtrace_suspend
;
14826 dtrace_debugger_fini
= dtrace_resume
;
14827 dtrace_kreloc_init
= dtrace_suspend
;
14828 dtrace_kreloc_fini
= dtrace_resume
;
14830 register_cpu_setup_func((cpu_setup_func_t
*)dtrace_cpu_setup
, NULL
);
14832 lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
);
14834 dtrace_arena
= vmem_create("dtrace", (void *)1, UINT32_MAX
, 1,
14835 NULL
, NULL
, NULL
, 0, VM_SLEEP
| VMC_IDENTIFIER
);
14836 dtrace_minor
= vmem_create("dtrace_minor", (void *)DTRACEMNRN_CLONE
,
14837 UINT32_MAX
- DTRACEMNRN_CLONE
, 1, NULL
, NULL
, NULL
, 0,
14838 VM_SLEEP
| VMC_IDENTIFIER
);
14839 dtrace_taskq
= taskq_create("dtrace_taskq", 1, maxclsyspri
,
14842 dtrace_state_cache
= kmem_cache_create("dtrace_state_cache",
14843 sizeof (dtrace_dstate_percpu_t
) * NCPU
, DTRACE_STATE_ALIGN
,
14844 NULL
, NULL
, NULL
, NULL
, NULL
, 0);
14846 lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
);
14848 dtrace_bymod
= dtrace_hash_create(offsetof(dtrace_probe_t
, dtpr_mod
),
14849 offsetof(dtrace_probe_t
, dtpr_nextmod
),
14850 offsetof(dtrace_probe_t
, dtpr_prevmod
));
14852 dtrace_byfunc
= dtrace_hash_create(offsetof(dtrace_probe_t
, dtpr_func
),
14853 offsetof(dtrace_probe_t
, dtpr_nextfunc
),
14854 offsetof(dtrace_probe_t
, dtpr_prevfunc
));
14856 dtrace_byname
= dtrace_hash_create(offsetof(dtrace_probe_t
, dtpr_name
),
14857 offsetof(dtrace_probe_t
, dtpr_nextname
),
14858 offsetof(dtrace_probe_t
, dtpr_prevname
));
14860 if (dtrace_retain_max
< 1) {
14861 cmn_err(CE_WARN
, "illegal value (%lu) for dtrace_retain_max; "
14862 "setting to 1", dtrace_retain_max
);
14863 dtrace_retain_max
= 1;
14867 * Now discover our toxic ranges.
14869 dtrace_toxic_ranges(dtrace_toxrange_add
);
14872 * Before we register ourselves as a provider to our own framework,
14873 * we would like to assert that dtrace_provider is NULL -- but that's
14874 * not true if we were loaded as a dependency of a DTrace provider.
14875 * Once we've registered, we can assert that dtrace_provider is our
14878 (void) dtrace_register("dtrace", &dtrace_provider_attr
,
14879 DTRACE_PRIV_NONE
, 0, &dtrace_provider_ops
, NULL
, &id
);
14881 ASSERT(dtrace_provider
!= NULL
);
14882 ASSERT((dtrace_provider_id_t
)dtrace_provider
== id
);
14884 #if !defined(__APPLE__)
14885 dtrace_probeid_begin
= dtrace_probe_create((dtrace_provider_id_t
)
14886 dtrace_provider
, NULL
, NULL
, "BEGIN", 0, NULL
);
14887 dtrace_probeid_end
= dtrace_probe_create((dtrace_provider_id_t
)
14888 dtrace_provider
, NULL
, NULL
, "END", 0, NULL
);
14889 dtrace_probeid_error
= dtrace_probe_create((dtrace_provider_id_t
)
14890 dtrace_provider
, NULL
, NULL
, "ERROR", 1, NULL
);
14891 #elif defined(__ppc__) || defined(__ppc64__)
14892 dtrace_probeid_begin
= dtrace_probe_create((dtrace_provider_id_t
)
14893 dtrace_provider
, NULL
, NULL
, "BEGIN", 2, NULL
);
14894 dtrace_probeid_end
= dtrace_probe_create((dtrace_provider_id_t
)
14895 dtrace_provider
, NULL
, NULL
, "END", 1, NULL
);
14896 dtrace_probeid_error
= dtrace_probe_create((dtrace_provider_id_t
)
14897 dtrace_provider
, NULL
, NULL
, "ERROR", 4, NULL
);
14898 #elif (defined(__i386__) || defined (__x86_64__))
14899 dtrace_probeid_begin
= dtrace_probe_create((dtrace_provider_id_t
)
14900 dtrace_provider
, NULL
, NULL
, "BEGIN", 1, NULL
);
14901 dtrace_probeid_end
= dtrace_probe_create((dtrace_provider_id_t
)
14902 dtrace_provider
, NULL
, NULL
, "END", 0, NULL
);
14903 dtrace_probeid_error
= dtrace_probe_create((dtrace_provider_id_t
)
14904 dtrace_provider
, NULL
, NULL
, "ERROR", 3, NULL
);
14906 #error Unknown Architecture
14907 #endif /* __APPLE__ */
14909 dtrace_anon_property();
14910 lck_mtx_unlock(&cpu_lock
);
14913 * If DTrace helper tracing is enabled, we need to allocate the
14914 * trace buffer and initialize the values.
14916 if (dtrace_helptrace_enabled
) {
14917 ASSERT(dtrace_helptrace_buffer
== NULL
);
14918 dtrace_helptrace_buffer
=
14919 kmem_zalloc(dtrace_helptrace_bufsize
, KM_SLEEP
);
14920 dtrace_helptrace_next
= 0;
14924 * If there are already providers, we must ask them to provide their
14925 * probes, and then match any anonymous enabling against them. Note
14926 * that there should be no other retained enablings at this time:
14927 * the only retained enablings at this time should be the anonymous
14930 if (dtrace_anon
.dta_enabling
!= NULL
) {
14931 ASSERT(dtrace_retained
== dtrace_anon
.dta_enabling
);
14933 dtrace_enabling_provide(NULL
);
14934 state
= dtrace_anon
.dta_state
;
14937 * We couldn't hold cpu_lock across the above call to
14938 * dtrace_enabling_provide(), but we must hold it to actually
14939 * enable the probes. We have to drop all of our locks, pick
14940 * up cpu_lock, and regain our locks before matching the
14941 * retained anonymous enabling.
14943 lck_mtx_unlock(&dtrace_lock
);
14944 lck_mtx_unlock(&dtrace_provider_lock
);
14946 lck_mtx_lock(&cpu_lock
);
14947 lck_mtx_lock(&dtrace_provider_lock
);
14948 lck_mtx_lock(&dtrace_lock
);
14950 if ((enab
= dtrace_anon
.dta_enabling
) != NULL
)
14951 (void) dtrace_enabling_match(enab
, NULL
);
14953 lck_mtx_unlock(&cpu_lock
);
14956 lck_mtx_unlock(&dtrace_lock
);
14957 lck_mtx_unlock(&dtrace_provider_lock
);
14959 if (state
!= NULL
) {
14961 * If we created any anonymous state, set it going now.
14963 (void) dtrace_state_go(state
, &dtrace_anon
.dta_beganon
);
14966 return (DDI_SUCCESS
);
14969 extern void fasttrap_init(void);
14973 dtrace_open(dev_t
*devp
, int flag
, int otyp
, cred_t
*cred_p
)
14975 #pragma unused(flag, otyp)
14976 dtrace_state_t
*state
;
14981 #if !defined(__APPLE__)
14982 if (getminor(*devp
) == DTRACEMNRN_HELPER
)
14986 * If this wasn't an open with the "helper" minor, then it must be
14987 * the "dtrace" minor.
14989 ASSERT(getminor(*devp
) == DTRACEMNRN_DTRACE
);
14991 /* Darwin puts Helper on its own major device. */
14992 #endif /* __APPLE__ */
14995 * If no DTRACE_PRIV_* bits are set in the credential, then the
14996 * caller lacks sufficient permission to do anything with DTrace.
14998 dtrace_cred2priv(cred_p
, &priv
, &uid
, &zoneid
);
14999 if (priv
== DTRACE_PRIV_NONE
)
15002 #if defined(__APPLE__)
15004 * We delay the initialization of fasttrap as late as possible.
15005 * It certainly can't be later than now!
15008 #endif /* __APPLE__ */
15011 * Ask all providers to provide all their probes.
15013 lck_mtx_lock(&dtrace_provider_lock
);
15014 dtrace_probe_provide(NULL
, NULL
);
15015 lck_mtx_unlock(&dtrace_provider_lock
);
15017 lck_mtx_lock(&cpu_lock
);
15018 lck_mtx_lock(&dtrace_lock
);
15020 dtrace_membar_producer();
15023 * If the kernel debugger is active (that is, if the kernel debugger
15024 * modified text in some way), we won't allow the open.
15026 if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE
) != 0) {
15028 lck_mtx_unlock(&cpu_lock
);
15029 lck_mtx_unlock(&dtrace_lock
);
15033 state
= dtrace_state_create(devp
, cred_p
);
15034 lck_mtx_unlock(&cpu_lock
);
15036 if (state
== NULL
) {
15037 if (--dtrace_opens
== 0)
15038 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE
);
15039 lck_mtx_unlock(&dtrace_lock
);
15043 lck_mtx_unlock(&dtrace_lock
);
15045 #if defined(__APPLE__)
15046 lck_rw_lock_exclusive(&dtrace_dof_mode_lock
);
15049 * If we are currently lazy, transition states.
15051 * Unlike dtrace_close, we do not need to check the
15052 * value of dtrace_opens, as any positive value (and
15053 * we count as 1) means we transition states.
15055 if (dtrace_dof_mode
== DTRACE_DOF_MODE_LAZY_ON
) {
15056 dtrace_dof_mode
= DTRACE_DOF_MODE_LAZY_OFF
;
15059 * Iterate all existing processes and load lazy dofs.
15061 proc_iterate(PROC_ALLPROCLIST
| PROC_NOWAITTRANS
,
15062 dtrace_lazy_dofs_proc_iterate_doit
,
15064 dtrace_lazy_dofs_proc_iterate_filter
,
15068 lck_rw_unlock_exclusive(&dtrace_dof_mode_lock
);
15076 dtrace_close(dev_t dev
, int flag
, int otyp
, cred_t
*cred_p
)
15078 minor_t minor
= getminor(dev
);
15079 dtrace_state_t
*state
;
15081 #if !defined(__APPLE__)
15082 if (minor
== DTRACEMNRN_HELPER
)
15085 /* Darwin puts Helper on its own major device. */
15086 #endif /* __APPLE__ */
15088 state
= ddi_get_soft_state(dtrace_softstate
, minor
);
15090 lck_mtx_lock(&cpu_lock
);
15091 lck_mtx_lock(&dtrace_lock
);
15093 if (state
->dts_anon
) {
15095 * There is anonymous state. Destroy that first.
15097 ASSERT(dtrace_anon
.dta_state
== NULL
);
15098 dtrace_state_destroy(state
->dts_anon
);
15101 dtrace_state_destroy(state
);
15102 ASSERT(dtrace_opens
> 0);
15103 if (--dtrace_opens
== 0)
15104 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE
);
15106 lck_mtx_unlock(&dtrace_lock
);
15107 lck_mtx_unlock(&cpu_lock
);
15109 #if defined(__APPLE__)
15112 * Lock ordering requires the dof mode lock be taken before
15115 lck_rw_lock_exclusive(&dtrace_dof_mode_lock
);
15116 lck_mtx_lock(&dtrace_lock
);
15119 * If we are currently lazy-off, and this is the last close, transition to
15122 if (dtrace_dof_mode
== DTRACE_DOF_MODE_LAZY_OFF
&& dtrace_opens
== 0) {
15123 dtrace_dof_mode
= DTRACE_DOF_MODE_LAZY_ON
;
15126 lck_mtx_unlock(&dtrace_lock
);
15127 lck_rw_unlock_exclusive(&dtrace_dof_mode_lock
);
#if defined(__APPLE__)
/*
 * Introduce cast to quiet warnings.
 * XXX: This hides a lot of brokenness.
 */
#define copyin(src, dst, len)	copyin( (user_addr_t)(src), (dst), (len) )
#define copyout(src, dst, len)	copyout( (src), (user_addr_t)(dst), (len) )
#endif /* __APPLE__ */
15142 #if defined(__APPLE__)
15145 dtrace_ioctl_helper(int cmd
, caddr_t arg
, int *rv
)
15149 * Safe to check this outside the dof mode lock
15151 if (dtrace_dof_mode
== DTRACE_DOF_MODE_NEVER
)
15152 return KERN_SUCCESS
;
15155 case DTRACEHIOC_ADDDOF
: {
15156 dof_helper_t
*dhp
= NULL
;
15157 size_t dof_ioctl_data_size
;
15158 dof_ioctl_data_t
* multi_dof
;
15161 user_addr_t user_address
= *(user_addr_t
*)arg
;
15162 uint64_t dof_count
;
15163 int multi_dof_claimed
= 0;
15164 proc_t
* p
= current_proc();
15167 * Read the number of DOF sections being passed in.
15169 if (copyin(user_address
+ offsetof(dof_ioctl_data_t
, dofiod_count
),
15171 sizeof(dof_count
))) {
15172 dtrace_dof_error(NULL
, "failed to copyin dofiod_count");
15177 * Range check the count.
15179 if (dof_count
== 0 || dof_count
> 1024) {
15180 dtrace_dof_error(NULL
, "dofiod_count is not valid");
15185 * Allocate a correctly sized structure and copyin the data.
15187 dof_ioctl_data_size
= DOF_IOCTL_DATA_T_SIZE(dof_count
);
15188 if ((multi_dof
= kmem_alloc(dof_ioctl_data_size
, KM_SLEEP
)) == NULL
)
15191 /* NOTE! We can no longer exit this method via return */
15192 if (copyin(user_address
, multi_dof
, dof_ioctl_data_size
) != 0) {
15193 dtrace_dof_error(NULL
, "failed copyin of dof_ioctl_data_t");
15199 * Check that the count didn't change between the first copyin and the second.
15201 if (multi_dof
->dofiod_count
!= dof_count
) {
15207 * Try to process lazily first.
15209 rval
= dtrace_lazy_dofs_add(p
, multi_dof
, &multi_dof_claimed
);
15212 * If rval is EACCES, we must be non-lazy.
15214 if (rval
== EACCES
) {
15217 * Process each dof_helper_t
15221 dhp
= &multi_dof
->dofiod_helpers
[i
];
15223 dof_hdr_t
*dof
= dtrace_dof_copyin(dhp
->dofhp_dof
, &rval
);
15226 lck_mtx_lock(&dtrace_lock
);
15229 * dtrace_helper_slurp() takes responsibility for the dof --
15230 * it may free it now or it may save it and free it later.
15232 if ((dhp
->dofhp_dof
= (uint64_t)dtrace_helper_slurp(p
, dof
, dhp
)) == -1ULL) {
15236 lck_mtx_unlock(&dtrace_lock
);
15238 } while (++i
< multi_dof
->dofiod_count
&& rval
== 0);
15242 * We need to copyout the multi_dof struct, because it contains
15243 * the generation (unique id) values needed to call DTRACEHIOC_REMOVE
15245 * This could certainly be better optimized.
15247 if (copyout(multi_dof
, user_address
, dof_ioctl_data_size
) != 0) {
15248 dtrace_dof_error(NULL
, "failed copyout of dof_ioctl_data_t");
15249 /* Don't overwrite pre-existing error code */
15250 if (rval
== 0) rval
= EFAULT
;
15255 * If we had to allocate struct memory, free it.
15257 if (multi_dof
!= NULL
&& !multi_dof_claimed
) {
15258 kmem_free(multi_dof
, dof_ioctl_data_size
);
15264 case DTRACEHIOC_REMOVE
: {
15265 int generation
= *(int*)arg
;
15266 proc_t
* p
= current_proc();
15271 int rval
= dtrace_lazy_dofs_remove(p
, generation
);
15274 * EACCES means non-lazy
15276 if (rval
== EACCES
) {
15277 lck_mtx_lock(&dtrace_lock
);
15278 rval
= dtrace_helper_destroygen(p
, generation
);
15279 lck_mtx_unlock(&dtrace_lock
);
15291 #endif /* __APPLE__ */
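
/*
 * Illustrative sketch only (user-space, not part of the driver): the ADDDOF
 * path above copies in just the count, range-checks it, allocates a buffer of
 * the matching size, copies in the whole block, and then re-checks that the
 * count did not change between the two fetches. That double-fetch guard is
 * sketched below; block_t, BLOCK_SIZE, copy_from_user_sim and load_block are
 * hypothetical stand-ins, and the block is compiled out.
 */
#if 0
#include <errno.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

typedef struct { uint64_t count; uint64_t entries[]; } block_t;
#define BLOCK_SIZE(n)	(sizeof(block_t) + (n) * sizeof(uint64_t))

/* Simulated copyin: a real kernel would fetch from user memory here. */
static int
copy_from_user_sim(const void *usrc, void *dst, size_t len)
{
	memcpy(dst, usrc, len);
	return 0;
}

static int
load_block(const void *user_block, block_t **out)
{
	uint64_t count;
	block_t *kblock;

	/* First fetch: just the count, so we know how much to allocate. */
	if (copy_from_user_sim(user_block, &count, sizeof(count)) != 0)
		return EFAULT;
	if (count == 0 || count > 1024)
		return EINVAL;

	if ((kblock = malloc(BLOCK_SIZE(count))) == NULL)
		return ENOMEM;

	/* Second fetch: the whole block. */
	if (copy_from_user_sim(user_block, kblock, BLOCK_SIZE(count)) != 0) {
		free(kblock);
		return EFAULT;
	}

	/* Guard against the count changing between the two fetches. */
	if (kblock->count != count) {
		free(kblock);
		return EFAULT;
	}

	*out = kblock;
	return 0;
}

int
main(void)
{
	block_t *src = malloc(BLOCK_SIZE(2));
	src->count = 2;
	src->entries[0] = 1;
	src->entries[1] = 2;

	block_t *loaded = NULL;
	int err = load_block(src, &loaded);

	free(loaded);
	free(src);
	return err;
}
#endif
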
15295 dtrace_ioctl(dev_t dev
, int cmd
, intptr_t arg
, int md
, cred_t
*cr
, int *rv
)
15297 minor_t minor
= getminor(dev
);
15298 dtrace_state_t
*state
;
15301 #if !defined(__APPLE__)
15302 if (minor
== DTRACEMNRN_HELPER
)
15303 return (dtrace_ioctl_helper(cmd
, arg
, rv
));
15305 /* Darwin puts Helper on its own major device. */
15306 #endif /* __APPLE__ */
15308 state
= ddi_get_soft_state(dtrace_softstate
, minor
);
15310 if (state
->dts_anon
) {
15311 ASSERT(dtrace_anon
.dta_state
== NULL
);
15312 state
= state
->dts_anon
;
15316 case DTRACEIOC_PROVIDER
: {
15317 dtrace_providerdesc_t pvd
;
15318 dtrace_provider_t
*pvp
;
15320 if (copyin((void *)arg
, &pvd
, sizeof (pvd
)) != 0)
15323 pvd
.dtvd_name
[DTRACE_PROVNAMELEN
- 1] = '\0';
15324 lck_mtx_lock(&dtrace_provider_lock
);
15326 for (pvp
= dtrace_provider
; pvp
!= NULL
; pvp
= pvp
->dtpv_next
) {
15327 if (strcmp(pvp
->dtpv_name
, pvd
.dtvd_name
) == 0)
15331 lck_mtx_unlock(&dtrace_provider_lock
);
15336 bcopy(&pvp
->dtpv_priv
, &pvd
.dtvd_priv
, sizeof (dtrace_ppriv_t
));
15337 bcopy(&pvp
->dtpv_attr
, &pvd
.dtvd_attr
, sizeof (dtrace_pattr_t
));
15338 if (copyout(&pvd
, (void *)arg
, sizeof (pvd
)) != 0)
15344 case DTRACEIOC_EPROBE
: {
15345 dtrace_eprobedesc_t epdesc
;
15347 dtrace_action_t
*act
;
15353 if (copyin((void *)arg
, &epdesc
, sizeof (epdesc
)) != 0)
15356 lck_mtx_lock(&dtrace_lock
);
15358 if ((ecb
= dtrace_epid2ecb(state
, epdesc
.dtepd_epid
)) == NULL
) {
15359 lck_mtx_unlock(&dtrace_lock
);
15363 if (ecb
->dte_probe
== NULL
) {
15364 lck_mtx_unlock(&dtrace_lock
);
15368 epdesc
.dtepd_probeid
= ecb
->dte_probe
->dtpr_id
;
15369 epdesc
.dtepd_uarg
= ecb
->dte_uarg
;
15370 epdesc
.dtepd_size
= ecb
->dte_size
;
15372 nrecs
= epdesc
.dtepd_nrecs
;
15373 epdesc
.dtepd_nrecs
= 0;
15374 for (act
= ecb
->dte_action
; act
!= NULL
; act
= act
->dta_next
) {
15375 if (DTRACEACT_ISAGG(act
->dta_kind
) || act
->dta_intuple
)
15378 epdesc
.dtepd_nrecs
++;
15382 * Now that we have the size, we need to allocate a temporary
15383 * buffer in which to store the complete description. We need
15384 * the temporary buffer to be able to drop dtrace_lock()
15385 * across the copyout(), below.
15387 size
= sizeof (dtrace_eprobedesc_t
) +
15388 (epdesc
.dtepd_nrecs
* sizeof (dtrace_recdesc_t
));
15390 buf
= kmem_alloc(size
, KM_SLEEP
);
15391 dest
= (uintptr_t)buf
;
15393 bcopy(&epdesc
, (void *)dest
, sizeof (epdesc
));
15394 dest
+= offsetof(dtrace_eprobedesc_t
, dtepd_rec
[0]);
15396 for (act
= ecb
->dte_action
; act
!= NULL
; act
= act
->dta_next
) {
15397 if (DTRACEACT_ISAGG(act
->dta_kind
) || act
->dta_intuple
)
15403 bcopy(&act
->dta_rec
, (void *)dest
,
15404 sizeof (dtrace_recdesc_t
));
15405 dest
+= sizeof (dtrace_recdesc_t
);
15408 lck_mtx_unlock(&dtrace_lock
);
15410 if (copyout(buf
, (void *)arg
, dest
- (uintptr_t)buf
) != 0) {
15411 kmem_free(buf
, size
);
15415 kmem_free(buf
, size
);
15419 case DTRACEIOC_AGGDESC
: {
15420 dtrace_aggdesc_t aggdesc
;
15421 dtrace_action_t
*act
;
15422 dtrace_aggregation_t
*agg
;
15425 dtrace_recdesc_t
*lrec
;
15430 if (copyin((void *)arg
, &aggdesc
, sizeof (aggdesc
)) != 0)
15433 lck_mtx_lock(&dtrace_lock
);
15435 if ((agg
= dtrace_aggid2agg(state
, aggdesc
.dtagd_id
)) == NULL
) {
15436 lck_mtx_unlock(&dtrace_lock
);
15440 aggdesc
.dtagd_epid
= agg
->dtag_ecb
->dte_epid
;
15442 nrecs
= aggdesc
.dtagd_nrecs
;
15443 aggdesc
.dtagd_nrecs
= 0;
15445 offs
= agg
->dtag_base
;
15446 lrec
= &agg
->dtag_action
.dta_rec
;
15447 aggdesc
.dtagd_size
= lrec
->dtrd_offset
+ lrec
->dtrd_size
- offs
;
15449 for (act
= agg
->dtag_first
; ; act
= act
->dta_next
) {
15450 ASSERT(act
->dta_intuple
||
15451 DTRACEACT_ISAGG(act
->dta_kind
));
15454 * If this action has a record size of zero, it
15455 * denotes an argument to the aggregating action.
15456 * Because the presence of this record doesn't (or
15457 * shouldn't) affect the way the data is interpreted,
15458 * we don't copy it out to save user-level the
15459 * confusion of dealing with a zero-length record.
15461 if (act
->dta_rec
.dtrd_size
== 0) {
15462 ASSERT(agg
->dtag_hasarg
);
15466 aggdesc
.dtagd_nrecs
++;
15468 if (act
== &agg
->dtag_action
)
15473 * Now that we have the size, we need to allocate a temporary
15474 * buffer in which to store the complete description. We need
15475 * the temporary buffer to be able to drop dtrace_lock()
15476 * across the copyout(), below.
15478 size
= sizeof (dtrace_aggdesc_t
) +
15479 (aggdesc
.dtagd_nrecs
* sizeof (dtrace_recdesc_t
));
15481 buf
= kmem_alloc(size
, KM_SLEEP
);
15482 dest
= (uintptr_t)buf
;
15484 bcopy(&aggdesc
, (void *)dest
, sizeof (aggdesc
));
15485 dest
+= offsetof(dtrace_aggdesc_t
, dtagd_rec
[0]);
15487 for (act
= agg
->dtag_first
; ; act
= act
->dta_next
) {
15488 dtrace_recdesc_t rec
= act
->dta_rec
;
15491 * See the comment in the above loop for why we pass
15492 * over zero-length records.
15494 if (rec
.dtrd_size
== 0) {
15495 ASSERT(agg
->dtag_hasarg
);
15502 rec
.dtrd_offset
-= offs
;
15503 bcopy(&rec
, (void *)dest
, sizeof (rec
));
15504 dest
+= sizeof (dtrace_recdesc_t
);
15506 if (act
== &agg
->dtag_action
)
15510 lck_mtx_unlock(&dtrace_lock
);
15512 if (copyout(buf
, (void *)arg
, dest
- (uintptr_t)buf
) != 0) {
15513 kmem_free(buf
, size
);
15517 kmem_free(buf
, size
);
15521 case DTRACEIOC_ENABLE
: {
15523 dtrace_enabling_t
*enab
= NULL
;
15524 dtrace_vstate_t
*vstate
;
15530 * If a NULL argument has been passed, we take this as our
15531 * cue to reevaluate our enablings.
15534 lck_mtx_lock(&cpu_lock
);
15535 lck_mtx_lock(&dtrace_lock
);
15536 err
= dtrace_enabling_matchstate(state
, rv
);
15537 lck_mtx_unlock(&dtrace_lock
);
15538 lck_mtx_unlock(&cpu_lock
);
15543 if ((dof
= dtrace_dof_copyin(arg
, &rval
)) == NULL
)
15546 lck_mtx_lock(&cpu_lock
);
15547 lck_mtx_lock(&dtrace_lock
);
15548 vstate
= &state
->dts_vstate
;
15550 if (state
->dts_activity
!= DTRACE_ACTIVITY_INACTIVE
) {
15551 lck_mtx_unlock(&dtrace_lock
);
15552 lck_mtx_unlock(&cpu_lock
);
15553 dtrace_dof_destroy(dof
);
15557 if (dtrace_dof_slurp(dof
, vstate
, cr
, &enab
, 0, B_TRUE
) != 0) {
15558 lck_mtx_unlock(&dtrace_lock
);
15559 lck_mtx_unlock(&cpu_lock
);
15560 dtrace_dof_destroy(dof
);
15564 if ((rval
= dtrace_dof_options(dof
, state
)) != 0) {
15565 dtrace_enabling_destroy(enab
);
15566 lck_mtx_unlock(&dtrace_lock
);
15567 lck_mtx_unlock(&cpu_lock
);
15568 dtrace_dof_destroy(dof
);
15572 if ((err
= dtrace_enabling_match(enab
, rv
)) == 0) {
15573 err
= dtrace_enabling_retain(enab
);
15575 dtrace_enabling_destroy(enab
);
15578 lck_mtx_unlock(&cpu_lock
);
15579 lck_mtx_unlock(&dtrace_lock
);
15580 dtrace_dof_destroy(dof
);
15585 case DTRACEIOC_REPLICATE
: {
15586 dtrace_repldesc_t desc
;
15587 dtrace_probedesc_t
*match
= &desc
.dtrpd_match
;
15588 dtrace_probedesc_t
*create
= &desc
.dtrpd_create
;
15591 if (copyin((void *)arg
, &desc
, sizeof (desc
)) != 0)
15594 match
->dtpd_provider
[DTRACE_PROVNAMELEN
- 1] = '\0';
15595 match
->dtpd_mod
[DTRACE_MODNAMELEN
- 1] = '\0';
15596 match
->dtpd_func
[DTRACE_FUNCNAMELEN
- 1] = '\0';
15597 match
->dtpd_name
[DTRACE_NAMELEN
- 1] = '\0';
15599 create
->dtpd_provider
[DTRACE_PROVNAMELEN
- 1] = '\0';
15600 create
->dtpd_mod
[DTRACE_MODNAMELEN
- 1] = '\0';
15601 create
->dtpd_func
[DTRACE_FUNCNAMELEN
- 1] = '\0';
15602 create
->dtpd_name
[DTRACE_NAMELEN
- 1] = '\0';
15604 lck_mtx_lock(&dtrace_lock
);
15605 err
= dtrace_enabling_replicate(state
, match
, create
);
15606 lck_mtx_unlock(&dtrace_lock
);
15611 case DTRACEIOC_PROBEMATCH
:
15612 case DTRACEIOC_PROBES
: {
15613 dtrace_probe_t
*probe
= NULL
;
15614 dtrace_probedesc_t desc
;
15615 dtrace_probekey_t pkey
;
15622 if (copyin((void *)arg
, &desc
, sizeof (desc
)) != 0)
15625 desc
.dtpd_provider
[DTRACE_PROVNAMELEN
- 1] = '\0';
15626 desc
.dtpd_mod
[DTRACE_MODNAMELEN
- 1] = '\0';
15627 desc
.dtpd_func
[DTRACE_FUNCNAMELEN
- 1] = '\0';
15628 desc
.dtpd_name
[DTRACE_NAMELEN
- 1] = '\0';
15631 * Before we attempt to match this probe, we want to give
15632 * all providers the opportunity to provide it.
15634 if (desc
.dtpd_id
== DTRACE_IDNONE
) {
15635 lck_mtx_lock(&dtrace_provider_lock
);
15636 dtrace_probe_provide(&desc
, NULL
);
15637 lck_mtx_unlock(&dtrace_provider_lock
);
15641 if (cmd
== DTRACEIOC_PROBEMATCH
) {
15642 dtrace_probekey(&desc
, &pkey
);
15643 pkey
.dtpk_id
= DTRACE_IDNONE
;
15646 dtrace_cred2priv(cr
, &priv
, &uid
, &zoneid
);
15648 lck_mtx_lock(&dtrace_lock
);
15650 if (cmd
== DTRACEIOC_PROBEMATCH
) {
15651 for (i
= desc
.dtpd_id
; i
<= dtrace_nprobes
; i
++) {
15652 if ((probe
= dtrace_probes
[i
- 1]) != NULL
&&
15653 (m
= dtrace_match_probe(probe
, &pkey
,
15654 priv
, uid
, zoneid
)) != 0)
15659 lck_mtx_unlock(&dtrace_lock
);
15664 for (i
= desc
.dtpd_id
; i
<= dtrace_nprobes
; i
++) {
15665 if ((probe
= dtrace_probes
[i
- 1]) != NULL
&&
15666 dtrace_match_priv(probe
, priv
, uid
, zoneid
))
15671 if (probe
== NULL
) {
15672 lck_mtx_unlock(&dtrace_lock
);
15676 dtrace_probe_description(probe
, &desc
);
15677 lck_mtx_unlock(&dtrace_lock
);
15679 if (copyout(&desc
, (void *)arg
, sizeof (desc
)) != 0)
15685 case DTRACEIOC_PROBEARG
: {
15686 dtrace_argdesc_t desc
;
15687 dtrace_probe_t
*probe
;
15688 dtrace_provider_t
*prov
;
15690 if (copyin((void *)arg
, &desc
, sizeof (desc
)) != 0)
15693 if (desc
.dtargd_id
== DTRACE_IDNONE
)
15696 if (desc
.dtargd_ndx
== DTRACE_ARGNONE
)
15699 lck_mtx_lock(&dtrace_provider_lock
);
15700 lck_mtx_lock(&mod_lock
);
15701 lck_mtx_lock(&dtrace_lock
);
15703 if (desc
.dtargd_id
> dtrace_nprobes
) {
15704 lck_mtx_unlock(&dtrace_lock
);
15705 lck_mtx_unlock(&mod_lock
);
15706 lck_mtx_unlock(&dtrace_provider_lock
);
15710 if ((probe
= dtrace_probes
[desc
.dtargd_id
- 1]) == NULL
) {
15711 lck_mtx_unlock(&dtrace_lock
);
15712 lck_mtx_unlock(&mod_lock
);
15713 lck_mtx_unlock(&dtrace_provider_lock
);
15717 lck_mtx_unlock(&dtrace_lock
);
15719 prov
= probe
->dtpr_provider
;
15721 if (prov
->dtpv_pops
.dtps_getargdesc
== NULL
) {
15723 * There isn't any typed information for this probe.
15724 * Set the argument number to DTRACE_ARGNONE.
15726 desc
.dtargd_ndx
= DTRACE_ARGNONE
;
15728 desc
.dtargd_native
[0] = '\0';
15729 desc
.dtargd_xlate
[0] = '\0';
15730 desc
.dtargd_mapping
= desc
.dtargd_ndx
;
15732 prov
->dtpv_pops
.dtps_getargdesc(prov
->dtpv_arg
,
15733 probe
->dtpr_id
, probe
->dtpr_arg
, &desc
);
15736 lck_mtx_unlock(&mod_lock
);
15737 lck_mtx_unlock(&dtrace_provider_lock
);
15739 if (copyout(&desc
, (void *)arg
, sizeof (desc
)) != 0)
15745 case DTRACEIOC_GO
: {
15746 processorid_t cpuid
;
15747 rval
= dtrace_state_go(state
, &cpuid
);
15752 if (copyout(&cpuid
, (void *)arg
, sizeof (cpuid
)) != 0)
15758 case DTRACEIOC_STOP
: {
15759 processorid_t cpuid
;
15761 lck_mtx_lock(&dtrace_lock
);
15762 rval
= dtrace_state_stop(state
, &cpuid
);
15763 lck_mtx_unlock(&dtrace_lock
);
15768 if (copyout(&cpuid
, (void *)arg
, sizeof (cpuid
)) != 0)
15774 case DTRACEIOC_DOFGET
: {
15775 dof_hdr_t hdr
, *dof
;
15778 if (copyin((void *)arg
, &hdr
, sizeof (hdr
)) != 0)
15781 lck_mtx_lock(&dtrace_lock
);
15782 dof
= dtrace_dof_create(state
);
15783 lck_mtx_unlock(&dtrace_lock
);
15785 len
= MIN(hdr
.dofh_loadsz
, dof
->dofh_loadsz
);
15786 rval
= copyout(dof
, (void *)arg
, len
);
15787 dtrace_dof_destroy(dof
);
15789 return (rval
== 0 ? 0 : EFAULT
);
15792 case DTRACEIOC_AGGSNAP
:
15793 case DTRACEIOC_BUFSNAP
: {
15794 dtrace_bufdesc_t desc
;
15796 dtrace_buffer_t
*buf
;
15798 if (copyin((void *)arg
, &desc
, sizeof (desc
)) != 0)
15801 if (desc
.dtbd_cpu
< 0 || desc
.dtbd_cpu
>= NCPU
)
15804 lck_mtx_lock(&dtrace_lock
);
15806 if (cmd
== DTRACEIOC_BUFSNAP
) {
15807 buf
= &state
->dts_buffer
[desc
.dtbd_cpu
];
15809 buf
= &state
->dts_aggbuffer
[desc
.dtbd_cpu
];
15812 if (buf
->dtb_flags
& (DTRACEBUF_RING
| DTRACEBUF_FILL
)) {
15813 size_t sz
= buf
->dtb_offset
;
15815 if (state
->dts_activity
!= DTRACE_ACTIVITY_STOPPED
) {
15816 lck_mtx_unlock(&dtrace_lock
);
15821 * If this buffer has already been consumed, we're
15822 * going to indicate that there's nothing left here
15825 if (buf
->dtb_flags
& DTRACEBUF_CONSUMED
) {
15826 lck_mtx_unlock(&dtrace_lock
);
15828 desc
.dtbd_size
= 0;
15829 desc
.dtbd_drops
= 0;
15830 desc
.dtbd_errors
= 0;
15831 desc
.dtbd_oldest
= 0;
15832 sz
= sizeof (desc
);
15834 if (copyout(&desc
, (void *)arg
, sz
) != 0)
15841 * If this is a ring buffer that has wrapped, we want
15842 * to copy the whole thing out.
15844 if (buf
->dtb_flags
& DTRACEBUF_WRAPPED
) {
15845 dtrace_buffer_polish(buf
);
15846 sz
= buf
->dtb_size
;
15849 if (copyout(buf
->dtb_tomax
, desc
.dtbd_data
, sz
) != 0) {
15850 lck_mtx_unlock(&dtrace_lock
);
15854 desc
.dtbd_size
= sz
;
15855 desc
.dtbd_drops
= buf
->dtb_drops
;
15856 desc
.dtbd_errors
= buf
->dtb_errors
;
15857 desc
.dtbd_oldest
= buf
->dtb_xamot_offset
;
15859 lck_mtx_unlock(&dtrace_lock
);
15861 if (copyout(&desc
, (void *)arg
, sizeof (desc
)) != 0)
15864 buf
->dtb_flags
|= DTRACEBUF_CONSUMED
;
15869 if (buf
->dtb_tomax
== NULL
) {
15870 ASSERT(buf
->dtb_xamot
== NULL
);
15871 lck_mtx_unlock(&dtrace_lock
);
15875 cached
= buf
->dtb_tomax
;
15876 ASSERT(!(buf
->dtb_flags
& DTRACEBUF_NOSWITCH
));
15878 dtrace_xcall(desc
.dtbd_cpu
,
15879 (dtrace_xcall_t
)dtrace_buffer_switch
, buf
);
15881 state
->dts_errors
+= buf
->dtb_xamot_errors
;
15884 * If the buffers did not actually switch, then the cross call
15885 * did not take place -- presumably because the given CPU is
15886 * not in the ready set. If this is the case, we'll return
15889 if (buf
->dtb_tomax
== cached
) {
15890 ASSERT(buf
->dtb_xamot
!= cached
);
15891 lck_mtx_unlock(&dtrace_lock
);
15895 ASSERT(cached
== buf
->dtb_xamot
);
15898 * We have our snapshot; now copy it out.
15900 if (copyout(buf
->dtb_xamot
, desc
.dtbd_data
,
15901 buf
->dtb_xamot_offset
) != 0) {
15902 lck_mtx_unlock(&dtrace_lock
);
15906 desc
.dtbd_size
= buf
->dtb_xamot_offset
;
15907 desc
.dtbd_drops
= buf
->dtb_xamot_drops
;
15908 desc
.dtbd_errors
= buf
->dtb_xamot_errors
;
15909 desc
.dtbd_oldest
= 0;
15911 lck_mtx_unlock(&dtrace_lock
);
15914 * Finally, copy out the buffer description.
15916 if (copyout(&desc
, (void *)arg
, sizeof (desc
)) != 0)
15922 case DTRACEIOC_CONF
: {
15923 dtrace_conf_t conf
;
15925 bzero(&conf
, sizeof (conf
));
15926 conf
.dtc_difversion
= DIF_VERSION
;
15927 conf
.dtc_difintregs
= DIF_DIR_NREGS
;
15928 conf
.dtc_diftupregs
= DIF_DTR_NREGS
;
15929 conf
.dtc_ctfmodel
= CTF_MODEL_NATIVE
;
15931 if (copyout(&conf
, (void *)arg
, sizeof (conf
)) != 0)
15937 case DTRACEIOC_STATUS
: {
15938 dtrace_status_t stat
;
15939 dtrace_dstate_t
*dstate
;
15944 * See the comment in dtrace_state_deadman() for the reason
15945 * for setting dts_laststatus to INT64_MAX before setting
15946 * it to the correct value.
15948 state
->dts_laststatus
= INT64_MAX
;
15949 dtrace_membar_producer();
15950 state
->dts_laststatus
= dtrace_gethrtime();
15952 bzero(&stat
, sizeof (stat
));
15954 lck_mtx_lock(&dtrace_lock
);
15956 if (state
->dts_activity
== DTRACE_ACTIVITY_INACTIVE
) {
15957 lck_mtx_unlock(&dtrace_lock
);
15961 if (state
->dts_activity
== DTRACE_ACTIVITY_DRAINING
)
15962 stat
.dtst_exiting
= 1;
15964 nerrs
= state
->dts_errors
;
15965 dstate
= &state
->dts_vstate
.dtvs_dynvars
;
15967 for (i
= 0; i
< NCPU
; i
++) {
15968 dtrace_dstate_percpu_t
*dcpu
= &dstate
->dtds_percpu
[i
];
15970 stat
.dtst_dyndrops
+= dcpu
->dtdsc_drops
;
15971 stat
.dtst_dyndrops_dirty
+= dcpu
->dtdsc_dirty_drops
;
15972 stat
.dtst_dyndrops_rinsing
+= dcpu
->dtdsc_rinsing_drops
;
15974 if (state
->dts_buffer
[i
].dtb_flags
& DTRACEBUF_FULL
)
15975 stat
.dtst_filled
++;
15977 nerrs
+= state
->dts_buffer
[i
].dtb_errors
;
15979 for (j
= 0; j
< state
->dts_nspeculations
; j
++) {
15980 dtrace_speculation_t
*spec
;
15981 dtrace_buffer_t
*buf
;
15983 spec
= &state
->dts_speculations
[j
];
15984 buf
= &spec
->dtsp_buffer
[i
];
15985 stat
.dtst_specdrops
+= buf
->dtb_xamot_drops
;
15989 stat
.dtst_specdrops_busy
= state
->dts_speculations_busy
;
15990 stat
.dtst_specdrops_unavail
= state
->dts_speculations_unavail
;
15991 stat
.dtst_stkstroverflows
= state
->dts_stkstroverflows
;
15992 stat
.dtst_dblerrors
= state
->dts_dblerrors
;
15994 (state
->dts_activity
== DTRACE_ACTIVITY_KILLED
);
15995 stat
.dtst_errors
= nerrs
;
15997 lck_mtx_unlock(&dtrace_lock
);
15999 if (copyout(&stat
, (void *)arg
, sizeof (stat
)) != 0)
16005 case DTRACEIOC_FORMAT
: {
16006 dtrace_fmtdesc_t fmt
;
16010 if (copyin((void *)arg
, &fmt
, sizeof (fmt
)) != 0)
16013 lck_mtx_lock(&dtrace_lock
);
16015 if (fmt
.dtfd_format
== 0 ||
16016 fmt
.dtfd_format
> state
->dts_nformats
) {
16017 lck_mtx_unlock(&dtrace_lock
);
16022 * Format strings are allocated contiguously and they are
16023 * never freed; if a format index is less than the number
16024 * of formats, we can assert that the format map is non-NULL
16025 * and that the format for the specified index is non-NULL.
16027 ASSERT(state
->dts_formats
!= NULL
);
16028 str
= state
->dts_formats
[fmt
.dtfd_format
- 1];
16029 ASSERT(str
!= NULL
);
16031 len
= strlen(str
) + 1;
16033 if (len
> fmt
.dtfd_length
) {
16034 fmt
.dtfd_length
= len
;
16036 if (copyout(&fmt
, (void *)arg
, sizeof (fmt
)) != 0) {
16037 lck_mtx_unlock(&dtrace_lock
);
16041 if (copyout(str
, fmt
.dtfd_string
, len
) != 0) {
16042 lck_mtx_unlock(&dtrace_lock
);
16047 lck_mtx_unlock(&dtrace_lock
);
16058 #if defined(__APPLE__)
16061 #endif /* __APPLE__ */
#if !defined(__APPLE__)
/*ARGSUSED*/
static int
dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	dtrace_state_t *state;

	switch (cmd) {
	case DDI_DETACH:
		break;

	case DDI_SUSPEND:
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&dtrace_lock);

	ASSERT(dtrace_opens == 0);

	if (dtrace_helpers > 0) {
		lck_mtx_unlock(&dtrace_provider_lock);
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&cpu_lock);
		return (DDI_FAILURE);
	}

	if (dtrace_unregister((dtrace_provider_id_t)dtrace_provider) != 0) {
		lck_mtx_unlock(&dtrace_provider_lock);
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&cpu_lock);
		return (DDI_FAILURE);
	}

	dtrace_provider = NULL;

	if ((state = dtrace_anon_grab()) != NULL) {
		/*
		 * If there were ECBs on this state, the provider should
		 * have not been allowed to detach; assert that there is
		 * none.
		 */
		ASSERT(state->dts_necbs == 0);
		dtrace_state_destroy(state);

		/*
		 * If we're being detached with anonymous state, we need to
		 * indicate to the kernel debugger that DTrace is now inactive.
		 */
		(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
	}

	bzero(&dtrace_anon, sizeof (dtrace_anon_t));
	unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);
	dtrace_cpu_init = NULL;
	dtrace_helpers_cleanup = NULL;
	dtrace_helpers_fork = NULL;
	dtrace_cpustart_init = NULL;
	dtrace_cpustart_fini = NULL;
	dtrace_debugger_init = NULL;
	dtrace_debugger_fini = NULL;
	dtrace_kreloc_init = NULL;
	dtrace_kreloc_fini = NULL;
	dtrace_modload = NULL;
	dtrace_modunload = NULL;

	lck_mtx_unlock(&cpu_lock);

	if (dtrace_helptrace_enabled) {
		kmem_free(dtrace_helptrace_buffer, dtrace_helptrace_bufsize);
		dtrace_helptrace_buffer = NULL;
	}

	kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *));
	dtrace_probes = NULL;
	dtrace_nprobes = 0;

	dtrace_hash_destroy(dtrace_bymod);
	dtrace_hash_destroy(dtrace_byfunc);
	dtrace_hash_destroy(dtrace_byname);
	dtrace_bymod = NULL;
	dtrace_byfunc = NULL;
	dtrace_byname = NULL;

	kmem_cache_destroy(dtrace_state_cache);
	vmem_destroy(dtrace_minor);
	vmem_destroy(dtrace_arena);

	if (dtrace_toxrange != NULL) {
		kmem_free(dtrace_toxrange,
		    dtrace_toxranges_max * sizeof (dtrace_toxrange_t));
		dtrace_toxrange = NULL;
		dtrace_toxranges = 0;
		dtrace_toxranges_max = 0;
	}

	ddi_remove_minor_node(dtrace_devi, NULL);
	dtrace_devi = NULL;

	ddi_soft_state_fini(&dtrace_softstate);

	ASSERT(dtrace_vtime_references == 0);
	ASSERT(dtrace_opens == 0);
	ASSERT(dtrace_retained == NULL);

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_provider_lock);

	/*
	 * We don't destroy the task queue until after we have dropped our
	 * locks (taskq_destroy() may block on running tasks). To prevent
	 * attempting to do work after we have effectively detached but before
	 * the task queue has been destroyed, all tasks dispatched via the
	 * task queue must check that DTrace is still attached before
	 * performing any operation.
	 */
	taskq_destroy(dtrace_taskq);
	dtrace_taskq = NULL;

	return (DDI_SUCCESS);
}
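
/*
 * Illustrative sketch (hypothetical, not from the original source): per the
 * comment above, a task dispatched via dtrace_taskq would guard itself
 * against a concurrent detach roughly as follows before touching any
 * framework state. The function name and the specific "still attached"
 * test are assumptions for illustration only.
 *
 *	static void
 *	dtrace_example_task(void *arg)
 *	{
 *		lck_mtx_lock(&dtrace_lock);
 *		if (dtrace_provider == NULL) {
 *			// Effectively detached; do nothing.
 *			lck_mtx_unlock(&dtrace_lock);
 *			return;
 *		}
 *		// ... safe to perform work here ...
 *		lck_mtx_unlock(&dtrace_lock);
 *	}
 */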
/*ARGSUSED*/
static int
dtrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)dtrace_devi;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}
static struct cb_ops dtrace_cb_ops = {
	dtrace_open,		/* open */
	dtrace_close,		/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	dtrace_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};
static struct dev_ops dtrace_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	dtrace_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	dtrace_attach,		/* attach */
	dtrace_detach,		/* detach */
	nodev,			/* reset */
	&dtrace_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	nodev			/* dev power */
};
static struct modldrv modldrv = {
	&mod_driverops,		/* module type (this is a pseudo driver) */
	"Dynamic Tracing",	/* name of module */
	&dtrace_ops,		/* driver ops */
};
static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}
#else
d_open_t _dtrace_open, helper_open;
d_close_t _dtrace_close, helper_close;
d_ioctl_t _dtrace_ioctl, helper_ioctl;
static int
_dtrace_open(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(p)
	dev_t locdev = dev;

	return dtrace_open( &locdev, flags, devtype, CRED());
}
static int
helper_open(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(dev,flags,devtype,p)
	return 0;
}
static int
_dtrace_close(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(p)
	return dtrace_close( dev, flags, devtype, CRED());
}
static int
helper_close(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(dev,flags,devtype,p)
	return 0;
}
static int
_dtrace_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p)
{
#pragma unused(p)
	int err, rv = 0;

	err = dtrace_ioctl(dev, (int)cmd, *(intptr_t *)data, fflag, CRED(), &rv);

	/* XXX Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */
	if (err != 0) {
		ASSERT( (err & 0xfffff000) == 0 );
		return (err & 0xfff); /* ioctl returns -1 and errno set to an error code < 4096 */
	} else if (rv != 0) {
		ASSERT( (rv & 0xfff00000) == 0 );
		return (((rv & 0xfffff) << 12)); /* ioctl returns -1 and errno set to a return value >= 4096 */
	} else
		return 0;
}
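
/*
 * Illustrative sketch (hypothetical, not from the original source): given
 * the errno overloading above, a user-space caller could recover the
 * Solaris-style result roughly as follows. The helper name is made up.
 *
 *	static int
 *	dtrace_ioctl_result(int ioctl_errno, int *rvp)
 *	{
 *		if (ioctl_errno < 4096) {
 *			*rvp = 0;
 *			return (ioctl_errno);	// plain error code
 *		}
 *		*rvp = ioctl_errno >> 12;	// encoded return value
 *		return (0);
 *	}
 */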
static int
helper_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p)
{
#pragma unused(dev,fflag,p)
	int err, rv = 0;

	err = dtrace_ioctl_helper((int)cmd, data, &rv);
	/* XXX Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */
	if (err != 0) {
		ASSERT( (err & 0xfffff000) == 0 );
		return (err & 0xfff); /* ioctl returns -1 and errno set to an error code < 4096 */
	} else if (rv != 0) {
		ASSERT( (rv & 0xfff00000) == 0 );
		return (((rv & 0xfffff) << 12)); /* ioctl returns -1 and errno set to a return value >= 4096 */
	} else
		return 0;
}
#define HELPER_MAJOR -24 /* let the kernel pick the device number */
/*
 * A struct describing which functions will get invoked for certain
 * actions.
 */
static struct cdevsw helper_cdevsw =
{
	helper_open,		/* open */
	helper_close,		/* close */
	eno_rdwrt,		/* read */
	eno_rdwrt,		/* write */
	helper_ioctl,		/* ioctl */
	(stop_fcn_t *)nulldev,	/* stop */
	(reset_fcn_t *)nulldev,	/* reset */
	NULL,			/* tty's */
	eno_select,		/* select */
	eno_mmap,		/* mmap */
	eno_strat,		/* strategy */
	eno_getc,		/* getc */
	eno_putc,		/* putc */
	0			/* type */
};
static int helper_majdevno = 0;

static int gDTraceInited = 0;
void
helper_init( void )
{
	/*
	 * Once the "helper" is initialized, it can take ioctl calls that use locks
	 * and zones initialized in dtrace_init. Make certain dtrace_init was called
	 * before the helper is initialized.
	 */
	if (!gDTraceInited) {
		panic("helper_init before dtrace_init\n");
	}

	if (0 >= helper_majdevno)
	{
		helper_majdevno = cdevsw_add(HELPER_MAJOR, &helper_cdevsw);

		if (helper_majdevno < 0) {
			printf("helper_init: failed to allocate a major number!\n");
			return;
		}

		if (NULL == devfs_make_node( makedev(helper_majdevno, 0), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666,
					DTRACEMNR_HELPER, 0 )) {
			printf("dtrace_init: failed to devfs_make_node for helper!\n");
			return;
		}
	} else
		panic("helper_init: called twice!\n");
}

#undef HELPER_MAJOR
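
/*
 * Usage sketch (hypothetical, not from the original source): user processes
 * reach this driver by opening the helper node created above and issuing
 * ioctls that land in helper_ioctl()/dtrace_ioctl_helper(), roughly:
 *
 *	int fd = open("/dev/" DTRACEMNR_HELPER, O_RDWR);
 *	if (fd != -1) {
 *		// e.g. hand DOF to the kernel via a DTRACEHIOC_* ioctl
 *		close(fd);
 *	}
 *
 * The exact ioctl command and argument layout are defined in <sys/dtrace.h>
 * and are not reproduced here.
 */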
/*
 * Called with DEVFS_LOCK held, so vmem_alloc's underlying blist structures are protected.
 */
static int
dtrace_clone_func(dev_t dev, int action)
{
#pragma unused(dev)

	if (action == DEVFS_CLONE_ALLOC) {
		if (NULL == dtrace_minor) /* Arena not created yet!?! */
			return 0;
		else {
			/*
			 * Propose a minor number, namely the next number that vmem_alloc() will return.
			 * Immediately put it back in play by calling vmem_free().
			 */
			int ret = (int)(uintptr_t)vmem_alloc(dtrace_minor, 1, VM_BESTFIT | VM_SLEEP);

			vmem_free(dtrace_minor, (void *)(uintptr_t)ret, 1);

			return ret;
		}
	}
	else if (action == DEVFS_CLONE_FREE) {
		return 0;
	}
	else return -1;
}
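
/*
 * Design note with a sketch (assumption, not from the original source):
 * DEVFS_CLONE_ALLOC only previews the minor number; nothing is committed in
 * the clone callback itself. The expectation is that the subsequent open of
 * the cloned node performs the real allocation from the same arena and,
 * barring an intervening allocation, receives the number previewed here:
 *
 *	// presumed open-path counterpart
 *	minor_t minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1,
 *	    VM_BESTFIT | VM_SLEEP);
 *
 * Holding DEVFS_LOCK (per the comment above) keeps the arena consistent
 * while the preview is taken.
 */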
#define DTRACE_MAJOR -24 /* let the kernel pick the device number */
static struct cdevsw dtrace_cdevsw =
{
	_dtrace_open,		/* open */
	_dtrace_close,		/* close */
	eno_rdwrt,		/* read */
	eno_rdwrt,		/* write */
	_dtrace_ioctl,		/* ioctl */
	(stop_fcn_t *)nulldev,	/* stop */
	(reset_fcn_t *)nulldev,	/* reset */
	NULL,			/* tty's */
	eno_select,		/* select */
	eno_mmap,		/* mmap */
	eno_strat,		/* strategy */
	eno_getc,		/* getc */
	eno_putc,		/* putc */
	0			/* type */
};
lck_attr_t *dtrace_lck_attr;
lck_grp_attr_t *dtrace_lck_grp_attr;
lck_grp_t *dtrace_lck_grp;

static int gMajDevNo;
void
dtrace_init( void )
{
	if (0 == gDTraceInited) {
		int i, ncpu = NCPU;

		gMajDevNo = cdevsw_add(DTRACE_MAJOR, &dtrace_cdevsw);

		if (gMajDevNo < 0) {
			printf("dtrace_init: failed to allocate a major number!\n");
			return;
		}

		if (NULL == devfs_make_node_clone( makedev(gMajDevNo, 0), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666,
					dtrace_clone_func, DTRACEMNR_DTRACE, 0 )) {
			printf("dtrace_init: failed to devfs_make_node_clone for dtrace!\n");
			return;
		}

#if defined(DTRACE_MEMORY_ZONES)
		/*
		 * Initialize the dtrace kalloc-emulation zones.
		 */
		dtrace_alloc_init();
#endif /* DTRACE_MEMORY_ZONES */

		/*
		 * Allocate the dtrace_probe_t zone
		 */
		dtrace_probe_t_zone = zinit(sizeof(dtrace_probe_t),
		    1024 * sizeof(dtrace_probe_t),
		    sizeof(dtrace_probe_t),
		    "dtrace.dtrace_probe_t");

		/*
		 * Create the dtrace lock group and attrs.
		 */
		dtrace_lck_attr = lck_attr_alloc_init();
		dtrace_lck_grp_attr = lck_grp_attr_alloc_init();
		dtrace_lck_grp = lck_grp_alloc_init("dtrace", dtrace_lck_grp_attr);

		/*
		 * We have to initialize all locks explicitly
		 */
		lck_mtx_init(&dtrace_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&dtrace_provider_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&dtrace_meta_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&dtrace_errlock, dtrace_lck_grp, dtrace_lck_attr);
		lck_rw_init(&dtrace_dof_mode_lock, dtrace_lck_grp, dtrace_lck_attr);

		/*
		 * The cpu_core structure consists of per-CPU state available in any context.
		 * On some architectures, this may mean that the page(s) containing the
		 * NCPU-sized array of cpu_core structures must be locked in the TLB -- it
		 * is up to the platform to assure that this is performed properly. Note that
		 * the structure is sized to avoid false sharing.
		 */
		lck_mtx_init(&cpu_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&mod_lock, dtrace_lck_grp, dtrace_lck_attr);

		cpu_core = (cpu_core_t *)kmem_zalloc( ncpu * sizeof(cpu_core_t), KM_SLEEP );
		for (i = 0; i < ncpu; ++i) {
			lck_mtx_init(&cpu_core[i].cpuc_pid_lock, dtrace_lck_grp, dtrace_lck_attr);
		}

		cpu_list = (cpu_t *)kmem_zalloc( ncpu * sizeof(cpu_t), KM_SLEEP );
		for (i = 0; i < ncpu; ++i) {
			cpu_list[i].cpu_id = (processorid_t)i;
			cpu_list[i].cpu_next = &(cpu_list[(i+1) % ncpu]);
			lck_rw_init(&cpu_list[i].cpu_ft_lock, dtrace_lck_grp, dtrace_lck_attr);
		}

		lck_mtx_lock(&cpu_lock);
		for (i = 0; i < ncpu; ++i)
			dtrace_cpu_setup_initial( (processorid_t)i ); /* In lieu of register_cpu_setup_func() callback */
		lck_mtx_unlock(&cpu_lock);

		(void)dtrace_abs_to_nano(0LL); /* Force once only call to clock_timebase_info (which can take a lock) */

		/*
		 * See dtrace_impl.h for a description of dof modes.
		 * The default is lazy dof.
		 *
		 * XXX Warn if state is LAZY_OFF? It won't break anything, but
		 * makes no sense...
		 */
		if (!PE_parse_boot_argn("dtrace_dof_mode", &dtrace_dof_mode, sizeof (dtrace_dof_mode))) {
			dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
		}

		/*
		 * Sanity check of dof mode value.
		 */
		switch (dtrace_dof_mode) {
		case DTRACE_DOF_MODE_NEVER:
		case DTRACE_DOF_MODE_LAZY_ON:
			/* valid modes, but nothing else we need to do */
			break;

		case DTRACE_DOF_MODE_LAZY_OFF:
		case DTRACE_DOF_MODE_NON_LAZY:
			/* Cannot wait for a dtrace_open to init fasttrap */
			fasttrap_init();
			break;

		default:
			/* Invalid, clamp to non lazy */
			dtrace_dof_mode = DTRACE_DOF_MODE_NON_LAZY;
			fasttrap_init();
			break;
		}

		gDTraceInited = 1;
	} else
		panic("dtrace_init: called twice!\n");
}
void
dtrace_postinit(void)
{
	dtrace_attach( (dev_info_t *)makedev(gMajDevNo, 0), 0 );
}
#undef DTRACE_MAJOR
/*
 * Routines used to register interest in cpu's being added to or removed
 * from the system.
 */
void
register_cpu_setup_func(cpu_setup_func_t *ignore1, void *ignore2)
{
#pragma unused(ignore1,ignore2)
}

void
unregister_cpu_setup_func(cpu_setup_func_t *ignore1, void *ignore2)
{
#pragma unused(ignore1,ignore2)
}
#endif /* __APPLE__ */