/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/* #pragma ident	"@(#)dtrace.c	1.49	06/08/11 SMI" */
/*
 * DTrace - Dynamic Tracing for Solaris
 *
 * This is the implementation of the Solaris Dynamic Tracing framework
 * (DTrace).  The user-visible interface to DTrace is described at length in
 * the "Solaris Dynamic Tracing Guide".  The interfaces between the libdtrace
 * library, the in-kernel DTrace framework, and the DTrace providers are
 * described in the block comments in the <sys/dtrace.h> header file.  The
 * internal architecture of DTrace is described in the block comments in the
 * <sys/dtrace_impl.h> header file.  The comments contained within the DTrace
 * implementation very much assume mastery of all of these sources; if one has
 * an unanswered question about the implementation, one should consult them
 * first.
 *
 * The functions here are ordered roughly as follows:
 *
 *   - Probe context functions
 *   - Probe hashing functions
 *   - Non-probe context utility functions
 *   - Matching functions
 *   - Provider-to-Framework API functions
 *   - Probe management functions
 *   - DIF object functions
 *   - Predicate functions
 *   - Enabling functions
 *   - Anonymous enabling functions
 *   - Consumer state functions
 *   - Driver cookbook functions
 *
 * Each group of functions begins with a block comment labelled the "DTrace
 * [Group] Functions", allowing one to find each block by searching forward
 * on capital-f functions.
 */
#define _DTRACE_WANT_PROC_GLUE_ 1

#include <sys/errno.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/dtrace_impl.h>
#include <sys/param.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <miscfs/devfs/devfs.h>
#include <sys/malloc.h>
#include <sys/kernel_types.h>
#include <sys/proc_internal.h>
#include <sys/uio_internal.h>
#include <sys/kauth.h>
#include <mach/exception_types.h>
#include <sys/signalvar.h>
#include <kern/zalloc.h>
#define t_predcache t_dtrace_predcache	/* Cosmetic. Helps readability of thread.h */

extern void dtrace_suspend(void);
extern void dtrace_resume(void);
extern void dtrace_init(void);
extern void helper_init(void);

#if defined(__APPLE__)
#include "../../../osfmk/chud/chud_dtrace.h"

extern kern_return_t chudxnu_dtrace_callback
	(uint64_t selector, uint64_t *args, uint32_t count);
#endif /* __APPLE__ */
/*
 * DTrace Tunable Variables
 *
 * The following variables may be tuned by adding a line to /etc/system that
 * includes both the name of the DTrace module ("dtrace") and the name of the
 * variable.  For example:
 *
 *	set dtrace:dtrace_destructive_disallow = 1
 *
 * In general, the only variables that one should be tuning this way are those
 * that affect system-wide DTrace behavior, and for which the default behavior
 * is undesirable.  Most of these variables are tunable on a per-consumer
 * basis using DTrace options, and need not be tuned on a system-wide basis.
 * When tuning these variables, avoid pathological values; while some attempt
 * is made to verify the integrity of these variables, they are not considered
 * part of the supported interface to DTrace, and they are therefore not
 * checked comprehensively.  Further, these variables should not be tuned
 * dynamically via "mdb -kw" or other means; they should only be tuned via
 * /etc/system.
 */
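/*
 * As an illustrative sketch (the command line below is hypothetical, though
 * the options themselves are standard dtrace(1M) options), the per-consumer
 * analogue of several of these tunables is a -x option on a single
 * invocation, e.g.:
 *
 *	dtrace -x dynvarsize=8m -x strsize=512 -n 'syscall:::entry { ... }'
 *
 * Only the system-wide defaults are governed by the variables below.
 */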
int		dtrace_destructive_disallow = 0;
#if defined(__APPLE__)
#define proc_t struct proc
#endif /* __APPLE__ */
dtrace_optval_t	dtrace_nonroot_maxsize = (16 * 1024 * 1024);
size_t		dtrace_difo_maxsize = (256 * 1024);
dtrace_optval_t	dtrace_dof_maxsize = (256 * 1024);
size_t		dtrace_global_maxsize = (16 * 1024);
size_t		dtrace_actions_max = (16 * 1024);
size_t		dtrace_retain_max = 1024;
dtrace_optval_t	dtrace_helper_actions_max = 32;
dtrace_optval_t	dtrace_helper_providers_max = 32;
dtrace_optval_t	dtrace_dstate_defsize = (1 * 1024 * 1024);
size_t		dtrace_strsize_default = 256;
dtrace_optval_t	dtrace_cleanrate_default = 9900990;		/* 101 hz */
dtrace_optval_t	dtrace_cleanrate_min = 200000;			/* 5000 hz */
dtrace_optval_t	dtrace_cleanrate_max = (uint64_t)60 * NANOSEC;	/* 1/minute */
dtrace_optval_t	dtrace_aggrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_statusrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_statusrate_max = (hrtime_t)10 * NANOSEC;	/* 6/minute */
dtrace_optval_t	dtrace_switchrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_nspec_default = 1;
dtrace_optval_t	dtrace_specsize_default = 32 * 1024;
dtrace_optval_t	dtrace_stackframes_default = 20;
dtrace_optval_t	dtrace_ustackframes_default = 20;
dtrace_optval_t	dtrace_jstackframes_default = 50;
dtrace_optval_t	dtrace_jstackstrsize_default = 512;
int		dtrace_msgdsize_max = 128;
hrtime_t	dtrace_chill_max = 500 * (NANOSEC / MILLISEC);	/* 500 ms */
hrtime_t	dtrace_chill_interval = NANOSEC;		/* 1000 ms */
int		dtrace_devdepth_max = 32;
int		dtrace_err_verbose;
hrtime_t	dtrace_deadman_interval = NANOSEC;
hrtime_t	dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC;
hrtime_t	dtrace_deadman_user = (hrtime_t)30 * NANOSEC;
/*
 * DTrace External Variables
 *
 * As dtrace(7D) is a kernel module, any DTrace variables are obviously
 * available to DTrace consumers via the backtick (`) syntax.  One of these,
 * dtrace_zero, is made deliberately so: it is provided as a source of
 * well-known, zero-filled memory.  While this variable is not documented,
 * it is used by some translators as an implementation detail.
 */
const char	dtrace_zero[256] = { 0 };	/* zero-filled memory */
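/*
 * For example (a hypothetical D fragment, not code from this file), a
 * translator that needs a well-known source of zeroes can reach this
 * variable through the backtick syntax described above:
 *
 *	(char *)&`dtrace_zero
 *
 * which yields up to 256 bytes of readable, zero-filled memory.
 */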
/*
 * DTrace Internal Variables
 */
static dev_info_t	*dtrace_devi;		/* device info */
static vmem_t		*dtrace_arena;		/* probe ID arena */
static vmem_t		*dtrace_minor;		/* minor number arena */
static taskq_t		*dtrace_taskq;		/* task queue */
static dtrace_probe_t	**dtrace_probes;	/* array of all probes */
static int		dtrace_nprobes;		/* number of probes */
static dtrace_provider_t *dtrace_provider;	/* provider list */
static dtrace_meta_t	*dtrace_meta_pid;	/* user-land meta provider */
static int		dtrace_opens;		/* number of opens */
static int		dtrace_helpers;		/* number of helpers */
static void		*dtrace_softstate;	/* softstate pointer */
static dtrace_hash_t	*dtrace_bymod;		/* probes hashed by module */
static dtrace_hash_t	*dtrace_byfunc;		/* probes hashed by function */
static dtrace_hash_t	*dtrace_byname;		/* probes hashed by name */
static dtrace_toxrange_t *dtrace_toxrange;	/* toxic range array */
static int		dtrace_toxranges;	/* number of toxic ranges */
static int		dtrace_toxranges_max;	/* size of toxic range array */
static dtrace_anon_t	dtrace_anon;		/* anonymous enabling */
static kmem_cache_t	*dtrace_state_cache;	/* cache for dynamic state */
static uint64_t		dtrace_vtime_references; /* number of vtimestamp refs */
static kthread_t	*dtrace_panicked;	/* panicking thread */
static dtrace_ecb_t	*dtrace_ecb_create_cache; /* cached created ECB */
static dtrace_genid_t	dtrace_probegen;	/* current probe generation */
static dtrace_helpers_t	*dtrace_deferred_pid;	/* deferred helper list */
static dtrace_enabling_t *dtrace_retained;	/* list of retained enablings */
static dtrace_dynvar_t	dtrace_dynhash_sink;	/* end of dynamic hash chains */
#if defined(__APPLE__)
static int		dtrace_dof_mode;	/* dof mode */
#endif /* __APPLE__ */
#if defined(__APPLE__)
/*
 * To save memory, some common memory allocations are given a
 * unique zone.  For example, dtrace_probe_t is 72 bytes in size,
 * which means it would fall into the kalloc.128 bucket.  With
 * 20k elements allocated, the space saved is substantial.
 */
struct zone	*dtrace_probe_t_zone;
#endif /* __APPLE__ */
/*
 * DTrace is protected by three (relatively coarse-grained) locks:
 *
 * (1) dtrace_lock is required to manipulate essentially any DTrace state,
 *     including enabling state, probes, ECBs, consumer state, helper state,
 *     etc.  Importantly, dtrace_lock is _not_ required when in probe context;
 *     probe context is lock-free -- synchronization is handled via the
 *     dtrace_sync() cross call mechanism.
 *
 * (2) dtrace_provider_lock is required when manipulating provider state, or
 *     when provider state must be held constant.
 *
 * (3) dtrace_meta_lock is required when manipulating meta provider state, or
 *     when meta provider state must be held constant.
 *
 * The lock ordering between these three locks is dtrace_meta_lock before
 * dtrace_provider_lock before dtrace_lock.  (In particular, there are
 * several places where dtrace_provider_lock is held by the framework as it
 * calls into the providers -- which then call back into the framework,
 * grabbing dtrace_lock.)
 *
 * There are two other locks in the mix: mod_lock and cpu_lock.  With respect
 * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical
 * role as a coarse-grained lock; it is acquired before both of these locks.
 * With respect to dtrace_meta_lock, its behavior is stranger: cpu_lock must
 * be acquired _between_ dtrace_meta_lock and any other DTrace locks.
 * mod_lock is similar with respect to dtrace_provider_lock in that it must be
 * acquired _between_ dtrace_provider_lock and dtrace_lock.
 *
 * All kmutex_t vars have been changed to lck_mtx_t.
 * Note that lck_mtx_t's require explicit initialization.
 *
 * mutex_enter() becomes lck_mtx_lock()
 * mutex_exit() becomes lck_mtx_unlock()
 *
 * Lock asserts are changed like this:
 *
 *	ASSERT(MUTEX_HELD(&cpu_lock));
 * becomes:
 *	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
 *
 * Due to the number of these changes, they are not called out explicitly.
 */
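/*
 * A minimal sketch (not copied from any one call site) of the resulting
 * acquisition order when all three DTrace locks are required:
 *
 *	lck_mtx_lock(&dtrace_meta_lock);
 *	lck_mtx_lock(&dtrace_provider_lock);
 *	lck_mtx_lock(&dtrace_lock);
 *	...
 *	lck_mtx_unlock(&dtrace_lock);
 *	lck_mtx_unlock(&dtrace_provider_lock);
 *	lck_mtx_unlock(&dtrace_meta_lock);
 */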
static lck_mtx_t	dtrace_lock;		/* probe state lock */
static lck_mtx_t	dtrace_provider_lock;	/* provider state lock */
static lck_mtx_t	dtrace_meta_lock;	/* meta-provider state lock */
#if defined(__APPLE__)
static lck_rw_t		dtrace_dof_mode_lock;	/* dof mode lock */
#endif /* __APPLE__ */
/*
 * DTrace Provider Variables
 *
 * These are the variables relating to DTrace as a provider (that is, the
 * provider of the BEGIN, END, and ERROR probes).
 */
static dtrace_pattr_t	dtrace_provider_attr = {
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
};
static dtrace_pops_t	dtrace_provider_ops = {
	(void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop,
	(void (*)(void *, struct modctl *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	NULL,
	NULL,
	NULL,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop
};
static dtrace_id_t	dtrace_probeid_begin;	/* special BEGIN probe */
static dtrace_id_t	dtrace_probeid_end;	/* special END probe */
dtrace_id_t		dtrace_probeid_error;	/* special ERROR probe */
/*
 * DTrace Helper Tracing Variables
 */
uint32_t	dtrace_helptrace_next = 0;
uint32_t	dtrace_helptrace_nlocals;
char		*dtrace_helptrace_buffer;
int		dtrace_helptrace_bufsize = 512 * 1024;

#ifdef DEBUG
int		dtrace_helptrace_enabled = 1;
#else
int		dtrace_helptrace_enabled = 0;
#endif
/*
 * DTrace Error Hashing
 *
 * On DEBUG kernels, DTrace will track the errors that it has seen in a hash
 * table.  This is very useful for checking coverage of tests that are
 * expected to induce DIF or DOF processing errors, and may be useful for
 * debugging problems in the DIF code generator or in DOF generation.  The
 * error hash may be examined with the ::dtrace_errhash MDB dcmd.
 */
#ifdef DEBUG
static dtrace_errhash_t	dtrace_errhash[DTRACE_ERRHASHSZ];
static const char	*dtrace_errlast;
static kthread_t	*dtrace_errthread;
static lck_mtx_t	dtrace_errlock;
#endif
/*
 * DTrace Macros and Constants
 *
 * These are various macros that are useful in various spots in the
 * implementation, along with a few random constants that have no meaning
 * outside of the implementation.  There is no real structure to this cpp
 * mishmash -- but is there ever?
 */
#define	DTRACE_HASHSTR(hash, probe)	\
	dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs)))

#define	DTRACE_HASHNEXT(hash, probe)	\
	(dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs)

#define	DTRACE_HASHPREV(hash, probe)	\
	(dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs)

#define	DTRACE_HASHEQ(hash, lhs, rhs)	\
	(strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \
	    *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0)

#define	DTRACE_AGGHASHSIZE_SLEW		17
/*
 * The key for a thread-local variable consists of the lower 61 bits of the
 * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL.
 * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never
 * equal to a variable identifier.  This is necessary (but not sufficient) to
 * assure that global associative arrays never collide with thread-local
 * variables.  To guarantee that they cannot collide, we must also define the
 * order for keying dynamic variables.  That order is:
 *
 *   [ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ]
 *
 * Because the variable-key and the tls-key are in orthogonal spaces, there is
 * no way for a global variable key signature to match a thread-local key
 * signature.
 */
#if !defined(__APPLE__)
#define	DTRACE_TLS_THRKEY(where) { \
	uint_t intr = 0; \
	uint_t actv = CPU->cpu_intr_actv >> (LOCK_LEVEL + 1); \
	for (; actv; actv >>= 1) \
		intr++; \
	ASSERT(intr < (1 << 3)); \
	(where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \
	    (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
}
#else /* __APPLE__ */
#define	DTRACE_TLS_THRKEY(where) { \
	uint_t intr = ml_at_interrupt_context(); /* XXX just one measly bit */ \
	uint_t thr = (uint_t)current_thread(); \
	uint_t pid = (uint_t)proc_selfpid(); \
	ASSERT(intr < (1 << 3)); \
	(where) = ((((uint64_t)thr << 32 | pid) + DIF_VARIABLE_MAX) & \
	    (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
}
#endif /* __APPLE__ */
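/*
 * Illustrative use (a sketch; the surrounding variable names are
 * hypothetical): a probe-context consumer of a thread-local variable
 * appends the tls-key to its key tuple like so:
 *
 *	DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
 *	key[nkeys++].dttk_size = 0;
 */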
#define	DTRACE_STORE(type, tomax, offset, what) \
	*((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what);
#if !defined(__APPLE__)
#if !(defined(__i386__) || defined (__x86_64__))
#define	DTRACE_ALIGNCHECK(addr, size, flags)				\
	if (addr & (size - 1)) {					\
		*flags |= CPU_DTRACE_BADALIGN;				\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;	\
		return (0);						\
	}
#else
#define	DTRACE_ALIGNCHECK(addr, size, flags)
#endif

#define	DTRACE_LOADFUNC(bits)						\
uint##bits##_t								\
dtrace_load##bits(uintptr_t addr)					\
{									\
	size_t size = bits / NBBY;					\
	uint##bits##_t rval;						\
	int i;								\
	volatile uint16_t *flags = (volatile uint16_t *)		\
	    &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;			\
									\
	DTRACE_ALIGNCHECK(addr, size, flags);				\
									\
	for (i = 0; i < dtrace_toxranges; i++) {			\
		if (addr >= dtrace_toxrange[i].dtt_limit)		\
			continue;					\
		if (addr + size <= dtrace_toxrange[i].dtt_base)		\
			continue;					\
		/*							\
		 * This address falls within a toxic region; return 0.	\
		 */							\
		*flags |= CPU_DTRACE_BADADDR;				\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;	\
		return (0);						\
	}								\
									\
	*flags |= CPU_DTRACE_NOFAULT;					\
	rval = *((volatile uint##bits##_t *)addr);			\
	*flags &= ~CPU_DTRACE_NOFAULT;					\
									\
	return (rval);							\
}
#else /* __APPLE__ */
#define	DTRACE_ALIGNCHECK(addr, size, flags)				\
	if (addr & (MIN(size,4) - 1)) {					\
		*flags |= CPU_DTRACE_BADALIGN;				\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;	\
		return (0);						\
	}

#define	RECOVER_LABEL(bits) __asm__ volatile("_dtraceLoadRecover" #bits ":" );

#define	DTRACE_LOADFUNC(bits)						\
extern vm_offset_t dtraceLoadRecover##bits;				\
uint##bits##_t dtrace_load##bits(uintptr_t addr);			\
uint##bits##_t								\
dtrace_load##bits(uintptr_t addr)					\
{									\
	size_t size = bits / NBBY;					\
	uint##bits##_t rval = 0;					\
	int i;								\
	ppnum_t pp;							\
	volatile uint16_t *flags = (volatile uint16_t *)		\
	    &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;			\
									\
	DTRACE_ALIGNCHECK(addr, size, flags);				\
									\
	for (i = 0; i < dtrace_toxranges; i++) {			\
		if (addr >= dtrace_toxrange[i].dtt_limit)		\
			continue;					\
		if (addr + size <= dtrace_toxrange[i].dtt_base)		\
			continue;					\
		/*							\
		 * This address falls within a toxic region; return 0.	\
		 */							\
		*flags |= CPU_DTRACE_BADADDR;				\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;	\
		return (0);						\
	}								\
									\
	pp = pmap_find_phys(kernel_pmap, addr);				\
	if (0 == pp || /* pmap_find_phys failed ? */			\
	    !dtxnu_is_RAM_page(pp) /* Backed by RAM? */ ) {		\
		*flags |= CPU_DTRACE_BADADDR;				\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;	\
		return (0);						\
	}								\
									\
	{								\
	volatile vm_offset_t recover = (vm_offset_t)&dtraceLoadRecover##bits; \
	*flags |= CPU_DTRACE_NOFAULT;					\
	recover = dtrace_set_thread_recover(current_thread(), recover); \
	rval = *((volatile uint##bits##_t *)addr);			\
	RECOVER_LABEL(bits);						\
	(void)dtrace_set_thread_recover(current_thread(), recover);	\
	*flags &= ~CPU_DTRACE_NOFAULT;					\
	}								\
									\
	return (rval);							\
}
#endif /* __APPLE__ */
#ifdef _LP64
#define	dtrace_loadptr	dtrace_load64
#else
#define	dtrace_loadptr	dtrace_load32
#endif

#define	DTRACE_DYNHASH_FREE	0
#define	DTRACE_DYNHASH_SINK	1
#define	DTRACE_DYNHASH_VALID	2

#define	DTRACE_MATCH_NEXT	0
#define	DTRACE_MATCH_DONE	1
#define	DTRACE_ANCHORED(probe)	((probe)->dtpr_func[0] != '\0')
#define	DTRACE_STATE_ALIGN	64
#define	DTRACE_FLAGS2FLT(flags)						\
	(((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR :		\
	((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP :		\
	((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO :		\
	((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV :		\
	((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV :		\
	((flags) & CPU_DTRACE_TUPOFLOW) ? DTRACEFLT_TUPOFLOW :		\
	((flags) & CPU_DTRACE_BADALIGN) ? DTRACEFLT_BADALIGN :		\
	((flags) & CPU_DTRACE_NOSCRATCH) ? DTRACEFLT_NOSCRATCH :	\
	DTRACEFLT_UNKNOWN)

#define	DTRACEACT_ISSTRING(act)						\
	((act)->dta_kind == DTRACEACT_DIFEXPR &&			\
	    (act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING)
static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
static void dtrace_enabling_provide(dtrace_provider_t *);
static int dtrace_enabling_match(dtrace_enabling_t *, int *);
static void dtrace_enabling_matchall(void);
static dtrace_state_t *dtrace_anon_grab(void);
static uint64_t dtrace_helper(int, dtrace_mstate_t *,
    dtrace_state_t *, uint64_t, uint64_t);
static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
static void dtrace_buffer_drop(dtrace_buffer_t *);
static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
    dtrace_state_t *, dtrace_mstate_t *);
static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
    dtrace_optval_t);
static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
/*
 * DTrace Probe Context Functions
 *
 * These functions are called from probe context.  Because probe context is
 * any context in which C may be called, arbitrary locks may be held,
 * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
 * As a result, functions called from probe context may only call other DTrace
 * support functions -- they may not interact at all with the system at large.
 * (Note that the ASSERT macro is made probe-context safe by redefining it in
 * terms of dtrace_assfail(), a probe-context safe function.)  If arbitrary
 * loads are to be performed from probe context, they _must_ be in terms of
 * the safe dtrace_load*() variants.
 *
 * Some functions in this block are not actually called from probe context;
 * for these functions, there will be a comment above the function reading
 * "Note:  not called from probe context."
 */
static void
dtrace_panic(const char *format, ...)
{
	va_list alist;

	va_start(alist, format);
	dtrace_vpanic(format, alist);
	va_end(alist);
}

int
dtrace_assfail(const char *a, const char *f, int l)
{
	dtrace_panic("assertion failed: %s, file: %s, line: %d", a, f, l);

	/*
	 * We just need something here that even the most clever compiler
	 * cannot optimize away.
	 */
	return (a[(uintptr_t)f]);
}
/*
 * Atomically increment a specified error counter from probe context.
 */
static void
dtrace_error(uint32_t *counter)
{
	/*
	 * Most counters stored to in probe context are per-CPU counters.
	 * However, there are some error conditions that are sufficiently
	 * arcane that they don't merit per-CPU storage.  If these counters
	 * are incremented concurrently on different CPUs, scalability will be
	 * adversely affected -- but we don't expect them to be white-hot in a
	 * correctly constructed enabling...
	 */
	uint32_t oval, nval;

	do {
		oval = *counter;

		if ((nval = oval + 1) == 0) {
			/*
			 * If the counter would wrap, set it to 1 -- assuring
			 * that the counter is never zero when we have seen
			 * errors.  (The counter must be 32-bits because we
			 * aren't guaranteed a 64-bit compare&swap operation.)
			 * To save this code both the infamy of being fingered
			 * by a priggish news story and the indignity of being
			 * the target of a neo-puritan witch trial, we're
			 * carefully avoiding any colorful description of the
			 * likelihood of this condition -- but suffice it to
			 * say that it is only slightly more likely than the
			 * overflow of predicate cache IDs, as discussed in
			 * dtrace_predicate_create().
			 */
			nval = 1;
		}
	} while (dtrace_cas32(counter, oval, nval) != oval);
}
/*
 * Use the DTRACE_LOADFUNC macro to define functions for each of loading a
 * uint8_t, a uint16_t, a uint32_t and a uint64_t.
 */
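/*
 * The four widths named above are instantiated here; each expansion defines
 * the corresponding dtrace_load<bits>() routine.
 */
DTRACE_LOADFUNC(8)
DTRACE_LOADFUNC(16)
DTRACE_LOADFUNC(32)
DTRACE_LOADFUNC(64)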
static int
dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate)
{
	if (dest < mstate->dtms_scratch_base)
		return (0);

	if (dest + size < dest)
		return (0);

	if (dest + size > mstate->dtms_scratch_ptr)
		return (0);

	return (1);
}
667 dtrace_canstore_statvar(uint64_t addr
, size_t sz
,
668 dtrace_statvar_t
**svars
, int nsvars
)
672 for (i
= 0; i
< nsvars
; i
++) {
673 dtrace_statvar_t
*svar
= svars
[i
];
675 if (svar
== NULL
|| svar
->dtsv_size
== 0)
678 if (addr
- svar
->dtsv_data
< svar
->dtsv_size
&&
679 addr
+ sz
<= svar
->dtsv_data
+ svar
->dtsv_size
)
/*
 * Check to see if the address is within a memory region to which a store may
 * be issued.  This includes the DTrace scratch areas, and any DTrace variable
 * region.  The caller of dtrace_canstore() is responsible for performing any
 * alignment checks that are needed before stores are actually executed.
 */
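/*
 * A caller-side sketch (hypothetical; not a call site within this function):
 * a DIF store of sz bytes to addr is only performed after
 *
 *	if (!dtrace_canstore(addr, sz, mstate, vstate)) {
 *		*flags |= CPU_DTRACE_BADADDR;
 *		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;
 *		break;
 *	}
 *
 * has been evaluated; alignment is checked separately by the caller.
 */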
693 dtrace_canstore(uint64_t addr
, size_t sz
, dtrace_mstate_t
*mstate
,
694 dtrace_vstate_t
*vstate
)
700 * First, check to see if the address is in scratch space...
702 a
= mstate
->dtms_scratch_base
;
703 s
= mstate
->dtms_scratch_size
;
705 if (addr
- a
< s
&& addr
+ sz
<= a
+ s
)
709 * Now check to see if it's a dynamic variable. This check will pick
710 * up both thread-local variables and any global dynamically-allocated
713 a
= (uintptr_t)vstate
->dtvs_dynvars
.dtds_base
;
714 s
= vstate
->dtvs_dynvars
.dtds_size
;
715 if (addr
- a
< s
&& addr
+ sz
<= a
+ s
)
719 * Finally, check the static local and global variables. These checks
720 * take the longest, so we perform them last.
722 if (dtrace_canstore_statvar(addr
, sz
,
723 vstate
->dtvs_locals
, vstate
->dtvs_nlocals
))
726 if (dtrace_canstore_statvar(addr
, sz
,
727 vstate
->dtvs_globals
, vstate
->dtvs_nglobals
))
734 * Compare two strings using safe loads.
737 dtrace_strncmp(char *s1
, char *s2
, size_t limit
)
740 volatile uint16_t *flags
;
742 if (s1
== s2
|| limit
== 0)
745 flags
= (volatile uint16_t *)&cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
;
751 c1
= dtrace_load8((uintptr_t)s1
++);
756 c2
= dtrace_load8((uintptr_t)s2
++);
760 } while (--limit
&& c1
!= '\0' && !(*flags
& CPU_DTRACE_FAULT
));
/*
 * Compute strlen(s) for a string using safe memory accesses.  The additional
 * len parameter is used to specify a maximum length to ensure completion.
 */
static size_t
dtrace_strlen(const char *s, size_t lim)
{
	uint_t len;

	for (len = 0; len != lim; len++)
		if (dtrace_load8((uintptr_t)s++) == '\0')
			break;

	return (len);
}
782 * Check if an address falls within a toxic region.
785 dtrace_istoxic(uintptr_t kaddr
, size_t size
)
787 uintptr_t taddr
, tsize
;
790 for (i
= 0; i
< dtrace_toxranges
; i
++) {
791 taddr
= dtrace_toxrange
[i
].dtt_base
;
792 tsize
= dtrace_toxrange
[i
].dtt_limit
- taddr
;
794 if (kaddr
- taddr
< tsize
) {
795 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR
);
796 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_illval
= kaddr
;
800 if (taddr
- kaddr
< size
) {
801 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR
);
802 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_illval
= taddr
;
811 * Copy src to dst using safe memory accesses. The src is assumed to be unsafe
812 * memory specified by the DIF program. The dst is assumed to be safe memory
813 * that we can store to directly because it is managed by DTrace. As with
814 * standard bcopy, overlapping copies are handled properly.
817 dtrace_bcopy(const void *src
, void *dst
, size_t len
)
821 const uint8_t *s2
= src
;
825 *s1
++ = dtrace_load8((uintptr_t)s2
++);
826 } while (--len
!= 0);
832 *--s1
= dtrace_load8((uintptr_t)--s2
);
833 } while (--len
!= 0);
839 * Copy src to dst using safe memory accesses, up to either the specified
840 * length, or the point that a nul byte is encountered. The src is assumed to
841 * be unsafe memory specified by the DIF program. The dst is assumed to be
842 * safe memory that we can store to directly because it is managed by DTrace.
843 * Unlike dtrace_bcopy(), overlapping regions are not handled.
846 dtrace_strcpy(const void *src
, void *dst
, size_t len
)
849 uint8_t *s1
= dst
, c
;
850 const uint8_t *s2
= src
;
853 *s1
++ = c
= dtrace_load8((uintptr_t)s2
++);
854 } while (--len
!= 0 && c
!= '\0');
859 * Copy src to dst, deriving the size and type from the specified (BYREF)
860 * variable type. The src is assumed to be unsafe memory specified by the DIF
861 * program. The dst is assumed to be DTrace variable memory that is of the
862 * specified type; we assume that we can store to directly.
865 dtrace_vcopy(void *src
, void *dst
, dtrace_diftype_t
*type
)
867 ASSERT(type
->dtdt_flags
& DIF_TF_BYREF
);
869 if (type
->dtdt_kind
== DIF_TYPE_STRING
)
870 dtrace_strcpy(src
, dst
, type
->dtdt_size
);
872 dtrace_bcopy(src
, dst
, type
->dtdt_size
);
876 * Compare s1 to s2 using safe memory accesses. The s1 data is assumed to be
877 * unsafe memory specified by the DIF program. The s2 data is assumed to be
878 * safe memory that we can access directly because it is managed by DTrace.
881 dtrace_bcmp(const void *s1
, const void *s2
, size_t len
)
883 volatile uint16_t *flags
;
885 flags
= (volatile uint16_t *)&cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
;
890 if (s1
== NULL
|| s2
== NULL
)
893 if (s1
!= s2
&& len
!= 0) {
894 const uint8_t *ps1
= s1
;
895 const uint8_t *ps2
= s2
;
898 if (dtrace_load8((uintptr_t)ps1
++) != *ps2
++)
900 } while (--len
!= 0 && !(*flags
& CPU_DTRACE_FAULT
));
/*
 * Zero the specified region using a simple byte-by-byte loop.  Note that this
 * is for safe DTrace-managed memory only.
 */
static void
dtrace_bzero(void *dst, size_t len)
{
	uchar_t *cp;

	for (cp = dst; len != 0; len--)
		*cp++ = 0;
}
919 * This privilege check should be used by actions and subroutines to
920 * verify that the user credentials of the process that enabled the
921 * invoking ECB match the target credentials
924 dtrace_priv_proc_common_user(dtrace_state_t
*state
)
926 cred_t
*cr
, *s_cr
= state
->dts_cred
.dcr_cred
;
929 * We should always have a non-NULL state cred here, since if cred
930 * is null (anonymous tracing), we fast-path bypass this routine.
932 ASSERT(s_cr
!= NULL
);
934 #if !defined(__APPLE__)
935 if ((cr
= CRED()) != NULL
&&
937 if ((cr
= dtrace_CRED()) != NULL
&&
938 #endif /* __APPLE__ */
939 s_cr
->cr_uid
== cr
->cr_uid
&&
940 s_cr
->cr_uid
== cr
->cr_ruid
&&
941 s_cr
->cr_uid
== cr
->cr_suid
&&
942 s_cr
->cr_gid
== cr
->cr_gid
&&
943 s_cr
->cr_gid
== cr
->cr_rgid
&&
944 s_cr
->cr_gid
== cr
->cr_sgid
)
951 * This privilege check should be used by actions and subroutines to
952 * verify that the zone of the process that enabled the invoking ECB
953 * matches the target credentials
956 dtrace_priv_proc_common_zone(dtrace_state_t
*state
)
958 cred_t
*cr
, *s_cr
= state
->dts_cred
.dcr_cred
;
961 * We should always have a non-NULL state cred here, since if cred
962 * is null (anonymous tracing), we fast-path bypass this routine.
964 ASSERT(s_cr
!= NULL
);
966 #if !defined(__APPLE__)
967 if ((cr
= CRED()) != NULL
&&
968 s_cr
->cr_zone
== cr
->cr_zone
)
973 return 1; /* Darwin doesn't do zones. */
974 #endif /* __APPLE__ */
978 * This privilege check should be used by actions and subroutines to
979 * verify that the process has not setuid or changed credentials.
981 #if !defined(__APPLE__)
983 dtrace_priv_proc_common_nocd()
987 if ((proc
= ttoproc(curthread
)) != NULL
&&
988 !(proc
->p_flag
& SNOCD
))
995 dtrace_priv_proc_common_nocd(void)
997 return 1; /* Darwin omits "No Core Dump" flag. */
999 #endif /* __APPLE__ */
1002 dtrace_priv_proc_destructive(dtrace_state_t
*state
)
1004 int action
= state
->dts_cred
.dcr_action
;
1006 #if defined(__APPLE__)
1007 if (ISSET(current_proc()->p_lflag
, P_LNOATTACH
))
1009 #endif /* __APPLE__ */
1011 if (((action
& DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE
) == 0) &&
1012 dtrace_priv_proc_common_zone(state
) == 0)
1015 if (((action
& DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER
) == 0) &&
1016 dtrace_priv_proc_common_user(state
) == 0)
1019 if (((action
& DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG
) == 0) &&
1020 dtrace_priv_proc_common_nocd() == 0)
1026 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
|= CPU_DTRACE_UPRIV
;
1032 dtrace_priv_proc_control(dtrace_state_t
*state
)
1034 #if defined(__APPLE__)
1035 if (ISSET(current_proc()->p_lflag
, P_LNOATTACH
))
1037 #endif /* __APPLE__ */
1039 if (state
->dts_cred
.dcr_action
& DTRACE_CRA_PROC_CONTROL
)
1042 if (dtrace_priv_proc_common_zone(state
) &&
1043 dtrace_priv_proc_common_user(state
) &&
1044 dtrace_priv_proc_common_nocd())
1047 #if defined(__APPLE__)
1049 #endif /* __APPLE__ */
1050 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
|= CPU_DTRACE_UPRIV
;
1056 dtrace_priv_proc(dtrace_state_t
*state
)
1058 #if defined(__APPLE__)
1059 if (ISSET(current_proc()->p_lflag
, P_LNOATTACH
))
1061 #endif /* __APPLE__ */
1063 if (state
->dts_cred
.dcr_action
& DTRACE_CRA_PROC
)
1066 #if defined(__APPLE__)
1068 #endif /* __APPLE__ */
1069 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
|= CPU_DTRACE_UPRIV
;
1075 dtrace_priv_kernel(dtrace_state_t
*state
)
1077 if (state
->dts_cred
.dcr_action
& DTRACE_CRA_KERNEL
)
1080 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
|= CPU_DTRACE_KPRIV
;
1086 dtrace_priv_kernel_destructive(dtrace_state_t
*state
)
1088 if (state
->dts_cred
.dcr_action
& DTRACE_CRA_KERNEL_DESTRUCTIVE
)
1091 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
|= CPU_DTRACE_KPRIV
;
1097 * Note: not called from probe context. This function is called
1098 * asynchronously (and at a regular interval) from outside of probe context to
1099 * clean the dirty dynamic variable lists on all CPUs. Dynamic variable
1100 * cleaning is explained in detail in <sys/dtrace_impl.h>.
1102 #if defined(__APPLE__)
1104 #endif /* __APPLE__ */
1106 dtrace_dynvar_clean(dtrace_dstate_t
*dstate
)
1108 dtrace_dynvar_t
*dirty
;
1109 dtrace_dstate_percpu_t
*dcpu
;
1112 for (i
= 0; i
< NCPU
; i
++) {
1113 dcpu
= &dstate
->dtds_percpu
[i
];
1115 ASSERT(dcpu
->dtdsc_rinsing
== NULL
);
1118 * If the dirty list is NULL, there is no dirty work to do.
1120 if (dcpu
->dtdsc_dirty
== NULL
)
1124 * If the clean list is non-NULL, then we're not going to do
1125 * any work for this CPU -- it means that there has not been
1126 * a dtrace_dynvar() allocation on this CPU (or from this CPU)
1127 * since the last time we cleaned house.
1129 if (dcpu
->dtdsc_clean
!= NULL
)
1135 * Atomically move the dirty list aside.
1138 dirty
= dcpu
->dtdsc_dirty
;
1141 * Before we zap the dirty list, set the rinsing list.
1142 * (This allows for a potential assertion in
1143 * dtrace_dynvar(): if a free dynamic variable appears
1144 * on a hash chain, either the dirty list or the
1145 * rinsing list for some CPU must be non-NULL.)
1147 dcpu
->dtdsc_rinsing
= dirty
;
1148 dtrace_membar_producer();
1149 } while (dtrace_casptr(&dcpu
->dtdsc_dirty
,
1150 dirty
, NULL
) != dirty
);
1155 * We have no work to do; we can simply return.
1162 for (i
= 0; i
< NCPU
; i
++) {
1163 dcpu
= &dstate
->dtds_percpu
[i
];
1165 if (dcpu
->dtdsc_rinsing
== NULL
)
1169 * We are now guaranteed that no hash chain contains a pointer
1170 * into this dirty list; we can make it clean.
1172 ASSERT(dcpu
->dtdsc_clean
== NULL
);
1173 dcpu
->dtdsc_clean
= dcpu
->dtdsc_rinsing
;
1174 dcpu
->dtdsc_rinsing
= NULL
;
1178 * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make
1179 * sure that all CPUs have seen all of the dtdsc_clean pointers.
1180 * This prevents a race whereby a CPU incorrectly decides that
1181 * the state should be something other than DTRACE_DSTATE_CLEAN
1182 * after dtrace_dynvar_clean() has completed.
1186 dstate
->dtds_state
= DTRACE_DSTATE_CLEAN
;
1190 * Depending on the value of the op parameter, this function looks-up,
1191 * allocates or deallocates an arbitrarily-keyed dynamic variable. If an
1192 * allocation is requested, this function will return a pointer to a
1193 * dtrace_dynvar_t corresponding to the allocated variable -- or NULL if no
1194 * variable can be allocated. If NULL is returned, the appropriate counter
1195 * will be incremented.
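/*
 * A minimal usage sketch (the key construction here is hypothetical):
 * looking up an existing thread-local variable without allocating is
 *
 *	dtrace_key_t key[2];
 *	key[0].dttk_value = id; key[0].dttk_size = 0;
 *	DTRACE_TLS_THRKEY(key[1].dttk_value); key[1].dttk_size = 0;
 *	dvar = dtrace_dynvar(dstate, 2, key, sizeof (uint64_t),
 *	    DTRACE_DYNVAR_NOALLOC);
 *
 * which returns NULL if no such variable has been allocated.
 */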
1197 #if defined(__APPLE__)
1199 #endif /* __APPLE__ */
1201 dtrace_dynvar(dtrace_dstate_t
*dstate
, uint_t nkeys
,
1202 dtrace_key_t
*key
, size_t dsize
, dtrace_dynvar_op_t op
)
1204 uint64_t hashval
= DTRACE_DYNHASH_VALID
;
1205 dtrace_dynhash_t
*hash
= dstate
->dtds_hash
;
1206 dtrace_dynvar_t
*free
, *new_free
, *next
, *dvar
, *start
, *prev
= NULL
;
1207 processorid_t me
= CPU
->cpu_id
, cpu
= me
;
1208 dtrace_dstate_percpu_t
*dcpu
= &dstate
->dtds_percpu
[me
];
1209 size_t bucket
, ksize
;
1210 size_t chunksize
= dstate
->dtds_chunksize
;
1211 uintptr_t kdata
, lock
, nstate
;
1217 * Hash the key. As with aggregations, we use Jenkins' "One-at-a-time"
1218 * algorithm. For the by-value portions, we perform the algorithm in
1219 * 16-bit chunks (as opposed to 8-bit chunks). This speeds things up a
1220 * bit, and seems to have only a minute effect on distribution. For
1221 * the by-reference data, we perform "One-at-a-time" iterating (safely)
1222 * over each referenced byte. It's painful to do this, but it's much
1223 * better than pathological hash distribution. The efficacy of the
1224 * hashing algorithm (and a comparison with other algorithms) may be
1225 * found by running the ::dtrace_dynstat MDB dcmd.
1227 for (i
= 0; i
< nkeys
; i
++) {
1228 if (key
[i
].dttk_size
== 0) {
1229 uint64_t val
= key
[i
].dttk_value
;
1231 hashval
+= (val
>> 48) & 0xffff;
1232 hashval
+= (hashval
<< 10);
1233 hashval
^= (hashval
>> 6);
1235 hashval
+= (val
>> 32) & 0xffff;
1236 hashval
+= (hashval
<< 10);
1237 hashval
^= (hashval
>> 6);
1239 hashval
+= (val
>> 16) & 0xffff;
1240 hashval
+= (hashval
<< 10);
1241 hashval
^= (hashval
>> 6);
1243 hashval
+= val
& 0xffff;
1244 hashval
+= (hashval
<< 10);
1245 hashval
^= (hashval
>> 6);
1248 * This is incredibly painful, but it beats the hell
1249 * out of the alternative.
1251 uint64_t j
, size
= key
[i
].dttk_size
;
1252 uintptr_t base
= (uintptr_t)key
[i
].dttk_value
;
1254 for (j
= 0; j
< size
; j
++) {
1255 hashval
+= dtrace_load8(base
+ j
);
1256 hashval
+= (hashval
<< 10);
1257 hashval
^= (hashval
>> 6);
1262 hashval
+= (hashval
<< 3);
1263 hashval
^= (hashval
>> 11);
1264 hashval
+= (hashval
<< 15);
1267 * There is a remote chance (ideally, 1 in 2^31) that our hashval
1268 * comes out to be one of our two sentinel hash values. If this
1269 * actually happens, we set the hashval to be a value known to be a
1270 * non-sentinel value.
1272 if (hashval
== DTRACE_DYNHASH_FREE
|| hashval
== DTRACE_DYNHASH_SINK
)
1273 hashval
= DTRACE_DYNHASH_VALID
;
1276 * Yes, it's painful to do a divide here. If the cycle count becomes
1277 * important here, tricks can be pulled to reduce it. (However, it's
1278 * critical that hash collisions be kept to an absolute minimum;
1279 * they're much more painful than a divide.) It's better to have a
1280 * solution that generates few collisions and still keeps things
1281 * relatively simple.
1283 bucket
= hashval
% dstate
->dtds_hashsize
;
1285 if (op
== DTRACE_DYNVAR_DEALLOC
) {
1286 volatile uintptr_t *lockp
= &hash
[bucket
].dtdh_lock
;
1289 while ((lock
= *lockp
) & 1)
1292 if (dtrace_casptr((void *)lockp
,
1293 (void *)lock
, (void *)(lock
+ 1)) == (void *)lock
)
1297 dtrace_membar_producer();
1302 lock
= hash
[bucket
].dtdh_lock
;
1304 dtrace_membar_consumer();
1306 start
= hash
[bucket
].dtdh_chain
;
1307 ASSERT(start
!= NULL
&& (start
->dtdv_hashval
== DTRACE_DYNHASH_SINK
||
1308 start
->dtdv_hashval
!= DTRACE_DYNHASH_FREE
||
1309 op
!= DTRACE_DYNVAR_DEALLOC
));
1311 for (dvar
= start
; dvar
!= NULL
; dvar
= dvar
->dtdv_next
) {
1312 dtrace_tuple_t
*dtuple
= &dvar
->dtdv_tuple
;
1313 dtrace_key_t
*dkey
= &dtuple
->dtt_key
[0];
1315 if (dvar
->dtdv_hashval
!= hashval
) {
1316 if (dvar
->dtdv_hashval
== DTRACE_DYNHASH_SINK
) {
1318 * We've reached the sink, and therefore the
1319 * end of the hash chain; we can kick out of
1320 * the loop knowing that we have seen a valid
1321 * snapshot of state.
1323 ASSERT(dvar
->dtdv_next
== NULL
);
1324 ASSERT(dvar
== &dtrace_dynhash_sink
);
1328 if (dvar
->dtdv_hashval
== DTRACE_DYNHASH_FREE
) {
1330 * We've gone off the rails: somewhere along
1331 * the line, one of the members of this hash
1332 * chain was deleted. Note that we could also
1333 * detect this by simply letting this loop run
1334 * to completion, as we would eventually hit
1335 * the end of the dirty list. However, we
1336 * want to avoid running the length of the
1337 * dirty list unnecessarily (it might be quite
1338 * long), so we catch this as early as
1339 * possible by detecting the hash marker. In
1340 * this case, we simply set dvar to NULL and
1341 * break; the conditional after the loop will
1342 * send us back to top.
1351 if (dtuple
->dtt_nkeys
!= nkeys
)
1354 for (i
= 0; i
< nkeys
; i
++, dkey
++) {
1355 if (dkey
->dttk_size
!= key
[i
].dttk_size
)
1356 goto next
; /* size or type mismatch */
1358 if (dkey
->dttk_size
!= 0) {
1360 (void *)(uintptr_t)key
[i
].dttk_value
,
1361 (void *)(uintptr_t)dkey
->dttk_value
,
1365 if (dkey
->dttk_value
!= key
[i
].dttk_value
)
1370 if (op
!= DTRACE_DYNVAR_DEALLOC
)
1373 ASSERT(dvar
->dtdv_next
== NULL
||
1374 dvar
->dtdv_next
->dtdv_hashval
!= DTRACE_DYNHASH_FREE
);
1377 ASSERT(hash
[bucket
].dtdh_chain
!= dvar
);
1378 ASSERT(start
!= dvar
);
1379 ASSERT(prev
->dtdv_next
== dvar
);
1380 prev
->dtdv_next
= dvar
->dtdv_next
;
1382 if (dtrace_casptr(&hash
[bucket
].dtdh_chain
,
1383 start
, dvar
->dtdv_next
) != start
) {
1385 * We have failed to atomically swing the
1386 * hash table head pointer, presumably because
1387 * of a conflicting allocation on another CPU.
1388 * We need to reread the hash chain and try
1395 dtrace_membar_producer();
1398 * Now set the hash value to indicate that it's free.
1400 ASSERT(hash
[bucket
].dtdh_chain
!= dvar
);
1401 dvar
->dtdv_hashval
= DTRACE_DYNHASH_FREE
;
1403 dtrace_membar_producer();
1406 * Set the next pointer to point at the dirty list, and
1407 * atomically swing the dirty pointer to the newly freed dvar.
1410 next
= dcpu
->dtdsc_dirty
;
1411 dvar
->dtdv_next
= next
;
1412 } while (dtrace_casptr(&dcpu
->dtdsc_dirty
, next
, dvar
) != next
);
1415 * Finally, unlock this hash bucket.
1417 ASSERT(hash
[bucket
].dtdh_lock
== lock
);
1419 hash
[bucket
].dtdh_lock
++;
1429 * If dvar is NULL, it is because we went off the rails:
1430 * one of the elements that we traversed in the hash chain
1431 * was deleted while we were traversing it. In this case,
1432 * we assert that we aren't doing a dealloc (deallocs lock
1433 * the hash bucket to prevent themselves from racing with
1434 * one another), and retry the hash chain traversal.
1436 ASSERT(op
!= DTRACE_DYNVAR_DEALLOC
);
1440 if (op
!= DTRACE_DYNVAR_ALLOC
) {
1442 * If we are not to allocate a new variable, we want to
1443 * return NULL now. Before we return, check that the value
1444 * of the lock word hasn't changed. If it has, we may have
1445 * seen an inconsistent snapshot.
1447 if (op
== DTRACE_DYNVAR_NOALLOC
) {
1448 if (hash
[bucket
].dtdh_lock
!= lock
)
1451 ASSERT(op
== DTRACE_DYNVAR_DEALLOC
);
1452 ASSERT(hash
[bucket
].dtdh_lock
== lock
);
1454 hash
[bucket
].dtdh_lock
++;
1461 * We need to allocate a new dynamic variable. The size we need is the
1462 * size of dtrace_dynvar plus the size of nkeys dtrace_key_t's plus the
1463 * size of any auxiliary key data (rounded up to 8-byte alignment) plus
1464 * the size of any referred-to data (dsize). We then round the final
1465 * size up to the chunksize for allocation.
1467 for (ksize
= 0, i
= 0; i
< nkeys
; i
++)
1468 ksize
+= P2ROUNDUP(key
[i
].dttk_size
, sizeof (uint64_t));
1471 * This should be pretty much impossible, but could happen if, say,
1472 * strange DIF specified the tuple. Ideally, this should be an
1473 * assertion and not an error condition -- but that requires that the
1474 * chunksize calculation in dtrace_difo_chunksize() be absolutely
1475 * bullet-proof. (That is, it must not be able to be fooled by
1476 * malicious DIF.) Given the lack of backwards branches in DIF,
1477 * solving this would presumably not amount to solving the Halting
1478 * Problem -- but it still seems awfully hard.
1480 if (sizeof (dtrace_dynvar_t
) + sizeof (dtrace_key_t
) * (nkeys
- 1) +
1481 ksize
+ dsize
> chunksize
) {
1482 dcpu
->dtdsc_drops
++;
1486 nstate
= DTRACE_DSTATE_EMPTY
;
1490 free
= dcpu
->dtdsc_free
;
1493 dtrace_dynvar_t
*clean
= dcpu
->dtdsc_clean
;
1496 if (clean
== NULL
) {
1498 * We're out of dynamic variable space on
1499 * this CPU. Unless we have tried all CPUs,
1500 * we'll try to allocate from a different
1503 switch (dstate
->dtds_state
) {
1504 case DTRACE_DSTATE_CLEAN
: {
1505 void *sp
= &dstate
->dtds_state
;
1510 if (dcpu
->dtdsc_dirty
!= NULL
&&
1511 nstate
== DTRACE_DSTATE_EMPTY
)
1512 nstate
= DTRACE_DSTATE_DIRTY
;
1514 if (dcpu
->dtdsc_rinsing
!= NULL
)
1515 nstate
= DTRACE_DSTATE_RINSING
;
1517 dcpu
= &dstate
->dtds_percpu
[cpu
];
1522 (void) dtrace_cas32(sp
,
1523 DTRACE_DSTATE_CLEAN
, nstate
);
1526 * To increment the correct bean
1527 * counter, take another lap.
1532 case DTRACE_DSTATE_DIRTY
:
1533 dcpu
->dtdsc_dirty_drops
++;
1536 case DTRACE_DSTATE_RINSING
:
1537 dcpu
->dtdsc_rinsing_drops
++;
1540 case DTRACE_DSTATE_EMPTY
:
1541 dcpu
->dtdsc_drops
++;
1545 DTRACE_CPUFLAG_SET(CPU_DTRACE_DROP
);
1550 * The clean list appears to be non-empty. We want to
1551 * move the clean list to the free list; we start by
1552 * moving the clean pointer aside.
1554 if (dtrace_casptr(&dcpu
->dtdsc_clean
,
1555 clean
, NULL
) != clean
) {
1557 * We are in one of two situations:
1559 * (a) The clean list was switched to the
1560 * free list by another CPU.
1562 * (b) The clean list was added to by the
1565 * In either of these situations, we can
1566 * just reattempt the free list allocation.
1571 ASSERT(clean
->dtdv_hashval
== DTRACE_DYNHASH_FREE
);
1574 * Now we'll move the clean list to the free list.
1575 * It's impossible for this to fail: the only way
1576 * the free list can be updated is through this
1577 * code path, and only one CPU can own the clean list.
1578 * Thus, it would only be possible for this to fail if
1579 * this code were racing with dtrace_dynvar_clean().
1580 * (That is, if dtrace_dynvar_clean() updated the clean
1581 * list, and we ended up racing to update the free
1582 * list.) This race is prevented by the dtrace_sync()
1583 * in dtrace_dynvar_clean() -- which flushes the
1584 * owners of the clean lists out before resetting
1587 rval
= dtrace_casptr(&dcpu
->dtdsc_free
, NULL
, clean
);
1588 ASSERT(rval
== NULL
);
1593 new_free
= dvar
->dtdv_next
;
1594 } while (dtrace_casptr(&dcpu
->dtdsc_free
, free
, new_free
) != free
);
1597 * We have now allocated a new chunk. We copy the tuple keys into the
1598 * tuple array and copy any referenced key data into the data space
1599 * following the tuple array. As we do this, we relocate dttk_value
1600 * in the final tuple to point to the key data address in the chunk.
1602 kdata
= (uintptr_t)&dvar
->dtdv_tuple
.dtt_key
[nkeys
];
1603 dvar
->dtdv_data
= (void *)(kdata
+ ksize
);
1604 dvar
->dtdv_tuple
.dtt_nkeys
= nkeys
;
1606 for (i
= 0; i
< nkeys
; i
++) {
1607 dtrace_key_t
*dkey
= &dvar
->dtdv_tuple
.dtt_key
[i
];
1608 size_t kesize
= key
[i
].dttk_size
;
1612 (const void *)(uintptr_t)key
[i
].dttk_value
,
1613 (void *)kdata
, kesize
);
1614 dkey
->dttk_value
= kdata
;
1615 kdata
+= P2ROUNDUP(kesize
, sizeof (uint64_t));
1617 dkey
->dttk_value
= key
[i
].dttk_value
;
1620 dkey
->dttk_size
= kesize
;
1623 ASSERT(dvar
->dtdv_hashval
== DTRACE_DYNHASH_FREE
);
1624 dvar
->dtdv_hashval
= hashval
;
1625 dvar
->dtdv_next
= start
;
1627 if (dtrace_casptr(&hash
[bucket
].dtdh_chain
, start
, dvar
) == start
)
1631 * The cas has failed. Either another CPU is adding an element to
1632 * this hash chain, or another CPU is deleting an element from this
1633 * hash chain. The simplest way to deal with both of these cases
1634 * (though not necessarily the most efficient) is to free our
1635 * allocated block and tail-call ourselves. Note that the free is
1636 * to the dirty list and _not_ to the free list. This is to prevent
1637 * races with allocators, above.
1639 dvar
->dtdv_hashval
= DTRACE_DYNHASH_FREE
;
1641 dtrace_membar_producer();
1644 free
= dcpu
->dtdsc_dirty
;
1645 dvar
->dtdv_next
= free
;
1646 } while (dtrace_casptr(&dcpu
->dtdsc_dirty
, free
, dvar
) != free
);
1648 return (dtrace_dynvar(dstate
, nkeys
, key
, dsize
, op
));
1653 dtrace_aggregate_min(uint64_t *oval
, uint64_t nval
, uint64_t arg
)
1661 dtrace_aggregate_max(uint64_t *oval
, uint64_t nval
, uint64_t arg
)
1668 dtrace_aggregate_quantize(uint64_t *quanta
, uint64_t nval
, uint64_t incr
)
1670 int i
, zero
= DTRACE_QUANTIZE_ZEROBUCKET
;
1671 int64_t val
= (int64_t)nval
;
1674 for (i
= 0; i
< zero
; i
++) {
1675 if (val
<= DTRACE_QUANTIZE_BUCKETVAL(i
)) {
1681 for (i
= zero
+ 1; i
< DTRACE_QUANTIZE_NBUCKETS
; i
++) {
1682 if (val
< DTRACE_QUANTIZE_BUCKETVAL(i
)) {
1683 quanta
[i
- 1] += incr
;
1688 quanta
[DTRACE_QUANTIZE_NBUCKETS
- 1] += incr
;
1696 dtrace_aggregate_lquantize(uint64_t *lquanta
, uint64_t nval
, uint64_t incr
)
1698 uint64_t arg
= *lquanta
++;
1699 int32_t base
= DTRACE_LQUANTIZE_BASE(arg
);
1700 uint16_t step
= DTRACE_LQUANTIZE_STEP(arg
);
1701 uint16_t levels
= DTRACE_LQUANTIZE_LEVELS(arg
);
1702 int32_t val
= (int32_t)nval
, level
;
1705 ASSERT(levels
!= 0);
1709 * This is an underflow.
1715 level
= (val
- base
) / step
;
1717 if (level
< levels
) {
1718 lquanta
[level
+ 1] += incr
;
1723 * This is an overflow.
1725 lquanta
[levels
+ 1] += incr
;
1730 dtrace_aggregate_avg(uint64_t *data
, uint64_t nval
, uint64_t arg
)
1738 dtrace_aggregate_count(uint64_t *oval
, uint64_t nval
, uint64_t arg
)
1745 dtrace_aggregate_sum(uint64_t *oval
, uint64_t nval
, uint64_t arg
)
1751 * Aggregate given the tuple in the principal data buffer, and the aggregating
1752 * action denoted by the specified dtrace_aggregation_t. The aggregation
1753 * buffer is specified as the buf parameter. This routine does not return
1754 * failure; if there is no space in the aggregation buffer, the data will be
1755 * dropped, and a corresponding counter incremented.
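/*
 * As an illustration (a sketch of the caller's side, not code from this
 * file): when an ECB processes a count() action, the framework ends up
 * calling
 *
 *	dtrace_aggregate(agg, buf, offset, aggbuf, 1, 0);
 *
 * where agg->dtag_aggregate points at dtrace_aggregate_count(), so the
 * stored 64-bit value for the matching key tuple is simply incremented.
 */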
1758 dtrace_aggregate(dtrace_aggregation_t
*agg
, dtrace_buffer_t
*dbuf
,
1759 intptr_t offset
, dtrace_buffer_t
*buf
, uint64_t expr
, uint64_t arg
)
1761 dtrace_recdesc_t
*rec
= &agg
->dtag_action
.dta_rec
;
1762 uint32_t i
, ndx
, size
, fsize
;
1763 uint32_t align
= sizeof (uint64_t) - 1;
1764 dtrace_aggbuffer_t
*agb
;
1765 dtrace_aggkey_t
*key
;
1766 uint32_t hashval
= 0, limit
, isstr
;
1767 caddr_t tomax
, data
, kdata
;
1768 dtrace_actkind_t action
;
1769 dtrace_action_t
*act
;
1775 if (!agg
->dtag_hasarg
) {
1777 * Currently, only quantize() and lquantize() take additional
1778 * arguments, and they have the same semantics: an increment
1779 * value that defaults to 1 when not present. If additional
1780 * aggregating actions take arguments, the setting of the
1781 * default argument value will presumably have to become more
1787 action
= agg
->dtag_action
.dta_kind
- DTRACEACT_AGGREGATION
;
1788 size
= rec
->dtrd_offset
- agg
->dtag_base
;
1789 fsize
= size
+ rec
->dtrd_size
;
1791 ASSERT(dbuf
->dtb_tomax
!= NULL
);
1792 data
= dbuf
->dtb_tomax
+ offset
+ agg
->dtag_base
;
1794 if ((tomax
= buf
->dtb_tomax
) == NULL
) {
1795 dtrace_buffer_drop(buf
);
1800 * The metastructure is always at the bottom of the buffer.
1802 agb
= (dtrace_aggbuffer_t
*)(tomax
+ buf
->dtb_size
-
1803 sizeof (dtrace_aggbuffer_t
));
1805 if (buf
->dtb_offset
== 0) {
1807 * We just kludge up approximately 1/8th of the size to be
1808 * buckets. If this guess ends up being routinely
1809 * off-the-mark, we may need to dynamically readjust this
1810 * based on past performance.
1812 uintptr_t hashsize
= (buf
->dtb_size
>> 3) / sizeof (uintptr_t);
1814 if ((uintptr_t)agb
- hashsize
* sizeof (dtrace_aggkey_t
*) <
1815 (uintptr_t)tomax
|| hashsize
== 0) {
1817 * We've been given a ludicrously small buffer;
1818 * increment our drop count and leave.
1820 dtrace_buffer_drop(buf
);
* And now, a pathetic attempt to get an odd (or
1826 * perchance, a prime) hash size for better hash distribution.
1828 if (hashsize
> (DTRACE_AGGHASHSIZE_SLEW
<< 3))
1829 hashsize
-= DTRACE_AGGHASHSIZE_SLEW
;
1831 agb
->dtagb_hashsize
= hashsize
;
1832 agb
->dtagb_hash
= (dtrace_aggkey_t
**)((uintptr_t)agb
-
1833 agb
->dtagb_hashsize
* sizeof (dtrace_aggkey_t
*));
1834 agb
->dtagb_free
= (uintptr_t)agb
->dtagb_hash
;
1836 for (i
= 0; i
< agb
->dtagb_hashsize
; i
++)
1837 agb
->dtagb_hash
[i
] = NULL
;
1840 ASSERT(agg
->dtag_first
!= NULL
);
1841 ASSERT(agg
->dtag_first
->dta_intuple
);
1844 * Calculate the hash value based on the key. Note that we _don't_
1845 * include the aggid in the hashing (but we will store it as part of
1846 * the key). The hashing algorithm is Bob Jenkins' "One-at-a-time"
1847 * algorithm: a simple, quick algorithm that has no known funnels, and
1848 * gets good distribution in practice. The efficacy of the hashing
1849 * algorithm (and a comparison with other algorithms) may be found by
1850 * running the ::dtrace_aggstat MDB dcmd.
1852 for (act
= agg
->dtag_first
; act
->dta_intuple
; act
= act
->dta_next
) {
1853 i
= act
->dta_rec
.dtrd_offset
- agg
->dtag_base
;
1854 limit
= i
+ act
->dta_rec
.dtrd_size
;
1855 ASSERT(limit
<= size
);
1856 isstr
= DTRACEACT_ISSTRING(act
);
1858 for (; i
< limit
; i
++) {
1860 hashval
+= (hashval
<< 10);
1861 hashval
^= (hashval
>> 6);
1863 if (isstr
&& data
[i
] == '\0')
1868 hashval
+= (hashval
<< 3);
1869 hashval
^= (hashval
>> 11);
1870 hashval
+= (hashval
<< 15);
1873 * Yes, the divide here is expensive -- but it's generally the least
1874 * of the performance issues given the amount of data that we iterate
1875 * over to compute hash values, compare data, etc.
1877 ndx
= hashval
% agb
->dtagb_hashsize
;
1879 for (key
= agb
->dtagb_hash
[ndx
]; key
!= NULL
; key
= key
->dtak_next
) {
1880 ASSERT((caddr_t
)key
>= tomax
);
1881 ASSERT((caddr_t
)key
< tomax
+ buf
->dtb_size
);
1883 if (hashval
!= key
->dtak_hashval
|| key
->dtak_size
!= size
)
1886 kdata
= key
->dtak_data
;
1887 ASSERT(kdata
>= tomax
&& kdata
< tomax
+ buf
->dtb_size
);
1889 for (act
= agg
->dtag_first
; act
->dta_intuple
;
1890 act
= act
->dta_next
) {
1891 i
= act
->dta_rec
.dtrd_offset
- agg
->dtag_base
;
1892 limit
= i
+ act
->dta_rec
.dtrd_size
;
1893 ASSERT(limit
<= size
);
1894 isstr
= DTRACEACT_ISSTRING(act
);
1896 for (; i
< limit
; i
++) {
1897 if (kdata
[i
] != data
[i
])
1900 if (isstr
&& data
[i
] == '\0')
1905 if (action
!= key
->dtak_action
) {
1907 * We are aggregating on the same value in the same
1908 * aggregation with two different aggregating actions.
1909 * (This should have been picked up in the compiler,
1910 * so we may be dealing with errant or devious DIF.)
1911 * This is an error condition; we indicate as much,
1914 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP
);
1919 * This is a hit: we need to apply the aggregator to
1920 * the value at this key.
1922 agg
->dtag_aggregate((uint64_t *)(kdata
+ size
), expr
, arg
);
1929 * We didn't find it. We need to allocate some zero-filled space,
1930 * link it into the hash table appropriately, and apply the aggregator
1931 * to the (zero-filled) value.
1933 offs
= buf
->dtb_offset
;
1934 while (offs
& (align
- 1))
1935 offs
+= sizeof (uint32_t);
1938 * If we don't have enough room to both allocate a new key _and_
1939 * its associated data, increment the drop count and return.
1941 if ((uintptr_t)tomax
+ offs
+ fsize
>
1942 agb
->dtagb_free
- sizeof (dtrace_aggkey_t
)) {
1943 dtrace_buffer_drop(buf
);
1948 ASSERT(!(sizeof (dtrace_aggkey_t
) & (sizeof (uintptr_t) - 1)));
1949 key
= (dtrace_aggkey_t
*)(agb
->dtagb_free
- sizeof (dtrace_aggkey_t
));
1950 agb
->dtagb_free
-= sizeof (dtrace_aggkey_t
);
1952 key
->dtak_data
= kdata
= tomax
+ offs
;
1953 buf
->dtb_offset
= offs
+ fsize
;
1956 * Now copy the data across.
1958 *((dtrace_aggid_t
*)kdata
) = agg
->dtag_id
;
1960 for (i
= sizeof (dtrace_aggid_t
); i
< size
; i
++)
1964 * Because strings are not zeroed out by default, we need to iterate
1965 * looking for actions that store strings, and we need to explicitly
1966 * pad these strings out with zeroes.
1968 for (act
= agg
->dtag_first
; act
->dta_intuple
; act
= act
->dta_next
) {
1971 if (!DTRACEACT_ISSTRING(act
))
1974 i
= act
->dta_rec
.dtrd_offset
- agg
->dtag_base
;
1975 limit
= i
+ act
->dta_rec
.dtrd_size
;
1976 ASSERT(limit
<= size
);
1978 for (nul
= 0; i
< limit
; i
++) {
1984 if (data
[i
] != '\0')
1991 for (i
= size
; i
< fsize
; i
++)
1994 key
->dtak_hashval
= hashval
;
1995 key
->dtak_size
= size
;
1996 key
->dtak_action
= action
;
1997 key
->dtak_next
= agb
->dtagb_hash
[ndx
];
1998 agb
->dtagb_hash
[ndx
] = key
;
2001 * Finally, apply the aggregator.
2003 *((uint64_t *)(key
->dtak_data
+ size
)) = agg
->dtag_initial
;
2004 agg
->dtag_aggregate((uint64_t *)(key
->dtak_data
+ size
), expr
, arg
);
2008 * Given consumer state, this routine finds a speculation in the INACTIVE
2009 * state and transitions it into the ACTIVE state. If there is no speculation
2010 * in the INACTIVE state, 0 is returned. In this case, no error counter is
2011 * incremented -- it is up to the caller to take appropriate action.
2014 dtrace_speculation(dtrace_state_t
*state
)
2017 dtrace_speculation_state_t current
;
2018 uint32_t *stat
= &state
->dts_speculations_unavail
, count
;
2020 while (i
< state
->dts_nspeculations
) {
2021 dtrace_speculation_t
*spec
= &state
->dts_speculations
[i
];
2023 current
= spec
->dtsp_state
;
2025 if (current
!= DTRACESPEC_INACTIVE
) {
2026 if (current
== DTRACESPEC_COMMITTINGMANY
||
2027 current
== DTRACESPEC_COMMITTING
||
2028 current
== DTRACESPEC_DISCARDING
)
2029 stat
= &state
->dts_speculations_busy
;
2034 if (dtrace_cas32((uint32_t *)&spec
->dtsp_state
,
2035 current
, DTRACESPEC_ACTIVE
) == current
)
2040 * We couldn't find a speculation. If we found as much as a single
2041 * busy speculation buffer, we'll attribute this failure as "busy"
2042 * instead of "unavail".
2046 } while (dtrace_cas32(stat
, count
, count
+ 1) != count
);
/*
 * This routine commits an active speculation.  If the specified speculation
 * is not in a valid state to perform a commit(), this routine will silently do
 * nothing.  The state of the specified speculation is transitioned according
 * to the state transition diagram outlined in <sys/dtrace_impl.h>
 */
static void
dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_buffer_t *src, *dest;
	uintptr_t daddr, saddr, dlimit;
	dtrace_speculation_state_t current, new;
	intptr_t offs;

	if (which == 0)
		return;

	if (which > state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}

	spec = &state->dts_speculations[which - 1];
	src = &spec->dtsp_buffer[cpu];
	dest = &state->dts_buffer[cpu];

	do {
		current = spec->dtsp_state;

		if (current == DTRACESPEC_COMMITTINGMANY)
			break;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_DISCARDING:
			return;

		case DTRACESPEC_COMMITTING:
			/*
			 * This is only possible if we are (a) commit()'ing
			 * without having done a prior speculate() on this CPU
			 * and (b) racing with another commit() on a different
			 * CPU.  There's nothing to do -- we just assert that
			 * our offset is 0.
			 */
			ASSERT(src->dtb_offset == 0);
			return;

		case DTRACESPEC_ACTIVE:
			new = DTRACESPEC_COMMITTING;
			break;

		case DTRACESPEC_ACTIVEONE:
			/*
			 * This speculation is active on one CPU.  If our
			 * buffer offset is non-zero, we know that the one CPU
			 * must be us.  Otherwise, we are committing on a
			 * different CPU from the speculate(), and we must
			 * rely on being asynchronously cleaned.
			 */
			if (src->dtb_offset != 0) {
				new = DTRACESPEC_COMMITTING;
				break;
			}
			/*FALLTHROUGH*/

		case DTRACESPEC_ACTIVEMANY:
			new = DTRACESPEC_COMMITTINGMANY;
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	/*
	 * We have set the state to indicate that we are committing this
	 * speculation.  Now reserve the necessary space in the destination
	 * buffer.
	 */
	if ((offs = dtrace_buffer_reserve(dest, src->dtb_offset,
	    sizeof (uint64_t), state, NULL)) < 0) {
		dtrace_buffer_drop(dest);
		goto out;
	}

	/*
	 * We have the space; copy the buffer across.  (Note that this is a
	 * highly suboptimal bcopy(); in the unlikely event that this becomes
	 * a serious performance issue, a high-performance DTrace-specific
	 * bcopy() should obviously be invented.)
	 */
	daddr = (uintptr_t)dest->dtb_tomax + offs;
	dlimit = daddr + src->dtb_offset;
	saddr = (uintptr_t)src->dtb_tomax;

	/*
	 * First, the aligned portion.
	 */
	while (dlimit - daddr >= sizeof (uint64_t)) {
		*((uint64_t *)daddr) = *((uint64_t *)saddr);

		daddr += sizeof (uint64_t);
		saddr += sizeof (uint64_t);
	}

	/*
	 * Now any left-over bit...
	 */
	while (dlimit - daddr)
		*((uint8_t *)daddr++) = *((uint8_t *)saddr++);

	/*
	 * Finally, commit the reserved space in the destination buffer.
	 */
	dest->dtb_offset = offs + src->dtb_offset;

out:
	/*
	 * If we're lucky enough to be the only active CPU on this speculation
	 * buffer, we can just set the state back to DTRACESPEC_INACTIVE.
	 */
	if (current == DTRACESPEC_ACTIVE ||
	    (current == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) {
		uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state,
		    DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE);

		ASSERT(rval == DTRACESPEC_COMMITTING);
	}

	src->dtb_offset = 0;
	src->dtb_xamot_drops += src->dtb_drops;
	src->dtb_drops = 0;
}
/*
 * This routine discards an active speculation.  If the specified speculation
 * is not in a valid state to perform a discard(), this routine will silently
 * do nothing.  The state of the specified speculation is transitioned
 * according to the state transition diagram outlined in <sys/dtrace_impl.h>
 */
static void
dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_speculation_state_t current, new;
	dtrace_buffer_t *buf;

	if (which == 0)
		return;

	if (which > state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}

	spec = &state->dts_speculations[which - 1];
	buf = &spec->dtsp_buffer[cpu];

	do {
		current = spec->dtsp_state;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_COMMITTINGMANY:
		case DTRACESPEC_COMMITTING:
		case DTRACESPEC_DISCARDING:
			return;

		case DTRACESPEC_ACTIVE:
		case DTRACESPEC_ACTIVEMANY:
			new = DTRACESPEC_DISCARDING;
			break;

		case DTRACESPEC_ACTIVEONE:
			if (buf->dtb_offset != 0) {
				new = DTRACESPEC_INACTIVE;
			} else {
				new = DTRACESPEC_DISCARDING;
			}
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	buf->dtb_offset = 0;
}
/*
 * Note:  not called from probe context.  This function is called
 * asynchronously from cross call context to clean any speculations that are
 * in the COMMITTINGMANY or DISCARDING states.  These speculations may not be
 * transitioned back to the INACTIVE state until all CPUs have cleaned the
 * speculation.
 */
static void
dtrace_speculation_clean_here(dtrace_state_t *state)
{
	dtrace_icookie_t cookie;
	processorid_t cpu = CPU->cpu_id;
	dtrace_buffer_t *dest = &state->dts_buffer[cpu];
	dtrace_specid_t i;

	cookie = dtrace_interrupt_disable();

	if (dest->dtb_tomax == NULL) {
		dtrace_interrupt_enable(cookie);
		return;
	}

	for (i = 0; i < state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];
		dtrace_buffer_t *src = &spec->dtsp_buffer[cpu];

		if (src->dtb_tomax == NULL)
			continue;

		if (spec->dtsp_state == DTRACESPEC_DISCARDING) {
			src->dtb_offset = 0;
			continue;
		}

		if (spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
			continue;

		if (src->dtb_offset == 0)
			continue;

		dtrace_speculation_commit(state, cpu, i + 1);
	}

	dtrace_interrupt_enable(cookie);
}
/*
 * Note:  not called from probe context.  This function is called
 * asynchronously (and at a regular interval) to clean any speculations that
 * are in the COMMITTINGMANY or DISCARDING states.  If it discovers that there
 * is work to be done, it cross calls all CPUs to perform that work;
 * COMMITTINGMANY and DISCARDING speculations may not be transitioned back to
 * the INACTIVE state until they have been cleaned by all CPUs.
 */
static void
dtrace_speculation_clean(dtrace_state_t *state)
{
	int work = 0, rv;
	dtrace_specid_t i;

	for (i = 0; i < state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];

		ASSERT(!spec->dtsp_cleaning);

		if (spec->dtsp_state != DTRACESPEC_DISCARDING &&
		    spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
			continue;

		work++;
		spec->dtsp_cleaning = 1;
	}

	if (!work)
		return;

	dtrace_xcall(DTRACE_CPUALL,
	    (dtrace_xcall_t)dtrace_speculation_clean_here, state);

	/*
	 * We now know that all CPUs have committed or discarded their
	 * speculation buffers, as appropriate.  We can now set the state
	 * to inactive.
	 */
	for (i = 0; i < state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];
		dtrace_speculation_state_t current, new;

		if (!spec->dtsp_cleaning)
			continue;

		current = spec->dtsp_state;
		ASSERT(current == DTRACESPEC_DISCARDING ||
		    current == DTRACESPEC_COMMITTINGMANY);

		new = DTRACESPEC_INACTIVE;

		rv = dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new);
		ASSERT(rv == current);
		spec->dtsp_cleaning = 0;
	}
}
/*
 * Called as part of a speculate() to get the speculative buffer associated
 * with a given speculation.  Returns NULL if the specified speculation is not
 * in an ACTIVE state.  If the speculation is in the ACTIVEONE state -- and
 * the active CPU is not the specified CPU -- the speculation will be
 * atomically transitioned into the ACTIVEMANY state.
 */
static dtrace_buffer_t *
dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_speculation_state_t current, new;
	dtrace_buffer_t *buf;

	if (which > state->dts_nspeculations) {
		cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return (NULL);
	}

	spec = &state->dts_speculations[which - 1];
	buf = &spec->dtsp_buffer[cpuid];

	do {
		current = spec->dtsp_state;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_COMMITTINGMANY:
		case DTRACESPEC_DISCARDING:
			return (NULL);

		case DTRACESPEC_COMMITTING:
			ASSERT(buf->dtb_offset == 0);
			return (NULL);

		case DTRACESPEC_ACTIVEONE:
			/*
			 * This speculation is currently active on one CPU.
			 * Check the offset in the buffer; if it's non-zero,
			 * that CPU must be us (and we leave the state alone).
			 * If it's zero, assume that we're starting on a new
			 * CPU -- and change the state to indicate that the
			 * speculation is active on more than one CPU.
			 */
			if (buf->dtb_offset != 0)
				return (buf);

			new = DTRACESPEC_ACTIVEMANY;
			break;

		case DTRACESPEC_ACTIVEMANY:
			return (buf);

		case DTRACESPEC_ACTIVE:
			new = DTRACESPEC_ACTIVEONE;
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	ASSERT(new == DTRACESPEC_ACTIVEONE || new == DTRACESPEC_ACTIVEMANY);
	return (buf);
}
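
/*
 * Editor's note -- illustrative sketch only: a hypothetical probe-context
 * caller of dtrace_speculation_buffer() above would simply redirect its
 * record into the returned buffer, dropping the record silently when the
 * speculation is not in an ACTIVE state.  This is not the actual speculate()
 * implementation; names and control flow are for illustration.
 */
#if 0	/* illustrative only */
	dtrace_buffer_t *buf;

	if ((buf = dtrace_speculation_buffer(state, cpuid, which)) == NULL) {
		/* not ACTIVE on this speculation: the record is dropped */
		return;
	}

	/* ... record into 'buf' instead of state->dts_buffer[cpuid] ... */
#endif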
/*
 * This function implements the DIF emulator's variable lookups.  The emulator
 * passes a reserved variable identifier and optional built-in array index.
 */
static uint64_t
dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
    uint64_t ndx)
{
	/*
	 * If we're accessing one of the uncached arguments, we'll turn this
	 * into a reference in the args array.
	 */
	if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) {
		ndx = v - DIF_VAR_ARG0;
		v = DIF_VAR_ARGS;
	}

	switch (v) {
	case DIF_VAR_ARGS:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS);
		if (ndx >= sizeof (mstate->dtms_arg) /
		    sizeof (mstate->dtms_arg[0])) {
#if !defined(__APPLE__)
			int aframes = mstate->dtms_probe->dtpr_aframes + 2;
#else
			/* Account for introduction of __dtrace_probe() on xnu. */
			int aframes = mstate->dtms_probe->dtpr_aframes + 3;
#endif /* __APPLE__ */
			dtrace_provider_t *pv;
			uint64_t val;

			pv = mstate->dtms_probe->dtpr_provider;
			if (pv->dtpv_pops.dtps_getargval != NULL)
				val = pv->dtpv_pops.dtps_getargval(pv->dtpv_arg,
				    mstate->dtms_probe->dtpr_id,
				    mstate->dtms_probe->dtpr_arg, ndx, aframes);
#if defined(__APPLE__)
			/* Special case access of arg5 as passed to dtrace_probeid_error (which see.) */
			else if (mstate->dtms_probe->dtpr_id == dtrace_probeid_error && ndx == 5) {
				return ((dtrace_state_t *)(mstate->dtms_arg[0]))->dts_arg_error_illval;
			}
#endif /* __APPLE__ */
			else
				val = dtrace_getarg(ndx, aframes);

			/*
			 * This is regrettably required to keep the compiler
			 * from tail-optimizing the call to dtrace_getarg().
			 * The condition always evaluates to true, but the
			 * compiler has no way of figuring that out a priori.
			 * (None of this would be necessary if the compiler
			 * could be relied upon to _always_ tail-optimize
			 * the call to dtrace_getarg() -- but it can't.)
			 */
			if (mstate->dtms_probe != NULL)
				return (val);

			ASSERT(0);
		}

		return (mstate->dtms_arg[ndx]);
2483 #if !defined(__APPLE__)
2484 case DIF_VAR_UREGS
: {
2487 if (!dtrace_priv_proc(state
))
2490 if ((lwp
= curthread
->t_lwp
) == NULL
) {
2491 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR
);
2492 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_illval
= NULL
;
2496 return (dtrace_getreg(lwp
->lwp_regs
, ndx
));
2499 case DIF_VAR_UREGS
: {
2502 if (!dtrace_priv_proc(state
))
2505 if ((thread
= current_thread()) == NULL
) {
2506 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR
);
2507 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_illval
= 0;
2511 return (dtrace_getreg(find_user_regs(thread
), ndx
));
2513 #endif /* __APPLE__ */
2515 #if !defined(__APPLE__)
2516 case DIF_VAR_CURTHREAD
:
2517 if (!dtrace_priv_kernel(state
))
2519 return ((uint64_t)(uintptr_t)curthread
);
2521 case DIF_VAR_CURTHREAD
:
2522 if (!dtrace_priv_kernel(state
))
2525 return ((uint64_t)(uintptr_t)current_thread());
2526 #endif /* __APPLE__ */
2528 case DIF_VAR_TIMESTAMP
:
2529 if (!(mstate
->dtms_present
& DTRACE_MSTATE_TIMESTAMP
)) {
2530 mstate
->dtms_timestamp
= dtrace_gethrtime();
2531 mstate
->dtms_present
|= DTRACE_MSTATE_TIMESTAMP
;
2533 return (mstate
->dtms_timestamp
);
2535 #if !defined(__APPLE__)
2536 case DIF_VAR_VTIMESTAMP
:
2537 ASSERT(dtrace_vtime_references
!= 0);
2538 return (curthread
->t_dtrace_vtime
);
2540 case DIF_VAR_VTIMESTAMP
:
2541 ASSERT(dtrace_vtime_references
!= 0);
2542 return (dtrace_get_thread_vtime(current_thread()));
2543 #endif /* __APPLE__ */
2545 case DIF_VAR_WALLTIMESTAMP
:
2546 if (!(mstate
->dtms_present
& DTRACE_MSTATE_WALLTIMESTAMP
)) {
2547 mstate
->dtms_walltimestamp
= dtrace_gethrestime();
2548 mstate
->dtms_present
|= DTRACE_MSTATE_WALLTIMESTAMP
;
2550 return (mstate
->dtms_walltimestamp
);
2553 if (!dtrace_priv_kernel(state
))
2555 if (!(mstate
->dtms_present
& DTRACE_MSTATE_IPL
)) {
2556 mstate
->dtms_ipl
= dtrace_getipl();
2557 mstate
->dtms_present
|= DTRACE_MSTATE_IPL
;
2559 return (mstate
->dtms_ipl
);
2562 ASSERT(mstate
->dtms_present
& DTRACE_MSTATE_EPID
);
2563 return (mstate
->dtms_epid
);
2566 ASSERT(mstate
->dtms_present
& DTRACE_MSTATE_PROBE
);
2567 return (mstate
->dtms_probe
->dtpr_id
);
2569 case DIF_VAR_STACKDEPTH
:
2570 if (!dtrace_priv_kernel(state
))
2572 if (!(mstate
->dtms_present
& DTRACE_MSTATE_STACKDEPTH
)) {
2573 #if !defined(__APPLE__)
2574 int aframes
= mstate
->dtms_probe
->dtpr_aframes
+ 2;
2576 /* Account for introduction of __dtrace_probe() on xnu. */
2577 int aframes
= mstate
->dtms_probe
->dtpr_aframes
+ 3;
2578 #endif /* __APPLE__ */
2580 mstate
->dtms_stackdepth
= dtrace_getstackdepth(aframes
);
2581 mstate
->dtms_present
|= DTRACE_MSTATE_STACKDEPTH
;
2583 return (mstate
->dtms_stackdepth
);
2585 case DIF_VAR_USTACKDEPTH
:
2586 if (!dtrace_priv_proc(state
))
2588 if (!(mstate
->dtms_present
& DTRACE_MSTATE_USTACKDEPTH
)) {
2590 * See comment in DIF_VAR_PID.
2592 if (DTRACE_ANCHORED(mstate
->dtms_probe
) &&
2594 mstate
->dtms_ustackdepth
= 0;
2596 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
2597 mstate
->dtms_ustackdepth
=
2598 dtrace_getustackdepth();
2599 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
2601 mstate
->dtms_present
|= DTRACE_MSTATE_USTACKDEPTH
;
2603 return (mstate
->dtms_ustackdepth
);
2605 case DIF_VAR_CALLER
:
2606 if (!dtrace_priv_kernel(state
))
2608 if (!(mstate
->dtms_present
& DTRACE_MSTATE_CALLER
)) {
2609 #if !defined(__APPLE__)
2610 int aframes
= mstate
->dtms_probe
->dtpr_aframes
+ 2;
2612 /* Account for introduction of __dtrace_probe() on xnu. */
2613 int aframes
= mstate
->dtms_probe
->dtpr_aframes
+ 3;
2614 #endif /* __APPLE__ */
2616 if (!DTRACE_ANCHORED(mstate
->dtms_probe
)) {
2618 * If this is an unanchored probe, we are
2619 * required to go through the slow path:
2620 * dtrace_caller() only guarantees correct
2621 * results for anchored probes.
2625 dtrace_getpcstack(caller
, 2, aframes
,
2626 (uint32_t *)(uintptr_t)mstate
->dtms_arg
[0]);
2627 mstate
->dtms_caller
= caller
[1];
2628 } else if ((mstate
->dtms_caller
=
2629 dtrace_caller(aframes
)) == -1) {
2631 * We have failed to do this the quick way;
2632 * we must resort to the slower approach of
2633 * calling dtrace_getpcstack().
2637 dtrace_getpcstack(&caller
, 1, aframes
, NULL
);
2638 mstate
->dtms_caller
= caller
;
2641 mstate
->dtms_present
|= DTRACE_MSTATE_CALLER
;
2643 return (mstate
->dtms_caller
);
2645 case DIF_VAR_UCALLER
:
2646 if (!dtrace_priv_proc(state
))
2649 if (!(mstate
->dtms_present
& DTRACE_MSTATE_UCALLER
)) {
2653 * dtrace_getupcstack() fills in the first uint64_t
2654 * with the current PID. The second uint64_t will
2655 * be the program counter at user-level. The third
2656 * uint64_t will contain the caller, which is what
2660 dtrace_getupcstack(ustack
, 3);
2661 mstate
->dtms_ucaller
= ustack
[2];
2662 mstate
->dtms_present
|= DTRACE_MSTATE_UCALLER
;
2665 return (mstate
->dtms_ucaller
);
2667 case DIF_VAR_PROBEPROV
:
2668 ASSERT(mstate
->dtms_present
& DTRACE_MSTATE_PROBE
);
2669 return ((uint64_t)(uintptr_t)
2670 mstate
->dtms_probe
->dtpr_provider
->dtpv_name
);
2672 case DIF_VAR_PROBEMOD
:
2673 ASSERT(mstate
->dtms_present
& DTRACE_MSTATE_PROBE
);
2674 return ((uint64_t)(uintptr_t)
2675 mstate
->dtms_probe
->dtpr_mod
);
2677 case DIF_VAR_PROBEFUNC
:
2678 ASSERT(mstate
->dtms_present
& DTRACE_MSTATE_PROBE
);
2679 return ((uint64_t)(uintptr_t)
2680 mstate
->dtms_probe
->dtpr_func
);
2682 case DIF_VAR_PROBENAME
:
2683 ASSERT(mstate
->dtms_present
& DTRACE_MSTATE_PROBE
);
2684 return ((uint64_t)(uintptr_t)
2685 mstate
->dtms_probe
->dtpr_name
);
2687 #if !defined(__APPLE__)
2689 if (!dtrace_priv_proc(state
))
2693 * Note that we are assuming that an unanchored probe is
2694 * always due to a high-level interrupt. (And we're assuming
2695 * that there is only a single high level interrupt.)
2697 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2698 return (pid0
.pid_id
);
2701 * It is always safe to dereference one's own t_procp pointer:
2702 * it always points to a valid, allocated proc structure.
2703 * Further, it is always safe to dereference the p_pidp member
2704 * of one's own proc structure. (These are truisms becuase
2705 * threads and processes don't clean up their own state --
2706 * they leave that task to whomever reaps them.)
2708 return ((uint64_t)curthread
->t_procp
->p_pidp
->pid_id
);
2712 if (!dtrace_priv_proc(state
))
2716 * Note that we are assuming that an unanchored probe is
2717 * always due to a high-level interrupt. (And we're assuming
2718 * that there is only a single high level interrupt.)
2720 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2721 /* Anchored probe that fires while on an interrupt accrues to process 0 */
2724 return ((uint64_t)proc_selfpid());
2725 #endif /* __APPLE__ */
2727 #if !defined(__APPLE__)
2729 if (!dtrace_priv_proc(state
))
2733 * See comment in DIF_VAR_PID.
2735 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2736 return (pid0
.pid_id
);
2738 return ((uint64_t)curthread
->t_procp
->p_ppid
);
2741 if (!dtrace_priv_proc(state
))
2745 * See comment in DIF_VAR_PID.
2747 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2750 return ((uint64_t)(uintptr_t)(current_proc()->p_ppid
));
2751 #endif /* __APPLE__ */
2753 #if !defined(__APPLE__)
2756 * See comment in DIF_VAR_PID.
2758 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2761 return ((uint64_t)curthread
->t_tid
);
2765 * See comment in DIF_VAR_PID.
2767 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2770 return ((uint64_t)(uintptr_t)current_thread()); /* Is user's (pthread_t)t->kernel_thread */
2771 #endif /* __APPLE__ */
2773 #if !defined(__APPLE__)
2774 case DIF_VAR_EXECNAME
:
2775 if (!dtrace_priv_proc(state
))
2779 * See comment in DIF_VAR_PID.
2781 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2782 return ((uint64_t)(uintptr_t)p0
.p_user
.u_comm
);
2785 * It is always safe to dereference one's own t_procp pointer:
2786 * it always points to a valid, allocated proc structure.
2787 * (This is true because threads don't clean up their own
2788 * state -- they leave that task to whomever reaps them.)
2790 return ((uint64_t)(uintptr_t)
2791 curthread
->t_procp
->p_user
.u_comm
);
#else
	case DIF_VAR_EXECNAME:
	{
		char *xname = (char *)mstate->dtms_scratch_ptr;
		size_t scratch_size = MAXCOMLEN+1;

		/* The scratch allocation's lifetime is that of the clause. */
		if (mstate->dtms_scratch_ptr + scratch_size >
		    mstate->dtms_scratch_base + mstate->dtms_scratch_size)
			return 0;

		if (!dtrace_priv_proc(state))
			return 0;

		mstate->dtms_scratch_ptr += scratch_size;
		proc_selfname( xname, MAXCOMLEN );

		return ((uint64_t)(uintptr_t)xname);
	}
#endif /* __APPLE__ */
2812 #if !defined(__APPLE__)
2813 case DIF_VAR_ZONENAME
:
2814 if (!dtrace_priv_proc(state
))
2818 * See comment in DIF_VAR_PID.
2820 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2821 return ((uint64_t)(uintptr_t)p0
.p_zone
->zone_name
);
2824 * It is always safe to dereference one's own t_procp pointer:
2825 * it always points to a valid, allocated proc structure.
2826 * (This is true because threads don't clean up their own
2827 * state -- they leave that task to whomever reaps them.)
2829 return ((uint64_t)(uintptr_t)
2830 curthread
->t_procp
->p_zone
->zone_name
);
#else
	case DIF_VAR_ZONENAME:
		if (!dtrace_priv_proc(state))
			return (0);

		return ((uint64_t)(uintptr_t)NULL); /* Darwin doesn't do "zones" */
#endif /* __APPLE__ */
2840 #if !defined(__APPLE__)
2842 if (!dtrace_priv_proc(state
))
2846 * See comment in DIF_VAR_PID.
2848 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2849 return ((uint64_t)p0
.p_cred
->cr_uid
);
2851 return ((uint64_t)curthread
->t_cred
->cr_uid
);
2854 if (!dtrace_priv_proc(state
))
2858 * See comment in DIF_VAR_PID.
2860 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2863 if (dtrace_CRED() != NULL
)
2864 return ((uint64_t)kauth_getuid());
2867 #endif /* __APPLE__ */
2869 #if !defined(__APPLE__)
2871 if (!dtrace_priv_proc(state
))
2875 * See comment in DIF_VAR_PID.
2877 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2878 return ((uint64_t)p0
.p_cred
->cr_gid
);
2880 return ((uint64_t)curthread
->t_cred
->cr_gid
);
2883 if (!dtrace_priv_proc(state
))
2887 * See comment in DIF_VAR_PID.
2889 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2892 if (dtrace_CRED() != NULL
)
2893 return ((uint64_t)kauth_getgid());
2896 #endif /* __APPLE__ */
2898 #if !defined(__APPLE__)
2899 case DIF_VAR_ERRNO
: {
2901 if (!dtrace_priv_proc(state
))
2905 * See comment in DIF_VAR_PID.
2907 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2910 if ((lwp
= curthread
->t_lwp
) == NULL
)
2913 return ((uint64_t)lwp
->lwp_errno
);
2916 case DIF_VAR_ERRNO
: {
2917 uthread_t uthread
= (uthread_t
)get_bsdthread_info(current_thread());
2918 if (!dtrace_priv_proc(state
))
2922 * See comment in DIF_VAR_PID.
2924 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
2927 return (uthread
? uthread
->t_dtrace_errno
: -1);
2929 #endif /* __APPLE__ */
2932 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP
);
2938 * Emulate the execution of DTrace ID subroutines invoked by the call opcode.
2939 * Notice that we don't bother validating the proper number of arguments or
2940 * their types in the tuple stack. This isn't needed because all argument
2941 * interpretation is safe because of our load safety -- the worst that can
2942 * happen is that a bogus program can obtain bogus results.
2945 dtrace_dif_subr(uint_t subr
, uint_t rd
, uint64_t *regs
,
2946 dtrace_key_t
*tupregs
, int nargs
,
2947 dtrace_mstate_t
*mstate
, dtrace_state_t
*state
)
2949 volatile uint16_t *flags
= &cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
;
2950 #if !defined(__APPLE__)
2951 volatile uintptr_t *illval
= &cpu_core
[CPU
->cpu_id
].cpuc_dtrace_illval
;
2953 volatile uint64_t *illval
= &cpu_core
[CPU
->cpu_id
].cpuc_dtrace_illval
;
2954 #endif /* __APPLE__ */
2956 #if !defined(__APPLE__)
2967 /* XXX awaits lock/mutex work */
2968 #endif /* __APPLE__ */
2972 regs
[rd
] = (dtrace_gethrtime() * 2416 + 374441) % 1771875;
2975 #if !defined(__APPLE__)
2976 case DIF_SUBR_MUTEX_OWNED
:
2977 m
.mx
= dtrace_load64(tupregs
[0].dttk_value
);
2978 if (MUTEX_TYPE_ADAPTIVE(&m
.mi
))
2979 regs
[rd
] = MUTEX_OWNER(&m
.mi
) != MUTEX_NO_OWNER
;
2981 regs
[rd
] = LOCK_HELD(&m
.mi
.m_spin
.m_spinlock
);
2984 case DIF_SUBR_MUTEX_OWNER
:
2985 m
.mx
= dtrace_load64(tupregs
[0].dttk_value
);
2986 if (MUTEX_TYPE_ADAPTIVE(&m
.mi
) &&
2987 MUTEX_OWNER(&m
.mi
) != MUTEX_NO_OWNER
)
2988 regs
[rd
] = (uintptr_t)MUTEX_OWNER(&m
.mi
);
2993 case DIF_SUBR_MUTEX_TYPE_ADAPTIVE
:
2994 m
.mx
= dtrace_load64(tupregs
[0].dttk_value
);
2995 regs
[rd
] = MUTEX_TYPE_ADAPTIVE(&m
.mi
);
2998 case DIF_SUBR_MUTEX_TYPE_SPIN
:
2999 m
.mx
= dtrace_load64(tupregs
[0].dttk_value
);
3000 regs
[rd
] = MUTEX_TYPE_SPIN(&m
.mi
);
3003 case DIF_SUBR_RW_READ_HELD
: {
3006 r
.rw
= dtrace_loadptr(tupregs
[0].dttk_value
);
3007 regs
[rd
] = _RW_READ_HELD(&r
.ri
, tmp
);
3011 case DIF_SUBR_RW_WRITE_HELD
:
3012 r
.rw
= dtrace_loadptr(tupregs
[0].dttk_value
);
3013 regs
[rd
] = _RW_WRITE_HELD(&r
.ri
);
3016 case DIF_SUBR_RW_ISWRITER
:
3017 r
.rw
= dtrace_loadptr(tupregs
[0].dttk_value
);
3018 regs
[rd
] = _RW_ISWRITER(&r
.ri
);
3021 /* XXX awaits lock/mutex work */
3022 #endif /* __APPLE__ */
3024 case DIF_SUBR_BCOPY
: {
3026 * We need to be sure that the destination is in the scratch
3027 * region -- no other region is allowed.
3029 uintptr_t src
= tupregs
[0].dttk_value
;
3030 uintptr_t dest
= tupregs
[1].dttk_value
;
3031 size_t size
= tupregs
[2].dttk_value
;
3033 if (!dtrace_inscratch(dest
, size
, mstate
)) {
3034 *flags
|= CPU_DTRACE_BADADDR
;
3039 dtrace_bcopy((void *)src
, (void *)dest
, size
);
3043 case DIF_SUBR_ALLOCA
:
3044 case DIF_SUBR_COPYIN
: {
3045 uintptr_t dest
= P2ROUNDUP(mstate
->dtms_scratch_ptr
, 8);
3047 tupregs
[subr
== DIF_SUBR_ALLOCA
? 0 : 1].dttk_value
;
3048 size_t scratch_size
= (dest
- mstate
->dtms_scratch_ptr
) + size
;
3051 * This action doesn't require any credential checks since
3052 * probes will not activate in user contexts to which the
3053 * enabling user does not have permissions.
3055 if (mstate
->dtms_scratch_ptr
+ scratch_size
>
3056 mstate
->dtms_scratch_base
+ mstate
->dtms_scratch_size
) {
3057 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3062 if (subr
== DIF_SUBR_COPYIN
) {
3063 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3064 #if !defined(__APPLE__)
3065 dtrace_copyin(tupregs
[0].dttk_value
, dest
, size
);
3067 if (dtrace_priv_proc(state
))
3068 dtrace_copyin(tupregs
[0].dttk_value
, dest
, size
);
3069 #endif /* __APPLE__ */
3070 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3073 mstate
->dtms_scratch_ptr
+= scratch_size
;
3078 case DIF_SUBR_COPYINTO
: {
3079 uint64_t size
= tupregs
[1].dttk_value
;
3080 uintptr_t dest
= tupregs
[2].dttk_value
;
3083 * This action doesn't require any credential checks since
3084 * probes will not activate in user contexts to which the
3085 * enabling user does not have permissions.
3087 if (!dtrace_inscratch(dest
, size
, mstate
)) {
3088 *flags
|= CPU_DTRACE_BADADDR
;
3093 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3094 #if !defined(__APPLE__)
3095 dtrace_copyin(tupregs
[0].dttk_value
, dest
, size
);
3097 if (dtrace_priv_proc(state
))
3098 dtrace_copyin(tupregs
[0].dttk_value
, dest
, size
);
3099 #endif /* __APPLE__ */
3100 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3104 case DIF_SUBR_COPYINSTR
: {
3105 uintptr_t dest
= mstate
->dtms_scratch_ptr
;
3106 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
3108 if (nargs
> 1 && tupregs
[1].dttk_value
< size
)
3109 size
= tupregs
[1].dttk_value
+ 1;
3112 * This action doesn't require any credential checks since
3113 * probes will not activate in user contexts to which the
3114 * enabling user does not have permissions.
3116 if (mstate
->dtms_scratch_ptr
+ size
>
3117 mstate
->dtms_scratch_base
+ mstate
->dtms_scratch_size
) {
3118 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3123 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3124 #if !defined(__APPLE__)
3125 dtrace_copyinstr(tupregs
[0].dttk_value
, dest
, size
);
3127 if (dtrace_priv_proc(state
))
3128 dtrace_copyinstr(tupregs
[0].dttk_value
, dest
, size
);
3129 #endif /* __APPLE__ */
3130 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3132 ((char *)dest
)[size
- 1] = '\0';
3133 mstate
->dtms_scratch_ptr
+= size
;
3138 #if !defined(__APPLE__)
3139 case DIF_SUBR_MSGSIZE
:
3140 case DIF_SUBR_MSGDSIZE
: {
3141 uintptr_t baddr
= tupregs
[0].dttk_value
, daddr
;
3142 uintptr_t wptr
, rptr
;
3146 while (baddr
!= NULL
&& !(*flags
& CPU_DTRACE_FAULT
)) {
3147 wptr
= dtrace_loadptr(baddr
+
3148 offsetof(mblk_t
, b_wptr
));
3150 rptr
= dtrace_loadptr(baddr
+
3151 offsetof(mblk_t
, b_rptr
));
3154 *flags
|= CPU_DTRACE_BADADDR
;
3155 *illval
= tupregs
[0].dttk_value
;
3159 daddr
= dtrace_loadptr(baddr
+
3160 offsetof(mblk_t
, b_datap
));
3162 baddr
= dtrace_loadptr(baddr
+
3163 offsetof(mblk_t
, b_cont
));
3166 * We want to prevent against denial-of-service here,
3167 * so we're only going to search the list for
3168 * dtrace_msgdsize_max mblks.
3170 if (cont
++ > dtrace_msgdsize_max
) {
3171 *flags
|= CPU_DTRACE_ILLOP
;
3175 if (subr
== DIF_SUBR_MSGDSIZE
) {
3176 if (dtrace_load8(daddr
+
3177 offsetof(dblk_t
, db_type
)) != M_DATA
)
3181 count
+= wptr
- rptr
;
3184 if (!(*flags
& CPU_DTRACE_FAULT
))
3190 case DIF_SUBR_MSGSIZE
:
3191 case DIF_SUBR_MSGDSIZE
: {
3192 /* Darwin does not implement SysV streams messages */
3196 #endif /* __APPLE__ */
3198 #if !defined(__APPLE__)
3199 case DIF_SUBR_PROGENYOF
: {
3200 pid_t pid
= tupregs
[0].dttk_value
;
3204 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3206 for (p
= curthread
->t_procp
; p
!= NULL
; p
= p
->p_parent
) {
3207 if (p
->p_pidp
->pid_id
== pid
) {
3213 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3219 case DIF_SUBR_PROGENYOF
: {
3220 pid_t pid
= tupregs
[0].dttk_value
;
3221 struct proc
*p
= current_proc();
3222 int rval
= 0, lim
= nprocs
;
3224 while(p
&& (lim
-- > 0)) {
3227 ppid
= (pid_t
)dtrace_load32((uintptr_t)&(p
->p_pid
));
3228 if (*flags
& CPU_DTRACE_FAULT
)
3237 break; /* Can't climb process tree any further. */
3239 p
= (struct proc
*)dtrace_loadptr((uintptr_t)&(p
->p_pptr
));
3240 if (*flags
& CPU_DTRACE_FAULT
)
3247 #endif /* __APPLE__ */
3249 case DIF_SUBR_SPECULATION
:
3250 regs
[rd
] = dtrace_speculation(state
);
3253 #if !defined(__APPLE__)
3254 case DIF_SUBR_COPYOUT
: {
3255 uintptr_t kaddr
= tupregs
[0].dttk_value
;
3256 uintptr_t uaddr
= tupregs
[1].dttk_value
;
3257 uint64_t size
= tupregs
[2].dttk_value
;
3259 if (!dtrace_destructive_disallow
&&
3260 dtrace_priv_proc_control(state
) &&
3261 !dtrace_istoxic(kaddr
, size
)) {
3262 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3263 dtrace_copyout(kaddr
, uaddr
, size
);
3264 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3269 case DIF_SUBR_COPYOUTSTR
: {
3270 uintptr_t kaddr
= tupregs
[0].dttk_value
;
3271 uintptr_t uaddr
= tupregs
[1].dttk_value
;
3272 uint64_t size
= tupregs
[2].dttk_value
;
3274 if (!dtrace_destructive_disallow
&&
3275 dtrace_priv_proc_control(state
) &&
3276 !dtrace_istoxic(kaddr
, size
)) {
3277 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3278 dtrace_copyoutstr(kaddr
, uaddr
, size
);
3279 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3284 case DIF_SUBR_COPYOUT
: {
3285 uintptr_t kaddr
= tupregs
[0].dttk_value
;
3286 user_addr_t uaddr
= tupregs
[1].dttk_value
;
3287 uint64_t size
= tupregs
[2].dttk_value
;
3289 if (!dtrace_destructive_disallow
&&
3290 dtrace_priv_proc_control(state
) &&
3291 !dtrace_istoxic(kaddr
, size
)) {
3292 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3293 dtrace_copyout(kaddr
, uaddr
, size
);
3294 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3299 case DIF_SUBR_COPYOUTSTR
: {
3300 uintptr_t kaddr
= tupregs
[0].dttk_value
;
3301 user_addr_t uaddr
= tupregs
[1].dttk_value
;
3302 uint64_t size
= tupregs
[2].dttk_value
;
3304 if (!dtrace_destructive_disallow
&&
3305 dtrace_priv_proc_control(state
) &&
3306 !dtrace_istoxic(kaddr
, size
)) {
3307 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3308 dtrace_copyoutstr(kaddr
, uaddr
, size
);
3309 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3313 #endif /* __APPLE__ */
3315 case DIF_SUBR_STRLEN
:
3316 regs
[rd
] = dtrace_strlen((char *)(uintptr_t)
3317 tupregs
[0].dttk_value
,
3318 state
->dts_options
[DTRACEOPT_STRSIZE
]);
3321 case DIF_SUBR_STRCHR
:
3322 case DIF_SUBR_STRRCHR
: {
3324 * We're going to iterate over the string looking for the
3325 * specified character. We will iterate until we have reached
3326 * the string length or we have found the character. If this
3327 * is DIF_SUBR_STRRCHR, we will look for the last occurrence
3328 * of the specified character instead of the first.
3330 uintptr_t addr
= tupregs
[0].dttk_value
;
3331 uintptr_t limit
= addr
+ state
->dts_options
[DTRACEOPT_STRSIZE
];
3332 char c
, target
= (char)tupregs
[1].dttk_value
;
3334 for (regs
[rd
] = NULL
; addr
< limit
; addr
++) {
3335 if ((c
= dtrace_load8(addr
)) == target
) {
3338 if (subr
== DIF_SUBR_STRCHR
)
3349 case DIF_SUBR_STRSTR
:
3350 case DIF_SUBR_INDEX
:
3351 case DIF_SUBR_RINDEX
: {
3353 * We're going to iterate over the string looking for the
3354 * specified string. We will iterate until we have reached
3355 * the string length or we have found the string. (Yes, this
3356 * is done in the most naive way possible -- but considering
3357 * that the string we're searching for is likely to be
3358 * relatively short, the complexity of Rabin-Karp or similar
3359 * hardly seems merited.)
3361 char *addr
= (char *)(uintptr_t)tupregs
[0].dttk_value
;
3362 char *substr
= (char *)(uintptr_t)tupregs
[1].dttk_value
;
3363 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
3364 size_t len
= dtrace_strlen(addr
, size
);
3365 size_t sublen
= dtrace_strlen(substr
, size
);
3366 char *limit
= addr
+ len
, *orig
= addr
;
3367 int notfound
= subr
== DIF_SUBR_STRSTR
? 0 : -1;
3370 regs
[rd
] = notfound
;
3373 * strstr() and index()/rindex() have similar semantics if
3374 * both strings are the empty string: strstr() returns a
3375 * pointer to the (empty) string, and index() and rindex()
3376 * both return index 0 (regardless of any position argument).
3378 if (sublen
== 0 && len
== 0) {
3379 if (subr
== DIF_SUBR_STRSTR
)
3380 regs
[rd
] = (uintptr_t)addr
;
3386 if (subr
!= DIF_SUBR_STRSTR
) {
3387 if (subr
== DIF_SUBR_RINDEX
) {
3394 * Both index() and rindex() take an optional position
3395 * argument that denotes the starting position.
3398 int64_t pos
= (int64_t)tupregs
[2].dttk_value
;
3401 * If the position argument to index() is
3402 * negative, Perl implicitly clamps it at
3403 * zero. This semantic is a little surprising
3404 * given the special meaning of negative
3405 * positions to similar Perl functions like
3406 * substr(), but it appears to reflect a
3407 * notion that index() can start from a
3408 * negative index and increment its way up to
3409 * the string. Given this notion, Perl's
3410 * rindex() is at least self-consistent in
3411 * that it implicitly clamps positions greater
3412 * than the string length to be the string
3413 * length. Where Perl completely loses
3414 * coherence, however, is when the specified
3415 * substring is the empty string (""). In
3416 * this case, even if the position is
3417 * negative, rindex() returns 0 -- and even if
3418 * the position is greater than the length,
3419 * index() returns the string length. These
3420 * semantics violate the notion that index()
3421 * should never return a value less than the
3422 * specified position and that rindex() should
3423 * never return a value greater than the
3424 * specified position. (One assumes that
3425 * these semantics are artifacts of Perl's
3426 * implementation and not the results of
3427 * deliberate design -- it beggars belief that
3428 * even Larry Wall could desire such oddness.)
3429 * While in the abstract one would wish for
3430 * consistent position semantics across
3431 * substr(), index() and rindex() -- or at the
3432 * very least self-consistent position
3433 * semantics for index() and rindex() -- we
3434 * instead opt to keep with the extant Perl
3435 * semantics, in all their broken glory. (Do
3436 * we have more desire to maintain Perl's
3437 * semantics than Perl does? Probably.)
3439 if (subr
== DIF_SUBR_RINDEX
) {
3463 for (regs
[rd
] = notfound
; addr
!= limit
; addr
+= inc
) {
3464 if (dtrace_strncmp(addr
, substr
, sublen
) == 0) {
3465 if (subr
!= DIF_SUBR_STRSTR
) {
3467 * As D index() and rindex() are
3468 * modeled on Perl (and not on awk),
3469 * we return a zero-based (and not a
3470 * one-based) index. (For you Perl
3471 * weenies: no, we're not going to add
3472 * $[ -- and shouldn't you be at a con
3475 regs
[rd
] = (uintptr_t)(addr
- orig
);
3479 ASSERT(subr
== DIF_SUBR_STRSTR
);
3480 regs
[rd
] = (uintptr_t)addr
;
3488 case DIF_SUBR_STRTOK
: {
3489 uintptr_t addr
= tupregs
[0].dttk_value
;
3490 uintptr_t tokaddr
= tupregs
[1].dttk_value
;
3491 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
3492 uintptr_t limit
, toklimit
= tokaddr
+ size
;
3493 uint8_t c
, tokmap
[32]; /* 256 / 8 */
3494 char *dest
= (char *)mstate
->dtms_scratch_ptr
;
3497 if (mstate
->dtms_scratch_ptr
+ size
>
3498 mstate
->dtms_scratch_base
+ mstate
->dtms_scratch_size
) {
3499 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3506 * If the address specified is NULL, we use our saved
3507 * strtok pointer from the mstate. Note that this
3508 * means that the saved strtok pointer is _only_
3509 * valid within multiple enablings of the same probe --
3510 * it behaves like an implicit clause-local variable.
3512 addr
= mstate
->dtms_strtok
;
3516 * First, zero the token map, and then process the token
3517 * string -- setting a bit in the map for every character
3518 * found in the token string.
3520 for (i
= 0; i
< sizeof (tokmap
); i
++)
3523 for (; tokaddr
< toklimit
; tokaddr
++) {
3524 if ((c
= dtrace_load8(tokaddr
)) == '\0')
3527 ASSERT((c
>> 3) < sizeof (tokmap
));
3528 tokmap
[c
>> 3] |= (1 << (c
& 0x7));
3531 for (limit
= addr
+ size
; addr
< limit
; addr
++) {
3533 * We're looking for a character that is _not_ contained
3534 * in the token string.
3536 if ((c
= dtrace_load8(addr
)) == '\0')
3539 if (!(tokmap
[c
>> 3] & (1 << (c
& 0x7))))
3545 * We reached the end of the string without finding
3546 * any character that was not in the token string.
3547 * We return NULL in this case, and we set the saved
3548 * address to NULL as well.
3551 mstate
->dtms_strtok
= NULL
;
3556 * From here on, we're copying into the destination string.
3558 for (i
= 0; addr
< limit
&& i
< size
- 1; addr
++) {
3559 if ((c
= dtrace_load8(addr
)) == '\0')
3562 if (tokmap
[c
>> 3] & (1 << (c
& 0x7)))
3571 regs
[rd
] = (uintptr_t)dest
;
3572 mstate
->dtms_scratch_ptr
+= size
;
3573 mstate
->dtms_strtok
= addr
;
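
/*
 * Editor's note -- illustrative sketch only: the 32-byte tokmap used by the
 * strtok() emulation above is a 256-bit membership set, one bit per possible
 * byte value.  Adding a character to the set and testing membership work as
 * follows; 'c' and 'i' are hypothetical locals.
 */
#if 0	/* illustrative only */
	uint8_t tokmap[32];			/* 256 bits = 256 / 8 bytes */
	uint8_t c;
	int i;

	for (i = 0; i < (int)sizeof (tokmap); i++)
		tokmap[i] = 0;			/* start with the empty set */

	tokmap[c >> 3] |= (1 << (c & 0x7));	/* add 'c' to the set       */

	if (tokmap[c >> 3] & (1 << (c & 0x7)))	/* is 'c' in the set?       */
		;				/* ... 'c' is a delimiter   */
#endif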
3577 case DIF_SUBR_SUBSTR
: {
3578 uintptr_t s
= tupregs
[0].dttk_value
;
3579 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
3580 char *d
= (char *)mstate
->dtms_scratch_ptr
;
3581 int64_t index
= (int64_t)tupregs
[1].dttk_value
;
3582 int64_t remaining
= (int64_t)tupregs
[2].dttk_value
;
3583 size_t len
= dtrace_strlen((char *)s
, size
);
3587 remaining
= (int64_t)size
;
3589 if (mstate
->dtms_scratch_ptr
+ size
>
3590 mstate
->dtms_scratch_base
+ mstate
->dtms_scratch_size
) {
3591 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3599 if (index
< 0 && index
+ remaining
> 0) {
3605 if (index
>= len
|| index
< 0)
3608 for (d
[0] = '\0'; remaining
> 0; remaining
--) {
3609 if ((d
[i
++] = dtrace_load8(s
++ + index
)) == '\0')
3618 mstate
->dtms_scratch_ptr
+= size
;
3619 regs
[rd
] = (uintptr_t)d
;
3623 #if !defined(__APPLE__)
3624 case DIF_SUBR_GETMAJOR
:
3626 regs
[rd
] = (tupregs
[0].dttk_value
>> NBITSMINOR64
) & MAXMAJ64
;
3628 regs
[rd
] = (tupregs
[0].dttk_value
>> NBITSMINOR
) & MAXMAJ
;
3632 #else /* __APPLE__ */
3633 case DIF_SUBR_GETMAJOR
:
3634 regs
[rd
] = (uintptr_t)major( (dev_t
)tupregs
[0].dttk_value
);
3636 #endif /* __APPLE__ */
3638 #if !defined(__APPLE__)
3639 case DIF_SUBR_GETMINOR
:
3641 regs
[rd
] = tupregs
[0].dttk_value
& MAXMIN64
;
3643 regs
[rd
] = tupregs
[0].dttk_value
& MAXMIN
;
3647 #else /* __APPLE__ */
3648 case DIF_SUBR_GETMINOR
:
3649 regs
[rd
] = (uintptr_t)minor( (dev_t
)tupregs
[0].dttk_value
);
3651 #endif /* __APPLE__ */
3653 #if !defined(__APPLE__)
3654 case DIF_SUBR_DDI_PATHNAME
: {
3656 * This one is a galactic mess. We are going to roughly
3657 * emulate ddi_pathname(), but it's made more complicated
3658 * by the fact that we (a) want to include the minor name and
3659 * (b) must proceed iteratively instead of recursively.
3661 uintptr_t dest
= mstate
->dtms_scratch_ptr
;
3662 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
3663 char *start
= (char *)dest
, *end
= start
+ size
- 1;
3664 uintptr_t daddr
= tupregs
[0].dttk_value
;
3665 int64_t minor
= (int64_t)tupregs
[1].dttk_value
;
3667 int i
, len
, depth
= 0;
3669 if (size
== 0 || mstate
->dtms_scratch_ptr
+ size
>
3670 mstate
->dtms_scratch_base
+ mstate
->dtms_scratch_size
) {
3671 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3679 * We want to have a name for the minor. In order to do this,
3680 * we need to walk the minor list from the devinfo. We want
3681 * to be sure that we don't infinitely walk a circular list,
3682 * so we check for circularity by sending a scout pointer
3683 * ahead two elements for every element that we iterate over;
3684 * if the list is circular, these will ultimately point to the
3685 * same element. You may recognize this little trick as the
3686 * answer to a stupid interview question -- one that always
3687 * seems to be asked by those who had to have it laboriously
3688 * explained to them, and who can't even concisely describe
3689 * the conditions under which one would be forced to resort to
3690 * this technique. Needless to say, those conditions are
3691 * found here -- and probably only here. Is this is the only
3692 * use of this infamous trick in shipping, production code?
3693 * If it isn't, it probably should be...
3696 uintptr_t maddr
= dtrace_loadptr(daddr
+
3697 offsetof(struct dev_info
, devi_minor
));
3699 uintptr_t next
= offsetof(struct ddi_minor_data
, next
);
3700 uintptr_t name
= offsetof(struct ddi_minor_data
,
3701 d_minor
) + offsetof(struct ddi_minor
, name
);
3702 uintptr_t dev
= offsetof(struct ddi_minor_data
,
3703 d_minor
) + offsetof(struct ddi_minor
, dev
);
3707 scout
= dtrace_loadptr(maddr
+ next
);
3709 while (maddr
!= NULL
&& !(*flags
& CPU_DTRACE_FAULT
)) {
3712 m
= dtrace_load64(maddr
+ dev
) & MAXMIN64
;
3714 m
= dtrace_load32(maddr
+ dev
) & MAXMIN
;
3717 maddr
= dtrace_loadptr(maddr
+ next
);
3722 scout
= dtrace_loadptr(scout
+ next
);
3727 scout
= dtrace_loadptr(scout
+ next
);
3732 if (scout
== maddr
) {
3733 *flags
|= CPU_DTRACE_ILLOP
;
3741 * We have the minor data. Now we need to
3742 * copy the minor's name into the end of the
3745 s
= (char *)dtrace_loadptr(maddr
+ name
);
3746 len
= dtrace_strlen(s
, size
);
3748 if (*flags
& CPU_DTRACE_FAULT
)
3752 if ((end
-= (len
+ 1)) < start
)
3758 for (i
= 1; i
<= len
; i
++)
3759 end
[i
] = dtrace_load8((uintptr_t)s
++);
3764 while (daddr
!= NULL
&& !(*flags
& CPU_DTRACE_FAULT
)) {
3765 ddi_node_state_t devi_state
;
3767 devi_state
= dtrace_load32(daddr
+
3768 offsetof(struct dev_info
, devi_node_state
));
3770 if (*flags
& CPU_DTRACE_FAULT
)
3773 if (devi_state
>= DS_INITIALIZED
) {
3774 s
= (char *)dtrace_loadptr(daddr
+
3775 offsetof(struct dev_info
, devi_addr
));
3776 len
= dtrace_strlen(s
, size
);
3778 if (*flags
& CPU_DTRACE_FAULT
)
3782 if ((end
-= (len
+ 1)) < start
)
3788 for (i
= 1; i
<= len
; i
++)
3789 end
[i
] = dtrace_load8((uintptr_t)s
++);
3793 * Now for the node name...
3795 s
= (char *)dtrace_loadptr(daddr
+
3796 offsetof(struct dev_info
, devi_node_name
));
3798 daddr
= dtrace_loadptr(daddr
+
3799 offsetof(struct dev_info
, devi_parent
));
3802 * If our parent is NULL (that is, if we're the root
3803 * node), we're going to use the special path
3809 len
= dtrace_strlen(s
, size
);
3810 if (*flags
& CPU_DTRACE_FAULT
)
3813 if ((end
-= (len
+ 1)) < start
)
3816 for (i
= 1; i
<= len
; i
++)
3817 end
[i
] = dtrace_load8((uintptr_t)s
++);
3820 if (depth
++ > dtrace_devdepth_max
) {
3821 *flags
|= CPU_DTRACE_ILLOP
;
3827 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3829 if (daddr
== NULL
) {
3830 regs
[rd
] = (uintptr_t)end
;
3831 mstate
->dtms_scratch_ptr
+= size
;
3837 case DIF_SUBR_DDI_PATHNAME
: {
3838 /* XXX awaits galactic disentanglement ;-} */
3842 #endif /* __APPLE__ */
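
/*
 * Editor's note -- illustrative sketch only: the "scout" pointer described
 * in the ddi_pathname() emulation above is the classic two-pointer cycle
 * check -- advance one pointer by one element and a second by two; if the
 * list is circular, the two must eventually meet.  In generic form, with a
 * hypothetical node type and function name:
 */
#if 0	/* illustrative only */
struct example_node {
	struct example_node *next;
};

static int
example_list_is_circular(struct example_node *head)
{
	struct example_node *slow = head, *fast = head;

	while (fast != NULL && fast->next != NULL) {
		slow = slow->next;		/* one step        */
		fast = fast->next->next;	/* two steps       */

		if (slow == fast)
			return (1);		/* cycle detected  */
	}

	return (0);				/* list terminates */
}
#endif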
3844 case DIF_SUBR_STRJOIN
: {
3845 char *d
= (char *)mstate
->dtms_scratch_ptr
;
3846 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
3847 uintptr_t s1
= tupregs
[0].dttk_value
;
3848 uintptr_t s2
= tupregs
[1].dttk_value
;
3851 if (mstate
->dtms_scratch_ptr
+ size
>
3852 mstate
->dtms_scratch_base
+ mstate
->dtms_scratch_size
) {
3853 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3860 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3865 if ((d
[i
++] = dtrace_load8(s1
++)) == '\0') {
3873 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3878 if ((d
[i
++] = dtrace_load8(s2
++)) == '\0')
3883 mstate
->dtms_scratch_ptr
+= i
;
3884 regs
[rd
] = (uintptr_t)d
;
	case DIF_SUBR_LLTOSTR: {
		int64_t i = (int64_t)tupregs[0].dttk_value;
		int64_t val = i < 0 ? i * -1 : i;
		uint64_t size = 22;	/* enough room for 2^64 in decimal */
		char *end = (char *)mstate->dtms_scratch_ptr + size - 1;

		if (mstate->dtms_scratch_ptr + size >
		    mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = NULL;
			break;
		}

		for (*end-- = '\0'; val; val /= 10)
			*end-- = '0' + (val % 10);

		if (i == 0)
			*end-- = '0';

		if (i < 0)
			*end-- = '-';

		regs[rd] = (uintptr_t)end + 1;
		mstate->dtms_scratch_ptr += size;
		break;
	}
3917 case DIF_SUBR_DIRNAME
:
3918 case DIF_SUBR_BASENAME
: {
3919 char *dest
= (char *)mstate
->dtms_scratch_ptr
;
3920 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
3921 uintptr_t src
= tupregs
[0].dttk_value
;
3922 int i
, j
, len
= dtrace_strlen((char *)src
, size
);
3923 int lastbase
= -1, firstbase
= -1, lastdir
= -1;
3926 if (mstate
->dtms_scratch_ptr
+ size
>
3927 mstate
->dtms_scratch_base
+ mstate
->dtms_scratch_size
) {
3928 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3934 * The basename and dirname for a zero-length string is
3939 src
= (uintptr_t)".";
3943 * Start from the back of the string, moving back toward the
3944 * front until we see a character that isn't a slash. That
3945 * character is the last character in the basename.
3947 for (i
= len
- 1; i
>= 0; i
--) {
3948 if (dtrace_load8(src
+ i
) != '/')
3956 * Starting from the last character in the basename, move
3957 * towards the front until we find a slash. The character
3958 * that we processed immediately before that is the first
3959 * character in the basename.
3961 for (; i
>= 0; i
--) {
3962 if (dtrace_load8(src
+ i
) == '/')
3970 * Now keep going until we find a non-slash character. That
3971 * character is the last character in the dirname.
3973 for (; i
>= 0; i
--) {
3974 if (dtrace_load8(src
+ i
) != '/')
3981 ASSERT(!(lastbase
== -1 && firstbase
!= -1));
3982 ASSERT(!(firstbase
== -1 && lastdir
!= -1));
3984 if (lastbase
== -1) {
3986 * We didn't find a non-slash character. We know that
3987 * the length is non-zero, so the whole string must be
3988 * slashes. In either the dirname or the basename
3989 * case, we return '/'.
3991 ASSERT(firstbase
== -1);
3992 firstbase
= lastbase
= lastdir
= 0;
3995 if (firstbase
== -1) {
3997 * The entire string consists only of a basename
3998 * component. If we're looking for dirname, we need
3999 * to change our string to be just "."; if we're
4000 * looking for a basename, we'll just set the first
4001 * character of the basename to be 0.
4003 if (subr
== DIF_SUBR_DIRNAME
) {
4004 ASSERT(lastdir
== -1);
4005 src
= (uintptr_t)".";
4012 if (subr
== DIF_SUBR_DIRNAME
) {
4013 if (lastdir
== -1) {
4015 * We know that we have a slash in the name --
4016 * or lastdir would be set to 0, above. And
4017 * because lastdir is -1, we know that this
4018 * slash must be the first character. (That
4019 * is, the full string must be of the form
4020 * "/basename".) In this case, the last
4021 * character of the directory name is 0.
4029 ASSERT(subr
== DIF_SUBR_BASENAME
);
4030 ASSERT(firstbase
!= -1 && lastbase
!= -1);
4035 for (i
= start
, j
= 0; i
<= end
&& j
< size
- 1; i
++, j
++)
4036 dest
[j
] = dtrace_load8(src
+ i
);
4039 regs
[rd
] = (uintptr_t)dest
;
4040 mstate
->dtms_scratch_ptr
+= size
;
4044 case DIF_SUBR_CLEANPATH
: {
4045 char *dest
= (char *)mstate
->dtms_scratch_ptr
, c
;
4046 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
4047 uintptr_t src
= tupregs
[0].dttk_value
;
4050 if (mstate
->dtms_scratch_ptr
+ size
>
4051 mstate
->dtms_scratch_base
+ mstate
->dtms_scratch_size
) {
4052 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
4058 * Move forward, loading each character.
4061 c
= dtrace_load8(src
+ i
++);
4063 if (j
+ 5 >= size
) /* 5 = strlen("/..c\0") */
4071 c
= dtrace_load8(src
+ i
++);
4075 * We have two slashes -- we can just advance
4076 * to the next character.
4083 * This is not "." and it's not ".." -- we can
4084 * just store the "/" and this character and
4092 c
= dtrace_load8(src
+ i
++);
4096 * This is a "/./" component. We're not going
4097 * to store anything in the destination buffer;
4098 * we're just going to go to the next component.
4105 * This is not ".." -- we can just store the
4106 * "/." and this character and continue
4115 c
= dtrace_load8(src
+ i
++);
4117 if (c
!= '/' && c
!= '\0') {
4119 * This is not ".." -- it's "..[mumble]".
4120 * We'll store the "/.." and this character
4121 * and continue processing.
4131 * This is "/../" or "/..\0". We need to back up
4132 * our destination pointer until we find a "/".
4135 while (j
!= 0 && dest
[--j
] != '/')
4140 } while (c
!= '\0');
4143 regs
[rd
] = (uintptr_t)dest
;
4144 mstate
->dtms_scratch_ptr
+= size
;
4149 /* CHUD callback ('chud(uint64_t, [uint64_t], [uint64_t] ...)') */
4150 case DIF_SUBR_CHUD
: {
4151 uint64_t selector
= tupregs
[0].dttk_value
;
4152 uint64_t args
[DIF_DTR_NREGS
-1] = {0ULL};
4155 /* copy in any variadic argument list */
4156 for(ii
= 0; ii
< DIF_DTR_NREGS
-1; ii
++) {
4157 args
[ii
] = tupregs
[ii
+1].dttk_value
;
4161 chudxnu_dtrace_callback(selector
, args
, DIF_DTR_NREGS
-1);
4162 if(KERN_SUCCESS
!= ret
) {
4168 #endif /* __APPLE__ */
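
/*
 * Editor's note -- illustrative sketch only: nearly every subroutine above
 * that returns a string carves it out of per-clause scratch space, and fails
 * with CPU_DTRACE_NOSCRATCH when the request would not fit.  The recurring
 * check amounts to the following; the helper name is hypothetical and not
 * part of the DTrace source.
 */
#if 0	/* illustrative only */
static int
example_scratch_available(dtrace_mstate_t *mstate, uint64_t size)
{
	if (mstate->dtms_scratch_ptr + size >
	    mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
		return (0);	/* caller should fail the subroutine     */
	}

	return (1);		/* caller may consume 'size' scratch bytes */
}
#endif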
4174 * Emulate the execution of DTrace IR instructions specified by the given
4175 * DIF object. This function is deliberately void of assertions as all of
4176 * the necessary checks are handled by a call to dtrace_difo_validate().
4179 dtrace_dif_emulate(dtrace_difo_t
*difo
, dtrace_mstate_t
*mstate
,
4180 dtrace_vstate_t
*vstate
, dtrace_state_t
*state
)
4182 const dif_instr_t
*text
= difo
->dtdo_buf
;
4183 const uint_t textlen
= difo
->dtdo_len
;
4184 const char *strtab
= difo
->dtdo_strtab
;
4185 const uint64_t *inttab
= difo
->dtdo_inttab
;
4188 dtrace_statvar_t
*svar
;
4189 dtrace_dstate_t
*dstate
= &vstate
->dtvs_dynvars
;
4191 volatile uint16_t *flags
= &cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
;
4192 #if !defined(__APPLE__)
4193 volatile uintptr_t *illval
= &cpu_core
[CPU
->cpu_id
].cpuc_dtrace_illval
;
4195 volatile uint64_t *illval
= &cpu_core
[CPU
->cpu_id
].cpuc_dtrace_illval
;
4196 #endif /* __APPLE__ */
4198 dtrace_key_t tupregs
[DIF_DTR_NREGS
+ 2]; /* +2 for thread and id */
4199 uint64_t regs
[DIF_DIR_NREGS
];
4202 uint8_t cc_n
= 0, cc_z
= 0, cc_v
= 0, cc_c
= 0;
4204 uint_t pc
= 0, id
, opc
;
4209 regs
[DIF_REG_R0
] = 0; /* %r0 is fixed at zero */
4211 while (pc
< textlen
&& !(*flags
& CPU_DTRACE_FAULT
)) {
4215 r1
= DIF_INSTR_R1(instr
);
4216 r2
= DIF_INSTR_R2(instr
);
4217 rd
= DIF_INSTR_RD(instr
);
4219 switch (DIF_INSTR_OP(instr
)) {
4221 regs
[rd
] = regs
[r1
] | regs
[r2
];
4224 regs
[rd
] = regs
[r1
] ^ regs
[r2
];
4227 regs
[rd
] = regs
[r1
] & regs
[r2
];
4230 regs
[rd
] = regs
[r1
] << regs
[r2
];
4233 regs
[rd
] = regs
[r1
] >> regs
[r2
];
4236 regs
[rd
] = regs
[r1
] - regs
[r2
];
4239 regs
[rd
] = regs
[r1
] + regs
[r2
];
4242 regs
[rd
] = regs
[r1
] * regs
[r2
];
4245 if (regs
[r2
] == 0) {
4247 *flags
|= CPU_DTRACE_DIVZERO
;
4249 regs
[rd
] = (int64_t)regs
[r1
] /
4255 if (regs
[r2
] == 0) {
4257 *flags
|= CPU_DTRACE_DIVZERO
;
4259 regs
[rd
] = regs
[r1
] / regs
[r2
];
4264 if (regs
[r2
] == 0) {
4266 *flags
|= CPU_DTRACE_DIVZERO
;
4268 regs
[rd
] = (int64_t)regs
[r1
] %
4274 if (regs
[r2
] == 0) {
4276 *flags
|= CPU_DTRACE_DIVZERO
;
4278 regs
[rd
] = regs
[r1
] % regs
[r2
];
4283 regs
[rd
] = ~regs
[r1
];
4286 regs
[rd
] = regs
[r1
];
4289 cc_r
= regs
[r1
] - regs
[r2
];
4293 cc_c
= regs
[r1
] < regs
[r2
];
4296 cc_n
= cc_v
= cc_c
= 0;
4297 cc_z
= regs
[r1
] == 0;
4300 pc
= DIF_INSTR_LABEL(instr
);
4304 pc
= DIF_INSTR_LABEL(instr
);
4308 pc
= DIF_INSTR_LABEL(instr
);
4311 if ((cc_z
| (cc_n
^ cc_v
)) == 0)
4312 pc
= DIF_INSTR_LABEL(instr
);
4315 if ((cc_c
| cc_z
) == 0)
4316 pc
= DIF_INSTR_LABEL(instr
);
4319 if ((cc_n
^ cc_v
) == 0)
4320 pc
= DIF_INSTR_LABEL(instr
);
4324 pc
= DIF_INSTR_LABEL(instr
);
4328 pc
= DIF_INSTR_LABEL(instr
);
4332 pc
= DIF_INSTR_LABEL(instr
);
4335 if (cc_z
| (cc_n
^ cc_v
))
4336 pc
= DIF_INSTR_LABEL(instr
);
4340 pc
= DIF_INSTR_LABEL(instr
);
4343 if (!dtrace_canstore(regs
[r1
], 1, mstate
, vstate
)) {
4344 *flags
|= CPU_DTRACE_KPRIV
;
4350 regs
[rd
] = (int8_t)dtrace_load8(regs
[r1
]);
4353 if (!dtrace_canstore(regs
[r1
], 2, mstate
, vstate
)) {
4354 *flags
|= CPU_DTRACE_KPRIV
;
4360 regs
[rd
] = (int16_t)dtrace_load16(regs
[r1
]);
4363 if (!dtrace_canstore(regs
[r1
], 4, mstate
, vstate
)) {
4364 *flags
|= CPU_DTRACE_KPRIV
;
4370 regs
[rd
] = (int32_t)dtrace_load32(regs
[r1
]);
4373 if (!dtrace_canstore(regs
[r1
], 1, mstate
, vstate
)) {
4374 *flags
|= CPU_DTRACE_KPRIV
;
4380 regs
[rd
] = dtrace_load8(regs
[r1
]);
4383 if (!dtrace_canstore(regs
[r1
], 2, mstate
, vstate
)) {
4384 *flags
|= CPU_DTRACE_KPRIV
;
4390 regs
[rd
] = dtrace_load16(regs
[r1
]);
4393 if (!dtrace_canstore(regs
[r1
], 4, mstate
, vstate
)) {
4394 *flags
|= CPU_DTRACE_KPRIV
;
4400 regs
[rd
] = dtrace_load32(regs
[r1
]);
4403 if (!dtrace_canstore(regs
[r1
], 8, mstate
, vstate
)) {
4404 *flags
|= CPU_DTRACE_KPRIV
;
4410 regs
[rd
] = dtrace_load64(regs
[r1
]);
4414 dtrace_fuword8(regs
[r1
]);
4417 regs
[rd
] = (int16_t)
4418 dtrace_fuword16(regs
[r1
]);
4421 regs
[rd
] = (int32_t)
4422 dtrace_fuword32(regs
[r1
]);
4426 dtrace_fuword8(regs
[r1
]);
4430 dtrace_fuword16(regs
[r1
]);
4434 dtrace_fuword32(regs
[r1
]);
4438 dtrace_fuword64(regs
[r1
]);
4446 regs
[rd
] = inttab
[DIF_INSTR_INTEGER(instr
)];
4449 regs
[rd
] = (uint64_t)(uintptr_t)
4450 (strtab
+ DIF_INSTR_STRING(instr
));
4453 cc_r
= dtrace_strncmp((char *)(uintptr_t)regs
[r1
],
4454 (char *)(uintptr_t)regs
[r2
],
4455 state
->dts_options
[DTRACEOPT_STRSIZE
]);
4462 regs
[rd
] = dtrace_dif_variable(mstate
, state
,
4466 id
= DIF_INSTR_VAR(instr
);
4468 if (id
>= DIF_VAR_OTHER_UBASE
) {
4471 id
-= DIF_VAR_OTHER_UBASE
;
4472 svar
= vstate
->dtvs_globals
[id
];
4473 ASSERT(svar
!= NULL
);
4474 v
= &svar
->dtsv_var
;
4476 if (!(v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
)) {
4477 regs
[rd
] = svar
->dtsv_data
;
4481 a
= (uintptr_t)svar
->dtsv_data
;
4483 if (*(uint8_t *)a
== UINT8_MAX
) {
4485 * If the 0th byte is set to UINT8_MAX
4486 * then this is to be treated as a
4487 * reference to a NULL variable.
4491 regs
[rd
] = a
+ sizeof (uint64_t);
4497 regs
[rd
] = dtrace_dif_variable(mstate
, state
, id
, 0);
4501 id
= DIF_INSTR_VAR(instr
);
4503 ASSERT(id
>= DIF_VAR_OTHER_UBASE
);
4504 id
-= DIF_VAR_OTHER_UBASE
;
4506 svar
= vstate
->dtvs_globals
[id
];
4507 ASSERT(svar
!= NULL
);
4508 v
= &svar
->dtsv_var
;
4510 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
4511 uintptr_t a
= (uintptr_t)svar
->dtsv_data
;
4514 ASSERT(svar
->dtsv_size
!= 0);
4516 if (regs
[rd
] == NULL
) {
4517 *(uint8_t *)a
= UINT8_MAX
;
4521 a
+= sizeof (uint64_t);
4524 dtrace_vcopy((void *)(uintptr_t)regs
[rd
],
4525 (void *)a
, &v
->dtdv_type
);
4529 svar
->dtsv_data
= regs
[rd
];
4534 * There are no DTrace built-in thread-local arrays at
4535 * present. This opcode is saved for future work.
4537 *flags
|= CPU_DTRACE_ILLOP
;
4542 id
= DIF_INSTR_VAR(instr
);
4544 if (id
< DIF_VAR_OTHER_UBASE
) {
4546 * For now, this has no meaning.
4552 id
-= DIF_VAR_OTHER_UBASE
;
4554 ASSERT(id
< vstate
->dtvs_nlocals
);
4555 ASSERT(vstate
->dtvs_locals
!= NULL
);
4557 svar
= vstate
->dtvs_locals
[id
];
4558 ASSERT(svar
!= NULL
);
4559 v
= &svar
->dtsv_var
;
4561 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
4562 uintptr_t a
= (uintptr_t)svar
->dtsv_data
;
4563 size_t sz
= v
->dtdv_type
.dtdt_size
;
4565 sz
+= sizeof (uint64_t);
4566 ASSERT(svar
->dtsv_size
== NCPU
* sz
);
4567 a
+= CPU
->cpu_id
* sz
;
4569 if (*(uint8_t *)a
== UINT8_MAX
) {
4571 * If the 0th byte is set to UINT8_MAX
4572 * then this is to be treated as a
4573 * reference to a NULL variable.
4577 regs
[rd
] = a
+ sizeof (uint64_t);
4583 ASSERT(svar
->dtsv_size
== NCPU
* sizeof (uint64_t));
4584 tmp
= (uint64_t *)(uintptr_t)svar
->dtsv_data
;
4585 regs
[rd
] = tmp
[CPU
->cpu_id
];
4589 id
= DIF_INSTR_VAR(instr
);
4591 ASSERT(id
>= DIF_VAR_OTHER_UBASE
);
4592 id
-= DIF_VAR_OTHER_UBASE
;
4593 ASSERT(id
< vstate
->dtvs_nlocals
);
4595 ASSERT(vstate
->dtvs_locals
!= NULL
);
4596 svar
= vstate
->dtvs_locals
[id
];
4597 ASSERT(svar
!= NULL
);
4598 v
= &svar
->dtsv_var
;
4600 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
4601 uintptr_t a
= (uintptr_t)svar
->dtsv_data
;
4602 size_t sz
= v
->dtdv_type
.dtdt_size
;
4604 sz
+= sizeof (uint64_t);
4605 ASSERT(svar
->dtsv_size
== NCPU
* sz
);
4606 a
+= CPU
->cpu_id
* sz
;
4608 if (regs
[rd
] == NULL
) {
4609 *(uint8_t *)a
= UINT8_MAX
;
4613 a
+= sizeof (uint64_t);
4616 dtrace_vcopy((void *)(uintptr_t)regs
[rd
],
4617 (void *)a
, &v
->dtdv_type
);
4621 ASSERT(svar
->dtsv_size
== NCPU
* sizeof (uint64_t));
4622 tmp
= (uint64_t *)(uintptr_t)svar
->dtsv_data
;
4623 tmp
[CPU
->cpu_id
] = regs
[rd
];
4627 dtrace_dynvar_t
*dvar
;
4630 id
= DIF_INSTR_VAR(instr
);
4631 ASSERT(id
>= DIF_VAR_OTHER_UBASE
);
4632 id
-= DIF_VAR_OTHER_UBASE
;
4633 v
= &vstate
->dtvs_tlocals
[id
];
4635 key
= &tupregs
[DIF_DTR_NREGS
];
4636 key
[0].dttk_value
= (uint64_t)id
;
4637 key
[0].dttk_size
= 0;
4638 DTRACE_TLS_THRKEY(key
[1].dttk_value
);
4639 key
[1].dttk_size
= 0;
4641 dvar
= dtrace_dynvar(dstate
, 2, key
,
4642 sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC
);
4649 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
4650 regs
[rd
] = (uint64_t)(uintptr_t)dvar
->dtdv_data
;
4652 regs
[rd
] = *((uint64_t *)dvar
->dtdv_data
);
4659 dtrace_dynvar_t
*dvar
;
4662 id
= DIF_INSTR_VAR(instr
);
4663 ASSERT(id
>= DIF_VAR_OTHER_UBASE
);
4664 id
-= DIF_VAR_OTHER_UBASE
;
4666 key
= &tupregs
[DIF_DTR_NREGS
];
4667 key
[0].dttk_value
= (uint64_t)id
;
4668 key
[0].dttk_size
= 0;
4669 DTRACE_TLS_THRKEY(key
[1].dttk_value
);
4670 key
[1].dttk_size
= 0;
4671 v
= &vstate
->dtvs_tlocals
[id
];
4673 dvar
= dtrace_dynvar(dstate
, 2, key
,
4674 v
->dtdv_type
.dtdt_size
> sizeof (uint64_t) ?
4675 v
->dtdv_type
.dtdt_size
: sizeof (uint64_t),
4676 regs
[rd
] ? DTRACE_DYNVAR_ALLOC
:
4677 DTRACE_DYNVAR_DEALLOC
);
4680 * Given that we're storing to thread-local data,
4681 * we need to flush our predicate cache.
4683 #if !defined(__APPLE__)
4684 curthread
->t_predcache
= NULL
;
4686 dtrace_set_thread_predcache(current_thread(), 0);
4687 #endif /* __APPLE__ */
4693 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
4694 dtrace_vcopy((void *)(uintptr_t)regs
[rd
],
4695 dvar
->dtdv_data
, &v
->dtdv_type
);
4697 *((uint64_t *)dvar
->dtdv_data
) = regs
[rd
];
4704 regs
[rd
] = (int64_t)regs
[r1
] >> regs
[r2
];
4708 dtrace_dif_subr(DIF_INSTR_SUBR(instr
), rd
,
4709 regs
, tupregs
, ttop
, mstate
, state
);
4713 if (ttop
== DIF_DTR_NREGS
) {
4714 *flags
|= CPU_DTRACE_TUPOFLOW
;
4718 if (r1
== DIF_TYPE_STRING
) {
4720 * If this is a string type and the size is 0,
4721 * we'll use the system-wide default string
4722 * size. Note that we are _not_ looking at
4723 * the value of the DTRACEOPT_STRSIZE option;
4724 * had this been set, we would expect to have
4725 * a non-zero size value in the "pushtr".
4727 tupregs
[ttop
].dttk_size
=
4728 dtrace_strlen((char *)(uintptr_t)regs
[rd
],
4729 regs
[r2
] ? regs
[r2
] :
4730 dtrace_strsize_default
) + 1;
4732 tupregs
[ttop
].dttk_size
= regs
[r2
];
4735 tupregs
[ttop
++].dttk_value
= regs
[rd
];
4739 if (ttop
== DIF_DTR_NREGS
) {
4740 *flags
|= CPU_DTRACE_TUPOFLOW
;
4744 tupregs
[ttop
].dttk_value
= regs
[rd
];
4745 tupregs
[ttop
++].dttk_size
= 0;
4753 case DIF_OP_FLUSHTS
:
4758 case DIF_OP_LDTAA
: {
4759 dtrace_dynvar_t
*dvar
;
4760 dtrace_key_t
*key
= tupregs
;
4761 uint_t nkeys
= ttop
;
4763 id
= DIF_INSTR_VAR(instr
);
4764 ASSERT(id
>= DIF_VAR_OTHER_UBASE
);
4765 id
-= DIF_VAR_OTHER_UBASE
;
4767 key
[nkeys
].dttk_value
= (uint64_t)id
;
4768 key
[nkeys
++].dttk_size
= 0;
4770 if (DIF_INSTR_OP(instr
) == DIF_OP_LDTAA
) {
4771 DTRACE_TLS_THRKEY(key
[nkeys
].dttk_value
);
4772 key
[nkeys
++].dttk_size
= 0;
4773 v
= &vstate
->dtvs_tlocals
[id
];
4775 v
= &vstate
->dtvs_globals
[id
]->dtsv_var
;
4778 dvar
= dtrace_dynvar(dstate
, nkeys
, key
,
4779 v
->dtdv_type
.dtdt_size
> sizeof (uint64_t) ?
4780 v
->dtdv_type
.dtdt_size
: sizeof (uint64_t),
4781 DTRACE_DYNVAR_NOALLOC
);
4788 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
4789 regs
[rd
] = (uint64_t)(uintptr_t)dvar
->dtdv_data
;
4791 regs
[rd
] = *((uint64_t *)dvar
->dtdv_data
);
4798 case DIF_OP_STTAA
: {
4799 dtrace_dynvar_t
*dvar
;
4800 dtrace_key_t
*key
= tupregs
;
4801 uint_t nkeys
= ttop
;
4803 id
= DIF_INSTR_VAR(instr
);
4804 ASSERT(id
>= DIF_VAR_OTHER_UBASE
);
4805 id
-= DIF_VAR_OTHER_UBASE
;
4807 key
[nkeys
].dttk_value
= (uint64_t)id
;
4808 key
[nkeys
++].dttk_size
= 0;
4810 if (DIF_INSTR_OP(instr
) == DIF_OP_STTAA
) {
4811 DTRACE_TLS_THRKEY(key
[nkeys
].dttk_value
);
4812 key
[nkeys
++].dttk_size
= 0;
4813 v
= &vstate
->dtvs_tlocals
[id
];
4815 v
= &vstate
->dtvs_globals
[id
]->dtsv_var
;
4818 dvar
= dtrace_dynvar(dstate
, nkeys
, key
,
4819 v
->dtdv_type
.dtdt_size
> sizeof (uint64_t) ?
4820 v
->dtdv_type
.dtdt_size
: sizeof (uint64_t),
4821 regs
[rd
] ? DTRACE_DYNVAR_ALLOC
:
4822 DTRACE_DYNVAR_DEALLOC
);
4827 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
4828 dtrace_vcopy((void *)(uintptr_t)regs
[rd
],
4829 dvar
->dtdv_data
, &v
->dtdv_type
);
4831 *((uint64_t *)dvar
->dtdv_data
) = regs
[rd
];
4837 case DIF_OP_ALLOCS
: {
4838 uintptr_t ptr
= P2ROUNDUP(mstate
->dtms_scratch_ptr
, 8);
4839 size_t size
= ptr
- mstate
->dtms_scratch_ptr
+ regs
[r1
];
4841 if (mstate
->dtms_scratch_ptr
+ size
>
4842 mstate
->dtms_scratch_base
+
4843 mstate
->dtms_scratch_size
) {
4844 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
4847 dtrace_bzero((void *)
4848 mstate
->dtms_scratch_ptr
, size
);
4849 mstate
->dtms_scratch_ptr
+= size
;
4856 if (!dtrace_canstore(regs
[rd
], regs
[r2
],
4858 *flags
|= CPU_DTRACE_BADADDR
;
4863 dtrace_bcopy((void *)(uintptr_t)regs
[r1
],
4864 (void *)(uintptr_t)regs
[rd
], (size_t)regs
[r2
]);
4868 if (!dtrace_canstore(regs
[rd
], 1, mstate
, vstate
)) {
4869 *flags
|= CPU_DTRACE_BADADDR
;
4873 *((uint8_t *)(uintptr_t)regs
[rd
]) = (uint8_t)regs
[r1
];
4877 if (!dtrace_canstore(regs
[rd
], 2, mstate
, vstate
)) {
4878 *flags
|= CPU_DTRACE_BADADDR
;
4883 *flags
|= CPU_DTRACE_BADALIGN
;
4887 *((uint16_t *)(uintptr_t)regs
[rd
]) = (uint16_t)regs
[r1
];
4891 if (!dtrace_canstore(regs
[rd
], 4, mstate
, vstate
)) {
4892 *flags
|= CPU_DTRACE_BADADDR
;
4897 *flags
|= CPU_DTRACE_BADALIGN
;
4901 *((uint32_t *)(uintptr_t)regs
[rd
]) = (uint32_t)regs
[r1
];
4905 if (!dtrace_canstore(regs
[rd
], 8, mstate
, vstate
)) {
4906 *flags
|= CPU_DTRACE_BADADDR
;
4910 #if !defined(__APPLE__)
4913 if (regs
[rd
] & 3) { /* Darwin kmem_zalloc() called from dtrace_difo_init() is 4-byte aligned. */
4914 #endif /* __APPLE__ */
4915 *flags
|= CPU_DTRACE_BADALIGN
;
4919 *((uint64_t *)(uintptr_t)regs
[rd
]) = regs
[r1
];
4924 if (!(*flags
& CPU_DTRACE_FAULT
))
4927 mstate
->dtms_fltoffs
= opc
* sizeof (dif_instr_t
);
4928 mstate
->dtms_present
|= DTRACE_MSTATE_FLTOFFS
;
static void
dtrace_action_breakpoint(dtrace_ecb_t *ecb)
{
	dtrace_probe_t *probe = ecb->dte_probe;
	dtrace_provider_t *prov = probe->dtpr_provider;
	char c[DTRACE_FULLNAMELEN + 80], *str;
	char *msg = "dtrace: breakpoint action at probe ";
	char *ecbmsg = " (ecb ";
	uintptr_t mask = (0xf << (sizeof (uintptr_t) * NBBY / 4));
	uintptr_t val = (uintptr_t)ecb;
	int shift = (sizeof (uintptr_t) * NBBY) - 4, i = 0;

	if (dtrace_destructive_disallow)
		return;

	/*
	 * It's impossible to be taking action on the NULL probe.
	 */
	ASSERT(probe != NULL);

	/*
	 * This is a poor man's (destitute man's?) sprintf(): we want to
	 * print the provider name, module name, function name and name of
	 * the probe, along with the hex address of the ECB with the breakpoint
	 * action -- all of which we must place in the character buffer by
	 * hand.
	 */
	while (*msg != '\0')
		c[i++] = *msg++;

	for (str = prov->dtpv_name; *str != '\0'; str++)
		c[i++] = *str;
	c[i++] = ':';

	for (str = probe->dtpr_mod; *str != '\0'; str++)
		c[i++] = *str;
	c[i++] = ':';

	for (str = probe->dtpr_func; *str != '\0'; str++)
		c[i++] = *str;
	c[i++] = ':';

	for (str = probe->dtpr_name; *str != '\0'; str++)
		c[i++] = *str;

	while (*ecbmsg != '\0')
		c[i++] = *ecbmsg++;

	while (shift >= 0) {
		mask = (uintptr_t)0xf << shift;

		if (val >= ((uintptr_t)1 << shift))
			c[i++] = "0123456789abcdef"[(val & mask) >> shift];
		shift -= 4;
	}
static void
dtrace_action_panic(dtrace_ecb_t *ecb)
{
	dtrace_probe_t *probe = ecb->dte_probe;

	/*
	 * It's impossible to be taking action on the NULL probe.
	 */
	ASSERT(probe != NULL);

	if (dtrace_destructive_disallow)
		return;

	if (dtrace_panicked != NULL)
		return;

#if !defined(__APPLE__)
	if (dtrace_casptr(&dtrace_panicked, NULL, curthread) != NULL)
		return;
#else
	if (dtrace_casptr(&dtrace_panicked, NULL, current_thread()) != NULL)
		return;
#endif /* __APPLE__ */

	/*
	 * We won the right to panic. (We want to be sure that only one
	 * thread calls panic() from dtrace_probe(), and that panic() is
	 * called exactly once.)
	 */
	dtrace_panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)",
	    probe->dtpr_provider->dtpv_name, probe->dtpr_mod,
	    probe->dtpr_func, probe->dtpr_name, (void *)ecb);

#if defined(__APPLE__)
	/* Mac OS X debug feature -- can return from panic() */
	dtrace_panicked = NULL;
#endif /* __APPLE__ */
}
static void
dtrace_action_raise(uint64_t sig)
{
	if (dtrace_destructive_disallow)
		return;

	if (sig >= NSIG) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return;
	}

#if !defined(__APPLE__)
	/*
	 * raise() has a queue depth of 1 -- we ignore all subsequent
	 * invocations of the raise() action.
	 */
	if (curthread->t_dtrace_sig == 0)
		curthread->t_dtrace_sig = (uint8_t)sig;

	curthread->t_sig_check = 1;
#else
	uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());

	if (uthread && uthread->t_dtrace_sig == 0) {
		uthread->t_dtrace_sig = sig;
		psignal(current_proc(), (int)sig);
	}
#endif /* __APPLE__ */
}
static void
dtrace_action_stop(void)
{
	if (dtrace_destructive_disallow)
		return;

#if !defined(__APPLE__)
	if (!curthread->t_dtrace_stop) {
		curthread->t_dtrace_stop = 1;
		curthread->t_sig_check = 1;
	}
#else
	psignal(current_proc(), SIGSTOP);
#endif /* __APPLE__ */
}
static void
dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val)
{
	hrtime_t now;
	volatile uint16_t *flags;
	cpu_t *cpu = CPU;

	if (dtrace_destructive_disallow)
		return;

	flags = (volatile uint16_t *)&cpu_core[cpu->cpu_id].cpuc_dtrace_flags;

	now = dtrace_gethrtime();

	if (now - cpu->cpu_dtrace_chillmark > dtrace_chill_interval) {
		/*
		 * We need to advance the mark to the current time.
		 */
		cpu->cpu_dtrace_chillmark = now;
		cpu->cpu_dtrace_chilled = 0;
	}

	/*
	 * Now check to see if the requested chill time would take us over
	 * the maximum amount of time allowed in the chill interval. (Or
	 * worse, if the calculation itself induces overflow.)
	 */
	if (cpu->cpu_dtrace_chilled + val > dtrace_chill_max ||
	    cpu->cpu_dtrace_chilled + val < cpu->cpu_dtrace_chilled) {
		*flags |= CPU_DTRACE_ILLOP;
		return;
	}

	while (dtrace_gethrtime() - now < val)
		continue;

	/*
	 * Normally, we assure that the value of the variable "timestamp" does
	 * not change within an ECB. The presence of chill() represents an
	 * exception to this rule, however.
	 */
	mstate->dtms_present &= ~DTRACE_MSTATE_TIMESTAMP;
	cpu->cpu_dtrace_chilled += val;
}
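
/*
 * The check above guards both limits at once: the requested chill time may
 * not push the per-interval total past dtrace_chill_max, and if the sum
 * comes out smaller than the running total, the addition itself wrapped.
 * A minimal sketch of the same test, assuming unsigned 64-bit arithmetic
 * (the helper name is hypothetical; the real code operates on hrtime_t):
 */
#if 0	/* illustrative only */
static int
chill_request_is_illegal(uint64_t chilled, uint64_t val, uint64_t max)
{
	/* Illegal if we exceed the per-interval maximum or the sum wraps. */
	return (chilled + val > max || chilled + val < chilled);
}
#endif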
5128 dtrace_action_ustack(dtrace_mstate_t
*mstate
, dtrace_state_t
*state
,
5129 uint64_t *buf
, uint64_t arg
)
5131 int nframes
= DTRACE_USTACK_NFRAMES(arg
);
5132 int strsize
= DTRACE_USTACK_STRSIZE(arg
);
5133 uint64_t *pcs
= &buf
[1], *fps
;
5134 char *str
= (char *)&pcs
[nframes
];
5135 int size
, offs
= 0, i
, j
;
5136 uintptr_t old
= mstate
->dtms_scratch_ptr
, saved
;
5137 uint16_t *flags
= &cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
;
5141 * Should be taking a faster path if string space has not been
5144 ASSERT(strsize
!= 0);
5147 * We will first allocate some temporary space for the frame pointers.
5149 fps
= (uint64_t *)P2ROUNDUP(mstate
->dtms_scratch_ptr
, 8);
5150 size
= (uintptr_t)fps
- mstate
->dtms_scratch_ptr
+
5151 (nframes
* sizeof (uint64_t));
5153 if (mstate
->dtms_scratch_ptr
+ size
>
5154 mstate
->dtms_scratch_base
+ mstate
->dtms_scratch_size
) {
5156 * Not enough room for our frame pointers -- need to indicate
5157 * that we ran out of scratch space.
5159 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
5163 mstate
->dtms_scratch_ptr
+= size
;
5164 saved
= mstate
->dtms_scratch_ptr
;
5167 * Now get a stack with both program counters and frame pointers.
5169 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
5170 dtrace_getufpstack(buf
, fps
, nframes
+ 1);
5171 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
5174 * If that faulted, we're cooked.
5176 if (*flags
& CPU_DTRACE_FAULT
)
5180 * Now we want to walk up the stack, calling the USTACK helper. For
5181 * each iteration, we restore the scratch pointer.
5183 for (i
= 0; i
< nframes
; i
++) {
5184 mstate
->dtms_scratch_ptr
= saved
;
5186 if (offs
>= strsize
)
5189 sym
= (char *)(uintptr_t)dtrace_helper(
5190 DTRACE_HELPER_ACTION_USTACK
,
5191 mstate
, state
, pcs
[i
], fps
[i
]);
5194 * If we faulted while running the helper, we're going to
5195 * clear the fault and null out the corresponding string.
5197 if (*flags
& CPU_DTRACE_FAULT
) {
5198 *flags
&= ~CPU_DTRACE_FAULT
;
5208 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
5211 * Now copy in the string that the helper returned to us.
5213 for (j
= 0; offs
+ j
< strsize
; j
++) {
5214 if ((str
[offs
+ j
] = sym
[j
]) == '\0')
5218 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
5223 if (offs
>= strsize
) {
5225 * If we didn't have room for all of the strings, we don't
5226 * abort processing -- this needn't be a fatal error -- but we
5227 * still want to increment a counter (dts_stkstroverflows) to
5228 * allow this condition to be warned about. (If this is from
5229 * a jstack() action, it is easily tuned via jstackstrsize.)
5231 dtrace_error(&state
->dts_stkstroverflows
);
5234 while (offs
< strsize
)
5238 mstate
->dtms_scratch_ptr
= old
;
5242 * If you're looking for the epicenter of DTrace, you just found it. This
5243 * is the function called by the provider to fire a probe -- from which all
5244 * subsequent probe-context DTrace activity emanates.
5246 #if !defined(__APPLE__)
5248 dtrace_probe(dtrace_id_t id
, uintptr_t arg0
, uintptr_t arg1
,
5249 uintptr_t arg2
, uintptr_t arg3
, uintptr_t arg4
)
5252 __dtrace_probe(dtrace_id_t id
, uint64_t arg0
, uint64_t arg1
,
5253 uint64_t arg2
, uint64_t arg3
, uint64_t arg4
)
5254 #endif /* __APPLE__ */
5256 processorid_t cpuid
;
5257 dtrace_icookie_t cookie
;
5258 dtrace_probe_t
*probe
;
5259 dtrace_mstate_t mstate
;
5261 dtrace_action_t
*act
;
5265 volatile uint16_t *flags
;
5268 #if !defined(__APPLE__)
5270 * Kick out immediately if this CPU is still being born (in which case
5271 * curthread will be set to -1)
5273 if ((uintptr_t)curthread
& 1)
5276 #endif /* __APPLE__ */
5278 cookie
= dtrace_interrupt_disable();
5279 probe
= dtrace_probes
[id
- 1];
5280 cpuid
= CPU
->cpu_id
;
5281 onintr
= CPU_ON_INTR(CPU
);
5283 #if !defined(__APPLE__)
5284 if (!onintr
&& probe
->dtpr_predcache
!= DTRACE_CACHEIDNONE
&&
5285 probe
->dtpr_predcache
== curthread
->t_predcache
) {
5287 if (!onintr
&& probe
->dtpr_predcache
!= DTRACE_CACHEIDNONE
&&
5288 probe
->dtpr_predcache
== dtrace_get_thread_predcache(current_thread())) {
5289 #endif /* __APPLE__ */
5291 * We have hit in the predicate cache; we know that
5292 * this predicate would evaluate to be false.
5294 dtrace_interrupt_enable(cookie
);
5298 if (panic_quiesce
) {
5300 * We don't trace anything if we're panicking.
5302 dtrace_interrupt_enable(cookie
);
5306 #if !defined(__APPLE__)
5307 now
= dtrace_gethrtime();
5308 vtime
= dtrace_vtime_references
!= 0;
5310 if (vtime
&& curthread
->t_dtrace_start
)
5311 curthread
->t_dtrace_vtime
+= now
- curthread
->t_dtrace_start
;
5313 vtime
= dtrace_vtime_references
!= 0;
5317 int64_t dtrace_accum_time
, recent_vtime
;
5318 thread_t thread
= current_thread();
5320 dtrace_accum_time
= dtrace_get_thread_tracing(thread
); /* Time spent inside DTrace so far (nanoseconds) */
5322 if (dtrace_accum_time
>= 0) {
5323 recent_vtime
= dtrace_abs_to_nano(dtrace_calc_thread_recent_vtime(thread
)); /* up to the moment thread vtime */
5325 recent_vtime
= recent_vtime
- dtrace_accum_time
; /* Time without DTrace contribution */
5327 dtrace_set_thread_vtime(thread
, recent_vtime
);
5331 now
= dtrace_gethrtime(); /* must not precede dtrace_calc_thread_recent_vtime() call! */
5332 #endif /* __APPLE__ */
5334 #if defined(__APPLE__)
5336 * A provider may call dtrace_probe_error() in lieu of dtrace_probe() in some circumstances.
5337 * See, e.g. fasttrap_isa.c. However the provider has no access to ECB context, so passes
5338 * NULL through "arg0" and the probe_id of the overridden probe as arg1. Detect that here
5339 * and cons up a viable state (from the probe_id).
5341 if (dtrace_probeid_error
== id
&& NULL
== arg0
) {
5342 dtrace_id_t ftp_id
= (dtrace_id_t
)arg1
;
5343 dtrace_probe_t
*ftp_probe
= dtrace_probes
[ftp_id
- 1];
5344 dtrace_ecb_t
*ftp_ecb
= ftp_probe
->dtpr_ecb
;
5346 if (NULL
!= ftp_ecb
) {
5347 dtrace_state_t
*ftp_state
= ftp_ecb
->dte_state
;
5349 arg0
= (uint64_t)(uintptr_t)ftp_state
;
5350 arg1
= ftp_ecb
->dte_epid
;
5352 * args[2-4] established by caller.
5354 ftp_state
->dts_arg_error_illval
= -1; /* arg5 */
5357 #endif /* __APPLE__ */
5359 mstate
.dtms_probe
= probe
;
5360 mstate
.dtms_arg
[0] = arg0
;
5361 mstate
.dtms_arg
[1] = arg1
;
5362 mstate
.dtms_arg
[2] = arg2
;
5363 mstate
.dtms_arg
[3] = arg3
;
5364 mstate
.dtms_arg
[4] = arg4
;
5366 flags
= (volatile uint16_t *)&cpu_core
[cpuid
].cpuc_dtrace_flags
;
5368 for (ecb
= probe
->dtpr_ecb
; ecb
!= NULL
; ecb
= ecb
->dte_next
) {
5369 dtrace_predicate_t
*pred
= ecb
->dte_predicate
;
5370 dtrace_state_t
*state
= ecb
->dte_state
;
5371 dtrace_buffer_t
*buf
= &state
->dts_buffer
[cpuid
];
5372 dtrace_buffer_t
*aggbuf
= &state
->dts_aggbuffer
[cpuid
];
5373 dtrace_vstate_t
*vstate
= &state
->dts_vstate
;
5374 dtrace_provider_t
*prov
= probe
->dtpr_provider
;
5379 * A little subtlety with the following (seemingly innocuous)
5380 * declaration of the automatic 'val': by looking at the
5381 * code, you might think that it could be declared in the
5382 * action processing loop, below. (That is, it's only used in
5383 * the action processing loop.) However, it must be declared
5384 * out of that scope because in the case of DIF expression
5385 * arguments to aggregating actions, one iteration of the
5386 * action loop will use the last iteration's value.
5394 mstate
.dtms_present
= DTRACE_MSTATE_ARGS
| DTRACE_MSTATE_PROBE
;
5395 *flags
&= ~CPU_DTRACE_ERROR
;
5397 if (prov
== dtrace_provider
) {
5399 * If dtrace itself is the provider of this probe,
5400 * we're only going to continue processing the ECB if
5401 * arg0 (the dtrace_state_t) is equal to the ECB's
5402 * creating state. (This prevents disjoint consumers
5403 * from seeing one another's metaprobes.)
5405 if (arg0
!= (uint64_t)(uintptr_t)state
)
5409 if (state
->dts_activity
!= DTRACE_ACTIVITY_ACTIVE
) {
5411 * We're not currently active. If our provider isn't
5412 * the dtrace pseudo provider, we're not interested.
5414 if (prov
!= dtrace_provider
)
5418 * Now we must further check if we are in the BEGIN
5419 * probe. If we are, we will only continue processing
5420 * if we're still in WARMUP -- if one BEGIN enabling
5421 * has invoked the exit() action, we don't want to
5422 * evaluate subsequent BEGIN enablings.
5424 if (probe
->dtpr_id
== dtrace_probeid_begin
&&
5425 state
->dts_activity
!= DTRACE_ACTIVITY_WARMUP
) {
5426 ASSERT(state
->dts_activity
==
5427 DTRACE_ACTIVITY_DRAINING
);
5432 if (ecb
->dte_cond
) {
5434 * If the dte_cond bits indicate that this
5435 * consumer is only allowed to see user-mode firings
5436 * of this probe, call the provider's dtps_usermode()
5437 * entry point to check that the probe was fired
5438 * while in a user context. Skip this ECB if that's
5441 if ((ecb
->dte_cond
& DTRACE_COND_USERMODE
) &&
5442 prov
->dtpv_pops
.dtps_usermode(prov
->dtpv_arg
,
5443 probe
->dtpr_id
, probe
->dtpr_arg
) == 0)
5447 * This is more subtle than it looks. We have to be
5448 * absolutely certain that CRED() isn't going to
5449 * change out from under us so it's only legit to
5450 * examine that structure if we're in constrained
5451 * situations. Currently, the only time we'll do this
5452 * check is if a non-super-user has enabled the
5453 * profile or syscall providers -- providers that
5454 * allow visibility of all processes. For the
5455 * profile case, the check above will ensure that
5456 * we're examining a user context.
5458 if (ecb
->dte_cond
& DTRACE_COND_OWNER
) {
5461 ecb
->dte_state
->dts_cred
.dcr_cred
;
5464 ASSERT(s_cr
!= NULL
);
5466 #if !defined(__APPLE__)
5467 if ((cr
= CRED()) == NULL
||
5469 if ((cr
= dtrace_CRED()) == NULL
||
5470 #endif /* __APPLE__ */
5471 s_cr
->cr_uid
!= cr
->cr_uid
||
5472 s_cr
->cr_uid
!= cr
->cr_ruid
||
5473 s_cr
->cr_uid
!= cr
->cr_suid
||
5474 s_cr
->cr_gid
!= cr
->cr_gid
||
5475 s_cr
->cr_gid
!= cr
->cr_rgid
||
5476 s_cr
->cr_gid
!= cr
->cr_sgid
||
5477 #if !defined(__APPLE__)
5478 (proc
= ttoproc(curthread
)) == NULL
||
5479 (proc
->p_flag
& SNOCD
))
5481 1) /* Darwin omits "No Core Dump" flag. */
5482 #endif /* __APPLE__ */
5486 if (ecb
->dte_cond
& DTRACE_COND_ZONEOWNER
) {
5489 ecb
->dte_state
->dts_cred
.dcr_cred
;
5491 ASSERT(s_cr
!= NULL
);
5493 #if !defined(__APPLE__) /* Darwin doesn't do zones. */
5494 if ((cr
= CRED()) == NULL
||
5495 s_cr
->cr_zone
->zone_id
!=
5496 cr
->cr_zone
->zone_id
)
5498 #endif /* __APPLE__ */
5502 if (now
- state
->dts_alive
> dtrace_deadman_timeout
) {
5504 * We seem to be dead. Unless we (a) have kernel
5505 * destructive permissions, (b) have explicitly enabled
5506 * destructive actions and (c) destructive actions have
5507 * not been disabled, we're going to transition into
5508 * the KILLED state, from which no further processing
5509 * on this state will be performed.
5511 if (!dtrace_priv_kernel_destructive(state
) ||
5512 !state
->dts_cred
.dcr_destructive
||
5513 dtrace_destructive_disallow
) {
5514 void *activity
= &state
->dts_activity
;
5515 dtrace_activity_t current
;
5518 current
= state
->dts_activity
;
5519 } while (dtrace_cas32(activity
, current
,
5520 DTRACE_ACTIVITY_KILLED
) != current
);
5526 if ((offs
= dtrace_buffer_reserve(buf
, ecb
->dte_needed
,
5527 ecb
->dte_alignment
, state
, &mstate
)) < 0)
5530 tomax
= buf
->dtb_tomax
;
5531 ASSERT(tomax
!= NULL
);
5533 if (ecb
->dte_size
!= 0)
5534 DTRACE_STORE(uint32_t, tomax
, offs
, ecb
->dte_epid
);
5536 mstate
.dtms_epid
= ecb
->dte_epid
;
5537 mstate
.dtms_present
|= DTRACE_MSTATE_EPID
;
5540 dtrace_difo_t
*dp
= pred
->dtp_difo
;
5543 rval
= dtrace_dif_emulate(dp
, &mstate
, vstate
, state
);
5545 if (!(*flags
& CPU_DTRACE_ERROR
) && !rval
) {
5546 dtrace_cacheid_t cid
= probe
->dtpr_predcache
;
5548 if (cid
!= DTRACE_CACHEIDNONE
&& !onintr
) {
5550 * Update the predicate cache...
5552 ASSERT(cid
== pred
->dtp_cacheid
);
5553 #if !defined(__APPLE__)
5554 curthread
->t_predcache
= cid
;
5556 dtrace_set_thread_predcache(current_thread(), cid
);
5557 #endif /* __APPLE__ */
5564 for (act
= ecb
->dte_action
; !(*flags
& CPU_DTRACE_ERROR
) &&
5565 act
!= NULL
; act
= act
->dta_next
) {
5568 dtrace_recdesc_t
*rec
= &act
->dta_rec
;
5570 size
= rec
->dtrd_size
;
5571 valoffs
= offs
+ rec
->dtrd_offset
;
5573 if (DTRACEACT_ISAGG(act
->dta_kind
)) {
5575 dtrace_aggregation_t
*agg
;
5577 agg
= (dtrace_aggregation_t
*)act
;
5579 if ((dp
= act
->dta_difo
) != NULL
)
5580 v
= dtrace_dif_emulate(dp
,
5581 &mstate
, vstate
, state
);
5583 if (*flags
& CPU_DTRACE_ERROR
)
5587 * Note that we always pass the expression
5588 * value from the previous iteration of the
5589 * action loop. This value will only be used
5590 * if there is an expression argument to the
5591 * aggregating action, denoted by the
5592 * dtag_hasarg field.
5594 dtrace_aggregate(agg
, buf
,
5595 offs
, aggbuf
, v
, val
);
5599 switch (act
->dta_kind
) {
5600 case DTRACEACT_STOP
:
5601 if (dtrace_priv_proc_destructive(state
))
5602 dtrace_action_stop();
5605 case DTRACEACT_BREAKPOINT
:
5606 if (dtrace_priv_kernel_destructive(state
))
5607 dtrace_action_breakpoint(ecb
);
5610 case DTRACEACT_PANIC
:
5611 if (dtrace_priv_kernel_destructive(state
))
5612 dtrace_action_panic(ecb
);
5615 case DTRACEACT_STACK
:
5616 if (!dtrace_priv_kernel(state
))
5619 dtrace_getpcstack((pc_t
*)(tomax
+ valoffs
),
5620 size
/ sizeof (pc_t
), probe
->dtpr_aframes
,
5621 DTRACE_ANCHORED(probe
) ? NULL
:
5626 case DTRACEACT_JSTACK
:
5627 case DTRACEACT_USTACK
:
5628 if (!dtrace_priv_proc(state
))
5632 * See comment in DIF_VAR_PID.
5634 if (DTRACE_ANCHORED(mstate
.dtms_probe
) &&
5636 int depth
= DTRACE_USTACK_NFRAMES(
5639 dtrace_bzero((void *)(tomax
+ valoffs
),
5640 DTRACE_USTACK_STRSIZE(rec
->dtrd_arg
)
5641 + depth
* sizeof (uint64_t));
5646 if (DTRACE_USTACK_STRSIZE(rec
->dtrd_arg
) != 0 &&
5647 curproc
->p_dtrace_helpers
!= NULL
) {
5649 * This is the slow path -- we have
5650 * allocated string space, and we're
5651 * getting the stack of a process that
5652 * has helpers. Call into a separate
5653 * routine to perform this processing.
5655 dtrace_action_ustack(&mstate
, state
,
5656 (uint64_t *)(tomax
+ valoffs
),
5661 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
5662 dtrace_getupcstack((uint64_t *)
5664 DTRACE_USTACK_NFRAMES(rec
->dtrd_arg
) + 1);
5665 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
5675 val
= dtrace_dif_emulate(dp
, &mstate
, vstate
, state
);
5677 if (*flags
& CPU_DTRACE_ERROR
)
5680 switch (act
->dta_kind
) {
5681 case DTRACEACT_SPECULATE
:
5682 ASSERT(buf
== &state
->dts_buffer
[cpuid
]);
5683 buf
= dtrace_speculation_buffer(state
,
5687 *flags
|= CPU_DTRACE_DROP
;
5691 offs
= dtrace_buffer_reserve(buf
,
5692 ecb
->dte_needed
, ecb
->dte_alignment
,
5696 *flags
|= CPU_DTRACE_DROP
;
5700 tomax
= buf
->dtb_tomax
;
5701 ASSERT(tomax
!= NULL
);
5703 if (ecb
->dte_size
!= 0)
5704 DTRACE_STORE(uint32_t, tomax
, offs
,
5708 case DTRACEACT_CHILL
:
5709 if (dtrace_priv_kernel_destructive(state
))
5710 dtrace_action_chill(&mstate
, val
);
5713 case DTRACEACT_RAISE
:
5714 if (dtrace_priv_proc_destructive(state
))
5715 dtrace_action_raise(val
);
5718 case DTRACEACT_COMMIT
:
5722 * We need to commit our buffer state.
5725 buf
->dtb_offset
= offs
+ ecb
->dte_size
;
5726 buf
= &state
->dts_buffer
[cpuid
];
5727 dtrace_speculation_commit(state
, cpuid
, val
);
5731 case DTRACEACT_DISCARD
:
5732 dtrace_speculation_discard(state
, cpuid
, val
);
5735 case DTRACEACT_DIFEXPR
:
5736 case DTRACEACT_LIBACT
:
5737 case DTRACEACT_PRINTF
:
5738 case DTRACEACT_PRINTA
:
5739 case DTRACEACT_SYSTEM
:
5740 case DTRACEACT_FREOPEN
:
5745 if (!dtrace_priv_kernel(state
))
5749 #if !defined(__APPLE__)
5750 case DTRACEACT_USYM
:
5751 case DTRACEACT_UMOD
:
5752 case DTRACEACT_UADDR
: {
5753 struct pid
*pid
= curthread
->t_procp
->p_pidp
;
5755 if (!dtrace_priv_proc(state
))
5758 DTRACE_STORE(uint64_t, tomax
,
5759 valoffs
, (uint64_t)pid
->pid_id
);
5760 DTRACE_STORE(uint64_t, tomax
,
5761 valoffs
+ sizeof (uint64_t), val
);
5766 case DTRACEACT_USYM
:
5767 case DTRACEACT_UMOD
:
5768 case DTRACEACT_UADDR
: {
5769 if (!dtrace_priv_proc(state
))
5772 DTRACE_STORE(uint64_t, tomax
,
5773 valoffs
, (uint64_t)proc_selfpid());
5774 DTRACE_STORE(uint64_t, tomax
,
5775 valoffs
+ sizeof (uint64_t), val
);
5779 #endif /* __APPLE__ */
5781 case DTRACEACT_EXIT
: {
5783 * For the exit action, we are going to attempt
5784 * to atomically set our activity to be
5785 * draining. If this fails (either because
5786 * another CPU has beat us to the exit action,
5787 * or because our current activity is something
5788 * other than ACTIVE or WARMUP), we will
5789 * continue. This assures that the exit action
5790 * can be successfully recorded at most once
5791 * when we're in the ACTIVE state. If we're
5792 * encountering the exit() action while in
5793 * COOLDOWN, however, we want to honor the new
5794 * status code. (We know that we're the only
5795 * thread in COOLDOWN, so there is no race.)
5797 void *activity
= &state
->dts_activity
;
5798 dtrace_activity_t current
= state
->dts_activity
;
5800 if (current
== DTRACE_ACTIVITY_COOLDOWN
)
5803 if (current
!= DTRACE_ACTIVITY_WARMUP
)
5804 current
= DTRACE_ACTIVITY_ACTIVE
;
5806 if (dtrace_cas32(activity
, current
,
5807 DTRACE_ACTIVITY_DRAINING
) != current
) {
5808 *flags
|= CPU_DTRACE_DROP
;
5819 if (dp
->dtdo_rtype
.dtdt_flags
& DIF_TF_BYREF
) {
5820 uintptr_t end
= valoffs
+ size
;
5823 * If this is a string, we're going to only
5824 * load until we find the zero byte -- after
5825 * which we'll store zero bytes.
5827 if (dp
->dtdo_rtype
.dtdt_kind
==
5830 int intuple
= act
->dta_intuple
;
5833 for (s
= 0; s
< size
; s
++) {
5835 c
= dtrace_load8(val
++);
5837 DTRACE_STORE(uint8_t, tomax
,
5840 if (c
== '\0' && intuple
)
5847 while (valoffs
< end
) {
5848 DTRACE_STORE(uint8_t, tomax
, valoffs
++,
5849 dtrace_load8(val
++));
5859 case sizeof (uint8_t):
5860 DTRACE_STORE(uint8_t, tomax
, valoffs
, val
);
5862 case sizeof (uint16_t):
5863 DTRACE_STORE(uint16_t, tomax
, valoffs
, val
);
5865 case sizeof (uint32_t):
5866 DTRACE_STORE(uint32_t, tomax
, valoffs
, val
);
5868 case sizeof (uint64_t):
5869 DTRACE_STORE(uint64_t, tomax
, valoffs
, val
);
5873 * Any other size should have been returned by
5874 * reference, not by value.
5881 if (*flags
& CPU_DTRACE_DROP
)
5884 if (*flags
& CPU_DTRACE_FAULT
) {
5886 dtrace_action_t
*err
;
5890 if (probe
->dtpr_id
== dtrace_probeid_error
) {
5892 * There's nothing we can do -- we had an
5893 * error on the error probe. We bump an
5894 * error counter to at least indicate that
5895 * this condition happened.
5897 dtrace_error(&state
->dts_dblerrors
);
5903 * Before recursing on dtrace_probe(), we
5904 * need to explicitly clear out our start
5905 * time to prevent it from being accumulated
5906 * into t_dtrace_vtime.
5908 #if !defined(__APPLE__)
5909 curthread
->t_dtrace_start
= 0;
5911 /* Set the sign bit on t_dtrace_tracing to suspend accumulation to it. */
5912 dtrace_set_thread_tracing(current_thread(),
5913 (1ULL<<63) | dtrace_get_thread_tracing(current_thread()));
5914 #endif /* __APPLE__ */
5918 * Iterate over the actions to figure out which action
5919 * we were processing when we experienced the error.
5920 * Note that act points _past_ the faulting action; if
5921 * act is ecb->dte_action, the fault was in the
5922 * predicate, if it's ecb->dte_action->dta_next it's
5923 * in action #1, and so on.
5925 for (err
= ecb
->dte_action
, ndx
= 0;
5926 err
!= act
; err
= err
->dta_next
, ndx
++)
5929 dtrace_probe_error(state
, ecb
->dte_epid
, ndx
,
5930 (mstate
.dtms_present
& DTRACE_MSTATE_FLTOFFS
) ?
5931 mstate
.dtms_fltoffs
: -1, DTRACE_FLAGS2FLT(*flags
),
5932 cpu_core
[cpuid
].cpuc_dtrace_illval
);
5938 buf
->dtb_offset
= offs
+ ecb
->dte_size
;
5941 #if !defined(__APPLE__)
5943 curthread
->t_dtrace_start
= dtrace_gethrtime();
5946 thread_t thread
= current_thread();
5947 int64_t t
= dtrace_get_thread_tracing(thread
);
5950 /* Usual case, accumulate time spent here into t_dtrace_tracing */
5951 dtrace_set_thread_tracing(thread
, t
+ (dtrace_gethrtime() - now
));
5953 /* Return from error recursion. No accumulation, just clear the sign bit on t_dtrace_tracing. */
5954 dtrace_set_thread_tracing(thread
, (~(1ULL<<63)) & t
);
5957 #endif /* __APPLE__ */
5959 dtrace_interrupt_enable(cookie
);
#if defined(__APPLE__)
/* Don't allow a thread to re-enter dtrace_probe() */
void
dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
    uint64_t arg2, uint64_t arg3, uint64_t arg4)
{
	thread_t thread = current_thread();

	if (id == dtrace_probeid_error) {
		__dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
		dtrace_getfp(); /* Defeat tail-call optimization of __dtrace_probe() */
	} else if (!dtrace_get_thread_reentering(thread)) {
		dtrace_set_thread_reentering(thread, TRUE);
		__dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
		dtrace_set_thread_reentering(thread, FALSE);
	}
}
#endif /* __APPLE__ */
5982 * DTrace Probe Hashing Functions
5984 * The functions in this section (and indeed, the functions in remaining
5985 * sections) are not _called_ from probe context. (Any exceptions to this are
5986 * marked with a "Note:".) Rather, they are called from elsewhere in the
5987 * DTrace framework to look up probes in, add probes to, and remove probes from
5988 * the DTrace probe hashes. (Each probe is hashed by each element of the
5989 * probe tuple -- allowing for fast lookups, regardless of what was
static uint_t
dtrace_hash_str(char *p)
{
	unsigned int g;
	uint_t hval = 0;

	while (*p) {
		hval = (hval << 4) + *p++;
		if ((g = (hval & 0xf0000000)) != 0)
			hval ^= g >> 24;
		hval &= ~g;
	}
	return (hval);
}

static dtrace_hash_t *
dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs)
{
	dtrace_hash_t *hash = kmem_zalloc(sizeof (dtrace_hash_t), KM_SLEEP);

	hash->dth_stroffs = stroffs;
	hash->dth_nextoffs = nextoffs;
	hash->dth_prevoffs = prevoffs;

	hash->dth_size = 1;
	hash->dth_mask = hash->dth_size - 1;

	hash->dth_tab = kmem_zalloc(hash->dth_size *
	    sizeof (dtrace_hashbucket_t *), KM_SLEEP);

	return (hash);
}
6027 dtrace_hash_destroy(dtrace_hash_t
*hash
)
6032 for (i
= 0; i
< hash
->dth_size
; i
++)
6033 ASSERT(hash
->dth_tab
[i
] == NULL
);
6036 kmem_free(hash
->dth_tab
,
6037 hash
->dth_size
* sizeof (dtrace_hashbucket_t
*));
6038 kmem_free(hash
, sizeof (dtrace_hash_t
));
6040 #endif /* __APPLE__ */
6043 dtrace_hash_resize(dtrace_hash_t
*hash
)
6045 int size
= hash
->dth_size
, i
, ndx
;
6046 int new_size
= hash
->dth_size
<< 1;
6047 int new_mask
= new_size
- 1;
6048 dtrace_hashbucket_t
**new_tab
, *bucket
, *next
;
6050 ASSERT((new_size
& new_mask
) == 0);
6052 new_tab
= kmem_zalloc(new_size
* sizeof (void *), KM_SLEEP
);
6054 for (i
= 0; i
< size
; i
++) {
6055 for (bucket
= hash
->dth_tab
[i
]; bucket
!= NULL
; bucket
= next
) {
6056 dtrace_probe_t
*probe
= bucket
->dthb_chain
;
6058 ASSERT(probe
!= NULL
);
6059 ndx
= DTRACE_HASHSTR(hash
, probe
) & new_mask
;
6061 next
= bucket
->dthb_next
;
6062 bucket
->dthb_next
= new_tab
[ndx
];
6063 new_tab
[ndx
] = bucket
;
6067 kmem_free(hash
->dth_tab
, hash
->dth_size
* sizeof (void *));
6068 hash
->dth_tab
= new_tab
;
6069 hash
->dth_size
= new_size
;
6070 hash
->dth_mask
= new_mask
;
6074 dtrace_hash_add(dtrace_hash_t
*hash
, dtrace_probe_t
*new)
6076 int hashval
= DTRACE_HASHSTR(hash
, new);
6077 int ndx
= hashval
& hash
->dth_mask
;
6078 dtrace_hashbucket_t
*bucket
= hash
->dth_tab
[ndx
];
6079 dtrace_probe_t
**nextp
, **prevp
;
6081 for (; bucket
!= NULL
; bucket
= bucket
->dthb_next
) {
6082 if (DTRACE_HASHEQ(hash
, bucket
->dthb_chain
, new))
6086 if ((hash
->dth_nbuckets
>> 1) > hash
->dth_size
) {
6087 dtrace_hash_resize(hash
);
6088 dtrace_hash_add(hash
, new);
6092 bucket
= kmem_zalloc(sizeof (dtrace_hashbucket_t
), KM_SLEEP
);
6093 bucket
->dthb_next
= hash
->dth_tab
[ndx
];
6094 hash
->dth_tab
[ndx
] = bucket
;
6095 hash
->dth_nbuckets
++;
6098 nextp
= DTRACE_HASHNEXT(hash
, new);
6099 ASSERT(*nextp
== NULL
&& *(DTRACE_HASHPREV(hash
, new)) == NULL
);
6100 *nextp
= bucket
->dthb_chain
;
6102 if (bucket
->dthb_chain
!= NULL
) {
6103 prevp
= DTRACE_HASHPREV(hash
, bucket
->dthb_chain
);
6104 ASSERT(*prevp
== NULL
);
6108 bucket
->dthb_chain
= new;
6112 static dtrace_probe_t
*
6113 dtrace_hash_lookup(dtrace_hash_t
*hash
, dtrace_probe_t
*template)
6115 int hashval
= DTRACE_HASHSTR(hash
, template);
6116 int ndx
= hashval
& hash
->dth_mask
;
6117 dtrace_hashbucket_t
*bucket
= hash
->dth_tab
[ndx
];
6119 for (; bucket
!= NULL
; bucket
= bucket
->dthb_next
) {
6120 if (DTRACE_HASHEQ(hash
, bucket
->dthb_chain
, template))
6121 return (bucket
->dthb_chain
);
6128 dtrace_hash_collisions(dtrace_hash_t
*hash
, dtrace_probe_t
*template)
6130 int hashval
= DTRACE_HASHSTR(hash
, template);
6131 int ndx
= hashval
& hash
->dth_mask
;
6132 dtrace_hashbucket_t
*bucket
= hash
->dth_tab
[ndx
];
6134 for (; bucket
!= NULL
; bucket
= bucket
->dthb_next
) {
6135 if (DTRACE_HASHEQ(hash
, bucket
->dthb_chain
, template))
6136 return (bucket
->dthb_len
);
6143 dtrace_hash_remove(dtrace_hash_t
*hash
, dtrace_probe_t
*probe
)
6145 int ndx
= DTRACE_HASHSTR(hash
, probe
) & hash
->dth_mask
;
6146 dtrace_hashbucket_t
*bucket
= hash
->dth_tab
[ndx
];
6148 dtrace_probe_t
**prevp
= DTRACE_HASHPREV(hash
, probe
);
6149 dtrace_probe_t
**nextp
= DTRACE_HASHNEXT(hash
, probe
);
6152 * Find the bucket that we're removing this probe from.
6154 for (; bucket
!= NULL
; bucket
= bucket
->dthb_next
) {
6155 if (DTRACE_HASHEQ(hash
, bucket
->dthb_chain
, probe
))
6159 ASSERT(bucket
!= NULL
);
6161 if (*prevp
== NULL
) {
6162 if (*nextp
== NULL
) {
6164 * The removed probe was the only probe on this
6165 * bucket; we need to remove the bucket.
6167 dtrace_hashbucket_t
*b
= hash
->dth_tab
[ndx
];
6169 ASSERT(bucket
->dthb_chain
== probe
);
6173 hash
->dth_tab
[ndx
] = bucket
->dthb_next
;
6175 while (b
->dthb_next
!= bucket
)
6177 b
->dthb_next
= bucket
->dthb_next
;
6180 ASSERT(hash
->dth_nbuckets
> 0);
6181 hash
->dth_nbuckets
--;
6182 kmem_free(bucket
, sizeof (dtrace_hashbucket_t
));
6186 bucket
->dthb_chain
= *nextp
;
6188 *(DTRACE_HASHNEXT(hash
, *prevp
)) = *nextp
;
6192 *(DTRACE_HASHPREV(hash
, *nextp
)) = *prevp
;
/*
 * DTrace Utility Functions
 *
 * These are random utility functions that are _not_ called from probe context.
 */
static int
dtrace_badattr(const dtrace_attribute_t *a)
{
	return (a->dtat_name > DTRACE_STABILITY_MAX ||
	    a->dtat_data > DTRACE_STABILITY_MAX ||
	    a->dtat_class > DTRACE_CLASS_MAX);
}

/*
 * Return a duplicate copy of a string. If the specified string is NULL,
 * this function returns a zero-length string.
 */
static char *
dtrace_strdup(const char *str)
{
	char *new = kmem_zalloc((str != NULL ? strlen(str) : 0) + 1, KM_SLEEP);

	if (str != NULL)
		(void) strcpy(new, str);

	return (new);
}
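
/*
 * In particular, dtrace_strdup(NULL) yields a heap-allocated "" rather than
 * a NULL pointer, so callers can hand the result to the string hashes without
 * a separate NULL check. A hedged usage sketch (the caller is hypothetical):
 */
#if 0	/* illustrative only */
static void
dtrace_strdup_example(void)
{
	char *empty = dtrace_strdup(NULL);	/* zero-length string, not NULL */
	char *copy = dtrace_strdup("fbt");	/* independent copy of "fbt" */

	ASSERT(empty[0] == '\0');
	kmem_free(empty, 1);
	kmem_free(copy, strlen(copy) + 1);
}
#endif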
#define	DTRACE_ISALPHA(c) \
	(((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))

static int
dtrace_badname(const char *s)
{
	char c;

	if (s == NULL || (c = *s++) == '\0')
		return (0);

	if (!DTRACE_ISALPHA(c) && c != '-' && c != '_' && c != '.')
		return (1);

	while ((c = *s++) != '\0') {
		if (!DTRACE_ISALPHA(c) && (c < '0' || c > '9') &&
		    c != '-' && c != '_' && c != '.' && c != '`')
			return (1);
	}

	return (0);
}
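
/*
 * Under these rules a name must begin with a letter, '-', '_' or '.', and
 * may additionally contain digits and '`' after the first character. A few
 * hedged examples (the example function and its inputs are illustrative):
 */
#if 0	/* illustrative only */
static void
dtrace_badname_example(void)
{
	ASSERT(dtrace_badname("fbt") == 0);		/* valid */
	ASSERT(dtrace_badname("my_provider1") == 0);	/* valid */
	ASSERT(dtrace_badname("1fbt") != 0);		/* bad: leading digit */
	ASSERT(dtrace_badname("fbt probe") != 0);	/* bad: embedded space */
}
#endif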
6247 dtrace_cred2priv(cred_t
*cr
, uint32_t *privp
, uid_t
*uidp
, zoneid_t
*zoneidp
)
6251 if (cr
== NULL
|| PRIV_POLICY_ONLY(cr
, PRIV_ALL
, B_FALSE
)) {
6253 * For DTRACE_PRIV_ALL, the uid and zoneid don't matter.
6255 priv
= DTRACE_PRIV_ALL
;
6257 *uidp
= crgetuid(cr
);
6258 *zoneidp
= crgetzoneid(cr
);
6261 if (PRIV_POLICY_ONLY(cr
, PRIV_DTRACE_KERNEL
, B_FALSE
))
6262 priv
|= DTRACE_PRIV_KERNEL
| DTRACE_PRIV_USER
;
6263 else if (PRIV_POLICY_ONLY(cr
, PRIV_DTRACE_USER
, B_FALSE
))
6264 priv
|= DTRACE_PRIV_USER
;
6265 if (PRIV_POLICY_ONLY(cr
, PRIV_DTRACE_PROC
, B_FALSE
))
6266 priv
|= DTRACE_PRIV_PROC
;
6267 if (PRIV_POLICY_ONLY(cr
, PRIV_PROC_OWNER
, B_FALSE
))
6268 priv
|= DTRACE_PRIV_OWNER
;
6269 if (PRIV_POLICY_ONLY(cr
, PRIV_PROC_ZONE
, B_FALSE
))
6270 priv
|= DTRACE_PRIV_ZONEOWNER
;
6276 #ifdef DTRACE_ERRDEBUG
6278 dtrace_errdebug(const char *str
)
6280 int hval
= dtrace_hash_str((char *)str
) % DTRACE_ERRHASHSZ
;
6283 lck_mtx_lock(&dtrace_errlock
);
6284 dtrace_errlast
= str
;
6285 #if !defined(__APPLE__)
6286 dtrace_errthread
= curthread
;
6288 dtrace_errthread
= current_thread();
6289 #endif /* __APPLE__ */
6291 while (occupied
++ < DTRACE_ERRHASHSZ
) {
6292 if (dtrace_errhash
[hval
].dter_msg
== str
) {
6293 dtrace_errhash
[hval
].dter_count
++;
6297 if (dtrace_errhash
[hval
].dter_msg
!= NULL
) {
6298 hval
= (hval
+ 1) % DTRACE_ERRHASHSZ
;
6302 dtrace_errhash
[hval
].dter_msg
= str
;
6303 dtrace_errhash
[hval
].dter_count
= 1;
6307 panic("dtrace: undersized error hash");
6309 lck_mtx_unlock(&dtrace_errlock
);
6314 * DTrace Matching Functions
6316 * These functions are used to match groups of probes, given some elements of
6317 * a probe tuple, or some globbed expressions for elements of a probe tuple.
6320 dtrace_match_priv(const dtrace_probe_t
*prp
, uint32_t priv
, uid_t uid
,
6323 if (priv
!= DTRACE_PRIV_ALL
) {
6324 uint32_t ppriv
= prp
->dtpr_provider
->dtpv_priv
.dtpp_flags
;
6325 uint32_t match
= priv
& ppriv
;
6328 * No PRIV_DTRACE_* privileges...
6330 if ((priv
& (DTRACE_PRIV_PROC
| DTRACE_PRIV_USER
|
6331 DTRACE_PRIV_KERNEL
)) == 0)
6335 * No matching bits, but there were bits to match...
6337 if (match
== 0 && ppriv
!= 0)
6341 * Need to have permissions to the process, but don't...
6343 if (((ppriv
& ~match
) & DTRACE_PRIV_OWNER
) != 0 &&
6344 uid
!= prp
->dtpr_provider
->dtpv_priv
.dtpp_uid
) {
6349 * Need to be in the same zone unless we possess the
6350 * privilege to examine all zones.
6352 if (((ppriv
& ~match
) & DTRACE_PRIV_ZONEOWNER
) != 0 &&
6353 zoneid
!= prp
->dtpr_provider
->dtpv_priv
.dtpp_zoneid
) {
6362 * dtrace_match_probe compares a dtrace_probe_t to a pre-compiled key, which
6363 * consists of input pattern strings and an ops-vector to evaluate them.
6364 * This function returns >0 for match, 0 for no match, and <0 for error.
6367 dtrace_match_probe(const dtrace_probe_t
*prp
, const dtrace_probekey_t
*pkp
,
6368 uint32_t priv
, uid_t uid
, zoneid_t zoneid
)
6370 dtrace_provider_t
*pvp
= prp
->dtpr_provider
;
6373 if (pvp
->dtpv_defunct
)
6376 if ((rv
= pkp
->dtpk_pmatch(pvp
->dtpv_name
, pkp
->dtpk_prov
, 0)) <= 0)
6379 if ((rv
= pkp
->dtpk_mmatch(prp
->dtpr_mod
, pkp
->dtpk_mod
, 0)) <= 0)
6382 if ((rv
= pkp
->dtpk_fmatch(prp
->dtpr_func
, pkp
->dtpk_func
, 0)) <= 0)
6385 if ((rv
= pkp
->dtpk_nmatch(prp
->dtpr_name
, pkp
->dtpk_name
, 0)) <= 0)
6388 if (dtrace_match_priv(prp
, priv
, uid
, zoneid
) == 0)
6395 * dtrace_match_glob() is a safe kernel implementation of the gmatch(3GEN)
6396 * interface for matching a glob pattern 'p' to an input string 's'. Unlike
6397 * libc's version, the kernel version only applies to 8-bit ASCII strings.
6398 * In addition, all of the recursion cases except for '*' matching have been
6399 * unwound. For '*', we still implement recursive evaluation, but a depth
6400 * counter is maintained and matching is aborted if we recurse too deep.
6401 * The function returns 0 if no match, >0 if match, and <0 if recursion error.
6404 dtrace_match_glob(const char *s
, const char *p
, int depth
)
6410 if (depth
> DTRACE_PROBEKEY_MAXDEPTH
)
6414 s
= ""; /* treat NULL as empty string */
6423 if ((c
= *p
++) == '\0')
6424 return (s1
== '\0');
6428 int ok
= 0, notflag
= 0;
6439 if ((c
= *p
++) == '\0')
6443 if (c
== '-' && lc
!= '\0' && *p
!= ']') {
6444 if ((c
= *p
++) == '\0')
6446 if (c
== '\\' && (c
= *p
++) == '\0')
6450 if (s1
< lc
|| s1
> c
)
6454 } else if (lc
<= s1
&& s1
<= c
)
6457 } else if (c
== '\\' && (c
= *p
++) == '\0')
6460 lc
= c
; /* save left-hand 'c' for next iteration */
6470 if ((c
= *p
++) == '\0')
6482 if ((c
= *p
++) == '\0')
6498 p
++; /* consecutive *'s are identical to a single one */
6503 for (s
= olds
; *s
!= '\0'; s
++) {
6504 if ((gs
= dtrace_match_glob(s
, p
, depth
+ 1)) != 0)
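
/*
 * To make the pattern semantics concrete: '*' matches any run of characters
 * (evaluated recursively, bounded by DTRACE_PROBEKEY_MAXDEPTH), '?' matches
 * any single character, and '[a-c]' / '[!a-c]' match character classes. A
 * hedged sketch of expected results (the example function and its inputs
 * are illustrative only):
 */
#if 0	/* illustrative only */
static void
dtrace_match_glob_example(void)
{
	ASSERT(dtrace_match_glob("syscall", "sys*", 0) > 0);	/* '*' suffix */
	ASSERT(dtrace_match_glob("read", "r??d", 0) > 0);	/* '?' wildcards */
	ASSERT(dtrace_match_glob("write", "[a-w]rite", 0) > 0);	/* class match */
	ASSERT(dtrace_match_glob("fbt", "sdt", 0) == 0);	/* no match */
}
#endif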
static int
dtrace_match_string(const char *s, const char *p, int depth)
{
	return (s != NULL && strcmp(s, p) == 0);
}

static int
dtrace_match_nul(const char *s, const char *p, int depth)
{
	return (1); /* always match the empty pattern */
}

static int
dtrace_match_nonzero(const char *s, const char *p, int depth)
{
	return (s != NULL && s[0] != '\0');
}
6534 dtrace_match(const dtrace_probekey_t
*pkp
, uint32_t priv
, uid_t uid
,
6535 zoneid_t zoneid
, int (*matched
)(dtrace_probe_t
*, void *), void *arg
)
6537 dtrace_probe_t
template, *probe
;
6538 dtrace_hash_t
*hash
= NULL
;
6539 int len
, best
= INT_MAX
, nmatched
= 0;
6542 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
6545 * If the probe ID is specified in the key, just lookup by ID and
6546 * invoke the match callback once if a matching probe is found.
6548 if (pkp
->dtpk_id
!= DTRACE_IDNONE
) {
6549 if ((probe
= dtrace_probe_lookup_id(pkp
->dtpk_id
)) != NULL
&&
6550 dtrace_match_probe(probe
, pkp
, priv
, uid
, zoneid
) > 0) {
6551 (void) (*matched
)(probe
, arg
);
6557 template.dtpr_mod
= (char *)pkp
->dtpk_mod
;
6558 template.dtpr_func
= (char *)pkp
->dtpk_func
;
6559 template.dtpr_name
= (char *)pkp
->dtpk_name
;
6562 * We want to find the most distinct of the module name, function
6563 * name, and name. So for each one that is not a glob pattern or
6564 * empty string, we perform a lookup in the corresponding hash and
6565 * use the hash table with the fewest collisions to do our search.
6567 if (pkp
->dtpk_mmatch
== &dtrace_match_string
&&
6568 (len
= dtrace_hash_collisions(dtrace_bymod
, &template)) < best
) {
6570 hash
= dtrace_bymod
;
6573 if (pkp
->dtpk_fmatch
== &dtrace_match_string
&&
6574 (len
= dtrace_hash_collisions(dtrace_byfunc
, &template)) < best
) {
6576 hash
= dtrace_byfunc
;
6579 if (pkp
->dtpk_nmatch
== &dtrace_match_string
&&
6580 (len
= dtrace_hash_collisions(dtrace_byname
, &template)) < best
) {
6582 hash
= dtrace_byname
;
6586 * If we did not select a hash table, iterate over every probe and
6587 * invoke our callback for each one that matches our input probe key.
6590 for (i
= 0; i
< dtrace_nprobes
; i
++) {
6591 if ((probe
= dtrace_probes
[i
]) == NULL
||
6592 dtrace_match_probe(probe
, pkp
, priv
, uid
,
6598 if ((*matched
)(probe
, arg
) != DTRACE_MATCH_NEXT
)
6606 * If we selected a hash table, iterate over each probe of the same key
6607 * name and invoke the callback for every probe that matches the other
6608 * attributes of our input probe key.
6610 for (probe
= dtrace_hash_lookup(hash
, &template); probe
!= NULL
;
6611 probe
= *(DTRACE_HASHNEXT(hash
, probe
))) {
6613 if (dtrace_match_probe(probe
, pkp
, priv
, uid
, zoneid
) <= 0)
6618 if ((*matched
)(probe
, arg
) != DTRACE_MATCH_NEXT
)
/*
 * Return the function pointer dtrace_probecmp() should use to compare the
 * specified pattern with a string. For NULL or empty patterns, we select
 * dtrace_match_nul(). For glob pattern strings, we use dtrace_match_glob().
 * For non-empty non-glob strings, we use dtrace_match_string().
 */
static dtrace_probekey_f *
dtrace_probekey_func(const char *p)
{
	char c;

	if (p == NULL || *p == '\0')
		return (&dtrace_match_nul);

	while ((c = *p++) != '\0') {
		if (c == '[' || c == '?' || c == '*' || c == '\\')
			return (&dtrace_match_glob);
	}

	return (&dtrace_match_string);
}

/*
 * Build a probe comparison key for use with dtrace_match_probe() from the
 * given probe description. By convention, a null key only matches anchored
 * probes: if each field is the empty string, reset dtpk_fmatch to
 * dtrace_match_nonzero().
 */
static void
dtrace_probekey(const dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp)
{
	pkp->dtpk_prov = pdp->dtpd_provider;
	pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider);

	pkp->dtpk_mod = pdp->dtpd_mod;
	pkp->dtpk_mmatch = dtrace_probekey_func(pdp->dtpd_mod);

	pkp->dtpk_func = pdp->dtpd_func;
	pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func);

	pkp->dtpk_name = pdp->dtpd_name;
	pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name);

	pkp->dtpk_id = pdp->dtpd_id;

	if (pkp->dtpk_id == DTRACE_IDNONE &&
	    pkp->dtpk_pmatch == &dtrace_match_nul &&
	    pkp->dtpk_mmatch == &dtrace_match_nul &&
	    pkp->dtpk_fmatch == &dtrace_match_nul &&
	    pkp->dtpk_nmatch == &dtrace_match_nul)
		pkp->dtpk_fmatch = &dtrace_match_nonzero;
}
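
/*
 * In practice, a description like syscall:::entry selects
 * dtrace_match_string() for the provider and name fields and
 * dtrace_match_nul() for the empty module and function fields, while a
 * pattern such as "read*" selects dtrace_match_glob(). A hedged sketch
 * (the example function and its inputs are illustrative only):
 */
#if 0	/* illustrative only */
static void
dtrace_probekey_func_example(void)
{
	ASSERT(dtrace_probekey_func("syscall") == &dtrace_match_string);
	ASSERT(dtrace_probekey_func("") == &dtrace_match_nul);
	ASSERT(dtrace_probekey_func(NULL) == &dtrace_match_nul);
	ASSERT(dtrace_probekey_func("read*") == &dtrace_match_glob);
}
#endif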
6679 * DTrace Provider-to-Framework API Functions
6681 * These functions implement much of the Provider-to-Framework API, as
6682 * described in <sys/dtrace.h>. The parts of the API not in this section are
6683 * the functions in the API for probe management (found below), and
6684 * dtrace_probe() itself (found above).
6688 * Register the calling provider with the DTrace framework. This should
6689 * generally be called by DTrace providers in their attach(9E) entry point.
6692 dtrace_register(const char *name
, const dtrace_pattr_t
*pap
, uint32_t priv
,
6693 cred_t
*cr
, const dtrace_pops_t
*pops
, void *arg
, dtrace_provider_id_t
*idp
)
6695 dtrace_provider_t
*provider
;
6697 if (name
== NULL
|| pap
== NULL
|| pops
== NULL
|| idp
== NULL
) {
6698 cmn_err(CE_WARN
, "failed to register provider '%s': invalid "
6699 "arguments", name
? name
: "<NULL>");
6703 if (name
[0] == '\0' || dtrace_badname(name
)) {
6704 cmn_err(CE_WARN
, "failed to register provider '%s': invalid "
6705 "provider name", name
);
6709 if ((pops
->dtps_provide
== NULL
&& pops
->dtps_provide_module
== NULL
) ||
6710 pops
->dtps_enable
== NULL
|| pops
->dtps_disable
== NULL
||
6711 pops
->dtps_destroy
== NULL
||
6712 ((pops
->dtps_resume
== NULL
) != (pops
->dtps_suspend
== NULL
))) {
6713 cmn_err(CE_WARN
, "failed to register provider '%s': invalid "
6714 "provider ops", name
);
6718 if (dtrace_badattr(&pap
->dtpa_provider
) ||
6719 dtrace_badattr(&pap
->dtpa_mod
) ||
6720 dtrace_badattr(&pap
->dtpa_func
) ||
6721 dtrace_badattr(&pap
->dtpa_name
) ||
6722 dtrace_badattr(&pap
->dtpa_args
)) {
6723 cmn_err(CE_WARN
, "failed to register provider '%s': invalid "
6724 "provider attributes", name
);
6728 if (priv
& ~DTRACE_PRIV_ALL
) {
6729 cmn_err(CE_WARN
, "failed to register provider '%s': invalid "
6730 "privilege attributes", name
);
6734 if ((priv
& DTRACE_PRIV_KERNEL
) &&
6735 (priv
& (DTRACE_PRIV_USER
| DTRACE_PRIV_OWNER
)) &&
6736 pops
->dtps_usermode
== NULL
) {
6737 cmn_err(CE_WARN
, "failed to register provider '%s': need "
6738 "dtps_usermode() op for given privilege attributes", name
);
6742 provider
= kmem_zalloc(sizeof (dtrace_provider_t
), KM_SLEEP
);
6743 provider
->dtpv_name
= kmem_alloc(strlen(name
) + 1, KM_SLEEP
);
6744 (void) strcpy(provider
->dtpv_name
, name
);
6746 provider
->dtpv_attr
= *pap
;
6747 provider
->dtpv_priv
.dtpp_flags
= priv
;
6749 provider
->dtpv_priv
.dtpp_uid
= crgetuid(cr
);
6750 provider
->dtpv_priv
.dtpp_zoneid
= crgetzoneid(cr
);
6752 provider
->dtpv_pops
= *pops
;
6754 if (pops
->dtps_provide
== NULL
) {
6755 ASSERT(pops
->dtps_provide_module
!= NULL
);
6756 provider
->dtpv_pops
.dtps_provide
=
6757 (void (*)(void *, const dtrace_probedesc_t
*))dtrace_nullop
;
6760 if (pops
->dtps_provide_module
== NULL
) {
6761 ASSERT(pops
->dtps_provide
!= NULL
);
6762 provider
->dtpv_pops
.dtps_provide_module
=
6763 (void (*)(void *, struct modctl
*))dtrace_nullop
;
6766 if (pops
->dtps_suspend
== NULL
) {
6767 ASSERT(pops
->dtps_resume
== NULL
);
6768 provider
->dtpv_pops
.dtps_suspend
=
6769 (void (*)(void *, dtrace_id_t
, void *))dtrace_nullop
;
6770 provider
->dtpv_pops
.dtps_resume
=
6771 (void (*)(void *, dtrace_id_t
, void *))dtrace_nullop
;
6774 provider
->dtpv_arg
= arg
;
6775 *idp
= (dtrace_provider_id_t
)provider
;
6777 if (pops
== &dtrace_provider_ops
) {
6778 lck_mtx_assert(&dtrace_provider_lock
, LCK_MTX_ASSERT_OWNED
);
6779 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
6780 ASSERT(dtrace_anon
.dta_enabling
== NULL
);
6783 * We make sure that the DTrace provider is at the head of
6784 * the provider chain.
6786 provider
->dtpv_next
= dtrace_provider
;
6787 dtrace_provider
= provider
;
6791 lck_mtx_lock(&dtrace_provider_lock
);
6792 lck_mtx_lock(&dtrace_lock
);
6795 * If there is at least one provider registered, we'll add this
6796 * provider after the first provider.
6798 if (dtrace_provider
!= NULL
) {
6799 provider
->dtpv_next
= dtrace_provider
->dtpv_next
;
6800 dtrace_provider
->dtpv_next
= provider
;
6802 dtrace_provider
= provider
;
6805 if (dtrace_retained
!= NULL
) {
6806 dtrace_enabling_provide(provider
);
6809 * Now we need to call dtrace_enabling_matchall() -- which
6810 * will acquire cpu_lock and dtrace_lock. We therefore need
6811 * to drop all of our locks before calling into it...
6813 lck_mtx_unlock(&dtrace_lock
);
6814 lck_mtx_unlock(&dtrace_provider_lock
);
6815 dtrace_enabling_matchall();
6820 lck_mtx_unlock(&dtrace_lock
);
6821 lck_mtx_unlock(&dtrace_provider_lock
);
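
/*
 * A hedged sketch of how a provider typically calls dtrace_register() from
 * its attach path. The "example" provider name, attribute table, ops vector
 * and every example_* identifier below are hypothetical; a real provider
 * supplies its own dtrace_pops_t with provide/enable/disable/destroy entry
 * points as described in <sys/dtrace.h>.
 */
#if 0	/* illustrative only */
static dtrace_pattr_t example_attr;		/* stability attributes */
static dtrace_pops_t example_pops;		/* provider ops vector */
static dtrace_provider_id_t example_id;

static int
example_provider_attach(void)
{
	if (dtrace_register("example", &example_attr, DTRACE_PRIV_KERNEL,
	    NULL, &example_pops, NULL, &example_id) != 0)
		return (-1);	/* registration failed */

	return (0);
}
#endif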
6827 * Unregister the specified provider from the DTrace framework. This should
6828 * generally be called by DTrace providers in their detach(9E) entry point.
6831 dtrace_unregister(dtrace_provider_id_t id
)
6833 dtrace_provider_t
*old
= (dtrace_provider_t
*)id
;
6834 dtrace_provider_t
*prev
= NULL
;
6836 dtrace_probe_t
*probe
, *first
= NULL
;
6838 if (old
->dtpv_pops
.dtps_enable
==
6839 (void (*)(void *, dtrace_id_t
, void *))dtrace_nullop
) {
6841 * If DTrace itself is the provider, we're called with locks
6844 ASSERT(old
== dtrace_provider
);
6845 ASSERT(dtrace_devi
!= NULL
);
6846 lck_mtx_assert(&dtrace_provider_lock
, LCK_MTX_ASSERT_OWNED
);
6847 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
6851 if (dtrace_provider
->dtpv_next
!= NULL
) {
6853 * There's another provider here; return failure.
6858 lck_mtx_lock(&dtrace_provider_lock
);
6859 lck_mtx_lock(&mod_lock
);
6860 lck_mtx_lock(&dtrace_lock
);
6864 * If anyone has /dev/dtrace open, or if there are anonymous enabled
6865 * probes, we refuse to let providers slither away, unless this
6866 * provider has already been explicitly invalidated.
6868 if (!old
->dtpv_defunct
&&
6869 (dtrace_opens
|| (dtrace_anon
.dta_state
!= NULL
&&
6870 dtrace_anon
.dta_state
->dts_necbs
> 0))) {
6872 lck_mtx_unlock(&dtrace_lock
);
6873 lck_mtx_unlock(&mod_lock
);
6874 lck_mtx_unlock(&dtrace_provider_lock
);
6880 * Attempt to destroy the probes associated with this provider.
6882 for (i
= 0; i
< dtrace_nprobes
; i
++) {
6883 if ((probe
= dtrace_probes
[i
]) == NULL
)
6886 if (probe
->dtpr_provider
!= old
)
6889 if (probe
->dtpr_ecb
== NULL
)
6893 * We have at least one ECB; we can't remove this provider.
6896 lck_mtx_unlock(&dtrace_lock
);
6897 lck_mtx_unlock(&mod_lock
);
6898 lck_mtx_unlock(&dtrace_provider_lock
);
6904 * All of the probes for this provider are disabled; we can safely
6905 * remove all of them from their hash chains and from the probe array.
6907 for (i
= 0; i
< dtrace_nprobes
; i
++) {
6908 if ((probe
= dtrace_probes
[i
]) == NULL
)
6911 if (probe
->dtpr_provider
!= old
)
6914 dtrace_probes
[i
] = NULL
;
6916 dtrace_hash_remove(dtrace_bymod
, probe
);
6917 dtrace_hash_remove(dtrace_byfunc
, probe
);
6918 dtrace_hash_remove(dtrace_byname
, probe
);
6920 if (first
== NULL
) {
6922 probe
->dtpr_nextmod
= NULL
;
6924 probe
->dtpr_nextmod
= first
;
6930 * The provider's probes have been removed from the hash chains and
6931 * from the probe array. Now issue a dtrace_sync() to be sure that
6932 * everyone has cleared out from any probe array processing.
6936 for (probe
= first
; probe
!= NULL
; probe
= first
) {
6937 first
= probe
->dtpr_nextmod
;
6939 old
->dtpv_pops
.dtps_destroy(old
->dtpv_arg
, probe
->dtpr_id
,
6941 kmem_free(probe
->dtpr_mod
, strlen(probe
->dtpr_mod
) + 1);
6942 kmem_free(probe
->dtpr_func
, strlen(probe
->dtpr_func
) + 1);
6943 kmem_free(probe
->dtpr_name
, strlen(probe
->dtpr_name
) + 1);
6944 vmem_free(dtrace_arena
, (void *)(uintptr_t)(probe
->dtpr_id
), 1);
6945 #if !defined(__APPLE__)
6946 kmem_free(probe
, sizeof (dtrace_probe_t
));
6948 zfree(dtrace_probe_t_zone
, probe
);
6952 if ((prev
= dtrace_provider
) == old
) {
6953 ASSERT(self
|| dtrace_devi
== NULL
);
6954 ASSERT(old
->dtpv_next
== NULL
|| dtrace_devi
== NULL
);
6955 dtrace_provider
= old
->dtpv_next
;
6957 while (prev
!= NULL
&& prev
->dtpv_next
!= old
)
6958 prev
= prev
->dtpv_next
;
6961 panic("attempt to unregister non-existent "
6962 "dtrace provider %p\n", (void *)id
);
6965 prev
->dtpv_next
= old
->dtpv_next
;
6969 lck_mtx_unlock(&dtrace_lock
);
6970 lck_mtx_unlock(&mod_lock
);
6971 lck_mtx_unlock(&dtrace_provider_lock
);
6974 kmem_free(old
->dtpv_name
, strlen(old
->dtpv_name
) + 1);
6975 kmem_free(old
, sizeof (dtrace_provider_t
));
6981 * Invalidate the specified provider. All subsequent probe lookups for the
6982 * specified provider will fail, but its probes will not be removed.
6985 dtrace_invalidate(dtrace_provider_id_t id
)
6987 dtrace_provider_t
*pvp
= (dtrace_provider_t
*)id
;
6989 ASSERT(pvp
->dtpv_pops
.dtps_enable
!=
6990 (void (*)(void *, dtrace_id_t
, void *))dtrace_nullop
);
6992 lck_mtx_lock(&dtrace_provider_lock
);
6993 lck_mtx_lock(&dtrace_lock
);
6995 pvp
->dtpv_defunct
= 1;
6997 lck_mtx_unlock(&dtrace_lock
);
6998 lck_mtx_unlock(&dtrace_provider_lock
);
/*
 * Indicate whether or not DTrace has attached.
 */
int
dtrace_attached(void)
{
	/*
	 * dtrace_provider will be non-NULL iff the DTrace driver has
	 * attached. (It's non-NULL because DTrace is always itself a
	 * provider.)
	 */
	return (dtrace_provider != NULL);
}
/*
 * Remove all the unenabled probes for the given provider.  This function is
 * not unlike dtrace_unregister(), except that it doesn't remove the provider
 * -- just as many of its associated probes as it can.
 */
int
dtrace_condense(dtrace_provider_id_t id)
{
	dtrace_provider_t *prov = (dtrace_provider_t *)id;
	int i;
	dtrace_probe_t *probe;

	/*
	 * Make sure this isn't the dtrace provider itself.
	 */
	ASSERT(prov->dtpv_pops.dtps_enable !=
	    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop);

	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&dtrace_lock);

	/*
	 * Attempt to destroy the probes associated with this provider.
	 */
	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_provider != prov)
			continue;

		if (probe->dtpr_ecb != NULL)
			continue;

		dtrace_probes[i] = NULL;

		dtrace_hash_remove(dtrace_bymod, probe);
		dtrace_hash_remove(dtrace_byfunc, probe);
		dtrace_hash_remove(dtrace_byname, probe);

		prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, i + 1,
		    probe->dtpr_arg);
		kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
		kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
		kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
#if !defined(__APPLE__)
		kmem_free(probe, sizeof (dtrace_probe_t));
#else
		zfree(dtrace_probe_t_zone, probe);
#endif /* __APPLE__ */
		vmem_free(dtrace_arena, (void *)((uintptr_t)i + 1), 1);
	}

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_provider_lock);

	return (0);
}
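
/*
 * Illustrative sketch (not part of the original source): unlike
 * dtrace_unregister(), dtrace_condense() leaves the provider registered and
 * reaps only the probes that carry no ECBs, so a provider may call it when
 * the objects backing its unenabled probes disappear (the provider id below
 * is hypothetical):
 *
 *	(void) dtrace_condense(example_id);
 */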
/*
 * DTrace Probe Management Functions
 *
 * The functions in this section perform the DTrace probe management,
 * including functions to create probes, look-up probes, and call into the
 * providers to request that probes be provided.  Some of these functions are
 * in the Provider-to-Framework API; these functions can be identified by the
 * fact that they are not declared "static".
 */

/*
 * Create a probe with the specified module name, function name, and name.
 */
dtrace_id_t
dtrace_probe_create(dtrace_provider_id_t prov, const char *mod,
    const char *func, const char *name, int aframes, void *arg)
{
	dtrace_probe_t *probe, **probes;
	dtrace_provider_t *provider = (dtrace_provider_t *)prov;
	dtrace_id_t id;

	if (provider == dtrace_provider) {
		lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	} else {
		lck_mtx_lock(&dtrace_lock);
	}

	id = (dtrace_id_t)(uintptr_t)vmem_alloc(dtrace_arena, 1,
	    VM_BESTFIT | VM_SLEEP);
#if !defined(__APPLE__)
	probe = kmem_zalloc(sizeof (dtrace_probe_t), KM_SLEEP);
#else
	probe = zalloc(dtrace_probe_t_zone);
	bzero(probe, sizeof (dtrace_probe_t));
#endif /* __APPLE__ */

	probe->dtpr_id = id;
	probe->dtpr_gen = dtrace_probegen++;
	probe->dtpr_mod = dtrace_strdup(mod);
	probe->dtpr_func = dtrace_strdup(func);
	probe->dtpr_name = dtrace_strdup(name);
	probe->dtpr_arg = arg;
	probe->dtpr_aframes = aframes;
	probe->dtpr_provider = provider;

	dtrace_hash_add(dtrace_bymod, probe);
	dtrace_hash_add(dtrace_byfunc, probe);
	dtrace_hash_add(dtrace_byname, probe);

	if (id - 1 >= dtrace_nprobes) {
		size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *);
		size_t nsize = osize << 1;

		if (nsize == 0) {
			ASSERT(osize == 0);
			ASSERT(dtrace_probes == NULL);
			nsize = sizeof (dtrace_probe_t *);
		}

		probes = kmem_zalloc(nsize, KM_SLEEP);

		if (dtrace_probes == NULL) {
			ASSERT(osize == 0);
			dtrace_probes = probes;
			dtrace_nprobes = 1;
		} else {
			dtrace_probe_t **oprobes = dtrace_probes;

			bcopy(oprobes, probes, osize);
			dtrace_membar_producer();
			dtrace_probes = probes;

			dtrace_sync();

			/*
			 * All CPUs are now seeing the new probes array; we can
			 * safely free the old array.
			 */
			kmem_free(oprobes, osize);
			dtrace_nprobes <<= 1;
		}

		ASSERT(id - 1 < dtrace_nprobes);
	}

	ASSERT(dtrace_probes[id - 1] == NULL);
	dtrace_probes[id - 1] = probe;

	if (provider != dtrace_provider)
		lck_mtx_unlock(&dtrace_lock);

	return (id);
}
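
/*
 * Illustrative note (not part of the original source): probe identifiers are
 * handed out by dtrace_arena starting at 1, so identifier N always lives in
 * dtrace_probes[N - 1].  The array grows by doubling from a single entry
 * (1, 2, 4, 8, ...); creating the probe with id 5, for example, grows a
 * 4-entry array into an 8-entry one, and the old array is freed only after
 * the dtrace_sync() above, since probe context may still be walking it.
 */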
static dtrace_probe_t *
dtrace_probe_lookup_id(dtrace_id_t id)
{
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (id == 0 || id > dtrace_nprobes)
		return (NULL);

	return (dtrace_probes[id - 1]);
}

static int
dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg)
{
	*((dtrace_id_t *)arg) = probe->dtpr_id;

	return (DTRACE_MATCH_DONE);
}

/*
 * Look up a probe based on provider and one or more of module name, function
 * name and probe name.
 */
dtrace_id_t
dtrace_probe_lookup(dtrace_provider_id_t prid, const char *mod,
    const char *func, const char *name)
{
	dtrace_probekey_t pkey;
	dtrace_id_t id;
	int match;

	pkey.dtpk_prov = ((dtrace_provider_t *)prid)->dtpv_name;
	pkey.dtpk_pmatch = &dtrace_match_string;
	pkey.dtpk_mod = mod;
	pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_func = func;
	pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_name = name;
	pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_id = DTRACE_IDNONE;

	lck_mtx_lock(&dtrace_lock);
	match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0,
	    dtrace_probe_lookup_match, &id);
	lck_mtx_unlock(&dtrace_lock);

	ASSERT(match == 1 || match == 0);
	return (match ? id : 0);
}
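
/*
 * Illustrative sketch (not part of the original source): dtrace_probe_lookup()
 * returns the probe identifier or 0 when nothing matches, so a provider can
 * use it both to test for a probe's existence and to recover the id it needs
 * for calls such as dtrace_probe_arg().  The names below are hypothetical:
 *
 *	dtrace_id_t id;
 *	void *arg;
 *
 *	if ((id = dtrace_probe_lookup(example_id, "example_mod",
 *	    "example_func", "entry")) != 0)
 *		arg = dtrace_probe_arg(example_id, id);
 */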
/*
 * Returns the probe argument associated with the specified probe.
 */
void *
dtrace_probe_arg(dtrace_provider_id_t id, dtrace_id_t pid)
{
	dtrace_probe_t *probe;
	void *rval = NULL;

	lck_mtx_lock(&dtrace_lock);

	if ((probe = dtrace_probe_lookup_id(pid)) != NULL &&
	    probe->dtpr_provider == (dtrace_provider_t *)id)
		rval = probe->dtpr_arg;

	lck_mtx_unlock(&dtrace_lock);

	return (rval);
}

/*
 * Copy a probe into a probe description.
 */
static void
dtrace_probe_description(const dtrace_probe_t *prp, dtrace_probedesc_t *pdp)
{
	bzero(pdp, sizeof (dtrace_probedesc_t));
	pdp->dtpd_id = prp->dtpr_id;

	(void) strlcpy(pdp->dtpd_provider,
	    prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN);

	(void) strlcpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN);
	(void) strlcpy(pdp->dtpd_func, prp->dtpr_func, DTRACE_FUNCNAMELEN);
	(void) strlcpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN);
}
/*
 * Called to indicate that a probe -- or probes -- should be provided by a
 * specified provider.  If the specified description is NULL, the provider will
 * be told to provide all of its probes.  (This is done whenever a new
 * consumer comes along, or whenever a retained enabling is to be matched.) If
 * the specified description is non-NULL, the provider is given the
 * opportunity to dynamically provide the specified probe, allowing providers
 * to support the creation of probes on-the-fly.  (So-called _autocreated_
 * probes.) If the provider is NULL, the operations will be applied to all
 * providers; if the provider is non-NULL the operations will only be applied
 * to the specified provider.  The dtrace_provider_lock must be held, and the
 * dtrace_lock must _not_ be held -- the provider's dtps_provide() operation
 * will need to grab the dtrace_lock when it reenters the framework through
 * dtrace_probe_lookup(), dtrace_probe_create(), etc.
 */
static void
dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv)
{
	int all = 0;

	lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED);

	if (prv == NULL) {
		all = 1;
		prv = dtrace_provider;
	}

	do {
		/*
		 * First, call the blanket provide operation.
		 */
		prv->dtpv_pops.dtps_provide(prv->dtpv_arg, desc);

#if !defined(__APPLE__)
		/*
		 * Now call the per-module provide operation.  We will grab
		 * mod_lock to prevent the list from being modified.  Note
		 * that this also prevents the mod_busy bits from changing.
		 * (mod_busy can only be changed with mod_lock held.)
		 */
		lck_mtx_lock(&mod_lock);

		struct modctl *ctl = &modules;
		do {
			if (ctl->mod_busy || ctl->mod_mp == NULL)
				continue;

			prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);

		} while ((ctl = ctl->mod_next) != &modules);

		lck_mtx_unlock(&mod_lock);
#else
#if 0 /* XXX Workaround for PR_4643546 XXX */
		simple_lock(&kmod_lock);

		kmod_info_t *ktl = kmod;
		while (ktl) {
			prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ktl);
			ktl = ktl->next;
		}

		simple_unlock(&kmod_lock);
#else
		/*
		 * Don't bother to iterate over the kmod list.  At present only fbt
		 * offers a provide_module in its dtpv_pops, and then it ignores the
		 * module argument anyway.
		 */
		prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, NULL);
#endif
#endif /* __APPLE__ */
	} while (all && (prv = prv->dtpv_next) != NULL);
}
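
/*
 * Illustrative sketch (not part of the original source): the dtps_provide()
 * operation called above runs with dtrace_provider_lock held but dtrace_lock
 * dropped, so it may reenter the framework through dtrace_probe_lookup() and
 * dtrace_probe_create().  A minimal provider-side implementation (names
 * hypothetical) might look like:
 *
 *	static void
 *	example_provide(void *arg, const dtrace_probedesc_t *desc)
 *	{
 *		if (desc != NULL)
 *			return;
 *
 *		if (dtrace_probe_lookup(example_id, NULL, NULL, "tick") == 0)
 *			(void) dtrace_probe_create(example_id, NULL, NULL,
 *			    "tick", 0, NULL);
 *	}
 */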
/*
 * Iterate over each probe, and call the Framework-to-Provider API function
 * indicated by offs.
 */
static void
dtrace_probe_foreach(uintptr_t offs)
{
	dtrace_provider_t *prov;
	void (*func)(void *, dtrace_id_t, void *);
	dtrace_probe_t *probe;
	dtrace_icookie_t cookie;
	int i;

	/*
	 * We disable interrupts to walk through the probe array.  This is
	 * safe -- the dtrace_sync() in dtrace_unregister() assures that we
	 * won't see stale data.
	 */
	cookie = dtrace_interrupt_disable();

	for (i = 0; i < dtrace_nprobes; i++) {
		if ((probe = dtrace_probes[i]) == NULL)
			continue;

		if (probe->dtpr_ecb == NULL) {
			/*
			 * This probe isn't enabled -- don't call the function.
			 */
			continue;
		}

		prov = probe->dtpr_provider;
		func = *((void(**)(void *, dtrace_id_t, void *))
		    ((uintptr_t)&prov->dtpv_pops + offs));

		func(prov->dtpv_arg, i + 1, probe->dtpr_arg);
	}

	dtrace_interrupt_enable(cookie);
}
static int
dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab)
{
	dtrace_probekey_t pkey;
	uint32_t priv;
	uid_t uid;
	zoneid_t zoneid;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	dtrace_ecb_create_cache = NULL;

	if (desc == NULL) {
		/*
		 * If we're passed a NULL description, we're being asked to
		 * create an ECB with a NULL probe.
		 */
		(void) dtrace_ecb_create_enable(NULL, enab);
		return (0);
	}

	dtrace_probekey(desc, &pkey);
	dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred,
	    &priv, &uid, &zoneid);

	return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable,
	    enab));
}
/*
 * DTrace Helper Provider Functions
 */
static void
dtrace_dofattr2attr(dtrace_attribute_t *attr, const dof_attr_t dofattr)
{
	attr->dtat_name = DOF_ATTR_NAME(dofattr);
	attr->dtat_data = DOF_ATTR_DATA(dofattr);
	attr->dtat_class = DOF_ATTR_CLASS(dofattr);
}

static void
dtrace_dofprov2hprov(dtrace_helper_provdesc_t *hprov,
    const dof_provider_t *dofprov, char *strtab)
{
	hprov->dthpv_provname = strtab + dofprov->dofpv_name;
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_provider,
	    dofprov->dofpv_provattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_mod,
	    dofprov->dofpv_modattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_func,
	    dofprov->dofpv_funcattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_name,
	    dofprov->dofpv_nameattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_args,
	    dofprov->dofpv_argsattr);
}
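
/*
 * Illustrative note (not part of the original source): a dof_attr_t packs the
 * name, data and class stability values of one attribute into a single 32-bit
 * word, and the DOF_ATTR_NAME()/DOF_ATTR_DATA()/DOF_ATTR_CLASS() accessors
 * used above unpack them again.  For example, a USDT provider whose provider
 * attribute was encoded as (Evolving, Evolving, ISA) comes back out of
 * dtrace_dofattr2attr() with:
 *
 *	attr.dtat_name  == DTRACE_STABILITY_EVOLVING
 *	attr.dtat_data  == DTRACE_STABILITY_EVOLVING
 *	attr.dtat_class == DTRACE_CLASS_ISA
 */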
static void
dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
	dof_provider_t *provider;
	dof_probe_t *probe;
	uint32_t *off, *enoff;
	uint8_t *arg;
	char *strtab;
	uint_t i, nprobes;
	dtrace_helper_provdesc_t dhpv;
	dtrace_helper_probedesc_t dhpb;
	dtrace_meta_t *meta = dtrace_meta_pid;
	dtrace_mops_t *mops = &meta->dtm_mops;
	void *parg;

	provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
	str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_strtab * dof->dofh_secsize);
	prb_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_probes * dof->dofh_secsize);
	arg_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_prargs * dof->dofh_secsize);
	off_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_proffs * dof->dofh_secsize);

	strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
	off = (uint32_t *)(uintptr_t)(daddr + off_sec->dofs_offset);
	arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);
	enoff = NULL;

	/*
	 * See dtrace_helper_provider_validate().
	 */
	if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
	    provider->dofpv_prenoffs != DOF_SECT_NONE) {
		enoff_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
		    provider->dofpv_prenoffs * dof->dofh_secsize);
		enoff = (uint32_t *)(uintptr_t)(daddr + enoff_sec->dofs_offset);
	}

	nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;

	/*
	 * Create the provider.
	 */
	dtrace_dofprov2hprov(&dhpv, provider, strtab);

	if ((parg = mops->dtms_provide_pid(meta->dtm_arg, &dhpv, pid)) == NULL)
		return;

	meta->dtm_count++;

	/*
	 * Create the probes.
	 */
	for (i = 0; i < nprobes; i++) {
		probe = (dof_probe_t *)(uintptr_t)(daddr +
		    prb_sec->dofs_offset + i * prb_sec->dofs_entsize);

		dhpb.dthpb_mod = dhp->dofhp_mod;
		dhpb.dthpb_func = strtab + probe->dofpr_func;
		dhpb.dthpb_name = strtab + probe->dofpr_name;
#if defined(__APPLE__)
		dhpb.dthpb_base = dhp->dofhp_addr;
#else
		dhpb.dthpb_base = probe->dofpr_addr;
#endif
		dhpb.dthpb_offs = off + probe->dofpr_offidx;
		dhpb.dthpb_noffs = probe->dofpr_noffs;
		if (enoff != NULL) {
			dhpb.dthpb_enoffs = enoff + probe->dofpr_enoffidx;
			dhpb.dthpb_nenoffs = probe->dofpr_nenoffs;
		} else {
			dhpb.dthpb_enoffs = NULL;
			dhpb.dthpb_nenoffs = 0;
		}
		dhpb.dthpb_args = arg + probe->dofpr_argidx;
		dhpb.dthpb_nargc = probe->dofpr_nargc;
		dhpb.dthpb_xargc = probe->dofpr_xargc;
		dhpb.dthpb_ntypes = strtab + probe->dofpr_nargv;
		dhpb.dthpb_xtypes = strtab + probe->dofpr_xargv;

		mops->dtms_create_probe(meta->dtm_arg, parg, &dhpb);
	}
}
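
/*
 * Illustrative note (not part of the original source): every per-probe field
 * above is resolved relative to the DOF image itself -- dofpr_func and
 * dofpr_name are offsets into the provider's string table, while dofpr_offidx
 * and dofpr_argidx index into the offsets and arguments sections.  With purely
 * illustrative numbers, a probes section whose dofs_size is 4096 and whose
 * dofs_entsize is 64 describes 4096 / 64 == 64 probes, which is exactly the
 * nprobes computed before the loop.
 */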
static void
dtrace_helper_provide(dof_helper_t *dhp, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	int i;

	lck_mtx_assert(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED);

	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
		    dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_PROVIDER)
			continue;

		dtrace_helper_provide_one(dhp, sec, pid);
	}

	/*
	 * We may have just created probes, so we must now rematch against
	 * any retained enablings.  Note that this call will acquire both
	 * cpu_lock and dtrace_lock; the fact that we are holding
	 * dtrace_meta_lock now is what defines the ordering with respect to
	 * these three locks.
	 */
	dtrace_enabling_matchall();
}
static void
dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	dof_sec_t *str_sec;
	dof_provider_t *provider;
	char *strtab;
	dtrace_helper_provdesc_t dhpv;
	dtrace_meta_t *meta = dtrace_meta_pid;
	dtrace_mops_t *mops = &meta->dtm_mops;

	provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
	str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
	    provider->dofpv_strtab * dof->dofh_secsize);

	strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);

	/*
	 * Create the provider.
	 */
	dtrace_dofprov2hprov(&dhpv, provider, strtab);

	mops->dtms_remove_pid(meta->dtm_arg, &dhpv, pid);

	meta->dtm_count--;
}

static void
dtrace_helper_provider_remove(dof_helper_t *dhp, pid_t pid)
{
	uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
	dof_hdr_t *dof = (dof_hdr_t *)daddr;
	int i;

	lck_mtx_assert(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED);

	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
		    dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_PROVIDER)
			continue;

		dtrace_helper_provider_remove_one(dhp, sec, pid);
	}
}
/*
 * DTrace Meta Provider-to-Framework API Functions
 *
 * These functions implement the Meta Provider-to-Framework API, as described
 * in <sys/dtrace.h>.
 */
int
dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg,
    dtrace_meta_provider_id_t *idp)
{
	dtrace_meta_t *meta;
	dtrace_helpers_t *help, *next;
	int i;

	*idp = DTRACE_METAPROVNONE;

	/*
	 * We strictly don't need the name, but we hold onto it for
	 * debuggability. All hail error queues!
	 */
	if (name == NULL) {
		cmn_err(CE_WARN, "failed to register meta-provider: "
		    "invalid name");
		return (EINVAL);
	}

	if (mops == NULL ||
	    mops->dtms_create_probe == NULL ||
	    mops->dtms_provide_pid == NULL ||
	    mops->dtms_remove_pid == NULL) {
		cmn_err(CE_WARN, "failed to register meta-register %s: "
		    "invalid ops", name);
		return (EINVAL);
	}

	meta = kmem_zalloc(sizeof (dtrace_meta_t), KM_SLEEP);
	meta->dtm_mops = *mops;
	meta->dtm_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
	(void) strcpy(meta->dtm_name, name);
	meta->dtm_arg = arg;

	lck_mtx_lock(&dtrace_meta_lock);
	lck_mtx_lock(&dtrace_lock);

	if (dtrace_meta_pid != NULL) {
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&dtrace_meta_lock);
		cmn_err(CE_WARN, "failed to register meta-register %s: "
		    "user-land meta-provider exists", name);
		kmem_free(meta->dtm_name, strlen(meta->dtm_name) + 1);
		kmem_free(meta, sizeof (dtrace_meta_t));
		return (EINVAL);
	}

	dtrace_meta_pid = meta;
	*idp = (dtrace_meta_provider_id_t)meta;

	/*
	 * If there are providers and probes ready to go, pass them
	 * off to the new meta provider now.
	 */
	help = dtrace_deferred_pid;
	dtrace_deferred_pid = NULL;

	lck_mtx_unlock(&dtrace_lock);

	while (help != NULL) {
		for (i = 0; i < help->dthps_nprovs; i++) {
			dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
			    help->dthps_pid);
		}

		next = help->dthps_next;
		help->dthps_next = NULL;
		help->dthps_prev = NULL;
		help->dthps_deferred = 0;
		help = next;
	}

	lck_mtx_unlock(&dtrace_meta_lock);

	return (0);
}
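
/*
 * Illustrative sketch (not part of the original source): the pid provider
 * (fasttrap) is the expected consumer of this interface.  A meta-provider
 * registers once at load time and keeps the returned id for the matching
 * dtrace_meta_unregister() call; the "example" names below are hypothetical:
 *
 *	static dtrace_meta_provider_id_t example_meta_id;
 *	static dtrace_mops_t example_mops = {
 *		.dtms_create_probe = example_create_probe,
 *		.dtms_provide_pid = example_provide_pid,
 *		.dtms_remove_pid = example_remove_pid
 *	};
 *
 *	if (dtrace_meta_register("example", &example_mops, NULL,
 *	    &example_meta_id) != 0)
 *		return (EINVAL);
 */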
int
dtrace_meta_unregister(dtrace_meta_provider_id_t id)
{
	dtrace_meta_t **pp, *old = (dtrace_meta_t *)id;

	lck_mtx_lock(&dtrace_meta_lock);
	lck_mtx_lock(&dtrace_lock);

	if (old == dtrace_meta_pid) {
		pp = &dtrace_meta_pid;
	} else {
		panic("attempt to unregister non-existent "
		    "dtrace meta-provider %p\n", (void *)old);
	}

	if (old->dtm_count != 0) {
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&dtrace_meta_lock);
		return (EBUSY);
	}

	*pp = NULL;

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_meta_lock);

	kmem_free(old->dtm_name, strlen(old->dtm_name) + 1);
	kmem_free(old, sizeof (dtrace_meta_t));

	return (0);
}
/*
 * DTrace DIF Object Functions
 */
static int
dtrace_difo_err(uint_t pc, const char *format, ...)
{
	if (dtrace_err_verbose) {
		va_list alist;

		(void) uprintf("dtrace DIF object error: [%u]: ", pc);
		va_start(alist, format);
		(void) vuprintf(format, alist);
		va_end(alist);
	}

#ifdef DTRACE_ERRDEBUG
	dtrace_errdebug(format);
#endif
	return (1);
}

/*
 * Validate a DTrace DIF object by checking the IR instructions.  The following
 * rules are currently enforced by dtrace_difo_validate():
 *
 * 1. Each instruction must have a valid opcode
 * 2. Each register, string, variable, or subroutine reference must be valid
 * 3. No instruction can modify register %r0 (must be zero)
 * 4. All instruction reserved bits must be set to zero
 * 5. The last instruction must be a "ret" instruction
 * 6. All branch targets must reference a valid instruction _after_ the branch
 */
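
/*
 * Illustrative sketch (not part of the original source): rules 2 and 3 above
 * reduce, for a three-register instruction such as DIF_OP_ADD, to the checks
 * sketched below against the DIF_INSTR_* accessors (nregs is the register
 * count passed to dtrace_difo_validate()):
 *
 *	uint_t r1 = DIF_INSTR_R1(instr);
 *	uint_t r2 = DIF_INSTR_R2(instr);
 *	uint_t rd = DIF_INSTR_RD(instr);
 *
 *	ok = r1 < nregs && r2 < nregs && rd < nregs && rd != 0;
 */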
7747 dtrace_difo_validate(dtrace_difo_t
*dp
, dtrace_vstate_t
*vstate
, uint_t nregs
,
7751 int (*efunc
)(uint_t pc
, const char *, ...) = dtrace_difo_err
;
7755 kcheck
= cr
== NULL
||
7756 PRIV_POLICY_ONLY(cr
, PRIV_DTRACE_KERNEL
, B_FALSE
) == 0;
7758 dp
->dtdo_destructive
= 0;
7760 for (pc
= 0; pc
< dp
->dtdo_len
&& err
== 0; pc
++) {
7761 dif_instr_t instr
= dp
->dtdo_buf
[pc
];
7763 uint_t r1
= DIF_INSTR_R1(instr
);
7764 uint_t r2
= DIF_INSTR_R2(instr
);
7765 uint_t rd
= DIF_INSTR_RD(instr
);
7766 uint_t rs
= DIF_INSTR_RS(instr
);
7767 uint_t label
= DIF_INSTR_LABEL(instr
);
7768 uint_t v
= DIF_INSTR_VAR(instr
);
7769 uint_t subr
= DIF_INSTR_SUBR(instr
);
7770 uint_t type
= DIF_INSTR_TYPE(instr
);
7771 uint_t op
= DIF_INSTR_OP(instr
);
7789 err
+= efunc(pc
, "invalid register %u\n", r1
);
7791 err
+= efunc(pc
, "invalid register %u\n", r2
);
7793 err
+= efunc(pc
, "invalid register %u\n", rd
);
7795 err
+= efunc(pc
, "cannot write to %r0\n");
7801 err
+= efunc(pc
, "invalid register %u\n", r1
);
7803 err
+= efunc(pc
, "non-zero reserved bits\n");
7805 err
+= efunc(pc
, "invalid register %u\n", rd
);
7807 err
+= efunc(pc
, "cannot write to %r0\n");
7817 err
+= efunc(pc
, "invalid register %u\n", r1
);
7819 err
+= efunc(pc
, "non-zero reserved bits\n");
7821 err
+= efunc(pc
, "invalid register %u\n", rd
);
7823 err
+= efunc(pc
, "cannot write to %r0\n");
7825 dp
->dtdo_buf
[pc
] = DIF_INSTR_LOAD(op
+
7826 DIF_OP_RLDSB
- DIF_OP_LDSB
, r1
, rd
);
7836 err
+= efunc(pc
, "invalid register %u\n", r1
);
7838 err
+= efunc(pc
, "non-zero reserved bits\n");
7840 err
+= efunc(pc
, "invalid register %u\n", rd
);
7842 err
+= efunc(pc
, "cannot write to %r0\n");
7852 err
+= efunc(pc
, "invalid register %u\n", r1
);
7854 err
+= efunc(pc
, "non-zero reserved bits\n");
7856 err
+= efunc(pc
, "invalid register %u\n", rd
);
7858 err
+= efunc(pc
, "cannot write to %r0\n");
7865 err
+= efunc(pc
, "invalid register %u\n", r1
);
7867 err
+= efunc(pc
, "non-zero reserved bits\n");
7869 err
+= efunc(pc
, "invalid register %u\n", rd
);
7871 err
+= efunc(pc
, "cannot write to 0 address\n");
7876 err
+= efunc(pc
, "invalid register %u\n", r1
);
7878 err
+= efunc(pc
, "invalid register %u\n", r2
);
7880 err
+= efunc(pc
, "non-zero reserved bits\n");
7884 err
+= efunc(pc
, "invalid register %u\n", r1
);
7885 if (r2
!= 0 || rd
!= 0)
7886 err
+= efunc(pc
, "non-zero reserved bits\n");
7899 if (label
>= dp
->dtdo_len
) {
7900 err
+= efunc(pc
, "invalid branch target %u\n",
7904 err
+= efunc(pc
, "backward branch to %u\n",
7909 if (r1
!= 0 || r2
!= 0)
7910 err
+= efunc(pc
, "non-zero reserved bits\n");
7912 err
+= efunc(pc
, "invalid register %u\n", rd
);
7916 case DIF_OP_FLUSHTS
:
7917 if (r1
!= 0 || r2
!= 0 || rd
!= 0)
7918 err
+= efunc(pc
, "non-zero reserved bits\n");
7921 if (DIF_INSTR_INTEGER(instr
) >= dp
->dtdo_intlen
) {
7922 err
+= efunc(pc
, "invalid integer ref %u\n",
7923 DIF_INSTR_INTEGER(instr
));
7926 err
+= efunc(pc
, "invalid register %u\n", rd
);
7928 err
+= efunc(pc
, "cannot write to %r0\n");
7931 if (DIF_INSTR_STRING(instr
) >= dp
->dtdo_strlen
) {
7932 err
+= efunc(pc
, "invalid string ref %u\n",
7933 DIF_INSTR_STRING(instr
));
7936 err
+= efunc(pc
, "invalid register %u\n", rd
);
7938 err
+= efunc(pc
, "cannot write to %r0\n");
7942 if (r1
> DIF_VAR_ARRAY_MAX
)
7943 err
+= efunc(pc
, "invalid array %u\n", r1
);
7945 err
+= efunc(pc
, "invalid register %u\n", r2
);
7947 err
+= efunc(pc
, "invalid register %u\n", rd
);
7949 err
+= efunc(pc
, "cannot write to %r0\n");
7956 if (v
< DIF_VAR_OTHER_MIN
|| v
> DIF_VAR_OTHER_MAX
)
7957 err
+= efunc(pc
, "invalid variable %u\n", v
);
7959 err
+= efunc(pc
, "invalid register %u\n", rd
);
7961 err
+= efunc(pc
, "cannot write to %r0\n");
7968 if (v
< DIF_VAR_OTHER_UBASE
|| v
> DIF_VAR_OTHER_MAX
)
7969 err
+= efunc(pc
, "invalid variable %u\n", v
);
7971 err
+= efunc(pc
, "invalid register %u\n", rd
);
7974 if (subr
> DIF_SUBR_MAX
)
7975 err
+= efunc(pc
, "invalid subr %u\n", subr
);
7977 err
+= efunc(pc
, "invalid register %u\n", rd
);
7979 err
+= efunc(pc
, "cannot write to %r0\n");
7981 if (subr
== DIF_SUBR_COPYOUT
||
7982 subr
== DIF_SUBR_COPYOUTSTR
) {
7983 dp
->dtdo_destructive
= 1;
7987 if (type
!= DIF_TYPE_STRING
&& type
!= DIF_TYPE_CTF
)
7988 err
+= efunc(pc
, "invalid ref type %u\n", type
);
7990 err
+= efunc(pc
, "invalid register %u\n", r2
);
7992 err
+= efunc(pc
, "invalid register %u\n", rs
);
7995 if (type
!= DIF_TYPE_CTF
)
7996 err
+= efunc(pc
, "invalid val type %u\n", type
);
7998 err
+= efunc(pc
, "invalid register %u\n", r2
);
8000 err
+= efunc(pc
, "invalid register %u\n", rs
);
8003 err
+= efunc(pc
, "invalid opcode %u\n",
8004 DIF_INSTR_OP(instr
));
8008 if (dp
->dtdo_len
!= 0 &&
8009 DIF_INSTR_OP(dp
->dtdo_buf
[dp
->dtdo_len
- 1]) != DIF_OP_RET
) {
8010 err
+= efunc(dp
->dtdo_len
- 1,
8011 "expected 'ret' as last DIF instruction\n");
8014 if (!(dp
->dtdo_rtype
.dtdt_flags
& DIF_TF_BYREF
)) {
8016 * If we're not returning by reference, the size must be either
8017 * 0 or the size of one of the base types.
8019 switch (dp
->dtdo_rtype
.dtdt_size
) {
8021 case sizeof (uint8_t):
8022 case sizeof (uint16_t):
8023 case sizeof (uint32_t):
8024 case sizeof (uint64_t):
8028 err
+= efunc(dp
->dtdo_len
- 1, "bad return size");
8032 for (i
= 0; i
< dp
->dtdo_varlen
&& err
== 0; i
++) {
8033 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
], *existing
= NULL
;
8034 dtrace_diftype_t
*vt
, *et
;
8037 if (v
->dtdv_scope
!= DIFV_SCOPE_GLOBAL
&&
8038 v
->dtdv_scope
!= DIFV_SCOPE_THREAD
&&
8039 v
->dtdv_scope
!= DIFV_SCOPE_LOCAL
) {
8040 err
+= efunc(i
, "unrecognized variable scope %d\n",
8045 if (v
->dtdv_kind
!= DIFV_KIND_ARRAY
&&
8046 v
->dtdv_kind
!= DIFV_KIND_SCALAR
) {
8047 err
+= efunc(i
, "unrecognized variable type %d\n",
8052 if ((id
= v
->dtdv_id
) > DIF_VARIABLE_MAX
) {
8053 err
+= efunc(i
, "%d exceeds variable id limit\n", id
);
8057 if (id
< DIF_VAR_OTHER_UBASE
)
8061 * For user-defined variables, we need to check that this
8062 * definition is identical to any previous definition that we
8065 ndx
= id
- DIF_VAR_OTHER_UBASE
;
8067 switch (v
->dtdv_scope
) {
8068 case DIFV_SCOPE_GLOBAL
:
8069 if (ndx
< vstate
->dtvs_nglobals
) {
8070 dtrace_statvar_t
*svar
;
8072 if ((svar
= vstate
->dtvs_globals
[ndx
]) != NULL
)
8073 existing
= &svar
->dtsv_var
;
8078 case DIFV_SCOPE_THREAD
:
8079 if (ndx
< vstate
->dtvs_ntlocals
)
8080 existing
= &vstate
->dtvs_tlocals
[ndx
];
8083 case DIFV_SCOPE_LOCAL
:
8084 if (ndx
< vstate
->dtvs_nlocals
) {
8085 dtrace_statvar_t
*svar
;
8087 if ((svar
= vstate
->dtvs_locals
[ndx
]) != NULL
)
8088 existing
= &svar
->dtsv_var
;
8096 if (vt
->dtdt_flags
& DIF_TF_BYREF
) {
8097 if (vt
->dtdt_size
== 0) {
8098 err
+= efunc(i
, "zero-sized variable\n");
8102 if (v
->dtdv_scope
== DIFV_SCOPE_GLOBAL
&&
8103 vt
->dtdt_size
> dtrace_global_maxsize
) {
8104 err
+= efunc(i
, "oversized by-ref global\n");
8109 if (existing
== NULL
|| existing
->dtdv_id
== 0)
8112 ASSERT(existing
->dtdv_id
== v
->dtdv_id
);
8113 ASSERT(existing
->dtdv_scope
== v
->dtdv_scope
);
8115 if (existing
->dtdv_kind
!= v
->dtdv_kind
)
8116 err
+= efunc(i
, "%d changed variable kind\n", id
);
8118 et
= &existing
->dtdv_type
;
8120 if (vt
->dtdt_flags
!= et
->dtdt_flags
) {
8121 err
+= efunc(i
, "%d changed variable type flags\n", id
);
8125 if (vt
->dtdt_size
!= 0 && vt
->dtdt_size
!= et
->dtdt_size
) {
8126 err
+= efunc(i
, "%d changed variable type size\n", id
);
/*
 * Validate a DTrace DIF object that it is to be used as a helper.  Helpers
 * are much more constrained than normal DIFOs.  Specifically, they may
 * not:
 *
 * 1. Make calls to subroutines other than copyin(), copyinstr() or
 *    miscellaneous string routines
 * 2. Access DTrace variables other than the args[] array, and the
 *    curthread, pid, ppid, tid, execname, zonename, uid and gid variables.
 * 3. Have thread-local variables.
 * 4. Have dynamic variables.
 */
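
/*
 * Illustrative note (not part of the original source): in practice the
 * subroutine and variable restrictions are the ones helper DIF most often
 * violates.  A helper expression built on copyinstr(arg0) passes, because a
 * DIF_OP_CALL of DIF_SUBR_COPYINSTR is in the allowed list checked below,
 * whereas one calling mutex_owned() is rejected, and any reference to a
 * thread-local or other dynamic variable fails with the "illegal dynamic
 * variable" errors emitted by dtrace_difo_validate_helper().
 */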
8147 dtrace_difo_validate_helper(dtrace_difo_t
*dp
)
8149 int (*efunc
)(uint_t pc
, const char *, ...) = dtrace_difo_err
;
8153 for (pc
= 0; pc
< dp
->dtdo_len
; pc
++) {
8154 dif_instr_t instr
= dp
->dtdo_buf
[pc
];
8156 uint_t v
= DIF_INSTR_VAR(instr
);
8157 uint_t subr
= DIF_INSTR_SUBR(instr
);
8158 uint_t op
= DIF_INSTR_OP(instr
);
8213 case DIF_OP_FLUSHTS
:
8225 if (v
>= DIF_VAR_OTHER_UBASE
)
8228 if (v
>= DIF_VAR_ARG0
&& v
<= DIF_VAR_ARG9
)
8231 if (v
== DIF_VAR_CURTHREAD
|| v
== DIF_VAR_PID
||
8232 v
== DIF_VAR_PPID
|| v
== DIF_VAR_TID
||
8233 v
== DIF_VAR_EXECNAME
|| v
== DIF_VAR_ZONENAME
||
8234 v
== DIF_VAR_UID
|| v
== DIF_VAR_GID
)
8237 err
+= efunc(pc
, "illegal variable %u\n", v
);
8244 err
+= efunc(pc
, "illegal dynamic variable load\n");
8250 err
+= efunc(pc
, "illegal dynamic variable store\n");
8254 if (subr
== DIF_SUBR_ALLOCA
||
8255 subr
== DIF_SUBR_BCOPY
||
8256 subr
== DIF_SUBR_COPYIN
||
8257 subr
== DIF_SUBR_COPYINTO
||
8258 subr
== DIF_SUBR_COPYINSTR
||
8259 subr
== DIF_SUBR_INDEX
||
8260 subr
== DIF_SUBR_LLTOSTR
||
8261 subr
== DIF_SUBR_RINDEX
||
8262 subr
== DIF_SUBR_STRCHR
||
8263 subr
== DIF_SUBR_STRJOIN
||
8264 subr
== DIF_SUBR_STRRCHR
||
8265 subr
== DIF_SUBR_STRSTR
||
8266 subr
== DIF_SUBR_CHUD
)
8269 err
+= efunc(pc
, "invalid subr %u\n", subr
);
8273 err
+= efunc(pc
, "invalid opcode %u\n",
8274 DIF_INSTR_OP(instr
));
8282 * Returns 1 if the expression in the DIF object can be cached on a per-thread
8286 dtrace_difo_cacheable(dtrace_difo_t
*dp
)
8293 for (i
= 0; i
< dp
->dtdo_varlen
; i
++) {
8294 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
];
8296 if (v
->dtdv_scope
!= DIFV_SCOPE_GLOBAL
)
8299 switch (v
->dtdv_id
) {
8300 case DIF_VAR_CURTHREAD
:
8303 case DIF_VAR_EXECNAME
:
8304 case DIF_VAR_ZONENAME
:
8313 * This DIF object may be cacheable. Now we need to look for any
8314 * array loading instructions, any memory loading instructions, or
8315 * any stores to thread-local variables.
8317 for (i
= 0; i
< dp
->dtdo_len
; i
++) {
8318 uint_t op
= DIF_INSTR_OP(dp
->dtdo_buf
[i
]);
8320 if ((op
>= DIF_OP_LDSB
&& op
<= DIF_OP_LDX
) ||
8321 (op
>= DIF_OP_ULDSB
&& op
<= DIF_OP_ULDX
) ||
8322 (op
>= DIF_OP_RLDSB
&& op
<= DIF_OP_RLDX
) ||
8323 op
== DIF_OP_LDGA
|| op
== DIF_OP_STTS
)
8331 dtrace_difo_hold(dtrace_difo_t
*dp
)
8335 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
8338 ASSERT(dp
->dtdo_refcnt
!= 0);
8341 * We need to check this DIF object for references to the variable
8342 * DIF_VAR_VTIMESTAMP.
8344 for (i
= 0; i
< dp
->dtdo_varlen
; i
++) {
8345 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
];
8347 if (v
->dtdv_id
!= DIF_VAR_VTIMESTAMP
)
8350 if (dtrace_vtime_references
++ == 0)
8351 dtrace_vtime_enable();
8356 * This routine calculates the dynamic variable chunksize for a given DIF
8357 * object. The calculation is not fool-proof, and can probably be tricked by
8358 * malicious DIF -- but it works for all compiler-generated DIF. Because this
8359 * calculation is likely imperfect, dtrace_dynvar() is able to gracefully fail
8360 * if a dynamic variable size exceeds the chunksize.
8363 dtrace_difo_chunksize(dtrace_difo_t
*dp
, dtrace_vstate_t
*vstate
)
8366 dtrace_key_t tupregs
[DIF_DTR_NREGS
+ 2]; /* +2 for thread and id */
8367 const dif_instr_t
*text
= dp
->dtdo_buf
;
8373 for (pc
= 0; pc
< dp
->dtdo_len
; pc
++) {
8374 dif_instr_t instr
= text
[pc
];
8375 uint_t op
= DIF_INSTR_OP(instr
);
8376 uint_t rd
= DIF_INSTR_RD(instr
);
8377 uint_t r1
= DIF_INSTR_R1(instr
);
8381 dtrace_key_t
*key
= tupregs
;
8385 sval
= dp
->dtdo_inttab
[DIF_INSTR_INTEGER(instr
)];
8390 key
= &tupregs
[DIF_DTR_NREGS
];
8391 key
[0].dttk_size
= 0;
8392 key
[1].dttk_size
= 0;
8394 scope
= DIFV_SCOPE_THREAD
;
8401 if (DIF_INSTR_OP(instr
) == DIF_OP_STTAA
)
8402 key
[nkeys
++].dttk_size
= 0;
8404 key
[nkeys
++].dttk_size
= 0;
8406 if (op
== DIF_OP_STTAA
) {
8407 scope
= DIFV_SCOPE_THREAD
;
8409 scope
= DIFV_SCOPE_GLOBAL
;
8415 if (ttop
== DIF_DTR_NREGS
)
8418 if ((srd
== 0 || sval
== 0) && r1
== DIF_TYPE_STRING
) {
8420 * If the register for the size of the "pushtr"
8421 * is %r0 (or the value is 0) and the type is
8422 * a string, we'll use the system-wide default
8425 tupregs
[ttop
++].dttk_size
=
8426 dtrace_strsize_default
;
8431 tupregs
[ttop
++].dttk_size
= sval
;
8437 if (ttop
== DIF_DTR_NREGS
)
8440 tupregs
[ttop
++].dttk_size
= 0;
8443 case DIF_OP_FLUSHTS
:
8460 * We have a dynamic variable allocation; calculate its size.
8462 for (ksize
= 0, i
= 0; i
< nkeys
; i
++)
8463 ksize
+= P2ROUNDUP(key
[i
].dttk_size
, sizeof (uint64_t));
8465 size
= sizeof (dtrace_dynvar_t
);
8466 size
+= sizeof (dtrace_key_t
) * (nkeys
- 1);
8470 * Now we need to determine the size of the stored data.
8472 id
= DIF_INSTR_VAR(instr
);
8474 for (i
= 0; i
< dp
->dtdo_varlen
; i
++) {
8475 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
];
8477 if (v
->dtdv_id
== id
&& v
->dtdv_scope
== scope
) {
8478 size
+= v
->dtdv_type
.dtdt_size
;
8483 if (i
== dp
->dtdo_varlen
)
8487 * We have the size. If this is larger than the chunk size
8488 * for our dynamic variable state, reset the chunk size.
8490 size
= P2ROUNDUP(size
, sizeof (uint64_t));
8492 if (size
> vstate
->dtvs_dynvars
.dtds_chunksize
)
8493 vstate
->dtvs_dynvars
.dtds_chunksize
= size
;
8498 dtrace_difo_init(dtrace_difo_t
*dp
, dtrace_vstate_t
*vstate
)
8500 int i
, oldsvars
, osz
, nsz
, otlocals
, ntlocals
;
8503 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
8504 ASSERT(dp
->dtdo_buf
!= NULL
&& dp
->dtdo_len
!= 0);
8506 for (i
= 0; i
< dp
->dtdo_varlen
; i
++) {
8507 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
];
8508 dtrace_statvar_t
*svar
, ***svarp
;
8510 uint8_t scope
= v
->dtdv_scope
;
8513 if ((id
= v
->dtdv_id
) < DIF_VAR_OTHER_UBASE
)
8516 id
-= DIF_VAR_OTHER_UBASE
;
8519 case DIFV_SCOPE_THREAD
:
8520 while (id
>= (otlocals
= vstate
->dtvs_ntlocals
)) {
8521 dtrace_difv_t
*tlocals
;
8523 if ((ntlocals
= (otlocals
<< 1)) == 0)
8526 osz
= otlocals
* sizeof (dtrace_difv_t
);
8527 nsz
= ntlocals
* sizeof (dtrace_difv_t
);
8529 tlocals
= kmem_zalloc(nsz
, KM_SLEEP
);
8532 bcopy(vstate
->dtvs_tlocals
,
8534 kmem_free(vstate
->dtvs_tlocals
, osz
);
8537 vstate
->dtvs_tlocals
= tlocals
;
8538 vstate
->dtvs_ntlocals
= ntlocals
;
8541 vstate
->dtvs_tlocals
[id
] = *v
;
8544 case DIFV_SCOPE_LOCAL
:
8545 np
= &vstate
->dtvs_nlocals
;
8546 svarp
= &vstate
->dtvs_locals
;
8548 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
)
8549 dsize
= NCPU
* (v
->dtdv_type
.dtdt_size
+
8552 dsize
= NCPU
* sizeof (uint64_t);
8556 case DIFV_SCOPE_GLOBAL
:
8557 np
= &vstate
->dtvs_nglobals
;
8558 svarp
= &vstate
->dtvs_globals
;
8560 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
)
8561 dsize
= v
->dtdv_type
.dtdt_size
+
8570 while (id
>= (oldsvars
= *np
)) {
8571 dtrace_statvar_t
**statics
;
8572 int newsvars
, oldsize
, newsize
;
8574 if ((newsvars
= (oldsvars
<< 1)) == 0)
8577 oldsize
= oldsvars
* sizeof (dtrace_statvar_t
*);
8578 newsize
= newsvars
* sizeof (dtrace_statvar_t
*);
8580 statics
= kmem_zalloc(newsize
, KM_SLEEP
);
8583 bcopy(*svarp
, statics
, oldsize
);
8584 kmem_free(*svarp
, oldsize
);
8591 if ((svar
= (*svarp
)[id
]) == NULL
) {
8592 svar
= kmem_zalloc(sizeof (dtrace_statvar_t
), KM_SLEEP
);
8593 svar
->dtsv_var
= *v
;
8595 if ((svar
->dtsv_size
= dsize
) != 0) {
8596 svar
->dtsv_data
= (uint64_t)(uintptr_t)
8597 kmem_zalloc(dsize
, KM_SLEEP
);
8600 (*svarp
)[id
] = svar
;
8603 svar
->dtsv_refcnt
++;
8606 dtrace_difo_chunksize(dp
, vstate
);
8607 dtrace_difo_hold(dp
);
8610 static dtrace_difo_t
*
8611 dtrace_difo_duplicate(dtrace_difo_t
*dp
, dtrace_vstate_t
*vstate
)
8616 ASSERT(dp
->dtdo_buf
!= NULL
);
8617 ASSERT(dp
->dtdo_refcnt
!= 0);
8619 new = kmem_zalloc(sizeof (dtrace_difo_t
), KM_SLEEP
);
8621 ASSERT(dp
->dtdo_buf
!= NULL
);
8622 sz
= dp
->dtdo_len
* sizeof (dif_instr_t
);
8623 new->dtdo_buf
= kmem_alloc(sz
, KM_SLEEP
);
8624 bcopy(dp
->dtdo_buf
, new->dtdo_buf
, sz
);
8625 new->dtdo_len
= dp
->dtdo_len
;
8627 if (dp
->dtdo_strtab
!= NULL
) {
8628 ASSERT(dp
->dtdo_strlen
!= 0);
8629 new->dtdo_strtab
= kmem_alloc(dp
->dtdo_strlen
, KM_SLEEP
);
8630 bcopy(dp
->dtdo_strtab
, new->dtdo_strtab
, dp
->dtdo_strlen
);
8631 new->dtdo_strlen
= dp
->dtdo_strlen
;
8634 if (dp
->dtdo_inttab
!= NULL
) {
8635 ASSERT(dp
->dtdo_intlen
!= 0);
8636 sz
= dp
->dtdo_intlen
* sizeof (uint64_t);
8637 new->dtdo_inttab
= kmem_alloc(sz
, KM_SLEEP
);
8638 bcopy(dp
->dtdo_inttab
, new->dtdo_inttab
, sz
);
8639 new->dtdo_intlen
= dp
->dtdo_intlen
;
8642 if (dp
->dtdo_vartab
!= NULL
) {
8643 ASSERT(dp
->dtdo_varlen
!= 0);
8644 sz
= dp
->dtdo_varlen
* sizeof (dtrace_difv_t
);
8645 new->dtdo_vartab
= kmem_alloc(sz
, KM_SLEEP
);
8646 bcopy(dp
->dtdo_vartab
, new->dtdo_vartab
, sz
);
8647 new->dtdo_varlen
= dp
->dtdo_varlen
;
8650 dtrace_difo_init(new, vstate
);
8655 dtrace_difo_destroy(dtrace_difo_t
*dp
, dtrace_vstate_t
*vstate
)
8659 ASSERT(dp
->dtdo_refcnt
== 0);
8661 for (i
= 0; i
< dp
->dtdo_varlen
; i
++) {
8662 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
];
8663 dtrace_statvar_t
*svar
, **svarp
;
8665 uint8_t scope
= v
->dtdv_scope
;
8669 case DIFV_SCOPE_THREAD
:
8672 case DIFV_SCOPE_LOCAL
:
8673 np
= &vstate
->dtvs_nlocals
;
8674 svarp
= vstate
->dtvs_locals
;
8677 case DIFV_SCOPE_GLOBAL
:
8678 np
= &vstate
->dtvs_nglobals
;
8679 svarp
= vstate
->dtvs_globals
;
8686 if ((id
= v
->dtdv_id
) < DIF_VAR_OTHER_UBASE
)
8689 id
-= DIF_VAR_OTHER_UBASE
;
8693 ASSERT(svar
!= NULL
);
8694 ASSERT(svar
->dtsv_refcnt
> 0);
8696 if (--svar
->dtsv_refcnt
> 0)
8699 if (svar
->dtsv_size
!= 0) {
8700 ASSERT(svar
->dtsv_data
!= NULL
);
8701 kmem_free((void *)(uintptr_t)svar
->dtsv_data
,
8705 kmem_free(svar
, sizeof (dtrace_statvar_t
));
8709 kmem_free(dp
->dtdo_buf
, dp
->dtdo_len
* sizeof (dif_instr_t
));
8710 kmem_free(dp
->dtdo_inttab
, dp
->dtdo_intlen
* sizeof (uint64_t));
8711 kmem_free(dp
->dtdo_strtab
, dp
->dtdo_strlen
);
8712 kmem_free(dp
->dtdo_vartab
, dp
->dtdo_varlen
* sizeof (dtrace_difv_t
));
8714 kmem_free(dp
, sizeof (dtrace_difo_t
));
8718 dtrace_difo_release(dtrace_difo_t
*dp
, dtrace_vstate_t
*vstate
)
8722 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
8723 ASSERT(dp
->dtdo_refcnt
!= 0);
8725 for (i
= 0; i
< dp
->dtdo_varlen
; i
++) {
8726 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
];
8728 if (v
->dtdv_id
!= DIF_VAR_VTIMESTAMP
)
8731 ASSERT(dtrace_vtime_references
> 0);
8732 if (--dtrace_vtime_references
== 0)
8733 dtrace_vtime_disable();
8736 if (--dp
->dtdo_refcnt
== 0)
8737 dtrace_difo_destroy(dp
, vstate
);
8741 * DTrace Format Functions
8744 dtrace_format_add(dtrace_state_t
*state
, char *str
)
8747 uint16_t ndx
, len
= strlen(str
) + 1;
8749 fmt
= kmem_zalloc(len
, KM_SLEEP
);
8750 bcopy(str
, fmt
, len
);
8752 for (ndx
= 0; ndx
< state
->dts_nformats
; ndx
++) {
8753 if (state
->dts_formats
[ndx
] == NULL
) {
8754 state
->dts_formats
[ndx
] = fmt
;
8759 if (state
->dts_nformats
== USHRT_MAX
) {
8761 * This is only likely if a denial-of-service attack is being
8762 * attempted. As such, it's okay to fail silently here.
8764 kmem_free(fmt
, len
);
8769 * For simplicity, we always resize the formats array to be exactly the
8770 * number of formats.
8772 ndx
= state
->dts_nformats
++;
8773 new = kmem_alloc((ndx
+ 1) * sizeof (char *), KM_SLEEP
);
8775 if (state
->dts_formats
!= NULL
) {
8777 bcopy(state
->dts_formats
, new, ndx
* sizeof (char *));
8778 kmem_free(state
->dts_formats
, ndx
* sizeof (char *));
8781 state
->dts_formats
= new;
8782 state
->dts_formats
[ndx
] = fmt
;
8788 dtrace_format_remove(dtrace_state_t
*state
, uint16_t format
)
8792 ASSERT(state
->dts_formats
!= NULL
);
8793 ASSERT(format
<= state
->dts_nformats
);
8794 ASSERT(state
->dts_formats
[format
- 1] != NULL
);
8796 fmt
= state
->dts_formats
[format
- 1];
8797 kmem_free(fmt
, strlen(fmt
) + 1);
8798 state
->dts_formats
[format
- 1] = NULL
;
8802 dtrace_format_destroy(dtrace_state_t
*state
)
8806 if (state
->dts_nformats
== 0) {
8807 ASSERT(state
->dts_formats
== NULL
);
8811 ASSERT(state
->dts_formats
!= NULL
);
8813 for (i
= 0; i
< state
->dts_nformats
; i
++) {
8814 char *fmt
= state
->dts_formats
[i
];
8819 kmem_free(fmt
, strlen(fmt
) + 1);
8822 kmem_free(state
->dts_formats
, state
->dts_nformats
* sizeof (char *));
8823 state
->dts_nformats
= 0;
8824 state
->dts_formats
= NULL
;
8828 * DTrace Predicate Functions
8830 static dtrace_predicate_t
*
8831 dtrace_predicate_create(dtrace_difo_t
*dp
)
8833 dtrace_predicate_t
*pred
;
8835 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
8836 ASSERT(dp
->dtdo_refcnt
!= 0);
8838 pred
= kmem_zalloc(sizeof (dtrace_predicate_t
), KM_SLEEP
);
8839 pred
->dtp_difo
= dp
;
8840 pred
->dtp_refcnt
= 1;
8842 if (!dtrace_difo_cacheable(dp
))
8845 if (dtrace_predcache_id
== DTRACE_CACHEIDNONE
) {
8847 * This is only theoretically possible -- we have had 2^32
8848 * cacheable predicates on this machine. We cannot allow any
8849 * more predicates to become cacheable: as unlikely as it is,
8850 * there may be a thread caching a (now stale) predicate cache
8851 * ID. (N.B.: the temptation is being successfully resisted to
8852 * have this cmn_err() "Holy shit -- we executed this code!")
8857 pred
->dtp_cacheid
= dtrace_predcache_id
++;
8863 dtrace_predicate_hold(dtrace_predicate_t
*pred
)
8865 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
8866 ASSERT(pred
->dtp_difo
!= NULL
&& pred
->dtp_difo
->dtdo_refcnt
!= 0);
8867 ASSERT(pred
->dtp_refcnt
> 0);
8873 dtrace_predicate_release(dtrace_predicate_t
*pred
, dtrace_vstate_t
*vstate
)
8875 dtrace_difo_t
*dp
= pred
->dtp_difo
;
8877 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
8878 ASSERT(dp
!= NULL
&& dp
->dtdo_refcnt
!= 0);
8879 ASSERT(pred
->dtp_refcnt
> 0);
8881 if (--pred
->dtp_refcnt
== 0) {
8882 dtrace_difo_release(pred
->dtp_difo
, vstate
);
8883 kmem_free(pred
, sizeof (dtrace_predicate_t
));
8888 * DTrace Action Description Functions
8890 static dtrace_actdesc_t
*
8891 dtrace_actdesc_create(dtrace_actkind_t kind
, uint32_t ntuple
,
8892 uint64_t uarg
, uint64_t arg
)
8894 dtrace_actdesc_t
*act
;
8896 /* ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != NULL &&
8897 arg >= KERNELBASE) || (arg == NULL && kind == DTRACEACT_PRINTA));*/
8899 act
= kmem_zalloc(sizeof (dtrace_actdesc_t
), KM_SLEEP
);
8900 act
->dtad_kind
= kind
;
8901 act
->dtad_ntuple
= ntuple
;
8902 act
->dtad_uarg
= uarg
;
8903 act
->dtad_arg
= arg
;
8904 act
->dtad_refcnt
= 1;
8910 dtrace_actdesc_hold(dtrace_actdesc_t
*act
)
8912 ASSERT(act
->dtad_refcnt
>= 1);
8917 dtrace_actdesc_release(dtrace_actdesc_t
*act
, dtrace_vstate_t
*vstate
)
8919 dtrace_actkind_t kind
= act
->dtad_kind
;
8922 ASSERT(act
->dtad_refcnt
>= 1);
8924 if (--act
->dtad_refcnt
!= 0)
8927 if ((dp
= act
->dtad_difo
) != NULL
)
8928 dtrace_difo_release(dp
, vstate
);
8930 if (DTRACEACT_ISPRINTFLIKE(kind
)) {
8931 char *str
= (char *)(uintptr_t)act
->dtad_arg
;
8933 /* ASSERT((str != NULL && (uintptr_t)str >= KERNELBASE) ||
8934 (str == NULL && act->dtad_kind == DTRACEACT_PRINTA));*/
8937 kmem_free(str
, strlen(str
) + 1);
8940 kmem_free(act
, sizeof (dtrace_actdesc_t
));
8944 * DTrace ECB Functions
8946 static dtrace_ecb_t
*
8947 dtrace_ecb_add(dtrace_state_t
*state
, dtrace_probe_t
*probe
)
8952 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
8954 ecb
= kmem_zalloc(sizeof (dtrace_ecb_t
), KM_SLEEP
);
8955 ecb
->dte_predicate
= NULL
;
8956 ecb
->dte_probe
= probe
;
8959 * The default size is the size of the default action: recording
8962 ecb
->dte_size
= ecb
->dte_needed
= sizeof (dtrace_epid_t
);
8963 ecb
->dte_alignment
= sizeof (dtrace_epid_t
);
8965 epid
= state
->dts_epid
++;
8967 if (epid
- 1 >= state
->dts_necbs
) {
8968 dtrace_ecb_t
**oecbs
= state
->dts_ecbs
, **ecbs
;
8969 int necbs
= state
->dts_necbs
<< 1;
8971 ASSERT(epid
== state
->dts_necbs
+ 1);
8974 ASSERT(oecbs
== NULL
);
8978 ecbs
= kmem_zalloc(necbs
* sizeof (*ecbs
), KM_SLEEP
);
8981 bcopy(oecbs
, ecbs
, state
->dts_necbs
* sizeof (*ecbs
));
8983 dtrace_membar_producer();
8984 state
->dts_ecbs
= ecbs
;
8986 if (oecbs
!= NULL
) {
8988 * If this state is active, we must dtrace_sync()
8989 * before we can free the old dts_ecbs array: we're
8990 * coming in hot, and there may be active ring
8991 * buffer processing (which indexes into the dts_ecbs
8992 * array) on another CPU.
8994 if (state
->dts_activity
!= DTRACE_ACTIVITY_INACTIVE
)
8997 kmem_free(oecbs
, state
->dts_necbs
* sizeof (*ecbs
));
9000 dtrace_membar_producer();
9001 state
->dts_necbs
= necbs
;
9004 ecb
->dte_state
= state
;
9006 ASSERT(state
->dts_ecbs
[epid
- 1] == NULL
);
9007 dtrace_membar_producer();
9008 state
->dts_ecbs
[(ecb
->dte_epid
= epid
) - 1] = ecb
;
9014 dtrace_ecb_enable(dtrace_ecb_t
*ecb
)
9016 dtrace_probe_t
*probe
= ecb
->dte_probe
;
9018 lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
);
9019 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
9020 ASSERT(ecb
->dte_next
== NULL
);
9022 if (probe
== NULL
) {
9024 * This is the NULL probe -- there's nothing to do.
9029 if (probe
->dtpr_ecb
== NULL
) {
9030 dtrace_provider_t
*prov
= probe
->dtpr_provider
;
9033 * We're the first ECB on this probe.
9035 probe
->dtpr_ecb
= probe
->dtpr_ecb_last
= ecb
;
9037 if (ecb
->dte_predicate
!= NULL
)
9038 probe
->dtpr_predcache
= ecb
->dte_predicate
->dtp_cacheid
;
9040 prov
->dtpv_pops
.dtps_enable(prov
->dtpv_arg
,
9041 probe
->dtpr_id
, probe
->dtpr_arg
);
9044 * This probe is already active. Swing the last pointer to
9045 * point to the new ECB, and issue a dtrace_sync() to assure
9046 * that all CPUs have seen the change.
9048 ASSERT(probe
->dtpr_ecb_last
!= NULL
);
9049 probe
->dtpr_ecb_last
->dte_next
= ecb
;
9050 probe
->dtpr_ecb_last
= ecb
;
9051 probe
->dtpr_predcache
= 0;
9058 dtrace_ecb_resize(dtrace_ecb_t
*ecb
)
9060 uint32_t maxalign
= sizeof (dtrace_epid_t
);
9061 uint32_t align
= sizeof (uint8_t), offs
, diff
;
9062 dtrace_action_t
*act
;
9064 uint32_t aggbase
= UINT32_MAX
;
9065 dtrace_state_t
*state
= ecb
->dte_state
;
9068 * If we record anything, we always record the epid. (And we always
9071 offs
= sizeof (dtrace_epid_t
);
9072 ecb
->dte_size
= ecb
->dte_needed
= sizeof (dtrace_epid_t
);
9074 for (act
= ecb
->dte_action
; act
!= NULL
; act
= act
->dta_next
) {
9075 dtrace_recdesc_t
*rec
= &act
->dta_rec
;
9077 if ((align
= rec
->dtrd_alignment
) > maxalign
)
9080 if (!wastuple
&& act
->dta_intuple
) {
9082 * This is the first record in a tuple. Align the
9083 * offset to be at offset 4 in an 8-byte aligned
9086 diff
= offs
+ sizeof (dtrace_aggid_t
);
9088 if (diff
= (diff
& (sizeof (uint64_t) - 1)))
9089 offs
+= sizeof (uint64_t) - diff
;
9091 aggbase
= offs
- sizeof (dtrace_aggid_t
);
9092 ASSERT(!(aggbase
& (sizeof (uint64_t) - 1)));
9096 if (rec
->dtrd_size
!= 0 && (diff
= (offs
& (align
- 1)))) {
9098 * The current offset is not properly aligned; align it.
9100 offs
+= align
- diff
;
9103 rec
->dtrd_offset
= offs
;
9105 if (offs
+ rec
->dtrd_size
> ecb
->dte_needed
) {
9106 ecb
->dte_needed
= offs
+ rec
->dtrd_size
;
9108 if (ecb
->dte_needed
> state
->dts_needed
)
9109 state
->dts_needed
= ecb
->dte_needed
;
9112 if (DTRACEACT_ISAGG(act
->dta_kind
)) {
9113 dtrace_aggregation_t
*agg
= (dtrace_aggregation_t
*)act
;
9114 dtrace_action_t
*first
= agg
->dtag_first
, *prev
;
9116 ASSERT(rec
->dtrd_size
!= 0 && first
!= NULL
);
9118 ASSERT(aggbase
!= UINT32_MAX
);
9120 agg
->dtag_base
= aggbase
;
9122 while ((prev
= first
->dta_prev
) != NULL
&&
9123 DTRACEACT_ISAGG(prev
->dta_kind
)) {
9124 agg
= (dtrace_aggregation_t
*)prev
;
9125 first
= agg
->dtag_first
;
9129 offs
= prev
->dta_rec
.dtrd_offset
+
9130 prev
->dta_rec
.dtrd_size
;
9132 offs
= sizeof (dtrace_epid_t
);
9136 if (!act
->dta_intuple
)
9137 ecb
->dte_size
= offs
+ rec
->dtrd_size
;
9139 offs
+= rec
->dtrd_size
;
9142 wastuple
= act
->dta_intuple
;
9145 if ((act
= ecb
->dte_action
) != NULL
&&
9146 !(act
->dta_kind
== DTRACEACT_SPECULATE
&& act
->dta_next
== NULL
) &&
9147 ecb
->dte_size
== sizeof (dtrace_epid_t
)) {
9149 * If the size is still sizeof (dtrace_epid_t), then all
9150 * actions store no data; set the size to 0.
9152 ecb
->dte_alignment
= maxalign
;
9156 * If the needed space is still sizeof (dtrace_epid_t), then
9157 * all actions need no additional space; set the needed
9160 if (ecb
->dte_needed
== sizeof (dtrace_epid_t
))
9161 ecb
->dte_needed
= 0;
9167 * Set our alignment, and make sure that the dte_size and dte_needed
9168 * are aligned to the size of an EPID.
9170 ecb
->dte_alignment
= maxalign
;
9171 ecb
->dte_size
= (ecb
->dte_size
+ (sizeof (dtrace_epid_t
) - 1)) &
9172 ~(sizeof (dtrace_epid_t
) - 1);
9173 ecb
->dte_needed
= (ecb
->dte_needed
+ (sizeof (dtrace_epid_t
) - 1)) &
9174 ~(sizeof (dtrace_epid_t
) - 1);
9175 ASSERT(ecb
->dte_size
<= ecb
->dte_needed
);
9178 static dtrace_action_t
*
9179 dtrace_ecb_aggregation_create(dtrace_ecb_t
*ecb
, dtrace_actdesc_t
*desc
)
9181 dtrace_aggregation_t
*agg
;
9182 size_t size
= sizeof (uint64_t);
9183 int ntuple
= desc
->dtad_ntuple
;
9184 dtrace_action_t
*act
;
9185 dtrace_recdesc_t
*frec
;
9186 dtrace_aggid_t aggid
;
9187 dtrace_state_t
*state
= ecb
->dte_state
;
9189 agg
= kmem_zalloc(sizeof (dtrace_aggregation_t
), KM_SLEEP
);
9190 agg
->dtag_ecb
= ecb
;
9192 ASSERT(DTRACEACT_ISAGG(desc
->dtad_kind
));
9194 switch (desc
->dtad_kind
) {
9196 agg
->dtag_initial
= UINT64_MAX
;
9197 agg
->dtag_aggregate
= dtrace_aggregate_min
;
9201 agg
->dtag_aggregate
= dtrace_aggregate_max
;
9204 case DTRACEAGG_COUNT
:
9205 agg
->dtag_aggregate
= dtrace_aggregate_count
;
9208 case DTRACEAGG_QUANTIZE
:
9209 agg
->dtag_aggregate
= dtrace_aggregate_quantize
;
9210 size
= (((sizeof (uint64_t) * NBBY
) - 1) * 2 + 1) *
9214 case DTRACEAGG_LQUANTIZE
: {
9215 uint16_t step
= DTRACE_LQUANTIZE_STEP(desc
->dtad_arg
);
9216 uint16_t levels
= DTRACE_LQUANTIZE_LEVELS(desc
->dtad_arg
);
9218 agg
->dtag_initial
= desc
->dtad_arg
;
9219 agg
->dtag_aggregate
= dtrace_aggregate_lquantize
;
9221 if (step
== 0 || levels
== 0)
9224 size
= levels
* sizeof (uint64_t) + 3 * sizeof (uint64_t);
9229 agg
->dtag_aggregate
= dtrace_aggregate_avg
;
9230 size
= sizeof (uint64_t) * 2;
9234 agg
->dtag_aggregate
= dtrace_aggregate_sum
;
9241 agg
->dtag_action
.dta_rec
.dtrd_size
= size
;
9247 * We must make sure that we have enough actions for the n-tuple.
9249 for (act
= ecb
->dte_action_last
; act
!= NULL
; act
= act
->dta_prev
) {
9250 if (DTRACEACT_ISAGG(act
->dta_kind
))
9253 if (--ntuple
== 0) {
9255 * This is the action with which our n-tuple begins.
9257 agg
->dtag_first
= act
;
9263 * This n-tuple is short by ntuple elements. Return failure.
9265 ASSERT(ntuple
!= 0);
9267 kmem_free(agg
, sizeof (dtrace_aggregation_t
));
9272 * If the last action in the tuple has a size of zero, it's actually
9273 * an expression argument for the aggregating action.
9275 ASSERT(ecb
->dte_action_last
!= NULL
);
9276 act
= ecb
->dte_action_last
;
9278 if (act
->dta_kind
== DTRACEACT_DIFEXPR
) {
9279 ASSERT(act
->dta_difo
!= NULL
);
9281 if (act
->dta_difo
->dtdo_rtype
.dtdt_size
== 0)
9282 agg
->dtag_hasarg
= 1;
9286 * We need to allocate an id for this aggregation.
9288 aggid
= (dtrace_aggid_t
)(uintptr_t)vmem_alloc(state
->dts_aggid_arena
, 1,
9289 VM_BESTFIT
| VM_SLEEP
);
9291 if (aggid
- 1 >= state
->dts_naggregations
) {
9292 dtrace_aggregation_t
**oaggs
= state
->dts_aggregations
;
9293 dtrace_aggregation_t
**aggs
;
9294 int naggs
= state
->dts_naggregations
<< 1;
9295 int onaggs
= state
->dts_naggregations
;
9297 ASSERT(aggid
== state
->dts_naggregations
+ 1);
9300 ASSERT(oaggs
== NULL
);
9304 aggs
= kmem_zalloc(naggs
* sizeof (*aggs
), KM_SLEEP
);
9306 if (oaggs
!= NULL
) {
9307 bcopy(oaggs
, aggs
, onaggs
* sizeof (*aggs
));
9308 kmem_free(oaggs
, onaggs
* sizeof (*aggs
));
9311 state
->dts_aggregations
= aggs
;
9312 state
->dts_naggregations
= naggs
;
9315 ASSERT(state
->dts_aggregations
[aggid
- 1] == NULL
);
9316 state
->dts_aggregations
[(agg
->dtag_id
= aggid
) - 1] = agg
;
9318 frec
= &agg
->dtag_first
->dta_rec
;
9319 if (frec
->dtrd_alignment
< sizeof (dtrace_aggid_t
))
9320 frec
->dtrd_alignment
= sizeof (dtrace_aggid_t
);
9322 for (act
= agg
->dtag_first
; act
!= NULL
; act
= act
->dta_next
) {
9323 ASSERT(!act
->dta_intuple
);
9324 act
->dta_intuple
= 1;
9327 return (&agg
->dtag_action
);
9331 dtrace_ecb_aggregation_destroy(dtrace_ecb_t
*ecb
, dtrace_action_t
*act
)
9333 dtrace_aggregation_t
*agg
= (dtrace_aggregation_t
*)act
;
9334 dtrace_state_t
*state
= ecb
->dte_state
;
9335 dtrace_aggid_t aggid
= agg
->dtag_id
;
9337 ASSERT(DTRACEACT_ISAGG(act
->dta_kind
));
9338 vmem_free(state
->dts_aggid_arena
, (void *)(uintptr_t)aggid
, 1);
9340 ASSERT(state
->dts_aggregations
[aggid
- 1] == agg
);
9341 state
->dts_aggregations
[aggid
- 1] = NULL
;
9343 kmem_free(agg
, sizeof (dtrace_aggregation_t
));
9347 dtrace_ecb_action_add(dtrace_ecb_t
*ecb
, dtrace_actdesc_t
*desc
)
9349 dtrace_action_t
*action
, *last
;
9350 dtrace_difo_t
*dp
= desc
->dtad_difo
;
9351 uint32_t size
= 0, align
= sizeof (uint8_t), mask
;
9352 uint16_t format
= 0;
9353 dtrace_recdesc_t
*rec
;
9354 dtrace_state_t
*state
= ecb
->dte_state
;
9355 dtrace_optval_t
*opt
= state
->dts_options
, nframes
, strsize
;
9356 uint64_t arg
= desc
->dtad_arg
;
9358 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
9359 ASSERT(ecb
->dte_action
== NULL
|| ecb
->dte_action
->dta_refcnt
== 1);
9361 if (DTRACEACT_ISAGG(desc
->dtad_kind
)) {
9363 * If this is an aggregating action, there must be neither
9364 * a speculate nor a commit on the action chain.
9366 dtrace_action_t
*act
;
9368 for (act
= ecb
->dte_action
; act
!= NULL
; act
= act
->dta_next
) {
9369 if (act
->dta_kind
== DTRACEACT_COMMIT
)
9372 if (act
->dta_kind
== DTRACEACT_SPECULATE
)
9376 action
= dtrace_ecb_aggregation_create(ecb
, desc
);
9381 if (DTRACEACT_ISDESTRUCTIVE(desc
->dtad_kind
) ||
9382 (desc
->dtad_kind
== DTRACEACT_DIFEXPR
&&
9383 dp
!= NULL
&& dp
->dtdo_destructive
)) {
9384 state
->dts_destructive
= 1;
9387 switch (desc
->dtad_kind
) {
9388 case DTRACEACT_PRINTF
:
9389 case DTRACEACT_PRINTA
:
9390 case DTRACEACT_SYSTEM
:
9391 case DTRACEACT_FREOPEN
:
9393 * We know that our arg is a string -- turn it into a
9397 ASSERT(desc
->dtad_kind
== DTRACEACT_PRINTA
);
9400 ASSERT(arg
!= NULL
);
9401 /* ASSERT(arg > KERNELBASE); */
9402 format
= dtrace_format_add(state
,
9403 (char *)(uintptr_t)arg
);
9407 case DTRACEACT_LIBACT
:
9408 case DTRACEACT_DIFEXPR
:
9412 if ((size
= dp
->dtdo_rtype
.dtdt_size
) != 0)
9415 if (dp
->dtdo_rtype
.dtdt_kind
== DIF_TYPE_STRING
) {
9416 if (!(dp
->dtdo_rtype
.dtdt_flags
& DIF_TF_BYREF
))
9419 size
= opt
[DTRACEOPT_STRSIZE
];
9424 case DTRACEACT_STACK
:
9425 if ((nframes
= arg
) == 0) {
9426 nframes
= opt
[DTRACEOPT_STACKFRAMES
];
9427 ASSERT(nframes
> 0);
9431 size
= nframes
* sizeof (pc_t
);
9434 case DTRACEACT_JSTACK
:
9435 if ((strsize
= DTRACE_USTACK_STRSIZE(arg
)) == 0)
9436 strsize
= opt
[DTRACEOPT_JSTACKSTRSIZE
];
9438 if ((nframes
= DTRACE_USTACK_NFRAMES(arg
)) == 0)
9439 nframes
= opt
[DTRACEOPT_JSTACKFRAMES
];
9441 arg
= DTRACE_USTACK_ARG(nframes
, strsize
);
9444 case DTRACEACT_USTACK
:
9445 if (desc
->dtad_kind
!= DTRACEACT_JSTACK
&&
9446 (nframes
= DTRACE_USTACK_NFRAMES(arg
)) == 0) {
9447 strsize
= DTRACE_USTACK_STRSIZE(arg
);
9448 nframes
= opt
[DTRACEOPT_USTACKFRAMES
];
9449 ASSERT(nframes
> 0);
9450 arg
= DTRACE_USTACK_ARG(nframes
, strsize
);
9454 * Save a slot for the pid.
9456 size
= (nframes
+ 1) * sizeof (uint64_t);
9457 size
+= DTRACE_USTACK_STRSIZE(arg
);
9458 size
= P2ROUNDUP(size
, (uint32_t)(sizeof (uintptr_t)));
9464 if (dp
== NULL
|| ((size
= dp
->dtdo_rtype
.dtdt_size
) !=
9465 sizeof (uint64_t)) ||
9466 (dp
->dtdo_rtype
.dtdt_flags
& DIF_TF_BYREF
))
9470 case DTRACEACT_USYM
:
9471 case DTRACEACT_UMOD
:
9472 case DTRACEACT_UADDR
:
9474 (dp
->dtdo_rtype
.dtdt_size
!= sizeof (uint64_t)) ||
9475 (dp
->dtdo_rtype
.dtdt_flags
& DIF_TF_BYREF
))
9479 * We have a slot for the pid, plus a slot for the
9480 * argument. To keep things simple (aligned with
9481 * bitness-neutral sizing), we store each as a 64-bit
9484 size
= 2 * sizeof (uint64_t);
9487 case DTRACEACT_STOP
:
9488 case DTRACEACT_BREAKPOINT
:
9489 case DTRACEACT_PANIC
:
9492 case DTRACEACT_CHILL
:
9493 case DTRACEACT_DISCARD
:
9494 case DTRACEACT_RAISE
:
9499 case DTRACEACT_EXIT
:
9501 (size
= dp
->dtdo_rtype
.dtdt_size
) != sizeof (int) ||
9502 (dp
->dtdo_rtype
.dtdt_flags
& DIF_TF_BYREF
))
9506 case DTRACEACT_SPECULATE
:
9507 if (ecb
->dte_size
> sizeof (dtrace_epid_t
))
9513 state
->dts_speculates
= 1;
9516 case DTRACEACT_COMMIT
: {
9517 dtrace_action_t
*act
= ecb
->dte_action
;
9519 for (; act
!= NULL
; act
= act
->dta_next
) {
9520 if (act
->dta_kind
== DTRACEACT_COMMIT
)
9533 if (size
!= 0 || desc
->dtad_kind
== DTRACEACT_SPECULATE
) {
9535 * If this is a data-storing action or a speculate,
9536 * we must be sure that there isn't a commit on the
9539 dtrace_action_t
*act
= ecb
->dte_action
;
9541 for (; act
!= NULL
; act
= act
->dta_next
) {
9542 if (act
->dta_kind
== DTRACEACT_COMMIT
)
9547 action
= kmem_zalloc(sizeof (dtrace_action_t
), KM_SLEEP
);
9548 action
->dta_rec
.dtrd_size
= size
;
9551 action
->dta_refcnt
= 1;
9552 rec
= &action
->dta_rec
;
9553 size
= rec
->dtrd_size
;
9555 for (mask
= sizeof (uint64_t) - 1; size
!= 0 && mask
> 0; mask
>>= 1) {
9556 if (!(size
& mask
)) {
9562 action
->dta_kind
= desc
->dtad_kind
;
9564 if ((action
->dta_difo
= dp
) != NULL
)
9565 dtrace_difo_hold(dp
);
9567 rec
->dtrd_action
= action
->dta_kind
;
9568 rec
->dtrd_arg
= arg
;
9569 rec
->dtrd_uarg
= desc
->dtad_uarg
;
9570 rec
->dtrd_alignment
= (uint16_t)align
;
9571 rec
->dtrd_format
= format
;
9573 if ((last
= ecb
->dte_action_last
) != NULL
) {
9574 ASSERT(ecb
->dte_action
!= NULL
);
9575 action
->dta_prev
= last
;
9576 last
->dta_next
= action
;
9578 ASSERT(ecb
->dte_action
== NULL
);
9579 ecb
->dte_action
= action
;
9582 ecb
->dte_action_last
= action
;
9588 dtrace_ecb_action_remove(dtrace_ecb_t
*ecb
)
9590 dtrace_action_t
*act
= ecb
->dte_action
, *next
;
9591 dtrace_vstate_t
*vstate
= &ecb
->dte_state
->dts_vstate
;
9595 if (act
!= NULL
&& act
->dta_refcnt
> 1) {
9596 ASSERT(act
->dta_next
== NULL
|| act
->dta_next
->dta_refcnt
== 1);
9599 for (; act
!= NULL
; act
= next
) {
9600 next
= act
->dta_next
;
9601 ASSERT(next
!= NULL
|| act
== ecb
->dte_action_last
);
9602 ASSERT(act
->dta_refcnt
== 1);
9604 if ((format
= act
->dta_rec
.dtrd_format
) != 0)
9605 dtrace_format_remove(ecb
->dte_state
, format
);
9607 if ((dp
= act
->dta_difo
) != NULL
)
9608 dtrace_difo_release(dp
, vstate
);
9610 if (DTRACEACT_ISAGG(act
->dta_kind
)) {
9611 dtrace_ecb_aggregation_destroy(ecb
, act
);
9613 kmem_free(act
, sizeof (dtrace_action_t
));
9618 ecb
->dte_action
= NULL
;
9619 ecb
->dte_action_last
= NULL
;
9620 ecb
->dte_size
= sizeof (dtrace_epid_t
);
9624 dtrace_ecb_disable(dtrace_ecb_t *ecb)
9627      * We disable the ECB by removing it from its probe.
9629     dtrace_ecb_t *pecb, *prev = NULL;
9630     dtrace_probe_t *probe = ecb->dte_probe;
9632     lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
9634     if (probe == NULL) {
9636          * This is the NULL probe; there is nothing to disable.
9641     for (pecb = probe->dtpr_ecb; pecb != NULL; pecb = pecb->dte_next) {
9647     ASSERT(pecb != NULL);
9650         probe->dtpr_ecb = ecb->dte_next;
9652         prev->dte_next = ecb->dte_next;
9655     if (ecb == probe->dtpr_ecb_last) {
9656         ASSERT(ecb->dte_next == NULL);
9657         probe->dtpr_ecb_last = prev;
9661      * The ECB has been disconnected from the probe; now sync to assure
9662      * that all CPUs have seen the change before returning.
9666     if (probe->dtpr_ecb == NULL) {
9668          * That was the last ECB on the probe; clear the predicate
9669          * cache ID for the probe, disable it and sync one more time
9670          * to assure that we'll never hit it again.
9672         dtrace_provider_t *prov = probe->dtpr_provider;
9674         ASSERT(ecb->dte_next == NULL);
9675         ASSERT(probe->dtpr_ecb_last == NULL);
9676         probe->dtpr_predcache = DTRACE_CACHEIDNONE;
9677         prov->dtpv_pops.dtps_disable(prov->dtpv_arg,
9678             probe->dtpr_id, probe->dtpr_arg);
9682          * There is at least one ECB remaining on the probe.  If there
9683          * is _exactly_ one, set the probe's predicate cache ID to be
9684          * the predicate cache ID of the remaining ECB.
9686         ASSERT(probe->dtpr_ecb_last != NULL);
9687         ASSERT(probe->dtpr_predcache == DTRACE_CACHEIDNONE);
9689         if (probe->dtpr_ecb == probe->dtpr_ecb_last) {
9690             dtrace_predicate_t *p = probe->dtpr_ecb->dte_predicate;
9692             ASSERT(probe->dtpr_ecb->dte_next == NULL);
9695                 probe->dtpr_predcache = p->dtp_cacheid;
9698     ecb->dte_next = NULL;
9703 dtrace_ecb_destroy(dtrace_ecb_t *ecb)
9705     dtrace_state_t *state = ecb->dte_state;
9706     dtrace_vstate_t *vstate = &state->dts_vstate;
9707     dtrace_predicate_t *pred;
9708     dtrace_epid_t epid = ecb->dte_epid;
9710     lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
9711     ASSERT(ecb->dte_next == NULL);
9712     ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb);
9714     if ((pred = ecb->dte_predicate) != NULL)
9715         dtrace_predicate_release(pred, vstate);
9717     dtrace_ecb_action_remove(ecb);
9719     ASSERT(state->dts_ecbs[epid - 1] == ecb);
9720     state->dts_ecbs[epid - 1] = NULL;
9722     kmem_free(ecb, sizeof (dtrace_ecb_t));
9725 static dtrace_ecb_t *
9726 dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe,
9727     dtrace_enabling_t *enab)
9730     dtrace_predicate_t *pred;
9731     dtrace_actdesc_t *act;
9732     dtrace_provider_t *prov;
9733     dtrace_ecbdesc_t *desc = enab->dten_current;
9735     lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
9736     ASSERT(state != NULL);
9738     ecb = dtrace_ecb_add(state, probe);
9739     ecb->dte_uarg = desc->dted_uarg;
9741     if ((pred = desc->dted_pred.dtpdd_predicate) != NULL) {
9742         dtrace_predicate_hold(pred);
9743         ecb->dte_predicate = pred;
9746     if (probe != NULL) {
9748          * If the provider shows more leg than the consumer is old
9749          * enough to see, we need to enable the appropriate implicit
9750          * predicate bits to prevent the ecb from activating at
9753          * Providers specifying DTRACE_PRIV_USER at register time
9754          * are stating that they need the /proc-style privilege
9755          * model to be enforced, and this is what DTRACE_COND_OWNER
9756          * and DTRACE_COND_ZONEOWNER will then do at probe time.
9758         prov = probe->dtpr_provider;
9759         if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLPROC) &&
9760             (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
9761             ecb->dte_cond |= DTRACE_COND_OWNER;
9763         if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLZONE) &&
9764             (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
9765             ecb->dte_cond |= DTRACE_COND_ZONEOWNER;
9768          * If the provider shows us kernel innards and the user
9769          * is lacking sufficient privilege, enable the
9770          * DTRACE_COND_USERMODE implicit predicate.
9772         if (!(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) &&
9773             (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_KERNEL))
9774             ecb->dte_cond |= DTRACE_COND_USERMODE;
9777     if (dtrace_ecb_create_cache != NULL) {
9779          * If we have a cached ecb, we'll use its action list instead
9780          * of creating our own (saving both time and space).
9782         dtrace_ecb_t *cached = dtrace_ecb_create_cache;
9783         dtrace_action_t *act = cached->dte_action;
9786         ASSERT(act->dta_refcnt > 0);
9788         ecb->dte_action = act;
9789         ecb->dte_action_last = cached->dte_action_last;
9790         ecb->dte_needed = cached->dte_needed;
9791         ecb->dte_size = cached->dte_size;
9792         ecb->dte_alignment = cached->dte_alignment;
9798     for (act = desc->dted_action; act != NULL; act = act->dtad_next) {
9799         if ((enab->dten_error = dtrace_ecb_action_add(ecb, act)) != 0) {
9800             dtrace_ecb_destroy(ecb);
9805     dtrace_ecb_resize(ecb);
9807     return (dtrace_ecb_create_cache = ecb);
9811 dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg)
9814     dtrace_enabling_t *enab = arg;
9815     dtrace_state_t *state = enab->dten_vstate->dtvs_state;
9817     ASSERT(state != NULL);
9819     if (probe != NULL && probe->dtpr_gen < enab->dten_probegen) {
9821          * This probe was created in a generation for which this
9822          * enabling has previously created ECBs; we don't want to
9823          * enable it again, so just kick out.
9825         return (DTRACE_MATCH_NEXT);
9828     if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL)
9829         return (DTRACE_MATCH_DONE);
9831     dtrace_ecb_enable(ecb);
9832     return (DTRACE_MATCH_NEXT);
9835 static dtrace_ecb_t *
9836 dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id)
9840     lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
9842     if (id == 0 || id > state->dts_necbs)
9845     ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL);
9846     ASSERT((ecb = state->dts_ecbs[id - 1]) == NULL || ecb->dte_epid == id);
9848     return (state->dts_ecbs[id - 1]);
9851 static dtrace_aggregation_t *
9852 dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id)
9854     dtrace_aggregation_t *agg;
9856     lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
9858     if (id == 0 || id > state->dts_naggregations)
9861     ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL);
9862     ASSERT((agg = state->dts_aggregations[id - 1]) == NULL ||
9863         agg->dtag_id == id);
9865     return (state->dts_aggregations[id - 1]);
9869  * DTrace Buffer Functions
9871  * The following functions manipulate DTrace buffers.  Most of these functions
9872  * are called in the context of establishing or processing consumer state;
9873  * exceptions are explicitly noted.
9877  * Note:  called from cross call context.  This function switches the two
9878  * buffers on a given CPU.  The atomicity of this operation is assured by
9879  * disabling interrupts while the actual switch takes place; the disabling of
9880  * interrupts serializes the execution with any execution of dtrace_probe() on
9884 dtrace_buffer_switch(dtrace_buffer_t *buf)
9886     caddr_t tomax = buf->dtb_tomax;
9887     caddr_t xamot = buf->dtb_xamot;
9888     dtrace_icookie_t cookie;
9890     ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
9891     ASSERT(!(buf->dtb_flags & DTRACEBUF_RING));
9893     cookie = dtrace_interrupt_disable();
9894     buf->dtb_tomax = xamot;
9895     buf->dtb_xamot = tomax;
9896     buf->dtb_xamot_drops = buf->dtb_drops;
9897     buf->dtb_xamot_offset = buf->dtb_offset;
9898     buf->dtb_xamot_errors = buf->dtb_errors;
9899     buf->dtb_xamot_flags = buf->dtb_flags;
9900     buf->dtb_offset = 0;
9902     buf->dtb_errors = 0;
9903     buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED);
9904     dtrace_interrupt_enable(cookie);
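
/*
 * Illustrative sketch (not part of the original source): the block comment
 * above describes swapping the active ("tomax") and inactive ("xamot")
 * buffers under disabled interrupts.  The standalone code below shows the
 * same pointer-and-counter swap using simplified stand-in types; the names
 * (example_buffer_t and friends) are assumptions for illustration only, and
 * the caller is assumed to provide the interrupt-disable bracket shown in
 * the real routine.
 */
#if 0   /* illustrative example -- not compiled with this file */
#include <stdint.h>

typedef struct example_buffer {
    char     *xb_tomax;         /* active buffer */
    char     *xb_xamot;         /* inactive ("switched out") buffer */
    uint64_t  xb_offset;
    uint64_t  xb_drops;
    uint64_t  xb_xamot_offset;
    uint64_t  xb_xamot_drops;
} example_buffer_t;

static void
example_buffer_switch(example_buffer_t *buf)
{
    char *tomax = buf->xb_tomax;
    char *xamot = buf->xb_xamot;

    buf->xb_tomax = xamot;          /* old inactive buffer becomes active */
    buf->xb_xamot = tomax;          /* old active buffer goes to the consumer */
    buf->xb_xamot_offset = buf->xb_offset;
    buf->xb_xamot_drops = buf->xb_drops;
    buf->xb_offset = 0;             /* new active buffer starts empty */
    buf->xb_drops = 0;
}
#endif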
9908  * Note:  called from cross call context.  This function activates a buffer
9909  * on a CPU.  As with dtrace_buffer_switch(), the atomicity of the operation
9910  * is guaranteed by the disabling of interrupts.
9913 dtrace_buffer_activate(dtrace_state_t *state)
9915     dtrace_buffer_t *buf;
9916     dtrace_icookie_t cookie = dtrace_interrupt_disable();
9918     buf = &state->dts_buffer[CPU->cpu_id];
9920     if (buf->dtb_tomax != NULL) {
9922          * We might like to assert that the buffer is marked inactive,
9923          * but this isn't necessarily true:  the buffer for the CPU
9924          * that processes the BEGIN probe has its buffer activated
9925          * manually.  In this case, we take the (harmless) action of
9926          * re-clearing the INACTIVE bit.
9928         buf->dtb_flags &= ~DTRACEBUF_INACTIVE;
9931     dtrace_interrupt_enable(cookie);
9935 dtrace_buffer_alloc(dtrace_buffer_t
*bufs
, size_t size
, int flags
,
9939 dtrace_buffer_t
*buf
;
9941 lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
);
9942 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
9944 if (size
> dtrace_nonroot_maxsize
&&
9945 !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL
, B_FALSE
))
9948 #if defined(__APPLE__)
9949 if (size
> (sane_size
/ 8) / NCPU
) /* As in kdbg_set_nkdbufs(), roughly. */
9951 #endif /* __APPLE__ */
9956 if (cpu
!= DTRACE_CPUALL
&& cpu
!= cp
->cpu_id
)
9959 buf
= &bufs
[cp
->cpu_id
];
9962 * If there is already a buffer allocated for this CPU, it
9963 * is only possible that this is a DR event. In this case,
9964 * the buffer size must match our specified size.
9966 if (buf
->dtb_tomax
!= NULL
) {
9967 ASSERT(buf
->dtb_size
== size
);
9971 ASSERT(buf
->dtb_xamot
== NULL
);
9973 if ((buf
->dtb_tomax
= kmem_zalloc(size
, KM_NOSLEEP
)) == NULL
)
9976 buf
->dtb_size
= size
;
9977 buf
->dtb_flags
= flags
;
9978 buf
->dtb_offset
= 0;
9981 if (flags
& DTRACEBUF_NOSWITCH
)
9984 if ((buf
->dtb_xamot
= kmem_zalloc(size
, KM_NOSLEEP
)) == NULL
)
9986 } while ((cp
= cp
->cpu_next
) != cpu_list
);
9994 if (cpu
!= DTRACE_CPUALL
&& cpu
!= cp
->cpu_id
)
9997 buf
= &bufs
[cp
->cpu_id
];
9999 if (buf
->dtb_xamot
!= NULL
) {
10000 ASSERT(buf
->dtb_tomax
!= NULL
);
10001 ASSERT(buf
->dtb_size
== size
);
10002 kmem_free(buf
->dtb_xamot
, size
);
10005 if (buf
->dtb_tomax
!= NULL
) {
10006 ASSERT(buf
->dtb_size
== size
);
10007 kmem_free(buf
->dtb_tomax
, size
);
10010 buf
->dtb_tomax
= NULL
;
10011 buf
->dtb_xamot
= NULL
;
10013 } while ((cp
= cp
->cpu_next
) != cpu_list
);
10019 * Note: called from probe context. This function just increments the drop
10020 * count on a buffer. It has been made a function to allow for the
10021 * possibility of understanding the source of mysterious drop counts. (A
10022 * problem for which one may be particularly disappointed that DTrace cannot
10023 * be used to understand DTrace.)
10026 dtrace_buffer_drop(dtrace_buffer_t
*buf
)
10032 * Note: called from probe context. This function is called to reserve space
10033 * in a buffer. If mstate is non-NULL, sets the scratch base and size in the
10034 * mstate. Returns the new offset in the buffer, or a negative value if an
10035 * error has occurred.
10038 dtrace_buffer_reserve(dtrace_buffer_t
*buf
, size_t needed
, size_t align
,
10039 dtrace_state_t
*state
, dtrace_mstate_t
*mstate
)
10041 intptr_t offs
= buf
->dtb_offset
, soffs
;
10046 if (buf
->dtb_flags
& DTRACEBUF_INACTIVE
)
10049 if ((tomax
= buf
->dtb_tomax
) == NULL
) {
10050 dtrace_buffer_drop(buf
);
10054 if (!(buf
->dtb_flags
& (DTRACEBUF_RING
| DTRACEBUF_FILL
))) {
10055 while (offs
& (align
- 1)) {
10057 * Assert that our alignment is off by a number which
10058 * is itself sizeof (uint32_t) aligned.
10060 ASSERT(!((align
- (offs
& (align
- 1))) &
10061 (sizeof (uint32_t) - 1)));
10062 DTRACE_STORE(uint32_t, tomax
, offs
, DTRACE_EPIDNONE
);
10063 offs
+= sizeof (uint32_t);
10066 if ((soffs
= offs
+ needed
) > buf
->dtb_size
) {
10067 dtrace_buffer_drop(buf
);
10071 if (mstate
== NULL
)
10074 mstate
->dtms_scratch_base
= (uintptr_t)tomax
+ soffs
;
10075 mstate
->dtms_scratch_size
= buf
->dtb_size
- soffs
;
10076 mstate
->dtms_scratch_ptr
= mstate
->dtms_scratch_base
;
10081 if (buf
->dtb_flags
& DTRACEBUF_FILL
) {
10082 if (state
->dts_activity
!= DTRACE_ACTIVITY_COOLDOWN
&&
10083 (buf
->dtb_flags
& DTRACEBUF_FULL
))
10088 total
= needed
+ (offs
& (align
- 1));
10091 * For a ring buffer, life is quite a bit more complicated. Before
10092 * we can store any padding, we need to adjust our wrapping offset.
10093 * (If we've never before wrapped or we're not about to, no adjustment
10096 if ((buf
->dtb_flags
& DTRACEBUF_WRAPPED
) ||
10097 offs
+ total
> buf
->dtb_size
) {
10098 woffs
= buf
->dtb_xamot_offset
;
10100 if (offs
+ total
> buf
->dtb_size
) {
10102 * We can't fit in the end of the buffer. First, a
10103 * sanity check that we can fit in the buffer at all.
10105 if (total
> buf
->dtb_size
) {
10106 dtrace_buffer_drop(buf
);
10111 * We're going to be storing at the top of the buffer,
10112 * so now we need to deal with the wrapped offset. We
10113 * only reset our wrapped offset to 0 if it is
10114 * currently greater than the current offset. If it
10115 * is less than the current offset, it is because a
10116 * previous allocation induced a wrap -- but the
10117 * allocation didn't subsequently take the space due
10118 * to an error or false predicate evaluation. In this
10119 * case, we'll just leave the wrapped offset alone: if
10120 * the wrapped offset hasn't been advanced far enough
10121 * for this allocation, it will be adjusted in the
10124 if (buf
->dtb_flags
& DTRACEBUF_WRAPPED
) {
10132 * Now we know that we're going to be storing to the
10133 * top of the buffer and that there is room for us
10134 * there. We need to clear the buffer from the current
10135 * offset to the end (there may be old gunk there).
10137 while (offs
< buf
->dtb_size
)
10141 * We need to set our offset to zero. And because we
10142 * are wrapping, we need to set the bit indicating as
10143 * much. We can also adjust our needed space back
10144 * down to the space required by the ECB -- we know
10145 * that the top of the buffer is aligned.
10149 buf
->dtb_flags
|= DTRACEBUF_WRAPPED
;
10152 * There is room for us in the buffer, so we simply
10153 * need to check the wrapped offset.
10155 if (woffs
< offs
) {
10157 * The wrapped offset is less than the offset.
10158 * This can happen if we allocated buffer space
10159 * that induced a wrap, but then we didn't
10160 * subsequently take the space due to an error
10161 * or false predicate evaluation. This is
10162 * okay; we know that _this_ allocation isn't
10163 * going to induce a wrap. We still can't
10164 * reset the wrapped offset to be zero,
10165 * however: the space may have been trashed in
10166 * the previous failed probe attempt. But at
10167 * least the wrapped offset doesn't need to
10168 * be adjusted at all...
10174 while (offs
+ total
> woffs
) {
10175 dtrace_epid_t epid
= *(uint32_t *)(tomax
+ woffs
);
10178 if (epid
== DTRACE_EPIDNONE
) {
10179 size
= sizeof (uint32_t);
10181 ASSERT(epid
<= state
->dts_necbs
);
10182 ASSERT(state
->dts_ecbs
[epid
- 1] != NULL
);
10184 size
= state
->dts_ecbs
[epid
- 1]->dte_size
;
10187 ASSERT(woffs
+ size
<= buf
->dtb_size
);
10190 if (woffs
+ size
== buf
->dtb_size
) {
10192 * We've reached the end of the buffer; we want
10193 * to set the wrapped offset to 0 and break
10194 * out. However, if the offs is 0, then we're
10195 * in a strange edge-condition: the amount of
10196 * space that we want to reserve plus the size
10197 * of the record that we're overwriting is
10198 * greater than the size of the buffer. This
10199 * is problematic because if we reserve the
10200 * space but subsequently don't consume it (due
10201 * to a failed predicate or error) the wrapped
10202 * offset will be 0 -- yet the EPID at offset 0
10203 * will not be committed. This situation is
10204 * relatively easy to deal with: if we're in
10205 * this case, the buffer is indistinguishable
10206 * from one that hasn't wrapped; we need only
10207 * finish the job by clearing the wrapped bit,
10208 * explicitly setting the offset to be 0, and
10209 * zero'ing out the old data in the buffer.
10212 buf
->dtb_flags
&= ~DTRACEBUF_WRAPPED
;
10213 buf
->dtb_offset
= 0;
10216 while (woffs
< buf
->dtb_size
)
10217 tomax
[woffs
++] = 0;
10228 * We have a wrapped offset. It may be that the wrapped offset
10229 * has become zero -- that's okay.
10231 buf
->dtb_xamot_offset
= woffs
;
10236 * Now we can plow the buffer with any necessary padding.
10238 while (offs
& (align
- 1)) {
10240 * Assert that our alignment is off by a number which
10241 * is itself sizeof (uint32_t) aligned.
10243 ASSERT(!((align
- (offs
& (align
- 1))) &
10244 (sizeof (uint32_t) - 1)));
10245 DTRACE_STORE(uint32_t, tomax
, offs
, DTRACE_EPIDNONE
);
10246 offs
+= sizeof (uint32_t);
10249 if (buf
->dtb_flags
& DTRACEBUF_FILL
) {
10250 if (offs
+ needed
> buf
->dtb_size
- state
->dts_reserve
) {
10251 buf
->dtb_flags
|= DTRACEBUF_FULL
;
10256 if (mstate
== NULL
)
10260 * For ring buffers and fill buffers, the scratch space is always
10261 * the inactive buffer.
10263 mstate
->dtms_scratch_base
= (uintptr_t)buf
->dtb_xamot
;
10264 mstate
->dtms_scratch_size
= buf
->dtb_size
;
10265 mstate
->dtms_scratch_ptr
= mstate
->dtms_scratch_base
;
10271 dtrace_buffer_polish(dtrace_buffer_t *buf)
10273     ASSERT(buf->dtb_flags & DTRACEBUF_RING);
10274     lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
10276     if (!(buf->dtb_flags & DTRACEBUF_WRAPPED))
10280      * We need to polish the ring buffer.  There are three cases:
10282      * - The first (and presumably most common) is that there is no gap
10283      *   between the buffer offset and the wrapped offset.  In this case,
10284      *   there is nothing in the buffer that isn't valid data; we can
10285      *   mark the buffer as polished and return.
10287      * - The second (less common than the first but still more common
10288      *   than the third) is that there is a gap between the buffer offset
10289      *   and the wrapped offset, and the wrapped offset is larger than the
10290      *   buffer offset.  This can happen because of an alignment issue, or
10291      *   can happen because of a call to dtrace_buffer_reserve() that
10292      *   didn't subsequently consume the buffer space.  In this case,
10293      *   we need to zero the data from the buffer offset to the wrapped
10296      * - The third (and least common) is that there is a gap between the
10297      *   buffer offset and the wrapped offset, but the wrapped offset is
10298      *   _less_ than the buffer offset.  This can only happen because a
10299      *   call to dtrace_buffer_reserve() induced a wrap, but the space
10300      *   was not subsequently consumed.  In this case, we need to zero the
10301      *   space from the offset to the end of the buffer _and_ from the
10302      *   top of the buffer to the wrapped offset.
10304     if (buf->dtb_offset < buf->dtb_xamot_offset) {
10305         bzero(buf->dtb_tomax + buf->dtb_offset,
10306             buf->dtb_xamot_offset - buf->dtb_offset);
10309     if (buf->dtb_offset > buf->dtb_xamot_offset) {
10310         bzero(buf->dtb_tomax + buf->dtb_offset,
10311             buf->dtb_size - buf->dtb_offset);
10312         bzero(buf->dtb_tomax, buf->dtb_xamot_offset);
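
/*
 * Illustrative sketch (not part of the original source): the three polish
 * cases enumerated above reduce to zeroing whatever gap lies between the
 * current offset and the wrapped offset.  The standalone routine below
 * shows that logic against a plain byte buffer; the name and parameters
 * are assumptions for illustration, not the kernel's own interfaces.
 */
#if 0   /* illustrative example -- not compiled with this file */
#include <stddef.h>
#include <string.h>

static void
example_ring_polish(char *base, size_t size, size_t offs, size_t woffs)
{
    if (offs == woffs)
        return;                     /* no gap: everything is valid data */

    if (offs < woffs) {
        /* gap lies between the offset and the wrapped offset */
        memset(base + offs, 0, woffs - offs);
        return;
    }

    /* wrapped offset is behind the offset: zero the tail, then the head */
    memset(base + offs, 0, size - offs);
    memset(base, 0, woffs);
}
#endif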
10317 dtrace_buffer_free(dtrace_buffer_t *bufs)
10321     for (i = 0; i < NCPU; i++) {
10322         dtrace_buffer_t *buf = &bufs[i];
10324         if (buf->dtb_tomax == NULL) {
10325             ASSERT(buf->dtb_xamot == NULL);
10326             ASSERT(buf->dtb_size == 0);
10330         if (buf->dtb_xamot != NULL) {
10331             ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
10332             kmem_free(buf->dtb_xamot, buf->dtb_size);
10335         kmem_free(buf->dtb_tomax, buf->dtb_size);
10337         buf->dtb_tomax = NULL;
10338         buf->dtb_xamot = NULL;
10343  * DTrace Enabling Functions
10345 static dtrace_enabling_t *
10346 dtrace_enabling_create(dtrace_vstate_t *vstate)
10348     dtrace_enabling_t *enab;
10350     enab = kmem_zalloc(sizeof (dtrace_enabling_t), KM_SLEEP);
10351     enab->dten_vstate = vstate;
10357 dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb)
10359     dtrace_ecbdesc_t **ndesc;
10360     size_t osize, nsize;
10363      * We can't add to enablings after we've enabled them, or after we've
10366     ASSERT(enab->dten_probegen == 0);
10367     ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);
10369 #if defined(__APPLE__)
10370     if (ecb == NULL) return; /* XXX protection against gcc 4.0 botch on x86 */
10371 #endif /* __APPLE__ */
10373     if (enab->dten_ndesc < enab->dten_maxdesc) {
10374         enab->dten_desc[enab->dten_ndesc++] = ecb;
10378     osize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);
10380     if (enab->dten_maxdesc == 0) {
10381         enab->dten_maxdesc = 1;
10383         enab->dten_maxdesc <<= 1;
10386     ASSERT(enab->dten_ndesc < enab->dten_maxdesc);
10388     nsize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);
10389     ndesc = kmem_zalloc(nsize, KM_SLEEP);
10390     bcopy(enab->dten_desc, ndesc, osize);
10391     kmem_free(enab->dten_desc, osize);
10393     enab->dten_desc = ndesc;
10394     enab->dten_desc[enab->dten_ndesc++] = ecb;
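
/*
 * Illustrative sketch (not part of the original source): dtrace_enabling_add()
 * above grows the descriptor array geometrically -- the capacity starts at
 * one and doubles each time it fills, with the old contents copied into the
 * new allocation.  The standalone code below demonstrates that growth policy
 * with malloc()/free() standing in for the kernel allocator; all names here
 * are assumptions for illustration only.
 */
#if 0   /* illustrative example -- not compiled with this file */
#include <stdlib.h>
#include <string.h>

typedef struct example_enabling {
    void   **xe_desc;       /* descriptor array */
    size_t   xe_ndesc;      /* descriptors in use */
    size_t   xe_maxdesc;    /* current capacity */
} example_enabling_t;

static int
example_enabling_add(example_enabling_t *enab, void *desc)
{
    if (enab->xe_ndesc == enab->xe_maxdesc) {
        size_t nmax = (enab->xe_maxdesc == 0) ? 1 : enab->xe_maxdesc << 1;
        void **ndesc = calloc(nmax, sizeof (void *));

        if (ndesc == NULL)
            return (-1);

        /* copy the old descriptors and release the old array */
        memcpy(ndesc, enab->xe_desc, enab->xe_ndesc * sizeof (void *));
        free(enab->xe_desc);
        enab->xe_desc = ndesc;
        enab->xe_maxdesc = nmax;
    }

    enab->xe_desc[enab->xe_ndesc++] = desc;
    return (0);
}
#endif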
10398 dtrace_enabling_addlike(dtrace_enabling_t
*enab
, dtrace_ecbdesc_t
*ecb
,
10399 dtrace_probedesc_t
*pd
)
10401 dtrace_ecbdesc_t
*new;
10402 dtrace_predicate_t
*pred
;
10403 dtrace_actdesc_t
*act
;
10406 * We're going to create a new ECB description that matches the
10407 * specified ECB in every way, but has the specified probe description.
10409 new = kmem_zalloc(sizeof (dtrace_ecbdesc_t
), KM_SLEEP
);
10411 if ((pred
= ecb
->dted_pred
.dtpdd_predicate
) != NULL
)
10412 dtrace_predicate_hold(pred
);
10414 for (act
= ecb
->dted_action
; act
!= NULL
; act
= act
->dtad_next
)
10415 dtrace_actdesc_hold(act
);
10417 new->dted_action
= ecb
->dted_action
;
10418 new->dted_pred
= ecb
->dted_pred
;
10419 new->dted_probe
= *pd
;
10420 new->dted_uarg
= ecb
->dted_uarg
;
10422 dtrace_enabling_add(enab
, new);
10426 dtrace_enabling_dump(dtrace_enabling_t
*enab
)
10430 for (i
= 0; i
< enab
->dten_ndesc
; i
++) {
10431 dtrace_probedesc_t
*desc
= &enab
->dten_desc
[i
]->dted_probe
;
10433 cmn_err(CE_NOTE
, "enabling probe %d (%s:%s:%s:%s)", i
,
10434 desc
->dtpd_provider
, desc
->dtpd_mod
,
10435 desc
->dtpd_func
, desc
->dtpd_name
);
10440 dtrace_enabling_destroy(dtrace_enabling_t
*enab
)
10443 dtrace_ecbdesc_t
*ep
;
10444 dtrace_vstate_t
*vstate
= enab
->dten_vstate
;
10446 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
10448 for (i
= 0; i
< enab
->dten_ndesc
; i
++) {
10449 dtrace_actdesc_t
*act
, *next
;
10450 dtrace_predicate_t
*pred
;
10452 ep
= enab
->dten_desc
[i
];
10454 if ((pred
= ep
->dted_pred
.dtpdd_predicate
) != NULL
)
10455 dtrace_predicate_release(pred
, vstate
);
10457 for (act
= ep
->dted_action
; act
!= NULL
; act
= next
) {
10458 next
= act
->dtad_next
;
10459 dtrace_actdesc_release(act
, vstate
);
10462 kmem_free(ep
, sizeof (dtrace_ecbdesc_t
));
10465 kmem_free(enab
->dten_desc
,
10466 enab
->dten_maxdesc
* sizeof (dtrace_enabling_t
*));
10469 * If this was a retained enabling, decrement the dts_nretained count
10470 * and take it off of the dtrace_retained list.
10472 if (enab
->dten_prev
!= NULL
|| enab
->dten_next
!= NULL
||
10473 dtrace_retained
== enab
) {
10474 ASSERT(enab
->dten_vstate
->dtvs_state
!= NULL
);
10475 ASSERT(enab
->dten_vstate
->dtvs_state
->dts_nretained
> 0);
10476 enab
->dten_vstate
->dtvs_state
->dts_nretained
--;
10479 if (enab
->dten_prev
== NULL
) {
10480 if (dtrace_retained
== enab
) {
10481 dtrace_retained
= enab
->dten_next
;
10483 if (dtrace_retained
!= NULL
)
10484 dtrace_retained
->dten_prev
= NULL
;
10487 ASSERT(enab
!= dtrace_retained
);
10488 ASSERT(dtrace_retained
!= NULL
);
10489 enab
->dten_prev
->dten_next
= enab
->dten_next
;
10492 if (enab
->dten_next
!= NULL
) {
10493 ASSERT(dtrace_retained
!= NULL
);
10494 enab
->dten_next
->dten_prev
= enab
->dten_prev
;
10497 kmem_free(enab
, sizeof (dtrace_enabling_t
));
10501 dtrace_enabling_retain(dtrace_enabling_t *enab)
10503     dtrace_state_t *state;
10505     lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
10506     ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);
10507     ASSERT(enab->dten_vstate != NULL);
10509     state = enab->dten_vstate->dtvs_state;
10510     ASSERT(state != NULL);
10513      * We only allow each state to retain dtrace_retain_max enablings.
10515     if (state->dts_nretained >= dtrace_retain_max)
10518     state->dts_nretained++;
10520     if (dtrace_retained == NULL) {
10521         dtrace_retained = enab;
10525     enab->dten_next = dtrace_retained;
10526     dtrace_retained->dten_prev = enab;
10527     dtrace_retained = enab;
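
/*
 * Illustrative sketch (not part of the original source): retaining an
 * enabling, as above, is a head insertion into a doubly-linked list rooted
 * at a single global pointer.  The standalone code below shows that list
 * manipulation with a simplified node type; the names are assumptions for
 * illustration only.
 */
#if 0   /* illustrative example -- not compiled with this file */
#include <stddef.h>

typedef struct example_node {
    struct example_node *xn_next;
    struct example_node *xn_prev;
} example_node_t;

static example_node_t *example_retained = NULL;

static void
example_retain(example_node_t *node)
{
    node->xn_next = NULL;
    node->xn_prev = NULL;

    if (example_retained == NULL) {
        example_retained = node;
        return;
    }

    /* push onto the head; the old head points back at the new node */
    node->xn_next = example_retained;
    example_retained->xn_prev = node;
    example_retained = node;
}
#endif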
10533 dtrace_enabling_replicate(dtrace_state_t
*state
, dtrace_probedesc_t
*match
,
10534 dtrace_probedesc_t
*create
)
10536 dtrace_enabling_t
*new, *enab
;
10537 int found
= 0, err
= ENOENT
;
10539 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
10540 ASSERT(strlen(match
->dtpd_provider
) < DTRACE_PROVNAMELEN
);
10541 ASSERT(strlen(match
->dtpd_mod
) < DTRACE_MODNAMELEN
);
10542 ASSERT(strlen(match
->dtpd_func
) < DTRACE_FUNCNAMELEN
);
10543 ASSERT(strlen(match
->dtpd_name
) < DTRACE_NAMELEN
);
10545 new = dtrace_enabling_create(&state
->dts_vstate
);
10548 * Iterate over all retained enablings, looking for enablings that
10549 * match the specified state.
10551 for (enab
= dtrace_retained
; enab
!= NULL
; enab
= enab
->dten_next
) {
10555 * dtvs_state can only be NULL for helper enablings -- and
10556 * helper enablings can't be retained.
10558 ASSERT(enab
->dten_vstate
->dtvs_state
!= NULL
);
10560 if (enab
->dten_vstate
->dtvs_state
!= state
)
10564 * Now iterate over each probe description; we're looking for
10565 * an exact match to the specified probe description.
10567 for (i
= 0; i
< enab
->dten_ndesc
; i
++) {
10568 dtrace_ecbdesc_t
*ep
= enab
->dten_desc
[i
];
10569 dtrace_probedesc_t
*pd
= &ep
->dted_probe
;
10571 if (strcmp(pd
->dtpd_provider
, match
->dtpd_provider
))
10574 if (strcmp(pd
->dtpd_mod
, match
->dtpd_mod
))
10577 if (strcmp(pd
->dtpd_func
, match
->dtpd_func
))
10580 if (strcmp(pd
->dtpd_name
, match
->dtpd_name
))
10584 * We have a winning probe! Add it to our growing
10588 dtrace_enabling_addlike(new, ep
, create
);
10592 if (!found
|| (err
= dtrace_enabling_retain(new)) != 0) {
10593 dtrace_enabling_destroy(new);
10601 dtrace_enabling_retract(dtrace_state_t *state)
10603     dtrace_enabling_t *enab, *next;
10605     lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
10608      * Iterate over all retained enablings, destroying the enablings
10609      * retained for the specified state.
10611     for (enab = dtrace_retained; enab != NULL; enab = next) {
10612         next = enab->dten_next;
10615          * dtvs_state can only be NULL for helper enablings -- and
10616          * helper enablings can't be retained.
10618         ASSERT(enab->dten_vstate->dtvs_state != NULL);
10620         if (enab->dten_vstate->dtvs_state == state) {
10621             ASSERT(state->dts_nretained > 0);
10622             dtrace_enabling_destroy(enab);
10626     ASSERT(state->dts_nretained == 0);
10630 dtrace_enabling_match(dtrace_enabling_t
*enab
, int *nmatched
)
10635 lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
);
10636 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
10638 for (i
= 0; i
< enab
->dten_ndesc
; i
++) {
10639 dtrace_ecbdesc_t
*ep
= enab
->dten_desc
[i
];
10641 enab
->dten_current
= ep
;
10642 enab
->dten_error
= 0;
10644 matched
+= dtrace_probe_enable(&ep
->dted_probe
, enab
);
10646 if (enab
->dten_error
!= 0) {
10648 * If we get an error half-way through enabling the
10649 * probes, we kick out -- perhaps with some number of
10650 * them enabled. Leaving enabled probes enabled may
10651 * be slightly confusing for user-level, but we expect
10652 * that no one will attempt to actually drive on in
10653 * the face of such errors. If this is an anonymous
10654 * enabling (indicated with a NULL nmatched pointer),
10655 * we cmn_err() a message. We aren't expecting to
10656 * get such an error -- such as it can exist at all,
10657 * it would be a result of corrupted DOF in the driver
10660 if (nmatched
== NULL
) {
10661 cmn_err(CE_WARN
, "dtrace_enabling_match() "
10662 "error on %p: %d", (void *)ep
,
10666 return (enab
->dten_error
);
10670 enab
->dten_probegen
= dtrace_probegen
;
10671 if (nmatched
!= NULL
)
10672 *nmatched
= matched
;
10678 dtrace_enabling_matchall(void)
10680     dtrace_enabling_t *enab;
10682     lck_mtx_lock(&cpu_lock);
10683     lck_mtx_lock(&dtrace_lock);
10686      * Because we can be called after dtrace_detach() has been called, we
10687      * cannot assert that there are retained enablings.  We can safely
10688      * load from dtrace_retained, however:  the taskq_destroy() at the
10689      * end of dtrace_detach() will block pending our completion.
10691     for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next)
10692         (void) dtrace_enabling_match(enab, NULL);
10694     lck_mtx_unlock(&dtrace_lock);
10695     lck_mtx_unlock(&cpu_lock);
10699 dtrace_enabling_matchstate(dtrace_state_t
*state
, int *nmatched
)
10701 dtrace_enabling_t
*enab
;
10702 int matched
, total
= 0, err
;
10704 lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
);
10705 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
10707 for (enab
= dtrace_retained
; enab
!= NULL
; enab
= enab
->dten_next
) {
10708 ASSERT(enab
->dten_vstate
->dtvs_state
!= NULL
);
10710 if (enab
->dten_vstate
->dtvs_state
!= state
)
10713 if ((err
= dtrace_enabling_match(enab
, &matched
)) != 0)
10719 if (nmatched
!= NULL
)
10726 * If an enabling is to be enabled without having matched probes (that is, if
10727 * dtrace_state_go() is to be called on the underlying dtrace_state_t), the
10728 * enabling must be _primed_ by creating an ECB for every ECB description.
10729 * This must be done to assure that we know the number of speculations, the
10730 * number of aggregations, the minimum buffer size needed, etc. before we
10731 * transition out of DTRACE_ACTIVITY_INACTIVE. To do this without actually
10732 * enabling any probes, we create ECBs for every ECB decription, but with a
10733 * NULL probe -- which is exactly what this function does.
10736 dtrace_enabling_prime(dtrace_state_t
*state
)
10738 dtrace_enabling_t
*enab
;
10741 for (enab
= dtrace_retained
; enab
!= NULL
; enab
= enab
->dten_next
) {
10742 ASSERT(enab
->dten_vstate
->dtvs_state
!= NULL
);
10744 if (enab
->dten_vstate
->dtvs_state
!= state
)
10748 * We don't want to prime an enabling more than once, lest
10749 * we allow a malicious user to induce resource exhaustion.
10750 * (The ECBs that result from priming an enabling aren't
10751 * leaked -- but they also aren't deallocated until the
10752 * consumer state is destroyed.)
10754 if (enab
->dten_primed
)
10757 for (i
= 0; i
< enab
->dten_ndesc
; i
++) {
10758 enab
->dten_current
= enab
->dten_desc
[i
];
10759 (void) dtrace_probe_enable(NULL
, enab
);
10762 enab
->dten_primed
= 1;
10767 * Called to indicate that probes should be provided due to retained
10768 * enablings. This is implemented in terms of dtrace_probe_provide(), but it
10769 * must take an initial lap through the enabling calling the dtps_provide()
10770 * entry point explicitly to allow for autocreated probes.
10773 dtrace_enabling_provide(dtrace_provider_t
*prv
)
10776 dtrace_probedesc_t desc
;
10778 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
10779 lck_mtx_assert(&dtrace_provider_lock
, LCK_MTX_ASSERT_OWNED
);
10783 prv
= dtrace_provider
;
10787 dtrace_enabling_t
*enab
= dtrace_retained
;
10788 void *parg
= prv
->dtpv_arg
;
10790 for (; enab
!= NULL
; enab
= enab
->dten_next
) {
10791 for (i
= 0; i
< enab
->dten_ndesc
; i
++) {
10792 desc
= enab
->dten_desc
[i
]->dted_probe
;
10793 lck_mtx_unlock(&dtrace_lock
);
10794 prv
->dtpv_pops
.dtps_provide(parg
, &desc
);
10795 lck_mtx_lock(&dtrace_lock
);
10798 } while (all
&& (prv
= prv
->dtpv_next
) != NULL
);
10800 lck_mtx_unlock(&dtrace_lock
);
10801 dtrace_probe_provide(NULL
, all
? NULL
: prv
);
10802 lck_mtx_lock(&dtrace_lock
);
10806  * DTrace DOF Functions
10810 dtrace_dof_error(dof_hdr_t *dof, const char *str)
10812     if (dtrace_err_verbose)
10813         cmn_err(CE_WARN, "failed to process DOF: %s", str);
10815 #ifdef DTRACE_ERRDEBUG
10816     dtrace_errdebug(str);
10821 * Create DOF out of a currently enabled state. Right now, we only create
10822 * DOF containing the run-time options -- but this could be expanded to create
10823 * complete DOF representing the enabled state.
10826 dtrace_dof_create(dtrace_state_t
*state
)
10830 dof_optdesc_t
*opt
;
10831 int i
, len
= sizeof (dof_hdr_t
) +
10832 roundup(sizeof (dof_sec_t
), sizeof (uint64_t)) +
10833 sizeof (dof_optdesc_t
) * DTRACEOPT_MAX
;
10835 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
10837 dof
= dt_kmem_zalloc_aligned(len
, 8, KM_SLEEP
);
10838 dof
->dofh_ident
[DOF_ID_MAG0
] = DOF_MAG_MAG0
;
10839 dof
->dofh_ident
[DOF_ID_MAG1
] = DOF_MAG_MAG1
;
10840 dof
->dofh_ident
[DOF_ID_MAG2
] = DOF_MAG_MAG2
;
10841 dof
->dofh_ident
[DOF_ID_MAG3
] = DOF_MAG_MAG3
;
10843 dof
->dofh_ident
[DOF_ID_MODEL
] = DOF_MODEL_NATIVE
;
10844 dof
->dofh_ident
[DOF_ID_ENCODING
] = DOF_ENCODE_NATIVE
;
10845 dof
->dofh_ident
[DOF_ID_VERSION
] = DOF_VERSION
;
10846 dof
->dofh_ident
[DOF_ID_DIFVERS
] = DIF_VERSION
;
10847 dof
->dofh_ident
[DOF_ID_DIFIREG
] = DIF_DIR_NREGS
;
10848 dof
->dofh_ident
[DOF_ID_DIFTREG
] = DIF_DTR_NREGS
;
10850 dof
->dofh_flags
= 0;
10851 dof
->dofh_hdrsize
= sizeof (dof_hdr_t
);
10852 dof
->dofh_secsize
= sizeof (dof_sec_t
);
10853 dof
->dofh_secnum
= 1; /* only DOF_SECT_OPTDESC */
10854 dof
->dofh_secoff
= sizeof (dof_hdr_t
);
10855 dof
->dofh_loadsz
= len
;
10856 dof
->dofh_filesz
= len
;
10860 * Fill in the option section header...
10862 sec
= (dof_sec_t
*)((uintptr_t)dof
+ sizeof (dof_hdr_t
));
10863 sec
->dofs_type
= DOF_SECT_OPTDESC
;
10864 sec
->dofs_align
= sizeof (uint64_t);
10865 sec
->dofs_flags
= DOF_SECF_LOAD
;
10866 sec
->dofs_entsize
= sizeof (dof_optdesc_t
);
10868 opt
= (dof_optdesc_t
*)((uintptr_t)sec
+
10869 roundup(sizeof (dof_sec_t
), sizeof (uint64_t)));
10871 sec
->dofs_offset
= (uintptr_t)opt
- (uintptr_t)dof
;
10872 sec
->dofs_size
= sizeof (dof_optdesc_t
) * DTRACEOPT_MAX
;
10874 for (i
= 0; i
< DTRACEOPT_MAX
; i
++) {
10875 opt
[i
].dofo_option
= i
;
10876 opt
[i
].dofo_strtab
= DOF_SECIDX_NONE
;
10877 opt
[i
].dofo_value
= state
->dts_options
[i
];
10884 #if defined(__APPLE__)
10885 dtrace_dof_copyin(user_addr_t uarg
, int *errp
)
10887 dtrace_dof_copyin(uintptr_t uarg
, int *errp
)
10890 dof_hdr_t hdr
, *dof
;
10892 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_NOTOWNED
);
10895 * First, we're going to copyin() the sizeof (dof_hdr_t).
10897 #if defined(__APPLE__)
10898 if (copyin(uarg
, &hdr
, sizeof (hdr
)) != 0) {
10900 if (copyin((void *)uarg
, &hdr
, sizeof (hdr
)) != 0) {
10902 dtrace_dof_error(NULL
, "failed to copyin DOF header");
10908 * Now we'll allocate the entire DOF and copy it in -- provided
10909 * that the length isn't outrageous.
10911 if (hdr
.dofh_loadsz
>= dtrace_dof_maxsize
) {
10912 dtrace_dof_error(&hdr
, "load size exceeds maximum");
10917 if (hdr
.dofh_loadsz
< sizeof (hdr
)) {
10918 dtrace_dof_error(&hdr
, "invalid load size");
10923 dof
= dt_kmem_alloc_aligned(hdr
.dofh_loadsz
, 8, KM_SLEEP
);
10925 #if defined(__APPLE__)
10926 if (copyin(uarg
, dof
, hdr
.dofh_loadsz
) != 0) {
10928 if (copyin((void *)uarg
, dof
, hdr
.dofh_loadsz
) != 0) {
10930 dt_kmem_free_aligned(dof
, hdr
.dofh_loadsz
);
10938 #if defined(__APPLE__)
10941 dtrace_dof_copyin_from_proc(proc_t
* p
, user_addr_t uarg
, int *errp
)
10943 dof_hdr_t hdr
, *dof
;
10945 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_NOTOWNED
);
10948 * First, we're going to copyin() the sizeof (dof_hdr_t).
10950 if (uread(p
, &hdr
, sizeof(hdr
), uarg
) != KERN_SUCCESS
) {
10951 dtrace_dof_error(NULL
, "failed to copyin DOF header");
10957 * Now we'll allocate the entire DOF and copy it in -- provided
10958 * that the length isn't outrageous.
10960 if (hdr
.dofh_loadsz
>= dtrace_dof_maxsize
) {
10961 dtrace_dof_error(&hdr
, "load size exceeds maximum");
10966 if (hdr
.dofh_loadsz
< sizeof (hdr
)) {
10967 dtrace_dof_error(&hdr
, "invalid load size");
10972 dof
= dt_kmem_alloc_aligned(hdr
.dofh_loadsz
, 8, KM_SLEEP
);
10974 if (uread(p
, dof
, hdr
.dofh_loadsz
, uarg
) != KERN_SUCCESS
) {
10975 dt_kmem_free_aligned(dof
, hdr
.dofh_loadsz
);
10983 #endif /* __APPLE__ */
10986 dtrace_dof_property(const char *name)
10990     unsigned int len, i;
10994      * Unfortunately, arrays of values in .conf files are always (and
10995      * only) interpreted to be integer arrays.  We must read our DOF
10996      * as an integer array, and then squeeze it into a byte array.
10998     if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0,
10999         (char *)name, (int **)&buf, &len) != DDI_PROP_SUCCESS)
11002     for (i = 0; i < len; i++)
11003         buf[i] = (uchar_t)(((int *)buf)[i]);
11005     if (len < sizeof (dof_hdr_t)) {
11006         ddi_prop_free(buf);
11007         dtrace_dof_error(NULL, "truncated header");
11011     if (len < (loadsz = ((dof_hdr_t *)buf)->dofh_loadsz)) {
11012         ddi_prop_free(buf);
11013         dtrace_dof_error(NULL, "truncated DOF");
11017     if (loadsz >= dtrace_dof_maxsize) {
11018         ddi_prop_free(buf);
11019         dtrace_dof_error(NULL, "oversized DOF");
11023     dof = dt_kmem_alloc_aligned(loadsz, 8, KM_SLEEP);
11024     bcopy(buf, dof, loadsz);
11025     ddi_prop_free(buf);
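
/*
 * Illustrative sketch (not part of the original source): the "squeeze" noted
 * above narrows each element of an integer array (one DOF byte per int, as
 * the property interface delivers it) down to a byte, in place.  The
 * left-to-right copy is safe because the destination index never overtakes
 * the source element being read.  The name and signature below are
 * assumptions for illustration only.
 */
#if 0   /* illustrative example -- not compiled with this file */
#include <stddef.h>

static void
example_squeeze_ints_to_bytes(void *buf, size_t nelems)
{
    unsigned char *dst = buf;
    const int *src = buf;
    size_t i;

    for (i = 0; i < nelems; i++)
        dst[i] = (unsigned char)src[i];    /* narrow each int to one byte */
}
#endif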
11031 dtrace_dof_destroy(dof_hdr_t *dof)
11033     dt_kmem_free_aligned(dof, dof->dofh_loadsz);
11037  * Return the dof_sec_t pointer corresponding to a given section index.  If the
11038  * index is not valid, dtrace_dof_error() is called and NULL is returned.  If
11039  * a type other than DOF_SECT_NONE is specified, the header is checked against
11040  * this type and NULL is returned if the types do not match.
11043 dtrace_dof_sect(dof_hdr_t *dof, uint32_t type, dof_secidx_t i)
11045     dof_sec_t *sec = (dof_sec_t *)(uintptr_t)
11046         ((uintptr_t)dof + dof->dofh_secoff + i * dof->dofh_secsize);
11048     if (i >= dof->dofh_secnum) {
11049         dtrace_dof_error(dof, "referenced section index is invalid");
11053     if (!(sec->dofs_flags & DOF_SECF_LOAD)) {
11054         dtrace_dof_error(dof, "referenced section is not loadable");
11058     if (type != DOF_SECT_NONE && type != sec->dofs_type) {
11059         dtrace_dof_error(dof, "referenced section is the wrong type");
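
/*
 * Illustrative sketch (not part of the original source): the routine above
 * locates a section header at (DOF base + dofh_secoff + index * dofh_secsize)
 * and validates the index against dofh_secnum.  The standalone code below
 * shows the same address arithmetic with a reduced stand-in header layout
 * and with the bounds check performed before the pointer is formed; the
 * types and names are assumptions for illustration only.
 */
#if 0   /* illustrative example -- not compiled with this file */
#include <stdint.h>
#include <stddef.h>

typedef struct example_dof_hdr {
    uint64_t xh_secoff;     /* offset of the section header table */
    uint32_t xh_secnum;     /* number of section headers */
    uint32_t xh_secsize;    /* size of each section header */
} example_dof_hdr_t;

typedef struct example_dof_sec {
    uint32_t xs_type;
    uint32_t xs_flags;
} example_dof_sec_t;

static const example_dof_sec_t *
example_dof_sect(const example_dof_hdr_t *dof, uint32_t idx)
{
    const uint8_t *base = (const uint8_t *)dof;

    if (idx >= dof->xh_secnum)
        return (NULL);      /* referenced section index is invalid */

    return ((const example_dof_sec_t *)
        (base + dof->xh_secoff + (size_t)idx * dof->xh_secsize));
}
#endif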
11066 static dtrace_probedesc_t
*
11067 dtrace_dof_probedesc(dof_hdr_t
*dof
, dof_sec_t
*sec
, dtrace_probedesc_t
*desc
)
11069 dof_probedesc_t
*probe
;
11071 uintptr_t daddr
= (uintptr_t)dof
;
11075 if (sec
->dofs_type
!= DOF_SECT_PROBEDESC
) {
11076 dtrace_dof_error(dof
, "invalid probe section");
11080 if (sec
->dofs_align
!= sizeof (dof_secidx_t
)) {
11081 dtrace_dof_error(dof
, "bad alignment in probe description");
11085 if (sec
->dofs_offset
+ sizeof (dof_probedesc_t
) > dof
->dofh_loadsz
) {
11086 dtrace_dof_error(dof
, "truncated probe description");
11090 probe
= (dof_probedesc_t
*)(uintptr_t)(daddr
+ sec
->dofs_offset
);
11091 strtab
= dtrace_dof_sect(dof
, DOF_SECT_STRTAB
, probe
->dofp_strtab
);
11093 if (strtab
== NULL
)
11096 str
= daddr
+ strtab
->dofs_offset
;
11097 size
= strtab
->dofs_size
;
11099 if (probe
->dofp_provider
>= strtab
->dofs_size
) {
11100 dtrace_dof_error(dof
, "corrupt probe provider");
11104 (void) strncpy(desc
->dtpd_provider
,
11105 (char *)(str
+ probe
->dofp_provider
),
11106 MIN(DTRACE_PROVNAMELEN
- 1, size
- probe
->dofp_provider
));
11108 if (probe
->dofp_mod
>= strtab
->dofs_size
) {
11109 dtrace_dof_error(dof
, "corrupt probe module");
11113 (void) strncpy(desc
->dtpd_mod
, (char *)(str
+ probe
->dofp_mod
),
11114 MIN(DTRACE_MODNAMELEN
- 1, size
- probe
->dofp_mod
));
11116 if (probe
->dofp_func
>= strtab
->dofs_size
) {
11117 dtrace_dof_error(dof
, "corrupt probe function");
11121 (void) strncpy(desc
->dtpd_func
, (char *)(str
+ probe
->dofp_func
),
11122 MIN(DTRACE_FUNCNAMELEN
- 1, size
- probe
->dofp_func
));
11124 if (probe
->dofp_name
>= strtab
->dofs_size
) {
11125 dtrace_dof_error(dof
, "corrupt probe name");
11129 (void) strncpy(desc
->dtpd_name
, (char *)(str
+ probe
->dofp_name
),
11130 MIN(DTRACE_NAMELEN
- 1, size
- probe
->dofp_name
));
11135 static dtrace_difo_t
*
11136 dtrace_dof_difo(dof_hdr_t
*dof
, dof_sec_t
*sec
, dtrace_vstate_t
*vstate
,
11141 dof_difohdr_t
*dofd
;
11142 uintptr_t daddr
= (uintptr_t)dof
;
11143 size_t max
= dtrace_difo_maxsize
;
11146 static const struct {
11154 { DOF_SECT_DIF
, offsetof(dtrace_difo_t
, dtdo_buf
),
11155 offsetof(dtrace_difo_t
, dtdo_len
), sizeof (dif_instr_t
),
11156 sizeof (dif_instr_t
), "multiple DIF sections" },
11158 { DOF_SECT_INTTAB
, offsetof(dtrace_difo_t
, dtdo_inttab
),
11159 offsetof(dtrace_difo_t
, dtdo_intlen
), sizeof (uint64_t),
11160 sizeof (uint64_t), "multiple integer tables" },
11162 { DOF_SECT_STRTAB
, offsetof(dtrace_difo_t
, dtdo_strtab
),
11163 offsetof(dtrace_difo_t
, dtdo_strlen
), 0,
11164 sizeof (char), "multiple string tables" },
11166 { DOF_SECT_VARTAB
, offsetof(dtrace_difo_t
, dtdo_vartab
),
11167 offsetof(dtrace_difo_t
, dtdo_varlen
), sizeof (dtrace_difv_t
),
11168 sizeof (uint_t
), "multiple variable tables" },
11170 #if !defined(__APPLE__)
11171 { DOF_SECT_NONE
, 0, 0, 0, NULL
}
11173 { DOF_SECT_NONE
, 0, 0, 0, 0, NULL
}
11174 #endif /* __APPLE__ */
11177 if (sec
->dofs_type
!= DOF_SECT_DIFOHDR
) {
11178 dtrace_dof_error(dof
, "invalid DIFO header section");
11182 if (sec
->dofs_align
!= sizeof (dof_secidx_t
)) {
11183 dtrace_dof_error(dof
, "bad alignment in DIFO header");
11187 if (sec
->dofs_size
< sizeof (dof_difohdr_t
) ||
11188 sec
->dofs_size
% sizeof (dof_secidx_t
)) {
11189 dtrace_dof_error(dof
, "bad size in DIFO header");
11193 dofd
= (dof_difohdr_t
*)(uintptr_t)(daddr
+ sec
->dofs_offset
);
11194 n
= (sec
->dofs_size
- sizeof (*dofd
)) / sizeof (dof_secidx_t
) + 1;
11196 dp
= kmem_zalloc(sizeof (dtrace_difo_t
), KM_SLEEP
);
11197 dp
->dtdo_rtype
= dofd
->dofd_rtype
;
11199 for (l
= 0; l
< n
; l
++) {
11204 if ((subsec
= dtrace_dof_sect(dof
, DOF_SECT_NONE
,
11205 dofd
->dofd_links
[l
])) == NULL
)
11206 goto err
; /* invalid section link */
11208 if (ttl
+ subsec
->dofs_size
> max
) {
11209 dtrace_dof_error(dof
, "exceeds maximum size");
11213 ttl
+= subsec
->dofs_size
;
11215 for (i
= 0; difo
[i
].section
!= DOF_SECT_NONE
; i
++) {
11216 if (subsec
->dofs_type
!= difo
[i
].section
)
11219 if (!(subsec
->dofs_flags
& DOF_SECF_LOAD
)) {
11220 dtrace_dof_error(dof
, "section not loaded");
11224 if (subsec
->dofs_align
!= difo
[i
].align
) {
11225 dtrace_dof_error(dof
, "bad alignment");
11229 bufp
= (void **)((uintptr_t)dp
+ difo
[i
].bufoffs
);
11230 lenp
= (uint32_t *)((uintptr_t)dp
+ difo
[i
].lenoffs
);
11232 if (*bufp
!= NULL
) {
11233 dtrace_dof_error(dof
, difo
[i
].msg
);
11237 if (difo
[i
].entsize
!= subsec
->dofs_entsize
) {
11238 dtrace_dof_error(dof
, "entry size mismatch");
11242 if (subsec
->dofs_entsize
!= 0 &&
11243 (subsec
->dofs_size
% subsec
->dofs_entsize
) != 0) {
11244 dtrace_dof_error(dof
, "corrupt entry size");
11248 *lenp
= subsec
->dofs_size
;
11249 *bufp
= kmem_alloc(subsec
->dofs_size
, KM_SLEEP
);
11250 bcopy((char *)(uintptr_t)(daddr
+ subsec
->dofs_offset
),
11251 *bufp
, subsec
->dofs_size
);
11253 if (subsec
->dofs_entsize
!= 0)
11254 *lenp
/= subsec
->dofs_entsize
;
11260 * If we encounter a loadable DIFO sub-section that is not
11261 * known to us, assume this is a broken program and fail.
11263 if (difo
[i
].section
== DOF_SECT_NONE
&&
11264 (subsec
->dofs_flags
& DOF_SECF_LOAD
)) {
11265 dtrace_dof_error(dof
, "unrecognized DIFO subsection");
11270 if (dp
->dtdo_buf
== NULL
) {
11272 * We can't have a DIF object without DIF text.
11274 dtrace_dof_error(dof
, "missing DIF text");
11279 * Before we validate the DIF object, run through the variable table
11280 * looking for the strings -- if any of their size are under, we'll set
11281 * their size to be the system-wide default string size. Note that
11282 * this should _not_ happen if the "strsize" option has been set --
11283 * in this case, the compiler should have set the size to reflect the
11284 * setting of the option.
11286 for (i
= 0; i
< dp
->dtdo_varlen
; i
++) {
11287 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
];
11288 dtrace_diftype_t
*t
= &v
->dtdv_type
;
11290 if (v
->dtdv_id
< DIF_VAR_OTHER_UBASE
)
11293 if (t
->dtdt_kind
== DIF_TYPE_STRING
&& t
->dtdt_size
== 0)
11294 t
->dtdt_size
= dtrace_strsize_default
;
11297 if (dtrace_difo_validate(dp
, vstate
, DIF_DIR_NREGS
, cr
) != 0)
11300 dtrace_difo_init(dp
, vstate
);
11304 kmem_free(dp
->dtdo_buf
, dp
->dtdo_len
* sizeof (dif_instr_t
));
11305 kmem_free(dp
->dtdo_inttab
, dp
->dtdo_intlen
* sizeof (uint64_t));
11306 kmem_free(dp
->dtdo_strtab
, dp
->dtdo_strlen
);
11307 kmem_free(dp
->dtdo_vartab
, dp
->dtdo_varlen
* sizeof (dtrace_difv_t
));
11309 kmem_free(dp
, sizeof (dtrace_difo_t
));
11313 static dtrace_predicate_t
*
11314 dtrace_dof_predicate(dof_hdr_t
*dof
, dof_sec_t
*sec
, dtrace_vstate_t
*vstate
,
11319 if ((dp
= dtrace_dof_difo(dof
, sec
, vstate
, cr
)) == NULL
)
11322 return (dtrace_predicate_create(dp
));
11325 static dtrace_actdesc_t
*
11326 dtrace_dof_actdesc(dof_hdr_t
*dof
, dof_sec_t
*sec
, dtrace_vstate_t
*vstate
,
11329 dtrace_actdesc_t
*act
, *first
= NULL
, *last
= NULL
, *next
;
11330 dof_actdesc_t
*desc
;
11331 dof_sec_t
*difosec
;
11333 uintptr_t daddr
= (uintptr_t)dof
;
11335 dtrace_actkind_t kind
;
11337 if (sec
->dofs_type
!= DOF_SECT_ACTDESC
) {
11338 dtrace_dof_error(dof
, "invalid action section");
11342 if (sec
->dofs_offset
+ sizeof (dof_actdesc_t
) > dof
->dofh_loadsz
) {
11343 dtrace_dof_error(dof
, "truncated action description");
11347 if (sec
->dofs_align
!= sizeof (uint64_t)) {
11348 dtrace_dof_error(dof
, "bad alignment in action description");
11352 if (sec
->dofs_size
< sec
->dofs_entsize
) {
11353 dtrace_dof_error(dof
, "section entry size exceeds total size");
11357 if (sec
->dofs_entsize
!= sizeof (dof_actdesc_t
)) {
11358 dtrace_dof_error(dof
, "bad entry size in action description");
11362 if (sec
->dofs_size
/ sec
->dofs_entsize
> dtrace_actions_max
) {
11363 dtrace_dof_error(dof
, "actions exceed dtrace_actions_max");
11367 for (offs
= 0; offs
< sec
->dofs_size
; offs
+= sec
->dofs_entsize
) {
11368 desc
= (dof_actdesc_t
*)(daddr
+
11369 (uintptr_t)sec
->dofs_offset
+ offs
);
11370 kind
= (dtrace_actkind_t
)desc
->dofa_kind
;
11372 if (DTRACEACT_ISPRINTFLIKE(kind
) &&
11373 (kind
!= DTRACEACT_PRINTA
||
11374 desc
->dofa_strtab
!= DOF_SECIDX_NONE
)) {
11380 * printf()-like actions must have a format string.
11382 if ((strtab
= dtrace_dof_sect(dof
,
11383 DOF_SECT_STRTAB
, desc
->dofa_strtab
)) == NULL
)
11386 str
= (char *)((uintptr_t)dof
+
11387 (uintptr_t)strtab
->dofs_offset
);
11389 for (i
= desc
->dofa_arg
; i
< strtab
->dofs_size
; i
++) {
11390 if (str
[i
] == '\0')
11394 if (i
>= strtab
->dofs_size
) {
11395 dtrace_dof_error(dof
, "bogus format string");
11399 if (i
== desc
->dofa_arg
) {
11400 dtrace_dof_error(dof
, "empty format string");
11404 i
-= desc
->dofa_arg
;
11405 fmt
= kmem_alloc(i
+ 1, KM_SLEEP
);
11406 bcopy(&str
[desc
->dofa_arg
], fmt
, i
+ 1);
11407 arg
= (uint64_t)(uintptr_t)fmt
;
11409 if (kind
== DTRACEACT_PRINTA
) {
11410 ASSERT(desc
->dofa_strtab
== DOF_SECIDX_NONE
);
11413 arg
= desc
->dofa_arg
;
11417 act
= dtrace_actdesc_create(kind
, desc
->dofa_ntuple
,
11418 desc
->dofa_uarg
, arg
);
11420 if (last
!= NULL
) {
11421 last
->dtad_next
= act
;
11428 if (desc
->dofa_difo
== DOF_SECIDX_NONE
)
11431 if ((difosec
= dtrace_dof_sect(dof
,
11432 DOF_SECT_DIFOHDR
, desc
->dofa_difo
)) == NULL
)
11435 act
->dtad_difo
= dtrace_dof_difo(dof
, difosec
, vstate
, cr
);
11437 if (act
->dtad_difo
== NULL
)
11441 ASSERT(first
!= NULL
);
11445 for (act
= first
; act
!= NULL
; act
= next
) {
11446 next
= act
->dtad_next
;
11447 dtrace_actdesc_release(act
, vstate
);
11453 static dtrace_ecbdesc_t
*
11454 dtrace_dof_ecbdesc(dof_hdr_t
*dof
, dof_sec_t
*sec
, dtrace_vstate_t
*vstate
,
11457 dtrace_ecbdesc_t
*ep
;
11458 dof_ecbdesc_t
*ecb
;
11459 dtrace_probedesc_t
*desc
;
11460 dtrace_predicate_t
*pred
= NULL
;
11462 if (sec
->dofs_size
< sizeof (dof_ecbdesc_t
)) {
11463 dtrace_dof_error(dof
, "truncated ECB description");
11467 if (sec
->dofs_align
!= sizeof (uint64_t)) {
11468 dtrace_dof_error(dof
, "bad alignment in ECB description");
11472 ecb
= (dof_ecbdesc_t
*)((uintptr_t)dof
+ (uintptr_t)sec
->dofs_offset
);
11473 sec
= dtrace_dof_sect(dof
, DOF_SECT_PROBEDESC
, ecb
->dofe_probes
);
11478 ep
= kmem_zalloc(sizeof (dtrace_ecbdesc_t
), KM_SLEEP
);
11479 ep
->dted_uarg
= ecb
->dofe_uarg
;
11480 desc
= &ep
->dted_probe
;
11482 if (dtrace_dof_probedesc(dof
, sec
, desc
) == NULL
)
11485 if (ecb
->dofe_pred
!= DOF_SECIDX_NONE
) {
11486 if ((sec
= dtrace_dof_sect(dof
,
11487 DOF_SECT_DIFOHDR
, ecb
->dofe_pred
)) == NULL
)
11490 if ((pred
= dtrace_dof_predicate(dof
, sec
, vstate
, cr
)) == NULL
)
11493 ep
->dted_pred
.dtpdd_predicate
= pred
;
11496 if (ecb
->dofe_actions
!= DOF_SECIDX_NONE
) {
11497 if ((sec
= dtrace_dof_sect(dof
,
11498 DOF_SECT_ACTDESC
, ecb
->dofe_actions
)) == NULL
)
11501 ep
->dted_action
= dtrace_dof_actdesc(dof
, sec
, vstate
, cr
);
11503 if (ep
->dted_action
== NULL
)
11511 dtrace_predicate_release(pred
, vstate
);
11512 kmem_free(ep
, sizeof (dtrace_ecbdesc_t
));
11516 #if !defined(__APPLE__) /* APPLE dyld has already done this for us */
11518 * Apply the relocations from the specified 'sec' (a DOF_SECT_URELHDR) to the
11519 * specified DOF. At present, this amounts to simply adding 'ubase' to the
11520 * site of any user SETX relocations to account for load object base address.
11521 * In the future, if we need other relocations, this function can be extended.
11524 dtrace_dof_relocate(dof_hdr_t
*dof
, dof_sec_t
*sec
, uint64_t ubase
)
11526 uintptr_t daddr
= (uintptr_t)dof
;
11527 dof_relohdr_t
*dofr
=
11528 (dof_relohdr_t
*)(uintptr_t)(daddr
+ sec
->dofs_offset
);
11529 dof_sec_t
*ss
, *rs
, *ts
;
11533 if (sec
->dofs_size
< sizeof (dof_relohdr_t
) ||
11534 sec
->dofs_align
!= sizeof (dof_secidx_t
)) {
11535 dtrace_dof_error(dof
, "invalid relocation header");
11539 ss
= dtrace_dof_sect(dof
, DOF_SECT_STRTAB
, dofr
->dofr_strtab
);
11540 rs
= dtrace_dof_sect(dof
, DOF_SECT_RELTAB
, dofr
->dofr_relsec
);
11541 ts
= dtrace_dof_sect(dof
, DOF_SECT_NONE
, dofr
->dofr_tgtsec
);
11543 if (ss
== NULL
|| rs
== NULL
|| ts
== NULL
)
11544 return (-1); /* dtrace_dof_error() has been called already */
11546 if (rs
->dofs_entsize
< sizeof (dof_relodesc_t
) ||
11547 rs
->dofs_align
!= sizeof (uint64_t)) {
11548 dtrace_dof_error(dof
, "invalid relocation section");
11552 r
= (dof_relodesc_t
*)(uintptr_t)(daddr
+ rs
->dofs_offset
);
11553 n
= rs
->dofs_size
/ rs
->dofs_entsize
;
11555 for (i
= 0; i
< n
; i
++) {
11556 uintptr_t taddr
= daddr
+ ts
->dofs_offset
+ r
->dofr_offset
;
11558 switch (r
->dofr_type
) {
11559 case DOF_RELO_NONE
:
11561 case DOF_RELO_SETX
:
11562 if (r
->dofr_offset
>= ts
->dofs_size
|| r
->dofr_offset
+
11563 sizeof (uint64_t) > ts
->dofs_size
) {
11564 dtrace_dof_error(dof
, "bad relocation offset");
11568 if (!IS_P2ALIGNED(taddr
, sizeof (uint64_t))) {
11569 dtrace_dof_error(dof
, "misaligned setx relo");
11573 *(uint64_t *)taddr
+= ubase
;
11576 dtrace_dof_error(dof
, "invalid relocation type");
11580 r
= (dof_relodesc_t
*)((uintptr_t)r
+ rs
->dofs_entsize
);
11585 #endif /* __APPLE__ */
11588 * The dof_hdr_t passed to dtrace_dof_slurp() should be a partially validated
11589 * header: it should be at the front of a memory region that is at least
11590 * sizeof (dof_hdr_t) in size -- and then at least dof_hdr.dofh_loadsz in
11591 * size. It need not be validated in any other way.
11594 dtrace_dof_slurp(dof_hdr_t
*dof
, dtrace_vstate_t
*vstate
, cred_t
*cr
,
11595 dtrace_enabling_t
**enabp
, uint64_t ubase
, int noprobes
)
11597 uint64_t len
= dof
->dofh_loadsz
, seclen
;
11598 uintptr_t daddr
= (uintptr_t)dof
;
11599 dtrace_ecbdesc_t
*ep
;
11600 dtrace_enabling_t
*enab
;
11603 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
11604 ASSERT(dof
->dofh_loadsz
>= sizeof (dof_hdr_t
));
11607 * Check the DOF header identification bytes. In addition to checking
11608 * valid settings, we also verify that unused bits/bytes are zeroed so
11609 * we can use them later without fear of regressing existing binaries.
11611 if (bcmp(&dof
->dofh_ident
[DOF_ID_MAG0
],
11612 DOF_MAG_STRING
, DOF_MAG_STRLEN
) != 0) {
11613 dtrace_dof_error(dof
, "DOF magic string mismatch");
11617 if (dof
->dofh_ident
[DOF_ID_MODEL
] != DOF_MODEL_ILP32
&&
11618 dof
->dofh_ident
[DOF_ID_MODEL
] != DOF_MODEL_LP64
) {
11619 dtrace_dof_error(dof
, "DOF has invalid data model");
11623 if (dof
->dofh_ident
[DOF_ID_ENCODING
] != DOF_ENCODE_NATIVE
) {
11624 dtrace_dof_error(dof
, "DOF encoding mismatch");
11628 #if !defined(__APPLE__)
11629 if (dof
->dofh_ident
[DOF_ID_VERSION
] != DOF_VERSION_1
&&
11630 dof
->dofh_ident
[DOF_ID_VERSION
] != DOF_VERSION_2
) {
11631 dtrace_dof_error(dof
, "DOF version mismatch");
11636 * We only support DOF_VERSION_3 for now.
11638 if (dof
->dofh_ident
[DOF_ID_VERSION
] != DOF_VERSION_3
) {
11639 dtrace_dof_error(dof
, "DOF version mismatch");
11644 if (dof
->dofh_ident
[DOF_ID_DIFVERS
] != DIF_VERSION_2
) {
11645 dtrace_dof_error(dof
, "DOF uses unsupported instruction set");
11649 if (dof
->dofh_ident
[DOF_ID_DIFIREG
] > DIF_DIR_NREGS
) {
11650 dtrace_dof_error(dof
, "DOF uses too many integer registers");
11654 if (dof
->dofh_ident
[DOF_ID_DIFTREG
] > DIF_DTR_NREGS
) {
11655 dtrace_dof_error(dof
, "DOF uses too many tuple registers");
11659 for (i
= DOF_ID_PAD
; i
< DOF_ID_SIZE
; i
++) {
11660 if (dof
->dofh_ident
[i
] != 0) {
11661 dtrace_dof_error(dof
, "DOF has invalid ident byte set");
11666 if (dof
->dofh_flags
& ~DOF_FL_VALID
) {
11667 dtrace_dof_error(dof
, "DOF has invalid flag bits set");
11671 if (dof
->dofh_secsize
== 0) {
11672 dtrace_dof_error(dof
, "zero section header size");
11677 * Check that the section headers don't exceed the amount of DOF
11678 * data. Note that we cast the section size and number of sections
11679 * to uint64_t's to prevent possible overflow in the multiplication.
11681 seclen
= (uint64_t)dof
->dofh_secnum
* (uint64_t)dof
->dofh_secsize
;
11683 if (dof
->dofh_secoff
> len
|| seclen
> len
||
11684 dof
->dofh_secoff
+ seclen
> len
) {
11685 dtrace_dof_error(dof
, "truncated section headers");
11689 if (!IS_P2ALIGNED(dof
->dofh_secoff
, sizeof (uint64_t))) {
11690 dtrace_dof_error(dof
, "misaligned section headers");
11694 if (!IS_P2ALIGNED(dof
->dofh_secsize
, sizeof (uint64_t))) {
11695 dtrace_dof_error(dof
, "misaligned section size");
11700 * Take an initial pass through the section headers to be sure that
11701 * the headers don't have stray offsets. If the 'noprobes' flag is
11702 * set, do not permit sections relating to providers, probes, or args.
11704 for (i
= 0; i
< dof
->dofh_secnum
; i
++) {
11705 dof_sec_t
*sec
= (dof_sec_t
*)(daddr
+
11706 (uintptr_t)dof
->dofh_secoff
+ i
* dof
->dofh_secsize
);
11709 switch (sec
->dofs_type
) {
11710 case DOF_SECT_PROVIDER
:
11711 case DOF_SECT_PROBES
:
11712 case DOF_SECT_PRARGS
:
11713 case DOF_SECT_PROFFS
:
11714 dtrace_dof_error(dof
, "illegal sections "
11720 if (!(sec
->dofs_flags
& DOF_SECF_LOAD
))
11721 continue; /* just ignore non-loadable sections */
11723 if (sec
->dofs_align
& (sec
->dofs_align
- 1)) {
11724 dtrace_dof_error(dof
, "bad section alignment");
11728 if (sec
->dofs_offset
& (sec
->dofs_align
- 1)) {
11729 dtrace_dof_error(dof
, "misaligned section");
11733 if (sec
->dofs_offset
> len
|| sec
->dofs_size
> len
||
11734 sec
->dofs_offset
+ sec
->dofs_size
> len
) {
11735 dtrace_dof_error(dof
, "corrupt section header");
11739 if (sec
->dofs_type
== DOF_SECT_STRTAB
&& *((char *)daddr
+
11740 sec
->dofs_offset
+ sec
->dofs_size
- 1) != '\0') {
11741 dtrace_dof_error(dof
, "non-terminating string table");
11746 #if !defined(__APPLE__)
11748 * APPLE NOTE: We have no relocation to perform. All dof values are
11749 * relative offsets.
11753 * Take a second pass through the sections and locate and perform any
11754 * relocations that are present. We do this after the first pass to
11755 * be sure that all sections have had their headers validated.
11757 for (i
= 0; i
< dof
->dofh_secnum
; i
++) {
11758 dof_sec_t
*sec
= (dof_sec_t
*)(daddr
+
11759 (uintptr_t)dof
->dofh_secoff
+ i
* dof
->dofh_secsize
);
11761 if (!(sec
->dofs_flags
& DOF_SECF_LOAD
))
11762 continue; /* skip sections that are not loadable */
11764 switch (sec
->dofs_type
) {
11765 case DOF_SECT_URELHDR
:
11766 if (dtrace_dof_relocate(dof
, sec
, ubase
) != 0)
11771 #endif /* __APPLE__ */
11773 if ((enab
= *enabp
) == NULL
)
11774 enab
= *enabp
= dtrace_enabling_create(vstate
);
11776 for (i
= 0; i
< dof
->dofh_secnum
; i
++) {
11777 dof_sec_t
*sec
= (dof_sec_t
*)(daddr
+
11778 (uintptr_t)dof
->dofh_secoff
+ i
* dof
->dofh_secsize
);
11780 if (sec
->dofs_type
!= DOF_SECT_ECBDESC
)
11783 #if !defined(__APPLE__)
11784 if ((ep
= dtrace_dof_ecbdesc(dof
, sec
, vstate
, cr
)) == NULL
) {
11785 dtrace_enabling_destroy(enab
);
11790 /* XXX Defend against gcc 4.0 botch on x86 (not all paths out of inlined dtrace_dof_ecbdesc
11791 are checked for the NULL return value.) */
11792 ep
= dtrace_dof_ecbdesc(dof
, sec
, vstate
, cr
);
11794 dtrace_enabling_destroy(enab
);
11798 #endif /* __APPLE__ */
11800 dtrace_enabling_add(enab
, ep
);
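
/*
 * Illustrative sketch (not part of the original source): the identification
 * checks at the top of dtrace_dof_slurp() verify the magic bytes, the data
 * model, and that unused ident bytes are zero so they can be given meaning
 * later without regressing existing binaries.  The standalone code below
 * shows checks of that shape against a reduced stand-in header; the
 * constants, indices, and names here are simplified assumptions, not the
 * real <sys/dtrace.h> definitions.
 */
#if 0   /* illustrative example -- not compiled with this file */
#include <stdint.h>
#include <string.h>

#define X_ID_SIZE      16
#define X_ID_PAD        8      /* first reserved ident byte (stand-in layout) */
#define X_MODEL_ILP32   1
#define X_MODEL_LP64    2

typedef struct example_dof_hdr {
    uint8_t xh_ident[X_ID_SIZE];   /* magic, model, encoding, pad... */
} example_dof_hdr_t;

static int
example_dof_check_ident(const example_dof_hdr_t *dof)
{
    static const uint8_t magic[4] = { 0x7f, 'D', 'O', 'F' };
    int i;

    if (memcmp(dof->xh_ident, magic, sizeof (magic)) != 0)
        return (-1);            /* magic string mismatch */

    if (dof->xh_ident[4] != X_MODEL_ILP32 &&
        dof->xh_ident[4] != X_MODEL_LP64)
        return (-1);            /* invalid data model (stand-in index) */

    for (i = X_ID_PAD; i < X_ID_SIZE; i++) {
        if (dof->xh_ident[i] != 0)
            return (-1);        /* reserved ident bytes must be zero */
    }

    return (0);
}
#endif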
/*
 * Process DOF for any options.  This routine assumes that the DOF has been
 * at least processed by dtrace_dof_slurp().
 */
dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state)
{
	dof_optdesc_t *desc;

	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)((uintptr_t)dof +
		    (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_OPTDESC)

		if (sec->dofs_align != sizeof (uint64_t)) {
			dtrace_dof_error(dof, "bad alignment in "
			    "option description");

		if ((entsize = sec->dofs_entsize) == 0) {
			dtrace_dof_error(dof, "zeroed option entry size");

		if (entsize < sizeof (dof_optdesc_t)) {
			dtrace_dof_error(dof, "bad option entry size");

		for (offs = 0; offs < sec->dofs_size; offs += entsize) {
			desc = (dof_optdesc_t *)((uintptr_t)dof +
			    (uintptr_t)sec->dofs_offset + offs);

			if (desc->dofo_strtab != DOF_SECIDX_NONE) {
				dtrace_dof_error(dof, "non-zero option string");

			if (desc->dofo_value == DTRACEOPT_UNSET) {
				dtrace_dof_error(dof, "unset option");

			if ((rval = dtrace_state_option(state,
			    desc->dofo_option, desc->dofo_value)) != 0) {
				dtrace_dof_error(dof, "rejected option");
/*
 * DTrace Consumer State Functions
 */
#if defined(__APPLE__)
#endif /* __APPLE__ */
dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size)
{
	size_t hashsize, maxper, min, chunksize = dstate->dtds_chunksize;
	dtrace_dynvar_t *dvar, *next, *start;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL);

	bzero(dstate, sizeof (dtrace_dstate_t));

	if ((dstate->dtds_chunksize = chunksize) == 0)
		dstate->dtds_chunksize = DTRACE_DYNVAR_CHUNKSIZE;

	if (size < (min = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)))

	if ((base = kmem_zalloc(size, KM_NOSLEEP)) == NULL)

	dstate->dtds_size = size;
	dstate->dtds_base = base;
	dstate->dtds_percpu = kmem_cache_alloc(dtrace_state_cache, KM_SLEEP);
	bzero(dstate->dtds_percpu, NCPU * sizeof (dtrace_dstate_percpu_t));

	hashsize = size / (dstate->dtds_chunksize + sizeof (dtrace_dynhash_t));

	if (hashsize != 1 && (hashsize & 1))

	dstate->dtds_hashsize = hashsize;
	dstate->dtds_hash = dstate->dtds_base;

	/*
	 * Set all of our hash buckets to point to the single sink, and (if
	 * it hasn't already been set), set the sink's hash value to be the
	 * sink sentinel value.  The sink is needed for dynamic variable
	 * lookups to know that they have iterated over an entire, valid hash
	 */
	for (i = 0; i < hashsize; i++)
		dstate->dtds_hash[i].dtdh_chain = &dtrace_dynhash_sink;

	if (dtrace_dynhash_sink.dtdv_hashval != DTRACE_DYNHASH_SINK)
		dtrace_dynhash_sink.dtdv_hashval = DTRACE_DYNHASH_SINK;

	/*
	 * Determine number of active CPUs.  Divide free list evenly among
	 */
	start = (dtrace_dynvar_t *)
	    ((uintptr_t)base + hashsize * sizeof (dtrace_dynhash_t));
	limit = (uintptr_t)base + size;

	maxper = (limit - (uintptr_t)start) / NCPU;
	maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize;

	for (i = 0; i < NCPU; i++) {
		dstate->dtds_percpu[i].dtdsc_free = dvar = start;

		/*
		 * If we don't even have enough chunks to make it once through
		 * NCPUs, we're just going to allocate everything to the first
		 * CPU.  And if we're on the last CPU, we're going to allocate
		 * whatever is left over.  In either case, we set the limit to
		 * be the limit of the dynamic variable space.
		 */
		if (maxper == 0 || i == NCPU - 1) {
			limit = (uintptr_t)base + size;

			limit = (uintptr_t)start + maxper;
			start = (dtrace_dynvar_t *)limit;

		ASSERT(limit <= (uintptr_t)base + size);

			next = (dtrace_dynvar_t *)((uintptr_t)dvar +
			    dstate->dtds_chunksize);

			if ((uintptr_t)next + dstate->dtds_chunksize >= limit)

			dvar->dtdv_next = next;
#if defined(__APPLE__)
#endif /* __APPLE__ */
dtrace_dstate_fini(dtrace_dstate_t *dstate)
{
	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);

	if (dstate->dtds_base == NULL)

	kmem_free(dstate->dtds_base, dstate->dtds_size);
	kmem_cache_free(dtrace_state_cache, dstate->dtds_percpu);
dtrace_vstate_fini(dtrace_vstate_t *vstate)
{
	/*
	 * Logical XOR, where are you?
	 */
	ASSERT((vstate->dtvs_nglobals == 0) ^ (vstate->dtvs_globals != NULL));

	if (vstate->dtvs_nglobals > 0) {
		kmem_free(vstate->dtvs_globals, vstate->dtvs_nglobals *
		    sizeof (dtrace_statvar_t *));

	if (vstate->dtvs_ntlocals > 0) {
		kmem_free(vstate->dtvs_tlocals, vstate->dtvs_ntlocals *
		    sizeof (dtrace_difv_t));

	ASSERT((vstate->dtvs_nlocals == 0) ^ (vstate->dtvs_locals != NULL));

	if (vstate->dtvs_nlocals > 0) {
		kmem_free(vstate->dtvs_locals, vstate->dtvs_nlocals *
		    sizeof (dtrace_statvar_t *));
dtrace_state_clean(dtrace_state_t *state)
{
	if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE)

	dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars);
	dtrace_speculation_clean(state);
dtrace_state_deadman(dtrace_state_t *state)
{
	now = dtrace_gethrtime();

	if (state != dtrace_anon.dta_state &&
	    now - state->dts_laststatus >= dtrace_deadman_user)

	/*
	 * We must be sure that dts_alive never appears to be less than the
	 * value upon entry to dtrace_state_deadman(), and because we lack a
	 * dtrace_cas64(), we cannot store to it atomically.  We thus instead
	 * store INT64_MAX to it, followed by a memory barrier, followed by
	 * the new value.  This assures that dts_alive never appears to be
	 * less than its true value, regardless of the order in which the
	 * stores to the underlying storage are issued.
	 */
	state->dts_alive = INT64_MAX;
	dtrace_membar_producer();
	state->dts_alive = now;
#if defined(__APPLE__)
#endif /* __APPLE__ */
dtrace_state_create(dev_t *devp, cred_t *cr)
{
	dtrace_state_t *state;
	dtrace_optval_t *opt;
	int bufsize = NCPU * sizeof (dtrace_buffer_t), i;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);

#if !defined(__APPLE__)
	minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1,
	    VM_BESTFIT | VM_SLEEP);

	/*
	 * Darwin's DEVFS layer acquired the minor number for this "device" when it called
	 * dtrace_devfs_clone_func(). At that time, dtrace_devfs_clone_func() proposed a minor number
	 * (next unused according to vmem_alloc()) and then immediately put the number back in play
	 * (by calling vmem_free()). Now that minor number is being used for an open, commit it
	 * to use. The following vmem_alloc() must deliver that same minor number.
	 */
	minor = (minor_t)(uintptr_t)vmem_alloc(dtrace_minor, 1,
	    VM_BESTFIT | VM_SLEEP);

	if (NULL != devp) {
		ASSERT(getminor(*devp) == minor);
		if (getminor(*devp) != minor) {
			printf("dtrace_open: couldn't re-acquire vended minor number %d. Instead got %d\n",
			    getminor(*devp), minor);
			vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1);

	/* NULL==devp iff "Anonymous state" (see dtrace_anon_property),
	 * so just vend the minor device number here de novo since no "open" has occurred. */

#endif /* __APPLE__ */

	if (ddi_soft_state_zalloc(dtrace_softstate, minor) != DDI_SUCCESS) {
		vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1);

	state = ddi_get_soft_state(dtrace_softstate, minor);
	state->dts_epid = DTRACE_EPIDNONE + 1;

	(void) snprintf(c, sizeof (c), "dtrace_aggid_%d", minor);
	state->dts_aggid_arena = vmem_create(c, (void *)1, UINT32_MAX, 1,
	    NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);

	if (devp != NULL) {
		major = getemajor(*devp);

		major = ddi_driver_major(dtrace_devi);

	state->dts_dev = makedevice(major, minor);

	*devp = state->dts_dev;

	/*
	 * We allocate NCPU buffers.  On the one hand, this can be quite
	 * a bit of memory per instance (nearly 36K on a Starcat).  On the
	 * other hand, it saves an additional memory reference in the probe
	 */
	state->dts_buffer = kmem_zalloc(bufsize, KM_SLEEP);
	state->dts_aggbuffer = kmem_zalloc(bufsize, KM_SLEEP);
	state->dts_cleaner = CYCLIC_NONE;
	state->dts_deadman = CYCLIC_NONE;
	state->dts_vstate.dtvs_state = state;

	for (i = 0; i < DTRACEOPT_MAX; i++)
		state->dts_options[i] = DTRACEOPT_UNSET;

	/*
	 * Set the default options.
	 */
	opt = state->dts_options;
	opt[DTRACEOPT_BUFPOLICY] = DTRACEOPT_BUFPOLICY_SWITCH;
	opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_AUTO;
	opt[DTRACEOPT_NSPEC] = dtrace_nspec_default;
	opt[DTRACEOPT_SPECSIZE] = dtrace_specsize_default;
	opt[DTRACEOPT_CPU] = (dtrace_optval_t)DTRACE_CPUALL;
	opt[DTRACEOPT_STRSIZE] = dtrace_strsize_default;
	opt[DTRACEOPT_STACKFRAMES] = dtrace_stackframes_default;
	opt[DTRACEOPT_USTACKFRAMES] = dtrace_ustackframes_default;
	opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_default;
	opt[DTRACEOPT_AGGRATE] = dtrace_aggrate_default;
	opt[DTRACEOPT_SWITCHRATE] = dtrace_switchrate_default;
	opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_default;
	opt[DTRACEOPT_JSTACKFRAMES] = dtrace_jstackframes_default;
	opt[DTRACEOPT_JSTACKSTRSIZE] = dtrace_jstackstrsize_default;

	state->dts_activity = DTRACE_ACTIVITY_INACTIVE;

	/*
	 * Depending on the user credentials, we set flag bits which alter probe
	 * visibility or the amount of destructiveness allowed.  In the case of
	 * actual anonymous tracing, or the possession of all privileges, all of
	 * the normal checks are bypassed.
	 */
	if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
		state->dts_cred.dcr_visible = DTRACE_CRV_ALL;
		state->dts_cred.dcr_action = DTRACE_CRA_ALL;

		/*
		 * Set up the credentials for this instantiation.  We take a
		 * hold on the credential to prevent it from disappearing on
		 * us; this in turn prevents the zone_t referenced by this
		 * credential from disappearing.  This means that we can
		 * examine the credential and the zone from probe context.
		 */
		state->dts_cred.dcr_cred = cr;

		/*
		 * CRA_PROC means "we have *some* privilege for dtrace" and
		 * unlocks the use of variables like pid, zonename, etc.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE) ||
		    PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
			state->dts_cred.dcr_action |= DTRACE_CRA_PROC;

		/*
		 * dtrace_user allows use of syscall and profile providers.
		 * If the user also has proc_owner and/or proc_zone, we
		 * extend the scope to include additional visibility and
		 * destructive power.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE)) {
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) {
				state->dts_cred.dcr_visible |=
				    DTRACE_CRV_ALLPROC;

				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;

			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) {
				state->dts_cred.dcr_visible |=
				    DTRACE_CRV_ALLZONE;

				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;

			/*
			 * If we have all privs in whatever zone this is,
			 * we can do destructive things to processes which
			 * have altered credentials.
			 */
#if !defined(__APPLE__)
			if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
			    cr->cr_zone->zone_privset)) {
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;

			/* Darwin doesn't do zones. */
			state->dts_cred.dcr_action |=
			    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
#endif /* __APPLE__ */

		/*
		 * Holding the dtrace_kernel privilege also implies that
		 * the user has the dtrace_user privilege from a visibility
		 * perspective.  But without further privileges, some
		 * destructive actions are not available.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE)) {
			/*
			 * Make all probes in all zones visible.  However,
			 * this doesn't mean that all actions become available
			 */
			state->dts_cred.dcr_visible |= DTRACE_CRV_KERNEL |
			    DTRACE_CRV_ALLPROC | DTRACE_CRV_ALLZONE;

			state->dts_cred.dcr_action |= DTRACE_CRA_KERNEL |

			/*
			 * Holding proc_owner means that destructive actions
			 * for *this* zone are allowed.
			 */
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;

			/*
			 * Holding proc_zone means that destructive actions
			 * for this user/group ID in all zones are allowed.
			 */
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;

			/*
			 * If we have all privs in whatever zone this is,
			 * we can do destructive things to processes which
			 * have altered credentials.
			 */
#if !defined(__APPLE__)
			if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
			    cr->cr_zone->zone_privset)) {
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;

			/* Darwin doesn't do zones. */
			state->dts_cred.dcr_action |=
			    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
#endif /* __APPLE__ */

		/*
		 * Holding the dtrace_proc privilege gives control over fasttrap
		 * and pid providers.  We need to grant wider destructive
		 * privileges in the event that the user has proc_owner and/or
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;

			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
{
	dtrace_optval_t *opt = state->dts_options, size;
	int flags = 0, rval;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(which < DTRACEOPT_MAX);
	ASSERT(state->dts_activity == DTRACE_ACTIVITY_INACTIVE ||
	    (state == dtrace_anon.dta_state &&
	    state->dts_activity == DTRACE_ACTIVITY_ACTIVE));

	if (opt[which] == DTRACEOPT_UNSET || opt[which] == 0)

	if (opt[DTRACEOPT_CPU] != DTRACEOPT_UNSET)
		cpu = opt[DTRACEOPT_CPU];

	if (which == DTRACEOPT_SPECSIZE)
		flags |= DTRACEBUF_NOSWITCH;

	if (which == DTRACEOPT_BUFSIZE) {
		if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_RING)
			flags |= DTRACEBUF_RING;

		if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_FILL)
			flags |= DTRACEBUF_FILL;

		if (state != dtrace_anon.dta_state ||
		    state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
			flags |= DTRACEBUF_INACTIVE;

	for (size = opt[which]; size >= sizeof (uint64_t); size >>= 1) {
		/*
		 * The size must be 8-byte aligned.  If the size is not 8-byte
		 * aligned, drop it down by the difference.
		 */
		if (size & (sizeof (uint64_t) - 1))
			size -= size & (sizeof (uint64_t) - 1);

		if (size < state->dts_reserve) {
			/*
			 * Buffers always must be large enough to accommodate
			 * their prereserved space.  We return E2BIG instead
			 * of ENOMEM in this case to allow for user-level
			 * software to differentiate the cases.
			 */

		rval = dtrace_buffer_alloc(buf, size, flags, cpu);

		if (rval != ENOMEM) {

		if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
dtrace_state_buffers(dtrace_state_t *state)
{
	dtrace_speculation_t *spec = state->dts_speculations;

	if ((rval = dtrace_state_buffer(state, state->dts_buffer,
	    DTRACEOPT_BUFSIZE)) != 0)

	if ((rval = dtrace_state_buffer(state, state->dts_aggbuffer,
	    DTRACEOPT_AGGSIZE)) != 0)

	for (i = 0; i < state->dts_nspeculations; i++) {
		if ((rval = dtrace_state_buffer(state,
		    spec[i].dtsp_buffer, DTRACEOPT_SPECSIZE)) != 0)
dtrace_state_prereserve(dtrace_state_t *state)
{
	dtrace_probe_t *probe;

	state->dts_reserve = 0;

	if (state->dts_options[DTRACEOPT_BUFPOLICY] != DTRACEOPT_BUFPOLICY_FILL)

	/*
	 * If our buffer policy is a "fill" buffer policy, we need to set the
	 * prereserved space to be the space required by the END probes.
	 */
	probe = dtrace_probes[dtrace_probeid_end - 1];
	ASSERT(probe != NULL);

	for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
		if (ecb->dte_state != state)

		state->dts_reserve += ecb->dte_needed + ecb->dte_alignment;
dtrace_state_go(dtrace_state_t *state, processorid_t *cpu)
{
	dtrace_optval_t *opt = state->dts_options, sz, nspec;
	dtrace_speculation_t *spec;
	dtrace_buffer_t *buf;
	cyc_handler_t hdlr;
	int rval = 0, i, bufsize = NCPU * sizeof (dtrace_buffer_t);
	dtrace_icookie_t cookie;

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_lock);

	if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {

	/*
	 * Before we can perform any checks, we must prime all of the
	 * retained enablings that correspond to this state.
	 */
	dtrace_enabling_prime(state);

	if (state->dts_destructive && !state->dts_cred.dcr_destructive) {

	dtrace_state_prereserve(state);

	/*
	 * Now we want to try to allocate our speculations.
	 * We do not automatically resize the number of speculations; if
	 * this fails, we will fail the operation.
	 */
	nspec = opt[DTRACEOPT_NSPEC];
	ASSERT(nspec != DTRACEOPT_UNSET);

	if (nspec > INT_MAX) {

	spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t), KM_NOSLEEP);

	if (spec == NULL) {

	state->dts_speculations = spec;
	state->dts_nspeculations = (int)nspec;

	for (i = 0; i < nspec; i++) {
		if ((buf = kmem_zalloc(bufsize, KM_NOSLEEP)) == NULL) {

		spec[i].dtsp_buffer = buf;

	if (opt[DTRACEOPT_GRABANON] != DTRACEOPT_UNSET) {
		if (dtrace_anon.dta_state == NULL) {

		if (state->dts_necbs != 0) {

		state->dts_anon = dtrace_anon_grab();
		ASSERT(state->dts_anon != NULL);
		state = state->dts_anon;

		/*
		 * We want "grabanon" to be set in the grabbed state, so we'll
		 * copy that option value from the grabbing state into the
		 */
		state->dts_options[DTRACEOPT_GRABANON] =
		    opt[DTRACEOPT_GRABANON];

		*cpu = dtrace_anon.dta_beganon;

		/*
		 * If the anonymous state is active (as it almost certainly
		 * is if the anonymous enabling ultimately matched anything),
		 * we don't allow any further option processing -- but we
		 * don't return failure.
		 */
		if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)

	if (opt[DTRACEOPT_AGGSIZE] != DTRACEOPT_UNSET &&
	    opt[DTRACEOPT_AGGSIZE] != 0) {
		if (state->dts_aggregations == NULL) {
			/*
			 * We're not going to create an aggregation buffer
			 * because we don't have any ECBs that contain
			 * aggregations -- set this option to 0.
			 */
			opt[DTRACEOPT_AGGSIZE] = 0;

		/*
		 * If we have an aggregation buffer, we must also have
		 * a buffer to use as scratch.
		 */
		if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET ||
		    opt[DTRACEOPT_BUFSIZE] < state->dts_needed) {
			opt[DTRACEOPT_BUFSIZE] = state->dts_needed;

	if (opt[DTRACEOPT_SPECSIZE] != DTRACEOPT_UNSET &&
	    opt[DTRACEOPT_SPECSIZE] != 0) {
		if (!state->dts_speculates) {
			/*
			 * We're not going to create speculation buffers
			 * because we don't have any ECBs that actually
			 * speculate -- set the speculation size to 0.
			 */
			opt[DTRACEOPT_SPECSIZE] = 0;

	/*
	 * The bare minimum size for any buffer that we're actually going to
	 * do anything to is sizeof (uint64_t).
	 */
	sz = sizeof (uint64_t);

	if ((state->dts_needed != 0 && opt[DTRACEOPT_BUFSIZE] < sz) ||
	    (state->dts_speculates && opt[DTRACEOPT_SPECSIZE] < sz) ||
	    (state->dts_aggregations != NULL && opt[DTRACEOPT_AGGSIZE] < sz)) {
		/*
		 * A buffer size has been explicitly set to 0 (or to a size
		 * that will be adjusted to 0) and we need the space -- we
		 * need to return failure.  We return ENOSPC to differentiate
		 * it from failing to allocate a buffer due to failure to meet
		 * the reserve (for which we return E2BIG).
		 */

	if ((rval = dtrace_state_buffers(state)) != 0)

	if ((sz = opt[DTRACEOPT_DYNVARSIZE]) == DTRACEOPT_UNSET)
		sz = dtrace_dstate_defsize;

		rval = dtrace_dstate_init(&state->dts_vstate.dtvs_dynvars, sz);

		if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
	} while (sz >>= 1);

	opt[DTRACEOPT_DYNVARSIZE] = sz;

	if (opt[DTRACEOPT_STATUSRATE] > dtrace_statusrate_max)
		opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_max;

	if (opt[DTRACEOPT_CLEANRATE] == 0)
		opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;

	if (opt[DTRACEOPT_CLEANRATE] < dtrace_cleanrate_min)
		opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_min;

	if (opt[DTRACEOPT_CLEANRATE] > dtrace_cleanrate_max)
		opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;

	hdlr.cyh_func = (cyc_func_t)dtrace_state_clean;
	hdlr.cyh_arg = state;
	hdlr.cyh_level = CY_LOW_LEVEL;

	when.cyt_interval = opt[DTRACEOPT_CLEANRATE];

	state->dts_cleaner = cyclic_add(&hdlr, &when);

	hdlr.cyh_func = (cyc_func_t)dtrace_state_deadman;
	hdlr.cyh_arg = state;
	hdlr.cyh_level = CY_LOW_LEVEL;

	when.cyt_interval = dtrace_deadman_interval;

	state->dts_alive = state->dts_laststatus = dtrace_gethrtime();
	state->dts_deadman = cyclic_add(&hdlr, &when);

	state->dts_activity = DTRACE_ACTIVITY_WARMUP;

	/*
	 * Now it's time to actually fire the BEGIN probe.  We need to disable
	 * interrupts here both to record the CPU on which we fired the BEGIN
	 * probe (the data from this CPU will be processed first at user
	 * level) and to manually activate the buffer for this CPU.
	 */
	cookie = dtrace_interrupt_disable();
	*cpu = CPU->cpu_id;
	ASSERT(state->dts_buffer[*cpu].dtb_flags & DTRACEBUF_INACTIVE);
	state->dts_buffer[*cpu].dtb_flags &= ~DTRACEBUF_INACTIVE;

	dtrace_probe(dtrace_probeid_begin,
	    (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
	dtrace_interrupt_enable(cookie);

	/*
	 * We may have had an exit action from a BEGIN probe; only change our
	 * state to ACTIVE if we're still in WARMUP.
	 */
	ASSERT(state->dts_activity == DTRACE_ACTIVITY_WARMUP ||
	    state->dts_activity == DTRACE_ACTIVITY_DRAINING);

	if (state->dts_activity == DTRACE_ACTIVITY_WARMUP)
		state->dts_activity = DTRACE_ACTIVITY_ACTIVE;

	/*
	 * Regardless of whether we're now in ACTIVE or DRAINING, we
	 * want each CPU to transition its principal buffer out of the
	 * INACTIVE state.  Doing this assures that no CPU will suddenly begin
	 * processing an ECB halfway down a probe's ECB chain; all CPUs will
	 * atomically transition from processing none of a state's ECBs to
	 * processing all of them.
	 */
	dtrace_xcall(DTRACE_CPUALL,
	    (dtrace_xcall_t)dtrace_buffer_activate, state);

	dtrace_buffer_free(state->dts_buffer);
	dtrace_buffer_free(state->dts_aggbuffer);

	if ((nspec = state->dts_nspeculations) == 0) {
		ASSERT(state->dts_speculations == NULL);

	spec = state->dts_speculations;
	ASSERT(spec != NULL);

	for (i = 0; i < state->dts_nspeculations; i++) {
		if ((buf = spec[i].dtsp_buffer) == NULL)

		dtrace_buffer_free(buf);
		kmem_free(buf, bufsize);

	kmem_free(spec, nspec * sizeof (dtrace_speculation_t));
	state->dts_nspeculations = 0;
	state->dts_speculations = NULL;

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&cpu_lock);
dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu)
{
	dtrace_icookie_t cookie;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE &&
	    state->dts_activity != DTRACE_ACTIVITY_DRAINING)

	/*
	 * We'll set the activity to DTRACE_ACTIVITY_DRAINING, and issue a sync
	 * to be sure that every CPU has seen it.  See below for the details
	 * on why this is done.
	 */
	state->dts_activity = DTRACE_ACTIVITY_DRAINING;

	/*
	 * By this point, it is impossible for any CPU to be still processing
	 * with DTRACE_ACTIVITY_ACTIVE.  We can thus set our activity to
	 * DTRACE_ACTIVITY_COOLDOWN and know that we're not racing with any
	 * other CPU in dtrace_buffer_reserve().  This allows dtrace_probe()
	 * and callees to know that the activity is DTRACE_ACTIVITY_COOLDOWN
	 * iff we're in the END probe.
	 */
	state->dts_activity = DTRACE_ACTIVITY_COOLDOWN;

	ASSERT(state->dts_activity == DTRACE_ACTIVITY_COOLDOWN);

	/*
	 * Finally, we can release the reserve and call the END probe.  We
	 * disable interrupts across calling the END probe to allow us to
	 * return the CPU on which we actually called the END probe.  This
	 * allows user-land to be sure that this CPU's principal buffer is
	 */
	state->dts_reserve = 0;

	cookie = dtrace_interrupt_disable();
	*cpu = CPU->cpu_id;
	dtrace_probe(dtrace_probeid_end,
	    (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
	dtrace_interrupt_enable(cookie);

	state->dts_activity = DTRACE_ACTIVITY_STOPPED;
dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option,
    dtrace_optval_t val)
{
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)

	if (option >= DTRACEOPT_MAX)

	if (option != DTRACEOPT_CPU && val < 0)

	case DTRACEOPT_DESTRUCTIVE:
		if (dtrace_destructive_disallow)

		state->dts_cred.dcr_destructive = 1;

	case DTRACEOPT_BUFSIZE:
	case DTRACEOPT_DYNVARSIZE:
	case DTRACEOPT_AGGSIZE:
	case DTRACEOPT_SPECSIZE:
	case DTRACEOPT_STRSIZE:

		if (val >= LONG_MAX) {
			/*
			 * If this is an otherwise negative value, set it to
			 * the highest multiple of 128m less than LONG_MAX.
			 * Technically, we're adjusting the size without
			 * regard to the buffer resizing policy, but in fact,
			 * this has no effect -- if we set the buffer size to
			 * ~LONG_MAX and the buffer policy is ultimately set to
			 * be "manual", the buffer allocation is guaranteed to
			 * fail, if only because the allocation requires two
			 * buffers.  (We set the size to the highest
			 * multiple of 128m because it ensures that the size
			 * will remain a multiple of a megabyte when
			 * repeatedly halved -- all the way down to 15m.)
			 */
			val = LONG_MAX - (1 << 27) + 1;

	state->dts_options[option] = val;
dtrace_state_destroy(dtrace_state_t *state)
{
	dtrace_vstate_t *vstate = &state->dts_vstate;
	minor_t minor = getminor(state->dts_dev);
	int i, bufsize = NCPU * sizeof (dtrace_buffer_t);
	dtrace_speculation_t *spec = state->dts_speculations;
	int nspec = state->dts_nspeculations;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);

	/*
	 * First, retract any retained enablings for this state.
	 */
	dtrace_enabling_retract(state);
	ASSERT(state->dts_nretained == 0);

	if (state->dts_activity == DTRACE_ACTIVITY_ACTIVE ||
	    state->dts_activity == DTRACE_ACTIVITY_DRAINING) {
		/*
		 * We have managed to come into dtrace_state_destroy() on a
		 * hot enabling -- almost certainly because of a disorderly
		 * shutdown of a consumer.  (That is, a consumer that is
		 * exiting without having called dtrace_stop().)  In this case,
		 * we're going to set our activity to be KILLED, and then
		 * issue a sync to be sure that everyone is out of probe
		 * context before we start blowing away ECBs.
		 */
		state->dts_activity = DTRACE_ACTIVITY_KILLED;

	/*
	 * Release the credential hold we took in dtrace_state_create().
	 */
	if (state->dts_cred.dcr_cred != NULL)
		crfree(state->dts_cred.dcr_cred);

	/*
	 * Now we can safely disable and destroy any enabled probes.  Because
	 * any DTRACE_PRIV_KERNEL probes may actually be slowing our progress
	 * (especially if they're all enabled), we take two passes through the
	 * ECBs: in the first, we disable just DTRACE_PRIV_KERNEL probes, and
	 * in the second we disable whatever is left over.
	 */
	for (match = DTRACE_PRIV_KERNEL; ; match = 0) {
		for (i = 0; i < state->dts_necbs; i++) {
			if ((ecb = state->dts_ecbs[i]) == NULL)

			if (match && ecb->dte_probe != NULL) {
				dtrace_probe_t *probe = ecb->dte_probe;
				dtrace_provider_t *prov = probe->dtpr_provider;

				if (!(prov->dtpv_priv.dtpp_flags & match))

			dtrace_ecb_disable(ecb);
			dtrace_ecb_destroy(ecb);

	/*
	 * Before we free the buffers, perform one more sync to assure that
	 * every CPU is out of probe context.
	 */

	dtrace_buffer_free(state->dts_buffer);
	dtrace_buffer_free(state->dts_aggbuffer);

	for (i = 0; i < nspec; i++)
		dtrace_buffer_free(spec[i].dtsp_buffer);

	if (state->dts_cleaner != CYCLIC_NONE)
		cyclic_remove(state->dts_cleaner);

	if (state->dts_deadman != CYCLIC_NONE)
		cyclic_remove(state->dts_deadman);

	dtrace_dstate_fini(&vstate->dtvs_dynvars);
	dtrace_vstate_fini(vstate);
	kmem_free(state->dts_ecbs, state->dts_necbs * sizeof (dtrace_ecb_t *));

	if (state->dts_aggregations != NULL) {
		for (i = 0; i < state->dts_naggregations; i++)
			ASSERT(state->dts_aggregations[i] == NULL);

		ASSERT(state->dts_naggregations > 0);
		kmem_free(state->dts_aggregations,
		    state->dts_naggregations * sizeof (dtrace_aggregation_t *));

	kmem_free(state->dts_buffer, bufsize);
	kmem_free(state->dts_aggbuffer, bufsize);

	for (i = 0; i < nspec; i++)
		kmem_free(spec[i].dtsp_buffer, bufsize);

	kmem_free(spec, nspec * sizeof (dtrace_speculation_t));

	dtrace_format_destroy(state);

	vmem_destroy(state->dts_aggid_arena);
	ddi_soft_state_free(dtrace_softstate, minor);
	vmem_free(dtrace_minor, (void *)(uintptr_t)minor, 1);
/*
 * DTrace Anonymous Enabling Functions
 */
static dtrace_state_t *
dtrace_anon_grab(void)
{
	dtrace_state_t *state;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if ((state = dtrace_anon.dta_state) == NULL) {
		ASSERT(dtrace_anon.dta_enabling == NULL);

	ASSERT(dtrace_anon.dta_enabling != NULL);
	ASSERT(dtrace_retained != NULL);

	dtrace_enabling_destroy(dtrace_anon.dta_enabling);
	dtrace_anon.dta_enabling = NULL;
	dtrace_anon.dta_state = NULL;
dtrace_anon_property(void)
{
	dtrace_state_t *state;
	char c[32]; /* enough for "dof-data-" + digits */

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);

	for (i = 0; ; i++) {
		(void) snprintf(c, sizeof (c), "dof-data-%d", i);

		dtrace_err_verbose = 1;

		if ((dof = dtrace_dof_property(c)) == NULL) {
			dtrace_err_verbose = 0;

		/*
		 * We want to create anonymous state, so we need to transition
		 * the kernel debugger to indicate that DTrace is active.  If
		 * this fails (e.g. because the debugger has modified text in
		 * some way), we won't continue with the processing.
		 */
		if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
			cmn_err(CE_NOTE, "kernel debugger active; anonymous "
			    "enabling ignored.");
			dtrace_dof_destroy(dof);

		/*
		 * If we haven't allocated an anonymous state, we'll do so now.
		 */
		if ((state = dtrace_anon.dta_state) == NULL) {
			state = dtrace_state_create(NULL, NULL);
			dtrace_anon.dta_state = state;

			if (state == NULL) {
				/*
				 * This basically shouldn't happen:  the only
				 * failure mode from dtrace_state_create() is a
				 * failure of ddi_soft_state_zalloc() that
				 * itself should never happen.  Still, the
				 * interface allows for a failure mode, and
				 * we want to fail as gracefully as possible:
				 * we'll emit an error message and cease
				 * processing anonymous state in this case.
				 */
				cmn_err(CE_WARN, "failed to create "
				    "anonymous state");
				dtrace_dof_destroy(dof);

		rv = dtrace_dof_slurp(dof, &state->dts_vstate, CRED(),
		    &dtrace_anon.dta_enabling, 0, B_TRUE);

			rv = dtrace_dof_options(dof, state);

		dtrace_err_verbose = 0;
		dtrace_dof_destroy(dof);

			/*
			 * This is malformed DOF; chuck any anonymous state
			 */
			ASSERT(dtrace_anon.dta_enabling == NULL);
			dtrace_state_destroy(state);
			dtrace_anon.dta_state = NULL;

		ASSERT(dtrace_anon.dta_enabling != NULL);

	if (dtrace_anon.dta_enabling != NULL) {
		/*
		 * dtrace_enabling_retain() can only fail because we are
		 * trying to retain more enablings than are allowed -- but
		 * we only have one anonymous enabling, and we are guaranteed
		 * to be allowed at least one retained enabling; we assert
		 * that dtrace_enabling_retain() returns success.
		 */
		rval = dtrace_enabling_retain(dtrace_anon.dta_enabling);

		dtrace_enabling_dump(dtrace_anon.dta_enabling);
/*
 * DTrace Helper Functions
 */
dtrace_helper_trace(dtrace_helper_action_t *helper,
    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, int where)
{
	uint32_t size, next, nnext, i;
	dtrace_helptrace_t *ent;
	uint16_t flags = cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

	if (!dtrace_helptrace_enabled)

	ASSERT(vstate->dtvs_nlocals <= dtrace_helptrace_nlocals);

	/*
	 * What would a tracing framework be without its own tracing
	 * framework?  (Well, a hell of a lot simpler, for starters...)
	 */
	size = sizeof (dtrace_helptrace_t) + dtrace_helptrace_nlocals *
	    sizeof (uint64_t) - sizeof (uint64_t);

	/*
	 * Iterate until we can allocate a slot in the trace buffer.
	 */
		next = dtrace_helptrace_next;

		if (next + size < dtrace_helptrace_bufsize) {
			nnext = next + size;

	} while (dtrace_cas32(&dtrace_helptrace_next, next, nnext) != next);

	/*
	 * We have our slot; fill it in.
	 */
	ent = (dtrace_helptrace_t *)&dtrace_helptrace_buffer[next];
	ent->dtht_helper = helper;
	ent->dtht_where = where;
	ent->dtht_nlocals = vstate->dtvs_nlocals;

	ent->dtht_fltoffs = (mstate->dtms_present & DTRACE_MSTATE_FLTOFFS) ?
	    mstate->dtms_fltoffs : -1;
	ent->dtht_fault = DTRACE_FLAGS2FLT(flags);
	ent->dtht_illval = cpu_core[CPU->cpu_id].cpuc_dtrace_illval;

	for (i = 0; i < vstate->dtvs_nlocals; i++) {
		dtrace_statvar_t *svar;

		if ((svar = vstate->dtvs_locals[i]) == NULL)

		ASSERT(svar->dtsv_size >= NCPU * sizeof (uint64_t));
		ent->dtht_locals[i] =
		    ((uint64_t *)(uintptr_t)svar->dtsv_data)[CPU->cpu_id];
dtrace_helper(int which, dtrace_mstate_t *mstate,
    dtrace_state_t *state, uint64_t arg0, uint64_t arg1)
{
	uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
	uint64_t sarg0 = mstate->dtms_arg[0];
	uint64_t sarg1 = mstate->dtms_arg[1];
	dtrace_helpers_t *helpers = curproc->p_dtrace_helpers;
	dtrace_helper_action_t *helper;
	dtrace_vstate_t *vstate;
	dtrace_difo_t *pred;
	int i, trace = dtrace_helptrace_enabled;

	ASSERT(which >= 0 && which < DTRACE_NHELPER_ACTIONS);

	if (helpers == NULL)

	if ((helper = helpers->dthps_actions[which]) == NULL)

	vstate = &helpers->dthps_vstate;
	mstate->dtms_arg[0] = arg0;
	mstate->dtms_arg[1] = arg1;

	/*
	 * Now iterate over each helper.  If its predicate evaluates to 'true',
	 * we'll call the corresponding actions.  Note that the below calls
	 * to dtrace_dif_emulate() may set faults in machine state.  This is
	 * okay:  our caller (the outer dtrace_dif_emulate()) will simply plow
	 * the stored DIF offset with its own (which is the desired behavior).
	 * Also, note the calls to dtrace_dif_emulate() may allocate scratch
	 * from machine state; this is okay, too.
	 */
	for (; helper != NULL; helper = helper->dtha_next) {
		if ((pred = helper->dtha_predicate) != NULL) {
				dtrace_helper_trace(helper, mstate, vstate, 0);

			if (!dtrace_dif_emulate(pred, mstate, vstate, state))

			if (*flags & CPU_DTRACE_FAULT)

		for (i = 0; i < helper->dtha_nactions; i++) {
				dtrace_helper_trace(helper,
				    mstate, vstate, i + 1);

			rval = dtrace_dif_emulate(helper->dtha_actions[i],
			    mstate, vstate, state);

			if (*flags & CPU_DTRACE_FAULT)

			dtrace_helper_trace(helper, mstate, vstate,
			    DTRACE_HELPTRACE_NEXT);

		dtrace_helper_trace(helper, mstate, vstate,
		    DTRACE_HELPTRACE_DONE);

	/*
	 * Restore the arg0 that we saved upon entry.
	 */
	mstate->dtms_arg[0] = sarg0;
	mstate->dtms_arg[1] = sarg1;

		dtrace_helper_trace(helper, mstate, vstate,
		    DTRACE_HELPTRACE_ERR);

	/*
	 * Restore the arg0 that we saved upon entry.
	 */
	mstate->dtms_arg[0] = sarg0;
	mstate->dtms_arg[1] = sarg1;
dtrace_helper_action_destroy(dtrace_helper_action_t *helper,
    dtrace_vstate_t *vstate)
{
	if (helper->dtha_predicate != NULL)
		dtrace_difo_release(helper->dtha_predicate, vstate);

	for (i = 0; i < helper->dtha_nactions; i++) {
		ASSERT(helper->dtha_actions[i] != NULL);
		dtrace_difo_release(helper->dtha_actions[i], vstate);

	kmem_free(helper->dtha_actions,
	    helper->dtha_nactions * sizeof (dtrace_difo_t *));
	kmem_free(helper, sizeof (dtrace_helper_action_t));
#if !defined(__APPLE__)
dtrace_helper_destroygen(int gen)
{
	proc_t *p = curproc;

dtrace_helper_destroygen(proc_t * p, int gen)
{
	dtrace_helpers_t *help = p->p_dtrace_helpers;
	dtrace_vstate_t *vstate;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (help == NULL || gen > help->dthps_generation)

	vstate = &help->dthps_vstate;

	for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
		dtrace_helper_action_t *last = NULL, *h, *next;

		for (h = help->dthps_actions[i]; h != NULL; h = next) {
			next = h->dtha_next;

			if (h->dtha_generation == gen) {
				if (last != NULL) {
					last->dtha_next = next;

					help->dthps_actions[i] = next;

				dtrace_helper_action_destroy(h, vstate);

	/*
	 * Iterate until we've cleared out all helper providers with the
	 * given generation number.
	 */
		dtrace_helper_provider_t *prov;

		/*
		 * Look for a helper provider with the right generation.  We
		 * have to start back at the beginning of the list each time
		 * because we drop dtrace_lock.  It's unlikely that we'll make
		 * more than two passes.
		 */
		for (i = 0; i < help->dthps_nprovs; i++) {
			prov = help->dthps_provs[i];

			if (prov->dthp_generation == gen)

		/*
		 * If there were no matches, we're done.
		 */
		if (i == help->dthps_nprovs)

		/*
		 * Move the last helper provider into this slot.
		 */
		help->dthps_nprovs--;
		help->dthps_provs[i] = help->dthps_provs[help->dthps_nprovs];
		help->dthps_provs[help->dthps_nprovs] = NULL;

		lck_mtx_unlock(&dtrace_lock);

		/*
		 * If we have a meta provider, remove this helper provider.
		 */
		lck_mtx_lock(&dtrace_meta_lock);
		if (dtrace_meta_pid != NULL) {
			ASSERT(dtrace_deferred_pid == NULL);
			dtrace_helper_provider_remove(&prov->dthp_prov,

		lck_mtx_unlock(&dtrace_meta_lock);

		dtrace_helper_provider_destroy(prov);

		lck_mtx_lock(&dtrace_lock);
dtrace_helper_validate(dtrace_helper_action_t *helper)
{
	if ((dp = helper->dtha_predicate) != NULL)
		err += dtrace_difo_validate_helper(dp);

	for (i = 0; i < helper->dtha_nactions; i++)
		err += dtrace_difo_validate_helper(helper->dtha_actions[i]);
#if !defined(__APPLE__)
dtrace_helper_action_add(int which, dtrace_ecbdesc_t *ep)
{

dtrace_helper_action_add(proc_t * p, int which, dtrace_ecbdesc_t *ep)
{
	dtrace_helpers_t *help;
	dtrace_helper_action_t *helper, *last;
	dtrace_actdesc_t *act;
	dtrace_vstate_t *vstate;
	dtrace_predicate_t *pred;
	int count = 0, nactions = 0, i;

	if (which < 0 || which >= DTRACE_NHELPER_ACTIONS)

#if !defined(__APPLE__)
	help = curproc->p_dtrace_helpers;

	help = p->p_dtrace_helpers;

	last = help->dthps_actions[which];
	vstate = &help->dthps_vstate;

	for (count = 0; last != NULL; last = last->dtha_next) {
		if (last->dtha_next == NULL)

	/*
	 * If we already have dtrace_helper_actions_max helper actions for this
	 * helper action type, we'll refuse to add a new one.
	 */
	if (count >= dtrace_helper_actions_max)

	helper = kmem_zalloc(sizeof (dtrace_helper_action_t), KM_SLEEP);
	helper->dtha_generation = help->dthps_generation;

	if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) {
		ASSERT(pred->dtp_difo != NULL);
		dtrace_difo_hold(pred->dtp_difo);
		helper->dtha_predicate = pred->dtp_difo;

	for (act = ep->dted_action; act != NULL; act = act->dtad_next) {
		if (act->dtad_kind != DTRACEACT_DIFEXPR)

		if (act->dtad_difo == NULL)

	helper->dtha_actions = kmem_zalloc(sizeof (dtrace_difo_t *) *
	    (helper->dtha_nactions = nactions), KM_SLEEP);

	for (act = ep->dted_action, i = 0; act != NULL; act = act->dtad_next) {
		dtrace_difo_hold(act->dtad_difo);
		helper->dtha_actions[i++] = act->dtad_difo;

	if (!dtrace_helper_validate(helper))

	if (last == NULL) {
		help->dthps_actions[which] = helper;

		last->dtha_next = helper;

	if (vstate->dtvs_nlocals > dtrace_helptrace_nlocals) {
		dtrace_helptrace_nlocals = vstate->dtvs_nlocals;
		dtrace_helptrace_next = 0;

	dtrace_helper_action_destroy(helper, vstate);
dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help,
    dof_helper_t *dofhp)
{
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED);

	lck_mtx_lock(&dtrace_meta_lock);
	lck_mtx_lock(&dtrace_lock);

	if (!dtrace_attached() || dtrace_meta_pid == NULL) {
		/*
		 * If the dtrace module is loaded but not attached, or if
		 * there isn't a meta provider registered to deal with
		 * these provider descriptions, we need to postpone creating
		 * the actual providers until later.
		 */
		if (help->dthps_next == NULL && help->dthps_prev == NULL &&
		    dtrace_deferred_pid != help) {
			help->dthps_deferred = 1;
			help->dthps_pid = p->p_pid;
			help->dthps_next = dtrace_deferred_pid;
			help->dthps_prev = NULL;
			if (dtrace_deferred_pid != NULL)
				dtrace_deferred_pid->dthps_prev = help;
			dtrace_deferred_pid = help;

		lck_mtx_unlock(&dtrace_lock);

	} else if (dofhp != NULL) {
		/*
		 * If the dtrace module is loaded and we have a particular
		 * helper provider description, pass that off to the
		 */
		lck_mtx_unlock(&dtrace_lock);

		dtrace_helper_provide(dofhp, p->p_pid);

		/*
		 * Otherwise, just pass all the helper provider descriptions
		 * off to the meta provider.
		 */
		lck_mtx_unlock(&dtrace_lock);

		for (i = 0; i < help->dthps_nprovs; i++) {
			dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,

	lck_mtx_unlock(&dtrace_meta_lock);
#if !defined(__APPLE__)
dtrace_helper_provider_add(dof_helper_t *dofhp, int gen)
{

dtrace_helper_provider_add(proc_t * p, dof_helper_t *dofhp, int gen)
{
	dtrace_helpers_t *help;
	dtrace_helper_provider_t *hprov, **tmp_provs;
	uint_t tmp_maxprovs, i;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

#if !defined(__APPLE__)
	help = curproc->p_dtrace_helpers;

	help = p->p_dtrace_helpers;

	ASSERT(help != NULL);

	/*
	 * If we already have dtrace_helper_providers_max helper providers,
	 * we refuse to add a new one.
	 */
	if (help->dthps_nprovs >= dtrace_helper_providers_max)

	/*
	 * Check to make sure this isn't a duplicate.
	 */
	for (i = 0; i < help->dthps_nprovs; i++) {
		if (dofhp->dofhp_addr ==
		    help->dthps_provs[i]->dthp_prov.dofhp_addr)

	hprov = kmem_zalloc(sizeof (dtrace_helper_provider_t), KM_SLEEP);
	hprov->dthp_prov = *dofhp;
	hprov->dthp_ref = 1;
	hprov->dthp_generation = gen;

	/*
	 * Allocate a bigger table for helper providers if it's already full.
	 */
	if (help->dthps_maxprovs == help->dthps_nprovs) {
		tmp_maxprovs = help->dthps_maxprovs;
		tmp_provs = help->dthps_provs;

		if (help->dthps_maxprovs == 0)
			help->dthps_maxprovs = 2;

			help->dthps_maxprovs *= 2;
		if (help->dthps_maxprovs > dtrace_helper_providers_max)
			help->dthps_maxprovs = dtrace_helper_providers_max;

		ASSERT(tmp_maxprovs < help->dthps_maxprovs);

		help->dthps_provs = kmem_zalloc(help->dthps_maxprovs *
		    sizeof (dtrace_helper_provider_t *), KM_SLEEP);

		if (tmp_provs != NULL) {
			bcopy(tmp_provs, help->dthps_provs, tmp_maxprovs *
			    sizeof (dtrace_helper_provider_t *));
			kmem_free(tmp_provs, tmp_maxprovs *
			    sizeof (dtrace_helper_provider_t *));

	help->dthps_provs[help->dthps_nprovs] = hprov;
	help->dthps_nprovs++;
dtrace_helper_provider_destroy(dtrace_helper_provider_t *hprov)
{
	lck_mtx_lock(&dtrace_lock);

	if (--hprov->dthp_ref == 0) {
		lck_mtx_unlock(&dtrace_lock);
		dof = (dof_hdr_t *)(uintptr_t)hprov->dthp_prov.dofhp_dof;
		dtrace_dof_destroy(dof);
		kmem_free(hprov, sizeof (dtrace_helper_provider_t));

		lck_mtx_unlock(&dtrace_lock);
dtrace_helper_provider_validate(dof_hdr_t *dof, dof_sec_t *sec)
{
	uintptr_t daddr = (uintptr_t)dof;
	dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
	dof_provider_t *provider;
	dof_probe_t *probe;
	char *strtab, *typestr;
	dof_stridx_t typeidx;
	uint_t nprobes, j, k;

	ASSERT(sec->dofs_type == DOF_SECT_PROVIDER);

	if (sec->dofs_offset & (sizeof (uint_t) - 1)) {
		dtrace_dof_error(dof, "misaligned section offset");

	/*
	 * The section needs to be large enough to contain the DOF provider
	 * structure appropriate for the given version.
	 */
	if (sec->dofs_size <
	    ((dof->dofh_ident[DOF_ID_VERSION] == DOF_VERSION_1) ?
	    offsetof(dof_provider_t, dofpv_prenoffs) :
	    sizeof (dof_provider_t))) {
		dtrace_dof_error(dof, "provider section too small");

	provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
	str_sec = dtrace_dof_sect(dof, DOF_SECT_STRTAB, provider->dofpv_strtab);
	prb_sec = dtrace_dof_sect(dof, DOF_SECT_PROBES, provider->dofpv_probes);
	arg_sec = dtrace_dof_sect(dof, DOF_SECT_PRARGS, provider->dofpv_prargs);
	off_sec = dtrace_dof_sect(dof, DOF_SECT_PROFFS, provider->dofpv_proffs);

	if (str_sec == NULL || prb_sec == NULL ||
	    arg_sec == NULL || off_sec == NULL)

	if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
	    provider->dofpv_prenoffs != DOF_SECT_NONE &&
	    (enoff_sec = dtrace_dof_sect(dof, DOF_SECT_PRENOFFS,
	    provider->dofpv_prenoffs)) == NULL)

	strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);

	if (provider->dofpv_name >= str_sec->dofs_size ||
	    strlen(strtab + provider->dofpv_name) >= DTRACE_PROVNAMELEN) {
		dtrace_dof_error(dof, "invalid provider name");

	if (prb_sec->dofs_entsize == 0 ||
	    prb_sec->dofs_entsize > prb_sec->dofs_size) {
		dtrace_dof_error(dof, "invalid entry size");

	if (prb_sec->dofs_entsize & (sizeof (uintptr_t) - 1)) {
		dtrace_dof_error(dof, "misaligned entry size");

	if (off_sec->dofs_entsize != sizeof (uint32_t)) {
		dtrace_dof_error(dof, "invalid entry size");

	if (off_sec->dofs_offset & (sizeof (uint32_t) - 1)) {
		dtrace_dof_error(dof, "misaligned section offset");

	if (arg_sec->dofs_entsize != sizeof (uint8_t)) {
		dtrace_dof_error(dof, "invalid entry size");

	arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);

	nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;

	/*
	 * Take a pass through the probes to check for errors.
	 */
	for (j = 0; j < nprobes; j++) {
		probe = (dof_probe_t *)(uintptr_t)(daddr +
		    prb_sec->dofs_offset + j * prb_sec->dofs_entsize);

		if (probe->dofpr_func >= str_sec->dofs_size) {
			dtrace_dof_error(dof, "invalid function name");

		if (strlen(strtab + probe->dofpr_func) >= DTRACE_FUNCNAMELEN) {
			dtrace_dof_error(dof, "function name too long");

		if (probe->dofpr_name >= str_sec->dofs_size ||
		    strlen(strtab + probe->dofpr_name) >= DTRACE_NAMELEN) {
			dtrace_dof_error(dof, "invalid probe name");

		/*
		 * The offset count must not wrap the index, and the offsets
		 * must also not overflow the section's data.
		 */
		if (probe->dofpr_offidx + probe->dofpr_noffs <
		    probe->dofpr_offidx ||
		    (probe->dofpr_offidx + probe->dofpr_noffs) *
		    off_sec->dofs_entsize > off_sec->dofs_size) {
			dtrace_dof_error(dof, "invalid probe offset");

		if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1) {
			/*
			 * If there's no is-enabled offset section, make sure
			 * there aren't any is-enabled offsets.  Otherwise
			 * perform the same checks as for probe offsets
			 * (immediately above).
			 */
			if (enoff_sec == NULL) {
				if (probe->dofpr_enoffidx != 0 ||
				    probe->dofpr_nenoffs != 0) {
					dtrace_dof_error(dof, "is-enabled "
					    "offsets with null section");

			} else if (probe->dofpr_enoffidx +
			    probe->dofpr_nenoffs < probe->dofpr_enoffidx ||
			    (probe->dofpr_enoffidx + probe->dofpr_nenoffs) *
			    enoff_sec->dofs_entsize > enoff_sec->dofs_size) {
				dtrace_dof_error(dof, "invalid is-enabled "

			if (probe->dofpr_noffs + probe->dofpr_nenoffs == 0) {
				dtrace_dof_error(dof, "zero probe and "
				    "is-enabled offsets");

		} else if (probe->dofpr_noffs == 0) {
			dtrace_dof_error(dof, "zero probe offsets");

		if (probe->dofpr_argidx + probe->dofpr_xargc <
		    probe->dofpr_argidx ||
		    (probe->dofpr_argidx + probe->dofpr_xargc) *
		    arg_sec->dofs_entsize > arg_sec->dofs_size) {
			dtrace_dof_error(dof, "invalid args");

		typeidx = probe->dofpr_nargv;
		typestr = strtab + probe->dofpr_nargv;
		for (k = 0; k < probe->dofpr_nargc; k++) {
			if (typeidx >= str_sec->dofs_size) {
				dtrace_dof_error(dof, "bad "
				    "native argument type");

			typesz = strlen(typestr) + 1;
			if (typesz > DTRACE_ARGTYPELEN) {
				dtrace_dof_error(dof, "native "
				    "argument type too long");

		typeidx = probe->dofpr_xargv;
		typestr = strtab + probe->dofpr_xargv;
		for (k = 0; k < probe->dofpr_xargc; k++) {
			if (arg[probe->dofpr_argidx + k] > probe->dofpr_nargc) {
				dtrace_dof_error(dof, "bad "
				    "native argument index");

			if (typeidx >= str_sec->dofs_size) {
				dtrace_dof_error(dof, "bad "
				    "translated argument type");

			typesz = strlen(typestr) + 1;
			if (typesz > DTRACE_ARGTYPELEN) {
				dtrace_dof_error(dof, "translated argument "
#if !defined(__APPLE__)
dtrace_helper_slurp(dof_hdr_t *dof, dof_helper_t *dhp)
{

dtrace_helper_slurp(proc_t * p, dof_hdr_t *dof, dof_helper_t *dhp)
{
	dtrace_helpers_t *help;
	dtrace_vstate_t *vstate;
	dtrace_enabling_t *enab = NULL;
	int i, gen, rv, nhelpers = 0, nprovs = 0, destroy = 1;
	uintptr_t daddr = (uintptr_t)dof;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

#if !defined(__APPLE__)
	if ((help = curproc->p_dtrace_helpers) == NULL)
		help = dtrace_helpers_create(curproc);

	if ((help = p->p_dtrace_helpers) == NULL)
		help = dtrace_helpers_create(p);

	vstate = &help->dthps_vstate;

	if ((rv = dtrace_dof_slurp(dof, vstate, NULL, &enab,
	    dhp != NULL ? dhp->dofhp_addr : 0, B_FALSE)) != 0) {
		dtrace_dof_destroy(dof);

	/*
	 * Look for helper providers and validate their descriptions.
	 */
		for (i = 0; i < dof->dofh_secnum; i++) {
			dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
			    dof->dofh_secoff + i * dof->dofh_secsize);

			if (sec->dofs_type != DOF_SECT_PROVIDER)

			if (dtrace_helper_provider_validate(dof, sec) != 0) {
				dtrace_enabling_destroy(enab);
				dtrace_dof_destroy(dof);

	/*
	 * Now we need to walk through the ECB descriptions in the enabling.
	 */
	for (i = 0; i < enab->dten_ndesc; i++) {
		dtrace_ecbdesc_t *ep = enab->dten_desc[i];
		dtrace_probedesc_t *desc = &ep->dted_probe;

		if (strcmp(desc->dtpd_provider, "dtrace") != 0)

		if (strcmp(desc->dtpd_mod, "helper") != 0)

		if (strcmp(desc->dtpd_func, "ustack") != 0)

#if !defined(__APPLE__)
		if ((rv = dtrace_helper_action_add(DTRACE_HELPER_ACTION_USTACK, ep)) != 0)

		if ((rv = dtrace_helper_action_add(p, DTRACE_HELPER_ACTION_USTACK, ep)) != 0)

			/*
			 * Adding this helper action failed -- we are now going
			 * to rip out the entire generation and return failure.
			 */
#if !defined(__APPLE__)
			(void) dtrace_helper_destroygen(help->dthps_generation);

			(void) dtrace_helper_destroygen(p, help->dthps_generation);

			dtrace_enabling_destroy(enab);
			dtrace_dof_destroy(dof);

	if (nhelpers < enab->dten_ndesc)
		dtrace_dof_error(dof, "unmatched helpers");

	gen = help->dthps_generation++;
	dtrace_enabling_destroy(enab);

	if (dhp != NULL && nprovs > 0) {
		dhp->dofhp_dof = (uint64_t)(uintptr_t)dof;
#if !defined(__APPLE__)
		if (dtrace_helper_provider_add(dhp, gen) == 0) {

		if (dtrace_helper_provider_add(p, dhp, gen) == 0) {

			lck_mtx_unlock(&dtrace_lock);
#if !defined(__APPLE__)
			dtrace_helper_provider_register(curproc, help, dhp);

			dtrace_helper_provider_register(p, help, dhp);

			lck_mtx_lock(&dtrace_lock);

		dtrace_dof_destroy(dof);
#if defined(__APPLE__)
/*
 * DTrace user static probes (USDT probes) and helper actions are loaded
 * in a process by processing dof sections. The dof sections are passed
 * into the kernel by dyld, in a dof_ioctl_data_t block. It is rather
 * expensive to process dof for a process that will never use it. There
 * is a memory cost (allocating the providers/probes), and a cpu cost
 * (creating the providers/probes).
 *
 * To reduce this cost, we use "lazy dof". The normal procedure for
 * dof processing is to copyin the dof(s) pointed to by the dof_ioctl_data_t
 * block, and invoke dof_slurp_helper() on them. When "lazy dof" is
 * used, each process retains the dof_ioctl_data_t block, instead of
 * copying in the data it points to.
 *
 * The dof_ioctl_data_t blocks are managed as if they were the actual
 * processed dof; on fork the block is copied to the child, on exec and
 * exit the block is freed.
 *
 * If the process loads library(s) containing additional dof, the
 * new dof_ioctl_data_t is merged with the existing block.
 *
 * There are a few catches that make this slightly more difficult.
 * When dyld registers dof_ioctl_data_t blocks, it expects a unique
 * identifier value for each dof in the block. In non-lazy dof terms,
 * this is the generation that dof was loaded in. If we hand back
 * a UID for a lazy dof, that same UID must be able to unload the
 * dof once it has become non-lazy. To meet this requirement, the
 * code that loads lazy dof requires that the UID's for dof(s) in
 * the lazy dof be sorted, and in ascending order. It is okay to skip
 * UID's, i.e. 1 -> 5 -> 6 is legal.
 *
 * Once a process has become non-lazy, it will stay non-lazy. All
 * future dof operations for that process will be non-lazy, even
 * if the dof mode transitions back to lazy.
 *
 * Always do lazy dof checks before non-lazy (i.e. in fork, exit, exec.).
 * That way if the lazy check fails due to transitioning to non-lazy, the
 * right thing is done with the newly faulted in dof.
 */
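/*
 * Worked example (illustrative, not from the original source): if a process
 * already holds lazy dofs stamped with generations {1, 5, 6} and dyld
 * registers a block containing two more dofs, dtrace_lazy_dofs_add() below
 * will stamp them 7 and 8 -- one past the highest existing generation, in
 * ascending order.  A later DTRACEHIOC_REMOVE with generation 5 removes just
 * that one entry.
 */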
/*
 * This method is a bit squicky. It must handle:
 *
 *	dof should not be lazy.
 *	dof should have been handled lazily, but there was an error.
 *	dof was handled lazily, and needs to be freed.
 *	dof was handled lazily, and must not be freed.
 *
 * Returns EACCES if dof should be handled non-lazily.
 *
 * KERN_SUCCESS and all other return codes indicate lazy handling of dof.
 *
 * If the dofs data is claimed by this method, dofs_claimed will be set.
 * Callers should not free claimed dofs.
 */
static int
dtrace_lazy_dofs_add(proc_t *p, dof_ioctl_data_t *incoming_dofs, int *dofs_claimed)
{
	int rval = 0;
	*dofs_claimed = 0;

	ASSERT(incoming_dofs && incoming_dofs->dofiod_count > 0);

	lck_rw_lock_shared(&dtrace_dof_mode_lock);

	/*
	 * If we have lazy dof, dof mode better be LAZY_ON.
	 */
	ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON);
	ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);
	ASSERT(dtrace_dof_mode != DTRACE_DOF_MODE_NEVER);

	/*
	 * Any existing helpers force non-lazy behavior.
	 */
	if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON && (p->p_dtrace_helpers == NULL)) {
		lck_mtx_lock(&p->p_dtrace_sprlock);

		dof_ioctl_data_t *existing_dofs = p->p_dtrace_lazy_dofs;
		unsigned int existing_dofs_count = (existing_dofs) ? existing_dofs->dofiod_count : 0;
		unsigned int i, merged_dofs_count = incoming_dofs->dofiod_count + existing_dofs_count;

		/*
		 * Range check the merged count.
		 */
		if (merged_dofs_count == 0 || merged_dofs_count > 1024) {
			dtrace_dof_error(NULL, "lazy_dofs_add merged_dofs_count out of range");
			rval = EINVAL;
			goto unlock;
		}

		/*
		 * Each dof being added must be assigned a unique generation.
		 */
		uint64_t generation = (existing_dofs) ?
		    existing_dofs->dofiod_helpers[existing_dofs_count - 1].dofhp_dof + 1 : 1;
		for (i = 0; i < incoming_dofs->dofiod_count; i++) {
			/*
			 * We rely on these being the same so we can overwrite dofhp_dof and not lose info.
			 */
			ASSERT(incoming_dofs->dofiod_helpers[i].dofhp_dof == incoming_dofs->dofiod_helpers[i].dofhp_addr);
			incoming_dofs->dofiod_helpers[i].dofhp_dof = generation++;
		}

		if (existing_dofs) {
			/*
			 * Merge the existing and incoming dofs
			 */
			size_t merged_dofs_size = DOF_IOCTL_DATA_T_SIZE(merged_dofs_count);
			dof_ioctl_data_t *merged_dofs = kmem_alloc(merged_dofs_size, KM_SLEEP);

			bcopy(&existing_dofs->dofiod_helpers[0],
			    &merged_dofs->dofiod_helpers[0],
			    sizeof(dof_helper_t) * existing_dofs_count);
			bcopy(&incoming_dofs->dofiod_helpers[0],
			    &merged_dofs->dofiod_helpers[existing_dofs_count],
			    sizeof(dof_helper_t) * incoming_dofs->dofiod_count);

			merged_dofs->dofiod_count = merged_dofs_count;

			kmem_free(existing_dofs, DOF_IOCTL_DATA_T_SIZE(existing_dofs_count));

			p->p_dtrace_lazy_dofs = merged_dofs;
		} else {
			/*
			 * Claim the incoming dofs
			 */
			*dofs_claimed = 1;
			p->p_dtrace_lazy_dofs = incoming_dofs;
		}

#if DEBUG
		dof_ioctl_data_t *all_dofs = p->p_dtrace_lazy_dofs;
		for (i = 0; i < all_dofs->dofiod_count - 1; i++) {
			ASSERT(all_dofs->dofiod_helpers[i].dofhp_dof < all_dofs->dofiod_helpers[i+1].dofhp_dof);
		}
#endif /* DEBUG */

unlock:
		lck_mtx_unlock(&p->p_dtrace_sprlock);
	} else {
		rval = EACCES;
	}

	lck_rw_unlock_shared(&dtrace_dof_mode_lock);

	return rval;
}
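/*
 * Minimal caller sketch (illustrative, not from the original source),
 * assuming "multi_dof" was just copied in and would otherwise be freed by
 * the caller, as DTRACEHIOC_ADDDOF does later in this file:
 *
 *	int claimed = 0;
 *	rval = dtrace_lazy_dofs_add(p, multi_dof, &claimed);
 *	if (rval == EACCES) {
 *		// fall back to non-lazy processing of each dof_helper_t
 *	}
 *	if (!claimed)
 *		kmem_free(multi_dof, DOF_IOCTL_DATA_T_SIZE(multi_dof->dofiod_count));
 */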
/*
 * Returns:
 *
 * EINVAL: lazy dof is enabled, but the requested generation was not found.
 * EACCES: This removal needs to be handled non-lazily.
 */
static int
dtrace_lazy_dofs_remove(proc_t *p, int generation)
{
	int rval = EINVAL;

	lck_rw_lock_shared(&dtrace_dof_mode_lock);

	/*
	 * If we have lazy dof, dof mode better be LAZY_ON.
	 */
	ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON);
	ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);
	ASSERT(dtrace_dof_mode != DTRACE_DOF_MODE_NEVER);

	/*
	 * Any existing helpers force non-lazy behavior.
	 */
	if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON && (p->p_dtrace_helpers == NULL)) {
		lck_mtx_lock(&p->p_dtrace_sprlock);

		dof_ioctl_data_t *existing_dofs = p->p_dtrace_lazy_dofs;

		if (existing_dofs) {
			int index, existing_dofs_count = existing_dofs->dofiod_count;
			for (index = 0; index < existing_dofs_count; index++) {
				if ((int)existing_dofs->dofiod_helpers[index].dofhp_dof == generation) {
					dof_ioctl_data_t *removed_dofs = NULL;

					/*
					 * If there is only 1 dof, we'll delete it and swap in NULL.
					 */
					if (existing_dofs_count > 1) {
						int removed_dofs_count = existing_dofs_count - 1;
						size_t removed_dofs_size = DOF_IOCTL_DATA_T_SIZE(removed_dofs_count);

						removed_dofs = kmem_alloc(removed_dofs_size, KM_SLEEP);
						removed_dofs->dofiod_count = removed_dofs_count;

						/*
						 * copy the remaining data.
						 */
						bcopy(&existing_dofs->dofiod_helpers[0],
						    &removed_dofs->dofiod_helpers[0],
						    index * sizeof(dof_helper_t));

						if (index < existing_dofs_count - 1) {
							bcopy(&existing_dofs->dofiod_helpers[index+1],
							    &removed_dofs->dofiod_helpers[index],
							    (existing_dofs_count - index - 1) * sizeof(dof_helper_t));
						}
					}

					kmem_free(existing_dofs, DOF_IOCTL_DATA_T_SIZE(existing_dofs_count));

					p->p_dtrace_lazy_dofs = removed_dofs;

					rval = KERN_SUCCESS;

					break;
				}
			}

#if DEBUG
			dof_ioctl_data_t *all_dofs = p->p_dtrace_lazy_dofs;
			if (all_dofs) {
				unsigned int i;
				for (i = 0; i < all_dofs->dofiod_count - 1; i++) {
					ASSERT(all_dofs->dofiod_helpers[i].dofhp_dof < all_dofs->dofiod_helpers[i+1].dofhp_dof);
				}
			}
#endif /* DEBUG */
		}

		lck_mtx_unlock(&p->p_dtrace_sprlock);
	} else {
		rval = EACCES;
	}

	lck_rw_unlock_shared(&dtrace_dof_mode_lock);

	return rval;
}
void
dtrace_lazy_dofs_destroy(proc_t *p)
{
	lck_rw_lock_shared(&dtrace_dof_mode_lock);
	lck_mtx_lock(&p->p_dtrace_sprlock);

	/*
	 * If we have lazy dof, dof mode better be LAZY_ON, or we must be exiting.
	 * We cannot assert against DTRACE_DOF_MODE_NEVER here, because we are called from
	 * kern_exit.c and kern_exec.c.
	 */
	ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON || p->p_lflag & P_LEXIT);
	ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);

	dof_ioctl_data_t *lazy_dofs = p->p_dtrace_lazy_dofs;
	p->p_dtrace_lazy_dofs = NULL;

	lck_mtx_unlock(&p->p_dtrace_sprlock);
	lck_rw_unlock_shared(&dtrace_dof_mode_lock);

	if (lazy_dofs) {
		kmem_free(lazy_dofs, DOF_IOCTL_DATA_T_SIZE(lazy_dofs->dofiod_count));
	}
}
void
dtrace_lazy_dofs_duplicate(proc_t *parent, proc_t *child)
{
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_assert(&parent->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_assert(&child->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED);

	lck_rw_lock_shared(&dtrace_dof_mode_lock);
	lck_mtx_lock(&parent->p_dtrace_sprlock);

	/*
	 * If we have lazy dof, dof mode better be LAZY_ON, or we must be exiting.
	 * We cannot assert against DTRACE_DOF_MODE_NEVER here, because we are called from
	 * the fork path.
	 */
	ASSERT(parent->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON);
	ASSERT(parent->p_dtrace_lazy_dofs == NULL || parent->p_dtrace_helpers == NULL);

	/*
	 * In theory we should hold the child sprlock, but this is safe...
	 */
	ASSERT(child->p_dtrace_lazy_dofs == NULL && child->p_dtrace_helpers == NULL);

	dof_ioctl_data_t *parent_dofs = parent->p_dtrace_lazy_dofs;
	dof_ioctl_data_t *child_dofs = NULL;

	if (parent_dofs) {
		size_t parent_dofs_size = DOF_IOCTL_DATA_T_SIZE(parent_dofs->dofiod_count);
		child_dofs = kmem_alloc(parent_dofs_size, KM_SLEEP);
		bcopy(parent_dofs, child_dofs, parent_dofs_size);
	}

	lck_mtx_unlock(&parent->p_dtrace_sprlock);

	if (child_dofs) {
		lck_mtx_lock(&child->p_dtrace_sprlock);
		child->p_dtrace_lazy_dofs = child_dofs;
		lck_mtx_unlock(&child->p_dtrace_sprlock);
	}

	lck_rw_unlock_shared(&dtrace_dof_mode_lock);
}
static int
dtrace_lazy_dofs_proc_iterate_filter(proc_t *p, void *ignored)
{
#pragma unused(ignored)
	/*
	 * Okay to NULL test without taking the sprlock.
	 */
	return p->p_dtrace_lazy_dofs != NULL;
}
static int
dtrace_lazy_dofs_proc_iterate_doit(proc_t *p, void *ignored)
{
#pragma unused(ignored)
	/*
	 * It is possible this process may exit during our attempt to
	 * fault in the dof. We could fix this by holding locks longer,
	 * but the errors are benign.
	 */
	lck_mtx_lock(&p->p_dtrace_sprlock);

	/*
	 * In this case only, it is okay to have lazy dof when dof mode is DTRACE_DOF_MODE_LAZY_OFF
	 */
	ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);
	ASSERT(dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF);

	dof_ioctl_data_t *lazy_dofs = p->p_dtrace_lazy_dofs;
	p->p_dtrace_lazy_dofs = NULL;

	lck_mtx_unlock(&p->p_dtrace_sprlock);

	/*
	 * Process each dof_helper_t
	 */
	if (lazy_dofs != NULL) {
		unsigned int i;
		int rval;

		for (i = 0; i < lazy_dofs->dofiod_count; i++) {
			/*
			 * When loading lazy dof, we depend on the generations being sorted in ascending order.
			 */
			ASSERT(i >= (lazy_dofs->dofiod_count - 1) ||
			    lazy_dofs->dofiod_helpers[i].dofhp_dof < lazy_dofs->dofiod_helpers[i+1].dofhp_dof);

			dof_helper_t *dhp = &lazy_dofs->dofiod_helpers[i];

			/*
			 * We stored the generation in dofhp_dof. Save it, and restore the original value.
			 */
			int generation = dhp->dofhp_dof;
			dhp->dofhp_dof = dhp->dofhp_addr;

			dof_hdr_t *dof = dtrace_dof_copyin_from_proc(p, dhp->dofhp_dof, &rval);

			if (dof != NULL) {
				dtrace_helpers_t *help;

				lck_mtx_lock(&dtrace_lock);

				/*
				 * This must be done with the dtrace_lock held
				 */
				if ((help = p->p_dtrace_helpers) == NULL)
					help = dtrace_helpers_create(p);

				/*
				 * If the generation value has been bumped, someone snuck in
				 * when we released the dtrace lock. We have to dump this generation,
				 * there is no safe way to load it.
				 */
				if (help->dthps_generation <= generation) {
					help->dthps_generation = generation;

					/*
					 * dtrace_helper_slurp() takes responsibility for the dof --
					 * it may free it now or it may save it and free it later.
					 */
					if ((rval = dtrace_helper_slurp(p, dof, dhp)) != generation) {
						dtrace_dof_error(NULL, "returned value did not match expected generation");
					}
				}

				lck_mtx_unlock(&dtrace_lock);
			}
		}

		kmem_free(lazy_dofs, DOF_IOCTL_DATA_T_SIZE(lazy_dofs->dofiod_count));
	}

	return PROC_RETURNED;
}
#endif /* __APPLE__ */
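/*
 * The two iteration callbacks above are consumed together.  A minimal sketch
 * of the driver side (this mirrors how dtrace_open() below walks existing
 * processes once the dof mode leaves DTRACE_DOF_MODE_LAZY_ON):
 *
 *	proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS,
 *	    dtrace_lazy_dofs_proc_iterate_doit, NULL,
 *	    dtrace_lazy_dofs_proc_iterate_filter, NULL);
 */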
static dtrace_helpers_t *
dtrace_helpers_create(proc_t *p)
{
	dtrace_helpers_t *help;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(p->p_dtrace_helpers == NULL);

	help = kmem_zalloc(sizeof (dtrace_helpers_t), KM_SLEEP);
	help->dthps_actions = kmem_zalloc(sizeof (dtrace_helper_action_t *) *
	    DTRACE_NHELPER_ACTIONS, KM_SLEEP);

	p->p_dtrace_helpers = help;
	dtrace_helpers++;

	return (help);
}
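/*
 * Illustrative note (not from the original source): callers hold dtrace_lock
 * and create helpers on demand, typically with the idiom used elsewhere in
 * this file:
 *
 *	if ((help = p->p_dtrace_helpers) == NULL)
 *		help = dtrace_helpers_create(p);
 */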
#if !defined(__APPLE__)
static void
dtrace_helpers_destroy(void)
{
	proc_t *p = curproc;
#else
static void
dtrace_helpers_destroy(proc_t *p)
{
#endif
	dtrace_helpers_t *help;
	dtrace_vstate_t *vstate;
	int i;

	lck_mtx_lock(&dtrace_lock);

	ASSERT(p->p_dtrace_helpers != NULL);
	ASSERT(dtrace_helpers > 0);

	help = p->p_dtrace_helpers;
	vstate = &help->dthps_vstate;

	/*
	 * We're now going to lose the help from this process.
	 */
	p->p_dtrace_helpers = NULL;

	/*
	 * Destroy the helper actions.
	 */
	for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
		dtrace_helper_action_t *h, *next;

		for (h = help->dthps_actions[i]; h != NULL; h = next) {
			next = h->dtha_next;
			dtrace_helper_action_destroy(h, vstate);
		}
	}

	lck_mtx_unlock(&dtrace_lock);

	/*
	 * Destroy the helper providers.
	 */
	if (help->dthps_maxprovs > 0) {
		lck_mtx_lock(&dtrace_meta_lock);
		if (dtrace_meta_pid != NULL) {
			ASSERT(dtrace_deferred_pid == NULL);

			for (i = 0; i < help->dthps_nprovs; i++) {
				dtrace_helper_provider_remove(
				    &help->dthps_provs[i]->dthp_prov, p->p_pid);
			}
		} else {
			lck_mtx_lock(&dtrace_lock);
			ASSERT(help->dthps_deferred == 0 ||
			    help->dthps_next != NULL ||
			    help->dthps_prev != NULL ||
			    help == dtrace_deferred_pid);

			/*
			 * Remove the helper from the deferred list.
			 */
			if (help->dthps_next != NULL)
				help->dthps_next->dthps_prev = help->dthps_prev;
			if (help->dthps_prev != NULL)
				help->dthps_prev->dthps_next = help->dthps_next;
			if (dtrace_deferred_pid == help) {
				dtrace_deferred_pid = help->dthps_next;
				ASSERT(help->dthps_prev == NULL);
			}

			lck_mtx_unlock(&dtrace_lock);
		}

		lck_mtx_unlock(&dtrace_meta_lock);

		for (i = 0; i < help->dthps_nprovs; i++) {
			dtrace_helper_provider_destroy(help->dthps_provs[i]);
		}

		kmem_free(help->dthps_provs, help->dthps_maxprovs *
		    sizeof (dtrace_helper_provider_t *));
	}

	lck_mtx_lock(&dtrace_lock);

	dtrace_vstate_fini(&help->dthps_vstate);
	kmem_free(help->dthps_actions,
	    sizeof (dtrace_helper_action_t *) * DTRACE_NHELPER_ACTIONS);
	kmem_free(help, sizeof (dtrace_helpers_t));

	--dtrace_helpers;
	lck_mtx_unlock(&dtrace_lock);
}
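/*
 * Illustrative note (not from the original source): dtrace_deferred_pid is a
 * doubly-linked list of helpers whose USDT providers could not yet be handed
 * to a meta-provider.  The unlink sequence above is the usual
 * remove-from-middle idiom; with a list A <-> help <-> B it leaves A <-> B,
 * and when help was the head it advances dtrace_deferred_pid to B.
 */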
static void
dtrace_helpers_duplicate(proc_t *from, proc_t *to)
{
	dtrace_helpers_t *help, *newhelp;
	dtrace_helper_action_t *helper, *new, *last;
	dtrace_difo_t *dp;
	dtrace_vstate_t *vstate;
	int i, j, sz, hasprovs = 0;

	lck_mtx_lock(&dtrace_lock);
	ASSERT(from->p_dtrace_helpers != NULL);
	ASSERT(dtrace_helpers > 0);

	help = from->p_dtrace_helpers;
	newhelp = dtrace_helpers_create(to);
	ASSERT(to->p_dtrace_helpers != NULL);

	newhelp->dthps_generation = help->dthps_generation;
	vstate = &newhelp->dthps_vstate;

	/*
	 * Duplicate the helper actions.
	 */
	for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
		if ((helper = help->dthps_actions[i]) == NULL)
			continue;

		for (last = NULL; helper != NULL; helper = helper->dtha_next) {
			new = kmem_zalloc(sizeof (dtrace_helper_action_t),
			    KM_SLEEP);
			new->dtha_generation = helper->dtha_generation;

			if ((dp = helper->dtha_predicate) != NULL) {
				dp = dtrace_difo_duplicate(dp, vstate);
				new->dtha_predicate = dp;
			}

			new->dtha_nactions = helper->dtha_nactions;
			sz = sizeof (dtrace_difo_t *) * new->dtha_nactions;
			new->dtha_actions = kmem_alloc(sz, KM_SLEEP);

			for (j = 0; j < new->dtha_nactions; j++) {
				dtrace_difo_t *dp = helper->dtha_actions[j];

				ASSERT(dp != NULL);
				dp = dtrace_difo_duplicate(dp, vstate);
				new->dtha_actions[j] = dp;
			}

			if (last != NULL) {
				last->dtha_next = new;
			} else {
				newhelp->dthps_actions[i] = new;
			}

			last = new;
		}
	}

	/*
	 * Duplicate the helper providers and register them with the
	 * DTrace framework.
	 */
	if (help->dthps_nprovs > 0) {
		newhelp->dthps_nprovs = help->dthps_nprovs;
		newhelp->dthps_maxprovs = help->dthps_nprovs;
		newhelp->dthps_provs = kmem_alloc(newhelp->dthps_nprovs *
		    sizeof (dtrace_helper_provider_t *), KM_SLEEP);
		for (i = 0; i < newhelp->dthps_nprovs; i++) {
			newhelp->dthps_provs[i] = help->dthps_provs[i];
			newhelp->dthps_provs[i]->dthp_ref++;
		}

		hasprovs = 1;
	}

	lck_mtx_unlock(&dtrace_lock);

	if (hasprovs)
		dtrace_helper_provider_register(to, newhelp, NULL);
}
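/*
 * Illustrative note (not from the original source): on fork, helper actions
 * and their DIFOs are deep-copied via dtrace_difo_duplicate(), while helper
 * providers are shared between parent and child by bumping dthp_ref above;
 * dtrace_helper_provider_destroy() is then responsible for dropping that
 * reference when either process tears its helpers down.
 */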
/*
 * DTrace Hook Functions
 */
static void
dtrace_module_loaded(struct modctl *ctl)
{
	dtrace_provider_t *prv;

	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&mod_lock);

	// ASSERT(ctl->mod_busy);

	/*
	 * We're going to call each provider's per-module provide operation
	 * specifying only this module.
	 */
	for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
		prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);

	lck_mtx_unlock(&mod_lock);
	lck_mtx_unlock(&dtrace_provider_lock);

	/*
	 * If we have any retained enablings, we need to match against them.
	 * Enabling probes requires that cpu_lock be held, and we cannot hold
	 * cpu_lock here -- it is legal for cpu_lock to be held when loading a
	 * module. (In particular, this happens when loading scheduling
	 * classes.) So if we have any retained enablings, we need to dispatch
	 * our task queue to do the match for us.
	 */
	lck_mtx_lock(&dtrace_lock);

	if (dtrace_retained == NULL) {
		lck_mtx_unlock(&dtrace_lock);
		return;
	}

	(void) taskq_dispatch(dtrace_taskq,
	    (task_func_t *)dtrace_enabling_matchall, NULL, TQ_SLEEP);

	lck_mtx_unlock(&dtrace_lock);

	/*
	 * And now, for a little heuristic sleaze: in general, we want to
	 * match modules as soon as they load. However, we cannot guarantee
	 * this, because it would lead us to the lock ordering violation
	 * outlined above. The common case, of course, is that cpu_lock is
	 * _not_ held -- so we delay here for a clock tick, hoping that that's
	 * long enough for the task queue to do its work. If it's not, it's
	 * not a serious problem -- it just means that the module that we
	 * just loaded may not be immediately instrumentable.
	 */
	delay(1);
}
static void
dtrace_module_unloaded(struct modctl *ctl)
{
	dtrace_probe_t template, *probe, *first, *next;
	dtrace_provider_t *prov;

	template.dtpr_mod = ctl->mod_modname;

	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&mod_lock);
	lck_mtx_lock(&dtrace_lock);

	if (dtrace_bymod == NULL) {
		/*
		 * The DTrace module is loaded (obviously) but not attached;
		 * we don't have any work to do.
		 */
		lck_mtx_unlock(&dtrace_provider_lock);
		lck_mtx_unlock(&mod_lock);
		lck_mtx_unlock(&dtrace_lock);
		return;
	}

	for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template);
	    probe != NULL; probe = probe->dtpr_nextmod) {
		if (probe->dtpr_ecb != NULL) {
			lck_mtx_unlock(&dtrace_provider_lock);
			lck_mtx_unlock(&mod_lock);
			lck_mtx_unlock(&dtrace_lock);

			/*
			 * This shouldn't _actually_ be possible -- we're
			 * unloading a module that has an enabled probe in it.
			 * (It's normally up to the provider to make sure that
			 * this can't happen.) However, because dtps_enable()
			 * doesn't have a failure mode, there can be an
			 * enable/unload race. Upshot: we don't want to
			 * assert, but we're not going to disable the
			 * probe, either.
			 */
			if (dtrace_err_verbose) {
				cmn_err(CE_WARN, "unloaded module '%s' had "
				    "enabled probes", ctl->mod_modname);
			}

			return;
		}
	}

	probe = first;

	for (first = NULL; probe != NULL; probe = next) {
		ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe);

		dtrace_probes[probe->dtpr_id - 1] = NULL;

		next = probe->dtpr_nextmod;
		dtrace_hash_remove(dtrace_bymod, probe);
		dtrace_hash_remove(dtrace_byfunc, probe);
		dtrace_hash_remove(dtrace_byname, probe);

		if (first == NULL) {
			first = probe;
			probe->dtpr_nextmod = NULL;
		} else {
			probe->dtpr_nextmod = first;
			first = probe;
		}
	}

	/*
	 * We've removed all of the module's probes from the hash chains and
	 * from the probe array. Now issue a dtrace_sync() to be sure that
	 * everyone has cleared out from any probe array processing.
	 */
	dtrace_sync();

	for (probe = first; probe != NULL; probe = first) {
		first = probe->dtpr_nextmod;
		prov = probe->dtpr_provider;
		prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id,
		    probe->dtpr_arg);
		kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
		kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
		kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
		vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1);
#if !defined(__APPLE__)
		kmem_free(probe, sizeof (dtrace_probe_t));
#else
		zfree(dtrace_probe_t_zone, probe);
#endif
	}

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&mod_lock);
	lck_mtx_unlock(&dtrace_provider_lock);
}
void
dtrace_suspend(void)
{
	dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend));
}

void
dtrace_resume(void)
{
	dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_resume));
}
static int
dtrace_cpu_setup(cpu_setup_t what, processorid_t cpu)
{
	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_lock(&dtrace_lock);

	switch (what) {
	case CPU_CONFIG: {
		dtrace_state_t *state;
		dtrace_optval_t *opt, rs, c;

		/*
		 * For now, we only allocate a new buffer for anonymous state.
		 */
		if ((state = dtrace_anon.dta_state) == NULL)
			break;

		if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
			break;

		opt = state->dts_options;
		c = opt[DTRACEOPT_CPU];

		if (c != DTRACE_CPUALL && c != DTRACEOPT_UNSET && c != cpu)
			break;

		/*
		 * Regardless of what the actual policy is, we're going to
		 * temporarily set our resize policy to be manual. We're
		 * also going to temporarily set our CPU option to denote
		 * the newly configured CPU.
		 */
		rs = opt[DTRACEOPT_BUFRESIZE];
		opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_MANUAL;
		opt[DTRACEOPT_CPU] = (dtrace_optval_t)cpu;

		(void) dtrace_state_buffers(state);

		opt[DTRACEOPT_BUFRESIZE] = rs;
		opt[DTRACEOPT_CPU] = c;

		break;
	}

	case CPU_UNCONFIG:
		/*
		 * We don't free the buffer in the CPU_UNCONFIG case. (The
		 * buffer will be freed when the consumer exits.)
		 */
		break;

	default:
		break;
	}

	lck_mtx_unlock(&dtrace_lock);
	return (0);
}

static void
dtrace_cpu_setup_initial(processorid_t cpu)
{
	(void) dtrace_cpu_setup(CPU_CONFIG, cpu);
}
static void
dtrace_toxrange_add(uintptr_t base, uintptr_t limit)
{
	if (dtrace_toxranges >= dtrace_toxranges_max) {
		int osize, nsize;
		dtrace_toxrange_t *range;

		osize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);

		if (osize == 0) {
			ASSERT(dtrace_toxrange == NULL);
			ASSERT(dtrace_toxranges_max == 0);
			dtrace_toxranges_max = 1;
		} else {
			dtrace_toxranges_max <<= 1;
		}

		nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
		range = kmem_zalloc(nsize, KM_SLEEP);

		if (dtrace_toxrange != NULL) {
			ASSERT(osize != 0);
			bcopy(dtrace_toxrange, range, osize);
			kmem_free(dtrace_toxrange, osize);
		}

		dtrace_toxrange = range;
	}

	ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == NULL);
	ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == NULL);

	dtrace_toxrange[dtrace_toxranges].dtt_base = base;
	dtrace_toxrange[dtrace_toxranges].dtt_limit = limit;
	dtrace_toxranges++;
}
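/*
 * Illustrative note (not from the original source): the table above grows by
 * doubling, so successive additions move dtrace_toxranges_max through
 * 0 -> 1 -> 2 -> 4 -> 8 ..., copying the old entries forward each time a
 * larger array is allocated.  dtrace_attach() seeds the table via
 * dtrace_toxic_ranges(dtrace_toxrange_add) below.
 */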
/*
 * DTrace Driver Cookbook Functions
 */
static int
dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
	dtrace_provider_id_t id;
	dtrace_state_t *state = NULL;
	dtrace_enabling_t *enab;

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&dtrace_lock);

	if (ddi_soft_state_init(&dtrace_softstate,
	    sizeof (dtrace_state_t), 0) != 0) {
		cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state");
		lck_mtx_unlock(&cpu_lock);
		lck_mtx_unlock(&dtrace_provider_lock);
		lck_mtx_unlock(&dtrace_lock);
		return (DDI_FAILURE);
	}

#if !defined(__APPLE__)
	if (ddi_create_minor_node(devi, DTRACEMNR_DTRACE, S_IFCHR,
	    DTRACEMNRN_DTRACE, DDI_PSEUDO, NULL) == DDI_FAILURE ||
	    ddi_create_minor_node(devi, DTRACEMNR_HELPER, S_IFCHR,
	    DTRACEMNRN_HELPER, DDI_PSEUDO, NULL) == DDI_FAILURE) {
		cmn_err(CE_NOTE, "/dev/dtrace couldn't create minor nodes");
		ddi_remove_minor_node(devi, NULL);
		ddi_soft_state_fini(&dtrace_softstate);
		lck_mtx_unlock(&cpu_lock);
		lck_mtx_unlock(&dtrace_provider_lock);
		lck_mtx_unlock(&dtrace_lock);
		return (DDI_FAILURE);
	}
#endif /* __APPLE__ */

	ddi_report_dev(devi);
	dtrace_devi = devi;

	dtrace_modload = dtrace_module_loaded;
	dtrace_modunload = dtrace_module_unloaded;
	dtrace_cpu_init = dtrace_cpu_setup_initial;
	dtrace_helpers_cleanup = dtrace_helpers_destroy;
	dtrace_helpers_fork = dtrace_helpers_duplicate;
	dtrace_cpustart_init = dtrace_suspend;
	dtrace_cpustart_fini = dtrace_resume;
	dtrace_debugger_init = dtrace_suspend;
	dtrace_debugger_fini = dtrace_resume;
	dtrace_kreloc_init = dtrace_suspend;
	dtrace_kreloc_fini = dtrace_resume;

	register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);

	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);

	dtrace_arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1,
	    NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
	dtrace_minor = vmem_create("dtrace_minor", (void *)DTRACEMNRN_CLONE,
	    UINT32_MAX - DTRACEMNRN_CLONE, 1, NULL, NULL, NULL, 0,
	    VM_SLEEP | VMC_IDENTIFIER);
	dtrace_taskq = taskq_create("dtrace_taskq", 1, maxclsyspri,
	    1, INT_MAX, 0);

	dtrace_state_cache = kmem_cache_create("dtrace_state_cache",
	    sizeof (dtrace_dstate_percpu_t) * NCPU, DTRACE_STATE_ALIGN,
	    NULL, NULL, NULL, NULL, NULL, 0);

	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);

	dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod),
	    offsetof(dtrace_probe_t, dtpr_nextmod),
	    offsetof(dtrace_probe_t, dtpr_prevmod));

	dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func),
	    offsetof(dtrace_probe_t, dtpr_nextfunc),
	    offsetof(dtrace_probe_t, dtpr_prevfunc));

	dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name),
	    offsetof(dtrace_probe_t, dtpr_nextname),
	    offsetof(dtrace_probe_t, dtpr_prevname));

	if (dtrace_retain_max < 1) {
		cmn_err(CE_WARN, "illegal value (%lu) for dtrace_retain_max; "
		    "setting to 1", dtrace_retain_max);
		dtrace_retain_max = 1;
	}

	/*
	 * Now discover our toxic ranges.
	 */
	dtrace_toxic_ranges(dtrace_toxrange_add);

	/*
	 * Before we register ourselves as a provider to our own framework,
	 * we would like to assert that dtrace_provider is NULL -- but that's
	 * not true if we were loaded as a dependency of a DTrace provider.
	 * Once we've registered, we can assert that dtrace_provider is our
	 * pseudo provider.
	 */
	(void) dtrace_register("dtrace", &dtrace_provider_attr,
	    DTRACE_PRIV_NONE, 0, &dtrace_provider_ops, NULL, &id);

	ASSERT(dtrace_provider != NULL);
	ASSERT((dtrace_provider_id_t)dtrace_provider == id);

#if !defined(__APPLE__)
	dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "BEGIN", 0, NULL);
	dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "END", 0, NULL);
	dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "ERROR", 1, NULL);
#elif defined(__ppc__) || defined(__ppc64__)
	dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "BEGIN", 2, NULL);
	dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "END", 1, NULL);
	dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "ERROR", 4, NULL);
#elif (defined(__i386__) || defined (__x86_64__))
	dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "BEGIN", 1, NULL);
	dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "END", 0, NULL);
	dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "ERROR", 3, NULL);
#else
#error Unknown Architecture
#endif /* __APPLE__ */

	dtrace_anon_property();
	lck_mtx_unlock(&cpu_lock);

	/*
	 * If DTrace helper tracing is enabled, we need to allocate the
	 * trace buffer and initialize the values.
	 */
	if (dtrace_helptrace_enabled) {
		ASSERT(dtrace_helptrace_buffer == NULL);
		dtrace_helptrace_buffer =
		    kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP);
		dtrace_helptrace_next = 0;
	}

	/*
	 * If there are already providers, we must ask them to provide their
	 * probes, and then match any anonymous enabling against them. Note
	 * that there should be no other retained enablings at this time:
	 * the only retained enablings at this time should be the anonymous
	 * enabling.
	 */
	if (dtrace_anon.dta_enabling != NULL) {
		ASSERT(dtrace_retained == dtrace_anon.dta_enabling);

		dtrace_enabling_provide(NULL);
		state = dtrace_anon.dta_state;

		/*
		 * We couldn't hold cpu_lock across the above call to
		 * dtrace_enabling_provide(), but we must hold it to actually
		 * enable the probes. We have to drop all of our locks, pick
		 * up cpu_lock, and regain our locks before matching the
		 * retained anonymous enabling.
		 */
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&dtrace_provider_lock);

		lck_mtx_lock(&cpu_lock);
		lck_mtx_lock(&dtrace_provider_lock);
		lck_mtx_lock(&dtrace_lock);

		if ((enab = dtrace_anon.dta_enabling) != NULL)
			(void) dtrace_enabling_match(enab, NULL);

		lck_mtx_unlock(&cpu_lock);
	}

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_provider_lock);

	if (state != NULL) {
		/*
		 * If we created any anonymous state, set it going now.
		 */
		(void) dtrace_state_go(state, &dtrace_anon.dta_beganon);
	}

	return (DDI_SUCCESS);
}
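/*
 * Illustrative note (not from the original source): dtrace_attach() above
 * establishes the lock ordering used throughout this file -- cpu_lock, then
 * dtrace_provider_lock, then dtrace_lock -- and every path that needs all
 * three (including the anonymous-enabling rematch) reacquires them in that
 * order after dropping any of them.
 */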
extern void fasttrap_init(void);

static int
dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
#pragma unused(flag, otyp)
	dtrace_state_t *state;
	uint32_t priv;
	uid_t uid;
	zoneid_t zoneid;

#if !defined(__APPLE__)
	if (getminor(*devp) == DTRACEMNRN_HELPER)
		return (0);

	/*
	 * If this wasn't an open with the "helper" minor, then it must be
	 * the "dtrace" minor.
	 */
	ASSERT(getminor(*devp) == DTRACEMNRN_DTRACE);
#else
	/* Darwin puts Helper on its own major device. */
#endif /* __APPLE__ */

	/*
	 * If no DTRACE_PRIV_* bits are set in the credential, then the
	 * caller lacks sufficient permission to do anything with DTrace.
	 */
	dtrace_cred2priv(cred_p, &priv, &uid, &zoneid);
	if (priv == DTRACE_PRIV_NONE)
		return (EACCES);

#if defined(__APPLE__)
	/*
	 * We delay the initialization of fasttrap as late as possible.
	 * It certainly can't be later than now!
	 */
	fasttrap_init();
#endif /* __APPLE__ */

	/*
	 * Ask all providers to provide all their probes.
	 */
	lck_mtx_lock(&dtrace_provider_lock);
	dtrace_probe_provide(NULL, NULL);
	lck_mtx_unlock(&dtrace_provider_lock);

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_lock);
	dtrace_opens++;
	dtrace_membar_producer();

	/*
	 * If the kernel debugger is active (that is, if the kernel debugger
	 * modified text in some way), we won't allow the open.
	 */
	if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
		dtrace_opens--;
		lck_mtx_unlock(&cpu_lock);
		lck_mtx_unlock(&dtrace_lock);
		return (EBUSY);
	}

	state = dtrace_state_create(devp, cred_p);
	lck_mtx_unlock(&cpu_lock);

	if (state == NULL) {
		if (--dtrace_opens == 0)
			(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
		lck_mtx_unlock(&dtrace_lock);
		return (EAGAIN);
	}

	lck_mtx_unlock(&dtrace_lock);

#if defined(__APPLE__)
	lck_rw_lock_exclusive(&dtrace_dof_mode_lock);

	/*
	 * If we are currently lazy, transition states.
	 *
	 * Unlike dtrace_close, we do not need to check the
	 * value of dtrace_opens, as any positive value (and
	 * we count as 1) means we transition states.
	 */
	if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON) {
		dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_OFF;

		/*
		 * Iterate all existing processes and load lazy dofs.
		 */
		proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS,
		    dtrace_lazy_dofs_proc_iterate_doit,
		    NULL,
		    dtrace_lazy_dofs_proc_iterate_filter,
		    NULL);
	}

	lck_rw_unlock_exclusive(&dtrace_dof_mode_lock);
#endif /* __APPLE__ */

	return (0);
}
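/*
 * Illustrative note (not from the original source): taken together with
 * dtrace_close() below, the dof mode forms a small state machine -- any open
 * of the dtrace device moves DTRACE_DOF_MODE_LAZY_ON to LAZY_OFF (faulting in
 * every process's lazy dofs via the iteration above), and the last close
 * moves LAZY_OFF back to LAZY_ON.  Processes that already went non-lazy stay
 * non-lazy, as described in the lazy-dof comment earlier in this file.
 */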
static int
dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
{
	minor_t minor = getminor(dev);
	dtrace_state_t *state;

#if !defined(__APPLE__)
	if (minor == DTRACEMNRN_HELPER)
		return (0);
#else
	/* Darwin puts Helper on its own major device. */
#endif /* __APPLE__ */

	state = ddi_get_soft_state(dtrace_softstate, minor);

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_lock);

	if (state->dts_anon) {
		/*
		 * There is anonymous state. Destroy that first.
		 */
		ASSERT(dtrace_anon.dta_state == NULL);
		dtrace_state_destroy(state->dts_anon);
	}

	dtrace_state_destroy(state);
	ASSERT(dtrace_opens > 0);
	if (--dtrace_opens == 0)
		(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&cpu_lock);

#if defined(__APPLE__)
	/*
	 * Lock ordering requires the dof mode lock be taken before
	 * the dtrace_lock.
	 */
	lck_rw_lock_exclusive(&dtrace_dof_mode_lock);
	lck_mtx_lock(&dtrace_lock);

	/*
	 * If we are currently lazy-off, and this is the last close, transition to
	 * lazy mode.
	 */
	if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF && dtrace_opens == 0) {
		dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
	}

	lck_mtx_unlock(&dtrace_lock);
	lck_rw_unlock_exclusive(&dtrace_dof_mode_lock);
#endif /* __APPLE__ */

	return (0);
}
#if defined(__APPLE__)
/*
 * Introduce cast to quiet warnings.
 * XXX: This hides a lot of brokenness.
 */
#define copyin(src, dst, len) copyin( (user_addr_t)(src), (dst), (len) )
#define copyout(src, dst, len) copyout( (src), (user_addr_t)(dst), (len) )
#endif /* __APPLE__ */
#if defined(__APPLE__)
static int
dtrace_ioctl_helper(int cmd, caddr_t arg, int *rv)
{
#pragma unused(rv)
	/*
	 * Safe to check this outside the dof mode lock
	 */
	if (dtrace_dof_mode == DTRACE_DOF_MODE_NEVER)
		return KERN_SUCCESS;

	switch (cmd) {
	case DTRACEHIOC_ADDDOF: {
		dof_helper_t *dhp = NULL;
		size_t dof_ioctl_data_size;
		dof_ioctl_data_t *multi_dof;
		unsigned int i;
		int rval = 0;
		user_addr_t user_address = *(user_addr_t *)arg;
		uint64_t dof_count;
		int multi_dof_claimed = 0;
		proc_t *p = current_proc();

		/*
		 * Read the number of DOF sections being passed in.
		 */
		if (copyin(user_address + offsetof(dof_ioctl_data_t, dofiod_count),
		    &dof_count,
		    sizeof(dof_count))) {
			dtrace_dof_error(NULL, "failed to copyin dofiod_count");
			return (EFAULT);
		}

		/*
		 * Range check the count.
		 */
		if (dof_count == 0 || dof_count > 1024) {
			dtrace_dof_error(NULL, "dofiod_count is not valid");
			return (EINVAL);
		}

		/*
		 * Allocate a correctly sized structure and copyin the data.
		 */
		dof_ioctl_data_size = DOF_IOCTL_DATA_T_SIZE(dof_count);
		if ((multi_dof = kmem_alloc(dof_ioctl_data_size, KM_SLEEP)) == NULL)
			return (ENOMEM);

		/* NOTE! We can no longer exit this method via return */
		if (copyin(user_address, multi_dof, dof_ioctl_data_size) != 0) {
			dtrace_dof_error(NULL, "failed copyin of dof_ioctl_data_t");
			rval = EFAULT;
			goto cleanup;
		}

		/*
		 * Check that the count didn't change between the first copyin and the second.
		 */
		if (multi_dof->dofiod_count != dof_count) {
			rval = EINVAL;
			goto cleanup;
		}

		/*
		 * Try to process lazily first.
		 */
		rval = dtrace_lazy_dofs_add(p, multi_dof, &multi_dof_claimed);

		/*
		 * If rval is EACCES, we must be non-lazy.
		 */
		if (rval == EACCES) {
			rval = 0;

			/*
			 * Process each dof_helper_t
			 */
			i = 0;
			do {
				dhp = &multi_dof->dofiod_helpers[i];

				dof_hdr_t *dof = dtrace_dof_copyin(dhp->dofhp_dof, &rval);

				if (dof != NULL) {
					lck_mtx_lock(&dtrace_lock);

					/*
					 * dtrace_helper_slurp() takes responsibility for the dof --
					 * it may free it now or it may save it and free it later.
					 */
					if ((dhp->dofhp_dof = (uint64_t)dtrace_helper_slurp(p, dof, dhp)) == -1ULL) {
						rval = EINVAL;
					}

					lck_mtx_unlock(&dtrace_lock);
				}
			} while (++i < multi_dof->dofiod_count && rval == 0);
		}

		/*
		 * We need to copyout the multi_dof struct, because it contains
		 * the generation (unique id) values needed to call DTRACEHIOC_REMOVE
		 *
		 * This could certainly be better optimized.
		 */
		if (copyout(multi_dof, user_address, dof_ioctl_data_size) != 0) {
			dtrace_dof_error(NULL, "failed copyout of dof_ioctl_data_t");
			/* Don't overwrite pre-existing error code */
			if (rval == 0) rval = EFAULT;
		}

cleanup:
		/*
		 * If we had to allocate struct memory, free it.
		 */
		if (multi_dof != NULL && !multi_dof_claimed) {
			kmem_free(multi_dof, dof_ioctl_data_size);
		}

		return rval;
	}

	case DTRACEHIOC_REMOVE: {
		int generation = *(int *)arg;
		proc_t *p = current_proc();

		int rval = dtrace_lazy_dofs_remove(p, generation);

		/*
		 * EACCES means non-lazy
		 */
		if (rval == EACCES) {
			lck_mtx_lock(&dtrace_lock);
			rval = dtrace_helper_destroygen(p, generation);
			lck_mtx_unlock(&dtrace_lock);
		}

		return (rval);
	}

	default:
		break;
	}

	return ENOTTY;
}
#endif /* __APPLE__ */
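/*
 * For illustration only (a sketch, not part of the original source): the
 * expected DTRACEHIOC_ADDDOF round trip from the registering process is
 * roughly
 *
 *	dof_ioctl_data_t *d = ...;   // dofiod_count entries, each with
 *	                             // dofhp_addr == dofhp_dof == DOF address
 *	ioctl(helper_fd, DTRACEHIOC_ADDDOF, &d);
 *	// on return, each dofiod_helpers[i].dofhp_dof holds the generation
 *	// to pass to DTRACEHIOC_REMOVE when that DOF is unloaded
 *
 * where helper_fd is assumed to be a descriptor for the helper device and
 * the exact argument convention is defined by the DTRACEHIOC_* ioctls.
 */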
static int
dtrace_ioctl(dev_t dev, int cmd, intptr_t arg, int md, cred_t *cr, int *rv)
{
	minor_t minor = getminor(dev);
	dtrace_state_t *state;
	int rval;

#if !defined(__APPLE__)
	if (minor == DTRACEMNRN_HELPER)
		return (dtrace_ioctl_helper(cmd, arg, rv));
#else
	/* Darwin puts Helper on its own major device. */
#endif /* __APPLE__ */

	state = ddi_get_soft_state(dtrace_softstate, minor);

	if (state->dts_anon) {
		ASSERT(dtrace_anon.dta_state == NULL);
		state = state->dts_anon;
	}

	switch (cmd) {
	case DTRACEIOC_PROVIDER: {
		dtrace_providerdesc_t pvd;
		dtrace_provider_t *pvp;

		if (copyin((void *)arg, &pvd, sizeof (pvd)) != 0)
			return (EFAULT);

		pvd.dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0';
		lck_mtx_lock(&dtrace_provider_lock);

		for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) {
			if (strcmp(pvp->dtpv_name, pvd.dtvd_name) == 0)
				break;
		}

		lck_mtx_unlock(&dtrace_provider_lock);

		if (pvp == NULL)
			return (ESRCH);

		bcopy(&pvp->dtpv_priv, &pvd.dtvd_priv, sizeof (dtrace_ppriv_t));
		bcopy(&pvp->dtpv_attr, &pvd.dtvd_attr, sizeof (dtrace_pattr_t));
		if (copyout(&pvd, (void *)arg, sizeof (pvd)) != 0)
			return (EFAULT);

		return (0);
	}
	case DTRACEIOC_EPROBE: {
		dtrace_eprobedesc_t epdesc;
		dtrace_ecb_t *ecb;
		dtrace_action_t *act;
		void *buf;
		size_t size;
		uintptr_t dest;
		int nrecs;

		if (copyin((void *)arg, &epdesc, sizeof (epdesc)) != 0)
			return (EFAULT);

		lck_mtx_lock(&dtrace_lock);

		if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) {
			lck_mtx_unlock(&dtrace_lock);
			return (EINVAL);
		}

		if (ecb->dte_probe == NULL) {
			lck_mtx_unlock(&dtrace_lock);
			return (EINVAL);
		}

		epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id;
		epdesc.dtepd_uarg = ecb->dte_uarg;
		epdesc.dtepd_size = ecb->dte_size;

		nrecs = epdesc.dtepd_nrecs;
		epdesc.dtepd_nrecs = 0;
		for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
			if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
				continue;

			epdesc.dtepd_nrecs++;
		}

		/*
		 * Now that we have the size, we need to allocate a temporary
		 * buffer in which to store the complete description. We need
		 * the temporary buffer to be able to drop dtrace_lock()
		 * across the copyout(), below.
		 */
		size = sizeof (dtrace_eprobedesc_t) +
		    (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t));

		buf = kmem_alloc(size, KM_SLEEP);
		dest = (uintptr_t)buf;

		bcopy(&epdesc, (void *)dest, sizeof (epdesc));
		dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]);

		for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
			if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
				continue;

			if (nrecs-- == 0)
				break;

			bcopy(&act->dta_rec, (void *)dest,
			    sizeof (dtrace_recdesc_t));
			dest += sizeof (dtrace_recdesc_t);
		}

		lck_mtx_unlock(&dtrace_lock);

		if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) {
			kmem_free(buf, size);
			return (EFAULT);
		}

		kmem_free(buf, size);
		return (0);
	}
	case DTRACEIOC_AGGDESC: {
		dtrace_aggdesc_t aggdesc;
		dtrace_action_t *act;
		dtrace_aggregation_t *agg;
		int nrecs;
		uint32_t offs;
		dtrace_recdesc_t *lrec;
		void *buf;
		size_t size;
		uintptr_t dest;

		if (copyin((void *)arg, &aggdesc, sizeof (aggdesc)) != 0)
			return (EFAULT);

		lck_mtx_lock(&dtrace_lock);

		if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) {
			lck_mtx_unlock(&dtrace_lock);
			return (EINVAL);
		}

		aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid;

		nrecs = aggdesc.dtagd_nrecs;
		aggdesc.dtagd_nrecs = 0;

		offs = agg->dtag_base;
		lrec = &agg->dtag_action.dta_rec;
		aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs;

		for (act = agg->dtag_first; ; act = act->dta_next) {
			ASSERT(act->dta_intuple ||
			    DTRACEACT_ISAGG(act->dta_kind));

			/*
			 * If this action has a record size of zero, it
			 * denotes an argument to the aggregating action.
			 * Because the presence of this record doesn't (or
			 * shouldn't) affect the way the data is interpreted,
			 * we don't copy it out to save user-level the
			 * confusion of dealing with a zero-length record.
			 */
			if (act->dta_rec.dtrd_size == 0) {
				ASSERT(agg->dtag_hasarg);
				continue;
			}

			aggdesc.dtagd_nrecs++;

			if (act == &agg->dtag_action)
				break;
		}

		/*
		 * Now that we have the size, we need to allocate a temporary
		 * buffer in which to store the complete description. We need
		 * the temporary buffer to be able to drop dtrace_lock()
		 * across the copyout(), below.
		 */
		size = sizeof (dtrace_aggdesc_t) +
		    (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t));

		buf = kmem_alloc(size, KM_SLEEP);
		dest = (uintptr_t)buf;

		bcopy(&aggdesc, (void *)dest, sizeof (aggdesc));
		dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]);

		for (act = agg->dtag_first; ; act = act->dta_next) {
			dtrace_recdesc_t rec = act->dta_rec;

			/*
			 * See the comment in the above loop for why we pass
			 * over zero-length records.
			 */
			if (rec.dtrd_size == 0) {
				ASSERT(agg->dtag_hasarg);
				continue;
			}

			if (nrecs-- == 0)
				break;

			rec.dtrd_offset -= offs;
			bcopy(&rec, (void *)dest, sizeof (rec));
			dest += sizeof (dtrace_recdesc_t);

			if (act == &agg->dtag_action)
				break;
		}

		lck_mtx_unlock(&dtrace_lock);

		if (copyout(buf, (void *)arg, dest - (uintptr_t)buf) != 0) {
			kmem_free(buf, size);
			return (EFAULT);
		}

		kmem_free(buf, size);
		return (0);
	}
	case DTRACEIOC_ENABLE: {
		dof_hdr_t *dof;
		dtrace_enabling_t *enab = NULL;
		dtrace_vstate_t *vstate;
		int err = 0;

		*rv = 0;

		/*
		 * If a NULL argument has been passed, we take this as our
		 * cue to reevaluate our enablings.
		 */
		if (arg == NULL) {
			lck_mtx_lock(&cpu_lock);
			lck_mtx_lock(&dtrace_lock);
			err = dtrace_enabling_matchstate(state, rv);
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&cpu_lock);

			return (err);
		}

		if ((dof = dtrace_dof_copyin(arg, &rval)) == NULL)
			return (rval);

		lck_mtx_lock(&cpu_lock);
		lck_mtx_lock(&dtrace_lock);
		vstate = &state->dts_vstate;

		if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&cpu_lock);
			dtrace_dof_destroy(dof);
			return (EBUSY);
		}

		if (dtrace_dof_slurp(dof, vstate, cr, &enab, 0, B_TRUE) != 0) {
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&cpu_lock);
			dtrace_dof_destroy(dof);
			return (EINVAL);
		}

		if ((rval = dtrace_dof_options(dof, state)) != 0) {
			dtrace_enabling_destroy(enab);
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&cpu_lock);
			dtrace_dof_destroy(dof);
			return (rval);
		}

		if ((err = dtrace_enabling_match(enab, rv)) == 0) {
			err = dtrace_enabling_retain(enab);
		} else {
			dtrace_enabling_destroy(enab);
		}

		lck_mtx_unlock(&cpu_lock);
		lck_mtx_unlock(&dtrace_lock);
		dtrace_dof_destroy(dof);

		return (err);
	}
	case DTRACEIOC_REPLICATE: {
		dtrace_repldesc_t desc;
		dtrace_probedesc_t *match = &desc.dtrpd_match;
		dtrace_probedesc_t *create = &desc.dtrpd_create;
		int err;

		if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		match->dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		create->dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		lck_mtx_lock(&dtrace_lock);
		err = dtrace_enabling_replicate(state, match, create);
		lck_mtx_unlock(&dtrace_lock);

		return (err);
	}
	case DTRACEIOC_PROBEMATCH:
	case DTRACEIOC_PROBES: {
		dtrace_probe_t *probe = NULL;
		dtrace_probedesc_t desc;
		dtrace_probekey_t pkey;
		dtrace_id_t i;
		int m = 0;
		uint32_t priv;
		uid_t uid;
		zoneid_t zoneid;

		if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		desc.dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		desc.dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		desc.dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		/*
		 * Before we attempt to match this probe, we want to give
		 * all providers the opportunity to provide it.
		 */
		if (desc.dtpd_id == DTRACE_IDNONE) {
			lck_mtx_lock(&dtrace_provider_lock);
			dtrace_probe_provide(&desc, NULL);
			lck_mtx_unlock(&dtrace_provider_lock);
			desc.dtpd_id++;
		}

		if (cmd == DTRACEIOC_PROBEMATCH) {
			dtrace_probekey(&desc, &pkey);
			pkey.dtpk_id = DTRACE_IDNONE;
		}

		dtrace_cred2priv(cr, &priv, &uid, &zoneid);

		lck_mtx_lock(&dtrace_lock);

		if (cmd == DTRACEIOC_PROBEMATCH) {
			for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) {
				if ((probe = dtrace_probes[i - 1]) != NULL &&
				    (m = dtrace_match_probe(probe, &pkey,
				    priv, uid, zoneid)) != 0)
					break;
			}

			if (m < 0) {
				lck_mtx_unlock(&dtrace_lock);
				return (EINVAL);
			}

		} else {
			for (i = desc.dtpd_id; i <= dtrace_nprobes; i++) {
				if ((probe = dtrace_probes[i - 1]) != NULL &&
				    dtrace_match_priv(probe, priv, uid, zoneid))
					break;
			}
		}

		if (probe == NULL) {
			lck_mtx_unlock(&dtrace_lock);
			return (ESRCH);
		}

		dtrace_probe_description(probe, &desc);
		lck_mtx_unlock(&dtrace_lock);

		if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}
	case DTRACEIOC_PROBEARG: {
		dtrace_argdesc_t desc;
		dtrace_probe_t *probe;
		dtrace_provider_t *prov;

		if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		if (desc.dtargd_id == DTRACE_IDNONE)
			return (EINVAL);

		if (desc.dtargd_ndx == DTRACE_ARGNONE)
			return (EINVAL);

		lck_mtx_lock(&dtrace_provider_lock);
		lck_mtx_lock(&mod_lock);
		lck_mtx_lock(&dtrace_lock);

		if (desc.dtargd_id > dtrace_nprobes) {
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&mod_lock);
			lck_mtx_unlock(&dtrace_provider_lock);
			return (EINVAL);
		}

		if ((probe = dtrace_probes[desc.dtargd_id - 1]) == NULL) {
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&mod_lock);
			lck_mtx_unlock(&dtrace_provider_lock);
			return (EINVAL);
		}

		lck_mtx_unlock(&dtrace_lock);

		prov = probe->dtpr_provider;

		if (prov->dtpv_pops.dtps_getargdesc == NULL) {
			/*
			 * There isn't any typed information for this probe.
			 * Set the argument number to DTRACE_ARGNONE.
			 */
			desc.dtargd_ndx = DTRACE_ARGNONE;
		} else {
			desc.dtargd_native[0] = '\0';
			desc.dtargd_xlate[0] = '\0';
			desc.dtargd_mapping = desc.dtargd_ndx;

			prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg,
			    probe->dtpr_id, probe->dtpr_arg, &desc);
		}

		lck_mtx_unlock(&mod_lock);
		lck_mtx_unlock(&dtrace_provider_lock);

		if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}
	case DTRACEIOC_GO: {
		processorid_t cpuid;
		rval = dtrace_state_go(state, &cpuid);

		if (rval != 0)
			return (rval);

		if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_STOP: {
		processorid_t cpuid;

		lck_mtx_lock(&dtrace_lock);
		rval = dtrace_state_stop(state, &cpuid);
		lck_mtx_unlock(&dtrace_lock);

		if (rval != 0)
			return (rval);

		if (copyout(&cpuid, (void *)arg, sizeof (cpuid)) != 0)
			return (EFAULT);

		return (0);
	}
	case DTRACEIOC_DOFGET: {
		dof_hdr_t hdr, *dof;
		uint64_t len;

		if (copyin((void *)arg, &hdr, sizeof (hdr)) != 0)
			return (EFAULT);

		lck_mtx_lock(&dtrace_lock);
		dof = dtrace_dof_create(state);
		lck_mtx_unlock(&dtrace_lock);

		len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz);
		rval = copyout(dof, (void *)arg, len);
		dtrace_dof_destroy(dof);

		return (rval == 0 ? 0 : EFAULT);
	}
	case DTRACEIOC_AGGSNAP:
	case DTRACEIOC_BUFSNAP: {
		dtrace_bufdesc_t desc;
		caddr_t cached;
		dtrace_buffer_t *buf;

		if (copyin((void *)arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		if (desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU)
			return (EINVAL);

		lck_mtx_lock(&dtrace_lock);

		if (cmd == DTRACEIOC_BUFSNAP) {
			buf = &state->dts_buffer[desc.dtbd_cpu];
		} else {
			buf = &state->dts_aggbuffer[desc.dtbd_cpu];
		}

		if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) {
			size_t sz = buf->dtb_offset;

			if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) {
				lck_mtx_unlock(&dtrace_lock);
				return (EBUSY);
			}

			/*
			 * If this buffer has already been consumed, we're
			 * going to indicate that there's nothing left here
			 * to consume.
			 */
			if (buf->dtb_flags & DTRACEBUF_CONSUMED) {
				lck_mtx_unlock(&dtrace_lock);

				desc.dtbd_size = 0;
				desc.dtbd_drops = 0;
				desc.dtbd_errors = 0;
				desc.dtbd_oldest = 0;
				sz = sizeof (desc);

				if (copyout(&desc, (void *)arg, sz) != 0)
					return (EFAULT);

				return (0);
			}

			/*
			 * If this is a ring buffer that has wrapped, we want
			 * to copy the whole thing out.
			 */
			if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
				dtrace_buffer_polish(buf);
				sz = buf->dtb_size;
			}

			if (copyout(buf->dtb_tomax, desc.dtbd_data, sz) != 0) {
				lck_mtx_unlock(&dtrace_lock);
				return (EFAULT);
			}

			desc.dtbd_size = sz;
			desc.dtbd_drops = buf->dtb_drops;
			desc.dtbd_errors = buf->dtb_errors;
			desc.dtbd_oldest = buf->dtb_xamot_offset;

			lck_mtx_unlock(&dtrace_lock);

			if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
				return (EFAULT);

			buf->dtb_flags |= DTRACEBUF_CONSUMED;

			return (0);
		}

		if (buf->dtb_tomax == NULL) {
			ASSERT(buf->dtb_xamot == NULL);
			lck_mtx_unlock(&dtrace_lock);
			return (ENOENT);
		}

		cached = buf->dtb_tomax;
		ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));

		dtrace_xcall(desc.dtbd_cpu,
		    (dtrace_xcall_t)dtrace_buffer_switch, buf);

		state->dts_errors += buf->dtb_xamot_errors;

		/*
		 * If the buffers did not actually switch, then the cross call
		 * did not take place -- presumably because the given CPU is
		 * not in the ready set. If this is the case, we'll return
		 * ENOENT.
		 */
		if (buf->dtb_tomax == cached) {
			ASSERT(buf->dtb_xamot != cached);
			lck_mtx_unlock(&dtrace_lock);
			return (ENOENT);
		}

		ASSERT(cached == buf->dtb_xamot);

		/*
		 * We have our snapshot; now copy it out.
		 */
		if (copyout(buf->dtb_xamot, desc.dtbd_data,
		    buf->dtb_xamot_offset) != 0) {
			lck_mtx_unlock(&dtrace_lock);
			return (EFAULT);
		}

		desc.dtbd_size = buf->dtb_xamot_offset;
		desc.dtbd_drops = buf->dtb_xamot_drops;
		desc.dtbd_errors = buf->dtb_xamot_errors;
		desc.dtbd_oldest = 0;

		lck_mtx_unlock(&dtrace_lock);

		/*
		 * Finally, copy out the buffer description.
		 */
		if (copyout(&desc, (void *)arg, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}
	case DTRACEIOC_CONF: {
		dtrace_conf_t conf;

		bzero(&conf, sizeof (conf));
		conf.dtc_difversion = DIF_VERSION;
		conf.dtc_difintregs = DIF_DIR_NREGS;
		conf.dtc_diftupregs = DIF_DTR_NREGS;
		conf.dtc_ctfmodel = CTF_MODEL_NATIVE;

		if (copyout(&conf, (void *)arg, sizeof (conf)) != 0)
			return (EFAULT);

		return (0);
	}
	case DTRACEIOC_STATUS: {
		dtrace_status_t stat;
		dtrace_dstate_t *dstate;
		int i, j;
		uint64_t nerrs;

		/*
		 * See the comment in dtrace_state_deadman() for the reason
		 * for setting dts_laststatus to INT64_MAX before setting
		 * it to the correct value.
		 */
		state->dts_laststatus = INT64_MAX;
		dtrace_membar_producer();
		state->dts_laststatus = dtrace_gethrtime();

		bzero(&stat, sizeof (stat));

		lck_mtx_lock(&dtrace_lock);

		if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) {
			lck_mtx_unlock(&dtrace_lock);
			return (ENOENT);
		}

		if (state->dts_activity == DTRACE_ACTIVITY_DRAINING)
			stat.dtst_exiting = 1;

		nerrs = state->dts_errors;
		dstate = &state->dts_vstate.dtvs_dynvars;

		for (i = 0; i < NCPU; i++) {
			dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i];

			stat.dtst_dyndrops += dcpu->dtdsc_drops;
			stat.dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops;
			stat.dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops;

			if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL)
				stat.dtst_filled++;

			nerrs += state->dts_buffer[i].dtb_errors;

			for (j = 0; j < state->dts_nspeculations; j++) {
				dtrace_speculation_t *spec;
				dtrace_buffer_t *buf;

				spec = &state->dts_speculations[j];
				buf = &spec->dtsp_buffer[i];
				stat.dtst_specdrops += buf->dtb_xamot_drops;
			}
		}

		stat.dtst_specdrops_busy = state->dts_speculations_busy;
		stat.dtst_specdrops_unavail = state->dts_speculations_unavail;
		stat.dtst_stkstroverflows = state->dts_stkstroverflows;
		stat.dtst_dblerrors = state->dts_dblerrors;
		stat.dtst_killed =
		    (state->dts_activity == DTRACE_ACTIVITY_KILLED);
		stat.dtst_errors = nerrs;

		lck_mtx_unlock(&dtrace_lock);

		if (copyout(&stat, (void *)arg, sizeof (stat)) != 0)
			return (EFAULT);

		return (0);
	}
	case DTRACEIOC_FORMAT: {
		dtrace_fmtdesc_t fmt;
		char *str;
		int len;

		if (copyin((void *)arg, &fmt, sizeof (fmt)) != 0)
			return (EFAULT);

		lck_mtx_lock(&dtrace_lock);

		if (fmt.dtfd_format == 0 ||
		    fmt.dtfd_format > state->dts_nformats) {
			lck_mtx_unlock(&dtrace_lock);
			return (EINVAL);
		}

		/*
		 * Format strings are allocated contiguously and they are
		 * never freed; if a format index is less than the number
		 * of formats, we can assert that the format map is non-NULL
		 * and that the format for the specified index is non-NULL.
		 */
		ASSERT(state->dts_formats != NULL);
		str = state->dts_formats[fmt.dtfd_format - 1];
		ASSERT(str != NULL);

		len = strlen(str) + 1;

		if (len > fmt.dtfd_length) {
			fmt.dtfd_length = len;

			if (copyout(&fmt, (void *)arg, sizeof (fmt)) != 0) {
				lck_mtx_unlock(&dtrace_lock);
				return (EINVAL);
			}
		} else {
			if (copyout(str, fmt.dtfd_string, len) != 0) {
				lck_mtx_unlock(&dtrace_lock);
				return (EINVAL);
			}
		}

		lck_mtx_unlock(&dtrace_lock);
		return (0);
	}

	default:
		break;
	}

	return (ENOTTY);
}
#if defined(__APPLE__)
#undef copyin
#undef copyout
#endif /* __APPLE__ */
16048 #if !defined(__APPLE__)
16051 dtrace_detach(dev_info_t
*dip
, ddi_detach_cmd_t cmd
)
16053 dtrace_state_t
*state
;
16060 return (DDI_SUCCESS
);
16063 return (DDI_FAILURE
);
16066 lck_mtx_lock(&cpu_lock
);
16067 lck_mtx_lock(&dtrace_provider_lock
);
16068 lck_mtx_lock(&dtrace_lock
);
16070 ASSERT(dtrace_opens
== 0);
16072 if (dtrace_helpers
> 0) {
16073 lck_mtx_unlock(&dtrace_provider_lock
);
16074 lck_mtx_unlock(&dtrace_lock
);
16075 lck_mtx_unlock(&cpu_lock
);
16076 return (DDI_FAILURE
);
16079 if (dtrace_unregister((dtrace_provider_id_t
)dtrace_provider
) != 0) {
16080 lck_mtx_unlock(&dtrace_provider_lock
);
16081 lck_mtx_unlock(&dtrace_lock
);
16082 lck_mtx_unlock(&cpu_lock
);
16083 return (DDI_FAILURE
);
16086 dtrace_provider
= NULL
;
16088 if ((state
= dtrace_anon_grab()) != NULL
) {
16090 * If there were ECBs on this state, the provider should
16091 * have not been allowed to detach; assert that there is
16094 ASSERT(state
->dts_necbs
== 0);
16095 dtrace_state_destroy(state
);
16098 * If we're being detached with anonymous state, we need to
16099 * indicate to the kernel debugger that DTrace is now inactive.
16101 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE
);
16104 bzero(&dtrace_anon
, sizeof (dtrace_anon_t
));
16105 unregister_cpu_setup_func((cpu_setup_func_t
*)dtrace_cpu_setup
, NULL
);
16106 dtrace_cpu_init
= NULL
;
16107 dtrace_helpers_cleanup
= NULL
;
16108 dtrace_helpers_fork
= NULL
;
16109 dtrace_cpustart_init
= NULL
;
16110 dtrace_cpustart_fini
= NULL
;
16111 dtrace_debugger_init
= NULL
;
16112 dtrace_debugger_fini
= NULL
;
16113 dtrace_kreloc_init
= NULL
;
16114 dtrace_kreloc_fini
= NULL
;
16115 dtrace_modload
= NULL
;
16116 dtrace_modunload
= NULL
;
	lck_mtx_unlock(&cpu_lock);

	if (dtrace_helptrace_enabled) {
		kmem_free(dtrace_helptrace_buffer, dtrace_helptrace_bufsize);
		dtrace_helptrace_buffer = NULL;
	}

	kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *));
	dtrace_probes = NULL;
	dtrace_nprobes = 0;

	dtrace_hash_destroy(dtrace_bymod);
	dtrace_hash_destroy(dtrace_byfunc);
	dtrace_hash_destroy(dtrace_byname);
	dtrace_bymod = NULL;
	dtrace_byfunc = NULL;
	dtrace_byname = NULL;

	kmem_cache_destroy(dtrace_state_cache);
	vmem_destroy(dtrace_minor);
	vmem_destroy(dtrace_arena);

	if (dtrace_toxrange != NULL) {
		kmem_free(dtrace_toxrange,
		    dtrace_toxranges_max * sizeof (dtrace_toxrange_t));
		dtrace_toxrange = NULL;
		dtrace_toxranges = 0;
		dtrace_toxranges_max = 0;
	}

	ddi_remove_minor_node(dtrace_devi, NULL);
	dtrace_devi = NULL;

	ddi_soft_state_fini(&dtrace_softstate);

	ASSERT(dtrace_vtime_references == 0);
	ASSERT(dtrace_opens == 0);
	ASSERT(dtrace_retained == NULL);

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_provider_lock);

	/*
	 * We don't destroy the task queue until after we have dropped our
	 * locks (taskq_destroy() may block on running tasks). To prevent
	 * attempting to do work after we have effectively detached but before
	 * the task queue has been destroyed, all tasks dispatched via the
	 * task queue must check that DTrace is still attached before
	 * performing any operation.
	 */
	taskq_destroy(dtrace_taskq);
	dtrace_taskq = NULL;

	return (DDI_SUCCESS);
}
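/*
 * Editorial sketch (not part of the original source): a task dispatched via
 * dtrace_taskq would honor the rule described above roughly like this,
 * assuming the file's dtrace_attached() helper (or an equivalent "still
 * attached" check):
 *
 *	static void
 *	dtrace_example_task(void *arg)		// hypothetical deferred task
 *	{
 *		lck_mtx_lock(&dtrace_lock);
 *		if (!dtrace_attached()) {	// detach already ran; do nothing
 *			lck_mtx_unlock(&dtrace_lock);
 *			return;
 *		}
 *		// ... perform the deferred work ...
 *		lck_mtx_unlock(&dtrace_lock);
 *	}
 */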
static int
dtrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)dtrace_devi;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}
static struct cb_ops dtrace_cb_ops = {
	dtrace_open,		/* open */
	dtrace_close,		/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	dtrace_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};
static struct dev_ops dtrace_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	dtrace_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	dtrace_attach,		/* attach */
	dtrace_detach,		/* detach */
	nodev,			/* reset */
	&dtrace_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	nodev			/* dev power */
};
static struct modldrv modldrv = {
	&mod_driverops,		/* module type (this is a pseudo driver) */
	"Dynamic Tracing",	/* name of module */
	&dtrace_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}
#else /* __APPLE__ */

d_open_t _dtrace_open, helper_open;
d_close_t _dtrace_close, helper_close;
d_ioctl_t _dtrace_ioctl, helper_ioctl;
int
_dtrace_open(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(p)
	dev_t locdev = dev;

	return dtrace_open(&locdev, flags, devtype, CRED());
}
int
helper_open(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(dev,flags,devtype,p)
	return 0;
}
int
_dtrace_close(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(p)
	return dtrace_close(dev, flags, devtype, CRED());
}
int
helper_close(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(dev,flags,devtype,p)
	return 0;
}
int
_dtrace_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p)
{
#pragma unused(p)
	int err, rv = 0;

	err = dtrace_ioctl(dev, (int)cmd, *(intptr_t *)data, fflag, CRED(), &rv);

	/* XXX Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */
	if (err != 0) {
		ASSERT( (err & 0xfffff000) == 0 );
		return (err & 0xfff); /* ioctl returns -1 and errno set to an error code < 4096 */
	} else if (rv != 0) {
		ASSERT( (rv & 0xfff00000) == 0 );
		return (((rv & 0xfffff) << 12)); /* ioctl returns -1 and errno set to a return value >= 4096 */
	} else
		return 0;
}
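/*
 * Worked example of the errno overload above (editorial, not in the original
 * source): a Solaris-style return value rv = 5 comes back as (5 << 12) ==
 * 20480, so the BSD ioctl path surfaces errno == 20480 and a consumer aware
 * of the convention recovers the value with (errno >> 12) == 5. Plain error
 * codes stay below 4096 and pass through in the low 12 bits unchanged.
 */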
int
helper_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p)
{
#pragma unused(dev,fflag,p)
	int err, rv = 0;

	err = dtrace_ioctl_helper((int)cmd, data, &rv);

	/* XXX Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */
	if (err != 0) {
		ASSERT( (err & 0xfffff000) == 0 );
		return (err & 0xfff); /* ioctl returns -1 and errno set to an error code < 4096 */
	} else if (rv != 0) {
		ASSERT( (rv & 0xfff00000) == 0 );
		return (((rv & 0xfffff) << 12)); /* ioctl returns -1 and errno set to a return value >= 4096 */
	} else
		return 0;
}
#define HELPER_MAJOR -24 /* let the kernel pick the device number */

/*
 * A struct describing which functions will get invoked for certain
 * actions.
 */
static struct cdevsw helper_cdevsw =
{
	helper_open,		/* open */
	helper_close,		/* close */
	eno_rdwrt,		/* read */
	eno_rdwrt,		/* write */
	helper_ioctl,		/* ioctl */
	(stop_fcn_t *)nulldev,	/* stop */
	(reset_fcn_t *)nulldev,	/* reset */
	NULL,			/* tty's */
	eno_select,		/* select */
	eno_mmap,		/* mmap */
	eno_strat,		/* strategy */
	eno_getc,		/* getc */
	eno_putc,		/* putc */
	0			/* type */
};
static int helper_majdevno = 0;

static int gDTraceInited = 0;

void
helper_init( void )
{
	/*
	 * Once the "helper" is initialized, it can take ioctl calls that use locks
	 * and zones initialized in dtrace_init. Make certain dtrace_init was called
	 * first.
	 */
	if (!gDTraceInited) {
		panic("helper_init before dtrace_init\n");
	}

	if (0 >= helper_majdevno)
	{
		helper_majdevno = cdevsw_add(HELPER_MAJOR, &helper_cdevsw);

		if (helper_majdevno < 0) {
			printf("helper_init: failed to allocate a major number!\n");
			return;
		}

		if (NULL == devfs_make_node( makedev(helper_majdevno, 0), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666,
		    DTRACEMNR_HELPER, 0 )) {
			printf("dtrace_init: failed to devfs_make_node for helper!\n");
			return;
		}
	} else
		panic("helper_init: called twice!\n");
}

#undef HELPER_MAJOR
/*
 * Called with DEVFS_LOCK held, so vmem_alloc's underlying blist structures are protected.
 */
static int
dtrace_clone_func(dev_t dev, int action)
{
#pragma unused(dev)

	if (action == DEVFS_CLONE_ALLOC) {
		if (NULL == dtrace_minor) /* Arena not created yet!?! */
			return 0;
		else {
			/*
			 * Propose a minor number, namely the next number that vmem_alloc() will return.
			 * Immediately put it back in play by calling vmem_free().
			 */
			int ret = (int)(uintptr_t)vmem_alloc(dtrace_minor, 1, VM_BESTFIT | VM_SLEEP);

			vmem_free(dtrace_minor, (void *)(uintptr_t)ret, 1);

			return ret;
		}
	}
	else if (action == DEVFS_CLONE_FREE) {
		return 0;
	}
	else return -1;
}
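/*
 * Sketch of the "peek" idiom above (editorial, not in the original source):
 * because DEVFS_LOCK serializes callers,
 *
 *	int next = (int)(uintptr_t)vmem_alloc(dtrace_minor, 1, VM_BESTFIT | VM_SLEEP);
 *	vmem_free(dtrace_minor, (void *)(uintptr_t)next, 1);
 *
 * leaves "next" equal to the minor number the subsequent real vmem_alloc()
 * in the open path will hand out, without permanently consuming an id here.
 */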
#define DTRACE_MAJOR -24 /* let the kernel pick the device number */

static struct cdevsw dtrace_cdevsw =
{
	_dtrace_open,		/* open */
	_dtrace_close,		/* close */
	eno_rdwrt,		/* read */
	eno_rdwrt,		/* write */
	_dtrace_ioctl,		/* ioctl */
	(stop_fcn_t *)nulldev,	/* stop */
	(reset_fcn_t *)nulldev,	/* reset */
	NULL,			/* tty's */
	eno_select,		/* select */
	eno_mmap,		/* mmap */
	eno_strat,		/* strategy */
	eno_getc,		/* getc */
	eno_putc,		/* putc */
	0			/* type */
};
lck_attr_t *dtrace_lck_attr;
lck_grp_attr_t *dtrace_lck_grp_attr;
lck_grp_t *dtrace_lck_grp;

static int gMajDevNo;
void
dtrace_init( void )
{
	if (0 == gDTraceInited) {
		int i, ncpu = NCPU;

		gMajDevNo = cdevsw_add(DTRACE_MAJOR, &dtrace_cdevsw);

		if (gMajDevNo < 0) {
			printf("dtrace_init: failed to allocate a major number!\n");
			return;
		}

		if (NULL == devfs_make_node_clone( makedev(gMajDevNo, 0), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666,
		    dtrace_clone_func, DTRACEMNR_DTRACE, 0 )) {
			printf("dtrace_init: failed to devfs_make_node_clone for dtrace!\n");
			return;
		}
#if defined(DTRACE_MEMORY_ZONES)
		/*
		 * Initialize the dtrace kalloc-emulation zones.
		 */
		dtrace_alloc_init();
#endif /* DTRACE_MEMORY_ZONES */

		/*
		 * Allocate the dtrace_probe_t zone
		 */
		dtrace_probe_t_zone = zinit(sizeof(dtrace_probe_t),
		    1024 * sizeof(dtrace_probe_t),
		    sizeof(dtrace_probe_t),
		    "dtrace.dtrace_probe_t");
		/*
		 * Create the dtrace lock group and attrs.
		 */
		dtrace_lck_attr = lck_attr_alloc_init();
		dtrace_lck_grp_attr = lck_grp_attr_alloc_init();
		dtrace_lck_grp = lck_grp_alloc_init("dtrace", dtrace_lck_grp_attr);

		/*
		 * We have to initialize all locks explicitly
		 */
		lck_mtx_init(&dtrace_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&dtrace_provider_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&dtrace_meta_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&dtrace_errlock, dtrace_lck_grp, dtrace_lck_attr);
		lck_rw_init(&dtrace_dof_mode_lock, dtrace_lck_grp, dtrace_lck_attr);
		/*
		 * The cpu_core structure consists of per-CPU state available in any context.
		 * On some architectures, this may mean that the page(s) containing the
		 * NCPU-sized array of cpu_core structures must be locked in the TLB -- it
		 * is up to the platform to assure that this is performed properly. Note that
		 * the structure is sized to avoid false sharing.
		 */
		lck_mtx_init(&cpu_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&mod_lock, dtrace_lck_grp, dtrace_lck_attr);

		cpu_core = (cpu_core_t *)kmem_zalloc( ncpu * sizeof(cpu_core_t), KM_SLEEP );
		for (i = 0; i < ncpu; ++i) {
			lck_mtx_init(&cpu_core[i].cpuc_pid_lock, dtrace_lck_grp, dtrace_lck_attr);
		}

		cpu_list = (cpu_t *)kmem_zalloc( ncpu * sizeof(cpu_t), KM_SLEEP );
		for (i = 0; i < ncpu; ++i) {
			cpu_list[i].cpu_id = (processorid_t)i;
			cpu_list[i].cpu_next = &(cpu_list[(i+1) % ncpu]);
			lck_rw_init(&cpu_list[i].cpu_ft_lock, dtrace_lck_grp, dtrace_lck_attr);
		}

		lck_mtx_lock(&cpu_lock);
		for (i = 0; i < ncpu; ++i)
			dtrace_cpu_setup_initial( (processorid_t)i ); /* In lieu of register_cpu_setup_func() callback */
		lck_mtx_unlock(&cpu_lock);
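		/*
		 * Editorial sketch (not in the original source): the cpu_next
		 * assignments above link cpu_list into a ring, so a walker of the
		 * form
		 *
		 *	cpu_t *c = &cpu_list[0];
		 *	do { visit(c); c = c->cpu_next; } while (c != &cpu_list[0]);
		 *
		 * touches every CPU exactly once ("visit" being a hypothetical
		 * callback).
		 */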
		(void)dtrace_abs_to_nano(0LL); /* Force once only call to clock_timebase_info (which can take a lock) */

		/*
		 * See dtrace_impl.h for a description of dof modes.
		 * The default is lazy dof.
		 *
		 * XXX Warn if state is LAZY_OFF? It won't break anything, but
		 * makes no sense...
		 */
		if (!PE_parse_boot_arg("dtrace_dof_mode", &dtrace_dof_mode)) {
			dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
		}
		/*
		 * Sanity check of dof mode value.
		 */
		switch (dtrace_dof_mode) {
		case DTRACE_DOF_MODE_NEVER:
		case DTRACE_DOF_MODE_LAZY_ON:
			/* valid modes, but nothing else we need to do */
			break;

		case DTRACE_DOF_MODE_LAZY_OFF:
		case DTRACE_DOF_MODE_NON_LAZY:
			/* Cannot wait for a dtrace_open to init fasttrap */
			fasttrap_init();
			break;

		default:
			/* Invalid, clamp to non lazy */
			dtrace_dof_mode = DTRACE_DOF_MODE_NON_LAZY;
			fasttrap_init();
			break;
		}

		gDTraceInited = 1;

	} else
		panic("dtrace_init: called twice!\n");
}
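/*
 * Editorial note (not in the original source): dtrace_dof_mode can be seeded
 * from the boot arguments parsed by PE_parse_boot_arg() above, e.g. a
 * boot-args entry of the form "dtrace_dof_mode=<n>" with <n> one of the
 * DTRACE_DOF_MODE_* values from dtrace_impl.h; when the argument is absent
 * the mode defaults to DTRACE_DOF_MODE_LAZY_ON.
 */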
void
dtrace_postinit(void)
{
	dtrace_attach( (dev_info_t *)makedev(gMajDevNo, 0), 0 );
}

#undef DTRACE_MAJOR

/*
 * Routines used to register interest in cpu's being added to or removed
 * from the system.
 */
void
register_cpu_setup_func(cpu_setup_func_t *ignore1, void *ignore2)
{
#pragma unused(ignore1,ignore2)
}

void
unregister_cpu_setup_func(cpu_setup_func_t *ignore1, void *ignore2)
{
#pragma unused(ignore1,ignore2)
}
#endif /* __APPLE__ */