4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
23 * Portions copyright (c) 2011, Joyent, Inc. All rights reserved.
27 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
28 * Use is subject to license terms.
31 /* #pragma ident "@(#)dtrace.c 1.65 08/07/02 SMI" */
34 * DTrace - Dynamic Tracing for Solaris
36 * This is the implementation of the Solaris Dynamic Tracing framework
37 * (DTrace). The user-visible interface to DTrace is described at length in
38 * the "Solaris Dynamic Tracing Guide". The interfaces between the libdtrace
39 * library, the in-kernel DTrace framework, and the DTrace providers are
40 * described in the block comments in the <sys/dtrace.h> header file. The
41 * internal architecture of DTrace is described in the block comments in the
42 * <sys/dtrace_impl.h> header file. The comments contained within the DTrace
43 * implementation very much assume mastery of all of these sources; if one has
44 * an unanswered question about the implementation, one should consult them
47 * The functions here are ordered roughly as follows:
49 * - Probe context functions
50 * - Probe hashing functions
51 * - Non-probe context utility functions
52 * - Matching functions
53 * - Provider-to-Framework API functions
54 * - Probe management functions
55 * - DIF object functions
57 * - Predicate functions
60 * - Enabling functions
62 * - Anonymous enabling functions
63 * - Consumer state functions
66 * - Driver cookbook functions
68 * Each group of functions begins with a block comment labelled the "DTrace
69 * [Group] Functions", allowing one to find each block by searching forward
70 * on capital-f functions.
72 #if !defined(__APPLE__)
73 #include <sys/errno.h>
75 #include <sys/modctl.h>
77 #include <sys/systm.h>
79 #include <sys/sunddi.h>
80 #include <sys/cpuvar.h>
82 #include <sys/strsubr.h>
83 #include <sys/sysmacros.h>
84 #include <sys/dtrace_impl.h>
85 #include <sys/atomic.h>
86 #include <sys/cmn_err.h>
87 #include <sys/mutex_impl.h>
88 #include <sys/rwlock_impl.h>
89 #include <sys/ctf_api.h>
90 #include <sys/panic.h>
91 #include <sys/priv_impl.h>
92 #include <sys/policy.h>
93 #include <sys/cred_impl.h>
94 #include <sys/procfs_isa.h>
95 #include <sys/taskq.h>
96 #include <sys/mkdev.h>
100 #include <sys/errno.h>
101 #include <sys/types.h>
102 #include <sys/stat.h>
103 #include <sys/conf.h>
104 #include <sys/systm.h>
105 #include <sys/dtrace_impl.h>
106 #include <sys/param.h>
107 #include <sys/proc_internal.h>
108 #include <sys/ioctl.h>
109 #include <sys/fcntl.h>
110 #include <miscfs/devfs/devfs.h>
111 #include <sys/malloc.h>
112 #include <sys/kernel_types.h>
113 #include <sys/proc_internal.h>
114 #include <sys/uio_internal.h>
115 #include <sys/kauth.h>
117 #include <sys/user.h>
118 #include <mach/exception_types.h>
119 #include <sys/signalvar.h>
120 #include <mach/task.h>
121 #include <kern/zalloc.h>
122 #include <kern/ast.h>
123 #include <netinet/in.h>
125 #if defined(__APPLE__)
126 #include <kern/cpu_data.h>
127 extern uint32_t pmap_find_phys(void *, uint64_t);
128 extern boolean_t pmap_valid_page(uint32_t);
129 extern void OSKextRegisterKextsWithDTrace(void);
130 extern kmod_info_t g_kernel_kmod_info;
131 #endif /* __APPLE__ */
134 /* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. */
135 #define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */
137 #define t_predcache t_dtrace_predcache /* Cosmetic. Helps readability of thread.h */
139 extern void dtrace_suspend(void);
140 extern void dtrace_resume(void);
141 extern void dtrace_init(void);
142 extern void helper_init(void);
143 extern void fasttrap_init(void);
144 extern void dtrace_lazy_dofs_duplicate(proc_t *, proc_t *);
145 extern void dtrace_lazy_dofs_destroy(proc_t *);
146 extern void dtrace_postinit(void);
148 #include "../../../osfmk/chud/chud_dtrace.h"
150 extern kern_return_t chudxnu_dtrace_callback
151 	(uint64_t selector, uint64_t *args, uint32_t count);
153 #endif /* __APPLE__ */
156 * DTrace Tunable Variables
158 * The following variables may be tuned by adding a line to /etc/system that
159 * includes both the name of the DTrace module ("dtrace") and the name of the
160 * variable. For example:
162 * set dtrace:dtrace_destructive_disallow = 1
164 * In general, the only variables that one should be tuning this way are those
165 * that affect system-wide DTrace behavior, and for which the default behavior
166 * is undesirable. Most of these variables are tunable on a per-consumer
167 * basis using DTrace options, and need not be tuned on a system-wide basis.
168 * When tuning these variables, avoid pathological values; while some attempt
169 * is made to verify the integrity of these variables, they are not considered
170 * part of the supported interface to DTrace, and they are therefore not
171 * checked comprehensively. Further, these variables should not be tuned
172 * dynamically via "mdb -kw" or other means; they should only be tuned via /etc/system.
175 int		dtrace_destructive_disallow = 0;
176 dtrace_optval_t	dtrace_nonroot_maxsize = (16 * 1024 * 1024);
177 size_t		dtrace_difo_maxsize = (256 * 1024);
178 dtrace_optval_t	dtrace_dof_maxsize = (384 * 1024);
179 size_t		dtrace_global_maxsize = (16 * 1024);
180 size_t		dtrace_actions_max = (16 * 1024);
181 size_t		dtrace_retain_max = 1024;
182 dtrace_optval_t	dtrace_helper_actions_max = 32;
183 dtrace_optval_t	dtrace_helper_providers_max = 64;
184 dtrace_optval_t	dtrace_dstate_defsize = (1 * 1024 * 1024);
185 size_t		dtrace_strsize_default = 256;
186 dtrace_optval_t	dtrace_cleanrate_default = 990099000;		/* 1.1 hz */
187 dtrace_optval_t	dtrace_cleanrate_min = 20000000;		/* 50 hz */
188 dtrace_optval_t	dtrace_cleanrate_max = (uint64_t)60 * NANOSEC;	/* 1/minute */
189 dtrace_optval_t	dtrace_aggrate_default = NANOSEC;		/* 1 hz */
190 dtrace_optval_t	dtrace_statusrate_default = NANOSEC;		/* 1 hz */
191 dtrace_optval_t	dtrace_statusrate_max = (hrtime_t)10 * NANOSEC;	/* 6/minute */
192 dtrace_optval_t	dtrace_switchrate_default = NANOSEC;		/* 1 hz */
193 dtrace_optval_t	dtrace_nspec_default = 1;
194 dtrace_optval_t	dtrace_specsize_default = 32 * 1024;
195 dtrace_optval_t	dtrace_stackframes_default = 20;
196 dtrace_optval_t	dtrace_ustackframes_default = 20;
197 dtrace_optval_t	dtrace_jstackframes_default = 50;
198 dtrace_optval_t	dtrace_jstackstrsize_default = 512;
199 int		dtrace_msgdsize_max = 128;
200 hrtime_t	dtrace_chill_max = 500 * (NANOSEC / MILLISEC);	/* 500 ms */
201 hrtime_t	dtrace_chill_interval = NANOSEC;		/* 1000 ms */
202 int		dtrace_devdepth_max = 32;
203 int		dtrace_err_verbose;
204 hrtime_t	dtrace_deadman_interval = NANOSEC;
205 hrtime_t	dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC;
206 hrtime_t	dtrace_deadman_user = (hrtime_t)30 * NANOSEC;
209 * DTrace External Variables
211 * As dtrace(7D) is a kernel module, any DTrace variables are obviously
212 * available to DTrace consumers via the backtick (`) syntax. One of these,
213 * dtrace_zero, is made deliberately so: it is provided as a source of
214 * well-known, zero-filled memory. While this variable is not documented,
215 * it is used by some translators as an implementation detail.
217 const char	dtrace_zero[256] = { 0 };	/* zero-filled memory */
218 unsigned int	dtrace_max_cpus = 0;		/* number of enabled cpus */
220 * DTrace Internal Variables
222 static dev_info_t	*dtrace_devi;		/* device info */
223 static vmem_t		*dtrace_arena;		/* probe ID arena */
224 static vmem_t		*dtrace_minor;		/* minor number arena */
225 static taskq_t		*dtrace_taskq;		/* task queue */
226 static dtrace_probe_t	**dtrace_probes;	/* array of all probes */
227 static int		dtrace_nprobes;		/* number of probes */
228 static dtrace_provider_t *dtrace_provider;	/* provider list */
229 static dtrace_meta_t	*dtrace_meta_pid;	/* user-land meta provider */
230 static int		dtrace_opens;		/* number of opens */
231 static int		dtrace_helpers;		/* number of helpers */
232 static void		*dtrace_softstate;	/* softstate pointer */
233 static dtrace_hash_t	*dtrace_bymod;		/* probes hashed by module */
234 static dtrace_hash_t	*dtrace_byfunc;		/* probes hashed by function */
235 static dtrace_hash_t	*dtrace_byname;		/* probes hashed by name */
236 static dtrace_toxrange_t *dtrace_toxrange;	/* toxic range array */
237 static int		dtrace_toxranges;	/* number of toxic ranges */
238 static int		dtrace_toxranges_max;	/* size of toxic range array */
239 static dtrace_anon_t	dtrace_anon;		/* anonymous enabling */
240 static kmem_cache_t	*dtrace_state_cache;	/* cache for dynamic state */
241 static uint64_t		dtrace_vtime_references; /* number of vtimestamp refs */
242 static kthread_t	*dtrace_panicked;	/* panicking thread */
243 static dtrace_ecb_t	*dtrace_ecb_create_cache; /* cached created ECB */
244 static dtrace_genid_t	dtrace_probegen;	/* current probe generation */
245 static dtrace_helpers_t *dtrace_deferred_pid;	/* deferred helper list */
246 static dtrace_enabling_t *dtrace_retained;	/* list of retained enablings */
247 static dtrace_genid_t	dtrace_retained_gen;	/* current retained enab gen */
248 static dtrace_dynvar_t	dtrace_dynhash_sink;	/* end of dynamic hash chains */
249 #if defined(__APPLE__)
250 static int		dtrace_dof_mode;	/* See dtrace_impl.h for a description of Darwin's dof modes. */
253 * This doesn't quite fit as an internal variable, as it must be accessed in
254 * fbt_provide and sdt_provide. It's clearly not a DTrace tunable variable either...
256 int		dtrace_kernel_symbol_mode;	/* See dtrace_impl.h for a description of Darwin's kernel symbol modes. */
259 #if defined(__APPLE__)
261 * To save memory, some common memory allocations are given a
262 * unique zone. For example, dtrace_probe_t is 72 bytes in size,
263 * which means it would fall into the kalloc.128 bucket. With
264 * 20k elements allocated, the space saved is substantial.
267 struct zone *dtrace_probe_t_zone;
269 static int dtrace_module_unloaded(struct kmod_info *kmod);
270 #endif /* __APPLE__ */
274 * DTrace is protected by three (relatively coarse-grained) locks:
276 * (1) dtrace_lock is required to manipulate essentially any DTrace state,
277 * including enabling state, probes, ECBs, consumer state, helper state,
278 * etc. Importantly, dtrace_lock is _not_ required when in probe context;
279 * probe context is lock-free -- synchronization is handled via the
280 * dtrace_sync() cross call mechanism.
282 * (2) dtrace_provider_lock is required when manipulating provider state, or
283 * when provider state must be held constant.
285 * (3) dtrace_meta_lock is required when manipulating meta provider state, or
286 * when meta provider state must be held constant.
288 * The lock ordering between these three locks is dtrace_meta_lock before
289 * dtrace_provider_lock before dtrace_lock. (In particular, there are
290 * several places where dtrace_provider_lock is held by the framework as it
291 * calls into the providers -- which then call back into the framework,
292 * grabbing dtrace_lock.)
294 * There are two other locks in the mix: mod_lock and cpu_lock. With respect
295 * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical
296 * role as a coarse-grained lock; it is acquired before both of these locks.
297 * With respect to dtrace_meta_lock, its behavior is stranger: cpu_lock must
298 * be acquired _between_ dtrace_meta_lock and any other DTrace locks.
299 * mod_lock is similar with respect to dtrace_provider_lock in that it must be
300 * acquired _between_ dtrace_provider_lock and dtrace_lock.
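/*
 * As an illustration of the ordering rules above, a hypothetical path that
 * needed all three framework locks (plus cpu_lock and mod_lock) would have
 * to take them in the order sketched below.  The function name is invented,
 * and on Darwin the mutex_enter()/mutex_exit() calls become
 * lck_mtx_lock()/lck_mtx_unlock() as noted further down; the snippet is not
 * part of the build.
 */
#if 0
static void
dtrace_example_lock_ordering(void)
{
	mutex_enter(&dtrace_meta_lock);		/* meta provider state */
	mutex_enter(&cpu_lock);			/* between meta lock and the others */
	mutex_enter(&dtrace_provider_lock);	/* provider state */
	mutex_enter(&mod_lock);			/* between provider lock and dtrace_lock */
	mutex_enter(&dtrace_lock);		/* core DTrace state */

	/* ... manipulate framework, provider, and meta provider state ... */

	mutex_exit(&dtrace_lock);
	mutex_exit(&mod_lock);
	mutex_exit(&dtrace_provider_lock);
	mutex_exit(&cpu_lock);
	mutex_exit(&dtrace_meta_lock);
}
#endif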
303 #if !defined(__APPLE__)
304 static kmutex_t		dtrace_lock;		/* probe state lock */
305 static kmutex_t		dtrace_provider_lock;	/* provider state lock */
306 static kmutex_t		dtrace_meta_lock;	/* meta-provider state lock */
311 * All kmutex_t vars have been changed to lck_mtx_t.
312 * Note that lck_mtx_t's require explicit initialization.
314 * mutex_enter() becomes lck_mtx_lock()
315 * mutex_exit() becomes lck_mtx_unlock()
317 * Lock asserts are changed like this:
319 * ASSERT(MUTEX_HELD(&cpu_lock));
321 * lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
323 * Due to the number of these changes, they are not called out explicitly.
325 static lck_mtx_t	dtrace_lock;		/* probe state lock */
326 static lck_mtx_t	dtrace_provider_lock;	/* provider state lock */
327 static lck_mtx_t	dtrace_meta_lock;	/* meta-provider state lock */
328 static lck_rw_t		dtrace_dof_mode_lock;	/* dof mode lock */
329 #endif /* __APPLE__ */
332 * DTrace Provider Variables
334 * These are the variables relating to DTrace as a provider (that is, the
335 * provider of the BEGIN, END, and ERROR probes).
337 static dtrace_pattr_t	dtrace_provider_attr = {
338 { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
339 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
340 { DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
341 { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
342 { DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
350 dtrace_enable_nullop(void)
355 static dtrace_pops_t	dtrace_provider_ops = {
356 	(void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop,
357 	(void (*)(void *, struct modctl *))dtrace_nullop,
358 	(int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop,
359 	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
360 	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
361 	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
365 	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop
368 static dtrace_id_t	dtrace_probeid_begin;	/* special BEGIN probe */
369 static dtrace_id_t	dtrace_probeid_end;	/* special END probe */
370 dtrace_id_t		dtrace_probeid_error;	/* special ERROR probe */
373 * DTrace Helper Tracing Variables
375 uint32_t dtrace_helptrace_next = 0;
376 uint32_t dtrace_helptrace_nlocals;
377 char	*dtrace_helptrace_buffer;
378 #if !defined(__APPLE__) /* Quiet compiler warning */
379 int	dtrace_helptrace_bufsize = 512 * 1024;
381 size_t	dtrace_helptrace_bufsize = 512 * 1024;
382 #endif /* __APPLE__ */
385 int	dtrace_helptrace_enabled = 1;
387 int	dtrace_helptrace_enabled = 0;
391 * DTrace Error Hashing
393 * On DEBUG kernels, DTrace will track the errors that it has seen in a hash
394 * table. This is very useful for checking coverage of tests that are
395 * expected to induce DIF or DOF processing errors, and may be useful for
396 * debugging problems in the DIF code generator or in DOF generation. The
397 * error hash may be examined with the ::dtrace_errhash MDB dcmd.
400 static dtrace_errhash_t	dtrace_errhash[DTRACE_ERRHASHSZ];
401 static const char	*dtrace_errlast;
402 static kthread_t	*dtrace_errthread;
403 static lck_mtx_t	dtrace_errlock;
407 * DTrace Macros and Constants
409 * These are various macros that are useful in various spots in the
410 * implementation, along with a few random constants that have no meaning
411 * outside of the implementation. There is no real structure to this cpp
412 * mishmash -- but is there ever?
414 #define DTRACE_HASHSTR(hash, probe) \
415 dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs)))
417 #define DTRACE_HASHNEXT(hash, probe) \
418 (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs)
420 #define DTRACE_HASHPREV(hash, probe) \
421 (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs)
423 #define DTRACE_HASHEQ(hash, lhs, rhs) \
424 (strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \
425 *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0)
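/*
 * The dth_stroffs/dth_nextoffs/dth_prevoffs offsets consulted by the macros
 * above are what let one hash implementation serve the by-module,
 * by-function, and by-name probe hashes: each hash records where, inside a
 * dtrace_probe_t, its key string and its next/prev links live.  The sketch
 * below shows the idea; the probe member names (dtpr_mod, dtpr_nextmod,
 * dtpr_prevmod) are assumed, not taken from this excerpt, and the snippet
 * is illustrative only.
 */
#if 0
static void
dtrace_hash_offsets_example(dtrace_hash_t *bymod)
{
	bymod->dth_stroffs = offsetof(dtrace_probe_t, dtpr_mod);
	bymod->dth_nextoffs = offsetof(dtrace_probe_t, dtpr_nextmod);
	bymod->dth_prevoffs = offsetof(dtrace_probe_t, dtpr_prevmod);

	/*
	 * DTRACE_HASHSTR(bymod, probe) now hashes the module name, and
	 * DTRACE_HASHNEXT(bymod, probe) yields the per-module chain link,
	 * so the same chain-walking code works for all three probe hashes.
	 */
}
#endif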
427 #define DTRACE_AGGHASHSIZE_SLEW 17
429 #define DTRACE_V4MAPPED_OFFSET (sizeof (uint32_t) * 3)
432 * The key for a thread-local variable consists of the lower 61 bits of the
433 * t_did, plus the 3 bits of the highest active interrupt above LOCK_LEVEL.
434 * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never
435 * equal to a variable identifier. This is necessary (but not sufficient) to
436 * assure that global associative arrays never collide with thread-local
437 * variables. To guarantee that they cannot collide, we must also define the
438 * order for keying dynamic variables. That order is:
440 * [ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ]
442 * Because the variable-key and the tls-key are in orthogonal spaces, there is
443 * no way for a global variable key signature to match a thread-local key
446 #if !defined(__APPLE__)
447 #define DTRACE_TLS_THRKEY(where) { \
449 uint_t actv = CPU->cpu_intr_actv >> (LOCK_LEVEL + 1); \
450 for (; actv; actv >>= 1) \
452 ASSERT(intr < (1 << 3)); \
453 (where) = ((curthread->t_did + DIF_VARIABLE_MAX) & \
454 (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
457 #if defined (__x86_64__)
458 /* FIXME: two function calls!! */
459 #define DTRACE_TLS_THRKEY(where) { \
460 uint_t intr = ml_at_interrupt_context(); /* Note: just one measly bit */ \
461 uint64_t thr = (uintptr_t)current_thread(); \
462 ASSERT(intr < (1 << 3)); \
463 (where) = ((thr + DIF_VARIABLE_MAX) & \
464 (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
467 #error Unknown architecture
469 #endif /* __APPLE__ */
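/*
 * A self-contained illustration of the thread-key layout described above:
 * the low 61 bits carry the thread identifier (offset by DIF_VARIABLE_MAX)
 * and the top 3 bits carry the interrupt state.  This is a sketch for
 * exposition only; the real packing is done by DTRACE_TLS_THRKEY, and the
 * helper names here are invented.
 */
#if 0
#include <stdint.h>
#include <assert.h>

static uint64_t
tls_thrkey_pack(uint64_t thread_id, uint64_t intr, uint64_t dif_variable_max)
{
	assert(intr < (1 << 3));		/* only 3 bits are available */
	return (((thread_id + dif_variable_max) & (((uint64_t)1 << 61) - 1)) |
	    (intr << 61));
}

static uint64_t
tls_thrkey_intr(uint64_t key)
{
	return (key >> 61);			/* recover the interrupt bits */
}
#endif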
471 #define DT_BSWAP_8(x) ((x) & 0xff)
472 #define DT_BSWAP_16(x) ((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8))
473 #define DT_BSWAP_32(x) ((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((x) >> 16))
474 #define DT_BSWAP_64(x) ((DT_BSWAP_32(x) << 32) | DT_BSWAP_32((x) >> 32))
476 #define DT_MASK_LO 0x00000000FFFFFFFFULL
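/*
 * A quick sanity check of the byte-swap helpers above, with operands held
 * in a 64-bit unsigned type so every intermediate stays positive; this is
 * illustrative only and not part of the build.
 */
#if 0
#include <assert.h>
#include <stdint.h>

static void
dt_bswap_selftest(void)
{
	assert(DT_BSWAP_16((uint64_t)0x1234) == 0x3412);
	assert(DT_BSWAP_32((uint64_t)0x12345678) == 0x78563412);
	assert(DT_BSWAP_64((uint64_t)0x0102030405060708ULL) ==
	    0x0807060504030201ULL);
}
#endif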
478 #define DTRACE_STORE(type, tomax, offset, what) \
479 *((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what);
482 #define DTRACE_ALIGNCHECK(addr, size, flags) \
483 if (addr & (MIN(size,4) - 1)) { \
484 *flags |= CPU_DTRACE_BADALIGN; \
485 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \
490 * Test whether a range of memory starting at testaddr of size testsz falls
491 * within the range of memory described by addr, sz. We take care to avoid
492 * problems with overflow and underflow of the unsigned quantities, and
493 * disallow all negative sizes. Ranges of size 0 are allowed.
495 #define DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \
496 ((testaddr) - (baseaddr) < (basesz) && \
497 (testaddr) + (testsz) - (baseaddr) <= (basesz) && \
498 (testaddr) + (testsz) >= (testaddr))
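/*
 * A userland sketch of the property the macro is designed to provide: even
 * when testaddr + testsz would wrap around zero, the range is rejected
 * rather than spuriously accepted.  The macro is repeated here only so the
 * sketch stands alone; illustrative, not part of the build.
 */
#if 0
#include <stdint.h>
#include <assert.h>

#define DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz)	\
	((testaddr) - (baseaddr) < (basesz) &&			\
	(testaddr) + (testsz) - (baseaddr) <= (basesz) &&	\
	(testaddr) + (testsz) >= (testaddr))

int
main(void)
{
	uint64_t base = 0x1000, size = 0x100;

	assert(DTRACE_INRANGE(0x1000, 0x10, base, size));	/* inside */
	assert(DTRACE_INRANGE(0x1080, 0, base, size));		/* zero-size ok */
	assert(!DTRACE_INRANGE(0x0ff0, 0x10, base, size));	/* below base */
	assert(!DTRACE_INRANGE(0x10f8, 0x10, base, size));	/* runs past end */
	assert(!DTRACE_INRANGE(0x1010, UINT64_MAX, base, size)); /* would wrap */
	return (0);
}
#endif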
501 * Test whether alloc_sz bytes will fit in the scratch region. We isolate
502 * alloc_sz on the righthand side of the comparison in order to avoid overflow
503 * or underflow in the comparison with it. This is simpler than the INRANGE
504 * check above, because we know that the dtms_scratch_ptr is valid in the
505 * range. Allocations of size zero are allowed.
507 #define DTRACE_INSCRATCH(mstate, alloc_sz) \
508 ((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \
509 (mstate)->dtms_scratch_ptr >= (alloc_sz))
511 #if !defined(__APPLE__)
512 #define DTRACE_LOADFUNC(bits) \
515 dtrace_load##bits(uintptr_t addr) \
517 size_t size = bits / NBBY; \
519 uint##bits##_t rval; \
521 volatile uint16_t *flags = (volatile uint16_t *) \
522 &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; \
524 DTRACE_ALIGNCHECK(addr, size, flags); \
526 for (i = 0; i < dtrace_toxranges; i++) { \
527 if (addr >= dtrace_toxrange[i].dtt_limit) \
530 if (addr + size <= dtrace_toxrange[i].dtt_base) \
534 * This address falls within a toxic region; return 0. \
536 *flags |= CPU_DTRACE_BADADDR; \
537 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \
541 *flags |= CPU_DTRACE_NOFAULT; \
543 rval = *((volatile uint##bits##_t *)addr); \
544 *flags &= ~CPU_DTRACE_NOFAULT; \
546 return (!(*flags & CPU_DTRACE_FAULT) ? rval : 0); \
548 #else /* __APPLE__ */
549 #define RECOVER_LABEL(bits) dtraceLoadRecover##bits:
551 #if defined (__x86_64__)
552 #define DTRACE_LOADFUNC(bits) \
554 uint##bits##_t dtrace_load##bits(uintptr_t addr); \
557 dtrace_load##bits(uintptr_t addr) \
559 size_t size = bits / NBBY; \
561 uint##bits##_t rval = 0; \
563 volatile uint16_t *flags = (volatile uint16_t *) \
564 &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; \
566 DTRACE_ALIGNCHECK(addr, size, flags); \
568 for (i = 0; i < dtrace_toxranges; i++) { \
569 if (addr >= dtrace_toxrange[i].dtt_limit) \
572 if (addr + size <= dtrace_toxrange[i].dtt_base) \
576 * This address falls within a toxic region; return 0. \
578 *flags |= CPU_DTRACE_BADADDR; \
579 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \
584 volatile vm_offset_t recover = (vm_offset_t)&&dtraceLoadRecover##bits; \
585 *flags |= CPU_DTRACE_NOFAULT; \
586 recover = dtrace_set_thread_recover(current_thread(), recover); \
589 * PR6394061 - avoid device memory that is unpredictably \
590 * mapped and unmapped \
592 if (pmap_valid_page(pmap_find_phys(kernel_pmap, addr))) \
593 rval = *((volatile uint##bits##_t *)addr); \
594 RECOVER_LABEL(bits); \
595 (void)dtrace_set_thread_recover(current_thread(), recover); \
596 *flags &= ~CPU_DTRACE_NOFAULT; \
601 #else /* all other architectures */
602 #error Unknown Architecture
604 #endif /* __APPLE__ */
607 #define dtrace_loadptr dtrace_load64
609 #define dtrace_loadptr dtrace_load32
612 #define DTRACE_DYNHASH_FREE 0
613 #define DTRACE_DYNHASH_SINK 1
614 #define DTRACE_DYNHASH_VALID 2
616 #define DTRACE_MATCH_FAIL -1
617 #define DTRACE_MATCH_NEXT 0
618 #define DTRACE_MATCH_DONE 1
619 #define DTRACE_ANCHORED(probe) ((probe)->dtpr_func[0] != '\0')
620 #define DTRACE_STATE_ALIGN 64
622 #define DTRACE_FLAGS2FLT(flags) \
623 (((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR : \
624 ((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP : \
625 ((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO : \
626 ((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV : \
627 ((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV : \
628 ((flags) & CPU_DTRACE_TUPOFLOW) ? DTRACEFLT_TUPOFLOW : \
629 ((flags) & CPU_DTRACE_BADALIGN) ? DTRACEFLT_BADALIGN : \
630 ((flags) & CPU_DTRACE_NOSCRATCH) ? DTRACEFLT_NOSCRATCH : \
631 ((flags) & CPU_DTRACE_BADSTACK) ? DTRACEFLT_BADSTACK : \
634 #define DTRACEACT_ISSTRING(act) \
635 ((act)->dta_kind == DTRACEACT_DIFEXPR && \
636 (act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING)
639 #if defined (__APPLE__)
640 /* Avoid compiler warnings when assigning regs[rd] = NULL */
643 #define NULL (uintptr_t)0
645 #endif /* __APPLE__ */
647 static size_t dtrace_strlen(const char *, size_t);
648 static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
649 static void dtrace_enabling_provide(dtrace_provider_t *);
650 static int dtrace_enabling_match(dtrace_enabling_t *, int *);
651 static void dtrace_enabling_matchall(void);
652 static dtrace_state_t *dtrace_anon_grab(void);
653 static uint64_t dtrace_helper(int, dtrace_mstate_t *,
654     dtrace_state_t *, uint64_t, uint64_t);
655 static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
656 static void dtrace_buffer_drop(dtrace_buffer_t *);
657 static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
658     dtrace_state_t *, dtrace_mstate_t *);
659 static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
661 static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
662 static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
665 * DTrace Probe Context Functions
667 * These functions are called from probe context. Because probe context is
668 * any context in which C may be called, arbitrary locks may be held,
669 * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
670 * As a result, functions called from probe context may only call other DTrace
671 * support functions -- they may not interact at all with the system at large.
672 * (Note that the ASSERT macro is made probe-context safe by redefining it in
673 * terms of dtrace_assfail(), a probe-context safe function.) If arbitrary
674 * loads are to be performed from probe context, they _must_ be in terms of
675 * the safe dtrace_load*() variants.
677 * Some functions in this block are not actually called from probe context;
678 * for these functions, there will be a comment above the function reading
679 * "Note: not called from probe context."
683 dtrace_assfail(const char *a, const char *f, int l)
685 	panic("dtrace: assertion failed: %s, file: %s, line: %d", a, f, l);
688 	 * We just need something here that even the most clever compiler
689 	 * cannot optimize away.
691 	return (a[(uintptr_t)f]);
695 * Atomically increment a specified error counter from probe context.
698 dtrace_error(uint32_t *counter)
701 * Most counters stored to in probe context are per-CPU counters.
702 * However, there are some error conditions that are sufficiently
703 * arcane that they don't merit per-CPU storage. If these counters
704 * are incremented concurrently on different CPUs, scalability will be
705 * adversely affected -- but we don't expect them to be white-hot in a
706 * correctly constructed enabling...
713 	if ((nval = oval + 1) == 0) {
715 * If the counter would wrap, set it to 1 -- assuring
716 * that the counter is never zero when we have seen
717 * errors. (The counter must be 32-bits because we
718 * aren't guaranteed a 64-bit compare&swap operation.)
719 * To save this code both the infamy of being fingered
720 * by a priggish news story and the indignity of being
721 * the target of a neo-puritan witch trial, we're
722 * carefully avoiding any colorful description of the
723 * likelihood of this condition -- but suffice it to
724 * say that it is only slightly more likely than the
725 * overflow of predicate cache IDs, as discussed in
726 * dtrace_predicate_create().
730 	} while (dtrace_cas32(counter, oval, nval) != oval);
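/*
 * The same wrap-avoiding increment, sketched with C11 atomics for clarity:
 * the counter is bumped with a compare-and-swap loop, and a wrap to zero is
 * replaced with 1 so that a non-zero value always means "errors were seen".
 * Illustrative only; the kernel code above uses dtrace_cas32() directly.
 */
#if 0
#include <stdatomic.h>
#include <stdint.h>

static void
error_count_increment(_Atomic uint32_t *counter)
{
	uint32_t oval, nval;

	do {
		oval = atomic_load(counter);
		if ((nval = oval + 1) == 0)
			nval = 1;	/* never let the counter wrap to zero */
	} while (!atomic_compare_exchange_weak(counter, &oval, nval));
}
#endif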
734 * Use the DTRACE_LOADFUNC macro to define functions for each of loading a
735 * uint8_t, a uint16_t, a uint32_t and a uint64_t.
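/*
 * In the full source the macro is expanded once per width immediately after
 * this comment; the invocations presumably read as follows.
 */
DTRACE_LOADFUNC(8)
DTRACE_LOADFUNC(16)
DTRACE_LOADFUNC(32)
DTRACE_LOADFUNC(64)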
743 dtrace_inscratch(uintptr_t dest
, size_t size
, dtrace_mstate_t
*mstate
)
745 if (dest
< mstate
->dtms_scratch_base
)
748 if (dest
+ size
< dest
)
751 if (dest
+ size
> mstate
->dtms_scratch_ptr
)
758 dtrace_canstore_statvar(uint64_t addr
, size_t sz
,
759 dtrace_statvar_t
**svars
, int nsvars
)
763 for (i
= 0; i
< nsvars
; i
++) {
764 dtrace_statvar_t
*svar
= svars
[i
];
766 if (svar
== NULL
|| svar
->dtsv_size
== 0)
769 if (DTRACE_INRANGE(addr
, sz
, svar
->dtsv_data
, svar
->dtsv_size
))
777 * Check to see if the address is within a memory region to which a store may
778 * be issued. This includes the DTrace scratch areas, and any DTrace variable
779 * region. The caller of dtrace_canstore() is responsible for performing any
780 * alignment checks that are needed before stores are actually executed.
783 dtrace_canstore(uint64_t addr
, size_t sz
, dtrace_mstate_t
*mstate
,
784 dtrace_vstate_t
*vstate
)
787 * First, check to see if the address is in scratch space...
789 if (DTRACE_INRANGE(addr
, sz
, mstate
->dtms_scratch_base
,
790 mstate
->dtms_scratch_size
))
794 * Now check to see if it's a dynamic variable. This check will pick
795 * up both thread-local variables and any global dynamically-allocated
798 if (DTRACE_INRANGE(addr
, sz
, (uintptr_t)vstate
->dtvs_dynvars
.dtds_base
,
799 vstate
->dtvs_dynvars
.dtds_size
)) {
800 dtrace_dstate_t
*dstate
= &vstate
->dtvs_dynvars
;
801 uintptr_t base
= (uintptr_t)dstate
->dtds_base
+
802 (dstate
->dtds_hashsize
* sizeof (dtrace_dynhash_t
));
806 * Before we assume that we can store here, we need to make
807 * sure that it isn't in our metadata -- storing to our
808 * dynamic variable metadata would corrupt our state. For
809 * the range to not include any dynamic variable metadata,
812 * (1) Start above the hash table that is at the base of
813 * the dynamic variable space
815 * (2) Have a starting chunk offset that is beyond the
816 * dtrace_dynvar_t that is at the base of every chunk
818 * (3) Not span a chunk boundary
824 chunkoffs
= (addr
- base
) % dstate
->dtds_chunksize
;
826 if (chunkoffs
< sizeof (dtrace_dynvar_t
))
829 if (chunkoffs
+ sz
> dstate
->dtds_chunksize
)
836 * Finally, check the static local and global variables. These checks
837 * take the longest, so we perform them last.
839 if (dtrace_canstore_statvar(addr
, sz
,
840 vstate
->dtvs_locals
, vstate
->dtvs_nlocals
))
843 if (dtrace_canstore_statvar(addr
, sz
,
844 vstate
->dtvs_globals
, vstate
->dtvs_nglobals
))
852 * Convenience routine to check to see if the address is within a memory
853 * region in which a load may be issued given the user's privilege level;
854 * if not, it sets the appropriate error flags and loads 'addr' into the
855 * illegal value slot.
857 * DTrace subroutines (DIF_SUBR_*) should use this helper to implement
858 * appropriate memory access protection.
861 dtrace_canload(uint64_t addr
, size_t sz
, dtrace_mstate_t
*mstate
,
862 dtrace_vstate_t
*vstate
)
864 #if !defined(__APPLE__) /* Quiet compiler warning - matches dtrace_dif_emulate */
865 volatile uintptr_t *illval
= &cpu_core
[CPU
->cpu_id
].cpuc_dtrace_illval
;
867 volatile uint64_t *illval
= &cpu_core
[CPU
->cpu_id
].cpuc_dtrace_illval
;
871 * If we hold the privilege to read from kernel memory, then
872 * everything is readable.
874 if ((mstate
->dtms_access
& DTRACE_ACCESS_KERNEL
) != 0)
878 * You can obviously read that which you can store.
880 if (dtrace_canstore(addr
, sz
, mstate
, vstate
))
884 * We're allowed to read from our own string table.
886 if (DTRACE_INRANGE(addr
, sz
, (uintptr_t)mstate
->dtms_difo
->dtdo_strtab
,
887 mstate
->dtms_difo
->dtdo_strlen
))
890 DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV
);
896 * Convenience routine to check to see if a given string is within a memory
897 * region in which a load may be issued given the user's privilege level;
898 * this exists so that we don't need to issue unnecessary dtrace_strlen()
899 * calls in the event that the user has all privileges.
902 dtrace_strcanload(uint64_t addr
, size_t sz
, dtrace_mstate_t
*mstate
,
903 dtrace_vstate_t
*vstate
)
908 * If we hold the privilege to read from kernel memory, then
909 * everything is readable.
911 if ((mstate
->dtms_access
& DTRACE_ACCESS_KERNEL
) != 0)
914 strsz
= 1 + dtrace_strlen((char *)(uintptr_t)addr
, sz
);
915 if (dtrace_canload(addr
, strsz
, mstate
, vstate
))
922 * Convenience routine to check to see if a given variable is within a memory
923 * region in which a load may be issued given the user's privilege level.
926 dtrace_vcanload(void *src
, dtrace_diftype_t
*type
, dtrace_mstate_t
*mstate
,
927 dtrace_vstate_t
*vstate
)
930 ASSERT(type
->dtdt_flags
& DIF_TF_BYREF
);
933 * If we hold the privilege to read from kernel memory, then
934 * everything is readable.
936 if ((mstate
->dtms_access
& DTRACE_ACCESS_KERNEL
) != 0)
939 if (type
->dtdt_kind
== DIF_TYPE_STRING
)
940 sz
= dtrace_strlen(src
,
941 vstate
->dtvs_state
->dts_options
[DTRACEOPT_STRSIZE
]) + 1;
943 sz
= type
->dtdt_size
;
945 return (dtrace_canload((uintptr_t)src
, sz
, mstate
, vstate
));
949 * Compare two strings using safe loads.
952 dtrace_strncmp(char *s1
, char *s2
, size_t limit
)
955 volatile uint16_t *flags
;
957 if (s1
== s2
|| limit
== 0)
960 flags
= (volatile uint16_t *)&cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
;
966 c1
= dtrace_load8((uintptr_t)s1
++);
972 c2
= dtrace_load8((uintptr_t)s2
++);
977 } while (--limit
&& c1
!= '\0' && !(*flags
& CPU_DTRACE_FAULT
));
983 * Compute strlen(s) for a string using safe memory accesses. The additional
984 * len parameter is used to specify a maximum length to ensure completion.
987 dtrace_strlen(const char *s
, size_t lim
)
991 for (len
= 0; len
!= lim
; len
++) {
992 if (dtrace_load8((uintptr_t)s
++) == '\0')
1000 * Check if an address falls within a toxic region.
1003 dtrace_istoxic(uintptr_t kaddr
, size_t size
)
1005 uintptr_t taddr
, tsize
;
1008 for (i
= 0; i
< dtrace_toxranges
; i
++) {
1009 taddr
= dtrace_toxrange
[i
].dtt_base
;
1010 tsize
= dtrace_toxrange
[i
].dtt_limit
- taddr
;
1012 if (kaddr
- taddr
< tsize
) {
1013 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR
);
1014 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_illval
= kaddr
;
1018 if (taddr
- kaddr
< size
) {
1019 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR
);
1020 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_illval
= taddr
;
1029 * Copy src to dst using safe memory accesses. The src is assumed to be unsafe
1030 * memory specified by the DIF program. The dst is assumed to be safe memory
1031 * that we can store to directly because it is managed by DTrace. As with
1032 * standard bcopy, overlapping copies are handled properly.
1035 dtrace_bcopy(const void *src
, void *dst
, size_t len
)
1039 const uint8_t *s2
= src
;
1043 *s1
++ = dtrace_load8((uintptr_t)s2
++);
1044 } while (--len
!= 0);
1050 *--s1
= dtrace_load8((uintptr_t)--s2
);
1051 } while (--len
!= 0);
1057 * Copy src to dst using safe memory accesses, up to either the specified
1058 * length, or the point that a nul byte is encountered. The src is assumed to
1059 * be unsafe memory specified by the DIF program. The dst is assumed to be
1060 * safe memory that we can store to directly because it is managed by DTrace.
1061 * Unlike dtrace_bcopy(), overlapping regions are not handled.
1064 dtrace_strcpy(const void *src
, void *dst
, size_t len
)
1067 uint8_t *s1
= dst
, c
;
1068 const uint8_t *s2
= src
;
1071 *s1
++ = c
= dtrace_load8((uintptr_t)s2
++);
1072 } while (--len
!= 0 && c
!= '\0');
1077 * Copy src to dst, deriving the size and type from the specified (BYREF)
1078 * variable type. The src is assumed to be unsafe memory specified by the DIF
1079 * program. The dst is assumed to be DTrace variable memory that is of the
1080 * specified type; we assume that we can store to directly.
1083 dtrace_vcopy(void *src
, void *dst
, dtrace_diftype_t
*type
)
1085 ASSERT(type
->dtdt_flags
& DIF_TF_BYREF
);
1087 if (type
->dtdt_kind
== DIF_TYPE_STRING
) {
1088 dtrace_strcpy(src
, dst
, type
->dtdt_size
);
1090 dtrace_bcopy(src
, dst
, type
->dtdt_size
);
1095 * Compare s1 to s2 using safe memory accesses. The s1 data is assumed to be
1096 * unsafe memory specified by the DIF program. The s2 data is assumed to be
1097 * safe memory that we can access directly because it is managed by DTrace.
1100 dtrace_bcmp(const void *s1
, const void *s2
, size_t len
)
1102 volatile uint16_t *flags
;
1104 flags
= (volatile uint16_t *)&cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
;
1109 if (s1
== NULL
|| s2
== NULL
)
1112 if (s1
!= s2
&& len
!= 0) {
1113 const uint8_t *ps1
= s1
;
1114 const uint8_t *ps2
= s2
;
1117 if (dtrace_load8((uintptr_t)ps1
++) != *ps2
++)
1119 } while (--len
!= 0 && !(*flags
& CPU_DTRACE_FAULT
));
1125 * Zero the specified region using a simple byte-by-byte loop. Note that this
1126 * is for safe DTrace-managed memory only.
1129 dtrace_bzero(void *dst
, size_t len
)
1133 for (cp
= dst
; len
!= 0; len
--)
1138 dtrace_add_128(uint64_t *addend1
, uint64_t *addend2
, uint64_t *sum
)
1142 result
[0] = addend1
[0] + addend2
[0];
1143 result
[1] = addend1
[1] + addend2
[1] +
1144 (result
[0] < addend1
[0] || result
[0] < addend2
[0] ? 1 : 0);
1151 * Shift the 128-bit value in a by b. If b is positive, shift left.
1152 * If b is negative, shift right.
1155 dtrace_shift_128(uint64_t *a
, int b
)
1165 a
[0] = a
[1] >> (b
- 64);
1169 mask
= 1LL << (64 - b
);
1171 a
[0] |= ((a
[1] & mask
) << (64 - b
));
1176 a
[1] = a
[0] << (b
- 64);
1180 mask
= a
[0] >> (64 - b
);
1188 * The basic idea is to break the 2 64-bit values into 4 32-bit values,
1189 * use native multiplication on those, and then re-combine into the
1190 * resulting 128-bit value.
1192 * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) = hi1 * hi2 << 64 + hi1 * lo2 << 32 + hi2 * lo1 << 32 + lo1 * lo2
1199 dtrace_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)
1201 	uint64_t hi1, hi2, lo1, lo2;
1204 	hi1 = factor1 >> 32;
1205 	hi2 = factor2 >> 32;
1207 	lo1 = factor1 & DT_MASK_LO;
1208 	lo2 = factor2 & DT_MASK_LO;
1210 	product[0] = lo1 * lo2;
1211 	product[1] = hi1 * hi2;
1215 	dtrace_shift_128(tmp, 32);
1216 	dtrace_add_128(product, tmp, product);
1220 	dtrace_shift_128(tmp, 32);
1221 	dtrace_add_128(product, tmp, product);
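/*
 * A userland cross-check of the decomposition above: where the compiler
 * provides a 128-bit integer type (a GCC/Clang extension, assumed here),
 * the four 32x32 partial products recombined with two shifted adds must
 * equal the full 64x64 product.  Illustrative only.
 */
#if 0
#include <stdint.h>
#include <assert.h>

static void
multiply_128_check(uint64_t f1, uint64_t f2)
{
	uint64_t hi1 = f1 >> 32, lo1 = f1 & 0xffffffffULL;
	uint64_t hi2 = f2 >> 32, lo2 = f2 & 0xffffffffULL;
	unsigned __int128 expect = (unsigned __int128)f1 * f2;
	unsigned __int128 got;

	got = (unsigned __int128)(hi1 * hi2) << 64;	/* high partial product */
	got += (unsigned __int128)(hi1 * lo2) << 32;	/* cross terms */
	got += (unsigned __int128)(hi2 * lo1) << 32;
	got += lo1 * lo2;				/* low partial product */

	assert(got == expect);
}
#endif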
1225 * This privilege check should be used by actions and subroutines to
1226 * verify that the user credentials of the process that enabled the
1227 * invoking ECB match the target credentials
1230 dtrace_priv_proc_common_user(dtrace_state_t
*state
)
1232 cred_t
*cr
, *s_cr
= state
->dts_cred
.dcr_cred
;
1235 * We should always have a non-NULL state cred here, since if cred
1236 * is null (anonymous tracing), we fast-path bypass this routine.
1238 ASSERT(s_cr
!= NULL
);
1240 #if !defined(__APPLE__)
1241 if ((cr
= CRED()) != NULL
&&
1243 if ((cr
= dtrace_CRED()) != NULL
&&
1244 #endif /* __APPLE__ */
1245 posix_cred_get(s_cr
)->cr_uid
== posix_cred_get(cr
)->cr_uid
&&
1246 posix_cred_get(s_cr
)->cr_uid
== posix_cred_get(cr
)->cr_ruid
&&
1247 posix_cred_get(s_cr
)->cr_uid
== posix_cred_get(cr
)->cr_suid
&&
1248 posix_cred_get(s_cr
)->cr_gid
== posix_cred_get(cr
)->cr_gid
&&
1249 posix_cred_get(s_cr
)->cr_gid
== posix_cred_get(cr
)->cr_rgid
&&
1250 posix_cred_get(s_cr
)->cr_gid
== posix_cred_get(cr
)->cr_sgid
)
1257 * This privilege check should be used by actions and subroutines to
1258 * verify that the zone of the process that enabled the invoking ECB
1259 * matches the target credentials
1262 dtrace_priv_proc_common_zone(dtrace_state_t
*state
)
1264 cred_t
*cr
, *s_cr
= state
->dts_cred
.dcr_cred
;
1265 #pragma unused(cr, s_cr) /* __APPLE__ */
1268 * We should always have a non-NULL state cred here, since if cred
1269 * is null (anonymous tracing), we fast-path bypass this routine.
1271 ASSERT(s_cr
!= NULL
);
1273 #if !defined(__APPLE__)
1274 if ((cr
= CRED()) != NULL
&&
1275 s_cr
->cr_zone
== cr
->cr_zone
)
1280 #pragma unused(state)
1282 return 1; /* Darwin doesn't do zones. */
1283 #endif /* __APPLE__ */
1287 * This privilege check should be used by actions and subroutines to
1288 * verify that the process has not setuid or changed credentials.
1290 #if !defined(__APPLE__)
1292 dtrace_priv_proc_common_nocd()
1296 if ((proc
= ttoproc(curthread
)) != NULL
&&
1297 !(proc
->p_flag
& SNOCD
))
1304 dtrace_priv_proc_common_nocd(void)
1306 return 1; /* Darwin omits "No Core Dump" flag. */
1308 #endif /* __APPLE__ */
1311 dtrace_priv_proc_destructive(dtrace_state_t
*state
)
1313 int action
= state
->dts_cred
.dcr_action
;
1315 #if defined(__APPLE__)
1316 if (ISSET(current_proc()->p_lflag
, P_LNOATTACH
))
1318 #endif /* __APPLE__ */
1320 if (((action
& DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE
) == 0) &&
1321 dtrace_priv_proc_common_zone(state
) == 0)
1324 if (((action
& DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER
) == 0) &&
1325 dtrace_priv_proc_common_user(state
) == 0)
1328 if (((action
& DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG
) == 0) &&
1329 dtrace_priv_proc_common_nocd() == 0)
1335 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
|= CPU_DTRACE_UPRIV
;
1341 dtrace_priv_proc_control(dtrace_state_t
*state
)
1343 #if defined(__APPLE__)
1344 if (ISSET(current_proc()->p_lflag
, P_LNOATTACH
))
1346 #endif /* __APPLE__ */
1348 if (state
->dts_cred
.dcr_action
& DTRACE_CRA_PROC_CONTROL
)
1351 if (dtrace_priv_proc_common_zone(state
) &&
1352 dtrace_priv_proc_common_user(state
) &&
1353 dtrace_priv_proc_common_nocd())
1356 #if defined(__APPLE__)
1358 #endif /* __APPLE__ */
1359 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
|= CPU_DTRACE_UPRIV
;
1365 dtrace_priv_proc(dtrace_state_t
*state
)
1367 #if defined(__APPLE__)
1368 if (ISSET(current_proc()->p_lflag
, P_LNOATTACH
))
1370 #endif /* __APPLE__ */
1372 if (state
->dts_cred
.dcr_action
& DTRACE_CRA_PROC
)
1375 #if defined(__APPLE__)
1377 #endif /* __APPLE__ */
1378 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
|= CPU_DTRACE_UPRIV
;
1383 #if defined(__APPLE__)
1384 /* dtrace_priv_proc() omitting the P_LNOATTACH check. For PID and EXECNAME accesses. */
1386 dtrace_priv_proc_relaxed(dtrace_state_t
*state
)
1389 if (state
->dts_cred
.dcr_action
& DTRACE_CRA_PROC
)
1392 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
|= CPU_DTRACE_UPRIV
;
1396 #endif /* __APPLE__ */
1399 dtrace_priv_kernel(dtrace_state_t
*state
)
1401 if (state
->dts_cred
.dcr_action
& DTRACE_CRA_KERNEL
)
1404 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
|= CPU_DTRACE_KPRIV
;
1410 dtrace_priv_kernel_destructive(dtrace_state_t
*state
)
1412 if (state
->dts_cred
.dcr_action
& DTRACE_CRA_KERNEL_DESTRUCTIVE
)
1415 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
|= CPU_DTRACE_KPRIV
;
1421 * Note: not called from probe context. This function is called
1422 * asynchronously (and at a regular interval) from outside of probe context to
1423 * clean the dirty dynamic variable lists on all CPUs. Dynamic variable
1424 * cleaning is explained in detail in <sys/dtrace_impl.h>.
1426 #if defined(__APPLE__) /* Quiet compiler warning. */
1428 #endif /* __APPLE__ */
1430 dtrace_dynvar_clean(dtrace_dstate_t
*dstate
)
1432 dtrace_dynvar_t
*dirty
;
1433 dtrace_dstate_percpu_t
*dcpu
;
1436 for (i
= 0; i
< (int)NCPU
; i
++) {
1437 dcpu
= &dstate
->dtds_percpu
[i
];
1439 ASSERT(dcpu
->dtdsc_rinsing
== NULL
);
1442 * If the dirty list is NULL, there is no dirty work to do.
1444 if (dcpu
->dtdsc_dirty
== NULL
)
1448 * If the clean list is non-NULL, then we're not going to do
1449 * any work for this CPU -- it means that there has not been
1450 * a dtrace_dynvar() allocation on this CPU (or from this CPU)
1451 * since the last time we cleaned house.
1453 if (dcpu
->dtdsc_clean
!= NULL
)
1459 * Atomically move the dirty list aside.
1462 dirty
= dcpu
->dtdsc_dirty
;
1465 * Before we zap the dirty list, set the rinsing list.
1466 * (This allows for a potential assertion in
1467 * dtrace_dynvar(): if a free dynamic variable appears
1468 * on a hash chain, either the dirty list or the
1469 * rinsing list for some CPU must be non-NULL.)
1471 dcpu
->dtdsc_rinsing
= dirty
;
1472 dtrace_membar_producer();
1473 } while (dtrace_casptr(&dcpu
->dtdsc_dirty
,
1474 dirty
, NULL
) != dirty
);
1479 * We have no work to do; we can simply return.
1486 for (i
= 0; i
< (int)NCPU
; i
++) {
1487 dcpu
= &dstate
->dtds_percpu
[i
];
1489 if (dcpu
->dtdsc_rinsing
== NULL
)
1493 * We are now guaranteed that no hash chain contains a pointer
1494 * into this dirty list; we can make it clean.
1496 ASSERT(dcpu
->dtdsc_clean
== NULL
);
1497 dcpu
->dtdsc_clean
= dcpu
->dtdsc_rinsing
;
1498 dcpu
->dtdsc_rinsing
= NULL
;
1502 * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make
1503 * sure that all CPUs have seen all of the dtdsc_clean pointers.
1504 * This prevents a race whereby a CPU incorrectly decides that
1505 * the state should be something other than DTRACE_DSTATE_CLEAN
1506 * after dtrace_dynvar_clean() has completed.
1510 dstate
->dtds_state
= DTRACE_DSTATE_CLEAN
;
1514 * Depending on the value of the op parameter, this function looks up,
1515 * allocates or deallocates an arbitrarily-keyed dynamic variable. If an
1516 * allocation is requested, this function will return a pointer to a
1517 * dtrace_dynvar_t corresponding to the allocated variable -- or NULL if no
1518 * variable can be allocated. If NULL is returned, the appropriate counter
1519 * will be incremented.
1521 #if defined(__APPLE__) /* Quiet compiler warning. */
1523 #endif /* __APPLE__ */
1525 dtrace_dynvar(dtrace_dstate_t
*dstate
, uint_t nkeys
,
1526 dtrace_key_t
*key
, size_t dsize
, dtrace_dynvar_op_t op
,
1527 dtrace_mstate_t
*mstate
, dtrace_vstate_t
*vstate
)
1529 uint64_t hashval
= DTRACE_DYNHASH_VALID
;
1530 dtrace_dynhash_t
*hash
= dstate
->dtds_hash
;
1531 dtrace_dynvar_t
*free
, *new_free
, *next
, *dvar
, *start
, *prev
= NULL
;
1532 processorid_t me
= CPU
->cpu_id
, cpu
= me
;
1533 dtrace_dstate_percpu_t
*dcpu
= &dstate
->dtds_percpu
[me
];
1534 size_t bucket
, ksize
;
1535 size_t chunksize
= dstate
->dtds_chunksize
;
1536 uintptr_t kdata
, lock
, nstate
;
1542 * Hash the key. As with aggregations, we use Jenkins' "One-at-a-time"
1543 * algorithm. For the by-value portions, we perform the algorithm in
1544 * 16-bit chunks (as opposed to 8-bit chunks). This speeds things up a
1545 * bit, and seems to have only a minute effect on distribution. For
1546 * the by-reference data, we perform "One-at-a-time" iterating (safely)
1547 * over each referenced byte. It's painful to do this, but it's much
1548 * better than pathological hash distribution. The efficacy of the
1549 * hashing algorithm (and a comparison with other algorithms) may be
1550 * found by running the ::dtrace_dynstat MDB dcmd.
1552 for (i
= 0; i
< nkeys
; i
++) {
1553 if (key
[i
].dttk_size
== 0) {
1554 uint64_t val
= key
[i
].dttk_value
;
1556 hashval
+= (val
>> 48) & 0xffff;
1557 hashval
+= (hashval
<< 10);
1558 hashval
^= (hashval
>> 6);
1560 hashval
+= (val
>> 32) & 0xffff;
1561 hashval
+= (hashval
<< 10);
1562 hashval
^= (hashval
>> 6);
1564 hashval
+= (val
>> 16) & 0xffff;
1565 hashval
+= (hashval
<< 10);
1566 hashval
^= (hashval
>> 6);
1568 hashval
+= val
& 0xffff;
1569 hashval
+= (hashval
<< 10);
1570 hashval
^= (hashval
>> 6);
1573 * This is incredibly painful, but it beats the hell
1574 * out of the alternative.
1576 uint64_t j
, size
= key
[i
].dttk_size
;
1577 uintptr_t base
= (uintptr_t)key
[i
].dttk_value
;
1579 if (!dtrace_canload(base
, size
, mstate
, vstate
))
1582 for (j
= 0; j
< size
; j
++) {
1583 hashval
+= dtrace_load8(base
+ j
);
1584 hashval
+= (hashval
<< 10);
1585 hashval
^= (hashval
>> 6);
1590 if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT
))
1593 hashval
+= (hashval
<< 3);
1594 hashval
^= (hashval
>> 11);
1595 hashval
+= (hashval
<< 15);
1598 * There is a remote chance (ideally, 1 in 2^31) that our hashval
1599 * comes out to be one of our two sentinel hash values. If this
1600 * actually happens, we set the hashval to be a value known to be a
1601 * non-sentinel value.
1603 if (hashval
== DTRACE_DYNHASH_FREE
|| hashval
== DTRACE_DYNHASH_SINK
)
1604 hashval
= DTRACE_DYNHASH_VALID
;
1607 * Yes, it's painful to do a divide here. If the cycle count becomes
1608 * important here, tricks can be pulled to reduce it. (However, it's
1609 * critical that hash collisions be kept to an absolute minimum;
1610 * they're much more painful than a divide.) It's better to have a
1611 * solution that generates few collisions and still keeps things
1612 * relatively simple.
1614 bucket
= hashval
% dstate
->dtds_hashsize
;
1616 if (op
== DTRACE_DYNVAR_DEALLOC
) {
1617 volatile uintptr_t *lockp
= &hash
[bucket
].dtdh_lock
;
1620 while ((lock
= *lockp
) & 1)
1623 #if !defined(__APPLE__) /* Quiet compiler warning */
1624 if (dtrace_casptr((void *)lockp
,
1625 (void *)lock
, (void *)(lock
+ 1)) == (void *)lock
)
1628 if (dtrace_casptr((void *)(uintptr_t)lockp
,
1629 (void *)lock
, (void *)(lock
+ 1)) == (void *)lock
)
1631 #endif /* __APPLE__ */
1634 dtrace_membar_producer();
1639 lock
= hash
[bucket
].dtdh_lock
;
1641 dtrace_membar_consumer();
1643 start
= hash
[bucket
].dtdh_chain
;
1644 ASSERT(start
!= NULL
&& (start
->dtdv_hashval
== DTRACE_DYNHASH_SINK
||
1645 start
->dtdv_hashval
!= DTRACE_DYNHASH_FREE
||
1646 op
!= DTRACE_DYNVAR_DEALLOC
));
1648 for (dvar
= start
; dvar
!= NULL
; dvar
= dvar
->dtdv_next
) {
1649 dtrace_tuple_t
*dtuple
= &dvar
->dtdv_tuple
;
1650 dtrace_key_t
*dkey
= &dtuple
->dtt_key
[0];
1652 if (dvar
->dtdv_hashval
!= hashval
) {
1653 if (dvar
->dtdv_hashval
== DTRACE_DYNHASH_SINK
) {
1655 * We've reached the sink, and therefore the
1656 * end of the hash chain; we can kick out of
1657 * the loop knowing that we have seen a valid
1658 * snapshot of state.
1660 ASSERT(dvar
->dtdv_next
== NULL
);
1661 ASSERT(dvar
== &dtrace_dynhash_sink
);
1665 if (dvar
->dtdv_hashval
== DTRACE_DYNHASH_FREE
) {
1667 * We've gone off the rails: somewhere along
1668 * the line, one of the members of this hash
1669 * chain was deleted. Note that we could also
1670 * detect this by simply letting this loop run
1671 * to completion, as we would eventually hit
1672 * the end of the dirty list. However, we
1673 * want to avoid running the length of the
1674 * dirty list unnecessarily (it might be quite
1675 * long), so we catch this as early as
1676 * possible by detecting the hash marker. In
1677 * this case, we simply set dvar to NULL and
1678 * break; the conditional after the loop will
1679 * send us back to top.
1688 if (dtuple
->dtt_nkeys
!= nkeys
)
1691 for (i
= 0; i
< nkeys
; i
++, dkey
++) {
1692 if (dkey
->dttk_size
!= key
[i
].dttk_size
)
1693 goto next
; /* size or type mismatch */
1695 if (dkey
->dttk_size
!= 0) {
1697 (void *)(uintptr_t)key
[i
].dttk_value
,
1698 (void *)(uintptr_t)dkey
->dttk_value
,
1702 if (dkey
->dttk_value
!= key
[i
].dttk_value
)
1707 if (op
!= DTRACE_DYNVAR_DEALLOC
)
1710 ASSERT(dvar
->dtdv_next
== NULL
||
1711 dvar
->dtdv_next
->dtdv_hashval
!= DTRACE_DYNHASH_FREE
);
1714 ASSERT(hash
[bucket
].dtdh_chain
!= dvar
);
1715 ASSERT(start
!= dvar
);
1716 ASSERT(prev
->dtdv_next
== dvar
);
1717 prev
->dtdv_next
= dvar
->dtdv_next
;
1719 if (dtrace_casptr(&hash
[bucket
].dtdh_chain
,
1720 start
, dvar
->dtdv_next
) != start
) {
1722 * We have failed to atomically swing the
1723 * hash table head pointer, presumably because
1724 * of a conflicting allocation on another CPU.
1725 * We need to reread the hash chain and try
1732 dtrace_membar_producer();
1735 * Now set the hash value to indicate that it's free.
1737 ASSERT(hash
[bucket
].dtdh_chain
!= dvar
);
1738 dvar
->dtdv_hashval
= DTRACE_DYNHASH_FREE
;
1740 dtrace_membar_producer();
1743 * Set the next pointer to point at the dirty list, and
1744 * atomically swing the dirty pointer to the newly freed dvar.
1747 next
= dcpu
->dtdsc_dirty
;
1748 dvar
->dtdv_next
= next
;
1749 } while (dtrace_casptr(&dcpu
->dtdsc_dirty
, next
, dvar
) != next
);
1752 * Finally, unlock this hash bucket.
1754 ASSERT(hash
[bucket
].dtdh_lock
== lock
);
1756 hash
[bucket
].dtdh_lock
++;
1766 * If dvar is NULL, it is because we went off the rails:
1767 * one of the elements that we traversed in the hash chain
1768 * was deleted while we were traversing it. In this case,
1769 * we assert that we aren't doing a dealloc (deallocs lock
1770 * the hash bucket to prevent themselves from racing with
1771 * one another), and retry the hash chain traversal.
1773 ASSERT(op
!= DTRACE_DYNVAR_DEALLOC
);
1777 if (op
!= DTRACE_DYNVAR_ALLOC
) {
1779 * If we are not to allocate a new variable, we want to
1780 * return NULL now. Before we return, check that the value
1781 * of the lock word hasn't changed. If it has, we may have
1782 * seen an inconsistent snapshot.
1784 if (op
== DTRACE_DYNVAR_NOALLOC
) {
1785 if (hash
[bucket
].dtdh_lock
!= lock
)
1788 ASSERT(op
== DTRACE_DYNVAR_DEALLOC
);
1789 ASSERT(hash
[bucket
].dtdh_lock
== lock
);
1791 hash
[bucket
].dtdh_lock
++;
1798 * We need to allocate a new dynamic variable. The size we need is the
1799 * size of dtrace_dynvar plus the size of nkeys dtrace_key_t's plus the
1800 * size of any auxiliary key data (rounded up to 8-byte alignment) plus
1801 * the size of any referred-to data (dsize). We then round the final
1802 * size up to the chunksize for allocation.
1804 for (ksize
= 0, i
= 0; i
< nkeys
; i
++)
1805 ksize
+= P2ROUNDUP(key
[i
].dttk_size
, sizeof (uint64_t));
1808 * This should be pretty much impossible, but could happen if, say,
1809 * strange DIF specified the tuple. Ideally, this should be an
1810 * assertion and not an error condition -- but that requires that the
1811 * chunksize calculation in dtrace_difo_chunksize() be absolutely
1812 * bullet-proof. (That is, it must not be able to be fooled by
1813 * malicious DIF.) Given the lack of backwards branches in DIF,
1814 * solving this would presumably not amount to solving the Halting
1815 * Problem -- but it still seems awfully hard.
1817 if (sizeof (dtrace_dynvar_t
) + sizeof (dtrace_key_t
) * (nkeys
- 1) +
1818 ksize
+ dsize
> chunksize
) {
1819 dcpu
->dtdsc_drops
++;
1823 nstate
= DTRACE_DSTATE_EMPTY
;
1827 free
= dcpu
->dtdsc_free
;
1830 dtrace_dynvar_t
*clean
= dcpu
->dtdsc_clean
;
1833 if (clean
== NULL
) {
1835 * We're out of dynamic variable space on
1836 * this CPU. Unless we have tried all CPUs,
1837 * we'll try to allocate from a different
1840 switch (dstate
->dtds_state
) {
1841 case DTRACE_DSTATE_CLEAN
: {
1842 void *sp
= &dstate
->dtds_state
;
1844 if (++cpu
>= (int)NCPU
)
1847 if (dcpu
->dtdsc_dirty
!= NULL
&&
1848 nstate
== DTRACE_DSTATE_EMPTY
)
1849 nstate
= DTRACE_DSTATE_DIRTY
;
1851 if (dcpu
->dtdsc_rinsing
!= NULL
)
1852 nstate
= DTRACE_DSTATE_RINSING
;
1854 dcpu
= &dstate
->dtds_percpu
[cpu
];
1859 (void) dtrace_cas32(sp
,
1860 DTRACE_DSTATE_CLEAN
, nstate
);
1863 * To increment the correct bean
1864 * counter, take another lap.
1869 case DTRACE_DSTATE_DIRTY
:
1870 dcpu
->dtdsc_dirty_drops
++;
1873 case DTRACE_DSTATE_RINSING
:
1874 dcpu
->dtdsc_rinsing_drops
++;
1877 case DTRACE_DSTATE_EMPTY
:
1878 dcpu
->dtdsc_drops
++;
1882 DTRACE_CPUFLAG_SET(CPU_DTRACE_DROP
);
1887 * The clean list appears to be non-empty. We want to
1888 * move the clean list to the free list; we start by
1889 * moving the clean pointer aside.
1891 if (dtrace_casptr(&dcpu
->dtdsc_clean
,
1892 clean
, NULL
) != clean
) {
1894 * We are in one of two situations:
1896 * (a) The clean list was switched to the
1897 * free list by another CPU.
1899 * (b) The clean list was added to by the
1902 * In either of these situations, we can
1903 * just reattempt the free list allocation.
1908 ASSERT(clean
->dtdv_hashval
== DTRACE_DYNHASH_FREE
);
1911 * Now we'll move the clean list to the free list.
1912 * It's impossible for this to fail: the only way
1913 * the free list can be updated is through this
1914 * code path, and only one CPU can own the clean list.
1915 * Thus, it would only be possible for this to fail if
1916 * this code were racing with dtrace_dynvar_clean().
1917 * (That is, if dtrace_dynvar_clean() updated the clean
1918 * list, and we ended up racing to update the free
1919 * list.) This race is prevented by the dtrace_sync()
1920 * in dtrace_dynvar_clean() -- which flushes the
1921 * owners of the clean lists out before resetting
1924 rval
= dtrace_casptr(&dcpu
->dtdsc_free
, NULL
, clean
);
1925 ASSERT(rval
== NULL
);
1930 new_free
= dvar
->dtdv_next
;
1931 } while (dtrace_casptr(&dcpu
->dtdsc_free
, free
, new_free
) != free
);
1934 * We have now allocated a new chunk. We copy the tuple keys into the
1935 * tuple array and copy any referenced key data into the data space
1936 * following the tuple array. As we do this, we relocate dttk_value
1937 * in the final tuple to point to the key data address in the chunk.
1939 kdata
= (uintptr_t)&dvar
->dtdv_tuple
.dtt_key
[nkeys
];
1940 dvar
->dtdv_data
= (void *)(kdata
+ ksize
);
1941 dvar
->dtdv_tuple
.dtt_nkeys
= nkeys
;
1943 for (i
= 0; i
< nkeys
; i
++) {
1944 dtrace_key_t
*dkey
= &dvar
->dtdv_tuple
.dtt_key
[i
];
1945 size_t kesize
= key
[i
].dttk_size
;
1949 (const void *)(uintptr_t)key
[i
].dttk_value
,
1950 (void *)kdata
, kesize
);
1951 dkey
->dttk_value
= kdata
;
1952 kdata
+= P2ROUNDUP(kesize
, sizeof (uint64_t));
1954 dkey
->dttk_value
= key
[i
].dttk_value
;
1957 dkey
->dttk_size
= kesize
;
1960 ASSERT(dvar
->dtdv_hashval
== DTRACE_DYNHASH_FREE
);
1961 dvar
->dtdv_hashval
= hashval
;
1962 dvar
->dtdv_next
= start
;
1964 if (dtrace_casptr(&hash
[bucket
].dtdh_chain
, start
, dvar
) == start
)
1968 * The cas has failed. Either another CPU is adding an element to
1969 * this hash chain, or another CPU is deleting an element from this
1970 * hash chain. The simplest way to deal with both of these cases
1971 * (though not necessarily the most efficient) is to free our
1972 * allocated block and tail-call ourselves. Note that the free is
1973 * to the dirty list and _not_ to the free list. This is to prevent
1974 * races with allocators, above.
1976 dvar
->dtdv_hashval
= DTRACE_DYNHASH_FREE
;
1978 dtrace_membar_producer();
1981 free
= dcpu
->dtdsc_dirty
;
1982 dvar
->dtdv_next
= free
;
1983 } while (dtrace_casptr(&dcpu
->dtdsc_dirty
, free
, dvar
) != free
);
1985 return (dtrace_dynvar(dstate
, nkeys
, key
, dsize
, op
, mstate
, vstate
));
1990 dtrace_aggregate_min(uint64_t *oval
, uint64_t nval
, uint64_t arg
)
1992 #pragma unused(arg) /* __APPLE__ */
1993 if ((int64_t)nval
< (int64_t)*oval
)
1999 dtrace_aggregate_max(uint64_t *oval
, uint64_t nval
, uint64_t arg
)
2001 #pragma unused(arg) /* __APPLE__ */
2002 if ((int64_t)nval
> (int64_t)*oval
)
static void
dtrace_aggregate_quantize(uint64_t *quanta, uint64_t nval, uint64_t incr)
{
	int i, zero = DTRACE_QUANTIZE_ZEROBUCKET;
	int64_t val = (int64_t)nval;

	if (val < 0) {
		for (i = 0; i < zero; i++) {
			if (val <= DTRACE_QUANTIZE_BUCKETVAL(i)) {
				quanta[i] += incr;
				return;
			}
		}
	} else {
		for (i = zero + 1; i < DTRACE_QUANTIZE_NBUCKETS; i++) {
			if (val < DTRACE_QUANTIZE_BUCKETVAL(i)) {
				quanta[i - 1] += incr;
				return;
			}
		}

		quanta[DTRACE_QUANTIZE_NBUCKETS - 1] += incr;
		return;
	}

	ASSERT(0);
}
static void
dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr)
{
	uint64_t arg = *lquanta++;
	int32_t base = DTRACE_LQUANTIZE_BASE(arg);
	uint16_t step = DTRACE_LQUANTIZE_STEP(arg);
	uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg);
	int32_t val = (int32_t)nval, level;

	ASSERT(levels != 0);

	if (val < base) {
		/*
		 * This is an underflow.
		 */
		lquanta[0] += incr;
		return;
	}

	level = (val - base) / step;

	if (level < levels) {
		lquanta[level + 1] += incr;
		return;
	}

	/*
	 * This is an overflow.
	 */
	lquanta[levels + 1] += incr;
}
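/*
 * The layout of lquanta[] mirrors the decoding above:  lquanta[0] counts
 * underflow (values below 'base'), lquanta[1] through lquanta[levels] count
 * the linear buckets of width 'step', and lquanta[levels + 1] counts
 * overflow.
 */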
static int
dtrace_aggregate_llquantize_bucket(int16_t factor, int16_t low, int16_t high,
    int16_t nsteps, int64_t value)
{
	int64_t this = 1, last, next;
	int base = 1, order;

	for (order = 0; order < low; ++order)
		this *= factor;

	/*
	 * If our value is less than our factor taken to the power of the
	 * low order of magnitude, it goes into the zeroth bucket.
	 */
	if (value < this)
		return (0);
	else
		last = this;

	for (this *= factor; order <= high; ++order) {
		int nbuckets = this > nsteps ? nsteps : this;

		/*
		 * We should not generally get log/linear quantizations
		 * with a high magnitude that allows 64-bits to
		 * overflow, but we nonetheless protect against this
		 * by explicitly checking for overflow, and clamping
		 * our value accordingly.
		 */
		next = this * factor;
		if (next < this) {
			value = this - 1;
		}

		/*
		 * If our value lies within this order of magnitude,
		 * determine its position by taking the offset within
		 * the order of magnitude, dividing by the bucket
		 * width, and adding to our (accumulated) base.
		 */
		if (value < this) {
			return (base + (value - last) / (this / nbuckets));
		}

		base += nbuckets - (nbuckets / factor);
		last = this;
		this = next;
	}

	/*
	 * Our value is greater than or equal to our factor taken to the
	 * power of one plus the high magnitude -- return the top bucket.
	 */
	return (base);
}
static void
dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr)
{
	uint64_t arg = *llquanta++;
	uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg);
	uint16_t low = DTRACE_LLQUANTIZE_LOW(arg);
	uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg);
	uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg);

	llquanta[dtrace_aggregate_llquantize_bucket(factor,
	    low, high, nsteps, nval)] += incr;
}
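/*
 * As with lquantize(), the first element of llquanta[] is not a bucket:  it
 * encodes the factor/low/high/nsteps parameters, which are decoded above
 * before the bucket index is computed.
 */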
static void
dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg)
{
#pragma unused(arg) /* __APPLE__ */
	data[0]++;
	data[1] += nval;
}

static void
dtrace_aggregate_stddev(uint64_t *data, uint64_t nval, uint64_t arg)
{
#pragma unused(arg) /* __APPLE__ */
	int64_t snval = (int64_t)nval;
	uint64_t tmp[2];

	data[0]++;
	data[1] += nval;

	/*
	 * What we want to say here is:
	 *
	 * data[2] += nval * nval;
	 *
	 * But given that nval is 64-bit, we could easily overflow, so
	 * we do this as 128-bit arithmetic.
	 */
	if (snval < 0)
		snval = -snval;

	dtrace_multiply_128((uint64_t)snval, (uint64_t)snval, tmp);
	dtrace_add_128(data + 2, tmp, data + 2);
}

static void
dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg)
{
#pragma unused(nval, arg) /* __APPLE__ */
	*oval = *oval + 1;
}

static void
dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg)
{
#pragma unused(arg) /* __APPLE__ */
	*oval += nval;
}
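/*
 * For avg() and stddev() the destination is an array rather than a scalar:
 * data[0] accumulates the count and data[1] the sum; stddev() additionally
 * keeps a 128-bit running sum of squares in data[2] and data[3] via
 * dtrace_multiply_128()/dtrace_add_128().
 */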
/*
 * Aggregate given the tuple in the principal data buffer, and the aggregating
 * action denoted by the specified dtrace_aggregation_t.  The aggregation
 * buffer is specified as the buf parameter.  This routine does not return
 * failure; if there is no space in the aggregation buffer, the data will be
 * dropped, and a corresponding counter incremented.
 */
static void
dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf,
    intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg)
{
	dtrace_recdesc_t *rec = &agg->dtag_action.dta_rec;
	uint32_t i, ndx, size, fsize;
	uint32_t align = sizeof (uint64_t) - 1;
	dtrace_aggbuffer_t *agb;
	dtrace_aggkey_t *key;
	uint32_t hashval = 0, limit, isstr;
	caddr_t tomax, data, kdata;
	dtrace_actkind_t action;
	dtrace_action_t *act;
	uintptr_t offs;

	if (buf == NULL)
		return;

	if (!agg->dtag_hasarg) {
		/*
		 * Currently, only quantize() and lquantize() take additional
		 * arguments, and they have the same semantics:  an increment
		 * value that defaults to 1 when not present.  If additional
		 * aggregating actions take arguments, the setting of the
		 * default argument value will presumably have to become more
		 * dynamic.
		 */
		arg = 1;
	}

	action = agg->dtag_action.dta_kind - DTRACEACT_AGGREGATION;
	size = rec->dtrd_offset - agg->dtag_base;
	fsize = size + rec->dtrd_size;

	ASSERT(dbuf->dtb_tomax != NULL);
	data = dbuf->dtb_tomax + offset + agg->dtag_base;

	if ((tomax = buf->dtb_tomax) == NULL) {
		dtrace_buffer_drop(buf);
		return;
	}

	/*
	 * The metastructure is always at the bottom of the buffer.
	 */
	agb = (dtrace_aggbuffer_t *)(tomax + buf->dtb_size -
	    sizeof (dtrace_aggbuffer_t));

	if (buf->dtb_offset == 0) {
		/*
		 * We just kludge up approximately 1/8th of the size to be
		 * buckets.  If this guess ends up being routinely
		 * off-the-mark, we may need to dynamically readjust this
		 * based on past performance.
		 */
		uintptr_t hashsize = (buf->dtb_size >> 3) / sizeof (uintptr_t);

		if ((uintptr_t)agb - hashsize * sizeof (dtrace_aggkey_t *) <
		    (uintptr_t)tomax || hashsize == 0) {
			/*
			 * We've been given a ludicrously small buffer;
			 * increment our drop count and leave.
			 */
			dtrace_buffer_drop(buf);
			return;
		}

		/*
		 * And now, a pathetic attempt to try to get an odd (or
		 * perchance, a prime) hash size for better hash distribution.
		 */
		if (hashsize > (DTRACE_AGGHASHSIZE_SLEW << 3))
			hashsize -= DTRACE_AGGHASHSIZE_SLEW;

		agb->dtagb_hashsize = hashsize;
		agb->dtagb_hash = (dtrace_aggkey_t **)((uintptr_t)agb -
		    agb->dtagb_hashsize * sizeof (dtrace_aggkey_t *));
		agb->dtagb_free = (uintptr_t)agb->dtagb_hash;

		for (i = 0; i < agb->dtagb_hashsize; i++)
			agb->dtagb_hash[i] = NULL;
	}

	ASSERT(agg->dtag_first != NULL);
	ASSERT(agg->dtag_first->dta_intuple);

	/*
	 * Calculate the hash value based on the key.  Note that we _don't_
	 * include the aggid in the hashing (but we will store it as part of
	 * the key).  The hashing algorithm is Bob Jenkins' "One-at-a-time"
	 * algorithm:  a simple, quick algorithm that has no known funnels, and
	 * gets good distribution in practice.  The efficacy of the hashing
	 * algorithm (and a comparison with other algorithms) may be found by
	 * running the ::dtrace_aggstat MDB dcmd.
	 */
	for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
		i = act->dta_rec.dtrd_offset - agg->dtag_base;
		limit = i + act->dta_rec.dtrd_size;
		ASSERT(limit <= size);
		isstr = DTRACEACT_ISSTRING(act);

		for (; i < limit; i++) {
			hashval += data[i];
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			if (isstr && data[i] == '\0')
				break;
		}
	}

	hashval += (hashval << 3);
	hashval ^= (hashval >> 11);
	hashval += (hashval << 15);

	/*
	 * Yes, the divide here is expensive -- but it's generally the least
	 * of the performance issues given the amount of data that we iterate
	 * over to compute hash values, compare data, etc.
	 */
	ndx = hashval % agb->dtagb_hashsize;

	for (key = agb->dtagb_hash[ndx]; key != NULL; key = key->dtak_next) {
		ASSERT((caddr_t)key >= tomax);
		ASSERT((caddr_t)key < tomax + buf->dtb_size);

		if (hashval != key->dtak_hashval || key->dtak_size != size)
			continue;

		kdata = key->dtak_data;
		ASSERT(kdata >= tomax && kdata < tomax + buf->dtb_size);

		for (act = agg->dtag_first; act->dta_intuple;
		    act = act->dta_next) {
			i = act->dta_rec.dtrd_offset - agg->dtag_base;
			limit = i + act->dta_rec.dtrd_size;
			ASSERT(limit <= size);
			isstr = DTRACEACT_ISSTRING(act);

			for (; i < limit; i++) {
				if (kdata[i] != data[i])
					goto next;

				if (isstr && data[i] == '\0')
					break;
			}
		}

		if (action != key->dtak_action) {
			/*
			 * We are aggregating on the same value in the same
			 * aggregation with two different aggregating actions.
			 * (This should have been picked up in the compiler,
			 * so we may be dealing with errant or devious DIF.)
			 * This is an error condition; we indicate as much,
			 * and return.
			 */
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			return;
		}

		/*
		 * This is a hit:  we need to apply the aggregator to
		 * the value at this key.
		 */
		agg->dtag_aggregate((uint64_t *)(kdata + size), expr, arg);
		return;
next:
		continue;
	}

	/*
	 * We didn't find it.  We need to allocate some zero-filled space,
	 * link it into the hash table appropriately, and apply the aggregator
	 * to the (zero-filled) value.
	 */
	offs = buf->dtb_offset;
	while (offs & (align - 1))
		offs += sizeof (uint32_t);

	/*
	 * If we don't have enough room to both allocate a new key _and_
	 * its associated data, increment the drop count and return.
	 */
	if ((uintptr_t)tomax + offs + fsize >
	    agb->dtagb_free - sizeof (dtrace_aggkey_t)) {
		dtrace_buffer_drop(buf);
		return;
	}

	ASSERT(!(sizeof (dtrace_aggkey_t) & (sizeof (uintptr_t) - 1)));
	key = (dtrace_aggkey_t *)(agb->dtagb_free - sizeof (dtrace_aggkey_t));
	agb->dtagb_free -= sizeof (dtrace_aggkey_t);

	key->dtak_data = kdata = tomax + offs;
	buf->dtb_offset = offs + fsize;

	/*
	 * Now copy the data across.
	 */
	*((dtrace_aggid_t *)kdata) = agg->dtag_id;

	for (i = sizeof (dtrace_aggid_t); i < size; i++)
		kdata[i] = data[i];

	/*
	 * Because strings are not zeroed out by default, we need to iterate
	 * looking for actions that store strings, and we need to explicitly
	 * pad these strings out with zeroes.
	 */
	for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
		int nul;

		if (!DTRACEACT_ISSTRING(act))
			continue;

		i = act->dta_rec.dtrd_offset - agg->dtag_base;
		limit = i + act->dta_rec.dtrd_size;
		ASSERT(limit <= size);

		for (nul = 0; i < limit; i++) {
			if (nul) {
				kdata[i] = '\0';
				continue;
			}

			if (data[i] != '\0')
				continue;

			nul = 1;
		}
	}

	for (i = size; i < fsize; i++)
		kdata[i] = 0;

	key->dtak_hashval = hashval;
	key->dtak_size = size;
	key->dtak_action = action;
	key->dtak_next = agb->dtagb_hash[ndx];
	agb->dtagb_hash[ndx] = key;

	/*
	 * Finally, apply the aggregator.
	 */
	*((uint64_t *)(key->dtak_data + size)) = agg->dtag_initial;
	agg->dtag_aggregate((uint64_t *)(key->dtak_data + size), expr, arg);
}
/*
 * Given consumer state, this routine finds a speculation in the INACTIVE
 * state and transitions it into the ACTIVE state.  If there is no speculation
 * in the INACTIVE state, 0 is returned.  In this case, no error counter is
 * incremented -- it is up to the caller to take appropriate action.
 */
static int
dtrace_speculation(dtrace_state_t *state)
{
	int i = 0;
	dtrace_speculation_state_t current;
	uint32_t *stat = &state->dts_speculations_unavail, count;

	while (i < state->dts_nspeculations) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];

		current = spec->dtsp_state;

		if (current != DTRACESPEC_INACTIVE) {
			if (current == DTRACESPEC_COMMITTINGMANY ||
			    current == DTRACESPEC_COMMITTING ||
			    current == DTRACESPEC_DISCARDING)
				stat = &state->dts_speculations_busy;

			i++;
			continue;
		}

		if (dtrace_cas32((uint32_t *)&spec->dtsp_state,
		    current, DTRACESPEC_ACTIVE) == current)
			return (i + 1);
	}

	/*
	 * We couldn't find a speculation.  If we found as much as a single
	 * busy speculation buffer, we'll attribute this failure as "busy"
	 * instead of "unavail".
	 */
	do {
		count = *stat;
	} while (dtrace_cas32(stat, count, count + 1) != count);

	return (0);
}
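/*
 * The value returned above is the speculation identifier as seen by DIF:
 * it is one-based (i + 1), so a return of 0 unambiguously means that no
 * INACTIVE speculation was available.
 */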
/*
 * This routine commits an active speculation.  If the specified speculation
 * is not in a valid state to perform a commit(), this routine will silently do
 * nothing.  The state of the specified speculation is transitioned according
 * to the state transition diagram outlined in <sys/dtrace_impl.h>
 */
static void
dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_buffer_t *src, *dest;
	uintptr_t daddr, saddr, dlimit;
#if !defined(__APPLE__) /* Quiet compiler warning */
	dtrace_speculation_state_t current, new;
#else
	dtrace_speculation_state_t current, new = DTRACESPEC_INACTIVE;
#endif /* __APPLE__ */
	intptr_t offs;

	if (which == 0)
		return;

#if !defined(__APPLE__) /* Quiet compiler warning */
	if (which > state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}
#else
	if (which > (dtrace_specid_t)state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}
#endif /* __APPLE__ */

	spec = &state->dts_speculations[which - 1];
	src = &spec->dtsp_buffer[cpu];
	dest = &state->dts_buffer[cpu];

	do {
		current = spec->dtsp_state;

		if (current == DTRACESPEC_COMMITTINGMANY)
			break;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_DISCARDING:
			return;

		case DTRACESPEC_COMMITTING:
			/*
			 * This is only possible if we are (a) commit()'ing
			 * without having done a prior speculate() on this CPU
			 * and (b) racing with another commit() on a different
			 * CPU.  There's nothing to do -- we just assert that
			 * our offset is 0.
			 */
			ASSERT(src->dtb_offset == 0);
			return;

		case DTRACESPEC_ACTIVE:
			new = DTRACESPEC_COMMITTING;
			break;

		case DTRACESPEC_ACTIVEONE:
			/*
			 * This speculation is active on one CPU.  If our
			 * buffer offset is non-zero, we know that the one CPU
			 * must be us.  Otherwise, we are committing on a
			 * different CPU from the speculate(), and we must
			 * rely on being asynchronously cleaned.
			 */
			if (src->dtb_offset != 0) {
				new = DTRACESPEC_COMMITTING;
				break;
			}

			/*FALLTHROUGH*/

		case DTRACESPEC_ACTIVEMANY:
			new = DTRACESPEC_COMMITTINGMANY;
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	/*
	 * We have set the state to indicate that we are committing this
	 * speculation.  Now reserve the necessary space in the destination
	 * buffer.
	 */
	if ((offs = dtrace_buffer_reserve(dest, src->dtb_offset,
	    sizeof (uint64_t), state, NULL)) < 0) {
		dtrace_buffer_drop(dest);
		goto out;
	}

	/*
	 * We have the space; copy the buffer across.  (Note that this is a
	 * highly suboptimal bcopy(); in the unlikely event that this becomes
	 * a serious performance issue, a high-performance DTrace-specific
	 * bcopy() should obviously be invented.)
	 */
	daddr = (uintptr_t)dest->dtb_tomax + offs;
	dlimit = daddr + src->dtb_offset;
	saddr = (uintptr_t)src->dtb_tomax;

	/*
	 * First, the aligned portion.
	 */
	while (dlimit - daddr >= sizeof (uint64_t)) {
		*((uint64_t *)daddr) = *((uint64_t *)saddr);

		daddr += sizeof (uint64_t);
		saddr += sizeof (uint64_t);
	}

	/*
	 * Now any left-over bit...
	 */
	while (dlimit - daddr)
		*((uint8_t *)daddr++) = *((uint8_t *)saddr++);

	/*
	 * Finally, commit the reserved space in the destination buffer.
	 */
	dest->dtb_offset = offs + src->dtb_offset;

out:
	/*
	 * If we're lucky enough to be the only active CPU on this speculation
	 * buffer, we can just set the state back to DTRACESPEC_INACTIVE.
	 */
	if (current == DTRACESPEC_ACTIVE ||
	    (current == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) {
		uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state,
		    DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE);
#pragma unused(rval) /* __APPLE__ */

		ASSERT(rval == DTRACESPEC_COMMITTING);
	}

	src->dtb_offset = 0;
	src->dtb_xamot_drops += src->dtb_drops;
	src->dtb_drops = 0;
}
/*
 * This routine discards an active speculation.  If the specified speculation
 * is not in a valid state to perform a discard(), this routine will silently
 * do nothing.  The state of the specified speculation is transitioned
 * according to the state transition diagram outlined in <sys/dtrace_impl.h>
 */
static void
dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
#if !defined(__APPLE__) /* Quiet compiler warning */
	dtrace_speculation_state_t current, new;
#else
	dtrace_speculation_state_t current, new = DTRACESPEC_INACTIVE;
#endif /* __APPLE__ */
	dtrace_buffer_t *buf;

	if (which == 0)
		return;

#if !defined(__APPLE__) /* Quiet compiler warning */
	if (which > state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}
#else
	if (which > (dtrace_specid_t)state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}
#endif /* __APPLE__ */

	spec = &state->dts_speculations[which - 1];
	buf = &spec->dtsp_buffer[cpu];

	do {
		current = spec->dtsp_state;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_COMMITTINGMANY:
		case DTRACESPEC_COMMITTING:
		case DTRACESPEC_DISCARDING:
			return;

		case DTRACESPEC_ACTIVE:
		case DTRACESPEC_ACTIVEMANY:
			new = DTRACESPEC_DISCARDING;
			break;

		case DTRACESPEC_ACTIVEONE:
			if (buf->dtb_offset != 0) {
				new = DTRACESPEC_INACTIVE;
			} else {
				new = DTRACESPEC_DISCARDING;
			}
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	buf->dtb_offset = 0;
	buf->dtb_drops = 0;
}
/*
 * Note:  not called from probe context.  This function is called
 * asynchronously from cross call context to clean any speculations that are
 * in the COMMITTINGMANY or DISCARDING states.  These speculations may not be
 * transitioned back to the INACTIVE state until all CPUs have cleaned the
 * speculation.
 */
static void
dtrace_speculation_clean_here(dtrace_state_t *state)
{
	dtrace_icookie_t cookie;
	processorid_t cpu = CPU->cpu_id;
	dtrace_buffer_t *dest = &state->dts_buffer[cpu];
	dtrace_specid_t i;

	cookie = dtrace_interrupt_disable();

	if (dest->dtb_tomax == NULL) {
		dtrace_interrupt_enable(cookie);
		return;
	}

#if !defined(__APPLE__) /* Quiet compiler warning */
	for (i = 0; i < state->dts_nspeculations; i++) {
#else
	for (i = 0; i < (dtrace_specid_t)state->dts_nspeculations; i++) {
#endif /* __APPLE__ */
		dtrace_speculation_t *spec = &state->dts_speculations[i];
		dtrace_buffer_t *src = &spec->dtsp_buffer[cpu];

		if (src->dtb_tomax == NULL)
			continue;

		if (spec->dtsp_state == DTRACESPEC_DISCARDING) {
			src->dtb_offset = 0;
			continue;
		}

		if (spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
			continue;

		if (src->dtb_offset == 0)
			continue;

		dtrace_speculation_commit(state, cpu, i + 1);
	}

	dtrace_interrupt_enable(cookie);
}
/*
 * Note:  not called from probe context.  This function is called
 * asynchronously (and at a regular interval) to clean any speculations that
 * are in the COMMITTINGMANY or DISCARDING states.  If it discovers that there
 * is work to be done, it cross calls all CPUs to perform that work;
 * COMMITMANY and DISCARDING speculations may not be transitioned back to the
 * INACTIVE state until they have been cleaned by all CPUs.
 */
static void
dtrace_speculation_clean(dtrace_state_t *state)
{
#if !defined(__APPLE__) /* Quiet compiler warning */
	int work = 0, rv;
#else
	int work = 0;
	uint32_t rv;
#endif /* __APPLE__ */
	dtrace_specid_t i;

#if !defined(__APPLE__) /* Quiet compiler warning */
	for (i = 0; i < state->dts_nspeculations; i++) {
#else
	for (i = 0; i < (dtrace_specid_t)state->dts_nspeculations; i++) {
#endif /* __APPLE__ */
		dtrace_speculation_t *spec = &state->dts_speculations[i];

		ASSERT(!spec->dtsp_cleaning);

		if (spec->dtsp_state != DTRACESPEC_DISCARDING &&
		    spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
			continue;

		work++;
		spec->dtsp_cleaning = 1;
	}

	if (!work)
		return;

	dtrace_xcall(DTRACE_CPUALL,
	    (dtrace_xcall_t)dtrace_speculation_clean_here, state);

	/*
	 * We now know that all CPUs have committed or discarded their
	 * speculation buffers, as appropriate.  We can now set the state
	 * to inactive.
	 */
#if !defined(__APPLE__) /* Quiet compiler warning */
	for (i = 0; i < state->dts_nspeculations; i++) {
#else
	for (i = 0; i < (dtrace_specid_t)state->dts_nspeculations; i++) {
#endif /* __APPLE__ */
		dtrace_speculation_t *spec = &state->dts_speculations[i];
		dtrace_speculation_state_t current, new;

		if (!spec->dtsp_cleaning)
			continue;

		current = spec->dtsp_state;
		ASSERT(current == DTRACESPEC_DISCARDING ||
		    current == DTRACESPEC_COMMITTINGMANY);

		new = DTRACESPEC_INACTIVE;

		rv = dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new);
		ASSERT(rv == current);
		spec->dtsp_cleaning = 0;
	}
}
/*
 * Called as part of a speculate() to get the speculative buffer associated
 * with a given speculation.  Returns NULL if the specified speculation is not
 * in an ACTIVE state.  If the speculation is in the ACTIVEONE state -- and
 * the active CPU is not the specified CPU -- the speculation will be
 * atomically transitioned into the ACTIVEMANY state.
 */
static dtrace_buffer_t *
dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
#if !defined(__APPLE__) /* Quiet compiler warning */
	dtrace_speculation_state_t current, new;
#else
	dtrace_speculation_state_t current, new = DTRACESPEC_INACTIVE;
#endif /* __APPLE__ */
	dtrace_buffer_t *buf;

	if (which == 0)
		return (NULL);

#if !defined(__APPLE__) /* Quiet compiler warning */
	if (which > state->dts_nspeculations) {
#else
	if (which > (dtrace_specid_t)state->dts_nspeculations) {
#endif /* __APPLE__ */
		cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return (NULL);
	}

	spec = &state->dts_speculations[which - 1];
	buf = &spec->dtsp_buffer[cpuid];

	do {
		current = spec->dtsp_state;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_COMMITTINGMANY:
		case DTRACESPEC_DISCARDING:
			return (NULL);

		case DTRACESPEC_COMMITTING:
			ASSERT(buf->dtb_offset == 0);
			return (NULL);

		case DTRACESPEC_ACTIVEONE:
			/*
			 * This speculation is currently active on one CPU.
			 * Check the offset in the buffer; if it's non-zero,
			 * that CPU must be us (and we leave the state alone).
			 * If it's zero, assume that we're starting on a new
			 * CPU -- and change the state to indicate that the
			 * speculation is active on more than one CPU.
			 */
			if (buf->dtb_offset != 0)
				return (buf);

			new = DTRACESPEC_ACTIVEMANY;
			break;

		case DTRACESPEC_ACTIVEMANY:
			return (buf);

		case DTRACESPEC_ACTIVE:
			new = DTRACESPEC_ACTIVEONE;
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	ASSERT(new == DTRACESPEC_ACTIVEONE || new == DTRACESPEC_ACTIVEMANY);
	return (buf);
}
/*
 * Return a string.  In the event that the user lacks the privilege to access
 * arbitrary kernel memory, we copy the string out to scratch memory so that we
 * don't fail access checking.
 *
 * dtrace_dif_variable() uses this routine as a helper for various
 * builtin values such as 'execname' and 'probefunc.'
 */
#if defined(__APPLE__) /* Quiet compiler warning. */
static
#endif /* __APPLE__ */
uintptr_t
dtrace_dif_varstr(uintptr_t addr, dtrace_state_t *state,
    dtrace_mstate_t *mstate)
{
	uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
	uintptr_t ret;
	size_t strsz;

	/*
	 * The easy case: this probe is allowed to read all of memory, so
	 * we can just return this as a vanilla pointer.
	 */
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
		return (addr);

	/*
	 * This is the tougher case: we copy the string in question from
	 * kernel memory into scratch memory and return it that way: this
	 * ensures that we won't trip up when access checking tests the
	 * BYREF return value.
	 */
	strsz = dtrace_strlen((char *)addr, size) + 1;

	if (mstate->dtms_scratch_ptr + strsz >
	    mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
		return (0);
	}

	dtrace_strcpy((const void *)addr, (void *)mstate->dtms_scratch_ptr,
	    strsz);

	ret = mstate->dtms_scratch_ptr;
	mstate->dtms_scratch_ptr += strsz;
	return (ret);
}
2954 * This function implements the DIF emulator's variable lookups. The emulator
2955 * passes a reserved variable identifier and optional built-in array index.
2958 dtrace_dif_variable(dtrace_mstate_t
*mstate
, dtrace_state_t
*state
, uint64_t v
,
2962 * If we're accessing one of the uncached arguments, we'll turn this
2963 * into a reference in the args array.
2965 if (v
>= DIF_VAR_ARG0
&& v
<= DIF_VAR_ARG9
) {
2966 ndx
= v
- DIF_VAR_ARG0
;
2972 ASSERT(mstate
->dtms_present
& DTRACE_MSTATE_ARGS
);
2973 if (ndx
>= sizeof (mstate
->dtms_arg
) /
2974 sizeof (mstate
->dtms_arg
[0])) {
2975 #if !defined(__APPLE__)
2976 int aframes
= mstate
->dtms_probe
->dtpr_aframes
+ 2;
2978 /* Account for introduction of __dtrace_probe() on xnu. */
2979 int aframes
= mstate
->dtms_probe
->dtpr_aframes
+ 3;
2980 #endif /* __APPLE__ */
2981 dtrace_provider_t
*pv
;
2984 pv
= mstate
->dtms_probe
->dtpr_provider
;
2985 if (pv
->dtpv_pops
.dtps_getargval
!= NULL
)
2986 val
= pv
->dtpv_pops
.dtps_getargval(pv
->dtpv_arg
,
2987 mstate
->dtms_probe
->dtpr_id
,
2988 mstate
->dtms_probe
->dtpr_arg
, ndx
, aframes
);
2989 #if defined(__APPLE__)
2990 /* Special case access of arg5 as passed to dtrace_probe_error() (which see.) */
2991 else if (mstate
->dtms_probe
->dtpr_id
== dtrace_probeid_error
&& ndx
== 5) {
2992 return ((dtrace_state_t
*)(uintptr_t)(mstate
->dtms_arg
[0]))->dts_arg_error_illval
;
2994 #endif /* __APPLE__ */
2996 val
= dtrace_getarg(ndx
, aframes
);
2999 * This is regrettably required to keep the compiler
3000 * from tail-optimizing the call to dtrace_getarg().
3001 * The condition always evaluates to true, but the
3002 * compiler has no way of figuring that out a priori.
3003 * (None of this would be necessary if the compiler
3004 * could be relied upon to _always_ tail-optimize
3005 * the call to dtrace_getarg() -- but it can't.)
3007 if (mstate
->dtms_probe
!= NULL
)
3013 return (mstate
->dtms_arg
[ndx
]);
3015 #if !defined(__APPLE__)
3016 case DIF_VAR_UREGS
: {
3019 if (!dtrace_priv_proc(state
))
3022 if ((lwp
= curthread
->t_lwp
) == NULL
) {
3023 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR
);
3024 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_illval
= NULL
;
3028 return (dtrace_getreg(lwp
->lwp_regs
, ndx
));
3031 case DIF_VAR_UREGS
: {
3034 if (!dtrace_priv_proc(state
))
3037 if ((thread
= current_thread()) == NULL
) {
3038 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR
);
3039 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_illval
= 0;
3043 return (dtrace_getreg(find_user_regs(thread
), ndx
));
3045 #endif /* __APPLE__ */
3047 #if !defined(__APPLE__)
3048 case DIF_VAR_CURTHREAD
:
3049 if (!dtrace_priv_kernel(state
))
3051 return ((uint64_t)(uintptr_t)curthread
);
3053 case DIF_VAR_CURTHREAD
:
3054 if (!dtrace_priv_kernel(state
))
3057 return ((uint64_t)(uintptr_t)current_thread());
3058 #endif /* __APPLE__ */
3060 case DIF_VAR_TIMESTAMP
:
3061 if (!(mstate
->dtms_present
& DTRACE_MSTATE_TIMESTAMP
)) {
3062 mstate
->dtms_timestamp
= dtrace_gethrtime();
3063 mstate
->dtms_present
|= DTRACE_MSTATE_TIMESTAMP
;
3065 return (mstate
->dtms_timestamp
);
3067 #if !defined(__APPLE__)
3068 case DIF_VAR_VTIMESTAMP
:
3069 ASSERT(dtrace_vtime_references
!= 0);
3070 return (curthread
->t_dtrace_vtime
);
3072 case DIF_VAR_VTIMESTAMP
:
3073 ASSERT(dtrace_vtime_references
!= 0);
3074 return (dtrace_get_thread_vtime(current_thread()));
3075 #endif /* __APPLE__ */
3077 case DIF_VAR_WALLTIMESTAMP
:
3078 if (!(mstate
->dtms_present
& DTRACE_MSTATE_WALLTIMESTAMP
)) {
3079 mstate
->dtms_walltimestamp
= dtrace_gethrestime();
3080 mstate
->dtms_present
|= DTRACE_MSTATE_WALLTIMESTAMP
;
3082 return (mstate
->dtms_walltimestamp
);
3085 if (!dtrace_priv_kernel(state
))
3087 if (!(mstate
->dtms_present
& DTRACE_MSTATE_IPL
)) {
3088 mstate
->dtms_ipl
= dtrace_getipl();
3089 mstate
->dtms_present
|= DTRACE_MSTATE_IPL
;
3091 return (mstate
->dtms_ipl
);
3094 ASSERT(mstate
->dtms_present
& DTRACE_MSTATE_EPID
);
3095 return (mstate
->dtms_epid
);
3098 ASSERT(mstate
->dtms_present
& DTRACE_MSTATE_PROBE
);
3099 return (mstate
->dtms_probe
->dtpr_id
);
3101 case DIF_VAR_STACKDEPTH
:
3102 if (!dtrace_priv_kernel(state
))
3104 if (!(mstate
->dtms_present
& DTRACE_MSTATE_STACKDEPTH
)) {
3105 #if !defined(__APPLE__)
3106 int aframes
= mstate
->dtms_probe
->dtpr_aframes
+ 2;
3108 /* Account for introduction of __dtrace_probe() on xnu. */
3109 int aframes
= mstate
->dtms_probe
->dtpr_aframes
+ 3;
3110 #endif /* __APPLE__ */
3112 mstate
->dtms_stackdepth
= dtrace_getstackdepth(aframes
);
3113 mstate
->dtms_present
|= DTRACE_MSTATE_STACKDEPTH
;
3115 return (mstate
->dtms_stackdepth
);
3117 case DIF_VAR_USTACKDEPTH
:
3118 if (!dtrace_priv_proc(state
))
3120 if (!(mstate
->dtms_present
& DTRACE_MSTATE_USTACKDEPTH
)) {
3122 * See comment in DIF_VAR_PID.
3124 if (DTRACE_ANCHORED(mstate
->dtms_probe
) &&
3126 mstate
->dtms_ustackdepth
= 0;
3128 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3129 mstate
->dtms_ustackdepth
=
3130 dtrace_getustackdepth();
3131 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3133 mstate
->dtms_present
|= DTRACE_MSTATE_USTACKDEPTH
;
3135 return (mstate
->dtms_ustackdepth
);
3137 case DIF_VAR_CALLER
:
3138 if (!dtrace_priv_kernel(state
))
3140 if (!(mstate
->dtms_present
& DTRACE_MSTATE_CALLER
)) {
3141 #if !defined(__APPLE__)
3142 int aframes
= mstate
->dtms_probe
->dtpr_aframes
+ 2;
3144 /* Account for introduction of __dtrace_probe() on xnu. */
3145 int aframes
= mstate
->dtms_probe
->dtpr_aframes
+ 3;
3146 #endif /* __APPLE__ */
3148 if (!DTRACE_ANCHORED(mstate
->dtms_probe
)) {
3150 * If this is an unanchored probe, we are
3151 * required to go through the slow path:
3152 * dtrace_caller() only guarantees correct
3153 * results for anchored probes.
3157 dtrace_getpcstack(caller
, 2, aframes
,
3158 (uint32_t *)(uintptr_t)mstate
->dtms_arg
[0]);
3159 mstate
->dtms_caller
= caller
[1];
3160 } else if ((mstate
->dtms_caller
=
3161 #if !defined(__APPLE__) /* Quiet compiler warnings */
3162 dtrace_caller(aframes
)) == -1) {
3164 dtrace_caller(aframes
)) == (uintptr_t)-1) {
3165 #endif /* __APPLE__ */
3167 * We have failed to do this the quick way;
3168 * we must resort to the slower approach of
3169 * calling dtrace_getpcstack().
3173 dtrace_getpcstack(&caller
, 1, aframes
, NULL
);
3174 mstate
->dtms_caller
= caller
;
3177 mstate
->dtms_present
|= DTRACE_MSTATE_CALLER
;
3179 return (mstate
->dtms_caller
);
3181 case DIF_VAR_UCALLER
:
3182 if (!dtrace_priv_proc(state
))
3185 if (!(mstate
->dtms_present
& DTRACE_MSTATE_UCALLER
)) {
3189 * dtrace_getupcstack() fills in the first uint64_t
3190 * with the current PID. The second uint64_t will
3191 * be the program counter at user-level. The third
3192 * uint64_t will contain the caller, which is what
3196 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3197 dtrace_getupcstack(ustack
, 3);
3198 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3199 mstate
->dtms_ucaller
= ustack
[2];
3200 mstate
->dtms_present
|= DTRACE_MSTATE_UCALLER
;
3203 return (mstate
->dtms_ucaller
);
3205 case DIF_VAR_PROBEPROV
:
3206 ASSERT(mstate
->dtms_present
& DTRACE_MSTATE_PROBE
);
3207 return (dtrace_dif_varstr(
3208 (uintptr_t)mstate
->dtms_probe
->dtpr_provider
->dtpv_name
,
3211 case DIF_VAR_PROBEMOD
:
3212 ASSERT(mstate
->dtms_present
& DTRACE_MSTATE_PROBE
);
3213 return (dtrace_dif_varstr(
3214 (uintptr_t)mstate
->dtms_probe
->dtpr_mod
,
3217 case DIF_VAR_PROBEFUNC
:
3218 ASSERT(mstate
->dtms_present
& DTRACE_MSTATE_PROBE
);
3219 return (dtrace_dif_varstr(
3220 (uintptr_t)mstate
->dtms_probe
->dtpr_func
,
3223 case DIF_VAR_PROBENAME
:
3224 ASSERT(mstate
->dtms_present
& DTRACE_MSTATE_PROBE
);
3225 return (dtrace_dif_varstr(
3226 (uintptr_t)mstate
->dtms_probe
->dtpr_name
,
3229 #if !defined(__APPLE__)
3231 if (!dtrace_priv_proc(state
))
3235 * Note that we are assuming that an unanchored probe is
3236 * always due to a high-level interrupt. (And we're assuming
3237 * that there is only a single high level interrupt.)
3239 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
3240 return (pid0
.pid_id
);
3243 * It is always safe to dereference one's own t_procp pointer:
3244 * it always points to a valid, allocated proc structure.
3245 * Further, it is always safe to dereference the p_pidp member
3246 * of one's own proc structure. (These are truisms becuase
3247 * threads and processes don't clean up their own state --
3248 * they leave that task to whomever reaps them.)
3250 return ((uint64_t)curthread
->t_procp
->p_pidp
->pid_id
);
3254 if (!dtrace_priv_proc_relaxed(state
))
3258 * Note that we are assuming that an unanchored probe is
3259 * always due to a high-level interrupt. (And we're assuming
3260 * that there is only a single high level interrupt.)
3262 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
3263 /* Anchored probe that fires while on an interrupt accrues to process 0 */
3266 return ((uint64_t)dtrace_proc_selfpid());
3267 #endif /* __APPLE__ */
3269 #if !defined(__APPLE__)
3271 if (!dtrace_priv_proc(state
))
3275 * See comment in DIF_VAR_PID.
3277 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
3278 return (pid0
.pid_id
);
3281 * It is always safe to dereference one's own t_procp pointer:
3282 * it always points to a valid, allocated proc structure.
3283 * (This is true because threads don't clean up their own
3284 * state -- they leave that task to whomever reaps them.)
3286 return ((uint64_t)curthread
->t_procp
->p_ppid
);
3289 if (!dtrace_priv_proc_relaxed(state
))
3293 * See comment in DIF_VAR_PID.
3295 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
3298 return ((uint64_t)dtrace_proc_selfppid());
3299 #endif /* __APPLE__ */
3301 #if !defined(__APPLE__)
3304 * See comment in DIF_VAR_PID.
3306 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
3309 return ((uint64_t)curthread
->t_tid
);
3312 /* We do not need to check for null current_thread() */
3313 return thread_tid(current_thread()); /* globally unique */
3315 case DIF_VAR_PTHREAD_SELF
:
3316 if (!dtrace_priv_proc(state
))
3319 /* Not currently supported, but we should be able to delta the dispatchqaddr and dispatchqoffset to get pthread_self */
3322 case DIF_VAR_DISPATCHQADDR
:
3323 if (!dtrace_priv_proc(state
))
3326 /* We do not need to check for null current_thread() */
3327 return thread_dispatchqaddr(current_thread());
3328 #endif /* __APPLE__ */
3330 #if !defined(__APPLE__)
3331 case DIF_VAR_EXECNAME
:
3332 if (!dtrace_priv_proc(state
))
3336 * See comment in DIF_VAR_PID.
3338 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
3339 return ((uint64_t)(uintptr_t)p0
.p_user
.u_comm
);
3342 * It is always safe to dereference one's own t_procp pointer:
3343 * it always points to a valid, allocated proc structure.
3344 * (This is true because threads don't clean up their own
3345 * state -- they leave that task to whomever reaps them.)
3347 return (dtrace_dif_varstr(
3348 (uintptr_t)curthread
->t_procp
->p_user
.u_comm
,
3351 case DIF_VAR_EXECNAME
:
3353 char *xname
= (char *)mstate
->dtms_scratch_ptr
;
3354 size_t scratch_size
= MAXCOMLEN
+1;
3356 /* The scratch allocation's lifetime is that of the clause. */
3357 if (!DTRACE_INSCRATCH(mstate
, scratch_size
)) {
3358 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3362 if (!dtrace_priv_proc_relaxed(state
))
3365 mstate
->dtms_scratch_ptr
+= scratch_size
;
3366 proc_selfname( xname
, MAXCOMLEN
);
3368 return ((uint64_t)(uintptr_t)xname
);
3370 #endif /* __APPLE__ */
3371 #if !defined(__APPLE__)
3372 case DIF_VAR_ZONENAME
:
3373 if (!dtrace_priv_proc(state
))
3377 * See comment in DIF_VAR_PID.
3379 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
3380 return ((uint64_t)(uintptr_t)p0
.p_zone
->zone_name
);
3383 * It is always safe to dereference one's own t_procp pointer:
3384 * it always points to a valid, allocated proc structure.
3385 * (This is true because threads don't clean up their own
3386 * state -- they leave that task to whomever reaps them.)
3388 return (dtrace_dif_varstr(
3389 (uintptr_t)curthread
->t_procp
->p_zone
->zone_name
,
3393 case DIF_VAR_ZONENAME
:
3395 /* scratch_size is equal to length('global') + 1 for the null-terminator. */
3396 char *zname
= (char *)mstate
->dtms_scratch_ptr
;
3397 size_t scratch_size
= 6 + 1;
3399 if (!dtrace_priv_proc(state
))
3402 /* The scratch allocation's lifetime is that of the clause. */
3403 if (!DTRACE_INSCRATCH(mstate
, scratch_size
)) {
3404 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3408 mstate
->dtms_scratch_ptr
+= scratch_size
;
3410 /* The kernel does not provide zonename, it will always return 'global'. */
3411 strlcpy(zname
, "global", scratch_size
);
3413 return ((uint64_t)(uintptr_t)zname
);
3415 #endif /* __APPLE__ */
3417 #if !defined(__APPLE__)
3419 if (!dtrace_priv_proc(state
))
3423 * See comment in DIF_VAR_PID.
3425 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
3426 return ((uint64_t)p0
.p_cred
->cr_uid
);
3429 * It is always safe to dereference one's own t_procp pointer:
3430 * it always points to a valid, allocated proc structure.
3431 * (This is true because threads don't clean up their own
3432 * state -- they leave that task to whomever reaps them.)
3434 * Additionally, it is safe to dereference one's own process
3435 * credential, since this is never NULL after process birth.
3437 return ((uint64_t)curthread
->t_procp
->p_cred
->cr_uid
);
3440 if (!dtrace_priv_proc_relaxed(state
))
3444 * See comment in DIF_VAR_PID.
3446 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
3449 return ((uint64_t) dtrace_proc_selfruid());
3450 #endif /* __APPLE__ */
3452 #if !defined(__APPLE__)
3454 if (!dtrace_priv_proc(state
))
3458 * See comment in DIF_VAR_PID.
3460 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
3461 return ((uint64_t)p0
.p_cred
->cr_gid
);
3464 * It is always safe to dereference one's own t_procp pointer:
3465 * it always points to a valid, allocated proc structure.
3466 * (This is true because threads don't clean up their own
3467 * state -- they leave that task to whomever reaps them.)
3469 * Additionally, it is safe to dereference one's own process
3470 * credential, since this is never NULL after process birth.
3472 return ((uint64_t)curthread
->t_procp
->p_cred
->cr_gid
);
3475 if (!dtrace_priv_proc(state
))
3479 * See comment in DIF_VAR_PID.
3481 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
3484 if (dtrace_CRED() != NULL
)
3485 /* Credential does not require lazy initialization. */
3486 return ((uint64_t)kauth_getgid());
3488 /* proc_lock would be taken under kauth_cred_proc_ref() in kauth_cred_get(). */
3489 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP
);
3492 #endif /* __APPLE__ */
3494 #if !defined(__APPLE__)
3495 case DIF_VAR_ERRNO
: {
3497 if (!dtrace_priv_proc(state
))
3501 * See comment in DIF_VAR_PID.
3503 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
3507 * It is always safe to dereference one's own t_lwp pointer in
3508 * the event that this pointer is non-NULL. (This is true
3509 * because threads and lwps don't clean up their own state --
3510 * they leave that task to whomever reaps them.)
3512 if ((lwp
= curthread
->t_lwp
) == NULL
)
3515 return ((uint64_t)lwp
->lwp_errno
);
3518 case DIF_VAR_ERRNO
: {
3519 uthread_t uthread
= (uthread_t
)get_bsdthread_info(current_thread());
3520 if (!dtrace_priv_proc(state
))
3524 * See comment in DIF_VAR_PID.
3526 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
3530 return (uint64_t)uthread
->t_dtrace_errno
;
3532 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP
);
3536 #endif /* __APPLE__ */
3539 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP
);
3545 * Emulate the execution of DTrace ID subroutines invoked by the call opcode.
3546 * Notice that we don't bother validating the proper number of arguments or
3547 * their types in the tuple stack. This isn't needed because all argument
3548 * interpretation is safe because of our load safety -- the worst that can
3549 * happen is that a bogus program can obtain bogus results.
3552 dtrace_dif_subr(uint_t subr
, uint_t rd
, uint64_t *regs
,
3553 dtrace_key_t
*tupregs
, int nargs
,
3554 dtrace_mstate_t
*mstate
, dtrace_state_t
*state
)
3556 volatile uint16_t *flags
= &cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
;
3557 #if !defined(__APPLE__)
3558 volatile uintptr_t *illval
= &cpu_core
[CPU
->cpu_id
].cpuc_dtrace_illval
;
3560 volatile uint64_t *illval
= &cpu_core
[CPU
->cpu_id
].cpuc_dtrace_illval
;
3561 #endif /* __APPLE__ */
3562 dtrace_vstate_t
*vstate
= &state
->dts_vstate
;
3564 #if !defined(__APPLE__)
3575 /* FIXME: awaits lock/mutex work */
3576 #endif /* __APPLE__ */
3580 regs
[rd
] = (dtrace_gethrtime() * 2416 + 374441) % 1771875;
3583 #if !defined(__APPLE__)
3584 case DIF_SUBR_MUTEX_OWNED
:
3585 if (!dtrace_canload(tupregs
[0].dttk_value
, sizeof (kmutex_t
),
3591 m
.mx
= dtrace_load64(tupregs
[0].dttk_value
);
3592 if (MUTEX_TYPE_ADAPTIVE(&m
.mi
))
3593 regs
[rd
] = MUTEX_OWNER(&m
.mi
) != MUTEX_NO_OWNER
;
3595 regs
[rd
] = LOCK_HELD(&m
.mi
.m_spin
.m_spinlock
);
3598 case DIF_SUBR_MUTEX_OWNER
:
3599 if (!dtrace_canload(tupregs
[0].dttk_value
, sizeof (kmutex_t
),
3605 m
.mx
= dtrace_load64(tupregs
[0].dttk_value
);
3606 if (MUTEX_TYPE_ADAPTIVE(&m
.mi
) &&
3607 MUTEX_OWNER(&m
.mi
) != MUTEX_NO_OWNER
)
3608 regs
[rd
] = (uintptr_t)MUTEX_OWNER(&m
.mi
);
3613 case DIF_SUBR_MUTEX_TYPE_ADAPTIVE
:
3614 if (!dtrace_canload(tupregs
[0].dttk_value
, sizeof (kmutex_t
),
3620 m
.mx
= dtrace_load64(tupregs
[0].dttk_value
);
3621 regs
[rd
] = MUTEX_TYPE_ADAPTIVE(&m
.mi
);
3624 case DIF_SUBR_MUTEX_TYPE_SPIN
:
3625 if (!dtrace_canload(tupregs
[0].dttk_value
, sizeof (kmutex_t
),
3631 m
.mx
= dtrace_load64(tupregs
[0].dttk_value
);
3632 regs
[rd
] = MUTEX_TYPE_SPIN(&m
.mi
);
3635 case DIF_SUBR_RW_READ_HELD
: {
3638 if (!dtrace_canload(tupregs
[0].dttk_value
, sizeof (uintptr_t),
3644 r
.rw
= dtrace_loadptr(tupregs
[0].dttk_value
);
3645 regs
[rd
] = _RW_READ_HELD(&r
.ri
, tmp
);
3649 case DIF_SUBR_RW_WRITE_HELD
:
3650 if (!dtrace_canload(tupregs
[0].dttk_value
, sizeof (krwlock_t
),
3656 r
.rw
= dtrace_loadptr(tupregs
[0].dttk_value
);
3657 regs
[rd
] = _RW_WRITE_HELD(&r
.ri
);
3660 case DIF_SUBR_RW_ISWRITER
:
3661 if (!dtrace_canload(tupregs
[0].dttk_value
, sizeof (krwlock_t
),
3667 r
.rw
= dtrace_loadptr(tupregs
[0].dttk_value
);
3668 regs
[rd
] = _RW_ISWRITER(&r
.ri
);
3671 /* FIXME: awaits lock/mutex work */
3672 #endif /* __APPLE__ */
3674 case DIF_SUBR_BCOPY
: {
3676 * We need to be sure that the destination is in the scratch
3677 * region -- no other region is allowed.
3679 uintptr_t src
= tupregs
[0].dttk_value
;
3680 uintptr_t dest
= tupregs
[1].dttk_value
;
3681 size_t size
= tupregs
[2].dttk_value
;
3683 if (!dtrace_inscratch(dest
, size
, mstate
)) {
3684 *flags
|= CPU_DTRACE_BADADDR
;
3689 if (!dtrace_canload(src
, size
, mstate
, vstate
)) {
3694 dtrace_bcopy((void *)src
, (void *)dest
, size
);
3698 case DIF_SUBR_ALLOCA
:
3699 case DIF_SUBR_COPYIN
: {
3700 uintptr_t dest
= P2ROUNDUP(mstate
->dtms_scratch_ptr
, 8);
3702 tupregs
[subr
== DIF_SUBR_ALLOCA
? 0 : 1].dttk_value
;
3703 size_t scratch_size
= (dest
- mstate
->dtms_scratch_ptr
) + size
;
3706 * This action doesn't require any credential checks since
3707 * probes will not activate in user contexts to which the
3708 * enabling user does not have permissions.
3712 * Rounding up the user allocation size could have overflowed
3713 * a large, bogus allocation (like -1ULL) to 0.
3715 if (scratch_size
< size
||
3716 !DTRACE_INSCRATCH(mstate
, scratch_size
)) {
3717 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3722 if (subr
== DIF_SUBR_COPYIN
) {
3723 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3724 #if !defined(__APPLE__)
3725 dtrace_copyin(tupregs
[0].dttk_value
, dest
, size
, flags
);
3727 if (dtrace_priv_proc(state
))
3728 dtrace_copyin(tupregs
[0].dttk_value
, dest
, size
, flags
);
3729 #endif /* __APPLE__ */
3730 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3733 mstate
->dtms_scratch_ptr
+= scratch_size
;
3738 case DIF_SUBR_COPYINTO
: {
3739 uint64_t size
= tupregs
[1].dttk_value
;
3740 uintptr_t dest
= tupregs
[2].dttk_value
;
3743 * This action doesn't require any credential checks since
3744 * probes will not activate in user contexts to which the
3745 * enabling user does not have permissions.
3747 if (!dtrace_inscratch(dest
, size
, mstate
)) {
3748 *flags
|= CPU_DTRACE_BADADDR
;
3753 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3754 #if !defined(__APPLE__)
3755 dtrace_copyin(tupregs
[0].dttk_value
, dest
, size
, flags
);
3757 if (dtrace_priv_proc(state
))
3758 dtrace_copyin(tupregs
[0].dttk_value
, dest
, size
, flags
);
3759 #endif /* __APPLE__ */
3760 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3764 case DIF_SUBR_COPYINSTR
: {
3765 uintptr_t dest
= mstate
->dtms_scratch_ptr
;
3766 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
3768 if (nargs
> 1 && tupregs
[1].dttk_value
< size
)
3769 size
= tupregs
[1].dttk_value
+ 1;
3772 * This action doesn't require any credential checks since
3773 * probes will not activate in user contexts to which the
3774 * enabling user does not have permissions.
3776 if (!DTRACE_INSCRATCH(mstate
, size
)) {
3777 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3782 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3783 #if !defined(__APPLE__)
3784 dtrace_copyinstr(tupregs
[0].dttk_value
, dest
, size
, flags
);
3786 if (dtrace_priv_proc(state
))
3787 dtrace_copyinstr(tupregs
[0].dttk_value
, dest
, size
, flags
);
3788 #endif /* __APPLE__ */
3789 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3791 ((char *)dest
)[size
- 1] = '\0';
3792 mstate
->dtms_scratch_ptr
+= size
;
3797 #if !defined(__APPLE__)
3798 case DIF_SUBR_MSGSIZE
:
3799 case DIF_SUBR_MSGDSIZE
: {
3800 uintptr_t baddr
= tupregs
[0].dttk_value
, daddr
;
3801 uintptr_t wptr
, rptr
;
3805 while (baddr
!= NULL
&& !(*flags
& CPU_DTRACE_FAULT
)) {
3807 if (!dtrace_canload(baddr
, sizeof (mblk_t
), mstate
,
3813 wptr
= dtrace_loadptr(baddr
+
3814 offsetof(mblk_t
, b_wptr
));
3816 rptr
= dtrace_loadptr(baddr
+
3817 offsetof(mblk_t
, b_rptr
));
3820 *flags
|= CPU_DTRACE_BADADDR
;
3821 *illval
= tupregs
[0].dttk_value
;
3825 daddr
= dtrace_loadptr(baddr
+
3826 offsetof(mblk_t
, b_datap
));
3828 baddr
= dtrace_loadptr(baddr
+
3829 offsetof(mblk_t
, b_cont
));
3832 * We want to prevent against denial-of-service here,
3833 * so we're only going to search the list for
3834 * dtrace_msgdsize_max mblks.
3836 if (cont
++ > dtrace_msgdsize_max
) {
3837 *flags
|= CPU_DTRACE_ILLOP
;
3841 if (subr
== DIF_SUBR_MSGDSIZE
) {
3842 if (dtrace_load8(daddr
+
3843 offsetof(dblk_t
, db_type
)) != M_DATA
)
3847 count
+= wptr
- rptr
;
3850 if (!(*flags
& CPU_DTRACE_FAULT
))
3856 case DIF_SUBR_MSGSIZE
:
3857 case DIF_SUBR_MSGDSIZE
: {
3858 /* Darwin does not implement SysV streams messages */
3859 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP
);
3863 #endif /* __APPLE__ */
3865 #if !defined(__APPLE__)
3866 case DIF_SUBR_PROGENYOF
: {
3867 pid_t pid
= tupregs
[0].dttk_value
;
3871 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3873 for (p
= curthread
->t_procp
; p
!= NULL
; p
= p
->p_parent
) {
3874 if (p
->p_pidp
->pid_id
== pid
) {
3880 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3886 case DIF_SUBR_PROGENYOF
: {
3887 pid_t pid
= tupregs
[0].dttk_value
;
3888 struct proc
*p
= current_proc();
3889 int rval
= 0, lim
= nprocs
;
3891 while(p
&& (lim
-- > 0)) {
3894 ppid
= (pid_t
)dtrace_load32((uintptr_t)&(p
->p_pid
));
3895 if (*flags
& CPU_DTRACE_FAULT
)
3904 break; /* Can't climb process tree any further. */
3906 p
= (struct proc
*)dtrace_loadptr((uintptr_t)&(p
->p_pptr
));
3907 if (*flags
& CPU_DTRACE_FAULT
)
3914 #endif /* __APPLE__ */
3916 case DIF_SUBR_SPECULATION
:
3917 regs
[rd
] = dtrace_speculation(state
);
3920 #if !defined(__APPLE__)
3921 case DIF_SUBR_COPYOUT
: {
3922 uintptr_t kaddr
= tupregs
[0].dttk_value
;
3923 uintptr_t uaddr
= tupregs
[1].dttk_value
;
3924 uint64_t size
= tupregs
[2].dttk_value
;
3926 if (!dtrace_destructive_disallow
&&
3927 dtrace_priv_proc_control(state
) &&
3928 !dtrace_istoxic(kaddr
, size
)) {
3929 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3930 dtrace_copyout(kaddr
, uaddr
, size
, flags
);
3931 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3936 case DIF_SUBR_COPYOUTSTR
: {
3937 uintptr_t kaddr
= tupregs
[0].dttk_value
;
3938 uintptr_t uaddr
= tupregs
[1].dttk_value
;
3939 uint64_t size
= tupregs
[2].dttk_value
;
3941 if (!dtrace_destructive_disallow
&&
3942 dtrace_priv_proc_control(state
) &&
3943 !dtrace_istoxic(kaddr
, size
)) {
3944 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3945 dtrace_copyoutstr(kaddr
, uaddr
, size
, flags
);
3946 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3951 case DIF_SUBR_COPYOUT
: {
3952 uintptr_t kaddr
= tupregs
[0].dttk_value
;
3953 user_addr_t uaddr
= tupregs
[1].dttk_value
;
3954 uint64_t size
= tupregs
[2].dttk_value
;
3956 if (!dtrace_destructive_disallow
&&
3957 dtrace_priv_proc_control(state
) &&
3958 !dtrace_istoxic(kaddr
, size
)) {
3959 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3960 dtrace_copyout(kaddr
, uaddr
, size
, flags
);
3961 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3966 case DIF_SUBR_COPYOUTSTR
: {
3967 uintptr_t kaddr
= tupregs
[0].dttk_value
;
3968 user_addr_t uaddr
= tupregs
[1].dttk_value
;
3969 uint64_t size
= tupregs
[2].dttk_value
;
3971 if (!dtrace_destructive_disallow
&&
3972 dtrace_priv_proc_control(state
) &&
3973 !dtrace_istoxic(kaddr
, size
)) {
3974 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3975 dtrace_copyoutstr(kaddr
, uaddr
, size
, flags
);
3976 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3980 #endif /* __APPLE__ */
3982 case DIF_SUBR_STRLEN
: {
3984 uintptr_t addr
= (uintptr_t)tupregs
[0].dttk_value
;
3985 sz
= dtrace_strlen((char *)addr
,
3986 state
->dts_options
[DTRACEOPT_STRSIZE
]);
3988 if (!dtrace_canload(addr
, sz
+ 1, mstate
, vstate
)) {
3998 case DIF_SUBR_STRCHR
:
3999 case DIF_SUBR_STRRCHR
: {
4001 * We're going to iterate over the string looking for the
4002 * specified character. We will iterate until we have reached
4003 * the string length or we have found the character. If this
4004 * is DIF_SUBR_STRRCHR, we will look for the last occurrence
4005 * of the specified character instead of the first.
4007 uintptr_t saddr
= tupregs
[0].dttk_value
;
4008 uintptr_t addr
= tupregs
[0].dttk_value
;
4009 uintptr_t limit
= addr
+ state
->dts_options
[DTRACEOPT_STRSIZE
];
4010 char c
, target
= (char)tupregs
[1].dttk_value
;
4012 for (regs
[rd
] = NULL
; addr
< limit
; addr
++) {
4013 if ((c
= dtrace_load8(addr
)) == target
) {
4016 if (subr
== DIF_SUBR_STRCHR
)
4024 if (!dtrace_canload(saddr
, addr
- saddr
, mstate
, vstate
)) {
4032 case DIF_SUBR_STRSTR
:
4033 case DIF_SUBR_INDEX
:
4034 case DIF_SUBR_RINDEX
: {
4036 * We're going to iterate over the string looking for the
4037 * specified string. We will iterate until we have reached
4038 * the string length or we have found the string. (Yes, this
4039 * is done in the most naive way possible -- but considering
4040 * that the string we're searching for is likely to be
4041 * relatively short, the complexity of Rabin-Karp or similar
4042 * hardly seems merited.)
4044 char *addr
= (char *)(uintptr_t)tupregs
[0].dttk_value
;
4045 char *substr
= (char *)(uintptr_t)tupregs
[1].dttk_value
;
4046 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
4047 size_t len
= dtrace_strlen(addr
, size
);
4048 size_t sublen
= dtrace_strlen(substr
, size
);
4049 char *limit
= addr
+ len
, *orig
= addr
;
4050 int notfound
= subr
== DIF_SUBR_STRSTR
? 0 : -1;
4053 regs
[rd
] = notfound
;
4055 if (!dtrace_canload((uintptr_t)addr
, len
+ 1, mstate
, vstate
)) {
4060 if (!dtrace_canload((uintptr_t)substr
, sublen
+ 1, mstate
,
4067 * strstr() and index()/rindex() have similar semantics if
4068 * both strings are the empty string: strstr() returns a
4069 * pointer to the (empty) string, and index() and rindex()
4070 * both return index 0 (regardless of any position argument).
4072 if (sublen
== 0 && len
== 0) {
4073 if (subr
== DIF_SUBR_STRSTR
)
4074 regs
[rd
] = (uintptr_t)addr
;
4080 if (subr
!= DIF_SUBR_STRSTR
) {
4081 if (subr
== DIF_SUBR_RINDEX
) {
4088 * Both index() and rindex() take an optional position
4089 * argument that denotes the starting position.
4092 int64_t pos
= (int64_t)tupregs
[2].dttk_value
;
4095 * If the position argument to index() is
4096 * negative, Perl implicitly clamps it at
4097 * zero. This semantic is a little surprising
4098 * given the special meaning of negative
4099 * positions to similar Perl functions like
4100 * substr(), but it appears to reflect a
4101 * notion that index() can start from a
4102 * negative index and increment its way up to
4103 * the string. Given this notion, Perl's
4104 * rindex() is at least self-consistent in
4105 * that it implicitly clamps positions greater
4106 * than the string length to be the string
4107 * length. Where Perl completely loses
4108 * coherence, however, is when the specified
4109 * substring is the empty string (""). In
4110 * this case, even if the position is
4111 * negative, rindex() returns 0 -- and even if
4112 * the position is greater than the length,
4113 * index() returns the string length. These
4114 * semantics violate the notion that index()
4115 * should never return a value less than the
4116 * specified position and that rindex() should
4117 * never return a value greater than the
4118 * specified position. (One assumes that
4119 * these semantics are artifacts of Perl's
4120 * implementation and not the results of
4121 * deliberate design -- it beggars belief that
4122 * even Larry Wall could desire such oddness.)
4123 * While in the abstract one would wish for
4124 * consistent position semantics across
4125 * substr(), index() and rindex() -- or at the
4126 * very least self-consistent position
4127 * semantics for index() and rindex() -- we
4128 * instead opt to keep with the extant Perl
4129 * semantics, in all their broken glory. (Do
4130 * we have more desire to maintain Perl's
4131 * semantics than Perl does? Probably.)
4133 if (subr
== DIF_SUBR_RINDEX
) {
4140 #if !defined(__APPLE__) /* Quiet compiler warnings */
4143 if ((size_t)pos
> len
)
4144 #endif /* __APPLE__ */
4150 #if !defined(__APPLE__) /* Quiet compiler warnings */
4153 if ((size_t)pos
>= len
) {
4154 #endif /* __APPLE__ */
4165 for (regs
[rd
] = notfound
; addr
!= limit
; addr
+= inc
) {
4166 if (dtrace_strncmp(addr
, substr
, sublen
) == 0) {
4167 if (subr
!= DIF_SUBR_STRSTR
) {
4169 * As D index() and rindex() are
4170 * modeled on Perl (and not on awk),
4171 * we return a zero-based (and not a
4172 * one-based) index. (For you Perl
4173 * weenies: no, we're not going to add
4174 * $[ -- and shouldn't you be at a con
4177 regs
[rd
] = (uintptr_t)(addr
- orig
);
4181 ASSERT(subr
== DIF_SUBR_STRSTR
);
4182 regs
[rd
] = (uintptr_t)addr
;
4190 case DIF_SUBR_STRTOK
: {
4191 uintptr_t addr
= tupregs
[0].dttk_value
;
4192 uintptr_t tokaddr
= tupregs
[1].dttk_value
;
4193 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
4194 uintptr_t limit
, toklimit
= tokaddr
+ size
;
4195 char *dest
= (char *)mstate
->dtms_scratch_ptr
;
4196 #if !defined(__APPLE__) /* Quiet compiler warnings */
4197 uint8_t c
, tokmap
[32]; /* 256 / 8 */
4200 uint8_t c
='\0', tokmap
[32]; /* 256 / 8 */
4202 #endif /* __APPLE__ */
4205 * Check both the token buffer and (later) the input buffer,
4206 * since both could be non-scratch addresses.
4208 if (!dtrace_strcanload(tokaddr
, size
, mstate
, vstate
)) {
4213 if (!DTRACE_INSCRATCH(mstate
, size
)) {
4214 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
4221 * If the address specified is NULL, we use our saved
4222 * strtok pointer from the mstate. Note that this
4223 * means that the saved strtok pointer is _only_
4224 * valid within multiple enablings of the same probe --
4225 * it behaves like an implicit clause-local variable.
4227 addr
= mstate
->dtms_strtok
;
4230 * If the user-specified address is non-NULL we must
4231 * access check it. This is the only time we have
4232 * a chance to do so, since this address may reside
4233 * in the string table of this clause-- future calls
4234 * (when we fetch addr from mstate->dtms_strtok)
4235 * would fail this access check.
4237 if (!dtrace_strcanload(addr
, size
, mstate
, vstate
)) {
4244 * First, zero the token map, and then process the token
4245 * string -- setting a bit in the map for every character
4246 * found in the token string.
4248 for (i
= 0; i
< (int)sizeof (tokmap
); i
++)
4251 for (; tokaddr
< toklimit
; tokaddr
++) {
4252 if ((c
= dtrace_load8(tokaddr
)) == '\0')
4255 ASSERT((c
>> 3) < sizeof (tokmap
));
4256 tokmap
[c
>> 3] |= (1 << (c
& 0x7));
		for (limit = addr + size; addr < limit; addr++) {
			/*
			 * We're looking for a character that is _not_ contained
			 * in the token string.
			 */
			if ((c = dtrace_load8(addr)) == '\0')

			if (!(tokmap[c >> 3] & (1 << (c & 0x7))))

			/*
			 * We reached the end of the string without finding
			 * any character that was not in the token string.
			 * We return NULL in this case, and we set the saved
			 * address to NULL as well.
			 */
			mstate->dtms_strtok = NULL;

		/*
		 * From here on, we're copying into the destination string.
		 */
		for (i = 0; addr < limit && i < size - 1; addr++) {
			if ((c = dtrace_load8(addr)) == '\0')

			if (tokmap[c >> 3] & (1 << (c & 0x7)))

		regs[rd] = (uintptr_t)dest;
		mstate->dtms_scratch_ptr += size;
		mstate->dtms_strtok = addr;
	case DIF_SUBR_SUBSTR: {
		uintptr_t s = tupregs[0].dttk_value;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		char *d = (char *)mstate->dtms_scratch_ptr;
		int64_t index = (int64_t)tupregs[1].dttk_value;
		int64_t remaining = (int64_t)tupregs[2].dttk_value;
		size_t len = dtrace_strlen((char *)s, size);

		if (!dtrace_canload(s, len + 1, mstate, vstate)) {

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);

			remaining = (int64_t)size;

		if (index < 0 && index + remaining > 0) {

#if !defined(__APPLE__) /* Quiet compiler warnings */
		if (index >= len || index < 0) {

		} else if (remaining < 0) {
			remaining += len - index;
		} else if (index + remaining > size) {
			remaining = size - index;
		}
#else
		if ((size_t)index >= len || index < 0) {

		} else if (remaining < 0) {
			remaining += len - index;
		} else if ((uint64_t)index + (uint64_t)remaining > size) {
			remaining = size - index;
		}
#endif /* __APPLE__ */
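		/*
		 * Note (descriptive): in D's substr(), a negative index is
		 * taken relative to the end of the string and a negative
		 * "remaining" is likewise measured back from the end, which
		 * is why both are re-normalized against len above before the
		 * copy loop below runs.
		 */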
		for (i = 0; i < remaining; i++) {
			if ((d[i] = dtrace_load8(s + index + i)) == '\0')

		mstate->dtms_scratch_ptr += size;
		regs[rd] = (uintptr_t)d;
#if !defined(__APPLE__)
	case DIF_SUBR_GETMAJOR:
		regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR64) & MAXMAJ64;

		regs[rd] = (tupregs[0].dttk_value >> NBITSMINOR) & MAXMAJ;

#else /* __APPLE__ */
	case DIF_SUBR_GETMAJOR:
		regs[rd] = (uintptr_t)major((dev_t)tupregs[0].dttk_value);
#endif /* __APPLE__ */

#if !defined(__APPLE__)
	case DIF_SUBR_GETMINOR:
		regs[rd] = tupregs[0].dttk_value & MAXMIN64;

		regs[rd] = tupregs[0].dttk_value & MAXMIN;

#else /* __APPLE__ */
	case DIF_SUBR_GETMINOR:
		regs[rd] = (uintptr_t)minor((dev_t)tupregs[0].dttk_value);
#endif /* __APPLE__ */
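	/*
	 * Note (descriptive): on Darwin the major and minor numbers are
	 * extracted with the major()/minor() macros on a dev_t, rather than
	 * with the Solaris-style NBITSMINOR/MAXMAJ/MAXMIN shifting and
	 * masking used in the non-__APPLE__ branches above.
	 */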
#if !defined(__APPLE__)
	case DIF_SUBR_DDI_PATHNAME: {
		/*
		 * This one is a galactic mess.  We are going to roughly
		 * emulate ddi_pathname(), but it's made more complicated
		 * by the fact that we (a) want to include the minor name and
		 * (b) must proceed iteratively instead of recursively.
		 */
		uintptr_t dest = mstate->dtms_scratch_ptr;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		char *start = (char *)dest, *end = start + size - 1;
		uintptr_t daddr = tupregs[0].dttk_value;
		int64_t minor = (int64_t)tupregs[1].dttk_value;
		int i, len, depth = 0;

		/*
		 * Due to all the pointer jumping we do and context we must
		 * rely upon, we just mandate that the user must have kernel
		 * read privileges to use this routine.
		 */
		if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) == 0) {
			*flags |= CPU_DTRACE_KPRIV;

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);

		/*
		 * We want to have a name for the minor.  In order to do this,
		 * we need to walk the minor list from the devinfo.  We want
		 * to be sure that we don't infinitely walk a circular list,
		 * so we check for circularity by sending a scout pointer
		 * ahead two elements for every element that we iterate over;
		 * if the list is circular, these will ultimately point to the
		 * same element.  You may recognize this little trick as the
		 * answer to a stupid interview question -- one that always
		 * seems to be asked by those who had to have it laboriously
		 * explained to them, and who can't even concisely describe
		 * the conditions under which one would be forced to resort to
		 * this technique.  Needless to say, those conditions are
		 * found here -- and probably only here.  Is this the only use
		 * of this infamous trick in shipping, production code?  If it
		 * isn't, it probably should be...
		 */
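		/*
		 * (Descriptive sketch, not from the original source: the
		 * scout walk below is the classic two-pointer cycle check.
		 * In outline, with hypothetical names,
		 *
		 *	slow = head;
		 *	fast = load(head->next);
		 *	while (fast != NULL && fast != slow) {
		 *		slow = load(slow->next);
		 *		fast = load(load(fast->next)->next);
		 *	}
		 *
		 * and any meeting of the two pointers means the list is
		 * circular -- which is exactly the scout == maddr test made
		 * in the loop that follows.)
		 */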
		uintptr_t maddr = dtrace_loadptr(daddr +
		    offsetof(struct dev_info, devi_minor));

		uintptr_t next = offsetof(struct ddi_minor_data, next);
		uintptr_t name = offsetof(struct ddi_minor_data,
		    d_minor) + offsetof(struct ddi_minor, name);
		uintptr_t dev = offsetof(struct ddi_minor_data,
		    d_minor) + offsetof(struct ddi_minor, dev);

		scout = dtrace_loadptr(maddr + next);

		while (maddr != NULL && !(*flags & CPU_DTRACE_FAULT)) {

			m = dtrace_load64(maddr + dev) & MAXMIN64;

			m = dtrace_load32(maddr + dev) & MAXMIN;

			maddr = dtrace_loadptr(maddr + next);

			scout = dtrace_loadptr(scout + next);

			scout = dtrace_loadptr(scout + next);

			if (scout == maddr) {
				*flags |= CPU_DTRACE_ILLOP;

			/*
			 * We have the minor data.  Now we need to
			 * copy the minor's name into the end of the
			 */
			s = (char *)dtrace_loadptr(maddr + name);
			len = dtrace_strlen(s, size);

			if (*flags & CPU_DTRACE_FAULT)

			if ((end -= (len + 1)) < start)

			for (i = 1; i <= len; i++)
				end[i] = dtrace_load8((uintptr_t)s++);

		while (daddr != NULL && !(*flags & CPU_DTRACE_FAULT)) {
			ddi_node_state_t devi_state;

			devi_state = dtrace_load32(daddr +
			    offsetof(struct dev_info, devi_node_state));

			if (*flags & CPU_DTRACE_FAULT)

			if (devi_state >= DS_INITIALIZED) {
				s = (char *)dtrace_loadptr(daddr +
				    offsetof(struct dev_info, devi_addr));
				len = dtrace_strlen(s, size);

				if (*flags & CPU_DTRACE_FAULT)

				if ((end -= (len + 1)) < start)

				for (i = 1; i <= len; i++)
					end[i] = dtrace_load8((uintptr_t)s++);

			/*
			 * Now for the node name...
			 */
			s = (char *)dtrace_loadptr(daddr +
			    offsetof(struct dev_info, devi_node_name));

			daddr = dtrace_loadptr(daddr +
			    offsetof(struct dev_info, devi_parent));

			/*
			 * If our parent is NULL (that is, if we're the root
			 * node), we're going to use the special path
			 */

			len = dtrace_strlen(s, size);
			if (*flags & CPU_DTRACE_FAULT)

			if ((end -= (len + 1)) < start)

			for (i = 1; i <= len; i++)
				end[i] = dtrace_load8((uintptr_t)s++);

			if (depth++ > dtrace_devdepth_max) {
				*flags |= CPU_DTRACE_ILLOP;

			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);

		if (daddr == NULL) {
			regs[rd] = (uintptr_t)end;
			mstate->dtms_scratch_ptr += size;

#else /* __APPLE__ */
	case DIF_SUBR_DDI_PATHNAME: {
		/* FIXME: awaits galactic disentanglement ;-} */
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);

#endif /* __APPLE__ */
	case DIF_SUBR_STRJOIN: {
		char *d = (char *)mstate->dtms_scratch_ptr;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		uintptr_t s1 = tupregs[0].dttk_value;
		uintptr_t s2 = tupregs[1].dttk_value;
#if !defined(__APPLE__) /* Quiet compiler warnings */
		/* ... */
#endif /* __APPLE__ */

		if (!dtrace_strcanload(s1, size, mstate, vstate) ||
		    !dtrace_strcanload(s2, size, mstate, vstate)) {

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);

				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);

			if ((d[i++] = dtrace_load8(s1++)) == '\0') {

				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);

			if ((d[i++] = dtrace_load8(s2++)) == '\0')

		mstate->dtms_scratch_ptr += i;
		regs[rd] = (uintptr_t)d;
	case DIF_SUBR_LLTOSTR: {
		int64_t i = (int64_t)tupregs[0].dttk_value;
		int64_t val = i < 0 ? i * -1 : i;
		uint64_t size = 22;	/* enough room for 2^64 in decimal */
		char *end = (char *)mstate->dtms_scratch_ptr + size - 1;

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);

		for (*end-- = '\0'; val; val /= 10)
			*end-- = '0' + (val % 10);
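		/*
		 * Note (descriptive): digits are emitted least-significant
		 * first, walking backward through the 22-byte scratch region,
		 * so the finished string begins at end + 1 -- which is what
		 * is handed back in regs[rd] below.
		 */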
		regs[rd] = (uintptr_t)end + 1;
		mstate->dtms_scratch_ptr += size;
	case DIF_SUBR_HTONS:
	case DIF_SUBR_NTOHS:
		regs[rd] = (uint16_t)tupregs[0].dttk_value;

		regs[rd] = DT_BSWAP_16((uint16_t)tupregs[0].dttk_value);

	case DIF_SUBR_HTONL:
	case DIF_SUBR_NTOHL:
		regs[rd] = (uint32_t)tupregs[0].dttk_value;

		regs[rd] = DT_BSWAP_32((uint32_t)tupregs[0].dttk_value);

	case DIF_SUBR_HTONLL:
	case DIF_SUBR_NTOHLL:
		regs[rd] = (uint64_t)tupregs[0].dttk_value;

		regs[rd] = DT_BSWAP_64((uint64_t)tupregs[0].dttk_value);
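	/*
	 * Note (descriptive): for each of the hton/ntoh pairs above, the
	 * elided byte-order conditionals choose between the pass-through
	 * assignment and the DT_BSWAP_* form -- a big-endian host returns
	 * the value unchanged, a little-endian host byte-swaps it.
	 */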
	case DIF_SUBR_DIRNAME:
	case DIF_SUBR_BASENAME: {
		char *dest = (char *)mstate->dtms_scratch_ptr;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		uintptr_t src = tupregs[0].dttk_value;
		int i, j, len = dtrace_strlen((char *)src, size);
		int lastbase = -1, firstbase = -1, lastdir = -1;

		if (!dtrace_canload(src, len + 1, mstate, vstate)) {

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);

		/*
		 * The basename and dirname for a zero-length string is
		 */
			src = (uintptr_t)".";

		/*
		 * Start from the back of the string, moving back toward the
		 * front until we see a character that isn't a slash.  That
		 * character is the last character in the basename.
		 */
		for (i = len - 1; i >= 0; i--) {
			if (dtrace_load8(src + i) != '/')

		/*
		 * Starting from the last character in the basename, move
		 * towards the front until we find a slash.  The character
		 * that we processed immediately before that is the first
		 * character in the basename.
		 */
		for (; i >= 0; i--) {
			if (dtrace_load8(src + i) == '/')

		/*
		 * Now keep going until we find a non-slash character.  That
		 * character is the last character in the dirname.
		 */
		for (; i >= 0; i--) {
			if (dtrace_load8(src + i) != '/')

		ASSERT(!(lastbase == -1 && firstbase != -1));
		ASSERT(!(firstbase == -1 && lastdir != -1));

		if (lastbase == -1) {
			/*
			 * We didn't find a non-slash character.  We know that
			 * the length is non-zero, so the whole string must be
			 * slashes.  In either the dirname or the basename
			 * case, we return '/'.
			 */
			ASSERT(firstbase == -1);
			firstbase = lastbase = lastdir = 0;

		if (firstbase == -1) {
			/*
			 * The entire string consists only of a basename
			 * component.  If we're looking for dirname, we need
			 * to change our string to be just "."; if we're
			 * looking for a basename, we'll just set the first
			 * character of the basename to be 0.
			 */
			if (subr == DIF_SUBR_DIRNAME) {
				ASSERT(lastdir == -1);
				src = (uintptr_t)".";

		if (subr == DIF_SUBR_DIRNAME) {
			if (lastdir == -1) {
				/*
				 * We know that we have a slash in the name --
				 * or lastdir would be set to 0, above.  And
				 * because lastdir is -1, we know that this
				 * slash must be the first character.  (That
				 * is, the full string must be of the form
				 * "/basename".)  In this case, the last
				 * character of the directory name is 0.
				 */

			ASSERT(subr == DIF_SUBR_BASENAME);
			ASSERT(firstbase != -1 && lastbase != -1);

#if !defined(__APPLE__) /* Quiet compiler warnings */
		for (i = start, j = 0; i <= end && j < size - 1; i++, j++)
			dest[j] = dtrace_load8(src + i);
#else
		for (i = start, j = 0; i <= end && (uint64_t)j < size - 1; i++, j++)
			dest[j] = dtrace_load8(src + i);
#endif /* __APPLE__ */

		regs[rd] = (uintptr_t)dest;
		mstate->dtms_scratch_ptr += size;
	case DIF_SUBR_CLEANPATH: {
		char *dest = (char *)mstate->dtms_scratch_ptr, c;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		uintptr_t src = tupregs[0].dttk_value;

		if (!dtrace_strcanload(src, size, mstate, vstate)) {

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);

		/*
		 * Move forward, loading each character.
		 */
			c = dtrace_load8(src + i++);

#if !defined(__APPLE__) /* Quiet compiler warnings */
			if (j + 5 >= size)	/* 5 = strlen("/..c\0") */
#else
			if ((uint64_t)(j + 5) >= size)	/* 5 = strlen("/..c\0") */
#endif /* __APPLE__ */

			c = dtrace_load8(src + i++);

				/*
				 * We have two slashes -- we can just advance
				 * to the next character.
				 */

				/*
				 * This is not "." and it's not ".." -- we can
				 * just store the "/" and this character and
				 */

			c = dtrace_load8(src + i++);

				/*
				 * This is a "/./" component.  We're not going
				 * to store anything in the destination buffer;
				 * we're just going to go to the next component.
				 */

				/*
				 * This is not ".." -- we can just store the
				 * "/." and this character and continue
				 */

			c = dtrace_load8(src + i++);

			if (c != '/' && c != '\0') {
				/*
				 * This is not ".." -- it's "..[mumble]".
				 * We'll store the "/.." and this character
				 * and continue processing.
				 */

			/*
			 * This is "/../" or "/..\0".  We need to back up
			 * our destination pointer until we find a "/".
			 */
			while (j != 0 && dest[--j] != '/')

		} while (c != '\0');

		regs[rd] = (uintptr_t)dest;
		mstate->dtms_scratch_ptr += size;
	case DIF_SUBR_INET_NTOA:
	case DIF_SUBR_INET_NTOA6:
	case DIF_SUBR_INET_NTOP: {

		if (subr == DIF_SUBR_INET_NTOP) {
			af = (int)tupregs[0].dttk_value;
		} else {
			af = subr == DIF_SUBR_INET_NTOA ? AF_INET : AF_INET6;
		}

		if (af == AF_INET) {
#if !defined(__APPLE__)
			/* ... */
#endif /* __APPLE__ */

			/*
			 * Safely load the IPv4 address.
			 */
#if !defined(__APPLE__)
			ip4 = dtrace_load32(tupregs[argi].dttk_value);
#else
			dtrace_bcopy(
			    (void *)(uintptr_t)tupregs[argi].dttk_value,
			    (void *)(uintptr_t)&ip4, sizeof (ip4));
#endif /* __APPLE__ */

			/*
			 * Check an IPv4 string will fit in scratch.
			 */
#if !defined(__APPLE__)
			size = INET_ADDRSTRLEN;
#else
			size = MAX_IPv4_STR_LEN;
#endif /* __APPLE__ */
			if (!DTRACE_INSCRATCH(mstate, size)) {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);

			base = (char *)mstate->dtms_scratch_ptr;
			end = (char *)mstate->dtms_scratch_ptr + size - 1;

			/*
			 * Stringify as a dotted decimal quad.
			 */
			ptr8 = (uint8_t *)&ip4;
			for (i = 3; i >= 0; i--) {

				for (; val; val /= 10) {
					*end-- = '0' + (val % 10);

			ASSERT(end + 1 >= base);

		} else if (af == AF_INET6) {
#if defined(__APPLE__)
#define _S6_un __u6_addr
#define _S6_u8 __u6_addr8
#endif /* __APPLE__ */
			struct in6_addr ip6;
			int firstzero, tryzero, numzero, v6end;

			const char digits[] = "0123456789abcdef";

			/*
			 * Stringify using RFC 1884 convention 2 - 16 bit
			 * hexadecimal values with a zero-run compression.
			 * Lower case hexadecimal digits are used.
			 * 	eg, fe80::214:4fff:fe0b:76c8.
			 * The IPv4 embedded form is returned for inet_ntop,
			 * just the IPv4 string is returned for inet_ntoa6.
			 */
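			/*
			 * (Descriptive example: for a v4-mapped address such
			 * as ::ffff:10.1.2.3, inet_ntop() produces the
			 * embedded-IPv4 form noted above, while inet_ntoa6()
			 * yields just "10.1.2.3".)
			 */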
			/*
			 * Safely load the IPv6 address.
			 */
			dtrace_bcopy(
			    (void *)(uintptr_t)tupregs[argi].dttk_value,
			    (void *)(uintptr_t)&ip6, sizeof (struct in6_addr));

			/*
			 * Check an IPv6 string will fit in scratch.
			 */
			size = INET6_ADDRSTRLEN;
			if (!DTRACE_INSCRATCH(mstate, size)) {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);

			base = (char *)mstate->dtms_scratch_ptr;
			end = (char *)mstate->dtms_scratch_ptr + size - 1;

			/*
			 * Find the longest run of 16 bit zero values
			 * for the single allowed zero compression - "::".
			 */
#if !defined(__APPLE__) /* Quiet compiler warnings */
			for (i = 0; i < sizeof (struct in6_addr); i++) {
#else
			for (i = 0; i < (int)sizeof (struct in6_addr); i++) {
#endif /* __APPLE__ */
				if (ip6._S6_un._S6_u8[i] == 0 &&
				    tryzero == -1 && i % 2 == 0) {

				if (tryzero != -1 &&
				    (ip6._S6_un._S6_u8[i] != 0 ||
				    i == sizeof (struct in6_addr) - 1)) {

					if (i - tryzero <= numzero) {

					firstzero = tryzero;
					numzero = i - i % 2 - tryzero;

				if (ip6._S6_un._S6_u8[i] == 0 &&
				    i == sizeof (struct in6_addr) - 1)

#if !defined(__APPLE__) /* Quiet compiler warnings */
			ASSERT(firstzero + numzero <= sizeof (struct in6_addr));
#else
			ASSERT(firstzero + numzero <= (int)sizeof (struct in6_addr));
#endif /* __APPLE__ */

			/*
			 * Check for an IPv4 embedded address.
			 */
			v6end = sizeof (struct in6_addr) - 2;
			if (IN6_IS_ADDR_V4MAPPED(&ip6) ||
			    IN6_IS_ADDR_V4COMPAT(&ip6)) {
#if !defined(__APPLE__) /* Quiet compiler warnings */
				for (i = sizeof (struct in6_addr) - 1;
				    i >= DTRACE_V4MAPPED_OFFSET; i--) {
#else
				for (i = sizeof (struct in6_addr) - 1;
				    i >= (int)DTRACE_V4MAPPED_OFFSET; i--) {
#endif /* __APPLE__ */
					ASSERT(end >= base);

					val = ip6._S6_un._S6_u8[i];

					for (; val; val /= 10) {
						*end-- = '0' + val % 10;

#if !defined(__APPLE__) /* Quiet compiler warnings */
					if (i > DTRACE_V4MAPPED_OFFSET)
#else
					if (i > (int)DTRACE_V4MAPPED_OFFSET)
#endif /* __APPLE__ */

				if (subr == DIF_SUBR_INET_NTOA6)

				/*
				 * Set v6end to skip the IPv4 address that
				 * we have already stringified.
				 */

			/*
			 * Build the IPv6 string by working through the
			 * address in reverse.
			 */
			for (i = v6end; i >= 0; i -= 2) {
				ASSERT(end >= base);

				if (i == firstzero + numzero - 2) {

				if (i < 14 && i != firstzero - 2)

				val = (ip6._S6_un._S6_u8[i] << 8) +
				    ip6._S6_un._S6_u8[i + 1];

				for (; val; val /= 16) {
					*end-- = digits[val % 16];

			ASSERT(end + 1 >= base);

#if defined(__APPLE__)
			/* ... */
#endif /* __APPLE__ */

		} else {
			/*
			 * The user didn't use AF_INET or AF_INET6.
			 */
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);

inetout:	regs[rd] = (uintptr_t)end + 1;
		mstate->dtms_scratch_ptr += size;
	/* CoreProfile callback ('core_profile(uint64_t, [uint64_t], [uint64_t] ...)') */
	case DIF_SUBR_COREPROFILE: {
		uint64_t selector = tupregs[0].dttk_value;
		uint64_t args[DIF_DTR_NREGS-1] = {0ULL};

		uint32_t count = (uint32_t)nargs;

			regs[rd] = KERN_FAILURE;

		if(count > DIF_DTR_NREGS)
			count = DIF_DTR_NREGS;

		/* copy in any variadic argument list, bounded by DIF_DTR_NREGS */
		for(ii = 0; ii < count-1; ii++) {
			args[ii] = tupregs[ii+1].dttk_value;

		chudxnu_dtrace_callback(selector, args, count-1);
		if(KERN_SUCCESS != ret) {

#endif /* __APPLE__ */
/*
 * Emulate the execution of DTrace IR instructions specified by the given
 * DIF object.  This function is deliberately void of assertions as all of
 * the necessary checks are handled by a call to dtrace_difo_validate().
 */
dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate, dtrace_state_t *state)
{
	const dif_instr_t *text = difo->dtdo_buf;
	const uint_t textlen = difo->dtdo_len;
	const char *strtab = difo->dtdo_strtab;
	const uint64_t *inttab = difo->dtdo_inttab;

	dtrace_statvar_t *svar;
	dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;

	volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
#if !defined(__APPLE__)
	volatile uintptr_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
#else
	volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
#endif /* __APPLE__ */

	dtrace_key_t tupregs[DIF_DTR_NREGS + 2];	/* +2 for thread and id */
	uint64_t regs[DIF_DIR_NREGS];

	uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0;

#if !defined(__APPLE__) /* Quiet compiler warnings */
	uint_t pc = 0, id, opc;
#else
	uint_t pc = 0, id, opc = 0;
#endif /* __APPLE__ */

	/*
	 * We stash the current DIF object into the machine state: we need it
	 * for subsequent access checking.
	 */
	mstate->dtms_difo = difo;

	regs[DIF_REG_R0] = 0;		/* %r0 is fixed at zero */
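	/*
	 * Note (descriptive): the main loop below fetches the instruction at
	 * pc, decodes its r1/r2/rd register fields, and dispatches on the
	 * opcode until either the DIF text is exhausted or a fault flag is
	 * raised on this CPU.
	 */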
	while (pc < textlen && !(*flags & CPU_DTRACE_FAULT)) {

		r1 = DIF_INSTR_R1(instr);
		r2 = DIF_INSTR_R2(instr);
		rd = DIF_INSTR_RD(instr);

		switch (DIF_INSTR_OP(instr)) {

			regs[rd] = regs[r1] | regs[r2];

			regs[rd] = regs[r1] ^ regs[r2];

			regs[rd] = regs[r1] & regs[r2];

			regs[rd] = regs[r1] << regs[r2];

			regs[rd] = regs[r1] >> regs[r2];

			regs[rd] = regs[r1] - regs[r2];

			regs[rd] = regs[r1] + regs[r2];

			regs[rd] = regs[r1] * regs[r2];

			if (regs[r2] == 0) {
				*flags |= CPU_DTRACE_DIVZERO;

				regs[rd] = (int64_t)regs[r1] /
				    (int64_t)regs[r2];

			if (regs[r2] == 0) {
				*flags |= CPU_DTRACE_DIVZERO;

				regs[rd] = regs[r1] / regs[r2];

			if (regs[r2] == 0) {
				*flags |= CPU_DTRACE_DIVZERO;

				regs[rd] = (int64_t)regs[r1] %
				    (int64_t)regs[r2];

			if (regs[r2] == 0) {
				*flags |= CPU_DTRACE_DIVZERO;

				regs[rd] = regs[r1] % regs[r2];

			regs[rd] = ~regs[r1];

			regs[rd] = regs[r1];

			cc_r = regs[r1] - regs[r2];

			cc_c = regs[r1] < regs[r2];

			cc_n = cc_v = cc_c = 0;
			cc_z = regs[r1] == 0;
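		/*
		 * Note (descriptive): cc_n, cc_z, cc_v and cc_c mimic the
		 * negative, zero, overflow and carry condition codes of a
		 * conventional processor; the compare/test handling above
		 * sets them and the conditional branches below consult them.
		 */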
			pc = DIF_INSTR_LABEL(instr);

			pc = DIF_INSTR_LABEL(instr);

			pc = DIF_INSTR_LABEL(instr);

			if ((cc_z | (cc_n ^ cc_v)) == 0)
				pc = DIF_INSTR_LABEL(instr);

			if ((cc_c | cc_z) == 0)
				pc = DIF_INSTR_LABEL(instr);

			if ((cc_n ^ cc_v) == 0)
				pc = DIF_INSTR_LABEL(instr);

			pc = DIF_INSTR_LABEL(instr);

			pc = DIF_INSTR_LABEL(instr);

			pc = DIF_INSTR_LABEL(instr);

			if (cc_z | (cc_n ^ cc_v))
				pc = DIF_INSTR_LABEL(instr);

			pc = DIF_INSTR_LABEL(instr);

			if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
				*flags |= CPU_DTRACE_KPRIV;

			regs[rd] = (int8_t)dtrace_load8(regs[r1]);

			if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
				*flags |= CPU_DTRACE_KPRIV;

			regs[rd] = (int16_t)dtrace_load16(regs[r1]);

			if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
				*flags |= CPU_DTRACE_KPRIV;

			regs[rd] = (int32_t)dtrace_load32(regs[r1]);

			if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
				*flags |= CPU_DTRACE_KPRIV;

			regs[rd] = dtrace_load8(regs[r1]);

			if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
				*flags |= CPU_DTRACE_KPRIV;

			regs[rd] = dtrace_load16(regs[r1]);

			if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
				*flags |= CPU_DTRACE_KPRIV;

			regs[rd] = dtrace_load32(regs[r1]);

			if (!dtrace_canstore(regs[r1], 8, mstate, vstate)) {
				*flags |= CPU_DTRACE_KPRIV;

			regs[rd] = dtrace_load64(regs[r1]);

#if !defined(__APPLE__)
			    dtrace_fuword8((void *)(uintptr_t)regs[r1]);

			regs[rd] = (int16_t)
			    dtrace_fuword16((void *)(uintptr_t)regs[r1]);

			regs[rd] = (int32_t)
			    dtrace_fuword32((void *)(uintptr_t)regs[r1]);

			    dtrace_fuword8((void *)(uintptr_t)regs[r1]);

			    dtrace_fuword16((void *)(uintptr_t)regs[r1]);

			    dtrace_fuword32((void *)(uintptr_t)regs[r1]);

			    dtrace_fuword64((void *)(uintptr_t)regs[r1]);

#else /* Darwin 32-bit kernel may fetch from 64-bit user. Don't want uintptr_t cast. */
			    dtrace_fuword8(regs[r1]);

			regs[rd] = (int16_t)
			    dtrace_fuword16(regs[r1]);

			regs[rd] = (int32_t)
			    dtrace_fuword32(regs[r1]);

			    dtrace_fuword8(regs[r1]);

			    dtrace_fuword16(regs[r1]);

			    dtrace_fuword32(regs[r1]);

			    dtrace_fuword64(regs[r1]);
#endif /* __APPLE__ */
			regs[rd] = inttab[DIF_INSTR_INTEGER(instr)];

			regs[rd] = (uint64_t)(uintptr_t)
			    (strtab + DIF_INSTR_STRING(instr));

			size_t sz = state->dts_options[DTRACEOPT_STRSIZE];
			uintptr_t s1 = regs[r1];
			uintptr_t s2 = regs[r2];

			    !dtrace_strcanload(s1, sz, mstate, vstate))

			    !dtrace_strcanload(s2, sz, mstate, vstate))

			cc_r = dtrace_strncmp((char *)s1, (char *)s2, sz);

			regs[rd] = dtrace_dif_variable(mstate, state,

			id = DIF_INSTR_VAR(instr);

			if (id >= DIF_VAR_OTHER_UBASE) {

				id -= DIF_VAR_OTHER_UBASE;
				svar = vstate->dtvs_globals[id];
				ASSERT(svar != NULL);
				v = &svar->dtsv_var;

				if (!(v->dtdv_type.dtdt_flags & DIF_TF_BYREF)) {
					regs[rd] = svar->dtsv_data;

				a = (uintptr_t)svar->dtsv_data;

				if (*(uint8_t *)a == UINT8_MAX) {
					/*
					 * If the 0th byte is set to UINT8_MAX
					 * then this is to be treated as a
					 * reference to a NULL variable.
					 */

				regs[rd] = a + sizeof (uint64_t);

			regs[rd] = dtrace_dif_variable(mstate, state, id, 0);

			id = DIF_INSTR_VAR(instr);

			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;

			svar = vstate->dtvs_globals[id];
			ASSERT(svar != NULL);
			v = &svar->dtsv_var;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				uintptr_t a = (uintptr_t)svar->dtsv_data;

				ASSERT(svar->dtsv_size != 0);

				if (regs[rd] == NULL) {
					*(uint8_t *)a = UINT8_MAX;

				a += sizeof (uint64_t);

				if (!dtrace_vcanload(
				    (void *)(uintptr_t)regs[rd], &v->dtdv_type,

				dtrace_vcopy((void *)(uintptr_t)regs[rd],
				    (void *)a, &v->dtdv_type);

			svar->dtsv_data = regs[rd];

			/*
			 * There are no DTrace built-in thread-local arrays at
			 * present.  This opcode is saved for future work.
			 */
			*flags |= CPU_DTRACE_ILLOP;

			id = DIF_INSTR_VAR(instr);

			if (id < DIF_VAR_OTHER_UBASE) {
				/*
				 * For now, this has no meaning.
				 */

			id -= DIF_VAR_OTHER_UBASE;

#if !defined(__APPLE__) /* Quiet compiler warnings */
			ASSERT(id < vstate->dtvs_nlocals);
#else
			ASSERT(id < (uint_t)vstate->dtvs_nlocals);
#endif /* __APPLE__ */
			ASSERT(vstate->dtvs_locals != NULL);

			svar = vstate->dtvs_locals[id];
			ASSERT(svar != NULL);
			v = &svar->dtsv_var;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				uintptr_t a = (uintptr_t)svar->dtsv_data;
				size_t sz = v->dtdv_type.dtdt_size;

				sz += sizeof (uint64_t);
				ASSERT(svar->dtsv_size == (int)NCPU * sz);
				a += CPU->cpu_id * sz;

				if (*(uint8_t *)a == UINT8_MAX) {
					/*
					 * If the 0th byte is set to UINT8_MAX
					 * then this is to be treated as a
					 * reference to a NULL variable.
					 */

				regs[rd] = a + sizeof (uint64_t);

			ASSERT(svar->dtsv_size == (int)NCPU * sizeof (uint64_t));
			tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
			regs[rd] = tmp[CPU->cpu_id];
			id = DIF_INSTR_VAR(instr);

			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;
#if !defined(__APPLE__) /* Quiet compiler warnings */
			ASSERT(id < vstate->dtvs_nlocals);
#else
			ASSERT(id < (uint_t)vstate->dtvs_nlocals);
#endif /* __APPLE__ */

			ASSERT(vstate->dtvs_locals != NULL);
			svar = vstate->dtvs_locals[id];
			ASSERT(svar != NULL);
			v = &svar->dtsv_var;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				uintptr_t a = (uintptr_t)svar->dtsv_data;
				size_t sz = v->dtdv_type.dtdt_size;

				sz += sizeof (uint64_t);
				ASSERT(svar->dtsv_size == (int)NCPU * sz);
				a += CPU->cpu_id * sz;

				if (regs[rd] == NULL) {
					*(uint8_t *)a = UINT8_MAX;

				a += sizeof (uint64_t);

				if (!dtrace_vcanload(
				    (void *)(uintptr_t)regs[rd], &v->dtdv_type,

				dtrace_vcopy((void *)(uintptr_t)regs[rd],
				    (void *)a, &v->dtdv_type);

			ASSERT(svar->dtsv_size == (int)NCPU * sizeof (uint64_t));
			tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
			tmp[CPU->cpu_id] = regs[rd];

			dtrace_dynvar_t *dvar;

			id = DIF_INSTR_VAR(instr);
			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;
			v = &vstate->dtvs_tlocals[id];

			key = &tupregs[DIF_DTR_NREGS];
			key[0].dttk_value = (uint64_t)id;
			key[0].dttk_size = 0;
			DTRACE_TLS_THRKEY(key[1].dttk_value);
			key[1].dttk_size = 0;

			dvar = dtrace_dynvar(dstate, 2, key,
			    sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC,

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;

				regs[rd] = *((uint64_t *)dvar->dtdv_data);

			dtrace_dynvar_t *dvar;

			id = DIF_INSTR_VAR(instr);
			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;

			key = &tupregs[DIF_DTR_NREGS];
			key[0].dttk_value = (uint64_t)id;
			key[0].dttk_size = 0;
			DTRACE_TLS_THRKEY(key[1].dttk_value);
			key[1].dttk_size = 0;
			v = &vstate->dtvs_tlocals[id];

			dvar = dtrace_dynvar(dstate, 2, key,
			    v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
			    v->dtdv_type.dtdt_size : sizeof (uint64_t),
			    regs[rd] ? DTRACE_DYNVAR_ALLOC :
			    DTRACE_DYNVAR_DEALLOC, mstate, vstate);

			/*
			 * Given that we're storing to thread-local data,
			 * we need to flush our predicate cache.
			 */
#if !defined(__APPLE__)
			curthread->t_predcache = NULL;
#else
			dtrace_set_thread_predcache(current_thread(), 0);
#endif /* __APPLE__ */

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				if (!dtrace_vcanload(
				    (void *)(uintptr_t)regs[rd],
				    &v->dtdv_type, mstate, vstate))

				dtrace_vcopy((void *)(uintptr_t)regs[rd],
				    dvar->dtdv_data, &v->dtdv_type);

				*((uint64_t *)dvar->dtdv_data) = regs[rd];
			regs[rd] = (int64_t)regs[r1] >> regs[r2];

			dtrace_dif_subr(DIF_INSTR_SUBR(instr), rd,
			    regs, tupregs, ttop, mstate, state);

			if (ttop == DIF_DTR_NREGS) {
				*flags |= CPU_DTRACE_TUPOFLOW;

			if (r1 == DIF_TYPE_STRING) {
				/*
				 * If this is a string type and the size is 0,
				 * we'll use the system-wide default string
				 * size.  Note that we are _not_ looking at
				 * the value of the DTRACEOPT_STRSIZE option;
				 * had this been set, we would expect to have
				 * a non-zero size value in the "pushtr".
				 */
				tupregs[ttop].dttk_size =
				    dtrace_strlen((char *)(uintptr_t)regs[rd],
				    regs[r2] ? regs[r2] :
				    dtrace_strsize_default) + 1;

				tupregs[ttop].dttk_size = regs[r2];

			tupregs[ttop++].dttk_value = regs[rd];

			if (ttop == DIF_DTR_NREGS) {
				*flags |= CPU_DTRACE_TUPOFLOW;

			tupregs[ttop].dttk_value = regs[rd];
			tupregs[ttop++].dttk_size = 0;

		case DIF_OP_FLUSHTS:

		case DIF_OP_LDTAA: {
			dtrace_dynvar_t *dvar;
			dtrace_key_t *key = tupregs;
			uint_t nkeys = ttop;

			id = DIF_INSTR_VAR(instr);
			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;

			key[nkeys].dttk_value = (uint64_t)id;
			key[nkeys++].dttk_size = 0;

			if (DIF_INSTR_OP(instr) == DIF_OP_LDTAA) {
				DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
				key[nkeys++].dttk_size = 0;
				v = &vstate->dtvs_tlocals[id];

				v = &vstate->dtvs_globals[id]->dtsv_var;

			dvar = dtrace_dynvar(dstate, nkeys, key,
			    v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
			    v->dtdv_type.dtdt_size : sizeof (uint64_t),
			    DTRACE_DYNVAR_NOALLOC, mstate, vstate);

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;

				regs[rd] = *((uint64_t *)dvar->dtdv_data);
		case DIF_OP_STTAA: {
			dtrace_dynvar_t *dvar;
			dtrace_key_t *key = tupregs;
			uint_t nkeys = ttop;

			id = DIF_INSTR_VAR(instr);
			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;

			key[nkeys].dttk_value = (uint64_t)id;
			key[nkeys++].dttk_size = 0;

			if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) {
				DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
				key[nkeys++].dttk_size = 0;
				v = &vstate->dtvs_tlocals[id];

				v = &vstate->dtvs_globals[id]->dtsv_var;

			dvar = dtrace_dynvar(dstate, nkeys, key,
			    v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
			    v->dtdv_type.dtdt_size : sizeof (uint64_t),
			    regs[rd] ? DTRACE_DYNVAR_ALLOC :
			    DTRACE_DYNVAR_DEALLOC, mstate, vstate);

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				if (!dtrace_vcanload(
				    (void *)(uintptr_t)regs[rd], &v->dtdv_type,

				dtrace_vcopy((void *)(uintptr_t)regs[rd],
				    dvar->dtdv_data, &v->dtdv_type);

				*((uint64_t *)dvar->dtdv_data) = regs[rd];

		case DIF_OP_ALLOCS: {
			uintptr_t ptr = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
			size_t size = ptr - mstate->dtms_scratch_ptr + regs[r1];

			/*
			 * Rounding up the user allocation size could have
			 * overflowed large, bogus allocations (like -1ULL) to
			 */
			if (size < regs[r1] ||
			    !DTRACE_INSCRATCH(mstate, size)) {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);

			dtrace_bzero((void *) mstate->dtms_scratch_ptr, size);
			mstate->dtms_scratch_ptr += size;

			if (!dtrace_canstore(regs[rd], regs[r2],
				*flags |= CPU_DTRACE_BADADDR;

			if (!dtrace_canload(regs[r1], regs[r2], mstate, vstate))

			dtrace_bcopy((void *)(uintptr_t)regs[r1],
			    (void *)(uintptr_t)regs[rd], (size_t)regs[r2]);

			if (!dtrace_canstore(regs[rd], 1, mstate, vstate)) {
				*flags |= CPU_DTRACE_BADADDR;

			*((uint8_t *)(uintptr_t)regs[rd]) = (uint8_t)regs[r1];

			if (!dtrace_canstore(regs[rd], 2, mstate, vstate)) {
				*flags |= CPU_DTRACE_BADADDR;

				*flags |= CPU_DTRACE_BADALIGN;

			*((uint16_t *)(uintptr_t)regs[rd]) = (uint16_t)regs[r1];

			if (!dtrace_canstore(regs[rd], 4, mstate, vstate)) {
				*flags |= CPU_DTRACE_BADADDR;

				*flags |= CPU_DTRACE_BADALIGN;

			*((uint32_t *)(uintptr_t)regs[rd]) = (uint32_t)regs[r1];

			if (!dtrace_canstore(regs[rd], 8, mstate, vstate)) {
				*flags |= CPU_DTRACE_BADADDR;

#if !defined(__APPLE__)

			if (regs[rd] & 3) { /* Darwin kmem_zalloc() called from dtrace_difo_init() is 4-byte aligned. */
#endif /* __APPLE__ */
				*flags |= CPU_DTRACE_BADALIGN;

			*((uint64_t *)(uintptr_t)regs[rd]) = regs[r1];

	if (!(*flags & CPU_DTRACE_FAULT))

	mstate->dtms_fltoffs = opc * sizeof (dif_instr_t);
	mstate->dtms_present |= DTRACE_MSTATE_FLTOFFS;
dtrace_action_breakpoint(dtrace_ecb_t *ecb)
{
	dtrace_probe_t *probe = ecb->dte_probe;
	dtrace_provider_t *prov = probe->dtpr_provider;
	char c[DTRACE_FULLNAMELEN + 80], *str;
#if !defined(__APPLE__) /* Quiet compiler warnings */
	char *msg = "dtrace: breakpoint action at probe ";
	char *ecbmsg = " (ecb ";
#else
	const char *msg = "dtrace: breakpoint action at probe ";
	const char *ecbmsg = " (ecb ";
#endif /* __APPLE__ */
	uintptr_t mask = (0xf << (sizeof (uintptr_t) * NBBY / 4));
	uintptr_t val = (uintptr_t)ecb;
	int shift = (sizeof (uintptr_t) * NBBY) - 4, i = 0;

	if (dtrace_destructive_disallow)

	/*
	 * It's impossible to be taking action on the NULL probe.
	 */
	ASSERT(probe != NULL);

	/*
	 * This is a poor man's (destitute man's?) sprintf(): we want to
	 * print the provider name, module name, function name and name of
	 * the probe, along with the hex address of the ECB with the breakpoint
	 * action -- all of which we must place in the character buffer by
	 */
	while (*msg != '\0')

	for (str = prov->dtpv_name; *str != '\0'; str++)

	for (str = probe->dtpr_mod; *str != '\0'; str++)

	for (str = probe->dtpr_func; *str != '\0'; str++)

	for (str = probe->dtpr_name; *str != '\0'; str++)

	while (*ecbmsg != '\0')

	while (shift >= 0) {
		mask = (uintptr_t)0xf << shift;

		if (val >= ((uintptr_t)1 << shift))
			c[i++] = "0123456789abcdef"[(val & mask) >> shift];
dtrace_action_panic(dtrace_ecb_t *ecb)
{
	dtrace_probe_t *probe = ecb->dte_probe;

	/*
	 * It's impossible to be taking action on the NULL probe.
	 */
	ASSERT(probe != NULL);

	if (dtrace_destructive_disallow)

	if (dtrace_panicked != NULL)

#if !defined(__APPLE__)
	if (dtrace_casptr(&dtrace_panicked, NULL, curthread) != NULL)
#else
	if (dtrace_casptr(&dtrace_panicked, NULL, current_thread()) != NULL)
#endif /* __APPLE__ */

	/*
	 * We won the right to panic.  (We want to be sure that only one
	 * thread calls panic() from dtrace_probe(), and that panic() is
	 * called exactly once.)
	 */
	panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)",
	    probe->dtpr_provider->dtpv_name, probe->dtpr_mod,
	    probe->dtpr_func, probe->dtpr_name, (void *)ecb);

#if defined(__APPLE__)
	/* Mac OS X debug feature -- can return from panic() */
	dtrace_panicked = NULL;
#endif /* __APPLE__ */
dtrace_action_raise(uint64_t sig)
{
	if (dtrace_destructive_disallow)

		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);

#if !defined(__APPLE__)
	/*
	 * raise() has a queue depth of 1 -- we ignore all subsequent
	 * invocations of the raise() action.
	 */
	if (curthread->t_dtrace_sig == 0)
		curthread->t_dtrace_sig = (uint8_t)sig;

	curthread->t_sig_check = 1;
#else
	uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());

	if (uthread && uthread->t_dtrace_sig == 0) {
		uthread->t_dtrace_sig = sig;
		act_set_astbsd(current_thread());
#endif /* __APPLE__ */

dtrace_action_stop(void)
{
	if (dtrace_destructive_disallow)

#if !defined(__APPLE__)
	if (!curthread->t_dtrace_stop) {
		curthread->t_dtrace_stop = 1;
		curthread->t_sig_check = 1;
#else
	uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());

	/*
	 * The currently running process will be set to task_suspend
	 * when it next leaves the kernel.
	 */
	uthread->t_dtrace_stop = 1;
	act_set_astbsd(current_thread());
#endif /* __APPLE__ */
#if defined(__APPLE__)
dtrace_action_pidresume(uint64_t pid)
{
	if (dtrace_destructive_disallow)

	if (kauth_cred_issuser(kauth_cred_get()) == 0) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);

	uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());

	/*
	 * When the currently running process leaves the kernel, it attempts to
	 * task_resume the process (denoted by pid), if that pid appears to have
	 * been stopped by dtrace_action_stop().
	 * The currently running process has a pidresume() queue depth of 1 --
	 * subsequent invocations of the pidresume() action are ignored.
	 */
	if (pid != 0 && uthread && uthread->t_dtrace_resumepid == 0) {
		uthread->t_dtrace_resumepid = pid;
		act_set_astbsd(current_thread());
#endif /* __APPLE__ */
dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val)
{
	volatile uint16_t *flags;
	dtrace_cpu_t *cpu = CPU;

	if (dtrace_destructive_disallow)

	flags = (volatile uint16_t *)&cpu_core[cpu->cpu_id].cpuc_dtrace_flags;

	now = dtrace_gethrtime();

	if (now - cpu->cpu_dtrace_chillmark > dtrace_chill_interval) {
		/*
		 * We need to advance the mark to the current time.
		 */
		cpu->cpu_dtrace_chillmark = now;
		cpu->cpu_dtrace_chilled = 0;

	/*
	 * Now check to see if the requested chill time would take us over
	 * the maximum amount of time allowed in the chill interval.  (Or
	 * worse, if the calculation itself induces overflow.)
	 */
	if (cpu->cpu_dtrace_chilled + val > dtrace_chill_max ||
	    cpu->cpu_dtrace_chilled + val < cpu->cpu_dtrace_chilled) {
		*flags |= CPU_DTRACE_ILLOP;

	while (dtrace_gethrtime() - now < val)
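	/*
	 * Note (descriptive): the loop above is a deliberate busy-wait in
	 * probe context (dtrace_probe() runs with interrupts disabled), which
	 * is why the interval accounting above bounds how much of each chill
	 * interval may be consumed this way.
	 */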
	/*
	 * Normally, we assure that the value of the variable "timestamp" does
	 * not change within an ECB.  The presence of chill() represents an
	 * exception to this rule, however.
	 */
	mstate->dtms_present &= ~DTRACE_MSTATE_TIMESTAMP;
	cpu->cpu_dtrace_chilled += val;
dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state,
    uint64_t *buf, uint64_t arg)
{
	int nframes = DTRACE_USTACK_NFRAMES(arg);
	int strsize = DTRACE_USTACK_STRSIZE(arg);
	uint64_t *pcs = &buf[1], *fps;
	char *str = (char *)&pcs[nframes];
	int size, offs = 0, i, j;
	uintptr_t old = mstate->dtms_scratch_ptr, saved;
	uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

	/*
	 * Should be taking a faster path if string space has not been
	 */
	ASSERT(strsize != 0);

	/*
	 * We will first allocate some temporary space for the frame pointers.
	 */
	fps = (uint64_t *)P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
	size = (uintptr_t)fps - mstate->dtms_scratch_ptr +
	    (nframes * sizeof (uint64_t));

#if !defined(__APPLE__) /* Quiet compiler warnings */
	if (!DTRACE_INSCRATCH(mstate, size)) {
#else
	if (!DTRACE_INSCRATCH(mstate, (uintptr_t)size)) {
#endif /* __APPLE__ */
		/*
		 * Not enough room for our frame pointers -- need to indicate
		 * that we ran out of scratch space.
		 */
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);

	mstate->dtms_scratch_ptr += size;
	saved = mstate->dtms_scratch_ptr;

	/*
	 * Now get a stack with both program counters and frame pointers.
	 */
	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	dtrace_getufpstack(buf, fps, nframes + 1);
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

	/*
	 * If that faulted, we're cooked.
	 */
	if (*flags & CPU_DTRACE_FAULT)

	/*
	 * Now we want to walk up the stack, calling the USTACK helper.  For
	 * each iteration, we restore the scratch pointer.
	 */
	for (i = 0; i < nframes; i++) {
		mstate->dtms_scratch_ptr = saved;

		if (offs >= strsize)

		sym = (char *)(uintptr_t)dtrace_helper(
		    DTRACE_HELPER_ACTION_USTACK,
		    mstate, state, pcs[i], fps[i]);

		/*
		 * If we faulted while running the helper, we're going to
		 * clear the fault and null out the corresponding string.
		 */
		if (*flags & CPU_DTRACE_FAULT) {
			*flags &= ~CPU_DTRACE_FAULT;

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);

		/*
		 * Now copy in the string that the helper returned to us.
		 */
		for (j = 0; offs + j < strsize; j++) {
			if ((str[offs + j] = sym[j]) == '\0')

		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

	if (offs >= strsize) {
		/*
		 * If we didn't have room for all of the strings, we don't
		 * abort processing -- this needn't be a fatal error -- but we
		 * still want to increment a counter (dts_stkstroverflows) to
		 * allow this condition to be warned about.  (If this is from
		 * a jstack() action, it is easily tuned via jstackstrsize.)
		 */
		dtrace_error(&state->dts_stkstroverflows);

	while (offs < strsize)

	mstate->dtms_scratch_ptr = old;
/*
 * If you're looking for the epicenter of DTrace, you just found it.  This
 * is the function called by the provider to fire a probe -- from which all
 * subsequent probe-context DTrace activity emanates.
 */
#if !defined(__APPLE__)
dtrace_probe(dtrace_id_t id, uintptr_t arg0, uintptr_t arg1,
    uintptr_t arg2, uintptr_t arg3, uintptr_t arg4)
#else
__dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
    uint64_t arg2, uint64_t arg3, uint64_t arg4)
#endif /* __APPLE__ */
{
	processorid_t cpuid;
	dtrace_icookie_t cookie;
	dtrace_probe_t *probe;
	dtrace_mstate_t mstate;

	dtrace_action_t *act;

	volatile uint16_t *flags;

#if !defined(__APPLE__)
	/*
	 * Kick out immediately if this CPU is still being born (in which case
	 * curthread will be set to -1) or the current thread can't allow
	 * probes in its current context.
	 */
	if (((uintptr_t)curthread & 1) || (curthread->t_flag & T_DONTDTRACE))
#else
	/* Not a concern for Darwin */
#endif /* __APPLE__ */

	cookie = dtrace_interrupt_disable();
	probe = dtrace_probes[id - 1];
	cpuid = CPU->cpu_id;
	onintr = CPU_ON_INTR(CPU);

#if !defined(__APPLE__)
	if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE &&
	    probe->dtpr_predcache == curthread->t_predcache) {
#else
	if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE &&
	    probe->dtpr_predcache == dtrace_get_thread_predcache(current_thread())) {
#endif /* __APPLE__ */
		/*
		 * We have hit in the predicate cache; we know that
		 * this predicate would evaluate to be false.
		 */
		dtrace_interrupt_enable(cookie);

	if (panic_quiesce) {
		/*
		 * We don't trace anything if we're panicking.
		 */
		dtrace_interrupt_enable(cookie);
#if !defined(__APPLE__)
	now = dtrace_gethrtime();
	vtime = dtrace_vtime_references != 0;

	if (vtime && curthread->t_dtrace_start)
		curthread->t_dtrace_vtime += now - curthread->t_dtrace_start;
#else
	/*
	 * FIXME: the time spent entering DTrace and arriving to this point is
	 * attributed to the current thread.  Instead it should accrue to DTrace.
	 */
	vtime = dtrace_vtime_references != 0;

		int64_t dtrace_accum_time, recent_vtime;
		thread_t thread = current_thread();

		dtrace_accum_time = dtrace_get_thread_tracing(thread);	/* Time spent inside DTrace so far (nanoseconds) */

		if (dtrace_accum_time >= 0) {
			recent_vtime = dtrace_abs_to_nano(dtrace_calc_thread_recent_vtime(thread));	/* up to the moment thread vtime */

			recent_vtime = recent_vtime - dtrace_accum_time;	/* Time without DTrace contribution */

			dtrace_set_thread_vtime(thread, recent_vtime);

	now = dtrace_gethrtime();	/* must not precede dtrace_calc_thread_recent_vtime() call! */
#endif /* __APPLE__ */

#if defined(__APPLE__)
	/*
	 * A provider may call dtrace_probe_error() in lieu of dtrace_probe() in some circumstances.
	 * See, e.g. fasttrap_isa.c.  However the provider has no access to ECB context, so passes
	 * 0 through "arg0" and the probe_id of the overridden probe as arg1.  Detect that here
	 * and cons up a viable state (from the probe_id).
	 */
	if (dtrace_probeid_error == id && 0 == arg0) {
		dtrace_id_t ftp_id = (dtrace_id_t)arg1;
		dtrace_probe_t *ftp_probe = dtrace_probes[ftp_id - 1];
		dtrace_ecb_t *ftp_ecb = ftp_probe->dtpr_ecb;

		if (NULL != ftp_ecb) {
			dtrace_state_t *ftp_state = ftp_ecb->dte_state;

			arg0 = (uint64_t)(uintptr_t)ftp_state;
			arg1 = ftp_ecb->dte_epid;
			/*
			 * args[2-4] established by caller.
			 */
			ftp_state->dts_arg_error_illval = -1;	/* arg5 */
#endif /* __APPLE__ */
	mstate.dtms_difo = NULL;
	mstate.dtms_probe = probe;
	mstate.dtms_strtok = NULL;
	mstate.dtms_arg[0] = arg0;
	mstate.dtms_arg[1] = arg1;
	mstate.dtms_arg[2] = arg2;
	mstate.dtms_arg[3] = arg3;
	mstate.dtms_arg[4] = arg4;

	flags = (volatile uint16_t *)&cpu_core[cpuid].cpuc_dtrace_flags;
	for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
		dtrace_predicate_t *pred = ecb->dte_predicate;
		dtrace_state_t *state = ecb->dte_state;
		dtrace_buffer_t *buf = &state->dts_buffer[cpuid];
		dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid];
		dtrace_vstate_t *vstate = &state->dts_vstate;
		dtrace_provider_t *prov = probe->dtpr_provider;

		/*
		 * A little subtlety with the following (seemingly innocuous)
		 * declaration of the automatic 'val': by looking at the
		 * code, you might think that it could be declared in the
		 * action processing loop, below.  (That is, it's only used in
		 * the action processing loop.)  However, it must be declared
		 * out of that scope because in the case of DIF expression
		 * arguments to aggregating actions, one iteration of the
		 * action loop will use the last iteration's value.
		 */

		mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
		*flags &= ~CPU_DTRACE_ERROR;

		if (prov == dtrace_provider) {
			/*
			 * If dtrace itself is the provider of this probe,
			 * we're only going to continue processing the ECB if
			 * arg0 (the dtrace_state_t) is equal to the ECB's
			 * creating state.  (This prevents disjoint consumers
			 * from seeing one another's metaprobes.)
			 */
			if (arg0 != (uint64_t)(uintptr_t)state)

		if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) {
			/*
			 * We're not currently active.  If our provider isn't
			 * the dtrace pseudo provider, we're not interested.
			 */
			if (prov != dtrace_provider)

			/*
			 * Now we must further check if we are in the BEGIN
			 * probe.  If we are, we will only continue processing
			 * if we're still in WARMUP -- if one BEGIN enabling
			 * has invoked the exit() action, we don't want to
			 * evaluate subsequent BEGIN enablings.
			 */
			if (probe->dtpr_id == dtrace_probeid_begin &&
			    state->dts_activity != DTRACE_ACTIVITY_WARMUP) {
				ASSERT(state->dts_activity ==
				    DTRACE_ACTIVITY_DRAINING);
		if (ecb->dte_cond) {
			/*
			 * If the dte_cond bits indicate that this
			 * consumer is only allowed to see user-mode firings
			 * of this probe, call the provider's dtps_usermode()
			 * entry point to check that the probe was fired
			 * while in a user context.  Skip this ECB if that's
			 */
			if ((ecb->dte_cond & DTRACE_COND_USERMODE) &&
			    prov->dtpv_pops.dtps_usermode(prov->dtpv_arg,
			    probe->dtpr_id, probe->dtpr_arg) == 0)

			/*
			 * This is more subtle than it looks.  We have to be
			 * absolutely certain that CRED() isn't going to
			 * change out from under us so it's only legit to
			 * examine that structure if we're in constrained
			 * situations.  Currently, the only times we'll do this
			 * check is if a non-super-user has enabled the
			 * profile or syscall providers -- providers that
			 * allow visibility of all processes.  For the
			 * profile case, the check above will ensure that
			 * we're examining a user context.
			 */
			if (ecb->dte_cond & DTRACE_COND_OWNER) {

				    ecb->dte_state->dts_cred.dcr_cred;

#pragma unused(proc) /* __APPLE__ */
				ASSERT(s_cr != NULL);

				/*
				 * XXX this is hackish, but so is setting a variable
				 * XXX in a McCarthy OR...
				 */
#if !defined(__APPLE__)
				if ((cr = CRED()) == NULL ||
#else
				if ((cr = dtrace_CRED()) == NULL ||
#endif /* __APPLE__ */
				    posix_cred_get(s_cr)->cr_uid != posix_cred_get(cr)->cr_uid ||
				    posix_cred_get(s_cr)->cr_uid != posix_cred_get(cr)->cr_ruid ||
				    posix_cred_get(s_cr)->cr_uid != posix_cred_get(cr)->cr_suid ||
				    posix_cred_get(s_cr)->cr_gid != posix_cred_get(cr)->cr_gid ||
				    posix_cred_get(s_cr)->cr_gid != posix_cred_get(cr)->cr_rgid ||
				    posix_cred_get(s_cr)->cr_gid != posix_cred_get(cr)->cr_sgid ||
#if !defined(__APPLE__)
				    (proc = ttoproc(curthread)) == NULL ||
				    (proc->p_flag & SNOCD))
#else
				    1) /* Darwin omits "No Core Dump" flag. */
#endif /* __APPLE__ */

			if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) {

				    ecb->dte_state->dts_cred.dcr_cred;
#pragma unused(cr, s_cr) /* __APPLE__ */

				ASSERT(s_cr != NULL);

#if !defined(__APPLE__)
				if ((cr = CRED()) == NULL ||
				    s_cr->cr_zone->zone_id !=
				    cr->cr_zone->zone_id)
#else
				/* Darwin doesn't do zones. */
#endif /* __APPLE__ */
		if (now - state->dts_alive > dtrace_deadman_timeout) {
			/*
			 * We seem to be dead.  Unless we (a) have kernel
			 * destructive permissions (b) have explicitly enabled
			 * destructive actions and (c) destructive actions have
			 * not been disabled, we're going to transition into
			 * the KILLED state, from which no further processing
			 * on this state will be performed.
			 */
			if (!dtrace_priv_kernel_destructive(state) ||
			    !state->dts_cred.dcr_destructive ||
			    dtrace_destructive_disallow) {
				void *activity = &state->dts_activity;
				dtrace_activity_t current;

					current = state->dts_activity;
				} while (dtrace_cas32(activity, current,
				    DTRACE_ACTIVITY_KILLED) != current);

		if ((offs = dtrace_buffer_reserve(buf, ecb->dte_needed,
		    ecb->dte_alignment, state, &mstate)) < 0)

		tomax = buf->dtb_tomax;
		ASSERT(tomax != NULL);

		if (ecb->dte_size != 0)
			DTRACE_STORE(uint32_t, tomax, offs, ecb->dte_epid);

		mstate.dtms_epid = ecb->dte_epid;
		mstate.dtms_present |= DTRACE_MSTATE_EPID;

		if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)
			mstate.dtms_access = DTRACE_ACCESS_KERNEL;

			mstate.dtms_access = 0;
			dtrace_difo_t *dp = pred->dtp_difo;

			rval = dtrace_dif_emulate(dp, &mstate, vstate, state);

			if (!(*flags & CPU_DTRACE_ERROR) && !rval) {
				dtrace_cacheid_t cid = probe->dtpr_predcache;

				if (cid != DTRACE_CACHEIDNONE && !onintr) {
					/*
					 * Update the predicate cache...
					 */
					ASSERT(cid == pred->dtp_cacheid);
#if !defined(__APPLE__)
					curthread->t_predcache = cid;
#else
					dtrace_set_thread_predcache(current_thread(), cid);
#endif /* __APPLE__ */

		for (act = ecb->dte_action; !(*flags & CPU_DTRACE_ERROR) &&
		    act != NULL; act = act->dta_next) {

			dtrace_recdesc_t *rec = &act->dta_rec;

			size = rec->dtrd_size;
			valoffs = offs + rec->dtrd_offset;

			if (DTRACEACT_ISAGG(act->dta_kind)) {

				dtrace_aggregation_t *agg;

				agg = (dtrace_aggregation_t *)act;

				if ((dp = act->dta_difo) != NULL)
					v = dtrace_dif_emulate(dp,
					    &mstate, vstate, state);

				if (*flags & CPU_DTRACE_ERROR)

				/*
				 * Note that we always pass the expression
				 * value from the previous iteration of the
				 * action loop.  This value will only be used
				 * if there is an expression argument to the
				 * aggregating action, denoted by the
				 * dtag_hasarg field.
				 */
				dtrace_aggregate(agg, buf,
				    offs, aggbuf, v, val);
			switch (act->dta_kind) {
			case DTRACEACT_STOP:
				if (dtrace_priv_proc_destructive(state))
					dtrace_action_stop();

			case DTRACEACT_BREAKPOINT:
				if (dtrace_priv_kernel_destructive(state))
					dtrace_action_breakpoint(ecb);

			case DTRACEACT_PANIC:
				if (dtrace_priv_kernel_destructive(state))
					dtrace_action_panic(ecb);

			case DTRACEACT_STACK:
				if (!dtrace_priv_kernel(state))

#if !defined(__APPLE__) /* Quiet compiler warnings */
				dtrace_getpcstack((pc_t *)(tomax + valoffs),
				    size / sizeof (pc_t), probe->dtpr_aframes,
				    DTRACE_ANCHORED(probe) ? NULL :
#else
				dtrace_getpcstack((pc_t *)(tomax + valoffs),
				    size / sizeof (pc_t), probe->dtpr_aframes,
				    DTRACE_ANCHORED(probe) ? NULL :
				    (uint32_t *)(uintptr_t)arg0);
#endif /* __APPLE__ */

			case DTRACEACT_JSTACK:
			case DTRACEACT_USTACK:
				if (!dtrace_priv_proc(state))

				/*
				 * See comment in DIF_VAR_PID.
				 */
				if (DTRACE_ANCHORED(mstate.dtms_probe) &&

					int depth = DTRACE_USTACK_NFRAMES(

					dtrace_bzero((void *)(tomax + valoffs),
					    DTRACE_USTACK_STRSIZE(rec->dtrd_arg)
					    + depth * sizeof (uint64_t));

				if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0 &&
				    curproc->p_dtrace_helpers != NULL) {
					/*
					 * This is the slow path -- we have
					 * allocated string space, and we're
					 * getting the stack of a process that
					 * has helpers.  Call into a separate
					 * routine to perform this processing.
					 */
					dtrace_action_ustack(&mstate, state,
					    (uint64_t *)(tomax + valoffs),

				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
				dtrace_getupcstack((uint64_t *)
				    DTRACE_USTACK_NFRAMES(rec->dtrd_arg) + 1);
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
			val = dtrace_dif_emulate(dp, &mstate, vstate, state);

			if (*flags & CPU_DTRACE_ERROR)

			switch (act->dta_kind) {
			case DTRACEACT_SPECULATE:
				ASSERT(buf == &state->dts_buffer[cpuid]);
				buf = dtrace_speculation_buffer(state,

					*flags |= CPU_DTRACE_DROP;

				offs = dtrace_buffer_reserve(buf,
				    ecb->dte_needed, ecb->dte_alignment,

					*flags |= CPU_DTRACE_DROP;

				tomax = buf->dtb_tomax;
				ASSERT(tomax != NULL);

				if (ecb->dte_size != 0)
					DTRACE_STORE(uint32_t, tomax, offs,

			case DTRACEACT_CHILL:
				if (dtrace_priv_kernel_destructive(state))
					dtrace_action_chill(&mstate, val);

			case DTRACEACT_RAISE:
				if (dtrace_priv_proc_destructive(state))
					dtrace_action_raise(val);

#if defined(__APPLE__)
			case DTRACEACT_PIDRESUME:
				if (dtrace_priv_proc_destructive(state))
					dtrace_action_pidresume(val);
#endif /* __APPLE__ */

			case DTRACEACT_COMMIT:

				/*
				 * We need to commit our buffer state.
				 */
				buf->dtb_offset = offs + ecb->dte_size;
				buf = &state->dts_buffer[cpuid];
				dtrace_speculation_commit(state, cpuid, val);

			case DTRACEACT_DISCARD:
				dtrace_speculation_discard(state, cpuid, val);

			case DTRACEACT_DIFEXPR:
			case DTRACEACT_LIBACT:
			case DTRACEACT_PRINTF:
			case DTRACEACT_PRINTA:
			case DTRACEACT_SYSTEM:
			case DTRACEACT_FREOPEN:
#if defined(__APPLE__)
			case DTRACEACT_APPLEBINARY:
#endif /* __APPLE__ */

				if (!dtrace_priv_kernel(state))

#if !defined(__APPLE__)
			case DTRACEACT_USYM:
			case DTRACEACT_UMOD:
			case DTRACEACT_UADDR: {
				struct pid *pid = curthread->t_procp->p_pidp;

				if (!dtrace_priv_proc(state))

				DTRACE_STORE(uint64_t, tomax,
				    valoffs, (uint64_t)pid->pid_id);
				DTRACE_STORE(uint64_t, tomax,
				    valoffs + sizeof (uint64_t), val);

#else
			case DTRACEACT_USYM:
			case DTRACEACT_UMOD:
			case DTRACEACT_UADDR: {
				if (!dtrace_priv_proc(state))

				DTRACE_STORE(uint64_t, tomax,
				    valoffs, (uint64_t)dtrace_proc_selfpid());
				DTRACE_STORE(uint64_t, tomax,
				    valoffs + sizeof (uint64_t), val);

#endif /* __APPLE__ */
			case DTRACEACT_EXIT: {
				/*
				 * For the exit action, we are going to attempt
				 * to atomically set our activity to be
				 * draining.  If this fails (either because
				 * another CPU has beat us to the exit action,
				 * or because our current activity is something
				 * other than ACTIVE or WARMUP), we will
				 * continue.  This assures that the exit action
				 * can be successfully recorded at most once
				 * when we're in the ACTIVE state.  If we're
				 * encountering the exit() action while in
				 * COOLDOWN, however, we want to honor the new
				 * status code.  (We know that we're the only
				 * thread in COOLDOWN, so there is no race.)
				 */
				void *activity = &state->dts_activity;
				dtrace_activity_t current = state->dts_activity;

				if (current == DTRACE_ACTIVITY_COOLDOWN)

				if (current != DTRACE_ACTIVITY_WARMUP)
					current = DTRACE_ACTIVITY_ACTIVE;

				if (dtrace_cas32(activity, current,
				    DTRACE_ACTIVITY_DRAINING) != current) {
					*flags |= CPU_DTRACE_DROP;

			if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF) {
				uintptr_t end = valoffs + size;

				if (!dtrace_vcanload((void *)(uintptr_t)val,
				    &dp->dtdo_rtype, &mstate, vstate))

				/*
				 * If this is a string, we're going to only
				 * load until we find the zero byte -- after
				 * which we'll store zero bytes.
				 */
				if (dp->dtdo_rtype.dtdt_kind ==

					int intuple = act->dta_intuple;

					for (s = 0; s < size; s++) {

						c = dtrace_load8(val++);

						DTRACE_STORE(uint8_t, tomax,

						if (c == '\0' && intuple)

					while (valoffs < end) {
						DTRACE_STORE(uint8_t, tomax, valoffs++,
						    dtrace_load8(val++));

			case sizeof (uint8_t):
				DTRACE_STORE(uint8_t, tomax, valoffs, val);

			case sizeof (uint16_t):
				DTRACE_STORE(uint16_t, tomax, valoffs, val);

			case sizeof (uint32_t):
				DTRACE_STORE(uint32_t, tomax, valoffs, val);

			case sizeof (uint64_t):
				DTRACE_STORE(uint64_t, tomax, valoffs, val);

				/*
				 * Any other size should have been returned by
				 * reference, not by value.
				 */
& CPU_DTRACE_DROP
)
7120 if (*flags
& CPU_DTRACE_FAULT
) {
7122 dtrace_action_t
*err
;
7126 if (probe
->dtpr_id
== dtrace_probeid_error
) {
7128 * There's nothing we can do -- we had an
7129 * error on the error probe. We bump an
7130 * error counter to at least indicate that
7131 * this condition happened.
7133 dtrace_error(&state
->dts_dblerrors
);
7139 * Before recursing on dtrace_probe(), we
7140 * need to explicitly clear out our start
7141 * time to prevent it from being accumulated
7142 * into t_dtrace_vtime.
7144 #if !defined(__APPLE__)
7145 curthread
->t_dtrace_start
= 0;
7147 /* Set the sign bit on t_dtrace_tracing to suspend accumulation to it. */
7148 dtrace_set_thread_tracing(current_thread(),
7149 (1ULL<<63) | dtrace_get_thread_tracing(current_thread()));
7150 #endif /* __APPLE__ */
7154 * Iterate over the actions to figure out which action
7155 * we were processing when we experienced the error.
7156 * Note that act points _past_ the faulting action; if
7157 * act is ecb->dte_action, the fault was in the
7158 * predicate, if it's ecb->dte_action->dta_next it's
7159 * in action #1, and so on.
7161 for (err
= ecb
->dte_action
, ndx
= 0;
7162 err
!= act
; err
= err
->dta_next
, ndx
++)
7165 dtrace_probe_error(state
, ecb
->dte_epid
, ndx
,
7166 (mstate
.dtms_present
& DTRACE_MSTATE_FLTOFFS
) ?
7167 mstate
.dtms_fltoffs
: -1, DTRACE_FLAGS2FLT(*flags
),
7168 cpu_core
[cpuid
].cpuc_dtrace_illval
);
7174 buf
->dtb_offset
= offs
+ ecb
->dte_size
;
7177 #if !defined(__APPLE__)
7179 curthread
->t_dtrace_start
= dtrace_gethrtime();
7181 /* FIXME: the time spent leaving DTrace from this point to the rti is attributed
7182 to the current thread. Instead it should accrue to DTrace. */
7184 thread_t thread
= current_thread();
7185 int64_t t
= dtrace_get_thread_tracing(thread
);
7188 /* Usual case, accumulate time spent here into t_dtrace_tracing */
7189 dtrace_set_thread_tracing(thread
, t
+ (dtrace_gethrtime() - now
));
7191 /* Return from error recursion. No accumulation, just clear the sign bit on t_dtrace_tracing. */
7192 dtrace_set_thread_tracing(thread
, (~(1ULL<<63)) & t
);
7195 #endif /* __APPLE__ */
7197 dtrace_interrupt_enable(cookie
);
#if defined(__APPLE__)
/* Don't allow a thread to re-enter dtrace_probe(). This could occur if a probe is encountered
   on some function in the transitive closure of the call to dtrace_probe(). Solaris has some
   strong guarantees that this won't happen, the Darwin implementation is not so mature as to
   make those guarantees. */

dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
    uint64_t arg2, uint64_t arg3, uint64_t arg4)
	thread_t thread = current_thread();

	disable_preemption();
	if (id == dtrace_probeid_error) {
		__dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
		dtrace_getipl(); /* Defeat tail-call optimization of __dtrace_probe() */
	} else if (!dtrace_get_thread_reentering(thread)) {
		dtrace_set_thread_reentering(thread, TRUE);
		__dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
		dtrace_set_thread_reentering(thread, FALSE);
	else __dtrace_probe(dtrace_probeid_error, 0, id, 1, -1, DTRACEFLT_UNKNOWN);
	enable_preemption();
#endif /* __APPLE__ */
/*
 * DTrace Probe Hashing Functions
 *
 * The functions in this section (and indeed, the functions in remaining
 * sections) are not _called_ from probe context.  (Any exceptions to this are
 * marked with a "Note:".)  Rather, they are called from elsewhere in the
 * DTrace framework to look-up probes in, add probes to and remove probes from
 * the DTrace probe hashes.  (Each probe is hashed by each element of the
 * probe tuple -- allowing for fast lookups, regardless of what was
 * specified.)
 */
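
/*
 * Illustrative sketch (not part of the implementation): a lookup by
 * function name alone would build a template probe carrying just that
 * element and consult the hash keyed on it; the names used here are
 * hypothetical.
 *
 *	dtrace_probe_t tmpl;
 *	tmpl.dtpr_func = (char *)(uintptr_t)"vm_fault";
 *	probe = dtrace_hash_lookup(dtrace_byfunc, &tmpl);
 */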
#if !defined(__APPLE__) /* Quiet compiler warnings */
dtrace_hash_str(char *p)
#else
dtrace_hash_str(const char *p)
#endif /* __APPLE__ */
		hval = (hval << 4) + *p++;
		if ((g = (hval & 0xf0000000)) != 0)

static dtrace_hash_t *
dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs)
	dtrace_hash_t *hash = kmem_zalloc(sizeof (dtrace_hash_t), KM_SLEEP);

	hash->dth_stroffs = stroffs;
	hash->dth_nextoffs = nextoffs;
	hash->dth_prevoffs = prevoffs;

	hash->dth_mask = hash->dth_size - 1;

	hash->dth_tab = kmem_zalloc(hash->dth_size *
	    sizeof (dtrace_hashbucket_t *), KM_SLEEP);

#if !defined(__APPLE__) /* Unused. Quiet compiler warning. */
dtrace_hash_destroy(dtrace_hash_t *hash)
	for (i = 0; i < hash->dth_size; i++)
		ASSERT(hash->dth_tab[i] == NULL);

	kmem_free(hash->dth_tab,
	    hash->dth_size * sizeof (dtrace_hashbucket_t *));
	kmem_free(hash, sizeof (dtrace_hash_t));
#endif /* __APPLE__ */
dtrace_hash_resize(dtrace_hash_t *hash)
	int size = hash->dth_size, i, ndx;
	int new_size = hash->dth_size << 1;
	int new_mask = new_size - 1;
	dtrace_hashbucket_t **new_tab, *bucket, *next;

	ASSERT((new_size & new_mask) == 0);

	new_tab = kmem_zalloc(new_size * sizeof (void *), KM_SLEEP);

	for (i = 0; i < size; i++) {
		for (bucket = hash->dth_tab[i]; bucket != NULL; bucket = next) {
			dtrace_probe_t *probe = bucket->dthb_chain;

			ASSERT(probe != NULL);
			ndx = DTRACE_HASHSTR(hash, probe) & new_mask;

			next = bucket->dthb_next;
			bucket->dthb_next = new_tab[ndx];
			new_tab[ndx] = bucket;

	kmem_free(hash->dth_tab, hash->dth_size * sizeof (void *));
	hash->dth_tab = new_tab;
	hash->dth_size = new_size;
	hash->dth_mask = new_mask;
dtrace_hash_add(dtrace_hash_t *hash, dtrace_probe_t *new)
	int hashval = DTRACE_HASHSTR(hash, new);
	int ndx = hashval & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
	dtrace_probe_t **nextp, **prevp;

	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, new))

	if ((hash->dth_nbuckets >> 1) > hash->dth_size) {
		dtrace_hash_resize(hash);
		dtrace_hash_add(hash, new);

	bucket = kmem_zalloc(sizeof (dtrace_hashbucket_t), KM_SLEEP);
	bucket->dthb_next = hash->dth_tab[ndx];
	hash->dth_tab[ndx] = bucket;
	hash->dth_nbuckets++;

	nextp = DTRACE_HASHNEXT(hash, new);
	ASSERT(*nextp == NULL && *(DTRACE_HASHPREV(hash, new)) == NULL);
	*nextp = bucket->dthb_chain;

	if (bucket->dthb_chain != NULL) {
		prevp = DTRACE_HASHPREV(hash, bucket->dthb_chain);
		ASSERT(*prevp == NULL);

	bucket->dthb_chain = new;
static dtrace_probe_t *
dtrace_hash_lookup(dtrace_hash_t *hash, dtrace_probe_t *template)
	int hashval = DTRACE_HASHSTR(hash, template);
	int ndx = hashval & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
			return (bucket->dthb_chain);

dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template)
	int hashval = DTRACE_HASHSTR(hash, template);
	int ndx = hashval & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
			return (bucket->dthb_len);
dtrace_hash_remove(dtrace_hash_t *hash, dtrace_probe_t *probe)
	int ndx = DTRACE_HASHSTR(hash, probe) & hash->dth_mask;
	dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

	dtrace_probe_t **prevp = DTRACE_HASHPREV(hash, probe);
	dtrace_probe_t **nextp = DTRACE_HASHNEXT(hash, probe);

	/*
	 * Find the bucket that we're removing this probe from.
	 */
	for (; bucket != NULL; bucket = bucket->dthb_next) {
		if (DTRACE_HASHEQ(hash, bucket->dthb_chain, probe))

	ASSERT(bucket != NULL);

	if (*prevp == NULL) {
		if (*nextp == NULL) {
			/*
			 * The removed probe was the only probe on this
			 * bucket; we need to remove the bucket.
			 */
			dtrace_hashbucket_t *b = hash->dth_tab[ndx];

			ASSERT(bucket->dthb_chain == probe);

				hash->dth_tab[ndx] = bucket->dthb_next;
				while (b->dthb_next != bucket)
				b->dthb_next = bucket->dthb_next;

			ASSERT(hash->dth_nbuckets > 0);
			hash->dth_nbuckets--;
			kmem_free(bucket, sizeof (dtrace_hashbucket_t));

		bucket->dthb_chain = *nextp;
		*(DTRACE_HASHNEXT(hash, *prevp)) = *nextp;

	*(DTRACE_HASHPREV(hash, *nextp)) = *prevp;
/*
 * DTrace Utility Functions
 *
 * These are random utility functions that are _not_ called from probe context.
 */
dtrace_badattr(const dtrace_attribute_t *a)
	return (a->dtat_name > DTRACE_STABILITY_MAX ||
	    a->dtat_data > DTRACE_STABILITY_MAX ||
	    a->dtat_class > DTRACE_CLASS_MAX);

/*
 * Return a duplicate copy of a string.  If the specified string is NULL,
 * this function returns a zero-length string.
 */
#if !defined(__APPLE__)
dtrace_strdup(const char *str)
	char *new = kmem_zalloc((str != NULL ? strlen(str) : 0) + 1, KM_SLEEP);

		(void) strcpy(new, str);

#else /* Employ size bounded string operation. */
dtrace_strdup(const char *str)
	size_t bufsize = (str != NULL ? strlen(str) : 0) + 1;
	char *new = kmem_zalloc(bufsize, KM_SLEEP);

		(void) strlcpy(new, str, bufsize);

#endif /* __APPLE__ */
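
/*
 * Illustrative sketch (hypothetical, not part of this file): callers own the
 * returned copy and release it using the string's length, in the same way the
 * probe-management code below frees dtpr_mod and friends, e.g.
 *
 *	char *copy = dtrace_strdup(name);
 *	...
 *	kmem_free(copy, strlen(copy) + 1);
 */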
#define	DTRACE_ISALPHA(c)	\
	(((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))

dtrace_badname(const char *s)
	if (s == NULL || (c = *s++) == '\0')

	if (!DTRACE_ISALPHA(c) && c != '-' && c != '_' && c != '.')

	while ((c = *s++) != '\0') {
		if (!DTRACE_ISALPHA(c) && (c < '0' || c > '9') &&
		    c != '-' && c != '_' && c != '.' && c != '`')
dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp)
	if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
		/*
		 * For DTRACE_PRIV_ALL, the uid and zoneid don't matter.
		 */
		priv = DTRACE_PRIV_ALL;

		*uidp = crgetuid(cr);
		*zoneidp = crgetzoneid(cr);

		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE))
			priv |= DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER;
		else if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE))
			priv |= DTRACE_PRIV_USER;
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE))
			priv |= DTRACE_PRIV_PROC;
		if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
			priv |= DTRACE_PRIV_OWNER;
		if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
			priv |= DTRACE_PRIV_ZONEOWNER;
#ifdef DTRACE_ERRDEBUG
dtrace_errdebug(const char *str)
#if !defined(__APPLE__) /* Quiet compiler warnings */
	int hval = dtrace_hash_str((char *)str) % DTRACE_ERRHASHSZ;
#else
	int hval = dtrace_hash_str(str) % DTRACE_ERRHASHSZ;
#endif /* __APPLE__ */

	lck_mtx_lock(&dtrace_errlock);
	dtrace_errlast = str;
#if !defined(__APPLE__)
	dtrace_errthread = curthread;
#else
	dtrace_errthread = (kthread_t *)current_thread();
#endif /* __APPLE__ */

	while (occupied++ < DTRACE_ERRHASHSZ) {
		if (dtrace_errhash[hval].dter_msg == str) {
			dtrace_errhash[hval].dter_count++;

		if (dtrace_errhash[hval].dter_msg != NULL) {
			hval = (hval + 1) % DTRACE_ERRHASHSZ;

		dtrace_errhash[hval].dter_msg = str;
		dtrace_errhash[hval].dter_count = 1;

	panic("dtrace: undersized error hash");

	lck_mtx_unlock(&dtrace_errlock);
/*
 * DTrace Matching Functions
 *
 * These functions are used to match groups of probes, given some elements of
 * a probe tuple, or some globbed expressions for elements of a probe tuple.
 */
dtrace_match_priv(const dtrace_probe_t *prp, uint32_t priv, uid_t uid,
	if (priv != DTRACE_PRIV_ALL) {
		uint32_t ppriv = prp->dtpr_provider->dtpv_priv.dtpp_flags;
		uint32_t match = priv & ppriv;

		/*
		 * No PRIV_DTRACE_* privileges...
		 */
		if ((priv & (DTRACE_PRIV_PROC | DTRACE_PRIV_USER |
		    DTRACE_PRIV_KERNEL)) == 0)

		/*
		 * No matching bits, but there were bits to match...
		 */
		if (match == 0 && ppriv != 0)

		/*
		 * Need to have permissions to the process, but don't...
		 */
		if (((ppriv & ~match) & DTRACE_PRIV_OWNER) != 0 &&
		    uid != prp->dtpr_provider->dtpv_priv.dtpp_uid) {

		/*
		 * Need to be in the same zone unless we possess the
		 * privilege to examine all zones.
		 */
		if (((ppriv & ~match) & DTRACE_PRIV_ZONEOWNER) != 0 &&
		    zoneid != prp->dtpr_provider->dtpv_priv.dtpp_zoneid) {
/*
 * dtrace_match_probe compares a dtrace_probe_t to a pre-compiled key, which
 * consists of input pattern strings and an ops-vector to evaluate them.
 * This function returns >0 for match, 0 for no match, and <0 for error.
 */
dtrace_match_probe(const dtrace_probe_t *prp, const dtrace_probekey_t *pkp,
    uint32_t priv, uid_t uid, zoneid_t zoneid)
	dtrace_provider_t *pvp = prp->dtpr_provider;

	if (pvp->dtpv_defunct)

	if ((rv = pkp->dtpk_pmatch(pvp->dtpv_name, pkp->dtpk_prov, 0)) <= 0)

	if ((rv = pkp->dtpk_mmatch(prp->dtpr_mod, pkp->dtpk_mod, 0)) <= 0)

	if ((rv = pkp->dtpk_fmatch(prp->dtpr_func, pkp->dtpk_func, 0)) <= 0)

	if ((rv = pkp->dtpk_nmatch(prp->dtpr_name, pkp->dtpk_name, 0)) <= 0)

	if (dtrace_match_priv(prp, priv, uid, zoneid) == 0)
/*
 * dtrace_match_glob() is a safe kernel implementation of the gmatch(3GEN)
 * interface for matching a glob pattern 'p' to an input string 's'.  Unlike
 * libc's version, the kernel version only applies to 8-bit ASCII strings.
 * In addition, all of the recursion cases except for '*' matching have been
 * unwound.  For '*', we still implement recursive evaluation, but a depth
 * counter is maintained and matching is aborted if we recurse too deep.
 * The function returns 0 if no match, >0 if match, and <0 if recursion error.
 */
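
/*
 * For illustration only (hypothetical inputs): dtrace_match_glob("read",
 * "re*", 0) would return >0, dtrace_match_glob("read", "wr*", 0) would
 * return 0, and a pattern whose '*' evaluation recurses past
 * DTRACE_PROBEKEY_MAXDEPTH would return <0.
 */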
dtrace_match_glob(const char *s, const char *p, int depth)
	if (depth > DTRACE_PROBEKEY_MAXDEPTH)

		s = ""; /* treat NULL as empty string */

	if ((c = *p++) == '\0')
		return (s1 == '\0');

		int ok = 0, notflag = 0;

		if ((c = *p++) == '\0')

			if (c == '-' && lc != '\0' && *p != ']') {
				if ((c = *p++) == '\0')
				if (c == '\\' && (c = *p++) == '\0')

					if (s1 < lc || s1 > c)
				} else if (lc <= s1 && s1 <= c)

			} else if (c == '\\' && (c = *p++) == '\0')

			lc = c; /* save left-hand 'c' for next iteration */

		if ((c = *p++) == '\0')

		if ((c = *p++) == '\0')

			p++; /* consecutive *'s are identical to a single one */

		for (s = olds; *s != '\0'; s++) {
			if ((gs = dtrace_match_glob(s, p, depth + 1)) != 0)
dtrace_match_string(const char *s, const char *p, int depth)
#pragma unused(depth) /* __APPLE__ */
#if !defined(__APPLE__)
	return (s != NULL && strcmp(s, p) == 0);
#else /* Employ size bounded string operation. */
	return (s != NULL && strncmp(s, p, strlen(s) + 1) == 0);
#endif /* __APPLE__ */

dtrace_match_nul(const char *s, const char *p, int depth)
#pragma unused(s, p, depth) /* __APPLE__ */
	return (1); /* always match the empty pattern */

dtrace_match_nonzero(const char *s, const char *p, int depth)
#pragma unused(p, depth) /* __APPLE__ */
	return (s != NULL && s[0] != '\0');
dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
    zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *), void *arg)
	dtrace_probe_t template, *probe;
	dtrace_hash_t *hash = NULL;
	int len, rc, best = INT_MAX, nmatched = 0;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	/*
	 * If the probe ID is specified in the key, just lookup by ID and
	 * invoke the match callback once if a matching probe is found.
	 */
	if (pkp->dtpk_id != DTRACE_IDNONE) {
		if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL &&
		    dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) {
			if ((*matched)(probe, arg) == DTRACE_MATCH_FAIL)
				return (DTRACE_MATCH_FAIL);

#if !defined(__APPLE__) /* Quiet compiler warnings */
	template.dtpr_mod = (char *)pkp->dtpk_mod;
	template.dtpr_func = (char *)pkp->dtpk_func;
	template.dtpr_name = (char *)pkp->dtpk_name;
#else
	template.dtpr_mod = (char *)(uintptr_t)pkp->dtpk_mod;
	template.dtpr_func = (char *)(uintptr_t)pkp->dtpk_func;
	template.dtpr_name = (char *)(uintptr_t)pkp->dtpk_name;
#endif /* __APPLE__ */

	/*
	 * We want to find the most distinct of the module name, function
	 * name, and name.  So for each one that is not a glob pattern or
	 * empty string, we perform a lookup in the corresponding hash and
	 * use the hash table with the fewest collisions to do our search.
	 */
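	/*
	 * Example (illustrative only): for a description like
	 * fbt:mach_kernel:vm_fault:entry, module, function and name are all
	 * literal strings, so whichever of dtrace_bymod, dtrace_byfunc and
	 * dtrace_byname reports the fewest collisions for the template is
	 * chosen by the checks below.
	 */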
	if (pkp->dtpk_mmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_bymod, &template)) < best) {
		hash = dtrace_bymod;

	if (pkp->dtpk_fmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_byfunc, &template)) < best) {
		hash = dtrace_byfunc;

	if (pkp->dtpk_nmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_byname, &template)) < best) {
		hash = dtrace_byname;

	/*
	 * If we did not select a hash table, iterate over every probe and
	 * invoke our callback for each one that matches our input probe key.
	 */
#if !defined(__APPLE__) /* Quiet compiler warning */
		for (i = 0; i < dtrace_nprobes; i++) {
#else
		for (i = 0; i < (dtrace_id_t)dtrace_nprobes; i++) {
#endif /* __APPLE__ */
			if ((probe = dtrace_probes[i]) == NULL ||
			    dtrace_match_probe(probe, pkp, priv, uid,

			if ((rc = (*matched)(probe, arg)) != DTRACE_MATCH_NEXT) {
				if (rc == DTRACE_MATCH_FAIL)
					return (DTRACE_MATCH_FAIL);

	/*
	 * If we selected a hash table, iterate over each probe of the same key
	 * name and invoke the callback for every probe that matches the other
	 * attributes of our input probe key.
	 */
	for (probe = dtrace_hash_lookup(hash, &template); probe != NULL;
	    probe = *(DTRACE_HASHNEXT(hash, probe))) {

		if (dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0)

		if ((rc = (*matched)(probe, arg)) != DTRACE_MATCH_NEXT) {
			if (rc == DTRACE_MATCH_FAIL)
				return (DTRACE_MATCH_FAIL);
/*
 * Return the function pointer dtrace_probecmp() should use to compare the
 * specified pattern with a string.  For NULL or empty patterns, we select
 * dtrace_match_nul().  For glob pattern strings, we use dtrace_match_glob().
 * For non-empty non-glob strings, we use dtrace_match_string().
 */
static dtrace_probekey_f *
dtrace_probekey_func(const char *p)
	if (p == NULL || *p == '\0')
		return (&dtrace_match_nul);

	while ((c = *p++) != '\0') {
		if (c == '[' || c == '?' || c == '*' || c == '\\')
			return (&dtrace_match_glob);

	return (&dtrace_match_string);
/*
 * Build a probe comparison key for use with dtrace_match_probe() from the
 * given probe description.  By convention, a null key only matches anchored
 * probes: if each field is the empty string, reset dtpk_fmatch to
 * dtrace_match_nonzero().
 */
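
/*
 * Illustrative sketch (hypothetical input): for the empty description ":::"
 * every element selects dtrace_match_nul, so dtpk_fmatch is reset to
 * dtrace_match_nonzero and only probes with a non-empty function name --
 * that is, anchored probes -- can match.
 */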
dtrace_probekey(const dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp)
	pkp->dtpk_prov = pdp->dtpd_provider;
	pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider);

	pkp->dtpk_mod = pdp->dtpd_mod;
	pkp->dtpk_mmatch = dtrace_probekey_func(pdp->dtpd_mod);

	pkp->dtpk_func = pdp->dtpd_func;
	pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func);

	pkp->dtpk_name = pdp->dtpd_name;
	pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name);

	pkp->dtpk_id = pdp->dtpd_id;

	if (pkp->dtpk_id == DTRACE_IDNONE &&
	    pkp->dtpk_pmatch == &dtrace_match_nul &&
	    pkp->dtpk_mmatch == &dtrace_match_nul &&
	    pkp->dtpk_fmatch == &dtrace_match_nul &&
	    pkp->dtpk_nmatch == &dtrace_match_nul)
		pkp->dtpk_fmatch = &dtrace_match_nonzero;
/*
 * DTrace Provider-to-Framework API Functions
 *
 * These functions implement much of the Provider-to-Framework API, as
 * described in <sys/dtrace.h>.  The parts of the API not in this section are
 * the functions in the API for probe management (found below), and
 * dtrace_probe() itself (found above).
 */

/*
 * Register the calling provider with the DTrace framework.  This should
 * generally be called by DTrace providers in their attach(9E) entry point.
 */
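
/*
 * Illustrative sketch (hypothetical names, not part of this file): a
 * provider's attach routine typically registers itself along these lines,
 * where "example_attr" and "example_pops" stand in for a real attribute
 * block and ops vector:
 *
 *	static dtrace_provider_id_t example_id;
 *
 *	if (dtrace_register("example", &example_attr, DTRACE_PRIV_KERNEL,
 *	    NULL, &example_pops, NULL, &example_id) != 0)
 *		return (DDI_FAILURE);
 */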
dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv,
    cred_t *cr, const dtrace_pops_t *pops, void *arg, dtrace_provider_id_t *idp)
	dtrace_provider_t *provider;

	if (name == NULL || pap == NULL || pops == NULL || idp == NULL) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "arguments", name ? name : "<NULL>");

	if (name[0] == '\0' || dtrace_badname(name)) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "provider name", name);

	if ((pops->dtps_provide == NULL && pops->dtps_provide_module == NULL) ||
	    pops->dtps_enable == NULL || pops->dtps_disable == NULL ||
	    pops->dtps_destroy == NULL ||
	    ((pops->dtps_resume == NULL) != (pops->dtps_suspend == NULL))) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "provider ops", name);

	if (dtrace_badattr(&pap->dtpa_provider) ||
	    dtrace_badattr(&pap->dtpa_mod) ||
	    dtrace_badattr(&pap->dtpa_func) ||
	    dtrace_badattr(&pap->dtpa_name) ||
	    dtrace_badattr(&pap->dtpa_args)) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "provider attributes", name);

	if (priv & ~DTRACE_PRIV_ALL) {
		cmn_err(CE_WARN, "failed to register provider '%s': invalid "
		    "privilege attributes", name);

	if ((priv & DTRACE_PRIV_KERNEL) &&
	    (priv & (DTRACE_PRIV_USER | DTRACE_PRIV_OWNER)) &&
	    pops->dtps_usermode == NULL) {
		cmn_err(CE_WARN, "failed to register provider '%s': need "
		    "dtps_usermode() op for given privilege attributes", name);

	provider = kmem_zalloc(sizeof (dtrace_provider_t), KM_SLEEP);
#if !defined(__APPLE__)
	provider->dtpv_name = kmem_alloc(strlen(name) + 1, KM_SLEEP);
	(void) strcpy(provider->dtpv_name, name);
#else /* Employ size bounded string operation. */
	size_t bufsize = strlen(name) + 1;
	provider->dtpv_name = kmem_alloc(bufsize, KM_SLEEP);
	(void) strlcpy(provider->dtpv_name, name, bufsize);
#endif /* __APPLE__ */

	provider->dtpv_attr = *pap;
	provider->dtpv_priv.dtpp_flags = priv;

	provider->dtpv_priv.dtpp_uid = crgetuid(cr);
	provider->dtpv_priv.dtpp_zoneid = crgetzoneid(cr);

	provider->dtpv_pops = *pops;

	if (pops->dtps_provide == NULL) {
		ASSERT(pops->dtps_provide_module != NULL);
		provider->dtpv_pops.dtps_provide =
		    (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop;

	if (pops->dtps_provide_module == NULL) {
		ASSERT(pops->dtps_provide != NULL);
		provider->dtpv_pops.dtps_provide_module =
		    (void (*)(void *, struct modctl *))dtrace_nullop;

	if (pops->dtps_suspend == NULL) {
		ASSERT(pops->dtps_resume == NULL);
		provider->dtpv_pops.dtps_suspend =
		    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
		provider->dtpv_pops.dtps_resume =
		    (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;

	provider->dtpv_arg = arg;
	*idp = (dtrace_provider_id_t)provider;

	if (pops == &dtrace_provider_ops) {
		lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED);
		lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
		ASSERT(dtrace_anon.dta_enabling == NULL);

		/*
		 * We make sure that the DTrace provider is at the head of
		 * the provider chain.
		 */
		provider->dtpv_next = dtrace_provider;
		dtrace_provider = provider;

	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&dtrace_lock);

	/*
	 * If there is at least one provider registered, we'll add this
	 * provider after the first provider.
	 */
	if (dtrace_provider != NULL) {
		provider->dtpv_next = dtrace_provider->dtpv_next;
		dtrace_provider->dtpv_next = provider;
		dtrace_provider = provider;

	if (dtrace_retained != NULL) {
		dtrace_enabling_provide(provider);

		/*
		 * Now we need to call dtrace_enabling_matchall() -- which
		 * will acquire cpu_lock and dtrace_lock.  We therefore need
		 * to drop all of our locks before calling into it...
		 */
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&dtrace_provider_lock);
		dtrace_enabling_matchall();

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_provider_lock);
8127 * Unregister the specified provider from the DTrace framework. This should
8128 * generally be called by DTrace providers in their detach(9E) entry point.
8131 dtrace_unregister(dtrace_provider_id_t id
)
8133 dtrace_provider_t
*old
= (dtrace_provider_t
*)id
;
8134 dtrace_provider_t
*prev
= NULL
;
8136 dtrace_probe_t
*probe
, *first
= NULL
;
8138 if (old
->dtpv_pops
.dtps_enable
==
8139 (int (*)(void *, dtrace_id_t
, void *))dtrace_enable_nullop
) {
8141 * If DTrace itself is the provider, we're called with locks
8144 ASSERT(old
== dtrace_provider
);
8145 ASSERT(dtrace_devi
!= NULL
);
8146 lck_mtx_assert(&dtrace_provider_lock
, LCK_MTX_ASSERT_OWNED
);
8147 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
8150 if (dtrace_provider
->dtpv_next
!= NULL
) {
8152 * There's another provider here; return failure.
8157 lck_mtx_lock(&dtrace_provider_lock
);
8158 lck_mtx_lock(&mod_lock
);
8159 lck_mtx_lock(&dtrace_lock
);
8163 * If anyone has /dev/dtrace open, or if there are anonymous enabled
8164 * probes, we refuse to let providers slither away, unless this
8165 * provider has already been explicitly invalidated.
8167 if (!old
->dtpv_defunct
&&
8168 (dtrace_opens
|| (dtrace_anon
.dta_state
!= NULL
&&
8169 dtrace_anon
.dta_state
->dts_necbs
> 0))) {
8171 lck_mtx_unlock(&dtrace_lock
);
8172 lck_mtx_unlock(&mod_lock
);
8173 lck_mtx_unlock(&dtrace_provider_lock
);
8179 * Attempt to destroy the probes associated with this provider.
8181 if (old
->ecb_count
!=0) {
8183 * We have at least one ECB; we can't remove this provider.
8186 lck_mtx_unlock(&dtrace_lock
);
8187 lck_mtx_unlock(&mod_lock
);
8188 lck_mtx_unlock(&dtrace_provider_lock
);
8194 * All of the probes for this provider are disabled; we can safely
8195 * remove all of them from their hash chains and from the probe array.
8197 for (i
= 0; i
< dtrace_nprobes
&& old
->probe_count
!=0; i
++) {
8198 if ((probe
= dtrace_probes
[i
]) == NULL
)
8201 if (probe
->dtpr_provider
!= old
)
8204 dtrace_probes
[i
] = NULL
;
8207 dtrace_hash_remove(dtrace_bymod
, probe
);
8208 dtrace_hash_remove(dtrace_byfunc
, probe
);
8209 dtrace_hash_remove(dtrace_byname
, probe
);
8211 if (first
== NULL
) {
8213 probe
->dtpr_nextmod
= NULL
;
8215 probe
->dtpr_nextmod
= first
;
8221 * The provider's probes have been removed from the hash chains and
8222 * from the probe array. Now issue a dtrace_sync() to be sure that
8223 * everyone has cleared out from any probe array processing.
8227 for (probe
= first
; probe
!= NULL
; probe
= first
) {
8228 first
= probe
->dtpr_nextmod
;
8230 old
->dtpv_pops
.dtps_destroy(old
->dtpv_arg
, probe
->dtpr_id
,
8232 kmem_free(probe
->dtpr_mod
, strlen(probe
->dtpr_mod
) + 1);
8233 kmem_free(probe
->dtpr_func
, strlen(probe
->dtpr_func
) + 1);
8234 kmem_free(probe
->dtpr_name
, strlen(probe
->dtpr_name
) + 1);
8235 vmem_free(dtrace_arena
, (void *)(uintptr_t)(probe
->dtpr_id
), 1);
8236 #if !defined(__APPLE__)
8237 kmem_free(probe
, sizeof (dtrace_probe_t
));
8239 zfree(dtrace_probe_t_zone
, probe
);
8243 if ((prev
= dtrace_provider
) == old
) {
8244 ASSERT(self
|| dtrace_devi
== NULL
);
8245 ASSERT(old
->dtpv_next
== NULL
|| dtrace_devi
== NULL
);
8246 dtrace_provider
= old
->dtpv_next
;
8248 while (prev
!= NULL
&& prev
->dtpv_next
!= old
)
8249 prev
= prev
->dtpv_next
;
8252 panic("attempt to unregister non-existent "
8253 "dtrace provider %p\n", (void *)id
);
8256 prev
->dtpv_next
= old
->dtpv_next
;
8260 lck_mtx_unlock(&dtrace_lock
);
8261 lck_mtx_unlock(&mod_lock
);
8262 lck_mtx_unlock(&dtrace_provider_lock
);
8265 kmem_free(old
->dtpv_name
, strlen(old
->dtpv_name
) + 1);
8266 kmem_free(old
, sizeof (dtrace_provider_t
));
/*
 * Invalidate the specified provider.  All subsequent probe lookups for the
 * specified provider will fail, but its probes will not be removed.
 */
dtrace_invalidate(dtrace_provider_id_t id)
	dtrace_provider_t *pvp = (dtrace_provider_t *)id;

	ASSERT(pvp->dtpv_pops.dtps_enable !=
	    (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);

	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&dtrace_lock);

	pvp->dtpv_defunct = 1;

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_provider_lock);
/*
 * Indicate whether or not DTrace has attached.
 */
dtrace_attached(void)
	/*
	 * dtrace_provider will be non-NULL iff the DTrace driver has
	 * attached.  (It's non-NULL because DTrace is always itself a
	 * provider.)
	 */
	return (dtrace_provider != NULL);
8307 * Remove all the unenabled probes for the given provider. This function is
8308 * not unlike dtrace_unregister(), except that it doesn't remove the provider
8309 * -- just as many of its associated probes as it can.
8312 dtrace_condense(dtrace_provider_id_t id
)
8314 dtrace_provider_t
*prov
= (dtrace_provider_t
*)id
;
8316 dtrace_probe_t
*probe
;
8319 * Make sure this isn't the dtrace provider itself.
8321 ASSERT(prov
->dtpv_pops
.dtps_enable
!=
8322 (int (*)(void *, dtrace_id_t
, void *))dtrace_enable_nullop
);
8324 lck_mtx_lock(&dtrace_provider_lock
);
8325 lck_mtx_lock(&dtrace_lock
);
8328 * Attempt to destroy the probes associated with this provider.
8330 for (i
= 0; i
< dtrace_nprobes
; i
++) {
8331 if ((probe
= dtrace_probes
[i
]) == NULL
)
8334 if (probe
->dtpr_provider
!= prov
)
8337 if (probe
->dtpr_ecb
!= NULL
)
8340 dtrace_probes
[i
] = NULL
;
8341 prov
->probe_count
--;
8343 dtrace_hash_remove(dtrace_bymod
, probe
);
8344 dtrace_hash_remove(dtrace_byfunc
, probe
);
8345 dtrace_hash_remove(dtrace_byname
, probe
);
8347 prov
->dtpv_pops
.dtps_destroy(prov
->dtpv_arg
, i
+ 1,
8349 kmem_free(probe
->dtpr_mod
, strlen(probe
->dtpr_mod
) + 1);
8350 kmem_free(probe
->dtpr_func
, strlen(probe
->dtpr_func
) + 1);
8351 kmem_free(probe
->dtpr_name
, strlen(probe
->dtpr_name
) + 1);
8352 #if !defined(__APPLE__)
8353 kmem_free(probe
, sizeof (dtrace_probe_t
));
8355 zfree(dtrace_probe_t_zone
, probe
);
8357 vmem_free(dtrace_arena
, (void *)((uintptr_t)i
+ 1), 1);
8360 lck_mtx_unlock(&dtrace_lock
);
8361 lck_mtx_unlock(&dtrace_provider_lock
);
/*
 * DTrace Probe Management Functions
 *
 * The functions in this section perform the DTrace probe management,
 * including functions to create probes, look-up probes, and call into the
 * providers to request that probes be provided.  Some of these functions are
 * in the Provider-to-Framework API; these functions can be identified by the
 * fact that they are not declared "static".
 */

/*
 * Create a probe with the specified module name, function name, and name.
 */
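
/*
 * Illustrative sketch (hypothetical): a provider that just registered itself
 * might create one probe per traced function, e.g.
 *
 *	id = dtrace_probe_create(example_id, "mach_kernel", "vm_fault",
 *	    "entry", 0, NULL);
 *
 * The returned dtrace_id_t is later handed back to the provider in its
 * enable/disable/destroy ops.
 */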
dtrace_probe_create(dtrace_provider_id_t prov, const char *mod,
    const char *func, const char *name, int aframes, void *arg)
	dtrace_probe_t *probe, **probes;
	dtrace_provider_t *provider = (dtrace_provider_t *)prov;

	if (provider == dtrace_provider) {
		lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
		lck_mtx_lock(&dtrace_lock);

	id = (dtrace_id_t)(uintptr_t)vmem_alloc(dtrace_arena, 1,
	    VM_BESTFIT | VM_SLEEP);
#if !defined(__APPLE__)
	probe = kmem_zalloc(sizeof (dtrace_probe_t), KM_SLEEP);
#else
	probe = zalloc(dtrace_probe_t_zone);
	bzero(probe, sizeof (dtrace_probe_t));
#endif /* __APPLE__ */

	probe->dtpr_id = id;
	probe->dtpr_gen = dtrace_probegen++;
	probe->dtpr_mod = dtrace_strdup(mod);
	probe->dtpr_func = dtrace_strdup(func);
	probe->dtpr_name = dtrace_strdup(name);
	probe->dtpr_arg = arg;
	probe->dtpr_aframes = aframes;
	probe->dtpr_provider = provider;

	dtrace_hash_add(dtrace_bymod, probe);
	dtrace_hash_add(dtrace_byfunc, probe);
	dtrace_hash_add(dtrace_byname, probe);

#if !defined(__APPLE__) /* Quiet compiler warning */
	if (id - 1 >= dtrace_nprobes) {
#else
	if (id - 1 >= (dtrace_id_t)dtrace_nprobes) {
#endif /* __APPLE__ */
		size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *);
		size_t nsize = osize << 1;

			ASSERT(dtrace_probes == NULL);
			nsize = sizeof (dtrace_probe_t *);

		probes = kmem_zalloc(nsize, KM_SLEEP);

		if (dtrace_probes == NULL) {
			dtrace_probes = probes;
			dtrace_probe_t **oprobes = dtrace_probes;

			bcopy(oprobes, probes, osize);
			dtrace_membar_producer();
			dtrace_probes = probes;

			/*
			 * All CPUs are now seeing the new probes array; we can
			 * safely free the old array.
			 */
			kmem_free(oprobes, osize);
			dtrace_nprobes <<= 1;

#if !defined(__APPLE__) /* Quiet compiler warning */
		ASSERT(id - 1 < dtrace_nprobes);
#else
		ASSERT(id - 1 < (dtrace_id_t)dtrace_nprobes);
#endif /* __APPLE__ */

	ASSERT(dtrace_probes[id - 1] == NULL);
	dtrace_probes[id - 1] = probe;
	provider->probe_count++;

	if (provider != dtrace_provider)
		lck_mtx_unlock(&dtrace_lock);
static dtrace_probe_t *
dtrace_probe_lookup_id(dtrace_id_t id)
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

#if !defined(__APPLE__) /* Quiet compiler warning */
	if (id == 0 || id > dtrace_nprobes)
#else
	if (id == 0 || id > (dtrace_id_t)dtrace_nprobes)
#endif /* __APPLE__ */

	return (dtrace_probes[id - 1]);

dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg)
	*((dtrace_id_t *)arg) = probe->dtpr_id;

	return (DTRACE_MATCH_DONE);
/*
 * Look up a probe based on provider and one or more of module name, function
 * name and probe name.
 */
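
/*
 * Illustrative sketch (hypothetical): providers commonly use this to avoid
 * creating duplicate probes, e.g.
 *
 *	if (dtrace_probe_lookup(example_id, "mach_kernel", "vm_fault",
 *	    "entry") == 0)
 *		(void) dtrace_probe_create(example_id, "mach_kernel",
 *		    "vm_fault", "entry", 0, NULL);
 */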
dtrace_probe_lookup(dtrace_provider_id_t prid, const char *mod,
    const char *func, const char *name)
	dtrace_probekey_t pkey;

	pkey.dtpk_prov = ((dtrace_provider_t *)prid)->dtpv_name;
	pkey.dtpk_pmatch = &dtrace_match_string;
	pkey.dtpk_mod = mod;
	pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_func = func;
	pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_name = name;
	pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_id = DTRACE_IDNONE;

	lck_mtx_lock(&dtrace_lock);
	match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0,
	    dtrace_probe_lookup_match, &id);
	lck_mtx_unlock(&dtrace_lock);

	ASSERT(match == 1 || match == 0);
	return (match ? id : 0);
/*
 * Returns the probe argument associated with the specified probe.
 */
dtrace_probe_arg(dtrace_provider_id_t id, dtrace_id_t pid)
	dtrace_probe_t *probe;

	lck_mtx_lock(&dtrace_lock);

	if ((probe = dtrace_probe_lookup_id(pid)) != NULL &&
	    probe->dtpr_provider == (dtrace_provider_t *)id)
		rval = probe->dtpr_arg;

	lck_mtx_unlock(&dtrace_lock);
8545 * Copy a probe into a probe description.
8548 dtrace_probe_description(const dtrace_probe_t
*prp
, dtrace_probedesc_t
*pdp
)
8550 bzero(pdp
, sizeof (dtrace_probedesc_t
));
8551 pdp
->dtpd_id
= prp
->dtpr_id
;
8553 #if !defined(__APPLE__)
8554 (void) strncpy(pdp
->dtpd_provider
,
8555 prp
->dtpr_provider
->dtpv_name
, DTRACE_PROVNAMELEN
- 1);
8557 (void) strncpy(pdp
->dtpd_mod
, prp
->dtpr_mod
, DTRACE_MODNAMELEN
- 1);
8558 (void) strncpy(pdp
->dtpd_func
, prp
->dtpr_func
, DTRACE_FUNCNAMELEN
- 1);
8559 (void) strncpy(pdp
->dtpd_name
, prp
->dtpr_name
, DTRACE_NAMELEN
- 1);
8560 #else /* Employ size bounded string operation. */
8561 (void) strlcpy(pdp
->dtpd_provider
,
8562 prp
->dtpr_provider
->dtpv_name
, DTRACE_PROVNAMELEN
);
8564 (void) strlcpy(pdp
->dtpd_mod
, prp
->dtpr_mod
, DTRACE_MODNAMELEN
);
8565 (void) strlcpy(pdp
->dtpd_func
, prp
->dtpr_func
, DTRACE_FUNCNAMELEN
);
8566 (void) strlcpy(pdp
->dtpd_name
, prp
->dtpr_name
, DTRACE_NAMELEN
);
8567 #endif /* __APPLE__ */
8571 * Called to indicate that a probe -- or probes -- should be provided by a
8572 * specfied provider. If the specified description is NULL, the provider will
8573 * be told to provide all of its probes. (This is done whenever a new
8574 * consumer comes along, or whenever a retained enabling is to be matched.) If
8575 * the specified description is non-NULL, the provider is given the
8576 * opportunity to dynamically provide the specified probe, allowing providers
8577 * to support the creation of probes on-the-fly. (So-called _autocreated_
8578 * probes.) If the provider is NULL, the operations will be applied to all
8579 * providers; if the provider is non-NULL the operations will only be applied
8580 * to the specified provider. The dtrace_provider_lock must be held, and the
8581 * dtrace_lock must _not_ be held -- the provider's dtps_provide() operation
8582 * will need to grab the dtrace_lock when it reenters the framework through
8583 * dtrace_probe_lookup(), dtrace_probe_create(), etc.
8586 dtrace_probe_provide(dtrace_probedesc_t
*desc
, dtrace_provider_t
*prv
)
8591 lck_mtx_assert(&dtrace_provider_lock
, LCK_MTX_ASSERT_OWNED
);
8595 prv
= dtrace_provider
;
8600 * First, call the blanket provide operation.
8602 prv
->dtpv_pops
.dtps_provide(prv
->dtpv_arg
, desc
);
8605 * Now call the per-module provide operation. We will grab
8606 * mod_lock to prevent the list from being modified. Note
8607 * that this also prevents the mod_busy bits from changing.
8608 * (mod_busy can only be changed with mod_lock held.)
8610 lck_mtx_lock(&mod_lock
);
8612 #if !defined(__APPLE__)
8615 if (ctl
->mod_busy
|| ctl
->mod_mp
== NULL
)
8618 prv
->dtpv_pops
.dtps_provide_module(prv
->dtpv_arg
, ctl
);
8620 } while ((ctl
= ctl
->mod_next
) != &modules
);
8622 ctl
= dtrace_modctl_list
;
8624 prv
->dtpv_pops
.dtps_provide_module(prv
->dtpv_arg
, ctl
);
8625 ctl
= ctl
->mod_next
;
8629 lck_mtx_unlock(&mod_lock
);
8630 } while (all
&& (prv
= prv
->dtpv_next
) != NULL
);
8634 * Iterate over each probe, and call the Framework-to-Provider API function
8638 dtrace_probe_foreach(uintptr_t offs
)
8640 dtrace_provider_t
*prov
;
8641 void (*func
)(void *, dtrace_id_t
, void *);
8642 dtrace_probe_t
*probe
;
8643 dtrace_icookie_t cookie
;
8647 * We disable interrupts to walk through the probe array. This is
8648 * safe -- the dtrace_sync() in dtrace_unregister() assures that we
8649 * won't see stale data.
8651 cookie
= dtrace_interrupt_disable();
8653 for (i
= 0; i
< dtrace_nprobes
; i
++) {
8654 if ((probe
= dtrace_probes
[i
]) == NULL
)
8657 if (probe
->dtpr_ecb
== NULL
) {
8659 * This probe isn't enabled -- don't call the function.
8664 prov
= probe
->dtpr_provider
;
8665 func
= *((void(**)(void *, dtrace_id_t
, void *))
8666 ((uintptr_t)&prov
->dtpv_pops
+ offs
));
8668 func(prov
->dtpv_arg
, i
+ 1, probe
->dtpr_arg
);
8671 dtrace_interrupt_enable(cookie
);
8675 dtrace_probe_enable(const dtrace_probedesc_t
*desc
, dtrace_enabling_t
*enab
)
8677 dtrace_probekey_t pkey
;
8682 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
8684 dtrace_ecb_create_cache
= NULL
;
8688 * If we're passed a NULL description, we're being asked to
8689 * create an ECB with a NULL probe.
8691 (void) dtrace_ecb_create_enable(NULL
, enab
);
8695 dtrace_probekey(desc
, &pkey
);
8696 dtrace_cred2priv(enab
->dten_vstate
->dtvs_state
->dts_cred
.dcr_cred
,
8697 &priv
, &uid
, &zoneid
);
8699 return (dtrace_match(&pkey
, priv
, uid
, zoneid
, dtrace_ecb_create_enable
,
8704 * DTrace Helper Provider Functions
8707 dtrace_dofattr2attr(dtrace_attribute_t
*attr
, const dof_attr_t dofattr
)
8709 attr
->dtat_name
= DOF_ATTR_NAME(dofattr
);
8710 attr
->dtat_data
= DOF_ATTR_DATA(dofattr
);
8711 attr
->dtat_class
= DOF_ATTR_CLASS(dofattr
);
8715 dtrace_dofprov2hprov(dtrace_helper_provdesc_t
*hprov
,
8716 const dof_provider_t
*dofprov
, char *strtab
)
8718 hprov
->dthpv_provname
= strtab
+ dofprov
->dofpv_name
;
8719 dtrace_dofattr2attr(&hprov
->dthpv_pattr
.dtpa_provider
,
8720 dofprov
->dofpv_provattr
);
8721 dtrace_dofattr2attr(&hprov
->dthpv_pattr
.dtpa_mod
,
8722 dofprov
->dofpv_modattr
);
8723 dtrace_dofattr2attr(&hprov
->dthpv_pattr
.dtpa_func
,
8724 dofprov
->dofpv_funcattr
);
8725 dtrace_dofattr2attr(&hprov
->dthpv_pattr
.dtpa_name
,
8726 dofprov
->dofpv_nameattr
);
8727 dtrace_dofattr2attr(&hprov
->dthpv_pattr
.dtpa_args
,
8728 dofprov
->dofpv_argsattr
);
8732 dtrace_helper_provide_one(dof_helper_t
*dhp
, dof_sec_t
*sec
, pid_t pid
)
8734 uintptr_t daddr
= (uintptr_t)dhp
->dofhp_dof
;
8735 dof_hdr_t
*dof
= (dof_hdr_t
*)daddr
;
8736 dof_sec_t
*str_sec
, *prb_sec
, *arg_sec
, *off_sec
, *enoff_sec
;
8737 dof_provider_t
*provider
;
8739 uint32_t *off
, *enoff
;
8743 dtrace_helper_provdesc_t dhpv
;
8744 dtrace_helper_probedesc_t dhpb
;
8745 dtrace_meta_t
*meta
= dtrace_meta_pid
;
8746 dtrace_mops_t
*mops
= &meta
->dtm_mops
;
8749 provider
= (dof_provider_t
*)(uintptr_t)(daddr
+ sec
->dofs_offset
);
8750 str_sec
= (dof_sec_t
*)(uintptr_t)(daddr
+ dof
->dofh_secoff
+
8751 provider
->dofpv_strtab
* dof
->dofh_secsize
);
8752 prb_sec
= (dof_sec_t
*)(uintptr_t)(daddr
+ dof
->dofh_secoff
+
8753 provider
->dofpv_probes
* dof
->dofh_secsize
);
8754 arg_sec
= (dof_sec_t
*)(uintptr_t)(daddr
+ dof
->dofh_secoff
+
8755 provider
->dofpv_prargs
* dof
->dofh_secsize
);
8756 off_sec
= (dof_sec_t
*)(uintptr_t)(daddr
+ dof
->dofh_secoff
+
8757 provider
->dofpv_proffs
* dof
->dofh_secsize
);
8759 strtab
= (char *)(uintptr_t)(daddr
+ str_sec
->dofs_offset
);
8760 off
= (uint32_t *)(uintptr_t)(daddr
+ off_sec
->dofs_offset
);
8761 arg
= (uint8_t *)(uintptr_t)(daddr
+ arg_sec
->dofs_offset
);
8765 * See dtrace_helper_provider_validate().
8767 if (dof
->dofh_ident
[DOF_ID_VERSION
] != DOF_VERSION_1
&&
8768 provider
->dofpv_prenoffs
!= DOF_SECT_NONE
) {
8769 enoff_sec
= (dof_sec_t
*)(uintptr_t)(daddr
+ dof
->dofh_secoff
+
8770 provider
->dofpv_prenoffs
* dof
->dofh_secsize
);
8771 enoff
= (uint32_t *)(uintptr_t)(daddr
+ enoff_sec
->dofs_offset
);
8774 nprobes
= prb_sec
->dofs_size
/ prb_sec
->dofs_entsize
;
8777 * Create the provider.
8779 dtrace_dofprov2hprov(&dhpv
, provider
, strtab
);
8781 if ((parg
= mops
->dtms_provide_pid(meta
->dtm_arg
, &dhpv
, pid
)) == NULL
)
8787 * Create the probes.
8789 for (i
= 0; i
< nprobes
; i
++) {
8790 probe
= (dof_probe_t
*)(uintptr_t)(daddr
+
8791 prb_sec
->dofs_offset
+ i
* prb_sec
->dofs_entsize
);
8793 dhpb
.dthpb_mod
= dhp
->dofhp_mod
;
8794 dhpb
.dthpb_func
= strtab
+ probe
->dofpr_func
;
8795 dhpb
.dthpb_name
= strtab
+ probe
->dofpr_name
;
8796 #if !defined(__APPLE__)
8797 dhpb
.dthpb_base
= probe
->dofpr_addr
;
8799 dhpb
.dthpb_base
= dhp
->dofhp_addr
; /* FIXME: James, why? */
8801 #if !defined(__APPLE__) /* Quiet compiler warning */
8802 dhpb
.dthpb_offs
= off
+ probe
->dofpr_offidx
;
8804 dhpb
.dthpb_offs
= (int32_t *)(off
+ probe
->dofpr_offidx
);
8805 #endif /* __APPLE__ */
8806 dhpb
.dthpb_noffs
= probe
->dofpr_noffs
;
8807 if (enoff
!= NULL
) {
8808 #if !defined(__APPLE__) /* Quiet compiler warning */
8809 dhpb
.dthpb_enoffs
= enoff
+ probe
->dofpr_enoffidx
;
8811 dhpb
.dthpb_enoffs
= (int32_t *)(enoff
+ probe
->dofpr_enoffidx
);
8812 #endif /* __APPLE__ */
8813 dhpb
.dthpb_nenoffs
= probe
->dofpr_nenoffs
;
8815 dhpb
.dthpb_enoffs
= NULL
;
8816 dhpb
.dthpb_nenoffs
= 0;
8818 dhpb
.dthpb_args
= arg
+ probe
->dofpr_argidx
;
8819 dhpb
.dthpb_nargc
= probe
->dofpr_nargc
;
8820 dhpb
.dthpb_xargc
= probe
->dofpr_xargc
;
8821 dhpb
.dthpb_ntypes
= strtab
+ probe
->dofpr_nargv
;
8822 dhpb
.dthpb_xtypes
= strtab
+ probe
->dofpr_xargv
;
8824 mops
->dtms_create_probe(meta
->dtm_arg
, parg
, &dhpb
);
8829 dtrace_helper_provide(dof_helper_t
*dhp
, pid_t pid
)
8831 uintptr_t daddr
= (uintptr_t)dhp
->dofhp_dof
;
8832 dof_hdr_t
*dof
= (dof_hdr_t
*)daddr
;
8833 #if !defined(__APPLE__) /* Quiet compiler warning */
8837 #endif /* __APPLE__ */
8839 lck_mtx_assert(&dtrace_meta_lock
, LCK_MTX_ASSERT_OWNED
);
8841 for (i
= 0; i
< dof
->dofh_secnum
; i
++) {
8842 dof_sec_t
*sec
= (dof_sec_t
*)(uintptr_t)(daddr
+
8843 dof
->dofh_secoff
+ i
* dof
->dofh_secsize
);
8845 if (sec
->dofs_type
!= DOF_SECT_PROVIDER
)
8848 dtrace_helper_provide_one(dhp
, sec
, pid
);
8852 * We may have just created probes, so we must now rematch against
8853 * any retained enablings. Note that this call will acquire both
8854 * cpu_lock and dtrace_lock; the fact that we are holding
8855 * dtrace_meta_lock now is what defines the ordering with respect to
8856 * these three locks.
8858 dtrace_enabling_matchall();
8862 dtrace_helper_provider_remove_one(dof_helper_t
*dhp
, dof_sec_t
*sec
, pid_t pid
)
8864 uintptr_t daddr
= (uintptr_t)dhp
->dofhp_dof
;
8865 dof_hdr_t
*dof
= (dof_hdr_t
*)daddr
;
8867 dof_provider_t
*provider
;
8869 dtrace_helper_provdesc_t dhpv
;
8870 dtrace_meta_t
*meta
= dtrace_meta_pid
;
8871 dtrace_mops_t
*mops
= &meta
->dtm_mops
;
8873 provider
= (dof_provider_t
*)(uintptr_t)(daddr
+ sec
->dofs_offset
);
8874 str_sec
= (dof_sec_t
*)(uintptr_t)(daddr
+ dof
->dofh_secoff
+
8875 provider
->dofpv_strtab
* dof
->dofh_secsize
);
8877 strtab
= (char *)(uintptr_t)(daddr
+ str_sec
->dofs_offset
);
8880 * Create the provider.
8882 dtrace_dofprov2hprov(&dhpv
, provider
, strtab
);
8884 mops
->dtms_remove_pid(meta
->dtm_arg
, &dhpv
, pid
);
8890 dtrace_helper_provider_remove(dof_helper_t
*dhp
, pid_t pid
)
8892 uintptr_t daddr
= (uintptr_t)dhp
->dofhp_dof
;
8893 dof_hdr_t
*dof
= (dof_hdr_t
*)daddr
;
8894 #if !defined(__APPLE__) /* Quiet compiler warning */
8898 #endif /* __APPLE__ */
8900 lck_mtx_assert(&dtrace_meta_lock
, LCK_MTX_ASSERT_OWNED
);
8902 for (i
= 0; i
< dof
->dofh_secnum
; i
++) {
8903 dof_sec_t
*sec
= (dof_sec_t
*)(uintptr_t)(daddr
+
8904 dof
->dofh_secoff
+ i
* dof
->dofh_secsize
);
8906 if (sec
->dofs_type
!= DOF_SECT_PROVIDER
)
8909 dtrace_helper_provider_remove_one(dhp
, sec
, pid
);
8914 * DTrace Meta Provider-to-Framework API Functions
8916 * These functions implement the Meta Provider-to-Framework API, as described
8917 * in <sys/dtrace.h>.
8920 dtrace_meta_register(const char *name
, const dtrace_mops_t
*mops
, void *arg
,
8921 dtrace_meta_provider_id_t
*idp
)
8923 dtrace_meta_t
*meta
;
8924 dtrace_helpers_t
*help
, *next
;
8925 #if !defined(__APPLE__) /* Quiet compiler warning */
8929 #endif /* __APPLE__ */
8931 *idp
= DTRACE_METAPROVNONE
;
8934 * We strictly don't need the name, but we hold onto it for
8935 * debuggability. All hail error queues!
8938 cmn_err(CE_WARN
, "failed to register meta-provider: "
8944 mops
->dtms_create_probe
== NULL
||
8945 mops
->dtms_provide_pid
== NULL
||
8946 mops
->dtms_remove_pid
== NULL
) {
8947 cmn_err(CE_WARN
, "failed to register meta-register %s: "
8948 "invalid ops", name
);
8952 meta
= kmem_zalloc(sizeof (dtrace_meta_t
), KM_SLEEP
);
8953 meta
->dtm_mops
= *mops
;
8954 #if !defined(__APPLE__)
8955 meta
->dtm_name
= kmem_alloc(strlen(name
) + 1, KM_SLEEP
);
8956 (void) strcpy(meta
->dtm_name
, name
);
8957 #else /* Employ size bounded string operation. */
8959 size_t bufsize
= strlen(name
) + 1;
8960 meta
->dtm_name
= kmem_alloc(bufsize
, KM_SLEEP
);
8961 (void) strlcpy(meta
->dtm_name
, name
, bufsize
);
8963 #endif /* __APPLE__ */
8964 meta
->dtm_arg
= arg
;
8966 lck_mtx_lock(&dtrace_meta_lock
);
8967 lck_mtx_lock(&dtrace_lock
);
8969 if (dtrace_meta_pid
!= NULL
) {
8970 lck_mtx_unlock(&dtrace_lock
);
8971 lck_mtx_unlock(&dtrace_meta_lock
);
8972 cmn_err(CE_WARN
, "failed to register meta-register %s: "
8973 "user-land meta-provider exists", name
);
8974 kmem_free(meta
->dtm_name
, strlen(meta
->dtm_name
) + 1);
8975 kmem_free(meta
, sizeof (dtrace_meta_t
));
8979 dtrace_meta_pid
= meta
;
8980 *idp
= (dtrace_meta_provider_id_t
)meta
;
8983 * If there are providers and probes ready to go, pass them
8984 * off to the new meta provider now.
8987 help
= dtrace_deferred_pid
;
8988 dtrace_deferred_pid
= NULL
;
8990 lck_mtx_unlock(&dtrace_lock
);
8992 while (help
!= NULL
) {
8993 for (i
= 0; i
< help
->dthps_nprovs
; i
++) {
8994 dtrace_helper_provide(&help
->dthps_provs
[i
]->dthp_prov
,
8998 next
= help
->dthps_next
;
8999 help
->dthps_next
= NULL
;
9000 help
->dthps_prev
= NULL
;
9001 help
->dthps_deferred
= 0;
9005 lck_mtx_unlock(&dtrace_meta_lock
);
9011 dtrace_meta_unregister(dtrace_meta_provider_id_t id
)
9013 dtrace_meta_t
**pp
, *old
= (dtrace_meta_t
*)id
;
9015 lck_mtx_lock(&dtrace_meta_lock
);
9016 lck_mtx_lock(&dtrace_lock
);
9018 if (old
== dtrace_meta_pid
) {
9019 pp
= &dtrace_meta_pid
;
9021 panic("attempt to unregister non-existent "
9022 "dtrace meta-provider %p\n", (void *)old
);
9025 if (old
->dtm_count
!= 0) {
9026 lck_mtx_unlock(&dtrace_lock
);
9027 lck_mtx_unlock(&dtrace_meta_lock
);
9033 lck_mtx_unlock(&dtrace_lock
);
9034 lck_mtx_unlock(&dtrace_meta_lock
);
9036 kmem_free(old
->dtm_name
, strlen(old
->dtm_name
) + 1);
9037 kmem_free(old
, sizeof (dtrace_meta_t
));
9044 * DTrace DIF Object Functions
9047 dtrace_difo_err(uint_t pc
, const char *format
, ...)
9049 if (dtrace_err_verbose
) {
9052 (void) uprintf("dtrace DIF object error: [%u]: ", pc
);
9053 va_start(alist
, format
);
9054 (void) vuprintf(format
, alist
);
9058 #ifdef DTRACE_ERRDEBUG
9059 dtrace_errdebug(format
);
9065 * Validate a DTrace DIF object by checking the IR instructions. The following
9066 * rules are currently enforced by dtrace_difo_validate():
9068 * 1. Each instruction must have a valid opcode
9069 * 2. Each register, string, variable, or subroutine reference must be valid
9070 * 3. No instruction can modify register %r0 (must be zero)
9071 * 4. All instruction reserved bits must be set to zero
9072 * 5. The last instruction must be a "ret" instruction
9073 * 6. All branch targets must reference a valid instruction _after_ the branch
9076 dtrace_difo_validate(dtrace_difo_t
*dp
, dtrace_vstate_t
*vstate
, uint_t nregs
,
9079 #if !defined(__APPLE__) /* Quiet compiler warnings */
9084 #endif /* __APPLE__ */
9085 int (*efunc
)(uint_t pc
, const char *, ...) = dtrace_difo_err
;
9089 kcheckload
= cr
== NULL
||
9090 (vstate
->dtvs_state
->dts_cred
.dcr_visible
& DTRACE_CRV_KERNEL
) == 0;
9092 dp
->dtdo_destructive
= 0;
9094 for (pc
= 0; pc
< dp
->dtdo_len
&& err
== 0; pc
++) {
9095 dif_instr_t instr
= dp
->dtdo_buf
[pc
];
9097 uint_t r1
= DIF_INSTR_R1(instr
);
9098 uint_t r2
= DIF_INSTR_R2(instr
);
9099 uint_t rd
= DIF_INSTR_RD(instr
);
9100 uint_t rs
= DIF_INSTR_RS(instr
);
9101 uint_t label
= DIF_INSTR_LABEL(instr
);
9102 uint_t v
= DIF_INSTR_VAR(instr
);
9103 uint_t subr
= DIF_INSTR_SUBR(instr
);
9104 uint_t type
= DIF_INSTR_TYPE(instr
);
9105 uint_t op
= DIF_INSTR_OP(instr
);
9123 err
+= efunc(pc
, "invalid register %u\n", r1
);
9125 err
+= efunc(pc
, "invalid register %u\n", r2
);
9127 err
+= efunc(pc
, "invalid register %u\n", rd
);
9129 err
+= efunc(pc
, "cannot write to %r0\n");
9135 err
+= efunc(pc
, "invalid register %u\n", r1
);
9137 err
+= efunc(pc
, "non-zero reserved bits\n");
9139 err
+= efunc(pc
, "invalid register %u\n", rd
);
9141 err
+= efunc(pc
, "cannot write to %r0\n");
9151 err
+= efunc(pc
, "invalid register %u\n", r1
);
9153 err
+= efunc(pc
, "non-zero reserved bits\n");
9155 err
+= efunc(pc
, "invalid register %u\n", rd
);
9157 err
+= efunc(pc
, "cannot write to %r0\n");
9159 dp
->dtdo_buf
[pc
] = DIF_INSTR_LOAD(op
+
9160 DIF_OP_RLDSB
- DIF_OP_LDSB
, r1
, rd
);
9170 err
+= efunc(pc
, "invalid register %u\n", r1
);
9172 err
+= efunc(pc
, "non-zero reserved bits\n");
9174 err
+= efunc(pc
, "invalid register %u\n", rd
);
9176 err
+= efunc(pc
, "cannot write to %r0\n");
9186 err
+= efunc(pc
, "invalid register %u\n", r1
);
9188 err
+= efunc(pc
, "non-zero reserved bits\n");
9190 err
+= efunc(pc
, "invalid register %u\n", rd
);
9192 err
+= efunc(pc
, "cannot write to %r0\n");
9199 err
+= efunc(pc
, "invalid register %u\n", r1
);
9201 err
+= efunc(pc
, "non-zero reserved bits\n");
9203 err
+= efunc(pc
, "invalid register %u\n", rd
);
9205 err
+= efunc(pc
, "cannot write to 0 address\n");
9210 err
+= efunc(pc
, "invalid register %u\n", r1
);
9212 err
+= efunc(pc
, "invalid register %u\n", r2
);
9214 err
+= efunc(pc
, "non-zero reserved bits\n");
9218 err
+= efunc(pc
, "invalid register %u\n", r1
);
9219 if (r2
!= 0 || rd
!= 0)
9220 err
+= efunc(pc
, "non-zero reserved bits\n");
9233 if (label
>= dp
->dtdo_len
) {
9234 err
+= efunc(pc
, "invalid branch target %u\n",
9238 err
+= efunc(pc
, "backward branch to %u\n",
9243 if (r1
!= 0 || r2
!= 0)
9244 err
+= efunc(pc
, "non-zero reserved bits\n");
9246 err
+= efunc(pc
, "invalid register %u\n", rd
);
9250 case DIF_OP_FLUSHTS
:
9251 if (r1
!= 0 || r2
!= 0 || rd
!= 0)
9252 err
+= efunc(pc
, "non-zero reserved bits\n");
9255 if (DIF_INSTR_INTEGER(instr
) >= dp
->dtdo_intlen
) {
9256 err
+= efunc(pc
, "invalid integer ref %u\n",
9257 DIF_INSTR_INTEGER(instr
));
9260 err
+= efunc(pc
, "invalid register %u\n", rd
);
9262 err
+= efunc(pc
, "cannot write to %r0\n");
9265 if (DIF_INSTR_STRING(instr
) >= dp
->dtdo_strlen
) {
9266 err
+= efunc(pc
, "invalid string ref %u\n",
9267 DIF_INSTR_STRING(instr
));
9270 err
+= efunc(pc
, "invalid register %u\n", rd
);
9272 err
+= efunc(pc
, "cannot write to %r0\n");
9276 if (r1
> DIF_VAR_ARRAY_MAX
)
9277 err
+= efunc(pc
, "invalid array %u\n", r1
);
9279 err
+= efunc(pc
, "invalid register %u\n", r2
);
9281 err
+= efunc(pc
, "invalid register %u\n", rd
);
9283 err
+= efunc(pc
, "cannot write to %r0\n");
9290 if (v
< DIF_VAR_OTHER_MIN
|| v
> DIF_VAR_OTHER_MAX
)
9291 err
+= efunc(pc
, "invalid variable %u\n", v
);
9293 err
+= efunc(pc
, "invalid register %u\n", rd
);
9295 err
+= efunc(pc
, "cannot write to %r0\n");
9302 if (v
< DIF_VAR_OTHER_UBASE
|| v
> DIF_VAR_OTHER_MAX
)
9303 err
+= efunc(pc
, "invalid variable %u\n", v
);
9305 err
+= efunc(pc
, "invalid register %u\n", rd
);
9308 if (subr
> DIF_SUBR_MAX
)
9309 err
+= efunc(pc
, "invalid subr %u\n", subr
);
9311 err
+= efunc(pc
, "invalid register %u\n", rd
);
9313 err
+= efunc(pc
, "cannot write to %r0\n");
9315 if (subr
== DIF_SUBR_COPYOUT
||
9316 subr
== DIF_SUBR_COPYOUTSTR
) {
9317 dp
->dtdo_destructive
= 1;
9321 if (type
!= DIF_TYPE_STRING
&& type
!= DIF_TYPE_CTF
)
9322 err
+= efunc(pc
, "invalid ref type %u\n", type
);
9324 err
+= efunc(pc
, "invalid register %u\n", r2
);
9326 err
+= efunc(pc
, "invalid register %u\n", rs
);
9329 if (type
!= DIF_TYPE_CTF
)
9330 err
+= efunc(pc
, "invalid val type %u\n", type
);
9332 err
+= efunc(pc
, "invalid register %u\n", r2
);
9334 err
+= efunc(pc
, "invalid register %u\n", rs
);
9337 err
+= efunc(pc
, "invalid opcode %u\n",
9338 DIF_INSTR_OP(instr
));
9342 if (dp
->dtdo_len
!= 0 &&
9343 DIF_INSTR_OP(dp
->dtdo_buf
[dp
->dtdo_len
- 1]) != DIF_OP_RET
) {
9344 err
+= efunc(dp
->dtdo_len
- 1,
9345 "expected 'ret' as last DIF instruction\n");
9348 if (!(dp
->dtdo_rtype
.dtdt_flags
& DIF_TF_BYREF
)) {
9350 * If we're not returning by reference, the size must be either
9351 * 0 or the size of one of the base types.
9353 switch (dp
->dtdo_rtype
.dtdt_size
) {
9355 case sizeof (uint8_t):
9356 case sizeof (uint16_t):
9357 case sizeof (uint32_t):
9358 case sizeof (uint64_t):
9362 err
+= efunc(dp
->dtdo_len
- 1, "bad return size\n");
9366 for (i
= 0; i
< dp
->dtdo_varlen
&& err
== 0; i
++) {
9367 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
], *existing
= NULL
;
9368 dtrace_diftype_t
*vt
, *et
;
9369 #if !defined(__APPLE__) /* Quiet compiler warnings */
9374 #endif /* __APPLE__ */
9376 if (v
->dtdv_scope
!= DIFV_SCOPE_GLOBAL
&&
9377 v
->dtdv_scope
!= DIFV_SCOPE_THREAD
&&
9378 v
->dtdv_scope
!= DIFV_SCOPE_LOCAL
) {
9379 err
+= efunc(i
, "unrecognized variable scope %d\n",
9384 if (v
->dtdv_kind
!= DIFV_KIND_ARRAY
&&
9385 v
->dtdv_kind
!= DIFV_KIND_SCALAR
) {
9386 err
+= efunc(i
, "unrecognized variable type %d\n",
9391 if ((id
= v
->dtdv_id
) > DIF_VARIABLE_MAX
) {
9392 err
+= efunc(i
, "%d exceeds variable id limit\n", id
);
9396 if (id
< DIF_VAR_OTHER_UBASE
)
9400 * For user-defined variables, we need to check that this
9401 * definition is identical to any previous definition that we
9404 ndx
= id
- DIF_VAR_OTHER_UBASE
;
9406 switch (v
->dtdv_scope
) {
9407 case DIFV_SCOPE_GLOBAL
:
9408 if (ndx
< vstate
->dtvs_nglobals
) {
9409 dtrace_statvar_t
*svar
;
9411 if ((svar
= vstate
->dtvs_globals
[ndx
]) != NULL
)
9412 existing
= &svar
->dtsv_var
;
9417 case DIFV_SCOPE_THREAD
:
9418 if (ndx
< vstate
->dtvs_ntlocals
)
9419 existing
= &vstate
->dtvs_tlocals
[ndx
];
9422 case DIFV_SCOPE_LOCAL
:
9423 if (ndx
< vstate
->dtvs_nlocals
) {
9424 dtrace_statvar_t
*svar
;
9426 if ((svar
= vstate
->dtvs_locals
[ndx
]) != NULL
)
9427 existing
= &svar
->dtsv_var
;
9435 if (vt
->dtdt_flags
& DIF_TF_BYREF
) {
9436 if (vt
->dtdt_size
== 0) {
9437 err
+= efunc(i
, "zero-sized variable\n");
9441 if (v
->dtdv_scope
== DIFV_SCOPE_GLOBAL
&&
9442 vt
->dtdt_size
> dtrace_global_maxsize
) {
9443 err
+= efunc(i
, "oversized by-ref global\n");
9448 if (existing
== NULL
|| existing
->dtdv_id
== 0)
9451 ASSERT(existing
->dtdv_id
== v
->dtdv_id
);
9452 ASSERT(existing
->dtdv_scope
== v
->dtdv_scope
);
9454 if (existing
->dtdv_kind
!= v
->dtdv_kind
)
9455 err
+= efunc(i
, "%d changed variable kind\n", id
);
9457 et
= &existing
->dtdv_type
;
9459 if (vt
->dtdt_flags
!= et
->dtdt_flags
) {
9460 err
+= efunc(i
, "%d changed variable type flags\n", id
);
9464 if (vt
->dtdt_size
!= 0 && vt
->dtdt_size
!= et
->dtdt_size
) {
9465 err
+= efunc(i
, "%d changed variable type size\n", id
);
/*
 * Validate a DTrace DIF object that is to be used as a helper.  Helpers
 * are much more constrained than normal DIFOs.  Specifically, they may
 * not:
 *
 * 1. Make calls to subroutines other than copyin(), copyinstr() or
 *    miscellaneous string routines
 * 2. Access DTrace variables other than the args[] array, and the
 *    curthread, pid, ppid, tid, execname, zonename, uid and gid variables.
 * 3. Have thread-local variables.
 * 4. Have dynamic variables.
 */
static int
dtrace_difo_validate_helper(dtrace_difo_t *dp)
{
	int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err;

	for (pc = 0; pc < dp->dtdo_len; pc++) {
		dif_instr_t instr = dp->dtdo_buf[pc];
		uint_t v = DIF_INSTR_VAR(instr);
		uint_t subr = DIF_INSTR_SUBR(instr);
		uint_t op = DIF_INSTR_OP(instr);

		case DIF_OP_FLUSHTS:

			if (v >= DIF_VAR_OTHER_UBASE)
				break;

			if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9)
				break;

			if (v == DIF_VAR_CURTHREAD || v == DIF_VAR_PID ||
			    v == DIF_VAR_PPID || v == DIF_VAR_TID ||
			    v == DIF_VAR_EXECNAME || v == DIF_VAR_ZONENAME ||
			    v == DIF_VAR_UID || v == DIF_VAR_GID)
				break;

			err += efunc(pc, "illegal variable %u\n", v);

			err += efunc(pc, "illegal dynamic variable load\n");

			err += efunc(pc, "illegal dynamic variable store\n");

			if (subr == DIF_SUBR_ALLOCA ||
			    subr == DIF_SUBR_BCOPY ||
			    subr == DIF_SUBR_COPYIN ||
			    subr == DIF_SUBR_COPYINTO ||
			    subr == DIF_SUBR_COPYINSTR ||
			    subr == DIF_SUBR_INDEX ||
			    subr == DIF_SUBR_INET_NTOA ||
			    subr == DIF_SUBR_INET_NTOA6 ||
			    subr == DIF_SUBR_INET_NTOP ||
			    subr == DIF_SUBR_LLTOSTR ||
			    subr == DIF_SUBR_RINDEX ||
			    subr == DIF_SUBR_STRCHR ||
			    subr == DIF_SUBR_STRJOIN ||
			    subr == DIF_SUBR_STRRCHR ||
			    subr == DIF_SUBR_STRSTR ||
#if defined(__APPLE__)
			    subr == DIF_SUBR_COREPROFILE ||
#endif /* __APPLE__ */
			    subr == DIF_SUBR_HTONS ||
			    subr == DIF_SUBR_HTONL ||
			    subr == DIF_SUBR_HTONLL ||
			    subr == DIF_SUBR_NTOHS ||
			    subr == DIF_SUBR_NTOHL ||
			    subr == DIF_SUBR_NTOHLL)
				break;

			err += efunc(pc, "invalid subr %u\n", subr);

			err += efunc(pc, "invalid opcode %u\n",
			    DIF_INSTR_OP(instr));
	}
/*
 * Returns 1 if the expression in the DIF object can be cached on a per-thread
 * basis; 0 otherwise.
 */
static int
dtrace_difo_cacheable(dtrace_difo_t *dp)
{
#if !defined(__APPLE__) /* Quiet compiler warnings */
#endif /* __APPLE__ */

	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];

		if (v->dtdv_scope != DIFV_SCOPE_GLOBAL)
			continue;

		switch (v->dtdv_id) {
		case DIF_VAR_CURTHREAD:
		case DIF_VAR_EXECNAME:
		case DIF_VAR_ZONENAME:
			break;

		default:
			return (0);
		}
	}

	/*
	 * This DIF object may be cacheable.  Now we need to look for any
	 * array loading instructions, any memory loading instructions, or
	 * any stores to thread-local variables.
	 */
	for (i = 0; i < dp->dtdo_len; i++) {
		uint_t op = DIF_INSTR_OP(dp->dtdo_buf[i]);

		if ((op >= DIF_OP_LDSB && op <= DIF_OP_LDX) ||
		    (op >= DIF_OP_ULDSB && op <= DIF_OP_ULDX) ||
		    (op >= DIF_OP_RLDSB && op <= DIF_OP_RLDX) ||
		    op == DIF_OP_LDGA || op == DIF_OP_STTS)
			return (0);
	}

	return (1);
}
static void
dtrace_difo_hold(dtrace_difo_t *dp)
{
#if !defined(__APPLE__) /* Quiet compiler warnings */
#endif /* __APPLE__ */

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	ASSERT(dp->dtdo_refcnt != 0);

	/*
	 * We need to check this DIF object for references to the variable
	 * DIF_VAR_VTIMESTAMP.
	 */
	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];

		if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
			continue;

		if (dtrace_vtime_references++ == 0)
			dtrace_vtime_enable();
	}
}
/*
 * This routine calculates the dynamic variable chunksize for a given DIF
 * object.  The calculation is not fool-proof, and can probably be tricked by
 * malicious DIF -- but it works for all compiler-generated DIF.  Because this
 * calculation is likely imperfect, dtrace_dynvar() is able to gracefully fail
 * if a dynamic variable size exceeds the chunksize.
 */
static void
dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
{
#if !defined(__APPLE__) /* Quiet compiler warnings */
#endif /* __APPLE__ */
	dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
	const dif_instr_t *text = dp->dtdo_buf;

	for (pc = 0; pc < dp->dtdo_len; pc++) {
		dif_instr_t instr = text[pc];
		uint_t op = DIF_INSTR_OP(instr);
		uint_t rd = DIF_INSTR_RD(instr);
		uint_t r1 = DIF_INSTR_R1(instr);

		dtrace_key_t *key = tupregs;

			sval = dp->dtdo_inttab[DIF_INSTR_INTEGER(instr)];

			key = &tupregs[DIF_DTR_NREGS];
			key[0].dttk_size = 0;
			key[1].dttk_size = 0;
			scope = DIFV_SCOPE_THREAD;

			if (DIF_INSTR_OP(instr) == DIF_OP_STTAA)
				key[nkeys++].dttk_size = 0;

			key[nkeys++].dttk_size = 0;

			if (op == DIF_OP_STTAA) {
				scope = DIFV_SCOPE_THREAD;
			} else {
				scope = DIFV_SCOPE_GLOBAL;
			}

			if (ttop == DIF_DTR_NREGS)
				return;

			if ((srd == 0 || sval == 0) && r1 == DIF_TYPE_STRING) {
				/*
				 * If the register for the size of the "pushtr"
				 * is %r0 (or the value is 0) and the type is
				 * a string, we'll use the system-wide default
				 * string size.
				 */
				tupregs[ttop++].dttk_size =
				    dtrace_strsize_default;
			} else {
				tupregs[ttop++].dttk_size = sval;
			}

			if (ttop == DIF_DTR_NREGS)
				return;

			tupregs[ttop++].dttk_size = 0;

		case DIF_OP_FLUSHTS:

		/*
		 * We have a dynamic variable allocation; calculate its size.
		 */
		for (ksize = 0, i = 0; i < nkeys; i++)
			ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t));

		size = sizeof (dtrace_dynvar_t);
		size += sizeof (dtrace_key_t) * (nkeys - 1);

		/*
		 * Now we need to determine the size of the stored data.
		 */
		id = DIF_INSTR_VAR(instr);

		for (i = 0; i < dp->dtdo_varlen; i++) {
			dtrace_difv_t *v = &dp->dtdo_vartab[i];

			if (v->dtdv_id == id && v->dtdv_scope == scope) {
				size += v->dtdv_type.dtdt_size;
				break;
			}
		}

		if (i == dp->dtdo_varlen)
			return;

		/*
		 * We have the size.  If this is larger than the chunk size
		 * for our dynamic variable state, reset the chunk size.
		 */
		size = P2ROUNDUP(size, sizeof (uint64_t));

		if (size > vstate->dtvs_dynvars.dtds_chunksize)
			vstate->dtvs_dynvars.dtds_chunksize = size;
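
		/*
		 * To make the rounding above concrete:  a 4-byte key
		 * contributes a full 8 bytes to ksize, since
		 * P2ROUNDUP(4, sizeof (uint64_t)) is 8, and the final size
		 * is likewise rounded up to an 8-byte multiple before it is
		 * compared against (and possibly becomes) the chunksize.
		 */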
	}
}

static void
dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
{
#if !defined(__APPLE__) /* Quiet compiler warnings */
	int i, oldsvars, osz, nsz, otlocals, ntlocals;
#else
	int oldsvars, osz, nsz, otlocals, ntlocals;
#endif /* __APPLE__ */

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(dp->dtdo_buf != NULL && dp->dtdo_len != 0);

	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];
#if !defined(__APPLE__) /* Quiet compiler warnings */
		dtrace_statvar_t *svar, ***svarp;
#else
		dtrace_statvar_t *svar;
		dtrace_statvar_t ***svarp = NULL;
#endif /* __APPLE__ */
		uint8_t scope = v->dtdv_scope;
		int *np = (int *)NULL;

		if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE)
			continue;

		id -= DIF_VAR_OTHER_UBASE;

		switch (scope) {
		case DIFV_SCOPE_THREAD:
#if !defined(__APPLE__) /* Quiet compiler warnings */
			while (id >= (otlocals = vstate->dtvs_ntlocals)) {
#else
			while (id >= (uint_t)(otlocals = vstate->dtvs_ntlocals)) {
#endif /* __APPLE__ */
				dtrace_difv_t *tlocals;

				if ((ntlocals = (otlocals << 1)) == 0)
					ntlocals = 1;

				osz = otlocals * sizeof (dtrace_difv_t);
				nsz = ntlocals * sizeof (dtrace_difv_t);

				tlocals = kmem_zalloc(nsz, KM_SLEEP);

				if (osz != 0) {
					bcopy(vstate->dtvs_tlocals,
					    tlocals, osz);
					kmem_free(vstate->dtvs_tlocals, osz);
				}

				vstate->dtvs_tlocals = tlocals;
				vstate->dtvs_ntlocals = ntlocals;
			}

			vstate->dtvs_tlocals[id] = *v;
			continue;

		case DIFV_SCOPE_LOCAL:
			np = &vstate->dtvs_nlocals;
			svarp = &vstate->dtvs_locals;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF)
				dsize = (int)NCPU * (v->dtdv_type.dtdt_size +
				    sizeof (uint64_t));
			else
				dsize = (int)NCPU * sizeof (uint64_t);

			break;

		case DIFV_SCOPE_GLOBAL:
			np = &vstate->dtvs_nglobals;
			svarp = &vstate->dtvs_globals;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF)
				dsize = v->dtdv_type.dtdt_size +
				    sizeof (uint64_t);

			break;
		}

#if !defined(__APPLE__) /* Quiet compiler warnings */
		while (id >= (oldsvars = *np)) {
#else
		while (id >= (uint_t)(oldsvars = *np)) {
#endif /* __APPLE__ */
			dtrace_statvar_t **statics;
			int newsvars, oldsize, newsize;

			if ((newsvars = (oldsvars << 1)) == 0)
				newsvars = 1;

			oldsize = oldsvars * sizeof (dtrace_statvar_t *);
			newsize = newsvars * sizeof (dtrace_statvar_t *);

			statics = kmem_zalloc(newsize, KM_SLEEP);

			if (oldsize != 0) {
				bcopy(*svarp, statics, oldsize);
				kmem_free(*svarp, oldsize);
			}
		}

		if ((svar = (*svarp)[id]) == NULL) {
			svar = kmem_zalloc(sizeof (dtrace_statvar_t), KM_SLEEP);
			svar->dtsv_var = *v;

			if ((svar->dtsv_size = dsize) != 0) {
				svar->dtsv_data = (uint64_t)(uintptr_t)
				    kmem_zalloc(dsize, KM_SLEEP);
			}

			(*svarp)[id] = svar;
		}

		svar->dtsv_refcnt++;
	}

	dtrace_difo_chunksize(dp, vstate);
	dtrace_difo_hold(dp);
}
static dtrace_difo_t *
dtrace_difo_duplicate(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
{
	ASSERT(dp->dtdo_buf != NULL);
	ASSERT(dp->dtdo_refcnt != 0);

	new = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP);

	ASSERT(dp->dtdo_buf != NULL);
	sz = dp->dtdo_len * sizeof (dif_instr_t);
	new->dtdo_buf = kmem_alloc(sz, KM_SLEEP);
	bcopy(dp->dtdo_buf, new->dtdo_buf, sz);
	new->dtdo_len = dp->dtdo_len;

	if (dp->dtdo_strtab != NULL) {
		ASSERT(dp->dtdo_strlen != 0);
		new->dtdo_strtab = kmem_alloc(dp->dtdo_strlen, KM_SLEEP);
		bcopy(dp->dtdo_strtab, new->dtdo_strtab, dp->dtdo_strlen);
		new->dtdo_strlen = dp->dtdo_strlen;
	}

	if (dp->dtdo_inttab != NULL) {
		ASSERT(dp->dtdo_intlen != 0);
		sz = dp->dtdo_intlen * sizeof (uint64_t);
		new->dtdo_inttab = kmem_alloc(sz, KM_SLEEP);
		bcopy(dp->dtdo_inttab, new->dtdo_inttab, sz);
		new->dtdo_intlen = dp->dtdo_intlen;
	}

	if (dp->dtdo_vartab != NULL) {
		ASSERT(dp->dtdo_varlen != 0);
		sz = dp->dtdo_varlen * sizeof (dtrace_difv_t);
		new->dtdo_vartab = kmem_alloc(sz, KM_SLEEP);
		bcopy(dp->dtdo_vartab, new->dtdo_vartab, sz);
		new->dtdo_varlen = dp->dtdo_varlen;
	}

	dtrace_difo_init(new, vstate);
	return (new);
}
static void
dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
{
#if !defined(__APPLE__) /* Quiet compiler warnings */
#endif /* __APPLE__ */

	ASSERT(dp->dtdo_refcnt == 0);

	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];
#if !defined(__APPLE__) /* Quiet compiler warnings */
		dtrace_statvar_t *svar, **svarp;
		uint8_t scope = v->dtdv_scope;
#else
		dtrace_statvar_t *svar;
		dtrace_statvar_t **svarp = NULL;
		uint8_t scope = v->dtdv_scope;
#endif /* __APPLE__ */

		switch (scope) {
		case DIFV_SCOPE_THREAD:
			continue;

		case DIFV_SCOPE_LOCAL:
			np = &vstate->dtvs_nlocals;
			svarp = vstate->dtvs_locals;
			break;

		case DIFV_SCOPE_GLOBAL:
			np = &vstate->dtvs_nglobals;
			svarp = vstate->dtvs_globals;
			break;
		}

		if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE)
			continue;

		id -= DIF_VAR_OTHER_UBASE;

#if !defined(__APPLE__) /* Quiet compiler warnings */
		ASSERT(id < *np);
#else
		ASSERT(id < (uint_t)*np);
#endif /* __APPLE__ */

		svar = svarp[id];
		ASSERT(svar != NULL);
		ASSERT(svar->dtsv_refcnt > 0);

		if (--svar->dtsv_refcnt > 0)
			continue;

		if (svar->dtsv_size != 0) {
			ASSERT(svar->dtsv_data != NULL);
			kmem_free((void *)(uintptr_t)svar->dtsv_data,
			    svar->dtsv_size);
		}

		kmem_free(svar, sizeof (dtrace_statvar_t));
	}

	kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t));
	kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t));
	kmem_free(dp->dtdo_strtab, dp->dtdo_strlen);
	kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t));

	kmem_free(dp, sizeof (dtrace_difo_t));
}
static void
dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
{
#if !defined(__APPLE__) /* Quiet compiler warnings */
#endif /* __APPLE__ */

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(dp->dtdo_refcnt != 0);

	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];

		if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
			continue;

		ASSERT(dtrace_vtime_references > 0);
		if (--dtrace_vtime_references == 0)
			dtrace_vtime_disable();
	}

	if (--dp->dtdo_refcnt == 0)
		dtrace_difo_destroy(dp, vstate);
}
/*
 * DTrace Format Functions
 */
static uint16_t
dtrace_format_add(dtrace_state_t *state, char *str)
{
	uint16_t ndx, len = strlen(str) + 1;

	fmt = kmem_zalloc(len, KM_SLEEP);
	bcopy(str, fmt, len);

	for (ndx = 0; ndx < state->dts_nformats; ndx++) {
		if (state->dts_formats[ndx] == NULL) {
			state->dts_formats[ndx] = fmt;
			return (ndx + 1);
		}
	}

	if (state->dts_nformats == USHRT_MAX) {
		/*
		 * This is only likely if a denial-of-service attack is being
		 * attempted.  As such, it's okay to fail silently here.
		 */
		kmem_free(fmt, len);
		return (0);
	}

	/*
	 * For simplicity, we always resize the formats array to be exactly the
	 * number of formats.
	 */
	ndx = state->dts_nformats++;
	new = kmem_alloc((ndx + 1) * sizeof (char *), KM_SLEEP);

	if (state->dts_formats != NULL) {
		bcopy(state->dts_formats, new, ndx * sizeof (char *));
		kmem_free(state->dts_formats, ndx * sizeof (char *));
	}

	state->dts_formats = new;
	state->dts_formats[ndx] = fmt;

	return (ndx + 1);
}
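
/*
 * Note that the formats table only ever grows by a single slot at a time:
 * adding the nth format allocates an n-entry array and copies the previous
 * n - 1 pointers, so registering many formats is quadratic in the number of
 * formats -- a reasonable trade-off given how rarely formats are added.
 */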
static void
dtrace_format_remove(dtrace_state_t *state, uint16_t format)
{
	ASSERT(state->dts_formats != NULL);
	ASSERT(format <= state->dts_nformats);
	ASSERT(state->dts_formats[format - 1] != NULL);

	fmt = state->dts_formats[format - 1];
	kmem_free(fmt, strlen(fmt) + 1);
	state->dts_formats[format - 1] = NULL;
}

static void
dtrace_format_destroy(dtrace_state_t *state)
{
	if (state->dts_nformats == 0) {
		ASSERT(state->dts_formats == NULL);
		return;
	}

	ASSERT(state->dts_formats != NULL);

	for (i = 0; i < state->dts_nformats; i++) {
		char *fmt = state->dts_formats[i];

		if (fmt == NULL)
			continue;

		kmem_free(fmt, strlen(fmt) + 1);
	}

	kmem_free(state->dts_formats, state->dts_nformats * sizeof (char *));
	state->dts_nformats = 0;
	state->dts_formats = NULL;
}
/*
 * DTrace Predicate Functions
 */
static dtrace_predicate_t *
dtrace_predicate_create(dtrace_difo_t *dp)
{
	dtrace_predicate_t *pred;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(dp->dtdo_refcnt != 0);

	pred = kmem_zalloc(sizeof (dtrace_predicate_t), KM_SLEEP);
	pred->dtp_difo = dp;
	pred->dtp_refcnt = 1;

	if (!dtrace_difo_cacheable(dp))
		return (pred);

	if (dtrace_predcache_id == DTRACE_CACHEIDNONE) {
		/*
		 * This is only theoretically possible -- we have had 2^32
		 * cacheable predicates on this machine.  We cannot allow any
		 * more predicates to become cacheable:  as unlikely as it is,
		 * there may be a thread caching a (now stale) predicate cache
		 * ID. (N.B.: the temptation is being successfully resisted to
		 * have this cmn_err() "Holy shit -- we executed this code!")
		 */
		return (pred);
	}

	pred->dtp_cacheid = dtrace_predcache_id++;

	return (pred);
}

static void
dtrace_predicate_hold(dtrace_predicate_t *pred)
{
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(pred->dtp_difo != NULL && pred->dtp_difo->dtdo_refcnt != 0);
	ASSERT(pred->dtp_refcnt > 0);

	pred->dtp_refcnt++;
}

static void
dtrace_predicate_release(dtrace_predicate_t *pred, dtrace_vstate_t *vstate)
{
	dtrace_difo_t *dp = pred->dtp_difo;
#pragma unused(dp) /* __APPLE__ */

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(dp != NULL && dp->dtdo_refcnt != 0);
	ASSERT(pred->dtp_refcnt > 0);

	if (--pred->dtp_refcnt == 0) {
		dtrace_difo_release(pred->dtp_difo, vstate);
		kmem_free(pred, sizeof (dtrace_predicate_t));
	}
}
/*
 * DTrace Action Description Functions
 */
static dtrace_actdesc_t *
dtrace_actdesc_create(dtrace_actkind_t kind, uint32_t ntuple,
    uint64_t uarg, uint64_t arg)
{
	dtrace_actdesc_t *act;

	ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != NULL &&
	    arg >= KERNELBASE) || (arg == NULL && kind == DTRACEACT_PRINTA));

	act = kmem_zalloc(sizeof (dtrace_actdesc_t), KM_SLEEP);
	act->dtad_kind = kind;
	act->dtad_ntuple = ntuple;
	act->dtad_uarg = uarg;
	act->dtad_arg = arg;
	act->dtad_refcnt = 1;

	return (act);
}

static void
dtrace_actdesc_hold(dtrace_actdesc_t *act)
{
	ASSERT(act->dtad_refcnt >= 1);
	act->dtad_refcnt++;
}

static void
dtrace_actdesc_release(dtrace_actdesc_t *act, dtrace_vstate_t *vstate)
{
	dtrace_actkind_t kind = act->dtad_kind;

	ASSERT(act->dtad_refcnt >= 1);

	if (--act->dtad_refcnt != 0)
		return;

	if ((dp = act->dtad_difo) != NULL)
		dtrace_difo_release(dp, vstate);

	if (DTRACEACT_ISPRINTFLIKE(kind)) {
		char *str = (char *)(uintptr_t)act->dtad_arg;

		ASSERT((str != NULL && (uintptr_t)str >= KERNELBASE) ||
		    (str == NULL && act->dtad_kind == DTRACEACT_PRINTA));

		if (str != NULL)
			kmem_free(str, strlen(str) + 1);
	}

	kmem_free(act, sizeof (dtrace_actdesc_t));
}
/*
 * DTrace ECB Functions
 */
static dtrace_ecb_t *
dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe)
{
	dtrace_epid_t epid;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	ecb = kmem_zalloc(sizeof (dtrace_ecb_t), KM_SLEEP);
	ecb->dte_predicate = NULL;
	ecb->dte_probe = probe;

	/*
	 * The default size is the size of the default action: recording
	 * the epid.
	 */
	ecb->dte_size = ecb->dte_needed = sizeof (dtrace_epid_t);
	ecb->dte_alignment = sizeof (dtrace_epid_t);

	epid = state->dts_epid++;

#if !defined(__APPLE__) /* Quiet compiler warnings */
	if (epid - 1 >= state->dts_necbs) {
#else
	if (epid - 1 >= (dtrace_epid_t)state->dts_necbs) {
#endif /* __APPLE__ */
		dtrace_ecb_t **oecbs = state->dts_ecbs, **ecbs;
		int necbs = state->dts_necbs << 1;

#if !defined(__APPLE__) /* Quiet compiler warnings */
		ASSERT(epid == state->dts_necbs + 1);
#else
		ASSERT(epid == (dtrace_epid_t)state->dts_necbs + 1);
#endif /* __APPLE__ */

		if (necbs == 0) {
			ASSERT(oecbs == NULL);
			necbs = 1;
		}

		ecbs = kmem_zalloc(necbs * sizeof (*ecbs), KM_SLEEP);

		if (oecbs != NULL)
			bcopy(oecbs, ecbs, state->dts_necbs * sizeof (*ecbs));

		dtrace_membar_producer();
		state->dts_ecbs = ecbs;

		if (oecbs != NULL) {
			/*
			 * If this state is active, we must dtrace_sync()
			 * before we can free the old dts_ecbs array: we're
			 * coming in hot, and there may be active ring
			 * buffer processing (which indexes into the dts_ecbs
			 * array) on another CPU.
			 */
			if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
				dtrace_sync();

			kmem_free(oecbs, state->dts_necbs * sizeof (*ecbs));
		}

		dtrace_membar_producer();
		state->dts_necbs = necbs;
	}

	ecb->dte_state = state;

	ASSERT(state->dts_ecbs[epid - 1] == NULL);
	dtrace_membar_producer();
	state->dts_ecbs[(ecb->dte_epid = epid) - 1] = ecb;

	return (ecb);
}
static int
dtrace_ecb_enable(dtrace_ecb_t *ecb)
{
	dtrace_probe_t *probe = ecb->dte_probe;

	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(ecb->dte_next == NULL);

	if (probe == NULL) {
		/*
		 * This is the NULL probe -- there's nothing to do.
		 */
		return (0);
	}

	probe->dtpr_provider->ecb_count++;
	if (probe->dtpr_ecb == NULL) {
		dtrace_provider_t *prov = probe->dtpr_provider;

		/*
		 * We're the first ECB on this probe.
		 */
		probe->dtpr_ecb = probe->dtpr_ecb_last = ecb;

		if (ecb->dte_predicate != NULL)
			probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid;

		return (prov->dtpv_pops.dtps_enable(prov->dtpv_arg,
		    probe->dtpr_id, probe->dtpr_arg));
	} else {
		/*
		 * This probe is already active.  Swing the last pointer to
		 * point to the new ECB, and issue a dtrace_sync() to assure
		 * that all CPUs have seen the change.
		 */
		ASSERT(probe->dtpr_ecb_last != NULL);
		probe->dtpr_ecb_last->dte_next = ecb;
		probe->dtpr_ecb_last = ecb;
		probe->dtpr_predcache = 0;

		dtrace_sync();
		return (0);
	}
}
static void
dtrace_ecb_resize(dtrace_ecb_t *ecb)
{
	uint32_t maxalign = sizeof (dtrace_epid_t);
	uint32_t align = sizeof (uint8_t), offs, diff;
	dtrace_action_t *act;
	uint32_t aggbase = UINT32_MAX;
	dtrace_state_t *state = ecb->dte_state;

	/*
	 * If we record anything, we always record the epid.  (And we always
	 * record it first.)
	 */
	offs = sizeof (dtrace_epid_t);
	ecb->dte_size = ecb->dte_needed = sizeof (dtrace_epid_t);

	for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
		dtrace_recdesc_t *rec = &act->dta_rec;

		if ((align = rec->dtrd_alignment) > maxalign)
			maxalign = align;

		if (!wastuple && act->dta_intuple) {
			/*
			 * This is the first record in a tuple.  Align the
			 * offset to be at offset 4 in an 8-byte aligned
			 * block.
			 */
			diff = offs + sizeof (dtrace_aggid_t);

			if ((diff = (diff & (sizeof (uint64_t) - 1))))
				offs += sizeof (uint64_t) - diff;

			aggbase = offs - sizeof (dtrace_aggid_t);
			ASSERT(!(aggbase & (sizeof (uint64_t) - 1)));
		}
		if (rec->dtrd_size != 0 && (diff = (offs & (align - 1)))) {
			/*
			 * The current offset is not properly aligned; align it.
			 */
			offs += align - diff;
		}

		rec->dtrd_offset = offs;

		if (offs + rec->dtrd_size > ecb->dte_needed) {
			ecb->dte_needed = offs + rec->dtrd_size;

			if (ecb->dte_needed > state->dts_needed)
				state->dts_needed = ecb->dte_needed;
		}

		if (DTRACEACT_ISAGG(act->dta_kind)) {
			dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
			dtrace_action_t *first = agg->dtag_first, *prev;

			ASSERT(rec->dtrd_size != 0 && first != NULL);
			ASSERT(aggbase != UINT32_MAX);

			agg->dtag_base = aggbase;

			while ((prev = first->dta_prev) != NULL &&
			    DTRACEACT_ISAGG(prev->dta_kind)) {
				agg = (dtrace_aggregation_t *)prev;
				first = agg->dtag_first;
			}

			if (prev != NULL) {
				offs = prev->dta_rec.dtrd_offset +
				    prev->dta_rec.dtrd_size;
			} else {
				offs = sizeof (dtrace_epid_t);
			}
		} else {
			if (!act->dta_intuple)
				ecb->dte_size = offs + rec->dtrd_size;

			offs += rec->dtrd_size;
		}

		wastuple = act->dta_intuple;
	}

	if ((act = ecb->dte_action) != NULL &&
	    !(act->dta_kind == DTRACEACT_SPECULATE && act->dta_next == NULL) &&
	    ecb->dte_size == sizeof (dtrace_epid_t)) {
		/*
		 * If the size is still sizeof (dtrace_epid_t), then all
		 * actions store no data; set the size to 0.
		 */
		ecb->dte_alignment = maxalign;
		ecb->dte_size = 0;

		/*
		 * If the needed space is still sizeof (dtrace_epid_t), then
		 * all actions need no additional space; set the needed
		 * size to 0.
		 */
		if (ecb->dte_needed == sizeof (dtrace_epid_t))
			ecb->dte_needed = 0;

		return;
	}

	/*
	 * Set our alignment, and make sure that the dte_size and dte_needed
	 * are aligned to the size of an EPID.
	 */
	ecb->dte_alignment = maxalign;
	ecb->dte_size = (ecb->dte_size + (sizeof (dtrace_epid_t) - 1)) &
	    ~(sizeof (dtrace_epid_t) - 1);
	ecb->dte_needed = (ecb->dte_needed + (sizeof (dtrace_epid_t) - 1)) &
	    ~(sizeof (dtrace_epid_t) - 1);
	ASSERT(ecb->dte_size <= ecb->dte_needed);
}
static dtrace_action_t *
dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
{
	dtrace_aggregation_t *agg;
	size_t size = sizeof (uint64_t);
	int ntuple = desc->dtad_ntuple;
	dtrace_action_t *act;
	dtrace_recdesc_t *frec;
	dtrace_aggid_t aggid;
	dtrace_state_t *state = ecb->dte_state;

	agg = kmem_zalloc(sizeof (dtrace_aggregation_t), KM_SLEEP);
	agg->dtag_ecb = ecb;

	ASSERT(DTRACEACT_ISAGG(desc->dtad_kind));

	switch (desc->dtad_kind) {
	case DTRACEAGG_MIN:
		agg->dtag_initial = INT64_MAX;
		agg->dtag_aggregate = dtrace_aggregate_min;
		break;

	case DTRACEAGG_MAX:
		agg->dtag_initial = INT64_MIN;
		agg->dtag_aggregate = dtrace_aggregate_max;
		break;

	case DTRACEAGG_COUNT:
		agg->dtag_aggregate = dtrace_aggregate_count;
		break;

	case DTRACEAGG_QUANTIZE:
		agg->dtag_aggregate = dtrace_aggregate_quantize;
		size = (((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) *
		    sizeof (uint64_t);
		break;
: {
10628 uint16_t step
= DTRACE_LQUANTIZE_STEP(desc
->dtad_arg
);
10629 uint16_t levels
= DTRACE_LQUANTIZE_LEVELS(desc
->dtad_arg
);
10631 agg
->dtag_initial
= desc
->dtad_arg
;
10632 agg
->dtag_aggregate
= dtrace_aggregate_lquantize
;
10634 if (step
== 0 || levels
== 0)
10637 size
= levels
* sizeof (uint64_t) + 3 * sizeof (uint64_t);
10641 case DTRACEAGG_LLQUANTIZE
: {
10642 uint16_t factor
= DTRACE_LLQUANTIZE_FACTOR(desc
->dtad_arg
);
10643 uint16_t low
= DTRACE_LLQUANTIZE_LOW(desc
->dtad_arg
);
10644 uint16_t high
= DTRACE_LLQUANTIZE_HIGH(desc
->dtad_arg
);
10645 uint16_t nsteps
= DTRACE_LLQUANTIZE_NSTEP(desc
->dtad_arg
);
10648 agg
->dtag_initial
= desc
->dtad_arg
;
10649 agg
->dtag_aggregate
= dtrace_aggregate_llquantize
;
10651 if (factor
< 2 || low
>= high
|| nsteps
< factor
)
10655 * Now check that the number of steps evenly divides a power
10656 * of the factor. (This assures both integer bucket size and
10657 * linearity within each magnitude.)
10659 for (v
= factor
; v
< nsteps
; v
*= factor
)
10662 if ((v
% nsteps
) || (nsteps
% factor
))
10665 size
= (dtrace_aggregate_llquantize_bucket(factor
, low
, high
, nsteps
, INT64_MAX
) + 2) * sizeof (uint64_t);
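
	/*
	 * For example, an llquantize() with factor 10 and nsteps 20 passes
	 * the check above:  v grows from 10 to 100 (the first power of the
	 * factor that is >= nsteps), and 100 % 20 == 0 while 20 % 10 == 0.
	 * A factor of 10 with nsteps 15 would fail, since 100 % 15 != 0.
	 */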
	case DTRACEAGG_AVG:
		agg->dtag_aggregate = dtrace_aggregate_avg;
		size = sizeof (uint64_t) * 2;
		break;

	case DTRACEAGG_STDDEV:
		agg->dtag_aggregate = dtrace_aggregate_stddev;
		size = sizeof (uint64_t) * 4;
		break;

	case DTRACEAGG_SUM:
		agg->dtag_aggregate = dtrace_aggregate_sum;
		break;

	default:
		goto err;
	}

	agg->dtag_action.dta_rec.dtrd_size = size;

	/*
	 * We must make sure that we have enough actions for the n-tuple.
	 */
	for (act = ecb->dte_action_last; act != NULL; act = act->dta_prev) {
		if (DTRACEACT_ISAGG(act->dta_kind))
			break;

		if (--ntuple == 0) {
			/*
			 * This is the action with which our n-tuple begins.
			 */
			agg->dtag_first = act;
			goto success;
		}
	}

	/*
	 * This n-tuple is short by ntuple elements.  Return failure.
	 */
	ASSERT(ntuple != 0);
err:
	kmem_free(agg, sizeof (dtrace_aggregation_t));
	return (NULL);

success:
	/*
	 * If the last action in the tuple has a size of zero, it's actually
	 * an expression argument for the aggregating action.
	 */
	ASSERT(ecb->dte_action_last != NULL);
	act = ecb->dte_action_last;

	if (act->dta_kind == DTRACEACT_DIFEXPR) {
		ASSERT(act->dta_difo != NULL);

		if (act->dta_difo->dtdo_rtype.dtdt_size == 0)
			agg->dtag_hasarg = 1;
	}

	/*
	 * We need to allocate an id for this aggregation.
	 */
	aggid = (dtrace_aggid_t)(uintptr_t)vmem_alloc(state->dts_aggid_arena, 1,
	    VM_BESTFIT | VM_SLEEP);

#if !defined(__APPLE__) /* Quiet compiler warnings */
	if (aggid - 1 >= state->dts_naggregations) {
#else
	if (aggid - 1 >= (dtrace_aggid_t)state->dts_naggregations) {
#endif /* __APPLE__ */
		dtrace_aggregation_t **oaggs = state->dts_aggregations;
		dtrace_aggregation_t **aggs;
		int naggs = state->dts_naggregations << 1;
		int onaggs = state->dts_naggregations;

#if !defined(__APPLE__) /* Quiet compiler warnings */
		ASSERT(aggid == state->dts_naggregations + 1);
#else
		ASSERT(aggid == (dtrace_aggid_t)state->dts_naggregations + 1);
#endif /* __APPLE__ */

		if (naggs == 0) {
			ASSERT(oaggs == NULL);
			naggs = 1;
		}

		aggs = kmem_zalloc(naggs * sizeof (*aggs), KM_SLEEP);

		if (oaggs != NULL) {
			bcopy(oaggs, aggs, onaggs * sizeof (*aggs));
			kmem_free(oaggs, onaggs * sizeof (*aggs));
		}

		state->dts_aggregations = aggs;
		state->dts_naggregations = naggs;
	}

	ASSERT(state->dts_aggregations[aggid - 1] == NULL);
	state->dts_aggregations[(agg->dtag_id = aggid) - 1] = agg;

	frec = &agg->dtag_first->dta_rec;
	if (frec->dtrd_alignment < sizeof (dtrace_aggid_t))
		frec->dtrd_alignment = sizeof (dtrace_aggid_t);

	for (act = agg->dtag_first; act != NULL; act = act->dta_next) {
		ASSERT(!act->dta_intuple);
		act->dta_intuple = 1;
	}

	return (&agg->dtag_action);
}
static void
dtrace_ecb_aggregation_destroy(dtrace_ecb_t *ecb, dtrace_action_t *act)
{
	dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
	dtrace_state_t *state = ecb->dte_state;
	dtrace_aggid_t aggid = agg->dtag_id;

	ASSERT(DTRACEACT_ISAGG(act->dta_kind));
	vmem_free(state->dts_aggid_arena, (void *)(uintptr_t)aggid, 1);

	ASSERT(state->dts_aggregations[aggid - 1] == agg);
	state->dts_aggregations[aggid - 1] = NULL;

	kmem_free(agg, sizeof (dtrace_aggregation_t));
}
static int
dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
{
	dtrace_action_t *action, *last;
	dtrace_difo_t *dp = desc->dtad_difo;
	uint32_t size = 0, align = sizeof (uint8_t), mask;
	uint16_t format = 0;
	dtrace_recdesc_t *rec;
	dtrace_state_t *state = ecb->dte_state;
#if !defined(__APPLE__) /* Quiet compiler warnings */
	dtrace_optval_t *opt = state->dts_options, nframes, strsize;
#else
	dtrace_optval_t *opt = state->dts_options;
	dtrace_optval_t nframes = 0, strsize;
#endif /* __APPLE__ */
	uint64_t arg = desc->dtad_arg;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(ecb->dte_action == NULL || ecb->dte_action->dta_refcnt == 1);

	if (DTRACEACT_ISAGG(desc->dtad_kind)) {
		/*
		 * If this is an aggregating action, there must be neither
		 * a speculate nor a commit on the action chain.
		 */
		dtrace_action_t *act;

		for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
			if (act->dta_kind == DTRACEACT_COMMIT)
				return (EINVAL);

			if (act->dta_kind == DTRACEACT_SPECULATE)
				return (EINVAL);
		}

		action = dtrace_ecb_aggregation_create(ecb, desc);

		if (action == NULL)
			return (EINVAL);
	} else {
		if (DTRACEACT_ISDESTRUCTIVE(desc->dtad_kind) ||
		    (desc->dtad_kind == DTRACEACT_DIFEXPR &&
		    dp != NULL && dp->dtdo_destructive)) {
			state->dts_destructive = 1;
		}

		switch (desc->dtad_kind) {
		case DTRACEACT_PRINTF:
		case DTRACEACT_PRINTA:
		case DTRACEACT_SYSTEM:
		case DTRACEACT_FREOPEN:
			/*
			 * We know that our arg is a string -- turn it into a
			 * format.
			 */
			if (arg == NULL) {
				ASSERT(desc->dtad_kind == DTRACEACT_PRINTA);
			} else {
				ASSERT(arg != NULL);
				ASSERT(arg > KERNELBASE);
				format = dtrace_format_add(state,
				    (char *)(uintptr_t)arg);
			}

			/*FALLTHROUGH*/
		case DTRACEACT_LIBACT:
		case DTRACEACT_DIFEXPR:
#if defined(__APPLE__)
		case DTRACEACT_APPLEBINARY:
#endif /* __APPLE__ */
			if ((size = dp->dtdo_rtype.dtdt_size) != 0)
				break;

			if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) {
				if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
					return (EINVAL);

				size = opt[DTRACEOPT_STRSIZE];
			}

			break;

		case DTRACEACT_STACK:
			if ((nframes = arg) == 0) {
				nframes = opt[DTRACEOPT_STACKFRAMES];
				ASSERT(nframes > 0);
				arg = nframes;
			}

			size = nframes * sizeof (pc_t);
			break;

		case DTRACEACT_JSTACK:
			if ((strsize = DTRACE_USTACK_STRSIZE(arg)) == 0)
				strsize = opt[DTRACEOPT_JSTACKSTRSIZE];

			if ((nframes = DTRACE_USTACK_NFRAMES(arg)) == 0)
				nframes = opt[DTRACEOPT_JSTACKFRAMES];

			arg = DTRACE_USTACK_ARG(nframes, strsize);

			/*FALLTHROUGH*/
		case DTRACEACT_USTACK:
			if (desc->dtad_kind != DTRACEACT_JSTACK &&
			    (nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) {
				strsize = DTRACE_USTACK_STRSIZE(arg);
				nframes = opt[DTRACEOPT_USTACKFRAMES];
				ASSERT(nframes > 0);
				arg = DTRACE_USTACK_ARG(nframes, strsize);
			}

			/*
			 * Save a slot for the pid.
			 */
			size = (nframes + 1) * sizeof (uint64_t);
			size += DTRACE_USTACK_STRSIZE(arg);
			size = P2ROUNDUP(size, (uint32_t)(sizeof (uintptr_t)));
			break;

		case DTRACEACT_SYM:
		case DTRACEACT_MOD:
			if (dp == NULL || ((size = dp->dtdo_rtype.dtdt_size) !=
			    sizeof (uint64_t)) ||
			    (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
				return (EINVAL);
			break;

		case DTRACEACT_USYM:
		case DTRACEACT_UMOD:
		case DTRACEACT_UADDR:
			if (dp == NULL ||
			    (dp->dtdo_rtype.dtdt_size != sizeof (uint64_t)) ||
			    (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
				return (EINVAL);

			/*
			 * We have a slot for the pid, plus a slot for the
			 * argument.  To keep things simple (aligned with
			 * bitness-neutral sizing), we store each as a 64-bit
			 * quantity.
			 */
			size = 2 * sizeof (uint64_t);
			break;

		case DTRACEACT_STOP:
		case DTRACEACT_BREAKPOINT:
		case DTRACEACT_PANIC:
			break;

		case DTRACEACT_CHILL:
		case DTRACEACT_DISCARD:
		case DTRACEACT_RAISE:
#if defined(__APPLE__)
		case DTRACEACT_PIDRESUME:
#endif /* __APPLE__ */
			break;

		case DTRACEACT_EXIT:
			if (dp == NULL ||
			    (size = dp->dtdo_rtype.dtdt_size) != sizeof (int) ||
			    (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
				return (EINVAL);
			break;

		case DTRACEACT_SPECULATE:
			if (ecb->dte_size > sizeof (dtrace_epid_t))
				return (EINVAL);

			state->dts_speculates = 1;
			break;

		case DTRACEACT_COMMIT: {
			dtrace_action_t *act = ecb->dte_action;

			for (; act != NULL; act = act->dta_next) {
				if (act->dta_kind == DTRACEACT_COMMIT)
					return (EINVAL);
			}
			break;
		}

		default:
			return (EINVAL);
		}

		if (size != 0 || desc->dtad_kind == DTRACEACT_SPECULATE) {
			/*
			 * If this is a data-storing action or a speculate,
			 * we must be sure that there isn't a commit on the
			 * action chain.
			 */
			dtrace_action_t *act = ecb->dte_action;

			for (; act != NULL; act = act->dta_next) {
				if (act->dta_kind == DTRACEACT_COMMIT)
					return (EINVAL);
			}
		}
	}

	action = kmem_zalloc(sizeof (dtrace_action_t), KM_SLEEP);
	action->dta_rec.dtrd_size = size;

	action->dta_refcnt = 1;
	rec = &action->dta_rec;
	size = rec->dtrd_size;

	for (mask = sizeof (uint64_t) - 1; size != 0 && mask > 0; mask >>= 1) {
		if (!(size & mask)) {
			align = mask + 1;
			break;
		}
	}

	action->dta_kind = desc->dtad_kind;

	if ((action->dta_difo = dp) != NULL)
		dtrace_difo_hold(dp);

	rec->dtrd_action = action->dta_kind;
	rec->dtrd_arg = arg;
	rec->dtrd_uarg = desc->dtad_uarg;
	rec->dtrd_alignment = (uint16_t)align;
	rec->dtrd_format = format;

	if ((last = ecb->dte_action_last) != NULL) {
		ASSERT(ecb->dte_action != NULL);
		action->dta_prev = last;
		last->dta_next = action;
	} else {
		ASSERT(ecb->dte_action == NULL);
		ecb->dte_action = action;
	}

	ecb->dte_action_last = action;

	return (0);
}
static void
dtrace_ecb_action_remove(dtrace_ecb_t *ecb)
{
	dtrace_action_t *act = ecb->dte_action, *next;
	dtrace_vstate_t *vstate = &ecb->dte_state->dts_vstate;

	if (act != NULL && act->dta_refcnt > 1) {
		ASSERT(act->dta_next == NULL || act->dta_next->dta_refcnt == 1);
		act->dta_refcnt--;
	} else {
		for (; act != NULL; act = next) {
			next = act->dta_next;
			ASSERT(next != NULL || act == ecb->dte_action_last);
			ASSERT(act->dta_refcnt == 1);

			if ((format = act->dta_rec.dtrd_format) != 0)
				dtrace_format_remove(ecb->dte_state, format);

			if ((dp = act->dta_difo) != NULL)
				dtrace_difo_release(dp, vstate);

			if (DTRACEACT_ISAGG(act->dta_kind)) {
				dtrace_ecb_aggregation_destroy(ecb, act);
			} else {
				kmem_free(act, sizeof (dtrace_action_t));
			}
		}
	}

	ecb->dte_action = NULL;
	ecb->dte_action_last = NULL;
	ecb->dte_size = sizeof (dtrace_epid_t);
}
static void
dtrace_ecb_disable(dtrace_ecb_t *ecb)
{
	/*
	 * We disable the ECB by removing it from its probe.
	 */
	dtrace_ecb_t *pecb, *prev = NULL;
	dtrace_probe_t *probe = ecb->dte_probe;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (probe == NULL) {
		/*
		 * This is the NULL probe; there is nothing to disable.
		 */
		return;
	}

	for (pecb = probe->dtpr_ecb; pecb != NULL; pecb = pecb->dte_next) {
		if (pecb == ecb)
			break;
		prev = pecb;
	}

	ASSERT(pecb != NULL);

	if (prev == NULL) {
		probe->dtpr_ecb = ecb->dte_next;
	} else {
		prev->dte_next = ecb->dte_next;
	}

	if (ecb == probe->dtpr_ecb_last) {
		ASSERT(ecb->dte_next == NULL);
		probe->dtpr_ecb_last = prev;
	}

	probe->dtpr_provider->ecb_count--;
	/*
	 * The ECB has been disconnected from the probe; now sync to assure
	 * that all CPUs have seen the change.
	 */
	dtrace_sync();

	if (probe->dtpr_ecb == NULL) {
		/*
		 * That was the last ECB on the probe; clear the predicate
		 * cache ID for the probe, disable it and sync one more time
		 * to assure that we'll never hit it again.
		 */
		dtrace_provider_t *prov = probe->dtpr_provider;

		ASSERT(ecb->dte_next == NULL);
		ASSERT(probe->dtpr_ecb_last == NULL);
		probe->dtpr_predcache = DTRACE_CACHEIDNONE;
		prov->dtpv_pops.dtps_disable(prov->dtpv_arg,
		    probe->dtpr_id, probe->dtpr_arg);
		dtrace_sync();
	} else {
		/*
		 * There is at least one ECB remaining on the probe.  If there
		 * is _exactly_ one, set the probe's predicate cache ID to be
		 * the predicate cache ID of the remaining ECB.
		 */
		ASSERT(probe->dtpr_ecb_last != NULL);
		ASSERT(probe->dtpr_predcache == DTRACE_CACHEIDNONE);

		if (probe->dtpr_ecb == probe->dtpr_ecb_last) {
			dtrace_predicate_t *p = probe->dtpr_ecb->dte_predicate;

			ASSERT(probe->dtpr_ecb->dte_next == NULL);

			if (p != NULL)
				probe->dtpr_predcache = p->dtp_cacheid;
		}

		ecb->dte_next = NULL;
	}
}
static void
dtrace_ecb_destroy(dtrace_ecb_t *ecb)
{
	dtrace_state_t *state = ecb->dte_state;
	dtrace_vstate_t *vstate = &state->dts_vstate;
	dtrace_predicate_t *pred;
	dtrace_epid_t epid = ecb->dte_epid;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(ecb->dte_next == NULL);
	ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb);

	if ((pred = ecb->dte_predicate) != NULL)
		dtrace_predicate_release(pred, vstate);

	dtrace_ecb_action_remove(ecb);

	ASSERT(state->dts_ecbs[epid - 1] == ecb);
	state->dts_ecbs[epid - 1] = NULL;

	kmem_free(ecb, sizeof (dtrace_ecb_t));
}
static dtrace_ecb_t *
dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe,
    dtrace_enabling_t *enab)
{
	dtrace_predicate_t *pred;
	dtrace_actdesc_t *act;
	dtrace_provider_t *prov;
	dtrace_ecbdesc_t *desc = enab->dten_current;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(state != NULL);

	ecb = dtrace_ecb_add(state, probe);
	ecb->dte_uarg = desc->dted_uarg;

	if ((pred = desc->dted_pred.dtpdd_predicate) != NULL) {
		dtrace_predicate_hold(pred);
		ecb->dte_predicate = pred;
	}

	if (probe != NULL) {
		/*
		 * If the provider shows more leg than the consumer is old
		 * enough to see, we need to enable the appropriate implicit
		 * predicate bits to prevent the ecb from activating at
		 * revealing times.
		 *
		 * Providers specifying DTRACE_PRIV_USER at register time
		 * are stating that they need the /proc-style privilege
		 * model to be enforced, and this is what DTRACE_COND_OWNER
		 * and DTRACE_COND_ZONEOWNER will then do at probe time.
		 */
		prov = probe->dtpr_provider;
		if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLPROC) &&
		    (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
			ecb->dte_cond |= DTRACE_COND_OWNER;

		if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLZONE) &&
		    (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
			ecb->dte_cond |= DTRACE_COND_ZONEOWNER;

		/*
		 * If the provider shows us kernel innards and the user
		 * is lacking sufficient privilege, enable the
		 * DTRACE_COND_USERMODE implicit predicate.
		 */
		if (!(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) &&
		    (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_KERNEL))
			ecb->dte_cond |= DTRACE_COND_USERMODE;
	}

	if (dtrace_ecb_create_cache != NULL) {
		/*
		 * If we have a cached ecb, we'll use its action list instead
		 * of creating our own (saving both time and space).
		 */
		dtrace_ecb_t *cached = dtrace_ecb_create_cache;
		dtrace_action_t *act_if = cached->dte_action;

		if (act_if != NULL) {
			ASSERT(act_if->dta_refcnt > 0);
			act_if->dta_refcnt++;
			ecb->dte_action = act_if;
			ecb->dte_action_last = cached->dte_action_last;
			ecb->dte_needed = cached->dte_needed;
			ecb->dte_size = cached->dte_size;
			ecb->dte_alignment = cached->dte_alignment;
		}

		return (ecb);
	}

	for (act = desc->dted_action; act != NULL; act = act->dtad_next) {
		if ((enab->dten_error = dtrace_ecb_action_add(ecb, act)) != 0) {
			dtrace_ecb_destroy(ecb);
			return (NULL);
		}
	}

	dtrace_ecb_resize(ecb);

	return (dtrace_ecb_create_cache = ecb);
}
static int
dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg)
{
	dtrace_enabling_t *enab = arg;
	dtrace_state_t *state = enab->dten_vstate->dtvs_state;

	ASSERT(state != NULL);

	if (probe != NULL && probe->dtpr_gen < enab->dten_probegen) {
		/*
		 * This probe was created in a generation for which this
		 * enabling has previously created ECBs; we don't want to
		 * enable it again, so just kick out.
		 */
		return (DTRACE_MATCH_NEXT);
	}

	if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL)
		return (DTRACE_MATCH_DONE);

	if (dtrace_ecb_enable(ecb) < 0)
		return (DTRACE_MATCH_FAIL);

	return (DTRACE_MATCH_NEXT);
}
static dtrace_ecb_t *
dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id)
{
#pragma unused(ecb) /* __APPLE__ */

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

#if !defined(__APPLE__) /* Quiet compiler warnings */
	if (id == 0 || id > state->dts_necbs)
		return (NULL);
#else
	if (id == 0 || id > (dtrace_epid_t)state->dts_necbs)
		return (NULL);
#endif /* __APPLE__ */

	ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL);
	ASSERT((ecb = state->dts_ecbs[id - 1]) == NULL || ecb->dte_epid == id);

	return (state->dts_ecbs[id - 1]);
}
static dtrace_aggregation_t *
dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id)
{
	dtrace_aggregation_t *agg;
#pragma unused(agg) /* __APPLE__ */

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

#if !defined(__APPLE__) /* Quiet compiler warnings */
	if (id == 0 || id > state->dts_naggregations)
		return (NULL);
#else
	if (id == 0 || id > (dtrace_aggid_t)state->dts_naggregations)
		return (NULL);
#endif /* __APPLE__ */

	ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL);
	ASSERT((agg = state->dts_aggregations[id - 1]) == NULL ||
	    agg->dtag_id == id);

	return (state->dts_aggregations[id - 1]);
}
/*
 * DTrace Buffer Functions
 *
 * The following functions manipulate DTrace buffers.  Most of these functions
 * are called in the context of establishing or processing consumer state;
 * exceptions are explicitly noted.
 */

/*
 * Note:  called from cross call context.  This function switches the two
 * buffers on a given CPU.  The atomicity of this operation is assured by
 * disabling interrupts while the actual switch takes place; the disabling of
 * interrupts serializes the execution with any execution of dtrace_probe() on
 * the same CPU.
 */
static void
dtrace_buffer_switch(dtrace_buffer_t *buf)
{
	caddr_t tomax = buf->dtb_tomax;
	caddr_t xamot = buf->dtb_xamot;
	dtrace_icookie_t cookie;

	ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
	ASSERT(!(buf->dtb_flags & DTRACEBUF_RING));

	cookie = dtrace_interrupt_disable();
	buf->dtb_tomax = xamot;
	buf->dtb_xamot = tomax;
	buf->dtb_xamot_drops = buf->dtb_drops;
	buf->dtb_xamot_offset = buf->dtb_offset;
	buf->dtb_xamot_errors = buf->dtb_errors;
	buf->dtb_xamot_flags = buf->dtb_flags;
	buf->dtb_offset = 0;
	buf->dtb_drops = 0;
	buf->dtb_errors = 0;
	buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED);
	dtrace_interrupt_enable(cookie);
}
/*
 * Note:  called from cross call context.  This function activates a buffer
 * on a CPU.  As with dtrace_buffer_switch(), the atomicity of the operation
 * is guaranteed by the disabling of interrupts.
 */
static void
dtrace_buffer_activate(dtrace_state_t *state)
{
	dtrace_buffer_t *buf;
	dtrace_icookie_t cookie = dtrace_interrupt_disable();

	buf = &state->dts_buffer[CPU->cpu_id];

	if (buf->dtb_tomax != NULL) {
		/*
		 * We might like to assert that the buffer is marked inactive,
		 * but this isn't necessarily true:  the buffer for the CPU
		 * that processes the BEGIN probe has its buffer activated
		 * manually.  In this case, we take the (harmless) action of
		 * re-clearing the INACTIVE bit.
		 */
		buf->dtb_flags &= ~DTRACEBUF_INACTIVE;
	}

	dtrace_interrupt_enable(cookie);
}
static int
dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,
    processorid_t cpu)
{
	dtrace_buffer_t *buf;

	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

#if !defined(__APPLE__) /* Quiet compiler warnings */
	if (size > dtrace_nonroot_maxsize &&
	    !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE))
		return (EFBIG);
#else
	if (size > (size_t)dtrace_nonroot_maxsize &&
	    !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE))
		return (EFBIG);
#endif /* __APPLE__ */

#if defined(__APPLE__)
	if (size > (sane_size / 8) / (int)NCPU) /* As in kdbg_set_nkdbufs(), roughly. */
		return (ENOMEM);
#endif /* __APPLE__ */

	do {
		if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
			continue;

		buf = &bufs[cp->cpu_id];

		/*
		 * If there is already a buffer allocated for this CPU, it
		 * is only possible that this is a DR event.  In this case,
		 * the buffer size must match our specified size.
		 */
		if (buf->dtb_tomax != NULL) {
			ASSERT(buf->dtb_size == size);
			continue;
		}

		ASSERT(buf->dtb_xamot == NULL);

		if ((buf->dtb_tomax = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
			goto err;

		buf->dtb_size = size;
		buf->dtb_flags = flags;
		buf->dtb_offset = 0;
		buf->dtb_drops = 0;

		if (flags & DTRACEBUF_NOSWITCH)
			continue;

		if ((buf->dtb_xamot = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
			goto err;
	} while ((cp = cp->cpu_next) != cpu_list);

	return (0);

err:
	do {
		if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
			continue;

		buf = &bufs[cp->cpu_id];

		if (buf->dtb_xamot != NULL) {
			ASSERT(buf->dtb_tomax != NULL);
			ASSERT(buf->dtb_size == size);
			kmem_free(buf->dtb_xamot, size);
		}

		if (buf->dtb_tomax != NULL) {
			ASSERT(buf->dtb_size == size);
			kmem_free(buf->dtb_tomax, size);
		}

		buf->dtb_tomax = NULL;
		buf->dtb_xamot = NULL;
	} while ((cp = cp->cpu_next) != cpu_list);

	return (ENOMEM);
}
/*
 * Note:  called from probe context.  This function just increments the drop
 * count on a buffer.  It has been made a function to allow for the
 * possibility of understanding the source of mysterious drop counts.  (A
 * problem for which one may be particularly disappointed that DTrace cannot
 * be used to understand DTrace.)
 */
static void
dtrace_buffer_drop(dtrace_buffer_t *buf)
{
	buf->dtb_drops++;
}

/*
 * Note:  called from probe context.  This function is called to reserve space
 * in a buffer.  If mstate is non-NULL, sets the scratch base and size in the
 * mstate.  Returns the new offset in the buffer, or a negative value if an
 * error has occurred.
 */
static intptr_t
dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align,
    dtrace_state_t *state, dtrace_mstate_t *mstate)
{
	intptr_t offs = buf->dtb_offset, soffs;

	if (buf->dtb_flags & DTRACEBUF_INACTIVE)
		return (-1);

	if ((tomax = buf->dtb_tomax) == NULL) {
		dtrace_buffer_drop(buf);
		return (-1);
	}

	if (!(buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL))) {
		while (offs & (align - 1)) {
			/*
			 * Assert that our alignment is off by a number which
			 * is itself sizeof (uint32_t) aligned.
			 */
			ASSERT(!((align - (offs & (align - 1))) &
			    (sizeof (uint32_t) - 1)));
			DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
			offs += sizeof (uint32_t);
		}
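
		/*
		 * For instance, if the buffer offset is 4 and the record
		 * requires 8-byte alignment, exactly one word of
		 * DTRACE_EPIDNONE is stored at offset 4 and offs advances to
		 * 8; the EPIDNONE value marks those four bytes as padding
		 * rather than the start of a record.
		 */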
11551 #if !defined(__APPLE__) /* Quiet compiler warnings */
11552 if ((soffs
= offs
+ needed
) > buf
->dtb_size
) {
11554 if ((uint64_t)(soffs
= offs
+ needed
) > buf
->dtb_size
) {
11555 #endif /* __APPLE__ */
11556 dtrace_buffer_drop(buf
);
11560 if (mstate
== NULL
)
11563 mstate
->dtms_scratch_base
= (uintptr_t)tomax
+ soffs
;
11564 mstate
->dtms_scratch_size
= buf
->dtb_size
- soffs
;
11565 mstate
->dtms_scratch_ptr
= mstate
->dtms_scratch_base
;
11570 if (buf
->dtb_flags
& DTRACEBUF_FILL
) {
11571 if (state
->dts_activity
!= DTRACE_ACTIVITY_COOLDOWN
&&
11572 (buf
->dtb_flags
& DTRACEBUF_FULL
))
11577 total_off
= needed
+ (offs
& (align
- 1));
11580 * For a ring buffer, life is quite a bit more complicated. Before
11581 * we can store any padding, we need to adjust our wrapping offset.
11582 * (If we've never before wrapped or we're not about to, no adjustment
11585 if ((buf
->dtb_flags
& DTRACEBUF_WRAPPED
) ||
11586 offs
+ total_off
> buf
->dtb_size
) {
11587 woffs
= buf
->dtb_xamot_offset
;
11589 if (offs
+ total_off
> buf
->dtb_size
) {
11591 * We can't fit in the end of the buffer. First, a
11592 * sanity check that we can fit in the buffer at all.
11594 if (total_off
> buf
->dtb_size
) {
11595 dtrace_buffer_drop(buf
);
11600 * We're going to be storing at the top of the buffer,
11601 * so now we need to deal with the wrapped offset. We
11602 * only reset our wrapped offset to 0 if it is
11603 * currently greater than the current offset. If it
11604 * is less than the current offset, it is because a
11605 * previous allocation induced a wrap -- but the
11606 * allocation didn't subsequently take the space due
11607 * to an error or false predicate evaluation. In this
11608 * case, we'll just leave the wrapped offset alone: if
11609 * the wrapped offset hasn't been advanced far enough
11610 * for this allocation, it will be adjusted in the
11613 if (buf
->dtb_flags
& DTRACEBUF_WRAPPED
) {
11621 * Now we know that we're going to be storing to the
11622 * top of the buffer and that there is room for us
11623 * there. We need to clear the buffer from the current
11624 * offset to the end (there may be old gunk there).
11626 #if !defined(__APPLE__) /* Quiet compiler warnings */
11627 while (offs
< buf
->dtb_size
)
11629 while ((uint64_t)offs
< buf
->dtb_size
)
11630 #endif /* __APPLE__ */
11634 * We need to set our offset to zero. And because we
11635 * are wrapping, we need to set the bit indicating as
11636 * much. We can also adjust our needed space back
11637 * down to the space required by the ECB -- we know
11638 * that the top of the buffer is aligned.
11641 total_off
= needed
;
11642 buf
->dtb_flags
|= DTRACEBUF_WRAPPED
;
11645 * There is room for us in the buffer, so we simply
11646 * need to check the wrapped offset.
11648 if (woffs
< offs
) {
11650 * The wrapped offset is less than the offset.
11651 * This can happen if we allocated buffer space
11652 * that induced a wrap, but then we didn't
11653 * subsequently take the space due to an error
11654 * or false predicate evaluation. This is
11655 * okay; we know that _this_ allocation isn't
11656 * going to induce a wrap. We still can't
11657 * reset the wrapped offset to be zero,
11658 * however: the space may have been trashed in
11659 * the previous failed probe attempt. But at
11660 * least the wrapped offset doesn't need to
11661 * be adjusted at all...
11667 #if !defined(__APPLE__) /* Quiet compiler warnings */
11668 while (offs
+ total_off
> woffs
) {
11670 while (offs
+ total_off
> (size_t)woffs
) {
11671 #endif /* __APPLE__ */
11672 dtrace_epid_t epid
= *(uint32_t *)(tomax
+ woffs
);
11675 if (epid
== DTRACE_EPIDNONE
) {
11676 size
= sizeof (uint32_t);
11678 #if !defined(__APPLE__) /* Quiet compiler warnings */
11679 ASSERT(epid
<= state
->dts_necbs
);
11681 ASSERT(epid
<= (dtrace_epid_t
)state
->dts_necbs
);
11682 #endif /* __APPLE__ */
11683 ASSERT(state
->dts_ecbs
[epid
- 1] != NULL
);
11685 size
= state
->dts_ecbs
[epid
- 1]->dte_size
;
11688 ASSERT(woffs
+ size
<= buf
->dtb_size
);
11691 if (woffs
+ size
== buf
->dtb_size
) {
11693 * We've reached the end of the buffer; we want
11694 * to set the wrapped offset to 0 and break
11695 * out. However, if the offs is 0, then we're
11696 * in a strange edge-condition: the amount of
11697 * space that we want to reserve plus the size
11698 * of the record that we're overwriting is
11699 * greater than the size of the buffer. This
11700 * is problematic because if we reserve the
11701 * space but subsequently don't consume it (due
11702 * to a failed predicate or error) the wrapped
11703 * offset will be 0 -- yet the EPID at offset 0
11704 * will not be committed. This situation is
11705 * relatively easy to deal with: if we're in
11706 * this case, the buffer is indistinguishable
11707 * from one that hasn't wrapped; we need only
11708 * finish the job by clearing the wrapped bit,
11709 * explicitly setting the offset to be 0, and
11710 * zero'ing out the old data in the buffer.
11713 buf
->dtb_flags
&= ~DTRACEBUF_WRAPPED
;
11714 buf
->dtb_offset
= 0;
11717 #if !defined(__APPLE__) /* Quiet compiler warnings */
11718 while (woffs
< buf
->dtb_size
)
11720 while ((uint64_t)woffs
< buf
->dtb_size
)
11721 #endif /* __APPLE__ */
11723 tomax
[woffs
++] = 0;
		/*
		 * We have a wrapped offset.  It may be that the wrapped offset
		 * has become zero -- that's okay.
		 */
		buf->dtb_xamot_offset = woffs;
	}

out:
	/*
	 * Now we can plow the buffer with any necessary padding.
	 */
	while (offs & (align - 1)) {
		/*
		 * Assert that our alignment is off by a number which
		 * is itself sizeof (uint32_t) aligned.
		 */
		ASSERT(!((align - (offs & (align - 1))) &
		    (sizeof (uint32_t) - 1)));
		DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
		offs += sizeof (uint32_t);
	}

	if (buf->dtb_flags & DTRACEBUF_FILL) {
		if (offs + needed > buf->dtb_size - state->dts_reserve) {
			buf->dtb_flags |= DTRACEBUF_FULL;
			return (-1);
		}
	}

	if (mstate == NULL)
		return (offs);

	/*
	 * For ring buffers and fill buffers, the scratch space is always
	 * the inactive buffer.
	 */
	mstate->dtms_scratch_base = (uintptr_t)buf->dtb_xamot;
	mstate->dtms_scratch_size = buf->dtb_size;
	mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;

	return (offs);
}
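/*
 * For illustration only (the numbers are hypothetical): with a 64-byte ring
 * buffer, offs == 60 and needed == 16, the record cannot fit at the end, so
 * the tail is zeroed, the offset is reset to 0, the buffer is marked
 * DTRACEBUF_WRAPPED, and records at the wrapped offset are consumed until at
 * least 16 bytes are free.  Any alignment shortfall is then plowed with
 * sizeof (uint32_t) pads stamped DTRACE_EPIDNONE so that consumers can skip
 * them.
 */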
static void
dtrace_buffer_polish(dtrace_buffer_t *buf)
{
	ASSERT(buf->dtb_flags & DTRACEBUF_RING);
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (!(buf->dtb_flags & DTRACEBUF_WRAPPED))
		return;

	/*
	 * We need to polish the ring buffer.  There are three cases:
	 *
	 * - The first (and presumably most common) is that there is no gap
	 *   between the buffer offset and the wrapped offset.  In this case,
	 *   there is nothing in the buffer that isn't valid data; we can
	 *   mark the buffer as polished and return.
	 *
	 * - The second (less common than the first but still more common
	 *   than the third) is that there is a gap between the buffer offset
	 *   and the wrapped offset, and the wrapped offset is larger than the
	 *   buffer offset.  This can happen because of an alignment issue, or
	 *   can happen because of a call to dtrace_buffer_reserve() that
	 *   didn't subsequently consume the buffer space.  In this case,
	 *   we need to zero the data from the buffer offset to the wrapped
	 *   offset.
	 *
	 * - The third (and least common) is that there is a gap between the
	 *   buffer offset and the wrapped offset, but the wrapped offset is
	 *   _less_ than the buffer offset.  This can only happen because a
	 *   call to dtrace_buffer_reserve() induced a wrap, but the space
	 *   was not subsequently consumed.  In this case, we need to zero the
	 *   space from the offset to the end of the buffer _and_ from the
	 *   top of the buffer to the wrapped offset.
	 */
	if (buf->dtb_offset < buf->dtb_xamot_offset) {
		bzero(buf->dtb_tomax + buf->dtb_offset,
		    buf->dtb_xamot_offset - buf->dtb_offset);
	}

	if (buf->dtb_offset > buf->dtb_xamot_offset) {
		bzero(buf->dtb_tomax + buf->dtb_offset,
		    buf->dtb_size - buf->dtb_offset);
		bzero(buf->dtb_tomax, buf->dtb_xamot_offset);
	}
}
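/*
 * Pictorially (offsets are illustrative only), with o == dtb_offset and
 * w == dtb_xamot_offset, dtrace_buffer_polish() handles:
 *
 *	o == w:		nothing to zero -- the buffer is all valid data
 *	o <  w:		zero [o, w)
 *	w <  o:		zero [o, dtb_size) and [0, w)
 */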
static void
dtrace_buffer_free(dtrace_buffer_t *bufs)
{
	int i;

	for (i = 0; i < (int)NCPU; i++) {
		dtrace_buffer_t *buf = &bufs[i];

		if (buf->dtb_tomax == NULL) {
			ASSERT(buf->dtb_xamot == NULL);
			ASSERT(buf->dtb_size == 0);
			continue;
		}

		if (buf->dtb_xamot != NULL) {
			ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
			kmem_free(buf->dtb_xamot, buf->dtb_size);
		}

		kmem_free(buf->dtb_tomax, buf->dtb_size);
		buf->dtb_size = 0;
		buf->dtb_tomax = NULL;
		buf->dtb_xamot = NULL;
	}
}
/*
 * DTrace Enabling Functions
 */
static dtrace_enabling_t *
dtrace_enabling_create(dtrace_vstate_t *vstate)
{
	dtrace_enabling_t *enab;

	enab = kmem_zalloc(sizeof (dtrace_enabling_t), KM_SLEEP);
	enab->dten_vstate = vstate;

	return (enab);
}
static void
dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb)
{
	dtrace_ecbdesc_t **ndesc;
	size_t osize, nsize;

	/*
	 * We can't add to enablings after we've enabled them, or after we've
	 * retained them.
	 */
	ASSERT(enab->dten_probegen == 0);
	ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);

#if defined(__APPLE__)
	if (ecb == NULL) return; /* Note: protection against gcc 4.0 botch on x86 */
#endif /* __APPLE__ */

	if (enab->dten_ndesc < enab->dten_maxdesc) {
		enab->dten_desc[enab->dten_ndesc++] = ecb;
		return;
	}

	osize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);

	if (enab->dten_maxdesc == 0) {
		enab->dten_maxdesc = 1;
	} else {
		enab->dten_maxdesc <<= 1;
	}

	ASSERT(enab->dten_ndesc < enab->dten_maxdesc);

	nsize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);
	ndesc = kmem_zalloc(nsize, KM_SLEEP);
	bcopy(enab->dten_desc, ndesc, osize);
	kmem_free(enab->dten_desc, osize);

	enab->dten_desc = ndesc;
	enab->dten_desc[enab->dten_ndesc++] = ecb;
}
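/*
 * Note that dten_desc grows by doubling (1, 2, 4, ...), so a sequence of n
 * dtrace_enabling_add() calls reallocates and copies the descriptor array
 * only O(log n) times.
 */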
static void
dtrace_enabling_addlike(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb,
    dtrace_probedesc_t *pd)
{
	dtrace_ecbdesc_t *new;
	dtrace_predicate_t *pred;
	dtrace_actdesc_t *act;

	/*
	 * We're going to create a new ECB description that matches the
	 * specified ECB in every way, but has the specified probe description.
	 */
	new = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP);

	if ((pred = ecb->dted_pred.dtpdd_predicate) != NULL)
		dtrace_predicate_hold(pred);

	for (act = ecb->dted_action; act != NULL; act = act->dtad_next)
		dtrace_actdesc_hold(act);

	new->dted_action = ecb->dted_action;
	new->dted_pred = ecb->dted_pred;
	new->dted_probe = *pd;
	new->dted_uarg = ecb->dted_uarg;

	dtrace_enabling_add(enab, new);
}
static void
dtrace_enabling_dump(dtrace_enabling_t *enab)
{
	int i;

	for (i = 0; i < enab->dten_ndesc; i++) {
		dtrace_probedesc_t *desc = &enab->dten_desc[i]->dted_probe;

		cmn_err(CE_NOTE, "enabling probe %d (%s:%s:%s:%s)", i,
		    desc->dtpd_provider, desc->dtpd_mod,
		    desc->dtpd_func, desc->dtpd_name);
	}
}
static void
dtrace_enabling_destroy(dtrace_enabling_t *enab)
{
	int i;
	dtrace_ecbdesc_t *ep;
	dtrace_vstate_t *vstate = enab->dten_vstate;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	for (i = 0; i < enab->dten_ndesc; i++) {
		dtrace_actdesc_t *act, *next;
		dtrace_predicate_t *pred;

		ep = enab->dten_desc[i];

		if ((pred = ep->dted_pred.dtpdd_predicate) != NULL)
			dtrace_predicate_release(pred, vstate);

		for (act = ep->dted_action; act != NULL; act = next) {
			next = act->dtad_next;
			dtrace_actdesc_release(act, vstate);
		}

		kmem_free(ep, sizeof (dtrace_ecbdesc_t));
	}

	kmem_free(enab->dten_desc,
	    enab->dten_maxdesc * sizeof (dtrace_enabling_t *));

	/*
	 * If this was a retained enabling, decrement the dts_nretained count
	 * and take it off of the dtrace_retained list.
	 */
	if (enab->dten_prev != NULL || enab->dten_next != NULL ||
	    dtrace_retained == enab) {
		ASSERT(enab->dten_vstate->dtvs_state != NULL);
		ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0);
		enab->dten_vstate->dtvs_state->dts_nretained--;
		dtrace_retained_gen++;
	}

	if (enab->dten_prev == NULL) {
		if (dtrace_retained == enab) {
			dtrace_retained = enab->dten_next;

			if (dtrace_retained != NULL)
				dtrace_retained->dten_prev = NULL;
		}
	} else {
		ASSERT(enab != dtrace_retained);
		ASSERT(dtrace_retained != NULL);
		enab->dten_prev->dten_next = enab->dten_next;
	}

	if (enab->dten_next != NULL) {
		ASSERT(dtrace_retained != NULL);
		enab->dten_next->dten_prev = enab->dten_prev;
	}

	kmem_free(enab, sizeof (dtrace_enabling_t));
}
static int
dtrace_enabling_retain(dtrace_enabling_t *enab)
{
	dtrace_state_t *state;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);
	ASSERT(enab->dten_vstate != NULL);

	state = enab->dten_vstate->dtvs_state;
	ASSERT(state != NULL);

	/*
	 * We only allow each state to retain dtrace_retain_max enablings.
	 */
	if (state->dts_nretained >= dtrace_retain_max)
		return (ENOSPC);

	state->dts_nretained++;
	dtrace_retained_gen++;

	if (dtrace_retained == NULL) {
		dtrace_retained = enab;
		return (0);
	}

	enab->dten_next = dtrace_retained;
	dtrace_retained->dten_prev = enab;
	dtrace_retained = enab;

	return (0);
}
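/*
 * Retained enablings form a doubly-linked list headed by dtrace_retained;
 * dtrace_retained_gen is bumped on every insertion and removal so that code
 * which drops dtrace_lock while walking the list (see
 * dtrace_enabling_provide()) can detect that the list changed underneath it.
 */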
12041 dtrace_enabling_replicate(dtrace_state_t
*state
, dtrace_probedesc_t
*match
,
12042 dtrace_probedesc_t
*create
)
12044 dtrace_enabling_t
*new, *enab
;
12045 int found
= 0, err
= ENOENT
;
12047 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
12048 ASSERT(strlen(match
->dtpd_provider
) < DTRACE_PROVNAMELEN
);
12049 ASSERT(strlen(match
->dtpd_mod
) < DTRACE_MODNAMELEN
);
12050 ASSERT(strlen(match
->dtpd_func
) < DTRACE_FUNCNAMELEN
);
12051 ASSERT(strlen(match
->dtpd_name
) < DTRACE_NAMELEN
);
12053 new = dtrace_enabling_create(&state
->dts_vstate
);
12056 * Iterate over all retained enablings, looking for enablings that
12057 * match the specified state.
12059 for (enab
= dtrace_retained
; enab
!= NULL
; enab
= enab
->dten_next
) {
12063 * dtvs_state can only be NULL for helper enablings -- and
12064 * helper enablings can't be retained.
12066 ASSERT(enab
->dten_vstate
->dtvs_state
!= NULL
);
12068 if (enab
->dten_vstate
->dtvs_state
!= state
)
12072 * Now iterate over each probe description; we're looking for
12073 * an exact match to the specified probe description.
12075 for (i
= 0; i
< enab
->dten_ndesc
; i
++) {
12076 dtrace_ecbdesc_t
*ep
= enab
->dten_desc
[i
];
12077 dtrace_probedesc_t
*pd
= &ep
->dted_probe
;
12079 #if !defined(__APPLE__)
12080 if (strcmp(pd
->dtpd_provider
, match
->dtpd_provider
))
12083 if (strcmp(pd
->dtpd_mod
, match
->dtpd_mod
))
12086 if (strcmp(pd
->dtpd_func
, match
->dtpd_func
))
12089 if (strcmp(pd
->dtpd_name
, match
->dtpd_name
))
12091 #else /* Employ size bounded string operation. */
12092 if (strncmp(pd
->dtpd_provider
, match
->dtpd_provider
, DTRACE_PROVNAMELEN
))
12095 if (strncmp(pd
->dtpd_mod
, match
->dtpd_mod
, DTRACE_MODNAMELEN
))
12098 if (strncmp(pd
->dtpd_func
, match
->dtpd_func
, DTRACE_FUNCNAMELEN
))
12101 if (strncmp(pd
->dtpd_name
, match
->dtpd_name
, DTRACE_NAMELEN
))
12103 #endif /* __APPLE__ */
12106 * We have a winning probe! Add it to our growing
12110 dtrace_enabling_addlike(new, ep
, create
);
12114 if (!found
|| (err
= dtrace_enabling_retain(new)) != 0) {
12115 dtrace_enabling_destroy(new);
static void
dtrace_enabling_retract(dtrace_state_t *state)
{
	dtrace_enabling_t *enab, *next;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	/*
	 * Iterate over all retained enablings, destroy the enablings retained
	 * for the specified state.
	 */
	for (enab = dtrace_retained; enab != NULL; enab = next) {
		next = enab->dten_next;

		/*
		 * dtvs_state can only be NULL for helper enablings -- and
		 * helper enablings can't be retained.
		 */
		ASSERT(enab->dten_vstate->dtvs_state != NULL);

		if (enab->dten_vstate->dtvs_state == state) {
			ASSERT(state->dts_nretained > 0);
			dtrace_enabling_destroy(enab);
		}
	}

	ASSERT(state->dts_nretained == 0);
}
static int
dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)
{
	int i;
	int total_matched = 0, matched = 0;

	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	for (i = 0; i < enab->dten_ndesc; i++) {
		dtrace_ecbdesc_t *ep = enab->dten_desc[i];

		enab->dten_current = ep;
		enab->dten_error = 0;

		/*
		 * If a provider failed to enable a probe then get out and
		 * let the consumer know we failed.
		 */
		if ((matched = dtrace_probe_enable(&ep->dted_probe, enab)) < 0)
			return (EBUSY);

		total_matched += matched;

		if (enab->dten_error != 0) {
			/*
			 * If we get an error half-way through enabling the
			 * probes, we kick out -- perhaps with some number of
			 * them enabled.  Leaving enabled probes enabled may
			 * be slightly confusing for user-level, but we expect
			 * that no one will attempt to actually drive on in
			 * the face of such errors.  If this is an anonymous
			 * enabling (indicated with a NULL nmatched pointer),
			 * we cmn_err() a message.  We aren't expecting to
			 * get such an error -- such as it can exist at all,
			 * it would be a result of corrupted DOF in the driver
			 * properties.
			 */
			if (nmatched == NULL) {
				cmn_err(CE_WARN, "dtrace_enabling_match() "
				    "error on %p: %d", (void *)ep,
				    enab->dten_error);
			}

			return (enab->dten_error);
		}
	}

	enab->dten_probegen = dtrace_probegen;
	if (nmatched != NULL)
		*nmatched = total_matched;

	return (0);
}
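/*
 * On success dtrace_enabling_match() returns 0 and, if nmatched is non-NULL,
 * stores the total number of probes matched across all ECB descriptions;
 * otherwise it returns the first error encountered while enabling.
 */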
static void
dtrace_enabling_matchall(void)
{
	dtrace_enabling_t *enab;

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_lock);

	/*
	 * Iterate over all retained enablings to see if any probes match
	 * against them.  We only perform this operation on enablings for which
	 * we have sufficient permissions by virtue of being in the global zone
	 * or in the same zone as the DTrace client.  Because we can be called
	 * after dtrace_detach() has been called, we cannot assert that there
	 * are retained enablings.  We can safely load from dtrace_retained,
	 * however:  the taskq_destroy() at the end of dtrace_detach() will
	 * block pending our completion.
	 */
	for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
#if !defined(__APPLE__)
		cred_t *cr = enab->dten_vstate->dtvs_state->dts_cred.dcr_cred;

		if (INGLOBALZONE(curproc) ||
		    cr != NULL && getzoneid() == crgetzoneid(cr))
			(void) dtrace_enabling_match(enab, NULL);
#else
		(void) dtrace_enabling_match(enab, NULL); /* As if always in "global" zone. */
#endif /* __APPLE__ */
	}

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&cpu_lock);
}
/*
 * If an enabling is to be enabled without having matched probes (that is, if
 * dtrace_state_go() is to be called on the underlying dtrace_state_t), the
 * enabling must be _primed_ by creating an ECB for every ECB description.
 * This must be done to assure that we know the number of speculations, the
 * number of aggregations, the minimum buffer size needed, etc. before we
 * transition out of DTRACE_ACTIVITY_INACTIVE.  To do this without actually
 * enabling any probes, we create ECBs for every ECB description, but with a
 * NULL probe -- which is exactly what this function does.
 */
static void
dtrace_enabling_prime(dtrace_state_t *state)
{
	dtrace_enabling_t *enab;
	int i;

	for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
		ASSERT(enab->dten_vstate->dtvs_state != NULL);

		if (enab->dten_vstate->dtvs_state != state)
			continue;

		/*
		 * We don't want to prime an enabling more than once, lest
		 * we allow a malicious user to induce resource exhaustion.
		 * (The ECBs that result from priming an enabling aren't
		 * leaked -- but they also aren't deallocated until the
		 * consumer state is destroyed.)
		 */
		if (enab->dten_primed)
			continue;

		for (i = 0; i < enab->dten_ndesc; i++) {
			enab->dten_current = enab->dten_desc[i];
			(void) dtrace_probe_enable(NULL, enab);
		}

		enab->dten_primed = 1;
	}
}
12282 * Called to indicate that probes should be provided due to retained
12283 * enablings. This is implemented in terms of dtrace_probe_provide(), but it
12284 * must take an initial lap through the enabling calling the dtps_provide()
12285 * entry point explicitly to allow for autocreated probes.
12288 dtrace_enabling_provide(dtrace_provider_t
*prv
)
12291 dtrace_probedesc_t desc
;
12292 dtrace_genid_t gen
;
12294 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
12295 lck_mtx_assert(&dtrace_provider_lock
, LCK_MTX_ASSERT_OWNED
);
12299 prv
= dtrace_provider
;
12303 dtrace_enabling_t
*enab
;
12304 void *parg
= prv
->dtpv_arg
;
12307 gen
= dtrace_retained_gen
;
12308 for (enab
= dtrace_retained
; enab
!= NULL
;
12309 enab
= enab
->dten_next
) {
12310 for (i
= 0; i
< enab
->dten_ndesc
; i
++) {
12311 desc
= enab
->dten_desc
[i
]->dted_probe
;
12312 lck_mtx_unlock(&dtrace_lock
);
12313 prv
->dtpv_pops
.dtps_provide(parg
, &desc
);
12314 lck_mtx_lock(&dtrace_lock
);
12316 * Process the retained enablings again if
12317 * they have changed while we weren't holding
12320 if (gen
!= dtrace_retained_gen
)
12324 } while (all
&& (prv
= prv
->dtpv_next
) != NULL
);
12326 lck_mtx_unlock(&dtrace_lock
);
12327 dtrace_probe_provide(NULL
, all
? NULL
: prv
);
12328 lck_mtx_lock(&dtrace_lock
);
/*
 * DTrace DOF Functions
 */
static void
dtrace_dof_error(dof_hdr_t *dof, const char *str)
{
#pragma unused(dof) /* __APPLE__ */
	if (dtrace_err_verbose)
		cmn_err(CE_WARN, "failed to process DOF: %s", str);

#ifdef DTRACE_ERRDEBUG
	dtrace_errdebug(str);
#endif
}
12348 * Create DOF out of a currently enabled state. Right now, we only create
12349 * DOF containing the run-time options -- but this could be expanded to create
12350 * complete DOF representing the enabled state.
12353 dtrace_dof_create(dtrace_state_t
*state
)
12357 dof_optdesc_t
*opt
;
12358 int i
, len
= sizeof (dof_hdr_t
) +
12359 roundup(sizeof (dof_sec_t
), sizeof (uint64_t)) +
12360 sizeof (dof_optdesc_t
) * DTRACEOPT_MAX
;
12362 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
12364 #if !defined(__APPLE__)
12365 dof
= kmem_zalloc(len
, KM_SLEEP
);
12367 dof
= dt_kmem_zalloc_aligned(len
, 8, KM_SLEEP
);
12368 #endif /* __APPLE__ */
12369 dof
->dofh_ident
[DOF_ID_MAG0
] = DOF_MAG_MAG0
;
12370 dof
->dofh_ident
[DOF_ID_MAG1
] = DOF_MAG_MAG1
;
12371 dof
->dofh_ident
[DOF_ID_MAG2
] = DOF_MAG_MAG2
;
12372 dof
->dofh_ident
[DOF_ID_MAG3
] = DOF_MAG_MAG3
;
12374 dof
->dofh_ident
[DOF_ID_MODEL
] = DOF_MODEL_NATIVE
;
12375 dof
->dofh_ident
[DOF_ID_ENCODING
] = DOF_ENCODE_NATIVE
;
12376 dof
->dofh_ident
[DOF_ID_VERSION
] = DOF_VERSION
;
12377 dof
->dofh_ident
[DOF_ID_DIFVERS
] = DIF_VERSION
;
12378 dof
->dofh_ident
[DOF_ID_DIFIREG
] = DIF_DIR_NREGS
;
12379 dof
->dofh_ident
[DOF_ID_DIFTREG
] = DIF_DTR_NREGS
;
12381 dof
->dofh_flags
= 0;
12382 dof
->dofh_hdrsize
= sizeof (dof_hdr_t
);
12383 dof
->dofh_secsize
= sizeof (dof_sec_t
);
12384 dof
->dofh_secnum
= 1; /* only DOF_SECT_OPTDESC */
12385 dof
->dofh_secoff
= sizeof (dof_hdr_t
);
12386 dof
->dofh_loadsz
= len
;
12387 dof
->dofh_filesz
= len
;
12391 * Fill in the option section header...
12393 sec
= (dof_sec_t
*)((uintptr_t)dof
+ sizeof (dof_hdr_t
));
12394 sec
->dofs_type
= DOF_SECT_OPTDESC
;
12395 sec
->dofs_align
= sizeof (uint64_t);
12396 sec
->dofs_flags
= DOF_SECF_LOAD
;
12397 sec
->dofs_entsize
= sizeof (dof_optdesc_t
);
12399 opt
= (dof_optdesc_t
*)((uintptr_t)sec
+
12400 roundup(sizeof (dof_sec_t
), sizeof (uint64_t)));
12402 sec
->dofs_offset
= (uintptr_t)opt
- (uintptr_t)dof
;
12403 sec
->dofs_size
= sizeof (dof_optdesc_t
) * DTRACEOPT_MAX
;
12405 for (i
= 0; i
< DTRACEOPT_MAX
; i
++) {
12406 opt
[i
].dofo_option
= i
;
12407 opt
[i
].dofo_strtab
= DOF_SECIDX_NONE
;
12408 opt
[i
].dofo_value
= state
->dts_options
[i
];
12415 #if !defined(__APPLE__)
12416 dtrace_dof_copyin(uintptr_t uarg
, int *errp
)
12418 dtrace_dof_copyin(user_addr_t uarg
, int *errp
)
12421 dof_hdr_t hdr
, *dof
;
12423 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_NOTOWNED
);
12426 * First, we're going to copyin() the sizeof (dof_hdr_t).
12428 #if !defined(__APPLE__)
12429 if (copyin((void *)uarg
, &hdr
, sizeof (hdr
)) != 0) {
12431 if (copyin(uarg
, &hdr
, sizeof (hdr
)) != 0) {
12433 dtrace_dof_error(NULL
, "failed to copyin DOF header");
12439 * Now we'll allocate the entire DOF and copy it in -- provided
12440 * that the length isn't outrageous.
12442 #if !defined(__APPLE__) /* Quiet compiler warnings */
12443 if (hdr
.dofh_loadsz
>= dtrace_dof_maxsize
) {
12445 if (hdr
.dofh_loadsz
>= (uint64_t)dtrace_dof_maxsize
) {
12446 #endif /* __APPLE__ */
12447 dtrace_dof_error(&hdr
, "load size exceeds maximum");
12452 if (hdr
.dofh_loadsz
< sizeof (hdr
)) {
12453 dtrace_dof_error(&hdr
, "invalid load size");
12458 #if !defined(__APPLE__)
12459 dof
= kmem_alloc(hdr
.dofh_loadsz
, KM_SLEEP
);
12461 if (copyin((void *)uarg
, dof
, hdr
.dofh_loadsz
) != 0 ||
12462 dof
->dofh_loadsz
!= hdr
.dofh_loadsz
) {
12463 kmem_free(dof
, hdr
.dofh_loadsz
);
12468 dof
= dt_kmem_alloc_aligned(hdr
.dofh_loadsz
, 8, KM_SLEEP
);
12470 if (copyin(uarg
, dof
, hdr
.dofh_loadsz
) != 0 ||
12471 dof
->dofh_loadsz
!= hdr
.dofh_loadsz
) {
12472 dt_kmem_free_aligned(dof
, hdr
.dofh_loadsz
);
12481 #if defined(__APPLE__)
12484 dtrace_dof_copyin_from_proc(proc_t
* p
, user_addr_t uarg
, int *errp
)
12486 dof_hdr_t hdr
, *dof
;
12488 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_NOTOWNED
);
12491 * First, we're going to copyin() the sizeof (dof_hdr_t).
12493 if (uread(p
, &hdr
, sizeof(hdr
), uarg
) != KERN_SUCCESS
) {
12494 dtrace_dof_error(NULL
, "failed to copyin DOF header");
12500 * Now we'll allocate the entire DOF and copy it in -- provided
12501 * that the length isn't outrageous.
12503 if (hdr
.dofh_loadsz
>= (uint64_t)dtrace_dof_maxsize
) {
12504 dtrace_dof_error(&hdr
, "load size exceeds maximum");
12509 if (hdr
.dofh_loadsz
< sizeof (hdr
)) {
12510 dtrace_dof_error(&hdr
, "invalid load size");
12515 dof
= dt_kmem_alloc_aligned(hdr
.dofh_loadsz
, 8, KM_SLEEP
);
12517 if (uread(p
, dof
, hdr
.dofh_loadsz
, uarg
) != KERN_SUCCESS
) {
12518 dt_kmem_free_aligned(dof
, hdr
.dofh_loadsz
);
12526 #endif /* __APPLE__ */
12529 dtrace_dof_property(const char *name
)
12533 unsigned int len
, i
;
12537 * Unfortunately, array of values in .conf files are always (and
12538 * only) interpreted to be integer arrays. We must read our DOF
12539 * as an integer array, and then squeeze it into a byte array.
12541 #if !defined(__APPLE__) /* Quiet compiler warnings */
12542 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY
, dtrace_devi
, 0,
12543 (char *)name
, (int **)&buf
, &len
) != DDI_PROP_SUCCESS
)
12546 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY
, dtrace_devi
, 0,
12547 name
, (int **)&buf
, &len
) != DDI_PROP_SUCCESS
)
12549 #endif /* __APPLE__ */
12551 for (i
= 0; i
< len
; i
++)
12552 buf
[i
] = (uchar_t
)(((int *)buf
)[i
]);
12554 if (len
< sizeof (dof_hdr_t
)) {
12555 ddi_prop_free(buf
);
12556 dtrace_dof_error(NULL
, "truncated header");
12560 if (len
< (loadsz
= ((dof_hdr_t
*)buf
)->dofh_loadsz
)) {
12561 ddi_prop_free(buf
);
12562 dtrace_dof_error(NULL
, "truncated DOF");
12566 #if !defined(__APPLE__) /* Quiet compiler warnings */
12567 if (loadsz
>= dtrace_dof_maxsize
) {
12569 if (loadsz
>= (uint64_t)dtrace_dof_maxsize
) {
12570 #endif /* __APPLE__ */
12571 ddi_prop_free(buf
);
12572 dtrace_dof_error(NULL
, "oversized DOF");
12576 #if !defined(__APPLE__)
12577 dof
= kmem_alloc(loadsz
, KM_SLEEP
);
12579 dof
= dt_kmem_alloc_aligned(loadsz
, 8, KM_SLEEP
);
12580 #endif /* __APPLE__ */
12581 bcopy(buf
, dof
, loadsz
);
12582 ddi_prop_free(buf
);
static void
dtrace_dof_destroy(dof_hdr_t *dof)
{
#if !defined(__APPLE__)
	kmem_free(dof, dof->dofh_loadsz);
#else
	dt_kmem_free_aligned(dof, dof->dofh_loadsz);
#endif /* __APPLE__ */
}
/*
 * Return the dof_sec_t pointer corresponding to a given section index.  If the
 * index is not valid, dtrace_dof_error() is called and NULL is returned.  If
 * a type other than DOF_SECT_NONE is specified, the header is checked against
 * this type and NULL is returned if the types do not match.
 */
static dof_sec_t *
dtrace_dof_sect(dof_hdr_t *dof, uint32_t type, dof_secidx_t i)
{
	dof_sec_t *sec = (dof_sec_t *)(uintptr_t)
	    ((uintptr_t)dof + dof->dofh_secoff + i * dof->dofh_secsize);

	if (i >= dof->dofh_secnum) {
		dtrace_dof_error(dof, "referenced section index is invalid");
		return (NULL);
	}

	if (!(sec->dofs_flags & DOF_SECF_LOAD)) {
		dtrace_dof_error(dof, "referenced section is not loadable");
		return (NULL);
	}

	if (type != DOF_SECT_NONE && type != sec->dofs_type) {
		dtrace_dof_error(dof, "referenced section is the wrong type");
		return (NULL);
	}

	return (sec);
}
12627 static dtrace_probedesc_t
*
12628 dtrace_dof_probedesc(dof_hdr_t
*dof
, dof_sec_t
*sec
, dtrace_probedesc_t
*desc
)
12630 dof_probedesc_t
*probe
;
12632 uintptr_t daddr
= (uintptr_t)dof
;
12636 if (sec
->dofs_type
!= DOF_SECT_PROBEDESC
) {
12637 dtrace_dof_error(dof
, "invalid probe section");
12641 if (sec
->dofs_align
!= sizeof (dof_secidx_t
)) {
12642 dtrace_dof_error(dof
, "bad alignment in probe description");
12646 if (sec
->dofs_offset
+ sizeof (dof_probedesc_t
) > dof
->dofh_loadsz
) {
12647 dtrace_dof_error(dof
, "truncated probe description");
12651 probe
= (dof_probedesc_t
*)(uintptr_t)(daddr
+ sec
->dofs_offset
);
12652 strtab
= dtrace_dof_sect(dof
, DOF_SECT_STRTAB
, probe
->dofp_strtab
);
12654 if (strtab
== NULL
)
12657 str
= daddr
+ strtab
->dofs_offset
;
12658 size
= strtab
->dofs_size
;
12660 if (probe
->dofp_provider
>= strtab
->dofs_size
) {
12661 dtrace_dof_error(dof
, "corrupt probe provider");
12665 (void) strncpy(desc
->dtpd_provider
,
12666 (char *)(str
+ probe
->dofp_provider
),
12667 MIN(DTRACE_PROVNAMELEN
- 1, size
- probe
->dofp_provider
));
12668 #if defined(__APPLE__) /* Employ size bounded string operation. */
12669 desc
->dtpd_provider
[DTRACE_PROVNAMELEN
- 1] = '\0';
12670 #endif /* __APPLE__ */
12672 if (probe
->dofp_mod
>= strtab
->dofs_size
) {
12673 dtrace_dof_error(dof
, "corrupt probe module");
12677 (void) strncpy(desc
->dtpd_mod
, (char *)(str
+ probe
->dofp_mod
),
12678 MIN(DTRACE_MODNAMELEN
- 1, size
- probe
->dofp_mod
));
12679 #if defined(__APPLE__) /* Employ size bounded string operation. */
12680 desc
->dtpd_mod
[DTRACE_MODNAMELEN
- 1] = '\0';
12681 #endif /* __APPLE__ */
12683 if (probe
->dofp_func
>= strtab
->dofs_size
) {
12684 dtrace_dof_error(dof
, "corrupt probe function");
12688 (void) strncpy(desc
->dtpd_func
, (char *)(str
+ probe
->dofp_func
),
12689 MIN(DTRACE_FUNCNAMELEN
- 1, size
- probe
->dofp_func
));
12690 #if defined(__APPLE__) /* Employ size bounded string operation. */
12691 desc
->dtpd_func
[DTRACE_FUNCNAMELEN
- 1] = '\0';
12692 #endif /* __APPLE__ */
12694 if (probe
->dofp_name
>= strtab
->dofs_size
) {
12695 dtrace_dof_error(dof
, "corrupt probe name");
12699 (void) strncpy(desc
->dtpd_name
, (char *)(str
+ probe
->dofp_name
),
12700 MIN(DTRACE_NAMELEN
- 1, size
- probe
->dofp_name
));
12701 #if defined(__APPLE__) /* Employ size bounded string operation. */
12702 desc
->dtpd_name
[DTRACE_NAMELEN
- 1] = '\0';
12703 #endif /* __APPLE__ */
12708 static dtrace_difo_t
*
12709 dtrace_dof_difo(dof_hdr_t
*dof
, dof_sec_t
*sec
, dtrace_vstate_t
*vstate
,
12714 dof_difohdr_t
*dofd
;
12715 uintptr_t daddr
= (uintptr_t)dof
;
12716 size_t max_size
= dtrace_difo_maxsize
;
12717 #if !defined(__APPLE__) /* Quiet compiler warnings */
12722 #endif /* __APPLE__ */
12725 static const struct {
12733 { DOF_SECT_DIF
, offsetof(dtrace_difo_t
, dtdo_buf
),
12734 offsetof(dtrace_difo_t
, dtdo_len
), sizeof (dif_instr_t
),
12735 sizeof (dif_instr_t
), "multiple DIF sections" },
12737 { DOF_SECT_INTTAB
, offsetof(dtrace_difo_t
, dtdo_inttab
),
12738 offsetof(dtrace_difo_t
, dtdo_intlen
), sizeof (uint64_t),
12739 sizeof (uint64_t), "multiple integer tables" },
12741 { DOF_SECT_STRTAB
, offsetof(dtrace_difo_t
, dtdo_strtab
),
12742 offsetof(dtrace_difo_t
, dtdo_strlen
), 0,
12743 sizeof (char), "multiple string tables" },
12745 { DOF_SECT_VARTAB
, offsetof(dtrace_difo_t
, dtdo_vartab
),
12746 offsetof(dtrace_difo_t
, dtdo_varlen
), sizeof (dtrace_difv_t
),
12747 sizeof (uint_t
), "multiple variable tables" },
12749 #if !defined(__APPLE__)
12750 { DOF_SECT_NONE
, 0, 0, 0, NULL
}
12752 { DOF_SECT_NONE
, 0, 0, 0, 0, NULL
}
12753 #endif /* __APPLE__ */
12756 if (sec
->dofs_type
!= DOF_SECT_DIFOHDR
) {
12757 dtrace_dof_error(dof
, "invalid DIFO header section");
12761 if (sec
->dofs_align
!= sizeof (dof_secidx_t
)) {
12762 dtrace_dof_error(dof
, "bad alignment in DIFO header");
12766 if (sec
->dofs_size
< sizeof (dof_difohdr_t
) ||
12767 sec
->dofs_size
% sizeof (dof_secidx_t
)) {
12768 dtrace_dof_error(dof
, "bad size in DIFO header");
12772 dofd
= (dof_difohdr_t
*)(uintptr_t)(daddr
+ sec
->dofs_offset
);
12773 n
= (sec
->dofs_size
- sizeof (*dofd
)) / sizeof (dof_secidx_t
) + 1;
12775 dp
= kmem_zalloc(sizeof (dtrace_difo_t
), KM_SLEEP
);
12776 dp
->dtdo_rtype
= dofd
->dofd_rtype
;
12778 for (l
= 0; l
< n
; l
++) {
12783 if ((subsec
= dtrace_dof_sect(dof
, DOF_SECT_NONE
,
12784 dofd
->dofd_links
[l
])) == NULL
)
12785 goto err
; /* invalid section link */
12787 if (ttl
+ subsec
->dofs_size
> max_size
) {
12788 dtrace_dof_error(dof
, "exceeds maximum size");
12792 ttl
+= subsec
->dofs_size
;
12794 for (i
= 0; difo
[i
].section
!= DOF_SECT_NONE
; i
++) {
12796 #if !defined(__APPLE__) /* Quiet compiler warnings */
12797 if (subsec
->dofs_type
!= difo
[i
].section
)
12800 if (subsec
->dofs_type
!= (uint32_t)difo
[i
].section
)
12802 #endif /* __APPLE __ */
12804 if (!(subsec
->dofs_flags
& DOF_SECF_LOAD
)) {
12805 dtrace_dof_error(dof
, "section not loaded");
12809 #if !defined(__APPLE__) /* Quiet compiler warnings */
12810 if (subsec
->dofs_align
!= difo
[i
].align
) {
12811 dtrace_dof_error(dof
, "bad alignment");
12815 if (subsec
->dofs_align
!= (uint32_t)difo
[i
].align
) {
12816 dtrace_dof_error(dof
, "bad alignment");
12819 #endif /* __APPLE__ */
12821 bufp
= (void **)((uintptr_t)dp
+ difo
[i
].bufoffs
);
12822 lenp
= (uint32_t *)((uintptr_t)dp
+ difo
[i
].lenoffs
);
12824 if (*bufp
!= NULL
) {
12825 dtrace_dof_error(dof
, difo
[i
].msg
);
12829 #if !defined(__APPLE__) /* Quiet compiler warnings */
12830 if (difo
[i
].entsize
!= subsec
->dofs_entsize
) {
12831 dtrace_dof_error(dof
, "entry size mismatch");
12835 if ((uint32_t)difo
[i
].entsize
!= subsec
->dofs_entsize
) {
12836 dtrace_dof_error(dof
, "entry size mismatch");
12839 #endif /* __APPLE__ */
12841 if (subsec
->dofs_entsize
!= 0 &&
12842 (subsec
->dofs_size
% subsec
->dofs_entsize
) != 0) {
12843 dtrace_dof_error(dof
, "corrupt entry size");
12847 *lenp
= subsec
->dofs_size
;
12848 *bufp
= kmem_alloc(subsec
->dofs_size
, KM_SLEEP
);
12849 bcopy((char *)(uintptr_t)(daddr
+ subsec
->dofs_offset
),
12850 *bufp
, subsec
->dofs_size
);
12852 if (subsec
->dofs_entsize
!= 0)
12853 *lenp
/= subsec
->dofs_entsize
;
12859 * If we encounter a loadable DIFO sub-section that is not
12860 * known to us, assume this is a broken program and fail.
12862 if (difo
[i
].section
== DOF_SECT_NONE
&&
12863 (subsec
->dofs_flags
& DOF_SECF_LOAD
)) {
12864 dtrace_dof_error(dof
, "unrecognized DIFO subsection");
12869 if (dp
->dtdo_buf
== NULL
) {
12871 * We can't have a DIF object without DIF text.
12873 dtrace_dof_error(dof
, "missing DIF text");
12878 * Before we validate the DIF object, run through the variable table
12879 * looking for the strings -- if any of their size are under, we'll set
12880 * their size to be the system-wide default string size. Note that
12881 * this should _not_ happen if the "strsize" option has been set --
12882 * in this case, the compiler should have set the size to reflect the
12883 * setting of the option.
12885 for (i
= 0; i
< dp
->dtdo_varlen
; i
++) {
12886 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
];
12887 dtrace_diftype_t
*t
= &v
->dtdv_type
;
12889 if (v
->dtdv_id
< DIF_VAR_OTHER_UBASE
)
12892 if (t
->dtdt_kind
== DIF_TYPE_STRING
&& t
->dtdt_size
== 0)
12893 t
->dtdt_size
= dtrace_strsize_default
;
12896 if (dtrace_difo_validate(dp
, vstate
, DIF_DIR_NREGS
, cr
) != 0)
12899 dtrace_difo_init(dp
, vstate
);
12903 kmem_free(dp
->dtdo_buf
, dp
->dtdo_len
* sizeof (dif_instr_t
));
12904 kmem_free(dp
->dtdo_inttab
, dp
->dtdo_intlen
* sizeof (uint64_t));
12905 kmem_free(dp
->dtdo_strtab
, dp
->dtdo_strlen
);
12906 kmem_free(dp
->dtdo_vartab
, dp
->dtdo_varlen
* sizeof (dtrace_difv_t
));
12908 kmem_free(dp
, sizeof (dtrace_difo_t
));
static dtrace_predicate_t *
dtrace_dof_predicate(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
    cred_t *cr)
{
	dtrace_difo_t *dp;

	if ((dp = dtrace_dof_difo(dof, sec, vstate, cr)) == NULL)
		return (NULL);

	return (dtrace_predicate_create(dp));
}
12924 static dtrace_actdesc_t
*
12925 dtrace_dof_actdesc(dof_hdr_t
*dof
, dof_sec_t
*sec
, dtrace_vstate_t
*vstate
,
12928 dtrace_actdesc_t
*act
, *first
= NULL
, *last
= NULL
, *next
;
12929 dof_actdesc_t
*desc
;
12930 dof_sec_t
*difosec
;
12932 uintptr_t daddr
= (uintptr_t)dof
;
12934 dtrace_actkind_t kind
;
12936 if (sec
->dofs_type
!= DOF_SECT_ACTDESC
) {
12937 dtrace_dof_error(dof
, "invalid action section");
12941 if (sec
->dofs_offset
+ sizeof (dof_actdesc_t
) > dof
->dofh_loadsz
) {
12942 dtrace_dof_error(dof
, "truncated action description");
12946 if (sec
->dofs_align
!= sizeof (uint64_t)) {
12947 dtrace_dof_error(dof
, "bad alignment in action description");
12951 if (sec
->dofs_size
< sec
->dofs_entsize
) {
12952 dtrace_dof_error(dof
, "section entry size exceeds total size");
12956 if (sec
->dofs_entsize
!= sizeof (dof_actdesc_t
)) {
12957 dtrace_dof_error(dof
, "bad entry size in action description");
12961 if (sec
->dofs_size
/ sec
->dofs_entsize
> dtrace_actions_max
) {
12962 dtrace_dof_error(dof
, "actions exceed dtrace_actions_max");
12966 for (offs
= 0; offs
< sec
->dofs_size
; offs
+= sec
->dofs_entsize
) {
12967 desc
= (dof_actdesc_t
*)(daddr
+
12968 (uintptr_t)sec
->dofs_offset
+ offs
);
12969 kind
= (dtrace_actkind_t
)desc
->dofa_kind
;
12971 if (DTRACEACT_ISPRINTFLIKE(kind
) &&
12972 (kind
!= DTRACEACT_PRINTA
||
12973 desc
->dofa_strtab
!= DOF_SECIDX_NONE
)) {
12979 * printf()-like actions must have a format string.
12981 if ((strtab
= dtrace_dof_sect(dof
,
12982 DOF_SECT_STRTAB
, desc
->dofa_strtab
)) == NULL
)
12985 str
= (char *)((uintptr_t)dof
+
12986 (uintptr_t)strtab
->dofs_offset
);
12988 for (i
= desc
->dofa_arg
; i
< strtab
->dofs_size
; i
++) {
12989 if (str
[i
] == '\0')
12993 if (i
>= strtab
->dofs_size
) {
12994 dtrace_dof_error(dof
, "bogus format string");
12998 if (i
== desc
->dofa_arg
) {
12999 dtrace_dof_error(dof
, "empty format string");
13003 i
-= desc
->dofa_arg
;
13004 fmt
= kmem_alloc(i
+ 1, KM_SLEEP
);
13005 bcopy(&str
[desc
->dofa_arg
], fmt
, i
+ 1);
13006 arg
= (uint64_t)(uintptr_t)fmt
;
13008 if (kind
== DTRACEACT_PRINTA
) {
13009 ASSERT(desc
->dofa_strtab
== DOF_SECIDX_NONE
);
13012 arg
= desc
->dofa_arg
;
13016 act
= dtrace_actdesc_create(kind
, desc
->dofa_ntuple
,
13017 desc
->dofa_uarg
, arg
);
13019 if (last
!= NULL
) {
13020 last
->dtad_next
= act
;
13027 if (desc
->dofa_difo
== DOF_SECIDX_NONE
)
13030 if ((difosec
= dtrace_dof_sect(dof
,
13031 DOF_SECT_DIFOHDR
, desc
->dofa_difo
)) == NULL
)
13034 act
->dtad_difo
= dtrace_dof_difo(dof
, difosec
, vstate
, cr
);
13036 if (act
->dtad_difo
== NULL
)
13040 ASSERT(first
!= NULL
);
13044 for (act
= first
; act
!= NULL
; act
= next
) {
13045 next
= act
->dtad_next
;
13046 dtrace_actdesc_release(act
, vstate
);
13052 static dtrace_ecbdesc_t
*
13053 dtrace_dof_ecbdesc(dof_hdr_t
*dof
, dof_sec_t
*sec
, dtrace_vstate_t
*vstate
,
13056 dtrace_ecbdesc_t
*ep
;
13057 dof_ecbdesc_t
*ecb
;
13058 dtrace_probedesc_t
*desc
;
13059 dtrace_predicate_t
*pred
= NULL
;
13061 if (sec
->dofs_size
< sizeof (dof_ecbdesc_t
)) {
13062 dtrace_dof_error(dof
, "truncated ECB description");
13066 if (sec
->dofs_align
!= sizeof (uint64_t)) {
13067 dtrace_dof_error(dof
, "bad alignment in ECB description");
13071 ecb
= (dof_ecbdesc_t
*)((uintptr_t)dof
+ (uintptr_t)sec
->dofs_offset
);
13072 sec
= dtrace_dof_sect(dof
, DOF_SECT_PROBEDESC
, ecb
->dofe_probes
);
13077 ep
= kmem_zalloc(sizeof (dtrace_ecbdesc_t
), KM_SLEEP
);
13078 ep
->dted_uarg
= ecb
->dofe_uarg
;
13079 desc
= &ep
->dted_probe
;
13081 if (dtrace_dof_probedesc(dof
, sec
, desc
) == NULL
)
13084 if (ecb
->dofe_pred
!= DOF_SECIDX_NONE
) {
13085 if ((sec
= dtrace_dof_sect(dof
,
13086 DOF_SECT_DIFOHDR
, ecb
->dofe_pred
)) == NULL
)
13089 if ((pred
= dtrace_dof_predicate(dof
, sec
, vstate
, cr
)) == NULL
)
13092 ep
->dted_pred
.dtpdd_predicate
= pred
;
13095 if (ecb
->dofe_actions
!= DOF_SECIDX_NONE
) {
13096 if ((sec
= dtrace_dof_sect(dof
,
13097 DOF_SECT_ACTDESC
, ecb
->dofe_actions
)) == NULL
)
13100 ep
->dted_action
= dtrace_dof_actdesc(dof
, sec
, vstate
, cr
);
13102 if (ep
->dted_action
== NULL
)
13110 dtrace_predicate_release(pred
, vstate
);
13111 kmem_free(ep
, sizeof (dtrace_ecbdesc_t
));
13115 #if !defined(__APPLE__) /* APPLE dyld has already done this for us */
13117 * Apply the relocations from the specified 'sec' (a DOF_SECT_URELHDR) to the
13118 * specified DOF. At present, this amounts to simply adding 'ubase' to the
13119 * site of any user SETX relocations to account for load object base address.
13120 * In the future, if we need other relocations, this function can be extended.
13123 dtrace_dof_relocate(dof_hdr_t
*dof
, dof_sec_t
*sec
, uint64_t ubase
)
13125 uintptr_t daddr
= (uintptr_t)dof
;
13126 dof_relohdr_t
*dofr
=
13127 (dof_relohdr_t
*)(uintptr_t)(daddr
+ sec
->dofs_offset
);
13128 dof_sec_t
*ss
, *rs
, *ts
;
13132 if (sec
->dofs_size
< sizeof (dof_relohdr_t
) ||
13133 sec
->dofs_align
!= sizeof (dof_secidx_t
)) {
13134 dtrace_dof_error(dof
, "invalid relocation header");
13138 ss
= dtrace_dof_sect(dof
, DOF_SECT_STRTAB
, dofr
->dofr_strtab
);
13139 rs
= dtrace_dof_sect(dof
, DOF_SECT_RELTAB
, dofr
->dofr_relsec
);
13140 ts
= dtrace_dof_sect(dof
, DOF_SECT_NONE
, dofr
->dofr_tgtsec
);
13142 if (ss
== NULL
|| rs
== NULL
|| ts
== NULL
)
13143 return (-1); /* dtrace_dof_error() has been called already */
13145 if (rs
->dofs_entsize
< sizeof (dof_relodesc_t
) ||
13146 rs
->dofs_align
!= sizeof (uint64_t)) {
13147 dtrace_dof_error(dof
, "invalid relocation section");
13151 r
= (dof_relodesc_t
*)(uintptr_t)(daddr
+ rs
->dofs_offset
);
13152 n
= rs
->dofs_size
/ rs
->dofs_entsize
;
13154 for (i
= 0; i
< n
; i
++) {
13155 uintptr_t taddr
= daddr
+ ts
->dofs_offset
+ r
->dofr_offset
;
13157 switch (r
->dofr_type
) {
13158 case DOF_RELO_NONE
:
13160 case DOF_RELO_SETX
:
13161 if (r
->dofr_offset
>= ts
->dofs_size
|| r
->dofr_offset
+
13162 sizeof (uint64_t) > ts
->dofs_size
) {
13163 dtrace_dof_error(dof
, "bad relocation offset");
13167 if (!IS_P2ALIGNED(taddr
, sizeof (uint64_t))) {
13168 dtrace_dof_error(dof
, "misaligned setx relo");
13172 *(uint64_t *)taddr
+= ubase
;
13175 dtrace_dof_error(dof
, "invalid relocation type");
13179 r
= (dof_relodesc_t
*)((uintptr_t)r
+ rs
->dofs_entsize
);
13184 #endif /* __APPLE__ */
13187 * The dof_hdr_t passed to dtrace_dof_slurp() should be a partially validated
13188 * header: it should be at the front of a memory region that is at least
13189 * sizeof (dof_hdr_t) in size -- and then at least dof_hdr.dofh_loadsz in
13190 * size. It need not be validated in any other way.
13193 dtrace_dof_slurp(dof_hdr_t
*dof
, dtrace_vstate_t
*vstate
, cred_t
*cr
,
13194 dtrace_enabling_t
**enabp
, uint64_t ubase
, int noprobes
)
13196 #pragma unused(ubase) /* __APPLE__ */
13197 uint64_t len
= dof
->dofh_loadsz
, seclen
;
13198 uintptr_t daddr
= (uintptr_t)dof
;
13199 dtrace_ecbdesc_t
*ep
;
13200 dtrace_enabling_t
*enab
;
13203 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
13204 ASSERT(dof
->dofh_loadsz
>= sizeof (dof_hdr_t
));
13207 * Check the DOF header identification bytes. In addition to checking
13208 * valid settings, we also verify that unused bits/bytes are zeroed so
13209 * we can use them later without fear of regressing existing binaries.
13211 if (bcmp(&dof
->dofh_ident
[DOF_ID_MAG0
],
13212 DOF_MAG_STRING
, DOF_MAG_STRLEN
) != 0) {
13213 dtrace_dof_error(dof
, "DOF magic string mismatch");
13217 if (dof
->dofh_ident
[DOF_ID_MODEL
] != DOF_MODEL_ILP32
&&
13218 dof
->dofh_ident
[DOF_ID_MODEL
] != DOF_MODEL_LP64
) {
13219 dtrace_dof_error(dof
, "DOF has invalid data model");
13223 if (dof
->dofh_ident
[DOF_ID_ENCODING
] != DOF_ENCODE_NATIVE
) {
13224 dtrace_dof_error(dof
, "DOF encoding mismatch");
13228 #if !defined(__APPLE__)
13229 if (dof
->dofh_ident
[DOF_ID_VERSION
] != DOF_VERSION_1
&&
13230 dof
->dofh_ident
[DOF_ID_VERSION
] != DOF_VERSION_2
) {
13231 dtrace_dof_error(dof
, "DOF version mismatch");
13236 * We only support DOF_VERSION_3 for now.
13238 if (dof
->dofh_ident
[DOF_ID_VERSION
] != DOF_VERSION_3
) {
13239 dtrace_dof_error(dof
, "DOF version mismatch");
13244 if (dof
->dofh_ident
[DOF_ID_DIFVERS
] != DIF_VERSION_2
) {
13245 dtrace_dof_error(dof
, "DOF uses unsupported instruction set");
13249 if (dof
->dofh_ident
[DOF_ID_DIFIREG
] > DIF_DIR_NREGS
) {
13250 dtrace_dof_error(dof
, "DOF uses too many integer registers");
13254 if (dof
->dofh_ident
[DOF_ID_DIFTREG
] > DIF_DTR_NREGS
) {
13255 dtrace_dof_error(dof
, "DOF uses too many tuple registers");
13259 for (i
= DOF_ID_PAD
; i
< DOF_ID_SIZE
; i
++) {
13260 if (dof
->dofh_ident
[i
] != 0) {
13261 dtrace_dof_error(dof
, "DOF has invalid ident byte set");
13266 if (dof
->dofh_flags
& ~DOF_FL_VALID
) {
13267 dtrace_dof_error(dof
, "DOF has invalid flag bits set");
13271 if (dof
->dofh_secsize
== 0) {
13272 dtrace_dof_error(dof
, "zero section header size");
13277 * Check that the section headers don't exceed the amount of DOF
13278 * data. Note that we cast the section size and number of sections
13279 * to uint64_t's to prevent possible overflow in the multiplication.
13281 seclen
= (uint64_t)dof
->dofh_secnum
* (uint64_t)dof
->dofh_secsize
;
13283 if (dof
->dofh_secoff
> len
|| seclen
> len
||
13284 dof
->dofh_secoff
+ seclen
> len
) {
13285 dtrace_dof_error(dof
, "truncated section headers");
13289 if (!IS_P2ALIGNED(dof
->dofh_secoff
, sizeof (uint64_t))) {
13290 dtrace_dof_error(dof
, "misaligned section headers");
13294 if (!IS_P2ALIGNED(dof
->dofh_secsize
, sizeof (uint64_t))) {
13295 dtrace_dof_error(dof
, "misaligned section size");
13300 * Take an initial pass through the section headers to be sure that
13301 * the headers don't have stray offsets. If the 'noprobes' flag is
13302 * set, do not permit sections relating to providers, probes, or args.
13304 for (i
= 0; i
< dof
->dofh_secnum
; i
++) {
13305 dof_sec_t
*sec
= (dof_sec_t
*)(daddr
+
13306 (uintptr_t)dof
->dofh_secoff
+ i
* dof
->dofh_secsize
);
13309 switch (sec
->dofs_type
) {
13310 case DOF_SECT_PROVIDER
:
13311 case DOF_SECT_PROBES
:
13312 case DOF_SECT_PRARGS
:
13313 case DOF_SECT_PROFFS
:
13314 dtrace_dof_error(dof
, "illegal sections "
13320 if (!(sec
->dofs_flags
& DOF_SECF_LOAD
))
13321 continue; /* just ignore non-loadable sections */
13323 if (sec
->dofs_align
& (sec
->dofs_align
- 1)) {
13324 dtrace_dof_error(dof
, "bad section alignment");
13328 if (sec
->dofs_offset
& (sec
->dofs_align
- 1)) {
13329 dtrace_dof_error(dof
, "misaligned section");
13333 if (sec
->dofs_offset
> len
|| sec
->dofs_size
> len
||
13334 sec
->dofs_offset
+ sec
->dofs_size
> len
) {
13335 dtrace_dof_error(dof
, "corrupt section header");
13339 if (sec
->dofs_type
== DOF_SECT_STRTAB
&& *((char *)daddr
+
13340 sec
->dofs_offset
+ sec
->dofs_size
- 1) != '\0') {
13341 dtrace_dof_error(dof
, "non-terminating string table");
13346 #if !defined(__APPLE__)
13348 * Take a second pass through the sections and locate and perform any
13349 * relocations that are present. We do this after the first pass to
13350 * be sure that all sections have had their headers validated.
13352 for (i
= 0; i
< dof
->dofh_secnum
; i
++) {
13353 dof_sec_t
*sec
= (dof_sec_t
*)(daddr
+
13354 (uintptr_t)dof
->dofh_secoff
+ i
* dof
->dofh_secsize
);
13356 if (!(sec
->dofs_flags
& DOF_SECF_LOAD
))
13357 continue; /* skip sections that are not loadable */
13359 switch (sec
->dofs_type
) {
13360 case DOF_SECT_URELHDR
:
13361 if (dtrace_dof_relocate(dof
, sec
, ubase
) != 0)
13368 * APPLE NOTE: We have no relocation to perform. All dof values are
13369 * relative offsets.
13371 #endif /* __APPLE__ */
13373 if ((enab
= *enabp
) == NULL
)
13374 enab
= *enabp
= dtrace_enabling_create(vstate
);
13376 for (i
= 0; i
< dof
->dofh_secnum
; i
++) {
13377 dof_sec_t
*sec
= (dof_sec_t
*)(daddr
+
13378 (uintptr_t)dof
->dofh_secoff
+ i
* dof
->dofh_secsize
);
13380 if (sec
->dofs_type
!= DOF_SECT_ECBDESC
)
13383 #if !defined(__APPLE__)
13384 if ((ep
= dtrace_dof_ecbdesc(dof
, sec
, vstate
, cr
)) == NULL
) {
13385 dtrace_enabling_destroy(enab
);
13390 /* Note: Defend against gcc 4.0 botch on x86 (not all paths out of inlined dtrace_dof_ecbdesc
13391 are checked for the NULL return value.) */
13392 ep
= dtrace_dof_ecbdesc(dof
, sec
, vstate
, cr
);
13394 dtrace_enabling_destroy(enab
);
13398 #endif /* __APPLE__ */
13400 dtrace_enabling_add(enab
, ep
);
13407 * Process DOF for any options. This routine assumes that the DOF has been
13408 * at least processed by dtrace_dof_slurp().
13411 dtrace_dof_options(dof_hdr_t
*dof
, dtrace_state_t
*state
)
13413 #if !defined(__APPLE__) /* Quiet compiler warnings */
13418 #endif /* __APPLE__ */
13421 dof_optdesc_t
*desc
;
13423 for (i
= 0; i
< dof
->dofh_secnum
; i
++) {
13424 dof_sec_t
*sec
= (dof_sec_t
*)((uintptr_t)dof
+
13425 (uintptr_t)dof
->dofh_secoff
+ i
* dof
->dofh_secsize
);
13427 if (sec
->dofs_type
!= DOF_SECT_OPTDESC
)
13430 if (sec
->dofs_align
!= sizeof (uint64_t)) {
13431 dtrace_dof_error(dof
, "bad alignment in "
13432 "option description");
13436 if ((entsize
= sec
->dofs_entsize
) == 0) {
13437 dtrace_dof_error(dof
, "zeroed option entry size");
13441 if (entsize
< sizeof (dof_optdesc_t
)) {
13442 dtrace_dof_error(dof
, "bad option entry size");
13446 for (offs
= 0; offs
< sec
->dofs_size
; offs
+= entsize
) {
13447 desc
= (dof_optdesc_t
*)((uintptr_t)dof
+
13448 (uintptr_t)sec
->dofs_offset
+ offs
);
13450 if (desc
->dofo_strtab
!= DOF_SECIDX_NONE
) {
13451 dtrace_dof_error(dof
, "non-zero option string");
13455 #if !defined(__APPLE__) /* Quiet compiler warnings */
13456 if (desc
->dofo_value
== DTRACEOPT_UNSET
) {
13458 if (desc
->dofo_value
== (uint64_t)DTRACEOPT_UNSET
) {
13459 #endif /* __APPLE __ */
13460 dtrace_dof_error(dof
, "unset option");
13464 if ((rval
= dtrace_state_option(state
,
13465 desc
->dofo_option
, desc
->dofo_value
)) != 0) {
13466 dtrace_dof_error(dof
, "rejected option");
13476 * DTrace Consumer State Functions
13478 #if defined(__APPLE__) /* Quiet compiler warning. */
13480 #endif /* __APPLE__ */
13482 dtrace_dstate_init(dtrace_dstate_t
*dstate
, size_t size
)
13484 size_t hashsize
, maxper
, min_size
, chunksize
= dstate
->dtds_chunksize
;
13487 dtrace_dynvar_t
*dvar
, *next
, *start
;
13488 #if !defined(__APPLE__) /* Quiet compiler warning */
13492 #endif /* __APPLE__ */
13494 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
13495 ASSERT(dstate
->dtds_base
== NULL
&& dstate
->dtds_percpu
== NULL
);
13497 bzero(dstate
, sizeof (dtrace_dstate_t
));
13499 if ((dstate
->dtds_chunksize
= chunksize
) == 0)
13500 dstate
->dtds_chunksize
= DTRACE_DYNVAR_CHUNKSIZE
;
13502 if (size
< (min_size
= dstate
->dtds_chunksize
+ sizeof (dtrace_dynhash_t
)))
13505 if ((base
= kmem_zalloc(size
, KM_NOSLEEP
)) == NULL
)
13508 dstate
->dtds_size
= size
;
13509 dstate
->dtds_base
= base
;
13510 dstate
->dtds_percpu
= kmem_cache_alloc(dtrace_state_cache
, KM_SLEEP
);
13511 bzero(dstate
->dtds_percpu
, (int)NCPU
* sizeof (dtrace_dstate_percpu_t
));
13513 hashsize
= size
/ (dstate
->dtds_chunksize
+ sizeof (dtrace_dynhash_t
));
13515 if (hashsize
!= 1 && (hashsize
& 1))
13518 dstate
->dtds_hashsize
= hashsize
;
13519 dstate
->dtds_hash
= dstate
->dtds_base
;
13522 * Set all of our hash buckets to point to the single sink, and (if
13523 * it hasn't already been set), set the sink's hash value to be the
13524 * sink sentinel value. The sink is needed for dynamic variable
13525 * lookups to know that they have iterated over an entire, valid hash
13528 for (i
= 0; i
< hashsize
; i
++)
13529 dstate
->dtds_hash
[i
].dtdh_chain
= &dtrace_dynhash_sink
;
13531 if (dtrace_dynhash_sink
.dtdv_hashval
!= DTRACE_DYNHASH_SINK
)
13532 dtrace_dynhash_sink
.dtdv_hashval
= DTRACE_DYNHASH_SINK
;
13535 * Determine number of active CPUs. Divide free list evenly among
13538 start
= (dtrace_dynvar_t
*)
13539 ((uintptr_t)base
+ hashsize
* sizeof (dtrace_dynhash_t
));
13540 limit
= (uintptr_t)base
+ size
;
13542 maxper
= (limit
- (uintptr_t)start
) / (int)NCPU
;
13543 maxper
= (maxper
/ dstate
->dtds_chunksize
) * dstate
->dtds_chunksize
;
13545 for (i
= 0; i
< NCPU
; i
++) {
13546 dstate
->dtds_percpu
[i
].dtdsc_free
= dvar
= start
;
13549 * If we don't even have enough chunks to make it once through
13550 * NCPUs, we're just going to allocate everything to the first
13551 * CPU. And if we're on the last CPU, we're going to allocate
13552 * whatever is left over. In either case, we set the limit to
13553 * be the limit of the dynamic variable space.
13555 if (maxper
== 0 || i
== NCPU
- 1) {
13556 limit
= (uintptr_t)base
+ size
;
13559 limit
= (uintptr_t)start
+ maxper
;
13560 start
= (dtrace_dynvar_t
*)limit
;
13563 ASSERT(limit
<= (uintptr_t)base
+ size
);
13566 next
= (dtrace_dynvar_t
*)((uintptr_t)dvar
+
13567 dstate
->dtds_chunksize
);
13569 if ((uintptr_t)next
+ dstate
->dtds_chunksize
>= limit
)
13572 dvar
->dtdv_next
= next
;
#if defined(__APPLE__) /* Quiet compiler warning. */
static
#endif /* __APPLE__ */
void
dtrace_dstate_fini(dtrace_dstate_t *dstate)
{
	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);

	if (dstate->dtds_base == NULL)
		return;

	kmem_free(dstate->dtds_base, dstate->dtds_size);
	kmem_cache_free(dtrace_state_cache, dstate->dtds_percpu);
}
static void
dtrace_vstate_fini(dtrace_vstate_t *vstate)
{
	/*
	 * Logical XOR, where are you?
	 */
	ASSERT((vstate->dtvs_nglobals == 0) ^ (vstate->dtvs_globals != NULL));

	if (vstate->dtvs_nglobals > 0) {
		kmem_free(vstate->dtvs_globals, vstate->dtvs_nglobals *
		    sizeof (dtrace_statvar_t *));
	}

	if (vstate->dtvs_ntlocals > 0) {
		kmem_free(vstate->dtvs_tlocals, vstate->dtvs_ntlocals *
		    sizeof (dtrace_difv_t));
	}

	ASSERT((vstate->dtvs_nlocals == 0) ^ (vstate->dtvs_locals != NULL));

	if (vstate->dtvs_nlocals > 0) {
		kmem_free(vstate->dtvs_locals, vstate->dtvs_nlocals *
		    sizeof (dtrace_statvar_t *));
	}
}
static void
dtrace_state_clean(dtrace_state_t *state)
{
	if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE)
		return;

	dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars);
	dtrace_speculation_clean(state);
}
static void
dtrace_state_deadman(dtrace_state_t *state)
{
	hrtime_t now;

	now = dtrace_gethrtime();

	if (state != dtrace_anon.dta_state &&
	    now - state->dts_laststatus >= dtrace_deadman_user)
		return;

	/*
	 * We must be sure that dts_alive never appears to be less than the
	 * value upon entry to dtrace_state_deadman(), and because we lack a
	 * dtrace_cas64(), we cannot store to it atomically.  We thus instead
	 * store INT64_MAX to it, followed by a memory barrier, followed by
	 * the new value.  This assures that dts_alive never appears to be
	 * less than its true value, regardless of the order in which the
	 * stores to the underlying storage are issued.
	 */
	state->dts_alive = INT64_MAX;
	dtrace_membar_producer();
	state->dts_alive = now;
}
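/*
 * To illustrate the ordering concern above: if dts_alive were simply
 * assigned, a torn store (e.g. the low 32 bits landing first) could
 * momentarily make the value appear to go backward to a concurrent reader.
 * Writing INT64_MAX first and fencing with dtrace_membar_producer() ensures
 * that any intermediate value observed is never less than the previous
 * dts_alive.
 */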
13661 #if !defined(__APPLE__)
13663 dtrace_state_create(dev_t
*devp
, cred_t
*cr
)
13666 dtrace_state_create(dev_t
*devp
, cred_t
*cr
, dtrace_state_t
**new_state
)
13667 #endif /* __APPLE__ */
13672 dtrace_state_t
*state
;
13673 dtrace_optval_t
*opt
;
13674 int bufsize
= (int)NCPU
* sizeof (dtrace_buffer_t
), i
;
13676 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
13677 lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
);
13679 #if !defined(__APPLE__)
13680 minor
= (minor_t
)(uintptr_t)vmem_alloc(dtrace_minor
, 1,
13681 VM_BESTFIT
| VM_SLEEP
);
13683 if (ddi_soft_state_zalloc(dtrace_softstate
, minor
) != DDI_SUCCESS
) {
13684 vmem_free(dtrace_minor
, (void *)(uintptr_t)minor
, 1);
13688 /* Cause restart */
13692 * Darwin's DEVFS layer acquired the minor number for this "device" when it called
13693 * dtrace_devfs_clone_func(). At that time, dtrace_devfs_clone_func() proposed a minor number
13694 * (next unused according to vmem_alloc()) and then immediately put the number back in play
13695 * (by calling vmem_free()). Now that minor number is being used for an open, so committing it
13696 * to use. The following vmem_alloc() must deliver that same minor number. FIXME.
13699 minor
= (minor_t
)(uintptr_t)vmem_alloc(dtrace_minor
, 1,
13700 VM_BESTFIT
| VM_SLEEP
);
13702 if (NULL
!= devp
) {
13703 ASSERT(getminor(*devp
) == minor
);
13704 if (getminor(*devp
) != minor
) {
13705 printf("dtrace_open: couldn't re-acquire vended minor number %d. Instead got %d\n",
13706 getminor(*devp
), minor
);
13707 vmem_free(dtrace_minor
, (void *)(uintptr_t)minor
, 1);
13708 return (ERESTART
); /* can't reacquire */
13711 /* NULL==devp iff "Anonymous state" (see dtrace_anon_property),
13712 * so just vend the minor device number here de novo since no "open" has occurred. */
13715 if (ddi_soft_state_zalloc(dtrace_softstate
, minor
) != DDI_SUCCESS
) {
13716 vmem_free(dtrace_minor
, (void *)(uintptr_t)minor
, 1);
13717 return (EAGAIN
); /* temporary resource shortage */
13720 #endif /* __APPLE__ */
13722 state
= ddi_get_soft_state(dtrace_softstate
, minor
);
13723 state
->dts_epid
= DTRACE_EPIDNONE
+ 1;
13725 (void) snprintf(c
, sizeof (c
), "dtrace_aggid_%d", minor
);
13726 state
->dts_aggid_arena
= vmem_create(c
, (void *)1, UINT32_MAX
, 1,
13727 NULL
, NULL
, NULL
, 0, VM_SLEEP
| VMC_IDENTIFIER
);
13729 if (devp
!= NULL
) {
13730 major
= getemajor(*devp
);
13732 major
= ddi_driver_major(dtrace_devi
);
13735 state
->dts_dev
= makedevice(major
, minor
);
13738 *devp
= state
->dts_dev
;
13741 * We allocate NCPU buffers. On the one hand, this can be quite
13742 * a bit of memory per instance (nearly 36K on a Starcat). On the
13743 * other hand, it saves an additional memory reference in the probe
13746 state
->dts_buffer
= kmem_zalloc(bufsize
, KM_SLEEP
);
13747 state
->dts_aggbuffer
= kmem_zalloc(bufsize
, KM_SLEEP
);
13748 state
->dts_cleaner
= CYCLIC_NONE
;
13749 state
->dts_deadman
= CYCLIC_NONE
;
13750 state
->dts_vstate
.dtvs_state
= state
;
13752 for (i
= 0; i
< DTRACEOPT_MAX
; i
++)
13753 state
->dts_options
[i
] = DTRACEOPT_UNSET
;
13756 * Set the default options.
13758 opt
= state
->dts_options
;
13759 opt
[DTRACEOPT_BUFPOLICY
] = DTRACEOPT_BUFPOLICY_SWITCH
;
13760 opt
[DTRACEOPT_BUFRESIZE
] = DTRACEOPT_BUFRESIZE_AUTO
;
13761 opt
[DTRACEOPT_NSPEC
] = dtrace_nspec_default
;
13762 opt
[DTRACEOPT_SPECSIZE
] = dtrace_specsize_default
;
13763 opt
[DTRACEOPT_CPU
] = (dtrace_optval_t
)DTRACE_CPUALL
;
13764 opt
[DTRACEOPT_STRSIZE
] = dtrace_strsize_default
;
13765 opt
[DTRACEOPT_STACKFRAMES
] = dtrace_stackframes_default
;
13766 opt
[DTRACEOPT_USTACKFRAMES
] = dtrace_ustackframes_default
;
13767 opt
[DTRACEOPT_CLEANRATE
] = dtrace_cleanrate_default
;
13768 opt
[DTRACEOPT_AGGRATE
] = dtrace_aggrate_default
;
13769 opt
[DTRACEOPT_SWITCHRATE
] = dtrace_switchrate_default
;
13770 opt
[DTRACEOPT_STATUSRATE
] = dtrace_statusrate_default
;
13771 opt
[DTRACEOPT_JSTACKFRAMES
] = dtrace_jstackframes_default
;
13772 opt
[DTRACEOPT_JSTACKSTRSIZE
] = dtrace_jstackstrsize_default
;
13774 state
->dts_activity
= DTRACE_ACTIVITY_INACTIVE
;
	/*
	 * Depending on the user credentials, we set flag bits which alter probe
	 * visibility or the amount of destructiveness allowed.  In the case of
	 * actual anonymous tracing, or the possession of all privileges, all of
	 * the normal checks are bypassed.
	 */
	if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
		state->dts_cred.dcr_visible = DTRACE_CRV_ALL;
		state->dts_cred.dcr_action = DTRACE_CRA_ALL;
	} else {
		/*
		 * Set up the credentials for this instantiation.  We take a
		 * hold on the credential to prevent it from disappearing on
		 * us; this in turn prevents the zone_t referenced by this
		 * credential from disappearing.  This means that we can
		 * examine the credential and the zone from probe context.
		 */
		crhold(cr);
		state->dts_cred.dcr_cred = cr;

		/*
		 * CRA_PROC means "we have *some* privilege for dtrace" and
		 * unlocks the use of variables like pid, zonename, etc.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE) ||
		    PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
			state->dts_cred.dcr_action |= DTRACE_CRA_PROC;
		}

		/*
		 * dtrace_user allows use of syscall and profile providers.
		 * If the user also has proc_owner and/or proc_zone, we
		 * extend the scope to include additional visibility and
		 * destructive power.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE)) {
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) {
				state->dts_cred.dcr_visible |=
				    DTRACE_CRV_ALLPROC;

				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
			}

			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) {
				state->dts_cred.dcr_visible |=
				    DTRACE_CRV_ALLZONE;

				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
			}

			/*
			 * If we have all privs in whatever zone this is,
			 * we can do destructive things to processes which
			 * have altered credentials.
			 */
#if !defined(__APPLE__)
			if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
			    cr->cr_zone->zone_privset)) {
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
			}
#else
			/* Darwin doesn't do zones. */
			state->dts_cred.dcr_action |=
			    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
#endif /* __APPLE__ */
		}

		/*
		 * Holding the dtrace_kernel privilege also implies that
		 * the user has the dtrace_user privilege from a visibility
		 * perspective.  But without further privileges, some
		 * destructive actions are not available.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE)) {
			/*
			 * Make all probes in all zones visible.  However,
			 * this doesn't mean that all actions become available
			 * to all zones.
			 */
			state->dts_cred.dcr_visible |= DTRACE_CRV_KERNEL |
			    DTRACE_CRV_ALLPROC | DTRACE_CRV_ALLZONE;

			state->dts_cred.dcr_action |= DTRACE_CRA_KERNEL |
			    DTRACE_CRA_PROC;

			/*
			 * Holding proc_owner means that destructive actions
			 * for *this* zone are allowed.
			 */
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;

			/*
			 * Holding proc_zone means that destructive actions
			 * for this user/group ID in all zones is allowed.
			 */
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;

			/*
			 * If we have all privs in whatever zone this is,
			 * we can do destructive things to processes which
			 * have altered credentials.
			 */
#if !defined(__APPLE__)
			if (priv_isequalset(priv_getset(cr, PRIV_EFFECTIVE),
			    cr->cr_zone->zone_privset)) {
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
			}
#else
			/* Darwin doesn't do zones. */
			state->dts_cred.dcr_action |=
			    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
#endif /* __APPLE__ */
		}

		/*
		 * Holding the dtrace_proc privilege gives control over fasttrap
		 * and pid providers.  We need to grant wider destructive
		 * privileges in the event that the user has proc_owner and/or
		 * proc_zone.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;

			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
		}
	}

#if !defined(__APPLE__)
	return (state);
#else
	*new_state = state;
	return(0);  /* Success */
#endif /* __APPLE__ */
}
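/*
 * Summary of the privilege checks above: the settings are cumulative.  A NULL
 * cr or PRIV_ALL bypasses all checks (DTRACE_CRV_ALL / DTRACE_CRA_ALL);
 * dtrace_proc or dtrace_user grants DTRACE_CRA_PROC; dtrace_user combined with
 * proc_owner/proc_zone widens visibility (ALLPROC/ALLZONE) and destructive
 * scope; dtrace_kernel makes all probes visible (KERNEL | ALLPROC | ALLZONE)
 * but still gates the ALLUSER/ALLZONE destructive rights on proc_owner and
 * proc_zone.  On Darwin, which has no zones, the CREDCHG destructive right is
 * granted unconditionally in those branches.
 */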
static int
dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
{
	dtrace_optval_t *opt = state->dts_options, size;
	processorid_t cpu = 0;
	int flags = 0, rval;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(which < DTRACEOPT_MAX);
	ASSERT(state->dts_activity == DTRACE_ACTIVITY_INACTIVE ||
	    (state == dtrace_anon.dta_state &&
	    state->dts_activity == DTRACE_ACTIVITY_ACTIVE));

	if (opt[which] == DTRACEOPT_UNSET || opt[which] == 0)
		return (0);

	if (opt[DTRACEOPT_CPU] != DTRACEOPT_UNSET)
		cpu = opt[DTRACEOPT_CPU];

	if (which == DTRACEOPT_SPECSIZE)
		flags |= DTRACEBUF_NOSWITCH;

	if (which == DTRACEOPT_BUFSIZE) {
		if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_RING)
			flags |= DTRACEBUF_RING;

		if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_FILL)
			flags |= DTRACEBUF_FILL;

		if (state != dtrace_anon.dta_state ||
		    state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
			flags |= DTRACEBUF_INACTIVE;
	}

#if !defined(__APPLE__) /* Quiet compiler warning */
	for (size = opt[which]; size >= sizeof (uint64_t); size >>= 1) {
#else
	for (size = opt[which]; (size_t)size >= sizeof (uint64_t); size >>= 1) {
#endif /* __APPLE__ */
		/*
		 * The size must be 8-byte aligned.  If the size is not 8-byte
		 * aligned, drop it down by the difference.
		 */
		if (size & (sizeof (uint64_t) - 1))
			size -= size & (sizeof (uint64_t) - 1);

		if (size < state->dts_reserve) {
			/*
			 * Buffers always must be large enough to accommodate
			 * their prereserved space.  We return E2BIG instead
			 * of ENOMEM in this case to allow for user-level
			 * software to differentiate the cases.
			 */
			return (E2BIG);
		}

		rval = dtrace_buffer_alloc(buf, size, flags, cpu);

		if (rval != ENOMEM) {
			opt[which] = size;
			return (rval);
		}

		if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
			return (rval);
	}

	return (ENOMEM);
}
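/*
 * Worked example (hypothetical sizes): with bufresize=auto, a request for
 * 1MB + 12 bytes is first aligned down to 1MB; if dtrace_buffer_alloc()
 * returns ENOMEM the request is retried at 512KB, 256KB, and so on.  The
 * loop stops with E2BIG as soon as the candidate size falls below
 * dts_reserve, and with ENOMEM once it drops below sizeof (uint64_t).  With
 * bufresize=manual the first ENOMEM is returned as-is.
 */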
static int
dtrace_state_buffers(dtrace_state_t *state)
{
	dtrace_speculation_t *spec = state->dts_speculations;
	int rval, i;

	if ((rval = dtrace_state_buffer(state, state->dts_buffer,
	    DTRACEOPT_BUFSIZE)) != 0)
		return (rval);

	if ((rval = dtrace_state_buffer(state, state->dts_aggbuffer,
	    DTRACEOPT_AGGSIZE)) != 0)
		return (rval);

	for (i = 0; i < state->dts_nspeculations; i++) {
		if ((rval = dtrace_state_buffer(state,
		    spec[i].dtsp_buffer, DTRACEOPT_SPECSIZE)) != 0)
			return (rval);
	}

	return (0);
}
static void
dtrace_state_prereserve(dtrace_state_t *state)
{
	dtrace_ecb_t *ecb;
	dtrace_probe_t *probe;

	state->dts_reserve = 0;

	if (state->dts_options[DTRACEOPT_BUFPOLICY] != DTRACEOPT_BUFPOLICY_FILL)
		return;

	/*
	 * If our buffer policy is a "fill" buffer policy, we need to set the
	 * prereserved space to be the space required by the END probes.
	 */
	probe = dtrace_probes[dtrace_probeid_end - 1];
	ASSERT(probe != NULL);

	for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
		if (ecb->dte_state != state)
			continue;

		state->dts_reserve += ecb->dte_needed + ecb->dte_alignment;
	}
}
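/*
 * Worked example (hypothetical sizes): under the "fill" buffer policy, if
 * this state owns two ECBs on the END probe needing 32 and 64 bytes with
 * 8-byte alignment each, dts_reserve becomes (32 + 8) + (64 + 8) = 112
 * bytes; dtrace_state_buffer() then refuses (E2BIG) any principal buffer
 * smaller than that.
 */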
14043 dtrace_state_go(dtrace_state_t
*state
, processorid_t
*cpu
)
14045 dtrace_optval_t
*opt
= state
->dts_options
, sz
, nspec
;
14046 dtrace_speculation_t
*spec
;
14047 dtrace_buffer_t
*buf
;
14048 cyc_handler_t hdlr
;
14050 int rval
= 0, i
, bufsize
= (int)NCPU
* sizeof (dtrace_buffer_t
);
14051 dtrace_icookie_t cookie
;
14053 lck_mtx_lock(&cpu_lock
);
14054 lck_mtx_lock(&dtrace_lock
);
14056 if (state
->dts_activity
!= DTRACE_ACTIVITY_INACTIVE
) {
14062 * Before we can perform any checks, we must prime all of the
14063 * retained enablings that correspond to this state.
14065 dtrace_enabling_prime(state
);
14067 if (state
->dts_destructive
&& !state
->dts_cred
.dcr_destructive
) {
14072 dtrace_state_prereserve(state
);
14075 * Now we want to do is try to allocate our speculations.
14076 * We do not automatically resize the number of speculations; if
14077 * this fails, we will fail the operation.
14079 nspec
= opt
[DTRACEOPT_NSPEC
];
14080 ASSERT(nspec
!= DTRACEOPT_UNSET
);
14082 if (nspec
> INT_MAX
) {
14087 spec
= kmem_zalloc(nspec
* sizeof (dtrace_speculation_t
), KM_NOSLEEP
);
14089 if (spec
== NULL
) {
14094 state
->dts_speculations
= spec
;
14095 state
->dts_nspeculations
= (int)nspec
;
14097 for (i
= 0; i
< nspec
; i
++) {
14098 if ((buf
= kmem_zalloc(bufsize
, KM_NOSLEEP
)) == NULL
) {
14103 spec
[i
].dtsp_buffer
= buf
;
14106 if (opt
[DTRACEOPT_GRABANON
] != DTRACEOPT_UNSET
) {
14107 if (dtrace_anon
.dta_state
== NULL
) {
14112 if (state
->dts_necbs
!= 0) {
14117 state
->dts_anon
= dtrace_anon_grab();
14118 ASSERT(state
->dts_anon
!= NULL
);
14119 state
= state
->dts_anon
;
14122 * We want "grabanon" to be set in the grabbed state, so we'll
14123 * copy that option value from the grabbing state into the
14126 state
->dts_options
[DTRACEOPT_GRABANON
] =
14127 opt
[DTRACEOPT_GRABANON
];
14129 *cpu
= dtrace_anon
.dta_beganon
;
14132 * If the anonymous state is active (as it almost certainly
14133 * is if the anonymous enabling ultimately matched anything),
14134 * we don't allow any further option processing -- but we
14135 * don't return failure.
14137 if (state
->dts_activity
!= DTRACE_ACTIVITY_INACTIVE
)
14141 if (opt
[DTRACEOPT_AGGSIZE
] != DTRACEOPT_UNSET
&&
14142 opt
[DTRACEOPT_AGGSIZE
] != 0) {
14143 if (state
->dts_aggregations
== NULL
) {
14145 * We're not going to create an aggregation buffer
14146 * because we don't have any ECBs that contain
14147 * aggregations -- set this option to 0.
14149 opt
[DTRACEOPT_AGGSIZE
] = 0;
14152 * If we have an aggregation buffer, we must also have
14153 * a buffer to use as scratch.
14155 #if !defined(__APPLE__) /* Quiet compiler warning */
14156 if (opt
[DTRACEOPT_BUFSIZE
] == DTRACEOPT_UNSET
||
14157 opt
[DTRACEOPT_BUFSIZE
] < state
->dts_needed
) {
14158 opt
[DTRACEOPT_BUFSIZE
] = state
->dts_needed
;
14161 if (opt
[DTRACEOPT_BUFSIZE
] == DTRACEOPT_UNSET
||
14162 (size_t)opt
[DTRACEOPT_BUFSIZE
] < state
->dts_needed
) {
14163 opt
[DTRACEOPT_BUFSIZE
] = state
->dts_needed
;
14165 #endif /* __APPLE__ */
14169 if (opt
[DTRACEOPT_SPECSIZE
] != DTRACEOPT_UNSET
&&
14170 opt
[DTRACEOPT_SPECSIZE
] != 0) {
14171 if (!state
->dts_speculates
) {
14173 * We're not going to create speculation buffers
14174 * because we don't have any ECBs that actually
14175 * speculate -- set the speculation size to 0.
14177 opt
[DTRACEOPT_SPECSIZE
] = 0;
14182 * The bare minimum size for any buffer that we're actually going to
14183 * do anything to is sizeof (uint64_t).
14185 sz
= sizeof (uint64_t);
14187 if ((state
->dts_needed
!= 0 && opt
[DTRACEOPT_BUFSIZE
] < sz
) ||
14188 (state
->dts_speculates
&& opt
[DTRACEOPT_SPECSIZE
] < sz
) ||
14189 (state
->dts_aggregations
!= NULL
&& opt
[DTRACEOPT_AGGSIZE
] < sz
)) {
14191 * A buffer size has been explicitly set to 0 (or to a size
14192 * that will be adjusted to 0) and we need the space -- we
14193 * need to return failure. We return ENOSPC to differentiate
14194 * it from failing to allocate a buffer due to failure to meet
14195 * the reserve (for which we return E2BIG).
14201 if ((rval
= dtrace_state_buffers(state
)) != 0)
14204 if ((sz
= opt
[DTRACEOPT_DYNVARSIZE
]) == DTRACEOPT_UNSET
)
14205 sz
= dtrace_dstate_defsize
;
14208 rval
= dtrace_dstate_init(&state
->dts_vstate
.dtvs_dynvars
, sz
);
14213 if (opt
[DTRACEOPT_BUFRESIZE
] == DTRACEOPT_BUFRESIZE_MANUAL
)
14215 } while (sz
>>= 1);
14217 opt
[DTRACEOPT_DYNVARSIZE
] = sz
;
14222 if (opt
[DTRACEOPT_STATUSRATE
] > dtrace_statusrate_max
)
14223 opt
[DTRACEOPT_STATUSRATE
] = dtrace_statusrate_max
;
14225 if (opt
[DTRACEOPT_CLEANRATE
] == 0)
14226 opt
[DTRACEOPT_CLEANRATE
] = dtrace_cleanrate_max
;
14228 if (opt
[DTRACEOPT_CLEANRATE
] < dtrace_cleanrate_min
)
14229 opt
[DTRACEOPT_CLEANRATE
] = dtrace_cleanrate_min
;
14231 if (opt
[DTRACEOPT_CLEANRATE
] > dtrace_cleanrate_max
)
14232 opt
[DTRACEOPT_CLEANRATE
] = dtrace_cleanrate_max
;
14234 hdlr
.cyh_func
= (cyc_func_t
)dtrace_state_clean
;
14235 hdlr
.cyh_arg
= state
;
14236 hdlr
.cyh_level
= CY_LOW_LEVEL
;
14239 when
.cyt_interval
= opt
[DTRACEOPT_CLEANRATE
];
14241 state
->dts_cleaner
= cyclic_add(&hdlr
, &when
);
14243 hdlr
.cyh_func
= (cyc_func_t
)dtrace_state_deadman
;
14244 hdlr
.cyh_arg
= state
;
14245 hdlr
.cyh_level
= CY_LOW_LEVEL
;
14248 when
.cyt_interval
= dtrace_deadman_interval
;
14250 state
->dts_alive
= state
->dts_laststatus
= dtrace_gethrtime();
14251 state
->dts_deadman
= cyclic_add(&hdlr
, &when
);
14253 state
->dts_activity
= DTRACE_ACTIVITY_WARMUP
;
14256 * Now it's time to actually fire the BEGIN probe. We need to disable
14257 * interrupts here both to record the CPU on which we fired the BEGIN
14258 * probe (the data from this CPU will be processed first at user
14259 * level) and to manually activate the buffer for this CPU.
14261 cookie
= dtrace_interrupt_disable();
14262 *cpu
= CPU
->cpu_id
;
14263 ASSERT(state
->dts_buffer
[*cpu
].dtb_flags
& DTRACEBUF_INACTIVE
);
14264 state
->dts_buffer
[*cpu
].dtb_flags
&= ~DTRACEBUF_INACTIVE
;
14266 dtrace_probe(dtrace_probeid_begin
,
14267 (uint64_t)(uintptr_t)state
, 0, 0, 0, 0);
14268 dtrace_interrupt_enable(cookie
);
14270 * We may have had an exit action from a BEGIN probe; only change our
14271 * state to ACTIVE if we're still in WARMUP.
14273 ASSERT(state
->dts_activity
== DTRACE_ACTIVITY_WARMUP
||
14274 state
->dts_activity
== DTRACE_ACTIVITY_DRAINING
);
14276 if (state
->dts_activity
== DTRACE_ACTIVITY_WARMUP
)
14277 state
->dts_activity
= DTRACE_ACTIVITY_ACTIVE
;
14280 * Regardless of whether or not now we're in ACTIVE or DRAINING, we
14281 * want each CPU to transition its principal buffer out of the
14282 * INACTIVE state. Doing this assures that no CPU will suddenly begin
14283 * processing an ECB halfway down a probe's ECB chain; all CPUs will
14284 * atomically transition from processing none of a state's ECBs to
14285 * processing all of them.
14287 dtrace_xcall(DTRACE_CPUALL
,
14288 (dtrace_xcall_t
)dtrace_buffer_activate
, state
);
14292 dtrace_buffer_free(state
->dts_buffer
);
14293 dtrace_buffer_free(state
->dts_aggbuffer
);
14295 if ((nspec
= state
->dts_nspeculations
) == 0) {
14296 ASSERT(state
->dts_speculations
== NULL
);
14300 spec
= state
->dts_speculations
;
14301 ASSERT(spec
!= NULL
);
14303 for (i
= 0; i
< state
->dts_nspeculations
; i
++) {
14304 if ((buf
= spec
[i
].dtsp_buffer
) == NULL
)
14307 dtrace_buffer_free(buf
);
14308 kmem_free(buf
, bufsize
);
14311 kmem_free(spec
, nspec
* sizeof (dtrace_speculation_t
));
14312 state
->dts_nspeculations
= 0;
14313 state
->dts_speculations
= NULL
;
14316 lck_mtx_unlock(&dtrace_lock
);
14317 lck_mtx_unlock(&cpu_lock
);
static int
dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu)
{
	dtrace_icookie_t cookie;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE &&
	    state->dts_activity != DTRACE_ACTIVITY_DRAINING)
		return (EINVAL);

	/*
	 * We'll set the activity to DTRACE_ACTIVITY_DRAINING, and issue a sync
	 * to be sure that every CPU has seen it.  See below for the details
	 * on why this is done.
	 */
	state->dts_activity = DTRACE_ACTIVITY_DRAINING;
	dtrace_sync();

	/*
	 * By this point, it is impossible for any CPU to be still processing
	 * with DTRACE_ACTIVITY_ACTIVE.  We can thus set our activity to
	 * DTRACE_ACTIVITY_COOLDOWN and know that we're not racing with any
	 * other CPU in dtrace_buffer_reserve().  This allows dtrace_probe()
	 * and callees to know that the activity is DTRACE_ACTIVITY_COOLDOWN
	 * iff we're in the END probe.
	 */
	state->dts_activity = DTRACE_ACTIVITY_COOLDOWN;
	dtrace_sync();
	ASSERT(state->dts_activity == DTRACE_ACTIVITY_COOLDOWN);

	/*
	 * Finally, we can release the reserve and call the END probe.  We
	 * disable interrupts across calling the END probe to allow us to
	 * return the CPU on which we actually called the END probe.  This
	 * allows user-land to be sure that this CPU's principal buffer is
	 * processed last.
	 */
	state->dts_reserve = 0;

	cookie = dtrace_interrupt_disable();
	*cpu = CPU->cpu_id;
	dtrace_probe(dtrace_probeid_end,
	    (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
	dtrace_interrupt_enable(cookie);

	state->dts_activity = DTRACE_ACTIVITY_STOPPED;
	dtrace_sync();

	return (0);
}
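/*
 * Consumer-state life cycle as implemented by dtrace_state_go() and
 * dtrace_state_stop() above and dtrace_state_destroy() below:
 *
 *   INACTIVE -> WARMUP -> ACTIVE -> DRAINING -> COOLDOWN -> STOPPED
 *
 * with a direct jump to KILLED in dtrace_state_destroy() when a consumer
 * exits without calling stop.  The dtrace_sync() calls above ensure that no
 * CPU still observes the previous activity value before the next transition
 * is made.
 */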
static int
dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option,
    dtrace_optval_t val)
{
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
		return (EBUSY);

	if (option >= DTRACEOPT_MAX)
		return (EINVAL);

	if (option != DTRACEOPT_CPU && val < 0)
		return (EINVAL);

	switch (option) {
	case DTRACEOPT_DESTRUCTIVE:
		if (dtrace_destructive_disallow)
			return (EACCES);

		state->dts_cred.dcr_destructive = 1;
		break;

	case DTRACEOPT_BUFSIZE:
	case DTRACEOPT_DYNVARSIZE:
	case DTRACEOPT_AGGSIZE:
	case DTRACEOPT_SPECSIZE:
	case DTRACEOPT_STRSIZE:
		if (val < 0)
			return (EINVAL);

		if (val >= LONG_MAX) {
			/*
			 * If this is an otherwise negative value, set it to
			 * the highest multiple of 128m less than LONG_MAX.
			 * Technically, we're adjusting the size without
			 * regard to the buffer resizing policy, but in fact,
			 * this has no effect -- if we set the buffer size to
			 * ~LONG_MAX and the buffer policy is ultimately set to
			 * be "manual", the buffer allocation is guaranteed to
			 * fail, if only because the allocation requires two
			 * buffers.  (We set the size to the highest
			 * multiple of 128m because it ensures that the size
			 * will remain a multiple of a megabyte when
			 * repeatedly halved -- all the way down to 15m.)
			 */
			val = LONG_MAX - (1 << 27) + 1;
		}
	}

	state->dts_options[option] = val;

	return (0);
}
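/*
 * Note: DTRACEOPT_DESTRUCTIVE is the option a consumer sets to request
 * destructive actions.  Per the case above it is refused with EACCES whenever
 * dtrace_destructive_disallow is set; otherwise it simply latches
 * dts_cred.dcr_destructive, which dtrace_state_go() later checks against
 * dts_destructive before enabling.
 */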
14430 dtrace_state_destroy(dtrace_state_t
*state
)
14433 dtrace_vstate_t
*vstate
= &state
->dts_vstate
;
14434 minor_t minor
= getminor(state
->dts_dev
);
14435 int i
, bufsize
= (int)NCPU
* sizeof (dtrace_buffer_t
);
14436 dtrace_speculation_t
*spec
= state
->dts_speculations
;
14437 int nspec
= state
->dts_nspeculations
;
14440 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
14441 lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
);
14444 * First, retract any retained enablings for this state.
14446 dtrace_enabling_retract(state
);
14447 ASSERT(state
->dts_nretained
== 0);
14449 if (state
->dts_activity
== DTRACE_ACTIVITY_ACTIVE
||
14450 state
->dts_activity
== DTRACE_ACTIVITY_DRAINING
) {
14452 * We have managed to come into dtrace_state_destroy() on a
14453 * hot enabling -- almost certainly because of a disorderly
14454 * shutdown of a consumer. (That is, a consumer that is
14455 * exiting without having called dtrace_stop().) In this case,
14456 * we're going to set our activity to be KILLED, and then
14457 * issue a sync to be sure that everyone is out of probe
14458 * context before we start blowing away ECBs.
14460 state
->dts_activity
= DTRACE_ACTIVITY_KILLED
;
14465 * Release the credential hold we took in dtrace_state_create().
14467 if (state
->dts_cred
.dcr_cred
!= NULL
)
14468 crfree(state
->dts_cred
.dcr_cred
);
14471 * Now we can safely disable and destroy any enabled probes. Because
14472 * any DTRACE_PRIV_KERNEL probes may actually be slowing our progress
14473 * (especially if they're all enabled), we take two passes through the
14474 * ECBs: in the first, we disable just DTRACE_PRIV_KERNEL probes, and
14475 * in the second we disable whatever is left over.
14477 for (match
= DTRACE_PRIV_KERNEL
; ; match
= 0) {
14478 for (i
= 0; i
< state
->dts_necbs
; i
++) {
14479 if ((ecb
= state
->dts_ecbs
[i
]) == NULL
)
14482 if (match
&& ecb
->dte_probe
!= NULL
) {
14483 dtrace_probe_t
*probe
= ecb
->dte_probe
;
14484 dtrace_provider_t
*prov
= probe
->dtpr_provider
;
14486 if (!(prov
->dtpv_priv
.dtpp_flags
& match
))
14490 dtrace_ecb_disable(ecb
);
14491 dtrace_ecb_destroy(ecb
);
14499 * Before we free the buffers, perform one more sync to assure that
14500 * every CPU is out of probe context.
14504 dtrace_buffer_free(state
->dts_buffer
);
14505 dtrace_buffer_free(state
->dts_aggbuffer
);
14507 for (i
= 0; i
< nspec
; i
++)
14508 dtrace_buffer_free(spec
[i
].dtsp_buffer
);
14510 if (state
->dts_cleaner
!= CYCLIC_NONE
)
14511 cyclic_remove(state
->dts_cleaner
);
14513 if (state
->dts_deadman
!= CYCLIC_NONE
)
14514 cyclic_remove(state
->dts_deadman
);
14516 dtrace_dstate_fini(&vstate
->dtvs_dynvars
);
14517 dtrace_vstate_fini(vstate
);
14518 kmem_free(state
->dts_ecbs
, state
->dts_necbs
* sizeof (dtrace_ecb_t
*));
14520 if (state
->dts_aggregations
!= NULL
) {
14522 for (i
= 0; i
< state
->dts_naggregations
; i
++)
14523 ASSERT(state
->dts_aggregations
[i
] == NULL
);
14525 ASSERT(state
->dts_naggregations
> 0);
14526 kmem_free(state
->dts_aggregations
,
14527 state
->dts_naggregations
* sizeof (dtrace_aggregation_t
*));
14530 kmem_free(state
->dts_buffer
, bufsize
);
14531 kmem_free(state
->dts_aggbuffer
, bufsize
);
14533 for (i
= 0; i
< nspec
; i
++)
14534 kmem_free(spec
[i
].dtsp_buffer
, bufsize
);
14536 kmem_free(spec
, nspec
* sizeof (dtrace_speculation_t
));
14538 dtrace_format_destroy(state
);
14540 vmem_destroy(state
->dts_aggid_arena
);
14541 ddi_soft_state_free(dtrace_softstate
, minor
);
14542 vmem_free(dtrace_minor
, (void *)(uintptr_t)minor
, 1);
/*
 * DTrace Anonymous Enabling Functions
 */
static dtrace_state_t *
dtrace_anon_grab(void)
{
	dtrace_state_t *state;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if ((state = dtrace_anon.dta_state) == NULL) {
		ASSERT(dtrace_anon.dta_enabling == NULL);
		return (NULL);
	}

	ASSERT(dtrace_anon.dta_enabling != NULL);
	ASSERT(dtrace_retained != NULL);

	dtrace_enabling_destroy(dtrace_anon.dta_enabling);
	dtrace_anon.dta_enabling = NULL;
	dtrace_anon.dta_state = NULL;

	return (state);
}
14571 dtrace_anon_property(void)
14574 dtrace_state_t
*state
;
14576 char c
[32]; /* enough for "dof-data-" + digits */
14578 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
14579 lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
);
14581 for (i
= 0; ; i
++) {
14582 (void) snprintf(c
, sizeof (c
), "dof-data-%d", i
);
14584 dtrace_err_verbose
= 1;
14586 if ((dof
= dtrace_dof_property(c
)) == NULL
) {
14587 dtrace_err_verbose
= 0;
14592 * We want to create anonymous state, so we need to transition
14593 * the kernel debugger to indicate that DTrace is active. If
14594 * this fails (e.g. because the debugger has modified text in
14595 * some way), we won't continue with the processing.
14597 if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE
) != 0) {
14598 cmn_err(CE_NOTE
, "kernel debugger active; anonymous "
14599 "enabling ignored.");
14600 dtrace_dof_destroy(dof
);
14605 * If we haven't allocated an anonymous state, we'll do so now.
14607 if ((state
= dtrace_anon
.dta_state
) == NULL
) {
14608 #if !defined(__APPLE__)
14609 state
= dtrace_state_create(NULL
, NULL
);
14610 dtrace_anon
.dta_state
= state
;
14611 if (state
== NULL
) {
14613 rv
= dtrace_state_create(NULL
, NULL
, &state
);
14614 dtrace_anon
.dta_state
= state
;
14615 if (rv
!= 0 || state
== NULL
) {
14616 #endif /* __APPLE__ */
14618 * This basically shouldn't happen: the only
14619 * failure mode from dtrace_state_create() is a
14620 * failure of ddi_soft_state_zalloc() that
14621 * itself should never happen. Still, the
14622 * interface allows for a failure mode, and
14623 * we want to fail as gracefully as possible:
14624 * we'll emit an error message and cease
14625 * processing anonymous state in this case.
14627 cmn_err(CE_WARN
, "failed to create "
14628 "anonymous state");
14629 dtrace_dof_destroy(dof
);
14634 rv
= dtrace_dof_slurp(dof
, &state
->dts_vstate
, CRED(),
14635 &dtrace_anon
.dta_enabling
, 0, B_TRUE
);
14638 rv
= dtrace_dof_options(dof
, state
);
14640 dtrace_err_verbose
= 0;
14641 dtrace_dof_destroy(dof
);
14645 * This is malformed DOF; chuck any anonymous state
14648 ASSERT(dtrace_anon
.dta_enabling
== NULL
);
14649 dtrace_state_destroy(state
);
14650 dtrace_anon
.dta_state
= NULL
;
14654 ASSERT(dtrace_anon
.dta_enabling
!= NULL
);
14657 if (dtrace_anon
.dta_enabling
!= NULL
) {
14661 * dtrace_enabling_retain() can only fail because we are
14662 * trying to retain more enablings than are allowed -- but
14663 * we only have one anonymous enabling, and we are guaranteed
14664 * to be allowed at least one retained enabling; we assert
14665 * that dtrace_enabling_retain() returns success.
14667 rval
= dtrace_enabling_retain(dtrace_anon
.dta_enabling
);
14670 dtrace_enabling_dump(dtrace_anon
.dta_enabling
);
14675 * DTrace Helper Functions
14678 dtrace_helper_trace(dtrace_helper_action_t
*helper
,
14679 dtrace_mstate_t
*mstate
, dtrace_vstate_t
*vstate
, int where
)
14681 #if !defined(__APPLE__) /* Quiet compiler warning */
14682 uint32_t size
, next
, nnext
, i
;
14684 uint32_t size
, next
, nnext
;
14686 #endif /* __APPLE__ */
14687 dtrace_helptrace_t
*ent
;
14688 uint16_t flags
= cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
;
14690 if (!dtrace_helptrace_enabled
)
14693 #if !defined(__APPLE__) /* Quiet compiler warning */
14694 ASSERT(vstate
->dtvs_nlocals
<= dtrace_helptrace_nlocals
);
14696 ASSERT((uint32_t)vstate
->dtvs_nlocals
<= dtrace_helptrace_nlocals
);
14697 #endif /* __APPLE__ */
14700 * What would a tracing framework be without its own tracing
14701 * framework? (Well, a hell of a lot simpler, for starters...)
14703 size
= sizeof (dtrace_helptrace_t
) + dtrace_helptrace_nlocals
*
14704 sizeof (uint64_t) - sizeof (uint64_t);
14707 * Iterate until we can allocate a slot in the trace buffer.
14710 next
= dtrace_helptrace_next
;
14712 if (next
+ size
< dtrace_helptrace_bufsize
) {
14713 nnext
= next
+ size
;
14717 } while (dtrace_cas32(&dtrace_helptrace_next
, next
, nnext
) != next
);
14720 * We have our slot; fill it in.
14725 ent
= (dtrace_helptrace_t
*)&dtrace_helptrace_buffer
[next
];
14726 ent
->dtht_helper
= helper
;
14727 ent
->dtht_where
= where
;
14728 ent
->dtht_nlocals
= vstate
->dtvs_nlocals
;
14730 ent
->dtht_fltoffs
= (mstate
->dtms_present
& DTRACE_MSTATE_FLTOFFS
) ?
14731 mstate
->dtms_fltoffs
: -1;
14732 ent
->dtht_fault
= DTRACE_FLAGS2FLT(flags
);
14733 ent
->dtht_illval
= cpu_core
[CPU
->cpu_id
].cpuc_dtrace_illval
;
14735 for (i
= 0; i
< vstate
->dtvs_nlocals
; i
++) {
14736 dtrace_statvar_t
*svar
;
14738 if ((svar
= vstate
->dtvs_locals
[i
]) == NULL
)
14741 ASSERT(svar
->dtsv_size
>= (int)NCPU
* sizeof (uint64_t));
14742 ent
->dtht_locals
[i
] =
14743 ((uint64_t *)(uintptr_t)svar
->dtsv_data
)[CPU
->cpu_id
];
14748 dtrace_helper(int which
, dtrace_mstate_t
*mstate
,
14749 dtrace_state_t
*state
, uint64_t arg0
, uint64_t arg1
)
14751 uint16_t *flags
= &cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
;
14752 uint64_t sarg0
= mstate
->dtms_arg
[0];
14753 uint64_t sarg1
= mstate
->dtms_arg
[1];
14755 dtrace_helpers_t
*helpers
= curproc
->p_dtrace_helpers
;
14756 dtrace_helper_action_t
*helper
;
14757 dtrace_vstate_t
*vstate
;
14758 dtrace_difo_t
*pred
;
14759 int i
, trace
= dtrace_helptrace_enabled
;
14761 ASSERT(which
>= 0 && which
< DTRACE_NHELPER_ACTIONS
);
14763 if (helpers
== NULL
)
14766 if ((helper
= helpers
->dthps_actions
[which
]) == NULL
)
14769 vstate
= &helpers
->dthps_vstate
;
14770 mstate
->dtms_arg
[0] = arg0
;
14771 mstate
->dtms_arg
[1] = arg1
;
14774 * Now iterate over each helper. If its predicate evaluates to 'true',
14775 * we'll call the corresponding actions. Note that the below calls
14776 * to dtrace_dif_emulate() may set faults in machine state. This is
14777 * okay: our caller (the outer dtrace_dif_emulate()) will simply plow
14778 * the stored DIF offset with its own (which is the desired behavior).
14779 * Also, note the calls to dtrace_dif_emulate() may allocate scratch
14780 * from machine state; this is okay, too.
14782 for (; helper
!= NULL
; helper
= helper
->dtha_next
) {
14783 if ((pred
= helper
->dtha_predicate
) != NULL
) {
14785 dtrace_helper_trace(helper
, mstate
, vstate
, 0);
14787 if (!dtrace_dif_emulate(pred
, mstate
, vstate
, state
))
14790 if (*flags
& CPU_DTRACE_FAULT
)
14794 for (i
= 0; i
< helper
->dtha_nactions
; i
++) {
14796 dtrace_helper_trace(helper
,
14797 mstate
, vstate
, i
+ 1);
14799 rval
= dtrace_dif_emulate(helper
->dtha_actions
[i
],
14800 mstate
, vstate
, state
);
14802 if (*flags
& CPU_DTRACE_FAULT
)
14808 dtrace_helper_trace(helper
, mstate
, vstate
,
14809 DTRACE_HELPTRACE_NEXT
);
14813 dtrace_helper_trace(helper
, mstate
, vstate
,
14814 DTRACE_HELPTRACE_DONE
);
14817 * Restore the arg0 that we saved upon entry.
14819 mstate
->dtms_arg
[0] = sarg0
;
14820 mstate
->dtms_arg
[1] = sarg1
;
14826 dtrace_helper_trace(helper
, mstate
, vstate
,
14827 DTRACE_HELPTRACE_ERR
);
14830 * Restore the arg0 that we saved upon entry.
14832 mstate
->dtms_arg
[0] = sarg0
;
14833 mstate
->dtms_arg
[1] = sarg1
;
static void
dtrace_helper_action_destroy(dtrace_helper_action_t *helper,
    dtrace_vstate_t *vstate)
{
	int i;

	if (helper->dtha_predicate != NULL)
		dtrace_difo_release(helper->dtha_predicate, vstate);

	for (i = 0; i < helper->dtha_nactions; i++) {
		ASSERT(helper->dtha_actions[i] != NULL);
		dtrace_difo_release(helper->dtha_actions[i], vstate);
	}

	kmem_free(helper->dtha_actions,
	    helper->dtha_nactions * sizeof (dtrace_difo_t *));
	kmem_free(helper, sizeof (dtrace_helper_action_t));
}
14857 #if !defined(__APPLE__)
14859 dtrace_helper_destroygen(int gen
)
14861 proc_t
*p
= curproc
;
14864 dtrace_helper_destroygen(proc_t
* p
, int gen
)
14867 dtrace_helpers_t
*help
= p
->p_dtrace_helpers
;
14868 dtrace_vstate_t
*vstate
;
14869 #if !defined(__APPLE__) /* Quiet compiler warning */
14873 #endif /* __APPLE__ */
14875 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
14877 if (help
== NULL
|| gen
> help
->dthps_generation
)
14880 vstate
= &help
->dthps_vstate
;
14882 for (i
= 0; i
< DTRACE_NHELPER_ACTIONS
; i
++) {
14883 dtrace_helper_action_t
*last
= NULL
, *h
, *next
;
14885 for (h
= help
->dthps_actions
[i
]; h
!= NULL
; h
= next
) {
14886 next
= h
->dtha_next
;
14888 if (h
->dtha_generation
== gen
) {
14889 if (last
!= NULL
) {
14890 last
->dtha_next
= next
;
14892 help
->dthps_actions
[i
] = next
;
14895 dtrace_helper_action_destroy(h
, vstate
);
14903 * Interate until we've cleared out all helper providers with the
14904 * given generation number.
14907 dtrace_helper_provider_t
*prov
= NULL
;
14910 * Look for a helper provider with the right generation. We
14911 * have to start back at the beginning of the list each time
14912 * because we drop dtrace_lock. It's unlikely that we'll make
14913 * more than two passes.
14915 for (i
= 0; i
< help
->dthps_nprovs
; i
++) {
14916 prov
= help
->dthps_provs
[i
];
14918 if (prov
->dthp_generation
== gen
)
14923 * If there were no matches, we're done.
14925 if (i
== help
->dthps_nprovs
)
14929 * Move the last helper provider into this slot.
14931 help
->dthps_nprovs
--;
14932 help
->dthps_provs
[i
] = help
->dthps_provs
[help
->dthps_nprovs
];
14933 help
->dthps_provs
[help
->dthps_nprovs
] = NULL
;
14935 lck_mtx_unlock(&dtrace_lock
);
14938 * If we have a meta provider, remove this helper provider.
14940 lck_mtx_lock(&dtrace_meta_lock
);
14941 if (dtrace_meta_pid
!= NULL
) {
14942 ASSERT(dtrace_deferred_pid
== NULL
);
14943 dtrace_helper_provider_remove(&prov
->dthp_prov
,
14946 lck_mtx_unlock(&dtrace_meta_lock
);
14948 dtrace_helper_provider_destroy(prov
);
14950 lck_mtx_lock(&dtrace_lock
);
static int
dtrace_helper_validate(dtrace_helper_action_t *helper)
{
	int err = 0, i;
	dtrace_difo_t *dp;

	if ((dp = helper->dtha_predicate) != NULL)
		err += dtrace_difo_validate_helper(dp);

	for (i = 0; i < helper->dtha_nactions; i++)
		err += dtrace_difo_validate_helper(helper->dtha_actions[i]);

	return (err == 0);
}
14971 #if !defined(__APPLE__)
14973 dtrace_helper_action_add(int which
, dtrace_ecbdesc_t
*ep
)
14976 dtrace_helper_action_add(proc_t
* p
, int which
, dtrace_ecbdesc_t
*ep
)
14979 dtrace_helpers_t
*help
;
14980 dtrace_helper_action_t
*helper
, *last
;
14981 dtrace_actdesc_t
*act
;
14982 dtrace_vstate_t
*vstate
;
14983 dtrace_predicate_t
*pred
;
14984 int count
= 0, nactions
= 0, i
;
14986 if (which
< 0 || which
>= DTRACE_NHELPER_ACTIONS
)
14989 #if !defined(__APPLE__)
14990 help
= curproc
->p_dtrace_helpers
;
14992 help
= p
->p_dtrace_helpers
;
14994 last
= help
->dthps_actions
[which
];
14995 vstate
= &help
->dthps_vstate
;
14997 for (count
= 0; last
!= NULL
; last
= last
->dtha_next
) {
14999 if (last
->dtha_next
== NULL
)
15004 * If we already have dtrace_helper_actions_max helper actions for this
15005 * helper action type, we'll refuse to add a new one.
15007 if (count
>= dtrace_helper_actions_max
)
15010 helper
= kmem_zalloc(sizeof (dtrace_helper_action_t
), KM_SLEEP
);
15011 helper
->dtha_generation
= help
->dthps_generation
;
15013 if ((pred
= ep
->dted_pred
.dtpdd_predicate
) != NULL
) {
15014 ASSERT(pred
->dtp_difo
!= NULL
);
15015 dtrace_difo_hold(pred
->dtp_difo
);
15016 helper
->dtha_predicate
= pred
->dtp_difo
;
15019 for (act
= ep
->dted_action
; act
!= NULL
; act
= act
->dtad_next
) {
15020 if (act
->dtad_kind
!= DTRACEACT_DIFEXPR
)
15023 if (act
->dtad_difo
== NULL
)
15029 helper
->dtha_actions
= kmem_zalloc(sizeof (dtrace_difo_t
*) *
15030 (helper
->dtha_nactions
= nactions
), KM_SLEEP
);
15032 for (act
= ep
->dted_action
, i
= 0; act
!= NULL
; act
= act
->dtad_next
) {
15033 dtrace_difo_hold(act
->dtad_difo
);
15034 helper
->dtha_actions
[i
++] = act
->dtad_difo
;
15037 if (!dtrace_helper_validate(helper
))
15040 if (last
== NULL
) {
15041 help
->dthps_actions
[which
] = helper
;
15043 last
->dtha_next
= helper
;
15046 #if !defined(__APPLE__) /* Quiet compiler warning */
15047 if (vstate
->dtvs_nlocals
> dtrace_helptrace_nlocals
) {
15049 if ((uint32_t)vstate
->dtvs_nlocals
> dtrace_helptrace_nlocals
) {
15050 #endif /* __APPLE__ */
15051 dtrace_helptrace_nlocals
= vstate
->dtvs_nlocals
;
15052 dtrace_helptrace_next
= 0;
15057 dtrace_helper_action_destroy(helper
, vstate
);
15062 dtrace_helper_provider_register(proc_t
*p
, dtrace_helpers_t
*help
,
15063 dof_helper_t
*dofhp
)
15065 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_NOTOWNED
);
15067 lck_mtx_lock(&dtrace_meta_lock
);
15068 lck_mtx_lock(&dtrace_lock
);
15070 if (!dtrace_attached() || dtrace_meta_pid
== NULL
) {
15072 * If the dtrace module is loaded but not attached, or if
15073 * there aren't isn't a meta provider registered to deal with
15074 * these provider descriptions, we need to postpone creating
15075 * the actual providers until later.
15078 if (help
->dthps_next
== NULL
&& help
->dthps_prev
== NULL
&&
15079 dtrace_deferred_pid
!= help
) {
15080 help
->dthps_deferred
= 1;
15081 help
->dthps_pid
= p
->p_pid
;
15082 help
->dthps_next
= dtrace_deferred_pid
;
15083 help
->dthps_prev
= NULL
;
15084 if (dtrace_deferred_pid
!= NULL
)
15085 dtrace_deferred_pid
->dthps_prev
= help
;
15086 dtrace_deferred_pid
= help
;
15089 lck_mtx_unlock(&dtrace_lock
);
15091 } else if (dofhp
!= NULL
) {
15093 * If the dtrace module is loaded and we have a particular
15094 * helper provider description, pass that off to the
15098 lck_mtx_unlock(&dtrace_lock
);
15100 dtrace_helper_provide(dofhp
, p
->p_pid
);
15104 * Otherwise, just pass all the helper provider descriptions
15105 * off to the meta provider.
15108 #if !defined(__APPLE__) /* Quiet compiler warning */
15112 #endif /* __APPLE__ */
15113 lck_mtx_unlock(&dtrace_lock
);
15115 for (i
= 0; i
< help
->dthps_nprovs
; i
++) {
15116 dtrace_helper_provide(&help
->dthps_provs
[i
]->dthp_prov
,
15121 lck_mtx_unlock(&dtrace_meta_lock
);
15124 #if !defined(__APPLE__)
15126 dtrace_helper_provider_add(dof_helper_t
*dofhp
, int gen
)
15129 dtrace_helper_provider_add(proc_t
* p
, dof_helper_t
*dofhp
, int gen
)
15132 dtrace_helpers_t
*help
;
15133 dtrace_helper_provider_t
*hprov
, **tmp_provs
;
15134 uint_t tmp_maxprovs
, i
;
15136 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
15138 #if !defined(__APPLE__)
15139 help
= curproc
->p_dtrace_helpers
;
15141 help
= p
->p_dtrace_helpers
;
15143 ASSERT(help
!= NULL
);
15146 * If we already have dtrace_helper_providers_max helper providers,
15147 * we're refuse to add a new one.
15149 if (help
->dthps_nprovs
>= dtrace_helper_providers_max
)
15153 * Check to make sure this isn't a duplicate.
15155 for (i
= 0; i
< help
->dthps_nprovs
; i
++) {
15156 if (dofhp
->dofhp_addr
==
15157 help
->dthps_provs
[i
]->dthp_prov
.dofhp_addr
)
15161 hprov
= kmem_zalloc(sizeof (dtrace_helper_provider_t
), KM_SLEEP
);
15162 hprov
->dthp_prov
= *dofhp
;
15163 hprov
->dthp_ref
= 1;
15164 hprov
->dthp_generation
= gen
;
15167 * Allocate a bigger table for helper providers if it's already full.
15169 if (help
->dthps_maxprovs
== help
->dthps_nprovs
) {
15170 tmp_maxprovs
= help
->dthps_maxprovs
;
15171 tmp_provs
= help
->dthps_provs
;
15173 if (help
->dthps_maxprovs
== 0)
15174 help
->dthps_maxprovs
= 2;
15176 help
->dthps_maxprovs
*= 2;
15177 if (help
->dthps_maxprovs
> dtrace_helper_providers_max
)
15178 help
->dthps_maxprovs
= dtrace_helper_providers_max
;
15180 ASSERT(tmp_maxprovs
< help
->dthps_maxprovs
);
15182 help
->dthps_provs
= kmem_zalloc(help
->dthps_maxprovs
*
15183 sizeof (dtrace_helper_provider_t
*), KM_SLEEP
);
15185 if (tmp_provs
!= NULL
) {
15186 bcopy(tmp_provs
, help
->dthps_provs
, tmp_maxprovs
*
15187 sizeof (dtrace_helper_provider_t
*));
15188 kmem_free(tmp_provs
, tmp_maxprovs
*
15189 sizeof (dtrace_helper_provider_t
*));
15193 help
->dthps_provs
[help
->dthps_nprovs
] = hprov
;
15194 help
->dthps_nprovs
++;
static void
dtrace_helper_provider_destroy(dtrace_helper_provider_t *hprov)
{
	lck_mtx_lock(&dtrace_lock);

	if (--hprov->dthp_ref == 0) {
		dof_hdr_t *dof;

		lck_mtx_unlock(&dtrace_lock);
		dof = (dof_hdr_t *)(uintptr_t)hprov->dthp_prov.dofhp_dof;
		dtrace_dof_destroy(dof);
		kmem_free(hprov, sizeof (dtrace_helper_provider_t));
	} else {
		lck_mtx_unlock(&dtrace_lock);
	}
}
15216 dtrace_helper_provider_validate(dof_hdr_t
*dof
, dof_sec_t
*sec
)
15218 uintptr_t daddr
= (uintptr_t)dof
;
15219 dof_sec_t
*str_sec
, *prb_sec
, *arg_sec
, *off_sec
, *enoff_sec
;
15220 dof_provider_t
*provider
;
15221 dof_probe_t
*probe
;
15223 char *strtab
, *typestr
;
15224 dof_stridx_t typeidx
;
15226 uint_t nprobes
, j
, k
;
15228 ASSERT(sec
->dofs_type
== DOF_SECT_PROVIDER
);
15230 if (sec
->dofs_offset
& (sizeof (uint_t
) - 1)) {
15231 dtrace_dof_error(dof
, "misaligned section offset");
15236 * The section needs to be large enough to contain the DOF provider
15237 * structure appropriate for the given version.
15239 if (sec
->dofs_size
<
15240 ((dof
->dofh_ident
[DOF_ID_VERSION
] == DOF_VERSION_1
) ?
15241 offsetof(dof_provider_t
, dofpv_prenoffs
) :
15242 sizeof (dof_provider_t
))) {
15243 dtrace_dof_error(dof
, "provider section too small");
15247 provider
= (dof_provider_t
*)(uintptr_t)(daddr
+ sec
->dofs_offset
);
15248 str_sec
= dtrace_dof_sect(dof
, DOF_SECT_STRTAB
, provider
->dofpv_strtab
);
15249 prb_sec
= dtrace_dof_sect(dof
, DOF_SECT_PROBES
, provider
->dofpv_probes
);
15250 arg_sec
= dtrace_dof_sect(dof
, DOF_SECT_PRARGS
, provider
->dofpv_prargs
);
15251 off_sec
= dtrace_dof_sect(dof
, DOF_SECT_PROFFS
, provider
->dofpv_proffs
);
15253 if (str_sec
== NULL
|| prb_sec
== NULL
||
15254 arg_sec
== NULL
|| off_sec
== NULL
)
15259 if (dof
->dofh_ident
[DOF_ID_VERSION
] != DOF_VERSION_1
&&
15260 provider
->dofpv_prenoffs
!= DOF_SECT_NONE
&&
15261 (enoff_sec
= dtrace_dof_sect(dof
, DOF_SECT_PRENOFFS
,
15262 provider
->dofpv_prenoffs
)) == NULL
)
15265 strtab
= (char *)(uintptr_t)(daddr
+ str_sec
->dofs_offset
);
15267 if (provider
->dofpv_name
>= str_sec
->dofs_size
||
15268 strlen(strtab
+ provider
->dofpv_name
) >= DTRACE_PROVNAMELEN
) {
15269 dtrace_dof_error(dof
, "invalid provider name");
15273 if (prb_sec
->dofs_entsize
== 0 ||
15274 prb_sec
->dofs_entsize
> prb_sec
->dofs_size
) {
15275 dtrace_dof_error(dof
, "invalid entry size");
15279 if (prb_sec
->dofs_entsize
& (sizeof (uintptr_t) - 1)) {
15280 dtrace_dof_error(dof
, "misaligned entry size");
15284 if (off_sec
->dofs_entsize
!= sizeof (uint32_t)) {
15285 dtrace_dof_error(dof
, "invalid entry size");
15289 if (off_sec
->dofs_offset
& (sizeof (uint32_t) - 1)) {
15290 dtrace_dof_error(dof
, "misaligned section offset");
15294 if (arg_sec
->dofs_entsize
!= sizeof (uint8_t)) {
15295 dtrace_dof_error(dof
, "invalid entry size");
15299 arg
= (uint8_t *)(uintptr_t)(daddr
+ arg_sec
->dofs_offset
);
15301 nprobes
= prb_sec
->dofs_size
/ prb_sec
->dofs_entsize
;
15304 * Take a pass through the probes to check for errors.
15306 for (j
= 0; j
< nprobes
; j
++) {
15307 probe
= (dof_probe_t
*)(uintptr_t)(daddr
+
15308 prb_sec
->dofs_offset
+ j
* prb_sec
->dofs_entsize
);
15310 if (probe
->dofpr_func
>= str_sec
->dofs_size
) {
15311 dtrace_dof_error(dof
, "invalid function name");
15315 if (strlen(strtab
+ probe
->dofpr_func
) >= DTRACE_FUNCNAMELEN
) {
15316 dtrace_dof_error(dof
, "function name too long");
15320 if (probe
->dofpr_name
>= str_sec
->dofs_size
||
15321 strlen(strtab
+ probe
->dofpr_name
) >= DTRACE_NAMELEN
) {
15322 dtrace_dof_error(dof
, "invalid probe name");
15327 * The offset count must not wrap the index, and the offsets
15328 * must also not overflow the section's data.
15330 if (probe
->dofpr_offidx
+ probe
->dofpr_noffs
<
15331 probe
->dofpr_offidx
||
15332 (probe
->dofpr_offidx
+ probe
->dofpr_noffs
) *
15333 off_sec
->dofs_entsize
> off_sec
->dofs_size
) {
15334 dtrace_dof_error(dof
, "invalid probe offset");
15338 if (dof
->dofh_ident
[DOF_ID_VERSION
] != DOF_VERSION_1
) {
15340 * If there's no is-enabled offset section, make sure
15341 * there aren't any is-enabled offsets. Otherwise
15342 * perform the same checks as for probe offsets
15343 * (immediately above).
15345 if (enoff_sec
== NULL
) {
15346 if (probe
->dofpr_enoffidx
!= 0 ||
15347 probe
->dofpr_nenoffs
!= 0) {
15348 dtrace_dof_error(dof
, "is-enabled "
15349 "offsets with null section");
15352 } else if (probe
->dofpr_enoffidx
+
15353 probe
->dofpr_nenoffs
< probe
->dofpr_enoffidx
||
15354 (probe
->dofpr_enoffidx
+ probe
->dofpr_nenoffs
) *
15355 enoff_sec
->dofs_entsize
> enoff_sec
->dofs_size
) {
15356 dtrace_dof_error(dof
, "invalid is-enabled "
15361 if (probe
->dofpr_noffs
+ probe
->dofpr_nenoffs
== 0) {
15362 dtrace_dof_error(dof
, "zero probe and "
15363 "is-enabled offsets");
15366 } else if (probe
->dofpr_noffs
== 0) {
15367 dtrace_dof_error(dof
, "zero probe offsets");
15371 if (probe
->dofpr_argidx
+ probe
->dofpr_xargc
<
15372 probe
->dofpr_argidx
||
15373 (probe
->dofpr_argidx
+ probe
->dofpr_xargc
) *
15374 arg_sec
->dofs_entsize
> arg_sec
->dofs_size
) {
15375 dtrace_dof_error(dof
, "invalid args");
15379 typeidx
= probe
->dofpr_nargv
;
15380 typestr
= strtab
+ probe
->dofpr_nargv
;
15381 for (k
= 0; k
< probe
->dofpr_nargc
; k
++) {
15382 if (typeidx
>= str_sec
->dofs_size
) {
15383 dtrace_dof_error(dof
, "bad "
15384 "native argument type");
15388 typesz
= strlen(typestr
) + 1;
15389 if (typesz
> DTRACE_ARGTYPELEN
) {
15390 dtrace_dof_error(dof
, "native "
15391 "argument type too long");
15398 typeidx
= probe
->dofpr_xargv
;
15399 typestr
= strtab
+ probe
->dofpr_xargv
;
15400 for (k
= 0; k
< probe
->dofpr_xargc
; k
++) {
15401 if (arg
[probe
->dofpr_argidx
+ k
] > probe
->dofpr_nargc
) {
15402 dtrace_dof_error(dof
, "bad "
15403 "native argument index");
15407 if (typeidx
>= str_sec
->dofs_size
) {
15408 dtrace_dof_error(dof
, "bad "
15409 "translated argument type");
15413 typesz
= strlen(typestr
) + 1;
15414 if (typesz
> DTRACE_ARGTYPELEN
) {
15415 dtrace_dof_error(dof
, "translated argument "
15428 #if !defined(__APPLE__)
15430 dtrace_helper_slurp(dof_hdr_t
*dof
, dof_helper_t
*dhp
)
15433 dtrace_helper_slurp(proc_t
* p
, dof_hdr_t
*dof
, dof_helper_t
*dhp
)
15436 dtrace_helpers_t
*help
;
15437 dtrace_vstate_t
*vstate
;
15438 dtrace_enabling_t
*enab
= NULL
;
15439 int i
, gen
, rv
, nhelpers
= 0, nprovs
= 0, destroy
= 1;
15440 uintptr_t daddr
= (uintptr_t)dof
;
15442 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
15444 #if !defined(__APPLE__)
15445 if ((help
= curproc
->p_dtrace_helpers
) == NULL
)
15446 help
= dtrace_helpers_create(curproc
);
15448 if ((help
= p
->p_dtrace_helpers
) == NULL
)
15449 help
= dtrace_helpers_create(p
);
15452 vstate
= &help
->dthps_vstate
;
15454 if ((rv
= dtrace_dof_slurp(dof
, vstate
, NULL
, &enab
,
15455 dhp
!= NULL
? dhp
->dofhp_addr
: 0, B_FALSE
)) != 0) {
15456 dtrace_dof_destroy(dof
);
15461 * Look for helper providers and validate their descriptions.
15464 #if !defined(__APPLE__) /* Quiet compiler warning */
15465 for (i
= 0; i
< dof
->dofh_secnum
; i
++) {
15467 for (i
= 0; (uint32_t)i
< dof
->dofh_secnum
; i
++) {
15468 #endif /* __APPLE__ */
15469 dof_sec_t
*sec
= (dof_sec_t
*)(uintptr_t)(daddr
+
15470 dof
->dofh_secoff
+ i
* dof
->dofh_secsize
);
15472 if (sec
->dofs_type
!= DOF_SECT_PROVIDER
)
15475 if (dtrace_helper_provider_validate(dof
, sec
) != 0) {
15476 dtrace_enabling_destroy(enab
);
15477 dtrace_dof_destroy(dof
);
15486 * Now we need to walk through the ECB descriptions in the enabling.
15488 for (i
= 0; i
< enab
->dten_ndesc
; i
++) {
15489 dtrace_ecbdesc_t
*ep
= enab
->dten_desc
[i
];
15490 dtrace_probedesc_t
*desc
= &ep
->dted_probe
;
15492 #if !defined(__APPLE__)
15493 if (strcmp(desc
->dtpd_provider
, "dtrace") != 0)
15496 if (strcmp(desc
->dtpd_mod
, "helper") != 0)
15499 if (strcmp(desc
->dtpd_func
, "ustack") != 0)
15501 #else /* Employ size bounded string operation. */
15502 if (!LIT_STRNEQL(desc
->dtpd_provider
, "dtrace"))
15505 if (!LIT_STRNEQL(desc
->dtpd_mod
, "helper"))
15508 if (!LIT_STRNEQL(desc
->dtpd_func
, "ustack"))
15510 #endif /* __APPLE__ */
15512 #if !defined(__APPLE__)
15513 if ((rv
= dtrace_helper_action_add(DTRACE_HELPER_ACTION_USTACK
,
15516 if ((rv
= dtrace_helper_action_add(p
, DTRACE_HELPER_ACTION_USTACK
,
15520 * Adding this helper action failed -- we are now going
15521 * to rip out the entire generation and return failure.
15523 #if !defined(__APPLE__)
15524 (void) dtrace_helper_destroygen(help
->dthps_generation
);
15526 (void) dtrace_helper_destroygen(p
, help
->dthps_generation
);
15528 dtrace_enabling_destroy(enab
);
15529 dtrace_dof_destroy(dof
);
15536 if (nhelpers
< enab
->dten_ndesc
)
15537 dtrace_dof_error(dof
, "unmatched helpers");
15539 gen
= help
->dthps_generation
++;
15540 dtrace_enabling_destroy(enab
);
15542 if (dhp
!= NULL
&& nprovs
> 0) {
15543 dhp
->dofhp_dof
= (uint64_t)(uintptr_t)dof
;
15544 #if !defined(__APPLE__)
15545 if (dtrace_helper_provider_add(dhp
, gen
) == 0) {
15547 if (dtrace_helper_provider_add(p
, dhp
, gen
) == 0) {
15549 lck_mtx_unlock(&dtrace_lock
);
15550 #if !defined(__APPLE__)
15551 dtrace_helper_provider_register(curproc
, help
, dhp
);
15553 dtrace_helper_provider_register(p
, help
, dhp
);
15555 lck_mtx_lock(&dtrace_lock
);
15562 dtrace_dof_destroy(dof
);
#if defined(__APPLE__)
/*
 * DTrace user static probes (USDT probes) and helper actions are loaded
 * into a process by processing dof sections.  The dof sections are passed
 * into the kernel by dyld, in a dof_ioctl_data_t block.  It is rather
 * expensive to process dof for a process that will never use it.  There
 * is a memory cost (allocating the providers/probes), and a cpu cost
 * (creating the providers/probes).
 *
 * To reduce this cost, we use "lazy dof".  The normal procedure for
 * dof processing is to copyin the dof(s) pointed to by the dof_ioctl_data_t
 * block, and invoke dof_slurp_helper() on them.  When "lazy dof" is
 * used, each process retains the dof_ioctl_data_t block, instead of
 * copying in the data it points to.
 *
 * The dof_ioctl_data_t blocks are managed as if they were the actual
 * processed dof; on fork the block is copied to the child, on exec and
 * exit the block is freed.
 *
 * If the process loads libraries containing additional dof, the
 * new dof_ioctl_data_t is merged with the existing block.
 *
 * There are a few catches that make this slightly more difficult.
 * When dyld registers dof_ioctl_data_t blocks, it expects a unique
 * identifier value for each dof in the block.  In non-lazy dof terms,
 * this is the generation that dof was loaded in.  If we hand back
 * a UID for a lazy dof, that same UID must be able to unload the
 * dof once it has become non-lazy.  To meet this requirement, the
 * code that loads lazy dof requires that the UID's for dof(s) in
 * the lazy dof be sorted, and in ascending order.  It is okay to skip
 * UID's, e.g., 1 -> 5 -> 6 is legal.
 *
 * Once a process has become non-lazy, it will stay non-lazy.  All
 * future dof operations for that process will be non-lazy, even
 * if the dof mode transitions back to lazy.
 *
 * Always do lazy dof checks before non-lazy (i.e., in fork, exit, exec).
 * That way if the lazy check fails due to transitioning to non-lazy, the
 * right thing is done with the newly faulted in dof.
 */
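/*
 * Illustrative sketch (hypothetical addresses; kernel-side allocation shown
 * only for illustration): a minimal dof_ioctl_data_t of the shape dyld might
 * register, showing the ascending-generation requirement described above.
 */
#if 0
	dof_ioctl_data_t *d = kmem_alloc(DOF_IOCTL_DATA_T_SIZE(2), KM_SLEEP);
	d->dofiod_count = 2;
	d->dofiod_helpers[0].dofhp_addr = 0x100000000ULL;	/* first dof */
	d->dofiod_helpers[0].dofhp_dof  = 0x100000000ULL;	/* == dofhp_addr on entry */
	d->dofiod_helpers[1].dofhp_addr = 0x100200000ULL;	/* second dof */
	d->dofiod_helpers[1].dofhp_dof  = 0x100200000ULL;
	/* dtrace_lazy_dofs_add() overwrites dofhp_dof with ascending generations */
#endif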
/*
 * This method is a bit squicky.  It must handle:
 *
 *	dof should not be lazy.
 *	dof should have been handled lazily, but there was an error.
 *	dof was handled lazily, and needs to be freed.
 *	dof was handled lazily, and must not be freed.
 *
 * Returns EACCES if dof should be handled non-lazily.
 *
 * KERN_SUCCESS and all other return codes indicate lazy handling of dof.
 *
 * If the dofs data is claimed by this method, dofs_claimed will be set.
 * Callers should not free claimed dofs.
 */
15628 dtrace_lazy_dofs_add(proc_t
*p
, dof_ioctl_data_t
* incoming_dofs
, int *dofs_claimed
)
15631 ASSERT(incoming_dofs
&& incoming_dofs
->dofiod_count
> 0);
15636 lck_rw_lock_shared(&dtrace_dof_mode_lock
);
15639 * If we have lazy dof, dof mode better be LAZY_ON.
15641 ASSERT(p
->p_dtrace_lazy_dofs
== NULL
|| dtrace_dof_mode
== DTRACE_DOF_MODE_LAZY_ON
);
15642 ASSERT(p
->p_dtrace_lazy_dofs
== NULL
|| p
->p_dtrace_helpers
== NULL
);
15643 ASSERT(dtrace_dof_mode
!= DTRACE_DOF_MODE_NEVER
);
15646 * Any existing helpers force non-lazy behavior.
15648 if (dtrace_dof_mode
== DTRACE_DOF_MODE_LAZY_ON
&& (p
->p_dtrace_helpers
== NULL
)) {
15649 lck_mtx_lock(&p
->p_dtrace_sprlock
);
15651 dof_ioctl_data_t
* existing_dofs
= p
->p_dtrace_lazy_dofs
;
15652 unsigned int existing_dofs_count
= (existing_dofs
) ? existing_dofs
->dofiod_count
: 0;
15653 unsigned int i
, merged_dofs_count
= incoming_dofs
->dofiod_count
+ existing_dofs_count
;
15658 if (merged_dofs_count
== 0 || merged_dofs_count
> 1024) {
15659 dtrace_dof_error(NULL
, "lazy_dofs_add merged_dofs_count out of range");
15665 * Each dof being added must be assigned a unique generation.
15667 uint64_t generation
= (existing_dofs
) ? existing_dofs
->dofiod_helpers
[existing_dofs_count
- 1].dofhp_dof
+ 1 : 1;
15668 for (i
=0; i
<incoming_dofs
->dofiod_count
; i
++) {
15670 * We rely on these being the same so we can overwrite dofhp_dof and not lose info.
15672 ASSERT(incoming_dofs
->dofiod_helpers
[i
].dofhp_dof
== incoming_dofs
->dofiod_helpers
[i
].dofhp_addr
);
15673 incoming_dofs
->dofiod_helpers
[i
].dofhp_dof
= generation
++;
15677 if (existing_dofs
) {
15679 * Merge the existing and incoming dofs
15681 size_t merged_dofs_size
= DOF_IOCTL_DATA_T_SIZE(merged_dofs_count
);
15682 dof_ioctl_data_t
* merged_dofs
= kmem_alloc(merged_dofs_size
, KM_SLEEP
);
15684 bcopy(&existing_dofs
->dofiod_helpers
[0],
15685 &merged_dofs
->dofiod_helpers
[0],
15686 sizeof(dof_helper_t
) * existing_dofs_count
);
15687 bcopy(&incoming_dofs
->dofiod_helpers
[0],
15688 &merged_dofs
->dofiod_helpers
[existing_dofs_count
],
15689 sizeof(dof_helper_t
) * incoming_dofs
->dofiod_count
);
15691 merged_dofs
->dofiod_count
= merged_dofs_count
;
15693 kmem_free(existing_dofs
, DOF_IOCTL_DATA_T_SIZE(existing_dofs_count
));
15695 p
->p_dtrace_lazy_dofs
= merged_dofs
;
15698 * Claim the incoming dofs
15701 p
->p_dtrace_lazy_dofs
= incoming_dofs
;
15705 dof_ioctl_data_t
* all_dofs
= p
->p_dtrace_lazy_dofs
;
15706 for (i
=0; i
<all_dofs
->dofiod_count
-1; i
++) {
15707 ASSERT(all_dofs
->dofiod_helpers
[i
].dofhp_dof
< all_dofs
->dofiod_helpers
[i
+1].dofhp_dof
);
15712 lck_mtx_unlock(&p
->p_dtrace_sprlock
);
15717 lck_rw_unlock_shared(&dtrace_dof_mode_lock
);
15725 * EINVAL: lazy dof is enabled, but the requested generation was not found.
15726 * EACCES: This removal needs to be handled non-lazily.
15729 dtrace_lazy_dofs_remove(proc_t
*p
, int generation
)
15733 lck_rw_lock_shared(&dtrace_dof_mode_lock
);
15736 * If we have lazy dof, dof mode better be LAZY_ON.
15738 ASSERT(p
->p_dtrace_lazy_dofs
== NULL
|| dtrace_dof_mode
== DTRACE_DOF_MODE_LAZY_ON
);
15739 ASSERT(p
->p_dtrace_lazy_dofs
== NULL
|| p
->p_dtrace_helpers
== NULL
);
15740 ASSERT(dtrace_dof_mode
!= DTRACE_DOF_MODE_NEVER
);
15743 * Any existing helpers force non-lazy behavior.
15745 if (dtrace_dof_mode
== DTRACE_DOF_MODE_LAZY_ON
&& (p
->p_dtrace_helpers
== NULL
)) {
15746 lck_mtx_lock(&p
->p_dtrace_sprlock
);
15748 dof_ioctl_data_t
* existing_dofs
= p
->p_dtrace_lazy_dofs
;
15750 if (existing_dofs
) {
15751 int index
, existing_dofs_count
= existing_dofs
->dofiod_count
;
15752 for (index
=0; index
<existing_dofs_count
; index
++) {
15753 if ((int)existing_dofs
->dofiod_helpers
[index
].dofhp_dof
== generation
) {
15754 dof_ioctl_data_t
* removed_dofs
= NULL
;
15757 * If there is only 1 dof, we'll delete it and swap in NULL.
15759 if (existing_dofs_count
> 1) {
15760 int removed_dofs_count
= existing_dofs_count
- 1;
15761 size_t removed_dofs_size
= DOF_IOCTL_DATA_T_SIZE(removed_dofs_count
);
15763 removed_dofs
= kmem_alloc(removed_dofs_size
, KM_SLEEP
);
15764 removed_dofs
->dofiod_count
= removed_dofs_count
;
15767 * copy the remaining data.
15770 bcopy(&existing_dofs
->dofiod_helpers
[0],
15771 &removed_dofs
->dofiod_helpers
[0],
15772 index
* sizeof(dof_helper_t
));
15775 if (index
< existing_dofs_count
-1) {
15776 bcopy(&existing_dofs
->dofiod_helpers
[index
+1],
15777 &removed_dofs
->dofiod_helpers
[index
],
15778 (existing_dofs_count
- index
- 1) * sizeof(dof_helper_t
));
15782 kmem_free(existing_dofs
, DOF_IOCTL_DATA_T_SIZE(existing_dofs_count
));
15784 p
->p_dtrace_lazy_dofs
= removed_dofs
;
15786 rval
= KERN_SUCCESS
;
15793 dof_ioctl_data_t
* all_dofs
= p
->p_dtrace_lazy_dofs
;
15796 for (i
=0; i
<all_dofs
->dofiod_count
-1; i
++) {
15797 ASSERT(all_dofs
->dofiod_helpers
[i
].dofhp_dof
< all_dofs
->dofiod_helpers
[i
+1].dofhp_dof
);
15804 lck_mtx_unlock(&p
->p_dtrace_sprlock
);
15809 lck_rw_unlock_shared(&dtrace_dof_mode_lock
);
void
dtrace_lazy_dofs_destroy(proc_t *p)
{
	lck_rw_lock_shared(&dtrace_dof_mode_lock);
	lck_mtx_lock(&p->p_dtrace_sprlock);

	/*
	 * If we have lazy dof, dof mode better be LAZY_ON, or we must be exiting.
	 * We cannot assert against DTRACE_DOF_MODE_NEVER here, because we are called from
	 * kern_exit.c and kern_exec.c.
	 */
	ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON || p->p_lflag & P_LEXIT);
	ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);

	dof_ioctl_data_t * lazy_dofs = p->p_dtrace_lazy_dofs;
	p->p_dtrace_lazy_dofs = NULL;

	lck_mtx_unlock(&p->p_dtrace_sprlock);
	lck_rw_unlock_shared(&dtrace_dof_mode_lock);

	if (lazy_dofs) {
		kmem_free(lazy_dofs, DOF_IOCTL_DATA_T_SIZE(lazy_dofs->dofiod_count));
	}
}
void
dtrace_lazy_dofs_duplicate(proc_t *parent, proc_t *child)
{
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_assert(&parent->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED);
	lck_mtx_assert(&child->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED);

	lck_rw_lock_shared(&dtrace_dof_mode_lock);
	lck_mtx_lock(&parent->p_dtrace_sprlock);

	/*
	 * If we have lazy dof, dof mode better be LAZY_ON, or we must be exiting.
	 * We cannot assert against DTRACE_DOF_MODE_NEVER here, because we are called from
	 * kern_fork.c
	 */
	ASSERT(parent->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON);
	ASSERT(parent->p_dtrace_lazy_dofs == NULL || parent->p_dtrace_helpers == NULL);
	/*
	 * In theory we should hold the child sprlock, but this is safe...
	 */
	ASSERT(child->p_dtrace_lazy_dofs == NULL && child->p_dtrace_helpers == NULL);

	dof_ioctl_data_t * parent_dofs = parent->p_dtrace_lazy_dofs;
	dof_ioctl_data_t * child_dofs = NULL;
	if (parent_dofs) {
		size_t parent_dofs_size = DOF_IOCTL_DATA_T_SIZE(parent_dofs->dofiod_count);
		child_dofs = kmem_alloc(parent_dofs_size, KM_SLEEP);
		bcopy(parent_dofs, child_dofs, parent_dofs_size);
	}

	lck_mtx_unlock(&parent->p_dtrace_sprlock);

	if (child_dofs) {
		lck_mtx_lock(&child->p_dtrace_sprlock);
		child->p_dtrace_lazy_dofs = child_dofs;
		lck_mtx_unlock(&child->p_dtrace_sprlock);
	}

	lck_rw_unlock_shared(&dtrace_dof_mode_lock);
}
static int
dtrace_lazy_dofs_proc_iterate_filter(proc_t *p, void* ignored)
{
#pragma unused(ignored)
	/*
	 * Okay to NULL test without taking the sprlock.
	 */
	return p->p_dtrace_lazy_dofs != NULL;
}
static int
dtrace_lazy_dofs_proc_iterate_doit(proc_t *p, void* ignored)
{
#pragma unused(ignored)
	/*
	 * It is possible this process may exit during our attempt to
	 * fault in the dof. We could fix this by holding locks longer,
	 * but the errors are benign.
	 */
	lck_mtx_lock(&p->p_dtrace_sprlock);

	/*
	 * In this case only, it is okay to have lazy dof when dof mode is DTRACE_DOF_MODE_LAZY_OFF
	 */
	ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);
	ASSERT(dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF);

	dof_ioctl_data_t* lazy_dofs = p->p_dtrace_lazy_dofs;
	p->p_dtrace_lazy_dofs = NULL;

	lck_mtx_unlock(&p->p_dtrace_sprlock);

	/*
	 * Process each dof_helper_t
	 */
	if (lazy_dofs != NULL) {
		unsigned int i;

		for (i=0; i<lazy_dofs->dofiod_count; i++) {
			/*
			 * When loading lazy dof, we depend on the generations being sorted in ascending order.
			 */
			ASSERT(i >= (lazy_dofs->dofiod_count - 1) ||
			    lazy_dofs->dofiod_helpers[i].dofhp_dof < lazy_dofs->dofiod_helpers[i+1].dofhp_dof);

			dof_helper_t *dhp = &lazy_dofs->dofiod_helpers[i];

			/*
			 * We stored the generation in dofhp_dof. Save it, and restore the original value.
			 */
			int generation = dhp->dofhp_dof;
			dhp->dofhp_dof = dhp->dofhp_addr;

			int rval;
			dof_hdr_t *dof = dtrace_dof_copyin_from_proc(p, dhp->dofhp_dof, &rval);

			if (dof != NULL) {
				dtrace_helpers_t *help;

				lck_mtx_lock(&dtrace_lock);

				/*
				 * This must be done with the dtrace_lock held
				 */
				if ((help = p->p_dtrace_helpers) == NULL)
					help = dtrace_helpers_create(p);

				/*
				 * If the generation value has been bumped, someone snuck in
				 * when we released the dtrace lock. We have to dump this generation,
				 * there is no safe way to load it.
				 */
				if (help->dthps_generation <= generation) {
					help->dthps_generation = generation;

					/*
					 * dtrace_helper_slurp() takes responsibility for the dof --
					 * it may free it now or it may save it and free it later.
					 */
					if ((rval = dtrace_helper_slurp(p, dof, dhp)) != generation) {
						dtrace_dof_error(NULL, "returned value did not match expected generation");
					}
				}

				lck_mtx_unlock(&dtrace_lock);
			}
		}

		kmem_free(lazy_dofs, DOF_IOCTL_DATA_T_SIZE(lazy_dofs->dofiod_count));
	}

	return PROC_RETURNED;
}
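/*
 * Illustrative sketch (not part of this file): while a helper sits on the
 * lazy list, dofhp_dof carries its generation and dofhp_addr keeps the
 * original user DOF address. The per-entry handling in the loop above,
 * isolated into a hypothetical helper, looks like this.
 */
#if 0	/* illustrative sketch, not compiled */
static int
example_restore_lazy_helper(dof_helper_t *dhp)
{
	/* The generation was parked in dofhp_dof when the dof went lazy. */
	int generation = (int)dhp->dofhp_dof;

	/* Put the real user DOF address back before the dof is slurped. */
	dhp->dofhp_dof = dhp->dofhp_addr;

	return generation;
}
#endif	/* illustrative sketch */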
#endif /* __APPLE__ */
static dtrace_helpers_t *
dtrace_helpers_create(proc_t *p)
{
	dtrace_helpers_t *help;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(p->p_dtrace_helpers == NULL);

	help = kmem_zalloc(sizeof (dtrace_helpers_t), KM_SLEEP);
	help->dthps_actions = kmem_zalloc(sizeof (dtrace_helper_action_t *) *
	    DTRACE_NHELPER_ACTIONS, KM_SLEEP);

	p->p_dtrace_helpers = help;
	dtrace_helpers++;

	return (help);
}
#if !defined(__APPLE__)
static void
dtrace_helpers_destroy(void)
{
	dtrace_helpers_t *help;
	dtrace_vstate_t *vstate;
	proc_t *p = curproc;
	int i;
#else
static void
dtrace_helpers_destroy(proc_t* p)
{
	dtrace_helpers_t *help;
	dtrace_vstate_t *vstate;
	uint_t i;
#endif /* __APPLE__ */

	lck_mtx_lock(&dtrace_lock);

	ASSERT(p->p_dtrace_helpers != NULL);
	ASSERT(dtrace_helpers > 0);

	help = p->p_dtrace_helpers;
	vstate = &help->dthps_vstate;

	/*
	 * We're now going to lose the help from this process.
	 */
	p->p_dtrace_helpers = NULL;
	dtrace_sync();

	/*
	 * Destroy the helper actions.
	 */
	for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
		dtrace_helper_action_t *h, *next;

		for (h = help->dthps_actions[i]; h != NULL; h = next) {
			next = h->dtha_next;
			dtrace_helper_action_destroy(h, vstate);
		}
	}

	lck_mtx_unlock(&dtrace_lock);

	/*
	 * Destroy the helper providers.
	 */
	if (help->dthps_maxprovs > 0) {
		lck_mtx_lock(&dtrace_meta_lock);
		if (dtrace_meta_pid != NULL) {
			ASSERT(dtrace_deferred_pid == NULL);

			for (i = 0; i < help->dthps_nprovs; i++) {
				dtrace_helper_provider_remove(
				    &help->dthps_provs[i]->dthp_prov, p->p_pid);
			}
		} else {
			lck_mtx_lock(&dtrace_lock);
			ASSERT(help->dthps_deferred == 0 ||
			    help->dthps_next != NULL ||
			    help->dthps_prev != NULL ||
			    help == dtrace_deferred_pid);

			/*
			 * Remove the helper from the deferred list.
			 */
			if (help->dthps_next != NULL)
				help->dthps_next->dthps_prev = help->dthps_prev;
			if (help->dthps_prev != NULL)
				help->dthps_prev->dthps_next = help->dthps_next;
			if (dtrace_deferred_pid == help) {
				dtrace_deferred_pid = help->dthps_next;
				ASSERT(help->dthps_prev == NULL);
			}

			lck_mtx_unlock(&dtrace_lock);
		}

		lck_mtx_unlock(&dtrace_meta_lock);

		for (i = 0; i < help->dthps_nprovs; i++) {
			dtrace_helper_provider_destroy(help->dthps_provs[i]);
		}

		kmem_free(help->dthps_provs, help->dthps_maxprovs *
		    sizeof (dtrace_helper_provider_t *));
	}

	lck_mtx_lock(&dtrace_lock);

	dtrace_vstate_fini(&help->dthps_vstate);
	kmem_free(help->dthps_actions,
	    sizeof (dtrace_helper_action_t *) * DTRACE_NHELPER_ACTIONS);
	kmem_free(help, sizeof (dtrace_helpers_t));

	--dtrace_helpers;
	lck_mtx_unlock(&dtrace_lock);
}
static void
dtrace_helpers_duplicate(proc_t *from, proc_t *to)
{
	dtrace_helpers_t *help, *newhelp;
	dtrace_helper_action_t *helper, *new, *last;
	dtrace_difo_t *dp;
	dtrace_vstate_t *vstate;
#if !defined(__APPLE__) /* Quiet compiler warning */
	int i, j, sz, hasprovs = 0;
#else
	uint_t i;
	int j, sz, hasprovs = 0;
#endif /* __APPLE__ */

	lck_mtx_lock(&dtrace_lock);
	ASSERT(from->p_dtrace_helpers != NULL);
	ASSERT(dtrace_helpers > 0);

	help = from->p_dtrace_helpers;
	newhelp = dtrace_helpers_create(to);
	ASSERT(to->p_dtrace_helpers != NULL);

	newhelp->dthps_generation = help->dthps_generation;
	vstate = &newhelp->dthps_vstate;

	/*
	 * Duplicate the helper actions.
	 */
	for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
		if ((helper = help->dthps_actions[i]) == NULL)
			continue;

		for (last = NULL; helper != NULL; helper = helper->dtha_next) {
			new = kmem_zalloc(sizeof (dtrace_helper_action_t),
			    KM_SLEEP);
			new->dtha_generation = helper->dtha_generation;

			if ((dp = helper->dtha_predicate) != NULL) {
				dp = dtrace_difo_duplicate(dp, vstate);
				new->dtha_predicate = dp;
			}

			new->dtha_nactions = helper->dtha_nactions;
			sz = sizeof (dtrace_difo_t *) * new->dtha_nactions;
			new->dtha_actions = kmem_alloc(sz, KM_SLEEP);

#if !defined(__APPLE__) /* Quiet compiler warning */
			for (j = 0; j < new->dtha_nactions; j++) {
				dtrace_difo_t *dp = helper->dtha_actions[j];

				ASSERT(dp != NULL);
				dp = dtrace_difo_duplicate(dp, vstate);
				new->dtha_actions[j] = dp;
			}
#else
			for (j = 0; j < new->dtha_nactions; j++) {
				dtrace_difo_t *dpj = helper->dtha_actions[j];

				ASSERT(dpj != NULL);
				dpj = dtrace_difo_duplicate(dpj, vstate);
				new->dtha_actions[j] = dpj;
			}
#endif /* __APPLE__ */

			if (last != NULL) {
				last->dtha_next = new;
			} else {
				newhelp->dthps_actions[i] = new;
			}

			last = new;
		}
	}

	/*
	 * Duplicate the helper providers and register them with the
	 * DTrace framework.
	 */
	if (help->dthps_nprovs > 0) {
		newhelp->dthps_nprovs = help->dthps_nprovs;
		newhelp->dthps_maxprovs = help->dthps_nprovs;
		newhelp->dthps_provs = kmem_alloc(newhelp->dthps_nprovs *
		    sizeof (dtrace_helper_provider_t *), KM_SLEEP);
		for (i = 0; i < newhelp->dthps_nprovs; i++) {
			newhelp->dthps_provs[i] = help->dthps_provs[i];
			newhelp->dthps_provs[i]->dthp_ref++;
		}

		hasprovs = 1;
	}

	lck_mtx_unlock(&dtrace_lock);

	if (hasprovs)
		dtrace_helper_provider_register(to, newhelp, NULL);
}
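/*
 * Illustrative sketch (not part of this file): dthps_actions is an array of
 * DTRACE_NHELPER_ACTIONS singly linked lists chained through dtha_next,
 * which is the layout the duplication loop above preserves. A walk of a
 * duplicated table looks like this; the function name is hypothetical.
 */
#if 0	/* illustrative sketch, not compiled */
static uint_t
example_count_helper_actions(dtrace_helpers_t *help)
{
	uint_t i, count = 0;

	for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
		dtrace_helper_action_t *h;

		for (h = help->dthps_actions[i]; h != NULL; h = h->dtha_next)
			count++;
	}

	return count;
}
#endif	/* illustrative sketch */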
/*
 * DTrace Hook Functions
 */

#if defined(__APPLE__)
/*
 * Routines to manipulate the modctl list within dtrace
 */

modctl_t *dtrace_modctl_list;

static void
dtrace_modctl_add(struct modctl * newctl)
{
	struct modctl *nextp, *prevp;

	ASSERT(newctl != NULL);
	lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED);

	// Insert new module at the front of the list,

	newctl->mod_next = dtrace_modctl_list;
	dtrace_modctl_list = newctl;

	/*
	 * If a module exists with the same name, then that module
	 * must have been unloaded with enabled probes. We will move
	 * the unloaded module to the new module's stale chain and
	 * then stop traversing the list.
	 */

	prevp = newctl;
	nextp = newctl->mod_next;

	while (nextp != NULL) {
		if (nextp->mod_loaded) {
			/* This is a loaded module. Keep traversing. */
			prevp = nextp;
			nextp = nextp->mod_next;
			continue;
		}

		/* Found an unloaded module */
		if (strncmp (newctl->mod_modname, nextp->mod_modname, KMOD_MAX_NAME)) {
			/* Names don't match. Keep traversing. */
			prevp = nextp;
			nextp = nextp->mod_next;
			continue;
		}

		/* We found a stale entry, move it. We're done. */
		prevp->mod_next = nextp->mod_next;
		newctl->mod_stale = nextp;
		nextp->mod_next = NULL;
		break;
	}
}
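/*
 * Illustrative sketch (not part of this file): dtrace_modctl_add() parks an
 * unloaded namesake on the new entry's mod_stale chain; entries on that
 * chain link to further stale entries through their own mod_stale field.
 * Walking the chain (as the remove path does when freeing it) looks like
 * this; the function name is hypothetical.
 */
#if 0	/* illustrative sketch, not compiled */
static int
example_count_stale_modctls(modctl_t *ctl)
{
	int count = 0;
	modctl_t *curp;

	for (curp = ctl->mod_stale; curp != NULL; curp = curp->mod_stale)
		count++;

	return count;
}
#endif	/* illustrative sketch */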
static modctl_t *
dtrace_modctl_lookup(struct kmod_info * kmod)
{
	lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED);

	struct modctl * ctl;

	for (ctl = dtrace_modctl_list; ctl; ctl=ctl->mod_next) {
		if (ctl->mod_id == kmod->id)
			return(ctl);
	}
	return (NULL);
}
/*
 * This routine is called from dtrace_module_unloaded().
 * It removes a modctl structure and its stale chain
 * from the kext shadow list.
 */
static void
dtrace_modctl_remove(struct modctl * ctl)
{
	ASSERT(ctl != NULL);
	lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED);
	modctl_t *prevp, *nextp, *curp;

	// Remove stale chain first
	for (curp=ctl->mod_stale; curp != NULL; curp=nextp) {
		nextp = curp->mod_stale;
		/* There should NEVER be user symbols allocated at this point */
		ASSERT(curp->mod_user_symbols == NULL);
		kmem_free(curp, sizeof(modctl_t));
	}

	prevp = NULL;
	curp = dtrace_modctl_list;

	while (curp != ctl) {
		prevp = curp;
		curp = curp->mod_next;
	}

	if (prevp != NULL) {
		prevp->mod_next = ctl->mod_next;
	}
	else {
		dtrace_modctl_list = ctl->mod_next;
	}

	/* There should NEVER be user symbols allocated at this point */
	ASSERT(ctl->mod_user_symbols == NULL);

	kmem_free (ctl, sizeof(modctl_t));
}
#endif /* __APPLE__ */
/*
 * APPLE NOTE: The kext loader will call dtrace_module_loaded
 * when the kext is loaded in memory, but before calling the
 * kext's start routine.
 *
 * Return 0 on success
 * Return -1 on failure
 */
#if !defined (__APPLE__)
static void
dtrace_module_loaded(struct modctl *ctl)
#else
static int
dtrace_module_loaded(struct kmod_info *kmod, uint32_t flag)
#endif /* __APPLE__ */
{
	dtrace_provider_t *prv;

#if !defined(__APPLE__)
	mutex_enter(&dtrace_provider_lock);
	mutex_enter(&mod_lock);

	ASSERT(ctl->mod_busy);
#else
	/*
	 * If kernel symbols have been disabled, return immediately
	 * DTRACE_KERNEL_SYMBOLS_NEVER is a permanent mode, it is safe to test without holding locks
	 */
	if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER)
		return 0;

	struct modctl *ctl = NULL;
	if (!kmod || kmod->address == 0 || kmod->size == 0)
		return(-1);

	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&mod_lock);

	/*
	 * Have we seen this kext before?
	 */
	ctl = dtrace_modctl_lookup(kmod);

	if (ctl != NULL) {
		/* bail... we already have this kext in the modctl list */
		lck_mtx_unlock(&mod_lock);
		lck_mtx_unlock(&dtrace_provider_lock);
		if (dtrace_err_verbose)
			cmn_err(CE_WARN, "dtrace load module already exists '%s %u' is failing against '%s %u'", kmod->name, (uint_t)kmod->id, ctl->mod_modname, ctl->mod_id);
		return(-1);
	}
	else {
		ctl = kmem_alloc(sizeof(struct modctl), KM_SLEEP);
		if (ctl == NULL) {
			if (dtrace_err_verbose)
				cmn_err(CE_WARN, "dtrace module load '%s %u' is failing ", kmod->name, (uint_t)kmod->id);
			lck_mtx_unlock(&mod_lock);
			lck_mtx_unlock(&dtrace_provider_lock);
			return(-1);
		}

		ctl->mod_next = NULL;
		ctl->mod_stale = NULL;
		strlcpy (ctl->mod_modname, kmod->name, sizeof(ctl->mod_modname));
		ctl->mod_loadcnt = kmod->id;
		ctl->mod_nenabled = 0;
		ctl->mod_address = kmod->address;
		ctl->mod_size = kmod->size;
		ctl->mod_id = kmod->id;
		ctl->mod_loaded = 1;
		ctl->mod_flags = 0;
		ctl->mod_user_symbols = NULL;

		/*
		 * Find the UUID for this module, if it has one
		 */
		kernel_mach_header_t* header = (kernel_mach_header_t *)ctl->mod_address;
		struct load_command* load_cmd = (struct load_command *)&header[1];
		uint32_t i;
		for (i = 0; i < header->ncmds; i++) {
			if (load_cmd->cmd == LC_UUID) {
				struct uuid_command* uuid_cmd = (struct uuid_command *)load_cmd;
				memcpy(ctl->mod_uuid, uuid_cmd->uuid, sizeof(uuid_cmd->uuid));
				ctl->mod_flags |= MODCTL_HAS_UUID;
				break;
			}
			load_cmd = (struct load_command *)((caddr_t)load_cmd + load_cmd->cmdsize);
		}

		if (ctl->mod_address == g_kernel_kmod_info.address) {
			ctl->mod_flags |= MODCTL_IS_MACH_KERNEL;
		}
	}

	dtrace_modctl_add(ctl);

	/*
	 * We must hold the dtrace_lock to safely test non permanent dtrace_fbt_symbol_mode(s)
	 */
	lck_mtx_lock(&dtrace_lock);

	/*
	 * DTrace must decide if it will instrument modules lazily via
	 * userspace symbols (default mode), or instrument immediately via
	 * kernel symbols (non-default mode)
	 *
	 * When in default/lazy mode, DTrace will only support modules
	 * built with a valid UUID.
	 *
	 * Overriding the default can be done explicitly in one of
	 * the following two ways.
	 *
	 * A module can force symbols from kernel space using the plist key,
	 * OSBundleForceDTraceInit (see kmod.h). If this per kext state is set,
	 * we fall through and instrument this module now.
	 *
	 * Or, the boot-arg, dtrace_kernel_symbol_mode, can be set to force symbols
	 * from kernel space (see dtrace_impl.h). If this system state is set
	 * to a non-userspace mode, we fall through and instrument the module now.
	 */
	if ((dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) &&
	    (!(flag & KMOD_DTRACE_FORCE_INIT))) {
		/* We will instrument the module lazily -- this is the default */
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&mod_lock);
		lck_mtx_unlock(&dtrace_provider_lock);
		return 0;
	}

	/* We will instrument the module immediately using kernel symbols */
	ctl->mod_flags |= MODCTL_HAS_KERNEL_SYMBOLS;

	lck_mtx_unlock(&dtrace_lock);
#endif /* __APPLE__ */

	/*
	 * We're going to call each providers per-module provide operation
	 * specifying only this module.
	 */
	for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
		prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);

#if defined(__APPLE__)
	/*
	 * The contract with the kext loader is that once this function has completed,
	 * it may delete kernel symbols at will. We must set this while still holding
	 * the mod_lock.
	 */
	ctl->mod_flags &= ~MODCTL_HAS_KERNEL_SYMBOLS;
#endif /* __APPLE__ */

	lck_mtx_unlock(&mod_lock);
	lck_mtx_unlock(&dtrace_provider_lock);

	/*
	 * If we have any retained enablings, we need to match against them.
	 * Enabling probes requires that cpu_lock be held, and we cannot hold
	 * cpu_lock here -- it is legal for cpu_lock to be held when loading a
	 * module. (In particular, this happens when loading scheduling
	 * classes.) So if we have any retained enablings, we need to dispatch
	 * our task queue to do the match for us.
	 */
	lck_mtx_lock(&dtrace_lock);

	if (dtrace_retained == NULL) {
		lck_mtx_unlock(&dtrace_lock);
#if !defined(__APPLE__)
		return;
#else
		return 0;
#endif /* __APPLE__ */
	}

#if !defined(__APPLE__)
	(void) taskq_dispatch(dtrace_taskq,
	    (task_func_t *)dtrace_enabling_matchall, NULL, TQ_SLEEP);

	mutex_exit(&dtrace_lock);

	/*
	 * And now, for a little heuristic sleaze: in general, we want to
	 * match modules as soon as they load. However, we cannot guarantee
	 * this, because it would lead us to the lock ordering violation
	 * outlined above. The common case, of course, is that cpu_lock is
	 * _not_ held -- so we delay here for a clock tick, hoping that that's
	 * long enough for the task queue to do its work. If it's not, it's
	 * not a serious problem -- it just means that the module that we
	 * just loaded may not be immediately instrumentable.
	 */
	delay(1);
#else
	/*
	 * The cpu_lock mentioned above is only held by dtrace code, Apple's xnu never actually
	 * holds it for any reason. Thus the comment above is invalid, we can directly invoke
	 * dtrace_enabling_matchall without jumping through all the hoops, and we can avoid
	 * the delay call as well.
	 */
	lck_mtx_unlock(&dtrace_lock);

	dtrace_enabling_matchall();

	return 0;
#endif /* __APPLE__ */
}
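/*
 * Illustrative sketch (not part of this file): the provide loop above hands
 * the new modctl to every registered provider through its
 * dtps_provide_module entry point. The callback below is a placeholder
 * showing the assumed shape of that hook, not any real provider's
 * implementation.
 */
#if 0	/* illustrative sketch, not compiled */
static void
example_provide_module(void *arg, struct modctl *ctl)
{
#pragma unused(arg)
	if (ctl == NULL || !ctl->mod_loaded)
		return;

	/*
	 * A real provider would scan the module's text in
	 * [ctl->mod_address, ctl->mod_address + ctl->mod_size)
	 * and create probes for the sites it finds.
	 */
}
#endif	/* illustrative sketch */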
#if !defined(__APPLE__)
static void
dtrace_module_unloaded(struct modctl *ctl)
{
	dtrace_probe_t template, *probe, *first, *next;
	dtrace_provider_t *prov;

	template.dtpr_mod = ctl->mod_modname;

	mutex_enter(&dtrace_provider_lock);
	mutex_enter(&mod_lock);
	mutex_enter(&dtrace_lock);

	if (dtrace_bymod == NULL) {
		/*
		 * The DTrace module is loaded (obviously) but not attached;
		 * we don't have any work to do.
		 */
		mutex_exit(&dtrace_provider_lock);
		mutex_exit(&mod_lock);
		mutex_exit(&dtrace_lock);
		return;
	}

	for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template);
	    probe != NULL; probe = probe->dtpr_nextmod) {
		if (probe->dtpr_ecb != NULL) {
			mutex_exit(&dtrace_provider_lock);
			mutex_exit(&mod_lock);
			mutex_exit(&dtrace_lock);

			/*
			 * This shouldn't _actually_ be possible -- we're
			 * unloading a module that has an enabled probe in it.
			 * (It's normally up to the provider to make sure that
			 * this can't happen.)  However, because dtps_enable()
			 * doesn't have a failure mode, there can be an
			 * enable/unload race.  Upshot:  we don't want to
			 * assert, but we're not going to disable the
			 * probe, either.
			 */
			if (dtrace_err_verbose) {
				cmn_err(CE_WARN, "unloaded module '%s' had "
				    "enabled probes", ctl->mod_modname);
			}

			return;
		}
	}

	probe = first;

	for (first = NULL; probe != NULL; probe = next) {
		ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe);

		dtrace_probes[probe->dtpr_id - 1] = NULL;
		probe->dtpr_provider->probe_count--;

		next = probe->dtpr_nextmod;
		dtrace_hash_remove(dtrace_bymod, probe);
		dtrace_hash_remove(dtrace_byfunc, probe);
		dtrace_hash_remove(dtrace_byname, probe);

		if (first == NULL) {
			first = probe;
			probe->dtpr_nextmod = NULL;
		} else {
			probe->dtpr_nextmod = first;
			first = probe;
		}
	}

	/*
	 * We've removed all of the module's probes from the hash chains and
	 * from the probe array.  Now issue a dtrace_sync() to be sure that
	 * everyone has cleared out from any probe array processing.
	 */
	dtrace_sync();

	for (probe = first; probe != NULL; probe = first) {
		first = probe->dtpr_nextmod;
		prov = probe->dtpr_provider;
		prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id,
		    probe->dtpr_arg);
		kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
		kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
		kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
		vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1);
		kmem_free(probe, sizeof (dtrace_probe_t));
	}

	mutex_exit(&dtrace_lock);
	mutex_exit(&mod_lock);
	mutex_exit(&dtrace_provider_lock);
}
#else /* __APPLE__ */

/*
 * Return 0 on success
 * Return -1 on failure
 */
static int
dtrace_module_unloaded(struct kmod_info *kmod)
{
	dtrace_probe_t template, *probe, *first, *next;
	dtrace_provider_t *prov;
	struct modctl *ctl = NULL;
	struct modctl *syncctl = NULL;
	struct modctl *nextsyncctl = NULL;
	int syncmode = 0;

	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&mod_lock);
	lck_mtx_lock(&dtrace_lock);

	if (kmod == NULL) {
		syncmode = 1;
	}
	else {
		ctl = dtrace_modctl_lookup(kmod);
		if (ctl == NULL) {
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&mod_lock);
			lck_mtx_unlock(&dtrace_provider_lock);
			return (-1);
		}
		ctl->mod_loaded = 0;
		ctl->mod_address = 0;
		ctl->mod_size = 0;
	}

	if (dtrace_bymod == NULL) {
		/*
		 * The DTrace module is loaded (obviously) but not attached;
		 * we don't have any work to do.
		 */
		(void)dtrace_modctl_remove(ctl);
		lck_mtx_unlock(&dtrace_provider_lock);
		lck_mtx_unlock(&mod_lock);
		lck_mtx_unlock(&dtrace_lock);
		return (0);
	}

	/* Syncmode set means we target and traverse entire modctl list. */
	if (syncmode)
		nextsyncctl = dtrace_modctl_list;

syncloop:
	if (syncmode) {
		/* find a stale modctl struct */
		for (syncctl = nextsyncctl; syncctl != NULL; syncctl=syncctl->mod_next) {
			if (syncctl->mod_address == 0)
				break;
		}

		if (syncctl == NULL) {
			/* We have no more work to do */
			lck_mtx_unlock(&dtrace_provider_lock);
			lck_mtx_unlock(&mod_lock);
			lck_mtx_unlock(&dtrace_lock);
			return (0);
		}
		else {
			/* keep track of next syncctl in case this one is removed */
			nextsyncctl = syncctl->mod_next;
			ctl = syncctl;
		}
	}

	template.dtpr_mod = ctl->mod_modname;

	for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template);
	    probe != NULL; probe = probe->dtpr_nextmod) {
		if (probe->dtpr_ecb != NULL) {
			/*
			 * This shouldn't _actually_ be possible -- we're
			 * unloading a module that has an enabled probe in it.
			 * (It's normally up to the provider to make sure that
			 * this can't happen.)  However, because dtps_enable()
			 * doesn't have a failure mode, there can be an
			 * enable/unload race.  Upshot:  we don't want to
			 * assert, but we're not going to disable the
			 * probe, either.
			 */
			if (syncmode) {
				/* We're syncing, let's look at next in list */
				goto syncloop;
			}

			lck_mtx_unlock(&dtrace_provider_lock);
			lck_mtx_unlock(&mod_lock);
			lck_mtx_unlock(&dtrace_lock);

			if (dtrace_err_verbose) {
				cmn_err(CE_WARN, "unloaded module '%s' had "
				    "enabled probes", ctl->mod_modname);
			}
			return (-1);
		}
	}

	probe = first;

	for (first = NULL; probe != NULL; probe = next) {
		ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe);

		dtrace_probes[probe->dtpr_id - 1] = NULL;
		probe->dtpr_provider->probe_count--;

		next = probe->dtpr_nextmod;
		dtrace_hash_remove(dtrace_bymod, probe);
		dtrace_hash_remove(dtrace_byfunc, probe);
		dtrace_hash_remove(dtrace_byname, probe);

		if (first == NULL) {
			first = probe;
			probe->dtpr_nextmod = NULL;
		} else {
			probe->dtpr_nextmod = first;
			first = probe;
		}
	}

	/*
	 * We've removed all of the module's probes from the hash chains and
	 * from the probe array.  Now issue a dtrace_sync() to be sure that
	 * everyone has cleared out from any probe array processing.
	 */
	dtrace_sync();

	for (probe = first; probe != NULL; probe = first) {
		first = probe->dtpr_nextmod;
		prov = probe->dtpr_provider;
		prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id,
		    probe->dtpr_arg);
		kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
		kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
		kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
		vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1);

		zfree(dtrace_probe_t_zone, probe);
	}

	dtrace_modctl_remove(ctl);

	if (syncmode)
		goto syncloop;

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&mod_lock);
	lck_mtx_unlock(&dtrace_provider_lock);

	return (0);
}
#endif /* __APPLE__ */
static void
dtrace_suspend(void)
{
	dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend));
}

static void
dtrace_resume(void)
{
	dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_resume));
}
static int
dtrace_cpu_setup(cpu_setup_t what, processorid_t cpu)
{
	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_lock(&dtrace_lock);

	switch (what) {
	case CPU_CONFIG: {
		dtrace_state_t *state;
		dtrace_optval_t *opt, rs, c;

		/*
		 * For now, we only allocate a new buffer for anonymous state.
		 */
		if ((state = dtrace_anon.dta_state) == NULL)
			break;

		if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
			break;

		opt = state->dts_options;
		c = opt[DTRACEOPT_CPU];

		if (c != DTRACE_CPUALL && c != DTRACEOPT_UNSET && c != cpu)
			break;

		/*
		 * Regardless of what the actual policy is, we're going to
		 * temporarily set our resize policy to be manual.  We're
		 * also going to temporarily set our CPU option to denote
		 * the newly configured CPU.
		 */
		rs = opt[DTRACEOPT_BUFRESIZE];
		opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_MANUAL;
		opt[DTRACEOPT_CPU] = (dtrace_optval_t)cpu;

		(void) dtrace_state_buffers(state);

		opt[DTRACEOPT_BUFRESIZE] = rs;
		opt[DTRACEOPT_CPU] = c;

		break;
	}

	case CPU_UNCONFIG:
		/*
		 * We don't free the buffer in the CPU_UNCONFIG case.  (The
		 * buffer will be freed when the consumer exits.)
		 */
		break;

	default:
		break;
	}

	lck_mtx_unlock(&dtrace_lock);
	return (0);
}

static void
dtrace_cpu_setup_initial(processorid_t cpu)
{
	(void) dtrace_cpu_setup(CPU_CONFIG, cpu);
}
static void
dtrace_toxrange_add(uintptr_t base, uintptr_t limit)
{
	if (dtrace_toxranges >= dtrace_toxranges_max) {
		size_t osize, nsize;
		dtrace_toxrange_t *range;

		osize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);

		if (osize == 0) {
			ASSERT(dtrace_toxrange == NULL);
			ASSERT(dtrace_toxranges_max == 0);
			dtrace_toxranges_max = 1;
		} else {
			dtrace_toxranges_max <<= 1;
		}

		nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
		range = kmem_zalloc(nsize, KM_SLEEP);

		if (dtrace_toxrange != NULL) {
			ASSERT(osize != 0);
			bcopy(dtrace_toxrange, range, osize);
			kmem_free(dtrace_toxrange, osize);
		}

		dtrace_toxrange = range;
	}

	ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == NULL);
	ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == NULL);

	dtrace_toxrange[dtrace_toxranges].dtt_base = base;
	dtrace_toxrange[dtrace_toxranges].dtt_limit = limit;
	dtrace_toxranges++;
}
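/*
 * Illustrative sketch (not part of this file): consumers of the toxic range
 * table reject loads that fall inside any registered [dtt_base, dtt_limit)
 * window. A minimal check over the table built by dtrace_toxrange_add()
 * might look like this; the function name is hypothetical.
 */
#if 0	/* illustrative sketch, not compiled */
static int
example_address_is_toxic(uintptr_t addr, size_t size)
{
	int i;

	for (i = 0; i < dtrace_toxranges; i++) {
		if (addr + size > dtrace_toxrange[i].dtt_base &&
		    addr < dtrace_toxrange[i].dtt_limit)
			return (1);
	}

	return (0);
}
#endif	/* illustrative sketch */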
/*
 * DTrace Driver Cookbook Functions
 */
static int
dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
#pragma unused(cmd) /* __APPLE__ */
	dtrace_provider_id_t id;
	dtrace_state_t *state = NULL;
	dtrace_enabling_t *enab;

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&dtrace_lock);

	if (ddi_soft_state_init(&dtrace_softstate,
	    sizeof (dtrace_state_t), 0) != 0) {
		cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state");
		lck_mtx_unlock(&cpu_lock);
		lck_mtx_unlock(&dtrace_provider_lock);
		lck_mtx_unlock(&dtrace_lock);
		return (DDI_FAILURE);
	}

#if !defined(__APPLE__)
	if (ddi_create_minor_node(devi, DTRACEMNR_DTRACE, S_IFCHR,
	    DTRACEMNRN_DTRACE, DDI_PSEUDO, NULL) == DDI_FAILURE ||
	    ddi_create_minor_node(devi, DTRACEMNR_HELPER, S_IFCHR,
	    DTRACEMNRN_HELPER, DDI_PSEUDO, NULL) == DDI_FAILURE) {
		cmn_err(CE_NOTE, "/dev/dtrace couldn't create minor nodes");
		ddi_remove_minor_node(devi, NULL);
		ddi_soft_state_fini(&dtrace_softstate);
		lck_mtx_unlock(&cpu_lock);
		lck_mtx_unlock(&dtrace_provider_lock);
		lck_mtx_unlock(&dtrace_lock);
		return (DDI_FAILURE);
	}
#else
	/* Darwin uses BSD cloning device driver to automagically obtain minor device number. */
#endif /* __APPLE__ */

	ddi_report_dev(devi);
	dtrace_devi = devi;

	dtrace_modload = dtrace_module_loaded;
	dtrace_modunload = dtrace_module_unloaded;
	dtrace_cpu_init = dtrace_cpu_setup_initial;
	dtrace_helpers_cleanup = dtrace_helpers_destroy;
	dtrace_helpers_fork = dtrace_helpers_duplicate;
	dtrace_cpustart_init = dtrace_suspend;
	dtrace_cpustart_fini = dtrace_resume;
	dtrace_debugger_init = dtrace_suspend;
	dtrace_debugger_fini = dtrace_resume;

	register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);

	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);

	dtrace_arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1,
	    NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
	dtrace_minor = vmem_create("dtrace_minor", (void *)DTRACEMNRN_CLONE,
	    UINT32_MAX - DTRACEMNRN_CLONE, 1, NULL, NULL, NULL, 0,
	    VM_SLEEP | VMC_IDENTIFIER);
	dtrace_taskq = taskq_create("dtrace_taskq", 1, maxclsyspri,
	    1, INT_MAX, 0);

	dtrace_state_cache = kmem_cache_create("dtrace_state_cache",
	    sizeof (dtrace_dstate_percpu_t) * (int)NCPU, DTRACE_STATE_ALIGN,
	    NULL, NULL, NULL, NULL, NULL, 0);

	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);

	dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod),
	    offsetof(dtrace_probe_t, dtpr_nextmod),
	    offsetof(dtrace_probe_t, dtpr_prevmod));

	dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func),
	    offsetof(dtrace_probe_t, dtpr_nextfunc),
	    offsetof(dtrace_probe_t, dtpr_prevfunc));

	dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name),
	    offsetof(dtrace_probe_t, dtpr_nextname),
	    offsetof(dtrace_probe_t, dtpr_prevname));

	if (dtrace_retain_max < 1) {
		cmn_err(CE_WARN, "illegal value (%lu) for dtrace_retain_max; "
		    "setting to 1", dtrace_retain_max);
		dtrace_retain_max = 1;
	}

	/*
	 * Now discover our toxic ranges.
	 */
	dtrace_toxic_ranges(dtrace_toxrange_add);

	/*
	 * Before we register ourselves as a provider to our own framework,
	 * we would like to assert that dtrace_provider is NULL -- but that's
	 * not true if we were loaded as a dependency of a DTrace provider.
	 * Once we've registered, we can assert that dtrace_provider is our
	 * pseudo provider.
	 */
	(void) dtrace_register("dtrace", &dtrace_provider_attr,
	    DTRACE_PRIV_NONE, 0, &dtrace_provider_ops, NULL, &id);

	ASSERT(dtrace_provider != NULL);
	ASSERT((dtrace_provider_id_t)dtrace_provider == id);

#if !defined(__APPLE__)
	dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "BEGIN", 0, NULL);
	dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "END", 0, NULL);
	dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "ERROR", 1, NULL);
#elif defined (__x86_64__)
	dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "BEGIN", 1, NULL);
	dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "END", 0, NULL);
	dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "ERROR", 3, NULL);
#else
#error Unknown Architecture
#endif /* __APPLE__ */

	dtrace_anon_property();
	lck_mtx_unlock(&cpu_lock);

	/*
	 * If DTrace helper tracing is enabled, we need to allocate the
	 * trace buffer and initialize the values.
	 */
	if (dtrace_helptrace_enabled) {
		ASSERT(dtrace_helptrace_buffer == NULL);
		dtrace_helptrace_buffer =
		    kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP);
		dtrace_helptrace_next = 0;
	}

	/*
	 * If there are already providers, we must ask them to provide their
	 * probes, and then match any anonymous enabling against them.  Note
	 * that there should be no other retained enablings at this time:
	 * the only retained enablings at this time should be the anonymous
	 * enabling.
	 */
	if (dtrace_anon.dta_enabling != NULL) {
		ASSERT(dtrace_retained == dtrace_anon.dta_enabling);

#if defined(__APPLE__)
		/*
		 * If there is anonymous dof, we should switch symbol modes.
		 */
		if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) {
			dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_KERNEL;
		}
#endif /* __APPLE__ */

		dtrace_enabling_provide(NULL);
		state = dtrace_anon.dta_state;

		/*
		 * We couldn't hold cpu_lock across the above call to
		 * dtrace_enabling_provide(), but we must hold it to actually
		 * enable the probes.  We have to drop all of our locks, pick
		 * up cpu_lock, and regain our locks before matching the
		 * retained anonymous enabling.
		 */
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&dtrace_provider_lock);

		lck_mtx_lock(&cpu_lock);
		lck_mtx_lock(&dtrace_provider_lock);
		lck_mtx_lock(&dtrace_lock);

		if ((enab = dtrace_anon.dta_enabling) != NULL)
			(void) dtrace_enabling_match(enab, NULL);

		lck_mtx_unlock(&cpu_lock);
	}

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_provider_lock);

	if (state != NULL) {
		/*
		 * If we created any anonymous state, set it going now.
		 */
		(void) dtrace_state_go(state, &dtrace_anon.dta_beganon);
	}

	return (DDI_SUCCESS);
}
static int
dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
#pragma unused(flag, otyp)
	dtrace_state_t *state;
	uint32_t priv;
	uid_t uid;
	zoneid_t zoneid;
#if defined (__APPLE__)
	int rv;
#endif /* __APPLE__ */

#if !defined(__APPLE__)
	if (getminor(*devp) == DTRACEMNRN_HELPER)
		return (0);

	/*
	 * If this wasn't an open with the "helper" minor, then it must be
	 * the "dtrace" minor.
	 */
	if (getminor(*devp) != DTRACEMNRN_DTRACE)
		return (ENXIO);
#else
	/* Darwin puts Helper on its own major device. */
#endif /* __APPLE__ */

	/*
	 * If no DTRACE_PRIV_* bits are set in the credential, then the
	 * caller lacks sufficient permission to do anything with DTrace.
	 */
	dtrace_cred2priv(cred_p, &priv, &uid, &zoneid);
	if (priv == DTRACE_PRIV_NONE)
		return (EACCES);

#if defined(__APPLE__)
	/*
	 * We delay the initialization of fasttrap as late as possible.
	 * It certainly can't be later than now!
	 */
	fasttrap_init();
#endif /* __APPLE__ */

	/*
	 * Ask all providers to provide all their probes.
	 */
	lck_mtx_lock(&dtrace_provider_lock);
	dtrace_probe_provide(NULL, NULL);
	lck_mtx_unlock(&dtrace_provider_lock);

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_lock);
	dtrace_opens++;
	dtrace_membar_producer();

	/*
	 * If the kernel debugger is active (that is, if the kernel debugger
	 * modified text in some way), we won't allow the open.
	 */
	if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
		dtrace_opens--;
		lck_mtx_unlock(&cpu_lock);
		lck_mtx_unlock(&dtrace_lock);
		return (EBUSY);
	}

#if !defined(__APPLE__)
	state = dtrace_state_create(devp, cred_p);
	lck_mtx_unlock(&cpu_lock);

	if (state == NULL) {
		if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
			(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
		lck_mtx_unlock(&dtrace_lock);
		return (EAGAIN);
	}

	lck_mtx_unlock(&dtrace_lock);
#else
	rv = dtrace_state_create(devp, cred_p, &state);
	lck_mtx_unlock(&cpu_lock);

	if (rv != 0 || state == NULL) {
		if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
			(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
		lck_mtx_unlock(&dtrace_lock);
		/* propagate EAGAIN or ERESTART */
		return (rv);
	}

	lck_mtx_unlock(&dtrace_lock);

	lck_rw_lock_exclusive(&dtrace_dof_mode_lock);

	/*
	 * If we are currently lazy, transition states.
	 *
	 * Unlike dtrace_close, we do not need to check the
	 * value of dtrace_opens, as any positive value (and
	 * we count as 1) means we transition states.
	 */
	if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON) {
		dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_OFF;

		/*
		 * Iterate all existing processes and load lazy dofs.
		 */
		proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS,
		    dtrace_lazy_dofs_proc_iterate_doit,
		    NULL,
		    dtrace_lazy_dofs_proc_iterate_filter,
		    NULL);
	}

	lck_rw_unlock_exclusive(&dtrace_dof_mode_lock);

	/*
	 * Update kernel symbol state.
	 *
	 * We must own the provider and dtrace locks.
	 *
	 * NOTE! It may appear there is a race by setting this value so late
	 * after dtrace_probe_provide. However, any kext loaded after the
	 * call to probe provide and before we set LAZY_OFF will be marked as
	 * eligible for symbols from userspace. The same dtrace that is currently
	 * calling dtrace_open() (this call!) will get a list of kexts needing
	 * symbols and fill them in, thus closing the race window.
	 *
	 * We want to set this value only after it certain it will succeed, as
	 * this significantly reduces the complexity of error exits.
	 */
	lck_mtx_lock(&dtrace_lock);
	if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) {
		dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_KERNEL;
	}
	lck_mtx_unlock(&dtrace_lock);
#endif /* __APPLE__ */

	return (0);
}
static int
dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
{
#pragma unused(flag, otyp, cred_p) /* __APPLE__ */
	minor_t minor = getminor(dev);
	dtrace_state_t *state;

#if !defined(__APPLE__)
	if (minor == DTRACEMNRN_HELPER)
		return (0);
#else
	/* Darwin puts Helper on its own major device. */
#endif /* __APPLE__ */

	state = ddi_get_soft_state(dtrace_softstate, minor);

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_lock);

	if (state->dts_anon) {
		/*
		 * There is anonymous state. Destroy that first.
		 */
		ASSERT(dtrace_anon.dta_state == NULL);
		dtrace_state_destroy(state->dts_anon);
	}

	dtrace_state_destroy(state);
	ASSERT(dtrace_opens > 0);

	/*
	 * Only relinquish control of the kernel debugger interface when there
	 * are no consumers and no anonymous enablings.
	 */
	if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
		(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&cpu_lock);

#if defined(__APPLE__)
	/*
	 * Lock ordering requires the dof mode lock be taken before
	 * the dtrace_lock.
	 */
	lck_rw_lock_exclusive(&dtrace_dof_mode_lock);
	lck_mtx_lock(&dtrace_lock);

	if (dtrace_opens == 0) {
		/*
		 * If we are currently lazy-off, and this is the last close, transition to
		 * lazy mode.
		 */
		if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF) {
			dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
		}

		/*
		 * If we are the last dtrace client, switch back to lazy (from userspace) symbols
		 */
		if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_KERNEL) {
			dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE;
		}
	}

	lck_mtx_unlock(&dtrace_lock);
	lck_rw_unlock_exclusive(&dtrace_dof_mode_lock);

	/*
	 * Kext probes may be retained past the end of the kext's lifespan. The
	 * probes are kept until the last reference to them has been removed.
	 * Since closing an active dtrace context is likely to drop that last reference,
	 * lets take a shot at cleaning out the orphaned probes now.
	 */
	dtrace_module_unloaded(NULL);
#endif /* __APPLE__ */

	return (0);
}
#if !defined(__APPLE__)
/*ARGSUSED*/
static int
dtrace_ioctl_helper(int cmd, intptr_t arg, int *rv)
{
	int rval;
	dof_helper_t help, *dhp = NULL;

	switch (cmd) {
	case DTRACEHIOC_ADDDOF:
		if (copyin((void *)arg, &help, sizeof (help)) != 0) {
			dtrace_dof_error(NULL, "failed to copyin DOF helper");
			return (EFAULT);
		}

		dhp = &help;
		arg = (intptr_t)help.dofhp_dof;
		/*FALLTHROUGH*/

	case DTRACEHIOC_ADD: {
		dof_hdr_t *dof = dtrace_dof_copyin(arg, &rval);

		if (dof == NULL)
			return (rval);

		mutex_enter(&dtrace_lock);

		/*
		 * dtrace_helper_slurp() takes responsibility for the dof --
		 * it may free it now or it may save it and free it later.
		 */
		if ((rval = dtrace_helper_slurp(dof, dhp)) != -1) {
			*rv = rval;
			rval = 0;
		} else {
			rval = EINVAL;
		}

		mutex_exit(&dtrace_lock);
		return (rval);
	}

	case DTRACEHIOC_REMOVE: {
		mutex_enter(&dtrace_lock);
		rval = dtrace_helper_destroygen(arg);
		mutex_exit(&dtrace_lock);

		return (rval);
	}

	default:
		break;
	}

	return (ENOTTY);
}
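/*
 * Illustrative sketch (not part of this file): from user space the helper
 * ioctls above are reached through the helper device node. The device path
 * and error handling below are assumptions for illustration only; in
 * practice the DOF is generated and registered by libdtrace / the runtime
 * linker rather than hand-rolled like this.
 */
#if 0	/* illustrative sketch, not compiled */
#include <fcntl.h>
#include <unistd.h>
#include <sys/ioctl.h>

static int
example_register_helper_dof(dof_helper_t *dhp)
{
	/* "/dev/dtrace/helper" is the assumed helper device path. */
	int fd = open("/dev/dtrace/helper", O_RDWR);
	int gen = -1;

	if (fd >= 0) {
		/* On success the ioctl returns the helper generation. */
		gen = ioctl(fd, DTRACEHIOC_ADDDOF, dhp);
		close(fd);
	}

	return gen;
}
#endif	/* illustrative sketch */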
17369 dtrace_ioctl(dev_t dev
, u_long cmd
, intptr_t arg
, int md
, cred_t
*cr
, int *rv
)
17371 minor_t minor
= getminor(dev
);
17372 dtrace_state_t
*state
;
17375 if (minor
== DTRACEMNRN_HELPER
)
17376 return (dtrace_ioctl_helper(cmd
, arg
, rv
));
17378 state
= ddi_get_soft_state(dtrace_softstate
, minor
);
17380 if (state
->dts_anon
) {
17381 ASSERT(dtrace_anon
.dta_state
== NULL
);
17382 state
= state
->dts_anon
;
17386 case DTRACEIOC_PROVIDER
: {
17387 dtrace_providerdesc_t pvd
;
17388 dtrace_provider_t
*pvp
;
17390 if (copyin((void *)arg
, &pvd
, sizeof (pvd
)) != 0)
17393 pvd
.dtvd_name
[DTRACE_PROVNAMELEN
- 1] = '\0';
17394 lck_mtx_lock(&dtrace_provider_lock
);
17396 for (pvp
= dtrace_provider
; pvp
!= NULL
; pvp
= pvp
->dtpv_next
) {
17397 if (strcmp(pvp
->dtpv_name
, pvd
.dtvd_name
) == 0)
17401 lck_mtx_unlock(&dtrace_provider_lock
);
17406 bcopy(&pvp
->dtpv_priv
, &pvd
.dtvd_priv
, sizeof (dtrace_ppriv_t
));
17407 bcopy(&pvp
->dtpv_attr
, &pvd
.dtvd_attr
, sizeof (dtrace_pattr_t
));
17408 if (copyout(&pvd
, (void *)arg
, sizeof (pvd
)) != 0)
17414 case DTRACEIOC_EPROBE
: {
17415 dtrace_eprobedesc_t epdesc
;
17417 dtrace_action_t
*act
;
17423 if (copyin((void *)arg
, &epdesc
, sizeof (epdesc
)) != 0)
17426 lck_mtx_lock(&dtrace_lock
);
17428 if ((ecb
= dtrace_epid2ecb(state
, epdesc
.dtepd_epid
)) == NULL
) {
17429 lck_mtx_unlock(&dtrace_lock
);
17433 if (ecb
->dte_probe
== NULL
) {
17434 lck_mtx_unlock(&dtrace_lock
);
17438 epdesc
.dtepd_probeid
= ecb
->dte_probe
->dtpr_id
;
17439 epdesc
.dtepd_uarg
= ecb
->dte_uarg
;
17440 epdesc
.dtepd_size
= ecb
->dte_size
;
17442 nrecs
= epdesc
.dtepd_nrecs
;
17443 epdesc
.dtepd_nrecs
= 0;
17444 for (act
= ecb
->dte_action
; act
!= NULL
; act
= act
->dta_next
) {
17445 if (DTRACEACT_ISAGG(act
->dta_kind
) || act
->dta_intuple
)
17448 epdesc
.dtepd_nrecs
++;
17452 * Now that we have the size, we need to allocate a temporary
17453 * buffer in which to store the complete description. We need
17454 * the temporary buffer to be able to drop dtrace_lock()
17455 * across the copyout(), below.
17457 size
= sizeof (dtrace_eprobedesc_t
) +
17458 (epdesc
.dtepd_nrecs
* sizeof (dtrace_recdesc_t
));
17460 buf
= kmem_alloc(size
, KM_SLEEP
);
17461 dest
= (uintptr_t)buf
;
17463 bcopy(&epdesc
, (void *)dest
, sizeof (epdesc
));
17464 dest
+= offsetof(dtrace_eprobedesc_t
, dtepd_rec
[0]);
17466 for (act
= ecb
->dte_action
; act
!= NULL
; act
= act
->dta_next
) {
17467 if (DTRACEACT_ISAGG(act
->dta_kind
) || act
->dta_intuple
)
17473 bcopy(&act
->dta_rec
, (void *)dest
,
17474 sizeof (dtrace_recdesc_t
));
17475 dest
+= sizeof (dtrace_recdesc_t
);
17478 lck_mtx_unlock(&dtrace_lock
);
17480 if (copyout(buf
, (void *)arg
, dest
- (uintptr_t)buf
) != 0) {
17481 kmem_free(buf
, size
);
17485 kmem_free(buf
, size
);
17489 case DTRACEIOC_AGGDESC
: {
17490 dtrace_aggdesc_t aggdesc
;
17491 dtrace_action_t
*act
;
17492 dtrace_aggregation_t
*agg
;
17495 dtrace_recdesc_t
*lrec
;
17500 if (copyin((void *)arg
, &aggdesc
, sizeof (aggdesc
)) != 0)
17503 lck_mtx_lock(&dtrace_lock
);
17505 if ((agg
= dtrace_aggid2agg(state
, aggdesc
.dtagd_id
)) == NULL
) {
17506 lck_mtx_unlock(&dtrace_lock
);
17510 aggdesc
.dtagd_epid
= agg
->dtag_ecb
->dte_epid
;
17512 nrecs
= aggdesc
.dtagd_nrecs
;
17513 aggdesc
.dtagd_nrecs
= 0;
17515 offs
= agg
->dtag_base
;
17516 lrec
= &agg
->dtag_action
.dta_rec
;
17517 aggdesc
.dtagd_size
= lrec
->dtrd_offset
+ lrec
->dtrd_size
- offs
;
17519 for (act
= agg
->dtag_first
; ; act
= act
->dta_next
) {
17520 ASSERT(act
->dta_intuple
||
17521 DTRACEACT_ISAGG(act
->dta_kind
));
17524 * If this action has a record size of zero, it
17525 * denotes an argument to the aggregating action.
17526 * Because the presence of this record doesn't (or
17527 * shouldn't) affect the way the data is interpreted,
17528 * we don't copy it out to save user-level the
17529 * confusion of dealing with a zero-length record.
17531 if (act
->dta_rec
.dtrd_size
== 0) {
17532 ASSERT(agg
->dtag_hasarg
);
17536 aggdesc
.dtagd_nrecs
++;
17538 if (act
== &agg
->dtag_action
)
17543 * Now that we have the size, we need to allocate a temporary
17544 * buffer in which to store the complete description. We need
17545 * the temporary buffer to be able to drop dtrace_lock()
17546 * across the copyout(), below.
17548 size
= sizeof (dtrace_aggdesc_t
) +
17549 (aggdesc
.dtagd_nrecs
* sizeof (dtrace_recdesc_t
));
17551 buf
= kmem_alloc(size
, KM_SLEEP
);
17552 dest
= (uintptr_t)buf
;
17554 bcopy(&aggdesc
, (void *)dest
, sizeof (aggdesc
));
17555 dest
+= offsetof(dtrace_aggdesc_t
, dtagd_rec
[0]);
17557 for (act
= agg
->dtag_first
; ; act
= act
->dta_next
) {
17558 dtrace_recdesc_t rec
= act
->dta_rec
;
17561 * See the comment in the above loop for why we pass
17562 * over zero-length records.
17564 if (rec
.dtrd_size
== 0) {
17565 ASSERT(agg
->dtag_hasarg
);
17572 rec
.dtrd_offset
-= offs
;
17573 bcopy(&rec
, (void *)dest
, sizeof (rec
));
17574 dest
+= sizeof (dtrace_recdesc_t
);
17576 if (act
== &agg
->dtag_action
)
17580 lck_mtx_unlock(&dtrace_lock
);
17582 if (copyout(buf
, (void *)arg
, dest
- (uintptr_t)buf
) != 0) {
17583 kmem_free(buf
, size
);
17587 kmem_free(buf
, size
);
17591 case DTRACEIOC_ENABLE
: {
17593 dtrace_enabling_t
*enab
= NULL
;
17594 dtrace_vstate_t
*vstate
;
17600 * If a NULL argument has been passed, we take this as our
17601 * cue to reevaluate our enablings.
17604 dtrace_enabling_matchall();
17609 if ((dof
= dtrace_dof_copyin(arg
, &rval
)) == NULL
)
17612 lck_mtx_lock(&cpu_lock
);
17613 lck_mtx_lock(&dtrace_lock
);
17614 vstate
= &state
->dts_vstate
;
17616 if (state
->dts_activity
!= DTRACE_ACTIVITY_INACTIVE
) {
17617 lck_mtx_unlock(&dtrace_lock
);
17618 lck_mtx_unlock(&cpu_lock
);
17619 dtrace_dof_destroy(dof
);
17623 if (dtrace_dof_slurp(dof
, vstate
, cr
, &enab
, 0, B_TRUE
) != 0) {
17624 lck_mtx_unlock(&dtrace_lock
);
17625 lck_mtx_unlock(&cpu_lock
);
17626 dtrace_dof_destroy(dof
);
17630 if ((rval
= dtrace_dof_options(dof
, state
)) != 0) {
17631 dtrace_enabling_destroy(enab
);
17632 lck_mtx_unlock(&dtrace_lock
);
17633 lck_mtx_unlock(&cpu_lock
);
17634 dtrace_dof_destroy(dof
);
17638 if ((err
= dtrace_enabling_match(enab
, rv
)) == 0) {
17639 err
= dtrace_enabling_retain(enab
);
17641 dtrace_enabling_destroy(enab
);
17644 lck_mtx_unlock(&cpu_lock
);
17645 lck_mtx_unlock(&dtrace_lock
);
17646 dtrace_dof_destroy(dof
);
17651 case DTRACEIOC_REPLICATE
: {
17652 dtrace_repldesc_t desc
;
17653 dtrace_probedesc_t
*match
= &desc
.dtrpd_match
;
17654 dtrace_probedesc_t
*create
= &desc
.dtrpd_create
;
17657 if (copyin((void *)arg
, &desc
, sizeof (desc
)) != 0)
17660 match
->dtpd_provider
[DTRACE_PROVNAMELEN
- 1] = '\0';
17661 match
->dtpd_mod
[DTRACE_MODNAMELEN
- 1] = '\0';
17662 match
->dtpd_func
[DTRACE_FUNCNAMELEN
- 1] = '\0';
17663 match
->dtpd_name
[DTRACE_NAMELEN
- 1] = '\0';
17665 create
->dtpd_provider
[DTRACE_PROVNAMELEN
- 1] = '\0';
17666 create
->dtpd_mod
[DTRACE_MODNAMELEN
- 1] = '\0';
17667 create
->dtpd_func
[DTRACE_FUNCNAMELEN
- 1] = '\0';
17668 create
->dtpd_name
[DTRACE_NAMELEN
- 1] = '\0';
17670 lck_mtx_lock(&dtrace_lock
);
17671 err
= dtrace_enabling_replicate(state
, match
, create
);
17672 lck_mtx_unlock(&dtrace_lock
);
17677 case DTRACEIOC_PROBEMATCH
:
17678 case DTRACEIOC_PROBES
: {
17679 dtrace_probe_t
*probe
= NULL
;
17680 dtrace_probedesc_t desc
;
17681 dtrace_probekey_t pkey
;
17688 if (copyin((void *)arg
, &desc
, sizeof (desc
)) != 0)
17691 desc
.dtpd_provider
[DTRACE_PROVNAMELEN
- 1] = '\0';
17692 desc
.dtpd_mod
[DTRACE_MODNAMELEN
- 1] = '\0';
17693 desc
.dtpd_func
[DTRACE_FUNCNAMELEN
- 1] = '\0';
17694 desc
.dtpd_name
[DTRACE_NAMELEN
- 1] = '\0';
17697 * Before we attempt to match this probe, we want to give
17698 * all providers the opportunity to provide it.
17700 if (desc
.dtpd_id
== DTRACE_IDNONE
) {
17701 lck_mtx_lock(&dtrace_provider_lock
);
17702 dtrace_probe_provide(&desc
, NULL
);
17703 lck_mtx_unlock(&dtrace_provider_lock
);
17707 if (cmd
== DTRACEIOC_PROBEMATCH
) {
17708 dtrace_probekey(&desc
, &pkey
);
17709 pkey
.dtpk_id
= DTRACE_IDNONE
;
17712 dtrace_cred2priv(cr
, &priv
, &uid
, &zoneid
);
17714 lck_mtx_lock(&dtrace_lock
);
17716 if (cmd
== DTRACEIOC_PROBEMATCH
) {
17717 for (i
= desc
.dtpd_id
; i
<= dtrace_nprobes
; i
++) {
17718 if ((probe
= dtrace_probes
[i
- 1]) != NULL
&&
17719 (m
= dtrace_match_probe(probe
, &pkey
,
17720 priv
, uid
, zoneid
)) != 0)
17725 lck_mtx_unlock(&dtrace_lock
);
17730 for (i
= desc
.dtpd_id
; i
<= dtrace_nprobes
; i
++) {
17731 if ((probe
= dtrace_probes
[i
- 1]) != NULL
&&
17732 dtrace_match_priv(probe
, priv
, uid
, zoneid
))
17737 if (probe
== NULL
) {
17738 lck_mtx_unlock(&dtrace_lock
);
17742 dtrace_probe_description(probe
, &desc
);
17743 lck_mtx_unlock(&dtrace_lock
);
17745 if (copyout(&desc
, (void *)arg
, sizeof (desc
)) != 0)
17751 case DTRACEIOC_PROBEARG
: {
17752 dtrace_argdesc_t desc
;
17753 dtrace_probe_t
*probe
;
17754 dtrace_provider_t
*prov
;
17756 if (copyin((void *)arg
, &desc
, sizeof (desc
)) != 0)
17759 if (desc
.dtargd_id
== DTRACE_IDNONE
)
17762 if (desc
.dtargd_ndx
== DTRACE_ARGNONE
)
17765 lck_mtx_lock(&dtrace_provider_lock
);
17766 lck_mtx_lock(&mod_lock
);
17767 lck_mtx_lock(&dtrace_lock
);
17769 if (desc
.dtargd_id
> dtrace_nprobes
) {
17770 lck_mtx_unlock(&dtrace_lock
);
17771 lck_mtx_unlock(&mod_lock
);
17772 lck_mtx_unlock(&dtrace_provider_lock
);
17776 if ((probe
= dtrace_probes
[desc
.dtargd_id
- 1]) == NULL
) {
17777 lck_mtx_unlock(&dtrace_lock
);
17778 lck_mtx_unlock(&mod_lock
);
17779 lck_mtx_unlock(&dtrace_provider_lock
);
17783 lck_mtx_unlock(&dtrace_lock
);
17785 prov
= probe
->dtpr_provider
;
17787 if (prov
->dtpv_pops
.dtps_getargdesc
== NULL
) {
17789 * There isn't any typed information for this probe.
17790 * Set the argument number to DTRACE_ARGNONE.
17792 desc
.dtargd_ndx
= DTRACE_ARGNONE
;
17794 desc
.dtargd_native
[0] = '\0';
17795 desc
.dtargd_xlate
[0] = '\0';
17796 desc
.dtargd_mapping
= desc
.dtargd_ndx
;
17798 prov
->dtpv_pops
.dtps_getargdesc(prov
->dtpv_arg
,
17799 probe
->dtpr_id
, probe
->dtpr_arg
, &desc
);
17802 lck_mtx_unlock(&mod_lock
);
17803 lck_mtx_unlock(&dtrace_provider_lock
);
17805 if (copyout(&desc
, (void *)arg
, sizeof (desc
)) != 0)
17811 case DTRACEIOC_GO
: {
17812 processorid_t cpuid
;
17813 rval
= dtrace_state_go(state
, &cpuid
);
17818 if (copyout(&cpuid
, (void *)arg
, sizeof (cpuid
)) != 0)
17824 case DTRACEIOC_STOP
: {
17825 processorid_t cpuid
;
17827 lck_mtx_lock(&dtrace_lock
);
17828 rval
= dtrace_state_stop(state
, &cpuid
);
17829 lck_mtx_unlock(&dtrace_lock
);
17834 if (copyout(&cpuid
, (void *)arg
, sizeof (cpuid
)) != 0)
17840 case DTRACEIOC_DOFGET
: {
17841 dof_hdr_t hdr
, *dof
;
17844 if (copyin((void *)arg
, &hdr
, sizeof (hdr
)) != 0)
17847 lck_mtx_lock(&dtrace_lock
);
17848 dof
= dtrace_dof_create(state
);
17849 lck_mtx_unlock(&dtrace_lock
);
17851 len
= MIN(hdr
.dofh_loadsz
, dof
->dofh_loadsz
);
17852 rval
= copyout(dof
, (void *)arg
, len
);
17853 dtrace_dof_destroy(dof
);
17855 return (rval
== 0 ? 0 : EFAULT
);
17858 case DTRACEIOC_AGGSNAP
:
17859 case DTRACEIOC_BUFSNAP
: {
17860 dtrace_bufdesc_t desc
;
17862 dtrace_buffer_t
*buf
;
17864 if (copyin((void *)arg
, &desc
, sizeof (desc
)) != 0)
17867 if (desc
.dtbd_cpu
< 0 || desc
.dtbd_cpu
>= NCPU
)
17870 lck_mtx_lock(&dtrace_lock
);
17872 if (cmd
== DTRACEIOC_BUFSNAP
) {
17873 buf
= &state
->dts_buffer
[desc
.dtbd_cpu
];
17875 buf
= &state
->dts_aggbuffer
[desc
.dtbd_cpu
];
17878 if (buf
->dtb_flags
& (DTRACEBUF_RING
| DTRACEBUF_FILL
)) {
17879 size_t sz
= buf
->dtb_offset
;
17881 if (state
->dts_activity
!= DTRACE_ACTIVITY_STOPPED
) {
17882 lck_mtx_unlock(&dtrace_lock
);
17887 * If this buffer has already been consumed, we're
17888 * going to indicate that there's nothing left here
17891 if (buf
->dtb_flags
& DTRACEBUF_CONSUMED
) {
17892 lck_mtx_unlock(&dtrace_lock
);
17894 desc
.dtbd_size
= 0;
17895 desc
.dtbd_drops
= 0;
17896 desc
.dtbd_errors
= 0;
17897 desc
.dtbd_oldest
= 0;
17898 sz
= sizeof (desc
);
17900 if (copyout(&desc
, (void *)arg
, sz
) != 0)
17907 * If this is a ring buffer that has wrapped, we want
17908 * to copy the whole thing out.
17910 if (buf
->dtb_flags
& DTRACEBUF_WRAPPED
) {
17911 dtrace_buffer_polish(buf
);
17912 sz
= buf
->dtb_size
;
17915 if (copyout(buf
->dtb_tomax
, desc
.dtbd_data
, sz
) != 0) {
17916 lck_mtx_unlock(&dtrace_lock
);
17920 desc
.dtbd_size
= sz
;
17921 desc
.dtbd_drops
= buf
->dtb_drops
;
17922 desc
.dtbd_errors
= buf
->dtb_errors
;
17923 desc
.dtbd_oldest
= buf
->dtb_xamot_offset
;
17925 lck_mtx_unlock(&dtrace_lock
);
17927 if (copyout(&desc
, (void *)arg
, sizeof (desc
)) != 0)
17930 buf
->dtb_flags
|= DTRACEBUF_CONSUMED
;
17935 if (buf
->dtb_tomax
== NULL
) {
17936 ASSERT(buf
->dtb_xamot
== NULL
);
17937 lck_mtx_unlock(&dtrace_lock
);
17941 cached
= buf
->dtb_tomax
;
17942 ASSERT(!(buf
->dtb_flags
& DTRACEBUF_NOSWITCH
));
17944 dtrace_xcall(desc
.dtbd_cpu
,
17945 (dtrace_xcall_t
)dtrace_buffer_switch
, buf
);
17947 state
->dts_errors
+= buf
->dtb_xamot_errors
;
17950 * If the buffers did not actually switch, then the cross call
17951 * did not take place -- presumably because the given CPU is
17952 * not in the ready set. If this is the case, we'll return
17955 if (buf
->dtb_tomax
== cached
) {
17956 ASSERT(buf
->dtb_xamot
!= cached
);
17957 lck_mtx_unlock(&dtrace_lock
);
17961 ASSERT(cached
== buf
->dtb_xamot
);
17964 * We have our snapshot; now copy it out.
17966 if (copyout(buf
->dtb_xamot
, desc
.dtbd_data
,
17967 buf
->dtb_xamot_offset
) != 0) {
17968 lck_mtx_unlock(&dtrace_lock
);
17972 desc
.dtbd_size
= buf
->dtb_xamot_offset
;
17973 desc
.dtbd_drops
= buf
->dtb_xamot_drops
;
17974 desc
.dtbd_errors
= buf
->dtb_xamot_errors
;
17975 desc
.dtbd_oldest
= 0;
17977 lck_mtx_unlock(&dtrace_lock
);
17980 * Finally, copy out the buffer description.
17982 if (copyout(&desc
, (void *)arg
, sizeof (desc
)) != 0)
17988 case DTRACEIOC_CONF
: {
17989 dtrace_conf_t conf
;
17991 bzero(&conf
, sizeof (conf
));
17992 conf
.dtc_difversion
= DIF_VERSION
;
17993 conf
.dtc_difintregs
= DIF_DIR_NREGS
;
17994 conf
.dtc_diftupregs
= DIF_DTR_NREGS
;
17995 conf
.dtc_ctfmodel
= CTF_MODEL_NATIVE
;
17997 if (copyout(&conf
, (void *)arg
, sizeof (conf
)) != 0)
18003 case DTRACEIOC_STATUS
: {
18004 dtrace_status_t stat
;
18005 dtrace_dstate_t
*dstate
;
18010 * See the comment in dtrace_state_deadman() for the reason
18011 * for setting dts_laststatus to INT64_MAX before setting
18012 * it to the correct value.
18014 state
->dts_laststatus
= INT64_MAX
;
18015 dtrace_membar_producer();
18016 state
->dts_laststatus
= dtrace_gethrtime();
18018 bzero(&stat
, sizeof (stat
));
18020 lck_mtx_lock(&dtrace_lock
);
18022 if (state
->dts_activity
== DTRACE_ACTIVITY_INACTIVE
) {
18023 lck_mtx_unlock(&dtrace_lock
);
18027 if (state
->dts_activity
== DTRACE_ACTIVITY_DRAINING
)
18028 stat
.dtst_exiting
= 1;
18030 nerrs
= state
->dts_errors
;
18031 dstate
= &state
->dts_vstate
.dtvs_dynvars
;
18033 for (i
= 0; i
< NCPU
; i
++) {
18034 dtrace_dstate_percpu_t
*dcpu
= &dstate
->dtds_percpu
[i
];
18036 stat
.dtst_dyndrops
+= dcpu
->dtdsc_drops
;
18037 stat
.dtst_dyndrops_dirty
+= dcpu
->dtdsc_dirty_drops
;
18038 stat
.dtst_dyndrops_rinsing
+= dcpu
->dtdsc_rinsing_drops
;
18040 if (state
->dts_buffer
[i
].dtb_flags
& DTRACEBUF_FULL
)
18041 stat
.dtst_filled
++;
18043 nerrs
+= state
->dts_buffer
[i
].dtb_errors
;
18045 for (j
= 0; j
< state
->dts_nspeculations
; j
++) {
18046 dtrace_speculation_t
*spec
;
18047 dtrace_buffer_t
*buf
;
18049 spec
= &state
->dts_speculations
[j
];
18050 buf
= &spec
->dtsp_buffer
[i
];
18051 stat
.dtst_specdrops
+= buf
->dtb_xamot_drops
;
18055 stat
.dtst_specdrops_busy
= state
->dts_speculations_busy
;
18056 stat
.dtst_specdrops_unavail
= state
->dts_speculations_unavail
;
18057 stat
.dtst_stkstroverflows
= state
->dts_stkstroverflows
;
18058 stat
.dtst_dblerrors
= state
->dts_dblerrors
;
18060 (state
->dts_activity
== DTRACE_ACTIVITY_KILLED
);
18061 stat
.dtst_errors
= nerrs
;
18063 lck_mtx_unlock(&dtrace_lock
);
18065 if (copyout(&stat
, (void *)arg
, sizeof (stat
)) != 0)
18071 case DTRACEIOC_FORMAT
: {
18072 dtrace_fmtdesc_t fmt
;
18076 if (copyin((void *)arg
, &fmt
, sizeof (fmt
)) != 0)
18079 lck_mtx_lock(&dtrace_lock
);
18081 if (fmt
.dtfd_format
== 0 ||
18082 fmt
.dtfd_format
> state
->dts_nformats
) {
18083 lck_mtx_unlock(&dtrace_lock
);
18088 * Format strings are allocated contiguously and they are
18089 * never freed; if a format index is less than the number
18090 * of formats, we can assert that the format map is non-NULL
18091 * and that the format for the specified index is non-NULL.
18093 ASSERT(state
->dts_formats
!= NULL
);
18094 str
= state
->dts_formats
[fmt
.dtfd_format
- 1];
18095 ASSERT(str
!= NULL
);
18097 len
= strlen(str
) + 1;
18099 if (len
> fmt
.dtfd_length
) {
18100 fmt
.dtfd_length
= len
;
18102 if (copyout(&fmt
, (void *)arg
, sizeof (fmt
)) != 0) {
18103 lck_mtx_unlock(&dtrace_lock
);
18107 if (copyout(str
, fmt
.dtfd_string
, len
) != 0) {
18108 lck_mtx_unlock(&dtrace_lock
);
18113 lck_mtx_unlock(&dtrace_lock
);
static int
dtrace_ioctl_helper(u_long cmd, caddr_t arg, int *rv)
{
#pragma unused(rv)
	/*
	 * Safe to check this outside the dof mode lock
	 */
	if (dtrace_dof_mode == DTRACE_DOF_MODE_NEVER)
		return KERN_SUCCESS;

	switch (cmd) {
	case DTRACEHIOC_ADDDOF:
	{
		dof_helper_t *dhp = NULL;
		size_t dof_ioctl_data_size;
		dof_ioctl_data_t *multi_dof;
		unsigned int i;
		int rval = 0;
		user_addr_t user_address = *(user_addr_t *)arg;
		uint64_t dof_count;
		int multi_dof_claimed = 0;
		proc_t *p = current_proc();

		/*
		 * Read the number of DOF sections being passed in.
		 */
		if (copyin(user_address + offsetof(dof_ioctl_data_t, dofiod_count),
		    &dof_count,
		    sizeof(dof_count))) {
			dtrace_dof_error(NULL, "failed to copyin dofiod_count");
			return (EFAULT);
		}

		/*
		 * Range check the count.
		 */
		if (dof_count == 0 || dof_count > 1024) {
			dtrace_dof_error(NULL, "dofiod_count is not valid");
			return (EINVAL);
		}

		/*
		 * Allocate a correctly sized structure and copyin the data.
		 */
		dof_ioctl_data_size = DOF_IOCTL_DATA_T_SIZE(dof_count);
		if ((multi_dof = kmem_alloc(dof_ioctl_data_size, KM_SLEEP)) == NULL)
			return (ENOMEM);

		/* NOTE! We can no longer exit this method via return */
		if (copyin(user_address, multi_dof, dof_ioctl_data_size) != 0) {
			dtrace_dof_error(NULL, "failed copyin of dof_ioctl_data_t");
			rval = EFAULT;
			goto cleanup;
		}

		/*
		 * Check that the count didn't change between the first copyin and the second.
		 */
		if (multi_dof->dofiod_count != dof_count) {
			rval = EINVAL;
			goto cleanup;
		}

		/*
		 * Try to process lazily first.
		 */
		rval = dtrace_lazy_dofs_add(p, multi_dof, &multi_dof_claimed);

		/*
		 * If rval is EACCES, we must be non-lazy.
		 */
		if (rval == EACCES) {
			rval = 0;

			/*
			 * Process each dof_helper_t
			 */
			i = 0;
			do {
				dhp = &multi_dof->dofiod_helpers[i];

				dof_hdr_t *dof = dtrace_dof_copyin(dhp->dofhp_dof, &rval);

				if (dof != NULL) {
					lck_mtx_lock(&dtrace_lock);

					/*
					 * dtrace_helper_slurp() takes responsibility for the dof --
					 * it may free it now or it may save it and free it later.
					 */
					if ((dhp->dofhp_dof = (uint64_t)dtrace_helper_slurp(p, dof, dhp)) == -1ULL)
						rval = EINVAL;

					lck_mtx_unlock(&dtrace_lock);
				}
			} while (++i < multi_dof->dofiod_count && rval == 0);
		}

		/*
		 * We need to copyout the multi_dof struct, because it contains
		 * the generation (unique id) values needed to call DTRACEHIOC_REMOVE.
		 *
		 * This could certainly be better optimized.
		 */
		if (copyout(multi_dof, user_address, dof_ioctl_data_size) != 0) {
			dtrace_dof_error(NULL, "failed copyout of dof_ioctl_data_t");
			/* Don't overwrite pre-existing error code */
			if (rval == 0) rval = EFAULT;
		}

	cleanup:
		/*
		 * If we had to allocate struct memory, free it.
		 */
		if (multi_dof != NULL && !multi_dof_claimed) {
			kmem_free(multi_dof, dof_ioctl_data_size);
		}

		return rval;
	}

	case DTRACEHIOC_REMOVE: {
		int generation = *(int*)arg;
		proc_t *p = current_proc();

		/*
		 * Try lazy removal first.
		 */
		int rval = dtrace_lazy_dofs_remove(p, generation);

		/*
		 * EACCES means non-lazy
		 */
		if (rval == EACCES) {
			lck_mtx_lock(&dtrace_lock);
			rval = dtrace_helper_destroygen(p, generation);
			lck_mtx_unlock(&dtrace_lock);
		}

		return (rval);
	}

	default:
		break;
	}

	return ENOTTY;
}
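
/*
 * Note on DTRACEHIOC_ADDDOF / DTRACEHIOC_REMOVE (explanatory, hedged): two
 * conventions recur here.  First, the helper-DOF count is copied in twice and
 * compared, so a value that changes in user memory between the two copyin()
 * calls is rejected rather than trusted.  Second, dtrace_lazy_dofs_add() and
 * dtrace_lazy_dofs_remove() appear to use EACCES purely as an internal signal
 * meaning "this process is not in lazy-DOF mode"; when it is seen, the caller
 * falls back to slurping or destroying the helpers immediately under
 * dtrace_lock, and the EACCES value itself is not propagated to userspace.
 */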
static int
dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv)
{
#pragma unused(md)
	minor_t minor = getminor(dev);
	dtrace_state_t *state;
	int rval;

	/* Darwin puts Helper on its own major device. */

	state = ddi_get_soft_state(dtrace_softstate, minor);

	if (state->dts_anon) {
		ASSERT(dtrace_anon.dta_state == NULL);
		state = state->dts_anon;
	}

	switch (cmd) {
	case DTRACEIOC_PROVIDER: {
		dtrace_providerdesc_t pvd;
		dtrace_provider_t *pvp;

		if (copyin(arg, &pvd, sizeof (pvd)) != 0)
			return (EFAULT);

		pvd.dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0';
		lck_mtx_lock(&dtrace_provider_lock);

		for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) {
			if (strncmp(pvp->dtpv_name, pvd.dtvd_name, DTRACE_PROVNAMELEN) == 0)
				break;
		}

		lck_mtx_unlock(&dtrace_provider_lock);

		if (pvp == NULL)
			return (ESRCH);

		bcopy(&pvp->dtpv_priv, &pvd.dtvd_priv, sizeof (dtrace_ppriv_t));
		bcopy(&pvp->dtpv_attr, &pvd.dtvd_attr, sizeof (dtrace_pattr_t));
		if (copyout(&pvd, arg, sizeof (pvd)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_EPROBE: {
		dtrace_eprobedesc_t epdesc;
		dtrace_ecb_t *ecb;
		dtrace_action_t *act;
		void *buf;
		size_t size;
		uintptr_t dest;
		int nrecs;

		if (copyin(arg, &epdesc, sizeof (epdesc)) != 0)
			return (EFAULT);

		lck_mtx_lock(&dtrace_lock);

		if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) {
			lck_mtx_unlock(&dtrace_lock);
			return (EINVAL);
		}

		if (ecb->dte_probe == NULL) {
			lck_mtx_unlock(&dtrace_lock);
			return (EINVAL);
		}

		epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id;
		epdesc.dtepd_uarg = ecb->dte_uarg;
		epdesc.dtepd_size = ecb->dte_size;

		nrecs = epdesc.dtepd_nrecs;
		epdesc.dtepd_nrecs = 0;
		for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
			if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
				continue;

			epdesc.dtepd_nrecs++;
		}

		/*
		 * Now that we have the size, we need to allocate a temporary
		 * buffer in which to store the complete description.  We need
		 * the temporary buffer to be able to drop dtrace_lock()
		 * across the copyout(), below.
		 */
		size = sizeof (dtrace_eprobedesc_t) +
		    (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t));

		buf = kmem_alloc(size, KM_SLEEP);
		dest = (uintptr_t)buf;

		bcopy(&epdesc, (void *)dest, sizeof (epdesc));
		dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]);

		for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
			if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
				continue;

			if (nrecs-- == 0)
				break;

			bcopy(&act->dta_rec, (void *)dest,
			    sizeof (dtrace_recdesc_t));
			dest += sizeof (dtrace_recdesc_t);
		}

		lck_mtx_unlock(&dtrace_lock);

		if (copyout(buf, arg, dest - (uintptr_t)buf) != 0) {
			kmem_free(buf, size);
			return (EFAULT);
		}

		kmem_free(buf, size);
		return (0);
	}

	case DTRACEIOC_AGGDESC: {
		dtrace_aggdesc_t aggdesc;
		dtrace_action_t *act;
		dtrace_aggregation_t *agg;
		int nrecs;
		uint32_t offs;
		dtrace_recdesc_t *lrec;
		void *buf;
		size_t size;
		uintptr_t dest;

		if (copyin(arg, &aggdesc, sizeof (aggdesc)) != 0)
			return (EFAULT);

		lck_mtx_lock(&dtrace_lock);

		if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) {
			lck_mtx_unlock(&dtrace_lock);
			return (EINVAL);
		}

		aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid;

		nrecs = aggdesc.dtagd_nrecs;
		aggdesc.dtagd_nrecs = 0;

		offs = agg->dtag_base;
		lrec = &agg->dtag_action.dta_rec;
		aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs;

		for (act = agg->dtag_first; ; act = act->dta_next) {
			ASSERT(act->dta_intuple ||
			    DTRACEACT_ISAGG(act->dta_kind));

			/*
			 * If this action has a record size of zero, it
			 * denotes an argument to the aggregating action.
			 * Because the presence of this record doesn't (or
			 * shouldn't) affect the way the data is interpreted,
			 * we don't copy it out to save user-level the
			 * confusion of dealing with a zero-length record.
			 */
			if (act->dta_rec.dtrd_size == 0) {
				ASSERT(agg->dtag_hasarg);
				continue;
			}

			aggdesc.dtagd_nrecs++;

			if (act == &agg->dtag_action)
				break;
		}

		/*
		 * Now that we have the size, we need to allocate a temporary
		 * buffer in which to store the complete description.  We need
		 * the temporary buffer to be able to drop dtrace_lock()
		 * across the copyout(), below.
		 */
		size = sizeof (dtrace_aggdesc_t) +
		    (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t));

		buf = kmem_alloc(size, KM_SLEEP);
		dest = (uintptr_t)buf;

		bcopy(&aggdesc, (void *)dest, sizeof (aggdesc));
		dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]);

		for (act = agg->dtag_first; ; act = act->dta_next) {
			dtrace_recdesc_t rec = act->dta_rec;

			/*
			 * See the comment in the above loop for why we pass
			 * over zero-length records.
			 */
			if (rec.dtrd_size == 0) {
				ASSERT(agg->dtag_hasarg);
				continue;
			}

			if (nrecs-- == 0)
				break;

			rec.dtrd_offset -= offs;
			bcopy(&rec, (void *)dest, sizeof (rec));
			dest += sizeof (dtrace_recdesc_t);

			if (act == &agg->dtag_action)
				break;
		}

		lck_mtx_unlock(&dtrace_lock);

		if (copyout(buf, arg, dest - (uintptr_t)buf) != 0) {
			kmem_free(buf, size);
			return (EFAULT);
		}

		kmem_free(buf, size);
		return (0);
	}

	case DTRACEIOC_ENABLE: {
		dof_hdr_t *dof;
		dtrace_enabling_t *enab = NULL;
		dtrace_vstate_t *vstate;
		int err = 0;

		rval = 0;

		/*
		 * If a NULL argument has been passed, we take this as our
		 * cue to reevaluate our enablings.
		 */
		if (arg == 0) {
			dtrace_enabling_matchall();

			return (0);
		}

		if ((dof = dtrace_dof_copyin(arg, &rval)) == NULL)
			return (rval);

		lck_mtx_lock(&cpu_lock);
		lck_mtx_lock(&dtrace_lock);
		vstate = &state->dts_vstate;

		if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&cpu_lock);
			dtrace_dof_destroy(dof);
			return (EBUSY);
		}

		if (dtrace_dof_slurp(dof, vstate, cr, &enab, 0, B_TRUE) != 0) {
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&cpu_lock);
			dtrace_dof_destroy(dof);
			return (EINVAL);
		}

		if ((rval = dtrace_dof_options(dof, state)) != 0) {
			dtrace_enabling_destroy(enab);
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&cpu_lock);
			dtrace_dof_destroy(dof);
			return (rval);
		}

		if ((err = dtrace_enabling_match(enab, rv)) == 0) {
			err = dtrace_enabling_retain(enab);
		} else {
			dtrace_enabling_destroy(enab);
		}

		lck_mtx_unlock(&cpu_lock);
		lck_mtx_unlock(&dtrace_lock);
		dtrace_dof_destroy(dof);

		return (err);
	}
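
	/*
	 * Note on DTRACEIOC_ENABLE (explanatory, hedged): this is the path that
	 * backs an enabling request from libdtrace.  The DOF handed in is copied
	 * into the kernel, slurped into a dtrace_enabling_t, matched against the
	 * current probe set, and then either retained (so it can be re-matched
	 * as providers appear) or destroyed on failure.  Passing a NULL argument
	 * skips all of that and simply asks the framework to re-run matching for
	 * the enablings it has already retained.
	 */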
	case DTRACEIOC_REPLICATE: {
		dtrace_repldesc_t desc;
		dtrace_probedesc_t *match = &desc.dtrpd_match;
		dtrace_probedesc_t *create = &desc.dtrpd_create;
		int err;

		if (copyin(arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		match->dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		create->dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		lck_mtx_lock(&dtrace_lock);
		err = dtrace_enabling_replicate(state, match, create);
		lck_mtx_unlock(&dtrace_lock);

		return (err);
	}

	case DTRACEIOC_PROBEMATCH:
	case DTRACEIOC_PROBES: {
		dtrace_probe_t *probe = NULL;
		dtrace_probedesc_t desc;
		dtrace_probekey_t pkey;
		dtrace_id_t i;
		int m = 0;
		uint32_t priv;
		uid_t uid;
		zoneid_t zoneid;

		if (copyin(arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		desc.dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		desc.dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		desc.dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		/*
		 * Before we attempt to match this probe, we want to give
		 * all providers the opportunity to provide it.
		 */
		if (desc.dtpd_id == DTRACE_IDNONE) {
			lck_mtx_lock(&dtrace_provider_lock);
			dtrace_probe_provide(&desc, NULL);
			lck_mtx_unlock(&dtrace_provider_lock);
			desc.dtpd_id++;
		}

		if (cmd == DTRACEIOC_PROBEMATCH) {
			dtrace_probekey(&desc, &pkey);
			pkey.dtpk_id = DTRACE_IDNONE;
		}

		dtrace_cred2priv(cr, &priv, &uid, &zoneid);

		lck_mtx_lock(&dtrace_lock);

		if (cmd == DTRACEIOC_PROBEMATCH) {
			/* Quiet compiler warning */
			for (i = desc.dtpd_id; i <= (dtrace_id_t)dtrace_nprobes; i++) {
				if ((probe = dtrace_probes[i - 1]) != NULL &&
				    (m = dtrace_match_probe(probe, &pkey,
				    priv, uid, zoneid)) != 0)
					break;
			}

			if (m < 0) {
				lck_mtx_unlock(&dtrace_lock);
				return (EINVAL);
			}
		} else {
			/* Quiet compiler warning */
			for (i = desc.dtpd_id; i <= (dtrace_id_t)dtrace_nprobes; i++) {
				if ((probe = dtrace_probes[i - 1]) != NULL &&
				    dtrace_match_priv(probe, priv, uid, zoneid))
					break;
			}
		}

		if (probe == NULL) {
			lck_mtx_unlock(&dtrace_lock);
			return (ESRCH);
		}

		dtrace_probe_description(probe, &desc);
		lck_mtx_unlock(&dtrace_lock);

		if (copyout(&desc, arg, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}
	case DTRACEIOC_PROBEARG: {
		dtrace_argdesc_t desc;
		dtrace_probe_t *probe;
		dtrace_provider_t *prov;

		if (copyin(arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		if (desc.dtargd_id == DTRACE_IDNONE)
			return (EINVAL);

		if (desc.dtargd_ndx == DTRACE_ARGNONE)
			return (EINVAL);

		lck_mtx_lock(&dtrace_provider_lock);
		lck_mtx_lock(&mod_lock);
		lck_mtx_lock(&dtrace_lock);

		/* Quiet compiler warning */
		if (desc.dtargd_id > (dtrace_id_t)dtrace_nprobes) {
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&mod_lock);
			lck_mtx_unlock(&dtrace_provider_lock);
			return (EINVAL);
		}

		if ((probe = dtrace_probes[desc.dtargd_id - 1]) == NULL) {
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&mod_lock);
			lck_mtx_unlock(&dtrace_provider_lock);
			return (EINVAL);
		}

		lck_mtx_unlock(&dtrace_lock);

		prov = probe->dtpr_provider;

		if (prov->dtpv_pops.dtps_getargdesc == NULL) {
			/*
			 * There isn't any typed information for this probe.
			 * Set the argument number to DTRACE_ARGNONE.
			 */
			desc.dtargd_ndx = DTRACE_ARGNONE;
		} else {
			desc.dtargd_native[0] = '\0';
			desc.dtargd_xlate[0] = '\0';
			desc.dtargd_mapping = desc.dtargd_ndx;

			prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg,
			    probe->dtpr_id, probe->dtpr_arg, &desc);
		}

		lck_mtx_unlock(&mod_lock);
		lck_mtx_unlock(&dtrace_provider_lock);

		if (copyout(&desc, arg, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_GO: {
		processorid_t cpuid;
		rval = dtrace_state_go(state, &cpuid);

		if (rval != 0)
			return (rval);

		if (copyout(&cpuid, arg, sizeof (cpuid)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_STOP: {
		processorid_t cpuid;

		lck_mtx_lock(&dtrace_lock);
		rval = dtrace_state_stop(state, &cpuid);
		lck_mtx_unlock(&dtrace_lock);

		if (rval != 0)
			return (rval);

		if (copyout(&cpuid, arg, sizeof (cpuid)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_DOFGET: {
		dof_hdr_t hdr, *dof;
		uint64_t len;

		if (copyin(arg, &hdr, sizeof (hdr)) != 0)
			return (EFAULT);

		lck_mtx_lock(&dtrace_lock);
		dof = dtrace_dof_create(state);
		lck_mtx_unlock(&dtrace_lock);

		len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz);
		rval = copyout(dof, arg, len);
		dtrace_dof_destroy(dof);

		return (rval == 0 ? 0 : EFAULT);
	}
	case DTRACEIOC_AGGSNAP:
	case DTRACEIOC_BUFSNAP: {
		dtrace_bufdesc_t desc;
		caddr_t cached;
		dtrace_buffer_t *buf;

		if (copyin(arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		if ((int)desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU)
			return (EINVAL);

		lck_mtx_lock(&dtrace_lock);

		if (cmd == DTRACEIOC_BUFSNAP) {
			buf = &state->dts_buffer[desc.dtbd_cpu];
		} else {
			buf = &state->dts_aggbuffer[desc.dtbd_cpu];
		}

		if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) {
			size_t sz = buf->dtb_offset;

			if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) {
				lck_mtx_unlock(&dtrace_lock);
				return (EBUSY);
			}

			/*
			 * If this buffer has already been consumed, we're
			 * going to indicate that there's nothing left here
			 * to consume.
			 */
			if (buf->dtb_flags & DTRACEBUF_CONSUMED) {
				lck_mtx_unlock(&dtrace_lock);

				desc.dtbd_size = 0;
				desc.dtbd_drops = 0;
				desc.dtbd_errors = 0;
				desc.dtbd_oldest = 0;
				sz = sizeof (desc);

				if (copyout(&desc, arg, sz) != 0)
					return (EFAULT);

				return (0);
			}

			/*
			 * If this is a ring buffer that has wrapped, we want
			 * to copy the whole thing out.
			 */
			if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
				dtrace_buffer_polish(buf);
				sz = buf->dtb_size;
			}

			if (copyout(buf->dtb_tomax, (user_addr_t)desc.dtbd_data, sz) != 0) {
				lck_mtx_unlock(&dtrace_lock);
				return (EFAULT);
			}

			desc.dtbd_size = sz;
			desc.dtbd_drops = buf->dtb_drops;
			desc.dtbd_errors = buf->dtb_errors;
			desc.dtbd_oldest = buf->dtb_xamot_offset;

			lck_mtx_unlock(&dtrace_lock);

			if (copyout(&desc, arg, sizeof (desc)) != 0)
				return (EFAULT);

			buf->dtb_flags |= DTRACEBUF_CONSUMED;

			return (0);
		}

		if (buf->dtb_tomax == NULL) {
			ASSERT(buf->dtb_xamot == NULL);
			lck_mtx_unlock(&dtrace_lock);
			return (ENOENT);
		}

		cached = buf->dtb_tomax;
		ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));

		dtrace_xcall(desc.dtbd_cpu,
		    (dtrace_xcall_t)dtrace_buffer_switch, buf);

		state->dts_errors += buf->dtb_xamot_errors;

		/*
		 * If the buffers did not actually switch, then the cross call
		 * did not take place -- presumably because the given CPU is
		 * not in the ready set.  If this is the case, we'll return
		 * ENOENT.
		 */
		if (buf->dtb_tomax == cached) {
			ASSERT(buf->dtb_xamot != cached);
			lck_mtx_unlock(&dtrace_lock);
			return (ENOENT);
		}

		ASSERT(cached == buf->dtb_xamot);

		/*
		 * We have our snapshot; now copy it out.
		 */
		if (copyout(buf->dtb_xamot, (user_addr_t)desc.dtbd_data,
		    buf->dtb_xamot_offset) != 0) {
			lck_mtx_unlock(&dtrace_lock);
			return (EFAULT);
		}

		desc.dtbd_size = buf->dtb_xamot_offset;
		desc.dtbd_drops = buf->dtb_xamot_drops;
		desc.dtbd_errors = buf->dtb_xamot_errors;
		desc.dtbd_oldest = 0;

		lck_mtx_unlock(&dtrace_lock);

		/*
		 * Finally, copy out the buffer description.
		 */
		if (copyout(&desc, arg, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}
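
	/*
	 * Note on DTRACEIOC_BUFSNAP for switching buffers (explanatory, hedged):
	 * the snapshot works by cross-calling the target CPU with
	 * dtrace_buffer_switch(), which swaps the active buffer (dtb_tomax) with
	 * its alternate (dtb_xamot).  The caller then copies the now-quiescent
	 * alternate buffer out without racing against probe context.  If
	 * dtb_tomax still equals the cached pointer after the xcall, the switch
	 * never happened (presumably the CPU was not in the ready set) and
	 * ENOENT is returned, as the comment above describes.
	 */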
	case DTRACEIOC_CONF: {
		dtrace_conf_t conf;

		bzero(&conf, sizeof (conf));
		conf.dtc_difversion = DIF_VERSION;
		conf.dtc_difintregs = DIF_DIR_NREGS;
		conf.dtc_diftupregs = DIF_DTR_NREGS;
		conf.dtc_ctfmodel = CTF_MODEL_NATIVE;

		if (copyout(&conf, arg, sizeof (conf)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_STATUS: {
		dtrace_status_t stat;
		dtrace_dstate_t *dstate;
		int i, j;
		uint64_t nerrs;

		/*
		 * See the comment in dtrace_state_deadman() for the reason
		 * for setting dts_laststatus to INT64_MAX before setting
		 * it to the correct value.
		 */
		state->dts_laststatus = INT64_MAX;
		dtrace_membar_producer();
		state->dts_laststatus = dtrace_gethrtime();

		bzero(&stat, sizeof (stat));

		lck_mtx_lock(&dtrace_lock);

		if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) {
			lck_mtx_unlock(&dtrace_lock);
			return (ENOENT);
		}

		if (state->dts_activity == DTRACE_ACTIVITY_DRAINING)
			stat.dtst_exiting = 1;

		nerrs = state->dts_errors;
		dstate = &state->dts_vstate.dtvs_dynvars;

		for (i = 0; i < (int)NCPU; i++) {
			dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i];

			stat.dtst_dyndrops += dcpu->dtdsc_drops;
			stat.dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops;
			stat.dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops;

			if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL)
				stat.dtst_filled++;

			nerrs += state->dts_buffer[i].dtb_errors;

			for (j = 0; j < state->dts_nspeculations; j++) {
				dtrace_speculation_t *spec;
				dtrace_buffer_t *buf;

				spec = &state->dts_speculations[j];
				buf = &spec->dtsp_buffer[i];
				stat.dtst_specdrops += buf->dtb_xamot_drops;
			}
		}

		stat.dtst_specdrops_busy = state->dts_speculations_busy;
		stat.dtst_specdrops_unavail = state->dts_speculations_unavail;
		stat.dtst_stkstroverflows = state->dts_stkstroverflows;
		stat.dtst_dblerrors = state->dts_dblerrors;
		stat.dtst_killed =
		    (state->dts_activity == DTRACE_ACTIVITY_KILLED);
		stat.dtst_errors = nerrs;

		lck_mtx_unlock(&dtrace_lock);

		if (copyout(&stat, arg, sizeof (stat)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_FORMAT: {
		dtrace_fmtdesc_t fmt;
		char *str;
		int len;

		if (copyin(arg, &fmt, sizeof (fmt)) != 0)
			return (EFAULT);

		lck_mtx_lock(&dtrace_lock);

		if (fmt.dtfd_format == 0 ||
		    fmt.dtfd_format > state->dts_nformats) {
			lck_mtx_unlock(&dtrace_lock);
			return (EINVAL);
		}

		/*
		 * Format strings are allocated contiguously and they are
		 * never freed; if a format index is less than the number
		 * of formats, we can assert that the format map is non-NULL
		 * and that the format for the specified index is non-NULL.
		 */
		ASSERT(state->dts_formats != NULL);
		str = state->dts_formats[fmt.dtfd_format - 1];
		ASSERT(str != NULL);

		len = strlen(str) + 1;

		if (len > fmt.dtfd_length) {
			fmt.dtfd_length = len;

			if (copyout(&fmt, arg, sizeof (fmt)) != 0) {
				lck_mtx_unlock(&dtrace_lock);
				return (EINVAL);
			}
		} else {
			if (copyout(str, (user_addr_t)fmt.dtfd_string, len) != 0) {
				lck_mtx_unlock(&dtrace_lock);
				return (EINVAL);
			}
		}

		lck_mtx_unlock(&dtrace_lock);
		return (0);
	}
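
	/*
	 * Note on DTRACEIOC_FORMAT (explanatory): this case implements a simple
	 * length negotiation.  If the caller's dtfd_length is too small for the
	 * format string, the required length is written back into the descriptor
	 * (without copying the string) so that userspace can retry with a larger
	 * buffer; otherwise the string itself is copied out.
	 */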
	case DTRACEIOC_MODUUIDSLIST: {
		size_t module_uuids_list_size;
		dtrace_module_uuids_list_t *uuids_list;
		uint64_t dtmul_count;

		/*
		 * Fail if the kernel symbol mode makes this operation illegal.
		 * Both NEVER & ALWAYS_FROM_KERNEL are permanent states, so it is
		 * legal to check for them without holding the dtrace_lock.
		 */
		if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER ||
		    dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL) {
			cmn_err(CE_WARN, "dtrace_kernel_symbol_mode of %u disallows DTRACEIOC_MODUUIDSLIST", dtrace_kernel_symbol_mode);
			return (EPERM);
		}

		/*
		 * Read the number of symbolsdesc structs being passed in.
		 */
		if (copyin(arg + offsetof(dtrace_module_uuids_list_t, dtmul_count),
		    &dtmul_count,
		    sizeof(dtmul_count))) {
			cmn_err(CE_WARN, "failed to copyin dtmul_count");
			return (EFAULT);
		}

		/*
		 * Range check the count. More than 2k kexts is probably an error.
		 */
		if (dtmul_count > 2048) {
			cmn_err(CE_WARN, "dtmul_count is not valid");
			return (EINVAL);
		}

		/*
		 * For all queries, we return EINVAL when the user specified
		 * count does not match the actual number of modules we find
		 * available.
		 *
		 * If the user specified count is zero, then this serves as a
		 * simple query to count the available modules in need of symbols.
		 */
		rval = 0;

		if (dtmul_count == 0)
		{
			lck_mtx_lock(&mod_lock);
			struct modctl *ctl = dtrace_modctl_list;
			while (ctl) {
				ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl));
				if (!MOD_SYMBOLS_DONE(ctl)) {
					dtmul_count++;
					rval = EINVAL;
				}
				ctl = ctl->mod_next;
			}
			lck_mtx_unlock(&mod_lock);

			if (copyout(&dtmul_count, arg, sizeof (dtmul_count)) != 0)
				return (EFAULT);
			else
				return (rval);
		}

		/*
		 * If we reach this point, then we have a request for full list data.
		 * Allocate a correctly sized structure and copyin the data.
		 */
		module_uuids_list_size = DTRACE_MODULE_UUIDS_LIST_SIZE(dtmul_count);
		if ((uuids_list = kmem_alloc(module_uuids_list_size, KM_SLEEP)) == NULL)
			return (ENOMEM);

		/* NOTE! We can no longer exit this method via return */
		if (copyin(arg, uuids_list, module_uuids_list_size) != 0) {
			cmn_err(CE_WARN, "failed copyin of dtrace_module_uuids_list_t");
			rval = EFAULT;
			goto moduuidslist_cleanup;
		}

		/*
		 * Check that the count didn't change between the first copyin and the second.
		 */
		if (uuids_list->dtmul_count != dtmul_count) {
			rval = EINVAL;
			goto moduuidslist_cleanup;
		}

		/*
		 * Build the list of UUID's that need symbols
		 */
		lck_mtx_lock(&mod_lock);

		dtmul_count = 0;

		struct modctl *ctl = dtrace_modctl_list;
		while (ctl) {
			/*
			 * We assume that userspace symbols will be "better" than kernel level symbols,
			 * as userspace can search for dSYM(s) and symbol'd binaries. Even if kernel syms
			 * are available, add user syms if the module might use them.
			 */
			ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl));
			if (!MOD_SYMBOLS_DONE(ctl)) {
				UUID *uuid = &uuids_list->dtmul_uuid[dtmul_count];
				if (dtmul_count++ < uuids_list->dtmul_count) {
					memcpy(uuid, ctl->mod_uuid, sizeof(UUID));
				}
			}
			ctl = ctl->mod_next;
		}

		lck_mtx_unlock(&mod_lock);

		if (uuids_list->dtmul_count < dtmul_count)
			rval = EINVAL;

		uuids_list->dtmul_count = dtmul_count;

		/*
		 * Copyout the symbols list (or at least the count!)
		 */
		if (copyout(uuids_list, arg, module_uuids_list_size) != 0) {
			cmn_err(CE_WARN, "failed copyout of dtrace_symbolsdesc_list_t");
			rval = EFAULT;
		}

	moduuidslist_cleanup:
		/*
		 * If we had to allocate struct memory, free it.
		 */
		if (uuids_list != NULL) {
			kmem_free(uuids_list, module_uuids_list_size);
		}

		return rval;
	}
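
	/*
	 * Note on DTRACEIOC_MODUUIDSLIST (explanatory, hedged): this is a
	 * two-phase protocol.  A request with dtmul_count == 0 is a pure query:
	 * the kernel walks dtrace_modctl_list, counts the modules still in need
	 * of symbols, and copies the count back.  A request with a non-zero
	 * count supplies a buffer that the kernel fills with one UUID per such
	 * module.  EINVAL is used to tell userspace that its count did not match
	 * what the kernel found, so the caller is presumably expected to
	 * re-issue the request with the returned count.
	 */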
	case DTRACEIOC_PROVMODSYMS: {
		size_t module_symbols_size;
		dtrace_module_symbols_t *module_symbols;
		uint64_t dtmodsyms_count;

		/*
		 * Fail if the kernel symbol mode makes this operation illegal.
		 * Both NEVER & ALWAYS_FROM_KERNEL are permanent states, so it is
		 * legal to check for them without holding the dtrace_lock.
		 */
		if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER ||
		    dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL) {
			cmn_err(CE_WARN, "dtrace_kernel_symbol_mode of %u disallows DTRACEIOC_PROVMODSYMS", dtrace_kernel_symbol_mode);
			return (EPERM);
		}

		/*
		 * Read the number of module symbols structs being passed in.
		 */
		if (copyin(arg + offsetof(dtrace_module_symbols_t, dtmodsyms_count),
		    &dtmodsyms_count,
		    sizeof(dtmodsyms_count))) {
			cmn_err(CE_WARN, "failed to copyin dtmodsyms_count");
			return (EFAULT);
		}

		/*
		 * Range check the count. How much data can we pass around?
		 */
		if (dtmodsyms_count == 0 || (dtmodsyms_count > 100 * 1024)) {
			cmn_err(CE_WARN, "dtmodsyms_count is not valid");
			return (EINVAL);
		}

		/*
		 * Allocate a correctly sized structure and copyin the data.
		 */
		module_symbols_size = DTRACE_MODULE_SYMBOLS_SIZE(dtmodsyms_count);
		if ((module_symbols = kmem_alloc(module_symbols_size, KM_SLEEP)) == NULL)
			return (ENOMEM);

		rval = 0;

		/* NOTE! We can no longer exit this method via return */
		if (copyin(arg, module_symbols, module_symbols_size) != 0) {
			cmn_err(CE_WARN, "failed copyin of dtrace_module_symbols_t, symbol count %llu", module_symbols->dtmodsyms_count);
			rval = EFAULT;
			goto module_symbols_cleanup;
		}

		/*
		 * Check that the count didn't change between the first copyin and the second.
		 */
		if (module_symbols->dtmodsyms_count != dtmodsyms_count) {
			rval = EINVAL;
			goto module_symbols_cleanup;
		}

		/*
		 * Find the modctl to add symbols to.
		 */
		lck_mtx_lock(&dtrace_provider_lock);
		lck_mtx_lock(&mod_lock);

		struct modctl *ctl = dtrace_modctl_list;
		while (ctl) {
			ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl));
			if (MOD_HAS_UUID(ctl) && !MOD_SYMBOLS_DONE(ctl)) {
				if (memcmp(module_symbols->dtmodsyms_uuid, ctl->mod_uuid, sizeof(UUID)) == 0) {
					ctl->mod_user_symbols = module_symbols;
					break;
				}
			}
			ctl = ctl->mod_next;
		}

		if (ctl) {
			dtrace_provider_t *prv;

			/*
			 * We're going to call each provider's per-module provide operation
			 * specifying only this module.
			 */
			for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
				prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);

			/*
			 * We gave every provider a chance to provide with the user syms, go ahead and clear them
			 */
			ctl->mod_user_symbols = NULL; /* MUST reset this to clear HAS_USERSPACE_SYMBOLS */
		}

		lck_mtx_unlock(&mod_lock);
		lck_mtx_unlock(&dtrace_provider_lock);

	module_symbols_cleanup:
		/*
		 * If we had to allocate struct memory, free it.
		 */
		if (module_symbols != NULL) {
			kmem_free(module_symbols, module_symbols_size);
		}

		return rval;
	}

	default:
		break;
	}

	return (ENOTTY);
}
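
/*
 * Note on DTRACEIOC_PROVMODSYMS (explanatory, hedged): this is the second half
 * of the lazy kernel-symbol handshake started by DTRACEIOC_MODUUIDSLIST.
 * Userspace resolves symbols for one module (matched by UUID) and hands them
 * back; the symbols are attached to the modctl only for the duration of the
 * dtps_provide_module() calls and are cleared again before the ioctl returns,
 * so the kernel does not keep a reference to the caller-supplied symbol table.
 */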
#endif /* __APPLE__ */

#if !defined(__APPLE__)
/*ARGSUSED*/
static int
dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	dtrace_state_t *state;

	switch (cmd) {
	case DDI_DETACH:
		break;

	case DDI_SUSPEND:
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&dtrace_lock);

	ASSERT(dtrace_opens == 0);

	if (dtrace_helpers > 0) {
		lck_mtx_unlock(&dtrace_provider_lock);
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&cpu_lock);
		return (DDI_FAILURE);
	}

	if (dtrace_unregister((dtrace_provider_id_t)dtrace_provider) != 0) {
		lck_mtx_unlock(&dtrace_provider_lock);
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&cpu_lock);
		return (DDI_FAILURE);
	}

	dtrace_provider = NULL;

	if ((state = dtrace_anon_grab()) != NULL) {
		/*
		 * If there were ECBs on this state, the provider should
		 * have not been allowed to detach; assert that there is
		 * none.
		 */
		ASSERT(state->dts_necbs == 0);
		dtrace_state_destroy(state);

		/*
		 * If we're being detached with anonymous state, we need to
		 * indicate to the kernel debugger that DTrace is now inactive.
		 */
		(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
	}

	bzero(&dtrace_anon, sizeof (dtrace_anon_t));
	unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);
	dtrace_cpu_init = NULL;
	dtrace_helpers_cleanup = NULL;
	dtrace_helpers_fork = NULL;
	dtrace_cpustart_init = NULL;
	dtrace_cpustart_fini = NULL;
	dtrace_debugger_init = NULL;
	dtrace_debugger_fini = NULL;
	dtrace_kreloc_init = NULL;
	dtrace_kreloc_fini = NULL;
	dtrace_modload = NULL;
	dtrace_modunload = NULL;

	lck_mtx_unlock(&cpu_lock);

	if (dtrace_helptrace_enabled) {
		kmem_free(dtrace_helptrace_buffer, dtrace_helptrace_bufsize);
		dtrace_helptrace_buffer = NULL;
	}

	kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *));
	dtrace_probes = NULL;
	dtrace_nprobes = 0;

	dtrace_hash_destroy(dtrace_bymod);
	dtrace_hash_destroy(dtrace_byfunc);
	dtrace_hash_destroy(dtrace_byname);
	dtrace_bymod = NULL;
	dtrace_byfunc = NULL;
	dtrace_byname = NULL;

	kmem_cache_destroy(dtrace_state_cache);
	vmem_destroy(dtrace_minor);
	vmem_destroy(dtrace_arena);

	if (dtrace_toxrange != NULL) {
		kmem_free(dtrace_toxrange,
		    dtrace_toxranges_max * sizeof (dtrace_toxrange_t));
		dtrace_toxrange = NULL;
		dtrace_toxranges = 0;
		dtrace_toxranges_max = 0;
	}

	ddi_remove_minor_node(dtrace_devi, NULL);
	dtrace_devi = NULL;

	ddi_soft_state_fini(&dtrace_softstate);

	ASSERT(dtrace_vtime_references == 0);
	ASSERT(dtrace_opens == 0);
	ASSERT(dtrace_retained == NULL);

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_provider_lock);

	/*
	 * We don't destroy the task queue until after we have dropped our
	 * locks (taskq_destroy() may block on running tasks).  To prevent
	 * attempting to do work after we have effectively detached but before
	 * the task queue has been destroyed, all tasks dispatched via the
	 * task queue must check that DTrace is still attached before
	 * performing any operation.
	 */
	taskq_destroy(dtrace_taskq);
	dtrace_taskq = NULL;

	return (DDI_SUCCESS);
}
/*ARGSUSED*/
static int
dtrace_info(dev_info_t *dip, ddi_info_cmd_t infocmd, void *arg, void **result)
{
	int error;

	switch (infocmd) {
	case DDI_INFO_DEVT2DEVINFO:
		*result = (void *)dtrace_devi;
		error = DDI_SUCCESS;
		break;
	case DDI_INFO_DEVT2INSTANCE:
		*result = (void *)0;
		error = DDI_SUCCESS;
		break;
	default:
		error = DDI_FAILURE;
	}
	return (error);
}

static struct cb_ops dtrace_cb_ops = {
	dtrace_open,		/* open */
	dtrace_close,		/* close */
	nulldev,		/* strategy */
	nulldev,		/* print */
	nodev,			/* dump */
	nodev,			/* read */
	nodev,			/* write */
	dtrace_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	ddi_prop_op,		/* cb_prop_op */
	0,			/* streamtab */
	D_NEW | D_MP		/* Driver compatibility flag */
};

static struct dev_ops dtrace_ops = {
	DEVO_REV,		/* devo_rev */
	0,			/* refcnt */
	dtrace_info,		/* get_dev_info */
	nulldev,		/* identify */
	nulldev,		/* probe */
	dtrace_attach,		/* attach */
	dtrace_detach,		/* detach */
	nodev,			/* reset */
	&dtrace_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	nodev			/* dev power */
};

static struct modldrv modldrv = {
	&mod_driverops,		/* module type (this is a pseudo driver) */
	"Dynamic Tracing",	/* name of module */
	&dtrace_ops,		/* driver ops */
};

static struct modlinkage modlinkage = {
	MODREV_1,
	(void *)&modldrv,
	NULL
};

int
_init(void)
{
	return (mod_install(&modlinkage));
}

int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}

int
_fini(void)
{
	return (mod_remove(&modlinkage));
}
#else /* Darwin BSD driver model. */

d_open_t _dtrace_open, helper_open;
d_close_t _dtrace_close, helper_close;
d_ioctl_t _dtrace_ioctl, helper_ioctl;

int
_dtrace_open(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(p)
	dev_t locdev = dev;

	return dtrace_open( &locdev, flags, devtype, CRED());
}

int
helper_open(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(dev,flags,devtype,p)
	return 0;
}

int
_dtrace_close(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(p)
	return dtrace_close( dev, flags, devtype, CRED());
}

int
helper_close(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(dev,flags,devtype,p)
	return 0;
}

int
_dtrace_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p)
{
	int err, rv = 0;
	user_addr_t uaddrp;

	if (proc_is64bit(p))
		uaddrp = *(user_addr_t *)data;
	else
		uaddrp = (user_addr_t) *(uint32_t *)data;

	err = dtrace_ioctl(dev, cmd, uaddrp, fflag, CRED(), &rv);

	/* Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */
	if (err != 0) {
		ASSERT( (err & 0xfffff000) == 0 );
		return (err & 0xfff); /* ioctl will return -1 and will set errno to an error code < 4096 */
	} else if (rv != 0) {
		ASSERT( (rv & 0xfff00000) == 0 );
		return (((rv & 0xfffff) << 12)); /* ioctl will return -1 and will set errno to a value >= 4096 */
	} else
		return 0;
}

int
helper_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p)
{
#pragma unused(dev,fflag,p)
	int err, rv = 0;

	err = dtrace_ioctl_helper(cmd, data, &rv);
	/* Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */
	if (err != 0) {
		ASSERT( (err & 0xfffff000) == 0 );
		return (err & 0xfff); /* ioctl will return -1 and will set errno to an error code < 4096 */
	} else if (rv != 0) {
		ASSERT( (rv & 0xfff00000) == 0 );
		return (((rv & 0xfffff) << 12)); /* ioctl will return -1 and will set errno to a value >= 4096 */
	} else
		return 0;
}
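
/*
 * Note on the encoding above (explanatory, hedged): a Solaris-style ioctl
 * produces both an error code (err) and an out-parameter (*rv), while
 * Darwin's BSD ioctl path can only surface a single errno.  A non-zero err
 * (always < 4096) is returned directly; a non-zero rv is shifted into the
 * bits at or above 4096.  Userspace is presumed to reverse this, along the
 * lines of:
 *
 *	if (errno >= 4096) { rv = errno >> 12; err = 0; }
 *	else               { err = errno; }
 */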
#define HELPER_MAJOR  -24 /* let the kernel pick the device number */

/*
 * A struct describing which functions will get invoked for certain
 * actions.
 */
static struct cdevsw helper_cdevsw =
{
	helper_open,		/* open */
	helper_close,		/* close */
	eno_rdwrt,		/* read */
	eno_rdwrt,		/* write */
	helper_ioctl,		/* ioctl */
	(stop_fcn_t *)nulldev,	/* stop */
	(reset_fcn_t *)nulldev,	/* reset */
	NULL,			/* tty's */
	eno_select,		/* select */
	eno_mmap,		/* mmap */
	eno_strat,		/* strategy */
	eno_getc,		/* getc */
	eno_putc,		/* putc */
	0			/* type */
};

static int helper_majdevno = 0;

static int gDTraceInited = 0;

void
helper_init( void )
{
	/*
	 * Once the "helper" is initialized, it can take ioctl calls that use locks
	 * and zones initialized in dtrace_init. Make certain dtrace_init was called
	 * before us.
	 */

	if (!gDTraceInited) {
		panic("helper_init before dtrace_init\n");
	}

	if (0 >= helper_majdevno)
	{
		helper_majdevno = cdevsw_add(HELPER_MAJOR, &helper_cdevsw);

		if (helper_majdevno < 0) {
			printf("helper_init: failed to allocate a major number!\n");
			return;
		}

		if (NULL == devfs_make_node( makedev(helper_majdevno, 0), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666,
					DTRACEMNR_HELPER, 0 )) {
			printf("dtrace_init: failed to devfs_make_node for helper!\n");
			return;
		}
	} else
		panic("helper_init: called twice!\n");
}

#undef HELPER_MAJOR

/*
 * Called with DEVFS_LOCK held, so vmem_alloc's underlying blist structures are protected.
 */
static int
dtrace_clone_func(dev_t dev, int action)
{
#pragma unused(dev)

	if (action == DEVFS_CLONE_ALLOC) {
		if (NULL == dtrace_minor) /* Arena not created yet!?! */
			return 0;
		else {
			/*
			 * Propose a minor number, namely the next number that vmem_alloc() will return.
			 * Immediately put it back in play by calling vmem_free().  FIXME.
			 */
			int ret = (int)(uintptr_t)vmem_alloc(dtrace_minor, 1, VM_BESTFIT | VM_SLEEP);

			vmem_free(dtrace_minor, (void *)(uintptr_t)ret, 1);

			return ret;
		}
	}
	else if (action == DEVFS_CLONE_FREE) {
		return 0;
	}
	else return -1;
}
#define DTRACE_MAJOR  -24 /* let the kernel pick the device number */

static struct cdevsw dtrace_cdevsw =
{
	_dtrace_open,		/* open */
	_dtrace_close,		/* close */
	eno_rdwrt,		/* read */
	eno_rdwrt,		/* write */
	_dtrace_ioctl,		/* ioctl */
	(stop_fcn_t *)nulldev,	/* stop */
	(reset_fcn_t *)nulldev,	/* reset */
	NULL,			/* tty's */
	eno_select,		/* select */
	eno_mmap,		/* mmap */
	eno_strat,		/* strategy */
	eno_getc,		/* getc */
	eno_putc,		/* putc */
	0			/* type */
};

lck_attr_t *dtrace_lck_attr;
lck_grp_attr_t *dtrace_lck_grp_attr;
lck_grp_t *dtrace_lck_grp;

static int gMajDevNo;

void
dtrace_init( void )
{
	if (0 == gDTraceInited) {
		int i, ncpu;

		/*
		 * DTrace allocates buffers based on the maximum number
		 * of enabled cpus. This call avoids any race when finding
		 * that count.
		 */
		ASSERT(dtrace_max_cpus == 0);
		ncpu = dtrace_max_cpus = ml_get_max_cpus();

		gMajDevNo = cdevsw_add(DTRACE_MAJOR, &dtrace_cdevsw);

		if (gMajDevNo < 0) {
			printf("dtrace_init: failed to allocate a major number!\n");
			gDTraceInited = 0;
			return;
		}

		if (NULL == devfs_make_node_clone( makedev(gMajDevNo, 0), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666,
					dtrace_clone_func, DTRACEMNR_DTRACE, 0 )) {
			printf("dtrace_init: failed to devfs_make_node_clone for dtrace!\n");
			gDTraceInited = 0;
			return;
		}

#if defined(DTRACE_MEMORY_ZONES)
		/*
		 * Initialize the dtrace kalloc-emulation zones.
		 */
		dtrace_alloc_init();
#endif /* DTRACE_MEMORY_ZONES */

		/*
		 * Allocate the dtrace_probe_t zone
		 */
		dtrace_probe_t_zone = zinit(sizeof(dtrace_probe_t),
					    1024 * sizeof(dtrace_probe_t),
					    sizeof(dtrace_probe_t),
					    "dtrace.dtrace_probe_t");

		/*
		 * Create the dtrace lock group and attrs.
		 */
		dtrace_lck_attr = lck_attr_alloc_init();
		dtrace_lck_grp_attr = lck_grp_attr_alloc_init();
		dtrace_lck_grp = lck_grp_alloc_init("dtrace", dtrace_lck_grp_attr);

		/*
		 * We have to initialize all locks explicitly
		 */
		lck_mtx_init(&dtrace_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&dtrace_provider_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&dtrace_meta_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&dtrace_errlock, dtrace_lck_grp, dtrace_lck_attr);
		lck_rw_init(&dtrace_dof_mode_lock, dtrace_lck_grp, dtrace_lck_attr);

		/*
		 * The cpu_core structure consists of per-CPU state available in any context.
		 * On some architectures, this may mean that the page(s) containing the
		 * NCPU-sized array of cpu_core structures must be locked in the TLB -- it
		 * is up to the platform to assure that this is performed properly.  Note that
		 * the structure is sized to avoid false sharing.
		 */
		lck_mtx_init(&cpu_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&mod_lock, dtrace_lck_grp, dtrace_lck_attr);

		dtrace_modctl_list = NULL;

		cpu_core = (cpu_core_t *)kmem_zalloc( ncpu * sizeof(cpu_core_t), KM_SLEEP );
		for (i = 0; i < ncpu; ++i) {
			lck_mtx_init(&cpu_core[i].cpuc_pid_lock, dtrace_lck_grp, dtrace_lck_attr);
		}

		cpu_list = (dtrace_cpu_t *)kmem_zalloc( ncpu * sizeof(dtrace_cpu_t), KM_SLEEP );
		for (i = 0; i < ncpu; ++i) {
			cpu_list[i].cpu_id = (processorid_t)i;
			cpu_list[i].cpu_next = &(cpu_list[(i+1) % ncpu]);
			lck_rw_init(&cpu_list[i].cpu_ft_lock, dtrace_lck_grp, dtrace_lck_attr);
		}

		lck_mtx_lock(&cpu_lock);
		for (i = 0; i < ncpu; ++i)
			/* FIXME: track CPU configuration a la CHUD Processor Pref Pane. */
			dtrace_cpu_setup_initial( (processorid_t)i ); /* In lieu of register_cpu_setup_func() callback */
		lck_mtx_unlock(&cpu_lock);

		(void)dtrace_abs_to_nano(0LL); /* Force once only call to clock_timebase_info (which can take a lock) */

		/*
		 * See dtrace_impl.h for a description of dof modes.
		 * The default is lazy dof.
		 *
		 * FIXME: Warn if state is LAZY_OFF? It won't break anything, but
		 * makes no sense...
		 */
		if (!PE_parse_boot_argn("dtrace_dof_mode", &dtrace_dof_mode, sizeof (dtrace_dof_mode))) {
			dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
		}

		/*
		 * Sanity check of dof mode value.
		 */
		switch (dtrace_dof_mode) {
		case DTRACE_DOF_MODE_NEVER:
		case DTRACE_DOF_MODE_LAZY_ON:
			/* valid modes, but nothing else we need to do */
			break;

		case DTRACE_DOF_MODE_LAZY_OFF:
		case DTRACE_DOF_MODE_NON_LAZY:
			/* Cannot wait for a dtrace_open to init fasttrap */
			fasttrap_init();
			break;

		default:
			/* Invalid, clamp to non lazy */
			dtrace_dof_mode = DTRACE_DOF_MODE_NON_LAZY;
			fasttrap_init();
			break;
		}

		/*
		 * See dtrace_impl.h for a description of kernel symbol modes.
		 * The default is to wait for symbols from userspace (lazy symbols).
		 */
		if (!PE_parse_boot_argn("dtrace_kernel_symbol_mode", &dtrace_kernel_symbol_mode, sizeof (dtrace_kernel_symbol_mode))) {
			dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE;
		}

		gDTraceInited = 1;

	} else
		panic("dtrace_init: called twice!\n");
}
void
dtrace_postinit(void)
{
	/*
	 * Called from bsd_init after all provider's *_init() routines have been
	 * run. That way, anonymous DOF enabled under dtrace_attach() is safe
	 * to go.
	 */
	dtrace_attach( (dev_info_t *)(uintptr_t)makedev(gMajDevNo, 0), 0 ); /* Punning a dev_t to a dev_info_t* */

	/*
	 * Add the mach_kernel to the module list for lazy processing
	 */
	struct kmod_info fake_kernel_kmod;
	memset(&fake_kernel_kmod, 0, sizeof(fake_kernel_kmod));

	strlcpy(fake_kernel_kmod.name, "mach_kernel", sizeof(fake_kernel_kmod.name));
	fake_kernel_kmod.id = 1;
	fake_kernel_kmod.address = g_kernel_kmod_info.address;
	fake_kernel_kmod.size = g_kernel_kmod_info.size;

	if (dtrace_module_loaded(&fake_kernel_kmod, 0) != 0) {
		printf("dtrace_postinit: Could not register mach_kernel modctl\n");
	}

	(void)OSKextRegisterKextsWithDTrace();
}
#undef DTRACE_MAJOR

/*
 * Routines used to register interest in cpu's being added to or removed
 * from the system.
 */
void
register_cpu_setup_func(cpu_setup_func_t *ignore1, void *ignore2)
{
#pragma unused(ignore1,ignore2)
}

void
unregister_cpu_setup_func(cpu_setup_func_t *ignore1, void *ignore2)
{
#pragma unused(ignore1,ignore2)
}
#endif /* __APPLE__ */