/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 */

/*
 * Portions copyright (c) 2011, Joyent, Inc. All rights reserved.
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/* #pragma ident	"@(#)dtrace.c	1.65	08/07/02 SMI" */
/*
 * DTrace - Dynamic Tracing for Solaris
 *
 * This is the implementation of the Solaris Dynamic Tracing framework
 * (DTrace).  The user-visible interface to DTrace is described at length in
 * the "Solaris Dynamic Tracing Guide".  The interfaces between the libdtrace
 * library, the in-kernel DTrace framework, and the DTrace providers are
 * described in the block comments in the <sys/dtrace.h> header file.  The
 * internal architecture of DTrace is described in the block comments in the
 * <sys/dtrace_impl.h> header file.  The comments contained within the DTrace
 * implementation very much assume mastery of all of these sources; if one has
 * an unanswered question about the implementation, one should consult them
 * first.
 *
 * The functions here are ordered roughly as follows:
 *
 *   - Probe context functions
 *   - Probe hashing functions
 *   - Non-probe context utility functions
 *   - Matching functions
 *   - Provider-to-Framework API functions
 *   - Probe management functions
 *   - DIF object functions
 *   - Predicate functions
 *   - Enabling functions
 *   - Anonymous enabling functions
 *   - Consumer state functions
 *   - Driver cookbook functions
 *
 * Each group of functions begins with a block comment labelled the "DTrace
 * [Group] Functions", allowing one to find each block by searching forward
 * on capital-f functions.
 */
#include <sys/errno.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/dtrace_impl.h>
#include <sys/param.h>
#include <sys/proc_internal.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <miscfs/devfs/devfs.h>
#include <sys/malloc.h>
#include <sys/kernel_types.h>
#include <sys/proc_internal.h>
#include <sys/uio_internal.h>
#include <sys/kauth.h>
#include <mach/exception_types.h>
#include <sys/signalvar.h>
#include <mach/task.h>
#include <kern/zalloc.h>
#include <kern/task.h>
#include <netinet/in.h>
#include <kern/cpu_data.h>
extern uint32_t pmap_find_phys(void *, uint64_t);
extern boolean_t pmap_valid_page(uint32_t);
extern void OSKextRegisterKextsWithDTrace(void);
extern kmod_info_t g_kernel_kmod_info;

/* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. */
#define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */

#define t_predcache t_dtrace_predcache /* Cosmetic. Helps readability of thread.h */

extern void dtrace_suspend(void);
extern void dtrace_resume(void);
extern void dtrace_init(void);
extern void helper_init(void);
extern void fasttrap_init(void);
extern void dtrace_lazy_dofs_duplicate(proc_t *, proc_t *);
extern void dtrace_lazy_dofs_destroy(proc_t *);
extern void dtrace_postinit(void);
#include "../../../osfmk/chud/chud_dtrace.h"

extern kern_return_t chudxnu_dtrace_callback
	(uint64_t selector, uint64_t *args, uint32_t count);

/* Import this function to retrieve the physical memory. */
extern int kernel_sysctlbyname(const char *name, void *oldp,
	size_t *oldlenp, void *newp, size_t newlen);
/*
 * DTrace Tunable Variables
 *
 * The following variables may be dynamically tuned by using sysctl(8), the
 * variables being stored in the kern.dtrace namespace.  For example:
 *	sysctl kern.dtrace.dof_maxsize = 1048575	# 1M
 *
 * In general, the only variables that one should be tuning this way are those
 * that affect system-wide DTrace behavior, and for which the default behavior
 * is undesirable.  Most of these variables are tunable on a per-consumer
 * basis using DTrace options, and need not be tuned on a system-wide basis.
 * When tuning these variables, avoid pathological values; while some attempt
 * is made to verify the integrity of these variables, they are not considered
 * part of the supported interface to DTrace, and they are therefore not
 * checked comprehensively.
 */
uint64_t	dtrace_buffer_memory_maxsize = 0;	/* initialized in dtrace_init */
uint64_t	dtrace_buffer_memory_inuse = 0;
int		dtrace_destructive_disallow = 0;
dtrace_optval_t	dtrace_nonroot_maxsize = (16 * 1024 * 1024);
size_t		dtrace_difo_maxsize = (256 * 1024);
dtrace_optval_t	dtrace_dof_maxsize = (384 * 1024);
size_t		dtrace_global_maxsize = (16 * 1024);
size_t		dtrace_actions_max = (16 * 1024);
size_t		dtrace_retain_max = 1024;
dtrace_optval_t	dtrace_helper_actions_max = 32;
dtrace_optval_t	dtrace_helper_providers_max = 64;
dtrace_optval_t	dtrace_dstate_defsize = (1 * 1024 * 1024);
size_t		dtrace_strsize_default = 256;
dtrace_optval_t	dtrace_cleanrate_default = 990099000;		/* 1.1 hz */
dtrace_optval_t	dtrace_cleanrate_min = 20000000;		/* 50 hz */
dtrace_optval_t	dtrace_cleanrate_max = (uint64_t)60 * NANOSEC;	/* 1/minute */
dtrace_optval_t	dtrace_aggrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_statusrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_statusrate_max = (hrtime_t)10 * NANOSEC;	/* 6/minute */
dtrace_optval_t	dtrace_switchrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t	dtrace_nspec_default = 1;
dtrace_optval_t	dtrace_specsize_default = 32 * 1024;
dtrace_optval_t	dtrace_stackframes_default = 20;
dtrace_optval_t	dtrace_ustackframes_default = 20;
dtrace_optval_t	dtrace_jstackframes_default = 50;
dtrace_optval_t	dtrace_jstackstrsize_default = 512;
int		dtrace_msgdsize_max = 128;
hrtime_t	dtrace_chill_max = 500 * (NANOSEC / MILLISEC);	/* 500 ms */
hrtime_t	dtrace_chill_interval = NANOSEC;		/* 1000 ms */
int		dtrace_devdepth_max = 32;
int		dtrace_err_verbose;
int		dtrace_provide_private_probes = 0;
hrtime_t	dtrace_deadman_interval = NANOSEC;
hrtime_t	dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC;
hrtime_t	dtrace_deadman_user = (hrtime_t)30 * NANOSEC;
/*
 * DTrace External Variables
 *
 * As dtrace(7D) is a kernel module, any DTrace variables are obviously
 * available to DTrace consumers via the backtick (`) syntax.  One of these,
 * dtrace_zero, is made deliberately so: it is provided as a source of
 * well-known, zero-filled memory.  While this variable is not documented,
 * it is used by some translators as an implementation detail.
 */
const char	dtrace_zero[256] = { 0 };	/* zero-filled memory */
unsigned int	dtrace_max_cpus = 0;		/* number of enabled cpus */
/*
 * DTrace Internal Variables
 */
static dev_info_t	*dtrace_devi;		/* device info */
static vmem_t		*dtrace_arena;		/* probe ID arena */
static vmem_t		*dtrace_minor;		/* minor number arena */
static taskq_t		*dtrace_taskq;		/* task queue */
static dtrace_probe_t	**dtrace_probes;	/* array of all probes */
static int		dtrace_nprobes;		/* number of probes */
static dtrace_provider_t *dtrace_provider;	/* provider list */
static dtrace_meta_t	*dtrace_meta_pid;	/* user-land meta provider */
static int		dtrace_opens;		/* number of opens */
static int		dtrace_helpers;		/* number of helpers */
static void		*dtrace_softstate;	/* softstate pointer */
static dtrace_hash_t	*dtrace_bymod;		/* probes hashed by module */
static dtrace_hash_t	*dtrace_byfunc;		/* probes hashed by function */
static dtrace_hash_t	*dtrace_byname;		/* probes hashed by name */
static dtrace_toxrange_t *dtrace_toxrange;	/* toxic range array */
static int		dtrace_toxranges;	/* number of toxic ranges */
static int		dtrace_toxranges_max;	/* size of toxic range array */
static dtrace_anon_t	dtrace_anon;		/* anonymous enabling */
static kmem_cache_t	*dtrace_state_cache;	/* cache for dynamic state */
static uint64_t		dtrace_vtime_references; /* number of vtimestamp refs */
static kthread_t	*dtrace_panicked;	/* panicking thread */
static dtrace_ecb_t	*dtrace_ecb_create_cache; /* cached created ECB */
static dtrace_genid_t	dtrace_probegen;	/* current probe generation */
static dtrace_helpers_t *dtrace_deferred_pid;	/* deferred helper list */
static dtrace_enabling_t *dtrace_retained;	/* list of retained enablings */
static dtrace_genid_t	dtrace_retained_gen;	/* current retained enab gen */
static dtrace_dynvar_t	dtrace_dynhash_sink;	/* end of dynamic hash chains */

static int		dtrace_dof_mode;	/* See dtrace_impl.h for a description of Darwin's dof modes. */
/*
 * This doesn't quite fit as an internal variable, as it must be accessed in
 * fbt_provide and sdt_provide.  It's clearly not a dtrace tunable variable either...
 */
int			dtrace_kernel_symbol_mode;	/* See dtrace_impl.h for a description of Darwin's kernel symbol modes. */
/*
 * To save memory, some common memory allocations are given a
 * unique zone.  For example, dtrace_probe_t is 72 bytes in size,
 * which means it would fall into the kalloc.128 bucket.  With
 * 20k elements allocated, the space saved is substantial.
 */
struct zone *dtrace_probe_t_zone;

static int dtrace_module_unloaded(struct kmod_info *kmod);
/*
 * DTrace is protected by three (relatively coarse-grained) locks:
 *
 * (1) dtrace_lock is required to manipulate essentially any DTrace state,
 *     including enabling state, probes, ECBs, consumer state, helper state,
 *     etc.  Importantly, dtrace_lock is _not_ required when in probe context;
 *     probe context is lock-free -- synchronization is handled via the
 *     dtrace_sync() cross call mechanism.
 *
 * (2) dtrace_provider_lock is required when manipulating provider state, or
 *     when provider state must be held constant.
 *
 * (3) dtrace_meta_lock is required when manipulating meta provider state, or
 *     when meta provider state must be held constant.
 *
 * The lock ordering between these three locks is dtrace_meta_lock before
 * dtrace_provider_lock before dtrace_lock.  (In particular, there are
 * several places where dtrace_provider_lock is held by the framework as it
 * calls into the providers -- which then call back into the framework,
 * grabbing dtrace_lock.)
 *
 * There are two other locks in the mix: mod_lock and cpu_lock.  With respect
 * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical
 * role as a coarse-grained lock; it is acquired before both of these locks.
 * With respect to dtrace_meta_lock, its behavior is stranger: cpu_lock must
 * be acquired _between_ dtrace_meta_lock and any other DTrace locks.
 * mod_lock is similar with respect to dtrace_provider_lock in that it must be
 * acquired _between_ dtrace_provider_lock and dtrace_lock.
 */
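/*
 * As an illustrative sketch only (not a code path taken verbatim from this
 * file), a hypothetical section needing all three DTrace locks would acquire
 * and release them in the order described above:
 *
 *	lck_mtx_lock(&dtrace_meta_lock);
 *	lck_mtx_lock(&dtrace_provider_lock);
 *	lck_mtx_lock(&dtrace_lock);
 *	... manipulate enabling, provider and meta provider state ...
 *	lck_mtx_unlock(&dtrace_lock);
 *	lck_mtx_unlock(&dtrace_provider_lock);
 *	lck_mtx_unlock(&dtrace_meta_lock);
 */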
/*
 * For porting purposes, all kmutex_t vars have been changed
 * to lck_mtx_t, which require explicit initialization.
 *
 * kmutex_t becomes lck_mtx_t
 * mutex_enter() becomes lck_mtx_lock()
 * mutex_exit() becomes lck_mtx_unlock()
 *
 * Lock asserts are changed like this:
 *
 * ASSERT(MUTEX_HELD(&cpu_lock));
 *	becomes:
 * lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
 */
static lck_mtx_t	dtrace_lock;		/* probe state lock */
static lck_mtx_t	dtrace_provider_lock;	/* provider state lock */
static lck_mtx_t	dtrace_meta_lock;	/* meta-provider state lock */
static lck_rw_t		dtrace_dof_mode_lock;	/* dof mode lock */
/*
 * DTrace Provider Variables
 *
 * These are the variables relating to DTrace as a provider (that is, the
 * provider of the BEGIN, END, and ERROR probes).
 */
static dtrace_pattr_t	dtrace_provider_attr = {
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
};
static void
dtrace_nullop(void)
{}

static int
dtrace_enable_nullop(void)
{
	return (0);
}

static dtrace_pops_t	dtrace_provider_ops = {
	(void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop,
	(void (*)(void *, struct modctl *))dtrace_nullop,
	(int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	NULL,
	NULL,
	NULL,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop
};
static dtrace_id_t	dtrace_probeid_begin;	/* special BEGIN probe */
static dtrace_id_t	dtrace_probeid_end;	/* special END probe */
dtrace_id_t		dtrace_probeid_error;	/* special ERROR probe */
/*
 * DTrace Helper Tracing Variables
 */
uint32_t	dtrace_helptrace_next = 0;
uint32_t	dtrace_helptrace_nlocals;
char		*dtrace_helptrace_buffer;
size_t		dtrace_helptrace_bufsize = 512 * 1024;

#if DEBUG
int		dtrace_helptrace_enabled = 1;
#else
int		dtrace_helptrace_enabled = 0;
#endif
/*
 * DTrace Error Hashing
 *
 * On DEBUG kernels, DTrace will track the errors that it has seen in a hash
 * table.  This is very useful for checking coverage of tests that are
 * expected to induce DIF or DOF processing errors, and may be useful for
 * debugging problems in the DIF code generator or in DOF generation.  The
 * error hash may be examined with the ::dtrace_errhash MDB dcmd.
 */
static dtrace_errhash_t	dtrace_errhash[DTRACE_ERRHASHSZ];
static const char	*dtrace_errlast;
static kthread_t	*dtrace_errthread;
static lck_mtx_t	dtrace_errlock;
/*
 * DTrace Macros and Constants
 *
 * These are various macros that are useful in various spots in the
 * implementation, along with a few random constants that have no meaning
 * outside of the implementation.  There is no real structure to this cpp
 * mishmash -- but is there ever?
 */
#define	DTRACE_HASHSTR(hash, probe)	\
	dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs)))

#define	DTRACE_HASHNEXT(hash, probe)	\
	(dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs)

#define	DTRACE_HASHPREV(hash, probe)	\
	(dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs)

#define	DTRACE_HASHEQ(hash, lhs, rhs)	\
	(strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \
	    *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0)

#define	DTRACE_AGGHASHSIZE_SLEW		17

#define	DTRACE_V4MAPPED_OFFSET		(sizeof (uint32_t) * 3)
/*
 * The key for a thread-local variable consists of the lower 61 bits of the
 * current_thread(), plus the 3 bits of the highest active interrupt above LOCK_LEVEL.
 * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never
 * equal to a variable identifier.  This is necessary (but not sufficient) to
 * assure that global associative arrays never collide with thread-local
 * variables.  To guarantee that they cannot collide, we must also define the
 * order for keying dynamic variables.  That order is:
 *
 *   [ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ]
 *
 * Because the variable-key and the tls-key are in orthogonal spaces, there is
 * no way for a global variable key signature to match a thread-local key
 * signature.
 */
#if defined (__x86_64__)
/* FIXME: two function calls!! */
#define	DTRACE_TLS_THRKEY(where) { \
	uint_t intr = ml_at_interrupt_context(); /* Note: just one measly bit */ \
	uint64_t thr = (uintptr_t)current_thread(); \
	ASSERT(intr < (1 << 3)); \
	(where) = ((thr + DIF_VARIABLE_MAX) & \
	    (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
}
#else
#error Unknown architecture
#endif
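/*
 * Illustrative sketch (hypothetical values, not part of the implementation):
 * with current_thread() returning 0xffffff8012345678 and no interrupt active,
 * DTRACE_TLS_THRKEY(key) would leave
 *
 *	key = ((0xffffff8012345678 + DIF_VARIABLE_MAX) & ((1ULL << 61) - 1))
 *	    | (0ULL << 61);
 *
 * i.e. the thread pointer (offset by DIF_VARIABLE_MAX) in the low 61 bits and
 * the interrupt indicator in the top 3 bits.
 */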
#define	DT_BSWAP_8(x)	((x) & 0xff)
#define	DT_BSWAP_16(x)	((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8))
#define	DT_BSWAP_32(x)	((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((x) >> 16))
#define	DT_BSWAP_64(x)	((DT_BSWAP_32(x) << 32) | DT_BSWAP_32((x) >> 32))
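/*
 * For example, DT_BSWAP_16(0x1234) evaluates to 0x3412 and
 * DT_BSWAP_32(0x11223344) evaluates to 0x44332211; DT_BSWAP_64() applies the
 * same swap to each 32-bit half and then exchanges the halves.
 */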
#define	DT_MASK_LO 0x00000000FFFFFFFFULL

#define	DTRACE_STORE(type, tomax, offset, what) \
	*((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what);
#define	DTRACE_ALIGNCHECK(addr, size, flags)				\
	if (addr & (MIN(size,4) - 1)) {					\
		*flags |= CPU_DTRACE_BADALIGN;				\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;	\
		return (0);						\
	}
/*
 * Test whether a range of memory starting at testaddr of size testsz falls
 * within the range of memory described by addr, sz.  We take care to avoid
 * problems with overflow and underflow of the unsigned quantities, and
 * disallow all negative sizes.  Ranges of size 0 are allowed.
 */
#define	DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \
	((testaddr) - (baseaddr) < (basesz) && \
	(testaddr) + (testsz) - (baseaddr) <= (basesz) && \
	(testaddr) + (testsz) >= (testaddr))
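/*
 * A worked example with hypothetical values: for a base region of
 * baseaddr = 0x1000 and basesz = 0x100 (i.e. [0x1000, 0x1100)),
 * DTRACE_INRANGE(0x10f0, 0x10, 0x1000, 0x100) passes all three clauses,
 * whereas DTRACE_INRANGE(0x10f0, 0x20, 0x1000, 0x100) fails the second
 * clause because 0x10f0 + 0x20 - 0x1000 = 0x110 > 0x100.  The third clause
 * rejects (testaddr, testsz) pairs whose sum wraps around zero.
 */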
/*
 * Test whether alloc_sz bytes will fit in the scratch region.  We isolate
 * alloc_sz on the righthand side of the comparison in order to avoid overflow
 * or underflow in the comparison with it.  This is simpler than the INRANGE
 * check above, because we know that the dtms_scratch_ptr is valid in the
 * range.  Allocations of size zero are allowed.
 */
#define	DTRACE_INSCRATCH(mstate, alloc_sz) \
	((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \
	(mstate)->dtms_scratch_ptr >= (alloc_sz))
460 #define RECOVER_LABEL(bits) dtraceLoadRecover##bits:
462 #if defined (__x86_64__)
463 #define DTRACE_LOADFUNC(bits) \
465 uint##bits##_t dtrace_load##bits(uintptr_t addr); \
468 dtrace_load##bits(uintptr_t addr) \
470 size_t size = bits / NBBY; \
472 uint##bits##_t rval = 0; \
474 volatile uint16_t *flags = (volatile uint16_t *) \
475 &cpu_core[CPU->cpu_id].cpuc_dtrace_flags; \
477 DTRACE_ALIGNCHECK(addr, size, flags); \
479 for (i = 0; i < dtrace_toxranges; i++) { \
480 if (addr >= dtrace_toxrange[i].dtt_limit) \
483 if (addr + size <= dtrace_toxrange[i].dtt_base) \
487 * This address falls within a toxic region; return 0. \
489 *flags |= CPU_DTRACE_BADADDR; \
490 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr; \
495 volatile vm_offset_t recover = (vm_offset_t)&&dtraceLoadRecover##bits; \
496 *flags |= CPU_DTRACE_NOFAULT; \
497 recover = dtrace_set_thread_recover(current_thread(), recover); \
500 * PR6394061 - avoid device memory that is unpredictably \
501 * mapped and unmapped \
503 if (pmap_valid_page(pmap_find_phys(kernel_pmap, addr))) \
504 rval = *((volatile uint##bits##_t *)addr); \
505 RECOVER_LABEL(bits); \
506 (void)dtrace_set_thread_recover(current_thread(), recover); \
507 *flags &= ~CPU_DTRACE_NOFAULT; \
512 #else /* all other architectures */
513 #error Unknown Architecture
517 #define dtrace_loadptr dtrace_load64
519 #define dtrace_loadptr dtrace_load32
#define	DTRACE_DYNHASH_FREE	0
#define	DTRACE_DYNHASH_SINK	1
#define	DTRACE_DYNHASH_VALID	2

#define	DTRACE_MATCH_FAIL	-1
#define	DTRACE_MATCH_NEXT	0
#define	DTRACE_MATCH_DONE	1
#define	DTRACE_ANCHORED(probe)	((probe)->dtpr_func[0] != '\0')
#define	DTRACE_STATE_ALIGN	64

#define	DTRACE_FLAGS2FLT(flags)						\
	(((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR :		\
	((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP :		\
	((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO :		\
	((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV :		\
	((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV :		\
	((flags) & CPU_DTRACE_TUPOFLOW) ? DTRACEFLT_TUPOFLOW :		\
	((flags) & CPU_DTRACE_BADALIGN) ? DTRACEFLT_BADALIGN :		\
	((flags) & CPU_DTRACE_NOSCRATCH) ? DTRACEFLT_NOSCRATCH :	\
	((flags) & CPU_DTRACE_BADSTACK) ? DTRACEFLT_BADSTACK :		\
	DTRACEFLT_UNKNOWN)

#define	DTRACEACT_ISSTRING(act)						\
	((act)->dta_kind == DTRACEACT_DIFEXPR &&			\
	(act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING)
static size_t dtrace_strlen(const char *, size_t);
static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
static void dtrace_enabling_provide(dtrace_provider_t *);
static int dtrace_enabling_match(dtrace_enabling_t *, int *);
static void dtrace_enabling_matchall(void);
static dtrace_state_t *dtrace_anon_grab(void);
static uint64_t dtrace_helper(int, dtrace_mstate_t *,
    dtrace_state_t *, uint64_t, uint64_t);
static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
static void dtrace_buffer_drop(dtrace_buffer_t *);
static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
    dtrace_state_t *, dtrace_mstate_t *);
static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
    dtrace_optval_t);
static int dtrace_ecb_create_enable(dtrace_probe_t *, void *);
static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
/*
 * DTrace sysctl handlers
 *
 * These declarations and functions are used for a deeper DTrace configuration.
 * Most of them do not apply on a per-consumer basis and may impact other
 * DTrace consumers.  Correctness may not be guaranteed for all the variables,
 * so you should be careful about what values you use.
 */
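/*
 * For example (illustrative usage only), the err_verbose knob defined below
 * could be inspected or toggled from user space with:
 *
 *	sysctl kern.dtrace.err_verbose		# read the current value
 *	sysctl kern.dtrace.err_verbose=1	# report rejected DIFOs/DOFs verbosely
 */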
SYSCTL_DECL(_kern_dtrace);
SYSCTL_NODE(_kern, OID_AUTO, dtrace, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "dtrace");
580 sysctl_dtrace_err_verbose SYSCTL_HANDLER_ARGS
582 #pragma unused(oidp, arg2)
584 int value
= *(int *) arg1
;
586 error
= sysctl_io_number(req
, value
, sizeof(value
), &value
, &changed
);
587 if (error
|| !changed
)
590 if (value
!= 0 && value
!= 1)
593 lck_mtx_lock(&dtrace_lock
);
594 dtrace_err_verbose
= value
;
595 lck_mtx_unlock(&dtrace_lock
);
/*
 * kern.dtrace.err_verbose
 *
 * Set DTrace verbosity when an error occurs (0 = disabled, 1 = enabled).
 * Errors are reported when a DIFO or a DOF has been rejected by the kernel.
 */
606 SYSCTL_PROC(_kern_dtrace
, OID_AUTO
, err_verbose
,
607 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_LOCKED
,
608 &dtrace_err_verbose
, 0,
609 sysctl_dtrace_err_verbose
, "I", "dtrace error verbose");
612 sysctl_dtrace_buffer_memory_maxsize SYSCTL_HANDLER_ARGS
614 #pragma unused(oidp, arg2, req)
616 uint64_t value
= *(uint64_t *) arg1
;
618 error
= sysctl_io_number(req
, value
, sizeof(value
), &value
, &changed
);
619 if (error
|| !changed
)
622 if (value
<= dtrace_buffer_memory_inuse
)
625 lck_mtx_lock(&dtrace_lock
);
626 dtrace_buffer_memory_maxsize
= value
;
627 lck_mtx_unlock(&dtrace_lock
);
/*
 * kern.dtrace.buffer_memory_maxsize
 *
 * Set the maximum amount of memory, in bytes, used by all the consumers' state
 * buffers.  By default the limit is PHYS_MEM / 3 for *all* consumers.
 * Attempting to set a zero or negative value, or a value less than or equal to
 * dtrace_buffer_memory_inuse, will result in a failure.
 */
639 SYSCTL_PROC(_kern_dtrace
, OID_AUTO
, buffer_memory_maxsize
,
640 CTLTYPE_QUAD
| CTLFLAG_RW
| CTLFLAG_LOCKED
,
641 &dtrace_buffer_memory_maxsize
, 0,
642 sysctl_dtrace_buffer_memory_maxsize
, "Q", "dtrace state buffer memory maxsize");
645 * kern.dtrace.buffer_memory_inuse
647 * Current state buffer memory used, in bytes, by all the DTrace consumers.
648 * This value is read-only.
650 SYSCTL_QUAD(_kern_dtrace
, OID_AUTO
, buffer_memory_inuse
, CTLFLAG_RD
| CTLFLAG_LOCKED
,
651 &dtrace_buffer_memory_inuse
, "dtrace state buffer memory in-use");
654 sysctl_dtrace_difo_maxsize SYSCTL_HANDLER_ARGS
656 #pragma unused(oidp, arg2, req)
658 size_t value
= *(size_t*) arg1
;
660 error
= sysctl_io_number(req
, value
, sizeof(value
), &value
, &changed
);
661 if (error
|| !changed
)
667 lck_mtx_lock(&dtrace_lock
);
668 dtrace_difo_maxsize
= value
;
669 lck_mtx_unlock(&dtrace_lock
);
675 * kern.dtrace.difo_maxsize
677 * Set the DIFO max size in bytes, check the definition of dtrace_difo_maxsize
678 * to get the default value. Attempting to set a null or negative size will
679 * result in a failure.
681 SYSCTL_PROC(_kern_dtrace
, OID_AUTO
, difo_maxsize
,
682 CTLTYPE_QUAD
| CTLFLAG_RW
| CTLFLAG_LOCKED
,
683 &dtrace_difo_maxsize
, 0,
684 sysctl_dtrace_difo_maxsize
, "Q", "dtrace difo maxsize");
687 sysctl_dtrace_dof_maxsize SYSCTL_HANDLER_ARGS
689 #pragma unused(oidp, arg2, req)
691 dtrace_optval_t value
= *(dtrace_optval_t
*) arg1
;
693 error
= sysctl_io_number(req
, value
, sizeof(value
), &value
, &changed
);
694 if (error
|| !changed
)
700 lck_mtx_lock(&dtrace_lock
);
701 dtrace_dof_maxsize
= value
;
702 lck_mtx_unlock(&dtrace_lock
);
708 * kern.dtrace.dof_maxsize
710 * Set the DOF max size in bytes, check the definition of dtrace_dof_maxsize to
711 * get the default value. Attempting to set a null or negative size will result
714 SYSCTL_PROC(_kern_dtrace
, OID_AUTO
, dof_maxsize
,
715 CTLTYPE_QUAD
| CTLFLAG_RW
| CTLFLAG_LOCKED
,
716 &dtrace_dof_maxsize
, 0,
717 sysctl_dtrace_dof_maxsize
, "Q", "dtrace dof maxsize");
720 sysctl_dtrace_global_maxsize SYSCTL_HANDLER_ARGS
722 #pragma unused(oidp, arg2, req)
724 dtrace_optval_t value
= *(dtrace_optval_t
*) arg1
;
726 error
= sysctl_io_number(req
, value
, sizeof(value
), &value
, &changed
);
727 if (error
|| !changed
)
733 lck_mtx_lock(&dtrace_lock
);
734 dtrace_global_maxsize
= value
;
735 lck_mtx_unlock(&dtrace_lock
);
741 * kern.dtrace.global_maxsize
743 * Set the global variable max size in bytes, check the definition of
744 * dtrace_global_maxsize to get the default value. Attempting to set a null or
745 * negative size will result in a failure.
747 SYSCTL_PROC(_kern_dtrace
, OID_AUTO
, global_maxsize
,
748 CTLTYPE_QUAD
| CTLFLAG_RW
| CTLFLAG_LOCKED
,
749 &dtrace_global_maxsize
, 0,
750 sysctl_dtrace_global_maxsize
, "Q", "dtrace global maxsize");
753 sysctl_dtrace_provide_private_probes SYSCTL_HANDLER_ARGS
755 #pragma unused(oidp, arg2)
757 int value
= *(int *) arg1
;
759 error
= sysctl_io_number(req
, value
, sizeof(value
), &value
, NULL
);
763 if (value
!= 0 && value
!= 1)
766 lck_mtx_lock(&dtrace_lock
);
767 dtrace_provide_private_probes
= value
;
768 lck_mtx_unlock(&dtrace_lock
);
774 * kern.dtrace.provide_private_probes
776 * Set whether the providers must provide the private probes. This is
777 * mainly used by the FBT provider to request probes for the private/static
780 SYSCTL_PROC(_kern_dtrace
, OID_AUTO
, provide_private_probes
,
781 CTLTYPE_INT
| CTLFLAG_RW
| CTLFLAG_LOCKED
,
782 &dtrace_provide_private_probes
, 0,
783 sysctl_dtrace_provide_private_probes
, "I", "provider must provide the private probes");
/*
 * DTrace Probe Context Functions
 *
 * These functions are called from probe context.  Because probe context is
 * any context in which C may be called, arbitrary locks may be held,
 * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
 * As a result, functions called from probe context may only call other DTrace
 * support functions -- they may not interact at all with the system at large.
 * (Note that the ASSERT macro is made probe-context safe by redefining it in
 * terms of dtrace_assfail(), a probe-context safe function.)  If arbitrary
 * loads are to be performed from probe context, they _must_ be in terms of
 * the safe dtrace_load*() variants.
 *
 * Some functions in this block are not actually called from probe context;
 * for these functions, there will be a comment above the function reading
 * "Note: not called from probe context."
 */
804 dtrace_assfail(const char *a
, const char *f
, int l
)
806 panic("dtrace: assertion failed: %s, file: %s, line: %d", a
, f
, l
);
809 * We just need something here that even the most clever compiler
810 * cannot optimize away.
812 return (a
[(uintptr_t)f
]);
816 * Atomically increment a specified error counter from probe context.
819 dtrace_error(uint32_t *counter
)
822 * Most counters stored to in probe context are per-CPU counters.
823 * However, there are some error conditions that are sufficiently
824 * arcane that they don't merit per-CPU storage. If these counters
825 * are incremented concurrently on different CPUs, scalability will be
826 * adversely affected -- but we don't expect them to be white-hot in a
827 * correctly constructed enabling...
834 if ((nval
= oval
+ 1) == 0) {
836 * If the counter would wrap, set it to 1 -- assuring
837 * that the counter is never zero when we have seen
838 * errors. (The counter must be 32-bits because we
839 * aren't guaranteed a 64-bit compare&swap operation.)
840 * To save this code both the infamy of being fingered
841 * by a priggish news story and the indignity of being
842 * the target of a neo-puritan witch trial, we're
843 * carefully avoiding any colorful description of the
844 * likelihood of this condition -- but suffice it to
845 * say that it is only slightly more likely than the
846 * overflow of predicate cache IDs, as discussed in
847 * dtrace_predicate_create().
851 } while (dtrace_cas32(counter
, oval
, nval
) != oval
);
855 * Use the DTRACE_LOADFUNC macro to define functions for each of loading a
856 * uint8_t, a uint16_t, a uint32_t and a uint64_t.
864 dtrace_inscratch(uintptr_t dest
, size_t size
, dtrace_mstate_t
*mstate
)
866 if (dest
< mstate
->dtms_scratch_base
)
869 if (dest
+ size
< dest
)
872 if (dest
+ size
> mstate
->dtms_scratch_ptr
)
879 dtrace_canstore_statvar(uint64_t addr
, size_t sz
,
880 dtrace_statvar_t
**svars
, int nsvars
)
884 for (i
= 0; i
< nsvars
; i
++) {
885 dtrace_statvar_t
*svar
= svars
[i
];
887 if (svar
== NULL
|| svar
->dtsv_size
== 0)
890 if (DTRACE_INRANGE(addr
, sz
, svar
->dtsv_data
, svar
->dtsv_size
))
898 * Check to see if the address is within a memory region to which a store may
899 * be issued. This includes the DTrace scratch areas, and any DTrace variable
900 * region. The caller of dtrace_canstore() is responsible for performing any
901 * alignment checks that are needed before stores are actually executed.
904 dtrace_canstore(uint64_t addr
, size_t sz
, dtrace_mstate_t
*mstate
,
905 dtrace_vstate_t
*vstate
)
908 * First, check to see if the address is in scratch space...
910 if (DTRACE_INRANGE(addr
, sz
, mstate
->dtms_scratch_base
,
911 mstate
->dtms_scratch_size
))
915 * Now check to see if it's a dynamic variable. This check will pick
916 * up both thread-local variables and any global dynamically-allocated
919 if (DTRACE_INRANGE(addr
, sz
, (uintptr_t)vstate
->dtvs_dynvars
.dtds_base
,
920 vstate
->dtvs_dynvars
.dtds_size
)) {
921 dtrace_dstate_t
*dstate
= &vstate
->dtvs_dynvars
;
922 uintptr_t base
= (uintptr_t)dstate
->dtds_base
+
923 (dstate
->dtds_hashsize
* sizeof (dtrace_dynhash_t
));
927 * Before we assume that we can store here, we need to make
928 * sure that it isn't in our metadata -- storing to our
929 * dynamic variable metadata would corrupt our state. For
930 * the range to not include any dynamic variable metadata,
933 * (1) Start above the hash table that is at the base of
934 * the dynamic variable space
936 * (2) Have a starting chunk offset that is beyond the
937 * dtrace_dynvar_t that is at the base of every chunk
939 * (3) Not span a chunk boundary
945 chunkoffs
= (addr
- base
) % dstate
->dtds_chunksize
;
947 if (chunkoffs
< sizeof (dtrace_dynvar_t
))
950 if (chunkoffs
+ sz
> dstate
->dtds_chunksize
)
957 * Finally, check the static local and global variables. These checks
958 * take the longest, so we perform them last.
960 if (dtrace_canstore_statvar(addr
, sz
,
961 vstate
->dtvs_locals
, vstate
->dtvs_nlocals
))
964 if (dtrace_canstore_statvar(addr
, sz
,
965 vstate
->dtvs_globals
, vstate
->dtvs_nglobals
))
973 * Convenience routine to check to see if the address is within a memory
974 * region in which a load may be issued given the user's privilege level;
975 * if not, it sets the appropriate error flags and loads 'addr' into the
976 * illegal value slot.
978 * DTrace subroutines (DIF_SUBR_*) should use this helper to implement
979 * appropriate memory access protection.
982 dtrace_canload(uint64_t addr
, size_t sz
, dtrace_mstate_t
*mstate
,
983 dtrace_vstate_t
*vstate
)
985 volatile uint64_t *illval
= &cpu_core
[CPU
->cpu_id
].cpuc_dtrace_illval
;
988 * If we hold the privilege to read from kernel memory, then
989 * everything is readable.
991 if ((mstate
->dtms_access
& DTRACE_ACCESS_KERNEL
) != 0)
995 * You can obviously read that which you can store.
997 if (dtrace_canstore(addr
, sz
, mstate
, vstate
))
1001 * We're allowed to read from our own string table.
1003 if (DTRACE_INRANGE(addr
, sz
, (uintptr_t)mstate
->dtms_difo
->dtdo_strtab
,
1004 mstate
->dtms_difo
->dtdo_strlen
))
1007 DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV
);
1013 * Convenience routine to check to see if a given string is within a memory
1014 * region in which a load may be issued given the user's privilege level;
1015 * this exists so that we don't need to issue unnecessary dtrace_strlen()
1016 * calls in the event that the user has all privileges.
1019 dtrace_strcanload(uint64_t addr
, size_t sz
, dtrace_mstate_t
*mstate
,
1020 dtrace_vstate_t
*vstate
)
1025 * If we hold the privilege to read from kernel memory, then
1026 * everything is readable.
1028 if ((mstate
->dtms_access
& DTRACE_ACCESS_KERNEL
) != 0)
1031 strsz
= 1 + dtrace_strlen((char *)(uintptr_t)addr
, sz
);
1032 if (dtrace_canload(addr
, strsz
, mstate
, vstate
))
1039 * Convenience routine to check to see if a given variable is within a memory
1040 * region in which a load may be issued given the user's privilege level.
1043 dtrace_vcanload(void *src
, dtrace_diftype_t
*type
, dtrace_mstate_t
*mstate
,
1044 dtrace_vstate_t
*vstate
)
1047 ASSERT(type
->dtdt_flags
& DIF_TF_BYREF
);
1050 * If we hold the privilege to read from kernel memory, then
1051 * everything is readable.
1053 if ((mstate
->dtms_access
& DTRACE_ACCESS_KERNEL
) != 0)
1056 if (type
->dtdt_kind
== DIF_TYPE_STRING
)
1057 sz
= dtrace_strlen(src
,
1058 vstate
->dtvs_state
->dts_options
[DTRACEOPT_STRSIZE
]) + 1;
1060 sz
= type
->dtdt_size
;
1062 return (dtrace_canload((uintptr_t)src
, sz
, mstate
, vstate
));
1066 * Compare two strings using safe loads.
1069 dtrace_strncmp(char *s1
, char *s2
, size_t limit
)
1072 volatile uint16_t *flags
;
1074 if (s1
== s2
|| limit
== 0)
1077 flags
= (volatile uint16_t *)&cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
;
1083 c1
= dtrace_load8((uintptr_t)s1
++);
1089 c2
= dtrace_load8((uintptr_t)s2
++);
1094 } while (--limit
&& c1
!= '\0' && !(*flags
& CPU_DTRACE_FAULT
));
1100 * Compute strlen(s) for a string using safe memory accesses. The additional
1101 * len parameter is used to specify a maximum length to ensure completion.
1104 dtrace_strlen(const char *s
, size_t lim
)
1108 for (len
= 0; len
!= lim
; len
++) {
1109 if (dtrace_load8((uintptr_t)s
++) == '\0')
1117 * Check if an address falls within a toxic region.
1120 dtrace_istoxic(uintptr_t kaddr
, size_t size
)
1122 uintptr_t taddr
, tsize
;
1125 for (i
= 0; i
< dtrace_toxranges
; i
++) {
1126 taddr
= dtrace_toxrange
[i
].dtt_base
;
1127 tsize
= dtrace_toxrange
[i
].dtt_limit
- taddr
;
1129 if (kaddr
- taddr
< tsize
) {
1130 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR
);
1131 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_illval
= kaddr
;
1135 if (taddr
- kaddr
< size
) {
1136 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR
);
1137 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_illval
= taddr
;
1146 * Copy src to dst using safe memory accesses. The src is assumed to be unsafe
1147 * memory specified by the DIF program. The dst is assumed to be safe memory
1148 * that we can store to directly because it is managed by DTrace. As with
1149 * standard bcopy, overlapping copies are handled properly.
1152 dtrace_bcopy(const void *src
, void *dst
, size_t len
)
1156 const uint8_t *s2
= src
;
1160 *s1
++ = dtrace_load8((uintptr_t)s2
++);
1161 } while (--len
!= 0);
1167 *--s1
= dtrace_load8((uintptr_t)--s2
);
1168 } while (--len
!= 0);
1174 * Copy src to dst using safe memory accesses, up to either the specified
1175 * length, or the point that a nul byte is encountered. The src is assumed to
1176 * be unsafe memory specified by the DIF program. The dst is assumed to be
1177 * safe memory that we can store to directly because it is managed by DTrace.
1178 * Unlike dtrace_bcopy(), overlapping regions are not handled.
1181 dtrace_strcpy(const void *src
, void *dst
, size_t len
)
1184 uint8_t *s1
= dst
, c
;
1185 const uint8_t *s2
= src
;
1188 *s1
++ = c
= dtrace_load8((uintptr_t)s2
++);
1189 } while (--len
!= 0 && c
!= '\0');
1194 * Copy src to dst, deriving the size and type from the specified (BYREF)
1195 * variable type. The src is assumed to be unsafe memory specified by the DIF
1196 * program. The dst is assumed to be DTrace variable memory that is of the
1197 * specified type; we assume that we can store to directly.
1200 dtrace_vcopy(void *src
, void *dst
, dtrace_diftype_t
*type
)
1202 ASSERT(type
->dtdt_flags
& DIF_TF_BYREF
);
1204 if (type
->dtdt_kind
== DIF_TYPE_STRING
) {
1205 dtrace_strcpy(src
, dst
, type
->dtdt_size
);
1207 dtrace_bcopy(src
, dst
, type
->dtdt_size
);
1212 * Compare s1 to s2 using safe memory accesses. The s1 data is assumed to be
1213 * unsafe memory specified by the DIF program. The s2 data is assumed to be
1214 * safe memory that we can access directly because it is managed by DTrace.
1217 dtrace_bcmp(const void *s1
, const void *s2
, size_t len
)
1219 volatile uint16_t *flags
;
1221 flags
= (volatile uint16_t *)&cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
;
1226 if (s1
== NULL
|| s2
== NULL
)
1229 if (s1
!= s2
&& len
!= 0) {
1230 const uint8_t *ps1
= s1
;
1231 const uint8_t *ps2
= s2
;
1234 if (dtrace_load8((uintptr_t)ps1
++) != *ps2
++)
1236 } while (--len
!= 0 && !(*flags
& CPU_DTRACE_FAULT
));
1242 * Zero the specified region using a simple byte-by-byte loop. Note that this
1243 * is for safe DTrace-managed memory only.
1246 dtrace_bzero(void *dst
, size_t len
)
1250 for (cp
= dst
; len
!= 0; len
--)
1255 dtrace_add_128(uint64_t *addend1
, uint64_t *addend2
, uint64_t *sum
)
1259 result
[0] = addend1
[0] + addend2
[0];
1260 result
[1] = addend1
[1] + addend2
[1] +
1261 (result
[0] < addend1
[0] || result
[0] < addend2
[0] ? 1 : 0);
1268 * Shift the 128-bit value in a by b. If b is positive, shift left.
1269 * If b is negative, shift right.
1272 dtrace_shift_128(uint64_t *a
, int b
)
1282 a
[0] = a
[1] >> (b
- 64);
1286 mask
= 1LL << (64 - b
);
1288 a
[0] |= ((a
[1] & mask
) << (64 - b
));
1293 a
[1] = a
[0] << (b
- 64);
1297 mask
= a
[0] >> (64 - b
);
/*
 * The basic idea is to break the 2 64-bit values into 4 32-bit values,
 * use native multiplication on those, and then re-combine into the
 * resulting 128-bit value.
 *
 * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) =
 *     hi1 * hi2 << 64 + hi1 * lo2 << 32 + hi2 * lo1 << 32 + lo1 * lo2
 */
1316 dtrace_multiply_128(uint64_t factor1
, uint64_t factor2
, uint64_t *product
)
1318 uint64_t hi1
, hi2
, lo1
, lo2
;
1321 hi1
= factor1
>> 32;
1322 hi2
= factor2
>> 32;
1324 lo1
= factor1
& DT_MASK_LO
;
1325 lo2
= factor2
& DT_MASK_LO
;
1327 product
[0] = lo1
* lo2
;
1328 product
[1] = hi1
* hi2
;
1332 dtrace_shift_128(tmp
, 32);
1333 dtrace_add_128(product
, tmp
, product
);
1337 dtrace_shift_128(tmp
, 32);
1338 dtrace_add_128(product
, tmp
, product
);
1342 * This privilege check should be used by actions and subroutines to
1343 * verify that the user credentials of the process that enabled the
1344 * invoking ECB match the target credentials
1347 dtrace_priv_proc_common_user(dtrace_state_t
*state
)
1349 cred_t
*cr
, *s_cr
= state
->dts_cred
.dcr_cred
;
1352 * We should always have a non-NULL state cred here, since if cred
1353 * is null (anonymous tracing), we fast-path bypass this routine.
1355 ASSERT(s_cr
!= NULL
);
1357 if ((cr
= dtrace_CRED()) != NULL
&&
1358 posix_cred_get(s_cr
)->cr_uid
== posix_cred_get(cr
)->cr_uid
&&
1359 posix_cred_get(s_cr
)->cr_uid
== posix_cred_get(cr
)->cr_ruid
&&
1360 posix_cred_get(s_cr
)->cr_uid
== posix_cred_get(cr
)->cr_suid
&&
1361 posix_cred_get(s_cr
)->cr_gid
== posix_cred_get(cr
)->cr_gid
&&
1362 posix_cred_get(s_cr
)->cr_gid
== posix_cred_get(cr
)->cr_rgid
&&
1363 posix_cred_get(s_cr
)->cr_gid
== posix_cred_get(cr
)->cr_sgid
)
1370 * This privilege check should be used by actions and subroutines to
1371 * verify that the zone of the process that enabled the invoking ECB
1372 * matches the target credentials
1375 dtrace_priv_proc_common_zone(dtrace_state_t
*state
)
1377 cred_t
*cr
, *s_cr
= state
->dts_cred
.dcr_cred
;
1378 #pragma unused(cr, s_cr, state) /* __APPLE__ */
1381 * We should always have a non-NULL state cred here, since if cred
1382 * is null (anonymous tracing), we fast-path bypass this routine.
1384 ASSERT(s_cr
!= NULL
);
1386 return 1; /* APPLE NOTE: Darwin doesn't do zones. */
1390 * This privilege check should be used by actions and subroutines to
1391 * verify that the process has not setuid or changed credentials.
1394 dtrace_priv_proc_common_nocd(void)
1396 return 1; /* Darwin omits "No Core Dump" flag. */
1400 dtrace_priv_proc_destructive(dtrace_state_t
*state
)
1402 int action
= state
->dts_cred
.dcr_action
;
1404 if (ISSET(current_proc()->p_lflag
, P_LNOATTACH
))
1407 if (dtrace_is_restricted() && !dtrace_can_attach_to_proc(current_proc()))
1410 if (((action
& DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE
) == 0) &&
1411 dtrace_priv_proc_common_zone(state
) == 0)
1414 if (((action
& DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER
) == 0) &&
1415 dtrace_priv_proc_common_user(state
) == 0)
1418 if (((action
& DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG
) == 0) &&
1419 dtrace_priv_proc_common_nocd() == 0)
1425 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
|= CPU_DTRACE_UPRIV
;
1431 dtrace_priv_proc_control(dtrace_state_t
*state
)
1433 if (ISSET(current_proc()->p_lflag
, P_LNOATTACH
))
1436 if (dtrace_is_restricted() && !dtrace_can_attach_to_proc(current_proc()))
1439 if (state
->dts_cred
.dcr_action
& DTRACE_CRA_PROC_CONTROL
)
1442 if (dtrace_priv_proc_common_zone(state
) &&
1443 dtrace_priv_proc_common_user(state
) &&
1444 dtrace_priv_proc_common_nocd())
1448 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
|= CPU_DTRACE_UPRIV
;
1454 dtrace_priv_proc(dtrace_state_t
*state
)
1456 if (ISSET(current_proc()->p_lflag
, P_LNOATTACH
))
1459 if (dtrace_is_restricted() && !dtrace_can_attach_to_proc(current_proc()))
1462 if (state
->dts_cred
.dcr_action
& DTRACE_CRA_PROC
)
1466 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
|= CPU_DTRACE_UPRIV
;
1472 * The P_LNOATTACH check is an Apple specific check.
1473 * We need a version of dtrace_priv_proc() that omits
1474 * that check for PID and EXECNAME accesses
1477 dtrace_priv_proc_relaxed(dtrace_state_t
*state
)
1480 if (state
->dts_cred
.dcr_action
& DTRACE_CRA_PROC
)
1483 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
|= CPU_DTRACE_UPRIV
;
1489 dtrace_priv_kernel(dtrace_state_t
*state
)
1491 if (dtrace_is_restricted())
1494 if (state
->dts_cred
.dcr_action
& DTRACE_CRA_KERNEL
)
1498 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
|= CPU_DTRACE_KPRIV
;
1504 dtrace_priv_kernel_destructive(dtrace_state_t
*state
)
1506 if (dtrace_is_restricted())
1509 if (state
->dts_cred
.dcr_action
& DTRACE_CRA_KERNEL_DESTRUCTIVE
)
1513 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
|= CPU_DTRACE_KPRIV
;
1519 * Note: not called from probe context. This function is called
1520 * asynchronously (and at a regular interval) from outside of probe context to
1521 * clean the dirty dynamic variable lists on all CPUs. Dynamic variable
1522 * cleaning is explained in detail in <sys/dtrace_impl.h>.
1525 dtrace_dynvar_clean(dtrace_dstate_t
*dstate
)
1527 dtrace_dynvar_t
*dirty
;
1528 dtrace_dstate_percpu_t
*dcpu
;
1531 for (i
= 0; i
< (int)NCPU
; i
++) {
1532 dcpu
= &dstate
->dtds_percpu
[i
];
1534 ASSERT(dcpu
->dtdsc_rinsing
== NULL
);
1537 * If the dirty list is NULL, there is no dirty work to do.
1539 if (dcpu
->dtdsc_dirty
== NULL
)
1543 * If the clean list is non-NULL, then we're not going to do
1544 * any work for this CPU -- it means that there has not been
1545 * a dtrace_dynvar() allocation on this CPU (or from this CPU)
1546 * since the last time we cleaned house.
1548 if (dcpu
->dtdsc_clean
!= NULL
)
1554 * Atomically move the dirty list aside.
1557 dirty
= dcpu
->dtdsc_dirty
;
1560 * Before we zap the dirty list, set the rinsing list.
1561 * (This allows for a potential assertion in
1562 * dtrace_dynvar(): if a free dynamic variable appears
1563 * on a hash chain, either the dirty list or the
1564 * rinsing list for some CPU must be non-NULL.)
1566 dcpu
->dtdsc_rinsing
= dirty
;
1567 dtrace_membar_producer();
1568 } while (dtrace_casptr(&dcpu
->dtdsc_dirty
,
1569 dirty
, NULL
) != dirty
);
1574 * We have no work to do; we can simply return.
1581 for (i
= 0; i
< (int)NCPU
; i
++) {
1582 dcpu
= &dstate
->dtds_percpu
[i
];
1584 if (dcpu
->dtdsc_rinsing
== NULL
)
1588 * We are now guaranteed that no hash chain contains a pointer
1589 * into this dirty list; we can make it clean.
1591 ASSERT(dcpu
->dtdsc_clean
== NULL
);
1592 dcpu
->dtdsc_clean
= dcpu
->dtdsc_rinsing
;
1593 dcpu
->dtdsc_rinsing
= NULL
;
1597 * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make
1598 * sure that all CPUs have seen all of the dtdsc_clean pointers.
1599 * This prevents a race whereby a CPU incorrectly decides that
1600 * the state should be something other than DTRACE_DSTATE_CLEAN
1601 * after dtrace_dynvar_clean() has completed.
1605 dstate
->dtds_state
= DTRACE_DSTATE_CLEAN
;
1609 * Depending on the value of the op parameter, this function looks-up,
1610 * allocates or deallocates an arbitrarily-keyed dynamic variable. If an
1611 * allocation is requested, this function will return a pointer to a
1612 * dtrace_dynvar_t corresponding to the allocated variable -- or NULL if no
1613 * variable can be allocated. If NULL is returned, the appropriate counter
1614 * will be incremented.
1616 static dtrace_dynvar_t
*
1617 dtrace_dynvar(dtrace_dstate_t
*dstate
, uint_t nkeys
,
1618 dtrace_key_t
*key
, size_t dsize
, dtrace_dynvar_op_t op
,
1619 dtrace_mstate_t
*mstate
, dtrace_vstate_t
*vstate
)
1621 uint64_t hashval
= DTRACE_DYNHASH_VALID
;
1622 dtrace_dynhash_t
*hash
= dstate
->dtds_hash
;
1623 dtrace_dynvar_t
*free
, *new_free
, *next
, *dvar
, *start
, *prev
= NULL
;
1624 processorid_t me
= CPU
->cpu_id
, cpu
= me
;
1625 dtrace_dstate_percpu_t
*dcpu
= &dstate
->dtds_percpu
[me
];
1626 size_t bucket
, ksize
;
1627 size_t chunksize
= dstate
->dtds_chunksize
;
1628 uintptr_t kdata
, lock
, nstate
;
1634 * Hash the key. As with aggregations, we use Jenkins' "One-at-a-time"
1635 * algorithm. For the by-value portions, we perform the algorithm in
1636 * 16-bit chunks (as opposed to 8-bit chunks). This speeds things up a
1637 * bit, and seems to have only a minute effect on distribution. For
1638 * the by-reference data, we perform "One-at-a-time" iterating (safely)
1639 * over each referenced byte. It's painful to do this, but it's much
1640 * better than pathological hash distribution. The efficacy of the
1641 * hashing algorithm (and a comparison with other algorithms) may be
1642 * found by running the ::dtrace_dynstat MDB dcmd.
1644 for (i
= 0; i
< nkeys
; i
++) {
1645 if (key
[i
].dttk_size
== 0) {
1646 uint64_t val
= key
[i
].dttk_value
;
1648 hashval
+= (val
>> 48) & 0xffff;
1649 hashval
+= (hashval
<< 10);
1650 hashval
^= (hashval
>> 6);
1652 hashval
+= (val
>> 32) & 0xffff;
1653 hashval
+= (hashval
<< 10);
1654 hashval
^= (hashval
>> 6);
1656 hashval
+= (val
>> 16) & 0xffff;
1657 hashval
+= (hashval
<< 10);
1658 hashval
^= (hashval
>> 6);
1660 hashval
+= val
& 0xffff;
1661 hashval
+= (hashval
<< 10);
1662 hashval
^= (hashval
>> 6);
1665 * This is incredibly painful, but it beats the hell
1666 * out of the alternative.
1668 uint64_t j
, size
= key
[i
].dttk_size
;
1669 uintptr_t base
= (uintptr_t)key
[i
].dttk_value
;
1671 if (!dtrace_canload(base
, size
, mstate
, vstate
))
1674 for (j
= 0; j
< size
; j
++) {
1675 hashval
+= dtrace_load8(base
+ j
);
1676 hashval
+= (hashval
<< 10);
1677 hashval
^= (hashval
>> 6);
1682 if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT
))
1685 hashval
+= (hashval
<< 3);
1686 hashval
^= (hashval
>> 11);
1687 hashval
+= (hashval
<< 15);
1690 * There is a remote chance (ideally, 1 in 2^31) that our hashval
1691 * comes out to be one of our two sentinel hash values. If this
1692 * actually happens, we set the hashval to be a value known to be a
1693 * non-sentinel value.
1695 if (hashval
== DTRACE_DYNHASH_FREE
|| hashval
== DTRACE_DYNHASH_SINK
)
1696 hashval
= DTRACE_DYNHASH_VALID
;
1699 * Yes, it's painful to do a divide here. If the cycle count becomes
1700 * important here, tricks can be pulled to reduce it. (However, it's
1701 * critical that hash collisions be kept to an absolute minimum;
1702 * they're much more painful than a divide.) It's better to have a
1703 * solution that generates few collisions and still keeps things
1704 * relatively simple.
1706 bucket
= hashval
% dstate
->dtds_hashsize
;
1708 if (op
== DTRACE_DYNVAR_DEALLOC
) {
1709 volatile uintptr_t *lockp
= &hash
[bucket
].dtdh_lock
;
1712 while ((lock
= *lockp
) & 1)
1715 if (dtrace_casptr((void *)(uintptr_t)lockp
,
1716 (void *)lock
, (void *)(lock
+ 1)) == (void *)lock
)
1720 dtrace_membar_producer();
1725 lock
= hash
[bucket
].dtdh_lock
;
1727 dtrace_membar_consumer();
1729 start
= hash
[bucket
].dtdh_chain
;
1730 ASSERT(start
!= NULL
&& (start
->dtdv_hashval
== DTRACE_DYNHASH_SINK
||
1731 start
->dtdv_hashval
!= DTRACE_DYNHASH_FREE
||
1732 op
!= DTRACE_DYNVAR_DEALLOC
));
1734 for (dvar
= start
; dvar
!= NULL
; dvar
= dvar
->dtdv_next
) {
1735 dtrace_tuple_t
*dtuple
= &dvar
->dtdv_tuple
;
1736 dtrace_key_t
*dkey
= &dtuple
->dtt_key
[0];
1738 if (dvar
->dtdv_hashval
!= hashval
) {
1739 if (dvar
->dtdv_hashval
== DTRACE_DYNHASH_SINK
) {
1741 * We've reached the sink, and therefore the
1742 * end of the hash chain; we can kick out of
1743 * the loop knowing that we have seen a valid
1744 * snapshot of state.
1746 ASSERT(dvar
->dtdv_next
== NULL
);
1747 ASSERT(dvar
== &dtrace_dynhash_sink
);
1751 if (dvar
->dtdv_hashval
== DTRACE_DYNHASH_FREE
) {
1753 * We've gone off the rails: somewhere along
1754 * the line, one of the members of this hash
1755 * chain was deleted. Note that we could also
1756 * detect this by simply letting this loop run
1757 * to completion, as we would eventually hit
1758 * the end of the dirty list. However, we
1759 * want to avoid running the length of the
1760 * dirty list unnecessarily (it might be quite
1761 * long), so we catch this as early as
1762 * possible by detecting the hash marker. In
1763 * this case, we simply set dvar to NULL and
1764 * break; the conditional after the loop will
1765 * send us back to top.
1774 if (dtuple
->dtt_nkeys
!= nkeys
)
1777 for (i
= 0; i
< nkeys
; i
++, dkey
++) {
1778 if (dkey
->dttk_size
!= key
[i
].dttk_size
)
1779 goto next
; /* size or type mismatch */
1781 if (dkey
->dttk_size
!= 0) {
1783 (void *)(uintptr_t)key
[i
].dttk_value
,
1784 (void *)(uintptr_t)dkey
->dttk_value
,
1788 if (dkey
->dttk_value
!= key
[i
].dttk_value
)
1793 if (op
!= DTRACE_DYNVAR_DEALLOC
)
1796 ASSERT(dvar
->dtdv_next
== NULL
||
1797 dvar
->dtdv_next
->dtdv_hashval
!= DTRACE_DYNHASH_FREE
);
1800 ASSERT(hash
[bucket
].dtdh_chain
!= dvar
);
1801 ASSERT(start
!= dvar
);
1802 ASSERT(prev
->dtdv_next
== dvar
);
1803 prev
->dtdv_next
= dvar
->dtdv_next
;
1805 if (dtrace_casptr(&hash
[bucket
].dtdh_chain
,
1806 start
, dvar
->dtdv_next
) != start
) {
1808 * We have failed to atomically swing the
1809 * hash table head pointer, presumably because
1810 * of a conflicting allocation on another CPU.
1811 * We need to reread the hash chain and try
1818 dtrace_membar_producer();
1821 * Now set the hash value to indicate that it's free.
1823 ASSERT(hash
[bucket
].dtdh_chain
!= dvar
);
1824 dvar
->dtdv_hashval
= DTRACE_DYNHASH_FREE
;
1826 dtrace_membar_producer();
1829 * Set the next pointer to point at the dirty list, and
1830 * atomically swing the dirty pointer to the newly freed dvar.
1833 next
= dcpu
->dtdsc_dirty
;
1834 dvar
->dtdv_next
= next
;
1835 } while (dtrace_casptr(&dcpu
->dtdsc_dirty
, next
, dvar
) != next
);
1838 * Finally, unlock this hash bucket.
1840 ASSERT(hash
[bucket
].dtdh_lock
== lock
);
1842 hash
[bucket
].dtdh_lock
++;
1852 * If dvar is NULL, it is because we went off the rails:
1853 * one of the elements that we traversed in the hash chain
1854 * was deleted while we were traversing it. In this case,
1855 * we assert that we aren't doing a dealloc (deallocs lock
1856 * the hash bucket to prevent themselves from racing with
1857 * one another), and retry the hash chain traversal.
1859 ASSERT(op
!= DTRACE_DYNVAR_DEALLOC
);
1863 if (op
!= DTRACE_DYNVAR_ALLOC
) {
1865 * If we are not to allocate a new variable, we want to
1866 * return NULL now. Before we return, check that the value
1867 * of the lock word hasn't changed. If it has, we may have
1868 * seen an inconsistent snapshot.
1870 if (op
== DTRACE_DYNVAR_NOALLOC
) {
1871 if (hash
[bucket
].dtdh_lock
!= lock
)
1874 ASSERT(op
== DTRACE_DYNVAR_DEALLOC
);
1875 ASSERT(hash
[bucket
].dtdh_lock
== lock
);
1877 hash
[bucket
].dtdh_lock
++;
1884 * We need to allocate a new dynamic variable. The size we need is the
1885 * size of dtrace_dynvar plus the size of nkeys dtrace_key_t's plus the
1886 * size of any auxiliary key data (rounded up to 8-byte alignment) plus
1887 * the size of any referred-to data (dsize). We then round the final
1888 * size up to the chunksize for allocation.
1890 for (ksize
= 0, i
= 0; i
< nkeys
; i
++)
1891 ksize
+= P2ROUNDUP(key
[i
].dttk_size
, sizeof (uint64_t));
1894 * This should be pretty much impossible, but could happen if, say,
1895 * strange DIF specified the tuple. Ideally, this should be an
1896 * assertion and not an error condition -- but that requires that the
1897 * chunksize calculation in dtrace_difo_chunksize() be absolutely
1898 * bullet-proof. (That is, it must not be able to be fooled by
1899 * malicious DIF.) Given the lack of backwards branches in DIF,
1900 * solving this would presumably not amount to solving the Halting
1901 * Problem -- but it still seems awfully hard.
1903 if (sizeof (dtrace_dynvar_t
) + sizeof (dtrace_key_t
) * (nkeys
- 1) +
1904 ksize
+ dsize
> chunksize
) {
1905 dcpu
->dtdsc_drops
++;
	nstate = DTRACE_DSTATE_EMPTY;

	do {
retry:
		free = dcpu->dtdsc_free;

		if (free == NULL) {
			dtrace_dynvar_t *clean = dcpu->dtdsc_clean;
			void *rval;

			if (clean == NULL) {
				/*
				 * We're out of dynamic variable space on
				 * this CPU.  Unless we have tried all CPUs,
				 * we'll try to allocate from a different
				 * CPU.
				 */
				switch (dstate->dtds_state) {
				case DTRACE_DSTATE_CLEAN: {
					void *sp = &dstate->dtds_state;

					if (++cpu >= (int)NCPU)
						cpu = 0;

					if (dcpu->dtdsc_dirty != NULL &&
					    nstate == DTRACE_DSTATE_EMPTY)
						nstate = DTRACE_DSTATE_DIRTY;

					if (dcpu->dtdsc_rinsing != NULL)
						nstate = DTRACE_DSTATE_RINSING;

					dcpu = &dstate->dtds_percpu[cpu];

					(void) dtrace_cas32(sp,
					    DTRACE_DSTATE_CLEAN, nstate);

					/*
					 * To increment the correct bean
					 * counter, take another lap.
					 */
					goto retry;
				}

				case DTRACE_DSTATE_DIRTY:
					dcpu->dtdsc_dirty_drops++;
					break;

				case DTRACE_DSTATE_RINSING:
					dcpu->dtdsc_rinsing_drops++;
					break;

				case DTRACE_DSTATE_EMPTY:
					dcpu->dtdsc_drops++;
					break;
				}

				DTRACE_CPUFLAG_SET(CPU_DTRACE_DROP);
				return (NULL);
			}

			/*
			 * The clean list appears to be non-empty.  We want to
			 * move the clean list to the free list; we start by
			 * moving the clean pointer aside.
			 */
			if (dtrace_casptr(&dcpu->dtdsc_clean,
			    clean, NULL) != clean) {
				/*
				 * We are in one of two situations:
				 *
				 *  (a)	The clean list was switched to the
				 *	free list by another CPU.
				 *
				 *  (b)	The clean list was added to by the
				 *	cleaning cyclic.
				 *
				 * In either of these situations, we can
				 * just reattempt the free list allocation.
				 */
				goto retry;
			}

			ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE);

			/*
			 * Now we'll move the clean list to the free list.
			 * It's impossible for this to fail:  the only way
			 * the free list can be updated is through this
			 * code path, and only one CPU can own the clean list.
			 * Thus, it would only be possible for this to fail if
			 * this code were racing with dtrace_dynvar_clean().
			 * (That is, if dtrace_dynvar_clean() updated the clean
			 * list, and we ended up racing to update the free
			 * list.)  This race is prevented by the dtrace_sync()
			 * in dtrace_dynvar_clean() -- which flushes the
			 * owners of the clean lists out before resetting
			 * the clean lists.
			 */
			rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean);
			ASSERT(rval == NULL);
			goto retry;
		}

		dvar = free;
		new_free = dvar->dtdv_next;
	} while (dtrace_casptr(&dcpu->dtdsc_free, free, new_free) != free);
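	/*
	 * The free list manipulation above is a lock-free LIFO pop:  read the
	 * head, compute the new head, and compare-and-swap it in, retrying
	 * whenever another CPU wins the race.  A minimal userland sketch of
	 * the same pattern, assuming C11 atomics in place of dtrace_casptr()
	 * and a toy chunk type (illustrative only, not compiled; note that
	 * DTrace additionally avoids ABA hazards by routing frees through the
	 * dirty and rinsing lists rather than directly back to the free list):
	 */
#if 0
#include <stdatomic.h>
#include <stddef.h>

struct chunk {
	struct chunk *next;
};

/*
 * Pop one chunk off a lock-free LIFO free list; returns NULL if the list is
 * empty.  On CAS failure, some other thread changed the head, so retry.
 */
static struct chunk *
freelist_pop(_Atomic(struct chunk *) *headp)
{
	struct chunk *head, *next;

	do {
		head = atomic_load(headp);
		if (head == NULL)
			return (NULL);
		next = head->next;
	} while (!atomic_compare_exchange_weak(headp, &head, next));

	return (head);
}
#endif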
	/*
	 * We have now allocated a new chunk.  We copy the tuple keys into the
	 * tuple array and copy any referenced key data into the data space
	 * following the tuple array.  As we do this, we relocate dttk_value
	 * in the final tuple to point to the key data address in the chunk.
	 */
	kdata = (uintptr_t)&dvar->dtdv_tuple.dtt_key[nkeys];
	dvar->dtdv_data = (void *)(kdata + ksize);
	dvar->dtdv_tuple.dtt_nkeys = nkeys;

	for (i = 0; i < nkeys; i++) {
		dtrace_key_t *dkey = &dvar->dtdv_tuple.dtt_key[i];
		size_t kesize = key[i].dttk_size;

		if (kesize != 0) {
			dtrace_bcopy(
			    (const void *)(uintptr_t)key[i].dttk_value,
			    (void *)kdata, kesize);
			dkey->dttk_value = kdata;
			kdata += P2ROUNDUP(kesize, sizeof (uint64_t));
		} else {
			dkey->dttk_value = key[i].dttk_value;
		}

		dkey->dttk_size = kesize;
	}

	ASSERT(dvar->dtdv_hashval == DTRACE_DYNHASH_FREE);
	dvar->dtdv_hashval = hashval;
	dvar->dtdv_next = start;

	if (dtrace_casptr(&hash[bucket].dtdh_chain, start, dvar) == start)
		return (dvar);

	/*
	 * The cas has failed.  Either another CPU is adding an element to
	 * this hash chain, or another CPU is deleting an element from this
	 * hash chain.  The simplest way to deal with both of these cases
	 * (though not necessarily the most efficient) is to free our
	 * allocated block and tail-call ourselves.  Note that the free is
	 * to the dirty list and _not_ to the free list.  This is to prevent
	 * races with allocators, above.
	 */
	dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;

	dtrace_membar_producer();

	do {
		free = dcpu->dtdsc_dirty;
		dvar->dtdv_next = free;
	} while (dtrace_casptr(&dcpu->dtdsc_dirty, free, dvar) != free);

	return (dtrace_dynvar(dstate, nkeys, key, dsize, op, mstate, vstate));
}
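/*
 * The allocation size used above follows the formula in the block comment:
 * the dynamic variable header (which embeds one key), (nkeys - 1) additional
 * keys, the 8-byte-rounded auxiliary key data, and the referred-to data, all
 * of which must fit within one chunk.  A simplified sketch of that check,
 * using hypothetical stand-in types rather than the real dtrace_dynvar_t and
 * dtrace_key_t (illustrative only, not compiled):
 */
#if 0
#include <stdint.h>
#include <stddef.h>

#define	ROUNDUP8(x)	(((x) + 7) & ~(size_t)7)

typedef struct {
	size_t dttk_size;	/* size of this key's auxiliary data */
} toy_key_t;

/*
 * Return nonzero if a dynamic variable with the given keys and value size
 * fits in a single chunk.  hdrsize is the header size (including one
 * embedded key) and keysize is the size of each additional embedded key.
 */
static int
toy_dynvar_fits(const toy_key_t *key, size_t nkeys, size_t dsize,
    size_t hdrsize, size_t keysize, size_t chunksize)
{
	size_t ksize = 0, i;

	for (i = 0; i < nkeys; i++)
		ksize += ROUNDUP8(key[i].dttk_size);

	return (hdrsize + keysize * (nkeys - 1) + ksize + dsize <= chunksize);
}
#endif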
static void
dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg)
{
#pragma unused(arg) /* __APPLE__ */
	if ((int64_t)nval < (int64_t)*oval)
		*oval = nval;
}

static void
dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg)
{
#pragma unused(arg) /* __APPLE__ */
	if ((int64_t)nval > (int64_t)*oval)
		*oval = nval;
}

static void
dtrace_aggregate_quantize(uint64_t *quanta, uint64_t nval, uint64_t incr)
{
	int i, zero = DTRACE_QUANTIZE_ZEROBUCKET;
	int64_t val = (int64_t)nval;

	if (val < 0) {
		for (i = 0; i < zero; i++) {
			if (val <= DTRACE_QUANTIZE_BUCKETVAL(i)) {
				quanta[i] += incr;
				return;
			}
		}
	} else {
		for (i = zero + 1; i < DTRACE_QUANTIZE_NBUCKETS; i++) {
			if (val < DTRACE_QUANTIZE_BUCKETVAL(i)) {
				quanta[i - 1] += incr;
				return;
			}
		}

		quanta[DTRACE_QUANTIZE_NBUCKETS - 1] += incr;
		return;
	}

	ASSERT(0);
}
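/*
 * quantize() walks the precomputed DTRACE_QUANTIZE_BUCKETVAL() boundaries;
 * conceptually, the positive buckets are successive powers of two (with a
 * mirrored set of negative buckets and a zero bucket in the middle).  A
 * generic sketch of picking a power-of-two bucket for a non-negative value
 * follows; it is not the DTrace macro layout, just the underlying idea
 * (illustrative only, not compiled):
 */
#if 0
#include <stdint.h>

/*
 * Return the power-of-two bucket index for a non-negative value:  bucket 0
 * holds 0, bucket 1 holds 1, bucket 2 holds 2..3, bucket 3 holds 4..7, etc.
 */
static int
pow2_bucket(uint64_t val)
{
	int bucket = 0;

	while (val != 0) {
		val >>= 1;
		bucket++;
	}

	return (bucket);
}
#endif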
static void
dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr)
{
	uint64_t arg = *lquanta++;
	int32_t base = DTRACE_LQUANTIZE_BASE(arg);
	uint16_t step = DTRACE_LQUANTIZE_STEP(arg);
	uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg);
	int32_t val = (int32_t)nval, level;

	ASSERT(step != 0);
	ASSERT(levels != 0);

	if (val < base) {
		/*
		 * This is an underflow.
		 */
		lquanta[0] += incr;
		return;
	}

	level = (val - base) / step;

	if (level < levels) {
		lquanta[level + 1] += incr;
		return;
	}

	/*
	 * This is an overflow.
	 */
	lquanta[levels + 1] += incr;
}
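/*
 * lquantize() stores its packed base/step/levels argument in the word that
 * precedes the bucket array, then selects a bucket with simple integer
 * arithmetic:  one underflow bucket, 'levels' linear buckets of width 'step',
 * and one overflow bucket.  A standalone sketch of just the bucket selection,
 * with plain parameters instead of the DTRACE_LQUANTIZE_*() encoding
 * (illustrative only, not compiled):
 */
#if 0
#include <stdint.h>

static int
lquantize_bucket(int32_t val, int32_t base, uint16_t step, uint16_t levels)
{
	int32_t level;

	if (val < base)
		return (0);		/* underflow bucket */

	level = (val - base) / step;

	if (level < levels)
		return (level + 1);	/* linear range */

	return (levels + 1);		/* overflow bucket */
}
#endif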
static uint64_t
dtrace_aggregate_llquantize_bucket(int16_t factor, int16_t low, int16_t high,
    int16_t nsteps, int64_t value)
{
	int64_t this = 1, last, next;
	int base = 1, order;

	for (order = 0; order < low; ++order)
		this *= factor;

	/*
	 * If our value is less than our factor taken to the power of the
	 * low order of magnitude, it goes into the zeroth bucket.
	 */
	if (value < this)
		return (0);
	else
		last = this;

	for (this *= factor; order <= high; ++order) {
		int nbuckets = this > nsteps ? nsteps : this;

		/*
		 * We should not generally get log/linear quantizations
		 * with a high magnitude that allows 64-bits to
		 * overflow, but we nonetheless protect against this
		 * by explicitly checking for overflow, and clamping
		 * our value accordingly.
		 */
		next = this * factor;
		if (next < this)
			value = this - 1;

		/*
		 * If our value lies within this order of magnitude,
		 * determine its position by taking the offset within
		 * the order of magnitude, dividing by the bucket
		 * width, and adding to our (accumulated) base.
		 */
		if (value < this)
			return (base + (value - last) / (this / nbuckets));

		base += nbuckets - (nbuckets / factor);
		last = this;
		this = next;
	}

	/*
	 * Our value is greater than or equal to our factor taken to the
	 * power of one plus the high magnitude -- return the top bucket.
	 */
	return (base);
}
static void
dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr)
{
	uint64_t arg = *llquanta++;
	uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg);
	uint16_t low = DTRACE_LLQUANTIZE_LOW(arg);
	uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg);
	uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg);

	llquanta[dtrace_aggregate_llquantize_bucket(factor, low, high, nsteps, nval)] += incr;
}

static void
dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg)
{
#pragma unused(arg) /* __APPLE__ */
	data[0]++;
	data[1] += nval;
}

static void
dtrace_aggregate_stddev(uint64_t *data, uint64_t nval, uint64_t arg)
{
#pragma unused(arg) /* __APPLE__ */
	int64_t snval = (int64_t)nval;
	uint64_t tmp[2];

	data[0]++;
	data[1] += nval;

	/*
	 * What we want to say here is:
	 *
	 * data[2] += nval * nval;
	 *
	 * But given that nval is 64-bit, we could easily overflow, so
	 * we do this as 128-bit arithmetic.
	 */
	if (snval < 0)
		snval = -snval;

	dtrace_multiply_128((uint64_t)snval, (uint64_t)snval, tmp);
	dtrace_add_128(data + 2, tmp, data + 2);
}

static void
dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg)
{
#pragma unused(nval, arg) /* __APPLE__ */
	*oval = *oval + 1;
}

static void
dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg)
{
#pragma unused(arg) /* __APPLE__ */
	*oval += nval;
}
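/*
 * dtrace_aggregate_stddev() keeps the sum of squares in data[2..3] as a
 * 128-bit value (low word first) precisely because squaring a 64-bit value
 * can overflow 64 bits.  On compilers that provide the unsigned __int128
 * extension, the same accumulation can be sketched directly; the kernel's
 * dtrace_multiply_128()/dtrace_add_128() helpers implement it without
 * relying on that extension (illustrative only, not compiled):
 */
#if 0
#include <stdint.h>

/*
 * Accumulate nval into a record laid out as:  data[0] = count,
 * data[1] = sum, data[2..3] = 128-bit sum of squares (low word first).
 */
static void
toy_aggregate_stddev(uint64_t data[4], uint64_t nval)
{
	int64_t snval = (int64_t)nval;
	unsigned __int128 sq, acc;

	data[0]++;
	data[1] += nval;

	if (snval < 0)
		snval = -snval;

	sq = (unsigned __int128)(uint64_t)snval * (uint64_t)snval;
	acc = ((unsigned __int128)data[3] << 64) | data[2];
	acc += sq;

	data[2] = (uint64_t)acc;
	data[3] = (uint64_t)(acc >> 64);
}
#endif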
/*
 * Aggregate given the tuple in the principal data buffer, and the aggregating
 * action denoted by the specified dtrace_aggregation_t.  The aggregation
 * buffer is specified as the buf parameter.  This routine does not return
 * failure; if there is no space in the aggregation buffer, the data will be
 * dropped, and a corresponding counter incremented.
 */
static void
dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf,
    intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg)
{
	dtrace_recdesc_t *rec = &agg->dtag_action.dta_rec;
	uint32_t i, ndx, size, fsize;
	uint32_t align = sizeof (uint64_t) - 1;
	dtrace_aggbuffer_t *agb;
	dtrace_aggkey_t *key;
	uint32_t hashval = 0, limit, isstr;
	caddr_t tomax, data, kdata;
	dtrace_actkind_t action;
	dtrace_action_t *act;
	uintptr_t offs;
	if (!agg->dtag_hasarg) {
		/*
		 * Currently, only quantize() and lquantize() take additional
		 * arguments, and they have the same semantics:  an increment
		 * value that defaults to 1 when not present.  If additional
		 * aggregating actions take arguments, the setting of the
		 * default argument value will presumably have to become more
		 * sophisticated.
		 */
		arg = 1;
	}

	action = agg->dtag_action.dta_kind - DTRACEACT_AGGREGATION;
	size = rec->dtrd_offset - agg->dtag_base;
	fsize = size + rec->dtrd_size;

	ASSERT(dbuf->dtb_tomax != NULL);
	data = dbuf->dtb_tomax + offset + agg->dtag_base;

	if ((tomax = buf->dtb_tomax) == NULL) {
		dtrace_buffer_drop(buf);
		return;
	}

	/*
	 * The metastructure is always at the bottom of the buffer.
	 */
	agb = (dtrace_aggbuffer_t *)(tomax + buf->dtb_size -
	    sizeof (dtrace_aggbuffer_t));
	if (buf->dtb_offset == 0) {
		/*
		 * We just kludge up approximately 1/8th of the size to be
		 * buckets.  If this guess ends up being routinely
		 * off-the-mark, we may need to dynamically readjust this
		 * based on past performance.
		 */
		uintptr_t hashsize = (buf->dtb_size >> 3) / sizeof (uintptr_t);

		if ((uintptr_t)agb - hashsize * sizeof (dtrace_aggkey_t *) <
		    (uintptr_t)tomax || hashsize == 0) {
			/*
			 * We've been given a ludicrously small buffer;
			 * increment our drop count and leave.
			 */
			dtrace_buffer_drop(buf);
			return;
		}

		/*
		 * And now, a pathetic attempt to try to get an odd (or
		 * perchance, a prime) hash size for better hash distribution.
		 */
		if (hashsize > (DTRACE_AGGHASHSIZE_SLEW << 3))
			hashsize -= DTRACE_AGGHASHSIZE_SLEW;

		agb->dtagb_hashsize = hashsize;
		agb->dtagb_hash = (dtrace_aggkey_t **)((uintptr_t)agb -
		    agb->dtagb_hashsize * sizeof (dtrace_aggkey_t *));
		agb->dtagb_free = (uintptr_t)agb->dtagb_hash;

		for (i = 0; i < agb->dtagb_hashsize; i++)
			agb->dtagb_hash[i] = NULL;
	}

	ASSERT(agg->dtag_first != NULL);
	ASSERT(agg->dtag_first->dta_intuple);
	/*
	 * Calculate the hash value based on the key.  Note that we _don't_
	 * include the aggid in the hashing (but we will store it as part of
	 * the key).  The hashing algorithm is Bob Jenkins' "One-at-a-time"
	 * algorithm:  a simple, quick algorithm that has no known funnels, and
	 * gets good distribution in practice.  The efficacy of the hashing
	 * algorithm (and a comparison with other algorithms) may be found by
	 * running the ::dtrace_aggstat MDB dcmd.
	 */
	for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
		i = act->dta_rec.dtrd_offset - agg->dtag_base;
		limit = i + act->dta_rec.dtrd_size;
		ASSERT(limit <= size);
		isstr = DTRACEACT_ISSTRING(act);

		for (; i < limit; i++) {
			hashval += data[i];
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			if (isstr && data[i] == '\0')
				break;
		}
	}

	hashval += (hashval << 3);
	hashval ^= (hashval >> 11);
	hashval += (hashval << 15);
	/*
	 * Yes, the divide here is expensive -- but it's generally the least
	 * of the performance issues given the amount of data that we iterate
	 * over to compute hash values, compare data, etc.
	 */
	ndx = hashval % agb->dtagb_hashsize;

	for (key = agb->dtagb_hash[ndx]; key != NULL; key = key->dtak_next) {
		ASSERT((caddr_t)key >= tomax);
		ASSERT((caddr_t)key < tomax + buf->dtb_size);

		if (hashval != key->dtak_hashval || key->dtak_size != size)
			continue;

		kdata = key->dtak_data;
		ASSERT(kdata >= tomax && kdata < tomax + buf->dtb_size);

		for (act = agg->dtag_first; act->dta_intuple;
		    act = act->dta_next) {
			i = act->dta_rec.dtrd_offset - agg->dtag_base;
			limit = i + act->dta_rec.dtrd_size;
			ASSERT(limit <= size);
			isstr = DTRACEACT_ISSTRING(act);

			for (; i < limit; i++) {
				if (kdata[i] != data[i])
					goto next;

				if (isstr && data[i] == '\0')
					break;
			}
		}

		if (action != key->dtak_action) {
			/*
			 * We are aggregating on the same value in the same
			 * aggregation with two different aggregating actions.
			 * (This should have been picked up in the compiler,
			 * so we may be dealing with errant or devious DIF.)
			 * This is an error condition; we indicate as much,
			 * and return.
			 */
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			return;
		}

		/*
		 * This is a hit:  we need to apply the aggregator to
		 * the value at this key.
		 */
		agg->dtag_aggregate((uint64_t *)(kdata + size), expr, arg);
		return;
next:
		continue;
	}
	/*
	 * We didn't find it.  We need to allocate some zero-filled space,
	 * link it into the hash table appropriately, and apply the aggregator
	 * to the (zero-filled) value.
	 */
	offs = buf->dtb_offset;
	while (offs & (align - 1))
		offs += sizeof (uint32_t);

	/*
	 * If we don't have enough room to both allocate a new key _and_
	 * its associated data, increment the drop count and return.
	 */
	if ((uintptr_t)tomax + offs + fsize >
	    agb->dtagb_free - sizeof (dtrace_aggkey_t)) {
		dtrace_buffer_drop(buf);
		return;
	}

	ASSERT(!(sizeof (dtrace_aggkey_t) & (sizeof (uintptr_t) - 1)));
	key = (dtrace_aggkey_t *)(agb->dtagb_free - sizeof (dtrace_aggkey_t));
	agb->dtagb_free -= sizeof (dtrace_aggkey_t);

	key->dtak_data = kdata = tomax + offs;
	buf->dtb_offset = offs + fsize;

	/*
	 * Now copy the data across.
	 */
	*((dtrace_aggid_t *)kdata) = agg->dtag_id;

	for (i = sizeof (dtrace_aggid_t); i < size; i++)
		kdata[i] = data[i];

	/*
	 * Because strings are not zeroed out by default, we need to iterate
	 * looking for actions that store strings, and we need to explicitly
	 * pad these strings out with zeroes.
	 */
	for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
		int nul;

		if (!DTRACEACT_ISSTRING(act))
			continue;

		i = act->dta_rec.dtrd_offset - agg->dtag_base;
		limit = i + act->dta_rec.dtrd_size;
		ASSERT(limit <= size);

		for (nul = 0; i < limit; i++) {
			if (nul) {
				kdata[i] = '\0';
				continue;
			}

			if (data[i] != '\0')
				continue;

			nul = 1;
		}
	}

	for (i = size; i < fsize; i++)
		kdata[i] = 0;

	key->dtak_hashval = hashval;
	key->dtak_size = size;
	key->dtak_action = action;
	key->dtak_next = agb->dtagb_hash[ndx];
	agb->dtagb_hash[ndx] = key;

	/*
	 * Finally, apply the aggregator.
	 */
	*((uint64_t *)(key->dtak_data + size)) = agg->dtag_initial;
	agg->dtag_aggregate((uint64_t *)(key->dtak_data + size), expr, arg);
}
/*
 * Given consumer state, this routine finds a speculation in the INACTIVE
 * state and transitions it into the ACTIVE state.  If there is no speculation
 * in the INACTIVE state, 0 is returned.  In this case, no error counter is
 * incremented -- it is up to the caller to take appropriate action.
 */
static int
dtrace_speculation(dtrace_state_t *state)
{
	int i = 0;
	dtrace_speculation_state_t current;
	uint32_t *stat = &state->dts_speculations_unavail, count;

	while (i < state->dts_nspeculations) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];

		current = spec->dtsp_state;

		if (current != DTRACESPEC_INACTIVE) {
			if (current == DTRACESPEC_COMMITTINGMANY ||
			    current == DTRACESPEC_COMMITTING ||
			    current == DTRACESPEC_DISCARDING)
				stat = &state->dts_speculations_busy;

			i++;
			continue;
		}

		if (dtrace_cas32((uint32_t *)&spec->dtsp_state,
		    current, DTRACESPEC_ACTIVE) == current)
			return (i + 1);
	}

	/*
	 * We couldn't find a speculation.  If we found as much as a single
	 * busy speculation buffer, we'll attribute this failure as "busy"
	 * instead of "unavail".
	 */
	do {
		count = *stat;
	} while (dtrace_cas32(stat, count, count + 1) != count);

	return (0);
}
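/*
 * The speculation routines here all share the same lock-free idiom:  snapshot
 * the current state, compute the desired new state, and dtrace_cas32() the
 * transition, retrying if another CPU changed the state in the meantime.  A
 * minimal userland sketch of the idiom, assuming C11 atomics and toy state
 * names (illustrative only, not compiled):
 */
#if 0
#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>

#define	TOY_INACTIVE	0
#define	TOY_ACTIVE	1

/*
 * Atomically transition a state word from TOY_INACTIVE to TOY_ACTIVE.
 * Returns false if the state is no longer eligible for the transition.
 */
static bool
toy_activate(_Atomic uint32_t *statep)
{
	uint32_t current;

	do {
		current = atomic_load(statep);

		if (current != TOY_INACTIVE)
			return (false);
	} while (!atomic_compare_exchange_weak(statep, &current, TOY_ACTIVE));

	return (true);
}
#endif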
/*
 * This routine commits an active speculation.  If the specified speculation
 * is not in a valid state to perform a commit(), this routine will silently do
 * nothing.  The state of the specified speculation is transitioned according
 * to the state transition diagram outlined in <sys/dtrace_impl.h>
 */
static void
dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_buffer_t *src, *dest;
	uintptr_t daddr, saddr, dlimit;
	dtrace_speculation_state_t current, new = DTRACESPEC_INACTIVE;
	intptr_t offs;

	if (which == 0)
		return;

	if (which > (dtrace_specid_t)state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}

	spec = &state->dts_speculations[which - 1];
	src = &spec->dtsp_buffer[cpu];
	dest = &state->dts_buffer[cpu];

	do {
		current = spec->dtsp_state;

		if (current == DTRACESPEC_COMMITTINGMANY)
			break;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_DISCARDING:
			return;

		case DTRACESPEC_COMMITTING:
			/*
			 * This is only possible if we are (a) commit()'ing
			 * without having done a prior speculate() on this CPU
			 * and (b) racing with another commit() on a different
			 * CPU.  There's nothing to do -- we just assert that
			 * our offset is 0.
			 */
			ASSERT(src->dtb_offset == 0);
			return;

		case DTRACESPEC_ACTIVE:
			new = DTRACESPEC_COMMITTING;
			break;

		case DTRACESPEC_ACTIVEONE:
			/*
			 * This speculation is active on one CPU.  If our
			 * buffer offset is non-zero, we know that the one CPU
			 * must be us.  Otherwise, we are committing on a
			 * different CPU from the speculate(), and we must
			 * rely on being asynchronously cleaned.
			 */
			if (src->dtb_offset != 0) {
				new = DTRACESPEC_COMMITTING;
				break;
			}
			/*FALLTHROUGH*/

		case DTRACESPEC_ACTIVEMANY:
			new = DTRACESPEC_COMMITTINGMANY;
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	/*
	 * We have set the state to indicate that we are committing this
	 * speculation.  Now reserve the necessary space in the destination
	 * buffer.
	 */
	if ((offs = dtrace_buffer_reserve(dest, src->dtb_offset,
	    sizeof (uint64_t), state, NULL)) < 0) {
		dtrace_buffer_drop(dest);
		goto out;
	}

	/*
	 * We have the space; copy the buffer across.  (Note that this is a
	 * highly sub-optimal bcopy(); in the unlikely event that this becomes
	 * a serious performance issue, a high-performance DTrace-specific
	 * bcopy() should obviously be invented.)
	 */
	daddr = (uintptr_t)dest->dtb_tomax + offs;
	dlimit = daddr + src->dtb_offset;
	saddr = (uintptr_t)src->dtb_tomax;

	/*
	 * First, the aligned portion.
	 */
	while (dlimit - daddr >= sizeof (uint64_t)) {
		*((uint64_t *)daddr) = *((uint64_t *)saddr);

		daddr += sizeof (uint64_t);
		saddr += sizeof (uint64_t);
	}

	/*
	 * Now any left-over bit...
	 */
	while (dlimit - daddr)
		*((uint8_t *)daddr++) = *((uint8_t *)saddr++);

	/*
	 * Finally, commit the reserved space in the destination buffer.
	 */
	dest->dtb_offset = offs + src->dtb_offset;

out:
	/*
	 * If we're lucky enough to be the only active CPU on this speculation
	 * buffer, we can just set the state back to DTRACESPEC_INACTIVE.
	 */
	if (current == DTRACESPEC_ACTIVE ||
	    (current == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) {
		uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state,
		    DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE);
#pragma unused(rval) /* __APPLE__ */

		ASSERT(rval == DTRACESPEC_COMMITTING);
	}

	src->dtb_offset = 0;
	src->dtb_xamot_drops += src->dtb_drops;
	src->dtb_drops = 0;
}
/*
 * This routine discards an active speculation.  If the specified speculation
 * is not in a valid state to perform a discard(), this routine will silently
 * do nothing.  The state of the specified speculation is transitioned
 * according to the state transition diagram outlined in <sys/dtrace_impl.h>
 */
static void
dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_speculation_state_t current, new = DTRACESPEC_INACTIVE;
	dtrace_buffer_t *buf;

	if (which == 0)
		return;

	if (which > (dtrace_specid_t)state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}

	spec = &state->dts_speculations[which - 1];
	buf = &spec->dtsp_buffer[cpu];

	do {
		current = spec->dtsp_state;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_COMMITTINGMANY:
		case DTRACESPEC_COMMITTING:
		case DTRACESPEC_DISCARDING:
			return;

		case DTRACESPEC_ACTIVE:
		case DTRACESPEC_ACTIVEMANY:
			new = DTRACESPEC_DISCARDING;
			break;

		case DTRACESPEC_ACTIVEONE:
			if (buf->dtb_offset != 0) {
				new = DTRACESPEC_INACTIVE;
			} else {
				new = DTRACESPEC_DISCARDING;
			}
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	buf->dtb_offset = 0;
	buf->dtb_drops = 0;
}
/*
 * Note:  not called from probe context.  This function is called
 * asynchronously from cross call context to clean any speculations that are
 * in the COMMITTINGMANY or DISCARDING states.  These speculations may not be
 * transitioned back to the INACTIVE state until all CPUs have cleaned the
 * speculation.
 */
static void
dtrace_speculation_clean_here(dtrace_state_t *state)
{
	dtrace_icookie_t cookie;
	processorid_t cpu = CPU->cpu_id;
	dtrace_buffer_t *dest = &state->dts_buffer[cpu];
	dtrace_specid_t i;

	cookie = dtrace_interrupt_disable();

	if (dest->dtb_tomax == NULL) {
		dtrace_interrupt_enable(cookie);
		return;
	}

	for (i = 0; i < (dtrace_specid_t)state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];
		dtrace_buffer_t *src = &spec->dtsp_buffer[cpu];

		if (src->dtb_tomax == NULL)
			continue;

		if (spec->dtsp_state == DTRACESPEC_DISCARDING) {
			src->dtb_offset = 0;
			continue;
		}

		if (spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
			continue;

		if (src->dtb_offset == 0)
			continue;

		dtrace_speculation_commit(state, cpu, i + 1);
	}

	dtrace_interrupt_enable(cookie);
}
/*
 * Note:  not called from probe context.  This function is called
 * asynchronously (and at a regular interval) to clean any speculations that
 * are in the COMMITTINGMANY or DISCARDING states.  If it discovers that there
 * is work to be done, it cross calls all CPUs to perform that work;
 * COMMITTINGMANY and DISCARDING speculations may not be transitioned back to
 * the INACTIVE state until they have been cleaned by all CPUs.
 */
static void
dtrace_speculation_clean(dtrace_state_t *state)
{
	int work = 0;
	uint32_t rv;
	dtrace_specid_t i;

	for (i = 0; i < (dtrace_specid_t)state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];

		ASSERT(!spec->dtsp_cleaning);

		if (spec->dtsp_state != DTRACESPEC_DISCARDING &&
		    spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
			continue;

		work++;
		spec->dtsp_cleaning = 1;
	}

	if (!work)
		return;

	dtrace_xcall(DTRACE_CPUALL,
	    (dtrace_xcall_t)dtrace_speculation_clean_here, state);

	/*
	 * We now know that all CPUs have committed or discarded their
	 * speculation buffers, as appropriate.  We can now set the state
	 * to inactive.
	 */
	for (i = 0; i < (dtrace_specid_t)state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];
		dtrace_speculation_state_t current, new;

		if (!spec->dtsp_cleaning)
			continue;

		current = spec->dtsp_state;
		ASSERT(current == DTRACESPEC_DISCARDING ||
		    current == DTRACESPEC_COMMITTINGMANY);

		new = DTRACESPEC_INACTIVE;

		rv = dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new);
		ASSERT(rv == current);
		spec->dtsp_cleaning = 0;
	}
}
/*
 * Called as part of a speculate() to get the speculative buffer associated
 * with a given speculation.  Returns NULL if the specified speculation is not
 * in an ACTIVE state.  If the speculation is in the ACTIVEONE state -- and
 * the active CPU is not the specified CPU -- the speculation will be
 * atomically transitioned into the ACTIVEMANY state.
 */
static dtrace_buffer_t *
dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_speculation_state_t current, new = DTRACESPEC_INACTIVE;
	dtrace_buffer_t *buf;

	if (which == 0)
		return (NULL);

	if (which > (dtrace_specid_t)state->dts_nspeculations) {
		cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return (NULL);
	}

	spec = &state->dts_speculations[which - 1];
	buf = &spec->dtsp_buffer[cpuid];

	do {
		current = spec->dtsp_state;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_COMMITTINGMANY:
		case DTRACESPEC_DISCARDING:
			return (NULL);

		case DTRACESPEC_COMMITTING:
			ASSERT(buf->dtb_offset == 0);
			return (NULL);

		case DTRACESPEC_ACTIVEONE:
			/*
			 * This speculation is currently active on one CPU.
			 * Check the offset in the buffer; if it's non-zero,
			 * that CPU must be us (and we leave the state alone).
			 * If it's zero, assume that we're starting on a new
			 * CPU -- and change the state to indicate that the
			 * speculation is active on more than one CPU.
			 */
			if (buf->dtb_offset != 0)
				return (buf);

			new = DTRACESPEC_ACTIVEMANY;
			break;

		case DTRACESPEC_ACTIVEMANY:
			return (buf);

		case DTRACESPEC_ACTIVE:
			new = DTRACESPEC_ACTIVEONE;
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	ASSERT(new == DTRACESPEC_ACTIVEONE || new == DTRACESPEC_ACTIVEMANY);
	return (buf);
}
/*
 * Return a string.  In the event that the user lacks the privilege to access
 * arbitrary kernel memory, we copy the string out to scratch memory so that we
 * don't fail access checking.
 *
 * dtrace_dif_variable() uses this routine as a helper for various
 * builtin values such as 'execname' and 'probefunc.'
 */
static uint64_t
dtrace_dif_varstr(uintptr_t addr, dtrace_state_t *state,
    dtrace_mstate_t *mstate)
{
	uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
	uintptr_t ret;
	size_t strsz;

	/*
	 * The easy case:  this probe is allowed to read all of memory, so
	 * we can just return this as a vanilla pointer.
	 */
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
		return (addr);

	/*
	 * This is the tougher case:  we copy the string in question from
	 * kernel memory into scratch memory and return it that way:  this
	 * ensures that we won't trip up when access checking tests the
	 * BYREF return value.
	 */
	strsz = dtrace_strlen((char *)addr, size) + 1;

	if (mstate->dtms_scratch_ptr + strsz >
	    mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
		return (0);
	}

	dtrace_strcpy((const void *)addr, (void *)mstate->dtms_scratch_ptr,
	    strsz);

	ret = mstate->dtms_scratch_ptr;
	mstate->dtms_scratch_ptr += strsz;
	return (ret);
}
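/*
 * dtrace_dif_varstr() is also a compact example of the scratch-space
 * discipline used throughout probe context:  check that the request fits
 * between dtms_scratch_ptr and the end of the region, flag
 * CPU_DTRACE_NOSCRATCH and fail if it does not, and otherwise bump the
 * pointer.  A simplified sketch of that bump allocator, using hypothetical
 * names (illustrative only, not compiled):
 */
#if 0
#include <stdint.h>
#include <stddef.h>

struct toy_scratch {
	uintptr_t base;		/* start of the scratch region */
	uintptr_t ptr;		/* next free byte */
	size_t size;		/* total size of the region */
};

/*
 * Bump-allocate 'len' bytes from the scratch region; return 0 when the
 * region is exhausted (the caller then accounts a "no scratch" drop).
 */
static uintptr_t
toy_scratch_alloc(struct toy_scratch *s, size_t len)
{
	uintptr_t ret;

	if (s->ptr + len > s->base + s->size)
		return (0);

	ret = s->ptr;
	s->ptr += len;
	return (ret);
}
#endif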
/*
 * This function implements the DIF emulator's variable lookups.  The emulator
 * passes a reserved variable identifier and optional built-in array index.
 */
static uint64_t
dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
    uint64_t ndx)
{
	/*
	 * If we're accessing one of the uncached arguments, we'll turn this
	 * into a reference in the args array.
	 */
	if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) {
		ndx = v - DIF_VAR_ARG0;
		v = DIF_VAR_ARGS;
	}

	switch (v) {
	case DIF_VAR_ARGS:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS);
		if (ndx >= sizeof (mstate->dtms_arg) /
		    sizeof (mstate->dtms_arg[0])) {
			/*
			 * APPLE NOTE: Account for introduction of __dtrace_probe()
			 */
			int aframes = mstate->dtms_probe->dtpr_aframes + 3;
			dtrace_provider_t *pv;
			uint64_t val;

			pv = mstate->dtms_probe->dtpr_provider;
			if (pv->dtpv_pops.dtps_getargval != NULL)
				val = pv->dtpv_pops.dtps_getargval(pv->dtpv_arg,
				    mstate->dtms_probe->dtpr_id,
				    mstate->dtms_probe->dtpr_arg, ndx, aframes);
			/* Special case access of arg5 as passed to dtrace_probe_error() (which see.) */
			else if (mstate->dtms_probe->dtpr_id == dtrace_probeid_error && ndx == 5) {
				return ((dtrace_state_t *)(uintptr_t)(mstate->dtms_arg[0]))->dts_arg_error_illval;
			}
			else
				val = dtrace_getarg(ndx, aframes);

			/*
			 * This is regrettably required to keep the compiler
			 * from tail-optimizing the call to dtrace_getarg().
			 * The condition always evaluates to true, but the
			 * compiler has no way of figuring that out a priori.
			 * (None of this would be necessary if the compiler
			 * could be relied upon to _always_ tail-optimize
			 * the call to dtrace_getarg() -- but it can't.)
			 */
			if (mstate->dtms_probe != NULL)
				return (val);

			ASSERT(0);
		}

		return (mstate->dtms_arg[ndx]);
3050 case DIF_VAR_UREGS
: {
3053 if (!dtrace_priv_proc(state
))
3056 if ((thread
= current_thread()) == NULL
) {
3057 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR
);
3058 cpu_core
[CPU
->cpu_id
].cpuc_dtrace_illval
= 0;
3062 return (dtrace_getreg(find_user_regs(thread
), ndx
));
3066 case DIF_VAR_CURTHREAD
:
3067 if (!dtrace_priv_kernel(state
))
3070 return ((uint64_t)(uintptr_t)current_thread());
3072 case DIF_VAR_TIMESTAMP
:
3073 if (!(mstate
->dtms_present
& DTRACE_MSTATE_TIMESTAMP
)) {
3074 mstate
->dtms_timestamp
= dtrace_gethrtime();
3075 mstate
->dtms_present
|= DTRACE_MSTATE_TIMESTAMP
;
3077 return (mstate
->dtms_timestamp
);
3079 case DIF_VAR_VTIMESTAMP
:
3080 ASSERT(dtrace_vtime_references
!= 0);
3081 return (dtrace_get_thread_vtime(current_thread()));
3083 case DIF_VAR_WALLTIMESTAMP
:
3084 if (!(mstate
->dtms_present
& DTRACE_MSTATE_WALLTIMESTAMP
)) {
3085 mstate
->dtms_walltimestamp
= dtrace_gethrestime();
3086 mstate
->dtms_present
|= DTRACE_MSTATE_WALLTIMESTAMP
;
3088 return (mstate
->dtms_walltimestamp
);
3090 case DIF_VAR_MACHTIMESTAMP
:
3091 if (!(mstate
->dtms_present
& DTRACE_MSTATE_MACHTIMESTAMP
)) {
3092 mstate
->dtms_machtimestamp
= mach_absolute_time();
3093 mstate
->dtms_present
|= DTRACE_MSTATE_MACHTIMESTAMP
;
3095 return (mstate
->dtms_machtimestamp
);
3098 if (!dtrace_priv_kernel(state
))
3100 if (!(mstate
->dtms_present
& DTRACE_MSTATE_IPL
)) {
3101 mstate
->dtms_ipl
= dtrace_getipl();
3102 mstate
->dtms_present
|= DTRACE_MSTATE_IPL
;
3104 return (mstate
->dtms_ipl
);
3107 ASSERT(mstate
->dtms_present
& DTRACE_MSTATE_EPID
);
3108 return (mstate
->dtms_epid
);
3111 ASSERT(mstate
->dtms_present
& DTRACE_MSTATE_PROBE
);
3112 return (mstate
->dtms_probe
->dtpr_id
);
3114 case DIF_VAR_STACKDEPTH
:
3115 if (!dtrace_priv_kernel(state
))
3117 if (!(mstate
->dtms_present
& DTRACE_MSTATE_STACKDEPTH
)) {
3119 * APPLE NOTE: Account for introduction of __dtrace_probe()
3121 int aframes
= mstate
->dtms_probe
->dtpr_aframes
+ 3;
3123 mstate
->dtms_stackdepth
= dtrace_getstackdepth(aframes
);
3124 mstate
->dtms_present
|= DTRACE_MSTATE_STACKDEPTH
;
3126 return (mstate
->dtms_stackdepth
);
3128 case DIF_VAR_USTACKDEPTH
:
3129 if (!dtrace_priv_proc(state
))
3131 if (!(mstate
->dtms_present
& DTRACE_MSTATE_USTACKDEPTH
)) {
3133 * See comment in DIF_VAR_PID.
3135 if (DTRACE_ANCHORED(mstate
->dtms_probe
) &&
3137 mstate
->dtms_ustackdepth
= 0;
3139 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3140 mstate
->dtms_ustackdepth
=
3141 dtrace_getustackdepth();
3142 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3144 mstate
->dtms_present
|= DTRACE_MSTATE_USTACKDEPTH
;
3146 return (mstate
->dtms_ustackdepth
);
3148 case DIF_VAR_CALLER
:
3149 if (!dtrace_priv_kernel(state
))
3151 if (!(mstate
->dtms_present
& DTRACE_MSTATE_CALLER
)) {
3153 * APPLE NOTE: Account for introduction of __dtrace_probe()
3155 int aframes
= mstate
->dtms_probe
->dtpr_aframes
+ 3;
3157 if (!DTRACE_ANCHORED(mstate
->dtms_probe
)) {
3159 * If this is an unanchored probe, we are
3160 * required to go through the slow path:
3161 * dtrace_caller() only guarantees correct
3162 * results for anchored probes.
3166 dtrace_getpcstack(caller
, 2, aframes
,
3167 (uint32_t *)(uintptr_t)mstate
->dtms_arg
[0]);
3168 mstate
->dtms_caller
= caller
[1];
3169 } else if ((mstate
->dtms_caller
=
3170 dtrace_caller(aframes
)) == (uintptr_t)-1) {
3172 * We have failed to do this the quick way;
3173 * we must resort to the slower approach of
3174 * calling dtrace_getpcstack().
3178 dtrace_getpcstack(&caller
, 1, aframes
, NULL
);
3179 mstate
->dtms_caller
= caller
;
3182 mstate
->dtms_present
|= DTRACE_MSTATE_CALLER
;
3184 return (mstate
->dtms_caller
);
3186 case DIF_VAR_UCALLER
:
3187 if (!dtrace_priv_proc(state
))
3190 if (!(mstate
->dtms_present
& DTRACE_MSTATE_UCALLER
)) {
3194 * dtrace_getupcstack() fills in the first uint64_t
3195 * with the current PID. The second uint64_t will
3196 * be the program counter at user-level. The third
3197 * uint64_t will contain the caller, which is what
3201 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3202 dtrace_getupcstack(ustack
, 3);
3203 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3204 mstate
->dtms_ucaller
= ustack
[2];
3205 mstate
->dtms_present
|= DTRACE_MSTATE_UCALLER
;
3208 return (mstate
->dtms_ucaller
);
3210 case DIF_VAR_PROBEPROV
:
3211 ASSERT(mstate
->dtms_present
& DTRACE_MSTATE_PROBE
);
3212 return (dtrace_dif_varstr(
3213 (uintptr_t)mstate
->dtms_probe
->dtpr_provider
->dtpv_name
,
3216 case DIF_VAR_PROBEMOD
:
3217 ASSERT(mstate
->dtms_present
& DTRACE_MSTATE_PROBE
);
3218 return (dtrace_dif_varstr(
3219 (uintptr_t)mstate
->dtms_probe
->dtpr_mod
,
3222 case DIF_VAR_PROBEFUNC
:
3223 ASSERT(mstate
->dtms_present
& DTRACE_MSTATE_PROBE
);
3224 return (dtrace_dif_varstr(
3225 (uintptr_t)mstate
->dtms_probe
->dtpr_func
,
3228 case DIF_VAR_PROBENAME
:
3229 ASSERT(mstate
->dtms_present
& DTRACE_MSTATE_PROBE
);
3230 return (dtrace_dif_varstr(
3231 (uintptr_t)mstate
->dtms_probe
->dtpr_name
,
3235 if (!dtrace_priv_proc_relaxed(state
))
3239 * Note that we are assuming that an unanchored probe is
3240 * always due to a high-level interrupt. (And we're assuming
3241 * that there is only a single high level interrupt.)
3243 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
3244 /* Anchored probe that fires while on an interrupt accrues to process 0 */
3247 return ((uint64_t)dtrace_proc_selfpid());
3250 if (!dtrace_priv_proc_relaxed(state
))
3254 * See comment in DIF_VAR_PID.
3256 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
3259 return ((uint64_t)dtrace_proc_selfppid());
3262 /* We do not need to check for null current_thread() */
3263 return thread_tid(current_thread()); /* globally unique */
3265 case DIF_VAR_PTHREAD_SELF
:
3266 if (!dtrace_priv_proc(state
))
3269 /* Not currently supported, but we should be able to delta the dispatchqaddr and dispatchqoffset to get pthread_self */
3272 case DIF_VAR_DISPATCHQADDR
:
3273 if (!dtrace_priv_proc(state
))
3276 /* We do not need to check for null current_thread() */
3277 return thread_dispatchqaddr(current_thread());
3279 case DIF_VAR_EXECNAME
:
3281 char *xname
= (char *)mstate
->dtms_scratch_ptr
;
3282 size_t scratch_size
= MAXCOMLEN
+1;
3284 /* The scratch allocation's lifetime is that of the clause. */
3285 if (!DTRACE_INSCRATCH(mstate
, scratch_size
)) {
3286 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3290 if (!dtrace_priv_proc_relaxed(state
))
3293 mstate
->dtms_scratch_ptr
+= scratch_size
;
3294 proc_selfname( xname
, MAXCOMLEN
);
3296 return ((uint64_t)(uintptr_t)xname
);
3300 case DIF_VAR_ZONENAME
:
3302 /* scratch_size is equal to length('global') + 1 for the null-terminator. */
3303 char *zname
= (char *)mstate
->dtms_scratch_ptr
;
3304 size_t scratch_size
= 6 + 1;
3306 if (!dtrace_priv_proc(state
))
3309 /* The scratch allocation's lifetime is that of the clause. */
3310 if (!DTRACE_INSCRATCH(mstate
, scratch_size
)) {
3311 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3315 mstate
->dtms_scratch_ptr
+= scratch_size
;
3317 /* The kernel does not provide zonename, it will always return 'global'. */
3318 strlcpy(zname
, "global", scratch_size
);
3320 return ((uint64_t)(uintptr_t)zname
);
3324 if (!dtrace_priv_proc_relaxed(state
))
3328 * See comment in DIF_VAR_PID.
3330 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
3333 return ((uint64_t) dtrace_proc_selfruid());
3336 if (!dtrace_priv_proc(state
))
3340 * See comment in DIF_VAR_PID.
3342 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
3345 if (dtrace_CRED() != NULL
)
3346 /* Credential does not require lazy initialization. */
3347 return ((uint64_t)kauth_getgid());
3349 /* proc_lock would be taken under kauth_cred_proc_ref() in kauth_cred_get(). */
3350 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP
);
3354 case DIF_VAR_ERRNO
: {
3355 uthread_t uthread
= (uthread_t
)get_bsdthread_info(current_thread());
3356 if (!dtrace_priv_proc(state
))
3360 * See comment in DIF_VAR_PID.
3362 if (DTRACE_ANCHORED(mstate
->dtms_probe
) && CPU_ON_INTR(CPU
))
3366 return (uint64_t)uthread
->t_dtrace_errno
;
3368 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP
);
3374 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP
);
/*
 * Emulate the execution of DTrace ID subroutines invoked by the call opcode.
 * Notice that we don't bother validating the proper number of arguments or
 * their types in the tuple stack.  This isn't needed because all argument
 * interpretation is safe because of our load safety -- the worst that can
 * happen is that a bogus program can obtain bogus results.
 */
static void
dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
    dtrace_key_t *tupregs, int nargs,
    dtrace_mstate_t *mstate, dtrace_state_t *state)
{
	volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
	volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
	dtrace_vstate_t *vstate = &state->dts_vstate;

#if !defined(__APPLE__)
	union {
		mutex_impl_t mi;
		uint64_t mx;
	} m;

	union {
		krwlock_t ri;
		uintptr_t rw;
	} r;
#else
	/* FIXME: awaits lock/mutex work */
#endif /* __APPLE__ */

	switch (subr) {
	case DIF_SUBR_RAND:
		regs[rd] = (dtrace_gethrtime() * 2416 + 374441) % 1771875;
		break;
3414 #if !defined(__APPLE__)
3415 case DIF_SUBR_MUTEX_OWNED
:
3416 if (!dtrace_canload(tupregs
[0].dttk_value
, sizeof (kmutex_t
),
3422 m
.mx
= dtrace_load64(tupregs
[0].dttk_value
);
3423 if (MUTEX_TYPE_ADAPTIVE(&m
.mi
))
3424 regs
[rd
] = MUTEX_OWNER(&m
.mi
) != MUTEX_NO_OWNER
;
3426 regs
[rd
] = LOCK_HELD(&m
.mi
.m_spin
.m_spinlock
);
3429 case DIF_SUBR_MUTEX_OWNER
:
3430 if (!dtrace_canload(tupregs
[0].dttk_value
, sizeof (kmutex_t
),
3436 m
.mx
= dtrace_load64(tupregs
[0].dttk_value
);
3437 if (MUTEX_TYPE_ADAPTIVE(&m
.mi
) &&
3438 MUTEX_OWNER(&m
.mi
) != MUTEX_NO_OWNER
)
3439 regs
[rd
] = (uintptr_t)MUTEX_OWNER(&m
.mi
);
3444 case DIF_SUBR_MUTEX_TYPE_ADAPTIVE
:
3445 if (!dtrace_canload(tupregs
[0].dttk_value
, sizeof (kmutex_t
),
3451 m
.mx
= dtrace_load64(tupregs
[0].dttk_value
);
3452 regs
[rd
] = MUTEX_TYPE_ADAPTIVE(&m
.mi
);
3455 case DIF_SUBR_MUTEX_TYPE_SPIN
:
3456 if (!dtrace_canload(tupregs
[0].dttk_value
, sizeof (kmutex_t
),
3462 m
.mx
= dtrace_load64(tupregs
[0].dttk_value
);
3463 regs
[rd
] = MUTEX_TYPE_SPIN(&m
.mi
);
3466 case DIF_SUBR_RW_READ_HELD
: {
3469 if (!dtrace_canload(tupregs
[0].dttk_value
, sizeof (uintptr_t),
3475 r
.rw
= dtrace_loadptr(tupregs
[0].dttk_value
);
3476 regs
[rd
] = _RW_READ_HELD(&r
.ri
, tmp
);
3480 case DIF_SUBR_RW_WRITE_HELD
:
3481 if (!dtrace_canload(tupregs
[0].dttk_value
, sizeof (krwlock_t
),
3487 r
.rw
= dtrace_loadptr(tupregs
[0].dttk_value
);
3488 regs
[rd
] = _RW_WRITE_HELD(&r
.ri
);
3491 case DIF_SUBR_RW_ISWRITER
:
3492 if (!dtrace_canload(tupregs
[0].dttk_value
, sizeof (krwlock_t
),
3498 r
.rw
= dtrace_loadptr(tupregs
[0].dttk_value
);
3499 regs
[rd
] = _RW_ISWRITER(&r
.ri
);
3502 /* FIXME: awaits lock/mutex work */
3503 #endif /* __APPLE__ */
3505 case DIF_SUBR_BCOPY
: {
3507 * We need to be sure that the destination is in the scratch
3508 * region -- no other region is allowed.
3510 uintptr_t src
= tupregs
[0].dttk_value
;
3511 uintptr_t dest
= tupregs
[1].dttk_value
;
3512 size_t size
= tupregs
[2].dttk_value
;
3514 if (!dtrace_inscratch(dest
, size
, mstate
)) {
3515 *flags
|= CPU_DTRACE_BADADDR
;
3520 if (!dtrace_canload(src
, size
, mstate
, vstate
)) {
3525 dtrace_bcopy((void *)src
, (void *)dest
, size
);
3529 case DIF_SUBR_ALLOCA
:
3530 case DIF_SUBR_COPYIN
: {
3531 uintptr_t dest
= P2ROUNDUP(mstate
->dtms_scratch_ptr
, 8);
3533 tupregs
[subr
== DIF_SUBR_ALLOCA
? 0 : 1].dttk_value
;
3534 size_t scratch_size
= (dest
- mstate
->dtms_scratch_ptr
) + size
;
3537 * This action doesn't require any credential checks since
3538 * probes will not activate in user contexts to which the
3539 * enabling user does not have permissions.
3543 * Rounding up the user allocation size could have overflowed
3544 * a large, bogus allocation (like -1ULL) to 0.
3546 if (scratch_size
< size
||
3547 !DTRACE_INSCRATCH(mstate
, scratch_size
)) {
3548 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3553 if (subr
== DIF_SUBR_COPYIN
) {
3554 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3555 if (dtrace_priv_proc(state
))
3556 dtrace_copyin(tupregs
[0].dttk_value
, dest
, size
, flags
);
3557 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3560 mstate
->dtms_scratch_ptr
+= scratch_size
;
3565 case DIF_SUBR_COPYINTO
: {
3566 uint64_t size
= tupregs
[1].dttk_value
;
3567 uintptr_t dest
= tupregs
[2].dttk_value
;
3570 * This action doesn't require any credential checks since
3571 * probes will not activate in user contexts to which the
3572 * enabling user does not have permissions.
3574 if (!dtrace_inscratch(dest
, size
, mstate
)) {
3575 *flags
|= CPU_DTRACE_BADADDR
;
3580 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3581 if (dtrace_priv_proc(state
))
3582 dtrace_copyin(tupregs
[0].dttk_value
, dest
, size
, flags
);
3583 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3587 case DIF_SUBR_COPYINSTR
: {
3588 uintptr_t dest
= mstate
->dtms_scratch_ptr
;
3589 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
3591 if (nargs
> 1 && tupregs
[1].dttk_value
< size
)
3592 size
= tupregs
[1].dttk_value
+ 1;
3595 * This action doesn't require any credential checks since
3596 * probes will not activate in user contexts to which the
3597 * enabling user does not have permissions.
3599 if (!DTRACE_INSCRATCH(mstate
, size
)) {
3600 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3605 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3606 if (dtrace_priv_proc(state
))
3607 dtrace_copyinstr(tupregs
[0].dttk_value
, dest
, size
, flags
);
3608 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3610 ((char *)dest
)[size
- 1] = '\0';
3611 mstate
->dtms_scratch_ptr
+= size
;
3616 case DIF_SUBR_MSGSIZE
:
3617 case DIF_SUBR_MSGDSIZE
: {
3618 /* Darwin does not implement SysV streams messages */
3619 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP
);
3624 case DIF_SUBR_PROGENYOF
: {
3625 pid_t pid
= tupregs
[0].dttk_value
;
3626 struct proc
*p
= current_proc();
3627 int rval
= 0, lim
= nprocs
;
3629 while(p
&& (lim
-- > 0)) {
3632 ppid
= (pid_t
)dtrace_load32((uintptr_t)&(p
->p_pid
));
3633 if (*flags
& CPU_DTRACE_FAULT
)
3642 break; /* Can't climb process tree any further. */
3644 p
= (struct proc
*)dtrace_loadptr((uintptr_t)&(p
->p_pptr
));
3645 if (*flags
& CPU_DTRACE_FAULT
)
3653 case DIF_SUBR_SPECULATION
:
3654 regs
[rd
] = dtrace_speculation(state
);
3658 case DIF_SUBR_COPYOUT
: {
3659 uintptr_t kaddr
= tupregs
[0].dttk_value
;
3660 user_addr_t uaddr
= tupregs
[1].dttk_value
;
3661 uint64_t size
= tupregs
[2].dttk_value
;
3663 if (!dtrace_destructive_disallow
&&
3664 dtrace_priv_proc_control(state
) &&
3665 !dtrace_istoxic(kaddr
, size
)) {
3666 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3667 dtrace_copyout(kaddr
, uaddr
, size
, flags
);
3668 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3673 case DIF_SUBR_COPYOUTSTR
: {
3674 uintptr_t kaddr
= tupregs
[0].dttk_value
;
3675 user_addr_t uaddr
= tupregs
[1].dttk_value
;
3676 uint64_t size
= tupregs
[2].dttk_value
;
3678 if (!dtrace_destructive_disallow
&&
3679 dtrace_priv_proc_control(state
) &&
3680 !dtrace_istoxic(kaddr
, size
)) {
3681 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
3682 dtrace_copyoutstr(kaddr
, uaddr
, size
, flags
);
3683 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
3688 case DIF_SUBR_STRLEN
: {
3690 uintptr_t addr
= (uintptr_t)tupregs
[0].dttk_value
;
3691 sz
= dtrace_strlen((char *)addr
,
3692 state
->dts_options
[DTRACEOPT_STRSIZE
]);
3694 if (!dtrace_canload(addr
, sz
+ 1, mstate
, vstate
)) {
	case DIF_SUBR_STRCHR:
	case DIF_SUBR_STRRCHR: {
		/*
		 * We're going to iterate over the string looking for the
		 * specified character.  We will iterate until we have reached
		 * the string length or we have found the character.  If this
		 * is DIF_SUBR_STRRCHR, we will look for the last occurrence
		 * of the specified character instead of the first.
		 */
3713 uintptr_t saddr
= tupregs
[0].dttk_value
;
3714 uintptr_t addr
= tupregs
[0].dttk_value
;
3715 uintptr_t limit
= addr
+ state
->dts_options
[DTRACEOPT_STRSIZE
];
3716 char c
, target
= (char)tupregs
[1].dttk_value
;
3718 for (regs
[rd
] = 0; addr
< limit
; addr
++) {
3719 if ((c
= dtrace_load8(addr
)) == target
) {
3722 if (subr
== DIF_SUBR_STRCHR
)
3730 if (!dtrace_canload(saddr
, addr
- saddr
, mstate
, vstate
)) {
	case DIF_SUBR_STRSTR:
	case DIF_SUBR_INDEX:
	case DIF_SUBR_RINDEX: {
		/*
		 * We're going to iterate over the string looking for the
		 * specified string.  We will iterate until we have reached
		 * the string length or we have found the string.  (Yes, this
		 * is done in the most naive way possible -- but considering
		 * that the string we're searching for is likely to be
		 * relatively short, the complexity of Rabin-Karp or similar
		 * hardly seems merited.)
		 */
3750 char *addr
= (char *)(uintptr_t)tupregs
[0].dttk_value
;
3751 char *substr
= (char *)(uintptr_t)tupregs
[1].dttk_value
;
3752 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
3753 size_t len
= dtrace_strlen(addr
, size
);
3754 size_t sublen
= dtrace_strlen(substr
, size
);
3755 char *limit
= addr
+ len
, *orig
= addr
;
3756 int notfound
= subr
== DIF_SUBR_STRSTR
? 0 : -1;
3759 regs
[rd
] = notfound
;
3761 if (!dtrace_canload((uintptr_t)addr
, len
+ 1, mstate
, vstate
)) {
3766 if (!dtrace_canload((uintptr_t)substr
, sublen
+ 1, mstate
,
3773 * strstr() and index()/rindex() have similar semantics if
3774 * both strings are the empty string: strstr() returns a
3775 * pointer to the (empty) string, and index() and rindex()
3776 * both return index 0 (regardless of any position argument).
3778 if (sublen
== 0 && len
== 0) {
3779 if (subr
== DIF_SUBR_STRSTR
)
3780 regs
[rd
] = (uintptr_t)addr
;
3786 if (subr
!= DIF_SUBR_STRSTR
) {
3787 if (subr
== DIF_SUBR_RINDEX
) {
3794 * Both index() and rindex() take an optional position
3795 * argument that denotes the starting position.
3798 int64_t pos
= (int64_t)tupregs
[2].dttk_value
;
				/*
				 * If the position argument to index() is
				 * negative, Perl implicitly clamps it at
				 * zero.  This semantic is a little surprising
				 * given the special meaning of negative
				 * positions to similar Perl functions like
				 * substr(), but it appears to reflect a
				 * notion that index() can start from a
				 * negative index and increment its way up to
				 * the string.  Given this notion, Perl's
				 * rindex() is at least self-consistent in
				 * that it implicitly clamps positions greater
				 * than the string length to be the string
				 * length.  Where Perl completely loses
				 * coherence, however, is when the specified
				 * substring is the empty string ("").  In
				 * this case, even if the position is
				 * negative, rindex() returns 0 -- and even if
				 * the position is greater than the length,
				 * index() returns the string length.  These
				 * semantics violate the notion that index()
				 * should never return a value less than the
				 * specified position and that rindex() should
				 * never return a value greater than the
				 * specified position.  (One assumes that
				 * these semantics are artifacts of Perl's
				 * implementation and not the results of
				 * deliberate design -- it beggars belief that
				 * even Larry Wall could desire such oddness.)
				 * While in the abstract one would wish for
				 * consistent position semantics across
				 * substr(), index() and rindex() -- or at the
				 * very least self-consistent position
				 * semantics for index() and rindex() -- we
				 * instead opt to keep with the extant Perl
				 * semantics, in all their broken glory.  (Do
				 * we have more desire to maintain Perl's
				 * semantics than Perl does?  Probably.)
				 */
3839 if (subr
== DIF_SUBR_RINDEX
) {
3846 if ((size_t)pos
> len
)
3852 if ((size_t)pos
>= len
) {
3863 for (regs
[rd
] = notfound
; addr
!= limit
; addr
+= inc
) {
3864 if (dtrace_strncmp(addr
, substr
, sublen
) == 0) {
3865 if (subr
!= DIF_SUBR_STRSTR
) {
3867 * As D index() and rindex() are
3868 * modeled on Perl (and not on awk),
3869 * we return a zero-based (and not a
3870 * one-based) index. (For you Perl
3871 * weenies: no, we're not going to add
3872 * $[ -- and shouldn't you be at a con
3875 regs
[rd
] = (uintptr_t)(addr
- orig
);
3879 ASSERT(subr
== DIF_SUBR_STRSTR
);
3880 regs
[rd
] = (uintptr_t)addr
;
3888 case DIF_SUBR_STRTOK
: {
3889 uintptr_t addr
= tupregs
[0].dttk_value
;
3890 uintptr_t tokaddr
= tupregs
[1].dttk_value
;
3891 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
3892 uintptr_t limit
, toklimit
= tokaddr
+ size
;
3893 char *dest
= (char *)mstate
->dtms_scratch_ptr
;
3894 uint8_t c
='\0', tokmap
[32]; /* 256 / 8 */
3898 * Check both the token buffer and (later) the input buffer,
3899 * since both could be non-scratch addresses.
3901 if (!dtrace_strcanload(tokaddr
, size
, mstate
, vstate
)) {
3906 if (!DTRACE_INSCRATCH(mstate
, size
)) {
3907 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
3914 * If the address specified is NULL, we use our saved
3915 * strtok pointer from the mstate. Note that this
3916 * means that the saved strtok pointer is _only_
3917 * valid within multiple enablings of the same probe --
3918 * it behaves like an implicit clause-local variable.
3920 addr
= mstate
->dtms_strtok
;
3923 * If the user-specified address is non-NULL we must
3924 * access check it. This is the only time we have
3925 * a chance to do so, since this address may reside
3926 * in the string table of this clause-- future calls
3927 * (when we fetch addr from mstate->dtms_strtok)
3928 * would fail this access check.
3930 if (!dtrace_strcanload(addr
, size
, mstate
, vstate
)) {
3937 * First, zero the token map, and then process the token
3938 * string -- setting a bit in the map for every character
3939 * found in the token string.
3941 for (i
= 0; i
< (int)sizeof (tokmap
); i
++)
3944 for (; tokaddr
< toklimit
; tokaddr
++) {
3945 if ((c
= dtrace_load8(tokaddr
)) == '\0')
3948 ASSERT((c
>> 3) < sizeof (tokmap
));
3949 tokmap
[c
>> 3] |= (1 << (c
& 0x7));
3952 for (limit
= addr
+ size
; addr
< limit
; addr
++) {
3954 * We're looking for a character that is _not_ contained
3955 * in the token string.
3957 if ((c
= dtrace_load8(addr
)) == '\0')
3960 if (!(tokmap
[c
>> 3] & (1 << (c
& 0x7))))
3966 * We reached the end of the string without finding
3967 * any character that was not in the token string.
3968 * We return NULL in this case, and we set the saved
3969 * address to NULL as well.
3972 mstate
->dtms_strtok
= 0;
3977 * From here on, we're copying into the destination string.
3979 for (i
= 0; addr
< limit
&& i
< size
- 1; addr
++) {
3980 if ((c
= dtrace_load8(addr
)) == '\0')
3983 if (tokmap
[c
>> 3] & (1 << (c
& 0x7)))
3992 regs
[rd
] = (uintptr_t)dest
;
3993 mstate
->dtms_scratch_ptr
+= size
;
3994 mstate
->dtms_strtok
= addr
;
3998 case DIF_SUBR_SUBSTR
: {
3999 uintptr_t s
= tupregs
[0].dttk_value
;
4000 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
4001 char *d
= (char *)mstate
->dtms_scratch_ptr
;
4002 int64_t index
= (int64_t)tupregs
[1].dttk_value
;
4003 int64_t remaining
= (int64_t)tupregs
[2].dttk_value
;
4004 size_t len
= dtrace_strlen((char *)s
, size
);
4007 if (!dtrace_canload(s
, len
+ 1, mstate
, vstate
)) {
4012 if (!DTRACE_INSCRATCH(mstate
, size
)) {
4013 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
4019 remaining
= (int64_t)size
;
4024 if (index
< 0 && index
+ remaining
> 0) {
4030 if ((size_t)index
>= len
|| index
< 0) {
4032 } else if (remaining
< 0) {
4033 remaining
+= len
- index
;
4034 } else if ((uint64_t)index
+ (uint64_t)remaining
> size
) {
4035 remaining
= size
- index
;
4038 for (i
= 0; i
< remaining
; i
++) {
4039 if ((d
[i
] = dtrace_load8(s
+ index
+ i
)) == '\0')
4045 mstate
->dtms_scratch_ptr
+= size
;
4046 regs
[rd
] = (uintptr_t)d
;
4050 case DIF_SUBR_GETMAJOR
:
4051 regs
[rd
] = (uintptr_t)major( (dev_t
)tupregs
[0].dttk_value
);
4054 case DIF_SUBR_GETMINOR
:
4055 regs
[rd
] = (uintptr_t)minor( (dev_t
)tupregs
[0].dttk_value
);
4058 case DIF_SUBR_DDI_PATHNAME
: {
4059 /* APPLE NOTE: currently unsupported on Darwin */
4060 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP
);
4065 case DIF_SUBR_STRJOIN
: {
4066 char *d
= (char *)mstate
->dtms_scratch_ptr
;
4067 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
4068 uintptr_t s1
= tupregs
[0].dttk_value
;
4069 uintptr_t s2
= tupregs
[1].dttk_value
;
4072 if (!dtrace_strcanload(s1
, size
, mstate
, vstate
) ||
4073 !dtrace_strcanload(s2
, size
, mstate
, vstate
)) {
4078 if (!DTRACE_INSCRATCH(mstate
, size
)) {
4079 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
4086 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
4091 if ((d
[i
++] = dtrace_load8(s1
++)) == '\0') {
4099 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
4104 if ((d
[i
++] = dtrace_load8(s2
++)) == '\0')
4109 mstate
->dtms_scratch_ptr
+= i
;
4110 regs
[rd
] = (uintptr_t)d
;
	case DIF_SUBR_LLTOSTR: {
		int64_t i = (int64_t)tupregs[0].dttk_value;
		int64_t val = i < 0 ? i * -1 : i;
		uint64_t size = 22;	/* enough room for 2^64 in decimal */
		char *end = (char *)mstate->dtms_scratch_ptr + size - 1;

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = 0;
			break;
		}

		for (*end-- = '\0'; val; val /= 10)
			*end-- = '0' + (val % 10);

		if (i == 0)
			*end-- = '0';

		if (i < 0)
			*end-- = '-';

		regs[rd] = (uintptr_t)end + 1;
		mstate->dtms_scratch_ptr += size;
		break;
	}
4142 case DIF_SUBR_HTONS
:
4143 case DIF_SUBR_NTOHS
:
4145 regs
[rd
] = (uint16_t)tupregs
[0].dttk_value
;
4147 regs
[rd
] = DT_BSWAP_16((uint16_t)tupregs
[0].dttk_value
);
4152 case DIF_SUBR_HTONL
:
4153 case DIF_SUBR_NTOHL
:
4155 regs
[rd
] = (uint32_t)tupregs
[0].dttk_value
;
4157 regs
[rd
] = DT_BSWAP_32((uint32_t)tupregs
[0].dttk_value
);
4162 case DIF_SUBR_HTONLL
:
4163 case DIF_SUBR_NTOHLL
:
4165 regs
[rd
] = (uint64_t)tupregs
[0].dttk_value
;
4167 regs
[rd
] = DT_BSWAP_64((uint64_t)tupregs
[0].dttk_value
);
4172 case DIF_SUBR_DIRNAME
:
4173 case DIF_SUBR_BASENAME
: {
4174 char *dest
= (char *)mstate
->dtms_scratch_ptr
;
4175 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
4176 uintptr_t src
= tupregs
[0].dttk_value
;
4177 int i
, j
, len
= dtrace_strlen((char *)src
, size
);
4178 int lastbase
= -1, firstbase
= -1, lastdir
= -1;
4181 if (!dtrace_canload(src
, len
+ 1, mstate
, vstate
)) {
4186 if (!DTRACE_INSCRATCH(mstate
, size
)) {
4187 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
4193 * The basename and dirname for a zero-length string is
4198 src
= (uintptr_t)".";
4202 * Start from the back of the string, moving back toward the
4203 * front until we see a character that isn't a slash. That
4204 * character is the last character in the basename.
4206 for (i
= len
- 1; i
>= 0; i
--) {
4207 if (dtrace_load8(src
+ i
) != '/')
4215 * Starting from the last character in the basename, move
4216 * towards the front until we find a slash. The character
4217 * that we processed immediately before that is the first
4218 * character in the basename.
4220 for (; i
>= 0; i
--) {
4221 if (dtrace_load8(src
+ i
) == '/')
4229 * Now keep going until we find a non-slash character. That
4230 * character is the last character in the dirname.
4232 for (; i
>= 0; i
--) {
4233 if (dtrace_load8(src
+ i
) != '/')
4240 ASSERT(!(lastbase
== -1 && firstbase
!= -1));
4241 ASSERT(!(firstbase
== -1 && lastdir
!= -1));
4243 if (lastbase
== -1) {
4245 * We didn't find a non-slash character. We know that
4246 * the length is non-zero, so the whole string must be
4247 * slashes. In either the dirname or the basename
4248 * case, we return '/'.
4250 ASSERT(firstbase
== -1);
4251 firstbase
= lastbase
= lastdir
= 0;
4254 if (firstbase
== -1) {
4256 * The entire string consists only of a basename
4257 * component. If we're looking for dirname, we need
4258 * to change our string to be just "."; if we're
4259 * looking for a basename, we'll just set the first
4260 * character of the basename to be 0.
4262 if (subr
== DIF_SUBR_DIRNAME
) {
4263 ASSERT(lastdir
== -1);
4264 src
= (uintptr_t)".";
4271 if (subr
== DIF_SUBR_DIRNAME
) {
4272 if (lastdir
== -1) {
4274 * We know that we have a slash in the name --
4275 * or lastdir would be set to 0, above. And
4276 * because lastdir is -1, we know that this
4277 * slash must be the first character. (That
4278 * is, the full string must be of the form
4279 * "/basename".) In this case, the last
4280 * character of the directory name is 0.
4288 ASSERT(subr
== DIF_SUBR_BASENAME
);
4289 ASSERT(firstbase
!= -1 && lastbase
!= -1);
4294 for (i
= start
, j
= 0; i
<= end
&& (uint64_t)j
< size
- 1; i
++, j
++)
4295 dest
[j
] = dtrace_load8(src
+ i
);
4298 regs
[rd
] = (uintptr_t)dest
;
4299 mstate
->dtms_scratch_ptr
+= size
;
4303 case DIF_SUBR_CLEANPATH
: {
4304 char *dest
= (char *)mstate
->dtms_scratch_ptr
, c
;
4305 uint64_t size
= state
->dts_options
[DTRACEOPT_STRSIZE
];
4306 uintptr_t src
= tupregs
[0].dttk_value
;
4309 if (!dtrace_strcanload(src
, size
, mstate
, vstate
)) {
4314 if (!DTRACE_INSCRATCH(mstate
, size
)) {
4315 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
4321 * Move forward, loading each character.
4324 c
= dtrace_load8(src
+ i
++);
4326 if ((uint64_t)(j
+ 5) >= size
) /* 5 = strlen("/..c\0") */
4334 c
= dtrace_load8(src
+ i
++);
4338 * We have two slashes -- we can just advance
4339 * to the next character.
4346 * This is not "." and it's not ".." -- we can
4347 * just store the "/" and this character and
4355 c
= dtrace_load8(src
+ i
++);
4359 * This is a "/./" component. We're not going
4360 * to store anything in the destination buffer;
4361 * we're just going to go to the next component.
4368 * This is not ".." -- we can just store the
4369 * "/." and this character and continue
4378 c
= dtrace_load8(src
+ i
++);
4380 if (c
!= '/' && c
!= '\0') {
4382 * This is not ".." -- it's "..[mumble]".
4383 * We'll store the "/.." and this character
4384 * and continue processing.
4394 * This is "/../" or "/..\0". We need to back up
4395 * our destination pointer until we find a "/".
4398 while (j
!= 0 && dest
[--j
] != '/')
4403 } while (c
!= '\0');
4406 regs
[rd
] = (uintptr_t)dest
;
4407 mstate
->dtms_scratch_ptr
+= size
;
4411 case DIF_SUBR_INET_NTOA
:
4412 case DIF_SUBR_INET_NTOA6
:
4413 case DIF_SUBR_INET_NTOP
: {
4418 if (subr
== DIF_SUBR_INET_NTOP
) {
4419 af
= (int)tupregs
[0].dttk_value
;
4422 af
= subr
== DIF_SUBR_INET_NTOA
? AF_INET
: AF_INET6
;
4426 if (af
== AF_INET
) {
4427 #if !defined(__APPLE__)
4431 #endif /* __APPLE__ */
4435 * Safely load the IPv4 address.
4437 #if !defined(__APPLE__)
4438 ip4
= dtrace_load32(tupregs
[argi
].dttk_value
);
4441 (void *)(uintptr_t)tupregs
[argi
].dttk_value
,
4442 (void *)(uintptr_t)&ip4
, sizeof (ip4
));
4443 #endif /* __APPLE__ */
4445 * Check an IPv4 string will fit in scratch.
4447 #if !defined(__APPLE__)
4448 size
= INET_ADDRSTRLEN
;
4450 size
= MAX_IPv4_STR_LEN
;
4451 #endif /* __APPLE__ */
4452 if (!DTRACE_INSCRATCH(mstate
, size
)) {
4453 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
4457 base
= (char *)mstate
->dtms_scratch_ptr
;
4458 end
= (char *)mstate
->dtms_scratch_ptr
+ size
- 1;
4461 * Stringify as a dotted decimal quad.
4464 ptr8
= (uint8_t *)&ip4
;
4465 for (i
= 3; i
>= 0; i
--) {
4471 for (; val
; val
/= 10) {
4472 *end
-- = '0' + (val
% 10);
4479 ASSERT(end
+ 1 >= base
);
4481 } else if (af
== AF_INET6
) {
4482 #if defined(__APPLE__)
4483 #define _S6_un __u6_addr
4484 #define _S6_u8 __u6_addr8
4485 #endif /* __APPLE__ */
4486 struct in6_addr ip6
;
4487 int firstzero
, tryzero
, numzero
, v6end
;
4489 const char digits
[] = "0123456789abcdef";
4492 * Stringify using RFC 1884 convention 2 - 16 bit
4493 * hexadecimal values with a zero-run compression.
4494 * Lower case hexadecimal digits are used.
4495 * eg, fe80::214:4fff:fe0b:76c8.
4496 * The IPv4 embedded form is returned for inet_ntop,
4497 * just the IPv4 string is returned for inet_ntoa6.
4501 * Safely load the IPv6 address.
4504 (void *)(uintptr_t)tupregs
[argi
].dttk_value
,
4505 (void *)(uintptr_t)&ip6
, sizeof (struct in6_addr
));
4508 * Check an IPv6 string will fit in scratch.
4510 size
= INET6_ADDRSTRLEN
;
4511 if (!DTRACE_INSCRATCH(mstate
, size
)) {
4512 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
4516 base
= (char *)mstate
->dtms_scratch_ptr
;
4517 end
= (char *)mstate
->dtms_scratch_ptr
+ size
- 1;
4521 * Find the longest run of 16 bit zero values
4522 * for the single allowed zero compression - "::".
4527 for (i
= 0; i
< (int)sizeof (struct in6_addr
); i
++) {
4528 if (ip6
._S6_un
._S6_u8
[i
] == 0 &&
4529 tryzero
== -1 && i
% 2 == 0) {
4534 if (tryzero
!= -1 &&
4535 (ip6
._S6_un
._S6_u8
[i
] != 0 ||
4536 i
== sizeof (struct in6_addr
) - 1)) {
4538 if (i
- tryzero
<= numzero
) {
4543 firstzero
= tryzero
;
4544 numzero
= i
- i
% 2 - tryzero
;
4547 if (ip6
._S6_un
._S6_u8
[i
] == 0 &&
4548 i
== sizeof (struct in6_addr
) - 1)
4552 ASSERT(firstzero
+ numzero
<= (int)sizeof (struct in6_addr
));
4555 * Check for an IPv4 embedded address.
4557 v6end
= sizeof (struct in6_addr
) - 2;
4558 if (IN6_IS_ADDR_V4MAPPED(&ip6
) ||
4559 IN6_IS_ADDR_V4COMPAT(&ip6
)) {
4560 for (i
= sizeof (struct in6_addr
) - 1;
4561 i
>= (int)DTRACE_V4MAPPED_OFFSET
; i
--) {
4562 ASSERT(end
>= base
);
4564 val
= ip6
._S6_un
._S6_u8
[i
];
4569 for (; val
; val
/= 10) {
4570 *end
-- = '0' + val
% 10;
4574 if (i
> (int)DTRACE_V4MAPPED_OFFSET
)
4578 if (subr
== DIF_SUBR_INET_NTOA6
)
4582 * Set v6end to skip the IPv4 address that
4583 * we have already stringified.
4589 * Build the IPv6 string by working through the
4590 * address in reverse.
4592 for (i
= v6end
; i
>= 0; i
-= 2) {
4593 ASSERT(end
>= base
);
4595 if (i
== firstzero
+ numzero
- 2) {
4602 if (i
< 14 && i
!= firstzero
- 2)
4605 val
= (ip6
._S6_un
._S6_u8
[i
] << 8) +
4606 ip6
._S6_un
._S6_u8
[i
+ 1];
4611 for (; val
; val
/= 16) {
4612 *end
-- = digits
[val
% 16];
4616 ASSERT(end
+ 1 >= base
);
#if defined(__APPLE__)
#undef _S6_un
#undef _S6_u8
#endif /* __APPLE__ */
        } else {
            /*
             * The user didn't use AF_INET or AF_INET6.
             */
            DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
            regs[rd] = 0;
            break;
        }

inetout: regs[rd] = (uintptr_t)end + 1;
        mstate->dtms_scratch_ptr += size;
        break;
    }
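    /*
     * The AF_INET case above builds the dotted quad back to front: each
     * octet's decimal digits are peeled off with repeated division by 10,
     * and the finished string begins one byte past the last write (end + 1).
     * A minimal user-space sketch of that reverse-fill technique follows;
     * the names in it are invented for illustration and the block is not
     * part of this file's build.
     */
#if 0   /* illustrative sketch -- not compiled */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Render the IPv4 address 'ip4' (network byte order) into buf[size]. */
static const char *
ipv4_ntoa_reverse(uint32_t ip4, char *buf, size_t size)
{
    const uint8_t *ptr8 = (const uint8_t *)&ip4;
    char *end = buf + size - 1;
    int i;

    *end-- = '\0';
    for (i = 3; i >= 0; i--) {
        uint8_t val = ptr8[i];

        if (val == 0) {
            *end-- = '0';
        } else {
            for (; val; val /= 10)
                *end-- = '0' + (val % 10);
        }
        if (i > 0)
            *end-- = '.';
    }
    return (end + 1);   /* the string starts one past the last write */
}

int
main(void)
{
    uint8_t octets[4] = { 127, 0, 0, 1 };
    uint32_t addr;
    char buf[16];

    memcpy(&addr, octets, sizeof (addr));
    printf("%s\n", ipv4_ntoa_reverse(addr, buf, sizeof (buf)));
    return (0);
}
#endif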
    case DIF_SUBR_TOUPPER:
    case DIF_SUBR_TOLOWER: {
        uintptr_t src = tupregs[0].dttk_value;
        char *dest = (char *)mstate->dtms_scratch_ptr;
        char lower, upper, base, c;
        uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
        size_t len = dtrace_strlen((char*) src, size);
        uint64_t i = 0;

        lower = (subr == DIF_SUBR_TOUPPER) ? 'a' : 'A';
        upper = (subr == DIF_SUBR_TOUPPER) ? 'z' : 'Z';
        base  = (subr == DIF_SUBR_TOUPPER) ? 'A' : 'a';

        if (!dtrace_canload(src, len + 1, mstate, vstate)) {
            regs[rd] = 0;
            break;
        }

        if (!DTRACE_INSCRATCH(mstate, size)) {
            DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
            regs[rd] = 0;
            break;
        }

        for (i = 0; i < size - 1; ++i) {
            if ((c = dtrace_load8(src + i)) == '\0')
                break;

            if (c >= lower && c <= upper)
                c = base + (c - lower);

            dest[i] = c;
        }

        ASSERT(i < size);

        dest[i] = '\0';
        regs[rd] = (uintptr_t) dest;
        mstate->dtms_scratch_ptr += size;
        break;
    }
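    /*
     * toupper()/tolower() above rely on the contiguous ASCII alphabetic
     * ranges: a character in [lower, upper] is case-mapped by adding its
     * distance from 'lower' to the opposite range's base.  A minimal
     * user-space sketch of the same bounded transform follows; the names are
     * invented for illustration and the block is not part of this file's
     * build.
     */
#if 0   /* illustrative sketch -- not compiled */
#include <stdio.h>
#include <stddef.h>

/* Copy 'src' into 'dest' (at most size bytes, NUL-terminated), upper-casing. */
static void
ascii_toupper_bounded(const char *src, char *dest, size_t size)
{
    char lower = 'a', upper = 'z', base = 'A';
    size_t i;

    for (i = 0; i + 1 < size; i++) {
        char c = src[i];

        if (c == '\0')
            break;
        if (c >= lower && c <= upper)
            c = base + (c - lower);
        dest[i] = c;
    }
    dest[i] = '\0';
}

int
main(void)
{
    char buf[32];

    ascii_toupper_bounded("dtrace_probe", buf, sizeof (buf));
    printf("%s\n", buf);    /* prints DTRACE_PROBE */
    return (0);
}
#endif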
    /*
     * CoreProfile callback ('core_profile (uint64_t, [uint64_t], [uint64_t] ...)')
     */
    case DIF_SUBR_COREPROFILE: {
        uint64_t selector = tupregs[0].dttk_value;
        uint64_t args[DIF_DTR_NREGS-1] = {0ULL};
        uint32_t ii;
        uint32_t count = (uint32_t)nargs;

        if (count < 1) {
            regs[rd] = KERN_FAILURE;
            break;
        }

        if(count > DIF_DTR_NREGS)
            count = DIF_DTR_NREGS;

        /* copy in any variadic argument list, bounded by DIF_DTR_NREGS */
        for(ii = 0; ii < count-1; ii++) {
            args[ii] = tupregs[ii+1].dttk_value;
        }

        kern_return_t ret =
            chudxnu_dtrace_callback(selector, args, count-1);
        if(KERN_SUCCESS != ret) {
            /* error */
            break;
        }

        regs[rd] = KERN_SUCCESS;
        break;
    }
/*
 * Emulate the execution of DTrace IR instructions specified by the given
 * DIF object.  This function is deliberately void of assertions as all of
 * the necessary checks are handled by a call to dtrace_difo_validate().
 */
static uint64_t
dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate, dtrace_state_t *state)
{
    const dif_instr_t *text = difo->dtdo_buf;
    const uint_t textlen = difo->dtdo_len;
    const char *strtab = difo->dtdo_strtab;
    const uint64_t *inttab = difo->dtdo_inttab;

    uint64_t rval = 0;
    dtrace_statvar_t *svar;
    dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
    dtrace_difv_t *v;
    volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
    volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;

    dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
    uint64_t regs[DIF_DIR_NREGS];
    uint64_t *tmp;

    uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0;
    int64_t cc_r;
    uint_t pc = 0, id, opc = 0;
    uint8_t ttop = 0;
    dif_instr_t instr;
    uint_t r1, r2, rd;

    /*
     * We stash the current DIF object into the machine state: we need it
     * for subsequent access checking.
     */
    mstate->dtms_difo = difo;

    regs[DIF_REG_R0] = 0;       /* %r0 is fixed at zero */

    while (pc < textlen && !(*flags & CPU_DTRACE_FAULT)) {
        opc = pc;

        instr = text[pc++];
        r1 = DIF_INSTR_R1(instr);
        r2 = DIF_INSTR_R2(instr);
        rd = DIF_INSTR_RD(instr);

        switch (DIF_INSTR_OP(instr)) {
4762 regs
[rd
] = regs
[r1
] | regs
[r2
];
4765 regs
[rd
] = regs
[r1
] ^ regs
[r2
];
4768 regs
[rd
] = regs
[r1
] & regs
[r2
];
4771 regs
[rd
] = regs
[r1
] << regs
[r2
];
4774 regs
[rd
] = regs
[r1
] >> regs
[r2
];
4777 regs
[rd
] = regs
[r1
] - regs
[r2
];
4780 regs
[rd
] = regs
[r1
] + regs
[r2
];
4783 regs
[rd
] = regs
[r1
] * regs
[r2
];
4786 if (regs
[r2
] == 0) {
4788 *flags
|= CPU_DTRACE_DIVZERO
;
4790 regs
[rd
] = (int64_t)regs
[r1
] /
4796 if (regs
[r2
] == 0) {
4798 *flags
|= CPU_DTRACE_DIVZERO
;
4800 regs
[rd
] = regs
[r1
] / regs
[r2
];
4805 if (regs
[r2
] == 0) {
4807 *flags
|= CPU_DTRACE_DIVZERO
;
4809 regs
[rd
] = (int64_t)regs
[r1
] %
4815 if (regs
[r2
] == 0) {
4817 *flags
|= CPU_DTRACE_DIVZERO
;
4819 regs
[rd
] = regs
[r1
] % regs
[r2
];
4824 regs
[rd
] = ~regs
[r1
];
4827 regs
[rd
] = regs
[r1
];
4830 cc_r
= regs
[r1
] - regs
[r2
];
4834 cc_c
= regs
[r1
] < regs
[r2
];
4837 cc_n
= cc_v
= cc_c
= 0;
4838 cc_z
= regs
[r1
] == 0;
4841 pc
= DIF_INSTR_LABEL(instr
);
4845 pc
= DIF_INSTR_LABEL(instr
);
4849 pc
= DIF_INSTR_LABEL(instr
);
4852 if ((cc_z
| (cc_n
^ cc_v
)) == 0)
4853 pc
= DIF_INSTR_LABEL(instr
);
4856 if ((cc_c
| cc_z
) == 0)
4857 pc
= DIF_INSTR_LABEL(instr
);
4860 if ((cc_n
^ cc_v
) == 0)
4861 pc
= DIF_INSTR_LABEL(instr
);
4865 pc
= DIF_INSTR_LABEL(instr
);
4869 pc
= DIF_INSTR_LABEL(instr
);
4873 pc
= DIF_INSTR_LABEL(instr
);
4876 if (cc_z
| (cc_n
^ cc_v
))
4877 pc
= DIF_INSTR_LABEL(instr
);
4881 pc
= DIF_INSTR_LABEL(instr
);
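        /*
         * The compare above records four condition codes -- N (negative),
         * Z (zero), V (overflow) and C (unsigned borrow) -- and the
         * conditional-branch opcodes re-derive their predicates from them:
         * "signed less than" is (N ^ V), "signed greater than" is
         * !(Z | (N ^ V)), and "unsigned greater than" is !(C | Z).  The
         * user-space sketch below demonstrates that flag algebra for 64-bit
         * operands; it is illustrative only and is not part of this file's
         * build.
         */
#if 0   /* illustrative sketch -- not compiled */
#include <stdint.h>
#include <assert.h>

static void
cmp_flags(uint64_t a, uint64_t b, int *n, int *z, int *v, int *c)
{
    int64_t r = (int64_t)(a - b);

    *n = r < 0;
    *z = r == 0;
    *v = 0;         /* the subtract is modeled as non-overflowing */
    *c = a < b;     /* unsigned borrow */
}

int
main(void)
{
    int n, z, v, c;

    cmp_flags((uint64_t)-5, 3, &n, &z, &v, &c);
    assert(n ^ v);          /* signed:   -5 <  3            */
    assert((c | z) == 0);   /* unsigned: (uint64_t)-5 > 3   */
    return (0);
}
#endif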
4884 if (!dtrace_canstore(regs
[r1
], 1, mstate
, vstate
)) {
4885 *flags
|= CPU_DTRACE_KPRIV
;
4891 regs
[rd
] = (int8_t)dtrace_load8(regs
[r1
]);
4894 if (!dtrace_canstore(regs
[r1
], 2, mstate
, vstate
)) {
4895 *flags
|= CPU_DTRACE_KPRIV
;
4901 regs
[rd
] = (int16_t)dtrace_load16(regs
[r1
]);
4904 if (!dtrace_canstore(regs
[r1
], 4, mstate
, vstate
)) {
4905 *flags
|= CPU_DTRACE_KPRIV
;
4911 regs
[rd
] = (int32_t)dtrace_load32(regs
[r1
]);
4914 if (!dtrace_canstore(regs
[r1
], 1, mstate
, vstate
)) {
4915 *flags
|= CPU_DTRACE_KPRIV
;
4921 regs
[rd
] = dtrace_load8(regs
[r1
]);
4924 if (!dtrace_canstore(regs
[r1
], 2, mstate
, vstate
)) {
4925 *flags
|= CPU_DTRACE_KPRIV
;
4931 regs
[rd
] = dtrace_load16(regs
[r1
]);
4934 if (!dtrace_canstore(regs
[r1
], 4, mstate
, vstate
)) {
4935 *flags
|= CPU_DTRACE_KPRIV
;
4941 regs
[rd
] = dtrace_load32(regs
[r1
]);
4944 if (!dtrace_canstore(regs
[r1
], 8, mstate
, vstate
)) {
4945 *flags
|= CPU_DTRACE_KPRIV
;
4951 regs
[rd
] = dtrace_load64(regs
[r1
]);
4954 * Darwin 32-bit kernel may fetch from 64-bit user.
4955 * Do not cast regs to uintptr_t
4956 * DIF_OP_ULDSB,DIF_OP_ULDSH, DIF_OP_ULDSW, DIF_OP_ULDUB
4957 * DIF_OP_ULDUH, DIF_OP_ULDUW, DIF_OP_ULDX
4961 dtrace_fuword8(regs
[r1
]);
4964 regs
[rd
] = (int16_t)
4965 dtrace_fuword16(regs
[r1
]);
4968 regs
[rd
] = (int32_t)
4969 dtrace_fuword32(regs
[r1
]);
4973 dtrace_fuword8(regs
[r1
]);
4977 dtrace_fuword16(regs
[r1
]);
4981 dtrace_fuword32(regs
[r1
]);
4985 dtrace_fuword64(regs
[r1
]);
4994 regs
[rd
] = inttab
[DIF_INSTR_INTEGER(instr
)];
4997 regs
[rd
] = (uint64_t)(uintptr_t)
4998 (strtab
+ DIF_INSTR_STRING(instr
));
5001 size_t sz
= state
->dts_options
[DTRACEOPT_STRSIZE
];
5002 uintptr_t s1
= regs
[r1
];
5003 uintptr_t s2
= regs
[r2
];
5006 !dtrace_strcanload(s1
, sz
, mstate
, vstate
))
5009 !dtrace_strcanload(s2
, sz
, mstate
, vstate
))
5012 cc_r
= dtrace_strncmp((char *)s1
, (char *)s2
, sz
);
5020 regs
[rd
] = dtrace_dif_variable(mstate
, state
,
5024 id
= DIF_INSTR_VAR(instr
);
5026 if (id
>= DIF_VAR_OTHER_UBASE
) {
5029 id
-= DIF_VAR_OTHER_UBASE
;
5030 svar
= vstate
->dtvs_globals
[id
];
5031 ASSERT(svar
!= NULL
);
5032 v
= &svar
->dtsv_var
;
5034 if (!(v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
)) {
5035 regs
[rd
] = svar
->dtsv_data
;
5039 a
= (uintptr_t)svar
->dtsv_data
;
5041 if (*(uint8_t *)a
== UINT8_MAX
) {
5043 * If the 0th byte is set to UINT8_MAX
5044 * then this is to be treated as a
5045 * reference to a NULL variable.
5049 regs
[rd
] = a
+ sizeof (uint64_t);
5055 regs
[rd
] = dtrace_dif_variable(mstate
, state
, id
, 0);
5059 id
= DIF_INSTR_VAR(instr
);
5061 ASSERT(id
>= DIF_VAR_OTHER_UBASE
);
5062 id
-= DIF_VAR_OTHER_UBASE
;
5064 svar
= vstate
->dtvs_globals
[id
];
5065 ASSERT(svar
!= NULL
);
5066 v
= &svar
->dtsv_var
;
5068 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
5069 uintptr_t a
= (uintptr_t)svar
->dtsv_data
;
5072 ASSERT(svar
->dtsv_size
!= 0);
5074 if (regs
[rd
] == 0) {
5075 *(uint8_t *)a
= UINT8_MAX
;
5079 a
+= sizeof (uint64_t);
5081 if (!dtrace_vcanload(
5082 (void *)(uintptr_t)regs
[rd
], &v
->dtdv_type
,
5086 dtrace_vcopy((void *)(uintptr_t)regs
[rd
],
5087 (void *)a
, &v
->dtdv_type
);
5091 svar
->dtsv_data
= regs
[rd
];
5096 * There are no DTrace built-in thread-local arrays at
5097 * present. This opcode is saved for future work.
5099 *flags
|= CPU_DTRACE_ILLOP
;
5104 id
= DIF_INSTR_VAR(instr
);
5106 if (id
< DIF_VAR_OTHER_UBASE
) {
5108 * For now, this has no meaning.
5114 id
-= DIF_VAR_OTHER_UBASE
;
5116 ASSERT(id
< (uint_t
)vstate
->dtvs_nlocals
);
5117 ASSERT(vstate
->dtvs_locals
!= NULL
);
5118 svar
= vstate
->dtvs_locals
[id
];
5119 ASSERT(svar
!= NULL
);
5120 v
= &svar
->dtsv_var
;
5122 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
5123 uintptr_t a
= (uintptr_t)svar
->dtsv_data
;
5124 size_t sz
= v
->dtdv_type
.dtdt_size
;
5126 sz
+= sizeof (uint64_t);
5127 ASSERT(svar
->dtsv_size
== (int)NCPU
* sz
);
5128 a
+= CPU
->cpu_id
* sz
;
5130 if (*(uint8_t *)a
== UINT8_MAX
) {
5132 * If the 0th byte is set to UINT8_MAX
5133 * then this is to be treated as a
5134 * reference to a NULL variable.
5138 regs
[rd
] = a
+ sizeof (uint64_t);
5144 ASSERT(svar
->dtsv_size
== (int)NCPU
* sizeof (uint64_t));
5145 tmp
= (uint64_t *)(uintptr_t)svar
->dtsv_data
;
5146 regs
[rd
] = tmp
[CPU
->cpu_id
];
5150 id
= DIF_INSTR_VAR(instr
);
5152 ASSERT(id
>= DIF_VAR_OTHER_UBASE
);
5153 id
-= DIF_VAR_OTHER_UBASE
;
5154 ASSERT(id
< (uint_t
)vstate
->dtvs_nlocals
);
5155 ASSERT(vstate
->dtvs_locals
!= NULL
);
5156 svar
= vstate
->dtvs_locals
[id
];
5157 ASSERT(svar
!= NULL
);
5158 v
= &svar
->dtsv_var
;
5160 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
5161 uintptr_t a
= (uintptr_t)svar
->dtsv_data
;
5162 size_t sz
= v
->dtdv_type
.dtdt_size
;
5164 sz
+= sizeof (uint64_t);
5165 ASSERT(svar
->dtsv_size
== (int)NCPU
* sz
);
5166 a
+= CPU
->cpu_id
* sz
;
5168 if (regs
[rd
] == 0) {
5169 *(uint8_t *)a
= UINT8_MAX
;
5173 a
+= sizeof (uint64_t);
5176 if (!dtrace_vcanload(
5177 (void *)(uintptr_t)regs
[rd
], &v
->dtdv_type
,
5181 dtrace_vcopy((void *)(uintptr_t)regs
[rd
],
5182 (void *)a
, &v
->dtdv_type
);
5186 ASSERT(svar
->dtsv_size
== (int)NCPU
* sizeof (uint64_t));
5187 tmp
= (uint64_t *)(uintptr_t)svar
->dtsv_data
;
5188 tmp
[CPU
->cpu_id
] = regs
[rd
];
5192 dtrace_dynvar_t
*dvar
;
5195 id
= DIF_INSTR_VAR(instr
);
5196 ASSERT(id
>= DIF_VAR_OTHER_UBASE
);
5197 id
-= DIF_VAR_OTHER_UBASE
;
5198 v
= &vstate
->dtvs_tlocals
[id
];
5200 key
= &tupregs
[DIF_DTR_NREGS
];
5201 key
[0].dttk_value
= (uint64_t)id
;
5202 key
[0].dttk_size
= 0;
5203 DTRACE_TLS_THRKEY(key
[1].dttk_value
);
5204 key
[1].dttk_size
= 0;
5206 dvar
= dtrace_dynvar(dstate
, 2, key
,
5207 sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC
,
5215 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
5216 regs
[rd
] = (uint64_t)(uintptr_t)dvar
->dtdv_data
;
5218 regs
[rd
] = *((uint64_t *)dvar
->dtdv_data
);
5225 dtrace_dynvar_t
*dvar
;
5228 id
= DIF_INSTR_VAR(instr
);
5229 ASSERT(id
>= DIF_VAR_OTHER_UBASE
);
5230 id
-= DIF_VAR_OTHER_UBASE
;
5232 key
= &tupregs
[DIF_DTR_NREGS
];
5233 key
[0].dttk_value
= (uint64_t)id
;
5234 key
[0].dttk_size
= 0;
5235 DTRACE_TLS_THRKEY(key
[1].dttk_value
);
5236 key
[1].dttk_size
= 0;
5237 v
= &vstate
->dtvs_tlocals
[id
];
5239 dvar
= dtrace_dynvar(dstate
, 2, key
,
5240 v
->dtdv_type
.dtdt_size
> sizeof (uint64_t) ?
5241 v
->dtdv_type
.dtdt_size
: sizeof (uint64_t),
5242 regs
[rd
] ? DTRACE_DYNVAR_ALLOC
:
5243 DTRACE_DYNVAR_DEALLOC
, mstate
, vstate
);
5246 * Given that we're storing to thread-local data,
5247 * we need to flush our predicate cache.
5249 dtrace_set_thread_predcache(current_thread(), 0);
5254 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
5255 if (!dtrace_vcanload(
5256 (void *)(uintptr_t)regs
[rd
],
5257 &v
->dtdv_type
, mstate
, vstate
))
5260 dtrace_vcopy((void *)(uintptr_t)regs
[rd
],
5261 dvar
->dtdv_data
, &v
->dtdv_type
);
5263 *((uint64_t *)dvar
->dtdv_data
) = regs
[rd
];
5270 regs
[rd
] = (int64_t)regs
[r1
] >> regs
[r2
];
5274 dtrace_dif_subr(DIF_INSTR_SUBR(instr
), rd
,
5275 regs
, tupregs
, ttop
, mstate
, state
);
5279 if (ttop
== DIF_DTR_NREGS
) {
5280 *flags
|= CPU_DTRACE_TUPOFLOW
;
5284 if (r1
== DIF_TYPE_STRING
) {
5286 * If this is a string type and the size is 0,
5287 * we'll use the system-wide default string
5288 * size. Note that we are _not_ looking at
5289 * the value of the DTRACEOPT_STRSIZE option;
5290 * had this been set, we would expect to have
5291 * a non-zero size value in the "pushtr".
5293 tupregs
[ttop
].dttk_size
=
5294 dtrace_strlen((char *)(uintptr_t)regs
[rd
],
5295 regs
[r2
] ? regs
[r2
] :
5296 dtrace_strsize_default
) + 1;
5298 tupregs
[ttop
].dttk_size
= regs
[r2
];
5301 tupregs
[ttop
++].dttk_value
= regs
[rd
];
5305 if (ttop
== DIF_DTR_NREGS
) {
5306 *flags
|= CPU_DTRACE_TUPOFLOW
;
5310 tupregs
[ttop
].dttk_value
= regs
[rd
];
5311 tupregs
[ttop
++].dttk_size
= 0;
5319 case DIF_OP_FLUSHTS
:
5324 case DIF_OP_LDTAA
: {
5325 dtrace_dynvar_t
*dvar
;
5326 dtrace_key_t
*key
= tupregs
;
5327 uint_t nkeys
= ttop
;
5329 id
= DIF_INSTR_VAR(instr
);
5330 ASSERT(id
>= DIF_VAR_OTHER_UBASE
);
5331 id
-= DIF_VAR_OTHER_UBASE
;
5333 key
[nkeys
].dttk_value
= (uint64_t)id
;
5334 key
[nkeys
++].dttk_size
= 0;
5336 if (DIF_INSTR_OP(instr
) == DIF_OP_LDTAA
) {
5337 DTRACE_TLS_THRKEY(key
[nkeys
].dttk_value
);
5338 key
[nkeys
++].dttk_size
= 0;
5339 v
= &vstate
->dtvs_tlocals
[id
];
5341 v
= &vstate
->dtvs_globals
[id
]->dtsv_var
;
5344 dvar
= dtrace_dynvar(dstate
, nkeys
, key
,
5345 v
->dtdv_type
.dtdt_size
> sizeof (uint64_t) ?
5346 v
->dtdv_type
.dtdt_size
: sizeof (uint64_t),
5347 DTRACE_DYNVAR_NOALLOC
, mstate
, vstate
);
5354 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
5355 regs
[rd
] = (uint64_t)(uintptr_t)dvar
->dtdv_data
;
5357 regs
[rd
] = *((uint64_t *)dvar
->dtdv_data
);
5364 case DIF_OP_STTAA
: {
5365 dtrace_dynvar_t
*dvar
;
5366 dtrace_key_t
*key
= tupregs
;
5367 uint_t nkeys
= ttop
;
5369 id
= DIF_INSTR_VAR(instr
);
5370 ASSERT(id
>= DIF_VAR_OTHER_UBASE
);
5371 id
-= DIF_VAR_OTHER_UBASE
;
5373 key
[nkeys
].dttk_value
= (uint64_t)id
;
5374 key
[nkeys
++].dttk_size
= 0;
5376 if (DIF_INSTR_OP(instr
) == DIF_OP_STTAA
) {
5377 DTRACE_TLS_THRKEY(key
[nkeys
].dttk_value
);
5378 key
[nkeys
++].dttk_size
= 0;
5379 v
= &vstate
->dtvs_tlocals
[id
];
5381 v
= &vstate
->dtvs_globals
[id
]->dtsv_var
;
5384 dvar
= dtrace_dynvar(dstate
, nkeys
, key
,
5385 v
->dtdv_type
.dtdt_size
> sizeof (uint64_t) ?
5386 v
->dtdv_type
.dtdt_size
: sizeof (uint64_t),
5387 regs
[rd
] ? DTRACE_DYNVAR_ALLOC
:
5388 DTRACE_DYNVAR_DEALLOC
, mstate
, vstate
);
5393 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
) {
5394 if (!dtrace_vcanload(
5395 (void *)(uintptr_t)regs
[rd
], &v
->dtdv_type
,
5399 dtrace_vcopy((void *)(uintptr_t)regs
[rd
],
5400 dvar
->dtdv_data
, &v
->dtdv_type
);
5402 *((uint64_t *)dvar
->dtdv_data
) = regs
[rd
];
5408 case DIF_OP_ALLOCS
: {
5409 uintptr_t ptr
= P2ROUNDUP(mstate
->dtms_scratch_ptr
, 8);
5410 size_t size
= ptr
- mstate
->dtms_scratch_ptr
+ regs
[r1
];
5413 * Rounding up the user allocation size could have
5414 * overflowed large, bogus allocations (like -1ULL) to
5417 if (size
< regs
[r1
] ||
5418 !DTRACE_INSCRATCH(mstate
, size
)) {
5419 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
5424 dtrace_bzero((void *) mstate
->dtms_scratch_ptr
, size
);
5425 mstate
->dtms_scratch_ptr
+= size
;
5431 if (!dtrace_canstore(regs
[rd
], regs
[r2
],
5433 *flags
|= CPU_DTRACE_BADADDR
;
5438 if (!dtrace_canload(regs
[r1
], regs
[r2
], mstate
, vstate
))
5441 dtrace_bcopy((void *)(uintptr_t)regs
[r1
],
5442 (void *)(uintptr_t)regs
[rd
], (size_t)regs
[r2
]);
5446 if (!dtrace_canstore(regs
[rd
], 1, mstate
, vstate
)) {
5447 *flags
|= CPU_DTRACE_BADADDR
;
5451 *((uint8_t *)(uintptr_t)regs
[rd
]) = (uint8_t)regs
[r1
];
5455 if (!dtrace_canstore(regs
[rd
], 2, mstate
, vstate
)) {
5456 *flags
|= CPU_DTRACE_BADADDR
;
5461 *flags
|= CPU_DTRACE_BADALIGN
;
5465 *((uint16_t *)(uintptr_t)regs
[rd
]) = (uint16_t)regs
[r1
];
5469 if (!dtrace_canstore(regs
[rd
], 4, mstate
, vstate
)) {
5470 *flags
|= CPU_DTRACE_BADADDR
;
5475 *flags
|= CPU_DTRACE_BADALIGN
;
5479 *((uint32_t *)(uintptr_t)regs
[rd
]) = (uint32_t)regs
[r1
];
5483 if (!dtrace_canstore(regs
[rd
], 8, mstate
, vstate
)) {
5484 *flags
|= CPU_DTRACE_BADADDR
;
5490 * Darwin kmem_zalloc() called from
5491 * dtrace_difo_init() is 4-byte aligned.
5494 *flags
|= CPU_DTRACE_BADALIGN
;
5498 *((uint64_t *)(uintptr_t)regs
[rd
]) = regs
[r1
];
5503 if (!(*flags
& CPU_DTRACE_FAULT
))
5506 mstate
->dtms_fltoffs
= opc
* sizeof (dif_instr_t
);
5507 mstate
->dtms_present
|= DTRACE_MSTATE_FLTOFFS
;
5513 dtrace_action_breakpoint(dtrace_ecb_t
*ecb
)
5515 dtrace_probe_t
*probe
= ecb
->dte_probe
;
5516 dtrace_provider_t
*prov
= probe
->dtpr_provider
;
5517 char c
[DTRACE_FULLNAMELEN
+ 80], *str
;
5518 const char *msg
= "dtrace: breakpoint action at probe ";
5519 const char *ecbmsg
= " (ecb ";
5520 uintptr_t mask
= (0xf << (sizeof (uintptr_t) * NBBY
/ 4));
5521 uintptr_t val
= (uintptr_t)ecb
;
5522 int shift
= (sizeof (uintptr_t) * NBBY
) - 4, i
= 0;
5524 if (dtrace_destructive_disallow
)
5528 * It's impossible to be taking action on the NULL probe.
5530 ASSERT(probe
!= NULL
);
5533 * This is a poor man's (destitute man's?) sprintf(): we want to
5534 * print the provider name, module name, function name and name of
5535 * the probe, along with the hex address of the ECB with the breakpoint
5536 * action -- all of which we must place in the character buffer by
5539 while (*msg
!= '\0')
5542 for (str
= prov
->dtpv_name
; *str
!= '\0'; str
++)
5546 for (str
= probe
->dtpr_mod
; *str
!= '\0'; str
++)
5550 for (str
= probe
->dtpr_func
; *str
!= '\0'; str
++)
5554 for (str
= probe
->dtpr_name
; *str
!= '\0'; str
++)
5557 while (*ecbmsg
!= '\0')
5560 while (shift
>= 0) {
5561 mask
= (uintptr_t)0xf << shift
;
5563 if (val
>= ((uintptr_t)1 << shift
))
5564 c
[i
++] = "0123456789abcdef"[(val
& mask
) >> shift
];
5575 dtrace_action_panic(dtrace_ecb_t
*ecb
)
5577 dtrace_probe_t
*probe
= ecb
->dte_probe
;
5580 * It's impossible to be taking action on the NULL probe.
5582 ASSERT(probe
!= NULL
);
5584 if (dtrace_destructive_disallow
)
5587 if (dtrace_panicked
!= NULL
)
5590 if (dtrace_casptr(&dtrace_panicked
, NULL
, current_thread()) != NULL
)
5594 * We won the right to panic. (We want to be sure that only one
5595 * thread calls panic() from dtrace_probe(), and that panic() is
5596 * called exactly once.)
5598 panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)",
5599 probe
->dtpr_provider
->dtpv_name
, probe
->dtpr_mod
,
5600 probe
->dtpr_func
, probe
->dtpr_name
, (void *)ecb
);
5603 * APPLE NOTE: this was for an old Mac OS X debug feature
5604 * allowing a return from panic(). Revisit someday.
5606 dtrace_panicked
= NULL
;
static void
dtrace_action_raise(uint64_t sig)
{
    if (dtrace_destructive_disallow)
        return;

    if (sig >= NSIG) {
        DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
        return;
    }

    /*
     * raise() has a queue depth of 1 -- we ignore all subsequent
     * invocations of the raise() action.
     */

    uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());

    if (uthread && uthread->t_dtrace_sig == 0) {
        uthread->t_dtrace_sig = sig;
        act_set_astbsd(current_thread());
    }
}

static void
dtrace_action_stop(void)
{
    if (dtrace_destructive_disallow)
        return;

    uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
    if (uthread) {
        /*
         * The currently running process will be set to task_suspend
         * when it next leaves the kernel.
         */
        uthread->t_dtrace_stop = 1;
        act_set_astbsd(current_thread());
    }
}
/*
 * APPLE NOTE: pidresume works in conjunction with the dtrace stop action.
 * Both activate only when the currently running process next leaves the
 * kernel.
 */
static void
dtrace_action_pidresume(uint64_t pid)
{
    if (dtrace_destructive_disallow)
        return;

    if (kauth_cred_issuser(kauth_cred_get()) == 0) {
        DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
        return;
    }

    uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());

    /*
     * When the currently running process leaves the kernel, it attempts to
     * task_resume the process (denoted by pid), if that pid appears to have
     * been stopped by dtrace_action_stop().
     * The currently running process has a pidresume() queue depth of 1 --
     * subsequent invocations of the pidresume() action are ignored.
     */
    if (pid != 0 && uthread && uthread->t_dtrace_resumepid == 0) {
        uthread->t_dtrace_resumepid = pid;
        act_set_astbsd(current_thread());
    }
}
static void
dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val)
{
    hrtime_t now;
    volatile uint16_t *flags;
    dtrace_cpu_t *cpu = CPU;

    if (dtrace_destructive_disallow)
        return;

    flags = (volatile uint16_t *)&cpu_core[cpu->cpu_id].cpuc_dtrace_flags;

    now = dtrace_gethrtime();

    if (now - cpu->cpu_dtrace_chillmark > dtrace_chill_interval) {
        /*
         * We need to advance the mark to the current time.
         */
        cpu->cpu_dtrace_chillmark = now;
        cpu->cpu_dtrace_chilled = 0;
    }

    /*
     * Now check to see if the requested chill time would take us over
     * the maximum amount of time allowed in the chill interval.  (Or
     * worse, if the calculation itself induces overflow.)
     */
    if (cpu->cpu_dtrace_chilled + val > dtrace_chill_max ||
        cpu->cpu_dtrace_chilled + val < cpu->cpu_dtrace_chilled) {
        *flags |= CPU_DTRACE_ILLOP;
        return;
    }

    while (dtrace_gethrtime() - now < val)
        continue;

    /*
     * Normally, we assure that the value of the variable "timestamp" does
     * not change within an ECB.  The presence of chill() represents an
     * exception to this rule, however.
     */
    mstate->dtms_present &= ~DTRACE_MSTATE_TIMESTAMP;
    cpu->cpu_dtrace_chilled += val;
}
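/*
 * The guard above rejects a chill() request not only when it would exceed
 * dtrace_chill_max within the current interval, but also when the addition
 * itself wraps (chilled + val < chilled), which catches absurd arguments such
 * as (hrtime_t)-1.  A stand-alone sketch of that wrap check follows, using
 * unsigned arithmetic so the wrap is well defined; the names are invented for
 * illustration and the block is not part of this file's build.
 */
#if 0   /* illustrative sketch -- not compiled */
#include <stdint.h>
#include <stdio.h>

/* Returns 1 if 'accum + val' stays within 'max' without wrapping. */
static int
chill_would_fit(uint64_t accum, uint64_t val, uint64_t max)
{
    if (accum + val > max || accum + val < accum)
        return (0);
    return (1);
}

int
main(void)
{
    uint64_t max = 500000000;   /* e.g. 500ms worth of nanoseconds */

    printf("%d\n", chill_would_fit(100, 200, max));         /* 1 */
    printf("%d\n", chill_would_fit(100, UINT64_MAX, max));  /* 0: wraps */
    return (0);
}
#endif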
5728 dtrace_action_ustack(dtrace_mstate_t
*mstate
, dtrace_state_t
*state
,
5729 uint64_t *buf
, uint64_t arg
)
5731 int nframes
= DTRACE_USTACK_NFRAMES(arg
);
5732 int strsize
= DTRACE_USTACK_STRSIZE(arg
);
5733 uint64_t *pcs
= &buf
[1], *fps
;
5734 char *str
= (char *)&pcs
[nframes
];
5735 int size
, offs
= 0, i
, j
;
5736 uintptr_t old
= mstate
->dtms_scratch_ptr
, saved
;
5737 uint16_t *flags
= &cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
;
5741 * Should be taking a faster path if string space has not been
5744 ASSERT(strsize
!= 0);
5747 * We will first allocate some temporary space for the frame pointers.
5749 fps
= (uint64_t *)P2ROUNDUP(mstate
->dtms_scratch_ptr
, 8);
5750 size
= (uintptr_t)fps
- mstate
->dtms_scratch_ptr
+
5751 (nframes
* sizeof (uint64_t));
5753 if (!DTRACE_INSCRATCH(mstate
, (uintptr_t)size
)) {
5755 * Not enough room for our frame pointers -- need to indicate
5756 * that we ran out of scratch space.
5758 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH
);
5762 mstate
->dtms_scratch_ptr
+= size
;
5763 saved
= mstate
->dtms_scratch_ptr
;
5766 * Now get a stack with both program counters and frame pointers.
5768 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
5769 dtrace_getufpstack(buf
, fps
, nframes
+ 1);
5770 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
5773 * If that faulted, we're cooked.
5775 if (*flags
& CPU_DTRACE_FAULT
)
5779 * Now we want to walk up the stack, calling the USTACK helper. For
5780 * each iteration, we restore the scratch pointer.
5782 for (i
= 0; i
< nframes
; i
++) {
5783 mstate
->dtms_scratch_ptr
= saved
;
5785 if (offs
>= strsize
)
5788 sym
= (char *)(uintptr_t)dtrace_helper(
5789 DTRACE_HELPER_ACTION_USTACK
,
5790 mstate
, state
, pcs
[i
], fps
[i
]);
5793 * If we faulted while running the helper, we're going to
5794 * clear the fault and null out the corresponding string.
5796 if (*flags
& CPU_DTRACE_FAULT
) {
5797 *flags
&= ~CPU_DTRACE_FAULT
;
5807 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
5810 * Now copy in the string that the helper returned to us.
5812 for (j
= 0; offs
+ j
< strsize
; j
++) {
5813 if ((str
[offs
+ j
] = sym
[j
]) == '\0')
5817 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
5822 if (offs
>= strsize
) {
5824 * If we didn't have room for all of the strings, we don't
5825 * abort processing -- this needn't be a fatal error -- but we
5826 * still want to increment a counter (dts_stkstroverflows) to
5827 * allow this condition to be warned about. (If this is from
5828 * a jstack() action, it is easily tuned via jstackstrsize.)
5830 dtrace_error(&state
->dts_stkstroverflows
);
5833 while (offs
< strsize
)
5837 mstate
->dtms_scratch_ptr
= old
;
5841 * If you're looking for the epicenter of DTrace, you just found it. This
5842 * is the function called by the provider to fire a probe -- from which all
5843 * subsequent probe-context DTrace activity emanates.
5846 __dtrace_probe(dtrace_id_t id
, uint64_t arg0
, uint64_t arg1
,
5847 uint64_t arg2
, uint64_t arg3
, uint64_t arg4
)
5849 processorid_t cpuid
;
5850 dtrace_icookie_t cookie
;
5851 dtrace_probe_t
*probe
;
5852 dtrace_mstate_t mstate
;
5854 dtrace_action_t
*act
;
5858 volatile uint16_t *flags
;
5861 cookie
= dtrace_interrupt_disable();
5862 probe
= dtrace_probes
[id
- 1];
5863 cpuid
= CPU
->cpu_id
;
5864 onintr
= CPU_ON_INTR(CPU
);
5866 if (!onintr
&& probe
->dtpr_predcache
!= DTRACE_CACHEIDNONE
&&
5867 probe
->dtpr_predcache
== dtrace_get_thread_predcache(current_thread())) {
5869 * We have hit in the predicate cache; we know that
5870 * this predicate would evaluate to be false.
5872 dtrace_interrupt_enable(cookie
);
5876 if (panic_quiesce
) {
5878 * We don't trace anything if we're panicking.
5880 dtrace_interrupt_enable(cookie
);
5884 #if !defined(__APPLE__)
5885 now
= dtrace_gethrtime();
5886 vtime
= dtrace_vtime_references
!= 0;
5888 if (vtime
&& curthread
->t_dtrace_start
)
5889 curthread
->t_dtrace_vtime
+= now
- curthread
->t_dtrace_start
;
5892 * APPLE NOTE: The time spent entering DTrace and arriving
5893 * to this point, is attributed to the current thread.
5894 * Instead it should accrue to DTrace. FIXME
5896 vtime
= dtrace_vtime_references
!= 0;
5900 int64_t dtrace_accum_time
, recent_vtime
;
5901 thread_t thread
= current_thread();
5903 dtrace_accum_time
= dtrace_get_thread_tracing(thread
); /* Time spent inside DTrace so far (nanoseconds) */
5905 if (dtrace_accum_time
>= 0) {
5906 recent_vtime
= dtrace_abs_to_nano(dtrace_calc_thread_recent_vtime(thread
)); /* up to the moment thread vtime */
5908 recent_vtime
= recent_vtime
- dtrace_accum_time
; /* Time without DTrace contribution */
5910 dtrace_set_thread_vtime(thread
, recent_vtime
);
5914 now
= dtrace_gethrtime(); /* must not precede dtrace_calc_thread_recent_vtime() call! */
5915 #endif /* __APPLE__ */
5918 * APPLE NOTE: A provider may call dtrace_probe_error() in lieu of
5919 * dtrace_probe() in some circumstances. See, e.g. fasttrap_isa.c.
5920 * However the provider has no access to ECB context, so passes
5921 * 0 through "arg0" and the probe_id of the overridden probe as arg1.
5922 * Detect that here and cons up a viable state (from the probe_id).
5924 if (dtrace_probeid_error
== id
&& 0 == arg0
) {
5925 dtrace_id_t ftp_id
= (dtrace_id_t
)arg1
;
5926 dtrace_probe_t
*ftp_probe
= dtrace_probes
[ftp_id
- 1];
5927 dtrace_ecb_t
*ftp_ecb
= ftp_probe
->dtpr_ecb
;
5929 if (NULL
!= ftp_ecb
) {
5930 dtrace_state_t
*ftp_state
= ftp_ecb
->dte_state
;
5932 arg0
= (uint64_t)(uintptr_t)ftp_state
;
5933 arg1
= ftp_ecb
->dte_epid
;
5935 * args[2-4] established by caller.
5937 ftp_state
->dts_arg_error_illval
= -1; /* arg5 */
5941 mstate
.dtms_difo
= NULL
;
5942 mstate
.dtms_probe
= probe
;
5943 mstate
.dtms_strtok
= 0;
5944 mstate
.dtms_arg
[0] = arg0
;
5945 mstate
.dtms_arg
[1] = arg1
;
5946 mstate
.dtms_arg
[2] = arg2
;
5947 mstate
.dtms_arg
[3] = arg3
;
5948 mstate
.dtms_arg
[4] = arg4
;
5950 flags
= (volatile uint16_t *)&cpu_core
[cpuid
].cpuc_dtrace_flags
;
5952 for (ecb
= probe
->dtpr_ecb
; ecb
!= NULL
; ecb
= ecb
->dte_next
) {
5953 dtrace_predicate_t
*pred
= ecb
->dte_predicate
;
5954 dtrace_state_t
*state
= ecb
->dte_state
;
5955 dtrace_buffer_t
*buf
= &state
->dts_buffer
[cpuid
];
5956 dtrace_buffer_t
*aggbuf
= &state
->dts_aggbuffer
[cpuid
];
5957 dtrace_vstate_t
*vstate
= &state
->dts_vstate
;
5958 dtrace_provider_t
*prov
= probe
->dtpr_provider
;
5959 uint64_t tracememsize
= 0;
5964 * A little subtlety with the following (seemingly innocuous)
5965 * declaration of the automatic 'val': by looking at the
5966 * code, you might think that it could be declared in the
5967 * action processing loop, below. (That is, it's only used in
5968 * the action processing loop.) However, it must be declared
5969 * out of that scope because in the case of DIF expression
5970 * arguments to aggregating actions, one iteration of the
5971 * action loop will use the last iteration's value.
5979 mstate
.dtms_present
= DTRACE_MSTATE_ARGS
| DTRACE_MSTATE_PROBE
;
5980 *flags
&= ~CPU_DTRACE_ERROR
;
5982 if (prov
== dtrace_provider
) {
5984 * If dtrace itself is the provider of this probe,
5985 * we're only going to continue processing the ECB if
5986 * arg0 (the dtrace_state_t) is equal to the ECB's
5987 * creating state. (This prevents disjoint consumers
5988 * from seeing one another's metaprobes.)
5990 if (arg0
!= (uint64_t)(uintptr_t)state
)
5994 if (state
->dts_activity
!= DTRACE_ACTIVITY_ACTIVE
) {
5996 * We're not currently active. If our provider isn't
5997 * the dtrace pseudo provider, we're not interested.
5999 if (prov
!= dtrace_provider
)
6003 * Now we must further check if we are in the BEGIN
6004 * probe. If we are, we will only continue processing
6005 * if we're still in WARMUP -- if one BEGIN enabling
6006 * has invoked the exit() action, we don't want to
6007 * evaluate subsequent BEGIN enablings.
6009 if (probe
->dtpr_id
== dtrace_probeid_begin
&&
6010 state
->dts_activity
!= DTRACE_ACTIVITY_WARMUP
) {
6011 ASSERT(state
->dts_activity
==
6012 DTRACE_ACTIVITY_DRAINING
);
6017 if (ecb
->dte_cond
) {
6019 * If the dte_cond bits indicate that this
6020 * consumer is only allowed to see user-mode firings
6021 * of this probe, call the provider's dtps_usermode()
6022 * entry point to check that the probe was fired
6023 * while in a user context. Skip this ECB if that's
6026 if ((ecb
->dte_cond
& DTRACE_COND_USERMODE
) &&
6027 prov
->dtpv_pops
.dtps_usermode(prov
->dtpv_arg
,
6028 probe
->dtpr_id
, probe
->dtpr_arg
) == 0)
6032 * This is more subtle than it looks. We have to be
6033 * absolutely certain that CRED() isn't going to
6034 * change out from under us so it's only legit to
6035 * examine that structure if we're in constrained
6036 * situations. Currently, the only times we'll this
6037 * check is if a non-super-user has enabled the
6038 * profile or syscall providers -- providers that
6039 * allow visibility of all processes. For the
6040 * profile case, the check above will ensure that
6041 * we're examining a user context.
6043 if (ecb
->dte_cond
& DTRACE_COND_OWNER
) {
6046 ecb
->dte_state
->dts_cred
.dcr_cred
;
6048 #pragma unused(proc) /* __APPLE__ */
6050 ASSERT(s_cr
!= NULL
);
6053 * XXX this is hackish, but so is setting a variable
6054 * XXX in a McCarthy OR...
6056 if ((cr
= dtrace_CRED()) == NULL
||
6057 posix_cred_get(s_cr
)->cr_uid
!= posix_cred_get(cr
)->cr_uid
||
6058 posix_cred_get(s_cr
)->cr_uid
!= posix_cred_get(cr
)->cr_ruid
||
6059 posix_cred_get(s_cr
)->cr_uid
!= posix_cred_get(cr
)->cr_suid
||
6060 posix_cred_get(s_cr
)->cr_gid
!= posix_cred_get(cr
)->cr_gid
||
6061 posix_cred_get(s_cr
)->cr_gid
!= posix_cred_get(cr
)->cr_rgid
||
6062 posix_cred_get(s_cr
)->cr_gid
!= posix_cred_get(cr
)->cr_sgid
||
6063 #if !defined(__APPLE__)
6064 (proc
= ttoproc(curthread
)) == NULL
||
6065 (proc
->p_flag
& SNOCD
))
6067 1) /* APPLE NOTE: Darwin omits "No Core Dump" flag */
6068 #endif /* __APPLE__ */
6072 if (ecb
->dte_cond
& DTRACE_COND_ZONEOWNER
) {
6075 ecb
->dte_state
->dts_cred
.dcr_cred
;
6076 #pragma unused(cr, s_cr) /* __APPLE__ */
6078 ASSERT(s_cr
!= NULL
);
6080 #if !defined(__APPLE__)
6081 if ((cr
= CRED()) == NULL
||
6082 s_cr
->cr_zone
->zone_id
!=
6083 cr
->cr_zone
->zone_id
)
6086 /* APPLE NOTE: Darwin doesn't do zones. */
6087 #endif /* __APPLE__ */
6091 if (now
- state
->dts_alive
> dtrace_deadman_timeout
) {
6093 * We seem to be dead. Unless we (a) have kernel
6094 * destructive permissions (b) have expicitly enabled
6095 * destructive actions and (c) destructive actions have
6096 * not been disabled, we're going to transition into
6097 * the KILLED state, from which no further processing
6098 * on this state will be performed.
6100 if (!dtrace_priv_kernel_destructive(state
) ||
6101 !state
->dts_cred
.dcr_destructive
||
6102 dtrace_destructive_disallow
) {
6103 void *activity
= &state
->dts_activity
;
6104 dtrace_activity_t current
;
6107 current
= state
->dts_activity
;
6108 } while (dtrace_cas32(activity
, current
,
6109 DTRACE_ACTIVITY_KILLED
) != current
);
6115 if ((offs
= dtrace_buffer_reserve(buf
, ecb
->dte_needed
,
6116 ecb
->dte_alignment
, state
, &mstate
)) < 0)
6119 tomax
= buf
->dtb_tomax
;
6120 ASSERT(tomax
!= NULL
);
6122 if (ecb
->dte_size
!= 0)
6123 DTRACE_STORE(uint32_t, tomax
, offs
, ecb
->dte_epid
);
6125 mstate
.dtms_epid
= ecb
->dte_epid
;
6126 mstate
.dtms_present
|= DTRACE_MSTATE_EPID
;
6128 if (state
->dts_cred
.dcr_visible
& DTRACE_CRV_KERNEL
)
6129 mstate
.dtms_access
= DTRACE_ACCESS_KERNEL
;
6131 mstate
.dtms_access
= 0;
6134 dtrace_difo_t
*dp
= pred
->dtp_difo
;
6137 rval
= dtrace_dif_emulate(dp
, &mstate
, vstate
, state
);
6139 if (!(*flags
& CPU_DTRACE_ERROR
) && !rval
) {
6140 dtrace_cacheid_t cid
= probe
->dtpr_predcache
;
6142 if (cid
!= DTRACE_CACHEIDNONE
&& !onintr
) {
6144 * Update the predicate cache...
6146 ASSERT(cid
== pred
->dtp_cacheid
);
6148 dtrace_set_thread_predcache(current_thread(), cid
);
6155 for (act
= ecb
->dte_action
; !(*flags
& CPU_DTRACE_ERROR
) &&
6156 act
!= NULL
; act
= act
->dta_next
) {
6159 dtrace_recdesc_t
*rec
= &act
->dta_rec
;
6161 size
= rec
->dtrd_size
;
6162 valoffs
= offs
+ rec
->dtrd_offset
;
6164 if (DTRACEACT_ISAGG(act
->dta_kind
)) {
6166 dtrace_aggregation_t
*agg
;
6168 agg
= (dtrace_aggregation_t
*)act
;
6170 if ((dp
= act
->dta_difo
) != NULL
)
6171 v
= dtrace_dif_emulate(dp
,
6172 &mstate
, vstate
, state
);
6174 if (*flags
& CPU_DTRACE_ERROR
)
6178 * Note that we always pass the expression
6179 * value from the previous iteration of the
6180 * action loop. This value will only be used
6181 * if there is an expression argument to the
6182 * aggregating action, denoted by the
6183 * dtag_hasarg field.
6185 dtrace_aggregate(agg
, buf
,
6186 offs
, aggbuf
, v
, val
);
6190 switch (act
->dta_kind
) {
6191 case DTRACEACT_STOP
:
6192 if (dtrace_priv_proc_destructive(state
))
6193 dtrace_action_stop();
6196 case DTRACEACT_BREAKPOINT
:
6197 if (dtrace_priv_kernel_destructive(state
))
6198 dtrace_action_breakpoint(ecb
);
6201 case DTRACEACT_PANIC
:
6202 if (dtrace_priv_kernel_destructive(state
))
6203 dtrace_action_panic(ecb
);
6206 case DTRACEACT_STACK
:
6207 if (!dtrace_priv_kernel(state
))
6210 dtrace_getpcstack((pc_t
*)(tomax
+ valoffs
),
6211 size
/ sizeof (pc_t
), probe
->dtpr_aframes
,
6212 DTRACE_ANCHORED(probe
) ? NULL
:
6213 (uint32_t *)(uintptr_t)arg0
);
6216 case DTRACEACT_JSTACK
:
6217 case DTRACEACT_USTACK
:
6218 if (!dtrace_priv_proc(state
))
6222 * See comment in DIF_VAR_PID.
6224 if (DTRACE_ANCHORED(mstate
.dtms_probe
) &&
6226 int depth
= DTRACE_USTACK_NFRAMES(
6229 dtrace_bzero((void *)(tomax
+ valoffs
),
6230 DTRACE_USTACK_STRSIZE(rec
->dtrd_arg
)
6231 + depth
* sizeof (uint64_t));
6236 if (DTRACE_USTACK_STRSIZE(rec
->dtrd_arg
) != 0 &&
6237 curproc
->p_dtrace_helpers
!= NULL
) {
6239 * This is the slow path -- we have
6240 * allocated string space, and we're
6241 * getting the stack of a process that
6242 * has helpers. Call into a separate
6243 * routine to perform this processing.
6245 dtrace_action_ustack(&mstate
, state
,
6246 (uint64_t *)(tomax
+ valoffs
),
6251 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT
);
6252 dtrace_getupcstack((uint64_t *)
6254 DTRACE_USTACK_NFRAMES(rec
->dtrd_arg
) + 1);
6255 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT
);
6265 val
= dtrace_dif_emulate(dp
, &mstate
, vstate
, state
);
6267 if (*flags
& CPU_DTRACE_ERROR
)
6270 switch (act
->dta_kind
) {
6271 case DTRACEACT_SPECULATE
:
6272 ASSERT(buf
== &state
->dts_buffer
[cpuid
]);
6273 buf
= dtrace_speculation_buffer(state
,
6277 *flags
|= CPU_DTRACE_DROP
;
6281 offs
= dtrace_buffer_reserve(buf
,
6282 ecb
->dte_needed
, ecb
->dte_alignment
,
6286 *flags
|= CPU_DTRACE_DROP
;
6290 tomax
= buf
->dtb_tomax
;
6291 ASSERT(tomax
!= NULL
);
6293 if (ecb
->dte_size
!= 0)
6294 DTRACE_STORE(uint32_t, tomax
, offs
,
6298 case DTRACEACT_CHILL
:
6299 if (dtrace_priv_kernel_destructive(state
))
6300 dtrace_action_chill(&mstate
, val
);
6303 case DTRACEACT_RAISE
:
6304 if (dtrace_priv_proc_destructive(state
))
6305 dtrace_action_raise(val
);
6308 case DTRACEACT_PIDRESUME
: /* __APPLE__ */
6309 if (dtrace_priv_proc_destructive(state
))
6310 dtrace_action_pidresume(val
);
6313 case DTRACEACT_COMMIT
:
6317 * We need to commit our buffer state.
6320 buf
->dtb_offset
= offs
+ ecb
->dte_size
;
6321 buf
= &state
->dts_buffer
[cpuid
];
6322 dtrace_speculation_commit(state
, cpuid
, val
);
6326 case DTRACEACT_DISCARD
:
6327 dtrace_speculation_discard(state
, cpuid
, val
);
6330 case DTRACEACT_DIFEXPR
:
6331 case DTRACEACT_LIBACT
:
6332 case DTRACEACT_PRINTF
:
6333 case DTRACEACT_PRINTA
:
6334 case DTRACEACT_SYSTEM
:
6335 case DTRACEACT_FREOPEN
:
6336 case DTRACEACT_APPLEBINARY
: /* __APPLE__ */
6337 case DTRACEACT_TRACEMEM
:
6340 case DTRACEACT_TRACEMEM_DYNSIZE
:
6346 if (!dtrace_priv_kernel(state
))
6350 case DTRACEACT_USYM
:
6351 case DTRACEACT_UMOD
:
6352 case DTRACEACT_UADDR
: {
6353 if (!dtrace_priv_proc(state
))
6356 DTRACE_STORE(uint64_t, tomax
,
6357 valoffs
, (uint64_t)dtrace_proc_selfpid());
6358 DTRACE_STORE(uint64_t, tomax
,
6359 valoffs
+ sizeof (uint64_t), val
);
6364 case DTRACEACT_EXIT
: {
6366 * For the exit action, we are going to attempt
6367 * to atomically set our activity to be
6368 * draining. If this fails (either because
6369 * another CPU has beat us to the exit action,
6370 * or because our current activity is something
6371 * other than ACTIVE or WARMUP), we will
6372 * continue. This assures that the exit action
6373 * can be successfully recorded at most once
6374 * when we're in the ACTIVE state. If we're
6375 * encountering the exit() action while in
6376 * COOLDOWN, however, we want to honor the new
6377 * status code. (We know that we're the only
6378 * thread in COOLDOWN, so there is no race.)
6380 void *activity
= &state
->dts_activity
;
6381 dtrace_activity_t current
= state
->dts_activity
;
6383 if (current
== DTRACE_ACTIVITY_COOLDOWN
)
6386 if (current
!= DTRACE_ACTIVITY_WARMUP
)
6387 current
= DTRACE_ACTIVITY_ACTIVE
;
6389 if (dtrace_cas32(activity
, current
,
6390 DTRACE_ACTIVITY_DRAINING
) != current
) {
6391 *flags
|= CPU_DTRACE_DROP
;
6402 if (dp
->dtdo_rtype
.dtdt_flags
& DIF_TF_BYREF
) {
6403 uintptr_t end
= valoffs
+ size
;
6405 if (tracememsize
!= 0 &&
6406 valoffs
+ tracememsize
< end
)
6408 end
= valoffs
+ tracememsize
;
6412 if (!dtrace_vcanload((void *)(uintptr_t)val
,
6413 &dp
->dtdo_rtype
, &mstate
, vstate
))
6417 * If this is a string, we're going to only
6418 * load until we find the zero byte -- after
6419 * which we'll store zero bytes.
6421 if (dp
->dtdo_rtype
.dtdt_kind
==
6424 int intuple
= act
->dta_intuple
;
6427 for (s
= 0; s
< size
; s
++) {
6429 c
= dtrace_load8(val
++);
6431 DTRACE_STORE(uint8_t, tomax
,
6434 if (c
== '\0' && intuple
)
6441 while (valoffs
< end
) {
6442 DTRACE_STORE(uint8_t, tomax
, valoffs
++,
6443 dtrace_load8(val
++));
6453 case sizeof (uint8_t):
6454 DTRACE_STORE(uint8_t, tomax
, valoffs
, val
);
6456 case sizeof (uint16_t):
6457 DTRACE_STORE(uint16_t, tomax
, valoffs
, val
);
6459 case sizeof (uint32_t):
6460 DTRACE_STORE(uint32_t, tomax
, valoffs
, val
);
6462 case sizeof (uint64_t):
6463 DTRACE_STORE(uint64_t, tomax
, valoffs
, val
);
6467 * Any other size should have been returned by
6468 * reference, not by value.
6475 if (*flags
& CPU_DTRACE_DROP
)
6478 if (*flags
& CPU_DTRACE_FAULT
) {
6480 dtrace_action_t
*err
;
6484 if (probe
->dtpr_id
== dtrace_probeid_error
) {
6486 * There's nothing we can do -- we had an
6487 * error on the error probe. We bump an
6488 * error counter to at least indicate that
6489 * this condition happened.
6491 dtrace_error(&state
->dts_dblerrors
);
6497 * Before recursing on dtrace_probe(), we
6498 * need to explicitly clear out our start
6499 * time to prevent it from being accumulated
6500 * into t_dtrace_vtime.
6504 * Darwin sets the sign bit on t_dtrace_tracing
6505 * to suspend accumulation to it.
6507 dtrace_set_thread_tracing(current_thread(),
6508 (1ULL<<63) | dtrace_get_thread_tracing(current_thread()));
6513 * Iterate over the actions to figure out which action
6514 * we were processing when we experienced the error.
6515 * Note that act points _past_ the faulting action; if
6516 * act is ecb->dte_action, the fault was in the
6517 * predicate, if it's ecb->dte_action->dta_next it's
6518 * in action #1, and so on.
6520 for (err
= ecb
->dte_action
, ndx
= 0;
6521 err
!= act
; err
= err
->dta_next
, ndx
++)
6524 dtrace_probe_error(state
, ecb
->dte_epid
, ndx
,
6525 (mstate
.dtms_present
& DTRACE_MSTATE_FLTOFFS
) ?
6526 mstate
.dtms_fltoffs
: -1, DTRACE_FLAGS2FLT(*flags
),
6527 cpu_core
[cpuid
].cpuc_dtrace_illval
);
6533 buf
->dtb_offset
= offs
+ ecb
->dte_size
;
6536 /* FIXME: On Darwin the time spent leaving DTrace from this point to the rti is attributed
6537 to the current thread. Instead it should accrue to DTrace. */
6539 thread_t thread
= current_thread();
6540 int64_t t
= dtrace_get_thread_tracing(thread
);
6543 /* Usual case, accumulate time spent here into t_dtrace_tracing */
6544 dtrace_set_thread_tracing(thread
, t
+ (dtrace_gethrtime() - now
));
6546 /* Return from error recursion. No accumulation, just clear the sign bit on t_dtrace_tracing. */
6547 dtrace_set_thread_tracing(thread
, (~(1ULL<<63)) & t
);
6551 dtrace_interrupt_enable(cookie
);
/*
 * APPLE NOTE:  Don't allow a thread to re-enter dtrace_probe().
 * This could occur if a probe is encountered on some function in the
 * transitive closure of the call to dtrace_probe().
 * Solaris has some strong guarantees that this won't happen.
 * The Darwin implementation is not so mature as to make those guarantees.
 * Hence, the introduction of __dtrace_probe() on xnu.
 */
void
dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
    uint64_t arg2, uint64_t arg3, uint64_t arg4)
{
    thread_t thread = current_thread();

    disable_preemption();
    if (id == dtrace_probeid_error) {
        __dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
        dtrace_getipl(); /* Defeat tail-call optimization of __dtrace_probe() */
    } else if (!dtrace_get_thread_reentering(thread)) {
        dtrace_set_thread_reentering(thread, TRUE);
        __dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
        dtrace_set_thread_reentering(thread, FALSE);
    } else
        __dtrace_probe(dtrace_probeid_error, 0, id, 1, -1, DTRACEFLT_UNKNOWN);
    enable_preemption();
}
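/*
 * dtrace_probe() above guards against re-entry with a per-thread flag: the
 * flag is set around the call into __dtrace_probe(), and a nested firing on
 * the same thread is redirected to the error probe instead of recursing.  The
 * user-space sketch below shows the same guard pattern using a thread-local
 * flag; the names are invented for illustration and the block is not part of
 * this file's build.
 */
#if 0   /* illustrative sketch -- not compiled */
#include <stdio.h>

static _Thread_local int probe_reentering;

static void
fire_probe(int id)
{
    if (probe_reentering) {
        /* Nested firing on this thread: record an error, don't recurse. */
        printf("dropped re-entrant probe %d\n", id);
        return;
    }

    probe_reentering = 1;
    printf("probe %d fired\n", id);
    fire_probe(id + 1);     /* any probe hit from here is dropped */
    probe_reentering = 0;
}

int
main(void)
{
    fire_probe(1);
    return (0);
}
#endif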
/*
 * DTrace Probe Hashing Functions
 *
 * The functions in this section (and indeed, the functions in remaining
 * sections) are not _called_ from probe context.  (Any exceptions to this are
 * marked with a "Note:".)  Rather, they are called from elsewhere in the
 * DTrace framework to look-up probes in, add probes to and remove probes from
 * the DTrace probe hashes.  (Each probe is hashed by each element of the
 * probe tuple -- allowing for fast lookups, regardless of what was
 * specified.)
 */
static uint_t
dtrace_hash_str(const char *p)
{
    unsigned int g;
    uint_t hval = 0;

    while (*p) {
        hval = (hval << 4) + *p++;
        if ((g = (hval & 0xf0000000)) != 0)
            hval ^= g >> 24;
        hval &= ~g;
    }
    return (hval);
}
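/*
 * dtrace_hash_str() is essentially the classic ELF-style string hash: shift
 * the accumulator left by four, add the next character, then fold any bits
 * that reach the top nibble back down so long strings keep mixing.  Because
 * the probe hash tables below are always a power of two in size, a bucket
 * index is then just (hash & mask).  A self-contained user-space version
 * follows; it is illustrative only and is not part of this file's build.
 */
#if 0   /* illustrative sketch -- not compiled */
#include <stdio.h>

static unsigned int
str_hash(const char *p)
{
    unsigned int g, hval = 0;

    while (*p) {
        hval = (hval << 4) + *p++;
        if ((g = (hval & 0xf0000000)) != 0)
            hval ^= g >> 24;
        hval &= ~g;
    }
    return (hval);
}

int
main(void)
{
    unsigned int mask = 16 - 1; /* table size must be a power of two */

    printf("bucket for \"fbt\":     %u\n", str_hash("fbt") & mask);
    printf("bucket for \"syscall\": %u\n", str_hash("syscall") & mask);
    return (0);
}
#endif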
static dtrace_hash_t *
dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs)
{
    dtrace_hash_t *hash = kmem_zalloc(sizeof (dtrace_hash_t), KM_SLEEP);

    hash->dth_stroffs = stroffs;
    hash->dth_nextoffs = nextoffs;
    hash->dth_prevoffs = prevoffs;

    hash->dth_size = 1;
    hash->dth_mask = hash->dth_size - 1;

    hash->dth_tab = kmem_zalloc(hash->dth_size *
        sizeof (dtrace_hashbucket_t *), KM_SLEEP);

    return (hash);
}
/*
 * APPLE NOTE: dtrace_hash_destroy is not used.
 * It is called by dtrace_detach which is not
 * currently implemented.  Revisit someday.
 */
#if !defined(__APPLE__)
static void
dtrace_hash_destroy(dtrace_hash_t *hash)
{
    int i;

    for (i = 0; i < hash->dth_size; i++)
        ASSERT(hash->dth_tab[i] == NULL);

    kmem_free(hash->dth_tab,
        hash->dth_size * sizeof (dtrace_hashbucket_t *));
    kmem_free(hash, sizeof (dtrace_hash_t));
}
#endif /* __APPLE__ */
static void
dtrace_hash_resize(dtrace_hash_t *hash)
{
    int size = hash->dth_size, i, ndx;
    int new_size = hash->dth_size << 1;
    int new_mask = new_size - 1;
    dtrace_hashbucket_t **new_tab, *bucket, *next;

    ASSERT((new_size & new_mask) == 0);

    new_tab = kmem_zalloc(new_size * sizeof (void *), KM_SLEEP);

    for (i = 0; i < size; i++) {
        for (bucket = hash->dth_tab[i]; bucket != NULL; bucket = next) {
            dtrace_probe_t *probe = bucket->dthb_chain;

            ASSERT(probe != NULL);
            ndx = DTRACE_HASHSTR(hash, probe) & new_mask;

            next = bucket->dthb_next;
            bucket->dthb_next = new_tab[ndx];
            new_tab[ndx] = bucket;
        }
    }

    kmem_free(hash->dth_tab, hash->dth_size * sizeof (void *));
    hash->dth_tab = new_tab;
    hash->dth_size = new_size;
    hash->dth_mask = new_mask;
}
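/*
 * Because the table size is always a power of two, the resize above just
 * doubles dth_size, recomputes the mask, and moves every existing bucket to
 * (hash & new_mask) in the new array -- no per-entry modulo is needed.  The
 * user-space sketch below shows the same doubling-and-rehash step for a table
 * of plain integer keys; the names are invented for illustration and the
 * block is not part of this file's build.
 */
#if 0   /* illustrative sketch -- not compiled */
#include <stdlib.h>

struct node {
    unsigned int key;
    struct node *next;
};

static void
table_resize(struct node ***tabp, int *sizep)
{
    int size = *sizep, new_size = size << 1, new_mask = new_size - 1, i;
    struct node **new_tab = calloc(new_size, sizeof (struct node *));
    struct node *n, *next;

    for (i = 0; i < size; i++) {
        for (n = (*tabp)[i]; n != NULL; n = next) {
            int ndx = n->key & new_mask;

            next = n->next;
            n->next = new_tab[ndx];
            new_tab[ndx] = n;
        }
    }

    free(*tabp);
    *tabp = new_tab;
    *sizep = new_size;
}

/*
 * Usage: call table_resize(&tab, &size) once the entry count outgrows the
 * table, much as dtrace_hash_add() below resizes when
 * (dth_nbuckets >> 1) > dth_size.
 */
#endif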
static void
dtrace_hash_add(dtrace_hash_t *hash, dtrace_probe_t *new)
{
    int hashval = DTRACE_HASHSTR(hash, new);
    int ndx = hashval & hash->dth_mask;
    dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
    dtrace_probe_t **nextp, **prevp;

    for (; bucket != NULL; bucket = bucket->dthb_next) {
        if (DTRACE_HASHEQ(hash, bucket->dthb_chain, new))
            goto add;
    }

    if ((hash->dth_nbuckets >> 1) > hash->dth_size) {
        dtrace_hash_resize(hash);
        dtrace_hash_add(hash, new);
        return;
    }

    bucket = kmem_zalloc(sizeof (dtrace_hashbucket_t), KM_SLEEP);
    bucket->dthb_next = hash->dth_tab[ndx];
    hash->dth_tab[ndx] = bucket;
    hash->dth_nbuckets++;

add:
    nextp = DTRACE_HASHNEXT(hash, new);
    ASSERT(*nextp == NULL && *(DTRACE_HASHPREV(hash, new)) == NULL);
    *nextp = bucket->dthb_chain;

    if (bucket->dthb_chain != NULL) {
        prevp = DTRACE_HASHPREV(hash, bucket->dthb_chain);
        ASSERT(*prevp == NULL);
        *prevp = new;
    }

    bucket->dthb_chain = new;
    bucket->dthb_len++;
}
static dtrace_probe_t *
dtrace_hash_lookup(dtrace_hash_t *hash, dtrace_probe_t *template)
{
    int hashval = DTRACE_HASHSTR(hash, template);
    int ndx = hashval & hash->dth_mask;
    dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

    for (; bucket != NULL; bucket = bucket->dthb_next) {
        if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
            return (bucket->dthb_chain);
    }

    return (NULL);
}

static int
dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template)
{
    int hashval = DTRACE_HASHSTR(hash, template);
    int ndx = hashval & hash->dth_mask;
    dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

    for (; bucket != NULL; bucket = bucket->dthb_next) {
        if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
            return (bucket->dthb_len);
    }

    return (0);
}
static void
dtrace_hash_remove(dtrace_hash_t *hash, dtrace_probe_t *probe)
{
    int ndx = DTRACE_HASHSTR(hash, probe) & hash->dth_mask;
    dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];

    dtrace_probe_t **prevp = DTRACE_HASHPREV(hash, probe);
    dtrace_probe_t **nextp = DTRACE_HASHNEXT(hash, probe);

    /*
     * Find the bucket that we're removing this probe from.
     */
    for (; bucket != NULL; bucket = bucket->dthb_next) {
        if (DTRACE_HASHEQ(hash, bucket->dthb_chain, probe))
            break;
    }

    ASSERT(bucket != NULL);

    if (*prevp == NULL) {
        if (*nextp == NULL) {
            /*
             * The removed probe was the only probe on this
             * bucket; we need to remove the bucket.
             */
            dtrace_hashbucket_t *b = hash->dth_tab[ndx];

            ASSERT(bucket->dthb_chain == probe);
            ASSERT(b != NULL);

            if (b == bucket) {
                hash->dth_tab[ndx] = bucket->dthb_next;
            } else {
                while (b->dthb_next != bucket)
                    b = b->dthb_next;
                b->dthb_next = bucket->dthb_next;
            }

            ASSERT(hash->dth_nbuckets > 0);
            hash->dth_nbuckets--;
            kmem_free(bucket, sizeof (dtrace_hashbucket_t));
            return;
        }

        bucket->dthb_chain = *nextp;
    } else {
        *(DTRACE_HASHNEXT(hash, *prevp)) = *nextp;
    }

    if (*nextp != NULL)
        *(DTRACE_HASHPREV(hash, *nextp)) = *prevp;
}
/*
 * DTrace Utility Functions
 *
 * These are random utility functions that are _not_ called from probe context.
 */
static int
dtrace_badattr(const dtrace_attribute_t *a)
{
    return (a->dtat_name > DTRACE_STABILITY_MAX ||
        a->dtat_data > DTRACE_STABILITY_MAX ||
        a->dtat_class > DTRACE_CLASS_MAX);
}

/*
 * Return a duplicate copy of a string.  If the specified string is NULL,
 * this function returns a zero-length string.
 * APPLE NOTE: Darwin employs size bounded string operation.
 */
static char *
dtrace_strdup(const char *str)
{
    size_t bufsize = (str != NULL ? strlen(str) : 0) + 1;
    char *new = kmem_zalloc(bufsize, KM_SLEEP);

    if (str != NULL)
        (void) strlcpy(new, str, bufsize);

    return (new);
}
#define DTRACE_ISALPHA(c)   \
    (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))

static int
dtrace_badname(const char *s)
{
    char c;

    if (s == NULL || (c = *s++) == '\0')
        return (0);

    if (!DTRACE_ISALPHA(c) && c != '-' && c != '_' && c != '.')
        return (1);

    while ((c = *s++) != '\0') {
        if (!DTRACE_ISALPHA(c) && (c < '0' || c > '9') &&
            c != '-' && c != '_' && c != '.' && c != '`')
            return (1);
    }

    return (0);
}
6856 dtrace_cred2priv(cred_t
*cr
, uint32_t *privp
, uid_t
*uidp
, zoneid_t
*zoneidp
)
6860 if (cr
== NULL
|| PRIV_POLICY_ONLY(cr
, PRIV_ALL
, B_FALSE
)) {
6862 * For DTRACE_PRIV_ALL, the uid and zoneid don't matter.
6864 priv
= DTRACE_PRIV_ALL
;
6866 *uidp
= crgetuid(cr
);
6867 *zoneidp
= crgetzoneid(cr
);
6870 if (PRIV_POLICY_ONLY(cr
, PRIV_DTRACE_KERNEL
, B_FALSE
))
6871 priv
|= DTRACE_PRIV_KERNEL
| DTRACE_PRIV_USER
;
6872 else if (PRIV_POLICY_ONLY(cr
, PRIV_DTRACE_USER
, B_FALSE
))
6873 priv
|= DTRACE_PRIV_USER
;
6874 if (PRIV_POLICY_ONLY(cr
, PRIV_DTRACE_PROC
, B_FALSE
))
6875 priv
|= DTRACE_PRIV_PROC
;
6876 if (PRIV_POLICY_ONLY(cr
, PRIV_PROC_OWNER
, B_FALSE
))
6877 priv
|= DTRACE_PRIV_OWNER
;
6878 if (PRIV_POLICY_ONLY(cr
, PRIV_PROC_ZONE
, B_FALSE
))
6879 priv
|= DTRACE_PRIV_ZONEOWNER
;
6885 #ifdef DTRACE_ERRDEBUG
6887 dtrace_errdebug(const char *str
)
6889 int hval
= dtrace_hash_str(str
) % DTRACE_ERRHASHSZ
;
6892 lck_mtx_lock(&dtrace_errlock
);
6893 dtrace_errlast
= str
;
6894 dtrace_errthread
= (kthread_t
*)current_thread();
6896 while (occupied
++ < DTRACE_ERRHASHSZ
) {
6897 if (dtrace_errhash
[hval
].dter_msg
== str
) {
6898 dtrace_errhash
[hval
].dter_count
++;
6902 if (dtrace_errhash
[hval
].dter_msg
!= NULL
) {
6903 hval
= (hval
+ 1) % DTRACE_ERRHASHSZ
;
6907 dtrace_errhash
[hval
].dter_msg
= str
;
6908 dtrace_errhash
[hval
].dter_count
= 1;
6912 panic("dtrace: undersized error hash");
6914 lck_mtx_unlock(&dtrace_errlock
);
/*
 * DTrace Matching Functions
 *
 * These functions are used to match groups of probes, given some elements of
 * a probe tuple, or some globbed expressions for elements of a probe tuple.
 */
static int
dtrace_match_priv(const dtrace_probe_t *prp, uint32_t priv, uid_t uid,
    zoneid_t zoneid)
{
    if (priv != DTRACE_PRIV_ALL) {
        uint32_t ppriv = prp->dtpr_provider->dtpv_priv.dtpp_flags;
        uint32_t match = priv & ppriv;

        /*
         * No PRIV_DTRACE_* privileges...
         */
        if ((priv & (DTRACE_PRIV_PROC | DTRACE_PRIV_USER |
            DTRACE_PRIV_KERNEL)) == 0)
            return (0);

        /*
         * No matching bits, but there were bits to match...
         */
        if (match == 0 && ppriv != 0)
            return (0);

        /*
         * Need to have permissions to the process, but don't...
         */
        if (((ppriv & ~match) & DTRACE_PRIV_OWNER) != 0 &&
            uid != prp->dtpr_provider->dtpv_priv.dtpp_uid) {
            return (0);
        }

        /*
         * Need to be in the same zone unless we possess the
         * privilege to examine all zones.
         */
        if (((ppriv & ~match) & DTRACE_PRIV_ZONEOWNER) != 0 &&
            zoneid != prp->dtpr_provider->dtpv_priv.dtpp_zoneid) {
            return (0);
        }
    }

    return (1);
}
/*
 * dtrace_match_probe compares a dtrace_probe_t to a pre-compiled key, which
 * consists of input pattern strings and an ops-vector to evaluate them.
 * This function returns >0 for match, 0 for no match, and <0 for error.
 */
static int
dtrace_match_probe(const dtrace_probe_t *prp, const dtrace_probekey_t *pkp,
    uint32_t priv, uid_t uid, zoneid_t zoneid)
{
    dtrace_provider_t *pvp = prp->dtpr_provider;
    int rv;

    if (pvp->dtpv_defunct)
        return (0);

    if ((rv = pkp->dtpk_pmatch(pvp->dtpv_name, pkp->dtpk_prov, 0)) <= 0)
        return (rv);

    if ((rv = pkp->dtpk_mmatch(prp->dtpr_mod, pkp->dtpk_mod, 0)) <= 0)
        return (rv);

    if ((rv = pkp->dtpk_fmatch(prp->dtpr_func, pkp->dtpk_func, 0)) <= 0)
        return (rv);

    if ((rv = pkp->dtpk_nmatch(prp->dtpr_name, pkp->dtpk_name, 0)) <= 0)
        return (rv);

    if (dtrace_match_priv(prp, priv, uid, zoneid) == 0)
        return (0);

    return (rv);
}
/*
 * dtrace_match_glob() is a safe kernel implementation of the gmatch(3GEN)
 * interface for matching a glob pattern 'p' to an input string 's'.  Unlike
 * libc's version, the kernel version only applies to 8-bit ASCII strings.
 * In addition, all of the recursion cases except for '*' matching have been
 * unwound.  For '*', we still implement recursive evaluation, but a depth
 * counter is maintained and matching is aborted if we recurse too deep.
 * The function returns 0 if no match, >0 if match, and <0 if recursion error.
 */
static int
dtrace_match_glob(const char *s, const char *p, int depth)
{
    const char *olds;
    char s1, c;
    int gs;

    if (depth > DTRACE_PROBEKEY_MAXDEPTH)
        return (-1);

    if (s == NULL)
        s = ""; /* treat NULL as empty string */

top:
    olds = s;
    s1 = *s++;

    if (p == NULL)
        p = "";

    if ((c = *p++) == '\0')
        return (s1 == '\0');

    switch (c) {
    case '[': {
        int ok = 0, notflag = 0;
        char lc = '\0';

        if (s1 == '\0')
            return (0);

        if (*p == '!') {
            notflag = 1;
            p++;
        }

        if ((c = *p++) == '\0')
            return (0);

        do {
            if (c == '-' && lc != '\0' && *p != ']') {
                if ((c = *p++) == '\0')
                    return (0);
                if (c == '\\' && (c = *p++) == '\0')
                    return (0);

                if (notflag) {
                    if (s1 < lc || s1 > c)
                        ok++;
                    else
                        return (0);
                } else if (lc <= s1 && s1 <= c)
                    ok++;

            } else if (c == '\\' && (c = *p++) == '\0')
                return (0);

            lc = c; /* save left-hand 'c' for next iteration */

            if (notflag) {
                if (s1 != c)
                    ok++;
                else
                    return (0);
            } else if (s1 == c)
                ok++;

            if ((c = *p++) == '\0')
                return (0);

        } while (c != ']');

        if (ok)
            goto top;

        return (0);
    }

    case '\\':
        if ((c = *p++) == '\0')
            return (0);
        /*FALLTHRU*/

    default:
        if (c != s1)
            return (0);
        /*FALLTHRU*/

    case '?':
        if (s1 != '\0')
            goto top;
        return (0);

    case '*':
        while (*p == '*')
            p++; /* consecutive *'s are identical to a single one */

        if (*p == '\0')
            return (1);

        for (s = olds; *s != '\0'; s++) {
            if ((gs = dtrace_match_glob(s, p, depth + 1)) != 0)
                return (gs);
        }

        return (0);
    }
}
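/*
 * The matcher above supports the full gmatch() syntax ('*', '?', character
 * classes and '\\' escapes) with bounded recursion.  The user-space sketch
 * below implements just the '*' and '?' subset to show how the recursive '*'
 * case walks every suffix of the input; it is a simplified illustration only
 * (the names are invented) and is not part of this file's build.
 */
#if 0   /* illustrative sketch -- not compiled */
#include <stdio.h>

#define MAXDEPTH    32

static int
glob_match(const char *s, const char *p, int depth)
{
    if (depth > MAXDEPTH)
        return (-1);

    for (;;) {
        char c = *p++;

        if (c == '\0')
            return (*s == '\0');

        if (c == '*') {
            while (*p == '*')
                p++;    /* collapse consecutive stars */
            if (*p == '\0')
                return (1);
            for (; *s != '\0'; s++) {
                int gs = glob_match(s, p, depth + 1);
                if (gs != 0)
                    return (gs);
            }
            return (0);
        }

        if (*s == '\0' || (c != '?' && c != *s))
            return (0);
        s++;
    }
}

int
main(void)
{
    printf("%d\n", glob_match("mach_msg_trap", "mach_*_trap", 0)); /* 1 */
    printf("%d\n", glob_match("read", "write*", 0));               /* 0 */
    return (0);
}
#endif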
static int
dtrace_match_string(const char *s, const char *p, int depth)
{
#pragma unused(depth) /* __APPLE__ */

    /* APPLE NOTE: Darwin employs size bounded string operation. */
    return (s != NULL && strncmp(s, p, strlen(s) + 1) == 0);
}

static int
dtrace_match_nul(const char *s, const char *p, int depth)
{
#pragma unused(s, p, depth) /* __APPLE__ */
    return (1); /* always match the empty pattern */
}

static int
dtrace_match_nonzero(const char *s, const char *p, int depth)
{
#pragma unused(p, depth) /* __APPLE__ */
    return (s != NULL && s[0] != '\0');
}
static int
dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
    zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *), void *arg)
{
	dtrace_probe_t template, *probe;
	dtrace_hash_t *hash = NULL;
	int len, rc, best = INT_MAX, nmatched = 0;
	dtrace_id_t i;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	/*
	 * If the probe ID is specified in the key, just lookup by ID and
	 * invoke the match callback once if a matching probe is found.
	 */
	if (pkp->dtpk_id != DTRACE_IDNONE) {
		if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL &&
		    dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) {
			if ((*matched)(probe, arg) == DTRACE_MATCH_FAIL)
				return (DTRACE_MATCH_FAIL);
			nmatched++;
		}
		return (nmatched);
	}

	template.dtpr_mod = (char *)(uintptr_t)pkp->dtpk_mod;
	template.dtpr_func = (char *)(uintptr_t)pkp->dtpk_func;
	template.dtpr_name = (char *)(uintptr_t)pkp->dtpk_name;

	/*
	 * We want to find the most distinct of the module name, function
	 * name, and name.  So for each one that is not a glob pattern or
	 * empty string, we perform a lookup in the corresponding hash and
	 * use the hash table with the fewest collisions to do our search.
	 */
	if (pkp->dtpk_mmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_bymod, &template)) < best) {
		best = len;
		hash = dtrace_bymod;
	}

	if (pkp->dtpk_fmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_byfunc, &template)) < best) {
		best = len;
		hash = dtrace_byfunc;
	}

	if (pkp->dtpk_nmatch == &dtrace_match_string &&
	    (len = dtrace_hash_collisions(dtrace_byname, &template)) < best) {
		best = len;
		hash = dtrace_byname;
	}

	/*
	 * If we did not select a hash table, iterate over every probe and
	 * invoke our callback for each one that matches our input probe key.
	 */
	if (hash == NULL) {
		for (i = 0; i < (dtrace_id_t)dtrace_nprobes; i++) {
			if ((probe = dtrace_probes[i]) == NULL ||
			    dtrace_match_probe(probe, pkp, priv, uid,
			    zoneid) <= 0)
				continue;

			nmatched++;

			if ((rc = (*matched)(probe, arg)) != DTRACE_MATCH_NEXT) {
				if (rc == DTRACE_MATCH_FAIL)
					return (DTRACE_MATCH_FAIL);
				break;
			}
		}

		return (nmatched);
	}

	/*
	 * If we selected a hash table, iterate over each probe of the same key
	 * name and invoke the callback for every probe that matches the other
	 * attributes of our input probe key.
	 */
	for (probe = dtrace_hash_lookup(hash, &template); probe != NULL;
	    probe = *(DTRACE_HASHNEXT(hash, probe))) {

		if (dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0)
			continue;

		nmatched++;

		if ((rc = (*matched)(probe, arg)) != DTRACE_MATCH_NEXT) {
			if (rc == DTRACE_MATCH_FAIL)
				return (DTRACE_MATCH_FAIL);
			break;
		}
	}

	return (nmatched);
}
/*
 * Return the function pointer dtrace_probecmp() should use to compare the
 * specified pattern with a string.  For NULL or empty patterns, we select
 * dtrace_match_nul().  For glob pattern strings, we use dtrace_match_glob().
 * For non-empty non-glob strings, we use dtrace_match_string().
 */
static dtrace_probekey_f *
dtrace_probekey_func(const char *p)
{
	char c;

	if (p == NULL || *p == '\0')
		return (&dtrace_match_nul);

	while ((c = *p++) != '\0') {
		if (c == '[' || c == '?' || c == '*' || c == '\\')
			return (&dtrace_match_glob);
	}

	return (&dtrace_match_string);
}
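/*
 * Illustrative sketch (not part of the original source): the matcher that
 * dtrace_probekey_func() selects for a few representative patterns, compiled
 * out and shown only for exposition.
 */
#if 0
static void
dtrace_probekey_func_example(void)
{
	ASSERT(dtrace_probekey_func(NULL) == &dtrace_match_nul);	/* missing element */
	ASSERT(dtrace_probekey_func("") == &dtrace_match_nul);		/* empty element */
	ASSERT(dtrace_probekey_func("read*") == &dtrace_match_glob);	/* glob pattern */
	ASSERT(dtrace_probekey_func("read") == &dtrace_match_string);	/* exact string */
}
#endif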
/*
 * Build a probe comparison key for use with dtrace_match_probe() from the
 * given probe description.  By convention, a null key only matches anchored
 * probes: if each field is the empty string, reset dtpk_fmatch to
 * dtrace_match_nonzero().
 */
static void
dtrace_probekey(const dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp)
{
	pkp->dtpk_prov = pdp->dtpd_provider;
	pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider);

	pkp->dtpk_mod = pdp->dtpd_mod;
	pkp->dtpk_mmatch = dtrace_probekey_func(pdp->dtpd_mod);

	pkp->dtpk_func = pdp->dtpd_func;
	pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func);

	pkp->dtpk_name = pdp->dtpd_name;
	pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name);

	pkp->dtpk_id = pdp->dtpd_id;

	if (pkp->dtpk_id == DTRACE_IDNONE &&
	    pkp->dtpk_pmatch == &dtrace_match_nul &&
	    pkp->dtpk_mmatch == &dtrace_match_nul &&
	    pkp->dtpk_fmatch == &dtrace_match_nul &&
	    pkp->dtpk_nmatch == &dtrace_match_nul)
		pkp->dtpk_fmatch = &dtrace_match_nonzero;
}
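/*
 * Illustrative sketch (not part of the original source): building a probe
 * key from a probe description the way a consumer-facing path might.  The
 * description values below are hypothetical.
 */
#if 0
static void
dtrace_probekey_example(void)
{
	dtrace_probedesc_t desc;
	dtrace_probekey_t pkey;

	bzero(&desc, sizeof (desc));
	(void) strlcpy(desc.dtpd_provider, "syscall", DTRACE_PROVNAMELEN);
	(void) strlcpy(desc.dtpd_func, "read*", DTRACE_FUNCNAMELEN);
	(void) strlcpy(desc.dtpd_name, "entry", DTRACE_NAMELEN);

	dtrace_probekey(&desc, &pkey);

	/*
	 * pkey.dtpk_pmatch and dtpk_nmatch are now dtrace_match_string,
	 * dtpk_fmatch is dtrace_match_glob, and the empty module element
	 * selects dtrace_match_nul.
	 */
}
#endif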
/*
 * DTrace Provider-to-Framework API Functions
 *
 * These functions implement much of the Provider-to-Framework API, as
 * described in <sys/dtrace.h>.  The parts of the API not in this section are
 * the functions in the API for probe management (found below), and
 * dtrace_probe() itself (found above).
 */
/*
 * Register the calling provider with the DTrace framework.  This should
 * generally be called by DTrace providers in their attach(9E) entry point.
 */
int
dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv,
    cred_t *cr, const dtrace_pops_t *pops, void *arg, dtrace_provider_id_t *idp)
{
	dtrace_provider_t *provider;
7314 if (name
== NULL
|| pap
== NULL
|| pops
== NULL
|| idp
== NULL
) {
7315 cmn_err(CE_WARN
, "failed to register provider '%s': invalid "
7316 "arguments", name
? name
: "<NULL>");
7320 if (name
[0] == '\0' || dtrace_badname(name
)) {
7321 cmn_err(CE_WARN
, "failed to register provider '%s': invalid "
7322 "provider name", name
);
7326 if ((pops
->dtps_provide
== NULL
&& pops
->dtps_provide_module
== NULL
) ||
7327 pops
->dtps_enable
== NULL
|| pops
->dtps_disable
== NULL
||
7328 pops
->dtps_destroy
== NULL
||
7329 ((pops
->dtps_resume
== NULL
) != (pops
->dtps_suspend
== NULL
))) {
7330 cmn_err(CE_WARN
, "failed to register provider '%s': invalid "
7331 "provider ops", name
);
7335 if (dtrace_badattr(&pap
->dtpa_provider
) ||
7336 dtrace_badattr(&pap
->dtpa_mod
) ||
7337 dtrace_badattr(&pap
->dtpa_func
) ||
7338 dtrace_badattr(&pap
->dtpa_name
) ||
7339 dtrace_badattr(&pap
->dtpa_args
)) {
7340 cmn_err(CE_WARN
, "failed to register provider '%s': invalid "
7341 "provider attributes", name
);
7345 if (priv
& ~DTRACE_PRIV_ALL
) {
7346 cmn_err(CE_WARN
, "failed to register provider '%s': invalid "
7347 "privilege attributes", name
);
7351 if ((priv
& DTRACE_PRIV_KERNEL
) &&
7352 (priv
& (DTRACE_PRIV_USER
| DTRACE_PRIV_OWNER
)) &&
7353 pops
->dtps_usermode
== NULL
) {
7354 cmn_err(CE_WARN
, "failed to register provider '%s': need "
7355 "dtps_usermode() op for given privilege attributes", name
);
7359 provider
= kmem_zalloc(sizeof (dtrace_provider_t
), KM_SLEEP
);
7361 /* APPLE NOTE: Darwin employs size bounded string operation. */
7363 size_t bufsize
= strlen(name
) + 1;
7364 provider
->dtpv_name
= kmem_alloc(bufsize
, KM_SLEEP
);
7365 (void) strlcpy(provider
->dtpv_name
, name
, bufsize
);
7368 provider
->dtpv_attr
= *pap
;
7369 provider
->dtpv_priv
.dtpp_flags
= priv
;
7371 provider
->dtpv_priv
.dtpp_uid
= crgetuid(cr
);
7372 provider
->dtpv_priv
.dtpp_zoneid
= crgetzoneid(cr
);
7374 provider
->dtpv_pops
= *pops
;
7376 if (pops
->dtps_provide
== NULL
) {
7377 ASSERT(pops
->dtps_provide_module
!= NULL
);
7378 provider
->dtpv_pops
.dtps_provide
=
7379 (void (*)(void *, const dtrace_probedesc_t
*))dtrace_nullop
;
7382 if (pops
->dtps_provide_module
== NULL
) {
7383 ASSERT(pops
->dtps_provide
!= NULL
);
7384 provider
->dtpv_pops
.dtps_provide_module
=
7385 (void (*)(void *, struct modctl
*))dtrace_nullop
;
7388 if (pops
->dtps_suspend
== NULL
) {
7389 ASSERT(pops
->dtps_resume
== NULL
);
7390 provider
->dtpv_pops
.dtps_suspend
=
7391 (void (*)(void *, dtrace_id_t
, void *))dtrace_nullop
;
7392 provider
->dtpv_pops
.dtps_resume
=
7393 (void (*)(void *, dtrace_id_t
, void *))dtrace_nullop
;
7396 provider
->dtpv_arg
= arg
;
7397 *idp
= (dtrace_provider_id_t
)provider
;
7399 if (pops
== &dtrace_provider_ops
) {
7400 lck_mtx_assert(&dtrace_provider_lock
, LCK_MTX_ASSERT_OWNED
);
7401 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
7402 ASSERT(dtrace_anon
.dta_enabling
== NULL
);
7405 * We make sure that the DTrace provider is at the head of
7406 * the provider chain.
7408 provider
->dtpv_next
= dtrace_provider
;
7409 dtrace_provider
= provider
;
7413 lck_mtx_lock(&dtrace_provider_lock
);
7414 lck_mtx_lock(&dtrace_lock
);
7417 * If there is at least one provider registered, we'll add this
7418 * provider after the first provider.
7420 if (dtrace_provider
!= NULL
) {
7421 provider
->dtpv_next
= dtrace_provider
->dtpv_next
;
7422 dtrace_provider
->dtpv_next
= provider
;
7424 dtrace_provider
= provider
;
7427 if (dtrace_retained
!= NULL
) {
7428 dtrace_enabling_provide(provider
);
7431 * Now we need to call dtrace_enabling_matchall() -- which
7432 * will acquire cpu_lock and dtrace_lock. We therefore need
7433 * to drop all of our locks before calling into it...
7435 lck_mtx_unlock(&dtrace_lock
);
7436 lck_mtx_unlock(&dtrace_provider_lock
);
7437 dtrace_enabling_matchall();
	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_provider_lock);

	return (0);
}
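/*
 * Illustrative sketch (not part of the original source): the general shape
 * of a provider registering itself from its attach path.  The provider
 * name, attribute and ops structures, and the attach function itself are
 * hypothetical.
 */
#if 0
static dtrace_pattr_t example_attr;		/* stability attributes */
static dtrace_pops_t example_pops;		/* provider ops vector */
static dtrace_provider_id_t example_id;

static int
example_provider_attach(void)
{
	if (dtrace_register("example", &example_attr, DTRACE_PRIV_KERNEL,
	    NULL, &example_pops, NULL, &example_id) != 0)
		return (-1);

	return (0);
}
#endif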
/*
 * Unregister the specified provider from the DTrace framework.  This should
 * generally be called by DTrace providers in their detach(9E) entry point.
 */
int
dtrace_unregister(dtrace_provider_id_t id)
{
	dtrace_provider_t *old = (dtrace_provider_t *)id;
	dtrace_provider_t *prev = NULL;
	int i, self = 0;
	dtrace_probe_t *probe, *first = NULL;
7460 if (old
->dtpv_pops
.dtps_enable
==
7461 (int (*)(void *, dtrace_id_t
, void *))dtrace_enable_nullop
) {
7463 * If DTrace itself is the provider, we're called with locks
7466 ASSERT(old
== dtrace_provider
);
7467 ASSERT(dtrace_devi
!= NULL
);
7468 lck_mtx_assert(&dtrace_provider_lock
, LCK_MTX_ASSERT_OWNED
);
7469 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
7472 if (dtrace_provider
->dtpv_next
!= NULL
) {
7474 * There's another provider here; return failure.
7479 lck_mtx_lock(&dtrace_provider_lock
);
7480 lck_mtx_lock(&mod_lock
);
7481 lck_mtx_lock(&dtrace_lock
);
7485 * If anyone has /dev/dtrace open, or if there are anonymous enabled
7486 * probes, we refuse to let providers slither away, unless this
7487 * provider has already been explicitly invalidated.
7489 if (!old
->dtpv_defunct
&&
7490 (dtrace_opens
|| (dtrace_anon
.dta_state
!= NULL
&&
7491 dtrace_anon
.dta_state
->dts_necbs
> 0))) {
7493 lck_mtx_unlock(&dtrace_lock
);
7494 lck_mtx_unlock(&mod_lock
);
7495 lck_mtx_unlock(&dtrace_provider_lock
);
7501 * Attempt to destroy the probes associated with this provider.
7503 if (old
->dtpv_ecb_count
!=0) {
7505 * We have at least one ECB; we can't remove this provider.
7508 lck_mtx_unlock(&dtrace_lock
);
7509 lck_mtx_unlock(&mod_lock
);
7510 lck_mtx_unlock(&dtrace_provider_lock
);
7516 * All of the probes for this provider are disabled; we can safely
7517 * remove all of them from their hash chains and from the probe array.
7519 for (i
= 0; i
< dtrace_nprobes
&& old
->dtpv_probe_count
!=0; i
++) {
7520 if ((probe
= dtrace_probes
[i
]) == NULL
)
7523 if (probe
->dtpr_provider
!= old
)
7526 dtrace_probes
[i
] = NULL
;
7527 old
->dtpv_probe_count
--;
7529 dtrace_hash_remove(dtrace_bymod
, probe
);
7530 dtrace_hash_remove(dtrace_byfunc
, probe
);
7531 dtrace_hash_remove(dtrace_byname
, probe
);
7533 if (first
== NULL
) {
7535 probe
->dtpr_nextmod
= NULL
;
7537 probe
->dtpr_nextmod
= first
;
7543 * The provider's probes have been removed from the hash chains and
7544 * from the probe array. Now issue a dtrace_sync() to be sure that
7545 * everyone has cleared out from any probe array processing.
7549 for (probe
= first
; probe
!= NULL
; probe
= first
) {
7550 first
= probe
->dtpr_nextmod
;
7552 old
->dtpv_pops
.dtps_destroy(old
->dtpv_arg
, probe
->dtpr_id
,
7554 kmem_free(probe
->dtpr_mod
, strlen(probe
->dtpr_mod
) + 1);
7555 kmem_free(probe
->dtpr_func
, strlen(probe
->dtpr_func
) + 1);
7556 kmem_free(probe
->dtpr_name
, strlen(probe
->dtpr_name
) + 1);
7557 vmem_free(dtrace_arena
, (void *)(uintptr_t)(probe
->dtpr_id
), 1);
7558 zfree(dtrace_probe_t_zone
, probe
);
7561 if ((prev
= dtrace_provider
) == old
) {
7562 ASSERT(self
|| dtrace_devi
== NULL
);
7563 ASSERT(old
->dtpv_next
== NULL
|| dtrace_devi
== NULL
);
7564 dtrace_provider
= old
->dtpv_next
;
7566 while (prev
!= NULL
&& prev
->dtpv_next
!= old
)
7567 prev
= prev
->dtpv_next
;
7570 panic("attempt to unregister non-existent "
7571 "dtrace provider %p\n", (void *)id
);
7574 prev
->dtpv_next
= old
->dtpv_next
;
7578 lck_mtx_unlock(&dtrace_lock
);
7579 lck_mtx_unlock(&mod_lock
);
7580 lck_mtx_unlock(&dtrace_provider_lock
);
7583 kmem_free(old
->dtpv_name
, strlen(old
->dtpv_name
) + 1);
7584 kmem_free(old
, sizeof (dtrace_provider_t
));
/*
 * Invalidate the specified provider.  All subsequent probe lookups for the
 * specified provider will fail, but its probes will not be removed.
 */
void
dtrace_invalidate(dtrace_provider_id_t id)
{
	dtrace_provider_t *pvp = (dtrace_provider_t *)id;

	ASSERT(pvp->dtpv_pops.dtps_enable !=
	    (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);

	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&dtrace_lock);

	pvp->dtpv_defunct = 1;

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_provider_lock);
}
/*
 * Indicate whether or not DTrace has attached.
 */
int
dtrace_attached(void)
{
	/*
	 * dtrace_provider will be non-NULL iff the DTrace driver has
	 * attached.  (It's non-NULL because DTrace is always itself a
	 * provider.)
	 */
	return (dtrace_provider != NULL);
}
/*
 * Remove all the unenabled probes for the given provider.  This function is
 * not unlike dtrace_unregister(), except that it doesn't remove the provider
 * -- just as many of its associated probes as it can.
 */
int
dtrace_condense(dtrace_provider_id_t id)
{
	dtrace_provider_t *prov = (dtrace_provider_t *)id;
	int i;
	dtrace_probe_t *probe;
7637 * Make sure this isn't the dtrace provider itself.
7639 ASSERT(prov
->dtpv_pops
.dtps_enable
!=
7640 (int (*)(void *, dtrace_id_t
, void *))dtrace_enable_nullop
);
7642 lck_mtx_lock(&dtrace_provider_lock
);
7643 lck_mtx_lock(&dtrace_lock
);
7646 * Attempt to destroy the probes associated with this provider.
7648 for (i
= 0; i
< dtrace_nprobes
; i
++) {
7649 if ((probe
= dtrace_probes
[i
]) == NULL
)
7652 if (probe
->dtpr_provider
!= prov
)
7655 if (probe
->dtpr_ecb
!= NULL
)
7658 dtrace_probes
[i
] = NULL
;
7659 prov
->dtpv_probe_count
--;
7661 dtrace_hash_remove(dtrace_bymod
, probe
);
7662 dtrace_hash_remove(dtrace_byfunc
, probe
);
7663 dtrace_hash_remove(dtrace_byname
, probe
);
7665 prov
->dtpv_pops
.dtps_destroy(prov
->dtpv_arg
, i
+ 1,
7667 kmem_free(probe
->dtpr_mod
, strlen(probe
->dtpr_mod
) + 1);
7668 kmem_free(probe
->dtpr_func
, strlen(probe
->dtpr_func
) + 1);
7669 kmem_free(probe
->dtpr_name
, strlen(probe
->dtpr_name
) + 1);
7670 zfree(dtrace_probe_t_zone
, probe
);
7671 vmem_free(dtrace_arena
, (void *)((uintptr_t)i
+ 1), 1);
	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_provider_lock);

	return (0);
}
/*
 * DTrace Probe Management Functions
 *
 * The functions in this section perform the DTrace probe management,
 * including functions to create probes, look-up probes, and call into the
 * providers to request that probes be provided.  Some of these functions are
 * in the Provider-to-Framework API; these functions can be identified by the
 * fact that they are not declared "static".
 */
/*
 * Create a probe with the specified module name, function name, and name.
 */
dtrace_id_t
dtrace_probe_create(dtrace_provider_id_t prov, const char *mod,
    const char *func, const char *name, int aframes, void *arg)
{
	dtrace_probe_t *probe, **probes;
	dtrace_provider_t *provider = (dtrace_provider_t *)prov;
	dtrace_id_t id;
7701 if (provider
== dtrace_provider
) {
7702 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
7704 lck_mtx_lock(&dtrace_lock
);
7707 id
= (dtrace_id_t
)(uintptr_t)vmem_alloc(dtrace_arena
, 1,
7708 VM_BESTFIT
| VM_SLEEP
);
7710 probe
= zalloc(dtrace_probe_t_zone
);
7711 bzero(probe
, sizeof (dtrace_probe_t
));
7713 probe
->dtpr_id
= id
;
7714 probe
->dtpr_gen
= dtrace_probegen
++;
7715 probe
->dtpr_mod
= dtrace_strdup(mod
);
7716 probe
->dtpr_func
= dtrace_strdup(func
);
7717 probe
->dtpr_name
= dtrace_strdup(name
);
7718 probe
->dtpr_arg
= arg
;
7719 probe
->dtpr_aframes
= aframes
;
7720 probe
->dtpr_provider
= provider
;
7722 dtrace_hash_add(dtrace_bymod
, probe
);
7723 dtrace_hash_add(dtrace_byfunc
, probe
);
7724 dtrace_hash_add(dtrace_byname
, probe
);
7726 if (id
- 1 >= (dtrace_id_t
)dtrace_nprobes
) {
7727 size_t osize
= dtrace_nprobes
* sizeof (dtrace_probe_t
*);
7728 size_t nsize
= osize
<< 1;
7732 ASSERT(dtrace_probes
== NULL
);
7733 nsize
= sizeof (dtrace_probe_t
*);
7736 probes
= kmem_zalloc(nsize
, KM_SLEEP
);
7738 if (dtrace_probes
== NULL
) {
7740 dtrace_probes
= probes
;
7743 dtrace_probe_t
**oprobes
= dtrace_probes
;
7745 bcopy(oprobes
, probes
, osize
);
7746 dtrace_membar_producer();
7747 dtrace_probes
= probes
;
7752 * All CPUs are now seeing the new probes array; we can
7753 * safely free the old array.
7755 kmem_free(oprobes
, osize
);
7756 dtrace_nprobes
<<= 1;
7759 ASSERT(id
- 1 < (dtrace_id_t
)dtrace_nprobes
);
7762 ASSERT(dtrace_probes
[id
- 1] == NULL
);
7763 dtrace_probes
[id
- 1] = probe
;
7764 provider
->dtpv_probe_count
++;
7766 if (provider
!= dtrace_provider
)
7767 lck_mtx_unlock(&dtrace_lock
);
static dtrace_probe_t *
dtrace_probe_lookup_id(dtrace_id_t id)
{
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (id == 0 || id > (dtrace_id_t)dtrace_nprobes)
		return (NULL);

	return (dtrace_probes[id - 1]);
}
static int
dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg)
{
	*((dtrace_id_t *)arg) = probe->dtpr_id;

	return (DTRACE_MATCH_DONE);
}
/*
 * Look up a probe based on provider and one or more of module name, function
 * name and probe name.
 */
dtrace_id_t
dtrace_probe_lookup(dtrace_provider_id_t prid, const char *mod,
    const char *func, const char *name)
{
	dtrace_probekey_t pkey;
	dtrace_id_t id;
	int match;

	pkey.dtpk_prov = ((dtrace_provider_t *)prid)->dtpv_name;
	pkey.dtpk_pmatch = &dtrace_match_string;
	pkey.dtpk_mod = mod;
	pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_func = func;
	pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_name = name;
	pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul;
	pkey.dtpk_id = DTRACE_IDNONE;

	lck_mtx_lock(&dtrace_lock);
	match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0,
	    dtrace_probe_lookup_match, &id);
	lck_mtx_unlock(&dtrace_lock);

	ASSERT(match == 1 || match == 0);
	return (match ? id : 0);
}
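/*
 * Illustrative sketch (not part of the original source): how a provider's
 * dtps_provide() implementation commonly uses dtrace_probe_lookup() to
 * avoid creating duplicate probes.  The module and function names are
 * hypothetical; example_id refers to the registration sketch above.
 */
#if 0
static void
example_provide(void *arg, const dtrace_probedesc_t *desc)
{
#pragma unused(arg, desc)
	if (dtrace_probe_lookup(example_id, "mach_kernel", "example_func",
	    "entry") == 0) {
		(void) dtrace_probe_create(example_id, "mach_kernel",
		    "example_func", "entry", 0, NULL);
	}
}
#endif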
/*
 * Returns the probe argument associated with the specified probe.
 */
void *
dtrace_probe_arg(dtrace_provider_id_t id, dtrace_id_t pid)
{
	dtrace_probe_t *probe;
	void *rval = NULL;

	lck_mtx_lock(&dtrace_lock);

	if ((probe = dtrace_probe_lookup_id(pid)) != NULL &&
	    probe->dtpr_provider == (dtrace_provider_t *)id)
		rval = probe->dtpr_arg;

	lck_mtx_unlock(&dtrace_lock);

	return (rval);
}
/*
 * Copy a probe into a probe description.
 */
static void
dtrace_probe_description(const dtrace_probe_t *prp, dtrace_probedesc_t *pdp)
{
	bzero(pdp, sizeof (dtrace_probedesc_t));
	pdp->dtpd_id = prp->dtpr_id;

	/* APPLE NOTE: Darwin employs size bounded string operation. */
	(void) strlcpy(pdp->dtpd_provider,
	    prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN);

	(void) strlcpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN);
	(void) strlcpy(pdp->dtpd_func, prp->dtpr_func, DTRACE_FUNCNAMELEN);
	(void) strlcpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN);
}
/*
 * Called to indicate that a probe -- or probes -- should be provided by a
 * specified provider.  If the specified description is NULL, the provider
 * will be told to provide all of its probes.  (This is done whenever a new
 * consumer comes along, or whenever a retained enabling is to be matched.)
 * If the specified description is non-NULL, the provider is given the
 * opportunity to dynamically provide the specified probe, allowing providers
 * to support the creation of probes on-the-fly.  (So-called _autocreated_
 * probes.)  If the provider is NULL, the operations will be applied to all
 * providers; if the provider is non-NULL the operations will only be applied
 * to the specified provider.  The dtrace_provider_lock must be held, and the
 * dtrace_lock must _not_ be held -- the provider's dtps_provide() operation
 * will need to grab the dtrace_lock when it reenters the framework through
 * dtrace_probe_lookup(), dtrace_probe_create(), etc.
 */
7876 dtrace_probe_provide(dtrace_probedesc_t
*desc
, dtrace_provider_t
*prv
)
7881 lck_mtx_assert(&dtrace_provider_lock
, LCK_MTX_ASSERT_OWNED
);
7885 prv
= dtrace_provider
;
7890 * First, call the blanket provide operation.
7892 prv
->dtpv_pops
.dtps_provide(prv
->dtpv_arg
, desc
);
7895 * Now call the per-module provide operation. We will grab
7896 * mod_lock to prevent the list from being modified. Note
7897 * that this also prevents the mod_busy bits from changing.
7898 * (mod_busy can only be changed with mod_lock held.)
7900 lck_mtx_lock(&mod_lock
);
7902 ctl
= dtrace_modctl_list
;
7904 prv
->dtpv_pops
.dtps_provide_module(prv
->dtpv_arg
, ctl
);
7905 ctl
= ctl
->mod_next
;
7908 lck_mtx_unlock(&mod_lock
);
7909 } while (all
&& (prv
= prv
->dtpv_next
) != NULL
);
7913 * Iterate over each probe, and call the Framework-to-Provider API function
7917 dtrace_probe_foreach(uintptr_t offs
)
7919 dtrace_provider_t
*prov
;
7920 void (*func
)(void *, dtrace_id_t
, void *);
7921 dtrace_probe_t
*probe
;
7922 dtrace_icookie_t cookie
;
7926 * We disable interrupts to walk through the probe array. This is
7927 * safe -- the dtrace_sync() in dtrace_unregister() assures that we
7928 * won't see stale data.
7930 cookie
= dtrace_interrupt_disable();
7932 for (i
= 0; i
< dtrace_nprobes
; i
++) {
7933 if ((probe
= dtrace_probes
[i
]) == NULL
)
7936 if (probe
->dtpr_ecb
== NULL
) {
7938 * This probe isn't enabled -- don't call the function.
7943 prov
= probe
->dtpr_provider
;
7944 func
= *((void(**)(void *, dtrace_id_t
, void *))
7945 ((uintptr_t)&prov
->dtpv_pops
+ offs
));
7947 func(prov
->dtpv_arg
, i
+ 1, probe
->dtpr_arg
);
7950 dtrace_interrupt_enable(cookie
);
7954 dtrace_probe_enable(const dtrace_probedesc_t
*desc
, dtrace_enabling_t
*enab
)
7956 dtrace_probekey_t pkey
;
7961 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
7963 dtrace_ecb_create_cache
= NULL
;
7967 * If we're passed a NULL description, we're being asked to
7968 * create an ECB with a NULL probe.
7970 (void) dtrace_ecb_create_enable(NULL
, enab
);
7974 dtrace_probekey(desc
, &pkey
);
7975 dtrace_cred2priv(enab
->dten_vstate
->dtvs_state
->dts_cred
.dcr_cred
,
7976 &priv
, &uid
, &zoneid
);
7978 return (dtrace_match(&pkey
, priv
, uid
, zoneid
, dtrace_ecb_create_enable
,
/*
 * DTrace Helper Provider Functions
 */
static void
dtrace_dofattr2attr(dtrace_attribute_t *attr, const dof_attr_t dofattr)
{
	attr->dtat_name = DOF_ATTR_NAME(dofattr);
	attr->dtat_data = DOF_ATTR_DATA(dofattr);
	attr->dtat_class = DOF_ATTR_CLASS(dofattr);
}
static void
dtrace_dofprov2hprov(dtrace_helper_provdesc_t *hprov,
    const dof_provider_t *dofprov, char *strtab)
{
	hprov->dthpv_provname = strtab + dofprov->dofpv_name;
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_provider,
	    dofprov->dofpv_provattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_mod,
	    dofprov->dofpv_modattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_func,
	    dofprov->dofpv_funcattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_name,
	    dofprov->dofpv_nameattr);
	dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_args,
	    dofprov->dofpv_argsattr);
}
8011 dtrace_helper_provide_one(dof_helper_t
*dhp
, dof_sec_t
*sec
, pid_t pid
)
8013 uintptr_t daddr
= (uintptr_t)dhp
->dofhp_dof
;
8014 dof_hdr_t
*dof
= (dof_hdr_t
*)daddr
;
8015 dof_sec_t
*str_sec
, *prb_sec
, *arg_sec
, *off_sec
, *enoff_sec
;
8016 dof_provider_t
*provider
;
8018 uint32_t *off
, *enoff
;
8022 dtrace_helper_provdesc_t dhpv
;
8023 dtrace_helper_probedesc_t dhpb
;
8024 dtrace_meta_t
*meta
= dtrace_meta_pid
;
8025 dtrace_mops_t
*mops
= &meta
->dtm_mops
;
8028 provider
= (dof_provider_t
*)(uintptr_t)(daddr
+ sec
->dofs_offset
);
8029 str_sec
= (dof_sec_t
*)(uintptr_t)(daddr
+ dof
->dofh_secoff
+
8030 provider
->dofpv_strtab
* dof
->dofh_secsize
);
8031 prb_sec
= (dof_sec_t
*)(uintptr_t)(daddr
+ dof
->dofh_secoff
+
8032 provider
->dofpv_probes
* dof
->dofh_secsize
);
8033 arg_sec
= (dof_sec_t
*)(uintptr_t)(daddr
+ dof
->dofh_secoff
+
8034 provider
->dofpv_prargs
* dof
->dofh_secsize
);
8035 off_sec
= (dof_sec_t
*)(uintptr_t)(daddr
+ dof
->dofh_secoff
+
8036 provider
->dofpv_proffs
* dof
->dofh_secsize
);
8038 strtab
= (char *)(uintptr_t)(daddr
+ str_sec
->dofs_offset
);
8039 off
= (uint32_t *)(uintptr_t)(daddr
+ off_sec
->dofs_offset
);
8040 arg
= (uint8_t *)(uintptr_t)(daddr
+ arg_sec
->dofs_offset
);
8044 * See dtrace_helper_provider_validate().
8046 if (dof
->dofh_ident
[DOF_ID_VERSION
] != DOF_VERSION_1
&&
8047 provider
->dofpv_prenoffs
!= DOF_SECT_NONE
) {
8048 enoff_sec
= (dof_sec_t
*)(uintptr_t)(daddr
+ dof
->dofh_secoff
+
8049 provider
->dofpv_prenoffs
* dof
->dofh_secsize
);
8050 enoff
= (uint32_t *)(uintptr_t)(daddr
+ enoff_sec
->dofs_offset
);
8053 nprobes
= prb_sec
->dofs_size
/ prb_sec
->dofs_entsize
;
8056 * Create the provider.
8058 dtrace_dofprov2hprov(&dhpv
, provider
, strtab
);
8060 if ((parg
= mops
->dtms_provide_pid(meta
->dtm_arg
, &dhpv
, pid
)) == NULL
)
8066 * Create the probes.
8068 for (i
= 0; i
< nprobes
; i
++) {
8069 probe
= (dof_probe_t
*)(uintptr_t)(daddr
+
8070 prb_sec
->dofs_offset
+ i
* prb_sec
->dofs_entsize
);
8072 dhpb
.dthpb_mod
= dhp
->dofhp_mod
;
8073 dhpb
.dthpb_func
= strtab
+ probe
->dofpr_func
;
8074 dhpb
.dthpb_name
= strtab
+ probe
->dofpr_name
;
8075 #if !defined(__APPLE__)
8076 dhpb
.dthpb_base
= probe
->dofpr_addr
;
8078 dhpb
.dthpb_base
= dhp
->dofhp_addr
; /* FIXME: James, why? */
8080 dhpb
.dthpb_offs
= (int32_t *)(off
+ probe
->dofpr_offidx
);
8081 dhpb
.dthpb_noffs
= probe
->dofpr_noffs
;
8082 if (enoff
!= NULL
) {
8083 dhpb
.dthpb_enoffs
= (int32_t *)(enoff
+ probe
->dofpr_enoffidx
);
8084 dhpb
.dthpb_nenoffs
= probe
->dofpr_nenoffs
;
8086 dhpb
.dthpb_enoffs
= NULL
;
8087 dhpb
.dthpb_nenoffs
= 0;
8089 dhpb
.dthpb_args
= arg
+ probe
->dofpr_argidx
;
8090 dhpb
.dthpb_nargc
= probe
->dofpr_nargc
;
8091 dhpb
.dthpb_xargc
= probe
->dofpr_xargc
;
8092 dhpb
.dthpb_ntypes
= strtab
+ probe
->dofpr_nargv
;
8093 dhpb
.dthpb_xtypes
= strtab
+ probe
->dofpr_xargv
;
8095 mops
->dtms_create_probe(meta
->dtm_arg
, parg
, &dhpb
);
8100 dtrace_helper_provide(dof_helper_t
*dhp
, pid_t pid
)
8102 uintptr_t daddr
= (uintptr_t)dhp
->dofhp_dof
;
8103 dof_hdr_t
*dof
= (dof_hdr_t
*)daddr
;
8106 lck_mtx_assert(&dtrace_meta_lock
, LCK_MTX_ASSERT_OWNED
);
8108 for (i
= 0; i
< dof
->dofh_secnum
; i
++) {
8109 dof_sec_t
*sec
= (dof_sec_t
*)(uintptr_t)(daddr
+
8110 dof
->dofh_secoff
+ i
* dof
->dofh_secsize
);
8112 if (sec
->dofs_type
!= DOF_SECT_PROVIDER
)
8115 dtrace_helper_provide_one(dhp
, sec
, pid
);
8119 * We may have just created probes, so we must now rematch against
8120 * any retained enablings. Note that this call will acquire both
8121 * cpu_lock and dtrace_lock; the fact that we are holding
8122 * dtrace_meta_lock now is what defines the ordering with respect to
8123 * these three locks.
8125 dtrace_enabling_matchall();
8129 dtrace_helper_provider_remove_one(dof_helper_t
*dhp
, dof_sec_t
*sec
, pid_t pid
)
8131 uintptr_t daddr
= (uintptr_t)dhp
->dofhp_dof
;
8132 dof_hdr_t
*dof
= (dof_hdr_t
*)daddr
;
8134 dof_provider_t
*provider
;
8136 dtrace_helper_provdesc_t dhpv
;
8137 dtrace_meta_t
*meta
= dtrace_meta_pid
;
8138 dtrace_mops_t
*mops
= &meta
->dtm_mops
;
8140 provider
= (dof_provider_t
*)(uintptr_t)(daddr
+ sec
->dofs_offset
);
8141 str_sec
= (dof_sec_t
*)(uintptr_t)(daddr
+ dof
->dofh_secoff
+
8142 provider
->dofpv_strtab
* dof
->dofh_secsize
);
8144 strtab
= (char *)(uintptr_t)(daddr
+ str_sec
->dofs_offset
);
8147 * Create the provider.
8149 dtrace_dofprov2hprov(&dhpv
, provider
, strtab
);
8151 mops
->dtms_remove_pid(meta
->dtm_arg
, &dhpv
, pid
);
8157 dtrace_helper_provider_remove(dof_helper_t
*dhp
, pid_t pid
)
8159 uintptr_t daddr
= (uintptr_t)dhp
->dofhp_dof
;
8160 dof_hdr_t
*dof
= (dof_hdr_t
*)daddr
;
8163 lck_mtx_assert(&dtrace_meta_lock
, LCK_MTX_ASSERT_OWNED
);
8165 for (i
= 0; i
< dof
->dofh_secnum
; i
++) {
8166 dof_sec_t
*sec
= (dof_sec_t
*)(uintptr_t)(daddr
+
8167 dof
->dofh_secoff
+ i
* dof
->dofh_secsize
);
8169 if (sec
->dofs_type
!= DOF_SECT_PROVIDER
)
8172 dtrace_helper_provider_remove_one(dhp
, sec
, pid
);
/*
 * DTrace Meta Provider-to-Framework API Functions
 *
 * These functions implement the Meta Provider-to-Framework API, as described
 * in <sys/dtrace.h>.
 */
8183 dtrace_meta_register(const char *name
, const dtrace_mops_t
*mops
, void *arg
,
8184 dtrace_meta_provider_id_t
*idp
)
8186 dtrace_meta_t
*meta
;
8187 dtrace_helpers_t
*help
, *next
;
8190 *idp
= DTRACE_METAPROVNONE
;
8193 * We strictly don't need the name, but we hold onto it for
8194 * debuggability. All hail error queues!
8197 cmn_err(CE_WARN
, "failed to register meta-provider: "
8203 mops
->dtms_create_probe
== NULL
||
8204 mops
->dtms_provide_pid
== NULL
||
8205 mops
->dtms_remove_pid
== NULL
) {
8206 cmn_err(CE_WARN
, "failed to register meta-register %s: "
8207 "invalid ops", name
);
8211 meta
= kmem_zalloc(sizeof (dtrace_meta_t
), KM_SLEEP
);
8212 meta
->dtm_mops
= *mops
;
8214 /* APPLE NOTE: Darwin employs size bounded string operation. */
8216 size_t bufsize
= strlen(name
) + 1;
8217 meta
->dtm_name
= kmem_alloc(bufsize
, KM_SLEEP
);
8218 (void) strlcpy(meta
->dtm_name
, name
, bufsize
);
8221 meta
->dtm_arg
= arg
;
8223 lck_mtx_lock(&dtrace_meta_lock
);
8224 lck_mtx_lock(&dtrace_lock
);
8226 if (dtrace_meta_pid
!= NULL
) {
8227 lck_mtx_unlock(&dtrace_lock
);
8228 lck_mtx_unlock(&dtrace_meta_lock
);
8229 cmn_err(CE_WARN
, "failed to register meta-register %s: "
8230 "user-land meta-provider exists", name
);
8231 kmem_free(meta
->dtm_name
, strlen(meta
->dtm_name
) + 1);
8232 kmem_free(meta
, sizeof (dtrace_meta_t
));
8236 dtrace_meta_pid
= meta
;
8237 *idp
= (dtrace_meta_provider_id_t
)meta
;
8240 * If there are providers and probes ready to go, pass them
8241 * off to the new meta provider now.
8244 help
= dtrace_deferred_pid
;
8245 dtrace_deferred_pid
= NULL
;
8247 lck_mtx_unlock(&dtrace_lock
);
8249 while (help
!= NULL
) {
8250 for (i
= 0; i
< help
->dthps_nprovs
; i
++) {
8251 dtrace_helper_provide(&help
->dthps_provs
[i
]->dthp_prov
,
8255 next
= help
->dthps_next
;
8256 help
->dthps_next
= NULL
;
8257 help
->dthps_prev
= NULL
;
8258 help
->dthps_deferred
= 0;
8262 lck_mtx_unlock(&dtrace_meta_lock
);
8268 dtrace_meta_unregister(dtrace_meta_provider_id_t id
)
8270 dtrace_meta_t
**pp
, *old
= (dtrace_meta_t
*)id
;
8272 lck_mtx_lock(&dtrace_meta_lock
);
8273 lck_mtx_lock(&dtrace_lock
);
8275 if (old
== dtrace_meta_pid
) {
8276 pp
= &dtrace_meta_pid
;
8278 panic("attempt to unregister non-existent "
8279 "dtrace meta-provider %p\n", (void *)old
);
8282 if (old
->dtm_count
!= 0) {
8283 lck_mtx_unlock(&dtrace_lock
);
8284 lck_mtx_unlock(&dtrace_meta_lock
);
8290 lck_mtx_unlock(&dtrace_lock
);
8291 lck_mtx_unlock(&dtrace_meta_lock
);
8293 kmem_free(old
->dtm_name
, strlen(old
->dtm_name
) + 1);
8294 kmem_free(old
, sizeof (dtrace_meta_t
));
/*
 * DTrace DIF Object Functions
 */
static int
dtrace_difo_err(uint_t pc, const char *format, ...)
{
	if (dtrace_err_verbose) {
		va_list alist;

		(void) uprintf("dtrace DIF object error: [%u]: ", pc);
		va_start(alist, format);
		(void) vuprintf(format, alist);
		va_end(alist);
	}

#ifdef DTRACE_ERRDEBUG
	dtrace_errdebug(format);
#endif
	return (1);
}
/*
 * Validate a DTrace DIF object by checking the IR instructions.  The following
 * rules are currently enforced by dtrace_difo_validate():
 *
 * 1. Each instruction must have a valid opcode
 * 2. Each register, string, variable, or subroutine reference must be valid
 * 3. No instruction can modify register %r0 (must be zero)
 * 4. All instruction reserved bits must be set to zero
 * 5. The last instruction must be a "ret" instruction
 * 6. All branch targets must reference a valid instruction _after_ the branch
 */
8333 dtrace_difo_validate(dtrace_difo_t
*dp
, dtrace_vstate_t
*vstate
, uint_t nregs
,
8339 int (*efunc
)(uint_t pc
, const char *, ...) = dtrace_difo_err
;
8343 kcheckload
= cr
== NULL
||
8344 (vstate
->dtvs_state
->dts_cred
.dcr_visible
& DTRACE_CRV_KERNEL
) == 0;
8346 dp
->dtdo_destructive
= 0;
8348 for (pc
= 0; pc
< dp
->dtdo_len
&& err
== 0; pc
++) {
8349 dif_instr_t instr
= dp
->dtdo_buf
[pc
];
8351 uint_t r1
= DIF_INSTR_R1(instr
);
8352 uint_t r2
= DIF_INSTR_R2(instr
);
8353 uint_t rd
= DIF_INSTR_RD(instr
);
8354 uint_t rs
= DIF_INSTR_RS(instr
);
8355 uint_t label
= DIF_INSTR_LABEL(instr
);
8356 uint_t v
= DIF_INSTR_VAR(instr
);
8357 uint_t subr
= DIF_INSTR_SUBR(instr
);
8358 uint_t type
= DIF_INSTR_TYPE(instr
);
8359 uint_t op
= DIF_INSTR_OP(instr
);
8377 err
+= efunc(pc
, "invalid register %u\n", r1
);
8379 err
+= efunc(pc
, "invalid register %u\n", r2
);
8381 err
+= efunc(pc
, "invalid register %u\n", rd
);
8383 err
+= efunc(pc
, "cannot write to %r0\n");
8389 err
+= efunc(pc
, "invalid register %u\n", r1
);
8391 err
+= efunc(pc
, "non-zero reserved bits\n");
8393 err
+= efunc(pc
, "invalid register %u\n", rd
);
8395 err
+= efunc(pc
, "cannot write to %r0\n");
8405 err
+= efunc(pc
, "invalid register %u\n", r1
);
8407 err
+= efunc(pc
, "non-zero reserved bits\n");
8409 err
+= efunc(pc
, "invalid register %u\n", rd
);
8411 err
+= efunc(pc
, "cannot write to %r0\n");
8413 dp
->dtdo_buf
[pc
] = DIF_INSTR_LOAD(op
+
8414 DIF_OP_RLDSB
- DIF_OP_LDSB
, r1
, rd
);
8424 err
+= efunc(pc
, "invalid register %u\n", r1
);
8426 err
+= efunc(pc
, "non-zero reserved bits\n");
8428 err
+= efunc(pc
, "invalid register %u\n", rd
);
8430 err
+= efunc(pc
, "cannot write to %r0\n");
8440 err
+= efunc(pc
, "invalid register %u\n", r1
);
8442 err
+= efunc(pc
, "non-zero reserved bits\n");
8444 err
+= efunc(pc
, "invalid register %u\n", rd
);
8446 err
+= efunc(pc
, "cannot write to %r0\n");
8453 err
+= efunc(pc
, "invalid register %u\n", r1
);
8455 err
+= efunc(pc
, "non-zero reserved bits\n");
8457 err
+= efunc(pc
, "invalid register %u\n", rd
);
8459 err
+= efunc(pc
, "cannot write to 0 address\n");
8464 err
+= efunc(pc
, "invalid register %u\n", r1
);
8466 err
+= efunc(pc
, "invalid register %u\n", r2
);
8468 err
+= efunc(pc
, "non-zero reserved bits\n");
8472 err
+= efunc(pc
, "invalid register %u\n", r1
);
8473 if (r2
!= 0 || rd
!= 0)
8474 err
+= efunc(pc
, "non-zero reserved bits\n");
8487 if (label
>= dp
->dtdo_len
) {
8488 err
+= efunc(pc
, "invalid branch target %u\n",
8492 err
+= efunc(pc
, "backward branch to %u\n",
8497 if (r1
!= 0 || r2
!= 0)
8498 err
+= efunc(pc
, "non-zero reserved bits\n");
8500 err
+= efunc(pc
, "invalid register %u\n", rd
);
8504 case DIF_OP_FLUSHTS
:
8505 if (r1
!= 0 || r2
!= 0 || rd
!= 0)
8506 err
+= efunc(pc
, "non-zero reserved bits\n");
8509 if (DIF_INSTR_INTEGER(instr
) >= dp
->dtdo_intlen
) {
8510 err
+= efunc(pc
, "invalid integer ref %u\n",
8511 DIF_INSTR_INTEGER(instr
));
8514 err
+= efunc(pc
, "invalid register %u\n", rd
);
8516 err
+= efunc(pc
, "cannot write to %r0\n");
8519 if (DIF_INSTR_STRING(instr
) >= dp
->dtdo_strlen
) {
8520 err
+= efunc(pc
, "invalid string ref %u\n",
8521 DIF_INSTR_STRING(instr
));
8524 err
+= efunc(pc
, "invalid register %u\n", rd
);
8526 err
+= efunc(pc
, "cannot write to %r0\n");
8530 if (r1
> DIF_VAR_ARRAY_MAX
)
8531 err
+= efunc(pc
, "invalid array %u\n", r1
);
8533 err
+= efunc(pc
, "invalid register %u\n", r2
);
8535 err
+= efunc(pc
, "invalid register %u\n", rd
);
8537 err
+= efunc(pc
, "cannot write to %r0\n");
8544 if (v
< DIF_VAR_OTHER_MIN
|| v
> DIF_VAR_OTHER_MAX
)
8545 err
+= efunc(pc
, "invalid variable %u\n", v
);
8547 err
+= efunc(pc
, "invalid register %u\n", rd
);
8549 err
+= efunc(pc
, "cannot write to %r0\n");
8556 if (v
< DIF_VAR_OTHER_UBASE
|| v
> DIF_VAR_OTHER_MAX
)
8557 err
+= efunc(pc
, "invalid variable %u\n", v
);
8559 err
+= efunc(pc
, "invalid register %u\n", rd
);
8562 if (subr
> DIF_SUBR_MAX
)
8563 err
+= efunc(pc
, "invalid subr %u\n", subr
);
8565 err
+= efunc(pc
, "invalid register %u\n", rd
);
8567 err
+= efunc(pc
, "cannot write to %r0\n");
8569 if (subr
== DIF_SUBR_COPYOUT
||
8570 subr
== DIF_SUBR_COPYOUTSTR
) {
8571 dp
->dtdo_destructive
= 1;
8575 if (type
!= DIF_TYPE_STRING
&& type
!= DIF_TYPE_CTF
)
8576 err
+= efunc(pc
, "invalid ref type %u\n", type
);
8578 err
+= efunc(pc
, "invalid register %u\n", r2
);
8580 err
+= efunc(pc
, "invalid register %u\n", rs
);
8583 if (type
!= DIF_TYPE_CTF
)
8584 err
+= efunc(pc
, "invalid val type %u\n", type
);
8586 err
+= efunc(pc
, "invalid register %u\n", r2
);
8588 err
+= efunc(pc
, "invalid register %u\n", rs
);
8591 err
+= efunc(pc
, "invalid opcode %u\n",
8592 DIF_INSTR_OP(instr
));
8596 if (dp
->dtdo_len
!= 0 &&
8597 DIF_INSTR_OP(dp
->dtdo_buf
[dp
->dtdo_len
- 1]) != DIF_OP_RET
) {
8598 err
+= efunc(dp
->dtdo_len
- 1,
8599 "expected 'ret' as last DIF instruction\n");
8602 if (!(dp
->dtdo_rtype
.dtdt_flags
& DIF_TF_BYREF
)) {
8604 * If we're not returning by reference, the size must be either
8605 * 0 or the size of one of the base types.
8607 switch (dp
->dtdo_rtype
.dtdt_size
) {
8609 case sizeof (uint8_t):
8610 case sizeof (uint16_t):
8611 case sizeof (uint32_t):
8612 case sizeof (uint64_t):
8616 err
+= efunc(dp
->dtdo_len
- 1, "bad return size\n");
8620 for (i
= 0; i
< dp
->dtdo_varlen
&& err
== 0; i
++) {
8621 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
], *existing
= NULL
;
8622 dtrace_diftype_t
*vt
, *et
;
8626 if (v
->dtdv_scope
!= DIFV_SCOPE_GLOBAL
&&
8627 v
->dtdv_scope
!= DIFV_SCOPE_THREAD
&&
8628 v
->dtdv_scope
!= DIFV_SCOPE_LOCAL
) {
8629 err
+= efunc(i
, "unrecognized variable scope %d\n",
8634 if (v
->dtdv_kind
!= DIFV_KIND_ARRAY
&&
8635 v
->dtdv_kind
!= DIFV_KIND_SCALAR
) {
8636 err
+= efunc(i
, "unrecognized variable type %d\n",
8641 if ((id
= v
->dtdv_id
) > DIF_VARIABLE_MAX
) {
8642 err
+= efunc(i
, "%d exceeds variable id limit\n", id
);
8646 if (id
< DIF_VAR_OTHER_UBASE
)
8650 * For user-defined variables, we need to check that this
8651 * definition is identical to any previous definition that we
8654 ndx
= id
- DIF_VAR_OTHER_UBASE
;
8656 switch (v
->dtdv_scope
) {
8657 case DIFV_SCOPE_GLOBAL
:
8658 if (ndx
< vstate
->dtvs_nglobals
) {
8659 dtrace_statvar_t
*svar
;
8661 if ((svar
= vstate
->dtvs_globals
[ndx
]) != NULL
)
8662 existing
= &svar
->dtsv_var
;
8667 case DIFV_SCOPE_THREAD
:
8668 if (ndx
< vstate
->dtvs_ntlocals
)
8669 existing
= &vstate
->dtvs_tlocals
[ndx
];
8672 case DIFV_SCOPE_LOCAL
:
8673 if (ndx
< vstate
->dtvs_nlocals
) {
8674 dtrace_statvar_t
*svar
;
8676 if ((svar
= vstate
->dtvs_locals
[ndx
]) != NULL
)
8677 existing
= &svar
->dtsv_var
;
8685 if (vt
->dtdt_flags
& DIF_TF_BYREF
) {
8686 if (vt
->dtdt_size
== 0) {
8687 err
+= efunc(i
, "zero-sized variable\n");
8691 if (v
->dtdv_scope
== DIFV_SCOPE_GLOBAL
&&
8692 vt
->dtdt_size
> dtrace_global_maxsize
) {
8693 err
+= efunc(i
, "oversized by-ref global\n");
8698 if (existing
== NULL
|| existing
->dtdv_id
== 0)
8701 ASSERT(existing
->dtdv_id
== v
->dtdv_id
);
8702 ASSERT(existing
->dtdv_scope
== v
->dtdv_scope
);
8704 if (existing
->dtdv_kind
!= v
->dtdv_kind
)
8705 err
+= efunc(i
, "%d changed variable kind\n", id
);
8707 et
= &existing
->dtdv_type
;
8709 if (vt
->dtdt_flags
!= et
->dtdt_flags
) {
8710 err
+= efunc(i
, "%d changed variable type flags\n", id
);
8714 if (vt
->dtdt_size
!= 0 && vt
->dtdt_size
!= et
->dtdt_size
) {
8715 err
+= efunc(i
, "%d changed variable type size\n", id
);
/*
 * Validate a DTrace DIF object that it is to be used as a helper.  Helpers
 * are much more constrained than normal DIFOs.  Specifically, they may
 * not:
 *
 * 1. Make calls to subroutines other than copyin(), copyinstr() or
 *    miscellaneous string routines
 * 2. Access DTrace variables other than the args[] array, and the
 *    curthread, pid, ppid, tid, execname, zonename, uid and gid variables.
 * 3. Have thread-local variables.
 * 4. Have dynamic variables.
 */
8736 dtrace_difo_validate_helper(dtrace_difo_t
*dp
)
8738 int (*efunc
)(uint_t pc
, const char *, ...) = dtrace_difo_err
;
8742 for (pc
= 0; pc
< dp
->dtdo_len
; pc
++) {
8743 dif_instr_t instr
= dp
->dtdo_buf
[pc
];
8745 uint_t v
= DIF_INSTR_VAR(instr
);
8746 uint_t subr
= DIF_INSTR_SUBR(instr
);
8747 uint_t op
= DIF_INSTR_OP(instr
);
8802 case DIF_OP_FLUSHTS
:
8814 if (v
>= DIF_VAR_OTHER_UBASE
)
8817 if (v
>= DIF_VAR_ARG0
&& v
<= DIF_VAR_ARG9
)
8820 if (v
== DIF_VAR_CURTHREAD
|| v
== DIF_VAR_PID
||
8821 v
== DIF_VAR_PPID
|| v
== DIF_VAR_TID
||
8822 v
== DIF_VAR_EXECNAME
|| v
== DIF_VAR_ZONENAME
||
8823 v
== DIF_VAR_UID
|| v
== DIF_VAR_GID
)
8826 err
+= efunc(pc
, "illegal variable %u\n", v
);
8833 err
+= efunc(pc
, "illegal dynamic variable load\n");
8839 err
+= efunc(pc
, "illegal dynamic variable store\n");
8843 if (subr
== DIF_SUBR_ALLOCA
||
8844 subr
== DIF_SUBR_BCOPY
||
8845 subr
== DIF_SUBR_COPYIN
||
8846 subr
== DIF_SUBR_COPYINTO
||
8847 subr
== DIF_SUBR_COPYINSTR
||
8848 subr
== DIF_SUBR_INDEX
||
8849 subr
== DIF_SUBR_INET_NTOA
||
8850 subr
== DIF_SUBR_INET_NTOA6
||
8851 subr
== DIF_SUBR_INET_NTOP
||
8852 subr
== DIF_SUBR_LLTOSTR
||
8853 subr
== DIF_SUBR_RINDEX
||
8854 subr
== DIF_SUBR_STRCHR
||
8855 subr
== DIF_SUBR_STRJOIN
||
8856 subr
== DIF_SUBR_STRRCHR
||
8857 subr
== DIF_SUBR_STRSTR
||
8858 subr
== DIF_SUBR_COREPROFILE
||
8859 subr
== DIF_SUBR_HTONS
||
8860 subr
== DIF_SUBR_HTONL
||
8861 subr
== DIF_SUBR_HTONLL
||
8862 subr
== DIF_SUBR_NTOHS
||
8863 subr
== DIF_SUBR_NTOHL
||
8864 subr
== DIF_SUBR_NTOHLL
)
8867 err
+= efunc(pc
, "invalid subr %u\n", subr
);
8871 err
+= efunc(pc
, "invalid opcode %u\n",
8872 DIF_INSTR_OP(instr
));
8880 * Returns 1 if the expression in the DIF object can be cached on a per-thread
8884 dtrace_difo_cacheable(dtrace_difo_t
*dp
)
8891 for (i
= 0; i
< dp
->dtdo_varlen
; i
++) {
8892 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
];
8894 if (v
->dtdv_scope
!= DIFV_SCOPE_GLOBAL
)
8897 switch (v
->dtdv_id
) {
8898 case DIF_VAR_CURTHREAD
:
8901 case DIF_VAR_EXECNAME
:
8902 case DIF_VAR_ZONENAME
:
8911 * This DIF object may be cacheable. Now we need to look for any
8912 * array loading instructions, any memory loading instructions, or
8913 * any stores to thread-local variables.
8915 for (i
= 0; i
< dp
->dtdo_len
; i
++) {
8916 uint_t op
= DIF_INSTR_OP(dp
->dtdo_buf
[i
]);
8918 if ((op
>= DIF_OP_LDSB
&& op
<= DIF_OP_LDX
) ||
8919 (op
>= DIF_OP_ULDSB
&& op
<= DIF_OP_ULDX
) ||
8920 (op
>= DIF_OP_RLDSB
&& op
<= DIF_OP_RLDX
) ||
8921 op
== DIF_OP_LDGA
|| op
== DIF_OP_STTS
)
static void
dtrace_difo_hold(dtrace_difo_t *dp)
{
	uint_t i;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	dp->dtdo_refcnt++;
	ASSERT(dp->dtdo_refcnt != 0);

	/*
	 * We need to check this DIF object for references to the variable
	 * DIF_VAR_VTIMESTAMP.
	 */
	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];

		if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
			continue;

		if (dtrace_vtime_references++ == 0)
			dtrace_vtime_enable();
	}
}
/*
 * This routine calculates the dynamic variable chunksize for a given DIF
 * object.  The calculation is not fool-proof, and can probably be tricked by
 * malicious DIF -- but it works for all compiler-generated DIF.  Because this
 * calculation is likely imperfect, dtrace_dynvar() is able to gracefully fail
 * if a dynamic variable size exceeds the chunksize.
 */
8961 dtrace_difo_chunksize(dtrace_difo_t
*dp
, dtrace_vstate_t
*vstate
)
8964 dtrace_key_t tupregs
[DIF_DTR_NREGS
+ 2]; /* +2 for thread and id */
8965 const dif_instr_t
*text
= dp
->dtdo_buf
;
8971 for (pc
= 0; pc
< dp
->dtdo_len
; pc
++) {
8972 dif_instr_t instr
= text
[pc
];
8973 uint_t op
= DIF_INSTR_OP(instr
);
8974 uint_t rd
= DIF_INSTR_RD(instr
);
8975 uint_t r1
= DIF_INSTR_R1(instr
);
8979 dtrace_key_t
*key
= tupregs
;
8983 sval
= dp
->dtdo_inttab
[DIF_INSTR_INTEGER(instr
)];
8988 key
= &tupregs
[DIF_DTR_NREGS
];
8989 key
[0].dttk_size
= 0;
8990 key
[1].dttk_size
= 0;
8992 scope
= DIFV_SCOPE_THREAD
;
8999 if (DIF_INSTR_OP(instr
) == DIF_OP_STTAA
)
9000 key
[nkeys
++].dttk_size
= 0;
9002 key
[nkeys
++].dttk_size
= 0;
9004 if (op
== DIF_OP_STTAA
) {
9005 scope
= DIFV_SCOPE_THREAD
;
9007 scope
= DIFV_SCOPE_GLOBAL
;
9013 if (ttop
== DIF_DTR_NREGS
)
9016 if ((srd
== 0 || sval
== 0) && r1
== DIF_TYPE_STRING
) {
9018 * If the register for the size of the "pushtr"
9019 * is %r0 (or the value is 0) and the type is
9020 * a string, we'll use the system-wide default
9023 tupregs
[ttop
++].dttk_size
=
9024 dtrace_strsize_default
;
9029 tupregs
[ttop
++].dttk_size
= sval
;
9035 if (ttop
== DIF_DTR_NREGS
)
9038 tupregs
[ttop
++].dttk_size
= 0;
9041 case DIF_OP_FLUSHTS
:
9058 * We have a dynamic variable allocation; calculate its size.
9060 for (ksize
= 0, i
= 0; i
< nkeys
; i
++)
9061 ksize
+= P2ROUNDUP(key
[i
].dttk_size
, sizeof (uint64_t));
9063 size
= sizeof (dtrace_dynvar_t
);
9064 size
+= sizeof (dtrace_key_t
) * (nkeys
- 1);
9068 * Now we need to determine the size of the stored data.
9070 id
= DIF_INSTR_VAR(instr
);
9072 for (i
= 0; i
< dp
->dtdo_varlen
; i
++) {
9073 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
];
9075 if (v
->dtdv_id
== id
&& v
->dtdv_scope
== scope
) {
9076 size
+= v
->dtdv_type
.dtdt_size
;
9081 if (i
== dp
->dtdo_varlen
)
9085 * We have the size. If this is larger than the chunk size
9086 * for our dynamic variable state, reset the chunk size.
9088 size
= P2ROUNDUP(size
, sizeof (uint64_t));
9090 if (size
> vstate
->dtvs_dynvars
.dtds_chunksize
)
9091 vstate
->dtvs_dynvars
.dtds_chunksize
= size
;
9096 dtrace_difo_init(dtrace_difo_t
*dp
, dtrace_vstate_t
*vstate
)
9098 int oldsvars
, osz
, nsz
, otlocals
, ntlocals
;
9101 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
9102 ASSERT(dp
->dtdo_buf
!= NULL
&& dp
->dtdo_len
!= 0);
9104 for (i
= 0; i
< dp
->dtdo_varlen
; i
++) {
9105 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
];
9106 dtrace_statvar_t
*svar
;
9107 dtrace_statvar_t
***svarp
= NULL
;
9109 uint8_t scope
= v
->dtdv_scope
;
9110 int *np
= (int *)NULL
;
9112 if ((id
= v
->dtdv_id
) < DIF_VAR_OTHER_UBASE
)
9115 id
-= DIF_VAR_OTHER_UBASE
;
9118 case DIFV_SCOPE_THREAD
:
9119 while (id
>= (uint_t
)(otlocals
= vstate
->dtvs_ntlocals
)) {
9120 dtrace_difv_t
*tlocals
;
9122 if ((ntlocals
= (otlocals
<< 1)) == 0)
9125 osz
= otlocals
* sizeof (dtrace_difv_t
);
9126 nsz
= ntlocals
* sizeof (dtrace_difv_t
);
9128 tlocals
= kmem_zalloc(nsz
, KM_SLEEP
);
9131 bcopy(vstate
->dtvs_tlocals
,
9133 kmem_free(vstate
->dtvs_tlocals
, osz
);
9136 vstate
->dtvs_tlocals
= tlocals
;
9137 vstate
->dtvs_ntlocals
= ntlocals
;
9140 vstate
->dtvs_tlocals
[id
] = *v
;
9143 case DIFV_SCOPE_LOCAL
:
9144 np
= &vstate
->dtvs_nlocals
;
9145 svarp
= &vstate
->dtvs_locals
;
9147 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
)
9148 dsize
= (int)NCPU
* (v
->dtdv_type
.dtdt_size
+
9151 dsize
= (int)NCPU
* sizeof (uint64_t);
9155 case DIFV_SCOPE_GLOBAL
:
9156 np
= &vstate
->dtvs_nglobals
;
9157 svarp
= &vstate
->dtvs_globals
;
9159 if (v
->dtdv_type
.dtdt_flags
& DIF_TF_BYREF
)
9160 dsize
= v
->dtdv_type
.dtdt_size
+
9169 while (id
>= (uint_t
)(oldsvars
= *np
)) {
9170 dtrace_statvar_t
**statics
;
9171 int newsvars
, oldsize
, newsize
;
9173 if ((newsvars
= (oldsvars
<< 1)) == 0)
9176 oldsize
= oldsvars
* sizeof (dtrace_statvar_t
*);
9177 newsize
= newsvars
* sizeof (dtrace_statvar_t
*);
9179 statics
= kmem_zalloc(newsize
, KM_SLEEP
);
9182 bcopy(*svarp
, statics
, oldsize
);
9183 kmem_free(*svarp
, oldsize
);
9190 if ((svar
= (*svarp
)[id
]) == NULL
) {
9191 svar
= kmem_zalloc(sizeof (dtrace_statvar_t
), KM_SLEEP
);
9192 svar
->dtsv_var
= *v
;
9194 if ((svar
->dtsv_size
= dsize
) != 0) {
9195 svar
->dtsv_data
= (uint64_t)(uintptr_t)
9196 kmem_zalloc(dsize
, KM_SLEEP
);
9199 (*svarp
)[id
] = svar
;
9202 svar
->dtsv_refcnt
++;
9205 dtrace_difo_chunksize(dp
, vstate
);
9206 dtrace_difo_hold(dp
);
9209 static dtrace_difo_t
*
9210 dtrace_difo_duplicate(dtrace_difo_t
*dp
, dtrace_vstate_t
*vstate
)
9215 ASSERT(dp
->dtdo_buf
!= NULL
);
9216 ASSERT(dp
->dtdo_refcnt
!= 0);
9218 new = kmem_zalloc(sizeof (dtrace_difo_t
), KM_SLEEP
);
9220 ASSERT(dp
->dtdo_buf
!= NULL
);
9221 sz
= dp
->dtdo_len
* sizeof (dif_instr_t
);
9222 new->dtdo_buf
= kmem_alloc(sz
, KM_SLEEP
);
9223 bcopy(dp
->dtdo_buf
, new->dtdo_buf
, sz
);
9224 new->dtdo_len
= dp
->dtdo_len
;
9226 if (dp
->dtdo_strtab
!= NULL
) {
9227 ASSERT(dp
->dtdo_strlen
!= 0);
9228 new->dtdo_strtab
= kmem_alloc(dp
->dtdo_strlen
, KM_SLEEP
);
9229 bcopy(dp
->dtdo_strtab
, new->dtdo_strtab
, dp
->dtdo_strlen
);
9230 new->dtdo_strlen
= dp
->dtdo_strlen
;
9233 if (dp
->dtdo_inttab
!= NULL
) {
9234 ASSERT(dp
->dtdo_intlen
!= 0);
9235 sz
= dp
->dtdo_intlen
* sizeof (uint64_t);
9236 new->dtdo_inttab
= kmem_alloc(sz
, KM_SLEEP
);
9237 bcopy(dp
->dtdo_inttab
, new->dtdo_inttab
, sz
);
9238 new->dtdo_intlen
= dp
->dtdo_intlen
;
9241 if (dp
->dtdo_vartab
!= NULL
) {
9242 ASSERT(dp
->dtdo_varlen
!= 0);
9243 sz
= dp
->dtdo_varlen
* sizeof (dtrace_difv_t
);
9244 new->dtdo_vartab
= kmem_alloc(sz
, KM_SLEEP
);
9245 bcopy(dp
->dtdo_vartab
, new->dtdo_vartab
, sz
);
9246 new->dtdo_varlen
= dp
->dtdo_varlen
;
9249 dtrace_difo_init(new, vstate
);
9254 dtrace_difo_destroy(dtrace_difo_t
*dp
, dtrace_vstate_t
*vstate
)
9258 ASSERT(dp
->dtdo_refcnt
== 0);
9260 for (i
= 0; i
< dp
->dtdo_varlen
; i
++) {
9261 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
];
9262 dtrace_statvar_t
*svar
;
9263 dtrace_statvar_t
**svarp
= NULL
;
9265 uint8_t scope
= v
->dtdv_scope
;
9269 case DIFV_SCOPE_THREAD
:
9272 case DIFV_SCOPE_LOCAL
:
9273 np
= &vstate
->dtvs_nlocals
;
9274 svarp
= vstate
->dtvs_locals
;
9277 case DIFV_SCOPE_GLOBAL
:
9278 np
= &vstate
->dtvs_nglobals
;
9279 svarp
= vstate
->dtvs_globals
;
9286 if ((id
= v
->dtdv_id
) < DIF_VAR_OTHER_UBASE
)
9289 id
-= DIF_VAR_OTHER_UBASE
;
9291 ASSERT(id
< (uint_t
)*np
);
9294 ASSERT(svar
!= NULL
);
9295 ASSERT(svar
->dtsv_refcnt
> 0);
9297 if (--svar
->dtsv_refcnt
> 0)
9300 if (svar
->dtsv_size
!= 0) {
9301 ASSERT(svar
->dtsv_data
!= 0);
9302 kmem_free((void *)(uintptr_t)svar
->dtsv_data
,
9306 kmem_free(svar
, sizeof (dtrace_statvar_t
));
9310 kmem_free(dp
->dtdo_buf
, dp
->dtdo_len
* sizeof (dif_instr_t
));
9311 kmem_free(dp
->dtdo_inttab
, dp
->dtdo_intlen
* sizeof (uint64_t));
9312 kmem_free(dp
->dtdo_strtab
, dp
->dtdo_strlen
);
9313 kmem_free(dp
->dtdo_vartab
, dp
->dtdo_varlen
* sizeof (dtrace_difv_t
));
9315 kmem_free(dp
, sizeof (dtrace_difo_t
));
static void
dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
{
	uint_t i;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(dp->dtdo_refcnt != 0);

	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];

		if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
			continue;

		ASSERT(dtrace_vtime_references > 0);
		if (--dtrace_vtime_references == 0)
			dtrace_vtime_disable();
	}

	if (--dp->dtdo_refcnt == 0)
		dtrace_difo_destroy(dp, vstate);
}
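/*
 * Illustrative sketch (not part of the original source): the DIFO reference
 * counting discipline implied by dtrace_difo_hold() and
 * dtrace_difo_release().  Both must be called with dtrace_lock held; the
 * object is destroyed when the last reference is released.  The dp and
 * vstate arguments here are hypothetical.
 */
#if 0
static void
example_difo_refcount(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
{
	lck_mtx_lock(&dtrace_lock);
	dtrace_difo_hold(dp);			/* refcnt: N + 1 */
	dtrace_difo_release(dp, vstate);	/* refcnt: N (destroyed if 0) */
	lck_mtx_unlock(&dtrace_lock);
}
#endif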
9342 * DTrace Format Functions
9345 dtrace_format_add(dtrace_state_t
*state
, char *str
)
9348 uint16_t ndx
, len
= strlen(str
) + 1;
9350 fmt
= kmem_zalloc(len
, KM_SLEEP
);
9351 bcopy(str
, fmt
, len
);
9353 for (ndx
= 0; ndx
< state
->dts_nformats
; ndx
++) {
9354 if (state
->dts_formats
[ndx
] == NULL
) {
9355 state
->dts_formats
[ndx
] = fmt
;
9360 if (state
->dts_nformats
== USHRT_MAX
) {
9362 * This is only likely if a denial-of-service attack is being
9363 * attempted. As such, it's okay to fail silently here.
9365 kmem_free(fmt
, len
);
9370 * For simplicity, we always resize the formats array to be exactly the
9371 * number of formats.
9373 ndx
= state
->dts_nformats
++;
9374 new = kmem_alloc((ndx
+ 1) * sizeof (char *), KM_SLEEP
);
9376 if (state
->dts_formats
!= NULL
) {
9378 bcopy(state
->dts_formats
, new, ndx
* sizeof (char *));
9379 kmem_free(state
->dts_formats
, ndx
* sizeof (char *));
9382 state
->dts_formats
= new;
9383 state
->dts_formats
[ndx
] = fmt
;
9389 dtrace_format_remove(dtrace_state_t
*state
, uint16_t format
)
9393 ASSERT(state
->dts_formats
!= NULL
);
9394 ASSERT(format
<= state
->dts_nformats
);
9395 ASSERT(state
->dts_formats
[format
- 1] != NULL
);
9397 fmt
= state
->dts_formats
[format
- 1];
9398 kmem_free(fmt
, strlen(fmt
) + 1);
9399 state
->dts_formats
[format
- 1] = NULL
;
9403 dtrace_format_destroy(dtrace_state_t
*state
)
9407 if (state
->dts_nformats
== 0) {
9408 ASSERT(state
->dts_formats
== NULL
);
9412 ASSERT(state
->dts_formats
!= NULL
);
9414 for (i
= 0; i
< state
->dts_nformats
; i
++) {
9415 char *fmt
= state
->dts_formats
[i
];
9420 kmem_free(fmt
, strlen(fmt
) + 1);
9423 kmem_free(state
->dts_formats
, state
->dts_nformats
* sizeof (char *));
9424 state
->dts_nformats
= 0;
9425 state
->dts_formats
= NULL
;
/*
 * DTrace Predicate Functions
 */
static dtrace_predicate_t *
dtrace_predicate_create(dtrace_difo_t *dp)
{
	dtrace_predicate_t *pred;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(dp->dtdo_refcnt != 0);

	pred = kmem_zalloc(sizeof (dtrace_predicate_t), KM_SLEEP);
	pred->dtp_difo = dp;
	pred->dtp_refcnt = 1;

	if (!dtrace_difo_cacheable(dp))
		return (pred);

	if (dtrace_predcache_id == DTRACE_CACHEIDNONE) {
		/*
		 * This is only theoretically possible -- we have had 2^32
		 * cacheable predicates on this machine.  We cannot allow any
		 * more predicates to become cacheable:  as unlikely as it is,
		 * there may be a thread caching a (now stale) predicate cache
		 * ID.  (N.B.: the temptation is being successfully resisted to
		 * have this cmn_err() "Holy shit -- we executed this code!")
		 */
		return (pred);
	}

	pred->dtp_cacheid = dtrace_predcache_id++;

	return (pred);
}
static void
dtrace_predicate_hold(dtrace_predicate_t *pred)
{
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(pred->dtp_difo != NULL && pred->dtp_difo->dtdo_refcnt != 0);
	ASSERT(pred->dtp_refcnt > 0);

	pred->dtp_refcnt++;
}
static void
dtrace_predicate_release(dtrace_predicate_t *pred, dtrace_vstate_t *vstate)
{
	dtrace_difo_t *dp = pred->dtp_difo;
#pragma unused(dp) /* __APPLE__ */

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(dp != NULL && dp->dtdo_refcnt != 0);
	ASSERT(pred->dtp_refcnt > 0);

	if (--pred->dtp_refcnt == 0) {
		dtrace_difo_release(pred->dtp_difo, vstate);
		kmem_free(pred, sizeof (dtrace_predicate_t));
	}
}
/*
 * DTrace Action Description Functions
 */
static dtrace_actdesc_t *
dtrace_actdesc_create(dtrace_actkind_t kind, uint32_t ntuple,
    uint64_t uarg, uint64_t arg)
{
	dtrace_actdesc_t *act;

	ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != 0 &&
	    arg >= KERNELBASE) || (arg == 0 && kind == DTRACEACT_PRINTA));

	act = kmem_zalloc(sizeof (dtrace_actdesc_t), KM_SLEEP);
	act->dtad_kind = kind;
	act->dtad_ntuple = ntuple;
	act->dtad_uarg = uarg;
	act->dtad_arg = arg;
	act->dtad_refcnt = 1;

	return (act);
}
static void
dtrace_actdesc_hold(dtrace_actdesc_t *act)
{
	ASSERT(act->dtad_refcnt >= 1);
	act->dtad_refcnt++;
}
static void
dtrace_actdesc_release(dtrace_actdesc_t *act, dtrace_vstate_t *vstate)
{
	dtrace_actkind_t kind = act->dtad_kind;
	dtrace_difo_t *dp;

	ASSERT(act->dtad_refcnt >= 1);

	if (--act->dtad_refcnt != 0)
		return;

	if ((dp = act->dtad_difo) != NULL)
		dtrace_difo_release(dp, vstate);

	if (DTRACEACT_ISPRINTFLIKE(kind)) {
		char *str = (char *)(uintptr_t)act->dtad_arg;

		ASSERT((str != NULL && (uintptr_t)str >= KERNELBASE) ||
		    (str == NULL && act->dtad_kind == DTRACEACT_PRINTA));

		if (str != NULL)
			kmem_free(str, strlen(str) + 1);
	}

	kmem_free(act, sizeof (dtrace_actdesc_t));
}
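/*
 * Illustrative sketch (not part of the original source): creating, holding
 * and releasing an action description.  DTRACEACT_STACK is used only as an
 * example of a non-printf-like action with no argument; the vstate argument
 * is hypothetical.
 */
#if 0
static void
example_actdesc(dtrace_vstate_t *vstate)
{
	dtrace_actdesc_t *act;

	act = dtrace_actdesc_create(DTRACEACT_STACK, 0, 0, 0);
	dtrace_actdesc_hold(act);		/* refcnt: 2 */
	dtrace_actdesc_release(act, vstate);	/* refcnt: 1 */
	dtrace_actdesc_release(act, vstate);	/* freed */
}
#endif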
/*
 * DTrace ECB Functions
 */
static dtrace_ecb_t *
dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe)
{
	dtrace_ecb_t *ecb;
	dtrace_epid_t epid;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	ecb = kmem_zalloc(sizeof (dtrace_ecb_t), KM_SLEEP);
	ecb->dte_predicate = NULL;
	ecb->dte_probe = probe;

	/*
	 * The default size is the size of the default action: recording
	 * the epid.
	 */
	ecb->dte_size = ecb->dte_needed = sizeof (dtrace_epid_t);
	ecb->dte_alignment = sizeof (dtrace_epid_t);

	epid = state->dts_epid++;

	if (epid - 1 >= (dtrace_epid_t)state->dts_necbs) {
		dtrace_ecb_t **oecbs = state->dts_ecbs, **ecbs;
		int necbs = state->dts_necbs << 1;

		ASSERT(epid == (dtrace_epid_t)state->dts_necbs + 1);

		if (necbs == 0) {
			ASSERT(oecbs == NULL);
			necbs = 1;
		}

		ecbs = kmem_zalloc(necbs * sizeof (*ecbs), KM_SLEEP);

		if (oecbs != NULL)
			bcopy(oecbs, ecbs, state->dts_necbs * sizeof (*ecbs));

		dtrace_membar_producer();
		state->dts_ecbs = ecbs;

		if (oecbs != NULL) {
			/*
			 * If this state is active, we must dtrace_sync()
			 * before we can free the old dts_ecbs array: we're
			 * coming in hot, and there may be active ring
			 * buffer processing (which indexes into the dts_ecbs
			 * array) on another CPU.
			 */
			if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
				dtrace_sync();

			kmem_free(oecbs, state->dts_necbs * sizeof (*ecbs));
		}

		dtrace_membar_producer();
		state->dts_necbs = necbs;
	}

	ecb->dte_state = state;

	ASSERT(state->dts_ecbs[epid - 1] == NULL);
	dtrace_membar_producer();
	state->dts_ecbs[(ecb->dte_epid = epid) - 1] = ecb;

	return (ecb);
}
static int
dtrace_ecb_enable(dtrace_ecb_t *ecb)
{
	dtrace_probe_t *probe = ecb->dte_probe;

	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(ecb->dte_next == NULL);

	if (probe == NULL) {
		/*
		 * This is the NULL probe -- there's nothing to do.
		 */
		return (0);
	}

	probe->dtpr_provider->dtpv_ecb_count++;
	if (probe->dtpr_ecb == NULL) {
		dtrace_provider_t *prov = probe->dtpr_provider;

		/*
		 * We're the first ECB on this probe.
		 */
		probe->dtpr_ecb = probe->dtpr_ecb_last = ecb;

		if (ecb->dte_predicate != NULL)
			probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid;

		return (prov->dtpv_pops.dtps_enable(prov->dtpv_arg,
		    probe->dtpr_id, probe->dtpr_arg));
	} else {
		/*
		 * This probe is already active.  Swing the last pointer to
		 * point to the new ECB, and issue a dtrace_sync() to assure
		 * that all CPUs have seen the change.
		 */
		ASSERT(probe->dtpr_ecb_last != NULL);
		probe->dtpr_ecb_last->dte_next = ecb;
		probe->dtpr_ecb_last = ecb;
		probe->dtpr_predcache = 0;

		dtrace_sync();
		return (0);
	}
}
static void
dtrace_ecb_resize(dtrace_ecb_t *ecb)
{
	uint32_t maxalign = sizeof (dtrace_epid_t);
	uint32_t align = sizeof (uint8_t), offs, diff;
	dtrace_action_t *act;
	int wastuple = 0;
	uint32_t aggbase = UINT32_MAX;
	dtrace_state_t *state = ecb->dte_state;

	/*
	 * If we record anything, we always record the epid.  (And we always
	 * record it first.)
	 */
	offs = sizeof (dtrace_epid_t);
	ecb->dte_size = ecb->dte_needed = sizeof (dtrace_epid_t);

	for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
		dtrace_recdesc_t *rec = &act->dta_rec;

		if ((align = rec->dtrd_alignment) > maxalign)
			maxalign = align;

		if (!wastuple && act->dta_intuple) {
			/*
			 * This is the first record in a tuple.  Align the
			 * offset to be at offset 4 in an 8-byte aligned
			 * block.
			 */
			diff = offs + sizeof (dtrace_aggid_t);

			if ((diff = (diff & (sizeof (uint64_t) - 1))))
				offs += sizeof (uint64_t) - diff;

			aggbase = offs - sizeof (dtrace_aggid_t);
			ASSERT(!(aggbase & (sizeof (uint64_t) - 1)));
		}

		if (rec->dtrd_size != 0 && (diff = (offs & (align - 1)))) {
			/*
			 * The current offset is not properly aligned; align it.
			 */
			offs += align - diff;
		}

		rec->dtrd_offset = offs;

		if (offs + rec->dtrd_size > ecb->dte_needed) {
			ecb->dte_needed = offs + rec->dtrd_size;

			if (ecb->dte_needed > state->dts_needed)
				state->dts_needed = ecb->dte_needed;
		}

		if (DTRACEACT_ISAGG(act->dta_kind)) {
			dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
			dtrace_action_t *first = agg->dtag_first, *prev;

			ASSERT(rec->dtrd_size != 0 && first != NULL);
			ASSERT(aggbase != UINT32_MAX);

			agg->dtag_base = aggbase;

			while ((prev = first->dta_prev) != NULL &&
			    DTRACEACT_ISAGG(prev->dta_kind)) {
				agg = (dtrace_aggregation_t *)prev;
				first = agg->dtag_first;
			}

			if (prev != NULL) {
				offs = prev->dta_rec.dtrd_offset +
				    prev->dta_rec.dtrd_size;
			} else {
				offs = sizeof (dtrace_epid_t);
			}
		} else {
			if (!act->dta_intuple)
				ecb->dte_size = offs + rec->dtrd_size;

			offs += rec->dtrd_size;
		}

		wastuple = act->dta_intuple;
	}

	if ((act = ecb->dte_action) != NULL &&
	    !(act->dta_kind == DTRACEACT_SPECULATE && act->dta_next == NULL) &&
	    ecb->dte_size == sizeof (dtrace_epid_t)) {
		/*
		 * If the size is still sizeof (dtrace_epid_t), then all
		 * actions store no data; set the size to 0.
		 */
		ecb->dte_alignment = maxalign;
		ecb->dte_size = 0;

		/*
		 * If the needed space is still sizeof (dtrace_epid_t), then
		 * all actions need no additional space; set the needed
		 * size to 0.
		 */
		if (ecb->dte_needed == sizeof (dtrace_epid_t))
			ecb->dte_needed = 0;

		return;
	}

	/*
	 * Set our alignment, and make sure that the dte_size and dte_needed
	 * are aligned to the size of an EPID.
	 */
	ecb->dte_alignment = maxalign;
	ecb->dte_size = (ecb->dte_size + (sizeof (dtrace_epid_t) - 1)) &
	    ~(sizeof (dtrace_epid_t) - 1);
	ecb->dte_needed = (ecb->dte_needed + (sizeof (dtrace_epid_t) - 1)) &
	    ~(sizeof (dtrace_epid_t) - 1);
	ASSERT(ecb->dte_size <= ecb->dte_needed);
}
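/*
 * Illustrative note (not from the original source): the rounding above is the
 * usual power-of-two round-up.  With sizeof (dtrace_epid_t) == 4, a dte_size
 * of 13 becomes (13 + 3) & ~3 == 16, so every enabled-probe record occupies a
 * whole number of EPID-sized slots in the principal buffer.
 */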
static dtrace_action_t *
dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
{
	dtrace_aggregation_t *agg;
	size_t size = sizeof (uint64_t);
	int ntuple = desc->dtad_ntuple;
	dtrace_action_t *act;
	dtrace_recdesc_t *frec;
	dtrace_aggid_t aggid;
	dtrace_state_t *state = ecb->dte_state;

	agg = kmem_zalloc(sizeof (dtrace_aggregation_t), KM_SLEEP);
	agg->dtag_ecb = ecb;

	ASSERT(DTRACEACT_ISAGG(desc->dtad_kind));

	switch (desc->dtad_kind) {
	case DTRACEAGG_MIN:
		agg->dtag_initial = INT64_MAX;
		agg->dtag_aggregate = dtrace_aggregate_min;
		break;

	case DTRACEAGG_MAX:
		agg->dtag_initial = INT64_MIN;
		agg->dtag_aggregate = dtrace_aggregate_max;
		break;

	case DTRACEAGG_COUNT:
		agg->dtag_aggregate = dtrace_aggregate_count;
		break;

	case DTRACEAGG_QUANTIZE:
		agg->dtag_aggregate = dtrace_aggregate_quantize;
		size = (((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) *
		    sizeof (uint64_t);
		break;

	case DTRACEAGG_LQUANTIZE: {
		uint16_t step = DTRACE_LQUANTIZE_STEP(desc->dtad_arg);
		uint16_t levels = DTRACE_LQUANTIZE_LEVELS(desc->dtad_arg);

		agg->dtag_initial = desc->dtad_arg;
		agg->dtag_aggregate = dtrace_aggregate_lquantize;

		if (step == 0 || levels == 0)
			goto err;

		size = levels * sizeof (uint64_t) + 3 * sizeof (uint64_t);
		break;
	}

	case DTRACEAGG_LLQUANTIZE: {
		uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg);
		uint16_t low = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg);
		uint16_t high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg);
		uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg);
		int64_t v;

		agg->dtag_initial = desc->dtad_arg;
		agg->dtag_aggregate = dtrace_aggregate_llquantize;

		if (factor < 2 || low >= high || nsteps < factor)
			goto err;

		/*
		 * Now check that the number of steps evenly divides a power
		 * of the factor.  (This assures both integer bucket size and
		 * linearity within each magnitude.)
		 */
		for (v = factor; v < nsteps; v *= factor)
			continue;

		if ((v % nsteps) || (nsteps % factor))
			goto err;

		size = (dtrace_aggregate_llquantize_bucket(factor, low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t);
		break;
	}

	case DTRACEAGG_AVG:
		agg->dtag_aggregate = dtrace_aggregate_avg;
		size = sizeof (uint64_t) * 2;
		break;

	case DTRACEAGG_STDDEV:
		agg->dtag_aggregate = dtrace_aggregate_stddev;
		size = sizeof (uint64_t) * 4;
		break;

	case DTRACEAGG_SUM:
		agg->dtag_aggregate = dtrace_aggregate_sum;
		break;

	default:
		goto err;
	}

	agg->dtag_action.dta_rec.dtrd_size = size;

	if (ntuple == 0)
		goto err;

	/*
	 * We must make sure that we have enough actions for the n-tuple.
	 */
	for (act = ecb->dte_action_last; act != NULL; act = act->dta_prev) {
		if (DTRACEACT_ISAGG(act->dta_kind))
			break;

		if (--ntuple == 0) {
			/*
			 * This is the action with which our n-tuple begins.
			 */
			agg->dtag_first = act;
			goto success;
		}
	}

	/*
	 * This n-tuple is short by ntuple elements.  Return failure.
	 */
	ASSERT(ntuple != 0);
err:
	kmem_free(agg, sizeof (dtrace_aggregation_t));
	return (NULL);

success:
	/*
	 * If the last action in the tuple has a size of zero, it's actually
	 * an expression argument for the aggregating action.
	 */
	ASSERT(ecb->dte_action_last != NULL);
	act = ecb->dte_action_last;

	if (act->dta_kind == DTRACEACT_DIFEXPR) {
		ASSERT(act->dta_difo != NULL);

		if (act->dta_difo->dtdo_rtype.dtdt_size == 0)
			agg->dtag_hasarg = 1;
	}

	/*
	 * We need to allocate an id for this aggregation.
	 */
	aggid = (dtrace_aggid_t)(uintptr_t)vmem_alloc(state->dts_aggid_arena, 1,
	    VM_BESTFIT | VM_SLEEP);

	if (aggid - 1 >= (dtrace_aggid_t)state->dts_naggregations) {
		dtrace_aggregation_t **oaggs = state->dts_aggregations;
		dtrace_aggregation_t **aggs;
		int naggs = state->dts_naggregations << 1;
		int onaggs = state->dts_naggregations;

		ASSERT(aggid == (dtrace_aggid_t)state->dts_naggregations + 1);

		if (naggs == 0) {
			ASSERT(oaggs == NULL);
			naggs = 1;
		}

		aggs = kmem_zalloc(naggs * sizeof (*aggs), KM_SLEEP);

		if (oaggs != NULL) {
			bcopy(oaggs, aggs, onaggs * sizeof (*aggs));
			kmem_free(oaggs, onaggs * sizeof (*aggs));
		}

		state->dts_aggregations = aggs;
		state->dts_naggregations = naggs;
	}

	ASSERT(state->dts_aggregations[aggid - 1] == NULL);
	state->dts_aggregations[(agg->dtag_id = aggid) - 1] = agg;

	frec = &agg->dtag_first->dta_rec;
	if (frec->dtrd_alignment < sizeof (dtrace_aggid_t))
		frec->dtrd_alignment = sizeof (dtrace_aggid_t);

	for (act = agg->dtag_first; act != NULL; act = act->dta_next) {
		ASSERT(!act->dta_intuple);
		act->dta_intuple = 1;
	}

	return (&agg->dtag_action);
}
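/*
 * Illustrative note (not from the original source): the size computed in the
 * switch above is the per-key buffer footprint of the aggregation.  For
 * example, quantize() uses (((64 - 1) * 2) + 1) == 127 power-of-two buckets,
 * i.e. 127 * sizeof (uint64_t) == 1016 bytes per key, while lquantize() with
 * "levels" buckets adds three further 64-bit slots by the formula above.
 */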
static void
dtrace_ecb_aggregation_destroy(dtrace_ecb_t *ecb, dtrace_action_t *act)
{
	dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
	dtrace_state_t *state = ecb->dte_state;
	dtrace_aggid_t aggid = agg->dtag_id;

	ASSERT(DTRACEACT_ISAGG(act->dta_kind));
	vmem_free(state->dts_aggid_arena, (void *)(uintptr_t)aggid, 1);

	ASSERT(state->dts_aggregations[aggid - 1] == agg);
	state->dts_aggregations[aggid - 1] = NULL;

	kmem_free(agg, sizeof (dtrace_aggregation_t));
}
static int
dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
{
	dtrace_action_t *action, *last;
	dtrace_difo_t *dp = desc->dtad_difo;
	uint32_t size = 0, align = sizeof (uint8_t), mask;
	uint16_t format = 0;
	dtrace_recdesc_t *rec;
	dtrace_state_t *state = ecb->dte_state;
	dtrace_optval_t *opt = state->dts_options;
	dtrace_optval_t nframes = 0, strsize;
	uint64_t arg = desc->dtad_arg;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(ecb->dte_action == NULL || ecb->dte_action->dta_refcnt == 1);

	if (DTRACEACT_ISAGG(desc->dtad_kind)) {
		/*
		 * If this is an aggregating action, there must be neither
		 * a speculate nor a commit on the action chain.
		 */
		dtrace_action_t *act;

		for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
			if (act->dta_kind == DTRACEACT_COMMIT)
				return (EINVAL);

			if (act->dta_kind == DTRACEACT_SPECULATE)
				return (EINVAL);
		}

		action = dtrace_ecb_aggregation_create(ecb, desc);

		if (action == NULL)
			return (EINVAL);
	} else {
		if (DTRACEACT_ISDESTRUCTIVE(desc->dtad_kind) ||
		    (desc->dtad_kind == DTRACEACT_DIFEXPR &&
		    dp != NULL && dp->dtdo_destructive)) {
			state->dts_destructive = 1;
		}

		switch (desc->dtad_kind) {
		case DTRACEACT_PRINTF:
		case DTRACEACT_PRINTA:
		case DTRACEACT_SYSTEM:
		case DTRACEACT_FREOPEN:
			/*
			 * We know that our arg is a string -- turn it into a
			 * format.
			 */
			if (arg == 0) {
				ASSERT(desc->dtad_kind == DTRACEACT_PRINTA);
				format = 0;
			} else {
				ASSERT(arg != 0);
				ASSERT(arg > KERNELBASE);
				format = dtrace_format_add(state,
				    (char *)(uintptr_t)arg);
			}

			/*FALLTHROUGH*/
		case DTRACEACT_LIBACT:
		case DTRACEACT_DIFEXPR:
		case DTRACEACT_TRACEMEM:
		case DTRACEACT_TRACEMEM_DYNSIZE:
		case DTRACEACT_APPLEBINARY:	/* __APPLE__ */
			if (dp == NULL)
				return (EINVAL);

			if ((size = dp->dtdo_rtype.dtdt_size) != 0)
				break;

			if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) {
				if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
					return (EINVAL);

				size = opt[DTRACEOPT_STRSIZE];
			}

			break;

		case DTRACEACT_STACK:
			if ((nframes = arg) == 0) {
				nframes = opt[DTRACEOPT_STACKFRAMES];
				ASSERT(nframes > 0);
				arg = nframes;
			}

			size = nframes * sizeof (pc_t);
			break;

		case DTRACEACT_JSTACK:
			if ((strsize = DTRACE_USTACK_STRSIZE(arg)) == 0)
				strsize = opt[DTRACEOPT_JSTACKSTRSIZE];

			if ((nframes = DTRACE_USTACK_NFRAMES(arg)) == 0)
				nframes = opt[DTRACEOPT_JSTACKFRAMES];

			arg = DTRACE_USTACK_ARG(nframes, strsize);

			/*FALLTHROUGH*/
		case DTRACEACT_USTACK:
			if (desc->dtad_kind != DTRACEACT_JSTACK &&
			    (nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) {
				strsize = DTRACE_USTACK_STRSIZE(arg);
				nframes = opt[DTRACEOPT_USTACKFRAMES];
				ASSERT(nframes > 0);
				arg = DTRACE_USTACK_ARG(nframes, strsize);
			}

			/*
			 * Save a slot for the pid.
			 */
			size = (nframes + 1) * sizeof (uint64_t);
			size += DTRACE_USTACK_STRSIZE(arg);
			size = P2ROUNDUP(size, (uint32_t)(sizeof (uintptr_t)));

			break;

		case DTRACEACT_SYM:
		case DTRACEACT_MOD:
			if (dp == NULL || ((size = dp->dtdo_rtype.dtdt_size) !=
			    sizeof (uint64_t)) ||
			    (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
				return (EINVAL);
			break;

		case DTRACEACT_USYM:
		case DTRACEACT_UMOD:
		case DTRACEACT_UADDR:
			if (dp == NULL ||
			    (dp->dtdo_rtype.dtdt_size != sizeof (uint64_t)) ||
			    (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
				return (EINVAL);

			/*
			 * We have a slot for the pid, plus a slot for the
			 * argument.  To keep things simple (aligned with
			 * bitness-neutral sizing), we store each as a 64-bit
			 * quantity.
			 */
			size = 2 * sizeof (uint64_t);
			break;

		case DTRACEACT_STOP:
		case DTRACEACT_BREAKPOINT:
		case DTRACEACT_PANIC:
			break;

		case DTRACEACT_CHILL:
		case DTRACEACT_DISCARD:
		case DTRACEACT_RAISE:
		case DTRACEACT_PIDRESUME:	/* __APPLE__ */
			if (dp == NULL)
				return (EINVAL);
			break;

		case DTRACEACT_EXIT:
			if (dp == NULL ||
			    (size = dp->dtdo_rtype.dtdt_size) != sizeof (int) ||
			    (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
				return (EINVAL);
			break;

		case DTRACEACT_SPECULATE:
			if (ecb->dte_size > sizeof (dtrace_epid_t))
				return (EINVAL);

			if (dp == NULL)
				return (EINVAL);

			state->dts_speculates = 1;
			break;

		case DTRACEACT_COMMIT: {
			dtrace_action_t *act = ecb->dte_action;

			for (; act != NULL; act = act->dta_next) {
				if (act->dta_kind == DTRACEACT_COMMIT)
					return (EINVAL);
			}

			if (dp == NULL)
				return (EINVAL);
			break;
		}

		default:
			return (EINVAL);
		}

		if (size != 0 || desc->dtad_kind == DTRACEACT_SPECULATE) {
			/*
			 * If this is a data-storing action or a speculate,
			 * we must be sure that there isn't a commit on the
			 * action chain.
			 */
			dtrace_action_t *act = ecb->dte_action;

			for (; act != NULL; act = act->dta_next) {
				if (act->dta_kind == DTRACEACT_COMMIT)
					return (EINVAL);
			}
		}

		action = kmem_zalloc(sizeof (dtrace_action_t), KM_SLEEP);
		action->dta_rec.dtrd_size = size;
	}

	action->dta_refcnt = 1;
	rec = &action->dta_rec;
	size = rec->dtrd_size;

	for (mask = sizeof (uint64_t) - 1; size != 0 && mask > 0; mask >>= 1) {
		if (!(size & mask)) {
			align = mask + 1;
			break;
		}
	}

	action->dta_kind = desc->dtad_kind;

	if ((action->dta_difo = dp) != NULL)
		dtrace_difo_hold(dp);

	rec->dtrd_action = action->dta_kind;
	rec->dtrd_arg = arg;
	rec->dtrd_uarg = desc->dtad_uarg;
	rec->dtrd_alignment = (uint16_t)align;
	rec->dtrd_format = format;

	if ((last = ecb->dte_action_last) != NULL) {
		ASSERT(ecb->dte_action != NULL);
		action->dta_prev = last;
		last->dta_next = action;
	} else {
		ASSERT(ecb->dte_action == NULL);
		ecb->dte_action = action;
	}

	ecb->dte_action_last = action;

	return (0);
}
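/*
 * Illustrative note (not from the original source): the mask loop above
 * derives a record's alignment from the low-order bits of its size, capped at
 * 8 bytes.  For example, a 16-byte record yields an alignment of 8, a 12-byte
 * record yields 4, and a 1-byte record yields 1; a size of 0 leaves the
 * default byte alignment in place.
 */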
static void
dtrace_ecb_action_remove(dtrace_ecb_t *ecb)
{
	dtrace_action_t *act = ecb->dte_action, *next;
	dtrace_vstate_t *vstate = &ecb->dte_state->dts_vstate;
	dtrace_difo_t *dp;
	uint16_t format;

	if (act != NULL && act->dta_refcnt > 1) {
		ASSERT(act->dta_next == NULL || act->dta_next->dta_refcnt == 1);
		act->dta_refcnt--;
	} else {
		for (; act != NULL; act = next) {
			next = act->dta_next;
			ASSERT(next != NULL || act == ecb->dte_action_last);
			ASSERT(act->dta_refcnt == 1);

			if ((format = act->dta_rec.dtrd_format) != 0)
				dtrace_format_remove(ecb->dte_state, format);

			if ((dp = act->dta_difo) != NULL)
				dtrace_difo_release(dp, vstate);

			if (DTRACEACT_ISAGG(act->dta_kind)) {
				dtrace_ecb_aggregation_destroy(ecb, act);
			} else {
				kmem_free(act, sizeof (dtrace_action_t));
			}
		}
	}

	ecb->dte_action = NULL;
	ecb->dte_action_last = NULL;
	ecb->dte_size = sizeof (dtrace_epid_t);
}
static void
dtrace_ecb_disable(dtrace_ecb_t *ecb)
{
	/*
	 * We disable the ECB by removing it from its probe.
	 */
	dtrace_ecb_t *pecb, *prev = NULL;
	dtrace_probe_t *probe = ecb->dte_probe;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (probe == NULL) {
		/*
		 * This is the NULL probe; there is nothing to disable.
		 */
		return;
	}

	for (pecb = probe->dtpr_ecb; pecb != NULL; pecb = pecb->dte_next) {
		if (pecb == ecb)
			break;
		prev = pecb;
	}

	ASSERT(pecb != NULL);

	if (prev == NULL) {
		probe->dtpr_ecb = ecb->dte_next;
	} else {
		prev->dte_next = ecb->dte_next;
	}

	if (ecb == probe->dtpr_ecb_last) {
		ASSERT(ecb->dte_next == NULL);
		probe->dtpr_ecb_last = prev;
	}

	probe->dtpr_provider->dtpv_ecb_count--;

	/*
	 * The ECB has been disconnected from the probe; now sync to assure
	 * that all CPUs have seen the change before returning.
	 */
	dtrace_sync();

	if (probe->dtpr_ecb == NULL) {
		/*
		 * That was the last ECB on the probe; clear the predicate
		 * cache ID for the probe, disable it and sync one more time
		 * to assure that we'll never hit it again.
		 */
		dtrace_provider_t *prov = probe->dtpr_provider;

		ASSERT(ecb->dte_next == NULL);
		ASSERT(probe->dtpr_ecb_last == NULL);
		probe->dtpr_predcache = DTRACE_CACHEIDNONE;
		prov->dtpv_pops.dtps_disable(prov->dtpv_arg,
		    probe->dtpr_id, probe->dtpr_arg);
		dtrace_sync();
	} else {
		/*
		 * There is at least one ECB remaining on the probe.  If there
		 * is _exactly_ one, set the probe's predicate cache ID to be
		 * the predicate cache ID of the remaining ECB.
		 */
		ASSERT(probe->dtpr_ecb_last != NULL);
		ASSERT(probe->dtpr_predcache == DTRACE_CACHEIDNONE);

		if (probe->dtpr_ecb == probe->dtpr_ecb_last) {
			dtrace_predicate_t *p = probe->dtpr_ecb->dte_predicate;

			ASSERT(probe->dtpr_ecb->dte_next == NULL);

			if (p != NULL)
				probe->dtpr_predcache = p->dtp_cacheid;
		}

		ecb->dte_next = NULL;
	}
}
static void
dtrace_ecb_destroy(dtrace_ecb_t *ecb)
{
	dtrace_state_t *state = ecb->dte_state;
	dtrace_vstate_t *vstate = &state->dts_vstate;
	dtrace_predicate_t *pred;
	dtrace_epid_t epid = ecb->dte_epid;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(ecb->dte_next == NULL);
	ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb);

	if ((pred = ecb->dte_predicate) != NULL)
		dtrace_predicate_release(pred, vstate);

	dtrace_ecb_action_remove(ecb);

	ASSERT(state->dts_ecbs[epid - 1] == ecb);
	state->dts_ecbs[epid - 1] = NULL;

	kmem_free(ecb, sizeof (dtrace_ecb_t));
}
static dtrace_ecb_t *
dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe,
    dtrace_enabling_t *enab)
{
	dtrace_ecb_t *ecb;
	dtrace_predicate_t *pred;
	dtrace_actdesc_t *act;
	dtrace_provider_t *prov;
	dtrace_ecbdesc_t *desc = enab->dten_current;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(state != NULL);

	ecb = dtrace_ecb_add(state, probe);
	ecb->dte_uarg = desc->dted_uarg;

	if ((pred = desc->dted_pred.dtpdd_predicate) != NULL) {
		dtrace_predicate_hold(pred);
		ecb->dte_predicate = pred;
	}

	if (probe != NULL) {
		/*
		 * If the provider shows more leg than the consumer is old
		 * enough to see, we need to enable the appropriate implicit
		 * predicate bits to prevent the ecb from activating at
		 * revealing times.
		 *
		 * Providers specifying DTRACE_PRIV_USER at register time
		 * are stating that they need the /proc-style privilege
		 * model to be enforced, and this is what DTRACE_COND_OWNER
		 * and DTRACE_COND_ZONEOWNER will then do at probe time.
		 */
		prov = probe->dtpr_provider;
		if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLPROC) &&
		    (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
			ecb->dte_cond |= DTRACE_COND_OWNER;

		if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLZONE) &&
		    (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
			ecb->dte_cond |= DTRACE_COND_ZONEOWNER;

		/*
		 * If the provider shows us kernel innards and the user
		 * is lacking sufficient privilege, enable the
		 * DTRACE_COND_USERMODE implicit predicate.
		 */
		if (!(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) &&
		    (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_KERNEL))
			ecb->dte_cond |= DTRACE_COND_USERMODE;
	}

	if (dtrace_ecb_create_cache != NULL) {
		/*
		 * If we have a cached ecb, we'll use its action list instead
		 * of creating our own (saving both time and space).
		 */
		dtrace_ecb_t *cached = dtrace_ecb_create_cache;
		dtrace_action_t *act_if = cached->dte_action;

		if (act_if != NULL) {
			ASSERT(act_if->dta_refcnt > 0);
			act_if->dta_refcnt++;
			ecb->dte_action = act_if;
			ecb->dte_action_last = cached->dte_action_last;
			ecb->dte_needed = cached->dte_needed;
			ecb->dte_size = cached->dte_size;
			ecb->dte_alignment = cached->dte_alignment;
		}

		return (ecb);
	}

	for (act = desc->dted_action; act != NULL; act = act->dtad_next) {
		if ((enab->dten_error = dtrace_ecb_action_add(ecb, act)) != 0) {
			dtrace_ecb_destroy(ecb);
			return (NULL);
		}
	}

	dtrace_ecb_resize(ecb);

	return (dtrace_ecb_create_cache = ecb);
}
static int
dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg)
{
	dtrace_ecb_t *ecb;
	dtrace_enabling_t *enab = arg;
	dtrace_state_t *state = enab->dten_vstate->dtvs_state;

	ASSERT(state != NULL);

	if (probe != NULL && probe->dtpr_gen < enab->dten_probegen) {
		/*
		 * This probe was created in a generation for which this
		 * enabling has previously created ECBs; we don't want to
		 * enable it again, so just kick out.
		 */
		return (DTRACE_MATCH_NEXT);
	}

	if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL)
		return (DTRACE_MATCH_DONE);

	if (dtrace_ecb_enable(ecb) < 0)
		return (DTRACE_MATCH_FAIL);

	return (DTRACE_MATCH_NEXT);
}
static dtrace_ecb_t *
dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id)
{
	dtrace_ecb_t *ecb;
#pragma unused(ecb) /* __APPLE__ */

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (id == 0 || id > (dtrace_epid_t)state->dts_necbs)
		return (NULL);

	ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL);
	ASSERT((ecb = state->dts_ecbs[id - 1]) == NULL || ecb->dte_epid == id);

	return (state->dts_ecbs[id - 1]);
}
static dtrace_aggregation_t *
dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id)
{
	dtrace_aggregation_t *agg;
#pragma unused(agg) /* __APPLE__ */

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (id == 0 || id > (dtrace_aggid_t)state->dts_naggregations)
		return (NULL);

	ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL);
	ASSERT((agg = state->dts_aggregations[id - 1]) == NULL ||
	    agg->dtag_id == id);

	return (state->dts_aggregations[id - 1]);
}
/*
 * DTrace Buffer Functions
 *
 * The following functions manipulate DTrace buffers.  Most of these functions
 * are called in the context of establishing or processing consumer state;
 * exceptions are explicitly noted.
 */

/*
 * Note:  called from cross call context.  This function switches the two
 * buffers on a given CPU.  The atomicity of this operation is assured by
 * disabling interrupts while the actual switch takes place; the disabling of
 * interrupts serializes the execution with any execution of dtrace_probe()
 * on the same CPU.
 */
static void
dtrace_buffer_switch(dtrace_buffer_t *buf)
{
	caddr_t tomax = buf->dtb_tomax;
	caddr_t xamot = buf->dtb_xamot;
	dtrace_icookie_t cookie;

	ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
	ASSERT(!(buf->dtb_flags & DTRACEBUF_RING));

	cookie = dtrace_interrupt_disable();
	buf->dtb_tomax = xamot;
	buf->dtb_xamot = tomax;
	buf->dtb_xamot_drops = buf->dtb_drops;
	buf->dtb_xamot_offset = buf->dtb_offset;
	buf->dtb_xamot_errors = buf->dtb_errors;
	buf->dtb_xamot_flags = buf->dtb_flags;
	buf->dtb_offset = 0;
	buf->dtb_drops = 0;
	buf->dtb_errors = 0;
	buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED);
	dtrace_interrupt_enable(cookie);
}
/*
 * Note:  called from cross call context.  This function activates a buffer
 * on a CPU.  As with dtrace_buffer_switch(), the atomicity of the operation
 * is guaranteed by the disabling of interrupts.
 */
static void
dtrace_buffer_activate(dtrace_state_t *state)
{
	dtrace_buffer_t *buf;
	dtrace_icookie_t cookie = dtrace_interrupt_disable();

	buf = &state->dts_buffer[CPU->cpu_id];

	if (buf->dtb_tomax != NULL) {
		/*
		 * We might like to assert that the buffer is marked inactive,
		 * but this isn't necessarily true:  the buffer for the CPU
		 * that processes the BEGIN probe has its buffer activated
		 * manually.  In this case, we take the (harmless) action
		 * of re-clearing the INACTIVE bit.
		 */
		buf->dtb_flags &= ~DTRACEBUF_INACTIVE;
	}

	dtrace_interrupt_enable(cookie);
}
static boolean_t
dtrace_buffer_canalloc(size_t size)
{
	if (size > (UINT64_MAX - dtrace_buffer_memory_inuse))
		return (B_FALSE);
	if ((size + dtrace_buffer_memory_inuse) > dtrace_buffer_memory_maxsize)
		return (B_FALSE);

	return (B_TRUE);
}
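/*
 * Illustrative note (not from the original source): the first check above is
 * an overflow guard for the second.  For example, with a hypothetical
 * dtrace_buffer_memory_inuse of UINT64_MAX - 4096, a request of 8192 bytes
 * would wrap around if (size + inuse) were computed directly; rejecting any
 * size greater than UINT64_MAX - inuse keeps the subsequent addition and the
 * comparison against dtrace_buffer_memory_maxsize well defined.
 */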
static int
dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t size, int flags,
    processorid_t cpu)
{
	dtrace_cpu_t *cp;
	dtrace_buffer_t *buf;
	size_t size_before_alloc = dtrace_buffer_memory_inuse;

	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (size > (size_t)dtrace_nonroot_maxsize &&
	    !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE))
		return (EFBIG);

	cp = cpu_list;

	do {
		if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
			continue;

		buf = &bufs[cp->cpu_id];

		/*
		 * If there is already a buffer allocated for this CPU, it
		 * is only possible that this is a DR event.  In this case,
		 * the buffer size must match our specified size.
		 */
		if (buf->dtb_tomax != NULL) {
			ASSERT(buf->dtb_size == size);
			continue;
		}

		ASSERT(buf->dtb_xamot == NULL);

		/* DTrace, please do not eat all the memory. */
		if (dtrace_buffer_canalloc(size) == B_FALSE)
			goto err;
		if ((buf->dtb_tomax = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
			goto err;
		dtrace_buffer_memory_inuse += size;

		buf->dtb_size = size;
		buf->dtb_flags = flags;
		buf->dtb_offset = 0;
		buf->dtb_drops = 0;

		if (flags & DTRACEBUF_NOSWITCH)
			continue;

		/* DTrace, please do not eat all the memory. */
		if (dtrace_buffer_canalloc(size) == B_FALSE)
			goto err;
		if ((buf->dtb_xamot = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
			goto err;
		dtrace_buffer_memory_inuse += size;
	} while ((cp = cp->cpu_next) != cpu_list);

	ASSERT(dtrace_buffer_memory_inuse <= dtrace_buffer_memory_maxsize);

	return (0);

err:
	cp = cpu_list;

	do {
		if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
			continue;

		buf = &bufs[cp->cpu_id];

		if (buf->dtb_xamot != NULL) {
			ASSERT(buf->dtb_tomax != NULL);
			ASSERT(buf->dtb_size == size);
			kmem_free(buf->dtb_xamot, size);
		}

		if (buf->dtb_tomax != NULL) {
			ASSERT(buf->dtb_size == size);
			kmem_free(buf->dtb_tomax, size);
		}

		buf->dtb_tomax = NULL;
		buf->dtb_xamot = NULL;
		buf->dtb_size = 0;
	} while ((cp = cp->cpu_next) != cpu_list);

	/* Restore the size saved before allocating memory */
	dtrace_buffer_memory_inuse = size_before_alloc;

	return (ENOMEM);
}
/*
 * Note:  called from probe context.  This function just increments the drop
 * count on a buffer.  It has been made a function to allow for the
 * possibility of understanding the source of mysterious drop counts.  (A
 * problem for which one may be particularly disappointed that DTrace cannot
 * be used to understand DTrace.)
 */
static void
dtrace_buffer_drop(dtrace_buffer_t *buf)
{
	buf->dtb_drops++;
}
/*
 * Note:  called from probe context.  This function is called to reserve space
 * in a buffer.  If mstate is non-NULL, sets the scratch base and size in the
 * mstate.  Returns the new offset in the buffer, or a negative value if an
 * error has occurred.
 */
static intptr_t
dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align,
    dtrace_state_t *state, dtrace_mstate_t *mstate)
{
	intptr_t offs = buf->dtb_offset, soffs;
	intptr_t woffs;
	caddr_t tomax;
	size_t total_off;

	if (buf->dtb_flags & DTRACEBUF_INACTIVE)
		return (-1);

	if ((tomax = buf->dtb_tomax) == NULL) {
		dtrace_buffer_drop(buf);
		return (-1);
	}

	if (!(buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL))) {
		while (offs & (align - 1)) {
			/*
			 * Assert that our alignment is off by a number which
			 * is itself sizeof (uint32_t) aligned.
			 */
			ASSERT(!((align - (offs & (align - 1))) &
			    (sizeof (uint32_t) - 1)));
			DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
			offs += sizeof (uint32_t);
		}

		if ((uint64_t)(soffs = offs + needed) > buf->dtb_size) {
			dtrace_buffer_drop(buf);
			return (-1);
		}

		if (mstate == NULL)
			return (offs);

		mstate->dtms_scratch_base = (uintptr_t)tomax + soffs;
		mstate->dtms_scratch_size = buf->dtb_size - soffs;
		mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;

		return (offs);
	}

	if (buf->dtb_flags & DTRACEBUF_FILL) {
		if (state->dts_activity != DTRACE_ACTIVITY_COOLDOWN &&
		    (buf->dtb_flags & DTRACEBUF_FULL))
			return (-1);
	}

	total_off = needed + (offs & (align - 1));

	/*
	 * For a ring buffer, life is quite a bit more complicated.  Before
	 * we can store any padding, we need to adjust our wrapping offset.
	 * (If we've never before wrapped or we're not about to, no adjustment
	 * is required.)
	 */
	if ((buf->dtb_flags & DTRACEBUF_WRAPPED) ||
	    offs + total_off > buf->dtb_size) {
		woffs = buf->dtb_xamot_offset;

		if (offs + total_off > buf->dtb_size) {
			/*
			 * We can't fit in the end of the buffer.  First, a
			 * sanity check that we can fit in the buffer at all.
			 */
			if (total_off > buf->dtb_size) {
				dtrace_buffer_drop(buf);
				return (-1);
			}

			/*
			 * We're going to be storing at the top of the buffer,
			 * so now we need to deal with the wrapped offset.  We
			 * only reset our wrapped offset to 0 if it is
			 * currently greater than the current offset.  If it
			 * is less than the current offset, it is because a
			 * previous allocation induced a wrap -- but the
			 * allocation didn't subsequently take the space due
			 * to an error or false predicate evaluation.  In this
			 * case, we'll just leave the wrapped offset alone:  if
			 * the wrapped offset hasn't been advanced far enough
			 * for this allocation, it will be adjusted in the
			 * lower loop.
			 */
			if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
				if (woffs >= offs)
					woffs = 0;
			} else {
				woffs = 0;
			}

			/*
			 * Now we know that we're going to be storing to the
			 * top of the buffer and that there is room for us
			 * there.  We need to clear the buffer from the current
			 * offset to the end (there may be old gunk there).
			 */
			while ((uint64_t)offs < buf->dtb_size)
				tomax[offs++] = 0;

			/*
			 * We need to set our offset to zero.  And because we
			 * are wrapping, we need to set the bit indicating as
			 * much.  We can also adjust our needed space back
			 * down to the space required by the ECB -- we know
			 * that the top of the buffer is aligned.
			 */
			offs = 0;
			total_off = needed;
			buf->dtb_flags |= DTRACEBUF_WRAPPED;
		} else {
			/*
			 * There is room for us in the buffer, so we simply
			 * need to check the wrapped offset.
			 */
			if (woffs < offs) {
				/*
				 * The wrapped offset is less than the offset.
				 * This can happen if we allocated buffer space
				 * that induced a wrap, but then we didn't
				 * subsequently take the space due to an error
				 * or false predicate evaluation.  This is
				 * okay; we know that _this_ allocation isn't
				 * going to induce a wrap.  We still can't
				 * reset the wrapped offset to be zero,
				 * however:  the space may have been trashed in
				 * the previous failed probe attempt.  But at
				 * least the wrapped offset doesn't need to
				 * be adjusted at all...
				 */
				goto out;
			}
		}

		while (offs + total_off > (size_t)woffs) {
			dtrace_epid_t epid = *(uint32_t *)(tomax + woffs);
			size_t size;

			if (epid == DTRACE_EPIDNONE) {
				size = sizeof (uint32_t);
			} else {
				ASSERT(epid <= (dtrace_epid_t)state->dts_necbs);
				ASSERT(state->dts_ecbs[epid - 1] != NULL);

				size = state->dts_ecbs[epid - 1]->dte_size;
			}

			ASSERT(woffs + size <= buf->dtb_size);

			if (woffs + size == buf->dtb_size) {
				/*
				 * We've reached the end of the buffer; we want
				 * to set the wrapped offset to 0 and break
				 * out.  However, if the offs is 0, then we're
				 * in a strange edge-condition:  the amount of
				 * space that we want to reserve plus the size
				 * of the record that we're overwriting is
				 * greater than the size of the buffer.  This
				 * is problematic because if we reserve the
				 * space but subsequently don't consume it (due
				 * to a failed predicate or error) the wrapped
				 * offset will be 0 -- yet the EPID at offset 0
				 * will not be committed.  This situation is
				 * relatively easy to deal with:  if we're in
				 * this case, the buffer is indistinguishable
				 * from one that hasn't wrapped; we need only
				 * finish the job by clearing the wrapped bit,
				 * explicitly setting the offset to be 0, and
				 * zero'ing out the old data in the buffer.
				 */
				if (offs == 0) {
					buf->dtb_flags &= ~DTRACEBUF_WRAPPED;
					buf->dtb_offset = 0;
					woffs = 0;

					while ((uint64_t)woffs < buf->dtb_size)
						tomax[woffs++] = 0;
				}

				woffs = 0;
				break;
			}

			woffs += size;
		}

		/*
		 * We have a wrapped offset.  It may be that the wrapped offset
		 * has become zero -- that's okay.
		 */
		buf->dtb_xamot_offset = woffs;
	}

out:
	/*
	 * Now we can plow the buffer with any necessary padding.
	 */
	while (offs & (align - 1)) {
		/*
		 * Assert that our alignment is off by a number which
		 * is itself sizeof (uint32_t) aligned.
		 */
		ASSERT(!((align - (offs & (align - 1))) &
		    (sizeof (uint32_t) - 1)));
		DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
		offs += sizeof (uint32_t);
	}

	if (buf->dtb_flags & DTRACEBUF_FILL) {
		if (offs + needed > buf->dtb_size - state->dts_reserve) {
			buf->dtb_flags |= DTRACEBUF_FULL;
			return (-1);
		}
	}

	if (mstate == NULL)
		return (offs);

	/*
	 * For ring buffers and fill buffers, the scratch space is always
	 * the inactive buffer.
	 */
	mstate->dtms_scratch_base = (uintptr_t)buf->dtb_xamot;
	mstate->dtms_scratch_size = buf->dtb_size;
	mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;

	return (offs);
}
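/*
 * Illustrative note (not from the original source): the padding loops above
 * advance offs to the requested alignment one 32-bit word at a time, storing
 * DTRACE_EPIDNONE in each skipped word so a consumer walking the buffer can
 * recognize the filler.  For example, with offs == 12 and align == 8, one
 * EPIDNONE word is stored at offset 12 and the reservation begins at 16.
 */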
static void
dtrace_buffer_polish(dtrace_buffer_t *buf)
{
	ASSERT(buf->dtb_flags & DTRACEBUF_RING);
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (!(buf->dtb_flags & DTRACEBUF_WRAPPED))
		return;

	/*
	 * We need to polish the ring buffer.  There are three cases:
	 *
	 * - The first (and presumably most common) is that there is no gap
	 *   between the buffer offset and the wrapped offset.  In this case,
	 *   there is nothing in the buffer that isn't valid data; we can
	 *   mark the buffer as polished and return.
	 *
	 * - The second (less common than the first but still more common
	 *   than the third) is that there is a gap between the buffer offset
	 *   and the wrapped offset, and the wrapped offset is larger than the
	 *   buffer offset.  This can happen because of an alignment issue, or
	 *   can happen because of a call to dtrace_buffer_reserve() that
	 *   didn't subsequently consume the buffer space.  In this case,
	 *   we need to zero the data from the buffer offset to the wrapped
	 *   offset.
	 *
	 * - The third (and least common) is that there is a gap between the
	 *   buffer offset and the wrapped offset, but the wrapped offset is
	 *   _less_ than the buffer offset.  This can only happen because a
	 *   call to dtrace_buffer_reserve() induced a wrap, but the space
	 *   was not subsequently consumed.  In this case, we need to zero the
	 *   space from the offset to the end of the buffer _and_ from the
	 *   top of the buffer to the wrapped offset.
	 */
	if (buf->dtb_offset < buf->dtb_xamot_offset) {
		bzero(buf->dtb_tomax + buf->dtb_offset,
		    buf->dtb_xamot_offset - buf->dtb_offset);
	}

	if (buf->dtb_offset > buf->dtb_xamot_offset) {
		bzero(buf->dtb_tomax + buf->dtb_offset,
		    buf->dtb_size - buf->dtb_offset);
		bzero(buf->dtb_tomax, buf->dtb_xamot_offset);
	}
}
static void
dtrace_buffer_free(dtrace_buffer_t *bufs)
{
	int i;

	for (i = 0; i < (int)NCPU; i++) {
		dtrace_buffer_t *buf = &bufs[i];

		if (buf->dtb_tomax == NULL) {
			ASSERT(buf->dtb_xamot == NULL);
			ASSERT(buf->dtb_size == 0);
			continue;
		}

		if (buf->dtb_xamot != NULL) {
			ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
			kmem_free(buf->dtb_xamot, buf->dtb_size);

			ASSERT(dtrace_buffer_memory_inuse >= buf->dtb_size);
			dtrace_buffer_memory_inuse -= buf->dtb_size;
		}

		kmem_free(buf->dtb_tomax, buf->dtb_size);
		ASSERT(dtrace_buffer_memory_inuse >= buf->dtb_size);
		dtrace_buffer_memory_inuse -= buf->dtb_size;

		buf->dtb_size = 0;
		buf->dtb_tomax = NULL;
		buf->dtb_xamot = NULL;
	}
}
/*
 * DTrace Enabling Functions
 */
static dtrace_enabling_t *
dtrace_enabling_create(dtrace_vstate_t *vstate)
{
	dtrace_enabling_t *enab;

	enab = kmem_zalloc(sizeof (dtrace_enabling_t), KM_SLEEP);
	enab->dten_vstate = vstate;

	return (enab);
}
static void
dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb)
{
	dtrace_ecbdesc_t **ndesc;
	size_t osize, nsize;

	/*
	 * We can't add to enablings after we've enabled them, or after we've
	 * retained them.
	 */
	ASSERT(enab->dten_probegen == 0);
	ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);

	/* APPLE NOTE: this protects against gcc 4.0 botch on x86 */
	if (ecb == NULL) return;

	if (enab->dten_ndesc < enab->dten_maxdesc) {
		enab->dten_desc[enab->dten_ndesc++] = ecb;
		return;
	}

	osize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);

	if (enab->dten_maxdesc == 0) {
		enab->dten_maxdesc = 1;
	} else {
		enab->dten_maxdesc <<= 1;
	}

	ASSERT(enab->dten_ndesc < enab->dten_maxdesc);

	nsize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);
	ndesc = kmem_zalloc(nsize, KM_SLEEP);
	bcopy(enab->dten_desc, ndesc, osize);
	kmem_free(enab->dten_desc, osize);

	enab->dten_desc = ndesc;
	enab->dten_desc[enab->dten_ndesc++] = ecb;
}
static void
dtrace_enabling_addlike(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb,
    dtrace_probedesc_t *pd)
{
	dtrace_ecbdesc_t *new;
	dtrace_predicate_t *pred;
	dtrace_actdesc_t *act;

	/*
	 * We're going to create a new ECB description that matches the
	 * specified ECB in every way, but has the specified probe description.
	 */
	new = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP);

	if ((pred = ecb->dted_pred.dtpdd_predicate) != NULL)
		dtrace_predicate_hold(pred);

	for (act = ecb->dted_action; act != NULL; act = act->dtad_next)
		dtrace_actdesc_hold(act);

	new->dted_action = ecb->dted_action;
	new->dted_pred = ecb->dted_pred;
	new->dted_probe = *pd;
	new->dted_uarg = ecb->dted_uarg;

	dtrace_enabling_add(enab, new);
}
static void
dtrace_enabling_dump(dtrace_enabling_t *enab)
{
	int i;

	for (i = 0; i < enab->dten_ndesc; i++) {
		dtrace_probedesc_t *desc = &enab->dten_desc[i]->dted_probe;

		cmn_err(CE_NOTE, "enabling probe %d (%s:%s:%s:%s)", i,
		    desc->dtpd_provider, desc->dtpd_mod,
		    desc->dtpd_func, desc->dtpd_name);
	}
}
static void
dtrace_enabling_destroy(dtrace_enabling_t *enab)
{
	int i;
	dtrace_ecbdesc_t *ep;
	dtrace_vstate_t *vstate = enab->dten_vstate;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	for (i = 0; i < enab->dten_ndesc; i++) {
		dtrace_actdesc_t *act, *next;
		dtrace_predicate_t *pred;

		ep = enab->dten_desc[i];

		if ((pred = ep->dted_pred.dtpdd_predicate) != NULL)
			dtrace_predicate_release(pred, vstate);

		for (act = ep->dted_action; act != NULL; act = next) {
			next = act->dtad_next;
			dtrace_actdesc_release(act, vstate);
		}

		kmem_free(ep, sizeof (dtrace_ecbdesc_t));
	}

	kmem_free(enab->dten_desc,
	    enab->dten_maxdesc * sizeof (dtrace_enabling_t *));

	/*
	 * If this was a retained enabling, decrement the dts_nretained count
	 * and take it off of the dtrace_retained list.
	 */
	if (enab->dten_prev != NULL || enab->dten_next != NULL ||
	    dtrace_retained == enab) {
		ASSERT(enab->dten_vstate->dtvs_state != NULL);
		ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0);
		enab->dten_vstate->dtvs_state->dts_nretained--;
		dtrace_retained_gen++;
	}

	if (enab->dten_prev == NULL) {
		if (dtrace_retained == enab) {
			dtrace_retained = enab->dten_next;

			if (dtrace_retained != NULL)
				dtrace_retained->dten_prev = NULL;
		}
	} else {
		ASSERT(enab != dtrace_retained);
		ASSERT(dtrace_retained != NULL);
		enab->dten_prev->dten_next = enab->dten_next;
	}

	if (enab->dten_next != NULL) {
		ASSERT(dtrace_retained != NULL);
		enab->dten_next->dten_prev = enab->dten_prev;
	}

	kmem_free(enab, sizeof (dtrace_enabling_t));
}
static int
dtrace_enabling_retain(dtrace_enabling_t *enab)
{
	dtrace_state_t *state;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);
	ASSERT(enab->dten_vstate != NULL);

	state = enab->dten_vstate->dtvs_state;
	ASSERT(state != NULL);

	/*
	 * We only allow each state to retain dtrace_retain_max enablings.
	 */
	if (state->dts_nretained >= dtrace_retain_max)
		return (ENOSPC);

	state->dts_nretained++;
	dtrace_retained_gen++;

	if (dtrace_retained == NULL) {
		dtrace_retained = enab;
		return (0);
	}

	enab->dten_next = dtrace_retained;
	dtrace_retained->dten_prev = enab;
	dtrace_retained = enab;

	return (0);
}
static int
dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match,
    dtrace_probedesc_t *create)
{
	dtrace_enabling_t *new, *enab;
	int found = 0, err = ENOENT;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(strlen(match->dtpd_provider) < DTRACE_PROVNAMELEN);
	ASSERT(strlen(match->dtpd_mod) < DTRACE_MODNAMELEN);
	ASSERT(strlen(match->dtpd_func) < DTRACE_FUNCNAMELEN);
	ASSERT(strlen(match->dtpd_name) < DTRACE_NAMELEN);

	new = dtrace_enabling_create(&state->dts_vstate);

	/*
	 * Iterate over all retained enablings, looking for enablings that
	 * match the specified state.
	 */
	for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
		int i;

		/*
		 * dtvs_state can only be NULL for helper enablings -- and
		 * helper enablings can't be retained.
		 */
		ASSERT(enab->dten_vstate->dtvs_state != NULL);

		if (enab->dten_vstate->dtvs_state != state)
			continue;

		/*
		 * Now iterate over each probe description; we're looking for
		 * an exact match to the specified probe description.
		 */
		for (i = 0; i < enab->dten_ndesc; i++) {
			dtrace_ecbdesc_t *ep = enab->dten_desc[i];
			dtrace_probedesc_t *pd = &ep->dted_probe;

			/* APPLE NOTE: Darwin employs size bounded string operation. */
			if (strncmp(pd->dtpd_provider, match->dtpd_provider, DTRACE_PROVNAMELEN))
				continue;

			if (strncmp(pd->dtpd_mod, match->dtpd_mod, DTRACE_MODNAMELEN))
				continue;

			if (strncmp(pd->dtpd_func, match->dtpd_func, DTRACE_FUNCNAMELEN))
				continue;

			if (strncmp(pd->dtpd_name, match->dtpd_name, DTRACE_NAMELEN))
				continue;

			/*
			 * We have a winning probe!  Add it to our growing
			 * enabling.
			 */
			found = 1;
			dtrace_enabling_addlike(new, ep, create);
		}
	}

	if (!found || (err = dtrace_enabling_retain(new)) != 0) {
		dtrace_enabling_destroy(new);
		return (err);
	}

	return (0);
}
static void
dtrace_enabling_retract(dtrace_state_t *state)
{
	dtrace_enabling_t *enab, *next;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	/*
	 * Iterate over all retained enablings, destroy the enablings retained
	 * for the specified state.
	 */
	for (enab = dtrace_retained; enab != NULL; enab = next) {
		next = enab->dten_next;

		/*
		 * dtvs_state can only be NULL for helper enablings -- and
		 * helper enablings can't be retained.
		 */
		ASSERT(enab->dten_vstate->dtvs_state != NULL);

		if (enab->dten_vstate->dtvs_state == state) {
			ASSERT(state->dts_nretained > 0);
			dtrace_enabling_destroy(enab);
		}
	}

	ASSERT(state->dts_nretained == 0);
}
static int
dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched)
{
	int i;
	int total_matched = 0, matched = 0;

	lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	for (i = 0; i < enab->dten_ndesc; i++) {
		dtrace_ecbdesc_t *ep = enab->dten_desc[i];

		enab->dten_current = ep;
		enab->dten_error = 0;

		/*
		 * If a provider failed to enable a probe then get out and
		 * let the consumer know we failed.
		 */
		if ((matched = dtrace_probe_enable(&ep->dted_probe, enab)) < 0)
			return (EBUSY);

		total_matched += matched;

		if (enab->dten_error != 0) {
			/*
			 * If we get an error half-way through enabling the
			 * probes, we kick out -- perhaps with some number of
			 * them enabled.  Leaving enabled probes enabled may
			 * be slightly confusing for user-level, but we expect
			 * that no one will attempt to actually drive on in
			 * the face of such errors.  If this is an anonymous
			 * enabling (indicated with a NULL nmatched pointer),
			 * we cmn_err() a message.  We aren't expecting to
			 * get such an error -- such as it can exist at all,
			 * it would be a result of corrupted DOF in the driver
			 * properties.
			 */
			if (nmatched == NULL) {
				cmn_err(CE_WARN, "dtrace_enabling_match() "
				    "error on %p: %d", (void *)ep,
				    enab->dten_error);
			}

			return (enab->dten_error);
		}
	}

	enab->dten_probegen = dtrace_probegen;
	if (nmatched != NULL)
		*nmatched = total_matched;

	return (0);
}
static void
dtrace_enabling_matchall(void)
{
	dtrace_enabling_t *enab;

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_lock);

	/*
	 * Iterate over all retained enablings to see if any probes match
	 * against them.  We only perform this operation on enablings for which
	 * we have sufficient permissions by virtue of being in the global zone
	 * or in the same zone as the DTrace client.  Because we can be called
	 * after dtrace_detach() has been called, we cannot assert that there
	 * are retained enablings.  We can safely load from dtrace_retained,
	 * however:  the taskq_destroy() at the end of dtrace_detach() will
	 * block pending our completion.
	 */

	/*
	 * Darwin doesn't do zones.
	 * Behave as if always in the "global" zone.
	 */
	for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
		(void) dtrace_enabling_match(enab, NULL);
	}

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&cpu_lock);
}
/*
 * If an enabling is to be enabled without having matched probes (that is, if
 * dtrace_state_go() is to be called on the underlying dtrace_state_t), the
 * enabling must be _primed_ by creating an ECB for every ECB description.
 * This must be done to assure that we know the number of speculations, the
 * number of aggregations, the minimum buffer size needed, etc. before we
 * transition out of DTRACE_ACTIVITY_INACTIVE.  To do this without actually
 * enabling any probes, we create ECBs for every ECB description, but with a
 * NULL probe -- which is exactly what this function does.
 */
static void
dtrace_enabling_prime(dtrace_state_t *state)
{
	dtrace_enabling_t *enab;
	int i;

	for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
		ASSERT(enab->dten_vstate->dtvs_state != NULL);

		if (enab->dten_vstate->dtvs_state != state)
			continue;

		/*
		 * We don't want to prime an enabling more than once, lest
		 * we allow a malicious user to induce resource exhaustion.
		 * (The ECBs that result from priming an enabling aren't
		 * leaked -- but they also aren't deallocated until the
		 * consumer state is destroyed.)
		 */
		if (enab->dten_primed)
			continue;

		for (i = 0; i < enab->dten_ndesc; i++) {
			enab->dten_current = enab->dten_desc[i];
			(void) dtrace_probe_enable(NULL, enab);
		}

		enab->dten_primed = 1;
	}
}
/*
 * Called to indicate that probes should be provided due to retained
 * enablings.  This is implemented in terms of dtrace_probe_provide(), but it
 * must take an initial lap through the enabling calling the dtps_provide()
 * entry point explicitly to allow for autocreated probes.
 */
static void
dtrace_enabling_provide(dtrace_provider_t *prv)
{
	int i, all = 0;
	dtrace_probedesc_t desc;
	dtrace_genid_t gen;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED);

	if (prv == NULL) {
		all = 1;
		prv = dtrace_provider;
	}

	do {
		dtrace_enabling_t *enab;
		void *parg = prv->dtpv_arg;

retry:
		gen = dtrace_retained_gen;
		for (enab = dtrace_retained; enab != NULL;
		    enab = enab->dten_next) {
			for (i = 0; i < enab->dten_ndesc; i++) {
				desc = enab->dten_desc[i]->dted_probe;
				lck_mtx_unlock(&dtrace_lock);
				prv->dtpv_pops.dtps_provide(parg, &desc);
				lck_mtx_lock(&dtrace_lock);
				/*
				 * Process the retained enablings again if
				 * they have changed while we weren't holding
				 * dtrace_lock.
				 */
				if (gen != dtrace_retained_gen)
					goto retry;
			}
		}
	} while (all && (prv = prv->dtpv_next) != NULL);

	lck_mtx_unlock(&dtrace_lock);
	dtrace_probe_provide(NULL, all ? NULL : prv);
	lck_mtx_lock(&dtrace_lock);
}
/*
 * DTrace DOF Functions
 */
static void
dtrace_dof_error(dof_hdr_t *dof, const char *str)
{
#pragma unused(dof) /* __APPLE__ */
	if (dtrace_err_verbose)
		cmn_err(CE_WARN, "failed to process DOF: %s", str);

#ifdef DTRACE_ERRDEBUG
	dtrace_errdebug(str);
#endif
}
/*
 * Create DOF out of a currently enabled state.  Right now, we only create
 * DOF containing the run-time options -- but this could be expanded to create
 * complete DOF representing the enabled state.
 */
static dof_hdr_t *
dtrace_dof_create(dtrace_state_t *state)
{
	dof_hdr_t *dof;
	dof_sec_t *sec;
	dof_optdesc_t *opt;
	int i, len = sizeof (dof_hdr_t) +
	    roundup(sizeof (dof_sec_t), sizeof (uint64_t)) +
	    sizeof (dof_optdesc_t) * DTRACEOPT_MAX;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	dof = dt_kmem_zalloc_aligned(len, 8, KM_SLEEP);
	dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0;
	dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1;
	dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2;
	dof->dofh_ident[DOF_ID_MAG3] = DOF_MAG_MAG3;

	dof->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_NATIVE;
	dof->dofh_ident[DOF_ID_ENCODING] = DOF_ENCODE_NATIVE;
	dof->dofh_ident[DOF_ID_VERSION] = DOF_VERSION;
	dof->dofh_ident[DOF_ID_DIFVERS] = DIF_VERSION;
	dof->dofh_ident[DOF_ID_DIFIREG] = DIF_DIR_NREGS;
	dof->dofh_ident[DOF_ID_DIFTREG] = DIF_DTR_NREGS;

	dof->dofh_flags = 0;
	dof->dofh_hdrsize = sizeof (dof_hdr_t);
	dof->dofh_secsize = sizeof (dof_sec_t);
	dof->dofh_secnum = 1;	/* only DOF_SECT_OPTDESC */
	dof->dofh_secoff = sizeof (dof_hdr_t);
	dof->dofh_loadsz = len;
	dof->dofh_filesz = len;

	/*
	 * Fill in the option section header...
	 */
	sec = (dof_sec_t *)((uintptr_t)dof + sizeof (dof_hdr_t));
	sec->dofs_type = DOF_SECT_OPTDESC;
	sec->dofs_align = sizeof (uint64_t);
	sec->dofs_flags = DOF_SECF_LOAD;
	sec->dofs_entsize = sizeof (dof_optdesc_t);

	opt = (dof_optdesc_t *)((uintptr_t)sec +
	    roundup(sizeof (dof_sec_t), sizeof (uint64_t)));

	sec->dofs_offset = (uintptr_t)opt - (uintptr_t)dof;
	sec->dofs_size = sizeof (dof_optdesc_t) * DTRACEOPT_MAX;

	for (i = 0; i < DTRACEOPT_MAX; i++) {
		opt[i].dofo_option = i;
		opt[i].dofo_strtab = DOF_SECIDX_NONE;
		opt[i].dofo_value = state->dts_options[i];
	}

	return (dof);
}
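/*
 * Illustrative note (not from the original source): the generated DOF is a
 * single allocation laid out as the dof_hdr_t, followed by one dof_sec_t
 * (rounded up to 8-byte alignment) describing a DOF_SECT_OPTDESC section,
 * followed by DTRACEOPT_MAX dof_optdesc_t entries carrying the current
 * option values; dofh_loadsz and dofh_filesz are both set to that total.
 */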
static dof_hdr_t *
dtrace_dof_copyin(user_addr_t uarg, int *errp)
{
	dof_hdr_t hdr, *dof;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED);

	/*
	 * First, we're going to copyin() the sizeof (dof_hdr_t).
	 */
	if (copyin(uarg, &hdr, sizeof (hdr)) != 0) {
		dtrace_dof_error(NULL, "failed to copyin DOF header");
		*errp = EFAULT;
		return (NULL);
	}

	/*
	 * Now we'll allocate the entire DOF and copy it in -- provided
	 * that the length isn't outrageous.
	 */
	if (hdr.dofh_loadsz >= (uint64_t)dtrace_dof_maxsize) {
		dtrace_dof_error(&hdr, "load size exceeds maximum");
		*errp = E2BIG;
		return (NULL);
	}

	if (hdr.dofh_loadsz < sizeof (hdr)) {
		dtrace_dof_error(&hdr, "invalid load size");
		*errp = EINVAL;
		return (NULL);
	}

	dof = dt_kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP);

	if (copyin(uarg, dof, hdr.dofh_loadsz) != 0 ||
	    dof->dofh_loadsz != hdr.dofh_loadsz) {
		dt_kmem_free_aligned(dof, hdr.dofh_loadsz);
		*errp = EFAULT;
		return (NULL);
	}

	return (dof);
}
static dof_hdr_t *
dtrace_dof_copyin_from_proc(proc_t* p, user_addr_t uarg, int *errp)
{
	dof_hdr_t hdr, *dof;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED);

	/*
	 * First, we're going to copyin() the sizeof (dof_hdr_t).
	 */
	if (uread(p, &hdr, sizeof(hdr), uarg) != KERN_SUCCESS) {
		dtrace_dof_error(NULL, "failed to copyin DOF header");
		*errp = EFAULT;
		return (NULL);
	}

	/*
	 * Now we'll allocate the entire DOF and copy it in -- provided
	 * that the length isn't outrageous.
	 */
	if (hdr.dofh_loadsz >= (uint64_t)dtrace_dof_maxsize) {
		dtrace_dof_error(&hdr, "load size exceeds maximum");
		*errp = E2BIG;
		return (NULL);
	}

	if (hdr.dofh_loadsz < sizeof (hdr)) {
		dtrace_dof_error(&hdr, "invalid load size");
		*errp = EINVAL;
		return (NULL);
	}

	dof = dt_kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP);

	if (uread(p, dof, hdr.dofh_loadsz, uarg) != KERN_SUCCESS) {
		dt_kmem_free_aligned(dof, hdr.dofh_loadsz);
		*errp = EFAULT;
		return (NULL);
	}

	return (dof);
}
static dof_hdr_t *
dtrace_dof_property(const char *name)
{
	uchar_t *buf;
	uint64_t loadsz;
	unsigned int len, i;
	dof_hdr_t *dof;

	/*
	 * Unfortunately, array of values in .conf files are always (and
	 * only) interpreted to be integer arrays.  We must read our DOF
	 * as an integer array, and then squeeze it into a byte array.
	 */
	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0,
	    name, (int **)&buf, &len) != DDI_PROP_SUCCESS)
		return (NULL);

	for (i = 0; i < len; i++)
		buf[i] = (uchar_t)(((int *)buf)[i]);

	if (len < sizeof (dof_hdr_t)) {
		ddi_prop_free(buf);
		dtrace_dof_error(NULL, "truncated header");
		return (NULL);
	}

	if (len < (loadsz = ((dof_hdr_t *)buf)->dofh_loadsz)) {
		ddi_prop_free(buf);
		dtrace_dof_error(NULL, "truncated DOF");
		return (NULL);
	}

	if (loadsz >= (uint64_t)dtrace_dof_maxsize) {
		ddi_prop_free(buf);
		dtrace_dof_error(NULL, "oversized DOF");
		return (NULL);
	}

	dof = dt_kmem_alloc_aligned(loadsz, 8, KM_SLEEP);
	bcopy(buf, dof, loadsz);
	ddi_prop_free(buf);

	return (dof);
}

static void
dtrace_dof_destroy(dof_hdr_t *dof)
{
	dt_kmem_free_aligned(dof, dof->dofh_loadsz);
}
/*
 * Return the dof_sec_t pointer corresponding to a given section index.  If the
 * index is not valid, dtrace_dof_error() is called and NULL is returned.  If
 * a type other than DOF_SECT_NONE is specified, the header is checked against
 * this type and NULL is returned if the types do not match.
 */
static dof_sec_t *
dtrace_dof_sect(dof_hdr_t *dof, uint32_t type, dof_secidx_t i)
{
	dof_sec_t *sec = (dof_sec_t *)(uintptr_t)
	    ((uintptr_t)dof + dof->dofh_secoff + i * dof->dofh_secsize);

	if (i >= dof->dofh_secnum) {
		dtrace_dof_error(dof, "referenced section index is invalid");
		return (NULL);
	}

	if (!(sec->dofs_flags & DOF_SECF_LOAD)) {
		dtrace_dof_error(dof, "referenced section is not loadable");
		return (NULL);
	}

	if (type != DOF_SECT_NONE && type != sec->dofs_type) {
		dtrace_dof_error(dof, "referenced section is the wrong type");
		return (NULL);
	}

	return (sec);
}
*
11730 dtrace_dof_probedesc(dof_hdr_t
*dof
, dof_sec_t
*sec
, dtrace_probedesc_t
*desc
)
11732 dof_probedesc_t
*probe
;
11734 uintptr_t daddr
= (uintptr_t)dof
;
11738 if (sec
->dofs_type
!= DOF_SECT_PROBEDESC
) {
11739 dtrace_dof_error(dof
, "invalid probe section");
11743 if (sec
->dofs_align
!= sizeof (dof_secidx_t
)) {
11744 dtrace_dof_error(dof
, "bad alignment in probe description");
11748 if (sec
->dofs_offset
+ sizeof (dof_probedesc_t
) > dof
->dofh_loadsz
) {
11749 dtrace_dof_error(dof
, "truncated probe description");
11753 probe
= (dof_probedesc_t
*)(uintptr_t)(daddr
+ sec
->dofs_offset
);
11754 strtab
= dtrace_dof_sect(dof
, DOF_SECT_STRTAB
, probe
->dofp_strtab
);
11756 if (strtab
== NULL
)
11759 str
= daddr
+ strtab
->dofs_offset
;
11760 size
= strtab
->dofs_size
;
11762 if (probe
->dofp_provider
>= strtab
->dofs_size
) {
11763 dtrace_dof_error(dof
, "corrupt probe provider");
11767 (void) strncpy(desc
->dtpd_provider
,
11768 (char *)(str
+ probe
->dofp_provider
),
11769 MIN(DTRACE_PROVNAMELEN
- 1, size
- probe
->dofp_provider
));
11771 /* APPLE NOTE: Darwin employs size bounded string operation. */
11772 desc
->dtpd_provider
[DTRACE_PROVNAMELEN
- 1] = '\0';
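/*
 * Illustrative sketch (not part of the original source): the bounded copy
 * pattern repeated above for each probe description field.  The source
 * string lives at an already-validated offset inside the DOF string table,
 * the copy is limited both by the destination size and by the bytes left in
 * the table, and the destination is always explicitly NUL-terminated.
 * copy_bounded() is a hypothetical name used only for this sketch.
 */
#if 0
static void
copy_bounded(char *dst, size_t dstsize, const char *strtab,
    size_t strtab_size, size_t offset)
{
	/* Caller has already verified that offset < strtab_size. */
	(void) strncpy(dst, strtab + offset,
	    MIN(dstsize - 1, strtab_size - offset));
	dst[dstsize - 1] = '\0';
}
#endif	/* illustrative sketch */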
11774 if (probe
->dofp_mod
>= strtab
->dofs_size
) {
11775 dtrace_dof_error(dof
, "corrupt probe module");
11779 (void) strncpy(desc
->dtpd_mod
, (char *)(str
+ probe
->dofp_mod
),
11780 MIN(DTRACE_MODNAMELEN
- 1, size
- probe
->dofp_mod
));
11782 /* APPLE NOTE: Darwin employs size bounded string operation. */
11783 desc
->dtpd_mod
[DTRACE_MODNAMELEN
- 1] = '\0';
11785 if (probe
->dofp_func
>= strtab
->dofs_size
) {
11786 dtrace_dof_error(dof
, "corrupt probe function");
11790 (void) strncpy(desc
->dtpd_func
, (char *)(str
+ probe
->dofp_func
),
11791 MIN(DTRACE_FUNCNAMELEN
- 1, size
- probe
->dofp_func
));
11793 /* APPLE NOTE: Darwin employs size bounded string operation. */
11794 desc
->dtpd_func
[DTRACE_FUNCNAMELEN
- 1] = '\0';
11796 if (probe
->dofp_name
>= strtab
->dofs_size
) {
11797 dtrace_dof_error(dof
, "corrupt probe name");
11801 (void) strncpy(desc
->dtpd_name
, (char *)(str
+ probe
->dofp_name
),
11802 MIN(DTRACE_NAMELEN
- 1, size
- probe
->dofp_name
));
11804 /* APPLE NOTE: Darwin employs size bounded string operation. */
11805 desc
->dtpd_name
[DTRACE_NAMELEN
- 1] = '\0';
11810 static dtrace_difo_t
*
11811 dtrace_dof_difo(dof_hdr_t
*dof
, dof_sec_t
*sec
, dtrace_vstate_t
*vstate
,
11816 dof_difohdr_t
*dofd
;
11817 uintptr_t daddr
= (uintptr_t)dof
;
11818 size_t max_size
= dtrace_difo_maxsize
;
11823 static const struct {
11831 { DOF_SECT_DIF
, offsetof(dtrace_difo_t
, dtdo_buf
),
11832 offsetof(dtrace_difo_t
, dtdo_len
), sizeof (dif_instr_t
),
11833 sizeof (dif_instr_t
), "multiple DIF sections" },
11835 { DOF_SECT_INTTAB
, offsetof(dtrace_difo_t
, dtdo_inttab
),
11836 offsetof(dtrace_difo_t
, dtdo_intlen
), sizeof (uint64_t),
11837 sizeof (uint64_t), "multiple integer tables" },
11839 { DOF_SECT_STRTAB
, offsetof(dtrace_difo_t
, dtdo_strtab
),
11840 offsetof(dtrace_difo_t
, dtdo_strlen
), 0,
11841 sizeof (char), "multiple string tables" },
11843 { DOF_SECT_VARTAB
, offsetof(dtrace_difo_t
, dtdo_vartab
),
11844 offsetof(dtrace_difo_t
, dtdo_varlen
), sizeof (dtrace_difv_t
),
11845 sizeof (uint_t
), "multiple variable tables" },
11847 { DOF_SECT_NONE
, 0, 0, 0, 0, NULL
}
11850 if (sec
->dofs_type
!= DOF_SECT_DIFOHDR
) {
11851 dtrace_dof_error(dof
, "invalid DIFO header section");
11855 if (sec
->dofs_align
!= sizeof (dof_secidx_t
)) {
11856 dtrace_dof_error(dof
, "bad alignment in DIFO header");
11860 if (sec
->dofs_size
< sizeof (dof_difohdr_t
) ||
11861 sec
->dofs_size
% sizeof (dof_secidx_t
)) {
11862 dtrace_dof_error(dof
, "bad size in DIFO header");
11866 dofd
= (dof_difohdr_t
*)(uintptr_t)(daddr
+ sec
->dofs_offset
);
11867 n
= (sec
->dofs_size
- sizeof (*dofd
)) / sizeof (dof_secidx_t
) + 1;
11869 dp
= kmem_zalloc(sizeof (dtrace_difo_t
), KM_SLEEP
);
11870 dp
->dtdo_rtype
= dofd
->dofd_rtype
;
11872 for (l
= 0; l
< n
; l
++) {
11877 if ((subsec
= dtrace_dof_sect(dof
, DOF_SECT_NONE
,
11878 dofd
->dofd_links
[l
])) == NULL
)
11879 goto err
; /* invalid section link */
11881 if (ttl
+ subsec
->dofs_size
> max_size
) {
11882 dtrace_dof_error(dof
, "exceeds maximum size");
11886 ttl
+= subsec
->dofs_size
;
11888 for (i
= 0; difo
[i
].section
!= DOF_SECT_NONE
; i
++) {
11890 if (subsec
->dofs_type
!= (uint32_t)difo
[i
].section
)
11893 if (!(subsec
->dofs_flags
& DOF_SECF_LOAD
)) {
11894 dtrace_dof_error(dof
, "section not loaded");
11898 if (subsec
->dofs_align
!= (uint32_t)difo
[i
].align
) {
11899 dtrace_dof_error(dof
, "bad alignment");
11903 bufp
= (void **)((uintptr_t)dp
+ difo
[i
].bufoffs
);
11904 lenp
= (uint32_t *)((uintptr_t)dp
+ difo
[i
].lenoffs
);
11906 if (*bufp
!= NULL
) {
11907 dtrace_dof_error(dof
, difo
[i
].msg
);
11911 if ((uint32_t)difo
[i
].entsize
!= subsec
->dofs_entsize
) {
11912 dtrace_dof_error(dof
, "entry size mismatch");
11916 if (subsec
->dofs_entsize
!= 0 &&
11917 (subsec
->dofs_size
% subsec
->dofs_entsize
) != 0) {
11918 dtrace_dof_error(dof
, "corrupt entry size");
11922 *lenp
= subsec
->dofs_size
;
11923 *bufp
= kmem_alloc(subsec
->dofs_size
, KM_SLEEP
);
11924 bcopy((char *)(uintptr_t)(daddr
+ subsec
->dofs_offset
),
11925 *bufp
, subsec
->dofs_size
);
11927 if (subsec
->dofs_entsize
!= 0)
11928 *lenp
/= subsec
->dofs_entsize
;
11934 * If we encounter a loadable DIFO sub-section that is not
11935 * known to us, assume this is a broken program and fail.
11937 if (difo
[i
].section
== DOF_SECT_NONE
&&
11938 (subsec
->dofs_flags
& DOF_SECF_LOAD
)) {
11939 dtrace_dof_error(dof
, "unrecognized DIFO subsection");
11944 if (dp
->dtdo_buf
== NULL
) {
11946 * We can't have a DIF object without DIF text.
11948 dtrace_dof_error(dof
, "missing DIF text");
11953 * Before we validate the DIF object, run through the variable table
11954 * looking for the strings -- if any of their size are under, we'll set
11955 * their size to be the system-wide default string size. Note that
11956 * this should _not_ happen if the "strsize" option has been set --
11957 * in this case, the compiler should have set the size to reflect the
11958 * setting of the option.
11960 for (i
= 0; i
< dp
->dtdo_varlen
; i
++) {
11961 dtrace_difv_t
*v
= &dp
->dtdo_vartab
[i
];
11962 dtrace_diftype_t
*t
= &v
->dtdv_type
;
11964 if (v
->dtdv_id
< DIF_VAR_OTHER_UBASE
)
11967 if (t
->dtdt_kind
== DIF_TYPE_STRING
&& t
->dtdt_size
== 0)
11968 t
->dtdt_size
= dtrace_strsize_default
;
11971 if (dtrace_difo_validate(dp
, vstate
, DIF_DIR_NREGS
, cr
) != 0)
11974 dtrace_difo_init(dp
, vstate
);
11978 kmem_free(dp
->dtdo_buf
, dp
->dtdo_len
* sizeof (dif_instr_t
));
11979 kmem_free(dp
->dtdo_inttab
, dp
->dtdo_intlen
* sizeof (uint64_t));
11980 kmem_free(dp
->dtdo_strtab
, dp
->dtdo_strlen
);
11981 kmem_free(dp
->dtdo_vartab
, dp
->dtdo_varlen
* sizeof (dtrace_difv_t
));
11983 kmem_free(dp
, sizeof (dtrace_difo_t
));
11987 static dtrace_predicate_t
*
11988 dtrace_dof_predicate(dof_hdr_t
*dof
, dof_sec_t
*sec
, dtrace_vstate_t
*vstate
,
11993 if ((dp
= dtrace_dof_difo(dof
, sec
, vstate
, cr
)) == NULL
)
11996 return (dtrace_predicate_create(dp
));
11999 static dtrace_actdesc_t
*
12000 dtrace_dof_actdesc(dof_hdr_t
*dof
, dof_sec_t
*sec
, dtrace_vstate_t
*vstate
,
12003 dtrace_actdesc_t
*act
, *first
= NULL
, *last
= NULL
, *next
;
12004 dof_actdesc_t
*desc
;
12005 dof_sec_t
*difosec
;
12007 uintptr_t daddr
= (uintptr_t)dof
;
12009 dtrace_actkind_t kind
;
12011 if (sec
->dofs_type
!= DOF_SECT_ACTDESC
) {
12012 dtrace_dof_error(dof
, "invalid action section");
12016 if (sec
->dofs_offset
+ sizeof (dof_actdesc_t
) > dof
->dofh_loadsz
) {
12017 dtrace_dof_error(dof
, "truncated action description");
12021 if (sec
->dofs_align
!= sizeof (uint64_t)) {
12022 dtrace_dof_error(dof
, "bad alignment in action description");
12026 if (sec
->dofs_size
< sec
->dofs_entsize
) {
12027 dtrace_dof_error(dof
, "section entry size exceeds total size");
12031 if (sec
->dofs_entsize
!= sizeof (dof_actdesc_t
)) {
12032 dtrace_dof_error(dof
, "bad entry size in action description");
12036 if (sec
->dofs_size
/ sec
->dofs_entsize
> dtrace_actions_max
) {
12037 dtrace_dof_error(dof
, "actions exceed dtrace_actions_max");
12041 for (offs
= 0; offs
< sec
->dofs_size
; offs
+= sec
->dofs_entsize
) {
12042 desc
= (dof_actdesc_t
*)(daddr
+
12043 (uintptr_t)sec
->dofs_offset
+ offs
);
12044 kind
= (dtrace_actkind_t
)desc
->dofa_kind
;
12046 if (DTRACEACT_ISPRINTFLIKE(kind
) &&
12047 (kind
!= DTRACEACT_PRINTA
||
12048 desc
->dofa_strtab
!= DOF_SECIDX_NONE
)) {
12054 * printf()-like actions must have a format string.
12056 if ((strtab
= dtrace_dof_sect(dof
,
12057 DOF_SECT_STRTAB
, desc
->dofa_strtab
)) == NULL
)
12060 str
= (char *)((uintptr_t)dof
+
12061 (uintptr_t)strtab
->dofs_offset
);
12063 for (i
= desc
->dofa_arg
; i
< strtab
->dofs_size
; i
++) {
12064 if (str
[i
] == '\0')
12068 if (i
>= strtab
->dofs_size
) {
12069 dtrace_dof_error(dof
, "bogus format string");
12073 if (i
== desc
->dofa_arg
) {
12074 dtrace_dof_error(dof
, "empty format string");
12078 i
-= desc
->dofa_arg
;
12079 fmt
= kmem_alloc(i
+ 1, KM_SLEEP
);
12080 bcopy(&str
[desc
->dofa_arg
], fmt
, i
+ 1);
12081 arg
= (uint64_t)(uintptr_t)fmt
;
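/*
 * Illustrative sketch (not part of the original source): the bounds-checked
 * extraction of a printf()-like format string from a string table, as done
 * above.  copy_fmt() and alloc() are hypothetical names; the real code scans
 * str[] within the section size and then copies i + 1 bytes (including the
 * terminating NUL) with kmem_alloc()/bcopy().  Allocation failure handling
 * is omitted here.
 */
#if 0
static char *
copy_fmt(const char *strtab, size_t strtab_size, size_t offset,
    void *(*alloc)(size_t))
{
	size_t i;
	char *fmt;

	for (i = offset; i < strtab_size; i++) {
		if (strtab[i] == '\0')
			break;
	}

	if (i >= strtab_size)
		return (NULL);		/* string runs off the end: bogus */

	if (i == offset)
		return (NULL);		/* empty format string */

	fmt = alloc(i - offset + 1);
	bcopy(&strtab[offset], fmt, i - offset + 1);
	return (fmt);
}
#endif	/* illustrative sketch */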
12083 if (kind
== DTRACEACT_PRINTA
) {
12084 ASSERT(desc
->dofa_strtab
== DOF_SECIDX_NONE
);
12087 arg
= desc
->dofa_arg
;
12091 act
= dtrace_actdesc_create(kind
, desc
->dofa_ntuple
,
12092 desc
->dofa_uarg
, arg
);
12094 if (last
!= NULL
) {
12095 last
->dtad_next
= act
;
12102 if (desc
->dofa_difo
== DOF_SECIDX_NONE
)
12105 if ((difosec
= dtrace_dof_sect(dof
,
12106 DOF_SECT_DIFOHDR
, desc
->dofa_difo
)) == NULL
)
12109 act
->dtad_difo
= dtrace_dof_difo(dof
, difosec
, vstate
, cr
);
12111 if (act
->dtad_difo
== NULL
)
12115 ASSERT(first
!= NULL
);
12119 for (act
= first
; act
!= NULL
; act
= next
) {
12120 next
= act
->dtad_next
;
12121 dtrace_actdesc_release(act
, vstate
);
12127 static dtrace_ecbdesc_t
*
12128 dtrace_dof_ecbdesc(dof_hdr_t
*dof
, dof_sec_t
*sec
, dtrace_vstate_t
*vstate
,
12131 dtrace_ecbdesc_t
*ep
;
12132 dof_ecbdesc_t
*ecb
;
12133 dtrace_probedesc_t
*desc
;
12134 dtrace_predicate_t
*pred
= NULL
;
12136 if (sec
->dofs_size
< sizeof (dof_ecbdesc_t
)) {
12137 dtrace_dof_error(dof
, "truncated ECB description");
12141 if (sec
->dofs_align
!= sizeof (uint64_t)) {
12142 dtrace_dof_error(dof
, "bad alignment in ECB description");
12146 ecb
= (dof_ecbdesc_t
*)((uintptr_t)dof
+ (uintptr_t)sec
->dofs_offset
);
12147 sec
= dtrace_dof_sect(dof
, DOF_SECT_PROBEDESC
, ecb
->dofe_probes
);
12152 ep
= kmem_zalloc(sizeof (dtrace_ecbdesc_t
), KM_SLEEP
);
12153 ep
->dted_uarg
= ecb
->dofe_uarg
;
12154 desc
= &ep
->dted_probe
;
12156 if (dtrace_dof_probedesc(dof
, sec
, desc
) == NULL
)
12159 if (ecb
->dofe_pred
!= DOF_SECIDX_NONE
) {
12160 if ((sec
= dtrace_dof_sect(dof
,
12161 DOF_SECT_DIFOHDR
, ecb
->dofe_pred
)) == NULL
)
12164 if ((pred
= dtrace_dof_predicate(dof
, sec
, vstate
, cr
)) == NULL
)
12167 ep
->dted_pred
.dtpdd_predicate
= pred
;
12170 if (ecb
->dofe_actions
!= DOF_SECIDX_NONE
) {
12171 if ((sec
= dtrace_dof_sect(dof
,
12172 DOF_SECT_ACTDESC
, ecb
->dofe_actions
)) == NULL
)
12175 ep
->dted_action
= dtrace_dof_actdesc(dof
, sec
, vstate
, cr
);
12177 if (ep
->dted_action
== NULL
)
12185 dtrace_predicate_release(pred
, vstate
);
12186 kmem_free(ep
, sizeof (dtrace_ecbdesc_t
));
/*
 * APPLE NOTE: dyld handles dof relocation.
 * Darwin does not need dtrace_dof_relocate()
 */

/*
 * The dof_hdr_t passed to dtrace_dof_slurp() should be a partially validated
 * header:  it should be at the front of a memory region that is at least
 * sizeof (dof_hdr_t) in size -- and then at least dof_hdr.dofh_loadsz in
 * size.  It need not be validated in any other way.
 */
dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr,
    dtrace_enabling_t **enabp, uint64_t ubase, int noprobes)

#pragma unused(ubase) /* __APPLE__ */
	uint64_t len = dof->dofh_loadsz, seclen;
	uintptr_t daddr = (uintptr_t)dof;
	dtrace_ecbdesc_t *ep;
	dtrace_enabling_t *enab;

	lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(dof->dofh_loadsz >= sizeof (dof_hdr_t));
12216 * Check the DOF header identification bytes. In addition to checking
12217 * valid settings, we also verify that unused bits/bytes are zeroed so
12218 * we can use them later without fear of regressing existing binaries.
12220 if (bcmp(&dof
->dofh_ident
[DOF_ID_MAG0
],
12221 DOF_MAG_STRING
, DOF_MAG_STRLEN
) != 0) {
12222 dtrace_dof_error(dof
, "DOF magic string mismatch");
12226 if (dof
->dofh_ident
[DOF_ID_MODEL
] != DOF_MODEL_ILP32
&&
12227 dof
->dofh_ident
[DOF_ID_MODEL
] != DOF_MODEL_LP64
) {
12228 dtrace_dof_error(dof
, "DOF has invalid data model");
12232 if (dof
->dofh_ident
[DOF_ID_ENCODING
] != DOF_ENCODE_NATIVE
) {
12233 dtrace_dof_error(dof
, "DOF encoding mismatch");
12238 * APPLE NOTE: Darwin only supports DOF_VERSION_3 for now.
12240 if (dof
->dofh_ident
[DOF_ID_VERSION
] != DOF_VERSION_3
) {
12241 dtrace_dof_error(dof
, "DOF version mismatch");
12245 if (dof
->dofh_ident
[DOF_ID_DIFVERS
] != DIF_VERSION_2
) {
12246 dtrace_dof_error(dof
, "DOF uses unsupported instruction set");
12250 if (dof
->dofh_ident
[DOF_ID_DIFIREG
] > DIF_DIR_NREGS
) {
12251 dtrace_dof_error(dof
, "DOF uses too many integer registers");
12255 if (dof
->dofh_ident
[DOF_ID_DIFTREG
] > DIF_DTR_NREGS
) {
12256 dtrace_dof_error(dof
, "DOF uses too many tuple registers");
12260 for (i
= DOF_ID_PAD
; i
< DOF_ID_SIZE
; i
++) {
12261 if (dof
->dofh_ident
[i
] != 0) {
12262 dtrace_dof_error(dof
, "DOF has invalid ident byte set");
12267 if (dof
->dofh_flags
& ~DOF_FL_VALID
) {
12268 dtrace_dof_error(dof
, "DOF has invalid flag bits set");
12272 if (dof
->dofh_secsize
== 0) {
12273 dtrace_dof_error(dof
, "zero section header size");
12278 * Check that the section headers don't exceed the amount of DOF
12279 * data. Note that we cast the section size and number of sections
12280 * to uint64_t's to prevent possible overflow in the multiplication.
12282 seclen
= (uint64_t)dof
->dofh_secnum
* (uint64_t)dof
->dofh_secsize
;
12284 if (dof
->dofh_secoff
> len
|| seclen
> len
||
12285 dof
->dofh_secoff
+ seclen
> len
) {
12286 dtrace_dof_error(dof
, "truncated section headers");
12290 if (!IS_P2ALIGNED(dof
->dofh_secoff
, sizeof (uint64_t))) {
12291 dtrace_dof_error(dof
, "misaligned section headers");
12295 if (!IS_P2ALIGNED(dof
->dofh_secsize
, sizeof (uint64_t))) {
12296 dtrace_dof_error(dof
, "misaligned section size");
12301 * Take an initial pass through the section headers to be sure that
12302 * the headers don't have stray offsets. If the 'noprobes' flag is
12303 * set, do not permit sections relating to providers, probes, or args.
12305 for (i
= 0; i
< dof
->dofh_secnum
; i
++) {
12306 dof_sec_t
*sec
= (dof_sec_t
*)(daddr
+
12307 (uintptr_t)dof
->dofh_secoff
+ i
* dof
->dofh_secsize
);
12310 switch (sec
->dofs_type
) {
12311 case DOF_SECT_PROVIDER
:
12312 case DOF_SECT_PROBES
:
12313 case DOF_SECT_PRARGS
:
12314 case DOF_SECT_PROFFS
:
12315 dtrace_dof_error(dof
, "illegal sections "
12321 if (!(sec
->dofs_flags
& DOF_SECF_LOAD
))
12322 continue; /* just ignore non-loadable sections */
12324 if (sec
->dofs_align
& (sec
->dofs_align
- 1)) {
12325 dtrace_dof_error(dof
, "bad section alignment");
12329 if (sec
->dofs_offset
& (sec
->dofs_align
- 1)) {
12330 dtrace_dof_error(dof
, "misaligned section");
12334 if (sec
->dofs_offset
> len
|| sec
->dofs_size
> len
||
12335 sec
->dofs_offset
+ sec
->dofs_size
> len
) {
12336 dtrace_dof_error(dof
, "corrupt section header");
12340 if (sec
->dofs_type
== DOF_SECT_STRTAB
&& *((char *)daddr
+
12341 sec
->dofs_offset
+ sec
->dofs_size
- 1) != '\0') {
12342 dtrace_dof_error(dof
, "non-terminating string table");
12348 * APPLE NOTE: We have no further relocation to perform.
12349 * All dof values are relative offsets.
12352 if ((enab
= *enabp
) == NULL
)
12353 enab
= *enabp
= dtrace_enabling_create(vstate
);
12355 for (i
= 0; i
< dof
->dofh_secnum
; i
++) {
12356 dof_sec_t
*sec
= (dof_sec_t
*)(daddr
+
12357 (uintptr_t)dof
->dofh_secoff
+ i
* dof
->dofh_secsize
);
12359 if (sec
->dofs_type
!= DOF_SECT_ECBDESC
)
12363 * APPLE NOTE: Defend against gcc 4.0 botch on x86.
12364 * not all paths out of inlined dtrace_dof_ecbdesc
12365 * are checked for the NULL return value.
12366 * Check for NULL explicitly here.
12368 ep
= dtrace_dof_ecbdesc(dof
, sec
, vstate
, cr
);
12370 dtrace_enabling_destroy(enab
);
12375 dtrace_enabling_add(enab
, ep
);
12382 * Process DOF for any options. This routine assumes that the DOF has been
12383 * at least processed by dtrace_dof_slurp().
12386 dtrace_dof_options(dof_hdr_t
*dof
, dtrace_state_t
*state
)
12392 dof_optdesc_t
*desc
;
12394 for (i
= 0; i
< dof
->dofh_secnum
; i
++) {
12395 dof_sec_t
*sec
= (dof_sec_t
*)((uintptr_t)dof
+
12396 (uintptr_t)dof
->dofh_secoff
+ i
* dof
->dofh_secsize
);
12398 if (sec
->dofs_type
!= DOF_SECT_OPTDESC
)
12401 if (sec
->dofs_align
!= sizeof (uint64_t)) {
12402 dtrace_dof_error(dof
, "bad alignment in "
12403 "option description");
12407 if ((entsize
= sec
->dofs_entsize
) == 0) {
12408 dtrace_dof_error(dof
, "zeroed option entry size");
12412 if (entsize
< sizeof (dof_optdesc_t
)) {
12413 dtrace_dof_error(dof
, "bad option entry size");
12417 for (offs
= 0; offs
< sec
->dofs_size
; offs
+= entsize
) {
12418 desc
= (dof_optdesc_t
*)((uintptr_t)dof
+
12419 (uintptr_t)sec
->dofs_offset
+ offs
);
12421 if (desc
->dofo_strtab
!= DOF_SECIDX_NONE
) {
12422 dtrace_dof_error(dof
, "non-zero option string");
12426 if (desc
->dofo_value
== (uint64_t)DTRACEOPT_UNSET
) {
12427 dtrace_dof_error(dof
, "unset option");
12431 if ((rval
= dtrace_state_option(state
,
12432 desc
->dofo_option
, desc
->dofo_value
)) != 0) {
12433 dtrace_dof_error(dof
, "rejected option");
12443 * DTrace Consumer State Functions
12446 dtrace_dstate_init(dtrace_dstate_t
*dstate
, size_t size
)
12448 size_t hashsize
, maxper
, min_size
, chunksize
= dstate
->dtds_chunksize
;
12451 dtrace_dynvar_t
*dvar
, *next
, *start
;
12454 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
12455 ASSERT(dstate
->dtds_base
== NULL
&& dstate
->dtds_percpu
== NULL
);
12457 bzero(dstate
, sizeof (dtrace_dstate_t
));
12459 if ((dstate
->dtds_chunksize
= chunksize
) == 0)
12460 dstate
->dtds_chunksize
= DTRACE_DYNVAR_CHUNKSIZE
;
12462 if (size
< (min_size
= dstate
->dtds_chunksize
+ sizeof (dtrace_dynhash_t
)))
12465 if ((base
= kmem_zalloc(size
, KM_NOSLEEP
)) == NULL
)
12468 dstate
->dtds_size
= size
;
12469 dstate
->dtds_base
= base
;
12470 dstate
->dtds_percpu
= kmem_cache_alloc(dtrace_state_cache
, KM_SLEEP
);
12471 bzero(dstate
->dtds_percpu
, (int)NCPU
* sizeof (dtrace_dstate_percpu_t
));
12473 hashsize
= size
/ (dstate
->dtds_chunksize
+ sizeof (dtrace_dynhash_t
));
12475 if (hashsize
!= 1 && (hashsize
& 1))
12478 dstate
->dtds_hashsize
= hashsize
;
12479 dstate
->dtds_hash
= dstate
->dtds_base
;
12482 * Set all of our hash buckets to point to the single sink, and (if
12483 * it hasn't already been set), set the sink's hash value to be the
12484 * sink sentinel value. The sink is needed for dynamic variable
12485 * lookups to know that they have iterated over an entire, valid hash
12488 for (i
= 0; i
< hashsize
; i
++)
12489 dstate
->dtds_hash
[i
].dtdh_chain
= &dtrace_dynhash_sink
;
12491 if (dtrace_dynhash_sink
.dtdv_hashval
!= DTRACE_DYNHASH_SINK
)
12492 dtrace_dynhash_sink
.dtdv_hashval
= DTRACE_DYNHASH_SINK
;
12495 * Determine number of active CPUs. Divide free list evenly among
12498 start
= (dtrace_dynvar_t
*)
12499 ((uintptr_t)base
+ hashsize
* sizeof (dtrace_dynhash_t
));
12500 limit
= (uintptr_t)base
+ size
;
12502 maxper
= (limit
- (uintptr_t)start
) / (int)NCPU
;
12503 maxper
= (maxper
/ dstate
->dtds_chunksize
) * dstate
->dtds_chunksize
;
12505 for (i
= 0; i
< NCPU
; i
++) {
12506 dstate
->dtds_percpu
[i
].dtdsc_free
= dvar
= start
;
12509 * If we don't even have enough chunks to make it once through
12510 * NCPUs, we're just going to allocate everything to the first
12511 * CPU. And if we're on the last CPU, we're going to allocate
12512 * whatever is left over. In either case, we set the limit to
12513 * be the limit of the dynamic variable space.
12515 if (maxper
== 0 || i
== NCPU
- 1) {
12516 limit
= (uintptr_t)base
+ size
;
12519 limit
= (uintptr_t)start
+ maxper
;
12520 start
= (dtrace_dynvar_t
*)limit
;
12523 ASSERT(limit
<= (uintptr_t)base
+ size
);
12526 next
= (dtrace_dynvar_t
*)((uintptr_t)dvar
+
12527 dstate
->dtds_chunksize
);
12529 if ((uintptr_t)next
+ dstate
->dtds_chunksize
>= limit
)
12532 dvar
->dtdv_next
= next
;
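/*
 * Illustrative sketch (not part of the original source): the free-list
 * linking performed above for one CPU's slice of the dynamic variable
 * space.  Fixed-size chunks are threaded together until the next chunk
 * would no longer fit entirely below the limit.  chunk_t and link_chunks()
 * are hypothetical names for this sketch only; the real code links
 * dtrace_dynvar_t chunks and relies on the region being zeroed.  The
 * sketch assumes at least one chunk fits in [start, limit).
 */
#if 0
typedef struct chunk { struct chunk *next; } chunk_t;

static chunk_t *
link_chunks(char *start, char *limit, size_t chunksize)
{
	chunk_t *head = (chunk_t *)start, *cur = head, *next;

	for (;;) {
		next = (chunk_t *)((char *)cur + chunksize);

		/* Stop once the following chunk would cross the limit. */
		if ((char *)next + chunksize >= limit)
			break;

		cur->next = next;
		cur = next;
	}

	cur->next = NULL;	/* terminate this CPU's free list */
	return (head);
}
#endif	/* illustrative sketch */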
12544 dtrace_dstate_fini(dtrace_dstate_t
*dstate
)
12546 lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
);
12548 if (dstate
->dtds_base
== NULL
)
12551 kmem_free(dstate
->dtds_base
, dstate
->dtds_size
);
12552 kmem_cache_free(dtrace_state_cache
, dstate
->dtds_percpu
);
12556 dtrace_vstate_fini(dtrace_vstate_t
*vstate
)
12559 * Logical XOR, where are you?
12561 ASSERT((vstate
->dtvs_nglobals
== 0) ^ (vstate
->dtvs_globals
!= NULL
));
12563 if (vstate
->dtvs_nglobals
> 0) {
12564 kmem_free(vstate
->dtvs_globals
, vstate
->dtvs_nglobals
*
12565 sizeof (dtrace_statvar_t
*));
12568 if (vstate
->dtvs_ntlocals
> 0) {
12569 kmem_free(vstate
->dtvs_tlocals
, vstate
->dtvs_ntlocals
*
12570 sizeof (dtrace_difv_t
));
12573 ASSERT((vstate
->dtvs_nlocals
== 0) ^ (vstate
->dtvs_locals
!= NULL
));
12575 if (vstate
->dtvs_nlocals
> 0) {
12576 kmem_free(vstate
->dtvs_locals
, vstate
->dtvs_nlocals
*
12577 sizeof (dtrace_statvar_t
*));
12582 dtrace_state_clean(dtrace_state_t
*state
)
12584 if (state
->dts_activity
== DTRACE_ACTIVITY_INACTIVE
)
12587 dtrace_dynvar_clean(&state
->dts_vstate
.dtvs_dynvars
);
12588 dtrace_speculation_clean(state
);
12592 dtrace_state_deadman(dtrace_state_t
*state
)
12598 now
= dtrace_gethrtime();
12600 if (state
!= dtrace_anon
.dta_state
&&
12601 now
- state
->dts_laststatus
>= dtrace_deadman_user
)
12605 * We must be sure that dts_alive never appears to be less than the
12606 * value upon entry to dtrace_state_deadman(), and because we lack a
12607 * dtrace_cas64(), we cannot store to it atomically. We thus instead
12608 * store INT64_MAX to it, followed by a memory barrier, followed by
12609 * the new value. This assures that dts_alive never appears to be
12610 * less than its true value, regardless of the order in which the
12611 * stores to the underlying storage are issued.
12613 state
->dts_alive
= INT64_MAX
;
12614 dtrace_membar_producer();
12615 state
->dts_alive
= now
;
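/*
 * Illustrative sketch (not part of the original source): the publication
 * pattern used above for dts_alive.  Lacking a 64-bit compare-and-swap, a
 * reader must never observe a value lower than the previous one, so a
 * sentinel (INT64_MAX) is stored first, a producer barrier is issued, and
 * only then is the real value stored.  monotonic_publish() is a
 * hypothetical name; dtrace_membar_producer() is the barrier the real code
 * uses.
 */
#if 0
static void
monotonic_publish(volatile int64_t *slot, int64_t newval)
{
	*slot = INT64_MAX;		/* readers treat this as "in flux" */
	dtrace_membar_producer();	/* order the sentinel before the value */
	*slot = newval;			/* readers never see a smaller value */
}
#endif	/* illustrative sketch */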
12619 dtrace_state_create(dev_t
*devp
, cred_t
*cr
, dtrace_state_t
**new_state
)
12624 dtrace_state_t
*state
;
12625 dtrace_optval_t
*opt
;
12626 int bufsize
= (int)NCPU
* sizeof (dtrace_buffer_t
), i
;
12628 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
12629 lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
);
12631 /* Cause restart */
12635 * Darwin's DEVFS layer acquired the minor number for this "device" when it called
12636 * dtrace_devfs_clone_func(). At that time, dtrace_devfs_clone_func() proposed a minor number
12637 * (next unused according to vmem_alloc()) and then immediately put the number back in play
12639 * (by calling vmem_free()). Now that minor number is being used for an open, we commit it
12639 * to use. The following vmem_alloc() must deliver that same minor number. FIXME.
12642 minor
= (minor_t
)(uintptr_t)vmem_alloc(dtrace_minor
, 1,
12643 VM_BESTFIT
| VM_SLEEP
);
12645 if (NULL
!= devp
) {
12646 ASSERT(getminor(*devp
) == minor
);
12647 if (getminor(*devp
) != minor
) {
12648 printf("dtrace_open: couldn't re-acquire vended minor number %d. Instead got %d\n",
12649 getminor(*devp
), minor
);
12650 vmem_free(dtrace_minor
, (void *)(uintptr_t)minor
, 1);
12651 return (ERESTART
); /* can't reacquire */
12654 /* NULL==devp iff "Anonymous state" (see dtrace_anon_property),
12655 * so just vend the minor device number here de novo since no "open" has occurred. */
12658 if (ddi_soft_state_zalloc(dtrace_softstate
, minor
) != DDI_SUCCESS
) {
12659 vmem_free(dtrace_minor
, (void *)(uintptr_t)minor
, 1);
12660 return (EAGAIN
); /* temporary resource shortage */
12663 state
= ddi_get_soft_state(dtrace_softstate
, minor
);
12664 state
->dts_epid
= DTRACE_EPIDNONE
+ 1;
12666 (void) snprintf(c
, sizeof (c
), "dtrace_aggid_%d", minor
);
12667 state
->dts_aggid_arena
= vmem_create(c
, (void *)1, UINT32_MAX
, 1,
12668 NULL
, NULL
, NULL
, 0, VM_SLEEP
| VMC_IDENTIFIER
);
12670 if (devp
!= NULL
) {
12671 major
= getemajor(*devp
);
12673 major
= ddi_driver_major(dtrace_devi
);
12676 state
->dts_dev
= makedevice(major
, minor
);
12679 *devp
= state
->dts_dev
;
12682 * We allocate NCPU buffers. On the one hand, this can be quite
12683 * a bit of memory per instance (nearly 36K on a Starcat). On the
12684 * other hand, it saves an additional memory reference in the probe
12687 state
->dts_buffer
= kmem_zalloc(bufsize
, KM_SLEEP
);
12688 state
->dts_aggbuffer
= kmem_zalloc(bufsize
, KM_SLEEP
);
12689 state
->dts_cleaner
= CYCLIC_NONE
;
12690 state
->dts_deadman
= CYCLIC_NONE
;
12691 state
->dts_vstate
.dtvs_state
= state
;
12693 for (i
= 0; i
< DTRACEOPT_MAX
; i
++)
12694 state
->dts_options
[i
] = DTRACEOPT_UNSET
;
12697 * Set the default options.
12699 opt
= state
->dts_options
;
12700 opt
[DTRACEOPT_BUFPOLICY
] = DTRACEOPT_BUFPOLICY_SWITCH
;
12701 opt
[DTRACEOPT_BUFRESIZE
] = DTRACEOPT_BUFRESIZE_AUTO
;
12702 opt
[DTRACEOPT_NSPEC
] = dtrace_nspec_default
;
12703 opt
[DTRACEOPT_SPECSIZE
] = dtrace_specsize_default
;
12704 opt
[DTRACEOPT_CPU
] = (dtrace_optval_t
)DTRACE_CPUALL
;
12705 opt
[DTRACEOPT_STRSIZE
] = dtrace_strsize_default
;
12706 opt
[DTRACEOPT_STACKFRAMES
] = dtrace_stackframes_default
;
12707 opt
[DTRACEOPT_USTACKFRAMES
] = dtrace_ustackframes_default
;
12708 opt
[DTRACEOPT_CLEANRATE
] = dtrace_cleanrate_default
;
12709 opt
[DTRACEOPT_AGGRATE
] = dtrace_aggrate_default
;
12710 opt
[DTRACEOPT_SWITCHRATE
] = dtrace_switchrate_default
;
12711 opt
[DTRACEOPT_STATUSRATE
] = dtrace_statusrate_default
;
12712 opt
[DTRACEOPT_JSTACKFRAMES
] = dtrace_jstackframes_default
;
12713 opt
[DTRACEOPT_JSTACKSTRSIZE
] = dtrace_jstackstrsize_default
;
12715 state
->dts_activity
= DTRACE_ACTIVITY_INACTIVE
;
12718 * Depending on the user credentials, we set flag bits which alter probe
12719 * visibility or the amount of destructiveness allowed. In the case of
12720 * actual anonymous tracing, or the possession of all privileges, all of
12721 * the normal checks are bypassed.
12723 if (cr
== NULL
|| PRIV_POLICY_ONLY(cr
, PRIV_ALL
, B_FALSE
)) {
12724 state
->dts_cred
.dcr_visible
= DTRACE_CRV_ALL
;
12725 state
->dts_cred
.dcr_action
= DTRACE_CRA_ALL
;
12728 * Set up the credentials for this instantiation. We take a
12729 * hold on the credential to prevent it from disappearing on
12730 * us; this in turn prevents the zone_t referenced by this
12731 * credential from disappearing. This means that we can
12732 * examine the credential and the zone from probe context.
12735 state
->dts_cred
.dcr_cred
= cr
;
12738 * CRA_PROC means "we have *some* privilege for dtrace" and
12739 * unlocks the use of variables like pid, zonename, etc.
12741 if (PRIV_POLICY_ONLY(cr
, PRIV_DTRACE_USER
, B_FALSE
) ||
12742 PRIV_POLICY_ONLY(cr
, PRIV_DTRACE_PROC
, B_FALSE
)) {
12743 state
->dts_cred
.dcr_action
|= DTRACE_CRA_PROC
;
12747 * dtrace_user allows use of syscall and profile providers.
12748 * If the user also has proc_owner and/or proc_zone, we
12749 * extend the scope to include additional visibility and
12750 * destructive power.
12752 if (PRIV_POLICY_ONLY(cr
, PRIV_DTRACE_USER
, B_FALSE
)) {
12753 if (PRIV_POLICY_ONLY(cr
, PRIV_PROC_OWNER
, B_FALSE
)) {
12754 state
->dts_cred
.dcr_visible
|=
12755 DTRACE_CRV_ALLPROC
;
12757 state
->dts_cred
.dcr_action
|=
12758 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER
;
12761 if (PRIV_POLICY_ONLY(cr
, PRIV_PROC_ZONE
, B_FALSE
)) {
12762 state
->dts_cred
.dcr_visible
|=
12763 DTRACE_CRV_ALLZONE
;
12765 state
->dts_cred
.dcr_action
|=
12766 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE
;
12770 * If we have all privs in whatever zone this is,
12771 * we can do destructive things to processes which
12772 * have altered credentials.
12774 * APPLE NOTE: Darwin doesn't do zones.
12775 * Behave as if zone always has destructive privs.
12778 state
->dts_cred
.dcr_action
|=
12779 DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG
;
12783 * Holding the dtrace_kernel privilege also implies that
12784 * the user has the dtrace_user privilege from a visibility
12785 * perspective. But without further privileges, some
12786 * destructive actions are not available.
12788 if (PRIV_POLICY_ONLY(cr
, PRIV_DTRACE_KERNEL
, B_FALSE
)) {
12790 * Make all probes in all zones visible. However,
12791 * this doesn't mean that all actions become available
12794 state
->dts_cred
.dcr_visible
|= DTRACE_CRV_KERNEL
|
12795 DTRACE_CRV_ALLPROC
| DTRACE_CRV_ALLZONE
;
12797 state
->dts_cred
.dcr_action
|= DTRACE_CRA_KERNEL
|
12800 * Holding proc_owner means that destructive actions
12801 * for *this* zone are allowed.
12803 if (PRIV_POLICY_ONLY(cr
, PRIV_PROC_OWNER
, B_FALSE
))
12804 state
->dts_cred
.dcr_action
|=
12805 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER
;
12808 * Holding proc_zone means that destructive actions
12809 * for this user/group ID in all zones are allowed.
12811 if (PRIV_POLICY_ONLY(cr
, PRIV_PROC_ZONE
, B_FALSE
))
12812 state
->dts_cred
.dcr_action
|=
12813 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE
;
12816 * If we have all privs in whatever zone this is,
12817 * we can do destructive things to processes which
12818 * have altered credentials.
12820 * APPLE NOTE: Darwin doesn't do zones.
12821 * Behave as if zone always has destructive privs.
12823 state
->dts_cred
.dcr_action
|=
12824 DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG
;
12828 * Holding the dtrace_proc privilege gives control over fasttrap
12829 * and pid providers. We need to grant wider destructive
12830 * privileges in the event that the user has proc_owner and/or
12833 if (PRIV_POLICY_ONLY(cr
, PRIV_DTRACE_PROC
, B_FALSE
)) {
12834 if (PRIV_POLICY_ONLY(cr
, PRIV_PROC_OWNER
, B_FALSE
))
12835 state
->dts_cred
.dcr_action
|=
12836 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER
;
12838 if (PRIV_POLICY_ONLY(cr
, PRIV_PROC_ZONE
, B_FALSE
))
12839 state
->dts_cred
.dcr_action
|=
12840 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE
;
12844 *new_state
= state
;
12845 return(0); /* Success */
12849 dtrace_state_buffer(dtrace_state_t
*state
, dtrace_buffer_t
*buf
, int which
)
12851 dtrace_optval_t
*opt
= state
->dts_options
, size
;
12852 processorid_t cpu
= 0;
12853 int flags
= 0, rval
;
12855 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
12856 lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
);
12857 ASSERT(which
< DTRACEOPT_MAX
);
12858 ASSERT(state
->dts_activity
== DTRACE_ACTIVITY_INACTIVE
||
12859 (state
== dtrace_anon
.dta_state
&&
12860 state
->dts_activity
== DTRACE_ACTIVITY_ACTIVE
));
12862 if (opt
[which
] == DTRACEOPT_UNSET
|| opt
[which
] == 0)
12865 if (opt
[DTRACEOPT_CPU
] != DTRACEOPT_UNSET
)
12866 cpu
= opt
[DTRACEOPT_CPU
];
12868 if (which
== DTRACEOPT_SPECSIZE
)
12869 flags
|= DTRACEBUF_NOSWITCH
;
12871 if (which
== DTRACEOPT_BUFSIZE
) {
12872 if (opt
[DTRACEOPT_BUFPOLICY
] == DTRACEOPT_BUFPOLICY_RING
)
12873 flags
|= DTRACEBUF_RING
;
12875 if (opt
[DTRACEOPT_BUFPOLICY
] == DTRACEOPT_BUFPOLICY_FILL
)
12876 flags
|= DTRACEBUF_FILL
;
12878 if (state
!= dtrace_anon
.dta_state
||
12879 state
->dts_activity
!= DTRACE_ACTIVITY_ACTIVE
)
12880 flags
|= DTRACEBUF_INACTIVE
;
12883 for (size
= opt
[which
]; (size_t)size
>= sizeof (uint64_t); size
>>= 1) {
12885 * The size must be 8-byte aligned. If the size is not 8-byte
12886 * aligned, drop it down by the difference.
12888 if (size
& (sizeof (uint64_t) - 1))
12889 size
-= size
& (sizeof (uint64_t) - 1);
12891 if (size
< state
->dts_reserve
) {
12893 * Buffers must always be large enough to accommodate
12894 * their prereserved space. We return E2BIG instead
12895 * of ENOMEM in this case to allow for user-level
12896 * software to differentiate the cases.
12901 rval
= dtrace_buffer_alloc(buf
, size
, flags
, cpu
);
12903 if (rval
!= ENOMEM
) {
12908 if (opt
[DTRACEOPT_BUFRESIZE
] == DTRACEOPT_BUFRESIZE_MANUAL
)
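/*
 * Illustrative sketch (not part of the original source): the allocation
 * back-off loop used above.  Starting from the requested size, the
 * candidate size is kept 8-byte aligned and halved after each ENOMEM until
 * an allocation succeeds or the size falls below the minimum useful value.
 * alloc_with_backoff() and try_alloc() are hypothetical names standing in
 * for dtrace_state_buffer() and dtrace_buffer_alloc(); the real code also
 * stops early when the bufresize policy is "manual".
 */
#if 0
static int
alloc_with_backoff(size_t request, size_t reserve,
    int (*try_alloc)(size_t), size_t *chosen)
{
	size_t size;
	int rval;

	for (size = request; size >= sizeof (uint64_t); size >>= 1) {
		/* Keep the candidate size 8-byte aligned. */
		size -= size & (sizeof (uint64_t) - 1);

		if (size < reserve)
			return (E2BIG);	/* can't cover the prereserved space */

		if ((rval = try_alloc(size)) != ENOMEM) {
			*chosen = size;
			return (rval);	/* success, or a non-retryable error */
		}

		/* Otherwise fall through and retry with half the size. */
	}

	return (ENOMEM);
}
#endif	/* illustrative sketch */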
12916 dtrace_state_buffers(dtrace_state_t
*state
)
12918 dtrace_speculation_t
*spec
= state
->dts_speculations
;
12921 if ((rval
= dtrace_state_buffer(state
, state
->dts_buffer
,
12922 DTRACEOPT_BUFSIZE
)) != 0)
12925 if ((rval
= dtrace_state_buffer(state
, state
->dts_aggbuffer
,
12926 DTRACEOPT_AGGSIZE
)) != 0)
12929 for (i
= 0; i
< state
->dts_nspeculations
; i
++) {
12930 if ((rval
= dtrace_state_buffer(state
,
12931 spec
[i
].dtsp_buffer
, DTRACEOPT_SPECSIZE
)) != 0)
12939 dtrace_state_prereserve(dtrace_state_t
*state
)
12942 dtrace_probe_t
*probe
;
12944 state
->dts_reserve
= 0;
12946 if (state
->dts_options
[DTRACEOPT_BUFPOLICY
] != DTRACEOPT_BUFPOLICY_FILL
)
12950 * If our buffer policy is a "fill" buffer policy, we need to set the
12951 * prereserved space to be the space required by the END probes.
12953 probe
= dtrace_probes
[dtrace_probeid_end
- 1];
12954 ASSERT(probe
!= NULL
);
12956 for (ecb
= probe
->dtpr_ecb
; ecb
!= NULL
; ecb
= ecb
->dte_next
) {
12957 if (ecb
->dte_state
!= state
)
12960 state
->dts_reserve
+= ecb
->dte_needed
+ ecb
->dte_alignment
;
12965 dtrace_state_go(dtrace_state_t
*state
, processorid_t
*cpu
)
12967 dtrace_optval_t
*opt
= state
->dts_options
, sz
, nspec
;
12968 dtrace_speculation_t
*spec
;
12969 dtrace_buffer_t
*buf
;
12970 cyc_handler_t hdlr
;
12972 int rval
= 0, i
, bufsize
= (int)NCPU
* sizeof (dtrace_buffer_t
);
12973 dtrace_icookie_t cookie
;
12975 lck_mtx_lock(&cpu_lock
);
12976 lck_mtx_lock(&dtrace_lock
);
12978 if (state
->dts_activity
!= DTRACE_ACTIVITY_INACTIVE
) {
12984 * Before we can perform any checks, we must prime all of the
12985 * retained enablings that correspond to this state.
12987 dtrace_enabling_prime(state
);
12989 if (state
->dts_destructive
&& !state
->dts_cred
.dcr_destructive
) {
12994 dtrace_state_prereserve(state
);
12997 * Now we want to try to allocate our speculations.
12998 * We do not automatically resize the number of speculations; if
12999 * this fails, we will fail the operation.
13001 nspec
= opt
[DTRACEOPT_NSPEC
];
13002 ASSERT(nspec
!= DTRACEOPT_UNSET
);
13004 if (nspec
> INT_MAX
) {
13009 spec
= kmem_zalloc(nspec
* sizeof (dtrace_speculation_t
), KM_NOSLEEP
);
13011 if (spec
== NULL
) {
13016 state
->dts_speculations
= spec
;
13017 state
->dts_nspeculations
= (int)nspec
;
13019 for (i
= 0; i
< nspec
; i
++) {
13020 if ((buf
= kmem_zalloc(bufsize
, KM_NOSLEEP
)) == NULL
) {
13025 spec
[i
].dtsp_buffer
= buf
;
13028 if (opt
[DTRACEOPT_GRABANON
] != DTRACEOPT_UNSET
) {
13029 if (dtrace_anon
.dta_state
== NULL
) {
13034 if (state
->dts_necbs
!= 0) {
13039 state
->dts_anon
= dtrace_anon_grab();
13040 ASSERT(state
->dts_anon
!= NULL
);
13041 state
= state
->dts_anon
;
13044 * We want "grabanon" to be set in the grabbed state, so we'll
13045 * copy that option value from the grabbing state into the
13048 state
->dts_options
[DTRACEOPT_GRABANON
] =
13049 opt
[DTRACEOPT_GRABANON
];
13051 *cpu
= dtrace_anon
.dta_beganon
;
13054 * If the anonymous state is active (as it almost certainly
13055 * is if the anonymous enabling ultimately matched anything),
13056 * we don't allow any further option processing -- but we
13057 * don't return failure.
13059 if (state
->dts_activity
!= DTRACE_ACTIVITY_INACTIVE
)
13063 if (opt
[DTRACEOPT_AGGSIZE
] != DTRACEOPT_UNSET
&&
13064 opt
[DTRACEOPT_AGGSIZE
] != 0) {
13065 if (state
->dts_aggregations
== NULL
) {
13067 * We're not going to create an aggregation buffer
13068 * because we don't have any ECBs that contain
13069 * aggregations -- set this option to 0.
13071 opt
[DTRACEOPT_AGGSIZE
] = 0;
13074 * If we have an aggregation buffer, we must also have
13075 * a buffer to use as scratch.
13077 if (opt
[DTRACEOPT_BUFSIZE
] == DTRACEOPT_UNSET
||
13078 (size_t)opt
[DTRACEOPT_BUFSIZE
] < state
->dts_needed
) {
13079 opt
[DTRACEOPT_BUFSIZE
] = state
->dts_needed
;
13084 if (opt
[DTRACEOPT_SPECSIZE
] != DTRACEOPT_UNSET
&&
13085 opt
[DTRACEOPT_SPECSIZE
] != 0) {
13086 if (!state
->dts_speculates
) {
13088 * We're not going to create speculation buffers
13089 * because we don't have any ECBs that actually
13090 * speculate -- set the speculation size to 0.
13092 opt
[DTRACEOPT_SPECSIZE
] = 0;
13097 * The bare minimum size for any buffer that we're actually going to
13098 * do anything to is sizeof (uint64_t).
13100 sz
= sizeof (uint64_t);
13102 if ((state
->dts_needed
!= 0 && opt
[DTRACEOPT_BUFSIZE
] < sz
) ||
13103 (state
->dts_speculates
&& opt
[DTRACEOPT_SPECSIZE
] < sz
) ||
13104 (state
->dts_aggregations
!= NULL
&& opt
[DTRACEOPT_AGGSIZE
] < sz
)) {
13106 * A buffer size has been explicitly set to 0 (or to a size
13107 * that will be adjusted to 0) and we need the space -- we
13108 * need to return failure. We return ENOSPC to differentiate
13109 * it from failing to allocate a buffer due to failure to meet
13110 * the reserve (for which we return E2BIG).
13116 if ((rval
= dtrace_state_buffers(state
)) != 0)
13119 if ((sz
= opt
[DTRACEOPT_DYNVARSIZE
]) == DTRACEOPT_UNSET
)
13120 sz
= dtrace_dstate_defsize
;
13123 rval
= dtrace_dstate_init(&state
->dts_vstate
.dtvs_dynvars
, sz
);
13128 if (opt
[DTRACEOPT_BUFRESIZE
] == DTRACEOPT_BUFRESIZE_MANUAL
)
13130 } while (sz
>>= 1);
13132 opt
[DTRACEOPT_DYNVARSIZE
] = sz
;
13137 if (opt
[DTRACEOPT_STATUSRATE
] > dtrace_statusrate_max
)
13138 opt
[DTRACEOPT_STATUSRATE
] = dtrace_statusrate_max
;
13140 if (opt
[DTRACEOPT_CLEANRATE
] == 0)
13141 opt
[DTRACEOPT_CLEANRATE
] = dtrace_cleanrate_max
;
13143 if (opt
[DTRACEOPT_CLEANRATE
] < dtrace_cleanrate_min
)
13144 opt
[DTRACEOPT_CLEANRATE
] = dtrace_cleanrate_min
;
13146 if (opt
[DTRACEOPT_CLEANRATE
] > dtrace_cleanrate_max
)
13147 opt
[DTRACEOPT_CLEANRATE
] = dtrace_cleanrate_max
;
13149 hdlr
.cyh_func
= (cyc_func_t
)dtrace_state_clean
;
13150 hdlr
.cyh_arg
= state
;
13151 hdlr
.cyh_level
= CY_LOW_LEVEL
;
13154 when
.cyt_interval
= opt
[DTRACEOPT_CLEANRATE
];
13156 state
->dts_cleaner
= cyclic_add(&hdlr
, &when
);
13158 hdlr
.cyh_func
= (cyc_func_t
)dtrace_state_deadman
;
13159 hdlr
.cyh_arg
= state
;
13160 hdlr
.cyh_level
= CY_LOW_LEVEL
;
13163 when
.cyt_interval
= dtrace_deadman_interval
;
13165 state
->dts_alive
= state
->dts_laststatus
= dtrace_gethrtime();
13166 state
->dts_deadman
= cyclic_add(&hdlr
, &when
);
13168 state
->dts_activity
= DTRACE_ACTIVITY_WARMUP
;
13171 * Now it's time to actually fire the BEGIN probe. We need to disable
13172 * interrupts here both to record the CPU on which we fired the BEGIN
13173 * probe (the data from this CPU will be processed first at user
13174 * level) and to manually activate the buffer for this CPU.
13176 cookie
= dtrace_interrupt_disable();
13177 *cpu
= CPU
->cpu_id
;
13178 ASSERT(state
->dts_buffer
[*cpu
].dtb_flags
& DTRACEBUF_INACTIVE
);
13179 state
->dts_buffer
[*cpu
].dtb_flags
&= ~DTRACEBUF_INACTIVE
;
13181 dtrace_probe(dtrace_probeid_begin
,
13182 (uint64_t)(uintptr_t)state
, 0, 0, 0, 0);
13183 dtrace_interrupt_enable(cookie
);
13185 * We may have had an exit action from a BEGIN probe; only change our
13186 * state to ACTIVE if we're still in WARMUP.
13188 ASSERT(state
->dts_activity
== DTRACE_ACTIVITY_WARMUP
||
13189 state
->dts_activity
== DTRACE_ACTIVITY_DRAINING
);
13191 if (state
->dts_activity
== DTRACE_ACTIVITY_WARMUP
)
13192 state
->dts_activity
= DTRACE_ACTIVITY_ACTIVE
;
13195 * Regardless of whether or not now we're in ACTIVE or DRAINING, we
13196 * want each CPU to transition its principal buffer out of the
13197 * INACTIVE state. Doing this assures that no CPU will suddenly begin
13198 * processing an ECB halfway down a probe's ECB chain; all CPUs will
13199 * atomically transition from processing none of a state's ECBs to
13200 * processing all of them.
13202 dtrace_xcall(DTRACE_CPUALL
,
13203 (dtrace_xcall_t
)dtrace_buffer_activate
, state
);
13207 dtrace_buffer_free(state
->dts_buffer
);
13208 dtrace_buffer_free(state
->dts_aggbuffer
);
13210 if ((nspec
= state
->dts_nspeculations
) == 0) {
13211 ASSERT(state
->dts_speculations
== NULL
);
13215 spec
= state
->dts_speculations
;
13216 ASSERT(spec
!= NULL
);
13218 for (i
= 0; i
< state
->dts_nspeculations
; i
++) {
13219 if ((buf
= spec
[i
].dtsp_buffer
) == NULL
)
13222 dtrace_buffer_free(buf
);
13223 kmem_free(buf
, bufsize
);
13226 kmem_free(spec
, nspec
* sizeof (dtrace_speculation_t
));
13227 state
->dts_nspeculations
= 0;
13228 state
->dts_speculations
= NULL
;
13231 lck_mtx_unlock(&dtrace_lock
);
13232 lck_mtx_unlock(&cpu_lock
);
13238 dtrace_state_stop(dtrace_state_t
*state
, processorid_t
*cpu
)
13240 dtrace_icookie_t cookie
;
13242 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
13244 if (state
->dts_activity
!= DTRACE_ACTIVITY_ACTIVE
&&
13245 state
->dts_activity
!= DTRACE_ACTIVITY_DRAINING
)
13249 * We'll set the activity to DTRACE_ACTIVITY_DRAINING, and issue a sync
13250 * to be sure that every CPU has seen it. See below for the details
13251 * on why this is done.
13253 state
->dts_activity
= DTRACE_ACTIVITY_DRAINING
;
13257 * By this point, it is impossible for any CPU to be still processing
13258 * with DTRACE_ACTIVITY_ACTIVE. We can thus set our activity to
13259 * DTRACE_ACTIVITY_COOLDOWN and know that we're not racing with any
13260 * other CPU in dtrace_buffer_reserve(). This allows dtrace_probe()
13261 * and callees to know that the activity is DTRACE_ACTIVITY_COOLDOWN
13262 * iff we're in the END probe.
13264 state
->dts_activity
= DTRACE_ACTIVITY_COOLDOWN
;
13266 ASSERT(state
->dts_activity
== DTRACE_ACTIVITY_COOLDOWN
);
13269 * Finally, we can release the reserve and call the END probe. We
13270 * disable interrupts across calling the END probe to allow us to
13271 * return the CPU on which we actually called the END probe. This
13272 * allows user-land to be sure that this CPU's principal buffer is
13275 state
->dts_reserve
= 0;
13277 cookie
= dtrace_interrupt_disable();
13278 *cpu
= CPU
->cpu_id
;
13279 dtrace_probe(dtrace_probeid_end
,
13280 (uint64_t)(uintptr_t)state
, 0, 0, 0, 0);
13281 dtrace_interrupt_enable(cookie
);
13283 state
->dts_activity
= DTRACE_ACTIVITY_STOPPED
;
13290 dtrace_state_option(dtrace_state_t
*state
, dtrace_optid_t option
,
13291 dtrace_optval_t val
)
13293 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
13295 if (state
->dts_activity
!= DTRACE_ACTIVITY_INACTIVE
)
13298 if (option
>= DTRACEOPT_MAX
)
13301 if (option
!= DTRACEOPT_CPU
&& val
< 0)
13305 case DTRACEOPT_DESTRUCTIVE
:
13307 * Prevent consumers from enabling destructive actions if DTrace
13308 * is running in a restricted environment, or if actions are
13311 if (dtrace_is_restricted() || dtrace_destructive_disallow
)
13314 state
->dts_cred
.dcr_destructive
= 1;
13317 case DTRACEOPT_BUFSIZE
:
13318 case DTRACEOPT_DYNVARSIZE
:
13319 case DTRACEOPT_AGGSIZE
:
13320 case DTRACEOPT_SPECSIZE
:
13321 case DTRACEOPT_STRSIZE
:
13325 if (val
>= LONG_MAX
) {
13327 * If this is an otherwise negative value, set it to
13328 * the highest multiple of 128m less than LONG_MAX.
13329 * Technically, we're adjusting the size without
13330 * regard to the buffer resizing policy, but in fact,
13331 * this has no effect -- if we set the buffer size to
13332 * ~LONG_MAX and the buffer policy is ultimately set to
13333 * be "manual", the buffer allocation is guaranteed to
13334 * fail, if only because the allocation requires two
13335 * buffers. (We set the size to the highest
13336 * multiple of 128m because it ensures that the size
13337 * will remain a multiple of a megabyte when
13338 * repeatedly halved -- all the way down to 15m.)
13340 val
= LONG_MAX
- (1 << 27) + 1;
13344 state
->dts_options
[option
] = val
;
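/*
 * Illustrative sketch (not part of the original source): the arithmetic
 * behind the clamp above.  On LP64, LONG_MAX is 2^63 - 1, so
 * LONG_MAX - (1 << 27) + 1 equals 2^63 - 2^27, i.e. the largest multiple of
 * 128MB that still fits in a long; carrying a factor of 2^27 is what lets
 * repeated halving keep the value a whole number of megabytes.
 * largest_128m_multiple() is a hypothetical helper for this sketch only.
 */
#if 0
static long
largest_128m_multiple(void)
{
	return (LONG_MAX - (1L << 27) + 1);
}
#endif	/* illustrative sketch */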
13350 dtrace_state_destroy(dtrace_state_t
*state
)
13353 dtrace_vstate_t
*vstate
= &state
->dts_vstate
;
13354 minor_t minor
= getminor(state
->dts_dev
);
13355 int i
, bufsize
= (int)NCPU
* sizeof (dtrace_buffer_t
);
13356 dtrace_speculation_t
*spec
= state
->dts_speculations
;
13357 int nspec
= state
->dts_nspeculations
;
13360 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
13361 lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
);
13364 * First, retract any retained enablings for this state.
13366 dtrace_enabling_retract(state
);
13367 ASSERT(state
->dts_nretained
== 0);
13369 if (state
->dts_activity
== DTRACE_ACTIVITY_ACTIVE
||
13370 state
->dts_activity
== DTRACE_ACTIVITY_DRAINING
) {
13372 * We have managed to come into dtrace_state_destroy() on a
13373 * hot enabling -- almost certainly because of a disorderly
13374 * shutdown of a consumer. (That is, a consumer that is
13375 * exiting without having called dtrace_stop().) In this case,
13376 * we're going to set our activity to be KILLED, and then
13377 * issue a sync to be sure that everyone is out of probe
13378 * context before we start blowing away ECBs.
13380 state
->dts_activity
= DTRACE_ACTIVITY_KILLED
;
13385 * Release the credential hold we took in dtrace_state_create().
13387 if (state
->dts_cred
.dcr_cred
!= NULL
)
13388 crfree(state
->dts_cred
.dcr_cred
);
13391 * Now we can safely disable and destroy any enabled probes. Because
13392 * any DTRACE_PRIV_KERNEL probes may actually be slowing our progress
13393 * (especially if they're all enabled), we take two passes through the
13394 * ECBs: in the first, we disable just DTRACE_PRIV_KERNEL probes, and
13395 * in the second we disable whatever is left over.
13397 for (match
= DTRACE_PRIV_KERNEL
; ; match
= 0) {
13398 for (i
= 0; i
< state
->dts_necbs
; i
++) {
13399 if ((ecb
= state
->dts_ecbs
[i
]) == NULL
)
13402 if (match
&& ecb
->dte_probe
!= NULL
) {
13403 dtrace_probe_t
*probe
= ecb
->dte_probe
;
13404 dtrace_provider_t
*prov
= probe
->dtpr_provider
;
13406 if (!(prov
->dtpv_priv
.dtpp_flags
& match
))
13410 dtrace_ecb_disable(ecb
);
13411 dtrace_ecb_destroy(ecb
);
13419 * Before we free the buffers, perform one more sync to assure that
13420 * every CPU is out of probe context.
13424 dtrace_buffer_free(state
->dts_buffer
);
13425 dtrace_buffer_free(state
->dts_aggbuffer
);
13427 for (i
= 0; i
< nspec
; i
++)
13428 dtrace_buffer_free(spec
[i
].dtsp_buffer
);
13430 if (state
->dts_cleaner
!= CYCLIC_NONE
)
13431 cyclic_remove(state
->dts_cleaner
);
13433 if (state
->dts_deadman
!= CYCLIC_NONE
)
13434 cyclic_remove(state
->dts_deadman
);
13436 dtrace_dstate_fini(&vstate
->dtvs_dynvars
);
13437 dtrace_vstate_fini(vstate
);
13438 kmem_free(state
->dts_ecbs
, state
->dts_necbs
* sizeof (dtrace_ecb_t
*));
13440 if (state
->dts_aggregations
!= NULL
) {
13442 for (i
= 0; i
< state
->dts_naggregations
; i
++)
13443 ASSERT(state
->dts_aggregations
[i
] == NULL
);
13445 ASSERT(state
->dts_naggregations
> 0);
13446 kmem_free(state
->dts_aggregations
,
13447 state
->dts_naggregations
* sizeof (dtrace_aggregation_t
*));
13450 kmem_free(state
->dts_buffer
, bufsize
);
13451 kmem_free(state
->dts_aggbuffer
, bufsize
);
13453 for (i
= 0; i
< nspec
; i
++)
13454 kmem_free(spec
[i
].dtsp_buffer
, bufsize
);
13456 kmem_free(spec
, nspec
* sizeof (dtrace_speculation_t
));
13458 dtrace_format_destroy(state
);
13460 vmem_destroy(state
->dts_aggid_arena
);
13461 ddi_soft_state_free(dtrace_softstate
, minor
);
13462 vmem_free(dtrace_minor
, (void *)(uintptr_t)minor
, 1);
13466 * DTrace Anonymous Enabling Functions
13468 static dtrace_state_t
*
13469 dtrace_anon_grab(void)
13471 dtrace_state_t
*state
;
13473 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
13475 if ((state
= dtrace_anon
.dta_state
) == NULL
) {
13476 ASSERT(dtrace_anon
.dta_enabling
== NULL
);
13480 ASSERT(dtrace_anon
.dta_enabling
!= NULL
);
13481 ASSERT(dtrace_retained
!= NULL
);
13483 dtrace_enabling_destroy(dtrace_anon
.dta_enabling
);
13484 dtrace_anon
.dta_enabling
= NULL
;
13485 dtrace_anon
.dta_state
= NULL
;
13491 dtrace_anon_property(void)
13494 dtrace_state_t
*state
;
13496 char c
[32]; /* enough for "dof-data-" + digits */
13498 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
13499 lck_mtx_assert(&cpu_lock
, LCK_MTX_ASSERT_OWNED
);
13501 for (i
= 0; ; i
++) {
13502 (void) snprintf(c
, sizeof (c
), "dof-data-%d", i
);
13504 dtrace_err_verbose
= 1;
13506 if ((dof
= dtrace_dof_property(c
)) == NULL
) {
13507 dtrace_err_verbose
= 0;
13512 * We want to create anonymous state, so we need to transition
13513 * the kernel debugger to indicate that DTrace is active. If
13514 * this fails (e.g. because the debugger has modified text in
13515 * some way), we won't continue with the processing.
13517 if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE
) != 0) {
13518 cmn_err(CE_NOTE
, "kernel debugger active; anonymous "
13519 "enabling ignored.");
13520 dtrace_dof_destroy(dof
);
13525 * If we haven't allocated an anonymous state, we'll do so now.
13527 if ((state
= dtrace_anon
.dta_state
) == NULL
) {
13528 rv
= dtrace_state_create(NULL
, NULL
, &state
);
13529 dtrace_anon
.dta_state
= state
;
13530 if (rv
!= 0 || state
== NULL
) {
13532 * This basically shouldn't happen: the only
13533 * failure mode from dtrace_state_create() is a
13534 * failure of ddi_soft_state_zalloc() that
13535 * itself should never happen. Still, the
13536 * interface allows for a failure mode, and
13537 * we want to fail as gracefully as possible:
13538 * we'll emit an error message and cease
13539 * processing anonymous state in this case.
13541 cmn_err(CE_WARN
, "failed to create "
13542 "anonymous state");
13543 dtrace_dof_destroy(dof
);
13548 rv
= dtrace_dof_slurp(dof
, &state
->dts_vstate
, CRED(),
13549 &dtrace_anon
.dta_enabling
, 0, B_TRUE
);
13552 rv
= dtrace_dof_options(dof
, state
);
13554 dtrace_err_verbose
= 0;
13555 dtrace_dof_destroy(dof
);
13559 * This is malformed DOF; chuck any anonymous state
13562 ASSERT(dtrace_anon
.dta_enabling
== NULL
);
13563 dtrace_state_destroy(state
);
13564 dtrace_anon
.dta_state
= NULL
;
13568 ASSERT(dtrace_anon
.dta_enabling
!= NULL
);
13571 if (dtrace_anon
.dta_enabling
!= NULL
) {
13575 * dtrace_enabling_retain() can only fail because we are
13576 * trying to retain more enablings than are allowed -- but
13577 * we only have one anonymous enabling, and we are guaranteed
13578 * to be allowed at least one retained enabling; we assert
13579 * that dtrace_enabling_retain() returns success.
13581 rval
= dtrace_enabling_retain(dtrace_anon
.dta_enabling
);
13584 dtrace_enabling_dump(dtrace_anon
.dta_enabling
);
13589 * DTrace Helper Functions
13592 dtrace_helper_trace(dtrace_helper_action_t
*helper
,
13593 dtrace_mstate_t
*mstate
, dtrace_vstate_t
*vstate
, int where
)
13595 uint32_t size
, next
, nnext
;
13597 dtrace_helptrace_t
*ent
;
13598 uint16_t flags
= cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
;
13600 if (!dtrace_helptrace_enabled
)
13603 ASSERT((uint32_t)vstate
->dtvs_nlocals
<= dtrace_helptrace_nlocals
);
13606 * What would a tracing framework be without its own tracing
13607 * framework? (Well, a hell of a lot simpler, for starters...)
13609 size
= sizeof (dtrace_helptrace_t
) + dtrace_helptrace_nlocals
*
13610 sizeof (uint64_t) - sizeof (uint64_t);
13613 * Iterate until we can allocate a slot in the trace buffer.
13616 next
= dtrace_helptrace_next
;
13618 if (next
+ size
< dtrace_helptrace_bufsize
) {
13619 nnext
= next
+ size
;
13623 } while (dtrace_cas32(&dtrace_helptrace_next
, next
, nnext
) != next
);
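/*
 * Illustrative sketch (not part of the original source): the lock-free slot
 * reservation used above.  The cursor is advanced with a 32-bit
 * compare-and-swap; if another CPU raced in and moved the cursor first, the
 * CAS fails and the loop retries from the freshly observed value, wrapping
 * to the start of the buffer when the entry would not fit.  reserve_slot()
 * and try_cas32() are hypothetical names; the real code uses dtrace_cas32()
 * on dtrace_helptrace_next.
 */
#if 0
static uint32_t
reserve_slot(volatile uint32_t *cursor, uint32_t size, uint32_t bufsize,
    uint32_t (*try_cas32)(volatile uint32_t *, uint32_t, uint32_t))
{
	uint32_t next, nnext;

	do {
		next = *cursor;

		/* Wrap to the start of the buffer if the entry won't fit. */
		nnext = (next + size < bufsize) ? next + size : size;
	} while (try_cas32(cursor, next, nnext) != next);

	/* The reserved slot begins at 'next', or at 0 after a wrap. */
	return (nnext == size ? 0 : next);
}
#endif	/* illustrative sketch */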
13626 * We have our slot; fill it in.
13631 ent
= (dtrace_helptrace_t
*)&dtrace_helptrace_buffer
[next
];
13632 ent
->dtht_helper
= helper
;
13633 ent
->dtht_where
= where
;
13634 ent
->dtht_nlocals
= vstate
->dtvs_nlocals
;
13636 ent
->dtht_fltoffs
= (mstate
->dtms_present
& DTRACE_MSTATE_FLTOFFS
) ?
13637 mstate
->dtms_fltoffs
: -1;
13638 ent
->dtht_fault
= DTRACE_FLAGS2FLT(flags
);
13639 ent
->dtht_illval
= cpu_core
[CPU
->cpu_id
].cpuc_dtrace_illval
;
13641 for (i
= 0; i
< vstate
->dtvs_nlocals
; i
++) {
13642 dtrace_statvar_t
*svar
;
13644 if ((svar
= vstate
->dtvs_locals
[i
]) == NULL
)
13647 ASSERT(svar
->dtsv_size
>= (int)NCPU
* sizeof (uint64_t));
13648 ent
->dtht_locals
[i
] =
13649 ((uint64_t *)(uintptr_t)svar
->dtsv_data
)[CPU
->cpu_id
];
13654 dtrace_helper(int which
, dtrace_mstate_t
*mstate
,
13655 dtrace_state_t
*state
, uint64_t arg0
, uint64_t arg1
)
13657 uint16_t *flags
= &cpu_core
[CPU
->cpu_id
].cpuc_dtrace_flags
;
13658 uint64_t sarg0
= mstate
->dtms_arg
[0];
13659 uint64_t sarg1
= mstate
->dtms_arg
[1];
13661 dtrace_helpers_t
*helpers
= curproc
->p_dtrace_helpers
;
13662 dtrace_helper_action_t
*helper
;
13663 dtrace_vstate_t
*vstate
;
13664 dtrace_difo_t
*pred
;
13665 int i
, trace
= dtrace_helptrace_enabled
;
13667 ASSERT(which
>= 0 && which
< DTRACE_NHELPER_ACTIONS
);
13669 if (helpers
== NULL
)
13672 if ((helper
= helpers
->dthps_actions
[which
]) == NULL
)
13675 vstate
= &helpers
->dthps_vstate
;
13676 mstate
->dtms_arg
[0] = arg0
;
13677 mstate
->dtms_arg
[1] = arg1
;
13680 * Now iterate over each helper. If its predicate evaluates to 'true',
13681 * we'll call the corresponding actions. Note that the below calls
13682 * to dtrace_dif_emulate() may set faults in machine state. This is
13683 * okay: our caller (the outer dtrace_dif_emulate()) will simply plow
13684 * the stored DIF offset with its own (which is the desired behavior).
13685 * Also, note the calls to dtrace_dif_emulate() may allocate scratch
13686 * from machine state; this is okay, too.
13688 for (; helper
!= NULL
; helper
= helper
->dtha_next
) {
13689 if ((pred
= helper
->dtha_predicate
) != NULL
) {
13691 dtrace_helper_trace(helper
, mstate
, vstate
, 0);
13693 if (!dtrace_dif_emulate(pred
, mstate
, vstate
, state
))
13696 if (*flags
& CPU_DTRACE_FAULT
)
13700 for (i
= 0; i
< helper
->dtha_nactions
; i
++) {
13702 dtrace_helper_trace(helper
,
13703 mstate
, vstate
, i
+ 1);
13705 rval
= dtrace_dif_emulate(helper
->dtha_actions
[i
],
13706 mstate
, vstate
, state
);
13708 if (*flags
& CPU_DTRACE_FAULT
)
13714 dtrace_helper_trace(helper
, mstate
, vstate
,
13715 DTRACE_HELPTRACE_NEXT
);
13719 dtrace_helper_trace(helper
, mstate
, vstate
,
13720 DTRACE_HELPTRACE_DONE
);
13723 * Restore the arg0 that we saved upon entry.
13725 mstate
->dtms_arg
[0] = sarg0
;
13726 mstate
->dtms_arg
[1] = sarg1
;
13732 dtrace_helper_trace(helper
, mstate
, vstate
,
13733 DTRACE_HELPTRACE_ERR
);
13736 * Restore the arg0 that we saved upon entry.
13738 mstate
->dtms_arg
[0] = sarg0
;
13739 mstate
->dtms_arg
[1] = sarg1
;
13745 dtrace_helper_action_destroy(dtrace_helper_action_t
*helper
,
13746 dtrace_vstate_t
*vstate
)
13750 if (helper
->dtha_predicate
!= NULL
)
13751 dtrace_difo_release(helper
->dtha_predicate
, vstate
);
13753 for (i
= 0; i
< helper
->dtha_nactions
; i
++) {
13754 ASSERT(helper
->dtha_actions
[i
] != NULL
);
13755 dtrace_difo_release(helper
->dtha_actions
[i
], vstate
);
13758 kmem_free(helper
->dtha_actions
,
13759 helper
->dtha_nactions
* sizeof (dtrace_difo_t
*));
13760 kmem_free(helper
, sizeof (dtrace_helper_action_t
));
13764 dtrace_helper_destroygen(proc_t
* p
, int gen
)
13766 dtrace_helpers_t
*help
= p
->p_dtrace_helpers
;
13767 dtrace_vstate_t
*vstate
;
13770 lck_mtx_assert(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
);
13772 if (help
== NULL
|| gen
> help
->dthps_generation
)
13775 vstate
= &help
->dthps_vstate
;
13777 for (i
= 0; i
< DTRACE_NHELPER_ACTIONS
; i
++) {
13778 dtrace_helper_action_t
*last
= NULL
, *h
, *next
;
13780 for (h
= help
->dthps_actions
[i
]; h
!= NULL
; h
= next
) {
13781 next
= h
->dtha_next
;
13783 if (h
->dtha_generation
== gen
) {
13784 if (last
!= NULL
) {
13785 last
->dtha_next
= next
;
13787 help
->dthps_actions
[i
] = next
;
13790 dtrace_helper_action_destroy(h
, vstate
);
13798 * Iterate until we've cleared out all helper providers with the
13799 * given generation number.
13802 dtrace_helper_provider_t
*prov
= NULL
;
13805 * Look for a helper provider with the right generation. We
13806 * have to start back at the beginning of the list each time
13807 * because we drop dtrace_lock. It's unlikely that we'll make
13808 * more than two passes.
13810 for (i
= 0; i
< help
->dthps_nprovs
; i
++) {
13811 prov
= help
->dthps_provs
[i
];
13813 if (prov
->dthp_generation
== gen
)
13818 * If there were no matches, we're done.
13820 if (i
== help
->dthps_nprovs
)
13824 * Move the last helper provider into this slot.
13826 help
->dthps_nprovs
--;
13827 help
->dthps_provs
[i
] = help
->dthps_provs
[help
->dthps_nprovs
];
13828 help
->dthps_provs
[help
->dthps_nprovs
] = NULL
;
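/*
 * Illustrative sketch (not part of the original source): the O(1)
 * "swap with last" removal performed above.  Order within the array is not
 * preserved, which is acceptable for the helper-provider list.
 * swap_remove() is a hypothetical name for this sketch only.
 */
#if 0
static void
swap_remove(void **array, uint_t *count, uint_t idx)
{
	(*count)--;
	array[idx] = array[*count];	/* move the last entry into the hole */
	array[*count] = NULL;		/* and clear the vacated slot */
}
#endif	/* illustrative sketch */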
13830 lck_mtx_unlock(&dtrace_lock
);
13833 * If we have a meta provider, remove this helper provider.
13835 lck_mtx_lock(&dtrace_meta_lock
);
13836 if (dtrace_meta_pid
!= NULL
) {
13837 ASSERT(dtrace_deferred_pid
== NULL
);
13838 dtrace_helper_provider_remove(&prov
->dthp_prov
,
13841 lck_mtx_unlock(&dtrace_meta_lock
);
13843 dtrace_helper_provider_destroy(prov
);
13845 lck_mtx_lock(&dtrace_lock
);
13852 dtrace_helper_validate(dtrace_helper_action_t
*helper
)
13857 if ((dp
= helper
->dtha_predicate
) != NULL
)
13858 err
+= dtrace_difo_validate_helper(dp
);
13860 for (i
= 0; i
< helper
->dtha_nactions
; i
++)
13861 err
+= dtrace_difo_validate_helper(helper
->dtha_actions
[i
]);
static int
dtrace_helper_action_add(proc_t *p, int which, dtrace_ecbdesc_t *ep)
{
    dtrace_helpers_t *help;
    dtrace_helper_action_t *helper, *last;
    dtrace_actdesc_t *act;
    dtrace_vstate_t *vstate;
    dtrace_predicate_t *pred;
    int count = 0, nactions = 0, i;

    if (which < 0 || which >= DTRACE_NHELPER_ACTIONS)
        return (EINVAL);

    help = p->p_dtrace_helpers;
    last = help->dthps_actions[which];
    vstate = &help->dthps_vstate;

    for (count = 0; last != NULL; last = last->dtha_next) {
        count++;
        if (last->dtha_next == NULL)
            break;
    }

    /*
     * If we already have dtrace_helper_actions_max helper actions for this
     * helper action type, we'll refuse to add a new one.
     */
    if (count >= dtrace_helper_actions_max)
        return (ENOSPC);

    helper = kmem_zalloc(sizeof (dtrace_helper_action_t), KM_SLEEP);
    helper->dtha_generation = help->dthps_generation;

    if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) {
        ASSERT(pred->dtp_difo != NULL);
        dtrace_difo_hold(pred->dtp_difo);
        helper->dtha_predicate = pred->dtp_difo;
    }

    for (act = ep->dted_action; act != NULL; act = act->dtad_next) {
        if (act->dtad_kind != DTRACEACT_DIFEXPR)
            goto err;

        if (act->dtad_difo == NULL)
            goto err;

        nactions++;
    }

    helper->dtha_actions = kmem_zalloc(sizeof (dtrace_difo_t *) *
        (helper->dtha_nactions = nactions), KM_SLEEP);

    for (act = ep->dted_action, i = 0; act != NULL; act = act->dtad_next) {
        dtrace_difo_hold(act->dtad_difo);
        helper->dtha_actions[i++] = act->dtad_difo;
    }

    if (!dtrace_helper_validate(helper))
        goto err;

    if (last == NULL) {
        help->dthps_actions[which] = helper;
    } else {
        last->dtha_next = helper;
    }

    if ((uint32_t)vstate->dtvs_nlocals > dtrace_helptrace_nlocals) {
        dtrace_helptrace_nlocals = vstate->dtvs_nlocals;
        dtrace_helptrace_next = 0;
    }

    return (0);
err:
    dtrace_helper_action_destroy(helper, vstate);
    return (EINVAL);
}
static void
dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help,
    dof_helper_t *dofhp)
{
    uint_t i;

    lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED);

    lck_mtx_lock(&dtrace_meta_lock);
    lck_mtx_lock(&dtrace_lock);

    if (!dtrace_attached() || dtrace_meta_pid == NULL) {
        /*
         * If the dtrace module is loaded but not attached, or if
         * there isn't a meta provider registered to deal with
         * these provider descriptions, we need to postpone creating
         * the actual providers until later.
         */
        if (help->dthps_next == NULL && help->dthps_prev == NULL &&
            dtrace_deferred_pid != help) {
            help->dthps_deferred = 1;
            help->dthps_pid = p->p_pid;
            help->dthps_next = dtrace_deferred_pid;
            help->dthps_prev = NULL;
            if (dtrace_deferred_pid != NULL)
                dtrace_deferred_pid->dthps_prev = help;
            dtrace_deferred_pid = help;
        }

        lck_mtx_unlock(&dtrace_lock);

    } else if (dofhp != NULL) {
        /*
         * If the dtrace module is loaded and we have a particular
         * helper provider description, pass that off to the
         * meta provider.
         */
        lck_mtx_unlock(&dtrace_lock);

        dtrace_helper_provide(dofhp, p->p_pid);

    } else {
        /*
         * Otherwise, just pass all the helper provider descriptions
         * off to the meta provider.
         */
        lck_mtx_unlock(&dtrace_lock);

        for (i = 0; i < help->dthps_nprovs; i++) {
            dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
                p->p_pid);
        }
    }

    lck_mtx_unlock(&dtrace_meta_lock);
}
static int
dtrace_helper_provider_add(proc_t *p, dof_helper_t *dofhp, int gen)
{
    dtrace_helpers_t *help;
    dtrace_helper_provider_t *hprov, **tmp_provs;
    uint_t tmp_maxprovs, i;

    lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
    help = p->p_dtrace_helpers;
    ASSERT(help != NULL);

    /*
     * If we already have dtrace_helper_providers_max helper providers,
     * we refuse to add a new one.
     */
    if (help->dthps_nprovs >= dtrace_helper_providers_max)
        return (ENOSPC);

    /*
     * Check to make sure this isn't a duplicate.
     */
    for (i = 0; i < help->dthps_nprovs; i++) {
        if (dofhp->dofhp_addr ==
            help->dthps_provs[i]->dthp_prov.dofhp_addr)
            return (EALREADY);
    }

    hprov = kmem_zalloc(sizeof (dtrace_helper_provider_t), KM_SLEEP);
    hprov->dthp_prov = *dofhp;
    hprov->dthp_ref = 1;
    hprov->dthp_generation = gen;

    /*
     * Allocate a bigger table for helper providers if it's already full.
     */
    if (help->dthps_maxprovs == help->dthps_nprovs) {
        tmp_maxprovs = help->dthps_maxprovs;
        tmp_provs = help->dthps_provs;

        if (help->dthps_maxprovs == 0)
            help->dthps_maxprovs = 2;
        else
            help->dthps_maxprovs *= 2;
        if (help->dthps_maxprovs > dtrace_helper_providers_max)
            help->dthps_maxprovs = dtrace_helper_providers_max;

        ASSERT(tmp_maxprovs < help->dthps_maxprovs);

        help->dthps_provs = kmem_zalloc(help->dthps_maxprovs *
            sizeof (dtrace_helper_provider_t *), KM_SLEEP);

        if (tmp_provs != NULL) {
            bcopy(tmp_provs, help->dthps_provs, tmp_maxprovs *
                sizeof (dtrace_helper_provider_t *));
            kmem_free(tmp_provs, tmp_maxprovs *
                sizeof (dtrace_helper_provider_t *));
        }
    }

    help->dthps_provs[help->dthps_nprovs] = hprov;
    help->dthps_nprovs++;

    return (0);
}
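/*
 * Note on the sizing policy above: the per-process helper provider table
 * grows by doubling (0 -> 2 -> 4 -> ...) and is clamped at
 * dtrace_helper_providers_max; the existing entries are copied into the new
 * table and the old table is freed before the new provider is appended.
 */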
static void
dtrace_helper_provider_destroy(dtrace_helper_provider_t *hprov)
{
    lck_mtx_lock(&dtrace_lock);

    if (--hprov->dthp_ref == 0) {
        dof_hdr_t *dof;

        lck_mtx_unlock(&dtrace_lock);
        dof = (dof_hdr_t *)(uintptr_t)hprov->dthp_prov.dofhp_dof;
        dtrace_dof_destroy(dof);
        kmem_free(hprov, sizeof (dtrace_helper_provider_t));
    } else {
        lck_mtx_unlock(&dtrace_lock);
    }
}
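/*
 * The routine below validates a DOF provider section before it is turned
 * into a helper provider: it checks section alignment and sizes, provider
 * and probe name lengths against the string table, and that the offset,
 * is-enabled offset, and argument indices cannot wrap or overflow their
 * sections.
 */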
static int
dtrace_helper_provider_validate(dof_hdr_t *dof, dof_sec_t *sec)
{
    uintptr_t daddr = (uintptr_t)dof;
    dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
    dof_provider_t *provider;
    dof_probe_t *probe;
    uint8_t *arg;
    char *strtab, *typestr;
    dof_stridx_t typeidx;
    size_t typesz;
    uint_t nprobes, j, k;

    ASSERT(sec->dofs_type == DOF_SECT_PROVIDER);

    if (sec->dofs_offset & (sizeof (uint_t) - 1)) {
        dtrace_dof_error(dof, "misaligned section offset");
        return (-1);
    }

    /*
     * The section needs to be large enough to contain the DOF provider
     * structure appropriate for the given version.
     */
    if (sec->dofs_size <
        ((dof->dofh_ident[DOF_ID_VERSION] == DOF_VERSION_1) ?
        offsetof(dof_provider_t, dofpv_prenoffs) :
        sizeof (dof_provider_t))) {
        dtrace_dof_error(dof, "provider section too small");
        return (-1);
    }

    provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
    str_sec = dtrace_dof_sect(dof, DOF_SECT_STRTAB, provider->dofpv_strtab);
    prb_sec = dtrace_dof_sect(dof, DOF_SECT_PROBES, provider->dofpv_probes);
    arg_sec = dtrace_dof_sect(dof, DOF_SECT_PRARGS, provider->dofpv_prargs);
    off_sec = dtrace_dof_sect(dof, DOF_SECT_PROFFS, provider->dofpv_proffs);

    if (str_sec == NULL || prb_sec == NULL ||
        arg_sec == NULL || off_sec == NULL)
        return (-1);

    enoff_sec = NULL;

    if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
        provider->dofpv_prenoffs != DOF_SECT_NONE &&
        (enoff_sec = dtrace_dof_sect(dof, DOF_SECT_PRENOFFS,
        provider->dofpv_prenoffs)) == NULL)
        return (-1);

    strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);

    if (provider->dofpv_name >= str_sec->dofs_size ||
        strlen(strtab + provider->dofpv_name) >= DTRACE_PROVNAMELEN) {
        dtrace_dof_error(dof, "invalid provider name");
        return (-1);
    }

    if (prb_sec->dofs_entsize == 0 ||
        prb_sec->dofs_entsize > prb_sec->dofs_size) {
        dtrace_dof_error(dof, "invalid entry size");
        return (-1);
    }

    if (prb_sec->dofs_entsize & (sizeof (uintptr_t) - 1)) {
        dtrace_dof_error(dof, "misaligned entry size");
        return (-1);
    }

    if (off_sec->dofs_entsize != sizeof (uint32_t)) {
        dtrace_dof_error(dof, "invalid entry size");
        return (-1);
    }

    if (off_sec->dofs_offset & (sizeof (uint32_t) - 1)) {
        dtrace_dof_error(dof, "misaligned section offset");
        return (-1);
    }

    if (arg_sec->dofs_entsize != sizeof (uint8_t)) {
        dtrace_dof_error(dof, "invalid entry size");
        return (-1);
    }

    arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);

    nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;

    /*
     * Take a pass through the probes to check for errors.
     */
    for (j = 0; j < nprobes; j++) {
        probe = (dof_probe_t *)(uintptr_t)(daddr +
            prb_sec->dofs_offset + j * prb_sec->dofs_entsize);

        if (probe->dofpr_func >= str_sec->dofs_size) {
            dtrace_dof_error(dof, "invalid function name");
            return (-1);
        }

        if (strlen(strtab + probe->dofpr_func) >= DTRACE_FUNCNAMELEN) {
            dtrace_dof_error(dof, "function name too long");
            return (-1);
        }

        if (probe->dofpr_name >= str_sec->dofs_size ||
            strlen(strtab + probe->dofpr_name) >= DTRACE_NAMELEN) {
            dtrace_dof_error(dof, "invalid probe name");
            return (-1);
        }

        /*
         * The offset count must not wrap the index, and the offsets
         * must also not overflow the section's data.
         */
        if (probe->dofpr_offidx + probe->dofpr_noffs <
            probe->dofpr_offidx ||
            (probe->dofpr_offidx + probe->dofpr_noffs) *
            off_sec->dofs_entsize > off_sec->dofs_size) {
            dtrace_dof_error(dof, "invalid probe offset");
            return (-1);
        }

        if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1) {
            /*
             * If there's no is-enabled offset section, make sure
             * there aren't any is-enabled offsets. Otherwise
             * perform the same checks as for probe offsets
             * (immediately above).
             */
            if (enoff_sec == NULL) {
                if (probe->dofpr_enoffidx != 0 ||
                    probe->dofpr_nenoffs != 0) {
                    dtrace_dof_error(dof, "is-enabled "
                        "offsets with null section");
                    return (-1);
                }
            } else if (probe->dofpr_enoffidx +
                probe->dofpr_nenoffs < probe->dofpr_enoffidx ||
                (probe->dofpr_enoffidx + probe->dofpr_nenoffs) *
                enoff_sec->dofs_entsize > enoff_sec->dofs_size) {
                dtrace_dof_error(dof, "invalid is-enabled "
                    "offsets");
                return (-1);
            }

            if (probe->dofpr_noffs + probe->dofpr_nenoffs == 0) {
                dtrace_dof_error(dof, "zero probe and "
                    "is-enabled offsets");
                return (-1);
            }
        } else if (probe->dofpr_noffs == 0) {
            dtrace_dof_error(dof, "zero probe offsets");
            return (-1);
        }

        if (probe->dofpr_argidx + probe->dofpr_xargc <
            probe->dofpr_argidx ||
            (probe->dofpr_argidx + probe->dofpr_xargc) *
            arg_sec->dofs_entsize > arg_sec->dofs_size) {
            dtrace_dof_error(dof, "invalid args");
            return (-1);
        }

        typeidx = probe->dofpr_nargv;
        typestr = strtab + probe->dofpr_nargv;
        for (k = 0; k < probe->dofpr_nargc; k++) {
            if (typeidx >= str_sec->dofs_size) {
                dtrace_dof_error(dof, "bad "
                    "native argument type");
                return (-1);
            }

            typesz = strlen(typestr) + 1;
            if (typesz > DTRACE_ARGTYPELEN) {
                dtrace_dof_error(dof, "native "
                    "argument type too long");
                return (-1);
            }

            typeidx += typesz;
            typestr += typesz;
        }

        typeidx = probe->dofpr_xargv;
        typestr = strtab + probe->dofpr_xargv;
        for (k = 0; k < probe->dofpr_xargc; k++) {
            if (arg[probe->dofpr_argidx + k] > probe->dofpr_nargc) {
                dtrace_dof_error(dof, "bad "
                    "native argument index");
                return (-1);
            }

            if (typeidx >= str_sec->dofs_size) {
                dtrace_dof_error(dof, "bad "
                    "translated argument type");
                return (-1);
            }

            typesz = strlen(typestr) + 1;
            if (typesz > DTRACE_ARGTYPELEN) {
                dtrace_dof_error(dof, "translated argument "
                    "type too long");
                return (-1);
            }

            typeidx += typesz;
            typestr += typesz;
        }
    }

    return (0);
}
static int
dtrace_helper_slurp(proc_t *p, dof_hdr_t *dof, dof_helper_t *dhp)
{
    dtrace_helpers_t *help;
    dtrace_vstate_t *vstate;
    dtrace_enabling_t *enab = NULL;
    int i, gen, rv, nhelpers = 0, nprovs = 0, destroy = 1;
    uintptr_t daddr = (uintptr_t)dof;

    lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

    if ((help = p->p_dtrace_helpers) == NULL)
        help = dtrace_helpers_create(p);

    vstate = &help->dthps_vstate;

    if ((rv = dtrace_dof_slurp(dof, vstate, NULL, &enab,
        dhp != NULL ? dhp->dofhp_addr : 0, B_FALSE)) != 0) {
        dtrace_dof_destroy(dof);
        return (rv);
    }

    /*
     * Look for helper providers and validate their descriptions.
     */
    if (dhp != NULL) {
        for (i = 0; (uint32_t)i < dof->dofh_secnum; i++) {
            dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
                dof->dofh_secoff + i * dof->dofh_secsize);

            if (sec->dofs_type != DOF_SECT_PROVIDER)
                continue;

            if (dtrace_helper_provider_validate(dof, sec) != 0) {
                dtrace_enabling_destroy(enab);
                dtrace_dof_destroy(dof);
                return (-1);
            }

            nprovs++;
        }
    }

    /*
     * Now we need to walk through the ECB descriptions in the enabling.
     */
    for (i = 0; i < enab->dten_ndesc; i++) {
        dtrace_ecbdesc_t *ep = enab->dten_desc[i];
        dtrace_probedesc_t *desc = &ep->dted_probe;

        /* APPLE NOTE: Darwin employs size bounded string operation. */
        if (!LIT_STRNEQL(desc->dtpd_provider, "dtrace"))
            continue;

        if (!LIT_STRNEQL(desc->dtpd_mod, "helper"))
            continue;

        if (!LIT_STRNEQL(desc->dtpd_func, "ustack"))
            continue;

        if ((rv = dtrace_helper_action_add(p, DTRACE_HELPER_ACTION_USTACK,
            ep)) != 0) {
            /*
             * Adding this helper action failed -- we are now going
             * to rip out the entire generation and return failure.
             */
            (void) dtrace_helper_destroygen(p, help->dthps_generation);
            dtrace_enabling_destroy(enab);
            dtrace_dof_destroy(dof);
            return (-1);
        }

        nhelpers++;
    }

    if (nhelpers < enab->dten_ndesc)
        dtrace_dof_error(dof, "unmatched helpers");

    gen = help->dthps_generation++;
    dtrace_enabling_destroy(enab);

    if (dhp != NULL && nprovs > 0) {
        dhp->dofhp_dof = (uint64_t)(uintptr_t)dof;
        if (dtrace_helper_provider_add(p, dhp, gen) == 0) {
            lck_mtx_unlock(&dtrace_lock);
            dtrace_helper_provider_register(p, help, dhp);
            lck_mtx_lock(&dtrace_lock);

            destroy = 0;
        }
    }

    if (destroy)
        dtrace_dof_destroy(dof);

    return (gen);
}
/*
 * APPLE NOTE: DTrace lazy dof implementation
 *
 * DTrace user static probes (USDT probes) and helper actions are loaded
 * in a process by processing dof sections. The dof sections are passed
 * into the kernel by dyld, in a dof_ioctl_data_t block. It is rather
 * expensive to process dof for a process that will never use it. There
 * is a memory cost (allocating the providers/probes), and a cpu cost
 * (creating the providers/probes).
 *
 * To reduce this cost, we use "lazy dof". The normal procedure for
 * dof processing is to copyin the dof(s) pointed to by the dof_ioctl_data_t
 * block, and invoke dtrace_helper_slurp() on them. When "lazy dof" is
 * used, each process retains the dof_ioctl_data_t block, instead of
 * copying in the data it points to.
 *
 * The dof_ioctl_data_t blocks are managed as if they were the actual
 * processed dof; on fork the block is copied to the child, on exec and
 * exit the block is freed.
 *
 * If the process loads library(s) containing additional dof, the
 * new dof_ioctl_data_t is merged with the existing block.
 *
 * There are a few catches that make this slightly more difficult.
 * When dyld registers dof_ioctl_data_t blocks, it expects a unique
 * identifier value for each dof in the block. In non-lazy dof terms,
 * this is the generation that dof was loaded in. If we hand back
 * a UID for a lazy dof, that same UID must be able to unload the
 * dof once it has become non-lazy. To meet this requirement, the
 * code that loads lazy dof requires that the UID's for dof(s) in
 * the lazy dof be sorted, and in ascending order. It is okay to skip
 * UID's, i.e., 1 -> 5 -> 6 is legal.
 *
 * Once a process has become non-lazy, it will stay non-lazy. All
 * future dof operations for that process will be non-lazy, even
 * if the dof mode transitions back to lazy.
 *
 * Always do lazy dof checks before non-lazy (i.e. in fork, exit, exec.).
 * That way if the lazy check fails due to transitioning to non-lazy, the
 * right thing is done with the newly faulted in dof.
 */

/*
 * This method is a bit squicky. It must handle:
 *
 * dof should not be lazy.
 * dof should have been handled lazily, but there was an error
 * dof was handled lazily, and needs to be freed.
 * dof was handled lazily, and must not be freed.
 *
 * Returns EACCES if dof should be handled non-lazily.
 *
 * KERN_SUCCESS and all other return codes indicate lazy handling of dof.
 *
 * If the dofs data is claimed by this method, dofs_claimed will be set.
 * Callers should not free claimed dofs.
 */
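/*
 * For example (illustration only): if a process already holds lazy dofs
 * with generations {1, 2} and dyld registers a block containing three more
 * dofs, the routine below assigns them generations {3, 4, 5}, keeping the
 * combined block sorted in ascending generation order.
 */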
static int
dtrace_lazy_dofs_add(proc_t *p, dof_ioctl_data_t *incoming_dofs, int *dofs_claimed)
{
    int rval = 0;

    ASSERT(incoming_dofs && incoming_dofs->dofiod_count > 0);

    *dofs_claimed = 0;

    lck_rw_lock_shared(&dtrace_dof_mode_lock);

    /*
     * If we have lazy dof, dof mode better be LAZY_ON.
     */
    ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON);
    ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);
    ASSERT(dtrace_dof_mode != DTRACE_DOF_MODE_NEVER);

    /*
     * Any existing helpers force non-lazy behavior.
     */
    if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON && (p->p_dtrace_helpers == NULL)) {
        lck_mtx_lock(&p->p_dtrace_sprlock);

        dof_ioctl_data_t *existing_dofs = p->p_dtrace_lazy_dofs;
        unsigned int existing_dofs_count = (existing_dofs) ? existing_dofs->dofiod_count : 0;
        unsigned int i, merged_dofs_count = incoming_dofs->dofiod_count + existing_dofs_count;

        if (merged_dofs_count == 0 || merged_dofs_count > 1024) {
            dtrace_dof_error(NULL, "lazy_dofs_add merged_dofs_count out of range");
            rval = EINVAL;
            goto unlock;
        }

        /*
         * Each dof being added must be assigned a unique generation.
         */
        uint64_t generation = (existing_dofs) ?
            existing_dofs->dofiod_helpers[existing_dofs_count - 1].dofhp_dof + 1 : 1;
        for (i = 0; i < incoming_dofs->dofiod_count; i++) {
            /*
             * We rely on these being the same so we can overwrite dofhp_dof and not lose info.
             */
            ASSERT(incoming_dofs->dofiod_helpers[i].dofhp_dof == incoming_dofs->dofiod_helpers[i].dofhp_addr);
            incoming_dofs->dofiod_helpers[i].dofhp_dof = generation++;
        }

        if (existing_dofs) {
            /*
             * Merge the existing and incoming dofs
             */
            size_t merged_dofs_size = DOF_IOCTL_DATA_T_SIZE(merged_dofs_count);
            dof_ioctl_data_t *merged_dofs = kmem_alloc(merged_dofs_size, KM_SLEEP);

            bcopy(&existing_dofs->dofiod_helpers[0],
                &merged_dofs->dofiod_helpers[0],
                sizeof(dof_helper_t) * existing_dofs_count);
            bcopy(&incoming_dofs->dofiod_helpers[0],
                &merged_dofs->dofiod_helpers[existing_dofs_count],
                sizeof(dof_helper_t) * incoming_dofs->dofiod_count);

            merged_dofs->dofiod_count = merged_dofs_count;

            kmem_free(existing_dofs, DOF_IOCTL_DATA_T_SIZE(existing_dofs_count));

            p->p_dtrace_lazy_dofs = merged_dofs;
        } else {
            /*
             * Claim the incoming dofs
             */
            *dofs_claimed = 1;
            p->p_dtrace_lazy_dofs = incoming_dofs;
        }

        dof_ioctl_data_t *all_dofs = p->p_dtrace_lazy_dofs;
        for (i = 0; i < all_dofs->dofiod_count - 1; i++) {
            ASSERT(all_dofs->dofiod_helpers[i].dofhp_dof < all_dofs->dofiod_helpers[i+1].dofhp_dof);
        }

unlock:
        lck_mtx_unlock(&p->p_dtrace_sprlock);
    } else {
        rval = EACCES;
    }

    lck_rw_unlock_shared(&dtrace_dof_mode_lock);

    return rval;
}
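/*
 * Callers of dtrace_lazy_dofs_add() treat EACCES as "fall back to the
 * non-lazy path" (see dtrace_ioctl_helper() below); any other return means
 * the dofs were handled lazily, and *dofs_claimed tells the caller whether
 * ownership of the incoming block was taken by this routine.
 */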
/*
 * EINVAL: lazy dof is enabled, but the requested generation was not found.
 * EACCES: This removal needs to be handled non-lazily.
 */
static int
dtrace_lazy_dofs_remove(proc_t *p, int generation)
{
    int rval = EINVAL;

    lck_rw_lock_shared(&dtrace_dof_mode_lock);

    /*
     * If we have lazy dof, dof mode better be LAZY_ON.
     */
    ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON);
    ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);
    ASSERT(dtrace_dof_mode != DTRACE_DOF_MODE_NEVER);

    /*
     * Any existing helpers force non-lazy behavior.
     */
    if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON && (p->p_dtrace_helpers == NULL)) {
        lck_mtx_lock(&p->p_dtrace_sprlock);

        dof_ioctl_data_t *existing_dofs = p->p_dtrace_lazy_dofs;

        if (existing_dofs) {
            int index, existing_dofs_count = existing_dofs->dofiod_count;
            for (index = 0; index < existing_dofs_count; index++) {
                if ((int)existing_dofs->dofiod_helpers[index].dofhp_dof == generation) {
                    dof_ioctl_data_t *removed_dofs = NULL;

                    /*
                     * If there is only 1 dof, we'll delete it and swap in NULL.
                     */
                    if (existing_dofs_count > 1) {
                        int removed_dofs_count = existing_dofs_count - 1;
                        size_t removed_dofs_size = DOF_IOCTL_DATA_T_SIZE(removed_dofs_count);

                        removed_dofs = kmem_alloc(removed_dofs_size, KM_SLEEP);
                        removed_dofs->dofiod_count = removed_dofs_count;

                        /*
                         * copy the remaining data.
                         */
                        bcopy(&existing_dofs->dofiod_helpers[0],
                            &removed_dofs->dofiod_helpers[0],
                            index * sizeof(dof_helper_t));

                        if (index < existing_dofs_count - 1) {
                            bcopy(&existing_dofs->dofiod_helpers[index+1],
                                &removed_dofs->dofiod_helpers[index],
                                (existing_dofs_count - index - 1) * sizeof(dof_helper_t));
                        }
                    }

                    kmem_free(existing_dofs, DOF_IOCTL_DATA_T_SIZE(existing_dofs_count));

                    p->p_dtrace_lazy_dofs = removed_dofs;

                    rval = KERN_SUCCESS;

                    break;
                }
            }

            dof_ioctl_data_t *all_dofs = p->p_dtrace_lazy_dofs;
            if (all_dofs) {
                unsigned int i;
                for (i = 0; i < all_dofs->dofiod_count - 1; i++) {
                    ASSERT(all_dofs->dofiod_helpers[i].dofhp_dof < all_dofs->dofiod_helpers[i+1].dofhp_dof);
                }
            }
        }

        lck_mtx_unlock(&p->p_dtrace_sprlock);
    } else {
        rval = EACCES;
    }

    lck_rw_unlock_shared(&dtrace_dof_mode_lock);

    return rval;
}
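/*
 * Note on the removal above: when a matching generation is found, the
 * surviving entries are compacted into a freshly allocated, one-smaller
 * block (entries before the victim, then entries after it), which keeps the
 * ascending-generation invariant intact.
 */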
void
dtrace_lazy_dofs_destroy(proc_t *p)
{
    lck_rw_lock_shared(&dtrace_dof_mode_lock);
    lck_mtx_lock(&p->p_dtrace_sprlock);

    /*
     * If we have lazy dof, dof mode better be LAZY_ON, or we must be exiting.
     * We cannot assert against DTRACE_DOF_MODE_NEVER here, because we are called from
     * kern_exit.c and kern_exec.c.
     */
    ASSERT(p->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON || p->p_lflag & P_LEXIT);
    ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);

    dof_ioctl_data_t *lazy_dofs = p->p_dtrace_lazy_dofs;
    p->p_dtrace_lazy_dofs = NULL;

    lck_mtx_unlock(&p->p_dtrace_sprlock);
    lck_rw_unlock_shared(&dtrace_dof_mode_lock);

    if (lazy_dofs) {
        kmem_free(lazy_dofs, DOF_IOCTL_DATA_T_SIZE(lazy_dofs->dofiod_count));
    }
}
void
dtrace_lazy_dofs_duplicate(proc_t *parent, proc_t *child)
{
    lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED);
    lck_mtx_assert(&parent->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED);
    lck_mtx_assert(&child->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED);

    lck_rw_lock_shared(&dtrace_dof_mode_lock);
    lck_mtx_lock(&parent->p_dtrace_sprlock);

    /*
     * If we have lazy dof, dof mode better be LAZY_ON, or we must be exiting.
     * We cannot assert against DTRACE_DOF_MODE_NEVER here, because we are called from
     * kern_fork.c
     */
    ASSERT(parent->p_dtrace_lazy_dofs == NULL || dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON);
    ASSERT(parent->p_dtrace_lazy_dofs == NULL || parent->p_dtrace_helpers == NULL);
    /*
     * In theory we should hold the child sprlock, but this is safe...
     */
    ASSERT(child->p_dtrace_lazy_dofs == NULL && child->p_dtrace_helpers == NULL);

    dof_ioctl_data_t *parent_dofs = parent->p_dtrace_lazy_dofs;
    dof_ioctl_data_t *child_dofs = NULL;
    if (parent_dofs) {
        size_t parent_dofs_size = DOF_IOCTL_DATA_T_SIZE(parent_dofs->dofiod_count);
        child_dofs = kmem_alloc(parent_dofs_size, KM_SLEEP);
        bcopy(parent_dofs, child_dofs, parent_dofs_size);
    }

    lck_mtx_unlock(&parent->p_dtrace_sprlock);

    if (child_dofs) {
        lck_mtx_lock(&child->p_dtrace_sprlock);
        child->p_dtrace_lazy_dofs = child_dofs;
        lck_mtx_unlock(&child->p_dtrace_sprlock);
    }

    lck_rw_unlock_shared(&dtrace_dof_mode_lock);
}
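/*
 * The two routines below are the filter and callout handed to proc_iterate()
 * when the dof mode transitions from LAZY_ON to LAZY_OFF (see dtrace_open()
 * below): the filter cheaply skips processes that hold no lazy dofs, and the
 * "doit" routine faults in and slurps each retained dof_helper_t.
 */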
static int
dtrace_lazy_dofs_proc_iterate_filter(proc_t *p, void* ignored)
{
#pragma unused(ignored)
    /*
     * Okay to NULL test without taking the sprlock.
     */
    return p->p_dtrace_lazy_dofs != NULL;
}
static int
dtrace_lazy_dofs_proc_iterate_doit(proc_t *p, void* ignored)
{
#pragma unused(ignored)
    /*
     * It is possible this process may exit during our attempt to
     * fault in the dof. We could fix this by holding locks longer,
     * but the errors are benign.
     */
    lck_mtx_lock(&p->p_dtrace_sprlock);

    /*
     * In this case only, it is okay to have lazy dof when dof mode is DTRACE_DOF_MODE_LAZY_OFF
     */
    ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);
    ASSERT(dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF);

    dof_ioctl_data_t *lazy_dofs = p->p_dtrace_lazy_dofs;
    p->p_dtrace_lazy_dofs = NULL;

    lck_mtx_unlock(&p->p_dtrace_sprlock);

    /*
     * Process each dof_helper_t
     */
    if (lazy_dofs != NULL) {
        unsigned int i;
        int rval;

        for (i = 0; i < lazy_dofs->dofiod_count; i++) {
            /*
             * When loading lazy dof, we depend on the generations being sorted in ascending order.
             */
            ASSERT(i >= (lazy_dofs->dofiod_count - 1) ||
                lazy_dofs->dofiod_helpers[i].dofhp_dof < lazy_dofs->dofiod_helpers[i+1].dofhp_dof);

            dof_helper_t *dhp = &lazy_dofs->dofiod_helpers[i];

            /*
             * We stored the generation in dofhp_dof. Save it, and restore the original value.
             */
            int generation = dhp->dofhp_dof;
            dhp->dofhp_dof = dhp->dofhp_addr;

            dof_hdr_t *dof = dtrace_dof_copyin_from_proc(p, dhp->dofhp_dof, &rval);

            if (dof != NULL) {
                dtrace_helpers_t *help;

                lck_mtx_lock(&dtrace_lock);

                /*
                 * This must be done with the dtrace_lock held
                 */
                if ((help = p->p_dtrace_helpers) == NULL)
                    help = dtrace_helpers_create(p);

                /*
                 * If the generation value has been bumped, someone snuck in
                 * when we released the dtrace lock. We have to dump this generation,
                 * there is no safe way to load it.
                 */
                if (help->dthps_generation <= generation) {
                    help->dthps_generation = generation;

                    /*
                     * dtrace_helper_slurp() takes responsibility for the dof --
                     * it may free it now or it may save it and free it later.
                     */
                    if ((rval = dtrace_helper_slurp(p, dof, dhp)) != generation) {
                        dtrace_dof_error(NULL, "returned value did not match expected generation");
                    }
                }

                lck_mtx_unlock(&dtrace_lock);
            }
        }

        kmem_free(lazy_dofs, DOF_IOCTL_DATA_T_SIZE(lazy_dofs->dofiod_count));
    }

    return PROC_RETURNED;
}
static dtrace_helpers_t *
dtrace_helpers_create(proc_t *p)
{
    dtrace_helpers_t *help;

    lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
    ASSERT(p->p_dtrace_helpers == NULL);

    help = kmem_zalloc(sizeof (dtrace_helpers_t), KM_SLEEP);
    help->dthps_actions = kmem_zalloc(sizeof (dtrace_helper_action_t *) *
        DTRACE_NHELPER_ACTIONS, KM_SLEEP);

    p->p_dtrace_helpers = help;
    dtrace_helpers++;

    return (help);
}
static void
dtrace_helpers_destroy(proc_t *p)
{
    dtrace_helpers_t *help;
    dtrace_vstate_t *vstate;
    uint_t i;

    lck_mtx_lock(&dtrace_lock);

    ASSERT(p->p_dtrace_helpers != NULL);
    ASSERT(dtrace_helpers > 0);

    help = p->p_dtrace_helpers;
    vstate = &help->dthps_vstate;

    /*
     * We're now going to lose the help from this process.
     */
    p->p_dtrace_helpers = NULL;
    dtrace_sync();

    /*
     * Destroy the helper actions.
     */
    for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
        dtrace_helper_action_t *h, *next;

        for (h = help->dthps_actions[i]; h != NULL; h = next) {
            next = h->dtha_next;
            dtrace_helper_action_destroy(h, vstate);
        }
    }

    lck_mtx_unlock(&dtrace_lock);

    /*
     * Destroy the helper providers.
     */
    if (help->dthps_maxprovs > 0) {
        lck_mtx_lock(&dtrace_meta_lock);
        if (dtrace_meta_pid != NULL) {
            ASSERT(dtrace_deferred_pid == NULL);

            for (i = 0; i < help->dthps_nprovs; i++) {
                dtrace_helper_provider_remove(
                    &help->dthps_provs[i]->dthp_prov, p->p_pid);
            }
        } else {
            lck_mtx_lock(&dtrace_lock);
            ASSERT(help->dthps_deferred == 0 ||
                help->dthps_next != NULL ||
                help->dthps_prev != NULL ||
                help == dtrace_deferred_pid);

            /*
             * Remove the helper from the deferred list.
             */
            if (help->dthps_next != NULL)
                help->dthps_next->dthps_prev = help->dthps_prev;
            if (help->dthps_prev != NULL)
                help->dthps_prev->dthps_next = help->dthps_next;
            if (dtrace_deferred_pid == help) {
                dtrace_deferred_pid = help->dthps_next;
                ASSERT(help->dthps_prev == NULL);
            }

            lck_mtx_unlock(&dtrace_lock);
        }

        lck_mtx_unlock(&dtrace_meta_lock);

        for (i = 0; i < help->dthps_nprovs; i++) {
            dtrace_helper_provider_destroy(help->dthps_provs[i]);
        }

        kmem_free(help->dthps_provs, help->dthps_maxprovs *
            sizeof (dtrace_helper_provider_t *));
    }

    lck_mtx_lock(&dtrace_lock);

    dtrace_vstate_fini(&help->dthps_vstate);
    kmem_free(help->dthps_actions,
        sizeof (dtrace_helper_action_t *) * DTRACE_NHELPER_ACTIONS);
    kmem_free(help, sizeof (dtrace_helpers_t));

    --dtrace_helpers;
    lck_mtx_unlock(&dtrace_lock);
}
static void
dtrace_helpers_duplicate(proc_t *from, proc_t *to)
{
    dtrace_helpers_t *help, *newhelp;
    dtrace_helper_action_t *helper, *new, *last;
    dtrace_difo_t *dp;
    dtrace_vstate_t *vstate;
    uint_t i;
    int j, sz, hasprovs = 0;

    lck_mtx_lock(&dtrace_lock);
    ASSERT(from->p_dtrace_helpers != NULL);
    ASSERT(dtrace_helpers > 0);

    help = from->p_dtrace_helpers;
    newhelp = dtrace_helpers_create(to);
    ASSERT(to->p_dtrace_helpers != NULL);

    newhelp->dthps_generation = help->dthps_generation;
    vstate = &newhelp->dthps_vstate;

    /*
     * Duplicate the helper actions.
     */
    for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
        if ((helper = help->dthps_actions[i]) == NULL)
            continue;

        for (last = NULL; helper != NULL; helper = helper->dtha_next) {
            new = kmem_zalloc(sizeof (dtrace_helper_action_t),
                KM_SLEEP);
            new->dtha_generation = helper->dtha_generation;

            if ((dp = helper->dtha_predicate) != NULL) {
                dp = dtrace_difo_duplicate(dp, vstate);
                new->dtha_predicate = dp;
            }

            new->dtha_nactions = helper->dtha_nactions;
            sz = sizeof (dtrace_difo_t *) * new->dtha_nactions;
            new->dtha_actions = kmem_alloc(sz, KM_SLEEP);

            for (j = 0; j < new->dtha_nactions; j++) {
                dtrace_difo_t *dpj = helper->dtha_actions[j];

                ASSERT(dpj != NULL);
                dpj = dtrace_difo_duplicate(dpj, vstate);
                new->dtha_actions[j] = dpj;
            }

            if (last != NULL) {
                last->dtha_next = new;
            } else {
                newhelp->dthps_actions[i] = new;
            }

            last = new;
        }
    }

    /*
     * Duplicate the helper providers and register them with the
     * DTrace framework.
     */
    if (help->dthps_nprovs > 0) {
        newhelp->dthps_nprovs = help->dthps_nprovs;
        newhelp->dthps_maxprovs = help->dthps_nprovs;
        newhelp->dthps_provs = kmem_alloc(newhelp->dthps_nprovs *
            sizeof (dtrace_helper_provider_t *), KM_SLEEP);
        for (i = 0; i < newhelp->dthps_nprovs; i++) {
            newhelp->dthps_provs[i] = help->dthps_provs[i];
            newhelp->dthps_provs[i]->dthp_ref++;
        }

        hasprovs = 1;
    }

    lck_mtx_unlock(&dtrace_lock);

    if (hasprovs)
        dtrace_helper_provider_register(to, newhelp, NULL);
}
/*
 * DTrace Hook Functions
 */

/*
 * APPLE NOTE: dtrace_modctl_* routines for kext support.
 * Used to manipulate the modctl list within dtrace xnu.
 */
modctl_t *dtrace_modctl_list;
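/*
 * dtrace_modctl_list is protected by mod_lock; each of the dtrace_modctl_*
 * routines below asserts that the caller holds it.
 */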
static void
dtrace_modctl_add(struct modctl * newctl)
{
    struct modctl *nextp, *prevp;

    ASSERT(newctl != NULL);
    lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED);

    // Insert new module at the front of the list,

    newctl->mod_next = dtrace_modctl_list;
    dtrace_modctl_list = newctl;

    /*
     * If a module exists with the same name, then that module
     * must have been unloaded with enabled probes. We will move
     * the unloaded module to the new module's stale chain and
     * then stop traversing the list.
     */

    prevp = newctl;
    nextp = newctl->mod_next;

    while (nextp != NULL) {
        if (nextp->mod_loaded) {
            /* This is a loaded module. Keep traversing. */
            prevp = nextp;
            nextp = nextp->mod_next;
            continue;
        }

        /* Found an unloaded module */
        if (strncmp (newctl->mod_modname, nextp->mod_modname, KMOD_MAX_NAME)) {
            /* Names don't match. Keep traversing. */
            prevp = nextp;
            nextp = nextp->mod_next;
            continue;
        }

        /* We found a stale entry, move it. We're done. */
        prevp->mod_next = nextp->mod_next;
        newctl->mod_stale = nextp;
        nextp->mod_next = NULL;
        break;
    }
}
static modctl_t *
dtrace_modctl_lookup(struct kmod_info * kmod)
{
    lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED);

    struct modctl * ctl;

    for (ctl = dtrace_modctl_list; ctl; ctl = ctl->mod_next) {
        if (ctl->mod_id == kmod->id)
            return (ctl);
    }
    return (NULL);
}
/*
 * This routine is called from dtrace_module_unloaded().
 * It removes a modctl structure and its stale chain
 * from the kext shadow list.
 */
static void
dtrace_modctl_remove(struct modctl * ctl)
{
    ASSERT(ctl != NULL);
    lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED);
    modctl_t *prevp, *nextp, *curp;

    // Remove stale chain first
    for (curp = ctl->mod_stale; curp != NULL; curp = nextp) {
        nextp = curp->mod_stale;
        /* There should NEVER be user symbols allocated at this point */
        ASSERT(curp->mod_user_symbols == NULL);
        kmem_free(curp, sizeof(modctl_t));
    }

    prevp = NULL;
    curp = dtrace_modctl_list;

    while (curp != ctl) {
        prevp = curp;
        curp = curp->mod_next;
    }

    if (prevp != NULL) {
        prevp->mod_next = ctl->mod_next;
    }
    else {
        dtrace_modctl_list = ctl->mod_next;
    }

    /* There should NEVER be user symbols allocated at this point */
    ASSERT(ctl->mod_user_symbols == NULL);

    kmem_free (ctl, sizeof(modctl_t));
}
/*
 * APPLE NOTE: The kext loader will call dtrace_module_loaded
 * when the kext is loaded in memory, but before calling the
 * kext's start routine.
 *
 * Return 0 on success
 * Return -1 on failure
 */
static int
dtrace_module_loaded(struct kmod_info *kmod, uint32_t flag)
{
    dtrace_provider_t *prv;

    /*
     * If kernel symbols have been disabled, return immediately
     * DTRACE_KERNEL_SYMBOLS_NEVER is a permanent mode, it is safe to test without holding locks
     */
    if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER)
        return 0;

    struct modctl *ctl = NULL;
    if (!kmod || kmod->address == 0 || kmod->size == 0)
        return (-1);

    lck_mtx_lock(&dtrace_provider_lock);
    lck_mtx_lock(&mod_lock);

    /*
     * Have we seen this kext before?
     */
    ctl = dtrace_modctl_lookup(kmod);

    if (ctl != NULL) {
        /* bail... we already have this kext in the modctl list */
        lck_mtx_unlock(&mod_lock);
        lck_mtx_unlock(&dtrace_provider_lock);
        if (dtrace_err_verbose)
            cmn_err(CE_WARN, "dtrace load module already exists '%s %u' is failing against '%s %u'", kmod->name, (uint_t)kmod->id, ctl->mod_modname, ctl->mod_id);
        return (-1);
    } else {
        ctl = kmem_alloc(sizeof(struct modctl), KM_SLEEP);
        if (ctl == NULL) {
            if (dtrace_err_verbose)
                cmn_err(CE_WARN, "dtrace module load '%s %u' is failing ", kmod->name, (uint_t)kmod->id);
            lck_mtx_unlock(&mod_lock);
            lck_mtx_unlock(&dtrace_provider_lock);
            return (-1);
        }
        ctl->mod_next = NULL;
        ctl->mod_stale = NULL;
        strlcpy (ctl->mod_modname, kmod->name, sizeof(ctl->mod_modname));
        ctl->mod_loadcnt = kmod->id;
        ctl->mod_nenabled = 0;
        ctl->mod_address = kmod->address;
        ctl->mod_size = kmod->size;
        ctl->mod_id = kmod->id;
        ctl->mod_loaded = 1;
        ctl->mod_flags = 0;
        ctl->mod_user_symbols = NULL;

        /*
         * Find the UUID for this module, if it has one
         */
        kernel_mach_header_t* header = (kernel_mach_header_t *)ctl->mod_address;
        struct load_command* load_cmd = (struct load_command *)&header[1];
        uint32_t i;
        for (i = 0; i < header->ncmds; i++) {
            if (load_cmd->cmd == LC_UUID) {
                struct uuid_command* uuid_cmd = (struct uuid_command *)load_cmd;
                memcpy(ctl->mod_uuid, uuid_cmd->uuid, sizeof(uuid_cmd->uuid));
                ctl->mod_flags |= MODCTL_HAS_UUID;
                break;
            }
            load_cmd = (struct load_command *)((caddr_t)load_cmd + load_cmd->cmdsize);
        }

        if (ctl->mod_address == g_kernel_kmod_info.address) {
            ctl->mod_flags |= MODCTL_IS_MACH_KERNEL;
        }
    }
    dtrace_modctl_add(ctl);

    /*
     * We must hold the dtrace_lock to safely test non permanent dtrace_fbt_symbol_mode(s)
     */
    lck_mtx_lock(&dtrace_lock);

    /*
     * DTrace must decide if it will instrument modules lazily via
     * userspace symbols (default mode), or instrument immediately via
     * kernel symbols (non-default mode)
     *
     * When in default/lazy mode, DTrace will only support modules
     * built with a valid UUID.
     *
     * Overriding the default can be done explicitly in one of
     * the following two ways.
     *
     * A module can force symbols from kernel space using the plist key,
     * OSBundleForceDTraceInit (see kmod.h). If this per kext state is set,
     * we fall through and instrument this module now.
     *
     * Or, the boot-arg, dtrace_kernel_symbol_mode, can be set to force symbols
     * from kernel space (see dtrace_impl.h). If this system state is set
     * to a non-userspace mode, we fall through and instrument the module now.
     */
    if ((dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) &&
        (!(flag & KMOD_DTRACE_FORCE_INIT))) {
        /* We will instrument the module lazily -- this is the default */
        lck_mtx_unlock(&dtrace_lock);
        lck_mtx_unlock(&mod_lock);
        lck_mtx_unlock(&dtrace_provider_lock);
        return 0;
    }

    /* We will instrument the module immediately using kernel symbols */
    ctl->mod_flags |= MODCTL_HAS_KERNEL_SYMBOLS;

    lck_mtx_unlock(&dtrace_lock);

    /*
     * We're going to call each providers per-module provide operation
     * specifying only this module.
     */
    for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
        prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);

    /*
     * APPLE NOTE: The contract with the kext loader is that once this function
     * has completed, it may delete kernel symbols at will.
     * We must set this while still holding the mod_lock.
     */
    ctl->mod_flags &= ~MODCTL_HAS_KERNEL_SYMBOLS;

    lck_mtx_unlock(&mod_lock);
    lck_mtx_unlock(&dtrace_provider_lock);

    /*
     * If we have any retained enablings, we need to match against them.
     * Enabling probes requires that cpu_lock be held, and we cannot hold
     * cpu_lock here -- it is legal for cpu_lock to be held when loading a
     * module. (In particular, this happens when loading scheduling
     * classes.) So if we have any retained enablings, we need to dispatch
     * our task queue to do the match for us.
     */
    lck_mtx_lock(&dtrace_lock);

    if (dtrace_retained == NULL) {
        lck_mtx_unlock(&dtrace_lock);
        return 0;
    }

    /*
     * APPLE NOTE:
     * The cpu_lock mentioned above is only held by dtrace code, Apple's xnu never actually
     * holds it for any reason. Thus the comment above is invalid, we can directly invoke
     * dtrace_enabling_matchall without jumping through all the hoops, and we can avoid
     * the delay call as well.
     */
    lck_mtx_unlock(&dtrace_lock);

    dtrace_enabling_matchall();

    return 0;
}
/*
 * Return 0 on success
 * Return -1 on failure
 */
static int
dtrace_module_unloaded(struct kmod_info *kmod)
{
    dtrace_probe_t template, *probe, *first, *next;
    dtrace_provider_t *prov;
    struct modctl *ctl = NULL;
    struct modctl *syncctl = NULL;
    struct modctl *nextsyncctl = NULL;
    int syncmode = 0;

    lck_mtx_lock(&dtrace_provider_lock);
    lck_mtx_lock(&mod_lock);
    lck_mtx_lock(&dtrace_lock);

    if (kmod == NULL) {
        syncmode = 1;
    } else {
        ctl = dtrace_modctl_lookup(kmod);
        if (ctl == NULL) {
            lck_mtx_unlock(&dtrace_lock);
            lck_mtx_unlock(&mod_lock);
            lck_mtx_unlock(&dtrace_provider_lock);
            return (-1);
        }
        ctl->mod_loaded = 0;
        ctl->mod_address = 0;
        ctl->mod_size = 0;
    }

    if (dtrace_bymod == NULL) {
        /*
         * The DTrace module is loaded (obviously) but not attached;
         * we don't have any work to do.
         */
        if (ctl != NULL)
            (void)dtrace_modctl_remove(ctl);
        lck_mtx_unlock(&dtrace_lock);
        lck_mtx_unlock(&mod_lock);
        lck_mtx_unlock(&dtrace_provider_lock);
        return (0);
    }

    /* Syncmode set means we target and traverse entire modctl list. */
    if (syncmode)
        nextsyncctl = dtrace_modctl_list;

syncloop:
    if (syncmode) {
        /* find a stale modctl struct */
        for (syncctl = nextsyncctl; syncctl != NULL; syncctl = syncctl->mod_next) {
            if (syncctl->mod_address == 0)
                break;
        }

        if (syncctl == NULL) {
            /* We have no more work to do */
            lck_mtx_unlock(&dtrace_lock);
            lck_mtx_unlock(&mod_lock);
            lck_mtx_unlock(&dtrace_provider_lock);
            return (0);
        } else {
            /* keep track of next syncctl in case this one is removed */
            nextsyncctl = syncctl->mod_next;
            ctl = syncctl;
        }
    }

    template.dtpr_mod = ctl->mod_modname;

    for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template);
        probe != NULL; probe = probe->dtpr_nextmod) {
        if (probe->dtpr_ecb != NULL) {
            /*
             * This shouldn't _actually_ be possible -- we're
             * unloading a module that has an enabled probe in it.
             * (It's normally up to the provider to make sure that
             * this can't happen.) However, because dtps_enable()
             * doesn't have a failure mode, there can be an
             * enable/unload race. Upshot: we don't want to
             * assert, but we're not going to disable the
             * probe, either.
             */
            if (syncmode) {
                /* We're syncing, let's look at next in list */
                goto syncloop;
            }

            lck_mtx_unlock(&dtrace_lock);
            lck_mtx_unlock(&mod_lock);
            lck_mtx_unlock(&dtrace_provider_lock);

            if (dtrace_err_verbose) {
                cmn_err(CE_WARN, "unloaded module '%s' had "
                    "enabled probes", ctl->mod_modname);
            }
            return (-1);
        }
    }

    probe = first;

    for (first = NULL; probe != NULL; probe = next) {
        ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe);

        dtrace_probes[probe->dtpr_id - 1] = NULL;
        probe->dtpr_provider->dtpv_probe_count--;

        next = probe->dtpr_nextmod;
        dtrace_hash_remove(dtrace_bymod, probe);
        dtrace_hash_remove(dtrace_byfunc, probe);
        dtrace_hash_remove(dtrace_byname, probe);

        if (first == NULL) {
            first = probe;
            probe->dtpr_nextmod = NULL;
        } else {
            probe->dtpr_nextmod = first;
            first = probe;
        }
    }

    /*
     * We've removed all of the module's probes from the hash chains and
     * from the probe array. Now issue a dtrace_sync() to be sure that
     * everyone has cleared out from any probe array processing.
     */
    dtrace_sync();

    for (probe = first; probe != NULL; probe = first) {
        first = probe->dtpr_nextmod;
        prov = probe->dtpr_provider;
        prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id,
            probe->dtpr_arg);
        kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
        kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
        kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
        vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1);

        zfree(dtrace_probe_t_zone, probe);
    }

    dtrace_modctl_remove(ctl);

    if (syncmode)
        goto syncloop;

    lck_mtx_unlock(&dtrace_lock);
    lck_mtx_unlock(&mod_lock);
    lck_mtx_unlock(&dtrace_provider_lock);

    return (0);
}
void
dtrace_suspend(void)
{
    dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend));
}

void
dtrace_resume(void)
{
    dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_resume));
}
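/*
 * Note: dtrace_suspend() and dtrace_resume() are installed as the
 * dtrace_cpustart_init/fini and dtrace_debugger_init/fini hooks in
 * dtrace_attach() below, so every provider's dtps_suspend/dtps_resume
 * entry point is invoked around CPU start and debugger entry.
 */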
static int
dtrace_cpu_setup(cpu_setup_t what, processorid_t cpu)
{
    lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
    lck_mtx_lock(&dtrace_lock);

    switch (what) {
    case CPU_CONFIG: {
        dtrace_state_t *state;
        dtrace_optval_t *opt, rs, c;

        /*
         * For now, we only allocate a new buffer for anonymous state.
         */
        if ((state = dtrace_anon.dta_state) == NULL)
            break;

        if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
            break;

        opt = state->dts_options;
        c = opt[DTRACEOPT_CPU];

        if (c != DTRACE_CPUALL && c != DTRACEOPT_UNSET && c != cpu)
            break;

        /*
         * Regardless of what the actual policy is, we're going to
         * temporarily set our resize policy to be manual. We're
         * also going to temporarily set our CPU option to denote
         * the newly configured CPU.
         */
        rs = opt[DTRACEOPT_BUFRESIZE];
        opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_MANUAL;
        opt[DTRACEOPT_CPU] = (dtrace_optval_t)cpu;

        (void) dtrace_state_buffers(state);

        opt[DTRACEOPT_BUFRESIZE] = rs;
        opt[DTRACEOPT_CPU] = c;

        break;
    }

    case CPU_UNCONFIG:
        /*
         * We don't free the buffer in the CPU_UNCONFIG case. (The
         * buffer will be freed when the consumer exits.)
         */
        break;

    default:
        break;
    }

    lck_mtx_unlock(&dtrace_lock);
    return (0);
}
static void
dtrace_cpu_setup_initial(processorid_t cpu)
{
    (void) dtrace_cpu_setup(CPU_CONFIG, cpu);
}
static void
dtrace_toxrange_add(uintptr_t base, uintptr_t limit)
{
    if (dtrace_toxranges >= dtrace_toxranges_max) {
        size_t osize, nsize;
        dtrace_toxrange_t *range;

        osize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);

        if (osize == 0) {
            ASSERT(dtrace_toxrange == NULL);
            ASSERT(dtrace_toxranges_max == 0);
            dtrace_toxranges_max = 1;
        } else {
            dtrace_toxranges_max <<= 1;
        }

        nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
        range = kmem_zalloc(nsize, KM_SLEEP);

        if (dtrace_toxrange != NULL) {
            ASSERT(osize != 0);
            bcopy(dtrace_toxrange, range, osize);
            kmem_free(dtrace_toxrange, osize);
        }

        dtrace_toxrange = range;
    }

    ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == 0);
    ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == 0);

    dtrace_toxrange[dtrace_toxranges].dtt_base = base;
    dtrace_toxrange[dtrace_toxranges].dtt_limit = limit;
    dtrace_toxranges++;
}
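/*
 * Note: the toxic-range table above grows by doubling (1, 2, 4, 8, ...)
 * starting from a single entry, with the old table copied forward and freed
 * each time it fills.
 */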
/*
 * DTrace Driver Cookbook Functions
 */
static int
dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
#pragma unused(cmd) /* __APPLE__ */
    dtrace_provider_id_t id;
    dtrace_state_t *state = NULL;
    dtrace_enabling_t *enab;

    lck_mtx_lock(&cpu_lock);
    lck_mtx_lock(&dtrace_provider_lock);
    lck_mtx_lock(&dtrace_lock);

    if (ddi_soft_state_init(&dtrace_softstate,
        sizeof (dtrace_state_t), 0) != 0) {
        cmn_err(CE_NOTE, "/dev/dtrace failed to initialize soft state");
        lck_mtx_unlock(&dtrace_lock);
        lck_mtx_unlock(&dtrace_provider_lock);
        lck_mtx_unlock(&cpu_lock);
        return (DDI_FAILURE);
    }

    /* Darwin uses BSD cloning device driver to automagically obtain minor device number. */

    ddi_report_dev(devi);
    dtrace_devi = devi;

    dtrace_modload = dtrace_module_loaded;
    dtrace_modunload = dtrace_module_unloaded;
    dtrace_cpu_init = dtrace_cpu_setup_initial;
    dtrace_helpers_cleanup = dtrace_helpers_destroy;
    dtrace_helpers_fork = dtrace_helpers_duplicate;
    dtrace_cpustart_init = dtrace_suspend;
    dtrace_cpustart_fini = dtrace_resume;
    dtrace_debugger_init = dtrace_suspend;
    dtrace_debugger_fini = dtrace_resume;

    register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);

    lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);

    dtrace_arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1,
        NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
    dtrace_minor = vmem_create("dtrace_minor", (void *)DTRACEMNRN_CLONE,
        UINT32_MAX - DTRACEMNRN_CLONE, 1, NULL, NULL, NULL, 0,
        VM_SLEEP | VMC_IDENTIFIER);
    dtrace_taskq = taskq_create("dtrace_taskq", 1, maxclsyspri,
        1, INT_MAX, 0);

    dtrace_state_cache = kmem_cache_create("dtrace_state_cache",
        sizeof (dtrace_dstate_percpu_t) * (int)NCPU, DTRACE_STATE_ALIGN,
        NULL, NULL, NULL, NULL, NULL, 0);

    lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
    dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod),
        offsetof(dtrace_probe_t, dtpr_nextmod),
        offsetof(dtrace_probe_t, dtpr_prevmod));

    dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func),
        offsetof(dtrace_probe_t, dtpr_nextfunc),
        offsetof(dtrace_probe_t, dtpr_prevfunc));

    dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name),
        offsetof(dtrace_probe_t, dtpr_nextname),
        offsetof(dtrace_probe_t, dtpr_prevname));

    if (dtrace_retain_max < 1) {
        cmn_err(CE_WARN, "illegal value (%lu) for dtrace_retain_max; "
            "setting to 1", dtrace_retain_max);
        dtrace_retain_max = 1;
    }

    /*
     * Now discover our toxic ranges.
     */
    dtrace_toxic_ranges(dtrace_toxrange_add);

    /*
     * Before we register ourselves as a provider to our own framework,
     * we would like to assert that dtrace_provider is NULL -- but that's
     * not true if we were loaded as a dependency of a DTrace provider.
     * Once we've registered, we can assert that dtrace_provider is our
     * pseudo provider.
     */
    (void) dtrace_register("dtrace", &dtrace_provider_attr,
        DTRACE_PRIV_NONE, 0, &dtrace_provider_ops, NULL, &id);

    ASSERT(dtrace_provider != NULL);
    ASSERT((dtrace_provider_id_t)dtrace_provider == id);

#if defined (__x86_64__)
    dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
        dtrace_provider, NULL, NULL, "BEGIN", 1, NULL);
    dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
        dtrace_provider, NULL, NULL, "END", 0, NULL);
    dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
        dtrace_provider, NULL, NULL, "ERROR", 3, NULL);
#else
#error Unknown Architecture
#endif

    dtrace_anon_property();
    lck_mtx_unlock(&cpu_lock);

    /*
     * If DTrace helper tracing is enabled, we need to allocate the
     * trace buffer and initialize the values.
     */
    if (dtrace_helptrace_enabled) {
        ASSERT(dtrace_helptrace_buffer == NULL);
        dtrace_helptrace_buffer =
            kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP);
        dtrace_helptrace_next = 0;
    }

    /*
     * If there are already providers, we must ask them to provide their
     * probes, and then match any anonymous enabling against them. Note
     * that there should be no other retained enablings at this time:
     * the only retained enablings at this time should be the anonymous
     * enabling.
     */
    if (dtrace_anon.dta_enabling != NULL) {
        ASSERT(dtrace_retained == dtrace_anon.dta_enabling);

        /*
         * APPLE NOTE: if handling anonymous dof, switch symbol modes.
         */
        if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) {
            dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_KERNEL;
        }

        dtrace_enabling_provide(NULL);
        state = dtrace_anon.dta_state;

        /*
         * We couldn't hold cpu_lock across the above call to
         * dtrace_enabling_provide(), but we must hold it to actually
         * enable the probes. We have to drop all of our locks, pick
         * up cpu_lock, and regain our locks before matching the
         * retained anonymous enabling.
         */
        lck_mtx_unlock(&dtrace_lock);
        lck_mtx_unlock(&dtrace_provider_lock);

        lck_mtx_lock(&cpu_lock);
        lck_mtx_lock(&dtrace_provider_lock);
        lck_mtx_lock(&dtrace_lock);

        if ((enab = dtrace_anon.dta_enabling) != NULL)
            (void) dtrace_enabling_match(enab, NULL);

        lck_mtx_unlock(&cpu_lock);
    }

    lck_mtx_unlock(&dtrace_lock);
    lck_mtx_unlock(&dtrace_provider_lock);

    if (state != NULL) {
        /*
         * If we created any anonymous state, set it going now.
         */
        (void) dtrace_state_go(state, &dtrace_anon.dta_beganon);
    }

    return (DDI_SUCCESS);
}
static int
dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
#pragma unused(flag, otyp)
    dtrace_state_t *state;
    uint32_t priv;
    uid_t uid;
    zoneid_t zoneid;
    int rv;

    /* APPLE: Darwin puts Helper on its own major device. */

    /*
     * If no DTRACE_PRIV_* bits are set in the credential, then the
     * caller lacks sufficient permission to do anything with DTrace.
     */
    dtrace_cred2priv(cred_p, &priv, &uid, &zoneid);
    if (priv == DTRACE_PRIV_NONE)
        return (EACCES);

    /*
     * APPLE NOTE: We delay the initialization of fasttrap as late as possible.
     * It certainly can't be later than now!
     */
    fasttrap_init();

    /*
     * Ask all providers to provide all their probes.
     */
    lck_mtx_lock(&dtrace_provider_lock);
    dtrace_probe_provide(NULL, NULL);
    lck_mtx_unlock(&dtrace_provider_lock);

    lck_mtx_lock(&cpu_lock);
    lck_mtx_lock(&dtrace_lock);
    dtrace_opens++;
    dtrace_membar_producer();

    /*
     * If the kernel debugger is active (that is, if the kernel debugger
     * modified text in some way), we won't allow the open.
     */
    if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
        dtrace_opens--;
        lck_mtx_unlock(&dtrace_lock);
        lck_mtx_unlock(&cpu_lock);
        return (EBUSY);
    }

    rv = dtrace_state_create(devp, cred_p, &state);
    lck_mtx_unlock(&cpu_lock);

    if (rv != 0 || state == NULL) {
        if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
            (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
        lck_mtx_unlock(&dtrace_lock);
        /* propagate EAGAIN or ERESTART */
        return (rv);
    }

    lck_mtx_unlock(&dtrace_lock);

    lck_rw_lock_exclusive(&dtrace_dof_mode_lock);

    /*
     * If we are currently lazy, transition states.
     *
     * Unlike dtrace_close, we do not need to check the
     * value of dtrace_opens, as any positive value (and
     * we count as 1) means we transition states.
     */
    if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON) {
        dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_OFF;

        /*
         * Iterate all existing processes and load lazy dofs.
         */
        proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS,
            dtrace_lazy_dofs_proc_iterate_doit,
            NULL,
            dtrace_lazy_dofs_proc_iterate_filter,
            NULL);
    }

    lck_rw_unlock_exclusive(&dtrace_dof_mode_lock);

    /*
     * Update kernel symbol state.
     *
     * We must own the provider and dtrace locks.
     *
     * NOTE! It may appear there is a race by setting this value so late
     * after dtrace_probe_provide. However, any kext loaded after the
     * call to probe provide and before we set LAZY_OFF will be marked as
     * eligible for symbols from userspace. The same dtrace that is currently
     * calling dtrace_open() (this call!) will get a list of kexts needing
     * symbols and fill them in, thus closing the race window.
     *
     * We want to set this value only after it is certain it will succeed, as
     * this significantly reduces the complexity of error exits.
     */
    lck_mtx_lock(&dtrace_lock);
    if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) {
        dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_KERNEL;
    }
    lck_mtx_unlock(&dtrace_lock);

    return (0);
}
static int
dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
{
#pragma unused(flag, otyp, cred_p) /* __APPLE__ */
    minor_t minor = getminor(dev);
    dtrace_state_t *state;

    /* APPLE NOTE: Darwin puts Helper on its own major device. */

    state = ddi_get_soft_state(dtrace_softstate, minor);

    lck_mtx_lock(&cpu_lock);
    lck_mtx_lock(&dtrace_lock);

    if (state->dts_anon) {
        /*
         * There is anonymous state. Destroy that first.
         */
        ASSERT(dtrace_anon.dta_state == NULL);
        dtrace_state_destroy(state->dts_anon);
    }

    dtrace_state_destroy(state);
    ASSERT(dtrace_opens > 0);

    /*
     * Only relinquish control of the kernel debugger interface when there
     * are no consumers and no anonymous enablings.
     */
    if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
        (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);

    lck_mtx_unlock(&dtrace_lock);
    lck_mtx_unlock(&cpu_lock);

    /*
     * Lock ordering requires the dof mode lock be taken before
     * the dtrace_lock.
     */
    lck_rw_lock_exclusive(&dtrace_dof_mode_lock);
    lck_mtx_lock(&dtrace_lock);

    if (dtrace_opens == 0) {
        /*
         * If we are currently lazy-off, and this is the last close, transition to
         * lazy mode.
         */
        if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF) {
            dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
        }

        /*
         * If we are the last dtrace client, switch back to lazy (from userspace) symbols
         */
        if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_KERNEL) {
            dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE;
        }
    }

    lck_mtx_unlock(&dtrace_lock);
    lck_rw_unlock_exclusive(&dtrace_dof_mode_lock);

    /*
     * Kext probes may be retained past the end of the kext's lifespan. The
     * probes are kept until the last reference to them has been removed.
     * Since closing an active dtrace context is likely to drop that last reference,
     * lets take a shot at cleaning out the orphaned probes now.
     */
    dtrace_module_unloaded(NULL);

    return (0);
}
static int
dtrace_ioctl_helper(u_long cmd, caddr_t arg, int *rv)
{
#pragma unused(rv)
	/*
	 * Safe to check this outside the dof mode lock
	 */
	if (dtrace_dof_mode == DTRACE_DOF_MODE_NEVER)
		return KERN_SUCCESS;

	switch (cmd) {
	case DTRACEHIOC_ADDDOF: {
		dof_helper_t *dhp = NULL;
		size_t dof_ioctl_data_size;
		dof_ioctl_data_t *multi_dof;
		unsigned int i = 0;
		int rval = 0;
		user_addr_t user_address = *(user_addr_t *)arg;
		uint64_t dof_count;
		int multi_dof_claimed = 0;
		proc_t *p = current_proc();

		/*
		 * Read the number of DOF sections being passed in.
		 */
		if (copyin(user_address + offsetof(dof_ioctl_data_t, dofiod_count),
		    &dof_count, sizeof(dof_count))) {
			dtrace_dof_error(NULL, "failed to copyin dofiod_count");
			return (EFAULT);
		}

		/*
		 * Range check the count.
		 */
		if (dof_count == 0 || dof_count > 1024) {
			dtrace_dof_error(NULL, "dofiod_count is not valid");
			return (EINVAL);
		}

		/*
		 * Allocate a correctly sized structure and copyin the data.
		 */
		dof_ioctl_data_size = DOF_IOCTL_DATA_T_SIZE(dof_count);
		if ((multi_dof = kmem_alloc(dof_ioctl_data_size, KM_SLEEP)) == NULL)
			return (ENOMEM);

		/* NOTE! We can no longer exit this method via return */
		if (copyin(user_address, multi_dof, dof_ioctl_data_size) != 0) {
			dtrace_dof_error(NULL, "failed copyin of dof_ioctl_data_t");
			rval = EFAULT;
			goto cleanup;
		}

		/*
		 * Check that the count didn't change between the first copyin
		 * and the second.
		 */
		if (multi_dof->dofiod_count != dof_count) {
			rval = EINVAL;
			goto cleanup;
		}

		/*
		 * Try to process lazily first.
		 */
		rval = dtrace_lazy_dofs_add(p, multi_dof, &multi_dof_claimed);

		/*
		 * If rval is EACCES, we must be non-lazy.
		 */
		if (rval == EACCES) {
			rval = 0;

			/*
			 * Process each dof_helper_t
			 */
			do {
				dhp = &multi_dof->dofiod_helpers[i];

				dof_hdr_t *dof = dtrace_dof_copyin(dhp->dofhp_dof, &rval);

				if (dof != NULL) {
					lck_mtx_lock(&dtrace_lock);

					/*
					 * dtrace_helper_slurp() takes responsibility for the dof --
					 * it may free it now or it may save it and free it later.
					 */
					if ((dhp->dofhp_dof = (uint64_t)dtrace_helper_slurp(p, dof, dhp)) == -1ULL)
						rval = EINVAL;

					lck_mtx_unlock(&dtrace_lock);
				}
			} while (++i < multi_dof->dofiod_count && rval == 0);
		}

		/*
		 * We need to copyout the multi_dof struct, because it contains
		 * the generation (unique id) values needed to call DTRACEHIOC_REMOVE.
		 *
		 * This could certainly be better optimized.
		 */
		if (copyout(multi_dof, user_address, dof_ioctl_data_size) != 0) {
			dtrace_dof_error(NULL, "failed copyout of dof_ioctl_data_t");
			/* Don't overwrite pre-existing error code */
			if (rval == 0)
				rval = EFAULT;
		}

	cleanup:
		/*
		 * If we had to allocate struct memory, free it, unless the lazy
		 * path claimed ownership of the buffer.
		 */
		if (multi_dof != NULL && !multi_dof_claimed) {
			kmem_free(multi_dof, dof_ioctl_data_size);
		}

		return rval;
	}
	case DTRACEHIOC_REMOVE: {
		int generation = *(int *)arg;
		proc_t *p = current_proc();

		/*
		 * Try lazy removal first.
		 */
		int rval = dtrace_lazy_dofs_remove(p, generation);

		/*
		 * EACCES means non-lazy
		 */
		if (rval == EACCES) {
			lck_mtx_lock(&dtrace_lock);
			rval = dtrace_helper_destroygen(p, generation);
			lck_mtx_unlock(&dtrace_lock);
		}

		return (rval);
	}

	default:
		break;
	}

	return ENOTTY;
}
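/*
 * Illustrative userspace shape of the ADDDOF path above. This is a hedged
 * sketch: the field names (dofiod_count, dofiod_helpers, dofhp_dof) come from
 * the code above, but "helper_fd" and "my_dof" are assumed names, not the
 * actual libdtrace/dyld plumbing.
 *
 *	dof_ioctl_data_t *data = calloc(1, DOF_IOCTL_DATA_T_SIZE(1));
 *	data->dofiod_count = 1;
 *	data->dofiod_helpers[0].dofhp_dof = (uint64_t)(uintptr_t)my_dof;
 *	user_addr_t uaddr = (user_addr_t)(uintptr_t)data;
 *	ioctl(helper_fd, DTRACEHIOC_ADDDOF, &uaddr);
 *
 * On success the structure is copied back out with each dofhp_dof rewritten
 * to the generation id that DTRACEHIOC_REMOVE later expects.
 */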
static int
dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv)
{
#pragma unused(md)
	minor_t minor = getminor(dev);
	dtrace_state_t *state;
	int rval;

	/* Darwin puts Helper on its own major device. */

	state = ddi_get_soft_state(dtrace_softstate, minor);

	if (state->dts_anon) {
		ASSERT(dtrace_anon.dta_state == NULL);
		state = state->dts_anon;
	}

	switch (cmd) {
	case DTRACEIOC_PROVIDER: {
		dtrace_providerdesc_t pvd;
		dtrace_provider_t *pvp;

		if (copyin(arg, &pvd, sizeof (pvd)) != 0)
			return (EFAULT);

		pvd.dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0';
		lck_mtx_lock(&dtrace_provider_lock);

		for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) {
			if (strncmp(pvp->dtpv_name, pvd.dtvd_name, DTRACE_PROVNAMELEN) == 0)
				break;
		}

		lck_mtx_unlock(&dtrace_provider_lock);

		if (pvp == NULL)
			return (ESRCH);

		bcopy(&pvp->dtpv_priv, &pvd.dtvd_priv, sizeof (dtrace_ppriv_t));
		bcopy(&pvp->dtpv_attr, &pvd.dtvd_attr, sizeof (dtrace_pattr_t));

		if (copyout(&pvd, arg, sizeof (pvd)) != 0)
			return (EFAULT);

		return (0);
	}
	case DTRACEIOC_EPROBE: {
		dtrace_eprobedesc_t epdesc;
		dtrace_ecb_t *ecb;
		dtrace_action_t *act;
		void *buf;
		size_t size;
		uintptr_t dest;
		int nrecs;

		if (copyin(arg, &epdesc, sizeof (epdesc)) != 0)
			return (EFAULT);

		lck_mtx_lock(&dtrace_lock);

		if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) {
			lck_mtx_unlock(&dtrace_lock);
			return (EINVAL);
		}

		if (ecb->dte_probe == NULL) {
			lck_mtx_unlock(&dtrace_lock);
			return (EINVAL);
		}

		epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id;
		epdesc.dtepd_uarg = ecb->dte_uarg;
		epdesc.dtepd_size = ecb->dte_size;

		nrecs = epdesc.dtepd_nrecs;
		epdesc.dtepd_nrecs = 0;
		for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
			if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
				continue;

			epdesc.dtepd_nrecs++;
		}

		/*
		 * Now that we have the size, we need to allocate a temporary
		 * buffer in which to store the complete description.  We need
		 * the temporary buffer to be able to drop dtrace_lock()
		 * across the copyout(), below.
		 */
		size = sizeof (dtrace_eprobedesc_t) +
		    (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t));

		buf = kmem_alloc(size, KM_SLEEP);
		dest = (uintptr_t)buf;

		bcopy(&epdesc, (void *)dest, sizeof (epdesc));
		dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]);

		for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
			if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
				continue;

			if (nrecs-- == 0)
				break;

			bcopy(&act->dta_rec, (void *)dest,
			    sizeof (dtrace_recdesc_t));
			dest += sizeof (dtrace_recdesc_t);
		}

		lck_mtx_unlock(&dtrace_lock);

		if (copyout(buf, arg, dest - (uintptr_t)buf) != 0) {
			kmem_free(buf, size);
			return (EFAULT);
		}

		kmem_free(buf, size);
		return (0);
	}
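	/*
	 * Worked example of the sizing above (numbers are illustrative): an ECB
	 * with three non-aggregating, non-tuple actions yields dtepd_nrecs = 3,
	 * so the staging buffer is sizeof (dtrace_eprobedesc_t) +
	 * 3 * sizeof (dtrace_recdesc_t) bytes. Staging into a temporary buffer
	 * is what allows dtrace_lock to be dropped before the copyout(), which
	 * may fault and block.
	 */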
	case DTRACEIOC_AGGDESC: {
		dtrace_aggdesc_t aggdesc;
		dtrace_action_t *act;
		dtrace_aggregation_t *agg;
		int nrecs;
		uint32_t offs;
		dtrace_recdesc_t *lrec;
		void *buf;
		size_t size;
		uintptr_t dest;

		if (copyin(arg, &aggdesc, sizeof (aggdesc)) != 0)
			return (EFAULT);

		lck_mtx_lock(&dtrace_lock);

		if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) {
			lck_mtx_unlock(&dtrace_lock);
			return (EINVAL);
		}

		aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid;

		nrecs = aggdesc.dtagd_nrecs;
		aggdesc.dtagd_nrecs = 0;

		offs = agg->dtag_base;
		lrec = &agg->dtag_action.dta_rec;
		aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs;

		for (act = agg->dtag_first; ; act = act->dta_next) {
			ASSERT(act->dta_intuple ||
			    DTRACEACT_ISAGG(act->dta_kind));

			/*
			 * If this action has a record size of zero, it
			 * denotes an argument to the aggregating action.
			 * Because the presence of this record doesn't (or
			 * shouldn't) affect the way the data is interpreted,
			 * we don't copy it out to save user-level the
			 * confusion of dealing with a zero-length record.
			 */
			if (act->dta_rec.dtrd_size == 0) {
				ASSERT(agg->dtag_hasarg);
				continue;
			}

			aggdesc.dtagd_nrecs++;

			if (act == &agg->dtag_action)
				break;
		}

		/*
		 * Now that we have the size, we need to allocate a temporary
		 * buffer in which to store the complete description.  We need
		 * the temporary buffer to be able to drop dtrace_lock()
		 * across the copyout(), below.
		 */
		size = sizeof (dtrace_aggdesc_t) +
		    (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t));

		buf = kmem_alloc(size, KM_SLEEP);
		dest = (uintptr_t)buf;

		bcopy(&aggdesc, (void *)dest, sizeof (aggdesc));
		dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]);

		for (act = agg->dtag_first; ; act = act->dta_next) {
			dtrace_recdesc_t rec = act->dta_rec;

			/*
			 * See the comment in the above loop for why we pass
			 * over zero-length records.
			 */
			if (rec.dtrd_size == 0) {
				ASSERT(agg->dtag_hasarg);
				continue;
			}

			if (nrecs-- == 0)
				break;

			rec.dtrd_offset -= offs;
			bcopy(&rec, (void *)dest, sizeof (rec));
			dest += sizeof (dtrace_recdesc_t);

			if (act == &agg->dtag_action)
				break;
		}

		lck_mtx_unlock(&dtrace_lock);

		if (copyout(buf, arg, dest - (uintptr_t)buf) != 0) {
			kmem_free(buf, size);
			return (EFAULT);
		}

		kmem_free(buf, size);
		return (0);
	}
	case DTRACEIOC_ENABLE: {
		dof_hdr_t *dof = NULL;
		dtrace_enabling_t *enab = NULL;
		dtrace_vstate_t *vstate;
		int err = 0;

		/*
		 * If a NULL argument has been passed, we take this as our
		 * cue to reevaluate our enablings.
		 */
		if (arg == 0) {
			dtrace_enabling_matchall();

			return (0);
		}

		if ((dof = dtrace_dof_copyin(arg, &rval)) == NULL)
			return (rval);

		lck_mtx_lock(&cpu_lock);
		lck_mtx_lock(&dtrace_lock);
		vstate = &state->dts_vstate;

		if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&cpu_lock);
			dtrace_dof_destroy(dof);
			return (EBUSY);
		}

		if (dtrace_dof_slurp(dof, vstate, cr, &enab, 0, B_TRUE) != 0) {
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&cpu_lock);
			dtrace_dof_destroy(dof);
			return (EINVAL);
		}

		if ((rval = dtrace_dof_options(dof, state)) != 0) {
			dtrace_enabling_destroy(enab);
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&cpu_lock);
			dtrace_dof_destroy(dof);
			return (rval);
		}

		if ((err = dtrace_enabling_match(enab, rv)) == 0) {
			err = dtrace_enabling_retain(enab);
		} else {
			dtrace_enabling_destroy(enab);
		}

		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&cpu_lock);
		dtrace_dof_destroy(dof);

		return (err);
	}
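	/*
	 * Consumer-side shape of this call (hedged sketch; the libdtrace
	 * plumbing itself is not shown here): the consumer passes in a DOF
	 * image describing its enablings and options, e.g.
	 *
	 *	ioctl(dtrace_fd, DTRACEIOC_ENABLE, dof_user_address);
	 *
	 * while a zero argument simply asks the framework to re-run
	 * dtrace_enabling_matchall() over the already-retained enablings.
	 */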
	case DTRACEIOC_REPLICATE: {
		dtrace_repldesc_t desc;
		dtrace_probedesc_t *match = &desc.dtrpd_match;
		dtrace_probedesc_t *create = &desc.dtrpd_create;
		int err;

		if (copyin(arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		match->dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		create->dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		lck_mtx_lock(&dtrace_lock);
		err = dtrace_enabling_replicate(state, match, create);
		lck_mtx_unlock(&dtrace_lock);

		return (err);
	}
	case DTRACEIOC_PROBEMATCH:
	case DTRACEIOC_PROBES: {
		dtrace_probe_t *probe = NULL;
		dtrace_probedesc_t desc;
		dtrace_probekey_t pkey;
		dtrace_id_t i;
		int m = 0;
		uint32_t priv;
		uid_t uid;
		zoneid_t zoneid;

		if (copyin(arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		desc.dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		desc.dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		desc.dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		/*
		 * Before we attempt to match this probe, we want to give
		 * all providers the opportunity to provide it.
		 */
		if (desc.dtpd_id == DTRACE_IDNONE) {
			lck_mtx_lock(&dtrace_provider_lock);
			dtrace_probe_provide(&desc, NULL);
			lck_mtx_unlock(&dtrace_provider_lock);
			desc.dtpd_id++;
		}

		if (cmd == DTRACEIOC_PROBEMATCH) {
			dtrace_probekey(&desc, &pkey);
			pkey.dtpk_id = DTRACE_IDNONE;
		}

		dtrace_cred2priv(cr, &priv, &uid, &zoneid);

		lck_mtx_lock(&dtrace_lock);

		if (cmd == DTRACEIOC_PROBEMATCH) {
			/* Quiet compiler warning */
			for (i = desc.dtpd_id; i <= (dtrace_id_t)dtrace_nprobes; i++) {
				if ((probe = dtrace_probes[i - 1]) != NULL &&
				    (m = dtrace_match_probe(probe, &pkey,
				    priv, uid, zoneid)) != 0)
					break;
			}

			if (m < 0) {
				lck_mtx_unlock(&dtrace_lock);
				return (EINVAL);
			}
		} else {
			/* Quiet compiler warning */
			for (i = desc.dtpd_id; i <= (dtrace_id_t)dtrace_nprobes; i++) {
				if ((probe = dtrace_probes[i - 1]) != NULL &&
				    dtrace_match_priv(probe, priv, uid, zoneid))
					break;
			}
		}

		if (probe == NULL) {
			lck_mtx_unlock(&dtrace_lock);
			return (ESRCH);
		}

		dtrace_probe_description(probe, &desc);
		lck_mtx_unlock(&dtrace_lock);

		if (copyout(&desc, arg, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}
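	/*
	 * Iteration pattern implied by the loops above (a sketch, not lifted
	 * from libdtrace): a consumer enumerates probes by starting with
	 * dtpd_id = DTRACE_IDNONE and re-issuing DTRACEIOC_PROBES with
	 * dtpd_id set to the previously returned id + 1 until the call fails
	 * with ESRCH. DTRACEIOC_PROBEMATCH walks the same table but only
	 * returns probes that match the supplied description and the caller's
	 * privileges.
	 */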
	case DTRACEIOC_PROBEARG: {
		dtrace_argdesc_t desc;
		dtrace_probe_t *probe;
		dtrace_provider_t *prov;

		if (copyin(arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		if (desc.dtargd_id == DTRACE_IDNONE)
			return (EINVAL);

		if (desc.dtargd_ndx == DTRACE_ARGNONE)
			return (EINVAL);

		lck_mtx_lock(&dtrace_provider_lock);
		lck_mtx_lock(&mod_lock);
		lck_mtx_lock(&dtrace_lock);

		/* Quiet compiler warning */
		if (desc.dtargd_id > (dtrace_id_t)dtrace_nprobes) {
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&mod_lock);
			lck_mtx_unlock(&dtrace_provider_lock);
			return (EINVAL);
		}

		if ((probe = dtrace_probes[desc.dtargd_id - 1]) == NULL) {
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&mod_lock);
			lck_mtx_unlock(&dtrace_provider_lock);
			return (EINVAL);
		}

		lck_mtx_unlock(&dtrace_lock);

		prov = probe->dtpr_provider;

		if (prov->dtpv_pops.dtps_getargdesc == NULL) {
			/*
			 * There isn't any typed information for this probe.
			 * Set the argument number to DTRACE_ARGNONE.
			 */
			desc.dtargd_ndx = DTRACE_ARGNONE;
		} else {
			desc.dtargd_native[0] = '\0';
			desc.dtargd_xlate[0] = '\0';
			desc.dtargd_mapping = desc.dtargd_ndx;

			prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg,
			    probe->dtpr_id, probe->dtpr_arg, &desc);
		}

		lck_mtx_unlock(&mod_lock);
		lck_mtx_unlock(&dtrace_provider_lock);

		if (copyout(&desc, arg, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}
	case DTRACEIOC_GO: {
		processorid_t cpuid;

		rval = dtrace_state_go(state, &cpuid);

		if (rval != 0)
			return (rval);

		if (copyout(&cpuid, arg, sizeof (cpuid)) != 0)
			return (EFAULT);

		return (0);
	}

	case DTRACEIOC_STOP: {
		processorid_t cpuid;

		lck_mtx_lock(&dtrace_lock);
		rval = dtrace_state_stop(state, &cpuid);
		lck_mtx_unlock(&dtrace_lock);

		if (rval != 0)
			return (rval);

		if (copyout(&cpuid, arg, sizeof (cpuid)) != 0)
			return (EFAULT);

		return (0);
	}
	case DTRACEIOC_DOFGET: {
		dof_hdr_t hdr, *dof;
		uint64_t len;

		if (copyin(arg, &hdr, sizeof (hdr)) != 0)
			return (EFAULT);

		lck_mtx_lock(&dtrace_lock);
		dof = dtrace_dof_create(state);
		lck_mtx_unlock(&dtrace_lock);

		len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz);
		rval = copyout(dof, arg, len);
		dtrace_dof_destroy(dof);

		return (rval == 0 ? 0 : EFAULT);
	}
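	/*
	 * The MIN() above defines the userspace contract for DTRACEIOC_DOFGET
	 * (implied usage, not quoted from libdtrace): the caller's dof_hdr_t
	 * advertises in dofh_loadsz how much room it has, and the kernel copies
	 * out at most that many bytes. A header-sized first call can therefore
	 * be used to learn the real dofh_loadsz before a second, full-sized
	 * request.
	 */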
	case DTRACEIOC_AGGSNAP:
	case DTRACEIOC_BUFSNAP: {
		dtrace_bufdesc_t desc;
		caddr_t cached;
		dtrace_buffer_t *buf;

		if (copyin(arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		if ((int)desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU)
			return (EINVAL);

		lck_mtx_lock(&dtrace_lock);

		if (cmd == DTRACEIOC_BUFSNAP) {
			buf = &state->dts_buffer[desc.dtbd_cpu];
		} else {
			buf = &state->dts_aggbuffer[desc.dtbd_cpu];
		}

		if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) {
			size_t sz = buf->dtb_offset;

			if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) {
				lck_mtx_unlock(&dtrace_lock);
				return (EBUSY);
			}

			/*
			 * If this buffer has already been consumed, we're
			 * going to indicate that there's nothing left here
			 * to consume.
			 */
			if (buf->dtb_flags & DTRACEBUF_CONSUMED) {
				lck_mtx_unlock(&dtrace_lock);

				desc.dtbd_size = 0;
				desc.dtbd_drops = 0;
				desc.dtbd_errors = 0;
				desc.dtbd_oldest = 0;
				sz = sizeof (desc);

				if (copyout(&desc, arg, sz) != 0)
					return (EFAULT);

				return (0);
			}

			/*
			 * If this is a ring buffer that has wrapped, we want
			 * to copy the whole thing out.
			 */
			if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
				dtrace_buffer_polish(buf);
				sz = buf->dtb_size;
			}

			if (copyout(buf->dtb_tomax, (user_addr_t)desc.dtbd_data, sz) != 0) {
				lck_mtx_unlock(&dtrace_lock);
				return (EFAULT);
			}

			desc.dtbd_size = sz;
			desc.dtbd_drops = buf->dtb_drops;
			desc.dtbd_errors = buf->dtb_errors;
			desc.dtbd_oldest = buf->dtb_xamot_offset;

			lck_mtx_unlock(&dtrace_lock);

			if (copyout(&desc, arg, sizeof (desc)) != 0)
				return (EFAULT);

			buf->dtb_flags |= DTRACEBUF_CONSUMED;

			return (0);
		}

		if (buf->dtb_tomax == NULL) {
			ASSERT(buf->dtb_xamot == NULL);
			lck_mtx_unlock(&dtrace_lock);
			return (ENOENT);
		}

		cached = buf->dtb_tomax;
		ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));

		dtrace_xcall(desc.dtbd_cpu,
		    (dtrace_xcall_t)dtrace_buffer_switch, buf);

		state->dts_errors += buf->dtb_xamot_errors;

		/*
		 * If the buffers did not actually switch, then the cross call
		 * did not take place -- presumably because the given CPU is
		 * not in the ready set.  If this is the case, we'll return
		 * ENOENT.
		 */
		if (buf->dtb_tomax == cached) {
			ASSERT(buf->dtb_xamot != cached);
			lck_mtx_unlock(&dtrace_lock);
			return (ENOENT);
		}

		ASSERT(cached == buf->dtb_xamot);

		/*
		 * We have our snapshot; now copy it out.
		 */
		if (copyout(buf->dtb_xamot, (user_addr_t)desc.dtbd_data,
		    buf->dtb_xamot_offset) != 0) {
			lck_mtx_unlock(&dtrace_lock);
			return (EFAULT);
		}

		desc.dtbd_size = buf->dtb_xamot_offset;
		desc.dtbd_drops = buf->dtb_xamot_drops;
		desc.dtbd_errors = buf->dtb_xamot_errors;
		desc.dtbd_oldest = 0;

		lck_mtx_unlock(&dtrace_lock);

		/*
		 * Finally, copy out the buffer description.
		 */
		if (copyout(&desc, arg, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}
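	/*
	 * Snapshot protocol in the case above, in brief: for switching buffers
	 * a cross call to the target CPU runs dtrace_buffer_switch(), which
	 * exchanges the active (dtb_tomax) and inactive (dtb_xamot) halves, and
	 * the now-quiescent half is copied out along with its drop and error
	 * counts. Ring and fill buffers are instead copied in place, only after
	 * tracing has stopped, and are then marked DTRACEBUF_CONSUMED so that a
	 * second snapshot returns an empty description.
	 */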
	case DTRACEIOC_CONF: {
		dtrace_conf_t conf;

		bzero(&conf, sizeof (conf));
		conf.dtc_difversion = DIF_VERSION;
		conf.dtc_difintregs = DIF_DIR_NREGS;
		conf.dtc_diftupregs = DIF_DTR_NREGS;
		conf.dtc_ctfmodel = CTF_MODEL_NATIVE;

		if (copyout(&conf, arg, sizeof (conf)) != 0)
			return (EFAULT);

		return (0);
	}
	case DTRACEIOC_STATUS: {
		dtrace_status_t stat;
		dtrace_dstate_t *dstate;
		int i, j;
		uint64_t nerrs;

		/*
		 * See the comment in dtrace_state_deadman() for the reason
		 * for setting dts_laststatus to INT64_MAX before setting
		 * it to the correct value.
		 */
		state->dts_laststatus = INT64_MAX;
		dtrace_membar_producer();
		state->dts_laststatus = dtrace_gethrtime();

		bzero(&stat, sizeof (stat));

		lck_mtx_lock(&dtrace_lock);

		if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) {
			lck_mtx_unlock(&dtrace_lock);
			return (ENOENT);
		}

		if (state->dts_activity == DTRACE_ACTIVITY_DRAINING)
			stat.dtst_exiting = 1;

		nerrs = state->dts_errors;
		dstate = &state->dts_vstate.dtvs_dynvars;

		for (i = 0; i < (int)NCPU; i++) {
			dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i];

			stat.dtst_dyndrops += dcpu->dtdsc_drops;
			stat.dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops;
			stat.dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops;

			if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL)
				stat.dtst_filled++;

			nerrs += state->dts_buffer[i].dtb_errors;

			for (j = 0; j < state->dts_nspeculations; j++) {
				dtrace_speculation_t *spec;
				dtrace_buffer_t *buf;

				spec = &state->dts_speculations[j];
				buf = &spec->dtsp_buffer[i];
				stat.dtst_specdrops += buf->dtb_xamot_drops;
			}
		}

		stat.dtst_specdrops_busy = state->dts_speculations_busy;
		stat.dtst_specdrops_unavail = state->dts_speculations_unavail;
		stat.dtst_stkstroverflows = state->dts_stkstroverflows;
		stat.dtst_dblerrors = state->dts_dblerrors;
		stat.dtst_killed =
		    (state->dts_activity == DTRACE_ACTIVITY_KILLED);
		stat.dtst_errors = nerrs;

		lck_mtx_unlock(&dtrace_lock);

		if (copyout(&stat, arg, sizeof (stat)) != 0)
			return (EFAULT);

		return (0);
	}
	case DTRACEIOC_FORMAT: {
		dtrace_fmtdesc_t fmt;
		char *str;
		int len;

		if (copyin(arg, &fmt, sizeof (fmt)) != 0)
			return (EFAULT);

		lck_mtx_lock(&dtrace_lock);

		if (fmt.dtfd_format == 0 ||
		    fmt.dtfd_format > state->dts_nformats) {
			lck_mtx_unlock(&dtrace_lock);
			return (EINVAL);
		}

		/*
		 * Format strings are allocated contiguously and they are
		 * never freed; if a format index is less than the number
		 * of formats, we can assert that the format map is non-NULL
		 * and that the format for the specified index is non-NULL.
		 */
		ASSERT(state->dts_formats != NULL);
		str = state->dts_formats[fmt.dtfd_format - 1];
		ASSERT(str != NULL);

		len = strlen(str) + 1;

		if (len > fmt.dtfd_length) {
			fmt.dtfd_length = len;

			if (copyout(&fmt, arg, sizeof (fmt)) != 0) {
				lck_mtx_unlock(&dtrace_lock);
				return (EINVAL);
			}
		} else {
			if (copyout(str, (user_addr_t)fmt.dtfd_string, len) != 0) {
				lck_mtx_unlock(&dtrace_lock);
				return (EINVAL);
			}
		}

		lck_mtx_unlock(&dtrace_lock);
		return (0);
	}
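	/*
	 * The length check above gives DTRACEIOC_FORMAT its two-step usage
	 * (sketched, not quoted from libdtrace): call once with dtfd_length = 0
	 * to have the required length written back, allocate a buffer of that
	 * size, point dtfd_string at it, and call again to receive the format
	 * string itself.
	 */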
	case DTRACEIOC_MODUUIDSLIST: {
		size_t module_uuids_list_size;
		dtrace_module_uuids_list_t *uuids_list;
		uint64_t dtmul_count;

		/*
		 * Security restrictions make this operation illegal; if this is
		 * enabled, DTrace must refuse to provide any fbt probes.
		 */
		if (dtrace_is_restricted()) {
			cmn_err(CE_WARN, "security restrictions disallow DTRACEIOC_MODUUIDSLIST");
			return (EPERM);
		}

		/*
		 * Fail if the kernel symbol mode makes this operation illegal.
		 * Both NEVER & ALWAYS_FROM_KERNEL are permanent states; it is legal
		 * to check for them without holding the dtrace_lock.
		 */
		if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER ||
		    dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL) {
			cmn_err(CE_WARN, "dtrace_kernel_symbol_mode of %u disallows DTRACEIOC_MODUUIDSLIST", dtrace_kernel_symbol_mode);
			return (EPERM);
		}

		/*
		 * Read the number of symbolsdesc structs being passed in.
		 */
		if (copyin(arg + offsetof(dtrace_module_uuids_list_t, dtmul_count),
		    &dtmul_count, sizeof(dtmul_count))) {
			cmn_err(CE_WARN, "failed to copyin dtmul_count");
			return (EFAULT);
		}

		/*
		 * Range check the count. More than 2k kexts is probably an error.
		 */
		if (dtmul_count > 2048) {
			cmn_err(CE_WARN, "dtmul_count is not valid");
			return (EINVAL);
		}

		/*
		 * For all queries, we return EINVAL when the user specified
		 * count does not match the actual number of modules we find
		 * available.
		 *
		 * If the user specified count is zero, then this serves as a
		 * simple query to count the available modules in need of symbols.
		 */
		rval = 0;

		if (dtmul_count == 0) {
			lck_mtx_lock(&mod_lock);
			struct modctl *ctl = dtrace_modctl_list;
			while (ctl) {
				/* Update the private probes bit */
				if (dtrace_provide_private_probes)
					ctl->mod_flags |= MODCTL_FBT_PROVIDE_PRIVATE_PROBES;

				ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl));
				if (!MOD_SYMBOLS_DONE(ctl)) {
					dtmul_count++;
					rval = EINVAL;
				}
				ctl = ctl->mod_next;
			}
			lck_mtx_unlock(&mod_lock);

			if (copyout(&dtmul_count, arg, sizeof (dtmul_count)) != 0)
				rval = EFAULT;

			return (rval);
		}

		/*
		 * If we reach this point, then we have a request for full list data.
		 * Allocate a correctly sized structure and copyin the data.
		 */
		module_uuids_list_size = DTRACE_MODULE_UUIDS_LIST_SIZE(dtmul_count);
		if ((uuids_list = kmem_alloc(module_uuids_list_size, KM_SLEEP)) == NULL)
			return (ENOMEM);

		/* NOTE! We can no longer exit this method via return */
		if (copyin(arg, uuids_list, module_uuids_list_size) != 0) {
			cmn_err(CE_WARN, "failed copyin of dtrace_module_uuids_list_t");
			rval = EFAULT;
			goto moduuidslist_cleanup;
		}

		/*
		 * Check that the count didn't change between the first copyin
		 * and the second.
		 */
		if (uuids_list->dtmul_count != dtmul_count) {
			rval = EINVAL;
			goto moduuidslist_cleanup;
		}

		/*
		 * Build the list of UUID's that need symbols
		 */
		lck_mtx_lock(&mod_lock);

		dtmul_count = 0;

		struct modctl *ctl = dtrace_modctl_list;
		while (ctl) {
			/* Update the private probes bit */
			if (dtrace_provide_private_probes)
				ctl->mod_flags |= MODCTL_FBT_PROVIDE_PRIVATE_PROBES;

			/*
			 * We assume that userspace symbols will be "better" than kernel level symbols,
			 * as userspace can search for dSYM(s) and symbol'd binaries. Even if kernel syms
			 * are available, add user syms if the module might use them.
			 */
			ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl));
			if (!MOD_SYMBOLS_DONE(ctl)) {
				UUID *uuid = &uuids_list->dtmul_uuid[dtmul_count];
				if (dtmul_count++ < uuids_list->dtmul_count) {
					memcpy(uuid, ctl->mod_uuid, sizeof(UUID));
				}
			}
			ctl = ctl->mod_next;
		}

		lck_mtx_unlock(&mod_lock);

		if (uuids_list->dtmul_count < dtmul_count)
			rval = EINVAL;

		uuids_list->dtmul_count = dtmul_count;

		/*
		 * Copyout the symbols list (or at least the count!)
		 */
		if (copyout(uuids_list, arg, module_uuids_list_size) != 0) {
			cmn_err(CE_WARN, "failed copyout of dtrace_symbolsdesc_list_t");
			rval = EFAULT;
		}

	moduuidslist_cleanup:
		/*
		 * If we had to allocate struct memory, free it.
		 */
		if (uuids_list != NULL) {
			kmem_free(uuids_list, module_uuids_list_size);
		}

		return rval;
	}
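	/*
	 * Net effect for the symbol-providing daemon (described from the code
	 * above; the userspace side is only sketched): call with
	 * dtmul_count = 0 to learn how many loaded modules still need symbols,
	 * allocate DTRACE_MODULE_UUIDS_LIST_SIZE(count) bytes, and repeat the
	 * call to receive one UUID per such module. Each UUID is then fed back
	 * through DTRACEIOC_PROVMODSYMS below.
	 */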
	case DTRACEIOC_PROVMODSYMS: {
		size_t module_symbols_size;
		dtrace_module_symbols_t *module_symbols;
		uint64_t dtmodsyms_count;

		/*
		 * Security restrictions make this operation illegal; if this is
		 * enabled, DTrace must refuse to provide any fbt probes.
		 */
		if (dtrace_is_restricted()) {
			cmn_err(CE_WARN, "security restrictions disallow DTRACEIOC_MODUUIDSLIST");
			return (EPERM);
		}

		/*
		 * Fail if the kernel symbol mode makes this operation illegal.
		 * Both NEVER & ALWAYS_FROM_KERNEL are permanent states; it is legal
		 * to check for them without holding the dtrace_lock.
		 */
		if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER ||
		    dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL) {
			cmn_err(CE_WARN, "dtrace_kernel_symbol_mode of %u disallows DTRACEIOC_PROVMODSYMS", dtrace_kernel_symbol_mode);
			return (EPERM);
		}

		/*
		 * Read the number of module symbols structs being passed in.
		 */
		if (copyin(arg + offsetof(dtrace_module_symbols_t, dtmodsyms_count),
		    &dtmodsyms_count, sizeof(dtmodsyms_count))) {
			cmn_err(CE_WARN, "failed to copyin dtmodsyms_count");
			return (EFAULT);
		}

		/*
		 * Range check the count. How much data can we pass around?
		 */
		if (dtmodsyms_count == 0 || (dtmodsyms_count > 100 * 1024)) {
			cmn_err(CE_WARN, "dtmodsyms_count is not valid");
			return (EINVAL);
		}

		/*
		 * Allocate a correctly sized structure and copyin the data.
		 */
		module_symbols_size = DTRACE_MODULE_SYMBOLS_SIZE(dtmodsyms_count);
		if ((module_symbols = kmem_alloc(module_symbols_size, KM_SLEEP)) == NULL)
			return (ENOMEM);

		rval = 0;

		/* NOTE! We can no longer exit this method via return */
		if (copyin(arg, module_symbols, module_symbols_size) != 0) {
			cmn_err(CE_WARN, "failed copyin of dtrace_module_symbols_t, symbol count %llu", module_symbols->dtmodsyms_count);
			rval = EFAULT;
			goto module_symbols_cleanup;
		}

		/*
		 * Check that the count didn't change between the first copyin
		 * and the second.
		 */
		if (module_symbols->dtmodsyms_count != dtmodsyms_count) {
			rval = EINVAL;
			goto module_symbols_cleanup;
		}

		/*
		 * Find the modctl to add symbols to.
		 */
		lck_mtx_lock(&dtrace_provider_lock);
		lck_mtx_lock(&mod_lock);

		struct modctl *ctl = dtrace_modctl_list;
		while (ctl) {
			/* Update the private probes bit */
			if (dtrace_provide_private_probes)
				ctl->mod_flags |= MODCTL_FBT_PROVIDE_PRIVATE_PROBES;

			ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl));
			if (MOD_HAS_UUID(ctl) && !MOD_SYMBOLS_DONE(ctl)) {
				if (memcmp(module_symbols->dtmodsyms_uuid, ctl->mod_uuid, sizeof(UUID)) == 0) {
					ctl->mod_user_symbols = module_symbols;
					break;
				}
			}
			ctl = ctl->mod_next;
		}

		if (ctl) {
			dtrace_provider_t *prv;

			/*
			 * We're going to call each providers per-module provide operation
			 * specifying only this module.
			 */
			for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
				prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);

			/*
			 * We gave every provider a chance to provide with the user syms, go ahead and clear them
			 */
			ctl->mod_user_symbols = NULL; /* MUST reset this to clear HAS_USERSPACE_SYMBOLS */
		}

		lck_mtx_unlock(&mod_lock);
		lck_mtx_unlock(&dtrace_provider_lock);

	module_symbols_cleanup:
		/*
		 * If we had to allocate struct memory, free it.
		 */
		if (module_symbols != NULL) {
			kmem_free(module_symbols, module_symbols_size);
		}

		return rval;
	}
	case DTRACEIOC_PROCWAITFOR: {
		dtrace_procdesc_t pdesc = {
			.p_pid = -1
		};

		if ((rval = copyin(arg, &pdesc, sizeof(pdesc))) != 0)
			goto proc_waitfor_error;

		if ((rval = dtrace_proc_waitfor(&pdesc)) != 0)
			goto proc_waitfor_error;

		if ((rval = copyout(&pdesc, arg, sizeof(pdesc))) != 0)
			goto proc_waitfor_error;

		return 0;

	proc_waitfor_error:
		/* The process was suspended, revert this since the client will not do it. */
		if (pdesc.p_pid != -1) {
			proc_t *proc = proc_find(pdesc.p_pid);
			if (proc != PROC_NULL) {
				task_pidresume(proc->task);
				proc_rele(proc);
			}
		}

		return rval;
	}

	default:
		break;
	}

	return (ENOTTY);
}
/*
 * APPLE NOTE: dtrace_detach not implemented
 */
#if !defined(__APPLE__)
/*ARGSUSED*/
static int
dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	dtrace_state_t *state;

	switch (cmd) {
	case DDI_DETACH:
		break;

	case DDI_SUSPEND:
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&dtrace_lock);

	ASSERT(dtrace_opens == 0);

	if (dtrace_helpers > 0) {
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&dtrace_provider_lock);
		lck_mtx_unlock(&cpu_lock);
		return (DDI_FAILURE);
	}

	if (dtrace_unregister((dtrace_provider_id_t)dtrace_provider) != 0) {
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&dtrace_provider_lock);
		lck_mtx_unlock(&cpu_lock);
		return (DDI_FAILURE);
	}

	dtrace_provider = NULL;

	if ((state = dtrace_anon_grab()) != NULL) {
		/*
		 * If there were ECBs on this state, the provider should
		 * have not been allowed to detach; assert that there is
		 * none.
		 */
		ASSERT(state->dts_necbs == 0);
		dtrace_state_destroy(state);

		/*
		 * If we're being detached with anonymous state, we need to
		 * indicate to the kernel debugger that DTrace is now inactive.
		 */
		(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
	}

	bzero(&dtrace_anon, sizeof (dtrace_anon_t));
	unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);
	dtrace_cpu_init = NULL;
	dtrace_helpers_cleanup = NULL;
	dtrace_helpers_fork = NULL;
	dtrace_cpustart_init = NULL;
	dtrace_cpustart_fini = NULL;
	dtrace_debugger_init = NULL;
	dtrace_debugger_fini = NULL;
	dtrace_kreloc_init = NULL;
	dtrace_kreloc_fini = NULL;
	dtrace_modload = NULL;
	dtrace_modunload = NULL;

	lck_mtx_unlock(&cpu_lock);

	if (dtrace_helptrace_enabled) {
		kmem_free(dtrace_helptrace_buffer, dtrace_helptrace_bufsize);
		dtrace_helptrace_buffer = NULL;
	}

	kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *));
	dtrace_probes = NULL;
	dtrace_nprobes = 0;

	dtrace_hash_destroy(dtrace_bymod);
	dtrace_hash_destroy(dtrace_byfunc);
	dtrace_hash_destroy(dtrace_byname);
	dtrace_bymod = NULL;
	dtrace_byfunc = NULL;
	dtrace_byname = NULL;

	kmem_cache_destroy(dtrace_state_cache);
	vmem_destroy(dtrace_minor);
	vmem_destroy(dtrace_arena);

	if (dtrace_toxrange != NULL) {
		kmem_free(dtrace_toxrange,
		    dtrace_toxranges_max * sizeof (dtrace_toxrange_t));
		dtrace_toxrange = NULL;
		dtrace_toxranges = 0;
		dtrace_toxranges_max = 0;
	}

	ddi_remove_minor_node(dtrace_devi, NULL);
	dtrace_devi = NULL;

	ddi_soft_state_fini(&dtrace_softstate);

	ASSERT(dtrace_vtime_references == 0);
	ASSERT(dtrace_opens == 0);
	ASSERT(dtrace_retained == NULL);

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_provider_lock);

	/*
	 * We don't destroy the task queue until after we have dropped our
	 * locks (taskq_destroy() may block on running tasks).  To prevent
	 * attempting to do work after we have effectively detached but before
	 * the task queue has been destroyed, all tasks dispatched via the
	 * task queue must check that DTrace is still attached before
	 * performing any operation.
	 */
	taskq_destroy(dtrace_taskq);
	dtrace_taskq = NULL;

	return (DDI_SUCCESS);
}
#endif /* __APPLE__ */
d_open_t _dtrace_open, helper_open;
d_close_t _dtrace_close, helper_close;
d_ioctl_t _dtrace_ioctl, helper_ioctl;
int
_dtrace_open(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(p)
	dev_t locdev = dev;

	return dtrace_open( &locdev, flags, devtype, CRED());
}

int
helper_open(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(dev,flags,devtype,p)
	return 0;
}

int
_dtrace_close(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(p)
	return dtrace_close( dev, flags, devtype, CRED());
}

int
helper_close(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(dev,flags,devtype,p)
	return 0;
}

int
_dtrace_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p)
{
	int err, rv = 0;
	user_addr_t uaddrp;

	if (proc_is64bit(p))
		uaddrp = *(user_addr_t *)data;
	else
		uaddrp = (user_addr_t) *(uint32_t *)data;

	err = dtrace_ioctl(dev, cmd, uaddrp, fflag, CRED(), &rv);

	/* Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */
	if (err != 0) {
		ASSERT( (err & 0xfffff000) == 0 );
		return (err & 0xfff); /* ioctl will return -1 and will set errno to an error code < 4096 */
	} else if (rv != 0) {
		ASSERT( (rv & 0xfff00000) == 0 );
		return (((rv & 0xfffff) << 12)); /* ioctl will return -1 and will set errno to a value >= 4096 */
	} else
		return 0;
}
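/*
 * Decoding example for the errno overloading above (illustrative values):
 * a Solaris-style error of EINVAL (22) reaches userspace as errno == 22,
 * while a non-zero out-parameter rv of 3 reaches userspace as
 * errno == (3 << 12) == 12288. A caller aware of the convention recovers
 * rv as (errno >> 12) and the plain error as (errno & 0xfff).
 */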
int
helper_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p)
{
#pragma unused(dev,fflag,p)
	int err, rv = 0;

	err = dtrace_ioctl_helper(cmd, data, &rv);
	/* Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */
	if (err != 0) {
		ASSERT( (err & 0xfffff000) == 0 );
		return (err & 0xfff); /* ioctl will return -1 and will set errno to an error code < 4096 */
	} else if (rv != 0) {
		ASSERT( (rv & 0xfff00000) == 0 );
		return (((rv & 0xfffff) << 12)); /* ioctl will return -1 and will set errno to a value >= 4096 */
	} else
		return 0;
}
#define HELPER_MAJOR -24 /* let the kernel pick the device number */

/*
 * A struct describing which functions will get invoked for certain
 * actions.
 */
static struct cdevsw helper_cdevsw =
{
	helper_open,		/* open */
	helper_close,		/* close */
	eno_rdwrt,		/* read */
	eno_rdwrt,		/* write */
	helper_ioctl,		/* ioctl */
	(stop_fcn_t *)nulldev,	/* stop */
	(reset_fcn_t *)nulldev,	/* reset */
	NULL,			/* tty's */
	eno_select,		/* select */
	eno_mmap,		/* mmap */
	eno_strat,		/* strategy */
	eno_getc,		/* getc */
	eno_putc,		/* putc */
	0			/* type */
};

static int helper_majdevno = 0;

static int gDTraceInited = 0;

void
helper_init( void )
{
	/*
	 * Once the "helper" is initialized, it can take ioctl calls that use locks
	 * and zones initialized in dtrace_init. Make certain dtrace_init was called
	 * before us.
	 */

	if (!gDTraceInited) {
		panic("helper_init before dtrace_init\n");
	}

	if (0 >= helper_majdevno)
	{
		helper_majdevno = cdevsw_add(HELPER_MAJOR, &helper_cdevsw);

		if (helper_majdevno < 0) {
			printf("helper_init: failed to allocate a major number!\n");
			return;
		}

		if (NULL == devfs_make_node( makedev(helper_majdevno, 0), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666,
		    DTRACEMNR_HELPER, 0 )) {
			printf("dtrace_init: failed to devfs_make_node for helper!\n");
			return;
		}
	} else
		panic("helper_init: called twice!\n");
}

#undef HELPER_MAJOR
/*
 * Called with DEVFS_LOCK held, so vmem_alloc's underlying blist structures are protected.
 */
static int
dtrace_clone_func(dev_t dev, int action)
{
#pragma unused(dev)

	if (action == DEVFS_CLONE_ALLOC) {
		if (NULL == dtrace_minor) /* Arena not created yet!?! */
			return 0;
		else {
			/*
			 * Propose a minor number, namely the next number that vmem_alloc() will return.
			 * Immediately put it back in play by calling vmem_free(). FIXME.
			 */
			int ret = (int)(uintptr_t)vmem_alloc(dtrace_minor, 1, VM_BESTFIT | VM_SLEEP);

			vmem_free(dtrace_minor, (void *)(uintptr_t)ret, 1);

			return ret;
		}
	}
	else if (action == DEVFS_CLONE_FREE) {
		return 0;
	}
	else return -1;
}
#define DTRACE_MAJOR -24 /* let the kernel pick the device number */

static struct cdevsw dtrace_cdevsw =
{
	_dtrace_open,		/* open */
	_dtrace_close,		/* close */
	eno_rdwrt,		/* read */
	eno_rdwrt,		/* write */
	_dtrace_ioctl,		/* ioctl */
	(stop_fcn_t *)nulldev,	/* stop */
	(reset_fcn_t *)nulldev,	/* reset */
	NULL,			/* tty's */
	eno_select,		/* select */
	eno_mmap,		/* mmap */
	eno_strat,		/* strategy */
	eno_getc,		/* getc */
	eno_putc,		/* putc */
	0			/* type */
};
lck_attr_t *dtrace_lck_attr;
lck_grp_attr_t *dtrace_lck_grp_attr;
lck_grp_t *dtrace_lck_grp;

static int gMajDevNo;
void
dtrace_init( void )
{
	if (0 == gDTraceInited) {
		int i, ncpu;
		size_t size = sizeof(dtrace_buffer_memory_maxsize);

		/*
		 * DTrace allocates buffers based on the maximum number
		 * of enabled cpus. This call avoids any race when finding
		 * the number of enabled cpus later on.
		 */
		ASSERT(dtrace_max_cpus == 0);
		ncpu = dtrace_max_cpus = ml_get_max_cpus();

		/*
		 * Retrieve the size of the physical memory in order to define
		 * the state buffer memory maximal size.  If we cannot retrieve
		 * this value, we'll consider that we have 1Gb of memory per CPU; that's
		 * still better than raising a kernel panic.
		 */
		if (0 != kernel_sysctlbyname("hw.memsize", &dtrace_buffer_memory_maxsize,
		    &size, NULL, 0)) {
			dtrace_buffer_memory_maxsize = ncpu * 1024 * 1024 * 1024;
			printf("dtrace_init: failed to retrieve the hw.memsize, defaulted to %lld bytes\n",
			    dtrace_buffer_memory_maxsize);
		}

		/*
		 * Finally, divide by three to prevent DTrace from eating too
		 * much memory.
		 */
		dtrace_buffer_memory_maxsize /= 3;
		ASSERT(dtrace_buffer_memory_maxsize > 0);
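		/*
		 * Worked example of the sizing above (illustrative numbers): on a
		 * 16 GB machine hw.memsize returns 17179869184, so the cap ends up
		 * at roughly 5.7 GB after the divide-by-three; if the sysctl fails
		 * on an 8-CPU machine, the fallback is 8 * 1 GiB before the same
		 * division.
		 */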
		gMajDevNo = cdevsw_add(DTRACE_MAJOR, &dtrace_cdevsw);

		if (gMajDevNo < 0) {
			printf("dtrace_init: failed to allocate a major number!\n");
			return;
		}

		if (NULL == devfs_make_node_clone( makedev(gMajDevNo, 0), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666,
		    dtrace_clone_func, DTRACEMNR_DTRACE, 0 )) {
			printf("dtrace_init: failed to devfs_make_node_clone for dtrace!\n");
			return;
		}

#if defined(DTRACE_MEMORY_ZONES)
		/*
		 * Initialize the dtrace kalloc-emulation zones.
		 */
		dtrace_alloc_init();
#endif /* DTRACE_MEMORY_ZONES */

		/*
		 * Allocate the dtrace_probe_t zone
		 */
		dtrace_probe_t_zone = zinit(sizeof(dtrace_probe_t),
		    1024 * sizeof(dtrace_probe_t),
		    sizeof(dtrace_probe_t),
		    "dtrace.dtrace_probe_t");

		/*
		 * Create the dtrace lock group and attrs.
		 */
		dtrace_lck_attr = lck_attr_alloc_init();
		dtrace_lck_grp_attr = lck_grp_attr_alloc_init();
		dtrace_lck_grp = lck_grp_alloc_init("dtrace", dtrace_lck_grp_attr);

		/*
		 * We have to initialize all locks explicitly
		 */
		lck_mtx_init(&dtrace_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&dtrace_provider_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&dtrace_meta_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&dtrace_procwaitfor_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&dtrace_errlock, dtrace_lck_grp, dtrace_lck_attr);
		lck_rw_init(&dtrace_dof_mode_lock, dtrace_lck_grp, dtrace_lck_attr);

		/*
		 * The cpu_core structure consists of per-CPU state available in any context.
		 * On some architectures, this may mean that the page(s) containing the
		 * NCPU-sized array of cpu_core structures must be locked in the TLB -- it
		 * is up to the platform to assure that this is performed properly.  Note that
		 * the structure is sized to avoid false sharing.
		 */
		lck_mtx_init(&cpu_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&cyc_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&mod_lock, dtrace_lck_grp, dtrace_lck_attr);

		/*
		 * Initialize the CPU offline/online hooks.
		 */
		dtrace_install_cpu_hooks();

		dtrace_modctl_list = NULL;

		cpu_core = (cpu_core_t *)kmem_zalloc( ncpu * sizeof(cpu_core_t), KM_SLEEP );
		for (i = 0; i < ncpu; ++i) {
			lck_mtx_init(&cpu_core[i].cpuc_pid_lock, dtrace_lck_grp, dtrace_lck_attr);
		}

		cpu_list = (dtrace_cpu_t *)kmem_zalloc( ncpu * sizeof(dtrace_cpu_t), KM_SLEEP );
		for (i = 0; i < ncpu; ++i) {
			cpu_list[i].cpu_id = (processorid_t)i;
			cpu_list[i].cpu_next = &(cpu_list[(i+1) % ncpu]);
			LIST_INIT(&cpu_list[i].cpu_cyc_list);
			lck_rw_init(&cpu_list[i].cpu_ft_lock, dtrace_lck_grp, dtrace_lck_attr);
		}

		lck_mtx_lock(&cpu_lock);
		for (i = 0; i < ncpu; ++i)
			/* FIXME: track CPU configuration a la CHUD Processor Pref Pane. */
			dtrace_cpu_setup_initial( (processorid_t)i ); /* In lieu of register_cpu_setup_func() callback */
		lck_mtx_unlock(&cpu_lock);

		(void)dtrace_abs_to_nano(0LL); /* Force once only call to clock_timebase_info (which can take a lock) */

		/*
		 * See dtrace_impl.h for a description of dof modes.
		 * The default is lazy dof.
		 *
		 * FIXME: Warn if state is LAZY_OFF? It won't break anything, but
		 * makes no sense...
		 */
		if (!PE_parse_boot_argn("dtrace_dof_mode", &dtrace_dof_mode, sizeof (dtrace_dof_mode))) {
			dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
		}

		/*
		 * Sanity check of dof mode value.
		 */
		switch (dtrace_dof_mode) {
		case DTRACE_DOF_MODE_NEVER:
		case DTRACE_DOF_MODE_LAZY_ON:
			/* valid modes, but nothing else we need to do */
			break;

		case DTRACE_DOF_MODE_LAZY_OFF:
		case DTRACE_DOF_MODE_NON_LAZY:
			/* Cannot wait for a dtrace_open to init fasttrap */
			fasttrap_init();
			break;

		default:
			/* Invalid, clamp to non lazy */
			dtrace_dof_mode = DTRACE_DOF_MODE_NON_LAZY;
			fasttrap_init();
			break;
		}

		/*
		 * See dtrace_impl.h for a description of kernel symbol modes.
		 * The default is to wait for symbols from userspace (lazy symbols).
		 */
		if (!PE_parse_boot_argn("dtrace_kernel_symbol_mode", &dtrace_kernel_symbol_mode, sizeof (dtrace_kernel_symbol_mode))) {
			dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE;
		}

		gDTraceInited = 1;

	} else
		panic("dtrace_init: called twice!\n");
}
void
dtrace_postinit(void)
{
	/*
	 * Called from bsd_init after all provider's *_init() routines have been
	 * run. That way, anonymous DOF enabled under dtrace_attach() is safe
	 * to go.
	 */
	dtrace_attach( (dev_info_t *)(uintptr_t)makedev(gMajDevNo, 0), 0 ); /* Punning a dev_t to a dev_info_t* */

	/*
	 * Add the mach_kernel to the module list for lazy processing
	 */
	struct kmod_info fake_kernel_kmod;
	memset(&fake_kernel_kmod, 0, sizeof(fake_kernel_kmod));

	strlcpy(fake_kernel_kmod.name, "mach_kernel", sizeof(fake_kernel_kmod.name));
	fake_kernel_kmod.id = 1;
	fake_kernel_kmod.address = g_kernel_kmod_info.address;
	fake_kernel_kmod.size = g_kernel_kmod_info.size;

	if (dtrace_module_loaded(&fake_kernel_kmod, 0) != 0) {
		printf("dtrace_postinit: Could not register mach_kernel modctl\n");
	}

	(void)OSKextRegisterKextsWithDTrace();
}
#undef DTRACE_MAJOR
/*
 * Routines used to register interest in cpu's being added to or removed
 * from the system.
 */
void
register_cpu_setup_func(cpu_setup_func_t *ignore1, void *ignore2)
{
#pragma unused(ignore1,ignore2)
}

void
unregister_cpu_setup_func(cpu_setup_func_t *ignore1, void *ignore2)
{
#pragma unused(ignore1,ignore2)
}