]> git.saurik.com Git - apple/xnu.git/blame - bsd/dev/dtrace/dtrace.c
xnu-4903.270.47.tar.gz
[apple/xnu.git] / bsd / dev / dtrace / dtrace.c
CommitLineData
2d21ac55
A
1/*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
39236c6e 22/*
39037602 23 * Portions Copyright (c) 2013, 2016, Joyent, Inc. All rights reserved.
3e170ce0 24 * Portions Copyright (c) 2013 by Delphix. All rights reserved.
39236c6e
A
25 */
26
2d21ac55 27/*
6d2010ae 28 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
2d21ac55
A
29 * Use is subject to license terms.
30 */
31
b0d623f7 32/* #pragma ident "@(#)dtrace.c 1.65 08/07/02 SMI" */
2d21ac55
A
33
34/*
35 * DTrace - Dynamic Tracing for Solaris
36 *
37 * This is the implementation of the Solaris Dynamic Tracing framework
38 * (DTrace). The user-visible interface to DTrace is described at length in
39 * the "Solaris Dynamic Tracing Guide". The interfaces between the libdtrace
40 * library, the in-kernel DTrace framework, and the DTrace providers are
41 * described in the block comments in the <sys/dtrace.h> header file. The
42 * internal architecture of DTrace is described in the block comments in the
43 * <sys/dtrace_impl.h> header file. The comments contained within the DTrace
44 * implementation very much assume mastery of all of these sources; if one has
45 * an unanswered question about the implementation, one should consult them
46 * first.
47 *
48 * The functions here are ordered roughly as follows:
49 *
50 * - Probe context functions
51 * - Probe hashing functions
52 * - Non-probe context utility functions
53 * - Matching functions
54 * - Provider-to-Framework API functions
55 * - Probe management functions
56 * - DIF object functions
57 * - Format functions
58 * - Predicate functions
59 * - ECB functions
60 * - Buffer functions
61 * - Enabling functions
62 * - DOF functions
63 * - Anonymous enabling functions
39037602 64 * - Process functions
2d21ac55
A
65 * - Consumer state functions
66 * - Helper functions
67 * - Hook functions
68 * - Driver cookbook functions
69 *
70 * Each group of functions begins with a block comment labelled the "DTrace
71 * [Group] Functions", allowing one to find each block by searching forward
72 * on capital-f functions.
73 */
2d21ac55
A
74#include <sys/errno.h>
75#include <sys/types.h>
76#include <sys/stat.h>
77#include <sys/conf.h>
78#include <sys/systm.h>
79#include <sys/dtrace_impl.h>
80#include <sys/param.h>
6d2010ae 81#include <sys/proc_internal.h>
2d21ac55
A
82#include <sys/ioctl.h>
83#include <sys/fcntl.h>
84#include <miscfs/devfs/devfs.h>
85#include <sys/malloc.h>
86#include <sys/kernel_types.h>
87#include <sys/proc_internal.h>
88#include <sys/uio_internal.h>
89#include <sys/kauth.h>
90#include <vm/pmap.h>
91#include <sys/user.h>
92#include <mach/exception_types.h>
93#include <sys/signalvar.h>
6d2010ae 94#include <mach/task.h>
2d21ac55 95#include <kern/zalloc.h>
b0d623f7 96#include <kern/ast.h>
39037602 97#include <kern/sched_prim.h>
fe8ab488 98#include <kern/task.h>
b0d623f7 99#include <netinet/in.h>
39037602
A
100#include <libkern/sysctl.h>
101#include <sys/kdebug.h>
b0d623f7 102
5ba3f43e
A
103#if MONOTONIC
104#include <kern/monotonic.h>
105#include <machine/monotonic.h>
106#endif /* MONOTONIC */
107
a39ff7e2
A
108#include <IOKit/IOPlatformExpert.h>
109
6d2010ae 110#include <kern/cpu_data.h>
b0d623f7
A
111extern uint32_t pmap_find_phys(void *, uint64_t);
112extern boolean_t pmap_valid_page(uint32_t);
6d2010ae
A
113extern void OSKextRegisterKextsWithDTrace(void);
114extern kmod_info_t g_kernel_kmod_info;
b0d623f7
A
115
116/* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. */
117#define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */
2d21ac55
A
118
119#define t_predcache t_dtrace_predcache /* Cosmetic. Helps readability of thread.h */
120
121extern void dtrace_suspend(void);
122extern void dtrace_resume(void);
d9a64523
A
123extern void dtrace_early_init(void);
124extern int dtrace_keep_kernel_symbols(void);
2d21ac55
A
125extern void dtrace_init(void);
126extern void helper_init(void);
b0d623f7 127extern void fasttrap_init(void);
39037602
A
128
129static int dtrace_lazy_dofs_duplicate(proc_t *, proc_t *);
b0d623f7
A
130extern void dtrace_lazy_dofs_destroy(proc_t *);
131extern void dtrace_postinit(void);
2d21ac55 132
39037602
A
133extern void dtrace_proc_fork(proc_t*, proc_t*, int);
134extern void dtrace_proc_exec(proc_t*);
135extern void dtrace_proc_exit(proc_t*);
d9a64523 136
2d21ac55
A
137/*
138 * DTrace Tunable Variables
139 *
fe8ab488
A
140 * The following variables may be dynamically tuned by using sysctl(8), the
141 * variables being stored in the kern.dtrace namespace. For example:
142 * sysctl kern.dtrace.dof_maxsize = 1048575 # 1M
2d21ac55
A
143 *
144 * In general, the only variables that one should be tuning this way are those
145 * that affect system-wide DTrace behavior, and for which the default behavior
146 * is undesirable. Most of these variables are tunable on a per-consumer
147 * basis using DTrace options, and need not be tuned on a system-wide basis.
148 * When tuning these variables, avoid pathological values; while some attempt
149 * is made to verify the integrity of these variables, they are not considered
150 * part of the supported interface to DTrace, and they are therefore not
fe8ab488 151 * checked comprehensively.
2d21ac55 152 */
fe8ab488
A
153uint64_t dtrace_buffer_memory_maxsize = 0; /* initialized in dtrace_init */
154uint64_t dtrace_buffer_memory_inuse = 0;
2d21ac55 155int dtrace_destructive_disallow = 0;
2d21ac55
A
156dtrace_optval_t dtrace_nonroot_maxsize = (16 * 1024 * 1024);
157size_t dtrace_difo_maxsize = (256 * 1024);
5ba3f43e 158dtrace_optval_t dtrace_dof_maxsize = (512 * 1024);
ecc0ceb4
A
159dtrace_optval_t dtrace_statvar_maxsize = (16 * 1024);
160dtrace_optval_t dtrace_statvar_maxsize_max = (16 * 10 * 1024);
2d21ac55
A
161size_t dtrace_actions_max = (16 * 1024);
162size_t dtrace_retain_max = 1024;
163dtrace_optval_t dtrace_helper_actions_max = 32;
6d2010ae 164dtrace_optval_t dtrace_helper_providers_max = 64;
2d21ac55
A
165dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024);
166size_t dtrace_strsize_default = 256;
39037602
A
167dtrace_optval_t dtrace_strsize_min = 8;
168dtrace_optval_t dtrace_strsize_max = 65536;
39236c6e
A
169dtrace_optval_t dtrace_cleanrate_default = 990099000; /* 1.1 hz */
170dtrace_optval_t dtrace_cleanrate_min = 20000000; /* 50 hz */
2d21ac55
A
171dtrace_optval_t dtrace_cleanrate_max = (uint64_t)60 * NANOSEC; /* 1/minute */
172dtrace_optval_t dtrace_aggrate_default = NANOSEC; /* 1 hz */
173dtrace_optval_t dtrace_statusrate_default = NANOSEC; /* 1 hz */
174dtrace_optval_t dtrace_statusrate_max = (hrtime_t)10 * NANOSEC; /* 6/minute */
175dtrace_optval_t dtrace_switchrate_default = NANOSEC; /* 1 hz */
176dtrace_optval_t dtrace_nspec_default = 1;
177dtrace_optval_t dtrace_specsize_default = 32 * 1024;
178dtrace_optval_t dtrace_stackframes_default = 20;
179dtrace_optval_t dtrace_ustackframes_default = 20;
180dtrace_optval_t dtrace_jstackframes_default = 50;
181dtrace_optval_t dtrace_jstackstrsize_default = 512;
39037602
A
182dtrace_optval_t dtrace_buflimit_default = 75;
183dtrace_optval_t dtrace_buflimit_min = 1;
184dtrace_optval_t dtrace_buflimit_max = 99;
2d21ac55
A
185int dtrace_msgdsize_max = 128;
186hrtime_t dtrace_chill_max = 500 * (NANOSEC / MILLISEC); /* 500 ms */
187hrtime_t dtrace_chill_interval = NANOSEC; /* 1000 ms */
188int dtrace_devdepth_max = 32;
189int dtrace_err_verbose;
fe8ab488 190int dtrace_provide_private_probes = 0;
2d21ac55
A
191hrtime_t dtrace_deadman_interval = NANOSEC;
192hrtime_t dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC;
193hrtime_t dtrace_deadman_user = (hrtime_t)30 * NANOSEC;
194
195/*
196 * DTrace External Variables
197 *
198 * As dtrace(7D) is a kernel module, any DTrace variables are obviously
199 * available to DTrace consumers via the backtick (`) syntax. One of these,
200 * dtrace_zero, is made deliberately so: it is provided as a source of
201 * well-known, zero-filled memory. While this variable is not documented,
202 * it is used by some translators as an implementation detail.
203 */
204const char dtrace_zero[256] = { 0 }; /* zero-filled memory */
39236c6e 205unsigned int dtrace_max_cpus = 0; /* number of enabled cpus */
2d21ac55
A
206/*
207 * DTrace Internal Variables
208 */
209static dev_info_t *dtrace_devi; /* device info */
210static vmem_t *dtrace_arena; /* probe ID arena */
2d21ac55
A
211static dtrace_probe_t **dtrace_probes; /* array of all probes */
212static int dtrace_nprobes; /* number of probes */
213static dtrace_provider_t *dtrace_provider; /* provider list */
214static dtrace_meta_t *dtrace_meta_pid; /* user-land meta provider */
215static int dtrace_opens; /* number of opens */
216static int dtrace_helpers; /* number of helpers */
d9a64523
A
217static dtrace_hash_t *dtrace_strings;
218static dtrace_hash_t *dtrace_byprov; /* probes hashed by provider */
2d21ac55
A
219static dtrace_hash_t *dtrace_bymod; /* probes hashed by module */
220static dtrace_hash_t *dtrace_byfunc; /* probes hashed by function */
221static dtrace_hash_t *dtrace_byname; /* probes hashed by name */
222static dtrace_toxrange_t *dtrace_toxrange; /* toxic range array */
223static int dtrace_toxranges; /* number of toxic ranges */
224static int dtrace_toxranges_max; /* size of toxic range array */
225static dtrace_anon_t dtrace_anon; /* anonymous enabling */
226static kmem_cache_t *dtrace_state_cache; /* cache for dynamic state */
227static uint64_t dtrace_vtime_references; /* number of vtimestamp refs */
228static kthread_t *dtrace_panicked; /* panicking thread */
229static dtrace_ecb_t *dtrace_ecb_create_cache; /* cached created ECB */
230static dtrace_genid_t dtrace_probegen; /* current probe generation */
231static dtrace_helpers_t *dtrace_deferred_pid; /* deferred helper list */
232static dtrace_enabling_t *dtrace_retained; /* list of retained enablings */
b0d623f7 233static dtrace_genid_t dtrace_retained_gen; /* current retained enab gen */
2d21ac55 234static dtrace_dynvar_t dtrace_dynhash_sink; /* end of dynamic hash chains */
fe8ab488 235
b0d623f7 236static int dtrace_dof_mode; /* See dtrace_impl.h for a description of Darwin's dof modes. */
6d2010ae
A
237
/*
 * This doesn't quite fit as an internal variable, as it must be accessed in
 * fbt_provide and sdt_provide. It's clearly not a dtrace tunable variable either...
 */
242int dtrace_kernel_symbol_mode; /* See dtrace_impl.h for a description of Darwin's kernel symbol modes. */
39037602 243static uint32_t dtrace_wake_clients;
d9a64523 244static uint8_t dtrace_kerneluuid[16]; /* the 128-bit uuid */
fe8ab488 245
2d21ac55
A
246/*
247 * To save memory, some common memory allocations are given a
b0d623f7 248 * unique zone. For example, dtrace_probe_t is 72 bytes in size,
2d21ac55
A
249 * which means it would fall into the kalloc.128 bucket. With
250 * 20k elements allocated, the space saved is substantial.
251 */
252
253struct zone *dtrace_probe_t_zone;
6d2010ae
A
254
255static int dtrace_module_unloaded(struct kmod_info *kmod);
2d21ac55
A
256
257/*
258 * DTrace Locking
259 * DTrace is protected by three (relatively coarse-grained) locks:
260 *
261 * (1) dtrace_lock is required to manipulate essentially any DTrace state,
262 * including enabling state, probes, ECBs, consumer state, helper state,
263 * etc. Importantly, dtrace_lock is _not_ required when in probe context;
264 * probe context is lock-free -- synchronization is handled via the
265 * dtrace_sync() cross call mechanism.
266 *
267 * (2) dtrace_provider_lock is required when manipulating provider state, or
268 * when provider state must be held constant.
269 *
270 * (3) dtrace_meta_lock is required when manipulating meta provider state, or
271 * when meta provider state must be held constant.
272 *
273 * The lock ordering between these three locks is dtrace_meta_lock before
274 * dtrace_provider_lock before dtrace_lock. (In particular, there are
275 * several places where dtrace_provider_lock is held by the framework as it
276 * calls into the providers -- which then call back into the framework,
277 * grabbing dtrace_lock.)
278 *
279 * There are two other locks in the mix: mod_lock and cpu_lock. With respect
280 * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical
281 * role as a coarse-grained lock; it is acquired before both of these locks.
282 * With respect to dtrace_meta_lock, its behavior is stranger: cpu_lock must
283 * be acquired _between_ dtrace_meta_lock and any other DTrace locks.
284 * mod_lock is similar with respect to dtrace_provider_lock in that it must be
285 * acquired _between_ dtrace_provider_lock and dtrace_lock.
286 */
287
fe8ab488 288
2d21ac55
A
289/*
290 * APPLE NOTE:
291 *
fe8ab488
A
292 * For porting purposes, all kmutex_t vars have been changed
293 * to lck_mtx_t, which require explicit initialization.
2d21ac55 294 *
fe8ab488 295 * kmutex_t becomes lck_mtx_t
2d21ac55
A
296 * mutex_enter() becomes lck_mtx_lock()
297 * mutex_exit() becomes lck_mtx_unlock()
298 *
299 * Lock asserts are changed like this:
300 *
301 * ASSERT(MUTEX_HELD(&cpu_lock));
302 * becomes:
5ba3f43e 303 * LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55 304 *
2d21ac55
A
305 */
306static lck_mtx_t dtrace_lock; /* probe state lock */
307static lck_mtx_t dtrace_provider_lock; /* provider state lock */
308static lck_mtx_t dtrace_meta_lock; /* meta-provider state lock */
2d21ac55 309static lck_rw_t dtrace_dof_mode_lock; /* dof mode lock */
2d21ac55
A
310
311/*
312 * DTrace Provider Variables
313 *
314 * These are the variables relating to DTrace as a provider (that is, the
315 * provider of the BEGIN, END, and ERROR probes).
316 */
317static dtrace_pattr_t dtrace_provider_attr = {
318{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
319{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
320{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
321{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
322{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
323};
324
325static void
0a7de745
A
326dtrace_provide_nullop(void *arg, const dtrace_probedesc_t *desc)
327{
328#pragma unused(arg, desc)
329}
330
/*
 * Placeholder "provide module" callback for the dtrace provider: ignores both
 * arguments and does nothing.
 */
static void
dtrace_provide_module_nullop(void *arg, struct modctl *ctl)
{
#pragma unused(arg, ctl)
}
2d21ac55 336
6d2010ae 337static int
0a7de745 338dtrace_enable_nullop(void *arg, dtrace_id_t id, void *parg)
6d2010ae 339{
0a7de745 340#pragma unused(arg, id, parg)
6d2010ae
A
341 return (0);
342}
343
0a7de745
A
344static void
345dtrace_disable_nullop(void *arg, dtrace_id_t id, void *parg)
346{
347#pragma unused(arg, id, parg)
348}
349
350static void
351dtrace_suspend_nullop(void *arg, dtrace_id_t id, void *parg)
352{
353#pragma unused(arg, id, parg)
354}
355
356static void
357dtrace_resume_nullop(void *arg, dtrace_id_t id, void *parg)
358{
359#pragma unused(arg, id, parg)
360}
361
362static void
363dtrace_destroy_nullop(void *arg, dtrace_id_t id, void *parg)
364{
365#pragma unused(arg, id, parg)
366}
367
368
d9a64523 369static dtrace_pops_t dtrace_provider_ops = {
0a7de745
A
370 .dtps_provide = dtrace_provide_nullop,
371 .dtps_provide_module = dtrace_provide_module_nullop,
372 .dtps_enable = dtrace_enable_nullop,
373 .dtps_disable = dtrace_disable_nullop,
374 .dtps_suspend = dtrace_suspend_nullop,
375 .dtps_resume = dtrace_resume_nullop,
d9a64523
A
376 .dtps_getargdesc = NULL,
377 .dtps_getargval = NULL,
378 .dtps_usermode = NULL,
0a7de745 379 .dtps_destroy = dtrace_destroy_nullop,
2d21ac55
A
380};
381
382static dtrace_id_t dtrace_probeid_begin; /* special BEGIN probe */
383static dtrace_id_t dtrace_probeid_end; /* special END probe */
384dtrace_id_t dtrace_probeid_error; /* special ERROR probe */
385
386/*
387 * DTrace Helper Tracing Variables
388 */
389uint32_t dtrace_helptrace_next = 0;
390uint32_t dtrace_helptrace_nlocals;
391char *dtrace_helptrace_buffer;
b0d623f7 392size_t dtrace_helptrace_bufsize = 512 * 1024;
2d21ac55 393
b0d623f7 394#if DEBUG
2d21ac55
A
395int dtrace_helptrace_enabled = 1;
396#else
397int dtrace_helptrace_enabled = 0;
398#endif
399
5ba3f43e
A
400#if defined (__arm64__)
401/*
402 * The ioctl for adding helper DOF is based on the
403 * size of a user_addr_t. We need to recognize both
404 * U32 and U64 as the same action.
405 */
406#define DTRACEHIOC_ADDDOF_U32 _IOW('h', 4, user32_addr_t)
407#define DTRACEHIOC_ADDDOF_U64 _IOW('h', 4, user64_addr_t)
408#endif /* __arm64__ */
fe8ab488 409
2d21ac55
A
/*
 * DTrace Error Hashing
 *
 * On DEBUG kernels, DTrace will track the errors that it has seen in a hash
 * table.  This is very useful for checking coverage of tests that are
 * expected to induce DIF or DOF processing errors, and may be useful for
 * debugging problems in the DIF code generator or in DOF generation.  The
 * error hash may be examined with the ::dtrace_errhash MDB dcmd.
 */
b0d623f7 419#if DEBUG
2d21ac55
A
420static dtrace_errhash_t dtrace_errhash[DTRACE_ERRHASHSZ];
421static const char *dtrace_errlast;
422static kthread_t *dtrace_errthread;
423static lck_mtx_t dtrace_errlock;
424#endif
425
426/*
427 * DTrace Macros and Constants
428 *
429 * These are various macros that are useful in various spots in the
430 * implementation, along with a few random constants that have no meaning
431 * outside of the implementation. There is no real structure to this cpp
432 * mishmash -- but is there ever?
433 */
2d21ac55 434
d9a64523
A
/*
 * Hash-table helper macros.  `hash` is a pointer to a table descriptor and
 * `elm` is an element stored in it.  Every macro argument is parenthesized
 * so that expression arguments (e.g. `tbl + 1`) expand correctly.
 *
 * DTRACE_GETSTR   - calls the table's dth_getstr hook on elm, passing
 *                   dth_stroffs.
 * DTRACE_HASHSTR  - dtrace_hash_str() of that string.
 * DTRACE_HASHNEXT - address of the slot dth_nextoffs bytes into elm.
 * DTRACE_HASHPREV - address of the slot dth_prevoffs bytes into elm.
 * DTRACE_HASHEQ   - non-zero when both elements' strings compare equal.
 */
#define DTRACE_GETSTR(hash, elm)					\
	((hash)->dth_getstr((elm), (hash)->dth_stroffs))

#define DTRACE_HASHSTR(hash, elm)					\
	dtrace_hash_str(DTRACE_GETSTR(hash, elm))

#define DTRACE_HASHNEXT(hash, elm)					\
	((void **)((uintptr_t)(elm) + (hash)->dth_nextoffs))

#define DTRACE_HASHPREV(hash, elm)					\
	((void **)((uintptr_t)(elm) + (hash)->dth_prevoffs))

#define DTRACE_HASHEQ(hash, lhs, rhs)					\
	(strcmp(DTRACE_GETSTR(hash, lhs),				\
	    DTRACE_GETSTR(hash, rhs)) == 0)
2d21ac55
A
450
451#define DTRACE_AGGHASHSIZE_SLEW 17
452
b0d623f7
A
453#define DTRACE_V4MAPPED_OFFSET (sizeof (uint32_t) * 3)
454
2d21ac55
A
455/*
456 * The key for a thread-local variable consists of the lower 61 bits of the
fe8ab488 457 * current_thread(), plus the 3 bits of the highest active interrupt above LOCK_LEVEL.
2d21ac55
A
458 * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never
459 * equal to a variable identifier. This is necessary (but not sufficient) to
460 * assure that global associative arrays never collide with thread-local
461 * variables. To guarantee that they cannot collide, we must also define the
462 * order for keying dynamic variables. That order is:
463 *
464 * [ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ]
465 *
466 * Because the variable-key and the tls-key are in orthogonal spaces, there is
467 * no way for a global variable key signature to match a thread-local key
468 * signature.
469 */
/*
 * DTRACE_TLS_THRKEY(where) stores the thread-local-variable key in `where`:
 * the low 61 bits come from current_thread() plus DIF_VARIABLE_MAX (on
 * 32-bit arm the thread value is first shifted up by 32 and or'ed with the
 * result of dtrace_proc_selfpid()), and bits 61 and up carry the value
 * returned by ml_at_interrupt_context().
 *
 * The x86_64 and arm64 expansions are identical, so they share one
 * definition.
 */
#if defined (__x86_64__) || defined (__arm64__)
/* FIXME: two function calls!! */
#define DTRACE_TLS_THRKEY(where) { \
	uint_t intr = ml_at_interrupt_context(); /* Note: just one measly bit */ \
	uint64_t thr = (uintptr_t)current_thread(); \
	ASSERT(intr < (1 << 3)); \
	(where) = ((thr + DIF_VARIABLE_MAX) & \
	    (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
}
#elif defined(__arm__)
/* FIXME: three function calls!!! */
#define DTRACE_TLS_THRKEY(where) { \
	uint_t intr = ml_at_interrupt_context(); /* Note: just one measly bit */ \
	uint64_t thr = (uintptr_t)current_thread(); \
	uint_t pid = (uint_t)dtrace_proc_selfpid(); \
	ASSERT(intr < (1 << 3)); \
	(where) = ((((thr << 32) | pid) + DIF_VARIABLE_MAX) & \
	    (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
}
#else
#error Unknown architecture
#endif
2d21ac55 501
b0d623f7
A
/*
 * Byte-swap helpers built by composition: DT_BSWAP_N(x) reverses the low
 * N/8 bytes of x and discards anything above them.  The argument is
 * evaluated several times, so it must be free of side effects, and it should
 * be an unsigned value at least N bits wide (DT_BSWAP_64 shifts by 32).
 */
#define DT_BSWAP_8(x)	((x) & 0xff)
#define DT_BSWAP_16(x)	((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8))
#define DT_BSWAP_32(x)	((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((x) >> 16))
#define DT_BSWAP_64(x)	((DT_BSWAP_32(x) << 32) | DT_BSWAP_32((x) >> 32))
506
507#define DT_MASK_LO 0x00000000FFFFFFFFULL
508
2d21ac55
A
/*
 * Store `what`, converted to `type`, at byte offset `offset` from `tomax`.
 * `offset` is parenthesized before the cast so that expression arguments
 * (e.g. `cond ? a : b`) are applied as a whole.  The trailing semicolon is
 * retained from the original definition because callers outside this view
 * may depend on it; consequently do not use this macro as the unbraced body
 * of an if/else.
 */
#define DTRACE_STORE(type, tomax, offset, what) \
	*((type *)((uintptr_t)(tomax) + (uintptr_t)(offset))) = (type)(what);
511
39236c6e 512
b0d623f7
A
/*
 * If `addr` is not aligned to MIN(size, 4) bytes, record CPU_DTRACE_BADALIGN
 * in *flags, save the address as the current CPU's illegal value and make
 * the *enclosing function* return 0.  Must therefore only be used inside a
 * function whose return type accepts 0.
 *
 * Arguments are parenthesized and the body is wrapped in do/while(0) so the
 * macro behaves as a single statement; invoke it with a trailing semicolon.
 */
#define DTRACE_ALIGNCHECK(addr, size, flags)				\
do {									\
	if ((addr) & (MIN((size), 4) - 1)) {				\
		*(flags) |= CPU_DTRACE_BADALIGN;			\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = (addr);	\
		return (0);						\
	}								\
} while (0)
b0d623f7 519
39037602
A
/*
 * When `remp` is non-NULL, store through it the number of bytes between
 * `addr` and the end of the region [baseaddr, baseaddr + basesz).  Does
 * nothing when `remp` is NULL.
 */
#define DTRACE_RANGE_REMAIN(remp, addr, baseaddr, basesz)		\
do {									\
	if ((remp) != NULL) {						\
		*(remp) = (uintptr_t)(baseaddr) + (basesz) - (addr);	\
	}								\
} while (0)
526
527
b0d623f7
A
/*
 * Test whether a range of memory starting at testaddr of size testsz falls
 * within the range of memory described by baseaddr, basesz.  We take care to
 * avoid problems with overflow and underflow of the unsigned quantities, and
 * disallow all negative sizes.  Ranges of size 0 are allowed.
 *
 * The three conjuncts check, in order: the start lies inside the base range,
 * the end does not pass the end of the base range, and testaddr + testsz did
 * not wrap around.
 */
#define DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz)		\
	((testaddr) - (baseaddr) < (basesz) &&				\
	(testaddr) + (testsz) - (baseaddr) <= (basesz) &&		\
	(testaddr) + (testsz) >= (testaddr))
538
/*
 * Test whether alloc_sz bytes will fit in the scratch region.  We isolate
 * alloc_sz on the righthand side of the comparison in order to avoid overflow
 * or underflow in the comparison with it.  This is simpler than the INRANGE
 * check above, because we know that the dtms_scratch_ptr is valid in the
 * range.  Allocations of size zero are allowed.
 */
#define DTRACE_INSCRATCH(mstate, alloc_sz)				\
	((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size -	\
	(mstate)->dtms_scratch_ptr >= (alloc_sz))
2d21ac55 549
#define RECOVER_LABEL(bits) dtraceLoadRecover##bits:

#if defined (__x86_64__) || (defined (__arm__) || defined (__arm64__))
/*
 * DTRACE_LOADFUNC(bits) declares and defines dtrace_load<bits>(addr), which
 * reads a <bits>-wide value from `addr` and returns it, or returns 0 after
 * flagging an error in the current CPU's cpuc_dtrace_flags:
 *
 *  - a misaligned address is rejected by DTRACE_ALIGNCHECK;
 *  - an address overlapping any dtrace_toxrange[] entry sets
 *    CPU_DTRACE_BADADDR and records the address in cpuc_dtrace_illval;
 *  - an address whose physical page fails pmap_valid_page() is reported the
 *    same way.
 *
 * Around the load itself, CPU_DTRACE_NOFAULT is set and the address of the
 * dtraceLoadRecover<bits> label is handed to
 * dtrace_sign_and_set_thread_recover(); the value that call returns is given
 * back through dtrace_set_thread_recover() at the label.
 * NOTE(review): presumably this installs/restores the thread's
 * fault-recovery address -- confirm against the helper's definition.
 *
 * The invalid-page branch deliberately falls through to the label instead of
 * returning directly, so that the saved recover value is handed back and
 * CPU_DTRACE_NOFAULT is cleared on that path too; rval is still 0 there.
 */
#define DTRACE_LOADFUNC(bits)						\
/*CSTYLED*/								\
uint##bits##_t dtrace_load##bits(uintptr_t addr);			\
									\
uint##bits##_t								\
dtrace_load##bits(uintptr_t addr)					\
{									\
	size_t size = bits / NBBY;					\
	/*CSTYLED*/							\
	uint##bits##_t rval = 0;					\
	int i;								\
	volatile uint16_t *flags = (volatile uint16_t *)		\
	    &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;			\
									\
	DTRACE_ALIGNCHECK(addr, size, flags);				\
									\
	for (i = 0; i < dtrace_toxranges; i++) {			\
		if (addr >= dtrace_toxrange[i].dtt_limit)		\
			continue;					\
									\
		if (addr + size <= dtrace_toxrange[i].dtt_base)	\
			continue;					\
									\
		/*							\
		 * This address falls within a toxic region; return 0.	\
		 */							\
		*flags |= CPU_DTRACE_BADADDR;				\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;	\
		return (0);						\
	}								\
									\
	{								\
	volatile vm_offset_t recover = (vm_offset_t)&&dtraceLoadRecover##bits; \
	*flags |= CPU_DTRACE_NOFAULT;					\
	recover = dtrace_sign_and_set_thread_recover(current_thread(), recover); \
	/*CSTYLED*/							\
	/*								\
	 * PR6394061 - avoid device memory that is unpredictably	\
	 * mapped and unmapped						\
	 */								\
	if (pmap_valid_page(pmap_find_phys(kernel_pmap, addr))) {	\
		rval = *((volatile uint##bits##_t *)addr);		\
	} else {							\
		/*							\
		 * Not a valid page: flag it, then fall through to the	\
		 * common exit below so the recover value is handed	\
		 * back and NOFAULT is cleared; rval is still 0.	\
		 */							\
		*flags |= CPU_DTRACE_BADADDR;				\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;	\
	}								\
									\
	RECOVER_LABEL(bits);						\
	(void)dtrace_set_thread_recover(current_thread(), recover);	\
	*flags &= ~CPU_DTRACE_NOFAULT;					\
	}								\
									\
	return (rval);							\
}
#else /* all other architectures */
#error Unknown Architecture
#endif
2d21ac55 611
2d21ac55
A
612#ifdef __LP64__
613#define dtrace_loadptr dtrace_load64
614#else
615#define dtrace_loadptr dtrace_load32
616#endif
617
618#define DTRACE_DYNHASH_FREE 0
619#define DTRACE_DYNHASH_SINK 1
620#define DTRACE_DYNHASH_VALID 2
621
6d2010ae 622#define DTRACE_MATCH_FAIL -1
2d21ac55
A
623#define DTRACE_MATCH_NEXT 0
624#define DTRACE_MATCH_DONE 1
625#define DTRACE_ANCHORED(probe) ((probe)->dtpr_func[0] != '\0')
626#define DTRACE_STATE_ALIGN 64
627
/*
 * Map a CPU_DTRACE_* flag word to a single DTRACEFLT_* fault code.  The
 * flags are tested in the order listed and the first one set wins; if none
 * of them is set the result is DTRACEFLT_UNKNOWN.
 */
#define DTRACE_FLAGS2FLT(flags)						\
	(((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR :		\
	((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP :		\
	((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO :		\
	((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV :		\
	((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV :		\
	((flags) & CPU_DTRACE_TUPOFLOW) ?  DTRACEFLT_TUPOFLOW :		\
	((flags) & CPU_DTRACE_BADALIGN) ?  DTRACEFLT_BADALIGN :		\
	((flags) & CPU_DTRACE_NOSCRATCH) ?  DTRACEFLT_NOSCRATCH :	\
	((flags) & CPU_DTRACE_BADSTACK) ?  DTRACEFLT_BADSTACK :		\
	DTRACEFLT_UNKNOWN)
639
/*
 * True when `act` is a DIF-expression action whose DIF object has a return
 * type of kind DIF_TYPE_STRING.  dta_difo is only dereferenced when the
 * action kind matches.
 */
#define DTRACEACT_ISSTRING(act)						\
	((act)->dta_kind == DTRACEACT_DIFEXPR &&			\
	(act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING)
643
b0d623f7 644
b0d623f7 645static size_t dtrace_strlen(const char *, size_t);
2d21ac55
A
646static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
647static void dtrace_enabling_provide(dtrace_provider_t *);
39037602
A
648static int dtrace_enabling_match(dtrace_enabling_t *, int *, dtrace_match_cond_t *cond);
649static void dtrace_enabling_matchall_with_cond(dtrace_match_cond_t *cond);
2d21ac55
A
650static void dtrace_enabling_matchall(void);
651static dtrace_state_t *dtrace_anon_grab(void);
652static uint64_t dtrace_helper(int, dtrace_mstate_t *,
653 dtrace_state_t *, uint64_t, uint64_t);
654static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
655static void dtrace_buffer_drop(dtrace_buffer_t *);
656static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
657 dtrace_state_t *, dtrace_mstate_t *);
658static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
659 dtrace_optval_t);
d190cdc3 660static int dtrace_ecb_create_enable(dtrace_probe_t *, void *, void *);
2d21ac55 661static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
39037602
A
662static int dtrace_canload_remains(uint64_t, size_t, size_t *,
663 dtrace_mstate_t *, dtrace_vstate_t *);
664static int dtrace_canstore_remains(uint64_t, size_t, size_t *,
665 dtrace_mstate_t *, dtrace_vstate_t *);
2d21ac55 666
fe8ab488
A
667
668/*
669 * DTrace sysctl handlers
670 *
671 * These declarations and functions are used for a deeper DTrace configuration.
672 * Most of them are not per-consumer basis and may impact the other DTrace
673 * consumers. Correctness may not be supported for all the variables, so you
674 * should be careful about what values you are using.
675 */
676
677SYSCTL_DECL(_kern_dtrace);
678SYSCTL_NODE(_kern, OID_AUTO, dtrace, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "dtrace");
679
680static int
681sysctl_dtrace_err_verbose SYSCTL_HANDLER_ARGS
682{
683#pragma unused(oidp, arg2)
684 int changed, error;
685 int value = *(int *) arg1;
686
687 error = sysctl_io_number(req, value, sizeof(value), &value, &changed);
688 if (error || !changed)
689 return (error);
690
691 if (value != 0 && value != 1)
692 return (ERANGE);
693
694 lck_mtx_lock(&dtrace_lock);
695 dtrace_err_verbose = value;
696 lck_mtx_unlock(&dtrace_lock);
697
698 return (0);
699}
700
701/*
702 * kern.dtrace.err_verbose
703 *
704 * Set DTrace verbosity when an error occured (0 = disabled, 1 = enabld).
705 * Errors are reported when a DIFO or a DOF has been rejected by the kernel.
706 */
707SYSCTL_PROC(_kern_dtrace, OID_AUTO, err_verbose,
708 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
709 &dtrace_err_verbose, 0,
710 sysctl_dtrace_err_verbose, "I", "dtrace error verbose");
711
712static int
713sysctl_dtrace_buffer_memory_maxsize SYSCTL_HANDLER_ARGS
714{
715#pragma unused(oidp, arg2, req)
716 int changed, error;
717 uint64_t value = *(uint64_t *) arg1;
718
719 error = sysctl_io_number(req, value, sizeof(value), &value, &changed);
720 if (error || !changed)
721 return (error);
722
723 if (value <= dtrace_buffer_memory_inuse)
724 return (ERANGE);
725
726 lck_mtx_lock(&dtrace_lock);
727 dtrace_buffer_memory_maxsize = value;
728 lck_mtx_unlock(&dtrace_lock);
729
730 return (0);
731}
732
733/*
734 * kern.dtrace.buffer_memory_maxsize
735 *
736 * Set DTrace maximal size in bytes used by all the consumers' state buffers. By default
737 * the limit is PHYS_MEM / 3 for *all* consumers. Attempting to set a null, a negative value
738 * or a value <= to dtrace_buffer_memory_inuse will result in a failure.
739 */
740SYSCTL_PROC(_kern_dtrace, OID_AUTO, buffer_memory_maxsize,
741 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
742 &dtrace_buffer_memory_maxsize, 0,
743 sysctl_dtrace_buffer_memory_maxsize, "Q", "dtrace state buffer memory maxsize");
744
745/*
746 * kern.dtrace.buffer_memory_inuse
747 *
748 * Current state buffer memory used, in bytes, by all the DTrace consumers.
749 * This value is read-only.
750 */
751SYSCTL_QUAD(_kern_dtrace, OID_AUTO, buffer_memory_inuse, CTLFLAG_RD | CTLFLAG_LOCKED,
752 &dtrace_buffer_memory_inuse, "dtrace state buffer memory in-use");
753
754static int
755sysctl_dtrace_difo_maxsize SYSCTL_HANDLER_ARGS
756{
757#pragma unused(oidp, arg2, req)
758 int changed, error;
759 size_t value = *(size_t*) arg1;
760
761 error = sysctl_io_number(req, value, sizeof(value), &value, &changed);
762 if (error || !changed)
763 return (error);
764
765 if (value <= 0)
766 return (ERANGE);
767
768 lck_mtx_lock(&dtrace_lock);
769 dtrace_difo_maxsize = value;
770 lck_mtx_unlock(&dtrace_lock);
771
772 return (0);
773}
774
775/*
776 * kern.dtrace.difo_maxsize
777 *
778 * Set the DIFO max size in bytes, check the definition of dtrace_difo_maxsize
779 * to get the default value. Attempting to set a null or negative size will
780 * result in a failure.
781 */
782SYSCTL_PROC(_kern_dtrace, OID_AUTO, difo_maxsize,
783 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
784 &dtrace_difo_maxsize, 0,
785 sysctl_dtrace_difo_maxsize, "Q", "dtrace difo maxsize");
786
787static int
788sysctl_dtrace_dof_maxsize SYSCTL_HANDLER_ARGS
789{
790#pragma unused(oidp, arg2, req)
791 int changed, error;
792 dtrace_optval_t value = *(dtrace_optval_t *) arg1;
793
794 error = sysctl_io_number(req, value, sizeof(value), &value, &changed);
795 if (error || !changed)
796 return (error);
797
798 if (value <= 0)
799 return (ERANGE);
800
d9a64523
A
801 if (value >= dtrace_copy_maxsize())
802 return (ERANGE);
803
fe8ab488
A
804 lck_mtx_lock(&dtrace_lock);
805 dtrace_dof_maxsize = value;
806 lck_mtx_unlock(&dtrace_lock);
807
808 return (0);
809}
810
/*
 * kern.dtrace.dof_maxsize
 *
 * Set the DOF max size in bytes, check the definition of dtrace_dof_maxsize to
 * get the default value. Attempting to set a zero or negative size will result
 * in a failure.
 */
818SYSCTL_PROC(_kern_dtrace, OID_AUTO, dof_maxsize,
819 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
820 &dtrace_dof_maxsize, 0,
821 sysctl_dtrace_dof_maxsize, "Q", "dtrace dof maxsize");
822
823static int
ecc0ceb4 824sysctl_dtrace_statvar_maxsize SYSCTL_HANDLER_ARGS
fe8ab488
A
825{
826#pragma unused(oidp, arg2, req)
827 int changed, error;
828 dtrace_optval_t value = *(dtrace_optval_t*) arg1;
829
830 error = sysctl_io_number(req, value, sizeof(value), &value, &changed);
831 if (error || !changed)
832 return (error);
833
834 if (value <= 0)
835 return (ERANGE);
ecc0ceb4
A
836 if (value > dtrace_statvar_maxsize_max)
837 return (ERANGE);
fe8ab488
A
838
839 lck_mtx_lock(&dtrace_lock);
ecc0ceb4 840 dtrace_statvar_maxsize = value;
fe8ab488
A
841 lck_mtx_unlock(&dtrace_lock);
842
843 return (0);
844}
845
/*
 * kern.dtrace.global_maxsize
 *
 * Set the variable max size in bytes, check the definition of
 * dtrace_statvar_maxsize to get the default value. Attempting to set a zero,
 * too high or negative size will result in a failure.
 */
853SYSCTL_PROC(_kern_dtrace, OID_AUTO, global_maxsize,
854 CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
ecc0ceb4
A
855 &dtrace_statvar_maxsize, 0,
856 sysctl_dtrace_statvar_maxsize, "Q", "dtrace statvar maxsize");
fe8ab488
A
857
858static int
859sysctl_dtrace_provide_private_probes SYSCTL_HANDLER_ARGS
860{
861#pragma unused(oidp, arg2)
862 int error;
863 int value = *(int *) arg1;
864
865 error = sysctl_io_number(req, value, sizeof(value), &value, NULL);
866 if (error)
867 return (error);
868
5ba3f43e
A
869 if (req->newptr) {
870 if (value != 0 && value != 1)
871 return (ERANGE);
fe8ab488 872
5ba3f43e
A
873 /*
874 * We do not allow changing this back to zero, as private probes
875 * would still be left registered
876 */
877 if (value != 1)
878 return (EPERM);
fe8ab488 879
5ba3f43e
A
880 lck_mtx_lock(&dtrace_lock);
881 dtrace_provide_private_probes = value;
882 lck_mtx_unlock(&dtrace_lock);
883 }
fe8ab488
A
884 return (0);
885}
886
887/*
888 * kern.dtrace.provide_private_probes
889 *
890 * Set whether the providers must provide the private probes. This is
891 * mainly used by the FBT provider to request probes for the private/static
892 * symbols.
893 */
894SYSCTL_PROC(_kern_dtrace, OID_AUTO, provide_private_probes,
895 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
896 &dtrace_provide_private_probes, 0,
897 sysctl_dtrace_provide_private_probes, "I", "provider must provide the private probes");
898
d9a64523
A
899/*
900 * kern.dtrace.dof_mode
901 *
902 * Returns the current DOF mode.
903 * This value is read-only.
904 */
905SYSCTL_INT(_kern_dtrace, OID_AUTO, dof_mode, CTLFLAG_RD | CTLFLAG_LOCKED,
906 &dtrace_dof_mode, 0, "dtrace dof mode");
907
2d21ac55
A
/*
 * DTrace Probe Context Functions
 *
 * These functions are called from probe context.  Because probe context is
 * any context in which C may be called, arbitrary locks may be held,
 * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
 * As a result, functions called from probe context may only call other DTrace
 * support functions -- they may not interact at all with the system at large.
 * (Note that the ASSERT macro is made probe-context safe by redefining it in
 * terms of dtrace_assfail(), a probe-context safe function.) If arbitrary
 * loads are to be performed from probe context, they _must_ be in terms of
 * the safe dtrace_load*() variants.
 *
 * Some functions in this block are not actually called from probe context;
 * for these functions, there will be a comment above the function reading
 * "Note: not called from probe context."
 */
2d21ac55
A
925
/*
 * Report a failed assertion by calling panic() with the assertion text (a),
 * the file name (f) and the line number (l).
 */
int
dtrace_assfail(const char *a, const char *f, int l)
{
	panic("dtrace: assertion failed: %s, file: %s, line: %d", a, f, l);

	/*
	 * The return value only has to be something that a compiler cannot
	 * fold away.
	 */
	return (*(a + (uintptr_t)f));
}
937
/*
 * Atomically bump the given error counter from probe context.
 *
 * The update is a compare-and-swap retry loop on a 32-bit counter.  Should
 * the increment wrap to zero, 1 is stored instead so that a counter which has
 * recorded errors never reads as zero.  These counters are shared rather than
 * per-CPU, so concurrent increments on several CPUs will contend with each
 * other.
 */
static void
dtrace_error(uint32_t *counter)
{
	for (;;) {
		uint32_t seen = *counter;
		uint32_t bumped = seen + 1;

		/* Skip over zero on wrap-around. */
		if (bumped == 0)
			bumped = 1;

		if (dtrace_cas32(counter, seen, bumped) == seen)
			return;
	}
}
976
977/*
978 * Use the DTRACE_LOADFUNC macro to define functions for each of loading a
979 * uint8_t, a uint16_t, a uint32_t and a uint64_t.
980 */
981DTRACE_LOADFUNC(8)
982DTRACE_LOADFUNC(16)
983DTRACE_LOADFUNC(32)
984DTRACE_LOADFUNC(64)
985
986static int
987dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate)
988{
989 if (dest < mstate->dtms_scratch_base)
990 return (0);
991
992 if (dest + size < dest)
993 return (0);
994
995 if (dest + size > mstate->dtms_scratch_ptr)
996 return (0);
997
998 return (1);
999}
1000
1001static int
39037602 1002dtrace_canstore_statvar(uint64_t addr, size_t sz, size_t *remain,
2d21ac55
A
1003 dtrace_statvar_t **svars, int nsvars)
1004{
1005 int i;
1006
ecc0ceb4
A
1007 size_t maxglobalsize, maxlocalsize;
1008
39037602
A
1009 maxglobalsize = dtrace_statvar_maxsize + sizeof (uint64_t);
1010 maxlocalsize = (maxglobalsize) * NCPU;
ecc0ceb4
A
1011
1012 if (nsvars == 0)
1013 return (0);
1014
2d21ac55
A
1015 for (i = 0; i < nsvars; i++) {
1016 dtrace_statvar_t *svar = svars[i];
ecc0ceb4
A
1017 uint8_t scope;
1018 size_t size;
2d21ac55 1019
ecc0ceb4 1020 if (svar == NULL || (size = svar->dtsv_size) == 0)
2d21ac55
A
1021 continue;
1022
ecc0ceb4
A
1023 scope = svar->dtsv_var.dtdv_scope;
1024
1025 /**
1026 * We verify that our size is valid in the spirit of providing
1027 * defense in depth: we want to prevent attackers from using
1028 * DTrace to escalate an orthogonal kernel heap corruption bug
1029 * into the ability to store to arbitrary locations in memory.
1030 */
39037602
A
1031 VERIFY((scope == DIFV_SCOPE_GLOBAL && size <= maxglobalsize) ||
1032 (scope == DIFV_SCOPE_LOCAL && size <= maxlocalsize));
ecc0ceb4 1033
39037602
A
1034 if (DTRACE_INRANGE(addr, sz, svar->dtsv_data, svar->dtsv_size)) {
1035 DTRACE_RANGE_REMAIN(remain, addr, svar->dtsv_data,
1036 svar->dtsv_size);
2d21ac55 1037 return (1);
39037602 1038 }
2d21ac55
A
1039 }
1040
1041 return (0);
1042}
1043
1044/*
1045 * Check to see if the address is within a memory region to which a store may
1046 * be issued. This includes the DTrace scratch areas, and any DTrace variable
1047 * region. The caller of dtrace_canstore() is responsible for performing any
1048 * alignment checks that are needed before stores are actually executed.
1049 */
1050static int
1051dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
1052 dtrace_vstate_t *vstate)
39037602
A
1053{
1054 return (dtrace_canstore_remains(addr, sz, NULL, mstate, vstate));
1055}
1056/*
1057 * Implementation of dtrace_canstore which communicates the upper bound of the
1058 * allowed memory region.
1059 */
1060static int
1061dtrace_canstore_remains(uint64_t addr, size_t sz, size_t *remain,
1062 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
2d21ac55 1063{
2d21ac55
A
1064 /*
1065 * First, check to see if the address is in scratch space...
1066 */
b0d623f7 1067 if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base,
39037602
A
1068 mstate->dtms_scratch_size)) {
1069 DTRACE_RANGE_REMAIN(remain, addr, mstate->dtms_scratch_base,
1070 mstate->dtms_scratch_size);
2d21ac55 1071 return (1);
39037602 1072 }
2d21ac55
A
1073 /*
1074 * Now check to see if it's a dynamic variable. This check will pick
1075 * up both thread-local variables and any global dynamically-allocated
1076 * variables.
1077 */
b0d623f7
A
1078 if (DTRACE_INRANGE(addr, sz, (uintptr_t)vstate->dtvs_dynvars.dtds_base,
1079 vstate->dtvs_dynvars.dtds_size)) {
1080 dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
1081 uintptr_t base = (uintptr_t)dstate->dtds_base +
1082 (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t));
1083 uintptr_t chunkoffs;
39037602 1084 dtrace_dynvar_t *dvar;
b0d623f7
A
1085
1086 /*
1087 * Before we assume that we can store here, we need to make
1088 * sure that it isn't in our metadata -- storing to our
1089 * dynamic variable metadata would corrupt our state. For
1090 * the range to not include any dynamic variable metadata,
1091 * it must:
1092 *
1093 * (1) Start above the hash table that is at the base of
1094 * the dynamic variable space
1095 *
1096 * (2) Have a starting chunk offset that is beyond the
1097 * dtrace_dynvar_t that is at the base of every chunk
1098 *
1099 * (3) Not span a chunk boundary
1100 *
39037602
A
1101 * (4) Not be in the tuple space of a dynamic variable
1102 *
b0d623f7
A
1103 */
1104 if (addr < base)
1105 return (0);
1106
1107 chunkoffs = (addr - base) % dstate->dtds_chunksize;
1108
1109 if (chunkoffs < sizeof (dtrace_dynvar_t))
1110 return (0);
1111
1112 if (chunkoffs + sz > dstate->dtds_chunksize)
1113 return (0);
1114
39037602
A
1115 dvar = (dtrace_dynvar_t *)((uintptr_t)addr - chunkoffs);
1116
1117 if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE)
1118 return (0);
1119
1120 if (chunkoffs < sizeof (dtrace_dynvar_t) +
1121 ((dvar->dtdv_tuple.dtt_nkeys - 1) * sizeof (dtrace_key_t)))
1122 return (0);
1123
2d21ac55 1124 return (1);
b0d623f7 1125 }
2d21ac55
A
1126
1127 /*
1128 * Finally, check the static local and global variables. These checks
1129 * take the longest, so we perform them last.
1130 */
39037602 1131 if (dtrace_canstore_statvar(addr, sz, remain,
2d21ac55
A
1132 vstate->dtvs_locals, vstate->dtvs_nlocals))
1133 return (1);
1134
39037602 1135 if (dtrace_canstore_statvar(addr, sz, remain,
2d21ac55
A
1136 vstate->dtvs_globals, vstate->dtvs_nglobals))
1137 return (1);
1138
1139 return (0);
1140}
1141
b0d623f7
A
1142
1143/*
1144 * Convenience routine to check to see if the address is within a memory
1145 * region in which a load may be issued given the user's privilege level;
1146 * if not, it sets the appropriate error flags and loads 'addr' into the
1147 * illegal value slot.
1148 *
1149 * DTrace subroutines (DIF_SUBR_*) should use this helper to implement
1150 * appropriate memory access protection.
1151 */
5ba3f43e 1152int
b0d623f7
A
1153dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
1154 dtrace_vstate_t *vstate)
39037602
A
1155{
1156 return (dtrace_canload_remains(addr, sz, NULL, mstate, vstate));
1157}
1158
1159/*
1160 * Implementation of dtrace_canload which communicates the upper bound of the
1161 * allowed memory region.
1162 */
1163static int
1164dtrace_canload_remains(uint64_t addr, size_t sz, size_t *remain,
1165 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
b0d623f7 1166{
b0d623f7 1167 volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
b0d623f7
A
1168
1169 /*
1170 * If we hold the privilege to read from kernel memory, then
1171 * everything is readable.
1172 */
39037602
A
1173 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) {
1174 DTRACE_RANGE_REMAIN(remain, addr, addr, sz);
b0d623f7 1175 return (1);
39037602 1176 }
b0d623f7
A
1177
1178 /*
1179 * You can obviously read that which you can store.
1180 */
39037602 1181 if (dtrace_canstore_remains(addr, sz, remain, mstate, vstate))
b0d623f7
A
1182 return (1);
1183
1184 /*
1185 * We're allowed to read from our own string table.
1186 */
1187 if (DTRACE_INRANGE(addr, sz, (uintptr_t)mstate->dtms_difo->dtdo_strtab,
39037602
A
1188 mstate->dtms_difo->dtdo_strlen)) {
1189 DTRACE_RANGE_REMAIN(remain, addr,
1190 mstate->dtms_difo->dtdo_strtab,
1191 mstate->dtms_difo->dtdo_strlen);
b0d623f7 1192 return (1);
39037602 1193 }
b0d623f7
A
1194
1195 DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV);
1196 *illval = addr;
1197 return (0);
1198}
1199
1200/*
1201 * Convenience routine to check to see if a given string is within a memory
1202 * region in which a load may be issued given the user's privilege level;
1203 * this exists so that we don't need to issue unnecessary dtrace_strlen()
1204 * calls in the event that the user has all privileges.
1205 */
1206static int
39037602
A
1207dtrace_strcanload(uint64_t addr, size_t sz, size_t *remain,
1208 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
b0d623f7 1209{
39037602 1210 size_t rsize;
b0d623f7
A
1211
1212 /*
1213 * If we hold the privilege to read from kernel memory, then
1214 * everything is readable.
1215 */
39037602
A
1216 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) {
1217 DTRACE_RANGE_REMAIN(remain, addr, addr, sz);
b0d623f7 1218 return (1);
39037602 1219 }
b0d623f7 1220
39037602
A
1221 /*
1222 * Even if the caller is uninterested in querying the remaining valid
1223 * range, it is required to ensure that the access is allowed.
1224 */
1225 if (remain == NULL) {
1226 remain = &rsize;
1227 }
1228 if (dtrace_canload_remains(addr, 0, remain, mstate, vstate)) {
1229 size_t strsz;
1230 /*
1231 * Perform the strlen after determining the length of the
1232 * memory region which is accessible. This prevents timing
1233 * information from being used to find NULs in memory which is
1234 * not accessible to the caller.
1235 */
1236 strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr,
1237 MIN(sz, *remain));
1238 if (strsz <= *remain) {
1239 return (1);
1240 }
1241 }
b0d623f7
A
1242
1243 return (0);
1244}
1245
1246/*
1247 * Convenience routine to check to see if a given variable is within a memory
1248 * region in which a load may be issued given the user's privilege level.
1249 */
1250static int
39037602
A
1251dtrace_vcanload(void *src, dtrace_diftype_t *type, size_t *remain,
1252 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
b0d623f7
A
1253{
1254 size_t sz;
1255 ASSERT(type->dtdt_flags & DIF_TF_BYREF);
1256
39037602
A
1257 /*
1258 * Calculate the max size before performing any checks since even
1259 * DTRACE_ACCESS_KERNEL-credentialed callers expect that this function
1260 * return the max length via 'remain'.
1261 */
1262 if (type->dtdt_kind == DIF_TYPE_STRING) {
1263 dtrace_state_t *state = vstate->dtvs_state;
1264
1265 if (state != NULL) {
1266 sz = state->dts_options[DTRACEOPT_STRSIZE];
1267 } else {
1268 /*
1269 * In helper context, we have a NULL state; fall back
1270 * to using the system-wide default for the string size
1271 * in this case.
1272 */
1273 sz = dtrace_strsize_default;
1274 }
1275 } else {
1276 sz = type->dtdt_size;
1277 }
1278
b0d623f7
A
1279 /*
1280 * If we hold the privilege to read from kernel memory, then
1281 * everything is readable.
1282 */
39037602
A
1283 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) {
1284 DTRACE_RANGE_REMAIN(remain, (uintptr_t)src, src, sz);
b0d623f7 1285 return (1);
39037602 1286 }
b0d623f7 1287
39037602
A
1288 if (type->dtdt_kind == DIF_TYPE_STRING) {
1289 return (dtrace_strcanload((uintptr_t)src, sz, remain, mstate,
1290 vstate));
1291 }
1292 return (dtrace_canload_remains((uintptr_t)src, sz, remain, mstate,
1293 vstate));
b0d623f7
A
1294}
1295
2d21ac55
A
1296/*
1297 * Compare two strings using safe loads.
1298 */
1299static int
1300dtrace_strncmp(char *s1, char *s2, size_t limit)
1301{
1302 uint8_t c1, c2;
1303 volatile uint16_t *flags;
1304
1305 if (s1 == s2 || limit == 0)
1306 return (0);
1307
1308 flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
1309
1310 do {
b0d623f7 1311 if (s1 == NULL) {
2d21ac55 1312 c1 = '\0';
b0d623f7 1313 } else {
2d21ac55 1314 c1 = dtrace_load8((uintptr_t)s1++);
b0d623f7 1315 }
2d21ac55 1316
b0d623f7 1317 if (s2 == NULL) {
2d21ac55 1318 c2 = '\0';
b0d623f7 1319 } else {
2d21ac55 1320 c2 = dtrace_load8((uintptr_t)s2++);
b0d623f7 1321 }
2d21ac55
A
1322
1323 if (c1 != c2)
1324 return (c1 - c2);
1325 } while (--limit && c1 != '\0' && !(*flags & CPU_DTRACE_FAULT));
1326
1327 return (0);
1328}
1329
/*
 * Compute strlen(s) for a string using safe memory accesses.  The additional
 * lim parameter is used to specify a maximum length to ensure completion.
 *
 * Returns the number of bytes before the first NUL, or lim if no NUL is found
 * in the first lim bytes.  The counter is a size_t, matching both lim and the
 * return type, so that the bound still terminates the loop when lim exceeds
 * the range of a 32-bit integer.
 */
static size_t
dtrace_strlen(const char *s, size_t lim)
{
	size_t len = 0;

	while (len != lim && dtrace_load8((uintptr_t)s++) != '\0')
		len++;

	return (len);
}
1346
1347/*
1348 * Check if an address falls within a toxic region.
1349 */
1350static int
1351dtrace_istoxic(uintptr_t kaddr, size_t size)
1352{
1353 uintptr_t taddr, tsize;
1354 int i;
1355
1356 for (i = 0; i < dtrace_toxranges; i++) {
1357 taddr = dtrace_toxrange[i].dtt_base;
1358 tsize = dtrace_toxrange[i].dtt_limit - taddr;
1359
1360 if (kaddr - taddr < tsize) {
1361 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
1362 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = kaddr;
1363 return (1);
1364 }
1365
1366 if (taddr - kaddr < size) {
1367 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
1368 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = taddr;
1369 return (1);
1370 }
1371 }
1372
1373 return (0);
1374}
1375
/*
 * Copy len bytes from src to dst, reading the source one byte at a time
 * through dtrace_load8().  The src is assumed to be unsafe memory specified
 * by the DIF program; dst is assumed to be DTrace-managed memory that can be
 * written directly.  The copy runs forward when dst is at or below src and
 * backward otherwise, so overlapping regions are handled as with bcopy.
 */
static void
dtrace_bcopy(const void *src, void *dst, size_t len)
{
	uint8_t *out = dst;
	const uint8_t *in = src;

	if (len == 0)
		return;

	if (out <= in) {
		for (; len != 0; len--)
			*out++ = dtrace_load8((uintptr_t)in++);
		return;
	}

	/* dst is above src: copy from the tail backwards. */
	in += len;
	out += len;

	for (; len != 0; len--)
		*--out = dtrace_load8((uintptr_t)--in);
}
1403
/*
 * Copy bytes from src to dst, reading through dtrace_load8(), until either
 * len bytes have been copied or a NUL byte has been copied.  The src is
 * assumed to be unsafe memory specified by the DIF program; dst is assumed to
 * be DTrace-managed memory that can be written directly.  Overlapping regions
 * are not handled, unlike dtrace_bcopy().
 */
static void
dtrace_strcpy(const void *src, void *dst, size_t len)
{
	uint8_t *out = dst;
	const uint8_t *in = src;

	while (len-- != 0) {
		uint8_t ch = dtrace_load8((uintptr_t)in++);

		*out++ = ch;
		if (ch == '\0')
			break;
	}
}
1423
1424/*
1425 * Copy src to dst, deriving the size and type from the specified (BYREF)
1426 * variable type. The src is assumed to be unsafe memory specified by the DIF
1427 * program. The dst is assumed to be DTrace variable memory that is of the
1428 * specified type; we assume that we can store to directly.
1429 */
1430static void
39037602 1431dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type, size_t limit)
2d21ac55
A
1432{
1433 ASSERT(type->dtdt_flags & DIF_TF_BYREF);
1434
b0d623f7 1435 if (type->dtdt_kind == DIF_TYPE_STRING) {
39037602 1436 dtrace_strcpy(src, dst, MIN(type->dtdt_size, limit));
b0d623f7 1437 } else {
39037602
A
1438 dtrace_bcopy(src, dst, MIN(type->dtdt_size, limit));
1439 }
b0d623f7 1440}
2d21ac55
A
1441
1442/*
1443 * Compare s1 to s2 using safe memory accesses. The s1 data is assumed to be
1444 * unsafe memory specified by the DIF program. The s2 data is assumed to be
1445 * safe memory that we can access directly because it is managed by DTrace.
1446 */
1447static int
1448dtrace_bcmp(const void *s1, const void *s2, size_t len)
1449{
1450 volatile uint16_t *flags;
1451
1452 flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
1453
1454 if (s1 == s2)
1455 return (0);
1456
1457 if (s1 == NULL || s2 == NULL)
1458 return (1);
1459
1460 if (s1 != s2 && len != 0) {
1461 const uint8_t *ps1 = s1;
1462 const uint8_t *ps2 = s2;
1463
1464 do {
1465 if (dtrace_load8((uintptr_t)ps1++) != *ps2++)
1466 return (1);
1467 } while (--len != 0 && !(*flags & CPU_DTRACE_FAULT));
1468 }
1469 return (0);
1470}
1471
/*
 * Clear len bytes starting at dst, one byte at a time.  The stores are
 * direct, so this is only for safe DTrace-managed memory.
 */
static void
dtrace_bzero(void *dst, size_t len)
{
	unsigned char *out = dst;

	while (len != 0) {
		*out++ = 0;
		len--;
	}
}
1484
b0d623f7
A
/*
 * Add two 128-bit values, each stored as a pair of 64-bit words with the low
 * word at index 0, and write the result to 'sum'.  Both words are computed
 * before either is stored, so 'sum' may alias an addend.  Overflow out of the
 * high word is discarded.
 */
static void
dtrace_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
{
	uint64_t lo = addend1[0] + addend2[0];
	uint64_t carry = 0;
	uint64_t hi;

	/* The low word wrapped if it ended up smaller than either input. */
	if (lo < addend1[0] || lo < addend2[0])
		carry = 1;

	hi = addend1[1] + addend2[1] + carry;

	sum[0] = lo;
	sum[1] = hi;
}
1497
/*
 * Shift the 128-bit value in a by b bits, in place.  a[0] is the low 64-bit
 * word and a[1] is the high word.  If b is positive, shift left; if b is
 * negative, shift right (logical).  A magnitude of 128 or more clears the
 * value.
 */
static void
dtrace_shift_128(uint64_t *a, int b)
{
	if (b == 0)
		return;

	/*
	 * Everything is shifted out.  Handling this first also keeps every
	 * shift count below in the range 0..63 and avoids negating INT_MIN.
	 */
	if (b >= 128 || b <= -128) {
		a[0] = 0;
		a[1] = 0;
		return;
	}

	if (b < 0) {
		b = -b;
		if (b >= 64) {
			a[0] = a[1] >> (b - 64);
			a[1] = 0;
		} else {
			/*
			 * The low b bits of the high word become the top b
			 * bits of the low word.  The unsigned left shift
			 * already discards the bits that do not fit, so no
			 * mask is needed.
			 */
			a[0] = (a[0] >> b) | (a[1] << (64 - b));
			a[1] >>= b;
		}
	} else {
		if (b >= 64) {
			a[1] = a[0] << (b - 64);
			a[0] = 0;
		} else {
			/* The top b bits of the low word carry into the high word. */
			a[1] = (a[1] << b) | (a[0] >> (64 - b));
			a[0] <<= b;
		}
	}
}
1534
1535/*
1536 * The basic idea is to break the 2 64-bit values into 4 32-bit values,
1537 * use native multiplication on those, and then re-combine into the
1538 * resulting 128-bit value.
1539 *
1540 * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) =
1541 * hi1 * hi2 << 64 +
1542 * hi1 * lo2 << 32 +
1543 * hi2 * lo1 << 32 +
1544 * lo1 * lo2
1545 */
1546static void
1547dtrace_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)
1548{
1549 uint64_t hi1, hi2, lo1, lo2;
1550 uint64_t tmp[2];
1551
1552 hi1 = factor1 >> 32;
1553 hi2 = factor2 >> 32;
1554
1555 lo1 = factor1 & DT_MASK_LO;
1556 lo2 = factor2 & DT_MASK_LO;
1557
1558 product[0] = lo1 * lo2;
1559 product[1] = hi1 * hi2;
1560
1561 tmp[0] = hi1 * lo2;
1562 tmp[1] = 0;
1563 dtrace_shift_128(tmp, 32);
1564 dtrace_add_128(product, tmp, product);
1565
1566 tmp[0] = hi2 * lo1;
1567 tmp[1] = 0;
1568 dtrace_shift_128(tmp, 32);
1569 dtrace_add_128(product, tmp, product);
1570}
1571
2d21ac55
A
1572/*
1573 * This privilege check should be used by actions and subroutines to
1574 * verify that the user credentials of the process that enabled the
1575 * invoking ECB match the target credentials
1576 */
1577static int
1578dtrace_priv_proc_common_user(dtrace_state_t *state)
1579{
1580 cred_t *cr, *s_cr = state->dts_cred.dcr_cred;
1581
1582 /*
1583 * We should always have a non-NULL state cred here, since if cred
1584 * is null (anonymous tracing), we fast-path bypass this routine.
1585 */
1586 ASSERT(s_cr != NULL);
1587
2d21ac55 1588 if ((cr = dtrace_CRED()) != NULL &&
6d2010ae
A
1589 posix_cred_get(s_cr)->cr_uid == posix_cred_get(cr)->cr_uid &&
1590 posix_cred_get(s_cr)->cr_uid == posix_cred_get(cr)->cr_ruid &&
1591 posix_cred_get(s_cr)->cr_uid == posix_cred_get(cr)->cr_suid &&
1592 posix_cred_get(s_cr)->cr_gid == posix_cred_get(cr)->cr_gid &&
1593 posix_cred_get(s_cr)->cr_gid == posix_cred_get(cr)->cr_rgid &&
1594 posix_cred_get(s_cr)->cr_gid == posix_cred_get(cr)->cr_sgid)
2d21ac55
A
1595 return (1);
1596
1597 return (0);
1598}
1599
1600/*
1601 * This privilege check should be used by actions and subroutines to
1602 * verify that the zone of the process that enabled the invoking ECB
1603 * matches the target credentials
1604 */
1605static int
1606dtrace_priv_proc_common_zone(dtrace_state_t *state)
1607{
1608 cred_t *cr, *s_cr = state->dts_cred.dcr_cred;
fe8ab488 1609#pragma unused(cr, s_cr, state) /* __APPLE__ */
2d21ac55
A
1610
1611 /*
1612 * We should always have a non-NULL state cred here, since if cred
1613 * is null (anonymous tracing), we fast-path bypass this routine.
1614 */
1615 ASSERT(s_cr != NULL);
1616
fe8ab488 1617 return 1; /* APPLE NOTE: Darwin doesn't do zones. */
2d21ac55
A
1618}
1619
/*
 * Privilege check for actions and subroutines, meant to verify that the
 * process has not setuid or changed credentials.  Darwin omits the
 * "No Core Dump" flag, so this always returns 1.
 */
static int
dtrace_priv_proc_common_nocd(void)
{
	return (1);
}
2d21ac55
A
1629
1630static int
1631dtrace_priv_proc_destructive(dtrace_state_t *state)
1632{
1633 int action = state->dts_cred.dcr_action;
1634
cf7d32b8
A
1635 if (ISSET(current_proc()->p_lflag, P_LNOATTACH))
1636 goto bad;
fe8ab488
A
1637
1638 if (dtrace_is_restricted() && !dtrace_can_attach_to_proc(current_proc()))
1639 goto bad;
cf7d32b8 1640
2d21ac55
A
1641 if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) &&
1642 dtrace_priv_proc_common_zone(state) == 0)
1643 goto bad;
1644
1645 if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER) == 0) &&
1646 dtrace_priv_proc_common_user(state) == 0)
1647 goto bad;
1648
1649 if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG) == 0) &&
1650 dtrace_priv_proc_common_nocd() == 0)
1651 goto bad;
1652
1653 return (1);
1654
1655bad:
1656 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
1657
1658 return (0);
1659}
1660
1661static int
1662dtrace_priv_proc_control(dtrace_state_t *state)
1663{
cf7d32b8
A
1664 if (ISSET(current_proc()->p_lflag, P_LNOATTACH))
1665 goto bad;
fe8ab488
A
1666
1667 if (dtrace_is_restricted() && !dtrace_can_attach_to_proc(current_proc()))
1668 goto bad;
cf7d32b8 1669
2d21ac55
A
1670 if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL)
1671 return (1);
1672
1673 if (dtrace_priv_proc_common_zone(state) &&
1674 dtrace_priv_proc_common_user(state) &&
1675 dtrace_priv_proc_common_nocd())
1676 return (1);
1677
cf7d32b8 1678bad:
2d21ac55
A
1679 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
1680
1681 return (0);
1682}
1683
1684static int
1685dtrace_priv_proc(dtrace_state_t *state)
1686{
cf7d32b8
A
1687 if (ISSET(current_proc()->p_lflag, P_LNOATTACH))
1688 goto bad;
fe8ab488 1689
39037602 1690 if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed() && !dtrace_can_attach_to_proc(current_proc()))
fe8ab488 1691 goto bad;
cf7d32b8 1692
2d21ac55
A
1693 if (state->dts_cred.dcr_action & DTRACE_CRA_PROC)
1694 return (1);
1695
cf7d32b8 1696bad:
2d21ac55
A
1697 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
1698
1699 return (0);
1700}
1701
fe8ab488
A
1702/*
1703 * The P_LNOATTACH check is an Apple specific check.
1704 * We need a version of dtrace_priv_proc() that omits
1705 * that check for PID and EXECNAME accesses
1706 */
935ed37a
A
1707static int
1708dtrace_priv_proc_relaxed(dtrace_state_t *state)
1709{
1710
1711 if (state->dts_cred.dcr_action & DTRACE_CRA_PROC)
1712 return (1);
1713
1714 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;
1715
1716 return (0);
1717}
935ed37a 1718
2d21ac55
A
1719static int
1720dtrace_priv_kernel(dtrace_state_t *state)
1721{
39037602 1722 if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed())
fe8ab488
A
1723 goto bad;
1724
2d21ac55
A
1725 if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL)
1726 return (1);
1727
fe8ab488 1728bad:
2d21ac55
A
1729 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;
1730
1731 return (0);
1732}
1733
1734static int
1735dtrace_priv_kernel_destructive(dtrace_state_t *state)
1736{
fe8ab488
A
1737 if (dtrace_is_restricted())
1738 goto bad;
1739
2d21ac55
A
1740 if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL_DESTRUCTIVE)
1741 return (1);
1742
fe8ab488 1743bad:
2d21ac55
A
1744 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;
1745
1746 return (0);
1747}
1748
1749/*
1750 * Note: not called from probe context. This function is called
1751 * asynchronously (and at a regular interval) from outside of probe context to
1752 * clean the dirty dynamic variable lists on all CPUs. Dynamic variable
1753 * cleaning is explained in detail in <sys/dtrace_impl.h>.
1754 */
fe8ab488 1755static void
2d21ac55
A
1756dtrace_dynvar_clean(dtrace_dstate_t *dstate)
1757{
1758 dtrace_dynvar_t *dirty;
1759 dtrace_dstate_percpu_t *dcpu;
1760 int i, work = 0;
1761
c910b4d9 1762 for (i = 0; i < (int)NCPU; i++) {
2d21ac55
A
1763 dcpu = &dstate->dtds_percpu[i];
1764
1765 ASSERT(dcpu->dtdsc_rinsing == NULL);
1766
1767 /*
1768 * If the dirty list is NULL, there is no dirty work to do.
1769 */
1770 if (dcpu->dtdsc_dirty == NULL)
1771 continue;
1772
1773 /*
1774 * If the clean list is non-NULL, then we're not going to do
1775 * any work for this CPU -- it means that there has not been
1776 * a dtrace_dynvar() allocation on this CPU (or from this CPU)
1777 * since the last time we cleaned house.
1778 */
1779 if (dcpu->dtdsc_clean != NULL)
1780 continue;
1781
1782 work = 1;
1783
1784 /*
1785 * Atomically move the dirty list aside.
1786 */
1787 do {
1788 dirty = dcpu->dtdsc_dirty;
1789
1790 /*
1791 * Before we zap the dirty list, set the rinsing list.
1792 * (This allows for a potential assertion in
1793 * dtrace_dynvar(): if a free dynamic variable appears
1794 * on a hash chain, either the dirty list or the
1795 * rinsing list for some CPU must be non-NULL.)
1796 */
1797 dcpu->dtdsc_rinsing = dirty;
1798 dtrace_membar_producer();
1799 } while (dtrace_casptr(&dcpu->dtdsc_dirty,
1800 dirty, NULL) != dirty);
1801 }
1802
1803 if (!work) {
1804 /*
1805 * We have no work to do; we can simply return.
1806 */
1807 return;
1808 }
1809
1810 dtrace_sync();
1811
c910b4d9 1812 for (i = 0; i < (int)NCPU; i++) {
2d21ac55
A
1813 dcpu = &dstate->dtds_percpu[i];
1814
1815 if (dcpu->dtdsc_rinsing == NULL)
1816 continue;
1817
1818 /*
1819 * We are now guaranteed that no hash chain contains a pointer
1820 * into this dirty list; we can make it clean.
1821 */
1822 ASSERT(dcpu->dtdsc_clean == NULL);
1823 dcpu->dtdsc_clean = dcpu->dtdsc_rinsing;
1824 dcpu->dtdsc_rinsing = NULL;
1825 }
1826
1827 /*
1828 * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make
1829 * sure that all CPUs have seen all of the dtdsc_clean pointers.
1830 * This prevents a race whereby a CPU incorrectly decides that
1831 * the state should be something other than DTRACE_DSTATE_CLEAN
1832 * after dtrace_dynvar_clean() has completed.
1833 */
1834 dtrace_sync();
1835
1836 dstate->dtds_state = DTRACE_DSTATE_CLEAN;
1837}
1838
1839/*
 * Look up, allocate or deallocate an arbitrarily-keyed dynamic variable,
 * depending on the value of the op parameter.
 *
 *   dstate  - dynamic variable state: supplies the hash table, the hash
 *             size, the chunk size and the per-CPU free/clean/dirty/rinsing
 *             lists used below.
 *   nkeys   - number of entries in key[]; must be non-zero.
 *   key     - the key tuple.  An entry with dttk_size == 0 is compared and
 *             hashed by value; any other entry is treated as a reference to
 *             dttk_size bytes at dttk_value.
 *   dsize   - size of the referred-to data; only used to check that the
 *             variable fits into a chunk.
 *   op      - DTRACE_DYNVAR_ALLOC, DTRACE_DYNVAR_NOALLOC or
 *             DTRACE_DYNVAR_DEALLOC.
 *   mstate, vstate - passed through to dtrace_canload() for by-reference
 *             keys.
 *
 * Returns a pointer to the matching (or newly allocated) dtrace_dynvar_t.
 * NULL is returned when: CPU_DTRACE_FAULT is set after hashing, a NOALLOC
 * lookup finds nothing, a DEALLOC completes (whether or not a variable was
 * found), the variable would not fit into a chunk, or no chunk can be
 * obtained from any CPU.  In the allocation-failure cases the appropriate
 * drop counter is incremented.
 *
 * A DEALLOC takes the per-bucket lock word (odd == locked) for its whole
 * duration; lookups and allocations never take it and instead retry from
 * "top" when they detect a concurrent change.
 */
fe8ab488 1847static dtrace_dynvar_t *
2d21ac55 1848dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys,
b0d623f7
A
1849 dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op,
1850 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
2d21ac55
A
1851{
1852 uint64_t hashval = DTRACE_DYNHASH_VALID;
1853 dtrace_dynhash_t *hash = dstate->dtds_hash;
1854 dtrace_dynvar_t *free, *new_free, *next, *dvar, *start, *prev = NULL;
1855 processorid_t me = CPU->cpu_id, cpu = me;
1856 dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[me];
1857 size_t bucket, ksize;
1858 size_t chunksize = dstate->dtds_chunksize;
1859 uintptr_t kdata, lock, nstate;
1860 uint_t i;
1861
1862 ASSERT(nkeys != 0);
1863
1864 /*
1865 * Hash the key. As with aggregations, we use Jenkins' "One-at-a-time"
1866 * algorithm. For the by-value portions, we perform the algorithm in
1867 * 16-bit chunks (as opposed to 8-bit chunks). This speeds things up a
1868 * bit, and seems to have only a minute effect on distribution. For
1869 * the by-reference data, we perform "One-at-a-time" iterating (safely)
1870 * over each referenced byte. It's painful to do this, but it's much
1871 * better than pathological hash distribution. The efficacy of the
1872 * hashing algorithm (and a comparison with other algorithms) may be
1873 * found by running the ::dtrace_dynstat MDB dcmd.
1874 */
1875 for (i = 0; i < nkeys; i++) {
1876 if (key[i].dttk_size == 0) {
1877 uint64_t val = key[i].dttk_value;
1878
1879 hashval += (val >> 48) & 0xffff;
1880 hashval += (hashval << 10);
1881 hashval ^= (hashval >> 6);
1882
1883 hashval += (val >> 32) & 0xffff;
1884 hashval += (hashval << 10);
1885 hashval ^= (hashval >> 6);
1886
1887 hashval += (val >> 16) & 0xffff;
1888 hashval += (hashval << 10);
1889 hashval ^= (hashval >> 6);
1890
1891 hashval += val & 0xffff;
1892 hashval += (hashval << 10);
1893 hashval ^= (hashval >> 6);
1894 } else {
1895 /*
1896 * This is incredibly painful, but it beats the hell
1897 * out of the alternative.
1898 */
1899 uint64_t j, size = key[i].dttk_size;
1900 uintptr_t base = (uintptr_t)key[i].dttk_value;
1901
 /*
  * Stop hashing as soon as a referenced key may not be loaded.
  * NOTE(review): presumably dtrace_canload() flags a fault on
  * failure so that the CPU_DTRACE_FAULT test after this loop
  * returns NULL -- confirm against dtrace_canload().
  */
b0d623f7
A
1902 if (!dtrace_canload(base, size, mstate, vstate))
1903 break;
1904
2d21ac55
A
1905 for (j = 0; j < size; j++) {
1906 hashval += dtrace_load8(base + j);
1907 hashval += (hashval << 10);
1908 hashval ^= (hashval >> 6);
1909 }
1910 }
1911 }
1912
b0d623f7
A
1913 if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
1914 return (NULL);
1915
2d21ac55
A
1916 hashval += (hashval << 3);
1917 hashval ^= (hashval >> 11);
1918 hashval += (hashval << 15);
1919
1920 /*
1921 * There is a remote chance (ideally, 1 in 2^31) that our hashval
1922 * comes out to be one of our two sentinel hash values. If this
1923 * actually happens, we set the hashval to be a value known to be a
1924 * non-sentinel value.
1925 */
1926 if (hashval == DTRACE_DYNHASH_FREE || hashval == DTRACE_DYNHASH_SINK)
1927 hashval = DTRACE_DYNHASH_VALID;
1928
1929 /*
1930 * Yes, it's painful to do a divide here. If the cycle count becomes
1931 * important here, tricks can be pulled to reduce it. (However, it's
1932 * critical that hash collisions be kept to an absolute minimum;
1933 * they're much more painful than a divide.) It's better to have a
1934 * solution that generates few collisions and still keeps things
1935 * relatively simple.
1936 */
1937 bucket = hashval % dstate->dtds_hashsize;
1938
 /*
  * Deallocation takes the bucket lock: spin while the lock word is odd
  * (held), then CAS it from the even value to that value plus one.  The
  * matching unlock is the dtdh_lock++ on each DEALLOC return path.
  */
1939 if (op == DTRACE_DYNVAR_DEALLOC) {
1940 volatile uintptr_t *lockp = &hash[bucket].dtdh_lock;
1941
1942 for (;;) {
1943 while ((lock = *lockp) & 1)
1944 continue;
1945
b0d623f7
A
1946 if (dtrace_casptr((void *)(uintptr_t)lockp,
1947 (void *)lock, (void *)(lock + 1)) == (void *)lock)
1948 break;
2d21ac55
A
1949 }
1950
1951 dtrace_membar_producer();
1952 }
1953
 /*
  * Every (re)traversal starts here: snapshot the lock word first, then
  * the chain head, so that a NOALLOC miss can later compare the lock
  * word against this snapshot.
  */
1954top:
1955 prev = NULL;
1956 lock = hash[bucket].dtdh_lock;
1957
1958 dtrace_membar_consumer();
1959
1960 start = hash[bucket].dtdh_chain;
1961 ASSERT(start != NULL && (start->dtdv_hashval == DTRACE_DYNHASH_SINK ||
1962 start->dtdv_hashval != DTRACE_DYNHASH_FREE ||
1963 op != DTRACE_DYNVAR_DEALLOC));
1964
1965 for (dvar = start; dvar != NULL; dvar = dvar->dtdv_next) {
1966 dtrace_tuple_t *dtuple = &dvar->dtdv_tuple;
1967 dtrace_key_t *dkey = &dtuple->dtt_key[0];
1968
1969 if (dvar->dtdv_hashval != hashval) {
1970 if (dvar->dtdv_hashval == DTRACE_DYNHASH_SINK) {
1971 /*
1972 * We've reached the sink, and therefore the
1973 * end of the hash chain; we can kick out of
1974 * the loop knowing that we have seen a valid
1975 * snapshot of state.
1976 */
1977 ASSERT(dvar->dtdv_next == NULL);
1978 ASSERT(dvar == &dtrace_dynhash_sink);
1979 break;
1980 }
1981
1982 if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) {
1983 /*
1984 * We've gone off the rails: somewhere along
1985 * the line, one of the members of this hash
1986 * chain was deleted. Note that we could also
1987 * detect this by simply letting this loop run
1988 * to completion, as we would eventually hit
1989 * the end of the dirty list. However, we
1990 * want to avoid running the length of the
1991 * dirty list unnecessarily (it might be quite
1992 * long), so we catch this as early as
1993 * possible by detecting the hash marker. In
1994 * this case, we simply set dvar to NULL and
1995 * break; the conditional after the loop will
1996 * send us back to top.
1997 */
1998 dvar = NULL;
1999 break;
2000 }
2001
2002 goto next;
2003 }
2004
2005 if (dtuple->dtt_nkeys != nkeys)
2006 goto next;
2007
 /* Same hash and key count: compare the tuple element by element. */
2008 for (i = 0; i < nkeys; i++, dkey++) {
2009 if (dkey->dttk_size != key[i].dttk_size)
2010 goto next; /* size or type mismatch */
2011
2012 if (dkey->dttk_size != 0) {
2013 if (dtrace_bcmp(
2014 (void *)(uintptr_t)key[i].dttk_value,
2015 (void *)(uintptr_t)dkey->dttk_value,
2016 dkey->dttk_size))
2017 goto next;
2018 } else {
2019 if (dkey->dttk_value != key[i].dttk_value)
2020 goto next;
2021 }
2022 }
2023
 /* A match: lookups and allocations simply return it. */
2024 if (op != DTRACE_DYNVAR_DEALLOC)
2025 return (dvar);
2026
2027 ASSERT(dvar->dtdv_next == NULL ||
2028 dvar->dtdv_next->dtdv_hashval != DTRACE_DYNHASH_FREE);
2029
 /*
  * Unlink the variable.  An interior element is unlinked with a plain
  * store; the chain head is swung with a CAS.
  */
2030 if (prev != NULL) {
2031 ASSERT(hash[bucket].dtdh_chain != dvar);
2032 ASSERT(start != dvar);
2033 ASSERT(prev->dtdv_next == dvar);
2034 prev->dtdv_next = dvar->dtdv_next;
2035 } else {
2036 if (dtrace_casptr(&hash[bucket].dtdh_chain,
2037 start, dvar->dtdv_next) != start) {
2038 /*
2039 * We have failed to atomically swing the
2040 * hash table head pointer, presumably because
2041 * of a conflicting allocation on another CPU.
2042 * We need to reread the hash chain and try
2043 * again.
2044 */
2045 goto top;
2046 }
2047 }
2048
2049 dtrace_membar_producer();
2050
2051 /*
2052 * Now set the hash value to indicate that it's free.
2053 */
2054 ASSERT(hash[bucket].dtdh_chain != dvar);
2055 dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;
2056
2057 dtrace_membar_producer();
2058
2059 /*
2060 * Set the next pointer to point at the dirty list, and
2061 * atomically swing the dirty pointer to the newly freed dvar.
2062 */
2063 do {
2064 next = dcpu->dtdsc_dirty;
2065 dvar->dtdv_next = next;
2066 } while (dtrace_casptr(&dcpu->dtdsc_dirty, next, dvar) != next);
2067
2068 /*
2069 * Finally, unlock this hash bucket.
2070 */
2071 ASSERT(hash[bucket].dtdh_lock == lock);
2072 ASSERT(lock & 1);
2073 hash[bucket].dtdh_lock++;
2074
2075 return (NULL);
2076next:
2077 prev = dvar;
2078 continue;
2079 }
2080
2081 if (dvar == NULL) {
2082 /*
2083 * If dvar is NULL, it is because we went off the rails:
2084 * one of the elements that we traversed in the hash chain
2085 * was deleted while we were traversing it. In this case,
2086 * we assert that we aren't doing a dealloc (deallocs lock
2087 * the hash bucket to prevent themselves from racing with
2088 * one another), and retry the hash chain traversal.
2089 */
2090 ASSERT(op != DTRACE_DYNVAR_DEALLOC);
2091 goto top;
2092 }
2093
2094 if (op != DTRACE_DYNVAR_ALLOC) {
2095 /*
2096 * If we are not to allocate a new variable, we want to
2097 * return NULL now. Before we return, check that the value
2098 * of the lock word hasn't changed. If it has, we may have
2099 * seen an inconsistent snapshot.
2100 */
2101 if (op == DTRACE_DYNVAR_NOALLOC) {
2102 if (hash[bucket].dtdh_lock != lock)
2103 goto top;
2104 } else {
2105 ASSERT(op == DTRACE_DYNVAR_DEALLOC);
2106 ASSERT(hash[bucket].dtdh_lock == lock);
2107 ASSERT(lock & 1);
2108 hash[bucket].dtdh_lock++;
2109 }
2110
2111 return (NULL);
2112 }
2113
2114 /*
2115 * We need to allocate a new dynamic variable. The size we need is the
2116 * size of dtrace_dynvar plus the size of nkeys dtrace_key_t's plus the
2117 * size of any auxiliary key data (rounded up to 8-byte alignment) plus
2118 * the size of any referred-to data (dsize). We then round the final
2119 * size up to the chunksize for allocation.
2120 */
2121 for (ksize = 0, i = 0; i < nkeys; i++)
2122 ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t));
2123
2124 /*
2125 * This should be pretty much impossible, but could happen if, say,
2126 * strange DIF specified the tuple. Ideally, this should be an
2127 * assertion and not an error condition -- but that requires that the
2128 * chunksize calculation in dtrace_difo_chunksize() be absolutely
2129 * bullet-proof. (That is, it must not be able to be fooled by
2130 * malicious DIF.) Given the lack of backwards branches in DIF,
2131 * solving this would presumably not amount to solving the Halting
2132 * Problem -- but it still seems awfully hard.
2133 */
2134 if (sizeof (dtrace_dynvar_t) + sizeof (dtrace_key_t) * (nkeys - 1) +
2135 ksize + dsize > chunksize) {
2136 dcpu->dtdsc_drops++;
2137 return (NULL);
2138 }
2139
 /*
  * nstate accumulates the state to move dtds_state to if a full lap
  * over all CPUs finds no free or clean chunk.
  */
2140 nstate = DTRACE_DSTATE_EMPTY;
2141
2142 do {
2143retry:
2144 free = dcpu->dtdsc_free;
2145
2146 if (free == NULL) {
2147 dtrace_dynvar_t *clean = dcpu->dtdsc_clean;
2148 void *rval;
2149
2150 if (clean == NULL) {
2151 /*
2152 * We're out of dynamic variable space on
2153 * this CPU. Unless we have tried all CPUs,
2154 * we'll try to allocate from a different
2155 * CPU.
2156 */
2157 switch (dstate->dtds_state) {
2158 case DTRACE_DSTATE_CLEAN: {
2159 void *sp = &dstate->dtds_state;
2160
c910b4d9 2161 if (++cpu >= (int)NCPU)
2d21ac55
A
2162 cpu = 0;
2163
2164 if (dcpu->dtdsc_dirty != NULL &&
2165 nstate == DTRACE_DSTATE_EMPTY)
2166 nstate = DTRACE_DSTATE_DIRTY;
2167
2168 if (dcpu->dtdsc_rinsing != NULL)
2169 nstate = DTRACE_DSTATE_RINSING;
2170
2171 dcpu = &dstate->dtds_percpu[cpu];
2172
2173 if (cpu != me)
2174 goto retry;
2175
2176 (void) dtrace_cas32(sp,
2177 DTRACE_DSTATE_CLEAN, nstate);
2178
2179 /*
2180 * To increment the correct bean
2181 * counter, take another lap.
2182 */
2183 goto retry;
2184 }
2185
2186 case DTRACE_DSTATE_DIRTY:
2187 dcpu->dtdsc_dirty_drops++;
2188 break;
2189
2190 case DTRACE_DSTATE_RINSING:
2191 dcpu->dtdsc_rinsing_drops++;
2192 break;
2193
2194 case DTRACE_DSTATE_EMPTY:
2195 dcpu->dtdsc_drops++;
2196 break;
2197 }
2198
2199 DTRACE_CPUFLAG_SET(CPU_DTRACE_DROP);
2200 return (NULL);
2201 }
2202
2203 /*
2204 * The clean list appears to be non-empty. We want to
2205 * move the clean list to the free list; we start by
2206 * moving the clean pointer aside.
2207 */
2208 if (dtrace_casptr(&dcpu->dtdsc_clean,
2209 clean, NULL) != clean) {
2210 /*
2211 * We are in one of two situations:
2212 *
2213 * (a) The clean list was switched to the
2214 * free list by another CPU.
2215 *
2216 * (b) The clean list was added to by the
2217 * cleansing cyclic.
2218 *
2219 * In either of these situations, we can
2220 * just reattempt the free list allocation.
2221 */
2222 goto retry;
2223 }
2224
2225 ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE);
2226
2227 /*
2228 * Now we'll move the clean list to the free list.
2229 * It's impossible for this to fail: the only way
2230 * the free list can be updated is through this
2231 * code path, and only one CPU can own the clean list.
2232 * Thus, it would only be possible for this to fail if
2233 * this code were racing with dtrace_dynvar_clean().
2234 * (That is, if dtrace_dynvar_clean() updated the clean
2235 * list, and we ended up racing to update the free
2236 * list.) This race is prevented by the dtrace_sync()
2237 * in dtrace_dynvar_clean() -- which flushes the
2238 * owners of the clean lists out before resetting
2239 * the clean lists.
2240 */
2241 rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean);
2242 ASSERT(rval == NULL);
2243 goto retry;
2244 }
2245
2246 dvar = free;
2247 new_free = dvar->dtdv_next;
2248 } while (dtrace_casptr(&dcpu->dtdsc_free, free, new_free) != free);
2249
2250 /*
2251 * We have now allocated a new chunk. We copy the tuple keys into the
2252 * tuple array and copy any referenced key data into the data space
2253 * following the tuple array. As we do this, we relocate dttk_value
2254 * in the final tuple to point to the key data address in the chunk.
2255 */
2256 kdata = (uintptr_t)&dvar->dtdv_tuple.dtt_key[nkeys];
2257 dvar->dtdv_data = (void *)(kdata + ksize);
2258 dvar->dtdv_tuple.dtt_nkeys = nkeys;
2259
2260 for (i = 0; i < nkeys; i++) {
2261 dtrace_key_t *dkey = &dvar->dtdv_tuple.dtt_key[i];
2262 size_t kesize = key[i].dttk_size;
2263
2264 if (kesize != 0) {
2265 dtrace_bcopy(
2266 (const void *)(uintptr_t)key[i].dttk_value,
2267 (void *)kdata, kesize);
2268 dkey->dttk_value = kdata;
2269 kdata += P2ROUNDUP(kesize, sizeof (uint64_t));
2270 } else {
2271 dkey->dttk_value = key[i].dttk_value;
2272 }
2273
2274 dkey->dttk_size = kesize;
2275 }
2276
 /*
  * Publish the new variable at the head of the chain.  "start" is the
  * head observed during the traversal above, so the CAS fails if the
  * head has changed since then.
  */
2277 ASSERT(dvar->dtdv_hashval == DTRACE_DYNHASH_FREE);
2278 dvar->dtdv_hashval = hashval;
2279 dvar->dtdv_next = start;
2280
2281 if (dtrace_casptr(&hash[bucket].dtdh_chain, start, dvar) == start)
2282 return (dvar);
2283
2284 /*
2285 * The cas has failed. Either another CPU is adding an element to
2286 * this hash chain, or another CPU is deleting an element from this
2287 * hash chain. The simplest way to deal with both of these cases
2288 * (though not necessarily the most efficient) is to free our
2289 * allocated block and tail-call ourselves. Note that the free is
2290 * to the dirty list and _not_ to the free list. This is to prevent
2291 * races with allocators, above.
2292 */
2293 dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;
2294
2295 dtrace_membar_producer();
2296
2297 do {
2298 free = dcpu->dtdsc_dirty;
2299 dvar->dtdv_next = free;
2300 } while (dtrace_casptr(&dcpu->dtdsc_dirty, free, dvar) != free);
2301
b0d623f7 2302 return (dtrace_dynvar(dstate, nkeys, key, dsize, op, mstate, vstate));
2d21ac55
A
2303}
2304
/*
 * min() aggregator: replace the stored value with nval when nval is smaller.
 * Both values are compared as signed 64-bit integers.  arg is unused.
 */
/*ARGSUSED*/
static void
dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg)
{
#pragma unused(arg) /* __APPLE__ */
	int64_t candidate = (int64_t)nval;
	int64_t current = (int64_t)*oval;

	if (candidate >= current)
		return;

	*oval = nval;
}
2313
/*
 * max() aggregator: replace the stored value with nval when nval is larger.
 * Both values are compared as signed 64-bit integers.  arg is unused.
 */
/*ARGSUSED*/
static void
dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg)
{
#pragma unused(arg) /* __APPLE__ */
	int64_t candidate = (int64_t)nval;
	int64_t current = (int64_t)*oval;

	if (candidate <= current)
		return;

	*oval = nval;
}
2322
2323static void
2324dtrace_aggregate_quantize(uint64_t *quanta, uint64_t nval, uint64_t incr)
2325{
2326 int i, zero = DTRACE_QUANTIZE_ZEROBUCKET;
2327 int64_t val = (int64_t)nval;
2328
2329 if (val < 0) {
2330 for (i = 0; i < zero; i++) {
2331 if (val <= DTRACE_QUANTIZE_BUCKETVAL(i)) {
2332 quanta[i] += incr;
2333 return;
2334 }
2335 }
2336 } else {
2337 for (i = zero + 1; i < DTRACE_QUANTIZE_NBUCKETS; i++) {
2338 if (val < DTRACE_QUANTIZE_BUCKETVAL(i)) {
2339 quanta[i - 1] += incr;
2340 return;
2341 }
2342 }
2343
2344 quanta[DTRACE_QUANTIZE_NBUCKETS - 1] += incr;
2345 return;
2346 }
2347
2348 ASSERT(0);
2349}
2350
/*
 * lquantize() aggregator.  The first word of lquanta[] encodes the base,
 * step and number of levels; the buckets follow it.  nval is truncated to
 * a signed 32-bit value and incr is added to:
 *
 *   bucket 0           when the value is below base (underflow),
 *   bucket level + 1   when level = (value - base) / step is < levels,
 *   bucket levels + 1  otherwise (overflow).
 */
static void
dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr)
{
	uint64_t params = lquanta[0];
	uint64_t *buckets = lquanta + 1;
	int32_t base = DTRACE_LQUANTIZE_BASE(params);
	uint16_t step = DTRACE_LQUANTIZE_STEP(params);
	uint16_t levels = DTRACE_LQUANTIZE_LEVELS(params);
	int32_t val = (int32_t)nval;
	int32_t level;

	ASSERT(step != 0);
	ASSERT(levels != 0);

	if (val < base) {
		/* Underflow bucket. */
		buckets[0] += incr;
		return;
	}

	level = (val - base) / step;

	/* In-range levels map to level + 1; everything else overflows. */
	buckets[(level < levels) ? level + 1 : levels + 1] += incr;
}
2383
39236c6e
A
/*
 * Compute the llquantize() bucket index for value.
 *
 *   factor - the base of the logarithmic scale
 *   low    - lowest order of magnitude covered
 *   high   - highest order of magnitude covered
 *   nsteps - maximum number of linear buckets per order of magnitude
 *   value  - the value to place
 *
 * Returns 0 when value is below factor^low, the top bucket index when it
 * is at or above factor^(high + 1), and otherwise the index of the linear
 * bucket within the order of magnitude that contains it.
 */
static int
dtrace_aggregate_llquantize_bucket(int16_t factor, int16_t low, int16_t high,
    int16_t nsteps, int64_t value)
{
	int64_t upper = 1, lower, upcoming;
	int bucket_base = 1, magnitude = 0;

	/* upper = factor^low */
	while (magnitude < low) {
		upper *= factor;
		++magnitude;
	}

	/*
	 * Anything smaller than the lowest covered magnitude goes into the
	 * zeroth bucket.
	 */
	if (value < upper)
		return 0;

	lower = upper;
	upper *= factor;

	while (magnitude <= high) {
		int nbuckets = (upper > nsteps) ? nsteps : upper;

		/*
		 * If the next magnitude wraps around, clamp the value so
		 * that it is placed within the current magnitude.
		 */
		upcoming = upper * factor;
		if (upcoming < upper)
			value = upper - 1;

		/*
		 * Within [lower, upper): offset from the start of this
		 * magnitude, divided by the bucket width, added to the
		 * buckets accumulated so far.
		 */
		if (value < upper)
			return (bucket_base +
			    (value - lower) / (upper / nbuckets));

		bucket_base += nbuckets - (nbuckets / factor);
		lower = upper;
		upper = upcoming;
		++magnitude;
	}

	/*
	 * The value is at or beyond factor^(high + 1): return the top bucket.
	 */
	return bucket_base;
}
2439
/*
 * llquantize() aggregator.  The first word of llquanta[] encodes the
 * factor, low and high magnitudes and the number of steps; the buckets
 * follow it.  incr is added to the bucket that
 * dtrace_aggregate_llquantize_bucket() selects for nval.
 */
static void
dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr)
{
	uint64_t params = llquanta[0];
	uint64_t *buckets = &llquanta[1];
	uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(params);
	uint16_t low = DTRACE_LLQUANTIZE_LOW(params);
	uint16_t high = DTRACE_LLQUANTIZE_HIGH(params);
	uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(params);
	int slot;

	slot = dtrace_aggregate_llquantize_bucket(factor, low, high, nsteps,
	    nval);
	buckets[slot] += incr;
}
2451
2d21ac55
A
/*
 * avg() aggregator: data[0] counts the samples and data[1] accumulates
 * their sum.  arg is unused.
 */
/*ARGSUSED*/
static void
dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg)
{
#pragma unused(arg) /* __APPLE__ */
	uint64_t *count = &data[0];
	uint64_t *total = &data[1];

	*count += 1;
	*total += nval;
}
2460
/*
 * stddev() aggregator.  data[0] counts the samples, data[1] accumulates
 * their sum, and data[2..3] accumulate the sum of squares as a 128-bit
 * quantity.  arg is unused.
 */
/*ARGSUSED*/
static void
dtrace_aggregate_stddev(uint64_t *data, uint64_t nval, uint64_t arg)
{
#pragma unused(arg) /* __APPLE__ */
	uint64_t magnitude;
	uint64_t tmp[2];

	data[0]++;
	data[1] += nval;

	/*
	 * What we want to say here is:
	 *
	 * data[2] += nval * nval;
	 *
	 * But given that nval is 64-bit, we could easily overflow, so
	 * we do this as 128-bit arithmetic.
	 *
	 * The square only needs the magnitude of the (signed) sample.  It is
	 * computed with unsigned negation: negating the signed value would be
	 * undefined behavior for INT64_MIN, whereas 0 - nval is well defined
	 * and yields 2^63 in that case.
	 */
	magnitude = ((int64_t)nval < 0) ? (0 - nval) : nval;

	dtrace_multiply_128(magnitude, magnitude, tmp);
	dtrace_add_128(data + 2, tmp, data + 2);
}
2486
/*
 * count() aggregator: increment the stored value by one.  Neither nval nor
 * arg is used.
 */
/*ARGSUSED*/
static void
dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg)
{
#pragma unused(nval, arg) /* __APPLE__ */
	++(*oval);
}
2494
/*
 * sum() aggregator: add nval to the stored value.  arg is unused.
 */
/*ARGSUSED*/
static void
dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg)
{
#pragma unused(arg) /* __APPLE__ */
	uint64_t total = *oval;

	total += nval;
	*oval = total;
}
2502
2503/*
2504 * Aggregate given the tuple in the principal data buffer, and the aggregating
2505 * action denoted by the specified dtrace_aggregation_t. The aggregation
2506 * buffer is specified as the buf parameter. This routine does not return
2507 * failure; if there is no space in the aggregation buffer, the data will be
2508 * dropped, and a corresponding counter incremented.
2509 */
2510static void
2511dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf,
2512 intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg)
2513{
c910b4d9 2514#pragma unused(arg)
2d21ac55
A
2515 dtrace_recdesc_t *rec = &agg->dtag_action.dta_rec;
2516 uint32_t i, ndx, size, fsize;
2517 uint32_t align = sizeof (uint64_t) - 1;
2518 dtrace_aggbuffer_t *agb;
2519 dtrace_aggkey_t *key;
2520 uint32_t hashval = 0, limit, isstr;
2521 caddr_t tomax, data, kdata;
2522 dtrace_actkind_t action;
2523 dtrace_action_t *act;
2524 uintptr_t offs;
2525
2526 if (buf == NULL)
2527 return;
2528
2529 if (!agg->dtag_hasarg) {
2530 /*
2531 * Currently, only quantize() and lquantize() take additional
2532 * arguments, and they have the same semantics: an increment
2533 * value that defaults to 1 when not present. If additional
2534 * aggregating actions take arguments, the setting of the
2535 * default argument value will presumably have to become more
2536 * sophisticated...
2537 */
2538 arg = 1;
2539 }
2540
2541 action = agg->dtag_action.dta_kind - DTRACEACT_AGGREGATION;
2542 size = rec->dtrd_offset - agg->dtag_base;
2543 fsize = size + rec->dtrd_size;
2544
2545 ASSERT(dbuf->dtb_tomax != NULL);
2546 data = dbuf->dtb_tomax + offset + agg->dtag_base;
2547
2548 if ((tomax = buf->dtb_tomax) == NULL) {
2549 dtrace_buffer_drop(buf);
2550 return;
2551 }
2552
2553 /*
2554 * The metastructure is always at the bottom of the buffer.
2555 */
2556 agb = (dtrace_aggbuffer_t *)(tomax + buf->dtb_size -
2557 sizeof (dtrace_aggbuffer_t));
2558
2559 if (buf->dtb_offset == 0) {
2560 /*
2561 * We just kludge up approximately 1/8th of the size to be
2562 * buckets. If this guess ends up being routinely
2563 * off-the-mark, we may need to dynamically readjust this
2564 * based on past performance.
2565 */
2566 uintptr_t hashsize = (buf->dtb_size >> 3) / sizeof (uintptr_t);
2567
2568 if ((uintptr_t)agb - hashsize * sizeof (dtrace_aggkey_t *) <
2569 (uintptr_t)tomax || hashsize == 0) {
2570 /*
2571 * We've been given a ludicrously small buffer;
2572 * increment our drop count and leave.
2573 */
2574 dtrace_buffer_drop(buf);
2575 return;
2576 }
2577
2578 /*
2579 * And now, a pathetic attempt to try to get a an odd (or
2580 * perchance, a prime) hash size for better hash distribution.
2581 */
2582 if (hashsize > (DTRACE_AGGHASHSIZE_SLEW << 3))
2583 hashsize -= DTRACE_AGGHASHSIZE_SLEW;
2584
2585 agb->dtagb_hashsize = hashsize;
2586 agb->dtagb_hash = (dtrace_aggkey_t **)((uintptr_t)agb -
2587 agb->dtagb_hashsize * sizeof (dtrace_aggkey_t *));
2588 agb->dtagb_free = (uintptr_t)agb->dtagb_hash;
2589
2590 for (i = 0; i < agb->dtagb_hashsize; i++)
2591 agb->dtagb_hash[i] = NULL;
2592 }
2593
2594 ASSERT(agg->dtag_first != NULL);
2595 ASSERT(agg->dtag_first->dta_intuple);
2596
2597 /*
2598 * Calculate the hash value based on the key. Note that we _don't_
2599 * include the aggid in the hashing (but we will store it as part of
2600 * the key). The hashing algorithm is Bob Jenkins' "One-at-a-time"
2601 * algorithm: a simple, quick algorithm that has no known funnels, and
2602 * gets good distribution in practice. The efficacy of the hashing
2603 * algorithm (and a comparison with other algorithms) may be found by
2604 * running the ::dtrace_aggstat MDB dcmd.
2605 */
2606 for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
2607 i = act->dta_rec.dtrd_offset - agg->dtag_base;
2608 limit = i + act->dta_rec.dtrd_size;
2609 ASSERT(limit <= size);
2610 isstr = DTRACEACT_ISSTRING(act);
2611
2612 for (; i < limit; i++) {
2613 hashval += data[i];
2614 hashval += (hashval << 10);
2615 hashval ^= (hashval >> 6);
2616
2617 if (isstr && data[i] == '\0')
2618 break;
2619 }
2620 }
2621
2622 hashval += (hashval << 3);
2623 hashval ^= (hashval >> 11);
2624 hashval += (hashval << 15);
2625
2626 /*
2627 * Yes, the divide here is expensive -- but it's generally the least
2628 * of the performance issues given the amount of data that we iterate
2629 * over to compute hash values, compare data, etc.
2630 */
2631 ndx = hashval % agb->dtagb_hashsize;
2632
2633 for (key = agb->dtagb_hash[ndx]; key != NULL; key = key->dtak_next) {
2634 ASSERT((caddr_t)key >= tomax);
2635 ASSERT((caddr_t)key < tomax + buf->dtb_size);
2636
2637 if (hashval != key->dtak_hashval || key->dtak_size != size)
2638 continue;
2639
2640 kdata = key->dtak_data;
2641 ASSERT(kdata >= tomax && kdata < tomax + buf->dtb_size);
2642
2643 for (act = agg->dtag_first; act->dta_intuple;
2644 act = act->dta_next) {
2645 i = act->dta_rec.dtrd_offset - agg->dtag_base;
2646 limit = i + act->dta_rec.dtrd_size;
2647 ASSERT(limit <= size);
2648 isstr = DTRACEACT_ISSTRING(act);
2649
2650 for (; i < limit; i++) {
2651 if (kdata[i] != data[i])
2652 goto next;
2653
2654 if (isstr && data[i] == '\0')
2655 break;
2656 }
2657 }
2658
2659 if (action != key->dtak_action) {
2660 /*
2661 * We are aggregating on the same value in the same
2662 * aggregation with two different aggregating actions.
2663 * (This should have been picked up in the compiler,
2664 * so we may be dealing with errant or devious DIF.)
2665 * This is an error condition; we indicate as much,
2666 * and return.
2667 */
2668 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
2669 return;
2670 }
2671
2672 /*
2673 * This is a hit: we need to apply the aggregator to
2674 * the value at this key.
2675 */
2676 agg->dtag_aggregate((uint64_t *)(kdata + size), expr, arg);
2677 return;
2678next:
2679 continue;
2680 }
2681
2682 /*
2683 * We didn't find it. We need to allocate some zero-filled space,
2684 * link it into the hash table appropriately, and apply the aggregator
2685 * to the (zero-filled) value.
2686 */
2687 offs = buf->dtb_offset;
2688 while (offs & (align - 1))
2689 offs += sizeof (uint32_t);
2690
2691 /*
2692 * If we don't have enough room to both allocate a new key _and_
2693 * its associated data, increment the drop count and return.
2694 */
2695 if ((uintptr_t)tomax + offs + fsize >
2696 agb->dtagb_free - sizeof (dtrace_aggkey_t)) {
2697 dtrace_buffer_drop(buf);
2698 return;
2699 }
2700
2701 /*CONSTCOND*/
2702 ASSERT(!(sizeof (dtrace_aggkey_t) & (sizeof (uintptr_t) - 1)));
2703 key = (dtrace_aggkey_t *)(agb->dtagb_free - sizeof (dtrace_aggkey_t));
2704 agb->dtagb_free -= sizeof (dtrace_aggkey_t);
2705
2706 key->dtak_data = kdata = tomax + offs;
2707 buf->dtb_offset = offs + fsize;
2708
2709 /*
2710 * Now copy the data across.
2711 */
2712 *((dtrace_aggid_t *)kdata) = agg->dtag_id;
2713
2714 for (i = sizeof (dtrace_aggid_t); i < size; i++)
2715 kdata[i] = data[i];
2716
2717 /*
2718 * Because strings are not zeroed out by default, we need to iterate
2719 * looking for actions that store strings, and we need to explicitly
2720 * pad these strings out with zeroes.
2721 */
2722 for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
2723 int nul;
2724
2725 if (!DTRACEACT_ISSTRING(act))
2726 continue;
2727
2728 i = act->dta_rec.dtrd_offset - agg->dtag_base;
2729 limit = i + act->dta_rec.dtrd_size;
2730 ASSERT(limit <= size);
2731
2732 for (nul = 0; i < limit; i++) {
2733 if (nul) {
2734 kdata[i] = '\0';
2735 continue;
2736 }
2737
2738 if (data[i] != '\0')
2739 continue;
2740
2741 nul = 1;
2742 }
2743 }
2744
2745 for (i = size; i < fsize; i++)
2746 kdata[i] = 0;
2747
2748 key->dtak_hashval = hashval;
2749 key->dtak_size = size;
2750 key->dtak_action = action;
2751 key->dtak_next = agb->dtagb_hash[ndx];
2752 agb->dtagb_hash[ndx] = key;
2753
2754 /*
2755 * Finally, apply the aggregator.
2756 */
2757 *((uint64_t *)(key->dtak_data + size)) = agg->dtag_initial;
2758 agg->dtag_aggregate((uint64_t *)(key->dtak_data + size), expr, arg);
2759}
2760
2761/*
2762 * Given consumer state, this routine finds a speculation in the INACTIVE
2763 * state and transitions it into the ACTIVE state. If there is no speculation
2764 * in the INACTIVE state, 0 is returned. In this case, no error counter is
2765 * incremented -- it is up to the caller to take appropriate action.
2766 */
2767static int
2768dtrace_speculation(dtrace_state_t *state)
2769{
2770 int i = 0;
2771 dtrace_speculation_state_t current;
2772 uint32_t *stat = &state->dts_speculations_unavail, count;
2773
2774 while (i < state->dts_nspeculations) {
2775 dtrace_speculation_t *spec = &state->dts_speculations[i];
2776
2777 current = spec->dtsp_state;
2778
2779 if (current != DTRACESPEC_INACTIVE) {
2780 if (current == DTRACESPEC_COMMITTINGMANY ||
2781 current == DTRACESPEC_COMMITTING ||
2782 current == DTRACESPEC_DISCARDING)
2783 stat = &state->dts_speculations_busy;
2784 i++;
2785 continue;
2786 }
2787
2788 if (dtrace_cas32((uint32_t *)&spec->dtsp_state,
2789 current, DTRACESPEC_ACTIVE) == current)
2790 return (i + 1);
2791 }
2792
2793 /*
2794 * We couldn't find a speculation. If we found as much as a single
2795 * busy speculation buffer, we'll attribute this failure as "busy"
2796 * instead of "unavail".
2797 */
2798 do {
2799 count = *stat;
2800 } while (dtrace_cas32(stat, count, count + 1) != count);
2801
2802 return (0);
2803}
2804
2805/*
2806 * This routine commits an active speculation. If the specified speculation
2807 * is not in a valid state to perform a commit(), this routine will silently do
2808 * nothing. The state of the specified speculation is transitioned according
2809 * to the state transition diagram outlined in <sys/dtrace_impl.h>
2810 */
2811static void
2812dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu,
2813 dtrace_specid_t which)
2814{
2815 dtrace_speculation_t *spec;
2816 dtrace_buffer_t *src, *dest;
04b8595b 2817 uintptr_t daddr, saddr, dlimit, slimit;
b0d623f7 2818 dtrace_speculation_state_t current, new = DTRACESPEC_INACTIVE;
2d21ac55 2819 intptr_t offs;
04b8595b 2820 uint64_t timestamp;
2d21ac55
A
2821
2822 if (which == 0)
2823 return;
2824
b0d623f7
A
2825 if (which > (dtrace_specid_t)state->dts_nspeculations) {
2826 cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
2827 return;
2828 }
b0d623f7 2829
2d21ac55
A
2830 spec = &state->dts_speculations[which - 1];
2831 src = &spec->dtsp_buffer[cpu];
2832 dest = &state->dts_buffer[cpu];
2833
2834 do {
2835 current = spec->dtsp_state;
2836
2837 if (current == DTRACESPEC_COMMITTINGMANY)
2838 break;
2839
2840 switch (current) {
2841 case DTRACESPEC_INACTIVE:
2842 case DTRACESPEC_DISCARDING:
2843 return;
2844
2845 case DTRACESPEC_COMMITTING:
2846 /*
2847 * This is only possible if we are (a) commit()'ing
2848 * without having done a prior speculate() on this CPU
2849 * and (b) racing with another commit() on a different
2850 * CPU. There's nothing to do -- we just assert that
2851 * our offset is 0.
2852 */
2853 ASSERT(src->dtb_offset == 0);
2854 return;
2855
2856 case DTRACESPEC_ACTIVE:
2857 new = DTRACESPEC_COMMITTING;
2858 break;
2859
2860 case DTRACESPEC_ACTIVEONE:
2861 /*
2862 * This speculation is active on one CPU. If our
2863 * buffer offset is non-zero, we know that the one CPU
2864 * must be us. Otherwise, we are committing on a
2865 * different CPU from the speculate(), and we must
2866 * rely on being asynchronously cleaned.
2867 */
2868 if (src->dtb_offset != 0) {
2869 new = DTRACESPEC_COMMITTING;
2870 break;
2871 }
2872 /*FALLTHROUGH*/
2873
2874 case DTRACESPEC_ACTIVEMANY:
2875 new = DTRACESPEC_COMMITTINGMANY;
2876 break;
2877
2878 default:
2879 ASSERT(0);
2880 }
2881 } while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
2882 current, new) != current);
2883
2884 /*
2885 * We have set the state to indicate that we are committing this
2886 * speculation. Now reserve the necessary space in the destination
2887 * buffer.
2888 */
2889 if ((offs = dtrace_buffer_reserve(dest, src->dtb_offset,
2890 sizeof (uint64_t), state, NULL)) < 0) {
2891 dtrace_buffer_drop(dest);
2892 goto out;
2893 }
2894
2895 /*
04b8595b
A
2896 * We have sufficient space to copy the speculative buffer into the
2897 * primary buffer. First, modify the speculative buffer, filling
2898 * in the timestamp of all entries with the current time. The data
2899 * must have the commit() time rather than the time it was traced,
2900 * so that all entries in the primary buffer are in timestamp order.
2901 */
2902 timestamp = dtrace_gethrtime();
2903 saddr = (uintptr_t)src->dtb_tomax;
2904 slimit = saddr + src->dtb_offset;
2905 while (saddr < slimit) {
2906 size_t size;
2907 dtrace_rechdr_t *dtrh = (dtrace_rechdr_t *)saddr;
2908
2909 if (dtrh->dtrh_epid == DTRACE_EPIDNONE) {
2910 saddr += sizeof (dtrace_epid_t);
2911 continue;
2912 }
2913
2914 ASSERT(dtrh->dtrh_epid <= ((dtrace_epid_t) state->dts_necbs));
2915 size = state->dts_ecbs[dtrh->dtrh_epid - 1]->dte_size;
2916
2917 ASSERT(saddr + size <= slimit);
2918 ASSERT(size >= sizeof(dtrace_rechdr_t));
2919 ASSERT(DTRACE_RECORD_LOAD_TIMESTAMP(dtrh) == UINT64_MAX);
2920
2921 DTRACE_RECORD_STORE_TIMESTAMP(dtrh, timestamp);
2922
2923 saddr += size;
2924 }
2925
2926 /*
2927 * Copy the buffer across. (Note that this is a
2d21ac55
A
2928 * highly suboptimal bcopy(); in the unlikely event that this becomes
2929 * a serious performance issue, a high-performance DTrace-specific
2930 * bcopy() should obviously be invented.)
2931 */
2932 daddr = (uintptr_t)dest->dtb_tomax + offs;
2933 dlimit = daddr + src->dtb_offset;
2934 saddr = (uintptr_t)src->dtb_tomax;
2935
2936 /*
2937 * First, the aligned portion.
2938 */
2939 while (dlimit - daddr >= sizeof (uint64_t)) {
2940 *((uint64_t *)daddr) = *((uint64_t *)saddr);
2941
2942 daddr += sizeof (uint64_t);
2943 saddr += sizeof (uint64_t);
2944 }
2945
2946 /*
2947 * Now any left-over bit...
2948 */
2949 while (dlimit - daddr)
2950 *((uint8_t *)daddr++) = *((uint8_t *)saddr++);
2951
2952 /*
2953 * Finally, commit the reserved space in the destination buffer.
2954 */
2955 dest->dtb_offset = offs + src->dtb_offset;
2956
2957out:
2958 /*
2959 * If we're lucky enough to be the only active CPU on this speculation
2960 * buffer, we can just set the state back to DTRACESPEC_INACTIVE.
2961 */
2962 if (current == DTRACESPEC_ACTIVE ||
2963 (current == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) {
2964 uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state,
2965 DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE);
b0d623f7 2966#pragma unused(rval) /* __APPLE__ */
2d21ac55
A
2967
2968 ASSERT(rval == DTRACESPEC_COMMITTING);
2969 }
2970
2971 src->dtb_offset = 0;
2972 src->dtb_xamot_drops += src->dtb_drops;
2973 src->dtb_drops = 0;
2974}
2975
2976/*
2977 * This routine discards an active speculation. If the specified speculation
2978 * is not in a valid state to perform a discard(), this routine will silently
2979 * do nothing. The state of the specified speculation is transitioned
2980 * according to the state transition diagram outlined in <sys/dtrace_impl.h>
2981 */
2982static void
2983dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu,
2984 dtrace_specid_t which)
2985{
2986 dtrace_speculation_t *spec;
b0d623f7 2987 dtrace_speculation_state_t current, new = DTRACESPEC_INACTIVE;
2d21ac55
A
2988 dtrace_buffer_t *buf;
2989
2990 if (which == 0)
2991 return;
2992
b0d623f7
A
2993 if (which > (dtrace_specid_t)state->dts_nspeculations) {
2994 cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
2995 return;
2996 }
2d21ac55
A
2997
2998 spec = &state->dts_speculations[which - 1];
2999 buf = &spec->dtsp_buffer[cpu];
3000
3001 do {
3002 current = spec->dtsp_state;
3003
3004 switch (current) {
3005 case DTRACESPEC_INACTIVE:
3006 case DTRACESPEC_COMMITTINGMANY:
3007 case DTRACESPEC_COMMITTING:
3008 case DTRACESPEC_DISCARDING:
3009 return;
3010
3011 case DTRACESPEC_ACTIVE:
3012 case DTRACESPEC_ACTIVEMANY:
3013 new = DTRACESPEC_DISCARDING;
3014 break;
3015
3016 case DTRACESPEC_ACTIVEONE:
3017 if (buf->dtb_offset != 0) {
3018 new = DTRACESPEC_INACTIVE;
3019 } else {
3020 new = DTRACESPEC_DISCARDING;
3021 }
3022 break;
3023
3024 default:
3025 ASSERT(0);
3026 }
3027 } while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
3028 current, new) != current);
3029
3030 buf->dtb_offset = 0;
3031 buf->dtb_drops = 0;
3032}
3033
3034/*
3035 * Note: not called from probe context. This function is called
3036 * asynchronously from cross call context to clean any speculations that are
3037 * in the COMMITTINGMANY or DISCARDING states. These speculations may not be
3038 * transitioned back to the INACTIVE state until all CPUs have cleaned the
3039 * speculation.
3040 */
3041static void
3042dtrace_speculation_clean_here(dtrace_state_t *state)
3043{
3044 dtrace_icookie_t cookie;
3045 processorid_t cpu = CPU->cpu_id;
3046 dtrace_buffer_t *dest = &state->dts_buffer[cpu];
3047 dtrace_specid_t i;
3048
3049 cookie = dtrace_interrupt_disable();
3050
3051 if (dest->dtb_tomax == NULL) {
3052 dtrace_interrupt_enable(cookie);
3053 return;
3054 }
3055
b0d623f7 3056 for (i = 0; i < (dtrace_specid_t)state->dts_nspeculations; i++) {
2d21ac55
A
3057 dtrace_speculation_t *spec = &state->dts_speculations[i];
3058 dtrace_buffer_t *src = &spec->dtsp_buffer[cpu];
3059
3060 if (src->dtb_tomax == NULL)
3061 continue;
3062
3063 if (spec->dtsp_state == DTRACESPEC_DISCARDING) {
3064 src->dtb_offset = 0;
3065 continue;
3066 }
3067
3068 if (spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
3069 continue;
3070
3071 if (src->dtb_offset == 0)
3072 continue;
3073
3074 dtrace_speculation_commit(state, cpu, i + 1);
3075 }
3076
3077 dtrace_interrupt_enable(cookie);
3078}
3079
3080/*
3081 * Note: not called from probe context. This function is called
3082 * asynchronously (and at a regular interval) to clean any speculations that
3083 * are in the COMMITTINGMANY or DISCARDING states. If it discovers that there
3084 * is work to be done, it cross calls all CPUs to perform that work;
3085 * COMMITMANY and DISCARDING speculations may not be transitioned back to the
3086 * INACTIVE state until they have been cleaned by all CPUs.
3087 */
3088static void
3089dtrace_speculation_clean(dtrace_state_t *state)
3090{
b0d623f7
A
3091 int work = 0;
3092 uint32_t rv;
2d21ac55
A
3093 dtrace_specid_t i;
3094
b0d623f7 3095 for (i = 0; i < (dtrace_specid_t)state->dts_nspeculations; i++) {
2d21ac55
A
3096 dtrace_speculation_t *spec = &state->dts_speculations[i];
3097
3098 ASSERT(!spec->dtsp_cleaning);
3099
3100 if (spec->dtsp_state != DTRACESPEC_DISCARDING &&
3101 spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
3102 continue;
3103
3104 work++;
3105 spec->dtsp_cleaning = 1;
3106 }
3107
3108 if (!work)
3109 return;
3110
3111 dtrace_xcall(DTRACE_CPUALL,
3112 (dtrace_xcall_t)dtrace_speculation_clean_here, state);
3113
3114 /*
3115 * We now know that all CPUs have committed or discarded their
3116 * speculation buffers, as appropriate. We can now set the state
3117 * to inactive.
3118 */
b0d623f7 3119 for (i = 0; i < (dtrace_specid_t)state->dts_nspeculations; i++) {
2d21ac55
A
3120 dtrace_speculation_t *spec = &state->dts_speculations[i];
3121 dtrace_speculation_state_t current, new;
3122
3123 if (!spec->dtsp_cleaning)
3124 continue;
3125
3126 current = spec->dtsp_state;
3127 ASSERT(current == DTRACESPEC_DISCARDING ||
3128 current == DTRACESPEC_COMMITTINGMANY);
3129
3130 new = DTRACESPEC_INACTIVE;
3131
3132 rv = dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new);
3133 ASSERT(rv == current);
3134 spec->dtsp_cleaning = 0;
3135 }
3136}
3137
3138/*
3139 * Called as part of a speculate() to get the speculative buffer associated
3140 * with a given speculation. Returns NULL if the specified speculation is not
3141 * in an ACTIVE state. If the speculation is in the ACTIVEONE state -- and
3142 * the active CPU is not the specified CPU -- the speculation will be
3143 * atomically transitioned into the ACTIVEMANY state.
3144 */
3145static dtrace_buffer_t *
3146dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid,
3147 dtrace_specid_t which)
3148{
3149 dtrace_speculation_t *spec;
b0d623f7 3150 dtrace_speculation_state_t current, new = DTRACESPEC_INACTIVE;
2d21ac55
A
3151 dtrace_buffer_t *buf;
3152
3153 if (which == 0)
3154 return (NULL);
3155
b0d623f7 3156 if (which > (dtrace_specid_t)state->dts_nspeculations) {
2d21ac55
A
3157 cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
3158 return (NULL);
3159 }
3160
3161 spec = &state->dts_speculations[which - 1];
3162 buf = &spec->dtsp_buffer[cpuid];
3163
3164 do {
3165 current = spec->dtsp_state;
3166
3167 switch (current) {
3168 case DTRACESPEC_INACTIVE:
3169 case DTRACESPEC_COMMITTINGMANY:
3170 case DTRACESPEC_DISCARDING:
3171 return (NULL);
3172
3173 case DTRACESPEC_COMMITTING:
3174 ASSERT(buf->dtb_offset == 0);
3175 return (NULL);
3176
3177 case DTRACESPEC_ACTIVEONE:
3178 /*
3179 * This speculation is currently active on one CPU.
3180 * Check the offset in the buffer; if it's non-zero,
3181 * that CPU must be us (and we leave the state alone).
3182 * If it's zero, assume that we're starting on a new
3183 * CPU -- and change the state to indicate that the
3184 * speculation is active on more than one CPU.
3185 */
3186 if (buf->dtb_offset != 0)
3187 return (buf);
3188
3189 new = DTRACESPEC_ACTIVEMANY;
3190 break;
3191
3192 case DTRACESPEC_ACTIVEMANY:
3193 return (buf);
3194
3195 case DTRACESPEC_ACTIVE:
3196 new = DTRACESPEC_ACTIVEONE;
3197 break;
3198
3199 default:
3200 ASSERT(0);
3201 }
3202 } while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
3203 current, new) != current);
3204
3205 ASSERT(new == DTRACESPEC_ACTIVEONE || new == DTRACESPEC_ACTIVEMANY);
3206 return (buf);
3207}
3208
b0d623f7
A
3209/*
3210 * Return a string. In the event that the user lacks the privilege to access
3211 * arbitrary kernel memory, we copy the string out to scratch memory so that we
3212 * don't fail access checking.
3213 *
3214 * dtrace_dif_variable() uses this routine as a helper for various
3215 * builtin values such as 'execname' and 'probefunc.'
3216 */
b0d623f7 3217static
b0d623f7
A
3218uintptr_t
3219dtrace_dif_varstr(uintptr_t addr, dtrace_state_t *state,
3220 dtrace_mstate_t *mstate)
3221{
3222 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3223 uintptr_t ret;
3224 size_t strsz;
3225
3226 /*
3227 * The easy case: this probe is allowed to read all of memory, so
3228 * we can just return this as a vanilla pointer.
3229 */
3230 if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
3231 return (addr);
3232
3233 /*
3234 * This is the tougher case: we copy the string in question from
3235 * kernel memory into scratch memory and return it that way: this
3236 * ensures that we won't trip up when access checking tests the
3237 * BYREF return value.
3238 */
3239 strsz = dtrace_strlen((char *)addr, size) + 1;
3240
3241 if (mstate->dtms_scratch_ptr + strsz >
3242 mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
3243 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
fe8ab488 3244 return (0);
b0d623f7
A
3245 }
3246
3247 dtrace_strcpy((const void *)addr, (void *)mstate->dtms_scratch_ptr,
3248 strsz);
3249 ret = mstate->dtms_scratch_ptr;
3250 mstate->dtms_scratch_ptr += strsz;
3251 return (ret);
3252}
3253
2d21ac55
A
3254/*
3255 * This function implements the DIF emulator's variable lookups. The emulator
3256 * passes a reserved variable identifier and optional built-in array index.
3257 */
3258static uint64_t
3259dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
3260 uint64_t ndx)
3261{
3262 /*
3263 * If we're accessing one of the uncached arguments, we'll turn this
3264 * into a reference in the args array.
3265 */
3266 if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) {
3267 ndx = v - DIF_VAR_ARG0;
3268 v = DIF_VAR_ARGS;
3269 }
3270
3271 switch (v) {
3272 case DIF_VAR_ARGS:
3273 ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS);
3274 if (ndx >= sizeof (mstate->dtms_arg) /
3275 sizeof (mstate->dtms_arg[0])) {
fe8ab488
A
3276 /*
3277 * APPLE NOTE: Account for introduction of __dtrace_probe()
3278 */
2d21ac55 3279 int aframes = mstate->dtms_probe->dtpr_aframes + 3;
5ba3f43e 3280 dtrace_vstate_t *vstate = &state->dts_vstate;
2d21ac55
A
3281 dtrace_provider_t *pv;
3282 uint64_t val;
3283
3284 pv = mstate->dtms_probe->dtpr_provider;
3285 if (pv->dtpv_pops.dtps_getargval != NULL)
3286 val = pv->dtpv_pops.dtps_getargval(pv->dtpv_arg,
3287 mstate->dtms_probe->dtpr_id,
3288 mstate->dtms_probe->dtpr_arg, ndx, aframes);
b0d623f7 3289 /* Special case access of arg5 as passed to dtrace_probe_error() (which see.) */
2d21ac55 3290 else if (mstate->dtms_probe->dtpr_id == dtrace_probeid_error && ndx == 5) {
b0d623f7 3291 return ((dtrace_state_t *)(uintptr_t)(mstate->dtms_arg[0]))->dts_arg_error_illval;
2d21ac55 3292 }
fe8ab488 3293
2d21ac55 3294 else
5ba3f43e 3295 val = dtrace_getarg(ndx, aframes, mstate, vstate);
2d21ac55
A
3296
3297 /*
3298 * This is regrettably required to keep the compiler
3299 * from tail-optimizing the call to dtrace_getarg().
3300 * The condition always evaluates to true, but the
3301 * compiler has no way of figuring that out a priori.
3302 * (None of this would be necessary if the compiler
3303 * could be relied upon to _always_ tail-optimize
3304 * the call to dtrace_getarg() -- but it can't.)
3305 */
3306 if (mstate->dtms_probe != NULL)
3307 return (val);
3308
3309 ASSERT(0);
3310 }
3311
3312 return (mstate->dtms_arg[ndx]);
3313
2d21ac55
A
3314 case DIF_VAR_UREGS: {
3315 thread_t thread;
3316
3317 if (!dtrace_priv_proc(state))
3318 return (0);
3319
3320 if ((thread = current_thread()) == NULL) {
3321 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
3322 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = 0;
3323 return (0);
3324 }
3325
3326 return (dtrace_getreg(find_user_regs(thread), ndx));
3327 }
2d21ac55 3328
fe8ab488 3329
2d21ac55
A
3330 case DIF_VAR_CURTHREAD:
3331 if (!dtrace_priv_kernel(state))
3332 return (0);
3333
3334 return ((uint64_t)(uintptr_t)current_thread());
2d21ac55
A
3335
3336 case DIF_VAR_TIMESTAMP:
3337 if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) {
3338 mstate->dtms_timestamp = dtrace_gethrtime();
3339 mstate->dtms_present |= DTRACE_MSTATE_TIMESTAMP;
3340 }
3341 return (mstate->dtms_timestamp);
3342
2d21ac55
A
3343 case DIF_VAR_VTIMESTAMP:
3344 ASSERT(dtrace_vtime_references != 0);
3345 return (dtrace_get_thread_vtime(current_thread()));
2d21ac55
A
3346
3347 case DIF_VAR_WALLTIMESTAMP:
3348 if (!(mstate->dtms_present & DTRACE_MSTATE_WALLTIMESTAMP)) {
3349 mstate->dtms_walltimestamp = dtrace_gethrestime();
3350 mstate->dtms_present |= DTRACE_MSTATE_WALLTIMESTAMP;
3351 }
3352 return (mstate->dtms_walltimestamp);
3353
fe8ab488
A
3354 case DIF_VAR_MACHTIMESTAMP:
3355 if (!(mstate->dtms_present & DTRACE_MSTATE_MACHTIMESTAMP)) {
3356 mstate->dtms_machtimestamp = mach_absolute_time();
3357 mstate->dtms_present |= DTRACE_MSTATE_MACHTIMESTAMP;
3358 }
3359 return (mstate->dtms_machtimestamp);
3360
3e170ce0
A
3361 case DIF_VAR_CPU:
3362 return ((uint64_t) dtrace_get_thread_last_cpu_id(current_thread()));
3363
2d21ac55
A
3364 case DIF_VAR_IPL:
3365 if (!dtrace_priv_kernel(state))
3366 return (0);
3367 if (!(mstate->dtms_present & DTRACE_MSTATE_IPL)) {
3368 mstate->dtms_ipl = dtrace_getipl();
3369 mstate->dtms_present |= DTRACE_MSTATE_IPL;
3370 }
3371 return (mstate->dtms_ipl);
3372
3373 case DIF_VAR_EPID:
3374 ASSERT(mstate->dtms_present & DTRACE_MSTATE_EPID);
3375 return (mstate->dtms_epid);
3376
3377 case DIF_VAR_ID:
3378 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
3379 return (mstate->dtms_probe->dtpr_id);
3380
3381 case DIF_VAR_STACKDEPTH:
3382 if (!dtrace_priv_kernel(state))
3383 return (0);
3384 if (!(mstate->dtms_present & DTRACE_MSTATE_STACKDEPTH)) {
fe8ab488
A
3385 /*
3386 * APPLE NOTE: Account for introduction of __dtrace_probe()
3387 */
2d21ac55 3388 int aframes = mstate->dtms_probe->dtpr_aframes + 3;
2d21ac55
A
3389
3390 mstate->dtms_stackdepth = dtrace_getstackdepth(aframes);
3391 mstate->dtms_present |= DTRACE_MSTATE_STACKDEPTH;
3392 }
3393 return (mstate->dtms_stackdepth);
3394
3395 case DIF_VAR_USTACKDEPTH:
3396 if (!dtrace_priv_proc(state))
3397 return (0);
3398 if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) {
3399 /*
3400 * See comment in DIF_VAR_PID.
3401 */
3402 if (DTRACE_ANCHORED(mstate->dtms_probe) &&
3403 CPU_ON_INTR(CPU)) {
3404 mstate->dtms_ustackdepth = 0;
3405 } else {
3406 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3407 mstate->dtms_ustackdepth =
3408 dtrace_getustackdepth();
3409 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3410 }
3411 mstate->dtms_present |= DTRACE_MSTATE_USTACKDEPTH;
3412 }
3413 return (mstate->dtms_ustackdepth);
3414
3415 case DIF_VAR_CALLER:
3416 if (!dtrace_priv_kernel(state))
3417 return (0);
3418 if (!(mstate->dtms_present & DTRACE_MSTATE_CALLER)) {
fe8ab488
A
3419 /*
3420 * APPLE NOTE: Account for introduction of __dtrace_probe()
3421 */
2d21ac55 3422 int aframes = mstate->dtms_probe->dtpr_aframes + 3;
2d21ac55
A
3423
3424 if (!DTRACE_ANCHORED(mstate->dtms_probe)) {
3425 /*
3426 * If this is an unanchored probe, we are
3427 * required to go through the slow path:
3428 * dtrace_caller() only guarantees correct
3429 * results for anchored probes.
3430 */
3431 pc_t caller[2];
3432
3433 dtrace_getpcstack(caller, 2, aframes,
3434 (uint32_t *)(uintptr_t)mstate->dtms_arg[0]);
3435 mstate->dtms_caller = caller[1];
3436 } else if ((mstate->dtms_caller =
fe8ab488 3437 dtrace_caller(aframes)) == (uintptr_t)-1) {
2d21ac55
A
3438 /*
3439 * We have failed to do this the quick way;
3440 * we must resort to the slower approach of
3441 * calling dtrace_getpcstack().
3442 */
3443 pc_t caller;
3444
3445 dtrace_getpcstack(&caller, 1, aframes, NULL);
3446 mstate->dtms_caller = caller;
3447 }
3448
3449 mstate->dtms_present |= DTRACE_MSTATE_CALLER;
3450 }
3451 return (mstate->dtms_caller);
3452
3453 case DIF_VAR_UCALLER:
3454 if (!dtrace_priv_proc(state))
3455 return (0);
3456
3457 if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) {
3458 uint64_t ustack[3];
3459
3460 /*
3461 * dtrace_getupcstack() fills in the first uint64_t
3462 * with the current PID. The second uint64_t will
3463 * be the program counter at user-level. The third
3464 * uint64_t will contain the caller, which is what
3465 * we're after.
3466 */
fe8ab488 3467 ustack[2] = 0;
b0d623f7 3468 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
2d21ac55 3469 dtrace_getupcstack(ustack, 3);
b0d623f7 3470 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
2d21ac55
A
3471 mstate->dtms_ucaller = ustack[2];
3472 mstate->dtms_present |= DTRACE_MSTATE_UCALLER;
3473 }
3474
3475 return (mstate->dtms_ucaller);
3476
3477 case DIF_VAR_PROBEPROV:
3478 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
b0d623f7
A
3479 return (dtrace_dif_varstr(
3480 (uintptr_t)mstate->dtms_probe->dtpr_provider->dtpv_name,
3481 state, mstate));
2d21ac55
A
3482
3483 case DIF_VAR_PROBEMOD:
3484 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
b0d623f7
A
3485 return (dtrace_dif_varstr(
3486 (uintptr_t)mstate->dtms_probe->dtpr_mod,
3487 state, mstate));
2d21ac55
A
3488
3489 case DIF_VAR_PROBEFUNC:
3490 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
b0d623f7
A
3491 return (dtrace_dif_varstr(
3492 (uintptr_t)mstate->dtms_probe->dtpr_func,
3493 state, mstate));
2d21ac55
A
3494
3495 case DIF_VAR_PROBENAME:
3496 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
b0d623f7
A
3497 return (dtrace_dif_varstr(
3498 (uintptr_t)mstate->dtms_probe->dtpr_name,
3499 state, mstate));
2d21ac55 3500
2d21ac55 3501 case DIF_VAR_PID:
935ed37a 3502 if (!dtrace_priv_proc_relaxed(state))
2d21ac55
A
3503 return (0);
3504
3505 /*
3506 * Note that we are assuming that an unanchored probe is
3507 * always due to a high-level interrupt. (And we're assuming
3508 * that there is only a single high level interrupt.)
3509 */
3510 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3511 /* Anchored probe that fires while on an interrupt accrues to process 0 */
3512 return 0;
3513
39236c6e 3514 return ((uint64_t)dtrace_proc_selfpid());
2d21ac55 3515
2d21ac55 3516 case DIF_VAR_PPID:
935ed37a 3517 if (!dtrace_priv_proc_relaxed(state))
2d21ac55
A
3518 return (0);
3519
3520 /*
3521 * See comment in DIF_VAR_PID.
3522 */
3523 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3524 return (0);
3525
39236c6e 3526 return ((uint64_t)dtrace_proc_selfppid());
2d21ac55 3527
2d21ac55 3528 case DIF_VAR_TID:
b0d623f7
A
3529 /* We do not need to check for null current_thread() */
3530 return thread_tid(current_thread()); /* globally unique */
3531
3532 case DIF_VAR_PTHREAD_SELF:
3533 if (!dtrace_priv_proc(state))
3534 return (0);
3535
3536 /* Not currently supported, but we should be able to delta the dispatchqaddr and dispatchqoffset to get pthread_self */
3537 return 0;
3538
3539 case DIF_VAR_DISPATCHQADDR:
3540 if (!dtrace_priv_proc(state))
2d21ac55
A
3541 return (0);
3542
b0d623f7
A
3543 /* We do not need to check for null current_thread() */
3544 return thread_dispatchqaddr(current_thread());
2d21ac55 3545
2d21ac55
A
3546 case DIF_VAR_EXECNAME:
3547 {
3548 char *xname = (char *)mstate->dtms_scratch_ptr;
3549 size_t scratch_size = MAXCOMLEN+1;
3550
3551 /* The scratch allocation's lifetime is that of the clause. */
b0d623f7
A
3552 if (!DTRACE_INSCRATCH(mstate, scratch_size)) {
3553 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
2d21ac55 3554 return 0;
b0d623f7 3555 }
2d21ac55 3556
935ed37a 3557 if (!dtrace_priv_proc_relaxed(state))
2d21ac55
A
3558 return (0);
3559
3560 mstate->dtms_scratch_ptr += scratch_size;
3e170ce0 3561 proc_selfname( xname, scratch_size );
2d21ac55
A
3562
3563 return ((uint64_t)(uintptr_t)xname);
3564 }
2d21ac55 3565
2d21ac55 3566
2d21ac55 3567 case DIF_VAR_ZONENAME:
5ba3f43e
A
3568 {
3569 /* scratch_size is equal to length('global') + 1 for the null-terminator. */
3570 char *zname = (char *)mstate->dtms_scratch_ptr;
3571 size_t scratch_size = 6 + 1;
39236c6e 3572
2d21ac55
A
3573 if (!dtrace_priv_proc(state))
3574 return (0);
39236c6e 3575
5ba3f43e
A
3576 /* The scratch allocation's lifetime is that of the clause. */
3577 if (!DTRACE_INSCRATCH(mstate, scratch_size)) {
3578 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3579 return 0;
3580 }
3581
3582 mstate->dtms_scratch_ptr += scratch_size;
39236c6e 3583
5ba3f43e
A
3584 /* The kernel does not provide zonename, it will always return 'global'. */
3585 strlcpy(zname, "global", scratch_size);
39236c6e 3586
5ba3f43e
A
3587 return ((uint64_t)(uintptr_t)zname);
3588 }
39236c6e 3589
5ba3f43e
A
3590#if MONOTONIC
3591 case DIF_VAR_CPUINSTRS:
3592 return mt_cur_cpu_instrs();
3593
3594 case DIF_VAR_CPUCYCLES:
3595 return mt_cur_cpu_cycles();
3596
3597 case DIF_VAR_VINSTRS:
3598 return mt_cur_thread_instrs();
3599
3600 case DIF_VAR_VCYCLES:
3601 return mt_cur_thread_cycles();
3602#else /* MONOTONIC */
3603 case DIF_VAR_CPUINSTRS: /* FALLTHROUGH */
3604 case DIF_VAR_CPUCYCLES: /* FALLTHROUGH */
3605 case DIF_VAR_VINSTRS: /* FALLTHROUGH */
3606 case DIF_VAR_VCYCLES: /* FALLTHROUGH */
3607 return 0;
3608#endif /* !MONOTONIC */
2d21ac55 3609
2d21ac55 3610 case DIF_VAR_UID:
39236c6e 3611 if (!dtrace_priv_proc_relaxed(state))
2d21ac55
A
3612 return (0);
3613
3614 /*
3615 * See comment in DIF_VAR_PID.
3616 */
3617 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3618 return (0);
3619
39236c6e 3620 return ((uint64_t) dtrace_proc_selfruid());
2d21ac55 3621
2d21ac55
A
3622 case DIF_VAR_GID:
3623 if (!dtrace_priv_proc(state))
3624 return (0);
3625
3626 /*
3627 * See comment in DIF_VAR_PID.
3628 */
3629 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3630 return (0);
3631
3632 if (dtrace_CRED() != NULL)
b0d623f7 3633 /* Credential does not require lazy initialization. */
2d21ac55 3634 return ((uint64_t)kauth_getgid());
b0d623f7
A
3635 else {
3636 /* proc_lock would be taken under kauth_cred_proc_ref() in kauth_cred_get(). */
3637 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
3638 return -1ULL;
3639 }
2d21ac55 3640
2d21ac55
A
3641 case DIF_VAR_ERRNO: {
3642 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
3643 if (!dtrace_priv_proc(state))
3644 return (0);
3645
3646 /*
3647 * See comment in DIF_VAR_PID.
3648 */
3649 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3650 return (0);
3651
b0d623f7
A
3652 if (uthread)
3653 return (uint64_t)uthread->t_dtrace_errno;
3654 else {
3655 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
3656 return -1ULL;
3657 }
2d21ac55 3658 }
2d21ac55
A
3659
3660 default:
3661 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
3662 return (0);
3663 }
3664}
3665
3666/*
3667 * Emulate the execution of DTrace ID subroutines invoked by the call opcode.
3668 * Notice that we don't bother validating the proper number of arguments or
3669 * their types in the tuple stack. This isn't needed because all argument
3670 * interpretation is safe because of our load safety -- the worst that can
3671 * happen is that a bogus program can obtain bogus results.
3672 */
3673static void
3674dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
3675 dtrace_key_t *tupregs, int nargs,
3676 dtrace_mstate_t *mstate, dtrace_state_t *state)
3677{
3678 volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
2d21ac55 3679 volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
b0d623f7 3680 dtrace_vstate_t *vstate = &state->dts_vstate;
2d21ac55
A
3681
3682#if !defined(__APPLE__)
3683 union {
3684 mutex_impl_t mi;
3685 uint64_t mx;
3686 } m;
3687
3688 union {
3689 krwlock_t ri;
3690 uintptr_t rw;
3691 } r;
3692#else
b0d623f7 3693/* FIXME: awaits lock/mutex work */
2d21ac55
A
3694#endif /* __APPLE__ */
3695
3696 switch (subr) {
3697 case DIF_SUBR_RAND:
3698 regs[rd] = (dtrace_gethrtime() * 2416 + 374441) % 1771875;
3699 break;
3700
3701#if !defined(__APPLE__)
3702 case DIF_SUBR_MUTEX_OWNED:
b0d623f7
A
3703 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
3704 mstate, vstate)) {
fe8ab488 3705 regs[rd] = 0;
b0d623f7
A
3706 break;
3707 }
3708
2d21ac55
A
3709 m.mx = dtrace_load64(tupregs[0].dttk_value);
3710 if (MUTEX_TYPE_ADAPTIVE(&m.mi))
3711 regs[rd] = MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER;
3712 else
3713 regs[rd] = LOCK_HELD(&m.mi.m_spin.m_spinlock);
3714 break;
3715
3716 case DIF_SUBR_MUTEX_OWNER:
b0d623f7
A
3717 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
3718 mstate, vstate)) {
fe8ab488 3719 regs[rd] = 0;
b0d623f7
A
3720 break;
3721 }
3722
2d21ac55
A
3723 m.mx = dtrace_load64(tupregs[0].dttk_value);
3724 if (MUTEX_TYPE_ADAPTIVE(&m.mi) &&
3725 MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER)
3726 regs[rd] = (uintptr_t)MUTEX_OWNER(&m.mi);
3727 else
3728 regs[rd] = 0;
3729 break;
3730
3731 case DIF_SUBR_MUTEX_TYPE_ADAPTIVE:
b0d623f7
A
3732 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
3733 mstate, vstate)) {
fe8ab488 3734 regs[rd] = 0;
b0d623f7
A
3735 break;
3736 }
3737
2d21ac55
A
3738 m.mx = dtrace_load64(tupregs[0].dttk_value);
3739 regs[rd] = MUTEX_TYPE_ADAPTIVE(&m.mi);
3740 break;
3741
3742 case DIF_SUBR_MUTEX_TYPE_SPIN:
b0d623f7
A
3743 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
3744 mstate, vstate)) {
fe8ab488 3745 regs[rd] = 0;
b0d623f7
A
3746 break;
3747 }
3748
2d21ac55
A
3749 m.mx = dtrace_load64(tupregs[0].dttk_value);
3750 regs[rd] = MUTEX_TYPE_SPIN(&m.mi);
3751 break;
3752
3753 case DIF_SUBR_RW_READ_HELD: {
3754 uintptr_t tmp;
3755
b0d623f7
A
3756 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t),
3757 mstate, vstate)) {
fe8ab488 3758 regs[rd] = 0;
b0d623f7
A
3759 break;
3760 }
3761
2d21ac55
A
3762 r.rw = dtrace_loadptr(tupregs[0].dttk_value);
3763 regs[rd] = _RW_READ_HELD(&r.ri, tmp);
3764 break;
3765 }
3766
3767 case DIF_SUBR_RW_WRITE_HELD:
b0d623f7
A
3768 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t),
3769 mstate, vstate)) {
fe8ab488 3770 regs[rd] = 0;
b0d623f7
A
3771 break;
3772 }
3773
2d21ac55
A
3774 r.rw = dtrace_loadptr(tupregs[0].dttk_value);
3775 regs[rd] = _RW_WRITE_HELD(&r.ri);
3776 break;
3777
3778 case DIF_SUBR_RW_ISWRITER:
b0d623f7
A
3779 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t),
3780 mstate, vstate)) {
fe8ab488 3781 regs[rd] = 0;
b0d623f7
A
3782 break;
3783 }
3784
2d21ac55
A
3785 r.rw = dtrace_loadptr(tupregs[0].dttk_value);
3786 regs[rd] = _RW_ISWRITER(&r.ri);
3787 break;
3788#else
b0d623f7 3789/* FIXME: awaits lock/mutex work */
2d21ac55
A
3790#endif /* __APPLE__ */
3791
3792 case DIF_SUBR_BCOPY: {
3793 /*
3794 * We need to be sure that the destination is in the scratch
3795 * region -- no other region is allowed.
3796 */
3797 uintptr_t src = tupregs[0].dttk_value;
3798 uintptr_t dest = tupregs[1].dttk_value;
3799 size_t size = tupregs[2].dttk_value;
3800
3801 if (!dtrace_inscratch(dest, size, mstate)) {
3802 *flags |= CPU_DTRACE_BADADDR;
3803 *illval = regs[rd];
3804 break;
3805 }
3806
b0d623f7 3807 if (!dtrace_canload(src, size, mstate, vstate)) {
fe8ab488 3808 regs[rd] = 0;
b0d623f7
A
3809 break;
3810 }
3811
2d21ac55
A
3812 dtrace_bcopy((void *)src, (void *)dest, size);
3813 break;
3814 }
3815
3816 case DIF_SUBR_ALLOCA:
3817 case DIF_SUBR_COPYIN: {
3818 uintptr_t dest = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
3819 uint64_t size =
3820 tupregs[subr == DIF_SUBR_ALLOCA ? 0 : 1].dttk_value;
3821 size_t scratch_size = (dest - mstate->dtms_scratch_ptr) + size;
3822
39037602
A
3823 /*
3824 * Check whether the user can access kernel memory
3825 */
3826 if (dtrace_priv_kernel(state) == 0) {
3827 DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV);
3828 regs[rd] = 0;
3829 break;
3830 }
2d21ac55
A
3831 /*
3832 * This action doesn't require any credential checks since
3833 * probes will not activate in user contexts to which the
3834 * enabling user does not have permissions.
3835 */
b0d623f7
A
3836
3837 /*
3838 * Rounding up the user allocation size could have overflowed
3839 * a large, bogus allocation (like -1ULL) to 0.
3840 */
3841 if (scratch_size < size ||
3842 !DTRACE_INSCRATCH(mstate, scratch_size)) {
2d21ac55 3843 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
fe8ab488 3844 regs[rd] = 0;
2d21ac55
A
3845 break;
3846 }
3847
3848 if (subr == DIF_SUBR_COPYIN) {
3849 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
cf7d32b8 3850 if (dtrace_priv_proc(state))
b0d623f7 3851 dtrace_copyin(tupregs[0].dttk_value, dest, size, flags);
2d21ac55
A
3852 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3853 }
3854
3855 mstate->dtms_scratch_ptr += scratch_size;
3856 regs[rd] = dest;
3857 break;
3858 }
3859
3860 case DIF_SUBR_COPYINTO: {
3861 uint64_t size = tupregs[1].dttk_value;
3862 uintptr_t dest = tupregs[2].dttk_value;
3863
3864 /*
3865 * This action doesn't require any credential checks since
3866 * probes will not activate in user contexts to which the
3867 * enabling user does not have permissions.
3868 */
3869 if (!dtrace_inscratch(dest, size, mstate)) {
3870 *flags |= CPU_DTRACE_BADADDR;
3871 *illval = regs[rd];
3872 break;
3873 }
3874
3875 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
cf7d32b8 3876 if (dtrace_priv_proc(state))
b0d623f7 3877 dtrace_copyin(tupregs[0].dttk_value, dest, size, flags);
2d21ac55
A
3878 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3879 break;
3880 }
3881
3882 case DIF_SUBR_COPYINSTR: {
3883 uintptr_t dest = mstate->dtms_scratch_ptr;
3884 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3885
3886 if (nargs > 1 && tupregs[1].dttk_value < size)
3887 size = tupregs[1].dttk_value + 1;
3888
3889 /*
3890 * This action doesn't require any credential checks since
3891 * probes will not activate in user contexts to which the
3892 * enabling user does not have permissions.
3893 */
b0d623f7 3894 if (!DTRACE_INSCRATCH(mstate, size)) {
2d21ac55 3895 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
fe8ab488 3896 regs[rd] = 0;
2d21ac55
A
3897 break;
3898 }
3899
3900 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
cf7d32b8 3901 if (dtrace_priv_proc(state))
b0d623f7 3902 dtrace_copyinstr(tupregs[0].dttk_value, dest, size, flags);
2d21ac55
A
3903 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3904
3905 ((char *)dest)[size - 1] = '\0';
3906 mstate->dtms_scratch_ptr += size;
3907 regs[rd] = dest;
3908 break;
3909 }
3910
2d21ac55
A
3911 case DIF_SUBR_MSGSIZE:
3912 case DIF_SUBR_MSGDSIZE: {
3913 /* Darwin does not implement SysV streams messages */
b0d623f7 3914 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
2d21ac55
A
3915 regs[rd] = 0;
3916 break;
3917 }
2d21ac55 3918
2d21ac55
A
3919 case DIF_SUBR_PROGENYOF: {
3920 pid_t pid = tupregs[0].dttk_value;
3921 struct proc *p = current_proc();
3922 int rval = 0, lim = nprocs;
3923
3924 while(p && (lim-- > 0)) {
3925 pid_t ppid;
3926
3927 ppid = (pid_t)dtrace_load32((uintptr_t)&(p->p_pid));
3928 if (*flags & CPU_DTRACE_FAULT)
3929 break;
3930
3931 if (ppid == pid) {
3932 rval = 1;
3933 break;
3934 }
3935
3936 if (ppid == 0)
3937 break; /* Can't climb process tree any further. */
3938
3939 p = (struct proc *)dtrace_loadptr((uintptr_t)&(p->p_pptr));
3940 if (*flags & CPU_DTRACE_FAULT)
3941 break;
3942 }
3943
3944 regs[rd] = rval;
3945 break;
3946 }
2d21ac55
A
3947
3948 case DIF_SUBR_SPECULATION:
3949 regs[rd] = dtrace_speculation(state);
3950 break;
3951
fe8ab488 3952
2d21ac55
A
3953 case DIF_SUBR_COPYOUT: {
3954 uintptr_t kaddr = tupregs[0].dttk_value;
fe8ab488 3955 user_addr_t uaddr = tupregs[1].dttk_value;
2d21ac55
A
3956 uint64_t size = tupregs[2].dttk_value;
3957
3958 if (!dtrace_destructive_disallow &&
3959 dtrace_priv_proc_control(state) &&
ecc0ceb4
A
3960 !dtrace_istoxic(kaddr, size) &&
3961 dtrace_canload(kaddr, size, mstate, vstate)) {
2d21ac55 3962 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
b0d623f7 3963 dtrace_copyout(kaddr, uaddr, size, flags);
2d21ac55
A
3964 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3965 }
3966 break;
3967 }
3968
3969 case DIF_SUBR_COPYOUTSTR: {
3970 uintptr_t kaddr = tupregs[0].dttk_value;
fe8ab488 3971 user_addr_t uaddr = tupregs[1].dttk_value;
2d21ac55 3972 uint64_t size = tupregs[2].dttk_value;
39037602 3973 size_t lim;
2d21ac55
A
3974
3975 if (!dtrace_destructive_disallow &&
3976 dtrace_priv_proc_control(state) &&
ecc0ceb4 3977 !dtrace_istoxic(kaddr, size) &&
39037602 3978 dtrace_strcanload(kaddr, size, &lim, mstate, vstate)) {
2d21ac55 3979 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
39037602 3980 dtrace_copyoutstr(kaddr, uaddr, lim, flags);
2d21ac55
A
3981 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3982 }
3983 break;
3984 }
2d21ac55 3985
b0d623f7 3986 case DIF_SUBR_STRLEN: {
39037602 3987 size_t size = state->dts_options[DTRACEOPT_STRSIZE];
b0d623f7 3988 uintptr_t addr = (uintptr_t)tupregs[0].dttk_value;
39037602 3989 size_t lim;
b0d623f7 3990
39037602 3991 if (!dtrace_strcanload(addr, size, &lim, mstate, vstate)) {
fe8ab488 3992 regs[rd] = 0;
b0d623f7
A
3993 break;
3994 }
3995
39037602 3996 regs[rd] = dtrace_strlen((char *)addr, lim);
b0d623f7 3997
2d21ac55 3998 break;
b0d623f7 3999 }
2d21ac55
A
4000
4001 case DIF_SUBR_STRCHR:
4002 case DIF_SUBR_STRRCHR: {
4003 /*
4004 * We're going to iterate over the string looking for the
4005 * specified character. We will iterate until we have reached
4006 * the string length or we have found the character. If this
4007 * is DIF_SUBR_STRRCHR, we will look for the last occurrence
4008 * of the specified character instead of the first.
4009 */
4010 uintptr_t addr = tupregs[0].dttk_value;
39037602
A
4011 uintptr_t addr_limit;
4012 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4013 size_t lim;
2d21ac55
A
4014 char c, target = (char)tupregs[1].dttk_value;
4015
39037602 4016 if (!dtrace_strcanload(addr, size, &lim, mstate, vstate)) {
5ba3f43e 4017 regs[rd] = 0;
39037602
A
4018 break;
4019 }
4020 addr_limit = addr + lim;
4021
4022 for (regs[rd] = 0; addr < addr_limit; addr++) {
2d21ac55
A
4023 if ((c = dtrace_load8(addr)) == target) {
4024 regs[rd] = addr;
4025
4026 if (subr == DIF_SUBR_STRCHR)
4027 break;
4028 }
4029
4030 if (c == '\0')
4031 break;
4032 }
4033
4034 break;
4035 }
4036
4037 case DIF_SUBR_STRSTR:
4038 case DIF_SUBR_INDEX:
4039 case DIF_SUBR_RINDEX: {
4040 /*
4041 * We're going to iterate over the string looking for the
4042 * specified string. We will iterate until we have reached
4043 * the string length or we have found the string. (Yes, this
4044 * is done in the most naive way possible -- but considering
4045 * that the string we're searching for is likely to be
4046 * relatively short, the complexity of Rabin-Karp or similar
4047 * hardly seems merited.)
4048 */
4049 char *addr = (char *)(uintptr_t)tupregs[0].dttk_value;
4050 char *substr = (char *)(uintptr_t)tupregs[1].dttk_value;
4051 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4052 size_t len = dtrace_strlen(addr, size);
4053 size_t sublen = dtrace_strlen(substr, size);
4054 char *limit = addr + len, *orig = addr;
4055 int notfound = subr == DIF_SUBR_STRSTR ? 0 : -1;
4056 int inc = 1;
4057
4058 regs[rd] = notfound;
4059
b0d623f7 4060 if (!dtrace_canload((uintptr_t)addr, len + 1, mstate, vstate)) {
fe8ab488 4061 regs[rd] = 0;
b0d623f7
A
4062 break;
4063 }
4064
4065 if (!dtrace_canload((uintptr_t)substr, sublen + 1, mstate,
4066 vstate)) {
fe8ab488 4067 regs[rd] = 0;
b0d623f7
A
4068 break;
4069 }
4070
2d21ac55
A
4071 /*
4072 * strstr() and index()/rindex() have similar semantics if
4073 * both strings are the empty string: strstr() returns a
4074 * pointer to the (empty) string, and index() and rindex()
4075 * both return index 0 (regardless of any position argument).
4076 */
4077 if (sublen == 0 && len == 0) {
4078 if (subr == DIF_SUBR_STRSTR)
4079 regs[rd] = (uintptr_t)addr;
4080 else
4081 regs[rd] = 0;
4082 break;
4083 }
4084
4085 if (subr != DIF_SUBR_STRSTR) {
4086 if (subr == DIF_SUBR_RINDEX) {
4087 limit = orig - 1;
4088 addr += len;
4089 inc = -1;
4090 }
4091
4092 /*
4093 * Both index() and rindex() take an optional position
4094 * argument that denotes the starting position.
4095 */
4096 if (nargs == 3) {
4097 int64_t pos = (int64_t)tupregs[2].dttk_value;
4098
4099 /*
4100 * If the position argument to index() is
4101 * negative, Perl implicitly clamps it at
4102 * zero. This semantic is a little surprising
4103 * given the special meaning of negative
4104 * positions to similar Perl functions like
4105 * substr(), but it appears to reflect a
4106 * notion that index() can start from a
4107 * negative index and increment its way up to
4108 * the string. Given this notion, Perl's
4109 * rindex() is at least self-consistent in
4110 * that it implicitly clamps positions greater
4111 * than the string length to be the string
4112 * length. Where Perl completely loses
4113 * coherence, however, is when the specified
4114 * substring is the empty string (""). In
4115 * this case, even if the position is
4116 * negative, rindex() returns 0 -- and even if
4117 * the position is greater than the length,
4118 * index() returns the string length. These
4119 * semantics violate the notion that index()
4120 * should never return a value less than the
4121 * specified position and that rindex() should
4122 * never return a value greater than the
4123 * specified position. (One assumes that
4124 * these semantics are artifacts of Perl's
4125 * implementation and not the results of
4126 * deliberate design -- it beggars belief that
4127 * even Larry Wall could desire such oddness.)
4128 * While in the abstract one would wish for
4129 * consistent position semantics across
4130 * substr(), index() and rindex() -- or at the
4131 * very least self-consistent position
4132 * semantics for index() and rindex() -- we
4133 * instead opt to keep with the extant Perl
4134 * semantics, in all their broken glory. (Do
4135 * we have more desire to maintain Perl's
4136 * semantics than Perl does? Probably.)
4137 */
4138 if (subr == DIF_SUBR_RINDEX) {
4139 if (pos < 0) {
4140 if (sublen == 0)
4141 regs[rd] = 0;
4142 break;
4143 }
4144
b0d623f7 4145 if ((size_t)pos > len)
2d21ac55
A
4146 pos = len;
4147 } else {
4148 if (pos < 0)
4149 pos = 0;
4150
b0d623f7 4151 if ((size_t)pos >= len) {
2d21ac55
A
4152 if (sublen == 0)
4153 regs[rd] = len;
4154 break;
4155 }
4156 }
4157
4158 addr = orig + pos;
4159 }
4160 }
4161
4162 for (regs[rd] = notfound; addr != limit; addr += inc) {
4163 if (dtrace_strncmp(addr, substr, sublen) == 0) {
4164 if (subr != DIF_SUBR_STRSTR) {
4165 /*
4166 * As D index() and rindex() are
4167 * modeled on Perl (and not on awk),
4168 * we return a zero-based (and not a
4169 * one-based) index. (For you Perl
4170 * weenies: no, we're not going to add
4171 * $[ -- and shouldn't you be at a con
4172 * or something?)
4173 */
4174 regs[rd] = (uintptr_t)(addr - orig);
4175 break;
4176 }
4177
4178 ASSERT(subr == DIF_SUBR_STRSTR);
4179 regs[rd] = (uintptr_t)addr;
4180 break;
4181 }
4182 }
4183
4184 break;
4185 }
4186
4187 case DIF_SUBR_STRTOK: {
4188 uintptr_t addr = tupregs[0].dttk_value;
4189 uintptr_t tokaddr = tupregs[1].dttk_value;
4190 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
39037602
A
4191 uintptr_t limit, toklimit;
4192 size_t clim;
2d21ac55 4193 char *dest = (char *)mstate->dtms_scratch_ptr;
b0d623f7
A
4194 uint8_t c='\0', tokmap[32]; /* 256 / 8 */
4195 uint64_t i = 0;
b0d623f7
A
4196
4197 /*
4198 * Check both the token buffer and (later) the input buffer,
4199 * since both could be non-scratch addresses.
4200 */
39037602 4201 if (!dtrace_strcanload(tokaddr, size, &clim, mstate, vstate)) {
fe8ab488 4202 regs[rd] = 0;
b0d623f7
A
4203 break;
4204 }
39037602 4205 toklimit = tokaddr + clim;
2d21ac55 4206
b0d623f7 4207 if (!DTRACE_INSCRATCH(mstate, size)) {
2d21ac55 4208 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
fe8ab488 4209 regs[rd] = 0;
2d21ac55
A
4210 break;
4211 }
4212
fe8ab488 4213 if (addr == 0) {
2d21ac55
A
4214 /*
4215 * If the address specified is NULL, we use our saved
4216 * strtok pointer from the mstate. Note that this
4217 * means that the saved strtok pointer is _only_
4218 * valid within multiple enablings of the same probe --
4219 * it behaves like an implicit clause-local variable.
4220 */
4221 addr = mstate->dtms_strtok;
39037602 4222 limit = mstate->dtms_strtok_limit;
b0d623f7
A
4223 } else {
4224 /*
4225 * If the user-specified address is non-NULL we must
4226 * access check it. This is the only time we have
4227 * a chance to do so, since this address may reside
4228 * in the string table of this clause-- future calls
4229 * (when we fetch addr from mstate->dtms_strtok)
4230 * would fail this access check.
4231 */
39037602
A
4232 if (!dtrace_strcanload(addr, size, &clim, mstate,
4233 vstate)) {
fe8ab488 4234 regs[rd] = 0;
b0d623f7 4235 break;
fe8ab488 4236 }
39037602 4237 limit = addr + clim;
2d21ac55
A
4238 }
4239
4240 /*
4241 * First, zero the token map, and then process the token
4242 * string -- setting a bit in the map for every character
4243 * found in the token string.
4244 */
c910b4d9 4245 for (i = 0; i < (int)sizeof (tokmap); i++)
2d21ac55
A
4246 tokmap[i] = 0;
4247
4248 for (; tokaddr < toklimit; tokaddr++) {
4249 if ((c = dtrace_load8(tokaddr)) == '\0')
4250 break;
4251
4252 ASSERT((c >> 3) < sizeof (tokmap));
4253 tokmap[c >> 3] |= (1 << (c & 0x7));
4254 }
4255
39037602 4256 for (; addr < limit; addr++) {
2d21ac55 4257 /*
39037602
A
4258 * We're looking for a character that is _not_
4259 * contained in the token string.
2d21ac55
A
4260 */
4261 if ((c = dtrace_load8(addr)) == '\0')
4262 break;
4263
4264 if (!(tokmap[c >> 3] & (1 << (c & 0x7))))
4265 break;
4266 }
4267
4268 if (c == '\0') {
4269 /*
4270 * We reached the end of the string without finding
4271 * any character that was not in the token string.
4272 * We return NULL in this case, and we set the saved
4273 * address to NULL as well.
4274 */
fe8ab488
A
4275 regs[rd] = 0;
4276 mstate->dtms_strtok = 0;
5ba3f43e 4277 mstate->dtms_strtok_limit = 0;
2d21ac55
A
4278 break;
4279 }
4280
4281 /*
4282 * From here on, we're copying into the destination string.
4283 */
4284 for (i = 0; addr < limit && i < size - 1; addr++) {
4285 if ((c = dtrace_load8(addr)) == '\0')
4286 break;
4287
4288 if (tokmap[c >> 3] & (1 << (c & 0x7)))
4289 break;
4290
4291 ASSERT(i < size);
4292 dest[i++] = c;
4293 }
4294
4295 ASSERT(i < size);
4296 dest[i] = '\0';
4297 regs[rd] = (uintptr_t)dest;
4298 mstate->dtms_scratch_ptr += size;
4299 mstate->dtms_strtok = addr;
39037602 4300 mstate->dtms_strtok_limit = limit;
2d21ac55
A
4301 break;
4302 }
4303
4304 case DIF_SUBR_SUBSTR: {
4305 uintptr_t s = tupregs[0].dttk_value;
4306 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4307 char *d = (char *)mstate->dtms_scratch_ptr;
4308 int64_t index = (int64_t)tupregs[1].dttk_value;
4309 int64_t remaining = (int64_t)tupregs[2].dttk_value;
4310 size_t len = dtrace_strlen((char *)s, size);
4311 int64_t i = 0;
4312
b0d623f7 4313 if (!dtrace_canload(s, len + 1, mstate, vstate)) {
fe8ab488 4314 regs[rd] = 0;
b0d623f7
A
4315 break;
4316 }
2d21ac55 4317
b0d623f7 4318 if (!DTRACE_INSCRATCH(mstate, size)) {
2d21ac55 4319 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
fe8ab488 4320 regs[rd] = 0;
2d21ac55
A
4321 break;
4322 }
4323
b0d623f7
A
4324 if (nargs <= 2)
4325 remaining = (int64_t)size;
4326
2d21ac55
A
4327 if (index < 0) {
4328 index += len;
4329
4330 if (index < 0 && index + remaining > 0) {
4331 remaining += index;
4332 index = 0;
4333 }
4334 }
4335
b0d623f7
A
4336 if ((size_t)index >= len || index < 0) {
4337 remaining = 0;
4338 } else if (remaining < 0) {
4339 remaining += len - index;
4340 } else if ((uint64_t)index + (uint64_t)remaining > size) {
4341 remaining = size - index;
4342 }
fe8ab488 4343
b0d623f7
A
4344 for (i = 0; i < remaining; i++) {
4345 if ((d[i] = dtrace_load8(s + index + i)) == '\0')
2d21ac55
A
4346 break;
4347 }
b0d623f7
A
4348
4349 d[i] = '\0';
2d21ac55
A
4350
4351 mstate->dtms_scratch_ptr += size;
4352 regs[rd] = (uintptr_t)d;
4353 break;
4354 }
4355
2d21ac55
A
4356 case DIF_SUBR_GETMAJOR:
4357 regs[rd] = (uintptr_t)major( (dev_t)tupregs[0].dttk_value );
4358 break;
2d21ac55 4359
2d21ac55
A
4360 case DIF_SUBR_GETMINOR:
4361 regs[rd] = (uintptr_t)minor( (dev_t)tupregs[0].dttk_value );
4362 break;
2d21ac55 4363
2d21ac55 4364 case DIF_SUBR_DDI_PATHNAME: {
fe8ab488 4365 /* APPLE NOTE: currently unsupported on Darwin */
b0d623f7 4366 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
fe8ab488 4367 regs[rd] = 0;
2d21ac55
A
4368 break;
4369 }
2d21ac55
A
4370
4371 case DIF_SUBR_STRJOIN: {
4372 char *d = (char *)mstate->dtms_scratch_ptr;
4373 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4374 uintptr_t s1 = tupregs[0].dttk_value;
4375 uintptr_t s2 = tupregs[1].dttk_value;
39037602
A
4376 uint64_t i = 0, j = 0;
4377 size_t lim1, lim2;
4378 char c;
b0d623f7 4379
39037602
A
4380 if (!dtrace_strcanload(s1, size, &lim1, mstate, vstate) ||
4381 !dtrace_strcanload(s2, size, &lim2, mstate, vstate)) {
fe8ab488 4382 regs[rd] = 0;
b0d623f7
A
4383 break;
4384 }
2d21ac55 4385
b0d623f7 4386 if (!DTRACE_INSCRATCH(mstate, size)) {
2d21ac55 4387 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
fe8ab488 4388 regs[rd] = 0;
2d21ac55
A
4389 break;
4390 }
4391
4392 for (;;) {
4393 if (i >= size) {
4394 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
fe8ab488 4395 regs[rd] = 0;
2d21ac55
A
4396 break;
4397 }
39037602
A
4398 c = (i >= lim1) ? '\0' : dtrace_load8(s1++);
4399 if ((d[i++] = c) == '\0') {
2d21ac55
A
4400 i--;
4401 break;
4402 }
4403 }
4404
4405 for (;;) {
4406 if (i >= size) {
4407 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
fe8ab488 4408 regs[rd] = 0;
2d21ac55
A
4409 break;
4410 }
39037602
A
4411 c = (j++ >= lim2) ? '\0' : dtrace_load8(s2++);
4412 if ((d[i++] = c) == '\0')
2d21ac55
A
4413 break;
4414 }
4415
4416 if (i < size) {
4417 mstate->dtms_scratch_ptr += i;
4418 regs[rd] = (uintptr_t)d;
4419 }
4420
4421 break;
4422 }
4423
4424 case DIF_SUBR_LLTOSTR: {
4425 int64_t i = (int64_t)tupregs[0].dttk_value;
5ba3f43e
A
4426 uint64_t val, digit;
4427 uint64_t size = 65; /* enough room for 2^64 in binary */
2d21ac55 4428 char *end = (char *)mstate->dtms_scratch_ptr + size - 1;
5ba3f43e
A
4429 int base = 10;
4430
4431 if (nargs > 1) {
4432 if ((base = tupregs[1].dttk_value) <= 1 ||
4433 base > ('z' - 'a' + 1) + ('9' - '0' + 1)) {
4434 *flags |= CPU_DTRACE_ILLOP;
4435 break;
4436 }
4437 }
4438
4439 val = (base == 10 && i < 0) ? i * -1 : i;
2d21ac55 4440
b0d623f7 4441 if (!DTRACE_INSCRATCH(mstate, size)) {
2d21ac55 4442 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
fe8ab488 4443 regs[rd] = 0;
2d21ac55
A
4444 break;
4445 }
4446
5ba3f43e
A
4447 for (*end-- = '\0'; val; val /= base) {
4448 if ((digit = val % base) <= '9' - '0') {
4449 *end-- = '0' + digit;
4450 } else {
4451 *end-- = 'a' + (digit - ('9' - '0') - 1);
4452 }
4453 }
2d21ac55 4454
5ba3f43e 4455 if (i == 0 && base == 16)
2d21ac55
A
4456 *end-- = '0';
4457
5ba3f43e
A
4458 if (base == 16)
4459 *end-- = 'x';
4460
4461 if (i == 0 || base == 8 || base == 16)
4462 *end-- = '0';
4463
4464 if (i < 0 && base == 10)
2d21ac55
A
4465 *end-- = '-';
4466
4467 regs[rd] = (uintptr_t)end + 1;
4468 mstate->dtms_scratch_ptr += size;
4469 break;
4470 }
4471
b0d623f7
A
4472 case DIF_SUBR_HTONS:
4473 case DIF_SUBR_NTOHS:
4474#ifdef _BIG_ENDIAN
4475 regs[rd] = (uint16_t)tupregs[0].dttk_value;
4476#else
4477 regs[rd] = DT_BSWAP_16((uint16_t)tupregs[0].dttk_value);
4478#endif
4479 break;
4480
4481
4482 case DIF_SUBR_HTONL:
4483 case DIF_SUBR_NTOHL:
4484#ifdef _BIG_ENDIAN
4485 regs[rd] = (uint32_t)tupregs[0].dttk_value;
4486#else
4487 regs[rd] = DT_BSWAP_32((uint32_t)tupregs[0].dttk_value);
4488#endif
4489 break;
4490
4491
4492 case DIF_SUBR_HTONLL:
4493 case DIF_SUBR_NTOHLL:
4494#ifdef _BIG_ENDIAN
4495 regs[rd] = (uint64_t)tupregs[0].dttk_value;
4496#else
4497 regs[rd] = DT_BSWAP_64((uint64_t)tupregs[0].dttk_value);
4498#endif
4499 break;
4500
4501
2d21ac55
A
4502 case DIF_SUBR_DIRNAME:
4503 case DIF_SUBR_BASENAME: {
4504 char *dest = (char *)mstate->dtms_scratch_ptr;
4505 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4506 uintptr_t src = tupregs[0].dttk_value;
4507 int i, j, len = dtrace_strlen((char *)src, size);
4508 int lastbase = -1, firstbase = -1, lastdir = -1;
4509 int start, end;
4510
b0d623f7 4511 if (!dtrace_canload(src, len + 1, mstate, vstate)) {
fe8ab488 4512 regs[rd] = 0;
b0d623f7
A
4513 break;
4514 }
4515
4516 if (!DTRACE_INSCRATCH(mstate, size)) {
2d21ac55 4517 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
fe8ab488 4518 regs[rd] = 0;
2d21ac55
A
4519 break;
4520 }
4521
4522 /*
4523 * The basename and dirname for a zero-length string is
4524 * defined to be "."
4525 */
4526 if (len == 0) {
4527 len = 1;
4528 src = (uintptr_t)".";
4529 }
4530
4531 /*
4532 * Start from the back of the string, moving back toward the
4533 * front until we see a character that isn't a slash. That
4534 * character is the last character in the basename.
4535 */
4536 for (i = len - 1; i >= 0; i--) {
4537 if (dtrace_load8(src + i) != '/')
4538 break;
4539 }
4540
4541 if (i >= 0)
4542 lastbase = i;
4543
4544 /*
4545 * Starting from the last character in the basename, move
4546 * towards the front until we find a slash. The character
4547 * that we processed immediately before that is the first
4548 * character in the basename.
4549 */
4550 for (; i >= 0; i--) {
4551 if (dtrace_load8(src + i) == '/')
4552 break;
4553 }
4554
4555 if (i >= 0)
4556 firstbase = i + 1;
4557
4558 /*
4559 * Now keep going until we find a non-slash character. That
4560 * character is the last character in the dirname.
4561 */
4562 for (; i >= 0; i--) {
4563 if (dtrace_load8(src + i) != '/')
4564 break;
4565 }
4566
4567 if (i >= 0)
4568 lastdir = i;
4569
4570 ASSERT(!(lastbase == -1 && firstbase != -1));
4571 ASSERT(!(firstbase == -1 && lastdir != -1));
4572
4573 if (lastbase == -1) {
4574 /*
4575 * We didn't find a non-slash character. We know that
4576 * the length is non-zero, so the whole string must be
4577 * slashes. In either the dirname or the basename
4578 * case, we return '/'.
4579 */
4580 ASSERT(firstbase == -1);
4581 firstbase = lastbase = lastdir = 0;
4582 }
4583
4584 if (firstbase == -1) {
4585 /*
4586 * The entire string consists only of a basename
4587 * component. If we're looking for dirname, we need
4588 * to change our string to be just "."; if we're
4589 * looking for a basename, we'll just set the first
4590 * character of the basename to be 0.
4591 */
4592 if (subr == DIF_SUBR_DIRNAME) {
4593 ASSERT(lastdir == -1);
4594 src = (uintptr_t)".";
4595 lastdir = 0;
4596 } else {
4597 firstbase = 0;
4598 }
4599 }
4600
4601 if (subr == DIF_SUBR_DIRNAME) {
4602 if (lastdir == -1) {
4603 /*
4604 * We know that we have a slash in the name --
4605 * or lastdir would be set to 0, above. And
4606 * because lastdir is -1, we know that this
4607 * slash must be the first character. (That
4608 * is, the full string must be of the form
4609 * "/basename".) In this case, the last
4610 * character of the directory name is 0.
4611 */
4612 lastdir = 0;
4613 }
4614
4615 start = 0;
4616 end = lastdir;
4617 } else {
4618 ASSERT(subr == DIF_SUBR_BASENAME);
4619 ASSERT(firstbase != -1 && lastbase != -1);
4620 start = firstbase;
4621 end = lastbase;
4622 }
4623
b0d623f7
A
4624 for (i = start, j = 0; i <= end && (uint64_t)j < size - 1; i++, j++)
4625 dest[j] = dtrace_load8(src + i);
2d21ac55
A
4626
4627 dest[j] = '\0';
4628 regs[rd] = (uintptr_t)dest;
4629 mstate->dtms_scratch_ptr += size;
4630 break;
4631 }
4632
4633 case DIF_SUBR_CLEANPATH: {
4634 char *dest = (char *)mstate->dtms_scratch_ptr, c;
4635 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4636 uintptr_t src = tupregs[0].dttk_value;
39037602
A
4637 size_t lim;
4638 size_t i = 0, j = 0;
2d21ac55 4639
39037602 4640 if (!dtrace_strcanload(src, size, &lim, mstate, vstate)) {
fe8ab488 4641 regs[rd] = 0;
b0d623f7
A
4642 break;
4643 }
4644
4645 if (!DTRACE_INSCRATCH(mstate, size)) {
2d21ac55 4646 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
fe8ab488 4647 regs[rd] = 0;
2d21ac55
A
4648 break;
4649 }
4650
4651 /*
4652 * Move forward, loading each character.
4653 */
4654 do {
39037602 4655 c = (i >= lim) ? '\0' : dtrace_load8(src + i++);
2d21ac55 4656next:
b0d623f7
A
4657 if ((uint64_t)(j + 5) >= size) /* 5 = strlen("/..c\0") */
4658 break;
2d21ac55
A
4659
4660 if (c != '/') {
4661 dest[j++] = c;
4662 continue;
4663 }
4664
39037602 4665 c = (i >= lim) ? '\0' : dtrace_load8(src + i++);
2d21ac55
A
4666
4667 if (c == '/') {
4668 /*
4669 * We have two slashes -- we can just advance
4670 * to the next character.
4671 */
4672 goto next;
4673 }
4674
4675 if (c != '.') {
4676 /*
4677 * This is not "." and it's not ".." -- we can
4678 * just store the "/" and this character and
4679 * drive on.
4680 */
4681 dest[j++] = '/';
4682 dest[j++] = c;
4683 continue;
4684 }
4685
39037602 4686 c = (i >= lim) ? '\0' : dtrace_load8(src + i++);
2d21ac55
A
4687
4688 if (c == '/') {
4689 /*
4690 * This is a "/./" component. We're not going
4691 * to store anything in the destination buffer;
4692 * we're just going to go to the next component.
4693 */
4694 goto next;
4695 }
4696
4697 if (c != '.') {
4698 /*
4699 * This is not ".." -- we can just store the
4700 * "/." and this character and continue
4701 * processing.
4702 */
4703 dest[j++] = '/';
4704 dest[j++] = '.';
4705 dest[j++] = c;
4706 continue;
4707 }
4708
39037602 4709 c = (i >= lim) ? '\0' : dtrace_load8(src + i++);
2d21ac55
A
4710
4711 if (c != '/' && c != '\0') {
4712 /*
4713 * This is not ".." -- it's "..[mumble]".
4714 * We'll store the "/.." and this character
4715 * and continue processing.
4716 */
4717 dest[j++] = '/';
4718 dest[j++] = '.';
4719 dest[j++] = '.';
4720 dest[j++] = c;
4721 continue;
4722 }
4723
4724 /*
4725 * This is "/../" or "/..\0". We need to back up
4726 * our destination pointer until we find a "/".
4727 */
4728 i--;
4729 while (j != 0 && dest[--j] != '/')
4730 continue;
4731
4732 if (c == '\0')
4733 dest[++j] = '/';
4734 } while (c != '\0');
4735
4736 dest[j] = '\0';
4737 regs[rd] = (uintptr_t)dest;
4738 mstate->dtms_scratch_ptr += size;
4739 break;
4740 }
2d21ac55 4741
b0d623f7
A
4742 case DIF_SUBR_INET_NTOA:
4743 case DIF_SUBR_INET_NTOA6:
4744 case DIF_SUBR_INET_NTOP: {
4745 size_t size;
4746 int af, argi, i;
4747 char *base, *end;
2d21ac55 4748
b0d623f7
A
4749 if (subr == DIF_SUBR_INET_NTOP) {
4750 af = (int)tupregs[0].dttk_value;
4751 argi = 1;
4752 } else {
4753 af = subr == DIF_SUBR_INET_NTOA ? AF_INET: AF_INET6;
4754 argi = 0;
2d21ac55
A
4755 }
4756
b0d623f7
A
4757 if (af == AF_INET) {
4758#if !defined(__APPLE__)
4759 ipaddr_t ip4;
4760#else
6d2010ae 4761 uint32_t ip4;
b0d623f7
A
4762#endif /* __APPLE__ */
4763 uint8_t *ptr8, val;
4764
4765 /*
4766 * Safely load the IPv4 address.
4767 */
6d2010ae 4768#if !defined(__APPLE__)
b0d623f7 4769 ip4 = dtrace_load32(tupregs[argi].dttk_value);
6d2010ae 4770#else
39037602
A
4771 if (!dtrace_canload(tupregs[argi].dttk_value, sizeof(ip4),
4772 mstate, vstate)) {
4773 regs[rd] = 0;
4774 break;
4775 }
4776
6d2010ae
A
4777 dtrace_bcopy(
4778 (void *)(uintptr_t)tupregs[argi].dttk_value,
4779 (void *)(uintptr_t)&ip4, sizeof (ip4));
4780#endif /* __APPLE__ */
b0d623f7
A
4781 /*
4782 * Check an IPv4 string will fit in scratch.
4783 */
4784#if !defined(__APPLE__)
4785 size = INET_ADDRSTRLEN;
4786#else
4787 size = MAX_IPv4_STR_LEN;
4788#endif /* __APPLE__ */
4789 if (!DTRACE_INSCRATCH(mstate, size)) {
4790 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
fe8ab488 4791 regs[rd] = 0;
b0d623f7
A
4792 break;
4793 }
4794 base = (char *)mstate->dtms_scratch_ptr;
4795 end = (char *)mstate->dtms_scratch_ptr + size - 1;
4796
4797 /*
4798 * Stringify as a dotted decimal quad.
4799 */
4800 *end-- = '\0';
4801 ptr8 = (uint8_t *)&ip4;
4802 for (i = 3; i >= 0; i--) {
4803 val = ptr8[i];
4804
4805 if (val == 0) {
4806 *end-- = '0';
4807 } else {
4808 for (; val; val /= 10) {
4809 *end-- = '0' + (val % 10);
4810 }
4811 }
4812
4813 if (i > 0)
4814 *end-- = '.';
4815 }
4816 ASSERT(end + 1 >= base);
4817
4818 } else if (af == AF_INET6) {
4819#if defined(__APPLE__)
4820#define _S6_un __u6_addr
4821#define _S6_u8 __u6_addr8
4822#endif /* __APPLE__ */
4823 struct in6_addr ip6;
4824 int firstzero, tryzero, numzero, v6end;
4825 uint16_t val;
4826 const char digits[] = "0123456789abcdef";
4827
4828 /*
4829 * Stringify using RFC 1884 convention 2 - 16 bit
4830 * hexadecimal values with a zero-run compression.
4831 * Lower case hexadecimal digits are used.
4832 * eg, fe80::214:4fff:fe0b:76c8.
4833 * The IPv4 embedded form is returned for inet_ntop,
4834 * just the IPv4 string is returned for inet_ntoa6.
4835 */
4836
39037602
A
4837 if (!dtrace_canload(tupregs[argi].dttk_value,
4838 sizeof(struct in6_addr), mstate, vstate)) {
4839 regs[rd] = 0;
4840 break;
4841 }
4842
b0d623f7
A
4843 /*
4844 * Safely load the IPv6 address.
4845 */
4846 dtrace_bcopy(
4847 (void *)(uintptr_t)tupregs[argi].dttk_value,
4848 (void *)(uintptr_t)&ip6, sizeof (struct in6_addr));
4849
4850 /*
4851 * Check an IPv6 string will fit in scratch.
4852 */
4853 size = INET6_ADDRSTRLEN;
4854 if (!DTRACE_INSCRATCH(mstate, size)) {
4855 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
fe8ab488 4856 regs[rd] = 0;
b0d623f7
A
4857 break;
4858 }
4859 base = (char *)mstate->dtms_scratch_ptr;
4860 end = (char *)mstate->dtms_scratch_ptr + size - 1;
4861 *end-- = '\0';
4862
4863 /*
4864 * Find the longest run of 16 bit zero values
4865 * for the single allowed zero compression - "::".
4866 */
4867 firstzero = -1;
4868 tryzero = -1;
4869 numzero = 1;
b0d623f7 4870 for (i = 0; i < (int)sizeof (struct in6_addr); i++) {
b0d623f7
A
4871 if (ip6._S6_un._S6_u8[i] == 0 &&
4872 tryzero == -1 && i % 2 == 0) {
4873 tryzero = i;
4874 continue;
4875 }
4876
4877 if (tryzero != -1 &&
4878 (ip6._S6_un._S6_u8[i] != 0 ||
4879 i == sizeof (struct in6_addr) - 1)) {
4880
4881 if (i - tryzero <= numzero) {
4882 tryzero = -1;
4883 continue;
4884 }
4885
4886 firstzero = tryzero;
4887 numzero = i - i % 2 - tryzero;
4888 tryzero = -1;
4889
4890 if (ip6._S6_un._S6_u8[i] == 0 &&
4891 i == sizeof (struct in6_addr) - 1)
4892 numzero += 2;
4893 }
4894 }
b0d623f7 4895 ASSERT(firstzero + numzero <= (int)sizeof (struct in6_addr));
b0d623f7
A
4896
4897 /*
4898 * Check for an IPv4 embedded address.
4899 */
4900 v6end = sizeof (struct in6_addr) - 2;
4901 if (IN6_IS_ADDR_V4MAPPED(&ip6) ||
4902 IN6_IS_ADDR_V4COMPAT(&ip6)) {
b0d623f7
A
4903 for (i = sizeof (struct in6_addr) - 1;
4904 i >= (int)DTRACE_V4MAPPED_OFFSET; i--) {
b0d623f7
A
4905 ASSERT(end >= base);
4906
4907 val = ip6._S6_un._S6_u8[i];
4908
4909 if (val == 0) {
4910 *end-- = '0';
4911 } else {
4912 for (; val; val /= 10) {
4913 *end-- = '0' + val % 10;
4914 }
4915 }
4916
b0d623f7
A
4917 if (i > (int)DTRACE_V4MAPPED_OFFSET)
4918 *end-- = '.';
b0d623f7
A
4919 }
4920
4921 if (subr == DIF_SUBR_INET_NTOA6)
4922 goto inetout;
4923
4924 /*
4925 * Set v6end to skip the IPv4 address that
4926 * we have already stringified.
4927 */
4928 v6end = 10;
4929 }
4930
4931 /*
4932 * Build the IPv6 string by working through the
4933 * address in reverse.
4934 */
4935 for (i = v6end; i >= 0; i -= 2) {
4936 ASSERT(end >= base);
4937
4938 if (i == firstzero + numzero - 2) {
4939 *end-- = ':';
4940 *end-- = ':';
4941 i -= numzero - 2;
4942 continue;
4943 }
4944
4945 if (i < 14 && i != firstzero - 2)
4946 *end-- = ':';
4947
4948 val = (ip6._S6_un._S6_u8[i] << 8) +
4949 ip6._S6_un._S6_u8[i + 1];
4950
4951 if (val == 0) {
4952 *end-- = '0';
4953 } else {
4954 for (; val; val /= 16) {
4955 *end-- = digits[val % 16];
4956 }
4957 }
4958 }
4959 ASSERT(end + 1 >= base);
4960
4961#if defined(__APPLE__)
4962#undef _S6_un
4963#undef _S6_u8
4964#endif /* __APPLE__ */
4965 } else {
4966 /*
4967 * The user didn't use AF_INET or AF_INET6.
4968 */
4969 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
fe8ab488 4970 regs[rd] = 0;
b0d623f7
A
4971 break;
4972 }
4973
4974inetout: regs[rd] = (uintptr_t)end + 1;
4975 mstate->dtms_scratch_ptr += size;
4976 break;
4977 }
b0d623f7 4978
fe8ab488
A
4979 case DIF_SUBR_TOUPPER:
4980 case DIF_SUBR_TOLOWER: {
4981 uintptr_t src = tupregs[0].dttk_value;
4982 char *dest = (char *)mstate->dtms_scratch_ptr;
4983 char lower, upper, base, c;
4984 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4985 size_t len = dtrace_strlen((char*) src, size);
4986 size_t i = 0;
4987
4988 lower = (subr == DIF_SUBR_TOUPPER) ? 'a' : 'A';
4989 upper = (subr == DIF_SUBR_TOUPPER) ? 'z' : 'Z';
4990 base = (subr == DIF_SUBR_TOUPPER) ? 'A' : 'a';
4991
4992 if (!dtrace_canload(src, len + 1, mstate, vstate)) {
4993 regs[rd] = 0;
4994 break;
4995 }
4996
4997 if (!DTRACE_INSCRATCH(mstate, size)) {
4998 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4999 regs[rd] = 0;
5000 break;
5001 }
5002
5003 for (i = 0; i < size - 1; ++i) {
5004 if ((c = dtrace_load8(src + i)) == '\0')
5005 break;
5006 if (c >= lower && c <= upper)
5007 c = base + (c - lower);
5008 dest[i] = c;
5009 }
5010
5011 ASSERT(i < size);
5012
5013 dest[i] = '\0';
5014 regs[rd] = (uintptr_t) dest;
5015 mstate->dtms_scratch_ptr += size;
5016
5017 break;
5018 }
5019
39037602 5020#if defined(__APPLE__)
3e170ce0
A
5021 case DIF_SUBR_VM_KERNEL_ADDRPERM: {
5022 if (!dtrace_priv_kernel(state)) {
5023 regs[rd] = 0;
5024 } else {
5025 regs[rd] = VM_KERNEL_ADDRPERM((vm_offset_t) tupregs[0].dttk_value);
5026 }
5027
5028 break;
5029 }
39037602
A
5030
5031 case DIF_SUBR_KDEBUG_TRACE: {
5032 uint32_t debugid;
5033 uintptr_t args[4] = {0};
5034 int i;
5035
5036 if (nargs < 2 || nargs > 5) {
5037 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
5038 break;
b0d623f7 5039 }
b0d623f7 5040
39037602
A
5041 if (dtrace_destructive_disallow)
5042 return;
5043
5044 debugid = tupregs[0].dttk_value;
5045 for (i = 0; i < nargs - 1; i++)
5046 args[i] = tupregs[i + 1].dttk_value;
5047
5048 kernel_debug(debugid, args[0], args[1], args[2], args[3], 0);
5049
5050 break;
5051 }
5052
5053 case DIF_SUBR_KDEBUG_TRACE_STRING: {
5054 if (nargs != 3) {
5055 break;
b0d623f7
A
5056 }
5057
39037602
A
5058 if (dtrace_destructive_disallow)
5059 return;
5060
5061 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
5062 uint32_t debugid = tupregs[0].dttk_value;
5063 uint64_t str_id = tupregs[1].dttk_value;
5064 uintptr_t src = tupregs[2].dttk_value;
5065 size_t lim;
5066 char buf[size];
5067 char* str = NULL;
5068
5069 if (src != (uintptr_t)0) {
5070 str = buf;
5071 if (!dtrace_strcanload(src, size, &lim, mstate, vstate)) {
5072 break;
5073 }
5074 dtrace_strcpy((void*)src, buf, size);
2d21ac55 5075 }
b0d623f7 5076
39037602
A
5077 (void)kernel_debug_string(debugid, &str_id, str);
5078 regs[rd] = str_id;
5079
2d21ac55
A
5080 break;
5081 }
39037602
A
5082#endif
5083
2d21ac55
A
5084 }
5085}
5086
5087/*
5088 * Emulate the execution of DTrace IR instructions specified by the given
5089 * DIF object. This function is deliberately void of assertions as all of
5090 * the necessary checks are handled by a call to dtrace_difo_validate().
5091 */
5092static uint64_t
5093dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate,
5094 dtrace_vstate_t *vstate, dtrace_state_t *state)
5095{
5096 const dif_instr_t *text = difo->dtdo_buf;
5097 const uint_t textlen = difo->dtdo_len;
5098 const char *strtab = difo->dtdo_strtab;
5099 const uint64_t *inttab = difo->dtdo_inttab;
5100
5101 uint64_t rval = 0;
5102 dtrace_statvar_t *svar;
5103 dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
5104 dtrace_difv_t *v;
5105 volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
2d21ac55 5106 volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
2d21ac55
A
5107
5108 dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
5109 uint64_t regs[DIF_DIR_NREGS];
5110 uint64_t *tmp;
5111
5112 uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0;
5113 int64_t cc_r;
b0d623f7 5114 uint_t pc = 0, id, opc = 0;
2d21ac55
A
5115 uint8_t ttop = 0;
5116 dif_instr_t instr;
5117 uint_t r1, r2, rd;
5118
b0d623f7
A
5119 /*
5120 * We stash the current DIF object into the machine state: we need it
5121 * for subsequent access checking.
5122 */
5123 mstate->dtms_difo = difo;
5124
2d21ac55
A
5125 regs[DIF_REG_R0] = 0; /* %r0 is fixed at zero */
5126
5127 while (pc < textlen && !(*flags & CPU_DTRACE_FAULT)) {
5128 opc = pc;
5129
5130 instr = text[pc++];
5131 r1 = DIF_INSTR_R1(instr);
5132 r2 = DIF_INSTR_R2(instr);
5133 rd = DIF_INSTR_RD(instr);
5134
5135 switch (DIF_INSTR_OP(instr)) {
5136 case DIF_OP_OR:
5137 regs[rd] = regs[r1] | regs[r2];
5138 break;
5139 case DIF_OP_XOR:
5140 regs[rd] = regs[r1] ^ regs[r2];
5141 break;
5142 case DIF_OP_AND:
5143 regs[rd] = regs[r1] & regs[r2];
5144 break;
5145 case DIF_OP_SLL:
5146 regs[rd] = regs[r1] << regs[r2];
5147 break;
5148 case DIF_OP_SRL:
5149 regs[rd] = regs[r1] >> regs[r2];
5150 break;
5151 case DIF_OP_SUB:
5152 regs[rd] = regs[r1] - regs[r2];
5153 break;
5154 case DIF_OP_ADD:
5155 regs[rd] = regs[r1] + regs[r2];
5156 break;
5157 case DIF_OP_MUL:
5158 regs[rd] = regs[r1] * regs[r2];
5159 break;
5160 case DIF_OP_SDIV:
5161 if (regs[r2] == 0) {
5162 regs[rd] = 0;
5163 *flags |= CPU_DTRACE_DIVZERO;
5164 } else {
5165 regs[rd] = (int64_t)regs[r1] /
5166 (int64_t)regs[r2];
5167 }
5168 break;
5169
5170 case DIF_OP_UDIV:
5171 if (regs[r2] == 0) {
5172 regs[rd] = 0;
5173 *flags |= CPU_DTRACE_DIVZERO;
5174 } else {
5175 regs[rd] = regs[r1] / regs[r2];
5176 }
5177 break;
5178
5179 case DIF_OP_SREM:
5180 if (regs[r2] == 0) {
5181 regs[rd] = 0;
5182 *flags |= CPU_DTRACE_DIVZERO;
5183 } else {
5184 regs[rd] = (int64_t)regs[r1] %
5185 (int64_t)regs[r2];
5186 }
5187 break;
5188
5189 case DIF_OP_UREM:
5190 if (regs[r2] == 0) {
5191 regs[rd] = 0;
5192 *flags |= CPU_DTRACE_DIVZERO;
5193 } else {
5194 regs[rd] = regs[r1] % regs[r2];
5195 }
5196 break;
5197
5198 case DIF_OP_NOT:
5199 regs[rd] = ~regs[r1];
5200 break;
5201 case DIF_OP_MOV:
5202 regs[rd] = regs[r1];
5203 break;
5204 case DIF_OP_CMP:
5205 cc_r = regs[r1] - regs[r2];
5206 cc_n = cc_r < 0;
5207 cc_z = cc_r == 0;
5208 cc_v = 0;
5209 cc_c = regs[r1] < regs[r2];
5210 break;
5211 case DIF_OP_TST:
5212 cc_n = cc_v = cc_c = 0;
5213 cc_z = regs[r1] == 0;
5214 break;
5215 case DIF_OP_BA:
5216 pc = DIF_INSTR_LABEL(instr);
5217 break;
5218 case DIF_OP_BE:
5219 if (cc_z)
5220 pc = DIF_INSTR_LABEL(instr);
5221 break;
5222 case DIF_OP_BNE:
5223 if (cc_z == 0)
5224 pc = DIF_INSTR_LABEL(instr);
5225 break;
5226 case DIF_OP_BG:
5227 if ((cc_z | (cc_n ^ cc_v)) == 0)
5228 pc = DIF_INSTR_LABEL(instr);
5229 break;
5230 case DIF_OP_BGU:
5231 if ((cc_c | cc_z) == 0)
5232 pc = DIF_INSTR_LABEL(instr);
5233 break;
5234 case DIF_OP_BGE:
5235 if ((cc_n ^ cc_v) == 0)
5236 pc = DIF_INSTR_LABEL(instr);
5237 break;
5238 case DIF_OP_BGEU:
5239 if (cc_c == 0)
5240 pc = DIF_INSTR_LABEL(instr);
5241 break;
5242 case DIF_OP_BL:
5243 if (cc_n ^ cc_v)
5244 pc = DIF_INSTR_LABEL(instr);
5245 break;
5246 case DIF_OP_BLU:
5247 if (cc_c)
5248 pc = DIF_INSTR_LABEL(instr);
5249 break;
5250 case DIF_OP_BLE:
5251 if (cc_z | (cc_n ^ cc_v))
5252 pc = DIF_INSTR_LABEL(instr);
5253 break;
5254 case DIF_OP_BLEU:
5255 if (cc_c | cc_z)
5256 pc = DIF_INSTR_LABEL(instr);
5257 break;
5258 case DIF_OP_RLDSB:
5259 if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
5260 *flags |= CPU_DTRACE_KPRIV;
5261 *illval = regs[r1];
5262 break;
5263 }
5264 /*FALLTHROUGH*/
5265 case DIF_OP_LDSB:
5266 regs[rd] = (int8_t)dtrace_load8(regs[r1]);
5267 break;
5268 case DIF_OP_RLDSH:
5269 if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
5270 *flags |= CPU_DTRACE_KPRIV;
5271 *illval = regs[r1];
5272 break;
5273 }
5274 /*FALLTHROUGH*/
5275 case DIF_OP_LDSH:
5276 regs[rd] = (int16_t)dtrace_load16(regs[r1]);
5277 break;
5278 case DIF_OP_RLDSW:
5279 if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
5280 *flags |= CPU_DTRACE_KPRIV;
5281 *illval = regs[r1];
5282 break;
5283 }
5284 /*FALLTHROUGH*/
5285 case DIF_OP_LDSW:
5286 regs[rd] = (int32_t)dtrace_load32(regs[r1]);
5287 break;
5288 case DIF_OP_RLDUB:
5289 if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
5290 *flags |= CPU_DTRACE_KPRIV;
5291 *illval = regs[r1];
5292 break;
5293 }
5294 /*FALLTHROUGH*/
5295 case DIF_OP_LDUB:
5296 regs[rd] = dtrace_load8(regs[r1]);
5297 break;
5298 case DIF_OP_RLDUH:
5299 if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
5300 *flags |= CPU_DTRACE_KPRIV;
5301 *illval = regs[r1];
5302 break;
5303 }
5304 /*FALLTHROUGH*/
5305 case DIF_OP_LDUH:
5306 regs[rd] = dtrace_load16(regs[r1]);
5307 break;
5308 case DIF_OP_RLDUW:
5309 if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
5310 *flags |= CPU_DTRACE_KPRIV;
5311 *illval = regs[r1];
5312 break;
5313 }
5314 /*FALLTHROUGH*/
5315 case DIF_OP_LDUW:
5316 regs[rd] = dtrace_load32(regs[r1]);
5317 break;
5318 case DIF_OP_RLDX:
5319 if (!dtrace_canstore(regs[r1], 8, mstate, vstate)) {
5320 *flags |= CPU_DTRACE_KPRIV;
5321 *illval = regs[r1];
5322 break;
5323 }
5324 /*FALLTHROUGH*/
5325 case DIF_OP_LDX:
5326 regs[rd] = dtrace_load64(regs[r1]);
5327 break;
fe8ab488
A
5328/*
5329 * Darwin 32-bit kernel may fetch from 64-bit user.
5330 * Do not cast regs to uintptr_t
5331 * DIF_OP_ULDSB,DIF_OP_ULDSH, DIF_OP_ULDSW, DIF_OP_ULDUB
5332 * DIF_OP_ULDUH, DIF_OP_ULDUW, DIF_OP_ULDX
5333 */
2d21ac55
A
5334 case DIF_OP_ULDSB:
5335 regs[rd] = (int8_t)
5336 dtrace_fuword8(regs[r1]);
5337 break;
5338 case DIF_OP_ULDSH:
5339 regs[rd] = (int16_t)
5340 dtrace_fuword16(regs[r1]);
5341 break;
5342 case DIF_OP_ULDSW:
5343 regs[rd] = (int32_t)
5344 dtrace_fuword32(regs[r1]);
5345 break;
5346 case DIF_OP_ULDUB:
5347 regs[rd] =
5348 dtrace_fuword8(regs[r1]);
5349 break;
5350 case DIF_OP_ULDUH:
5351 regs[rd] =
5352 dtrace_fuword16(regs[r1]);
5353 break;
5354 case DIF_OP_ULDUW:
5355 regs[rd] =
5356 dtrace_fuword32(regs[r1]);
5357 break;
5358 case DIF_OP_ULDX:
5359 regs[rd] =
5360 dtrace_fuword64(regs[r1]);
5361 break;
5362 case DIF_OP_RET:
5363 rval = regs[rd];
b0d623f7 5364 pc = textlen;
2d21ac55
A
5365 break;
5366 case DIF_OP_NOP:
5367 break;
5368 case DIF_OP_SETX:
5369 regs[rd] = inttab[DIF_INSTR_INTEGER(instr)];
5370 break;
5371 case DIF_OP_SETS:
5372 regs[rd] = (uint64_t)(uintptr_t)
5373 (strtab + DIF_INSTR_STRING(instr));
5374 break;
b0d623f7
A
5375 case DIF_OP_SCMP: {
5376 size_t sz = state->dts_options[DTRACEOPT_STRSIZE];
5377 uintptr_t s1 = regs[r1];
5378 uintptr_t s2 = regs[r2];
39037602 5379 size_t lim1 = sz, lim2 = sz;
b0d623f7 5380
fe8ab488 5381 if (s1 != 0 &&
39037602 5382 !dtrace_strcanload(s1, sz, &lim1, mstate, vstate))
b0d623f7 5383 break;
fe8ab488 5384 if (s2 != 0 &&
39037602 5385 !dtrace_strcanload(s2, sz, &lim2, mstate, vstate))
b0d623f7
A
5386 break;
5387
39037602
A
5388 cc_r = dtrace_strncmp((char *)s1, (char *)s2,
5389 MIN(lim1, lim2));
2d21ac55
A
5390
5391 cc_n = cc_r < 0;
5392 cc_z = cc_r == 0;
5393 cc_v = cc_c = 0;
5394 break;
b0d623f7 5395 }
2d21ac55
A
5396 case DIF_OP_LDGA:
5397 regs[rd] = dtrace_dif_variable(mstate, state,
5398 r1, regs[r2]);
5399 break;
5400 case DIF_OP_LDGS:
5401 id = DIF_INSTR_VAR(instr);
5402
5403 if (id >= DIF_VAR_OTHER_UBASE) {
5404 uintptr_t a;
5405
5406 id -= DIF_VAR_OTHER_UBASE;
5407 svar = vstate->dtvs_globals[id];
5408 ASSERT(svar != NULL);
5409 v = &svar->dtsv_var;
5410
5411 if (!(v->dtdv_type.dtdt_flags & DIF_TF_BYREF)) {
5412 regs[rd] = svar->dtsv_data;
5413 break;
5414 }
5415
5416 a = (uintptr_t)svar->dtsv_data;
5417
5418 if (*(uint8_t *)a == UINT8_MAX) {
5419 /*
5420 * If the 0th byte is set to UINT8_MAX
5421 * then this is to be treated as a
5422 * reference to a NULL variable.
5423 */
fe8ab488 5424 regs[rd] = 0;
2d21ac55
A
5425 } else {
5426 regs[rd] = a + sizeof (uint64_t);
5427 }
5428
5429 break;
5430 }
5431
5432 regs[rd] = dtrace_dif_variable(mstate, state, id, 0);
5433 break;
5434
5435 case DIF_OP_STGS:
5436 id = DIF_INSTR_VAR(instr);
5437
5438 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5439 id -= DIF_VAR_OTHER_UBASE;
5440
39037602 5441 VERIFY(id < (uint_t)vstate->dtvs_nglobals);
2d21ac55
A
5442 svar = vstate->dtvs_globals[id];
5443 ASSERT(svar != NULL);
5444 v = &svar->dtsv_var;
5445
5446 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5447 uintptr_t a = (uintptr_t)svar->dtsv_data;
39037602 5448 size_t lim;
2d21ac55 5449
fe8ab488 5450 ASSERT(a != 0);
2d21ac55
A
5451 ASSERT(svar->dtsv_size != 0);
5452
fe8ab488 5453 if (regs[rd] == 0) {
2d21ac55
A
5454 *(uint8_t *)a = UINT8_MAX;
5455 break;
5456 } else {
5457 *(uint8_t *)a = 0;
5458 a += sizeof (uint64_t);
5459 }
b0d623f7
A
5460 if (!dtrace_vcanload(
5461 (void *)(uintptr_t)regs[rd], &v->dtdv_type,
39037602 5462 &lim, mstate, vstate))
b0d623f7 5463 break;
2d21ac55
A
5464
5465 dtrace_vcopy((void *)(uintptr_t)regs[rd],
39037602 5466 (void *)a, &v->dtdv_type, lim);
2d21ac55
A
5467 break;
5468 }
5469
5470 svar->dtsv_data = regs[rd];
5471 break;
5472
5473 case DIF_OP_LDTA:
5474 /*
5475 * There are no DTrace built-in thread-local arrays at
5476 * present. This opcode is saved for future work.
5477 */
5478 *flags |= CPU_DTRACE_ILLOP;
5479 regs[rd] = 0;
5480 break;
5481
5482 case DIF_OP_LDLS:
5483 id = DIF_INSTR_VAR(instr);
5484
5485 if (id < DIF_VAR_OTHER_UBASE) {
5486 /*
5487 * For now, this has no meaning.
5488 */
5489 regs[rd] = 0;
5490 break;
5491 }
5492
5493 id -= DIF_VAR_OTHER_UBASE;
5494
b0d623f7 5495 ASSERT(id < (uint_t)vstate->dtvs_nlocals);
2d21ac55 5496 ASSERT(vstate->dtvs_locals != NULL);
2d21ac55
A
5497 svar = vstate->dtvs_locals[id];
5498 ASSERT(svar != NULL);
5499 v = &svar->dtsv_var;
5500
5501 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5502 uintptr_t a = (uintptr_t)svar->dtsv_data;
5503 size_t sz = v->dtdv_type.dtdt_size;
5504
5505 sz += sizeof (uint64_t);
c910b4d9 5506 ASSERT(svar->dtsv_size == (int)NCPU * sz);
2d21ac55
A
5507 a += CPU->cpu_id * sz;
5508
5509 if (*(uint8_t *)a == UINT8_MAX) {
5510 /*
5511 * If the 0th byte is set to UINT8_MAX
5512 * then this is to be treated as a
5513 * reference to a NULL variable.
5514 */
fe8ab488 5515 regs[rd] = 0;
2d21ac55
A
5516 } else {
5517 regs[rd] = a + sizeof (uint64_t);
5518 }
5519
5520 break;
5521 }
5522
c910b4d9 5523 ASSERT(svar->dtsv_size == (int)NCPU * sizeof (uint64_t));
2d21ac55
A
5524 tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
5525 regs[rd] = tmp[CPU->cpu_id];
5526 break;
5527
5528 case DIF_OP_STLS:
5529 id = DIF_INSTR_VAR(instr);
5530
5531 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5532 id -= DIF_VAR_OTHER_UBASE;
39037602 5533 VERIFY(id < (uint_t)vstate->dtvs_nlocals);
2d21ac55
A
5534 ASSERT(vstate->dtvs_locals != NULL);
5535 svar = vstate->dtvs_locals[id];
5536 ASSERT(svar != NULL);
5537 v = &svar->dtsv_var;
5538
5539 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5540 uintptr_t a = (uintptr_t)svar->dtsv_data;
5541 size_t sz = v->dtdv_type.dtdt_size;
39037602 5542 size_t lim;
2d21ac55
A
5543
5544 sz += sizeof (uint64_t);
c910b4d9 5545 ASSERT(svar->dtsv_size == (int)NCPU * sz);
2d21ac55
A
5546 a += CPU->cpu_id * sz;
5547
fe8ab488 5548 if (regs[rd] == 0) {
2d21ac55
A
5549 *(uint8_t *)a = UINT8_MAX;
5550 break;
5551 } else {
5552 *(uint8_t *)a = 0;
5553 a += sizeof (uint64_t);
5554 }
5555
b0d623f7
A
5556 if (!dtrace_vcanload(
5557 (void *)(uintptr_t)regs[rd], &v->dtdv_type,
39037602 5558 &lim, mstate, vstate))
b0d623f7
A
5559 break;
5560
2d21ac55 5561 dtrace_vcopy((void *)(uintptr_t)regs[rd],
39037602 5562 (void *)a, &v->dtdv_type, lim);
2d21ac55
A
5563 break;
5564 }
5565
c910b4d9 5566 ASSERT(svar->dtsv_size == (int)NCPU * sizeof (uint64_t));
2d21ac55
A
5567 tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
5568 tmp[CPU->cpu_id] = regs[rd];
5569 break;
5570
5571 case DIF_OP_LDTS: {
5572 dtrace_dynvar_t *dvar;
5573 dtrace_key_t *key;
5574
5575 id = DIF_INSTR_VAR(instr);
5576 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5577 id -= DIF_VAR_OTHER_UBASE;
5578 v = &vstate->dtvs_tlocals[id];
5579
5580 key = &tupregs[DIF_DTR_NREGS];
5581 key[0].dttk_value = (uint64_t)id;
5582 key[0].dttk_size = 0;
5583 DTRACE_TLS_THRKEY(key[1].dttk_value);
5584 key[1].dttk_size = 0;
5585
5586 dvar = dtrace_dynvar(dstate, 2, key,
b0d623f7
A
5587 sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC,
5588 mstate, vstate);
2d21ac55
A
5589
5590 if (dvar == NULL) {
5591 regs[rd] = 0;
5592 break;
5593 }
5594
5595 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5596 regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;
5597 } else {
5598 regs[rd] = *((uint64_t *)dvar->dtdv_data);
5599 }
5600
5601 break;
5602 }
5603
5604 case DIF_OP_STTS: {
5605 dtrace_dynvar_t *dvar;
5606 dtrace_key_t *key;
5607
5608 id = DIF_INSTR_VAR(instr);
5609 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5610 id -= DIF_VAR_OTHER_UBASE;
39037602 5611 VERIFY(id < (uint_t)vstate->dtvs_ntlocals);
2d21ac55
A
5612
5613 key = &tupregs[DIF_DTR_NREGS];
5614 key[0].dttk_value = (uint64_t)id;
5615 key[0].dttk_size = 0;
5616 DTRACE_TLS_THRKEY(key[1].dttk_value);
5617 key[1].dttk_size = 0;
5618 v = &vstate->dtvs_tlocals[id];
5619
5620 dvar = dtrace_dynvar(dstate, 2, key,
5621 v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
5622 v->dtdv_type.dtdt_size : sizeof (uint64_t),
5623 regs[rd] ? DTRACE_DYNVAR_ALLOC :
b0d623f7 5624 DTRACE_DYNVAR_DEALLOC, mstate, vstate);
2d21ac55
A
5625
5626 /*
5627 * Given that we're storing to thread-local data,
5628 * we need to flush our predicate cache.
5629 */
2d21ac55 5630 dtrace_set_thread_predcache(current_thread(), 0);
2d21ac55 5631
2d21ac55
A
5632 if (dvar == NULL)
5633 break;
5634
5635 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
39037602
A
5636 size_t lim;
5637
b0d623f7
A
5638 if (!dtrace_vcanload(
5639 (void *)(uintptr_t)regs[rd],
39037602 5640 &v->dtdv_type, &lim, mstate, vstate))
b0d623f7
A
5641 break;
5642
2d21ac55 5643 dtrace_vcopy((void *)(uintptr_t)regs[rd],
39037602 5644 dvar->dtdv_data, &v->dtdv_type, lim);
2d21ac55
A
5645 } else {
5646 *((uint64_t *)dvar->dtdv_data) = regs[rd];
5647 }
5648
5649 break;
5650 }
5651
5652 case DIF_OP_SRA:
5653 regs[rd] = (int64_t)regs[r1] >> regs[r2];
5654 break;
5655
5656 case DIF_OP_CALL:
5657 dtrace_dif_subr(DIF_INSTR_SUBR(instr), rd,
5658 regs, tupregs, ttop, mstate, state);
5659 break;
5660
5661 case DIF_OP_PUSHTR:
5662 if (ttop == DIF_DTR_NREGS) {
5663 *flags |= CPU_DTRACE_TUPOFLOW;
5664 break;
5665 }
5666
5667 if (r1 == DIF_TYPE_STRING) {
5668 /*
5669 * If this is a string type and the size is 0,
5670 * we'll use the system-wide default string
5671 * size. Note that we are _not_ looking at
5672 * the value of the DTRACEOPT_STRSIZE option;
5673 * had this been set, we would expect to have
5674 * a non-zero size value in the "pushtr".
5675 */
5676 tupregs[ttop].dttk_size =
5677 dtrace_strlen((char *)(uintptr_t)regs[rd],
5678 regs[r2] ? regs[r2] :
5679 dtrace_strsize_default) + 1;
5680 } else {
ecc0ceb4
A
5681 if (regs[r2] > LONG_MAX) {
5682 *flags |= CPU_DTRACE_ILLOP;
5683 break;
5684 }
2d21ac55
A
5685 tupregs[ttop].dttk_size = regs[r2];
5686 }
5687
5688 tupregs[ttop++].dttk_value = regs[rd];
5689 break;
5690
5691 case DIF_OP_PUSHTV:
5692 if (ttop == DIF_DTR_NREGS) {
5693 *flags |= CPU_DTRACE_TUPOFLOW;
5694 break;
5695 }
5696
5697 tupregs[ttop].dttk_value = regs[rd];
5698 tupregs[ttop++].dttk_size = 0;
5699 break;
5700
5701 case DIF_OP_POPTS:
5702 if (ttop != 0)
5703 ttop--;
5704 break;
5705
5706 case DIF_OP_FLUSHTS:
5707 ttop = 0;
5708 break;
5709
5710 case DIF_OP_LDGAA:
5711 case DIF_OP_LDTAA: {
5712 dtrace_dynvar_t *dvar;
5713 dtrace_key_t *key = tupregs;
5714 uint_t nkeys = ttop;
5715
5716 id = DIF_INSTR_VAR(instr);
5717 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5718 id -= DIF_VAR_OTHER_UBASE;
5719
5720 key[nkeys].dttk_value = (uint64_t)id;
5721 key[nkeys++].dttk_size = 0;
5722
5723 if (DIF_INSTR_OP(instr) == DIF_OP_LDTAA) {
5724 DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
5725 key[nkeys++].dttk_size = 0;
39037602 5726 VERIFY(id < (uint_t)vstate->dtvs_ntlocals);
2d21ac55
A
5727 v = &vstate->dtvs_tlocals[id];
5728 } else {
39037602 5729 VERIFY(id < (uint_t)vstate->dtvs_nglobals);
2d21ac55
A
5730 v = &vstate->dtvs_globals[id]->dtsv_var;
5731 }
5732
5733 dvar = dtrace_dynvar(dstate, nkeys, key,
5734 v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
5735 v->dtdv_type.dtdt_size : sizeof (uint64_t),
b0d623f7 5736 DTRACE_DYNVAR_NOALLOC, mstate, vstate);
2d21ac55
A
5737
5738 if (dvar == NULL) {
5739 regs[rd] = 0;
5740 break;
5741 }
5742
5743 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5744 regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;
5745 } else {
5746 regs[rd] = *((uint64_t *)dvar->dtdv_data);
5747 }
5748
5749 break;
5750 }
5751
5752 case DIF_OP_STGAA:
5753 case DIF_OP_STTAA: {
5754 dtrace_dynvar_t *dvar;
5755 dtrace_key_t *key = tupregs;
5756 uint_t nkeys = ttop;
5757
5758 id = DIF_INSTR_VAR(instr);
5759 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5760 id -= DIF_VAR_OTHER_UBASE;
5761
5762 key[nkeys].dttk_value = (uint64_t)id;
5763 key[nkeys++].dttk_size = 0;
5764
5765 if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) {
5766 DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
5767 key[nkeys++].dttk_size = 0;
39037602 5768 VERIFY(id < (uint_t)vstate->dtvs_ntlocals);
2d21ac55
A
5769 v = &vstate->dtvs_tlocals[id];
5770 } else {
39037602 5771 VERIFY(id < (uint_t)vstate->dtvs_nglobals);
2d21ac55
A
5772 v = &vstate->dtvs_globals[id]->dtsv_var;
5773 }
5774
5775 dvar = dtrace_dynvar(dstate, nkeys, key,
5776 v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
5777 v->dtdv_type.dtdt_size : sizeof (uint64_t),
5778 regs[rd] ? DTRACE_DYNVAR_ALLOC :
b0d623f7 5779 DTRACE_DYNVAR_DEALLOC, mstate, vstate);
2d21ac55
A
5780
5781 if (dvar == NULL)
5782 break;
5783
5784 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
39037602
A
5785 size_t lim;
5786
b0d623f7
A
5787 if (!dtrace_vcanload(
5788 (void *)(uintptr_t)regs[rd], &v->dtdv_type,
39037602 5789 &lim, mstate, vstate))
b0d623f7
A
5790 break;
5791
2d21ac55 5792 dtrace_vcopy((void *)(uintptr_t)regs[rd],
39037602 5793 dvar->dtdv_data, &v->dtdv_type, lim);
2d21ac55
A
5794 } else {
5795 *((uint64_t *)dvar->dtdv_data) = regs[rd];
5796 }
5797
5798 break;
5799 }
5800
5801 case DIF_OP_ALLOCS: {
5802 uintptr_t ptr = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
5803 size_t size = ptr - mstate->dtms_scratch_ptr + regs[r1];
5804
b0d623f7
A
5805 /*
5806 * Rounding up the user allocation size could have
5807 * overflowed large, bogus allocations (like -1ULL) to
5808 * 0.
5809 */
5810 if (size < regs[r1] ||
5811 !DTRACE_INSCRATCH(mstate, size)) {
2d21ac55 5812 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
fe8ab488 5813 regs[rd] = 0;
b0d623f7
A
5814 break;
5815 }
5816
5817 dtrace_bzero((void *) mstate->dtms_scratch_ptr, size);
2d21ac55
A
5818 mstate->dtms_scratch_ptr += size;
5819 regs[rd] = ptr;
2d21ac55
A
5820 break;
5821 }
5822
5823 case DIF_OP_COPYS:
5824 if (!dtrace_canstore(regs[rd], regs[r2],
5825 mstate, vstate)) {
5826 *flags |= CPU_DTRACE_BADADDR;
5827 *illval = regs[rd];
5828 break;
5829 }
5830
b0d623f7
A
5831 if (!dtrace_canload(regs[r1], regs[r2], mstate, vstate))
5832 break;
5833
2d21ac55
A
5834 dtrace_bcopy((void *)(uintptr_t)regs[r1],
5835 (void *)(uintptr_t)regs[rd], (size_t)regs[r2]);
5836 break;
5837
5838 case DIF_OP_STB:
5839 if (!dtrace_canstore(regs[rd], 1, mstate, vstate)) {
5840 *flags |= CPU_DTRACE_BADADDR;
5841 *illval = regs[rd];
5842 break;
5843 }
5844 *((uint8_t *)(uintptr_t)regs[rd]) = (uint8_t)regs[r1];
5845 break;
5846
5847 case DIF_OP_STH:
5848 if (!dtrace_canstore(regs[rd], 2, mstate, vstate)) {
5849 *flags |= CPU_DTRACE_BADADDR;
5850 *illval = regs[rd];
5851 break;
5852 }
5853 if (regs[rd] & 1) {
5854 *flags |= CPU_DTRACE_BADALIGN;
5855 *illval = regs[rd];
5856 break;
5857 }
5858 *((uint16_t *)(uintptr_t)regs[rd]) = (uint16_t)regs[r1];
5859 break;
5860
5861 case DIF_OP_STW:
5862 if (!dtrace_canstore(regs[rd], 4, mstate, vstate)) {
5863 *flags |= CPU_DTRACE_BADADDR;
5864 *illval = regs[rd];
5865 break;
5866 }
5867 if (regs[rd] & 3) {
5868 *flags |= CPU_DTRACE_BADALIGN;
5869 *illval = regs[rd];
5870 break;
5871 }
5872 *((uint32_t *)(uintptr_t)regs[rd]) = (uint32_t)regs[r1];
5873 break;
5874
5875 case DIF_OP_STX:
5876 if (!dtrace_canstore(regs[rd], 8, mstate, vstate)) {
5877 *flags |= CPU_DTRACE_BADADDR;
5878 *illval = regs[rd];
5879 break;
5880 }
fe8ab488
A
5881
5882 /*
5883 * Darwin kmem_zalloc() called from
5884 * dtrace_difo_init() is 4-byte aligned.
5885 */
5886 if (regs[rd] & 3) {
2d21ac55
A
5887 *flags |= CPU_DTRACE_BADALIGN;
5888 *illval = regs[rd];
5889 break;
5890 }
5891 *((uint64_t *)(uintptr_t)regs[rd]) = regs[r1];
5892 break;
5893 }
5894 }
5895
5896 if (!(*flags & CPU_DTRACE_FAULT))
5897 return (rval);
5898
5899 mstate->dtms_fltoffs = opc * sizeof (dif_instr_t);
5900 mstate->dtms_present |= DTRACE_MSTATE_FLTOFFS;
5901
5902 return (0);
5903}
5904
5905static void
5906dtrace_action_breakpoint(dtrace_ecb_t *ecb)
5907{
5908 dtrace_probe_t *probe = ecb->dte_probe;
5909 dtrace_provider_t *prov = probe->dtpr_provider;
5910 char c[DTRACE_FULLNAMELEN + 80], *str;
b0d623f7
A
5911 const char *msg = "dtrace: breakpoint action at probe ";
5912 const char *ecbmsg = " (ecb ";
2d21ac55
A
5913 uintptr_t mask = (0xf << (sizeof (uintptr_t) * NBBY / 4));
5914 uintptr_t val = (uintptr_t)ecb;
5915 int shift = (sizeof (uintptr_t) * NBBY) - 4, i = 0;
5916
5917 if (dtrace_destructive_disallow)
5918 return;
5919
5920 /*
5921 * It's impossible to be taking action on the NULL probe.
5922 */
5923 ASSERT(probe != NULL);
5924
5925 /*
5926 * This is a poor man's (destitute man's?) sprintf(): we want to
5927 * print the provider name, module name, function name and name of
5928 * the probe, along with the hex address of the ECB with the breakpoint
5929 * action -- all of which we must place in the character buffer by
5930 * hand.
5931 */
5932 while (*msg != '\0')
5933 c[i++] = *msg++;
5934
5935 for (str = prov->dtpv_name; *str != '\0'; str++)
5936 c[i++] = *str;
5937 c[i++] = ':';
5938
5939 for (str = probe->dtpr_mod; *str != '\0'; str++)
5940 c[i++] = *str;
5941 c[i++] = ':';
5942
5943 for (str = probe->dtpr_func; *str != '\0'; str++)
5944 c[i++] = *str;
5945 c[i++] = ':';
5946
5947 for (str = probe->dtpr_name; *str != '\0'; str++)
5948 c[i++] = *str;
5949
5950 while (*ecbmsg != '\0')
5951 c[i++] = *ecbmsg++;
5952
5953 while (shift >= 0) {
5954 mask = (uintptr_t)0xf << shift;
5955
5956 if (val >= ((uintptr_t)1 << shift))
5957 c[i++] = "0123456789abcdef"[(val & mask) >> shift];
5958 shift -= 4;
5959 }
5960
5961 c[i++] = ')';
5962 c[i] = '\0';
5963
5964 debug_enter(c);
5965}
5966
5967static void
5968dtrace_action_panic(dtrace_ecb_t *ecb)
5969{
5970 dtrace_probe_t *probe = ecb->dte_probe;
5971
5972 /*
5973 * It's impossible to be taking action on the NULL probe.
5974 */
5975 ASSERT(probe != NULL);
5976
5977 if (dtrace_destructive_disallow)
5978 return;
5979
5980 if (dtrace_panicked != NULL)
5981 return;
5982
2d21ac55
A
5983 if (dtrace_casptr(&dtrace_panicked, NULL, current_thread()) != NULL)
5984 return;
2d21ac55
A
5985
5986 /*
5987 * We won the right to panic. (We want to be sure that only one
5988 * thread calls panic() from dtrace_probe(), and that panic() is
5989 * called exactly once.)
5990 */
316670eb 5991 panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)",
2d21ac55
A
5992 probe->dtpr_provider->dtpv_name, probe->dtpr_mod,
5993 probe->dtpr_func, probe->dtpr_name, (void *)ecb);
5994
fe8ab488
A
5995 /*
5996 * APPLE NOTE: this was for an old Mac OS X debug feature
5997 * allowing a return from panic(). Revisit someday.
5998 */
2d21ac55 5999 dtrace_panicked = NULL;
2d21ac55
A
6000}
6001
6002static void
6003dtrace_action_raise(uint64_t sig)
6004{
6005 if (dtrace_destructive_disallow)
6006 return;
6007
6008 if (sig >= NSIG) {
6009 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
6010 return;
6011 }
6012
2d21ac55
A
6013 /*
6014 * raise() has a queue depth of 1 -- we ignore all subsequent
6015 * invocations of the raise() action.
6016 */
2d21ac55 6017
2d21ac55
A
6018 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
6019
6020 if (uthread && uthread->t_dtrace_sig == 0) {
6021 uthread->t_dtrace_sig = sig;
6d2010ae 6022 act_set_astbsd(current_thread());
2d21ac55 6023 }
2d21ac55
A
6024}
6025
6026static void
6027dtrace_action_stop(void)
6028{
6029 if (dtrace_destructive_disallow)
6030 return;
6031
6d2010ae
A
6032 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
6033 if (uthread) {
6034 /*
6035 * The currently running process will be set to task_suspend
6036 * when it next leaves the kernel.
6037 */
b0d623f7 6038 uthread->t_dtrace_stop = 1;
6d2010ae 6039 act_set_astbsd(current_thread());
b0d623f7 6040 }
2d21ac55
A
6041}
6042
fe8ab488
A
6043
6044/*
6045 * APPLE NOTE: pidresume works in conjunction with the dtrace stop action.
6046 * Both activate only when the currently running process next leaves the
6047 * kernel.
6048 */
6d2010ae
A
6049static void
6050dtrace_action_pidresume(uint64_t pid)
6051{
6052 if (dtrace_destructive_disallow)
6053 return;
6054
6055 if (kauth_cred_issuser(kauth_cred_get()) == 0) {
6056 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
6057 return;
6058 }
6d2010ae
A
6059 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
6060
6061 /*
6062 * When the currently running process leaves the kernel, it attempts to
6063 * task_resume the process (denoted by pid), if that pid appears to have
6064 * been stopped by dtrace_action_stop().
6065 * The currently running process has a pidresume() queue depth of 1 --
6066 * subsequent invocations of the pidresume() action are ignored.
6067 */
6068
6069 if (pid != 0 && uthread && uthread->t_dtrace_resumepid == 0) {
6070 uthread->t_dtrace_resumepid = pid;
6071 act_set_astbsd(current_thread());
6072 }
6073}
6d2010ae 6074
2d21ac55
A
6075static void
6076dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val)
6077{
6078 hrtime_t now;
6079 volatile uint16_t *flags;
6d2010ae 6080 dtrace_cpu_t *cpu = CPU;
2d21ac55
A
6081
6082 if (dtrace_destructive_disallow)
6083 return;
6084
6085 flags = (volatile uint16_t *)&cpu_core[cpu->cpu_id].cpuc_dtrace_flags;
6086
6087 now = dtrace_gethrtime();
6088
6089 if (now - cpu->cpu_dtrace_chillmark > dtrace_chill_interval) {
6090 /*
6091 * We need to advance the mark to the current time.
6092 */
6093 cpu->cpu_dtrace_chillmark = now;
6094 cpu->cpu_dtrace_chilled = 0;
6095 }
6096
6097 /*
6098 * Now check to see if the requested chill time would take us over
6099 * the maximum amount of time allowed in the chill interval. (Or
6100 * worse, if the calculation itself induces overflow.)
6101 */
6102 if (cpu->cpu_dtrace_chilled + val > dtrace_chill_max ||
6103 cpu->cpu_dtrace_chilled + val < cpu->cpu_dtrace_chilled) {
6104 *flags |= CPU_DTRACE_ILLOP;
6105 return;
6106 }
6107
6108 while (dtrace_gethrtime() - now < val)
6109 continue;
6110
6111 /*
6112 * Normally, we assure that the value of the variable "timestamp" does
6113 * not change within an ECB. The presence of chill() represents an
6114 * exception to this rule, however.
6115 */
6116 mstate->dtms_present &= ~DTRACE_MSTATE_TIMESTAMP;
6117 cpu->cpu_dtrace_chilled += val;
6118}
6119
6120static void
6121dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state,
6122 uint64_t *buf, uint64_t arg)
6123{
6124 int nframes = DTRACE_USTACK_NFRAMES(arg);
6125 int strsize = DTRACE_USTACK_STRSIZE(arg);
6126 uint64_t *pcs = &buf[1], *fps;
6127 char *str = (char *)&pcs[nframes];
6128 int size, offs = 0, i, j;
6129 uintptr_t old = mstate->dtms_scratch_ptr, saved;
6130 uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
6131 char *sym;
6132
6133 /*
6134 * Should be taking a faster path if string space has not been
6135 * allocated.
6136 */
6137 ASSERT(strsize != 0);
6138
6139 /*
6140 * We will first allocate some temporary space for the frame pointers.
6141 */
6142 fps = (uint64_t *)P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
6143 size = (uintptr_t)fps - mstate->dtms_scratch_ptr +
6144 (nframes * sizeof (uint64_t));
6145
b0d623f7 6146 if (!DTRACE_INSCRATCH(mstate, (uintptr_t)size)) {
2d21ac55
A
6147 /*
6148 * Not enough room for our frame pointers -- need to indicate
6149 * that we ran out of scratch space.
6150 */
6151 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
6152 return;
6153 }
6154
6155 mstate->dtms_scratch_ptr += size;
6156 saved = mstate->dtms_scratch_ptr;
6157
6158 /*
6159 * Now get a stack with both program counters and frame pointers.
6160 */
6161 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
6162 dtrace_getufpstack(buf, fps, nframes + 1);
6163 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
6164
6165 /*
6166 * If that faulted, we're cooked.
6167 */
6168 if (*flags & CPU_DTRACE_FAULT)
6169 goto out;
6170
6171 /*
6172 * Now we want to walk up the stack, calling the USTACK helper. For
6173 * each iteration, we restore the scratch pointer.
6174 */
6175 for (i = 0; i < nframes; i++) {
6176 mstate->dtms_scratch_ptr = saved;
6177
6178 if (offs >= strsize)
6179 break;
6180
6181 sym = (char *)(uintptr_t)dtrace_helper(
6182 DTRACE_HELPER_ACTION_USTACK,
6183 mstate, state, pcs[i], fps[i]);
6184
6185 /*
6186 * If we faulted while running the helper, we're going to
6187 * clear the fault and null out the corresponding string.
6188 */
6189 if (*flags & CPU_DTRACE_FAULT) {
6190 *flags &= ~CPU_DTRACE_FAULT;
6191 str[offs++] = '\0';
6192 continue;
6193 }
6194
6195 if (sym == NULL) {
6196 str[offs++] = '\0';
6197 continue;
6198 }
6199
6200 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
6201
6202 /*
6203 * Now copy in the string that the helper returned to us.
6204 */
6205 for (j = 0; offs + j < strsize; j++) {
6206 if ((str[offs + j] = sym[j]) == '\0')
6207 break;
6208 }
6209
6210 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
6211
6212 offs += j + 1;
6213 }
6214
6215 if (offs >= strsize) {
6216 /*
6217 * If we didn't have room for all of the strings, we don't
6218 * abort processing -- this needn't be a fatal error -- but we
6219 * still want to increment a counter (dts_stkstroverflows) to
6220 * allow this condition to be warned about. (If this is from
6221 * a jstack() action, it is easily tuned via jstackstrsize.)
6222 */
6223 dtrace_error(&state->dts_stkstroverflows);
6224 }
6225
6226 while (offs < strsize)
6227 str[offs++] = '\0';
6228
6229out:
6230 mstate->dtms_scratch_ptr = old;
6231}
6232
3e170ce0
A
6233static void
6234dtrace_store_by_ref(dtrace_difo_t *dp, caddr_t tomax, size_t size,
6235 size_t *valoffsp, uint64_t *valp, uint64_t end, int intuple, int dtkind)
6236{
6237 volatile uint16_t *flags;
6238 uint64_t val = *valp;
6239 size_t valoffs = *valoffsp;
6240
6241 flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
6242 ASSERT(dtkind == DIF_TF_BYREF || dtkind == DIF_TF_BYUREF);
6243
6244 /*
6245 * If this is a string, we're going to only load until we find the zero
6246 * byte -- after which we'll store zero bytes.
6247 */
6248 if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) {
6249 char c = '\0' + 1;
6250 size_t s;
6251
6252 for (s = 0; s < size; s++) {
6253 if (c != '\0' && dtkind == DIF_TF_BYREF) {
6254 c = dtrace_load8(val++);
6255 } else if (c != '\0' && dtkind == DIF_TF_BYUREF) {
6256 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
6257 c = dtrace_fuword8((user_addr_t)(uintptr_t)val++);
6258 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
6259 if (*flags & CPU_DTRACE_FAULT)
6260 break;
6261 }
6262
6263 DTRACE_STORE(uint8_t, tomax, valoffs++, c);
6264
6265 if (c == '\0' && intuple)
6266 break;
6267 }
6268 } else {
6269 uint8_t c;
6270 while (valoffs < end) {
6271 if (dtkind == DIF_TF_BYREF) {
6272 c = dtrace_load8(val++);
6273 } else if (dtkind == DIF_TF_BYUREF) {
6274 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
6275 c = dtrace_fuword8((user_addr_t)(uintptr_t)val++);
6276 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
6277 if (*flags & CPU_DTRACE_FAULT)
6278 break;
6279 }
6280
6281 DTRACE_STORE(uint8_t, tomax,
6282 valoffs++, c);
6283 }
6284 }
6285
6286 *valp = val;
6287 *valoffsp = valoffs;
6288}
6289
2d21ac55
A
6290/*
6291 * If you're looking for the epicenter of DTrace, you just found it. This
6292 * is the function called by the provider to fire a probe -- from which all
6293 * subsequent probe-context DTrace activity emanates.
6294 */
/*
 * Parameters: id selects the probe (dtrace_probes[id - 1]); arg0..arg4 are
 * copied into mstate.dtms_arg[0..4], the machine state handed to
 * dtrace_dif_emulate() for predicates and actions.
 *
 * The whole body runs between dtrace_interrupt_disable() and the matching
 * dtrace_interrupt_enable(cookie) that precedes every return.
 */
2d21ac55
A
6295static void
6296__dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
6297 uint64_t arg2, uint64_t arg3, uint64_t arg4)
2d21ac55
A
6298{
6299 processorid_t cpuid;
6300 dtrace_icookie_t cookie;
6301 dtrace_probe_t *probe;
6302 dtrace_mstate_t mstate;
6303 dtrace_ecb_t *ecb;
6304 dtrace_action_t *act;
6305 intptr_t offs;
6306 size_t size;
6307 int vtime, onintr;
6308 volatile uint16_t *flags;
6309 hrtime_t now;
6310
2d21ac55
A
6311 cookie = dtrace_interrupt_disable();
6312 probe = dtrace_probes[id - 1];
6313 cpuid = CPU->cpu_id;
6314 onintr = CPU_ON_INTR(CPU);
6315
2d21ac55
A
6316 if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE &&
6317 probe->dtpr_predcache == dtrace_get_thread_predcache(current_thread())) {
2d21ac55
A
6318 /*
6319 * We have hit in the predicate cache; we know that
6320 * this predicate would evaluate to be false.
6321 */
6322 dtrace_interrupt_enable(cookie);
6323 return;
6324 }
6325
6326 if (panic_quiesce) {
6327 /*
6328 * We don't trace anything if we're panicking.
6329 */
6330 dtrace_interrupt_enable(cookie);
6331 return;
6332 }
6333
6334#if !defined(__APPLE__)
6335 now = dtrace_gethrtime();
6336 vtime = dtrace_vtime_references != 0;
6337
6338 if (vtime && curthread->t_dtrace_start)
6339 curthread->t_dtrace_vtime += now - curthread->t_dtrace_start;
6340#else
fe8ab488
A
6341 /*
6342 * APPLE NOTE: The time spent entering DTrace and arriving
6343 * to this point, is attributed to the current thread.
6344 * Instead it should accrue to DTrace. FIXME
6345 */
2d21ac55
A
6346 vtime = dtrace_vtime_references != 0;
6347
6348 if (vtime)
6349 {
6350 int64_t dtrace_accum_time, recent_vtime;
6351 thread_t thread = current_thread();
6352
6353 dtrace_accum_time = dtrace_get_thread_tracing(thread); /* Time spent inside DTrace so far (nanoseconds) */
6354
/* A negative value means the sign bit was set to suspend accumulation (see the fault path below). */
6355 if (dtrace_accum_time >= 0) {
6356 recent_vtime = dtrace_abs_to_nano(dtrace_calc_thread_recent_vtime(thread)); /* up to the moment thread vtime */
6357
6358 recent_vtime = recent_vtime - dtrace_accum_time; /* Time without DTrace contribution */
6359
6360 dtrace_set_thread_vtime(thread, recent_vtime);
6361 }
6362 }
6363
6364 now = dtrace_gethrtime(); /* must not precede dtrace_calc_thread_recent_vtime() call! */
6365#endif /* __APPLE__ */
6366
cf7d32b8 6367 /*
fe8ab488
A
6368 * APPLE NOTE: A provider may call dtrace_probe_error() in lieu of
6369 * dtrace_probe() in some circumstances. See, e.g. fasttrap_isa.c.
6370 * However the provider has no access to ECB context, so passes
6371 * 0 through "arg0" and the probe_id of the overridden probe as arg1.
6372 * Detect that here and cons up a viable state (from the probe_id).
cf7d32b8 6373 */
b0d623f7 6374 if (dtrace_probeid_error == id && 0 == arg0) {
cf7d32b8
A
6375 dtrace_id_t ftp_id = (dtrace_id_t)arg1;
/* NOTE(review): ftp_id and the looked-up probe are used without a range or NULL check here -- confirm callers guarantee validity. */
6376 dtrace_probe_t *ftp_probe = dtrace_probes[ftp_id - 1];
6377 dtrace_ecb_t *ftp_ecb = ftp_probe->dtpr_ecb;
6378
6379 if (NULL != ftp_ecb) {
6380 dtrace_state_t *ftp_state = ftp_ecb->dte_state;
6381
6382 arg0 = (uint64_t)(uintptr_t)ftp_state;
6383 arg1 = ftp_ecb->dte_epid;
6384 /*
6385 * args[2-4] established by caller.
6386 */
6387 ftp_state->dts_arg_error_illval = -1; /* arg5 */
6388 }
6389 }
cf7d32b8 6390
b0d623f7 6391 mstate.dtms_difo = NULL;
2d21ac55 6392 mstate.dtms_probe = probe;
fe8ab488 6393 mstate.dtms_strtok = 0;
2d21ac55
A
6394 mstate.dtms_arg[0] = arg0;
6395 mstate.dtms_arg[1] = arg1;
6396 mstate.dtms_arg[2] = arg2;
6397 mstate.dtms_arg[3] = arg3;
6398 mstate.dtms_arg[4] = arg4;
6399
6400 flags = (volatile uint16_t *)&cpu_core[cpuid].cpuc_dtrace_flags;
6401
/*
 * Walk every ECB chained on this probe (via dte_next). Each ECB is
 * evaluated against its own consumer state and that state's buffers
 * for the current CPU.
 */
6402 for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
6403 dtrace_predicate_t *pred = ecb->dte_predicate;
6404 dtrace_state_t *state = ecb->dte_state;
6405 dtrace_buffer_t *buf = &state->dts_buffer[cpuid];
6406 dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid];
6407 dtrace_vstate_t *vstate = &state->dts_vstate;
6408 dtrace_provider_t *prov = probe->dtpr_provider;
fe8ab488 6409 uint64_t tracememsize = 0;
2d21ac55
A
6410 int committed = 0;
6411 caddr_t tomax;
6412
6413 /*
6414 * A little subtlety with the following (seemingly innocuous)
6415 * declaration of the automatic 'val': by looking at the
6416 * code, you might think that it could be declared in the
6417 * action processing loop, below. (That is, it's only used in
6418 * the action processing loop.) However, it must be declared
6419 * out of that scope because in the case of DIF expression
6420 * arguments to aggregating actions, one iteration of the
6421 * action loop will use the last iteration's value.
6422 */
/* NOTE(review): both preprocessor branches below are identical, so the #ifdef has no effect. */
6423#ifdef lint
6424 uint64_t val = 0;
6425#else
c910b4d9 6426 uint64_t val = 0;
2d21ac55
A
6427#endif
6428
/* Reset the per-ECB machine state and clear any error bits left by the previous ECB. */
6429 mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
6430 *flags &= ~CPU_DTRACE_ERROR;
6431
6432 if (prov == dtrace_provider) {
6433 /*
6434 * If dtrace itself is the provider of this probe,
6435 * we're only going to continue processing the ECB if
6436 * arg0 (the dtrace_state_t) is equal to the ECB's
6437 * creating state. (This prevents disjoint consumers
6438 * from seeing one another's metaprobes.)
6439 */
6440 if (arg0 != (uint64_t)(uintptr_t)state)
6441 continue;
6442 }
6443
6444 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) {
6445 /*
6446 * We're not currently active. If our provider isn't
6447 * the dtrace pseudo provider, we're not interested.
6448 */
6449 if (prov != dtrace_provider)
6450 continue;
6451
6452 /*
6453 * Now we must further check if we are in the BEGIN
6454 * probe. If we are, we will only continue processing
6455 * if we're still in WARMUP -- if one BEGIN enabling
6456 * has invoked the exit() action, we don't want to
6457 * evaluate subsequent BEGIN enablings.
6458 */
6459 if (probe->dtpr_id == dtrace_probeid_begin &&
6460 state->dts_activity != DTRACE_ACTIVITY_WARMUP) {
6461 ASSERT(state->dts_activity ==
6462 DTRACE_ACTIVITY_DRAINING);
6463 continue;
6464 }
6465 }
6466
2d21ac55
A
6467 if (ecb->dte_cond) {
6468 /*
6469 * If the dte_cond bits indicate that this
6470 * consumer is only allowed to see user-mode firings
6471 * of this probe, call the provider's dtps_usermode()
6472 * entry point to check that the probe was fired
6473 * while in a user context. Skip this ECB if that's
6474 * not the case.
6475 */
6476 if ((ecb->dte_cond & DTRACE_COND_USERMODE) &&
39037602 6477 prov->dtpv_pops.dtps_usermode &&
2d21ac55
A
6478 prov->dtpv_pops.dtps_usermode(prov->dtpv_arg,
6479 probe->dtpr_id, probe->dtpr_arg) == 0)
6480 continue;
6481
6482 /*
6483 * This is more subtle than it looks. We have to be
6484 * absolutely certain that CRED() isn't going to
6485 * change out from under us so it's only legit to
6486 * examine that structure if we're in constrained
6487 * situations. Currently, the only times we'll do this
6488 * check is if a non-super-user has enabled the
6489 * profile or syscall providers -- providers that
6490 * allow visibility of all processes. For the
6491 * profile case, the check above will ensure that
6492 * we're examining a user context.
6493 */
6494 if (ecb->dte_cond & DTRACE_COND_OWNER) {
6495 cred_t *cr;
6496 cred_t *s_cr =
6497 ecb->dte_state->dts_cred.dcr_cred;
6498 proc_t *proc;
b0d623f7 6499#pragma unused(proc) /* __APPLE__ */
2d21ac55
A
6500
6501 ASSERT(s_cr != NULL);
6502
6d2010ae
A
6503 /*
6504 * XXX this is hackish, but so is setting a variable
6505 * XXX in a McCarthy OR...
6506 */
2d21ac55 6507 if ((cr = dtrace_CRED()) == NULL ||
6d2010ae
A
6508 posix_cred_get(s_cr)->cr_uid != posix_cred_get(cr)->cr_uid ||
6509 posix_cred_get(s_cr)->cr_uid != posix_cred_get(cr)->cr_ruid ||
6510 posix_cred_get(s_cr)->cr_uid != posix_cred_get(cr)->cr_suid ||
6511 posix_cred_get(s_cr)->cr_gid != posix_cred_get(cr)->cr_gid ||
6512 posix_cred_get(s_cr)->cr_gid != posix_cred_get(cr)->cr_rgid ||
6513 posix_cred_get(s_cr)->cr_gid != posix_cred_get(cr)->cr_sgid ||
2d21ac55
A
6514#if !defined(__APPLE__)
6515 (proc = ttoproc(curthread)) == NULL ||
6516 (proc->p_flag & SNOCD))
6517#else
/* NOTE(review): the constant 1 makes this whole condition true, so on Darwin every DTRACE_COND_OWNER ECB is skipped -- confirm this is intended. */
fe8ab488 6518 1) /* APPLE NOTE: Darwin omits "No Core Dump" flag */
2d21ac55
A
6519#endif /* __APPLE__ */
6520 continue;
6521 }
6522
6523 if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) {
6524 cred_t *cr;
6525 cred_t *s_cr =
6526 ecb->dte_state->dts_cred.dcr_cred;
b0d623f7 6527#pragma unused(cr, s_cr) /* __APPLE__ */
2d21ac55
A
6528
6529 ASSERT(s_cr != NULL);
6530
b0d623f7 6531#if !defined(__APPLE__)
2d21ac55
A
6532 if ((cr = CRED()) == NULL ||
6533 s_cr->cr_zone->zone_id !=
6534 cr->cr_zone->zone_id)
6535 continue;
b0d623f7 6536#else
fe8ab488 6537 /* APPLE NOTE: Darwin doesn't do zones. */
2d21ac55
A
6538#endif /* __APPLE__ */
6539 }
6540 }
6541
6542 if (now - state->dts_alive > dtrace_deadman_timeout) {
6543 /*
6544 * We seem to be dead. Unless we (a) have kernel
6545 * destructive permissions (b) have explicitly enabled
6546 * destructive actions and (c) destructive actions have
6547 * not been disabled, we're going to transition into
6548 * the KILLED state, from which no further processing
6549 * on this state will be performed.
6550 */
6551 if (!dtrace_priv_kernel_destructive(state) ||
6552 !state->dts_cred.dcr_destructive ||
6553 dtrace_destructive_disallow) {
6554 void *activity = &state->dts_activity;
6555 dtrace_activity_t current;
6556
/* Retry the compare-and-swap until KILLED replaces whatever activity value was read. */
6557 do {
6558 current = state->dts_activity;
6559 } while (dtrace_cas32(activity, current,
6560 DTRACE_ACTIVITY_KILLED) != current);
6561
6562 continue;
6563 }
6564 }
6565
/* Reserve dte_needed bytes in this CPU's buffer; a negative return means the reservation failed and this ECB is skipped. */
6566 if ((offs = dtrace_buffer_reserve(buf, ecb->dte_needed,
6567 ecb->dte_alignment, state, &mstate)) < 0)
6568 continue;
6569
6570 tomax = buf->dtb_tomax;
6571 ASSERT(tomax != NULL);
6572
04b8595b
A
6573 /*
6574 * Build and store the record header corresponding to the ECB.
6575 */
6576 if (ecb->dte_size != 0) {
6577 dtrace_rechdr_t dtrh;
6578
6579 if (!(mstate.dtms_present & DTRACE_MSTATE_TIMESTAMP)) {
6580 mstate.dtms_timestamp = dtrace_gethrtime();
6581 mstate.dtms_present |= DTRACE_MSTATE_TIMESTAMP;
6582 }
6583
6584 ASSERT(ecb->dte_size >= sizeof(dtrace_rechdr_t));
6585
6586 dtrh.dtrh_epid = ecb->dte_epid;
6587 DTRACE_RECORD_STORE_TIMESTAMP(&dtrh, mstate.dtms_timestamp);
6588 DTRACE_STORE(dtrace_rechdr_t, tomax, offs, dtrh);
6589 }
2d21ac55
A
6590
6591 mstate.dtms_epid = ecb->dte_epid;
6592 mstate.dtms_present |= DTRACE_MSTATE_EPID;
6593
b0d623f7
A
6594 if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)
6595 mstate.dtms_access = DTRACE_ACCESS_KERNEL;
6596 else
6597 mstate.dtms_access = 0;
6598
2d21ac55
A
6599 if (pred != NULL) {
6600 dtrace_difo_t *dp = pred->dtp_difo;
5ba3f43e 6601 uint64_t rval;
2d21ac55
A
6602
6603 rval = dtrace_dif_emulate(dp, &mstate, vstate, state);
6604
/* Predicate evaluated to false without error: optionally remember that in the thread's predicate cache, then skip this ECB. */
6605 if (!(*flags & CPU_DTRACE_ERROR) && !rval) {
6606 dtrace_cacheid_t cid = probe->dtpr_predcache;
6607
6608 if (cid != DTRACE_CACHEIDNONE && !onintr) {
6609 /*
6610 * Update the predicate cache...
6611 */
6612 ASSERT(cid == pred->dtp_cacheid);
fe8ab488 6613
2d21ac55 6614 dtrace_set_thread_predcache(current_thread(), cid);
2d21ac55
A
6615 }
6616
6617 continue;
6618 }
6619 }
6620
/*
 * Run the ECB's actions in list order. The loop condition stops the
 * walk as soon as CPU_DTRACE_ERROR is set, so 'act' is left pointing
 * past the action that raised the error.
 */
6621 for (act = ecb->dte_action; !(*flags & CPU_DTRACE_ERROR) &&
6622 act != NULL; act = act->dta_next) {
6623 size_t valoffs;
6624 dtrace_difo_t *dp;
6625 dtrace_recdesc_t *rec = &act->dta_rec;
6626
6627 size = rec->dtrd_size;
6628 valoffs = offs + rec->dtrd_offset;
6629
6630 if (DTRACEACT_ISAGG(act->dta_kind)) {
6631 uint64_t v = 0xbad;
6632 dtrace_aggregation_t *agg;
6633
6634 agg = (dtrace_aggregation_t *)act;
6635
6636 if ((dp = act->dta_difo) != NULL)
6637 v = dtrace_dif_emulate(dp,
6638 &mstate, vstate, state);
6639
6640 if (*flags & CPU_DTRACE_ERROR)
6641 continue;
6642
6643 /*
6644 * Note that we always pass the expression
6645 * value from the previous iteration of the
6646 * action loop. This value will only be used
6647 * if there is an expression argument to the
6648 * aggregating action, denoted by the
6649 * dtag_hasarg field.
6650 */
6651 dtrace_aggregate(agg, buf,
6652 offs, aggbuf, v, val);
6653 continue;
6654 }
6655
/* First switch: action kinds handled before any DIF expression is evaluated; each case ends in 'continue'. */
6656 switch (act->dta_kind) {
6657 case DTRACEACT_STOP:
6658 if (dtrace_priv_proc_destructive(state))
6659 dtrace_action_stop();
6660 continue;
6661
6662 case DTRACEACT_BREAKPOINT:
6663 if (dtrace_priv_kernel_destructive(state))
6664 dtrace_action_breakpoint(ecb);
6665 continue;
6666
6667 case DTRACEACT_PANIC:
6668 if (dtrace_priv_kernel_destructive(state))
6669 dtrace_action_panic(ecb);
6670 continue;
6671
6672 case DTRACEACT_STACK:
6673 if (!dtrace_priv_kernel(state))
6674 continue;
6675
b0d623f7
A
6676 dtrace_getpcstack((pc_t *)(tomax + valoffs),
6677 size / sizeof (pc_t), probe->dtpr_aframes,
6678 DTRACE_ANCHORED(probe) ? NULL :
6679 (uint32_t *)(uintptr_t)arg0);
2d21ac55
A
6680 continue;
6681
6682 case DTRACEACT_JSTACK:
6683 case DTRACEACT_USTACK:
6684 if (!dtrace_priv_proc(state))
6685 continue;
6686
6687 /*
6688 * See comment in DIF_VAR_PID.
6689 */
6690 if (DTRACE_ANCHORED(mstate.dtms_probe) &&
6691 CPU_ON_INTR(CPU)) {
6692 int depth = DTRACE_USTACK_NFRAMES(
6693 rec->dtrd_arg) + 1;
6694
6695 dtrace_bzero((void *)(tomax + valoffs),
6696 DTRACE_USTACK_STRSIZE(rec->dtrd_arg)
6697 + depth * sizeof (uint64_t));
6698
6699 continue;
6700 }
6701
6702 if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0 &&
6703 curproc->p_dtrace_helpers != NULL) {
6704 /*
6705 * This is the slow path -- we have
6706 * allocated string space, and we're
6707 * getting the stack of a process that
6708 * has helpers. Call into a separate
6709 * routine to perform this processing.
6710 */
6711 dtrace_action_ustack(&mstate, state,
6712 (uint64_t *)(tomax + valoffs),
6713 rec->dtrd_arg);
6714 continue;
6715 }
6716
6717 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
6718 dtrace_getupcstack((uint64_t *)
6719 (tomax + valoffs),
6720 DTRACE_USTACK_NFRAMES(rec->dtrd_arg) + 1);
6721 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
6722 continue;
6723
6724 default:
6725 break;
6726 }
6727
6728 dp = act->dta_difo;
6729 ASSERT(dp != NULL);
6730
6731 val = dtrace_dif_emulate(dp, &mstate, vstate, state);
6732
6733 if (*flags & CPU_DTRACE_ERROR)
6734 continue;
6735
/* Second switch: action kinds that consume 'val'. Cases that 'break' fall through to the store logic after the switch. */
6736 switch (act->dta_kind) {
04b8595b
A
6737 case DTRACEACT_SPECULATE: {
6738 dtrace_rechdr_t *dtrh = NULL;
6739
2d21ac55
A
6740 ASSERT(buf == &state->dts_buffer[cpuid]);
6741 buf = dtrace_speculation_buffer(state,
6742 cpuid, val);
6743
6744 if (buf == NULL) {
6745 *flags |= CPU_DTRACE_DROP;
6746 continue;
6747 }
6748
6749 offs = dtrace_buffer_reserve(buf,
6750 ecb->dte_needed, ecb->dte_alignment,
6751 state, NULL);
6752
6753 if (offs < 0) {
6754 *flags |= CPU_DTRACE_DROP;
6755 continue;
6756 }
6757
6758 tomax = buf->dtb_tomax;
6759 ASSERT(tomax != NULL);
6760
39037602 6761 if (ecb->dte_size == 0)
04b8595b
A
6762 continue;
6763
6764 ASSERT(ecb->dte_size >= sizeof(dtrace_rechdr_t));
6765 dtrh = ((void *)(tomax + offs));
6766 dtrh->dtrh_epid = ecb->dte_epid;
6767
6768 /*
6769 * When the speculation is committed, all of
6770 * the records in the speculative buffer will
6771 * have their timestamps set to the commit
6772 * time. Until then, it is set to a sentinel
6773 * value, for debuggability.
6774 */
6775 DTRACE_RECORD_STORE_TIMESTAMP(dtrh, UINT64_MAX);
6776
6777 continue;
6778 }
2d21ac55
A
6779
6780 case DTRACEACT_CHILL:
6781 if (dtrace_priv_kernel_destructive(state))
6782 dtrace_action_chill(&mstate, val);
6783 continue;
6784
6785 case DTRACEACT_RAISE:
6786 if (dtrace_priv_proc_destructive(state))
6787 dtrace_action_raise(val);
6788 continue;
6789
fe8ab488 6790 case DTRACEACT_PIDRESUME: /* __APPLE__ */
6d2010ae
A
6791 if (dtrace_priv_proc_destructive(state))
6792 dtrace_action_pidresume(val);
6793 continue;
6d2010ae 6794
2d21ac55
A
6795 case DTRACEACT_COMMIT:
6796 ASSERT(!committed);
6797
6798 /*
6799 * We need to commit our buffer state.
6800 */
6801 if (ecb->dte_size)
6802 buf->dtb_offset = offs + ecb->dte_size;
6803 buf = &state->dts_buffer[cpuid];
6804 dtrace_speculation_commit(state, cpuid, val);
6805 committed = 1;
6806 continue;
6807
6808 case DTRACEACT_DISCARD:
6809 dtrace_speculation_discard(state, cpuid, val);
6810 continue;
6811
6812 case DTRACEACT_DIFEXPR:
6813 case DTRACEACT_LIBACT:
6814 case DTRACEACT_PRINTF:
6815 case DTRACEACT_PRINTA:
6816 case DTRACEACT_SYSTEM:
6817 case DTRACEACT_FREOPEN:
fe8ab488
A
6818 case DTRACEACT_APPLEBINARY: /* __APPLE__ */
6819 case DTRACEACT_TRACEMEM:
6820 break;
6821
6822 case DTRACEACT_TRACEMEM_DYNSIZE:
/* Remember the dynamic size; it is applied (and reset to 0) by a later by-reference store in this ECB. */
6823 tracememsize = val;
2d21ac55
A
6824 break;
6825
6826 case DTRACEACT_SYM:
6827 case DTRACEACT_MOD:
6828 if (!dtrace_priv_kernel(state))
6829 continue;
6830 break;
6831
2d21ac55
A
6832 case DTRACEACT_USYM:
6833 case DTRACEACT_UMOD:
6834 case DTRACEACT_UADDR: {
6835 if (!dtrace_priv_proc(state))
6836 continue;
6837
/* The record is two 64-bit words: the current pid followed by the evaluated value. */
6838 DTRACE_STORE(uint64_t, tomax,
39236c6e 6839 valoffs, (uint64_t)dtrace_proc_selfpid());
2d21ac55
A
6840 DTRACE_STORE(uint64_t, tomax,
6841 valoffs + sizeof (uint64_t), val);
6842
6843 continue;
6844 }
2d21ac55
A
6845
6846 case DTRACEACT_EXIT: {
6847 /*
6848 * For the exit action, we are going to attempt
6849 * to atomically set our activity to be
6850 * draining. If this fails (either because
6851 * another CPU has beat us to the exit action,
6852 * or because our current activity is something
6853 * other than ACTIVE or WARMUP), we will
6854 * continue. This assures that the exit action
6855 * can be successfully recorded at most once
6856 * when we're in the ACTIVE state. If we're
6857 * encountering the exit() action while in
6858 * COOLDOWN, however, we want to honor the new
6859 * status code. (We know that we're the only
6860 * thread in COOLDOWN, so there is no race.)
6861 */
6862 void *activity = &state->dts_activity;
6863 dtrace_activity_t current = state->dts_activity;
6864
6865 if (current == DTRACE_ACTIVITY_COOLDOWN)
6866 break;
6867
6868 if (current != DTRACE_ACTIVITY_WARMUP)
6869 current = DTRACE_ACTIVITY_ACTIVE;
6870
6871 if (dtrace_cas32(activity, current,
6872 DTRACE_ACTIVITY_DRAINING) != current) {
6873 *flags |= CPU_DTRACE_DROP;
6874 continue;
6875 }
6876
6877 break;
6878 }
6879
6880 default:
6881 ASSERT(0);
6882 }
6883
/* By-reference results are copied byte-wise from the address in 'val'; by-value results are stored directly by the size switch below. */
3e170ce0 6884 if (dp->dtdo_rtype.dtdt_flags & (DIF_TF_BYREF | DIF_TF_BYUREF)) {
2d21ac55
A
6885 uintptr_t end = valoffs + size;
6886
fe8ab488
A
6887 if (tracememsize != 0 &&
6888 valoffs + tracememsize < end)
6889 {
6890 end = valoffs + tracememsize;
6891 tracememsize = 0;
6892 }
6893
3e170ce0
A
6894 if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF &&
6895 !dtrace_vcanload((void *)(uintptr_t)val,
39037602 6896 &dp->dtdo_rtype, NULL, &mstate, vstate))
3e170ce0 6897 {
2d21ac55
A
6898 continue;
6899 }
6900
3e170ce0
A
6901 dtrace_store_by_ref(dp, tomax, size, &valoffs,
6902 &val, end, act->dta_intuple,
6903 dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF ?
6904 DIF_TF_BYREF: DIF_TF_BYUREF);
2d21ac55
A
6905
6906 continue;
6907 }
6908
6909 switch (size) {
6910 case 0:
6911 break;
6912
6913 case sizeof (uint8_t):
6914 DTRACE_STORE(uint8_t, tomax, valoffs, val);
6915 break;
6916 case sizeof (uint16_t):
6917 DTRACE_STORE(uint16_t, tomax, valoffs, val);
6918 break;
6919 case sizeof (uint32_t):
6920 DTRACE_STORE(uint32_t, tomax, valoffs, val);
6921 break;
6922 case sizeof (uint64_t):
6923 DTRACE_STORE(uint64_t, tomax, valoffs, val);
6924 break;
6925 default:
6926 /*
6927 * Any other size should have been returned by
6928 * reference, not by value.
6929 */
6930 ASSERT(0);
6931 break;
6932 }
6933 }
6934
/* On a drop, skip the dtb_offset update at the bottom of the loop, so the reserved space is not consumed. */
6935 if (*flags & CPU_DTRACE_DROP)
6936 continue;
6937
6938 if (*flags & CPU_DTRACE_FAULT) {
6939 int ndx;
6940 dtrace_action_t *err;
6941
6942 buf->dtb_errors++;
6943
6944 if (probe->dtpr_id == dtrace_probeid_error) {
6945 /*
6946 * There's nothing we can do -- we had an
6947 * error on the error probe. We bump an
6948 * error counter to at least indicate that
6949 * this condition happened.
6950 */
6951 dtrace_error(&state->dts_dblerrors);
6952 continue;
6953 }
6954
6955 if (vtime) {
6956 /*
6957 * Before recursing on dtrace_probe(), we
6958 * need to explicitly clear out our start
6959 * time to prevent it from being accumulated
6960 * into t_dtrace_vtime.
6961 */
fe8ab488
A
6962
6963 /*
6964 * Darwin sets the sign bit on t_dtrace_tracing
6965 * to suspend accumulation to it.
6966 */
2d21ac55 6967 dtrace_set_thread_tracing(current_thread(),
fe8ab488
A
6968 (1ULL<<63) | dtrace_get_thread_tracing(current_thread()));
6969
2d21ac55
A
6970 }
6971
6972 /*
6973 * Iterate over the actions to figure out which action
6974 * we were processing when we experienced the error.
6975 * Note that act points _past_ the faulting action; if
6976 * act is ecb->dte_action, the fault was in the
6977 * predicate, if it's ecb->dte_action->dta_next it's
6978 * in action #1, and so on.
6979 */
6980 for (err = ecb->dte_action, ndx = 0;
6981 err != act; err = err->dta_next, ndx++)
6982 continue;
6983
6984 dtrace_probe_error(state, ecb->dte_epid, ndx,
6985 (mstate.dtms_present & DTRACE_MSTATE_FLTOFFS) ?
6986 mstate.dtms_fltoffs : -1, DTRACE_FLAGS2FLT(*flags),
6987 cpu_core[cpuid].cpuc_dtrace_illval);
6988
6989 continue;
6990 }
6991
/* Success: advance the buffer offset past this record unless a commit action already did so. */
6992 if (!committed)
6993 buf->dtb_offset = offs + ecb->dte_size;
6994 }
6995
fe8ab488 6996 /* FIXME: On Darwin the time spent leaving DTrace from this point to the rti is attributed
b0d623f7 6997 to the current thread. Instead it should accrue to DTrace. */
2d21ac55
A
6998 if (vtime) {
6999 thread_t thread = current_thread();
7000 int64_t t = dtrace_get_thread_tracing(thread);
7001
7002 if (t >= 0) {
7003 /* Usual case, accumulate time spent here into t_dtrace_tracing */
7004 dtrace_set_thread_tracing(thread, t + (dtrace_gethrtime() - now));
7005 } else {
7006 /* Return from error recursion. No accumulation, just clear the sign bit on t_dtrace_tracing. */
7007 dtrace_set_thread_tracing(thread, (~(1ULL<<63)) & t);
7008 }
7009 }
2d21ac55
A
7010
7011 dtrace_interrupt_enable(cookie);
7012}
7013
fe8ab488
A
7014/*
7015 * APPLE NOTE: Don't allow a thread to re-enter dtrace_probe().
7016 * This could occur if a probe is encountered on some function in the
7017 * transitive closure of the call to dtrace_probe().
7018 * Solaris has some strong guarantees that this won't happen.
7019 * The Darwin implementation is not so mature as to make those guarantees.
7020 * Hence, the introduction of __dtrace_probe() on xnu.
7021 */
6d2010ae 7022
2d21ac55
A
7023void
7024dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
7025 uint64_t arg2, uint64_t arg3, uint64_t arg4)
7026{
7027 thread_t thread = current_thread();
6d2010ae 7028 disable_preemption();
2d21ac55
A
7029 if (id == dtrace_probeid_error) {
7030 __dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
b0d623f7 7031 dtrace_getipl(); /* Defeat tail-call optimization of __dtrace_probe() */
2d21ac55
A
7032 } else if (!dtrace_get_thread_reentering(thread)) {
7033 dtrace_set_thread_reentering(thread, TRUE);
7034 __dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
7035 dtrace_set_thread_reentering(thread, FALSE);
7036 }
b0d623f7
A
7037#if DEBUG
7038 else __dtrace_probe(dtrace_probeid_error, 0, id, 1, -1, DTRACEFLT_UNKNOWN);
7039#endif
6d2010ae 7040 enable_preemption();
2d21ac55 7041}
2d21ac55
A
7042
7043/*
7044 * DTrace Probe Hashing Functions
7045 *
7046 * The functions in this section (and indeed, the functions in remaining
7047 * sections) are not _called_ from probe context. (Any exceptions to this are
7048 * marked with a "Note:".) Rather, they are called from elsewhere in the
7049 * DTrace framework to look-up probes in, add probes to and remove probes from
7050 * the DTrace probe hashes. (Each probe is hashed by each element of the
7051 * probe tuple -- allowing for fast lookups, regardless of what was
7052 * specified.)
7053 */
7054static uint_t
b0d623f7 7055dtrace_hash_str(const char *p)
2d21ac55
A
7056{
7057 unsigned int g;
7058 uint_t hval = 0;
7059
7060 while (*p) {
7061 hval = (hval << 4) + *p++;
7062 if ((g = (hval & 0xf0000000)) != 0)
7063 hval ^= g >> 24;
7064 hval &= ~g;
7065 }
7066 return (hval);
7067}
7068
d9a64523
A
7069static const char*
7070dtrace_strkey_probe_provider(void *elm, uintptr_t offs)
7071{
7072#pragma unused(offs)
7073 dtrace_probe_t *probe = (dtrace_probe_t*)elm;
7074 return probe->dtpr_provider->dtpv_name;
7075}
7076
/*
 * String-key callback: the key is the character data located offs bytes
 * into elm itself.
 */
static const char*
dtrace_strkey_offset(void *elm, uintptr_t offs)
{
	uintptr_t addr = (uintptr_t)elm + offs;

	return ((char *)addr);
}
7082
/*
 * String-key callback: the key is reached through the char pointer
 * stored offs bytes into elm.
 */
static const char*
dtrace_strkey_deref_offset(void *elm, uintptr_t offs)
{
	char **slot = (char **)((uintptr_t)elm + offs);

	return (*slot);
}
7088
2d21ac55 7089static dtrace_hash_t *
d9a64523 7090dtrace_hash_create(dtrace_strkey_f func, uintptr_t arg, uintptr_t nextoffs, uintptr_t prevoffs)
2d21ac55
A
7091{
7092 dtrace_hash_t *hash = kmem_zalloc(sizeof (dtrace_hash_t), KM_SLEEP);
7093
d9a64523
A
7094 hash->dth_getstr = func;
7095 hash->dth_stroffs = arg;
2d21ac55
A
7096 hash->dth_nextoffs = nextoffs;
7097 hash->dth_prevoffs = prevoffs;
7098
7099 hash->dth_size = 1;
7100 hash->dth_mask = hash->dth_size - 1;
7101
7102 hash->dth_tab = kmem_zalloc(hash->dth_size *
7103 sizeof (dtrace_hashbucket_t *), KM_SLEEP);
7104
7105 return (hash);
7106}
7107
fe8ab488
A
7108/*
7109 * APPLE NOTE: dtrace_hash_destroy is not used.
7110 * It is called by dtrace_detach which is not
7111 * currently implemented. Revisit someday.
7112 */
7113#if !defined(__APPLE__)
2d21ac55
A
7114static void
7115dtrace_hash_destroy(dtrace_hash_t *hash)
7116{
b0d623f7 7117#if DEBUG
2d21ac55
A
7118 int i;
7119
7120 for (i = 0; i < hash->dth_size; i++)
7121 ASSERT(hash->dth_tab[i] == NULL);
7122#endif
7123
7124 kmem_free(hash->dth_tab,
7125 hash->dth_size * sizeof (dtrace_hashbucket_t *));
7126 kmem_free(hash, sizeof (dtrace_hash_t));
7127}
7128#endif /* __APPLE__ */
7129
7130static void
7131dtrace_hash_resize(dtrace_hash_t *hash)
7132{
7133 int size = hash->dth_size, i, ndx;
7134 int new_size = hash->dth_size << 1;
7135 int new_mask = new_size - 1;
7136 dtrace_hashbucket_t **new_tab, *bucket, *next;
7137
7138 ASSERT((new_size & new_mask) == 0);
7139
7140 new_tab = kmem_zalloc(new_size * sizeof (void *), KM_SLEEP);
7141
7142 for (i = 0; i < size; i++) {
7143 for (bucket = hash->dth_tab[i]; bucket != NULL; bucket = next) {
d9a64523 7144 void *elm = bucket->dthb_chain;
2d21ac55 7145
d9a64523
A
7146 ASSERT(elm != NULL);
7147 ndx = DTRACE_HASHSTR(hash, elm) & new_mask;
2d21ac55
A
7148
7149 next = bucket->dthb_next;
7150 bucket->dthb_next = new_tab[ndx];
7151 new_tab[ndx] = bucket;
7152 }
7153 }
7154
7155 kmem_free(hash->dth_tab, hash->dth_size * sizeof (void *));
7156 hash->dth_tab = new_tab;
7157 hash->dth_size = new_size;
7158 hash->dth_mask = new_mask;
7159}
7160
7161static void
d9a64523 7162dtrace_hash_add(dtrace_hash_t *hash, void *new)
2d21ac55
A
7163{
7164 int hashval = DTRACE_HASHSTR(hash, new);
7165 int ndx = hashval & hash->dth_mask;
7166 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
d9a64523 7167 void **nextp, **prevp;
2d21ac55
A
7168
7169 for (; bucket != NULL; bucket = bucket->dthb_next) {
7170 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, new))
7171 goto add;
7172 }
7173
7174 if ((hash->dth_nbuckets >> 1) > hash->dth_size) {
7175 dtrace_hash_resize(hash);
7176 dtrace_hash_add(hash, new);
7177 return;
7178 }
7179
7180 bucket = kmem_zalloc(sizeof (dtrace_hashbucket_t), KM_SLEEP);
7181 bucket->dthb_next = hash->dth_tab[ndx];
7182 hash->dth_tab[ndx] = bucket;
7183 hash->dth_nbuckets++;
7184
7185add:
7186 nextp = DTRACE_HASHNEXT(hash, new);
7187 ASSERT(*nextp == NULL && *(DTRACE_HASHPREV(hash, new)) == NULL);
7188 *nextp = bucket->dthb_chain;
7189
7190 if (bucket->dthb_chain != NULL) {
7191 prevp = DTRACE_HASHPREV(hash, bucket->dthb_chain);
7192 ASSERT(*prevp == NULL);
7193 *prevp = new;
7194 }
7195
7196 bucket->dthb_chain = new;
7197 bucket->dthb_len++;
7198}
7199
d9a64523
A
7200static void *
7201dtrace_hash_lookup_string(dtrace_hash_t *hash, const char *str)
2d21ac55 7202{
d9a64523 7203 int hashval = dtrace_hash_str(str);
2d21ac55
A
7204 int ndx = hashval & hash->dth_mask;
7205 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
7206
7207 for (; bucket != NULL; bucket = bucket->dthb_next) {
d9a64523 7208 if (strcmp(str, DTRACE_GETSTR(hash, bucket->dthb_chain)) == 0)
2d21ac55
A
7209 return (bucket->dthb_chain);
7210 }
7211
7212 return (NULL);
7213}
7214
d9a64523
A
7215static dtrace_probe_t *
7216dtrace_hash_lookup(dtrace_hash_t *hash, void *template)
7217{
7218 return dtrace_hash_lookup_string(hash, DTRACE_GETSTR(hash, template));
7219}
7220
2d21ac55 7221static int
d9a64523 7222dtrace_hash_collisions(dtrace_hash_t *hash, void *template)
2d21ac55
A
7223{
7224 int hashval = DTRACE_HASHSTR(hash, template);
7225 int ndx = hashval & hash->dth_mask;
7226 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
7227
7228 for (; bucket != NULL; bucket = bucket->dthb_next) {
7229 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
7230 return (bucket->dthb_len);
7231 }
7232
fe8ab488 7233 return (0);
2d21ac55
A
7234}
7235
7236static void
d9a64523 7237dtrace_hash_remove(dtrace_hash_t *hash, void *elm)
2d21ac55 7238{
d9a64523 7239 int ndx = DTRACE_HASHSTR(hash, elm) & hash->dth_mask;
2d21ac55
A
7240 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
7241
d9a64523
A
7242 void **prevp = DTRACE_HASHPREV(hash, elm);
7243 void **nextp = DTRACE_HASHNEXT(hash, elm);
2d21ac55
A
7244
7245 /*
d9a64523 7246 * Find the bucket that we're removing this elm from.
2d21ac55
A
7247 */
7248 for (; bucket != NULL; bucket = bucket->dthb_next) {
d9a64523 7249 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, elm))
2d21ac55
A
7250 break;
7251 }
7252
7253 ASSERT(bucket != NULL);
7254
7255 if (*prevp == NULL) {
7256 if (*nextp == NULL) {
7257 /*
d9a64523 7258 * The removed element was the only element on this
2d21ac55
A
7259 * bucket; we need to remove the bucket.
7260 */
7261 dtrace_hashbucket_t *b = hash->dth_tab[ndx];
7262
d9a64523 7263 ASSERT(bucket->dthb_chain == elm);
2d21ac55
A
7264 ASSERT(b != NULL);
7265
7266 if (b == bucket) {
7267 hash->dth_tab[ndx] = bucket->dthb_next;
7268 } else {
7269 while (b->dthb_next != bucket)
7270 b = b->dthb_next;
7271 b->dthb_next = bucket->dthb_next;
7272 }
7273
7274 ASSERT(hash->dth_nbuckets > 0);
7275 hash->dth_nbuckets--;
7276 kmem_free(bucket, sizeof (dtrace_hashbucket_t));
7277 return;
7278 }
7279
7280 bucket->dthb_chain = *nextp;
7281 } else {
7282 *(DTRACE_HASHNEXT(hash, *prevp)) = *nextp;
7283 }
7284
7285 if (*nextp != NULL)
7286 *(DTRACE_HASHPREV(hash, *nextp)) = *prevp;
7287}
7288
7289/*
7290 * DTrace Utility Functions
7291 *
7292 * These are random utility functions that are _not_ called from probe context.
7293 */
7294static int
7295dtrace_badattr(const dtrace_attribute_t *a)
7296{
7297 return (a->dtat_name > DTRACE_STABILITY_MAX ||
7298 a->dtat_data > DTRACE_STABILITY_MAX ||
7299 a->dtat_class > DTRACE_CLASS_MAX);
7300}
7301
7302/*
d9a64523
A
7303 * Returns a dtrace-managed copy of a string, and will
7304 * deduplicate copies of the same string.
7305 * If the specified string is NULL, returns an empty string
2d21ac55 7306 */
b0d623f7 7307static char *
d9a64523 7308dtrace_strref(const char *str)
b0d623f7 7309{
d9a64523 7310 dtrace_string_t *s = NULL;
b0d623f7 7311 size_t bufsize = (str != NULL ? strlen(str) : 0) + 1;
b0d623f7 7312
d9a64523 7313 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
b0d623f7 7314
d9a64523
A
7315 if (str == NULL)
7316 str = "";
7317
7318 for (s = dtrace_hash_lookup_string(dtrace_strings, str); s != NULL;
7319 s = *(DTRACE_HASHNEXT(dtrace_strings, s))) {
7320 if (strncmp(str, s->dtst_str, bufsize) != 0) {
7321 continue;
7322 }
7323 ASSERT(s->dtst_refcount != UINT32_MAX);
7324 s->dtst_refcount++;
7325 return s->dtst_str;
7326 }
7327
7328 s = kmem_zalloc(sizeof(dtrace_string_t) + bufsize, KM_SLEEP);
7329 s->dtst_refcount = 1;
7330 (void) strlcpy(s->dtst_str, str, bufsize);
7331
7332 dtrace_hash_add(dtrace_strings, s);
7333
7334 return s->dtst_str;
7335}
7336
7337static void
7338dtrace_strunref(const char *str)
7339{
7340 ASSERT(str != NULL);
7341 dtrace_string_t *s = NULL;
7342 size_t bufsize = strlen(str) + 1;
7343
7344 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
7345
7346 for (s = dtrace_hash_lookup_string(dtrace_strings, str); s != NULL;
7347 s = *(DTRACE_HASHNEXT(dtrace_strings, s))) {
7348 if (strncmp(str, s->dtst_str, bufsize) != 0) {
7349 continue;
7350 }
7351 ASSERT(s->dtst_refcount != 0);
7352 s->dtst_refcount--;
7353 if (s->dtst_refcount == 0) {
7354 dtrace_hash_remove(dtrace_strings, s);
7355 kmem_free(s, sizeof(dtrace_string_t) + bufsize);
7356 }
7357 return;
7358 }
7359 panic("attempt to unref non-existent string %s", str);
b0d623f7 7360}
2d21ac55
A
7361
#define	DTRACE_ISALPHA(c)	\
	(((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))

/*
 * Check a name for illegal characters.  Returns 1 if the name is bad and
 * 0 if it is acceptable.  NULL and the empty string are reported as
 * acceptable.  The first character must be an ASCII letter, '-', '_' or
 * '.'; each later character may additionally be a digit or a backquote.
 */
static int
dtrace_badname(const char *s)
{
	const char *cp;

	if (s == NULL || *s == '\0')
		return (0);

	if (!(DTRACE_ISALPHA(*s) || *s == '-' || *s == '_' || *s == '.'))
		return (1);

	for (cp = s + 1; *cp != '\0'; cp++) {
		const char ch = *cp;

		if (DTRACE_ISALPHA(ch) || (ch >= '0' && ch <= '9'))
			continue;

		if (ch == '-' || ch == '_' || ch == '.' || ch == '`')
			continue;

		return (1);
	}

	return (0);
}
7384
7385static void
7386dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp)
7387{
7388 uint32_t priv;
7389
7390 if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
39037602 7391 if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed()) {
5ba3f43e 7392 priv = DTRACE_PRIV_USER | DTRACE_PRIV_PROC | DTRACE_PRIV_OWNER;
39037602
A
7393 }
7394 else {
7395 priv = DTRACE_PRIV_ALL;
7396 }
5ba3f43e
A
7397 *uidp = 0;
7398 *zoneidp = 0;
2d21ac55
A
7399 } else {
7400 *uidp = crgetuid(cr);
7401 *zoneidp = crgetzoneid(cr);
7402
7403 priv = 0;
7404 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE))
7405 priv |= DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER;
7406 else if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE))
7407 priv |= DTRACE_PRIV_USER;
7408 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE))
7409 priv |= DTRACE_PRIV_PROC;
7410 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
7411 priv |= DTRACE_PRIV_OWNER;
7412 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
7413 priv |= DTRACE_PRIV_ZONEOWNER;
7414 }
7415
7416 *privp = priv;
7417}
7418
#ifdef DTRACE_ERRDEBUG
/*
 * Record an occurrence of an error message in dtrace_errhash.  Messages
 * are identified by pointer, not by contents.  The table is probed
 * linearly from the message's hash slot: a slot already holding this
 * pointer has its count incremented, and the first empty slot found is
 * claimed.  If every slot is occupied by other messages, we panic.
 * dtrace_errlast and dtrace_errthread are updated on every call.
 */
static void
dtrace_errdebug(const char *str)
{
	int slot = dtrace_hash_str(str) % DTRACE_ERRHASHSZ;
	int tries;

	lck_mtx_lock(&dtrace_errlock);
	dtrace_errlast = str;
	dtrace_errthread = (kthread_t *)current_thread();

	for (tries = 0; tries < DTRACE_ERRHASHSZ; tries++) {
		if (dtrace_errhash[slot].dter_msg == str) {
			dtrace_errhash[slot].dter_count++;
			break;
		}

		if (dtrace_errhash[slot].dter_msg == NULL) {
			dtrace_errhash[slot].dter_msg = str;
			dtrace_errhash[slot].dter_count = 1;
			break;
		}

		/* Slot belongs to a different message; try the next one. */
		slot = (slot + 1) % DTRACE_ERRHASHSZ;
	}

	if (tries == DTRACE_ERRHASHSZ)
		panic("dtrace: undersized error hash");

	lck_mtx_unlock(&dtrace_errlock);
}
#endif
7451
7452/*
7453 * DTrace Matching Functions
7454 *
7455 * These functions are used to match groups of probes, given some elements of
7456 * a probe tuple, or some globbed expressions for elements of a probe tuple.
7457 */
7458static int
7459dtrace_match_priv(const dtrace_probe_t *prp, uint32_t priv, uid_t uid,
7460 zoneid_t zoneid)
7461{
7462 if (priv != DTRACE_PRIV_ALL) {
7463 uint32_t ppriv = prp->dtpr_provider->dtpv_priv.dtpp_flags;
7464 uint32_t match = priv & ppriv;
7465
7466 /*
7467 * No PRIV_DTRACE_* privileges...
7468 */
7469 if ((priv & (DTRACE_PRIV_PROC | DTRACE_PRIV_USER |
7470 DTRACE_PRIV_KERNEL)) == 0)
7471 return (0);
7472
7473 /*
7474 * No matching bits, but there were bits to match...
7475 */
7476 if (match == 0 && ppriv != 0)
7477 return (0);
7478
7479 /*
7480 * Need to have permissions to the process, but don't...
7481 */
7482 if (((ppriv & ~match) & DTRACE_PRIV_OWNER) != 0 &&
7483 uid != prp->dtpr_provider->dtpv_priv.dtpp_uid) {
7484 return (0);
7485 }
7486
7487 /*
7488 * Need to be in the same zone unless we possess the
7489 * privilege to examine all zones.
7490 */
7491 if (((ppriv & ~match) & DTRACE_PRIV_ZONEOWNER) != 0 &&
7492 zoneid != prp->dtpr_provider->dtpv_priv.dtpp_zoneid) {
7493 return (0);
7494 }
7495 }
7496
7497 return (1);
7498}
7499
7500/*
7501 * dtrace_match_probe compares a dtrace_probe_t to a pre-compiled key, which
7502 * consists of input pattern strings and an ops-vector to evaluate them.
7503 * This function returns >0 for match, 0 for no match, and <0 for error.
7504 */
7505static int
7506dtrace_match_probe(const dtrace_probe_t *prp, const dtrace_probekey_t *pkp,
7507 uint32_t priv, uid_t uid, zoneid_t zoneid)
7508{
7509 dtrace_provider_t *pvp = prp->dtpr_provider;
7510 int rv;
7511
7512 if (pvp->dtpv_defunct)
7513 return (0);
7514
7515 if ((rv = pkp->dtpk_pmatch(pvp->dtpv_name, pkp->dtpk_prov, 0)) <= 0)
7516 return (rv);
7517
7518 if ((rv = pkp->dtpk_mmatch(prp->dtpr_mod, pkp->dtpk_mod, 0)) <= 0)
7519 return (rv);
7520
7521 if ((rv = pkp->dtpk_fmatch(prp->dtpr_func, pkp->dtpk_func, 0)) <= 0)
7522 return (rv);
7523
7524 if ((rv = pkp->dtpk_nmatch(prp->dtpr_name, pkp->dtpk_name, 0)) <= 0)
7525 return (rv);
7526
7527 if (dtrace_match_priv(prp, priv, uid, zoneid) == 0)
7528 return (0);
7529
7530 return (rv);
7531}
7532
7533/*
7534 * dtrace_match_glob() is a safe kernel implementation of the gmatch(3GEN)
7535 * interface for matching a glob pattern 'p' to an input string 's'. Unlike
7536 * libc's version, the kernel version only applies to 8-bit ASCII strings.
7537 * In addition, all of the recursion cases except for '*' matching have been
7538 * unwound. For '*', we still implement recursive evaluation, but a depth
7539 * counter is maintained and matching is aborted if we recurse too deep.
7540 * The function returns 0 if no match, >0 if match, and <0 if recursion error.
7541 */
7542static int
7543dtrace_match_glob(const char *s, const char *p, int depth)
7544{
7545 const char *olds;
7546 char s1, c;
7547 int gs;
7548
7549 if (depth > DTRACE_PROBEKEY_MAXDEPTH)
7550 return (-1);
7551
7552 if (s == NULL)
7553 s = ""; /* treat NULL as empty string */
7554
7555top:
7556 olds = s;
7557 s1 = *s++;
7558
7559 if (p == NULL)
7560 return (0);
7561
7562 if ((c = *p++) == '\0')
7563 return (s1 == '\0');
7564
7565 switch (c) {
7566 case '[': {
7567 int ok = 0, notflag = 0;
7568 char lc = '\0';
7569
7570 if (s1 == '\0')
7571 return (0);
7572
7573 if (*p == '!') {
7574 notflag = 1;
7575 p++;
7576 }
7577
7578 if ((c = *p++) == '\0')
7579 return (0);
7580
7581 do {
7582 if (c == '-' && lc != '\0' && *p != ']') {
7583 if ((c = *p++) == '\0')
7584 return (0);
7585 if (c == '\\' && (c = *p++) == '\0')
7586 return (0);
7587
7588 if (notflag) {
7589 if (s1 < lc || s1 > c)
7590 ok++;
7591 else
7592 return (0);
7593 } else if (lc <= s1 && s1 <= c)
7594 ok++;
7595
7596 } else if (c == '\\' && (c = *p++) == '\0')
7597 return (0);
7598
7599 lc = c; /* save left-hand 'c' for next iteration */
7600
7601 if (notflag) {
7602 if (s1 != c)
7603 ok++;
7604 else
7605 return (0);
7606 } else if (s1 == c)
7607 ok++;
7608
7609 if ((c = *p++) == '\0')
7610 return (0);
7611
7612 } while (c != ']');
7613
7614 if (ok)
7615 goto top;
7616
7617 return (0);
7618 }
7619
7620 case '\\':
7621 if ((c = *p++) == '\0')
7622 return (0);
7623 /*FALLTHRU*/
7624
7625 default:
7626 if (c != s1)
7627 return (0);
7628 /*FALLTHRU*/
7629
7630 case '?':
7631 if (s1 != '\0')
7632 goto top;
7633 return (0);
7634
7635 case '*':
7636 while (*p == '*')
7637 p++; /* consecutive *'s are identical to a single one */
7638
7639 if (*p == '\0')
7640 return (1);
7641
7642 for (s = olds; *s != '\0'; s++) {
7643 if ((gs = dtrace_match_glob(s, p, depth + 1)) != 0)
7644 return (gs);
7645 }
7646
7647 return (0);
7648 }
7649}
7650
/*
 * Exact-string matcher.  The comparison is by pointer identity, not by
 * contents: it succeeds only when 's' is non-NULL and is the very same
 * pointer as 'p'.
 */
/*ARGSUSED*/
static int
dtrace_match_string(const char *s, const char *p, int depth)
{
#pragma unused(depth) /* __APPLE__ */
	if (s == NULL)
		return (0);

	return (s == p);
}
fe8ab488 7658
d9a64523
A
/*
 * Module-name matcher.  Succeeds when 's' begins with the pattern 'p' and
 * the pattern ends either at the end of 's' or immediately before a '.'
 * in 's'.  Returns 1 on match and 0 otherwise, including when either
 * argument is NULL.
 */
/*ARGSUSED*/
static int
dtrace_match_module(const char *s, const char *p, int depth)
{
#pragma unused(depth) /* __APPLE__ */
	size_t plen;

	if (s == NULL || p == NULL)
		return (0);

	plen = strlen(p);

	if (strncmp(p, s, plen) != 0)
		return (0);

	/* s shares its first plen bytes with p, so s[plen] is in bounds. */
	return (s[plen] == '.' || s[plen] == '\0');
}
7678
/*
 * Matcher used for empty patterns: every input matches, so all three
 * arguments are ignored and the result is always 1.
 */
/*ARGSUSED*/
static int
dtrace_match_nul(const char *s, const char *p, int depth)
{
#pragma unused(s, p, depth) /* __APPLE__ */
	/* The empty pattern places no constraint on the input. */
	return 1;
}
7686
/*
 * Matcher that accepts any non-empty input string.  The pattern and depth
 * arguments are ignored.  Returns 1 if 's' is non-NULL and not the empty
 * string, 0 otherwise.
 */
/*ARGSUSED*/
static int
dtrace_match_nonzero(const char *s, const char *p, int depth)
{
#pragma unused(p, depth) /* __APPLE__ */
	if (s == NULL)
		return (0);

	return (*s != '\0');
}
7694
7695static int
7696dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
d190cdc3 7697 zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *, void *), void *arg1, void *arg2)
2d21ac55 7698{
d9a64523
A
7699 dtrace_probe_t *probe;
7700 dtrace_provider_t prov_template = {
7701 .dtpv_name = (char *)(uintptr_t)pkp->dtpk_prov
7702 };
7703
7704 dtrace_probe_t template = {
7705 .dtpr_provider = &prov_template,
7706 .dtpr_mod = (char *)(uintptr_t)pkp->dtpk_mod,
7707 .dtpr_func = (char *)(uintptr_t)pkp->dtpk_func,
7708 .dtpr_name = (char *)(uintptr_t)pkp->dtpk_name
7709 };
7710
2d21ac55 7711 dtrace_hash_t *hash = NULL;
6d2010ae 7712 int len, rc, best = INT_MAX, nmatched = 0;
2d21ac55
A
7713 dtrace_id_t i;
7714
5ba3f43e 7715 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
7716
7717 /*
7718 * If the probe ID is specified in the key, just lookup by ID and
7719 * invoke the match callback once if a matching probe is found.
7720 */
7721 if (pkp->dtpk_id != DTRACE_IDNONE) {
7722 if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL &&
7723 dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) {
d190cdc3 7724 if ((*matched)(probe, arg1, arg2) == DTRACE_MATCH_FAIL)
6d2010ae 7725 return (DTRACE_MATCH_FAIL);
2d21ac55
A
7726 nmatched++;
7727 }
7728 return (nmatched);
7729 }
7730
2d21ac55 7731 /*
d9a64523
A
7732 * We want to find the most distinct of the provider name, module name,
7733 * function name, and name. So for each one that is not a glob
7734 * pattern or empty string, we perform a lookup in the corresponding
7735 * hash and use the hash table with the fewest collisions to do our
7736 * search.
2d21ac55 7737 */
d9a64523
A
7738 if (pkp->dtpk_pmatch == &dtrace_match_string &&
7739 (len = dtrace_hash_collisions(dtrace_byprov, &template)) < best) {
7740 best = len;
7741 hash = dtrace_byprov;
7742 }
7743
2d21ac55
A
7744 if (pkp->dtpk_mmatch == &dtrace_match_string &&
7745 (len = dtrace_hash_collisions(dtrace_bymod, &template)) < best) {
7746 best = len;
7747 hash = dtrace_bymod;
7748 }
7749
7750 if (pkp->dtpk_fmatch == &dtrace_match_string &&
7751 (len = dtrace_hash_collisions(dtrace_byfunc, &template)) < best) {
7752 best = len;
7753 hash = dtrace_byfunc;
7754 }
7755
7756 if (pkp->dtpk_nmatch == &dtrace_match_string &&
7757 (len = dtrace_hash_collisions(dtrace_byname, &template)) < best) {
7758 best = len;
7759 hash = dtrace_byname;
7760 }
7761
7762 /*
7763 * If we did not select a hash table, iterate over every probe and
7764 * invoke our callback for each one that matches our input probe key.
7765 */
7766 if (hash == NULL) {
b0d623f7 7767 for (i = 0; i < (dtrace_id_t)dtrace_nprobes; i++) {
2d21ac55
A
7768 if ((probe = dtrace_probes[i]) == NULL ||
7769 dtrace_match_probe(probe, pkp, priv, uid,
7770 zoneid) <= 0)
7771 continue;
7772
7773 nmatched++;
7774
d190cdc3 7775 if ((rc = (*matched)(probe, arg1, arg2)) != DTRACE_MATCH_NEXT) {
6d2010ae
A
7776 if (rc == DTRACE_MATCH_FAIL)
7777 return (DTRACE_MATCH_FAIL);
7778 break;
7779 }
2d21ac55
A
7780 }
7781
7782 return (nmatched);
7783 }
7784
7785 /*
7786 * If we selected a hash table, iterate over each probe of the same key
7787 * name and invoke the callback for every probe that matches the other
7788 * attributes of our input probe key.
7789 */
7790 for (probe = dtrace_hash_lookup(hash, &template); probe != NULL;
7791 probe = *(DTRACE_HASHNEXT(hash, probe))) {
7792
7793 if (dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0)
7794 continue;
7795
7796 nmatched++;
7797
d190cdc3 7798 if ((rc = (*matched)(probe, arg1, arg2)) != DTRACE_MATCH_NEXT) {
6d2010ae
A
7799 if (rc == DTRACE_MATCH_FAIL)
7800 return (DTRACE_MATCH_FAIL);
7801 break;
7802 }
2d21ac55
A
7803 }
7804
7805 return (nmatched);
7806}
7807
7808/*
7809 * Return the function pointer dtrace_probecmp() should use to compare the
7810 * specified pattern with a string. For NULL or empty patterns, we select
7811 * dtrace_match_nul(). For glob pattern strings, we use dtrace_match_glob().
7812 * For non-empty non-glob strings, we use dtrace_match_string().
7813 */
7814static dtrace_probekey_f *
7815dtrace_probekey_func(const char *p)
7816{
7817 char c;
7818
7819 if (p == NULL || *p == '\0')
7820 return (&dtrace_match_nul);
7821
7822 while ((c = *p++) != '\0') {
7823 if (c == '[' || c == '?' || c == '*' || c == '\\')
7824 return (&dtrace_match_glob);
7825 }
7826
7827 return (&dtrace_match_string);
7828}
7829
d9a64523
A
7830static dtrace_probekey_f *
7831dtrace_probekey_module_func(const char *p)
7832{
7833 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
7834
7835 dtrace_probekey_f *f = dtrace_probekey_func(p);
7836 if (f == &dtrace_match_string) {
7837 dtrace_probe_t template = {
7838 .dtpr_mod = (char *)(uintptr_t)p,
7839 };
7840 if (dtrace_hash_lookup(dtrace_bymod, &template) == NULL) {
7841 return (&dtrace_match_module);
7842 }
7843 return (&dtrace_match_string);
7844 }
7845 return f;
7846}
7847
2d21ac55
A
7848/*
7849 * Build a probe comparison key for use with dtrace_match_probe() from the
7850 * given probe description. By convention, a null key only matches anchored
7851 * probes: if each field is the empty string, reset dtpk_fmatch to
7852 * dtrace_match_nonzero().
7853 */
7854static void
7855dtrace_probekey(const dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp)
7856{
d9a64523
A
7857
7858 pkp->dtpk_prov = dtrace_strref(pdp->dtpd_provider);
2d21ac55
A
7859 pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider);
7860
d9a64523
A
7861 pkp->dtpk_mod = dtrace_strref(pdp->dtpd_mod);
7862 pkp->dtpk_mmatch = dtrace_probekey_module_func(pdp->dtpd_mod);
2d21ac55 7863
d9a64523 7864 pkp->dtpk_func = dtrace_strref(pdp->dtpd_func);
2d21ac55
A
7865 pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func);
7866
d9a64523 7867 pkp->dtpk_name = dtrace_strref(pdp->dtpd_name);
2d21ac55
A
7868 pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name);
7869
7870 pkp->dtpk_id = pdp->dtpd_id;
7871
7872 if (pkp->dtpk_id == DTRACE_IDNONE &&
7873 pkp->dtpk_pmatch == &dtrace_match_nul &&
7874 pkp->dtpk_mmatch == &dtrace_match_nul &&
7875 pkp->dtpk_fmatch == &dtrace_match_nul &&
7876 pkp->dtpk_nmatch == &dtrace_match_nul)
7877 pkp->dtpk_fmatch = &dtrace_match_nonzero;
7878}
7879
d9a64523
A
7880static void
7881dtrace_probekey_release(dtrace_probekey_t *pkp)
7882{
7883 dtrace_strunref(pkp->dtpk_prov);
7884 dtrace_strunref(pkp->dtpk_mod);
7885 dtrace_strunref(pkp->dtpk_func);
7886 dtrace_strunref(pkp->dtpk_name);
7887}
7888
39037602
A
7889static int
7890dtrace_cond_provider_match(dtrace_probedesc_t *desc, void *data)
7891{
7892 if (desc == NULL)
7893 return 1;
7894
7895 dtrace_probekey_f *func = dtrace_probekey_func(desc->dtpd_provider);
7896
813fb2f6 7897 return func((char*)data, desc->dtpd_provider, 0);
39037602
A
7898}
7899
2d21ac55
A
7900/*
7901 * DTrace Provider-to-Framework API Functions
7902 *
7903 * These functions implement much of the Provider-to-Framework API, as
7904 * described in <sys/dtrace.h>. The parts of the API not in this section are
7905 * the functions in the API for probe management (found below), and
7906 * dtrace_probe() itself (found above).
7907 */
7908
7909/*
7910 * Register the calling provider with the DTrace framework. This should
7911 * generally be called by DTrace providers in their attach(9E) entry point.
7912 */
7913int
7914dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv,
7915 cred_t *cr, const dtrace_pops_t *pops, void *arg, dtrace_provider_id_t *idp)
7916{
7917 dtrace_provider_t *provider;
7918
7919 if (name == NULL || pap == NULL || pops == NULL || idp == NULL) {
7920 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7921 "arguments", name ? name : "<NULL>");
7922 return (EINVAL);
7923 }
7924
7925 if (name[0] == '\0' || dtrace_badname(name)) {
7926 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7927 "provider name", name);
7928 return (EINVAL);
7929 }
7930
7931 if ((pops->dtps_provide == NULL && pops->dtps_provide_module == NULL) ||
7932 pops->dtps_enable == NULL || pops->dtps_disable == NULL ||
7933 pops->dtps_destroy == NULL ||
7934 ((pops->dtps_resume == NULL) != (pops->dtps_suspend == NULL))) {
7935 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7936 "provider ops", name);
7937 return (EINVAL);
7938 }
7939
7940 if (dtrace_badattr(&pap->dtpa_provider) ||
7941 dtrace_badattr(&pap->dtpa_mod) ||
7942 dtrace_badattr(&pap->dtpa_func) ||
7943 dtrace_badattr(&pap->dtpa_name) ||
7944 dtrace_badattr(&pap->dtpa_args)) {
7945 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7946 "provider attributes", name);
7947 return (EINVAL);
7948 }
7949
7950 if (priv & ~DTRACE_PRIV_ALL) {
7951 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7952 "privilege attributes", name);
7953 return (EINVAL);
7954 }
7955
7956 if ((priv & DTRACE_PRIV_KERNEL) &&
7957 (priv & (DTRACE_PRIV_USER | DTRACE_PRIV_OWNER)) &&
7958 pops->dtps_usermode == NULL) {
7959 cmn_err(CE_WARN, "failed to register provider '%s': need "
7960 "dtps_usermode() op for given privilege attributes", name);
7961 return (EINVAL);
7962 }
7963
7964 provider = kmem_zalloc(sizeof (dtrace_provider_t), KM_SLEEP);
fe8ab488 7965
2d21ac55
A
7966 provider->dtpv_attr = *pap;
7967 provider->dtpv_priv.dtpp_flags = priv;
7968 if (cr != NULL) {
7969 provider->dtpv_priv.dtpp_uid = crgetuid(cr);
7970 provider->dtpv_priv.dtpp_zoneid = crgetzoneid(cr);
7971 }
7972 provider->dtpv_pops = *pops;
7973
7974 if (pops->dtps_provide == NULL) {
7975 ASSERT(pops->dtps_provide_module != NULL);
0a7de745 7976 provider->dtpv_pops.dtps_provide = dtrace_provide_nullop;
2d21ac55
A
7977 }
7978
7979 if (pops->dtps_provide_module == NULL) {
7980 ASSERT(pops->dtps_provide != NULL);
7981 provider->dtpv_pops.dtps_provide_module =
0a7de745 7982 dtrace_provide_module_nullop;
2d21ac55
A
7983 }
7984
7985 if (pops->dtps_suspend == NULL) {
7986 ASSERT(pops->dtps_resume == NULL);
0a7de745
A
7987 provider->dtpv_pops.dtps_suspend = dtrace_suspend_nullop;
7988 provider->dtpv_pops.dtps_resume = dtrace_resume_nullop;
2d21ac55
A
7989 }
7990
7991 provider->dtpv_arg = arg;
7992 *idp = (dtrace_provider_id_t)provider;
7993
7994 if (pops == &dtrace_provider_ops) {
5ba3f43e
A
7995 LCK_MTX_ASSERT(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED);
7996 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
d9a64523
A
7997
7998 provider->dtpv_name = dtrace_strref(name);
7999
2d21ac55
A
8000 ASSERT(dtrace_anon.dta_enabling == NULL);
8001
8002 /*
8003 * We make sure that the DTrace provider is at the head of
8004 * the provider chain.
8005 */
8006 provider->dtpv_next = dtrace_provider;
8007 dtrace_provider = provider;
8008 return (0);
8009 }
8010
8011 lck_mtx_lock(&dtrace_provider_lock);
8012 lck_mtx_lock(&dtrace_lock);
8013
d9a64523
A
8014 provider->dtpv_name = dtrace_strref(name);
8015
2d21ac55
A
8016 /*
8017 * If there is at least one provider registered, we'll add this
8018 * provider after the first provider.
8019 */
8020 if (dtrace_provider != NULL) {
8021 provider->dtpv_next = dtrace_provider->dtpv_next;
8022 dtrace_provider->dtpv_next = provider;
8023 } else {
8024 dtrace_provider = provider;
8025 }
8026
8027 if (dtrace_retained != NULL) {
8028 dtrace_enabling_provide(provider);
8029
8030 /*
39037602
A
8031 * Now we need to call dtrace_enabling_matchall_with_cond() --
8032 * with a condition matching the provider name we just added,
8033 * which will acquire cpu_lock and dtrace_lock. We therefore need
2d21ac55
A
8034 * to drop all of our locks before calling into it...
8035 */
8036 lck_mtx_unlock(&dtrace_lock);
8037 lck_mtx_unlock(&dtrace_provider_lock);
39037602
A
8038
8039 dtrace_match_cond_t cond = {dtrace_cond_provider_match, provider->dtpv_name};
8040 dtrace_enabling_matchall_with_cond(&cond);
2d21ac55
A
8041
8042 return (0);
8043 }
8044
8045 lck_mtx_unlock(&dtrace_lock);
8046 lck_mtx_unlock(&dtrace_provider_lock);
8047
8048 return (0);
8049}
8050
8051/*
8052 * Unregister the specified provider from the DTrace framework. This should
8053 * generally be called by DTrace providers in their detach(9E) entry point.
8054 */
8055int
8056dtrace_unregister(dtrace_provider_id_t id)
8057{
8058 dtrace_provider_t *old = (dtrace_provider_t *)id;
8059 dtrace_provider_t *prev = NULL;
d9a64523
A
8060 int self = 0;
8061 dtrace_probe_t *probe, *first = NULL, *next = NULL;
8062 dtrace_probe_t template = {
8063 .dtpr_provider = old
8064 };
2d21ac55
A
8065
8066 if (old->dtpv_pops.dtps_enable ==
6d2010ae 8067 (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop) {
2d21ac55
A
8068 /*
8069 * If DTrace itself is the provider, we're called with locks
8070 * already held.
8071 */
8072 ASSERT(old == dtrace_provider);
8073 ASSERT(dtrace_devi != NULL);
5ba3f43e
A
8074 LCK_MTX_ASSERT(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED);
8075 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
8076 self = 1;
8077
8078 if (dtrace_provider->dtpv_next != NULL) {
8079 /*
8080 * There's another provider here; return failure.
8081 */
8082 return (EBUSY);
8083 }
8084 } else {
8085 lck_mtx_lock(&dtrace_provider_lock);
8086 lck_mtx_lock(&mod_lock);
8087 lck_mtx_lock(&dtrace_lock);
8088 }
8089
8090 /*
8091 * If anyone has /dev/dtrace open, or if there are anonymous enabled
8092 * probes, we refuse to let providers slither away, unless this
8093 * provider has already been explicitly invalidated.
8094 */
8095 if (!old->dtpv_defunct &&
8096 (dtrace_opens || (dtrace_anon.dta_state != NULL &&
8097 dtrace_anon.dta_state->dts_necbs > 0))) {
8098 if (!self) {
8099 lck_mtx_unlock(&dtrace_lock);
8100 lck_mtx_unlock(&mod_lock);
8101 lck_mtx_unlock(&dtrace_provider_lock);
8102 }
8103 return (EBUSY);
8104 }
8105
8106 /*
8107 * Attempt to destroy the probes associated with this provider.
8108 */
fe8ab488 8109 if (old->dtpv_ecb_count!=0) {
2d21ac55
A
8110 /*
8111 * We have at least one ECB; we can't remove this provider.
8112 */
8113 if (!self) {
8114 lck_mtx_unlock(&dtrace_lock);
8115 lck_mtx_unlock(&mod_lock);
8116 lck_mtx_unlock(&dtrace_provider_lock);
8117 }
8118 return (EBUSY);
8119 }
8120
8121 /*
8122 * All of the probes for this provider are disabled; we can safely
8123 * remove all of them from their hash chains and from the probe array.
8124 */
d9a64523
A
8125 for (probe = dtrace_hash_lookup(dtrace_byprov, &template); probe != NULL;
8126 probe = *(DTRACE_HASHNEXT(dtrace_byprov, probe))) {
2d21ac55
A
8127 if (probe->dtpr_provider != old)
8128 continue;
8129
d9a64523 8130 dtrace_probes[probe->dtpr_id - 1] = NULL;
fe8ab488 8131 old->dtpv_probe_count--;
2d21ac55
A
8132
8133 dtrace_hash_remove(dtrace_bymod, probe);
8134 dtrace_hash_remove(dtrace_byfunc, probe);
8135 dtrace_hash_remove(dtrace_byname, probe);
8136
8137 if (first == NULL) {
8138 first = probe;
8139 probe->dtpr_nextmod = NULL;
8140 } else {
d9a64523
A
8141 /*
8142 * Use nextmod as the chain of probes to remove
8143 */
2d21ac55
A
8144 probe->dtpr_nextmod = first;
8145 first = probe;
8146 }
8147 }
8148
d9a64523
A
8149 for (probe = first; probe != NULL; probe = next) {
8150 next = probe->dtpr_nextmod;
8151 dtrace_hash_remove(dtrace_byprov, probe);
8152 }
8153
2d21ac55
A
8154 /*
8155 * The provider's probes have been removed from the hash chains and
8156 * from the probe array. Now issue a dtrace_sync() to be sure that
8157 * everyone has cleared out from any probe array processing.
8158 */
8159 dtrace_sync();
8160
d9a64523
A
8161 for (probe = first; probe != NULL; probe = next) {
8162 next = probe->dtpr_nextmod;
2d21ac55
A
8163
8164 old->dtpv_pops.dtps_destroy(old->dtpv_arg, probe->dtpr_id,
8165 probe->dtpr_arg);
d9a64523
A
8166 dtrace_strunref(probe->dtpr_mod);
8167 dtrace_strunref(probe->dtpr_func);
8168 dtrace_strunref(probe->dtpr_name);
2d21ac55 8169 vmem_free(dtrace_arena, (void *)(uintptr_t)(probe->dtpr_id), 1);
2d21ac55 8170 zfree(dtrace_probe_t_zone, probe);
2d21ac55
A
8171 }
8172
8173 if ((prev = dtrace_provider) == old) {
8174 ASSERT(self || dtrace_devi == NULL);
8175 ASSERT(old->dtpv_next == NULL || dtrace_devi == NULL);
8176 dtrace_provider = old->dtpv_next;
8177 } else {
8178 while (prev != NULL && prev->dtpv_next != old)
8179 prev = prev->dtpv_next;
8180
8181 if (prev == NULL) {
8182 panic("attempt to unregister non-existent "
8183 "dtrace provider %p\n", (void *)id);
8184 }
8185
8186 prev->dtpv_next = old->dtpv_next;
8187 }
8188
d9a64523
A
8189 dtrace_strunref(old->dtpv_name);
8190
2d21ac55
A
8191 if (!self) {
8192 lck_mtx_unlock(&dtrace_lock);
8193 lck_mtx_unlock(&mod_lock);
8194 lck_mtx_unlock(&dtrace_provider_lock);
8195 }
8196
2d21ac55
A
8197 kmem_free(old, sizeof (dtrace_provider_t));
8198
8199 return (0);
8200}
8201
8202/*
8203 * Invalidate the specified provider. All subsequent probe lookups for the
8204 * specified provider will fail, but its probes will not be removed.
8205 */
8206void
8207dtrace_invalidate(dtrace_provider_id_t id)
8208{
8209 dtrace_provider_t *pvp = (dtrace_provider_t *)id;
8210
8211 ASSERT(pvp->dtpv_pops.dtps_enable !=
6d2010ae 8212 (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);
2d21ac55
A
8213
8214 lck_mtx_lock(&dtrace_provider_lock);
8215 lck_mtx_lock(&dtrace_lock);
8216
8217 pvp->dtpv_defunct = 1;
8218
8219 lck_mtx_unlock(&dtrace_lock);
8220 lck_mtx_unlock(&dtrace_provider_lock);
8221}
8222
8223/*
8224 * Indicate whether or not DTrace has attached.
8225 */
8226int
8227dtrace_attached(void)
8228{
8229 /*
8230 * dtrace_provider will be non-NULL iff the DTrace driver has
8231 * attached. (It's non-NULL because DTrace is always itself a
8232 * provider.)
8233 */
8234 return (dtrace_provider != NULL);
8235}
8236
8237/*
8238 * Remove all the unenabled probes for the given provider. This function is
8239 * not unlike dtrace_unregister(), except that it doesn't remove the provider
8240 * -- just as many of its associated probes as it can.
8241 */
8242int
8243dtrace_condense(dtrace_provider_id_t id)
8244{
8245 dtrace_provider_t *prov = (dtrace_provider_t *)id;
d9a64523
A
8246 dtrace_probe_t *probe, *first = NULL;
8247 dtrace_probe_t template = {
8248 .dtpr_provider = prov
8249 };
2d21ac55
A
8250
8251 /*
8252 * Make sure this isn't the dtrace provider itself.
8253 */
8254 ASSERT(prov->dtpv_pops.dtps_enable !=
6d2010ae 8255 (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);
2d21ac55
A
8256
8257 lck_mtx_lock(&dtrace_provider_lock);
8258 lck_mtx_lock(&dtrace_lock);
8259
8260 /*
8261 * Attempt to destroy the probes associated with this provider.
8262 */
d9a64523
A
8263 for (probe = dtrace_hash_lookup(dtrace_byprov, &template); probe != NULL;
8264 probe = *(DTRACE_HASHNEXT(dtrace_byprov, probe))) {
2d21ac55
A
8265
8266 if (probe->dtpr_provider != prov)
8267 continue;
8268
8269 if (probe->dtpr_ecb != NULL)
8270 continue;
8271
d9a64523 8272 dtrace_probes[probe->dtpr_id - 1] = NULL;
fe8ab488 8273 prov->dtpv_probe_count--;
2d21ac55
A
8274
8275 dtrace_hash_remove(dtrace_bymod, probe);
8276 dtrace_hash_remove(dtrace_byfunc, probe);
8277 dtrace_hash_remove(dtrace_byname, probe);
8278
d9a64523 8279 prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id,
2d21ac55 8280 probe->dtpr_arg);
d9a64523
A
8281 dtrace_strunref(probe->dtpr_mod);
8282 dtrace_strunref(probe->dtpr_func);
8283 dtrace_strunref(probe->dtpr_name);
8284 if (first == NULL) {
8285 first = probe;
8286 probe->dtpr_nextmod = NULL;
8287 } else {
8288 /*
8289 * Use nextmod as the chain of probes to remove
8290 */
8291 probe->dtpr_nextmod = first;
8292 first = probe;
8293 }
8294 }
8295
8296 for (probe = first; probe != NULL; probe = first) {
8297 first = probe->dtpr_nextmod;
8298 dtrace_hash_remove(dtrace_byprov, probe);
8299 vmem_free(dtrace_arena, (void *)((uintptr_t)probe->dtpr_id), 1);
2d21ac55 8300 zfree(dtrace_probe_t_zone, probe);
2d21ac55
A
8301 }
8302
8303 lck_mtx_unlock(&dtrace_lock);
8304 lck_mtx_unlock(&dtrace_provider_lock);
8305
8306 return (0);
8307}
8308
8309/*
8310 * DTrace Probe Management Functions
8311 *
8312 * The functions in this section perform the DTrace probe management,
8313 * including functions to create probes, look-up probes, and call into the
8314 * providers to request that probes be provided. Some of these functions are
8315 * in the Provider-to-Framework API; these functions can be identified by the
8316 * fact that they are not declared "static".
8317 */
8318
8319/*
8320 * Create a probe with the specified module name, function name, and name.
8321 */
8322dtrace_id_t
8323dtrace_probe_create(dtrace_provider_id_t prov, const char *mod,
8324 const char *func, const char *name, int aframes, void *arg)
8325{
8326 dtrace_probe_t *probe, **probes;
8327 dtrace_provider_t *provider = (dtrace_provider_t *)prov;
8328 dtrace_id_t id;
8329
8330 if (provider == dtrace_provider) {
5ba3f43e 8331 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
8332 } else {
8333 lck_mtx_lock(&dtrace_lock);
8334 }
8335
8336 id = (dtrace_id_t)(uintptr_t)vmem_alloc(dtrace_arena, 1,
8337 VM_BESTFIT | VM_SLEEP);
fe8ab488 8338
2d21ac55
A
8339 probe = zalloc(dtrace_probe_t_zone);
8340 bzero(probe, sizeof (dtrace_probe_t));
2d21ac55
A
8341
8342 probe->dtpr_id = id;
8343 probe->dtpr_gen = dtrace_probegen++;
d9a64523
A
8344 probe->dtpr_mod = dtrace_strref(mod);
8345 probe->dtpr_func = dtrace_strref(func);
8346 probe->dtpr_name = dtrace_strref(name);
2d21ac55
A
8347 probe->dtpr_arg = arg;
8348 probe->dtpr_aframes = aframes;
8349 probe->dtpr_provider = provider;
8350
d9a64523 8351 dtrace_hash_add(dtrace_byprov, probe);
2d21ac55
A
8352 dtrace_hash_add(dtrace_bymod, probe);
8353 dtrace_hash_add(dtrace_byfunc, probe);
8354 dtrace_hash_add(dtrace_byname, probe);
8355
b0d623f7 8356 if (id - 1 >= (dtrace_id_t)dtrace_nprobes) {
2d21ac55
A
8357 size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *);
8358 size_t nsize = osize << 1;
8359
8360 if (nsize == 0) {
8361 ASSERT(osize == 0);
8362 ASSERT(dtrace_probes == NULL);
8363 nsize = sizeof (dtrace_probe_t *);
8364 }
8365
8366 probes = kmem_zalloc(nsize, KM_SLEEP);
8367
8368 if (dtrace_probes == NULL) {
8369 ASSERT(osize == 0);
8370 dtrace_probes = probes;
8371 dtrace_nprobes = 1;
8372 } else {
8373 dtrace_probe_t **oprobes = dtrace_probes;
8374
8375 bcopy(oprobes, probes, osize);
8376 dtrace_membar_producer();
8377 dtrace_probes = probes;
8378
8379 dtrace_sync();
8380
8381 /*
8382 * All CPUs are now seeing the new probes array; we can
8383 * safely free the old array.
8384 */
8385 kmem_free(oprobes, osize);
8386 dtrace_nprobes <<= 1;
8387 }
8388
b0d623f7 8389 ASSERT(id - 1 < (dtrace_id_t)dtrace_nprobes);
2d21ac55
A
8390 }
8391
8392 ASSERT(dtrace_probes[id - 1] == NULL);
8393 dtrace_probes[id - 1] = probe;
fe8ab488 8394 provider->dtpv_probe_count++;
2d21ac55
A
8395
8396 if (provider != dtrace_provider)
8397 lck_mtx_unlock(&dtrace_lock);
8398
8399 return (id);
8400}
8401
8402static dtrace_probe_t *
8403dtrace_probe_lookup_id(dtrace_id_t id)
8404{
5ba3f43e 8405 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55 8406
b0d623f7
A
8407 if (id == 0 || id > (dtrace_id_t)dtrace_nprobes)
8408 return (NULL);
2d21ac55
A
8409
8410 return (dtrace_probes[id - 1]);
8411}
8412
8413static int
d190cdc3 8414dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg1, void *arg2)
2d21ac55 8415{
d190cdc3
A
8416#pragma unused(arg2)
8417 *((dtrace_id_t *)arg1) = probe->dtpr_id;
2d21ac55
A
8418
8419 return (DTRACE_MATCH_DONE);
8420}
8421
8422/*
8423 * Look up a probe based on provider and one or more of module name, function
8424 * name and probe name.
8425 */
8426dtrace_id_t
8427dtrace_probe_lookup(dtrace_provider_id_t prid, const char *mod,
8428 const char *func, const char *name)
8429{
8430 dtrace_probekey_t pkey;
8431 dtrace_id_t id;
8432 int match;
8433
d9a64523
A
8434 lck_mtx_lock(&dtrace_lock);
8435
8436 pkey.dtpk_prov = dtrace_strref(((dtrace_provider_t *)prid)->dtpv_name);
2d21ac55 8437 pkey.dtpk_pmatch = &dtrace_match_string;
d9a64523 8438 pkey.dtpk_mod = dtrace_strref(mod);
2d21ac55 8439 pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul;
d9a64523 8440 pkey.dtpk_func = dtrace_strref(func);
2d21ac55 8441 pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul;
d9a64523 8442 pkey.dtpk_name = dtrace_strref(name);
2d21ac55
A
8443 pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul;
8444 pkey.dtpk_id = DTRACE_IDNONE;
8445
2d21ac55 8446 match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0,
d190cdc3 8447 dtrace_probe_lookup_match, &id, NULL);
d9a64523
A
8448
8449 dtrace_probekey_release(&pkey);
8450
2d21ac55
A
8451 lck_mtx_unlock(&dtrace_lock);
8452
8453 ASSERT(match == 1 || match == 0);
8454 return (match ? id : 0);
8455}
8456
8457/*
8458 * Returns the probe argument associated with the specified probe.
8459 */
8460void *
8461dtrace_probe_arg(dtrace_provider_id_t id, dtrace_id_t pid)
8462{
8463 dtrace_probe_t *probe;
8464 void *rval = NULL;
8465
8466 lck_mtx_lock(&dtrace_lock);
8467
8468 if ((probe = dtrace_probe_lookup_id(pid)) != NULL &&
8469 probe->dtpr_provider == (dtrace_provider_t *)id)
8470 rval = probe->dtpr_arg;
8471
8472 lck_mtx_unlock(&dtrace_lock);
8473
8474 return (rval);
8475}
8476
8477/*
8478 * Copy a probe into a probe description.
8479 */
8480static void
8481dtrace_probe_description(const dtrace_probe_t *prp, dtrace_probedesc_t *pdp)
8482{
8483 bzero(pdp, sizeof (dtrace_probedesc_t));
8484 pdp->dtpd_id = prp->dtpr_id;
8485
fe8ab488 8486 /* APPLE NOTE: Darwin employs size bounded string operation. */
2d21ac55
A
8487 (void) strlcpy(pdp->dtpd_provider,
8488 prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN);
8489
8490 (void) strlcpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN);
8491 (void) strlcpy(pdp->dtpd_func, prp->dtpr_func, DTRACE_FUNCNAMELEN);
8492 (void) strlcpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN);
8493}
8494
8495/*
8496 * Called to indicate that a probe -- or probes -- should be provided by a
8497 * specfied provider. If the specified description is NULL, the provider will
8498 * be told to provide all of its probes. (This is done whenever a new
8499 * consumer comes along, or whenever a retained enabling is to be matched.) If
8500 * the specified description is non-NULL, the provider is given the
8501 * opportunity to dynamically provide the specified probe, allowing providers
8502 * to support the creation of probes on-the-fly. (So-called _autocreated_
8503 * probes.) If the provider is NULL, the operations will be applied to all
8504 * providers; if the provider is non-NULL the operations will only be applied
8505 * to the specified provider. The dtrace_provider_lock must be held, and the
8506 * dtrace_lock must _not_ be held -- the provider's dtps_provide() operation
8507 * will need to grab the dtrace_lock when it reenters the framework through
8508 * dtrace_probe_lookup(), dtrace_probe_create(), etc.
8509 */
8510static void
8511dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv)
8512{
8513 struct modctl *ctl;
8514 int all = 0;
8515
5ba3f43e 8516 LCK_MTX_ASSERT(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
8517
8518 if (prv == NULL) {
8519 all = 1;
8520 prv = dtrace_provider;
8521 }
6d2010ae 8522
2d21ac55 8523 do {
2d21ac55
A
8524 /*
8525 * First, call the blanket provide operation.
8526 */
8527 prv->dtpv_pops.dtps_provide(prv->dtpv_arg, desc);
6d2010ae 8528
2d21ac55
A
8529 /*
8530 * Now call the per-module provide operation. We will grab
8531 * mod_lock to prevent the list from being modified. Note
8532 * that this also prevents the mod_busy bits from changing.
8533 * (mod_busy can only be changed with mod_lock held.)
8534 */
6d2010ae
A
8535 lck_mtx_lock(&mod_lock);
8536
6d2010ae
A
8537 ctl = dtrace_modctl_list;
8538 while (ctl) {
8539 prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);
8540 ctl = ctl->mod_next;
2d21ac55 8541 }
6d2010ae
A
8542
8543 lck_mtx_unlock(&mod_lock);
2d21ac55
A
8544 } while (all && (prv = prv->dtpv_next) != NULL);
8545}
8546
8547/*
8548 * Iterate over each probe, and call the Framework-to-Provider API function
8549 * denoted by offs.
8550 */
8551static void
8552dtrace_probe_foreach(uintptr_t offs)
8553{
8554 dtrace_provider_t *prov;
8555 void (*func)(void *, dtrace_id_t, void *);
8556 dtrace_probe_t *probe;
8557 dtrace_icookie_t cookie;
8558 int i;
8559
8560 /*
8561 * We disable interrupts to walk through the probe array. This is
8562 * safe -- the dtrace_sync() in dtrace_unregister() assures that we
8563 * won't see stale data.
8564 */
8565 cookie = dtrace_interrupt_disable();
8566
8567 for (i = 0; i < dtrace_nprobes; i++) {
8568 if ((probe = dtrace_probes[i]) == NULL)
8569 continue;
8570
8571 if (probe->dtpr_ecb == NULL) {
8572 /*
8573 * This probe isn't enabled -- don't call the function.
8574 */
8575 continue;
8576 }
8577
8578 prov = probe->dtpr_provider;
8579 func = *((void(**)(void *, dtrace_id_t, void *))
8580 ((uintptr_t)&prov->dtpv_pops + offs));
8581
8582 func(prov->dtpv_arg, i + 1, probe->dtpr_arg);
8583 }
8584
8585 dtrace_interrupt_enable(cookie);
8586}
8587
8588static int
d190cdc3 8589dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab, dtrace_ecbdesc_t *ep)
2d21ac55
A
8590{
8591 dtrace_probekey_t pkey;
8592 uint32_t priv;
8593 uid_t uid;
8594 zoneid_t zoneid;
d9a64523 8595 int err;
2d21ac55 8596
5ba3f43e 8597 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
8598
8599 dtrace_ecb_create_cache = NULL;
8600
8601 if (desc == NULL) {
8602 /*
8603 * If we're passed a NULL description, we're being asked to
8604 * create an ECB with a NULL probe.
8605 */
d190cdc3 8606 (void) dtrace_ecb_create_enable(NULL, enab, ep);
2d21ac55
A
8607 return (0);
8608 }
8609
8610 dtrace_probekey(desc, &pkey);
8611 dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred,
8612 &priv, &uid, &zoneid);
8613
d9a64523
A
8614 err = dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable, enab, ep);
8615
8616 dtrace_probekey_release(&pkey);
8617
8618 return err;
2d21ac55
A
8619}
8620
8621/*
8622 * DTrace Helper Provider Functions
8623 */
8624static void
8625dtrace_dofattr2attr(dtrace_attribute_t *attr, const dof_attr_t dofattr)
8626{
8627 attr->dtat_name = DOF_ATTR_NAME(dofattr);
8628 attr->dtat_data = DOF_ATTR_DATA(dofattr);
8629 attr->dtat_class = DOF_ATTR_CLASS(dofattr);
8630}
8631
8632static void
8633dtrace_dofprov2hprov(dtrace_helper_provdesc_t *hprov,
8634 const dof_provider_t *dofprov, char *strtab)
8635{
8636 hprov->dthpv_provname = strtab + dofprov->dofpv_name;
8637 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_provider,
8638 dofprov->dofpv_provattr);
8639 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_mod,
8640 dofprov->dofpv_modattr);
8641 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_func,
8642 dofprov->dofpv_funcattr);
8643 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_name,
8644 dofprov->dofpv_nameattr);
8645 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_args,
8646 dofprov->dofpv_argsattr);
8647}
8648
8649static void
d190cdc3 8650dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, proc_t *p)
2d21ac55
A
8651{
8652 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
8653 dof_hdr_t *dof = (dof_hdr_t *)daddr;
8654 dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
8655 dof_provider_t *provider;
8656 dof_probe_t *probe;
8657 uint32_t *off, *enoff;
8658 uint8_t *arg;
8659 char *strtab;
8660 uint_t i, nprobes;
8661 dtrace_helper_provdesc_t dhpv;
8662 dtrace_helper_probedesc_t dhpb;
8663 dtrace_meta_t *meta = dtrace_meta_pid;
8664 dtrace_mops_t *mops = &meta->dtm_mops;
8665 void *parg;
8666
8667 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
8668 str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
8669 provider->dofpv_strtab * dof->dofh_secsize);
8670 prb_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
8671 provider->dofpv_probes * dof->dofh_secsize);
8672 arg_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
8673 provider->dofpv_prargs * dof->dofh_secsize);
8674 off_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
8675 provider->dofpv_proffs * dof->dofh_secsize);
8676
8677 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
8678 off = (uint32_t *)(uintptr_t)(daddr + off_sec->dofs_offset);
8679 arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);
8680 enoff = NULL;
8681
8682 /*
8683 * See dtrace_helper_provider_validate().
8684 */
8685 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
8686 provider->dofpv_prenoffs != DOF_SECT_NONE) {
8687 enoff_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
8688 provider->dofpv_prenoffs * dof->dofh_secsize);
8689 enoff = (uint32_t *)(uintptr_t)(daddr + enoff_sec->dofs_offset);
8690 }
8691
8692 nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;
8693
8694 /*
8695 * Create the provider.
8696 */
8697 dtrace_dofprov2hprov(&dhpv, provider, strtab);
8698
d190cdc3 8699 if ((parg = mops->dtms_provide_proc(meta->dtm_arg, &dhpv, p)) == NULL)
2d21ac55
A
8700 return;
8701
8702 meta->dtm_count++;
8703
8704 /*
8705 * Create the probes.
8706 */
8707 for (i = 0; i < nprobes; i++) {
8708 probe = (dof_probe_t *)(uintptr_t)(daddr +
8709 prb_sec->dofs_offset + i * prb_sec->dofs_entsize);
8710
8711 dhpb.dthpb_mod = dhp->dofhp_mod;
8712 dhpb.dthpb_func = strtab + probe->dofpr_func;
8713 dhpb.dthpb_name = strtab + probe->dofpr_name;
b0d623f7 8714#if !defined(__APPLE__)
2d21ac55 8715 dhpb.dthpb_base = probe->dofpr_addr;
b0d623f7
A
8716#else
8717 dhpb.dthpb_base = dhp->dofhp_addr; /* FIXME: James, why? */
2d21ac55 8718#endif
b0d623f7 8719 dhpb.dthpb_offs = (int32_t *)(off + probe->dofpr_offidx);
2d21ac55
A
8720 dhpb.dthpb_noffs = probe->dofpr_noffs;
8721 if (enoff != NULL) {
b0d623f7 8722 dhpb.dthpb_enoffs = (int32_t *)(enoff + probe->dofpr_enoffidx);
2d21ac55
A
8723 dhpb.dthpb_nenoffs = probe->dofpr_nenoffs;
8724 } else {
8725 dhpb.dthpb_enoffs = NULL;
8726 dhpb.dthpb_nenoffs = 0;
8727 }
8728 dhpb.dthpb_args = arg + probe->dofpr_argidx;
8729 dhpb.dthpb_nargc = probe->dofpr_nargc;
8730 dhpb.dthpb_xargc = probe->dofpr_xargc;
8731 dhpb.dthpb_ntypes = strtab + probe->dofpr_nargv;
8732 dhpb.dthpb_xtypes = strtab + probe->dofpr_xargv;
8733
8734 mops->dtms_create_probe(meta->dtm_arg, parg, &dhpb);
8735 }
39037602
A
8736
8737 /*
8738 * Since we just created probes, we need to match our enablings
8739 * against those, with a precondition knowing that we have only
8740 * added probes from this provider
8741 */
8742 char *prov_name = mops->dtms_provider_name(parg);
8743 ASSERT(prov_name != NULL);
8744 dtrace_match_cond_t cond = {dtrace_cond_provider_match, (void*)prov_name};
8745
8746 dtrace_enabling_matchall_with_cond(&cond);
2d21ac55
A
8747}
8748
8749static void
d190cdc3 8750dtrace_helper_provide(dof_helper_t *dhp, proc_t *p)
2d21ac55
A
8751{
8752 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
8753 dof_hdr_t *dof = (dof_hdr_t *)daddr;
b0d623f7 8754 uint32_t i;
2d21ac55 8755
5ba3f43e 8756 LCK_MTX_ASSERT(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
8757
8758 for (i = 0; i < dof->dofh_secnum; i++) {
8759 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
8760 dof->dofh_secoff + i * dof->dofh_secsize);
8761
8762 if (sec->dofs_type != DOF_SECT_PROVIDER)
8763 continue;
8764
d190cdc3 8765 dtrace_helper_provide_one(dhp, sec, p);
2d21ac55 8766 }
2d21ac55
A
8767}
8768
8769static void
d190cdc3 8770dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, proc_t *p)
2d21ac55
A
8771{
8772 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
8773 dof_hdr_t *dof = (dof_hdr_t *)daddr;
8774 dof_sec_t *str_sec;
8775 dof_provider_t *provider;
8776 char *strtab;
8777 dtrace_helper_provdesc_t dhpv;
8778 dtrace_meta_t *meta = dtrace_meta_pid;
8779 dtrace_mops_t *mops = &meta->dtm_mops;
8780
8781 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
8782 str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
8783 provider->dofpv_strtab * dof->dofh_secsize);
8784
8785 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
8786
8787 /*
8788 * Create the provider.
8789 */
8790 dtrace_dofprov2hprov(&dhpv, provider, strtab);
8791
d190cdc3 8792 mops->dtms_remove_proc(meta->dtm_arg, &dhpv, p);
2d21ac55
A
8793
8794 meta->dtm_count--;
8795}
8796
8797static void
d190cdc3 8798dtrace_helper_provider_remove(dof_helper_t *dhp, proc_t *p)
2d21ac55
A
8799{
8800 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
8801 dof_hdr_t *dof = (dof_hdr_t *)daddr;
b0d623f7 8802 uint32_t i;
2d21ac55 8803
5ba3f43e 8804 LCK_MTX_ASSERT(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
8805
8806 for (i = 0; i < dof->dofh_secnum; i++) {
8807 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
8808 dof->dofh_secoff + i * dof->dofh_secsize);
8809
8810 if (sec->dofs_type != DOF_SECT_PROVIDER)
8811 continue;
8812
d190cdc3 8813 dtrace_helper_provider_remove_one(dhp, sec, p);
2d21ac55
A
8814 }
8815}
8816
8817/*
8818 * DTrace Meta Provider-to-Framework API Functions
8819 *
8820 * These functions implement the Meta Provider-to-Framework API, as described
8821 * in <sys/dtrace.h>.
8822 */
8823int
8824dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg,
8825 dtrace_meta_provider_id_t *idp)
8826{
8827 dtrace_meta_t *meta;
8828 dtrace_helpers_t *help, *next;
b0d623f7 8829 uint_t i;
2d21ac55
A
8830
8831 *idp = DTRACE_METAPROVNONE;
8832
8833 /*
8834 * We strictly don't need the name, but we hold onto it for
8835 * debuggability. All hail error queues!
8836 */
8837 if (name == NULL) {
8838 cmn_err(CE_WARN, "failed to register meta-provider: "
8839 "invalid name");
8840 return (EINVAL);
8841 }
8842
8843 if (mops == NULL ||
8844 mops->dtms_create_probe == NULL ||
d190cdc3
A
8845 mops->dtms_provide_proc == NULL ||
8846 mops->dtms_remove_proc == NULL) {
2d21ac55
A
8847 cmn_err(CE_WARN, "failed to register meta-register %s: "
8848 "invalid ops", name);
8849 return (EINVAL);
8850 }
8851
8852 meta = kmem_zalloc(sizeof (dtrace_meta_t), KM_SLEEP);
8853 meta->dtm_mops = *mops;
2d21ac55
A
8854 meta->dtm_arg = arg;
8855
8856 lck_mtx_lock(&dtrace_meta_lock);
8857 lck_mtx_lock(&dtrace_lock);
8858
8859 if (dtrace_meta_pid != NULL) {
8860 lck_mtx_unlock(&dtrace_lock);
8861 lck_mtx_unlock(&dtrace_meta_lock);
8862 cmn_err(CE_WARN, "failed to register meta-register %s: "
8863 "user-land meta-provider exists", name);
2d21ac55
A
8864 kmem_free(meta, sizeof (dtrace_meta_t));
8865 return (EINVAL);
8866 }
8867
d9a64523
A
8868 meta->dtm_name = dtrace_strref(name);
8869
2d21ac55
A
8870 dtrace_meta_pid = meta;
8871 *idp = (dtrace_meta_provider_id_t)meta;
8872
8873 /*
8874 * If there are providers and probes ready to go, pass them
8875 * off to the new meta provider now.
8876 */
8877
8878 help = dtrace_deferred_pid;
8879 dtrace_deferred_pid = NULL;
8880
8881 lck_mtx_unlock(&dtrace_lock);
8882
8883 while (help != NULL) {
8884 for (i = 0; i < help->dthps_nprovs; i++) {
d190cdc3
A
8885 proc_t *p = proc_find(help->dthps_pid);
8886 if (p == PROC_NULL)
8887 continue;
2d21ac55 8888 dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
d190cdc3
A
8889 p);
8890 proc_rele(p);
2d21ac55
A
8891 }
8892
8893 next = help->dthps_next;
8894 help->dthps_next = NULL;
8895 help->dthps_prev = NULL;
8896 help->dthps_deferred = 0;
8897 help = next;
8898 }
8899
8900 lck_mtx_unlock(&dtrace_meta_lock);
8901
8902 return (0);
8903}
8904
8905int
8906dtrace_meta_unregister(dtrace_meta_provider_id_t id)
8907{
8908 dtrace_meta_t **pp, *old = (dtrace_meta_t *)id;
8909
8910 lck_mtx_lock(&dtrace_meta_lock);
8911 lck_mtx_lock(&dtrace_lock);
8912
8913 if (old == dtrace_meta_pid) {
8914 pp = &dtrace_meta_pid;
8915 } else {
8916 panic("attempt to unregister non-existent "
8917 "dtrace meta-provider %p\n", (void *)old);
8918 }
8919
8920 if (old->dtm_count != 0) {
8921 lck_mtx_unlock(&dtrace_lock);
8922 lck_mtx_unlock(&dtrace_meta_lock);
8923 return (EBUSY);
8924 }
8925
8926 *pp = NULL;
8927
d9a64523
A
8928 dtrace_strunref(old->dtm_name);
8929
2d21ac55
A
8930 lck_mtx_unlock(&dtrace_lock);
8931 lck_mtx_unlock(&dtrace_meta_lock);
8932
2d21ac55
A
8933 kmem_free(old, sizeof (dtrace_meta_t));
8934
8935 return (0);
8936}
8937
8938
8939/*
8940 * DTrace DIF Object Functions
8941 */
8942static int
8943dtrace_difo_err(uint_t pc, const char *format, ...)
8944{
8945 if (dtrace_err_verbose) {
8946 va_list alist;
8947
8948 (void) uprintf("dtrace DIF object error: [%u]: ", pc);
8949 va_start(alist, format);
8950 (void) vuprintf(format, alist);
8951 va_end(alist);
8952 }
8953
8954#ifdef DTRACE_ERRDEBUG
8955 dtrace_errdebug(format);
8956#endif
8957 return (1);
8958}
8959
8960/*
8961 * Validate a DTrace DIF object by checking the IR instructions. The following
8962 * rules are currently enforced by dtrace_difo_validate():
8963 *
8964 * 1. Each instruction must have a valid opcode
8965 * 2. Each register, string, variable, or subroutine reference must be valid
8966 * 3. No instruction can modify register %r0 (must be zero)
8967 * 4. All instruction reserved bits must be set to zero
8968 * 5. The last instruction must be a "ret" instruction
8969 * 6. All branch targets must reference a valid instruction _after_ the branch
8970 */
8971static int
8972dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs,
8973 cred_t *cr)
8974{
b0d623f7
A
8975 int err = 0;
8976 uint_t i;
fe8ab488 8977
b0d623f7
A
8978 int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err;
8979 int kcheckload;
8980 uint_t pc;
39037602 8981 int maxglobal = -1, maxlocal = -1, maxtlocal = -1;
b0d623f7
A
8982
8983 kcheckload = cr == NULL ||
8984 (vstate->dtvs_state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) == 0;
2d21ac55
A
8985
8986 dp->dtdo_destructive = 0;
8987
8988 for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) {
8989 dif_instr_t instr = dp->dtdo_buf[pc];
8990
8991 uint_t r1 = DIF_INSTR_R1(instr);
8992 uint_t r2 = DIF_INSTR_R2(instr);
8993 uint_t rd = DIF_INSTR_RD(instr);
8994 uint_t rs = DIF_INSTR_RS(instr);
8995 uint_t label = DIF_INSTR_LABEL(instr);
8996 uint_t v = DIF_INSTR_VAR(instr);
8997 uint_t subr = DIF_INSTR_SUBR(instr);
8998 uint_t type = DIF_INSTR_TYPE(instr);
8999 uint_t op = DIF_INSTR_OP(instr);
9000
9001 switch (op) {
9002 case DIF_OP_OR:
9003 case DIF_OP_XOR:
9004 case DIF_OP_AND:
9005 case DIF_OP_SLL:
9006 case DIF_OP_SRL:
9007 case DIF_OP_SRA:
9008 case DIF_OP_SUB:
9009 case DIF_OP_ADD:
9010 case DIF_OP_MUL:
9011 case DIF_OP_SDIV:
9012 case DIF_OP_UDIV:
9013 case DIF_OP_SREM:
9014 case DIF_OP_UREM:
9015 case DIF_OP_COPYS:
9016 if (r1 >= nregs)
9017 err += efunc(pc, "invalid register %u\n", r1);
9018 if (r2 >= nregs)
9019 err += efunc(pc, "invalid register %u\n", r2);
9020 if (rd >= nregs)
9021 err += efunc(pc, "invalid register %u\n", rd);
9022 if (rd == 0)
9023 err += efunc(pc, "cannot write to %r0\n");
9024 break;
9025 case DIF_OP_NOT:
9026 case DIF_OP_MOV:
9027 case DIF_OP_ALLOCS:
9028 if (r1 >= nregs)
9029 err += efunc(pc, "invalid register %u\n", r1);
9030 if (r2 != 0)
9031 err += efunc(pc, "non-zero reserved bits\n");
9032 if (rd >= nregs)
9033 err += efunc(pc, "invalid register %u\n", rd);
9034 if (rd == 0)
9035 err += efunc(pc, "cannot write to %r0\n");
9036 break;
9037 case DIF_OP_LDSB:
9038 case DIF_OP_LDSH:
9039 case DIF_OP_LDSW:
9040 case DIF_OP_LDUB:
9041 case DIF_OP_LDUH:
9042 case DIF_OP_LDUW:
9043 case DIF_OP_LDX:
9044 if (r1 >= nregs)
9045 err += efunc(pc, "invalid register %u\n", r1);
9046 if (r2 != 0)
9047 err += efunc(pc, "non-zero reserved bits\n");
9048 if (rd >= nregs)
9049 err += efunc(pc, "invalid register %u\n", rd);
9050 if (rd == 0)
9051 err += efunc(pc, "cannot write to %r0\n");
b0d623f7 9052 if (kcheckload)
2d21ac55
A
9053 dp->dtdo_buf[pc] = DIF_INSTR_LOAD(op +
9054 DIF_OP_RLDSB - DIF_OP_LDSB, r1, rd);
9055 break;
9056 case DIF_OP_RLDSB:
9057 case DIF_OP_RLDSH:
9058 case DIF_OP_RLDSW:
9059 case DIF_OP_RLDUB:
9060 case DIF_OP_RLDUH:
9061 case DIF_OP_RLDUW:
9062 case DIF_OP_RLDX:
9063 if (r1 >= nregs)
9064 err += efunc(pc, "invalid register %u\n", r1);
9065 if (r2 != 0)
9066 err += efunc(pc, "non-zero reserved bits\n");
9067 if (rd >= nregs)
9068 err += efunc(pc, "invalid register %u\n", rd);
9069 if (rd == 0)
9070 err += efunc(pc, "cannot write to %r0\n");
9071 break;
9072 case DIF_OP_ULDSB:
9073 case DIF_OP_ULDSH:
9074 case DIF_OP_ULDSW:
9075 case DIF_OP_ULDUB:
9076 case DIF_OP_ULDUH:
9077 case DIF_OP_ULDUW:
9078 case DIF_OP_ULDX:
9079 if (r1 >= nregs)
9080 err += efunc(pc, "invalid register %u\n", r1);
9081 if (r2 != 0)
9082 err += efunc(pc, "non-zero reserved bits\n");
9083 if (rd >= nregs)
9084 err += efunc(pc, "invalid register %u\n", rd);
9085 if (rd == 0)
9086 err += efunc(pc, "cannot write to %r0\n");
9087 break;
9088 case DIF_OP_STB:
9089 case DIF_OP_STH:
9090 case DIF_OP_STW:
9091 case DIF_OP_STX:
9092 if (r1 >= nregs)
9093 err += efunc(pc, "invalid register %u\n", r1);
9094 if (r2 != 0)
9095 err += efunc(pc, "non-zero reserved bits\n");
9096 if (rd >= nregs)
9097 err += efunc(pc, "invalid register %u\n", rd);
9098 if (rd == 0)
9099 err += efunc(pc, "cannot write to 0 address\n");
9100 break;
9101 case DIF_OP_CMP:
9102 case DIF_OP_SCMP:
9103 if (r1 >= nregs)
9104 err += efunc(pc, "invalid register %u\n", r1);
9105 if (r2 >= nregs)
9106 err += efunc(pc, "invalid register %u\n", r2);
9107 if (rd != 0)
9108 err += efunc(pc, "non-zero reserved bits\n");
9109 break;
9110 case DIF_OP_TST:
9111 if (r1 >= nregs)
9112 err += efunc(pc, "invalid register %u\n", r1);
9113 if (r2 != 0 || rd != 0)
9114 err += efunc(pc, "non-zero reserved bits\n");
9115 break;
9116 case DIF_OP_BA:
9117 case DIF_OP_BE:
9118 case DIF_OP_BNE:
9119 case DIF_OP_BG:
9120 case DIF_OP_BGU:
9121 case DIF_OP_BGE:
9122 case DIF_OP_BGEU:
9123 case DIF_OP_BL:
9124 case DIF_OP_BLU:
9125 case DIF_OP_BLE:
9126 case DIF_OP_BLEU:
9127 if (label >= dp->dtdo_len) {
9128 err += efunc(pc, "invalid branch target %u\n",
9129 label);
9130 }
9131 if (label <= pc) {
9132 err += efunc(pc, "backward branch to %u\n",
9133 label);
9134 }
9135 break;
9136 case DIF_OP_RET:
9137 if (r1 != 0 || r2 != 0)
9138 err += efunc(pc, "non-zero reserved bits\n");
9139 if (rd >= nregs)
9140 err += efunc(pc, "invalid register %u\n", rd);
9141 break;
9142 case DIF_OP_NOP:
9143 case DIF_OP_POPTS:
9144 case DIF_OP_FLUSHTS:
9145 if (r1 != 0 || r2 != 0 || rd != 0)
9146 err += efunc(pc, "non-zero reserved bits\n");
9147 break;
9148 case DIF_OP_SETX:
9149 if (DIF_INSTR_INTEGER(instr) >= dp->dtdo_intlen) {
9150 err += efunc(pc, "invalid integer ref %u\n",
9151 DIF_INSTR_INTEGER(instr));
9152 }
9153 if (rd >= nregs)
9154 err += efunc(pc, "invalid register %u\n", rd);
9155 if (rd == 0)
9156 err += efunc(pc, "cannot write to %r0\n");
9157 break;
9158 case DIF_OP_SETS:
9159 if (DIF_INSTR_STRING(instr) >= dp->dtdo_strlen) {
9160 err += efunc(pc, "invalid string ref %u\n",
9161 DIF_INSTR_STRING(instr));
9162 }
9163 if (rd >= nregs)
9164 err += efunc(pc, "invalid register %u\n", rd);
9165 if (rd == 0)
9166 err += efunc(pc, "cannot write to %r0\n");
9167 break;
9168 case DIF_OP_LDGA:
9169 case DIF_OP_LDTA:
9170 if (r1 > DIF_VAR_ARRAY_MAX)
9171 err += efunc(pc, "invalid array %u\n", r1);
9172 if (r2 >= nregs)
9173 err += efunc(pc, "invalid register %u\n", r2);
9174 if (rd >= nregs)
9175 err += efunc(pc, "invalid register %u\n", rd);
9176 if (rd == 0)
9177 err += efunc(pc, "cannot write to %r0\n");
9178 break;
9179 case DIF_OP_LDGS:
9180 case DIF_OP_LDTS:
9181 case DIF_OP_LDLS:
9182 case DIF_OP_LDGAA:
9183 case DIF_OP_LDTAA:
9184 if (v < DIF_VAR_OTHER_MIN || v > DIF_VAR_OTHER_MAX)
9185 err += efunc(pc, "invalid variable %u\n", v);
9186 if (rd >= nregs)
9187 err += efunc(pc, "invalid register %u\n", rd);
9188 if (rd == 0)
9189 err += efunc(pc, "cannot write to %r0\n");
9190 break;
9191 case DIF_OP_STGS:
9192 case DIF_OP_STTS:
9193 case DIF_OP_STLS:
9194 case DIF_OP_STGAA:
9195 case DIF_OP_STTAA:
9196 if (v < DIF_VAR_OTHER_UBASE || v > DIF_VAR_OTHER_MAX)
9197 err += efunc(pc, "invalid variable %u\n", v);
9198 if (rs >= nregs)
9199 err += efunc(pc, "invalid register %u\n", rd);
9200 break;
9201 case DIF_OP_CALL:
39037602
A
9202 if (subr > DIF_SUBR_MAX &&
9203 !(subr >= DIF_SUBR_APPLE_MIN && subr <= DIF_SUBR_APPLE_MAX))
2d21ac55
A
9204 err += efunc(pc, "invalid subr %u\n", subr);
9205 if (rd >= nregs)
9206 err += efunc(pc, "invalid register %u\n", rd);
9207 if (rd == 0)
9208 err += efunc(pc, "cannot write to %r0\n");
9209
9210 if (subr == DIF_SUBR_COPYOUT ||
39037602
A
9211 subr == DIF_SUBR_COPYOUTSTR ||
9212 subr == DIF_SUBR_KDEBUG_TRACE ||
9213 subr == DIF_SUBR_KDEBUG_TRACE_STRING) {
2d21ac55
A
9214 dp->dtdo_destructive = 1;
9215 }
9216 break;
9217 case DIF_OP_PUSHTR:
9218 if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF)
9219 err += efunc(pc, "invalid ref type %u\n", type);
9220 if (r2 >= nregs)
9221 err += efunc(pc, "invalid register %u\n", r2);
9222 if (rs >= nregs)
9223 err += efunc(pc, "invalid register %u\n", rs);
9224 break;
9225 case DIF_OP_PUSHTV:
9226 if (type != DIF_TYPE_CTF)
9227 err += efunc(pc, "invalid val type %u\n", type);
9228 if (r2 >= nregs)
9229 err += efunc(pc, "invalid register %u\n", r2);
9230 if (rs >= nregs)
9231 err += efunc(pc, "invalid register %u\n", rs);
9232 break;
9233 default:
9234 err += efunc(pc, "invalid opcode %u\n",
9235 DIF_INSTR_OP(instr));
9236 }
9237 }
9238
9239 if (dp->dtdo_len != 0 &&
9240 DIF_INSTR_OP(dp->dtdo_buf[dp->dtdo_len - 1]) != DIF_OP_RET) {
9241 err += efunc(dp->dtdo_len - 1,
9242 "expected 'ret' as last DIF instruction\n");
9243 }
9244
3e170ce0 9245 if (!(dp->dtdo_rtype.dtdt_flags & (DIF_TF_BYREF | DIF_TF_BYUREF))) {
2d21ac55
A
9246 /*
9247 * If we're not returning by reference, the size must be either
9248 * 0 or the size of one of the base types.
9249 */
9250 switch (dp->dtdo_rtype.dtdt_size) {
9251 case 0:
9252 case sizeof (uint8_t):
9253 case sizeof (uint16_t):
9254 case sizeof (uint32_t):
9255 case sizeof (uint64_t):
9256 break;
9257
9258 default:
6d2010ae 9259 err += efunc(dp->dtdo_len - 1, "bad return size\n");
2d21ac55
A
9260 }
9261 }
9262
9263 for (i = 0; i < dp->dtdo_varlen && err == 0; i++) {
9264 dtrace_difv_t *v = &dp->dtdo_vartab[i], *existing = NULL;
9265 dtrace_diftype_t *vt, *et;
b0d623f7
A
9266 uint_t id;
9267 int ndx;
2d21ac55
A
9268
9269 if (v->dtdv_scope != DIFV_SCOPE_GLOBAL &&
9270 v->dtdv_scope != DIFV_SCOPE_THREAD &&
9271 v->dtdv_scope != DIFV_SCOPE_LOCAL) {
9272 err += efunc(i, "unrecognized variable scope %d\n",
9273 v->dtdv_scope);
9274 break;
9275 }
9276
9277 if (v->dtdv_kind != DIFV_KIND_ARRAY &&
9278 v->dtdv_kind != DIFV_KIND_SCALAR) {
9279 err += efunc(i, "unrecognized variable type %d\n",
9280 v->dtdv_kind);
9281 break;
9282 }
9283
9284 if ((id = v->dtdv_id) > DIF_VARIABLE_MAX) {
9285 err += efunc(i, "%d exceeds variable id limit\n", id);
9286 break;
9287 }
9288
9289 if (id < DIF_VAR_OTHER_UBASE)
9290 continue;
9291
9292 /*
9293 * For user-defined variables, we need to check that this
9294 * definition is identical to any previous definition that we
9295 * encountered.
9296 */
9297 ndx = id - DIF_VAR_OTHER_UBASE;
9298
9299 switch (v->dtdv_scope) {
9300 case DIFV_SCOPE_GLOBAL:
39037602
A
9301 if (maxglobal == -1 || ndx > maxglobal)
9302 maxglobal = ndx;
9303
2d21ac55
A
9304 if (ndx < vstate->dtvs_nglobals) {
9305 dtrace_statvar_t *svar;
9306
9307 if ((svar = vstate->dtvs_globals[ndx]) != NULL)
9308 existing = &svar->dtsv_var;
9309 }
9310
9311 break;
9312
9313 case DIFV_SCOPE_THREAD:
39037602
A
9314 if (maxtlocal == -1 || ndx > maxtlocal)
9315 maxtlocal = ndx;
9316
2d21ac55
A
9317 if (ndx < vstate->dtvs_ntlocals)
9318 existing = &vstate->dtvs_tlocals[ndx];
9319 break;
9320
9321 case DIFV_SCOPE_LOCAL:
39037602
A
9322 if (maxlocal == -1 || ndx > maxlocal)
9323 maxlocal = ndx;
2d21ac55
A
9324 if (ndx < vstate->dtvs_nlocals) {
9325 dtrace_statvar_t *svar;
9326
9327 if ((svar = vstate->dtvs_locals[ndx]) != NULL)
9328 existing = &svar->dtsv_var;
9329 }
9330
9331 break;
9332 }
9333
9334 vt = &v->dtdv_type;
9335
9336 if (vt->dtdt_flags & DIF_TF_BYREF) {
9337 if (vt->dtdt_size == 0) {
9338 err += efunc(i, "zero-sized variable\n");
9339 break;
9340 }
9341
ecc0ceb4
A
9342 if ((v->dtdv_scope == DIFV_SCOPE_GLOBAL ||
9343 v->dtdv_scope == DIFV_SCOPE_LOCAL) &&
9344 vt->dtdt_size > dtrace_statvar_maxsize) {
9345 err += efunc(i, "oversized by-ref static\n");
2d21ac55
A
9346 break;
9347 }
9348 }
9349
9350 if (existing == NULL || existing->dtdv_id == 0)
9351 continue;
9352
9353 ASSERT(existing->dtdv_id == v->dtdv_id);
9354 ASSERT(existing->dtdv_scope == v->dtdv_scope);
9355
9356 if (existing->dtdv_kind != v->dtdv_kind)
9357 err += efunc(i, "%d changed variable kind\n", id);
9358
9359 et = &existing->dtdv_type;
9360
9361 if (vt->dtdt_flags != et->dtdt_flags) {
9362 err += efunc(i, "%d changed variable type flags\n", id);
9363 break;
9364 }
9365
9366 if (vt->dtdt_size != 0 && vt->dtdt_size != et->dtdt_size) {
9367 err += efunc(i, "%d changed variable type size\n", id);
9368 break;
9369 }
9370 }
9371
39037602
A
9372 for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) {
9373 dif_instr_t instr = dp->dtdo_buf[pc];
9374
9375 uint_t v = DIF_INSTR_VAR(instr);
9376 uint_t op = DIF_INSTR_OP(instr);
9377
9378 switch (op) {
9379 case DIF_OP_LDGS:
9380 case DIF_OP_LDGAA:
9381 case DIF_OP_STGS:
9382 case DIF_OP_STGAA:
9383 if (v > (uint_t)(DIF_VAR_OTHER_UBASE + maxglobal))
9384 err += efunc(pc, "invalid variable %u\n", v);
9385 break;
9386 case DIF_OP_LDTS:
9387 case DIF_OP_LDTAA:
9388 case DIF_OP_STTS:
9389 case DIF_OP_STTAA:
9390 if (v > (uint_t)(DIF_VAR_OTHER_UBASE + maxtlocal))
9391 err += efunc(pc, "invalid variable %u\n", v);
9392 break;
9393 case DIF_OP_LDLS:
9394 case DIF_OP_STLS:
9395 if (v > (uint_t)(DIF_VAR_OTHER_UBASE + maxlocal))
9396 err += efunc(pc, "invalid variable %u\n", v);
9397 break;
9398 default:
9399 break;
9400 }
9401 }
9402
2d21ac55
A
9403 return (err);
9404}
9405
9406/*
9407 * Validate a DTrace DIF object that it is to be used as a helper. Helpers
9408 * are much more constrained than normal DIFOs. Specifically, they may
9409 * not:
9410 *
9411 * 1. Make calls to subroutines other than copyin(), copyinstr() or
9412 * miscellaneous string routines
9413 * 2. Access DTrace variables other than the args[] array, and the
9414 * curthread, pid, ppid, tid, execname, zonename, uid and gid variables.
9415 * 3. Have thread-local variables.
9416 * 4. Have dynamic variables.
9417 */
9418static int
9419dtrace_difo_validate_helper(dtrace_difo_t *dp)
9420{
9421 int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err;
9422 int err = 0;
9423 uint_t pc;
9424
9425 for (pc = 0; pc < dp->dtdo_len; pc++) {
9426 dif_instr_t instr = dp->dtdo_buf[pc];
9427
9428 uint_t v = DIF_INSTR_VAR(instr);
9429 uint_t subr = DIF_INSTR_SUBR(instr);
9430 uint_t op = DIF_INSTR_OP(instr);
9431
9432 switch (op) {
9433 case DIF_OP_OR:
9434 case DIF_OP_XOR:
9435 case DIF_OP_AND:
9436 case DIF_OP_SLL:
9437 case DIF_OP_SRL:
9438 case DIF_OP_SRA:
9439 case DIF_OP_SUB:
9440 case DIF_OP_ADD:
9441 case DIF_OP_MUL:
9442 case DIF_OP_SDIV:
9443 case DIF_OP_UDIV:
9444 case DIF_OP_SREM:
9445 case DIF_OP_UREM:
9446 case DIF_OP_COPYS:
9447 case DIF_OP_NOT:
9448 case DIF_OP_MOV:
9449 case DIF_OP_RLDSB:
9450 case DIF_OP_RLDSH:
9451 case DIF_OP_RLDSW:
9452 case DIF_OP_RLDUB:
9453 case DIF_OP_RLDUH:
9454 case DIF_OP_RLDUW:
9455 case DIF_OP_RLDX:
9456 case DIF_OP_ULDSB:
9457 case DIF_OP_ULDSH:
9458 case DIF_OP_ULDSW:
9459 case DIF_OP_ULDUB:
9460 case DIF_OP_ULDUH:
9461 case DIF_OP_ULDUW:
9462 case DIF_OP_ULDX:
9463 case DIF_OP_STB:
9464 case DIF_OP_STH:
9465 case DIF_OP_STW:
9466 case DIF_OP_STX:
9467 case DIF_OP_ALLOCS:
9468 case DIF_OP_CMP:
9469 case DIF_OP_SCMP:
9470 case DIF_OP_TST:
9471 case DIF_OP_BA:
9472 case DIF_OP_BE:
9473 case DIF_OP_BNE:
9474 case DIF_OP_BG:
9475 case DIF_OP_BGU:
9476 case DIF_OP_BGE:
9477 case DIF_OP_BGEU:
9478 case DIF_OP_BL:
9479 case DIF_OP_BLU:
9480 case DIF_OP_BLE:
9481 case DIF_OP_BLEU:
9482 case DIF_OP_RET:
9483 case DIF_OP_NOP:
9484 case DIF_OP_POPTS:
9485 case DIF_OP_FLUSHTS:
9486 case DIF_OP_SETX:
9487 case DIF_OP_SETS:
9488 case DIF_OP_LDGA:
9489 case DIF_OP_LDLS:
9490 case DIF_OP_STGS:
9491 case DIF_OP_STLS:
9492 case DIF_OP_PUSHTR:
9493 case DIF_OP_PUSHTV:
9494 break;
9495
9496 case DIF_OP_LDGS:
9497 if (v >= DIF_VAR_OTHER_UBASE)
9498 break;
9499
9500 if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9)
9501 break;
9502
9503 if (v == DIF_VAR_CURTHREAD || v == DIF_VAR_PID ||
9504 v == DIF_VAR_PPID || v == DIF_VAR_TID ||
9505 v == DIF_VAR_EXECNAME || v == DIF_VAR_ZONENAME ||
9506 v == DIF_VAR_UID || v == DIF_VAR_GID)
9507 break;
9508
9509 err += efunc(pc, "illegal variable %u\n", v);
9510 break;
9511
9512 case DIF_OP_LDTA:
9513 case DIF_OP_LDTS:
9514 case DIF_OP_LDGAA:
9515 case DIF_OP_LDTAA:
9516 err += efunc(pc, "illegal dynamic variable load\n");
9517 break;
9518
9519 case DIF_OP_STTS:
9520 case DIF_OP_STGAA:
9521 case DIF_OP_STTAA:
9522 err += efunc(pc, "illegal dynamic variable store\n");
9523 break;
9524
9525 case DIF_OP_CALL:
9526 if (subr == DIF_SUBR_ALLOCA ||
9527 subr == DIF_SUBR_BCOPY ||
9528 subr == DIF_SUBR_COPYIN ||
9529 subr == DIF_SUBR_COPYINTO ||
9530 subr == DIF_SUBR_COPYINSTR ||
9531 subr == DIF_SUBR_INDEX ||
b0d623f7
A
9532 subr == DIF_SUBR_INET_NTOA ||
9533 subr == DIF_SUBR_INET_NTOA6 ||
9534 subr == DIF_SUBR_INET_NTOP ||
2d21ac55
A
9535 subr == DIF_SUBR_LLTOSTR ||
9536 subr == DIF_SUBR_RINDEX ||
9537 subr == DIF_SUBR_STRCHR ||
9538 subr == DIF_SUBR_STRJOIN ||
9539 subr == DIF_SUBR_STRRCHR ||
9540 subr == DIF_SUBR_STRSTR ||
39037602
A
9541 subr == DIF_SUBR_KDEBUG_TRACE ||
9542 subr == DIF_SUBR_KDEBUG_TRACE_STRING ||
b0d623f7
A
9543 subr == DIF_SUBR_HTONS ||
9544 subr == DIF_SUBR_HTONL ||
9545 subr == DIF_SUBR_HTONLL ||
9546 subr == DIF_SUBR_NTOHS ||
9547 subr == DIF_SUBR_NTOHL ||
9548 subr == DIF_SUBR_NTOHLL)
2d21ac55
A
9549 break;
9550
9551 err += efunc(pc, "invalid subr %u\n", subr);
9552 break;
9553
9554 default:
9555 err += efunc(pc, "invalid opcode %u\n",
9556 DIF_INSTR_OP(instr));
9557 }
9558 }
9559
9560 return (err);
9561}
9562
9563/*
9564 * Returns 1 if the expression in the DIF object can be cached on a per-thread
9565 * basis; 0 if not.
9566 */
9567static int
9568dtrace_difo_cacheable(dtrace_difo_t *dp)
9569{
b0d623f7 9570 uint_t i;
2d21ac55
A
9571
9572 if (dp == NULL)
9573 return (0);
9574
9575 for (i = 0; i < dp->dtdo_varlen; i++) {
9576 dtrace_difv_t *v = &dp->dtdo_vartab[i];
9577
9578 if (v->dtdv_scope != DIFV_SCOPE_GLOBAL)
9579 continue;
9580
9581 switch (v->dtdv_id) {
9582 case DIF_VAR_CURTHREAD:
9583 case DIF_VAR_PID:
9584 case DIF_VAR_TID:
9585 case DIF_VAR_EXECNAME:
9586 case DIF_VAR_ZONENAME:
9587 break;
9588
9589 default:
9590 return (0);
9591 }
9592 }
9593
9594 /*
9595 * This DIF object may be cacheable. Now we need to look for any
9596 * array loading instructions, any memory loading instructions, or
9597 * any stores to thread-local variables.
9598 */
9599 for (i = 0; i < dp->dtdo_len; i++) {
9600 uint_t op = DIF_INSTR_OP(dp->dtdo_buf[i]);
9601
9602 if ((op >= DIF_OP_LDSB && op <= DIF_OP_LDX) ||
9603 (op >= DIF_OP_ULDSB && op <= DIF_OP_ULDX) ||
9604 (op >= DIF_OP_RLDSB && op <= DIF_OP_RLDX) ||
9605 op == DIF_OP_LDGA || op == DIF_OP_STTS)
9606 return (0);
9607 }
9608
9609 return (1);
9610}
9611
9612static void
9613dtrace_difo_hold(dtrace_difo_t *dp)
9614{
b0d623f7 9615 uint_t i;
2d21ac55 9616
5ba3f43e 9617 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
9618
9619 dp->dtdo_refcnt++;
9620 ASSERT(dp->dtdo_refcnt != 0);
9621
9622 /*
9623 * We need to check this DIF object for references to the variable
9624 * DIF_VAR_VTIMESTAMP.
9625 */
9626 for (i = 0; i < dp->dtdo_varlen; i++) {
9627 dtrace_difv_t *v = &dp->dtdo_vartab[i];
9628
9629 if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
9630 continue;
9631
9632 if (dtrace_vtime_references++ == 0)
9633 dtrace_vtime_enable();
9634 }
9635}
9636
9637/*
9638 * This routine calculates the dynamic variable chunksize for a given DIF
9639 * object. The calculation is not fool-proof, and can probably be tricked by
9640 * malicious DIF -- but it works for all compiler-generated DIF. Because this
9641 * calculation is likely imperfect, dtrace_dynvar() is able to gracefully fail
9642 * if a dynamic variable size exceeds the chunksize.
9643 */
9644static void
9645dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
9646{
b0d623f7 9647 uint64_t sval = 0;
2d21ac55
A
9648 dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
9649 const dif_instr_t *text = dp->dtdo_buf;
9650 uint_t pc, srd = 0;
9651 uint_t ttop = 0;
9652 size_t size, ksize;
9653 uint_t id, i;
9654
9655 for (pc = 0; pc < dp->dtdo_len; pc++) {
9656 dif_instr_t instr = text[pc];
9657 uint_t op = DIF_INSTR_OP(instr);
9658 uint_t rd = DIF_INSTR_RD(instr);
9659 uint_t r1 = DIF_INSTR_R1(instr);
9660 uint_t nkeys = 0;
9661 uchar_t scope;
9662
9663 dtrace_key_t *key = tupregs;
9664
9665 switch (op) {
9666 case DIF_OP_SETX:
9667 sval = dp->dtdo_inttab[DIF_INSTR_INTEGER(instr)];
9668 srd = rd;
9669 continue;
9670
9671 case DIF_OP_STTS:
9672 key = &tupregs[DIF_DTR_NREGS];
9673 key[0].dttk_size = 0;
9674 key[1].dttk_size = 0;
9675 nkeys = 2;
9676 scope = DIFV_SCOPE_THREAD;
9677 break;
9678
9679 case DIF_OP_STGAA:
9680 case DIF_OP_STTAA:
9681 nkeys = ttop;
9682
9683 if (DIF_INSTR_OP(instr) == DIF_OP_STTAA)
9684 key[nkeys++].dttk_size = 0;
9685
9686 key[nkeys++].dttk_size = 0;
9687
9688 if (op == DIF_OP_STTAA) {
9689 scope = DIFV_SCOPE_THREAD;
9690 } else {
9691 scope = DIFV_SCOPE_GLOBAL;
9692 }
9693
9694 break;
9695
9696 case DIF_OP_PUSHTR:
9697 if (ttop == DIF_DTR_NREGS)
9698 return;
9699
9700 if ((srd == 0 || sval == 0) && r1 == DIF_TYPE_STRING) {
9701 /*
9702 * If the register for the size of the "pushtr"
9703 * is %r0 (or the value is 0) and the type is
9704 * a string, we'll use the system-wide default
9705 * string size.
9706 */
9707 tupregs[ttop++].dttk_size =
9708 dtrace_strsize_default;
9709 } else {
9710 if (srd == 0)
9711 return;
9712
ecc0ceb4
A
9713 if (sval > LONG_MAX)
9714 return;
9715
2d21ac55
A
9716 tupregs[ttop++].dttk_size = sval;
9717 }
9718
9719 break;
9720
9721 case DIF_OP_PUSHTV:
9722 if (ttop == DIF_DTR_NREGS)
9723 return;
9724
9725 tupregs[ttop++].dttk_size = 0;
9726 break;
9727
9728 case DIF_OP_FLUSHTS:
9729 ttop = 0;
9730 break;
9731
9732 case DIF_OP_POPTS:
9733 if (ttop != 0)
9734 ttop--;
9735 break;
9736 }
9737
9738 sval = 0;
9739 srd = 0;
9740
9741 if (nkeys == 0)
9742 continue;
9743
9744 /*
9745 * We have a dynamic variable allocation; calculate its size.
9746 */
9747 for (ksize = 0, i = 0; i < nkeys; i++)
9748 ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t));
9749
9750 size = sizeof (dtrace_dynvar_t);
9751 size += sizeof (dtrace_key_t) * (nkeys - 1);
9752 size += ksize;
9753
9754 /*
9755 * Now we need to determine the size of the stored data.
9756 */
9757 id = DIF_INSTR_VAR(instr);
9758
9759 for (i = 0; i < dp->dtdo_varlen; i++) {
9760 dtrace_difv_t *v = &dp->dtdo_vartab[i];
9761
9762 if (v->dtdv_id == id && v->dtdv_scope == scope) {
9763 size += v->dtdv_type.dtdt_size;
9764 break;
9765 }
9766 }
9767
9768 if (i == dp->dtdo_varlen)
9769 return;
9770
9771 /*
9772 * We have the size. If this is larger than the chunk size
9773 * for our dynamic variable state, reset the chunk size.
9774 */
9775 size = P2ROUNDUP(size, sizeof (uint64_t));
9776
ecc0ceb4
A
9777 /*
9778 * Before setting the chunk size, check that we're not going
9779 * to set it to a negative value...
9780 */
9781 if (size > LONG_MAX)
9782 return;
9783
9784 /*
9785 * ...and make certain that we didn't badly overflow.
9786 */
9787 if (size < ksize || size < sizeof (dtrace_dynvar_t))
9788 return;
9789
2d21ac55
A
9790 if (size > vstate->dtvs_dynvars.dtds_chunksize)
9791 vstate->dtvs_dynvars.dtds_chunksize = size;
9792 }
9793}
9794
9795static void
9796dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
9797{
b0d623f7
A
9798 int oldsvars, osz, nsz, otlocals, ntlocals;
9799 uint_t i, id;
2d21ac55 9800
5ba3f43e 9801 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
9802 ASSERT(dp->dtdo_buf != NULL && dp->dtdo_len != 0);
9803
9804 for (i = 0; i < dp->dtdo_varlen; i++) {
9805 dtrace_difv_t *v = &dp->dtdo_vartab[i];
b0d623f7
A
9806 dtrace_statvar_t *svar;
9807 dtrace_statvar_t ***svarp = NULL;
2d21ac55
A
9808 size_t dsize = 0;
9809 uint8_t scope = v->dtdv_scope;
b0d623f7 9810 int *np = (int *)NULL;
2d21ac55
A
9811
9812 if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE)
9813 continue;
9814
9815 id -= DIF_VAR_OTHER_UBASE;
9816
9817 switch (scope) {
9818 case DIFV_SCOPE_THREAD:
b0d623f7 9819 while (id >= (uint_t)(otlocals = vstate->dtvs_ntlocals)) {
2d21ac55
A
9820 dtrace_difv_t *tlocals;
9821
9822 if ((ntlocals = (otlocals << 1)) == 0)
9823 ntlocals = 1;
9824
9825 osz = otlocals * sizeof (dtrace_difv_t);
9826 nsz = ntlocals * sizeof (dtrace_difv_t);
9827
9828 tlocals = kmem_zalloc(nsz, KM_SLEEP);
9829
9830 if (osz != 0) {
9831 bcopy(vstate->dtvs_tlocals,
9832 tlocals, osz);
9833 kmem_free(vstate->dtvs_tlocals, osz);
9834 }
9835
9836 vstate->dtvs_tlocals = tlocals;
9837 vstate->dtvs_ntlocals = ntlocals;
9838 }
9839
9840 vstate->dtvs_tlocals[id] = *v;
9841 continue;
9842
9843 case DIFV_SCOPE_LOCAL:
9844 np = &vstate->dtvs_nlocals;
9845 svarp = &vstate->dtvs_locals;
9846
9847 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF)
c910b4d9 9848 dsize = (int)NCPU * (v->dtdv_type.dtdt_size +
2d21ac55
A
9849 sizeof (uint64_t));
9850 else
c910b4d9 9851 dsize = (int)NCPU * sizeof (uint64_t);
2d21ac55
A
9852
9853 break;
9854
9855 case DIFV_SCOPE_GLOBAL:
9856 np = &vstate->dtvs_nglobals;
9857 svarp = &vstate->dtvs_globals;
9858
9859 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF)
9860 dsize = v->dtdv_type.dtdt_size +
9861 sizeof (uint64_t);
9862
9863 break;
9864
9865 default:
9866 ASSERT(0);
9867 }
9868
b0d623f7 9869 while (id >= (uint_t)(oldsvars = *np)) {
2d21ac55
A
9870 dtrace_statvar_t **statics;
9871 int newsvars, oldsize, newsize;
9872
9873 if ((newsvars = (oldsvars << 1)) == 0)
9874 newsvars = 1;
9875
9876 oldsize = oldsvars * sizeof (dtrace_statvar_t *);
9877 newsize = newsvars * sizeof (dtrace_statvar_t *);
9878
9879 statics = kmem_zalloc(newsize, KM_SLEEP);
9880
9881 if (oldsize != 0) {
9882 bcopy(*svarp, statics, oldsize);
9883 kmem_free(*svarp, oldsize);
9884 }
9885
9886 *svarp = statics;
9887 *np = newsvars;
9888 }
9889
9890 if ((svar = (*svarp)[id]) == NULL) {
9891 svar = kmem_zalloc(sizeof (dtrace_statvar_t), KM_SLEEP);
9892 svar->dtsv_var = *v;
9893
9894 if ((svar->dtsv_size = dsize) != 0) {
9895 svar->dtsv_data = (uint64_t)(uintptr_t)
9896 kmem_zalloc(dsize, KM_SLEEP);
9897 }
9898
9899 (*svarp)[id] = svar;
9900 }
9901
9902 svar->dtsv_refcnt++;
9903 }
9904
9905 dtrace_difo_chunksize(dp, vstate);
9906 dtrace_difo_hold(dp);
9907}
9908
9909static dtrace_difo_t *
9910dtrace_difo_duplicate(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
9911{
9912 dtrace_difo_t *new;
9913 size_t sz;
9914
9915 ASSERT(dp->dtdo_buf != NULL);
9916 ASSERT(dp->dtdo_refcnt != 0);
9917
9918 new = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP);
9919
9920 ASSERT(dp->dtdo_buf != NULL);
9921 sz = dp->dtdo_len * sizeof (dif_instr_t);
9922 new->dtdo_buf = kmem_alloc(sz, KM_SLEEP);
9923 bcopy(dp->dtdo_buf, new->dtdo_buf, sz);
9924 new->dtdo_len = dp->dtdo_len;
9925
9926 if (dp->dtdo_strtab != NULL) {
9927 ASSERT(dp->dtdo_strlen != 0);
9928 new->dtdo_strtab = kmem_alloc(dp->dtdo_strlen, KM_SLEEP);
9929 bcopy(dp->dtdo_strtab, new->dtdo_strtab, dp->dtdo_strlen);
9930 new->dtdo_strlen = dp->dtdo_strlen;
9931 }
9932
9933 if (dp->dtdo_inttab != NULL) {
9934 ASSERT(dp->dtdo_intlen != 0);
9935 sz = dp->dtdo_intlen * sizeof (uint64_t);
9936 new->dtdo_inttab = kmem_alloc(sz, KM_SLEEP);
9937 bcopy(dp->dtdo_inttab, new->dtdo_inttab, sz);
9938 new->dtdo_intlen = dp->dtdo_intlen;
9939 }
9940
9941 if (dp->dtdo_vartab != NULL) {
9942 ASSERT(dp->dtdo_varlen != 0);
9943 sz = dp->dtdo_varlen * sizeof (dtrace_difv_t);
9944 new->dtdo_vartab = kmem_alloc(sz, KM_SLEEP);
9945 bcopy(dp->dtdo_vartab, new->dtdo_vartab, sz);
9946 new->dtdo_varlen = dp->dtdo_varlen;
9947 }
9948
9949 dtrace_difo_init(new, vstate);
9950 return (new);
9951}
9952
9953static void
9954dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
9955{
b0d623f7 9956 uint_t i;
2d21ac55
A
9957
9958 ASSERT(dp->dtdo_refcnt == 0);
9959
9960 for (i = 0; i < dp->dtdo_varlen; i++) {
9961 dtrace_difv_t *v = &dp->dtdo_vartab[i];
b0d623f7
A
9962 dtrace_statvar_t *svar;
9963 dtrace_statvar_t **svarp = NULL;
9964 uint_t id;
9965 uint8_t scope = v->dtdv_scope;
9966 int *np = NULL;
2d21ac55
A
9967
9968 switch (scope) {
9969 case DIFV_SCOPE_THREAD:
9970 continue;
9971
9972 case DIFV_SCOPE_LOCAL:
9973 np = &vstate->dtvs_nlocals;
9974 svarp = vstate->dtvs_locals;
9975 break;
9976
9977 case DIFV_SCOPE_GLOBAL:
9978 np = &vstate->dtvs_nglobals;
9979 svarp = vstate->dtvs_globals;
9980 break;
9981
9982 default:
9983 ASSERT(0);
9984 }
9985
9986 if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE)
9987 continue;
9988
9989 id -= DIF_VAR_OTHER_UBASE;
b0d623f7 9990
b0d623f7 9991 ASSERT(id < (uint_t)*np);
2d21ac55
A
9992
9993 svar = svarp[id];
9994 ASSERT(svar != NULL);
9995 ASSERT(svar->dtsv_refcnt > 0);
9996
9997 if (--svar->dtsv_refcnt > 0)
9998 continue;
9999
10000 if (svar->dtsv_size != 0) {
fe8ab488 10001 ASSERT(svar->dtsv_data != 0);
2d21ac55
A
10002 kmem_free((void *)(uintptr_t)svar->dtsv_data,
10003 svar->dtsv_size);
10004 }
10005
10006 kmem_free(svar, sizeof (dtrace_statvar_t));
10007 svarp[id] = NULL;
10008 }
10009
10010 kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t));
10011 kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t));
10012 kmem_free(dp->dtdo_strtab, dp->dtdo_strlen);
10013 kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t));
10014
10015 kmem_free(dp, sizeof (dtrace_difo_t));
10016}
10017
10018static void
10019dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
10020{
b0d623f7 10021 uint_t i;
2d21ac55 10022
5ba3f43e 10023 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
10024 ASSERT(dp->dtdo_refcnt != 0);
10025
10026 for (i = 0; i < dp->dtdo_varlen; i++) {
10027 dtrace_difv_t *v = &dp->dtdo_vartab[i];
10028
10029 if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
10030 continue;
10031
10032 ASSERT(dtrace_vtime_references > 0);
10033 if (--dtrace_vtime_references == 0)
10034 dtrace_vtime_disable();
10035 }
10036
10037 if (--dp->dtdo_refcnt == 0)
10038 dtrace_difo_destroy(dp, vstate);
10039}
10040
10041/*
10042 * DTrace Format Functions
10043 */
10044static uint16_t
10045dtrace_format_add(dtrace_state_t *state, char *str)
10046{
10047 char *fmt, **new;
10048 uint16_t ndx, len = strlen(str) + 1;
10049
10050 fmt = kmem_zalloc(len, KM_SLEEP);
10051 bcopy(str, fmt, len);
10052
10053 for (ndx = 0; ndx < state->dts_nformats; ndx++) {
10054 if (state->dts_formats[ndx] == NULL) {
10055 state->dts_formats[ndx] = fmt;
10056 return (ndx + 1);
10057 }
10058 }
10059
10060 if (state->dts_nformats == USHRT_MAX) {
10061 /*
10062 * This is only likely if a denial-of-service attack is being
10063 * attempted. As such, it's okay to fail silently here.
10064 */
10065 kmem_free(fmt, len);
10066 return (0);
10067 }
10068
10069 /*
10070 * For simplicity, we always resize the formats array to be exactly the
10071 * number of formats.
10072 */
10073 ndx = state->dts_nformats++;
10074 new = kmem_alloc((ndx + 1) * sizeof (char *), KM_SLEEP);
10075
10076 if (state->dts_formats != NULL) {
10077 ASSERT(ndx != 0);
10078 bcopy(state->dts_formats, new, ndx * sizeof (char *));
10079 kmem_free(state->dts_formats, ndx * sizeof (char *));
10080 }
10081
10082 state->dts_formats = new;
10083 state->dts_formats[ndx] = fmt;
10084
10085 return (ndx + 1);
10086}
10087
10088static void
10089dtrace_format_remove(dtrace_state_t *state, uint16_t format)
10090{
10091 char *fmt;
10092
10093 ASSERT(state->dts_formats != NULL);
10094 ASSERT(format <= state->dts_nformats);
10095 ASSERT(state->dts_formats[format - 1] != NULL);
10096
10097 fmt = state->dts_formats[format - 1];
10098 kmem_free(fmt, strlen(fmt) + 1);
10099 state->dts_formats[format - 1] = NULL;
10100}
10101
10102static void
10103dtrace_format_destroy(dtrace_state_t *state)
10104{
10105 int i;
10106
10107 if (state->dts_nformats == 0) {
10108 ASSERT(state->dts_formats == NULL);
10109 return;
10110 }
10111
10112 ASSERT(state->dts_formats != NULL);
10113
10114 for (i = 0; i < state->dts_nformats; i++) {
10115 char *fmt = state->dts_formats[i];
10116
10117 if (fmt == NULL)
10118 continue;
10119
10120 kmem_free(fmt, strlen(fmt) + 1);
10121 }
10122
10123 kmem_free(state->dts_formats, state->dts_nformats * sizeof (char *));
10124 state->dts_nformats = 0;
10125 state->dts_formats = NULL;
10126}
10127
10128/*
10129 * DTrace Predicate Functions
10130 */
10131static dtrace_predicate_t *
10132dtrace_predicate_create(dtrace_difo_t *dp)
10133{
10134 dtrace_predicate_t *pred;
10135
5ba3f43e 10136 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
10137 ASSERT(dp->dtdo_refcnt != 0);
10138
10139 pred = kmem_zalloc(sizeof (dtrace_predicate_t), KM_SLEEP);
10140 pred->dtp_difo = dp;
10141 pred->dtp_refcnt = 1;
10142
10143 if (!dtrace_difo_cacheable(dp))
10144 return (pred);
10145
10146 if (dtrace_predcache_id == DTRACE_CACHEIDNONE) {
10147 /*
10148 * This is only theoretically possible -- we have had 2^32
10149 * cacheable predicates on this machine. We cannot allow any
10150 * more predicates to become cacheable: as unlikely as it is,
10151 * there may be a thread caching a (now stale) predicate cache
10152 * ID. (N.B.: the temptation is being successfully resisted to
10153 * have this cmn_err() "Holy shit -- we executed this code!")
10154 */
10155 return (pred);
10156 }
10157
10158 pred->dtp_cacheid = dtrace_predcache_id++;
10159
10160 return (pred);
10161}
10162
10163static void
10164dtrace_predicate_hold(dtrace_predicate_t *pred)
10165{
5ba3f43e 10166 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
10167 ASSERT(pred->dtp_difo != NULL && pred->dtp_difo->dtdo_refcnt != 0);
10168 ASSERT(pred->dtp_refcnt > 0);
10169
10170 pred->dtp_refcnt++;
10171}
10172
10173static void
10174dtrace_predicate_release(dtrace_predicate_t *pred, dtrace_vstate_t *vstate)
10175{
10176 dtrace_difo_t *dp = pred->dtp_difo;
b0d623f7 10177#pragma unused(dp) /* __APPLE__ */
2d21ac55 10178
5ba3f43e 10179 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
10180 ASSERT(dp != NULL && dp->dtdo_refcnt != 0);
10181 ASSERT(pred->dtp_refcnt > 0);
10182
10183 if (--pred->dtp_refcnt == 0) {
10184 dtrace_difo_release(pred->dtp_difo, vstate);
10185 kmem_free(pred, sizeof (dtrace_predicate_t));
10186 }
10187}
10188
10189/*
10190 * DTrace Action Description Functions
10191 */
10192static dtrace_actdesc_t *
10193dtrace_actdesc_create(dtrace_actkind_t kind, uint32_t ntuple,
10194 uint64_t uarg, uint64_t arg)
10195{
10196 dtrace_actdesc_t *act;
10197
fe8ab488
A
10198 ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != 0 &&
10199 arg >= KERNELBASE) || (arg == 0 && kind == DTRACEACT_PRINTA));
2d21ac55
A
10200
10201 act = kmem_zalloc(sizeof (dtrace_actdesc_t), KM_SLEEP);
10202 act->dtad_kind = kind;
10203 act->dtad_ntuple = ntuple;
10204 act->dtad_uarg = uarg;
10205 act->dtad_arg = arg;
10206 act->dtad_refcnt = 1;
10207
10208 return (act);
10209}
10210
10211static void
10212dtrace_actdesc_hold(dtrace_actdesc_t *act)
10213{
10214 ASSERT(act->dtad_refcnt >= 1);
10215 act->dtad_refcnt++;
10216}
10217
10218static void
10219dtrace_actdesc_release(dtrace_actdesc_t *act, dtrace_vstate_t *vstate)
10220{
10221 dtrace_actkind_t kind = act->dtad_kind;
10222 dtrace_difo_t *dp;
10223
10224 ASSERT(act->dtad_refcnt >= 1);
10225
10226 if (--act->dtad_refcnt != 0)
10227 return;
10228
10229 if ((dp = act->dtad_difo) != NULL)
10230 dtrace_difo_release(dp, vstate);
10231
10232 if (DTRACEACT_ISPRINTFLIKE(kind)) {
10233 char *str = (char *)(uintptr_t)act->dtad_arg;
10234
b0d623f7
A
10235 ASSERT((str != NULL && (uintptr_t)str >= KERNELBASE) ||
10236 (str == NULL && act->dtad_kind == DTRACEACT_PRINTA));
2d21ac55
A
10237
10238 if (str != NULL)
10239 kmem_free(str, strlen(str) + 1);
10240 }
10241
10242 kmem_free(act, sizeof (dtrace_actdesc_t));
10243}
10244
10245/*
10246 * DTrace ECB Functions
10247 */
10248static dtrace_ecb_t *
10249dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe)
10250{
10251 dtrace_ecb_t *ecb;
10252 dtrace_epid_t epid;
10253
5ba3f43e 10254 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
10255
10256 ecb = kmem_zalloc(sizeof (dtrace_ecb_t), KM_SLEEP);
10257 ecb->dte_predicate = NULL;
10258 ecb->dte_probe = probe;
10259
10260 /*
10261 * The default size is the size of the default action: recording
04b8595b 10262 * the header.
2d21ac55 10263 */
04b8595b 10264 ecb->dte_size = ecb->dte_needed = sizeof (dtrace_rechdr_t);
2d21ac55
A
10265 ecb->dte_alignment = sizeof (dtrace_epid_t);
10266
10267 epid = state->dts_epid++;
10268
b0d623f7 10269 if (epid - 1 >= (dtrace_epid_t)state->dts_necbs) {
2d21ac55
A
10270 dtrace_ecb_t **oecbs = state->dts_ecbs, **ecbs;
10271 int necbs = state->dts_necbs << 1;
10272
b0d623f7 10273 ASSERT(epid == (dtrace_epid_t)state->dts_necbs + 1);
2d21ac55
A
10274
10275 if (necbs == 0) {
10276 ASSERT(oecbs == NULL);
10277 necbs = 1;
10278 }
10279
10280 ecbs = kmem_zalloc(necbs * sizeof (*ecbs), KM_SLEEP);
10281
10282 if (oecbs != NULL)
10283 bcopy(oecbs, ecbs, state->dts_necbs * sizeof (*ecbs));
10284
10285 dtrace_membar_producer();
10286 state->dts_ecbs = ecbs;
10287
10288 if (oecbs != NULL) {
10289 /*
10290 * If this state is active, we must dtrace_sync()
10291 * before we can free the old dts_ecbs array: we're
10292 * coming in hot, and there may be active ring
10293 * buffer processing (which indexes into the dts_ecbs
10294 * array) on another CPU.
10295 */
10296 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
10297 dtrace_sync();
10298
10299 kmem_free(oecbs, state->dts_necbs * sizeof (*ecbs));
10300 }
10301
10302 dtrace_membar_producer();
10303 state->dts_necbs = necbs;
10304 }
10305
10306 ecb->dte_state = state;
10307
10308 ASSERT(state->dts_ecbs[epid - 1] == NULL);
10309 dtrace_membar_producer();
10310 state->dts_ecbs[(ecb->dte_epid = epid) - 1] = ecb;
10311
10312 return (ecb);
10313}
10314
6d2010ae 10315static int
2d21ac55
A
10316dtrace_ecb_enable(dtrace_ecb_t *ecb)
10317{
10318 dtrace_probe_t *probe = ecb->dte_probe;
10319
5ba3f43e
A
10320 LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED);
10321 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
10322 ASSERT(ecb->dte_next == NULL);
10323
10324 if (probe == NULL) {
10325 /*
10326 * This is the NULL probe -- there's nothing to do.
10327 */
6d2010ae 10328 return(0);
2d21ac55
A
10329 }
10330
fe8ab488 10331 probe->dtpr_provider->dtpv_ecb_count++;
2d21ac55
A
10332 if (probe->dtpr_ecb == NULL) {
10333 dtrace_provider_t *prov = probe->dtpr_provider;
10334
10335 /*
10336 * We're the first ECB on this probe.
10337 */
10338 probe->dtpr_ecb = probe->dtpr_ecb_last = ecb;
10339
10340 if (ecb->dte_predicate != NULL)
10341 probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid;
10342
6d2010ae
A
10343 return (prov->dtpv_pops.dtps_enable(prov->dtpv_arg,
10344 probe->dtpr_id, probe->dtpr_arg));
2d21ac55
A
10345 } else {
10346 /*
10347 * This probe is already active. Swing the last pointer to
10348 * point to the new ECB, and issue a dtrace_sync() to assure
10349 * that all CPUs have seen the change.
10350 */
10351 ASSERT(probe->dtpr_ecb_last != NULL);
10352 probe->dtpr_ecb_last->dte_next = ecb;
10353 probe->dtpr_ecb_last = ecb;
10354 probe->dtpr_predcache = 0;
10355
10356 dtrace_sync();
6d2010ae 10357 return(0);
2d21ac55
A
10358 }
10359}
10360
39037602 10361static int
2d21ac55
A
10362dtrace_ecb_resize(dtrace_ecb_t *ecb)
10363{
2d21ac55 10364 dtrace_action_t *act;
04b8595b 10365 uint32_t curneeded = UINT32_MAX;
2d21ac55 10366 uint32_t aggbase = UINT32_MAX;
2d21ac55
A
10367
10368 /*
04b8595b
A
10369 * If we record anything, we always record the dtrace_rechdr_t. (And
10370 * we always record it first.)
2d21ac55 10371 */
04b8595b
A
10372 ecb->dte_size = sizeof (dtrace_rechdr_t);
10373 ecb->dte_alignment = sizeof (dtrace_epid_t);
2d21ac55
A
10374
10375 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
10376 dtrace_recdesc_t *rec = &act->dta_rec;
04b8595b 10377 ASSERT(rec->dtrd_size > 0 || rec->dtrd_alignment == 1);
2d21ac55 10378
04b8595b 10379 ecb->dte_alignment = MAX(ecb->dte_alignment, rec->dtrd_alignment);
2d21ac55
A
10380
10381 if (DTRACEACT_ISAGG(act->dta_kind)) {
10382 dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
2d21ac55 10383
04b8595b
A
10384 ASSERT(rec->dtrd_size != 0);
10385 ASSERT(agg->dtag_first != NULL);
10386 ASSERT(act->dta_prev->dta_intuple);
2d21ac55 10387 ASSERT(aggbase != UINT32_MAX);
04b8595b 10388 ASSERT(curneeded != UINT32_MAX);
2d21ac55
A
10389
10390 agg->dtag_base = aggbase;
04b8595b
A
10391 curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment);
10392 rec->dtrd_offset = curneeded;
39037602
A
10393 if (curneeded + rec->dtrd_size < curneeded)
10394 return (EINVAL);
04b8595b
A
10395 curneeded += rec->dtrd_size;
10396 ecb->dte_needed = MAX(ecb->dte_needed, curneeded);
2d21ac55 10397
04b8595b
A
10398 aggbase = UINT32_MAX;
10399 curneeded = UINT32_MAX;
10400 } else if (act->dta_intuple) {
10401 if (curneeded == UINT32_MAX) {
10402 /*
10403 * This is the first record in a tuple. Align
10404 * curneeded to be at offset 4 in an 8-byte
10405 * aligned block.
10406 */
10407 ASSERT(act->dta_prev == NULL || !act->dta_prev->dta_intuple);
10408 ASSERT(aggbase == UINT32_MAX);
10409
10410 curneeded = P2PHASEUP(ecb->dte_size,
10411 sizeof (uint64_t), sizeof (dtrace_aggid_t));
10412
10413 aggbase = curneeded - sizeof (dtrace_aggid_t);
10414 ASSERT(IS_P2ALIGNED(aggbase,
10415 sizeof (uint64_t)));
2d21ac55 10416 }
2d21ac55 10417
04b8595b
A
10418 curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment);
10419 rec->dtrd_offset = curneeded;
10420 curneeded += rec->dtrd_size;
39037602
A
10421 if (curneeded + rec->dtrd_size < curneeded)
10422 return (EINVAL);
04b8595b
A
10423 } else {
10424 /* tuples must be followed by an aggregation */
10425 ASSERT(act->dta_prev == NULL || !act->dta_prev->dta_intuple);
10426 ecb->dte_size = P2ROUNDUP(ecb->dte_size, rec->dtrd_alignment);
10427 rec->dtrd_offset = ecb->dte_size;
39037602
A
10428 if (ecb->dte_size + rec->dtrd_size < ecb->dte_size)
10429 return (EINVAL);
04b8595b
A
10430 ecb->dte_size += rec->dtrd_size;
10431 ecb->dte_needed = MAX(ecb->dte_needed, ecb->dte_size);
2d21ac55 10432 }
2d21ac55
A
10433 }
10434
10435 if ((act = ecb->dte_action) != NULL &&
10436 !(act->dta_kind == DTRACEACT_SPECULATE && act->dta_next == NULL) &&
04b8595b 10437 ecb->dte_size == sizeof (dtrace_rechdr_t)) {
2d21ac55 10438 /*
04b8595b 10439 * If the size is still sizeof (dtrace_rechdr_t), then all
2d21ac55
A
10440 * actions store no data; set the size to 0.
10441 */
2d21ac55 10442 ecb->dte_size = 0;
2d21ac55
A
10443 }
10444
04b8595b
A
10445 ecb->dte_size = P2ROUNDUP(ecb->dte_size, sizeof (dtrace_epid_t));
10446 ecb->dte_needed = P2ROUNDUP(ecb->dte_needed, (sizeof (dtrace_epid_t)));
10447 ecb->dte_state->dts_needed = MAX(ecb->dte_state->dts_needed, ecb->dte_needed);
39037602 10448 return (0);
2d21ac55
A
10449}
10450
10451static dtrace_action_t *
10452dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
10453{
10454 dtrace_aggregation_t *agg;
10455 size_t size = sizeof (uint64_t);
10456 int ntuple = desc->dtad_ntuple;
10457 dtrace_action_t *act;
10458 dtrace_recdesc_t *frec;
10459 dtrace_aggid_t aggid;
10460 dtrace_state_t *state = ecb->dte_state;
10461
10462 agg = kmem_zalloc(sizeof (dtrace_aggregation_t), KM_SLEEP);
10463 agg->dtag_ecb = ecb;
10464
10465 ASSERT(DTRACEACT_ISAGG(desc->dtad_kind));
10466
10467 switch (desc->dtad_kind) {
10468 case DTRACEAGG_MIN:
b0d623f7 10469 agg->dtag_initial = INT64_MAX;
2d21ac55
A
10470 agg->dtag_aggregate = dtrace_aggregate_min;
10471 break;
10472
10473 case DTRACEAGG_MAX:
b0d623f7 10474 agg->dtag_initial = INT64_MIN;
2d21ac55
A
10475 agg->dtag_aggregate = dtrace_aggregate_max;
10476 break;
10477
10478 case DTRACEAGG_COUNT:
10479 agg->dtag_aggregate = dtrace_aggregate_count;
10480 break;
10481
10482 case DTRACEAGG_QUANTIZE:
10483 agg->dtag_aggregate = dtrace_aggregate_quantize;
10484 size = (((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) *
10485 sizeof (uint64_t);
10486 break;
10487
10488 case DTRACEAGG_LQUANTIZE: {
10489 uint16_t step = DTRACE_LQUANTIZE_STEP(desc->dtad_arg);
10490 uint16_t levels = DTRACE_LQUANTIZE_LEVELS(desc->dtad_arg);
10491
10492 agg->dtag_initial = desc->dtad_arg;
10493 agg->dtag_aggregate = dtrace_aggregate_lquantize;
10494
10495 if (step == 0 || levels == 0)
10496 goto err;
10497
10498 size = levels * sizeof (uint64_t) + 3 * sizeof (uint64_t);
10499 break;
10500 }
10501
39236c6e
A
10502 case DTRACEAGG_LLQUANTIZE: {
10503 uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg);
10504 uint16_t low = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg);
10505 uint16_t high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg);
15129b1c 10506 uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg);
39236c6e
A
10507 int64_t v;
10508
10509 agg->dtag_initial = desc->dtad_arg;
10510 agg->dtag_aggregate = dtrace_aggregate_llquantize;
10511
10512 if (factor < 2 || low >= high || nsteps < factor)
10513 goto err;
10514
10515 /*
10516 * Now check that the number of steps evenly divides a power
10517 * of the factor. (This assures both integer bucket size and
10518 * linearity within each magnitude.)
10519 */
10520 for (v = factor; v < nsteps; v *= factor)
10521 continue;
10522
10523 if ((v % nsteps) || (nsteps % factor))
10524 goto err;
10525
10526 size = (dtrace_aggregate_llquantize_bucket(factor, low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t);
10527 break;
10528 }
10529
2d21ac55
A
10530 case DTRACEAGG_AVG:
10531 agg->dtag_aggregate = dtrace_aggregate_avg;
10532 size = sizeof (uint64_t) * 2;
10533 break;
10534
b0d623f7
A
10535 case DTRACEAGG_STDDEV:
10536 agg->dtag_aggregate = dtrace_aggregate_stddev;
10537 size = sizeof (uint64_t) * 4;
10538 break;
10539
2d21ac55
A
10540 case DTRACEAGG_SUM:
10541 agg->dtag_aggregate = dtrace_aggregate_sum;
10542 break;
10543
10544 default:
10545 goto err;
10546 }
10547
10548 agg->dtag_action.dta_rec.dtrd_size = size;
10549
10550 if (ntuple == 0)
10551 goto err;
10552
10553 /*
10554 * We must make sure that we have enough actions for the n-tuple.
10555 */
10556 for (act = ecb->dte_action_last; act != NULL; act = act->dta_prev) {
10557 if (DTRACEACT_ISAGG(act->dta_kind))
10558 break;
10559
10560 if (--ntuple == 0) {
10561 /*
10562 * This is the action with which our n-tuple begins.
10563 */
10564 agg->dtag_first = act;
10565 goto success;
10566 }
10567 }
10568
10569 /*
10570 * This n-tuple is short by ntuple elements. Return failure.
10571 */
10572 ASSERT(ntuple != 0);
10573err:
10574 kmem_free(agg, sizeof (dtrace_aggregation_t));
10575 return (NULL);
10576
10577success:
10578 /*
10579 * If the last action in the tuple has a size of zero, it's actually
10580 * an expression argument for the aggregating action.
10581 */
10582 ASSERT(ecb->dte_action_last != NULL);
10583 act = ecb->dte_action_last;
10584
10585 if (act->dta_kind == DTRACEACT_DIFEXPR) {
10586 ASSERT(act->dta_difo != NULL);
10587
10588 if (act->dta_difo->dtdo_rtype.dtdt_size == 0)
10589 agg->dtag_hasarg = 1;
10590 }
10591
10592 /*
10593 * We need to allocate an id for this aggregation.
10594 */
10595 aggid = (dtrace_aggid_t)(uintptr_t)vmem_alloc(state->dts_aggid_arena, 1,
10596 VM_BESTFIT | VM_SLEEP);
10597
b0d623f7 10598 if (aggid - 1 >= (dtrace_aggid_t)state->dts_naggregations) {
2d21ac55
A
10599 dtrace_aggregation_t **oaggs = state->dts_aggregations;
10600 dtrace_aggregation_t **aggs;
10601 int naggs = state->dts_naggregations << 1;
10602 int onaggs = state->dts_naggregations;
10603
b0d623f7 10604 ASSERT(aggid == (dtrace_aggid_t)state->dts_naggregations + 1);
2d21ac55
A
10605
10606 if (naggs == 0) {
10607 ASSERT(oaggs == NULL);
10608 naggs = 1;
10609 }
10610
10611 aggs = kmem_zalloc(naggs * sizeof (*aggs), KM_SLEEP);
10612
10613 if (oaggs != NULL) {
10614 bcopy(oaggs, aggs, onaggs * sizeof (*aggs));
10615 kmem_free(oaggs, onaggs * sizeof (*aggs));
10616 }
10617
10618 state->dts_aggregations = aggs;
10619 state->dts_naggregations = naggs;
10620 }
10621
10622 ASSERT(state->dts_aggregations[aggid - 1] == NULL);
10623 state->dts_aggregations[(agg->dtag_id = aggid) - 1] = agg;
10624
10625 frec = &agg->dtag_first->dta_rec;
10626 if (frec->dtrd_alignment < sizeof (dtrace_aggid_t))
10627 frec->dtrd_alignment = sizeof (dtrace_aggid_t);
10628
10629 for (act = agg->dtag_first; act != NULL; act = act->dta_next) {
10630 ASSERT(!act->dta_intuple);
10631 act->dta_intuple = 1;
10632 }
10633
10634 return (&agg->dtag_action);
10635}
10636
10637static void
10638dtrace_ecb_aggregation_destroy(dtrace_ecb_t *ecb, dtrace_action_t *act)
10639{
10640 dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
10641 dtrace_state_t *state = ecb->dte_state;
10642 dtrace_aggid_t aggid = agg->dtag_id;
10643
10644 ASSERT(DTRACEACT_ISAGG(act->dta_kind));
10645 vmem_free(state->dts_aggid_arena, (void *)(uintptr_t)aggid, 1);
10646
10647 ASSERT(state->dts_aggregations[aggid - 1] == agg);
10648 state->dts_aggregations[aggid - 1] = NULL;
10649
10650 kmem_free(agg, sizeof (dtrace_aggregation_t));
10651}
10652
10653static int
10654dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
10655{
10656 dtrace_action_t *action, *last;
10657 dtrace_difo_t *dp = desc->dtad_difo;
10658 uint32_t size = 0, align = sizeof (uint8_t), mask;
10659 uint16_t format = 0;
10660 dtrace_recdesc_t *rec;
10661 dtrace_state_t *state = ecb->dte_state;
b0d623f7
A
10662 dtrace_optval_t *opt = state->dts_options;
10663 dtrace_optval_t nframes=0, strsize;
2d21ac55
A
10664 uint64_t arg = desc->dtad_arg;
10665
5ba3f43e 10666 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
10667 ASSERT(ecb->dte_action == NULL || ecb->dte_action->dta_refcnt == 1);
10668
10669 if (DTRACEACT_ISAGG(desc->dtad_kind)) {
10670 /*
10671 * If this is an aggregating action, there must be neither
10672 * a speculate nor a commit on the action chain.
10673 */
10674 dtrace_action_t *act;
10675
10676 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
10677 if (act->dta_kind == DTRACEACT_COMMIT)
10678 return (EINVAL);
10679
10680 if (act->dta_kind == DTRACEACT_SPECULATE)
10681 return (EINVAL);
10682 }
10683
10684 action = dtrace_ecb_aggregation_create(ecb, desc);
10685
10686 if (action == NULL)
10687 return (EINVAL);
10688 } else {
10689 if (DTRACEACT_ISDESTRUCTIVE(desc->dtad_kind) ||
10690 (desc->dtad_kind == DTRACEACT_DIFEXPR &&
10691 dp != NULL && dp->dtdo_destructive)) {
10692 state->dts_destructive = 1;
10693 }
10694
10695 switch (desc->dtad_kind) {
10696 case DTRACEACT_PRINTF:
10697 case DTRACEACT_PRINTA:
10698 case DTRACEACT_SYSTEM:
10699 case DTRACEACT_FREOPEN:
3e170ce0 10700 case DTRACEACT_DIFEXPR:
2d21ac55
A
10701 /*
10702 * We know that our arg is a string -- turn it into a
10703 * format.
10704 */
fe8ab488 10705 if (arg == 0) {
3e170ce0
A
10706 ASSERT(desc->dtad_kind == DTRACEACT_PRINTA ||
10707 desc->dtad_kind == DTRACEACT_DIFEXPR);
2d21ac55
A
10708 format = 0;
10709 } else {
fe8ab488 10710 ASSERT(arg != 0);
b0d623f7 10711 ASSERT(arg > KERNELBASE);
2d21ac55
A
10712 format = dtrace_format_add(state,
10713 (char *)(uintptr_t)arg);
10714 }
10715
10716 /*FALLTHROUGH*/
10717 case DTRACEACT_LIBACT:
fe8ab488
A
10718 case DTRACEACT_TRACEMEM:
10719 case DTRACEACT_TRACEMEM_DYNSIZE:
10720 case DTRACEACT_APPLEBINARY: /* __APPLE__ */
2d21ac55
A
10721 if (dp == NULL)
10722 return (EINVAL);
10723
10724 if ((size = dp->dtdo_rtype.dtdt_size) != 0)
10725 break;
10726
10727 if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) {
10728 if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
10729 return (EINVAL);
10730
10731 size = opt[DTRACEOPT_STRSIZE];
10732 }
10733
10734 break;
10735
10736 case DTRACEACT_STACK:
10737 if ((nframes = arg) == 0) {
10738 nframes = opt[DTRACEOPT_STACKFRAMES];
10739 ASSERT(nframes > 0);
10740 arg = nframes;
10741 }
10742
10743 size = nframes * sizeof (pc_t);
10744 break;
10745
10746 case DTRACEACT_JSTACK:
10747 if ((strsize = DTRACE_USTACK_STRSIZE(arg)) == 0)
10748 strsize = opt[DTRACEOPT_JSTACKSTRSIZE];
10749
10750 if ((nframes = DTRACE_USTACK_NFRAMES(arg)) == 0)
10751 nframes = opt[DTRACEOPT_JSTACKFRAMES];
10752
10753 arg = DTRACE_USTACK_ARG(nframes, strsize);
10754
10755 /*FALLTHROUGH*/
10756 case DTRACEACT_USTACK:
10757 if (desc->dtad_kind != DTRACEACT_JSTACK &&
10758 (nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) {
10759 strsize = DTRACE_USTACK_STRSIZE(arg);
10760 nframes = opt[DTRACEOPT_USTACKFRAMES];
10761 ASSERT(nframes > 0);
10762 arg = DTRACE_USTACK_ARG(nframes, strsize);
10763 }
10764
10765 /*
10766 * Save a slot for the pid.
10767 */
10768 size = (nframes + 1) * sizeof (uint64_t);
10769 size += DTRACE_USTACK_STRSIZE(arg);
10770 size = P2ROUNDUP(size, (uint32_t)(sizeof (uintptr_t)));
10771
10772 break;
10773
10774 case DTRACEACT_SYM:
10775 case DTRACEACT_MOD:
10776 if (dp == NULL || ((size = dp->dtdo_rtype.dtdt_size) !=
10777 sizeof (uint64_t)) ||
10778 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
10779 return (EINVAL);
10780 break;
10781
10782 case DTRACEACT_USYM:
10783 case DTRACEACT_UMOD:
10784 case DTRACEACT_UADDR:
10785 if (dp == NULL ||
10786 (dp->dtdo_rtype.dtdt_size != sizeof (uint64_t)) ||
10787 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
10788 return (EINVAL);
10789
10790 /*
10791 * We have a slot for the pid, plus a slot for the
10792 * argument. To keep things simple (aligned with
10793 * bitness-neutral sizing), we store each as a 64-bit
10794 * quantity.
10795 */
10796 size = 2 * sizeof (uint64_t);
10797 break;
10798
10799 case DTRACEACT_STOP:
10800 case DTRACEACT_BREAKPOINT:
10801 case DTRACEACT_PANIC:
10802 break;
10803
10804 case DTRACEACT_CHILL:
10805 case DTRACEACT_DISCARD:
10806 case DTRACEACT_RAISE:
fe8ab488 10807 case DTRACEACT_PIDRESUME: /* __APPLE__ */
2d21ac55
A
10808 if (dp == NULL)
10809 return (EINVAL);
10810 break;
10811
10812 case DTRACEACT_EXIT:
10813 if (dp == NULL ||
10814 (size = dp->dtdo_rtype.dtdt_size) != sizeof (int) ||
10815 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
10816 return (EINVAL);
10817 break;
10818
10819 case DTRACEACT_SPECULATE:
04b8595b 10820 if (ecb->dte_size > sizeof (dtrace_rechdr_t))
2d21ac55
A
10821 return (EINVAL);
10822
10823 if (dp == NULL)
10824 return (EINVAL);
10825
10826 state->dts_speculates = 1;
10827 break;
10828
10829 case DTRACEACT_COMMIT: {
10830 dtrace_action_t *act = ecb->dte_action;
10831
10832 for (; act != NULL; act = act->dta_next) {
10833 if (act->dta_kind == DTRACEACT_COMMIT)
10834 return (EINVAL);
10835 }
10836
10837 if (dp == NULL)
10838 return (EINVAL);
10839 break;
10840 }
10841
10842 default:
10843 return (EINVAL);
10844 }
10845
10846 if (size != 0 || desc->dtad_kind == DTRACEACT_SPECULATE) {
10847 /*
10848 * If this is a data-storing action or a speculate,
10849 * we must be sure that there isn't a commit on the
10850 * action chain.
10851 */
10852 dtrace_action_t *act = ecb->dte_action;
10853
10854 for (; act != NULL; act = act->dta_next) {
10855 if (act->dta_kind == DTRACEACT_COMMIT)
10856 return (EINVAL);
10857 }
10858 }
10859
10860 action = kmem_zalloc(sizeof (dtrace_action_t), KM_SLEEP);
10861 action->dta_rec.dtrd_size = size;
10862 }
10863
10864 action->dta_refcnt = 1;
10865 rec = &action->dta_rec;
10866 size = rec->dtrd_size;
10867
10868 for (mask = sizeof (uint64_t) - 1; size != 0 && mask > 0; mask >>= 1) {
10869 if (!(size & mask)) {
10870 align = mask + 1;
10871 break;
10872 }
10873 }
10874
10875 action->dta_kind = desc->dtad_kind;
10876
10877 if ((action->dta_difo = dp) != NULL)
10878 dtrace_difo_hold(dp);
10879
10880 rec->dtrd_action = action->dta_kind;
10881 rec->dtrd_arg = arg;
10882 rec->dtrd_uarg = desc->dtad_uarg;
10883 rec->dtrd_alignment = (uint16_t)align;
10884 rec->dtrd_format = format;
10885
10886 if ((last = ecb->dte_action_last) != NULL) {
10887 ASSERT(ecb->dte_action != NULL);
10888 action->dta_prev = last;
10889 last->dta_next = action;
10890 } else {
10891 ASSERT(ecb->dte_action == NULL);
10892 ecb->dte_action = action;
10893 }
10894
10895 ecb->dte_action_last = action;
10896
10897 return (0);
10898}
10899
10900static void
10901dtrace_ecb_action_remove(dtrace_ecb_t *ecb)
10902{
10903 dtrace_action_t *act = ecb->dte_action, *next;
10904 dtrace_vstate_t *vstate = &ecb->dte_state->dts_vstate;
10905 dtrace_difo_t *dp;
10906 uint16_t format;
10907
10908 if (act != NULL && act->dta_refcnt > 1) {
10909 ASSERT(act->dta_next == NULL || act->dta_next->dta_refcnt == 1);
10910 act->dta_refcnt--;
10911 } else {
10912 for (; act != NULL; act = next) {
10913 next = act->dta_next;
10914 ASSERT(next != NULL || act == ecb->dte_action_last);
10915 ASSERT(act->dta_refcnt == 1);
10916
10917 if ((format = act->dta_rec.dtrd_format) != 0)
10918 dtrace_format_remove(ecb->dte_state, format);
10919
10920 if ((dp = act->dta_difo) != NULL)
10921 dtrace_difo_release(dp, vstate);
10922
10923 if (DTRACEACT_ISAGG(act->dta_kind)) {
10924 dtrace_ecb_aggregation_destroy(ecb, act);
10925 } else {
10926 kmem_free(act, sizeof (dtrace_action_t));
10927 }
10928 }
10929 }
10930
10931 ecb->dte_action = NULL;
10932 ecb->dte_action_last = NULL;
04b8595b 10933 ecb->dte_size = 0;
2d21ac55
A
10934}
10935
10936static void
10937dtrace_ecb_disable(dtrace_ecb_t *ecb)
10938{
10939 /*
10940 * We disable the ECB by removing it from its probe.
10941 */
10942 dtrace_ecb_t *pecb, *prev = NULL;
10943 dtrace_probe_t *probe = ecb->dte_probe;
10944
5ba3f43e 10945 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
10946
10947 if (probe == NULL) {
10948 /*
10949 * This is the NULL probe; there is nothing to disable.
10950 */
10951 return;
10952 }
10953
10954 for (pecb = probe->dtpr_ecb; pecb != NULL; pecb = pecb->dte_next) {
10955 if (pecb == ecb)
10956 break;
10957 prev = pecb;
10958 }
10959
10960 ASSERT(pecb != NULL);
10961
10962 if (prev == NULL) {
10963 probe->dtpr_ecb = ecb->dte_next;
10964 } else {
10965 prev->dte_next = ecb->dte_next;
10966 }
10967
10968 if (ecb == probe->dtpr_ecb_last) {
10969 ASSERT(ecb->dte_next == NULL);
10970 probe->dtpr_ecb_last = prev;
10971 }
10972
fe8ab488 10973 probe->dtpr_provider->dtpv_ecb_count--;
2d21ac55
A
10974 /*
10975 * The ECB has been disconnected from the probe; now sync to assure
10976 * that all CPUs have seen the change before returning.
10977 */
10978 dtrace_sync();
10979
10980 if (probe->dtpr_ecb == NULL) {
10981 /*
10982 * That was the last ECB on the probe; clear the predicate
10983 * cache ID for the probe, disable it and sync one more time
10984 * to assure that we'll never hit it again.
10985 */
10986 dtrace_provider_t *prov = probe->dtpr_provider;
10987
10988 ASSERT(ecb->dte_next == NULL);
10989 ASSERT(probe->dtpr_ecb_last == NULL);
10990 probe->dtpr_predcache = DTRACE_CACHEIDNONE;
10991 prov->dtpv_pops.dtps_disable(prov->dtpv_arg,
10992 probe->dtpr_id, probe->dtpr_arg);
10993 dtrace_sync();
10994 } else {
10995 /*
10996 * There is at least one ECB remaining on the probe. If there
10997 * is _exactly_ one, set the probe's predicate cache ID to be
10998 * the predicate cache ID of the remaining ECB.
10999 */
11000 ASSERT(probe->dtpr_ecb_last != NULL);
11001 ASSERT(probe->dtpr_predcache == DTRACE_CACHEIDNONE);
11002
11003 if (probe->dtpr_ecb == probe->dtpr_ecb_last) {
11004 dtrace_predicate_t *p = probe->dtpr_ecb->dte_predicate;
11005
11006 ASSERT(probe->dtpr_ecb->dte_next == NULL);
11007
11008 if (p != NULL)
11009 probe->dtpr_predcache = p->dtp_cacheid;
11010 }
11011
11012 ecb->dte_next = NULL;
11013 }
11014}
11015
11016static void
11017dtrace_ecb_destroy(dtrace_ecb_t *ecb)
11018{
11019 dtrace_state_t *state = ecb->dte_state;
11020 dtrace_vstate_t *vstate = &state->dts_vstate;
11021 dtrace_predicate_t *pred;
11022 dtrace_epid_t epid = ecb->dte_epid;
11023
5ba3f43e 11024 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
11025 ASSERT(ecb->dte_next == NULL);
11026 ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb);
11027
11028 if ((pred = ecb->dte_predicate) != NULL)
11029 dtrace_predicate_release(pred, vstate);
11030
11031 dtrace_ecb_action_remove(ecb);
11032
11033 ASSERT(state->dts_ecbs[epid - 1] == ecb);
11034 state->dts_ecbs[epid - 1] = NULL;
11035
11036 kmem_free(ecb, sizeof (dtrace_ecb_t));
11037}
11038
11039static dtrace_ecb_t *
11040dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe,
11041 dtrace_enabling_t *enab)
11042{
11043 dtrace_ecb_t *ecb;
11044 dtrace_predicate_t *pred;
11045 dtrace_actdesc_t *act;
11046 dtrace_provider_t *prov;
11047 dtrace_ecbdesc_t *desc = enab->dten_current;
11048
5ba3f43e 11049 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
11050 ASSERT(state != NULL);
11051
11052 ecb = dtrace_ecb_add(state, probe);
11053 ecb->dte_uarg = desc->dted_uarg;
11054
11055 if ((pred = desc->dted_pred.dtpdd_predicate) != NULL) {
11056 dtrace_predicate_hold(pred);
11057 ecb->dte_predicate = pred;
11058 }
11059
11060 if (probe != NULL) {
11061 /*
11062 * If the provider shows more leg than the consumer is old
11063 * enough to see, we need to enable the appropriate implicit
11064 * predicate bits to prevent the ecb from activating at
11065 * revealing times.
11066 *
11067 * Providers specifying DTRACE_PRIV_USER at register time
11068 * are stating that they need the /proc-style privilege
11069 * model to be enforced, and this is what DTRACE_COND_OWNER
11070 * and DTRACE_COND_ZONEOWNER will then do at probe time.
11071 */
11072 prov = probe->dtpr_provider;
11073 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLPROC) &&
11074 (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
11075 ecb->dte_cond |= DTRACE_COND_OWNER;
11076
11077 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLZONE) &&
11078 (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
11079 ecb->dte_cond |= DTRACE_COND_ZONEOWNER;
11080
11081 /*
11082 * If the provider shows us kernel innards and the user
11083 * is lacking sufficient privilege, enable the
11084 * DTRACE_COND_USERMODE implicit predicate.
11085 */
11086 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) &&
11087 (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_KERNEL))
11088 ecb->dte_cond |= DTRACE_COND_USERMODE;
11089 }
11090
11091 if (dtrace_ecb_create_cache != NULL) {
11092 /*
11093 * If we have a cached ecb, we'll use its action list instead
11094 * of creating our own (saving both time and space).
11095 */
11096 dtrace_ecb_t *cached = dtrace_ecb_create_cache;
c910b4d9 11097 dtrace_action_t *act_if = cached->dte_action;
2d21ac55 11098
c910b4d9
A
11099 if (act_if != NULL) {
11100 ASSERT(act_if->dta_refcnt > 0);
11101 act_if->dta_refcnt++;
11102 ecb->dte_action = act_if;
2d21ac55
A
11103 ecb->dte_action_last = cached->dte_action_last;
11104 ecb->dte_needed = cached->dte_needed;
11105 ecb->dte_size = cached->dte_size;
11106 ecb->dte_alignment = cached->dte_alignment;
11107 }
11108
11109 return (ecb);
11110 }
11111
11112 for (act = desc->dted_action; act != NULL; act = act->dtad_next) {
11113 if ((enab->dten_error = dtrace_ecb_action_add(ecb, act)) != 0) {
11114 dtrace_ecb_destroy(ecb);
11115 return (NULL);
11116 }
11117 }
11118
39037602
A
11119 if ((enab->dten_error = dtrace_ecb_resize(ecb)) != 0) {
11120 dtrace_ecb_destroy(ecb);
11121 return (NULL);
11122 }
2d21ac55
A
11123
11124 return (dtrace_ecb_create_cache = ecb);
11125}
11126
11127static int
d190cdc3 11128dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg1, void *arg2)
2d21ac55
A
11129{
11130 dtrace_ecb_t *ecb;
d190cdc3
A
11131 dtrace_enabling_t *enab = arg1;
11132 dtrace_ecbdesc_t *ep = arg2;
2d21ac55
A
11133 dtrace_state_t *state = enab->dten_vstate->dtvs_state;
11134
11135 ASSERT(state != NULL);
11136
d190cdc3 11137 if (probe != NULL && ep != NULL && probe->dtpr_gen < ep->dted_probegen) {
2d21ac55
A
11138 /*
11139 * This probe was created in a generation for which this
11140 * enabling has previously created ECBs; we don't want to
11141 * enable it again, so just kick out.
11142 */
11143 return (DTRACE_MATCH_NEXT);
11144 }
11145
11146 if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL)
11147 return (DTRACE_MATCH_DONE);
11148
6d2010ae
A
11149 if (dtrace_ecb_enable(ecb) < 0)
11150 return (DTRACE_MATCH_FAIL);
11151
2d21ac55
A
11152 return (DTRACE_MATCH_NEXT);
11153}
11154
11155static dtrace_ecb_t *
11156dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id)
11157{
11158 dtrace_ecb_t *ecb;
b0d623f7 11159#pragma unused(ecb) /* __APPLE__ */
2d21ac55 11160
5ba3f43e 11161 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55 11162
fe8ab488 11163 if (id == 0 || id > (dtrace_epid_t)state->dts_necbs)
2d21ac55
A
11164 return (NULL);
11165
11166 ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL);
11167 ASSERT((ecb = state->dts_ecbs[id - 1]) == NULL || ecb->dte_epid == id);
11168
11169 return (state->dts_ecbs[id - 1]);
11170}
11171
11172static dtrace_aggregation_t *
11173dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id)
11174{
11175 dtrace_aggregation_t *agg;
b0d623f7 11176#pragma unused(agg) /* __APPLE__ */
2d21ac55 11177
5ba3f43e 11178 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55 11179
b0d623f7 11180 if (id == 0 || id > (dtrace_aggid_t)state->dts_naggregations)
2d21ac55
A
11181 return (NULL);
11182
11183 ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL);
11184 ASSERT((agg = state->dts_aggregations[id - 1]) == NULL ||
11185 agg->dtag_id == id);
11186
11187 return (state->dts_aggregations[id - 1]);
11188}
11189
11190/*
11191 * DTrace Buffer Functions
11192 *
11193 * The following functions manipulate DTrace buffers. Most of these functions
11194 * are called in the context of establishing or processing consumer state;
11195 * exceptions are explicitly noted.
11196 */
11197
11198/*
11199 * Note: called from cross call context. This function switches the two
11200 * buffers on a given CPU. The atomicity of this operation is assured by
11201 * disabling interrupts while the actual switch takes place; the disabling of
11202 * interrupts serializes the execution with any execution of dtrace_probe() on
11203 * the same CPU.
11204 */
11205static void
11206dtrace_buffer_switch(dtrace_buffer_t *buf)
11207{
11208 caddr_t tomax = buf->dtb_tomax;
11209 caddr_t xamot = buf->dtb_xamot;
11210 dtrace_icookie_t cookie;
04b8595b 11211 hrtime_t now;
2d21ac55
A
11212
11213 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
11214 ASSERT(!(buf->dtb_flags & DTRACEBUF_RING));
11215
11216 cookie = dtrace_interrupt_disable();
04b8595b 11217 now = dtrace_gethrtime();
2d21ac55
A
11218 buf->dtb_tomax = xamot;
11219 buf->dtb_xamot = tomax;
11220 buf->dtb_xamot_drops = buf->dtb_drops;
11221 buf->dtb_xamot_offset = buf->dtb_offset;
11222 buf->dtb_xamot_errors = buf->dtb_errors;
11223 buf->dtb_xamot_flags = buf->dtb_flags;
11224 buf->dtb_offset = 0;
11225 buf->dtb_drops = 0;
11226 buf->dtb_errors = 0;
11227 buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED);
04b8595b
A
11228 buf->dtb_interval = now - buf->dtb_switched;
11229 buf->dtb_switched = now;
39037602
A
11230 buf->dtb_cur_limit = buf->dtb_limit;
11231
2d21ac55
A
11232 dtrace_interrupt_enable(cookie);
11233}
11234
11235/*
11236 * Note: called from cross call context. This function activates a buffer
11237 * on a CPU. As with dtrace_buffer_switch(), the atomicity of the operation
11238 * is guaranteed by the disabling of interrupts.
11239 */
11240static void
11241dtrace_buffer_activate(dtrace_state_t *state)
11242{
11243 dtrace_buffer_t *buf;
11244 dtrace_icookie_t cookie = dtrace_interrupt_disable();
11245
11246 buf = &state->dts_buffer[CPU->cpu_id];
11247
11248 if (buf->dtb_tomax != NULL) {
11249 /*
11250 * We might like to assert that the buffer is marked inactive,
11251 * but this isn't necessarily true: the buffer for the CPU
11252 * that processes the BEGIN probe has its buffer activated
11253 * manually. In this case, we take the (harmless) action
11254 * re-clearing the bit INACTIVE bit.
11255 */
11256 buf->dtb_flags &= ~DTRACEBUF_INACTIVE;
11257 }
11258
11259 dtrace_interrupt_enable(cookie);
11260}
11261
fe8ab488
A
11262static int
11263dtrace_buffer_canalloc(size_t size)
11264{
11265 if (size > (UINT64_MAX - dtrace_buffer_memory_inuse))
11266 return (B_FALSE);
11267 if ((size + dtrace_buffer_memory_inuse) > dtrace_buffer_memory_maxsize)
11268 return (B_FALSE);
11269
11270 return (B_TRUE);
11271}
11272
2d21ac55 11273static int
39037602 11274dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t limit, size_t size, int flags,
2d21ac55
A
11275 processorid_t cpu)
11276{
6d2010ae 11277 dtrace_cpu_t *cp;
2d21ac55 11278 dtrace_buffer_t *buf;
fe8ab488 11279 size_t size_before_alloc = dtrace_buffer_memory_inuse;
2d21ac55 11280
5ba3f43e
A
11281 LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED);
11282 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55 11283
b0d623f7
A
11284 if (size > (size_t)dtrace_nonroot_maxsize &&
11285 !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE))
11286 return (EFBIG);
2d21ac55
A
11287
11288 cp = cpu_list;
11289
11290 do {
11291 if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
11292 continue;
11293
11294 buf = &bufs[cp->cpu_id];
11295
11296 /*
11297 * If there is already a buffer allocated for this CPU, it
11298 * is only possible that this is a DR event. In this case,
11299 * the buffer size must match our specified size.
11300 */
11301 if (buf->dtb_tomax != NULL) {
11302 ASSERT(buf->dtb_size == size);
11303 continue;
11304 }
11305
11306 ASSERT(buf->dtb_xamot == NULL);
11307
fe8ab488
A
11308 /* DTrace, please do not eat all the memory. */
11309 if (dtrace_buffer_canalloc(size) == B_FALSE)
11310 goto err;
2d21ac55
A
11311 if ((buf->dtb_tomax = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
11312 goto err;
fe8ab488 11313 dtrace_buffer_memory_inuse += size;
2d21ac55 11314
39037602
A
11315 /* Unsure that limit is always lower than size */
11316 limit = limit == size ? limit - 1 : limit;
11317 buf->dtb_cur_limit = limit;
11318 buf->dtb_limit = limit;
2d21ac55
A
11319 buf->dtb_size = size;
11320 buf->dtb_flags = flags;
11321 buf->dtb_offset = 0;
11322 buf->dtb_drops = 0;
11323
11324 if (flags & DTRACEBUF_NOSWITCH)
11325 continue;
11326
fe8ab488
A
11327 /* DTrace, please do not eat all the memory. */
11328 if (dtrace_buffer_canalloc(size) == B_FALSE)
11329 goto err;
2d21ac55
A
11330 if ((buf->dtb_xamot = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
11331 goto err;
fe8ab488 11332 dtrace_buffer_memory_inuse += size;
2d21ac55
A
11333 } while ((cp = cp->cpu_next) != cpu_list);
11334
fe8ab488
A
11335 ASSERT(dtrace_buffer_memory_inuse <= dtrace_buffer_memory_maxsize);
11336
2d21ac55
A
11337 return (0);
11338
11339err:
11340 cp = cpu_list;
11341
11342 do {
11343 if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
11344 continue;
11345
11346 buf = &bufs[cp->cpu_id];
11347
11348 if (buf->dtb_xamot != NULL) {
11349 ASSERT(buf->dtb_tomax != NULL);
11350 ASSERT(buf->dtb_size == size);
11351 kmem_free(buf->dtb_xamot, size);
11352 }
11353
11354 if (buf->dtb_tomax != NULL) {
11355 ASSERT(buf->dtb_size == size);
11356 kmem_free(buf->dtb_tomax, size);
11357 }
11358
11359 buf->dtb_tomax = NULL;
11360 buf->dtb_xamot = NULL;
11361 buf->dtb_size = 0;
11362 } while ((cp = cp->cpu_next) != cpu_list);
11363
fe8ab488
A
11364 /* Restore the size saved before allocating memory */
11365 dtrace_buffer_memory_inuse = size_before_alloc;
11366
2d21ac55
A
11367 return (ENOMEM);
11368}
11369
11370/*
11371 * Note: called from probe context. This function just increments the drop
11372 * count on a buffer. It has been made a function to allow for the
11373 * possibility of understanding the source of mysterious drop counts. (A
11374 * problem for which one may be particularly disappointed that DTrace cannot
11375 * be used to understand DTrace.)
11376 */
11377static void
11378dtrace_buffer_drop(dtrace_buffer_t *buf)
11379{
11380 buf->dtb_drops++;
11381}
11382
11383/*
11384 * Note: called from probe context. This function is called to reserve space
11385 * in a buffer. If mstate is non-NULL, sets the scratch base and size in the
11386 * mstate. Returns the new offset in the buffer, or a negative value if an
11387 * error has occurred.
11388 */
11389static intptr_t
11390dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align,
11391 dtrace_state_t *state, dtrace_mstate_t *mstate)
11392{
11393 intptr_t offs = buf->dtb_offset, soffs;
11394 intptr_t woffs;
11395 caddr_t tomax;
c910b4d9 11396 size_t total_off;
2d21ac55
A
11397
11398 if (buf->dtb_flags & DTRACEBUF_INACTIVE)
11399 return (-1);
11400
11401 if ((tomax = buf->dtb_tomax) == NULL) {
11402 dtrace_buffer_drop(buf);
11403 return (-1);
11404 }
11405
11406 if (!(buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL))) {
11407 while (offs & (align - 1)) {
11408 /*
11409 * Assert that our alignment is off by a number which
11410 * is itself sizeof (uint32_t) aligned.
11411 */
11412 ASSERT(!((align - (offs & (align - 1))) &
11413 (sizeof (uint32_t) - 1)));
11414 DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
11415 offs += sizeof (uint32_t);
11416 }
11417
39037602
A
11418 if ((uint64_t)(soffs = offs + needed) > buf->dtb_cur_limit) {
11419 if (buf->dtb_cur_limit == buf->dtb_limit) {
11420 buf->dtb_cur_limit = buf->dtb_size;
11421
11422 atomic_add_32(&state->dts_buf_over_limit, 1);
11423 /**
11424 * Set an AST on the current processor
11425 * so that we can wake up the process
11426 * outside of probe context, when we know
11427 * it is safe to do so
11428 */
11429 minor_t minor = getminor(state->dts_dev);
11430 ASSERT(minor < 32);
11431
11432 atomic_or_32(&dtrace_wake_clients, 1 << minor);
11433 ast_dtrace_on();
11434 }
11435 if ((uint64_t)soffs > buf->dtb_size) {
11436 dtrace_buffer_drop(buf);
11437 return (-1);
11438 }
2d21ac55
A
11439 }
11440
11441 if (mstate == NULL)
11442 return (offs);
11443
11444 mstate->dtms_scratch_base = (uintptr_t)tomax + soffs;
11445 mstate->dtms_scratch_size = buf->dtb_size - soffs;
11446 mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;
11447
11448 return (offs);
11449 }
11450
11451 if (buf->dtb_flags & DTRACEBUF_FILL) {
11452 if (state->dts_activity != DTRACE_ACTIVITY_COOLDOWN &&
11453 (buf->dtb_flags & DTRACEBUF_FULL))
11454 return (-1);
11455 goto out;
11456 }
11457
c910b4d9 11458 total_off = needed + (offs & (align - 1));
2d21ac55
A
11459
11460 /*
11461 * For a ring buffer, life is quite a bit more complicated. Before
11462 * we can store any padding, we need to adjust our wrapping offset.
11463 * (If we've never before wrapped or we're not about to, no adjustment
11464 * is required.)
11465 */
11466 if ((buf->dtb_flags & DTRACEBUF_WRAPPED) ||
c910b4d9 11467 offs + total_off > buf->dtb_size) {
2d21ac55
A
11468 woffs = buf->dtb_xamot_offset;
11469
c910b4d9 11470 if (offs + total_off > buf->dtb_size) {
2d21ac55
A
11471 /*
11472 * We can't fit in the end of the buffer. First, a
11473 * sanity check that we can fit in the buffer at all.
11474 */
c910b4d9 11475 if (total_off > buf->dtb_size) {
2d21ac55
A
11476 dtrace_buffer_drop(buf);
11477 return (-1);
11478 }
11479
11480 /*
11481 * We're going to be storing at the top of the buffer,
11482 * so now we need to deal with the wrapped offset. We
11483 * only reset our wrapped offset to 0 if it is
11484 * currently greater than the current offset. If it
11485 * is less than the current offset, it is because a
11486 * previous allocation induced a wrap -- but the
11487 * allocation didn't subsequently take the space due
11488 * to an error or false predicate evaluation. In this
11489 * case, we'll just leave the wrapped offset alone: if
11490 * the wrapped offset hasn't been advanced far enough
11491 * for this allocation, it will be adjusted in the
11492 * lower loop.
11493 */
11494 if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
11495 if (woffs >= offs)
11496 woffs = 0;
11497 } else {
11498 woffs = 0;
11499 }
11500
11501 /*
11502 * Now we know that we're going to be storing to the
11503 * top of the buffer and that there is room for us
11504 * there. We need to clear the buffer from the current
11505 * offset to the end (there may be old gunk there).
11506 */
b0d623f7 11507 while ((uint64_t)offs < buf->dtb_size)
2d21ac55
A
11508 tomax[offs++] = 0;
11509
11510 /*
11511 * We need to set our offset to zero. And because we
11512 * are wrapping, we need to set the bit indicating as
11513 * much. We can also adjust our needed space back
11514 * down to the space required by the ECB -- we know
11515 * that the top of the buffer is aligned.
11516 */
11517 offs = 0;
c910b4d9 11518 total_off = needed;
2d21ac55
A
11519 buf->dtb_flags |= DTRACEBUF_WRAPPED;
11520 } else {
11521 /*
11522 * There is room for us in the buffer, so we simply
11523 * need to check the wrapped offset.
11524 */
11525 if (woffs < offs) {
11526 /*
11527 * The wrapped offset is less than the offset.
11528 * This can happen if we allocated buffer space
11529 * that induced a wrap, but then we didn't
11530 * subsequently take the space due to an error
11531 * or false predicate evaluation. This is
11532 * okay; we know that _this_ allocation isn't
11533 * going to induce a wrap. We still can't
11534 * reset the wrapped offset to be zero,
11535 * however: the space may have been trashed in
11536 * the previous failed probe attempt. But at
11537 * least the wrapped offset doesn't need to
11538 * be adjusted at all...
11539 */
11540 goto out;
11541 }
11542 }
11543
b0d623f7 11544 while (offs + total_off > (size_t)woffs) {
2d21ac55
A
11545 dtrace_epid_t epid = *(uint32_t *)(tomax + woffs);
11546 size_t size;
11547
11548 if (epid == DTRACE_EPIDNONE) {
11549 size = sizeof (uint32_t);
11550 } else {
b0d623f7 11551 ASSERT(epid <= (dtrace_epid_t)state->dts_necbs);
2d21ac55
A
11552 ASSERT(state->dts_ecbs[epid - 1] != NULL);
11553
11554 size = state->dts_ecbs[epid - 1]->dte_size;
11555 }
11556
11557 ASSERT(woffs + size <= buf->dtb_size);
11558 ASSERT(size != 0);
11559
11560 if (woffs + size == buf->dtb_size) {
11561 /*
11562 * We've reached the end of the buffer; we want
11563 * to set the wrapped offset to 0 and break
11564 * out. However, if the offs is 0, then we're
11565 * in a strange edge-condition: the amount of
11566 * space that we want to reserve plus the size
11567 * of the record that we're overwriting is
11568 * greater than the size of the buffer. This
11569 * is problematic because if we reserve the
11570 * space but subsequently don't consume it (due
11571 * to a failed predicate or error) the wrapped
11572 * offset will be 0 -- yet the EPID at offset 0
11573 * will not be committed. This situation is
11574 * relatively easy to deal with: if we're in
11575 * this case, the buffer is indistinguishable
11576 * from one that hasn't wrapped; we need only
11577 * finish the job by clearing the wrapped bit,
11578 * explicitly setting the offset to be 0, and
11579 * zero'ing out the old data in the buffer.
11580 */
11581 if (offs == 0) {
11582 buf->dtb_flags &= ~DTRACEBUF_WRAPPED;
11583 buf->dtb_offset = 0;
c910b4d9 11584 woffs = total_off;
2d21ac55 11585
b0d623f7 11586 while ((uint64_t)woffs < buf->dtb_size)
2d21ac55
A
11587 tomax[woffs++] = 0;
11588 }
11589
11590 woffs = 0;
11591 break;
11592 }
11593
11594 woffs += size;
11595 }
11596
11597 /*
11598 * We have a wrapped offset. It may be that the wrapped offset
11599 * has become zero -- that's okay.
11600 */
11601 buf->dtb_xamot_offset = woffs;
11602 }
11603
11604out:
11605 /*
11606 * Now we can plow the buffer with any necessary padding.
11607 */
11608 while (offs & (align - 1)) {
11609 /*
11610 * Assert that our alignment is off by a number which
11611 * is itself sizeof (uint32_t) aligned.
11612 */
11613 ASSERT(!((align - (offs & (align - 1))) &
11614 (sizeof (uint32_t) - 1)));
11615 DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
11616 offs += sizeof (uint32_t);
11617 }
11618
11619 if (buf->dtb_flags & DTRACEBUF_FILL) {
11620 if (offs + needed > buf->dtb_size - state->dts_reserve) {
11621 buf->dtb_flags |= DTRACEBUF_FULL;
11622 return (-1);
11623 }
11624 }
11625
11626 if (mstate == NULL)
11627 return (offs);
11628
11629 /*
11630 * For ring buffers and fill buffers, the scratch space is always
11631 * the inactive buffer.
11632 */
11633 mstate->dtms_scratch_base = (uintptr_t)buf->dtb_xamot;
11634 mstate->dtms_scratch_size = buf->dtb_size;
11635 mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;
11636
11637 return (offs);
11638}
11639
11640static void
11641dtrace_buffer_polish(dtrace_buffer_t *buf)
11642{
11643 ASSERT(buf->dtb_flags & DTRACEBUF_RING);
5ba3f43e 11644 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
11645
11646 if (!(buf->dtb_flags & DTRACEBUF_WRAPPED))
11647 return;
11648
11649 /*
11650 * We need to polish the ring buffer. There are three cases:
11651 *
11652 * - The first (and presumably most common) is that there is no gap
11653 * between the buffer offset and the wrapped offset. In this case,
11654 * there is nothing in the buffer that isn't valid data; we can
11655 * mark the buffer as polished and return.
11656 *
11657 * - The second (less common than the first but still more common
11658 * than the third) is that there is a gap between the buffer offset
11659 * and the wrapped offset, and the wrapped offset is larger than the
11660 * buffer offset. This can happen because of an alignment issue, or
11661 * can happen because of a call to dtrace_buffer_reserve() that
11662 * didn't subsequently consume the buffer space. In this case,
11663 * we need to zero the data from the buffer offset to the wrapped
11664 * offset.
11665 *
11666 * - The third (and least common) is that there is a gap between the
11667 * buffer offset and the wrapped offset, but the wrapped offset is
11668 * _less_ than the buffer offset. This can only happen because a
11669 * call to dtrace_buffer_reserve() induced a wrap, but the space
11670 * was not subsequently consumed. In this case, we need to zero the
11671 * space from the offset to the end of the buffer _and_ from the
11672 * top of the buffer to the wrapped offset.
11673 */
11674 if (buf->dtb_offset < buf->dtb_xamot_offset) {
11675 bzero(buf->dtb_tomax + buf->dtb_offset,
11676 buf->dtb_xamot_offset - buf->dtb_offset);
11677 }
11678
11679 if (buf->dtb_offset > buf->dtb_xamot_offset) {
11680 bzero(buf->dtb_tomax + buf->dtb_offset,
11681 buf->dtb_size - buf->dtb_offset);
11682 bzero(buf->dtb_tomax, buf->dtb_xamot_offset);
11683 }
11684}
11685
11686static void
11687dtrace_buffer_free(dtrace_buffer_t *bufs)
11688{
11689 int i;
11690
c910b4d9 11691 for (i = 0; i < (int)NCPU; i++) {
2d21ac55
A
11692 dtrace_buffer_t *buf = &bufs[i];
11693
11694 if (buf->dtb_tomax == NULL) {
11695 ASSERT(buf->dtb_xamot == NULL);
11696 ASSERT(buf->dtb_size == 0);
11697 continue;
11698 }
11699
11700 if (buf->dtb_xamot != NULL) {
11701 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
11702 kmem_free(buf->dtb_xamot, buf->dtb_size);
fe8ab488
A
11703
11704 ASSERT(dtrace_buffer_memory_inuse >= buf->dtb_size);
11705 dtrace_buffer_memory_inuse -= buf->dtb_size;
2d21ac55
A
11706 }
11707
11708 kmem_free(buf->dtb_tomax, buf->dtb_size);
fe8ab488
A
11709 ASSERT(dtrace_buffer_memory_inuse >= buf->dtb_size);
11710 dtrace_buffer_memory_inuse -= buf->dtb_size;
11711
2d21ac55
A
11712 buf->dtb_size = 0;
11713 buf->dtb_tomax = NULL;
11714 buf->dtb_xamot = NULL;
11715 }
11716}
11717
11718/*
11719 * DTrace Enabling Functions
11720 */
11721static dtrace_enabling_t *
11722dtrace_enabling_create(dtrace_vstate_t *vstate)
11723{
11724 dtrace_enabling_t *enab;
11725
11726 enab = kmem_zalloc(sizeof (dtrace_enabling_t), KM_SLEEP);
11727 enab->dten_vstate = vstate;
11728
11729 return (enab);
11730}
11731
11732static void
11733dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb)
11734{
11735 dtrace_ecbdesc_t **ndesc;
11736 size_t osize, nsize;
11737
11738 /*
11739 * We can't add to enablings after we've enabled them, or after we've
11740 * retained them.
11741 */
11742 ASSERT(enab->dten_probegen == 0);
11743 ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);
11744
fe8ab488
A
11745 /* APPLE NOTE: this protects against gcc 4.0 botch on x86 */
11746 if (ecb == NULL) return;
2d21ac55
A
11747
11748 if (enab->dten_ndesc < enab->dten_maxdesc) {
11749 enab->dten_desc[enab->dten_ndesc++] = ecb;
11750 return;
11751 }
11752
11753 osize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);
11754
11755 if (enab->dten_maxdesc == 0) {
11756 enab->dten_maxdesc = 1;
11757 } else {
11758 enab->dten_maxdesc <<= 1;
11759 }
11760
11761 ASSERT(enab->dten_ndesc < enab->dten_maxdesc);
11762
11763 nsize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);
11764 ndesc = kmem_zalloc(nsize, KM_SLEEP);
11765 bcopy(enab->dten_desc, ndesc, osize);
11766 kmem_free(enab->dten_desc, osize);
11767
11768 enab->dten_desc = ndesc;
11769 enab->dten_desc[enab->dten_ndesc++] = ecb;
11770}
11771
11772static void
11773dtrace_enabling_addlike(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb,
11774 dtrace_probedesc_t *pd)
11775{
11776 dtrace_ecbdesc_t *new;
11777 dtrace_predicate_t *pred;
11778 dtrace_actdesc_t *act;
11779
11780 /*
11781 * We're going to create a new ECB description that matches the
11782 * specified ECB in every way, but has the specified probe description.
11783 */
11784 new = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP);
11785
11786 if ((pred = ecb->dted_pred.dtpdd_predicate) != NULL)
11787 dtrace_predicate_hold(pred);
11788
11789 for (act = ecb->dted_action; act != NULL; act = act->dtad_next)
11790 dtrace_actdesc_hold(act);
11791
11792 new->dted_action = ecb->dted_action;
11793 new->dted_pred = ecb->dted_pred;
11794 new->dted_probe = *pd;
11795 new->dted_uarg = ecb->dted_uarg;
11796
11797 dtrace_enabling_add(enab, new);
11798}
11799
11800static void
11801dtrace_enabling_dump(dtrace_enabling_t *enab)
11802{
11803 int i;
11804
11805 for (i = 0; i < enab->dten_ndesc; i++) {
11806 dtrace_probedesc_t *desc = &enab->dten_desc[i]->dted_probe;
11807
11808 cmn_err(CE_NOTE, "enabling probe %d (%s:%s:%s:%s)", i,
11809 desc->dtpd_provider, desc->dtpd_mod,
11810 desc->dtpd_func, desc->dtpd_name);
11811 }
11812}
11813
11814static void
11815dtrace_enabling_destroy(dtrace_enabling_t *enab)
11816{
11817 int i;
11818 dtrace_ecbdesc_t *ep;
11819 dtrace_vstate_t *vstate = enab->dten_vstate;
11820
5ba3f43e 11821 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
11822
11823 for (i = 0; i < enab->dten_ndesc; i++) {
11824 dtrace_actdesc_t *act, *next;
11825 dtrace_predicate_t *pred;
11826
11827 ep = enab->dten_desc[i];
11828
11829 if ((pred = ep->dted_pred.dtpdd_predicate) != NULL)
11830 dtrace_predicate_release(pred, vstate);
11831
11832 for (act = ep->dted_action; act != NULL; act = next) {
11833 next = act->dtad_next;
11834 dtrace_actdesc_release(act, vstate);
11835 }
11836
11837 kmem_free(ep, sizeof (dtrace_ecbdesc_t));
11838 }
11839
11840 kmem_free(enab->dten_desc,
11841 enab->dten_maxdesc * sizeof (dtrace_enabling_t *));
11842
11843 /*
11844 * If this was a retained enabling, decrement the dts_nretained count
11845 * and take it off of the dtrace_retained list.
11846 */
11847 if (enab->dten_prev != NULL || enab->dten_next != NULL ||
11848 dtrace_retained == enab) {
11849 ASSERT(enab->dten_vstate->dtvs_state != NULL);
11850 ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0);
11851 enab->dten_vstate->dtvs_state->dts_nretained--;
b0d623f7 11852 dtrace_retained_gen++;
2d21ac55
A
11853 }
11854
11855 if (enab->dten_prev == NULL) {
11856 if (dtrace_retained == enab) {
11857 dtrace_retained = enab->dten_next;
11858
11859 if (dtrace_retained != NULL)
11860 dtrace_retained->dten_prev = NULL;
11861 }
11862 } else {
11863 ASSERT(enab != dtrace_retained);
11864 ASSERT(dtrace_retained != NULL);
11865 enab->dten_prev->dten_next = enab->dten_next;
11866 }
11867
11868 if (enab->dten_next != NULL) {
11869 ASSERT(dtrace_retained != NULL);
11870 enab->dten_next->dten_prev = enab->dten_prev;
11871 }
11872
11873 kmem_free(enab, sizeof (dtrace_enabling_t));
11874}
11875
11876static int
11877dtrace_enabling_retain(dtrace_enabling_t *enab)
11878{
11879 dtrace_state_t *state;
11880
5ba3f43e 11881 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
11882 ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);
11883 ASSERT(enab->dten_vstate != NULL);
11884
11885 state = enab->dten_vstate->dtvs_state;
11886 ASSERT(state != NULL);
11887
11888 /*
11889 * We only allow each state to retain dtrace_retain_max enablings.
11890 */
11891 if (state->dts_nretained >= dtrace_retain_max)
11892 return (ENOSPC);
11893
11894 state->dts_nretained++;
b0d623f7 11895 dtrace_retained_gen++;
2d21ac55
A
11896
11897 if (dtrace_retained == NULL) {
11898 dtrace_retained = enab;
11899 return (0);
11900 }
11901
11902 enab->dten_next = dtrace_retained;
11903 dtrace_retained->dten_prev = enab;
11904 dtrace_retained = enab;
11905
11906 return (0);
11907}
11908
11909static int
11910dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match,
11911 dtrace_probedesc_t *create)
11912{
11913 dtrace_enabling_t *new, *enab;
11914 int found = 0, err = ENOENT;
11915
5ba3f43e 11916 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
11917 ASSERT(strlen(match->dtpd_provider) < DTRACE_PROVNAMELEN);
11918 ASSERT(strlen(match->dtpd_mod) < DTRACE_MODNAMELEN);
11919 ASSERT(strlen(match->dtpd_func) < DTRACE_FUNCNAMELEN);
11920 ASSERT(strlen(match->dtpd_name) < DTRACE_NAMELEN);
11921
11922 new = dtrace_enabling_create(&state->dts_vstate);
11923
11924 /*
11925 * Iterate over all retained enablings, looking for enablings that
11926 * match the specified state.
11927 */
11928 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
11929 int i;
11930
11931 /*
11932 * dtvs_state can only be NULL for helper enablings -- and
11933 * helper enablings can't be retained.
11934 */
11935 ASSERT(enab->dten_vstate->dtvs_state != NULL);
11936
11937 if (enab->dten_vstate->dtvs_state != state)
11938 continue;
11939
11940 /*
11941 * Now iterate over each probe description; we're looking for
11942 * an exact match to the specified probe description.
11943 */
11944 for (i = 0; i < enab->dten_ndesc; i++) {
11945 dtrace_ecbdesc_t *ep = enab->dten_desc[i];
11946 dtrace_probedesc_t *pd = &ep->dted_probe;
11947
fe8ab488 11948 /* APPLE NOTE: Darwin employs size bounded string operation. */
b0d623f7
A
11949 if (strncmp(pd->dtpd_provider, match->dtpd_provider, DTRACE_PROVNAMELEN))
11950 continue;
11951
11952 if (strncmp(pd->dtpd_mod, match->dtpd_mod, DTRACE_MODNAMELEN))
11953 continue;
11954
11955 if (strncmp(pd->dtpd_func, match->dtpd_func, DTRACE_FUNCNAMELEN))
11956 continue;
11957
11958 if (strncmp(pd->dtpd_name, match->dtpd_name, DTRACE_NAMELEN))
11959 continue;
2d21ac55
A
11960
11961 /*
11962 * We have a winning probe! Add it to our growing
11963 * enabling.
11964 */
11965 found = 1;
11966 dtrace_enabling_addlike(new, ep, create);
11967 }
11968 }
11969
11970 if (!found || (err = dtrace_enabling_retain(new)) != 0) {
11971 dtrace_enabling_destroy(new);
11972 return (err);
11973 }
11974
11975 return (0);
11976}
11977
11978static void
11979dtrace_enabling_retract(dtrace_state_t *state)
11980{
11981 dtrace_enabling_t *enab, *next;
11982
5ba3f43e 11983 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
11984
11985 /*
11986 * Iterate over all retained enablings, destroy the enablings retained
11987 * for the specified state.
11988 */
11989 for (enab = dtrace_retained; enab != NULL; enab = next) {
11990 next = enab->dten_next;
11991
11992 /*
11993 * dtvs_state can only be NULL for helper enablings -- and
11994 * helper enablings can't be retained.
11995 */
11996 ASSERT(enab->dten_vstate->dtvs_state != NULL);
11997
11998 if (enab->dten_vstate->dtvs_state == state) {
11999 ASSERT(state->dts_nretained > 0);
12000 dtrace_enabling_destroy(enab);
12001 }
12002 }
12003
12004 ASSERT(state->dts_nretained == 0);
12005}
12006
12007static int
39037602 12008dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched, dtrace_match_cond_t *cond)
2d21ac55
A
12009{
12010 int i = 0;
6d2010ae 12011 int total_matched = 0, matched = 0;
2d21ac55 12012
5ba3f43e
A
12013 LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED);
12014 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
12015
12016 for (i = 0; i < enab->dten_ndesc; i++) {
12017 dtrace_ecbdesc_t *ep = enab->dten_desc[i];
12018
12019 enab->dten_current = ep;
12020 enab->dten_error = 0;
12021
39037602
A
12022 /**
12023 * Before doing a dtrace_probe_enable, which is really
12024 * expensive, check that this enabling matches the matching precondition
12025 * if we have one
12026 */
12027 if (cond && (cond->dmc_func(&ep->dted_probe, cond->dmc_data) == 0)) {
12028 continue;
12029 }
6d2010ae
A
12030 /*
12031 * If a provider failed to enable a probe then get out and
12032 * let the consumer know we failed.
12033 */
d190cdc3 12034 if ((matched = dtrace_probe_enable(&ep->dted_probe, enab, ep)) < 0)
6d2010ae
A
12035 return (EBUSY);
12036
12037 total_matched += matched;
2d21ac55
A
12038
12039 if (enab->dten_error != 0) {
12040 /*
12041 * If we get an error half-way through enabling the
12042 * probes, we kick out -- perhaps with some number of
12043 * them enabled. Leaving enabled probes enabled may
12044 * be slightly confusing for user-level, but we expect
12045 * that no one will attempt to actually drive on in
12046 * the face of such errors. If this is an anonymous
12047 * enabling (indicated with a NULL nmatched pointer),
12048 * we cmn_err() a message. We aren't expecting to
12049 * get such an error -- such as it can exist at all,
12050 * it would be a result of corrupted DOF in the driver
12051 * properties.
12052 */
12053 if (nmatched == NULL) {
12054 cmn_err(CE_WARN, "dtrace_enabling_match() "
12055 "error on %p: %d", (void *)ep,
12056 enab->dten_error);
12057 }
12058
12059 return (enab->dten_error);
12060 }
d190cdc3
A
12061
12062 ep->dted_probegen = dtrace_probegen;
2d21ac55
A
12063 }
12064
2d21ac55 12065 if (nmatched != NULL)
6d2010ae 12066 *nmatched = total_matched;
2d21ac55
A
12067
12068 return (0);
12069}
12070
12071static void
39037602 12072dtrace_enabling_matchall_with_cond(dtrace_match_cond_t *cond)
2d21ac55
A
12073{
12074 dtrace_enabling_t *enab;
12075
12076 lck_mtx_lock(&cpu_lock);
12077 lck_mtx_lock(&dtrace_lock);
12078
12079 /*
b0d623f7
A
12080 * Iterate over all retained enablings to see if any probes match
12081 * against them. We only perform this operation on enablings for which
12082 * we have sufficient permissions by virtue of being in the global zone
12083 * or in the same zone as the DTrace client. Because we can be called
12084 * after dtrace_detach() has been called, we cannot assert that there
12085 * are retained enablings. We can safely load from dtrace_retained,
12086 * however: the taskq_destroy() at the end of dtrace_detach() will
12087 * block pending our completion.
2d21ac55 12088 */
2d21ac55 12089
fe8ab488
A
12090 /*
12091 * Darwin doesn't do zones.
12092 * Behave as if always in "global" zone."
12093 */
12094 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
39037602 12095 (void) dtrace_enabling_match(enab, NULL, cond);
2d21ac55
A
12096 }
12097
b0d623f7
A
12098 lck_mtx_unlock(&dtrace_lock);
12099 lck_mtx_unlock(&cpu_lock);
39037602
A
12100
12101}
12102
12103static void
12104dtrace_enabling_matchall(void)
12105{
12106 dtrace_enabling_matchall_with_cond(NULL);
2d21ac55
A
12107}
12108
39037602
A
12109
12110
2d21ac55
A
12111/*
12112 * If an enabling is to be enabled without having matched probes (that is, if
12113 * dtrace_state_go() is to be called on the underlying dtrace_state_t), the
12114 * enabling must be _primed_ by creating an ECB for every ECB description.
12115 * This must be done to assure that we know the number of speculations, the
12116 * number of aggregations, the minimum buffer size needed, etc. before we
12117 * transition out of DTRACE_ACTIVITY_INACTIVE. To do this without actually
12118 * enabling any probes, we create ECBs for every ECB decription, but with a
12119 * NULL probe -- which is exactly what this function does.
12120 */
12121static void
12122dtrace_enabling_prime(dtrace_state_t *state)
12123{
12124 dtrace_enabling_t *enab;
12125 int i;
12126
12127 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
12128 ASSERT(enab->dten_vstate->dtvs_state != NULL);
12129
12130 if (enab->dten_vstate->dtvs_state != state)
12131 continue;
12132
12133 /*
12134 * We don't want to prime an enabling more than once, lest
12135 * we allow a malicious user to induce resource exhaustion.
12136 * (The ECBs that result from priming an enabling aren't
12137 * leaked -- but they also aren't deallocated until the
12138 * consumer state is destroyed.)
12139 */
12140 if (enab->dten_primed)
12141 continue;
12142
12143 for (i = 0; i < enab->dten_ndesc; i++) {
12144 enab->dten_current = enab->dten_desc[i];
d190cdc3 12145 (void) dtrace_probe_enable(NULL, enab, NULL);
2d21ac55
A
12146 }
12147
12148 enab->dten_primed = 1;
12149 }
12150}
12151
12152/*
12153 * Called to indicate that probes should be provided due to retained
12154 * enablings. This is implemented in terms of dtrace_probe_provide(), but it
12155 * must take an initial lap through the enabling calling the dtps_provide()
12156 * entry point explicitly to allow for autocreated probes.
12157 */
12158static void
12159dtrace_enabling_provide(dtrace_provider_t *prv)
12160{
12161 int i, all = 0;
12162 dtrace_probedesc_t desc;
b0d623f7 12163 dtrace_genid_t gen;
2d21ac55 12164
5ba3f43e
A
12165 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
12166 LCK_MTX_ASSERT(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
12167
12168 if (prv == NULL) {
12169 all = 1;
12170 prv = dtrace_provider;
12171 }
12172
12173 do {
b0d623f7 12174 dtrace_enabling_t *enab;
2d21ac55
A
12175 void *parg = prv->dtpv_arg;
12176
b0d623f7
A
12177retry:
12178 gen = dtrace_retained_gen;
12179 for (enab = dtrace_retained; enab != NULL;
12180 enab = enab->dten_next) {
2d21ac55
A
12181 for (i = 0; i < enab->dten_ndesc; i++) {
12182 desc = enab->dten_desc[i]->dted_probe;
12183 lck_mtx_unlock(&dtrace_lock);
12184 prv->dtpv_pops.dtps_provide(parg, &desc);
12185 lck_mtx_lock(&dtrace_lock);
b0d623f7
A
12186 /*
12187 * Process the retained enablings again if
12188 * they have changed while we weren't holding
12189 * dtrace_lock.
12190 */
12191 if (gen != dtrace_retained_gen)
12192 goto retry;
2d21ac55
A
12193 }
12194 }
12195 } while (all && (prv = prv->dtpv_next) != NULL);
12196
12197 lck_mtx_unlock(&dtrace_lock);
12198 dtrace_probe_provide(NULL, all ? NULL : prv);
12199 lck_mtx_lock(&dtrace_lock);
12200}
12201
12202/*
12203 * DTrace DOF Functions
12204 */
12205/*ARGSUSED*/
12206static void
12207dtrace_dof_error(dof_hdr_t *dof, const char *str)
12208{
b0d623f7 12209#pragma unused(dof) /* __APPLE__ */
2d21ac55
A
12210 if (dtrace_err_verbose)
12211 cmn_err(CE_WARN, "failed to process DOF: %s", str);
12212
12213#ifdef DTRACE_ERRDEBUG
12214 dtrace_errdebug(str);
12215#endif
12216}
12217
12218/*
12219 * Create DOF out of a currently enabled state. Right now, we only create
12220 * DOF containing the run-time options -- but this could be expanded to create
12221 * complete DOF representing the enabled state.
12222 */
12223static dof_hdr_t *
12224dtrace_dof_create(dtrace_state_t *state)
12225{
12226 dof_hdr_t *dof;
12227 dof_sec_t *sec;
12228 dof_optdesc_t *opt;
12229 int i, len = sizeof (dof_hdr_t) +
12230 roundup(sizeof (dof_sec_t), sizeof (uint64_t)) +
12231 sizeof (dof_optdesc_t) * DTRACEOPT_MAX;
12232
5ba3f43e 12233 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55 12234
d9a64523 12235 dof = kmem_zalloc_aligned(len, 8, KM_SLEEP);
2d21ac55
A
12236 dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0;
12237 dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1;
12238 dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2;
12239 dof->dofh_ident[DOF_ID_MAG3] = DOF_MAG_MAG3;
12240
12241 dof->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_NATIVE;
12242 dof->dofh_ident[DOF_ID_ENCODING] = DOF_ENCODE_NATIVE;
12243 dof->dofh_ident[DOF_ID_VERSION] = DOF_VERSION;
12244 dof->dofh_ident[DOF_ID_DIFVERS] = DIF_VERSION;
12245 dof->dofh_ident[DOF_ID_DIFIREG] = DIF_DIR_NREGS;
12246 dof->dofh_ident[DOF_ID_DIFTREG] = DIF_DTR_NREGS;
12247
12248 dof->dofh_flags = 0;
12249 dof->dofh_hdrsize = sizeof (dof_hdr_t);
12250 dof->dofh_secsize = sizeof (dof_sec_t);
12251 dof->dofh_secnum = 1; /* only DOF_SECT_OPTDESC */
12252 dof->dofh_secoff = sizeof (dof_hdr_t);
12253 dof->dofh_loadsz = len;
12254 dof->dofh_filesz = len;
12255 dof->dofh_pad = 0;
12256
12257 /*
12258 * Fill in the option section header...
12259 */
12260 sec = (dof_sec_t *)((uintptr_t)dof + sizeof (dof_hdr_t));
12261 sec->dofs_type = DOF_SECT_OPTDESC;
12262 sec->dofs_align = sizeof (uint64_t);
12263 sec->dofs_flags = DOF_SECF_LOAD;
12264 sec->dofs_entsize = sizeof (dof_optdesc_t);
12265
12266 opt = (dof_optdesc_t *)((uintptr_t)sec +
12267 roundup(sizeof (dof_sec_t), sizeof (uint64_t)));
12268
12269 sec->dofs_offset = (uintptr_t)opt - (uintptr_t)dof;
12270 sec->dofs_size = sizeof (dof_optdesc_t) * DTRACEOPT_MAX;
12271
12272 for (i = 0; i < DTRACEOPT_MAX; i++) {
12273 opt[i].dofo_option = i;
12274 opt[i].dofo_strtab = DOF_SECIDX_NONE;
12275 opt[i].dofo_value = state->dts_options[i];
12276 }
12277
12278 return (dof);
12279}
12280
12281static dof_hdr_t *
b0d623f7 12282dtrace_dof_copyin(user_addr_t uarg, int *errp)
2d21ac55
A
12283{
12284 dof_hdr_t hdr, *dof;
12285
5ba3f43e 12286 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED);
2d21ac55
A
12287
12288 /*
12289 * First, we're going to copyin() the sizeof (dof_hdr_t).
12290 */
b0d623f7 12291 if (copyin(uarg, &hdr, sizeof (hdr)) != 0) {
2d21ac55
A
12292 dtrace_dof_error(NULL, "failed to copyin DOF header");
12293 *errp = EFAULT;
12294 return (NULL);
12295 }
12296
12297 /*
12298 * Now we'll allocate the entire DOF and copy it in -- provided
12299 * that the length isn't outrageous.
12300 */
b0d623f7 12301 if (hdr.dofh_loadsz >= (uint64_t)dtrace_dof_maxsize) {
2d21ac55
A
12302 dtrace_dof_error(&hdr, "load size exceeds maximum");
12303 *errp = E2BIG;
12304 return (NULL);
12305 }
12306
12307 if (hdr.dofh_loadsz < sizeof (hdr)) {
12308 dtrace_dof_error(&hdr, "invalid load size");
12309 *errp = EINVAL;
12310 return (NULL);
12311 }
12312
d9a64523 12313 dof = kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP);
2d21ac55 12314
6d2010ae
A
12315 if (copyin(uarg, dof, hdr.dofh_loadsz) != 0 ||
12316 dof->dofh_loadsz != hdr.dofh_loadsz) {
d9a64523 12317 kmem_free_aligned(dof, hdr.dofh_loadsz);
6d2010ae
A
12318 *errp = EFAULT;
12319 return (NULL);
12320 }
2d21ac55
A
12321
12322 return (dof);
12323}
12324
2d21ac55
A
12325static dof_hdr_t *
12326dtrace_dof_copyin_from_proc(proc_t* p, user_addr_t uarg, int *errp)
12327{
12328 dof_hdr_t hdr, *dof;
12329
5ba3f43e 12330 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED);
2d21ac55
A
12331
12332 /*
12333 * First, we're going to copyin() the sizeof (dof_hdr_t).
12334 */
12335 if (uread(p, &hdr, sizeof(hdr), uarg) != KERN_SUCCESS) {
12336 dtrace_dof_error(NULL, "failed to copyin DOF header");
12337 *errp = EFAULT;
12338 return (NULL);
12339 }
12340
12341 /*
12342 * Now we'll allocate the entire DOF and copy it in -- provided
12343 * that the length isn't outrageous.
12344 */
b0d623f7 12345 if (hdr.dofh_loadsz >= (uint64_t)dtrace_dof_maxsize) {
2d21ac55
A
12346 dtrace_dof_error(&hdr, "load size exceeds maximum");
12347 *errp = E2BIG;
12348 return (NULL);
12349 }
12350
12351 if (hdr.dofh_loadsz < sizeof (hdr)) {
12352 dtrace_dof_error(&hdr, "invalid load size");
12353 *errp = EINVAL;
12354 return (NULL);
12355 }
12356
d9a64523 12357 dof = kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP);
2d21ac55
A
12358
12359 if (uread(p, dof, hdr.dofh_loadsz, uarg) != KERN_SUCCESS) {
d9a64523 12360 kmem_free_aligned(dof, hdr.dofh_loadsz);
2d21ac55
A
12361 *errp = EFAULT;
12362 return (NULL);
12363 }
12364
12365 return (dof);
12366}
12367
a39ff7e2
A
12368static void
12369dtrace_dof_destroy(dof_hdr_t *dof)
12370{
d9a64523 12371 kmem_free_aligned(dof, dof->dofh_loadsz);
a39ff7e2
A
12372}
12373
2d21ac55
A
12374static dof_hdr_t *
12375dtrace_dof_property(const char *name)
12376{
d9a64523 12377 unsigned int len = 0;
2d21ac55
A
12378 dof_hdr_t *dof;
12379
a39ff7e2
A
12380 if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed()) {
12381 return NULL;
12382 }
12383
12384 if (!PEReadNVRAMProperty(name, NULL, &len)) {
12385 return NULL;
12386 }
2d21ac55 12387
d9a64523 12388 dof = kmem_alloc_aligned(len, 8, KM_SLEEP);
a39ff7e2
A
12389
12390 if (!PEReadNVRAMProperty(name, dof, &len)) {
12391 dtrace_dof_destroy(dof);
12392 dtrace_dof_error(NULL, "unreadable DOF");
12393 return NULL;
12394 }
2d21ac55
A
12395
12396 if (len < sizeof (dof_hdr_t)) {
a39ff7e2 12397 dtrace_dof_destroy(dof);
2d21ac55
A
12398 dtrace_dof_error(NULL, "truncated header");
12399 return (NULL);
12400 }
12401
a39ff7e2
A
12402 if (len < dof->dofh_loadsz) {
12403 dtrace_dof_destroy(dof);
2d21ac55
A
12404 dtrace_dof_error(NULL, "truncated DOF");
12405 return (NULL);
12406 }
12407
a39ff7e2
A
12408 if (len != dof->dofh_loadsz) {
12409 dtrace_dof_destroy(dof);
12410 dtrace_dof_error(NULL, "invalid DOF size");
2d21ac55
A
12411 return (NULL);
12412 }
12413
a39ff7e2
A
12414 if (dof->dofh_loadsz >= (uint64_t)dtrace_dof_maxsize) {
12415 dtrace_dof_destroy(dof);
12416 dtrace_dof_error(NULL, "oversized DOF");
12417 return (NULL);
12418 }
2d21ac55
A
12419
12420 return (dof);
12421}
12422
2d21ac55
A
12423/*
12424 * Return the dof_sec_t pointer corresponding to a given section index. If the
12425 * index is not valid, dtrace_dof_error() is called and NULL is returned. If
12426 * a type other than DOF_SECT_NONE is specified, the header is checked against
12427 * this type and NULL is returned if the types do not match.
12428 */
12429static dof_sec_t *
12430dtrace_dof_sect(dof_hdr_t *dof, uint32_t type, dof_secidx_t i)
12431{
12432 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)
12433 ((uintptr_t)dof + dof->dofh_secoff + i * dof->dofh_secsize);
12434
12435 if (i >= dof->dofh_secnum) {
12436 dtrace_dof_error(dof, "referenced section index is invalid");
12437 return (NULL);
12438 }
12439
12440 if (!(sec->dofs_flags & DOF_SECF_LOAD)) {
12441 dtrace_dof_error(dof, "referenced section is not loadable");
12442 return (NULL);
12443 }
12444
12445 if (type != DOF_SECT_NONE && type != sec->dofs_type) {
12446 dtrace_dof_error(dof, "referenced section is the wrong type");
12447 return (NULL);
12448 }
12449
12450 return (sec);
12451}
12452
12453static dtrace_probedesc_t *
12454dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc)
12455{
12456 dof_probedesc_t *probe;
12457 dof_sec_t *strtab;
12458 uintptr_t daddr = (uintptr_t)dof;
12459 uintptr_t str;
12460 size_t size;
12461
12462 if (sec->dofs_type != DOF_SECT_PROBEDESC) {
12463 dtrace_dof_error(dof, "invalid probe section");
12464 return (NULL);
12465 }
12466
12467 if (sec->dofs_align != sizeof (dof_secidx_t)) {
12468 dtrace_dof_error(dof, "bad alignment in probe description");
12469 return (NULL);
12470 }
12471
12472 if (sec->dofs_offset + sizeof (dof_probedesc_t) > dof->dofh_loadsz) {
12473 dtrace_dof_error(dof, "truncated probe description");
12474 return (NULL);
12475 }
12476
12477 probe = (dof_probedesc_t *)(uintptr_t)(daddr + sec->dofs_offset);
12478 strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, probe->dofp_strtab);
12479
12480 if (strtab == NULL)
12481 return (NULL);
12482
12483 str = daddr + strtab->dofs_offset;
12484 size = strtab->dofs_size;
12485
12486 if (probe->dofp_provider >= strtab->dofs_size) {
12487 dtrace_dof_error(dof, "corrupt probe provider");
12488 return (NULL);
12489 }
12490
12491 (void) strncpy(desc->dtpd_provider,
12492 (char *)(str + probe->dofp_provider),
12493 MIN(DTRACE_PROVNAMELEN - 1, size - probe->dofp_provider));
fe8ab488
A
12494
12495 /* APPLE NOTE: Darwin employs size bounded string operation. */
b0d623f7 12496 desc->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
2d21ac55
A
12497
12498 if (probe->dofp_mod >= strtab->dofs_size) {
12499 dtrace_dof_error(dof, "corrupt probe module");
12500 return (NULL);
12501 }
12502
12503 (void) strncpy(desc->dtpd_mod, (char *)(str + probe->dofp_mod),
12504 MIN(DTRACE_MODNAMELEN - 1, size - probe->dofp_mod));
fe8ab488
A
12505
12506 /* APPLE NOTE: Darwin employs size bounded string operation. */
b0d623f7 12507 desc->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
2d21ac55
A
12508
12509 if (probe->dofp_func >= strtab->dofs_size) {
12510 dtrace_dof_error(dof, "corrupt probe function");
12511 return (NULL);
12512 }
12513
12514 (void) strncpy(desc->dtpd_func, (char *)(str + probe->dofp_func),
12515 MIN(DTRACE_FUNCNAMELEN - 1, size - probe->dofp_func));
fe8ab488
A
12516
12517 /* APPLE NOTE: Darwin employs size bounded string operation. */
b0d623f7 12518 desc->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
2d21ac55
A
12519
12520 if (probe->dofp_name >= strtab->dofs_size) {
12521 dtrace_dof_error(dof, "corrupt probe name");
12522 return (NULL);
12523 }
12524
12525 (void) strncpy(desc->dtpd_name, (char *)(str + probe->dofp_name),
12526 MIN(DTRACE_NAMELEN - 1, size - probe->dofp_name));
fe8ab488
A
12527
12528 /* APPLE NOTE: Darwin employs size bounded string operation. */
b0d623f7 12529 desc->dtpd_name[DTRACE_NAMELEN - 1] = '\0';
2d21ac55
A
12530
12531 return (desc);
12532}
12533
12534static dtrace_difo_t *
12535dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
12536 cred_t *cr)
12537{
12538 dtrace_difo_t *dp;
12539 size_t ttl = 0;
12540 dof_difohdr_t *dofd;
12541 uintptr_t daddr = (uintptr_t)dof;
c910b4d9 12542 size_t max_size = dtrace_difo_maxsize;
b0d623f7
A
12543 uint_t i;
12544 int l, n;
b0d623f7 12545
2d21ac55
A
12546
12547 static const struct {
12548 int section;
12549 int bufoffs;
12550 int lenoffs;
12551 int entsize;
12552 int align;
12553 const char *msg;
12554 } difo[] = {
12555 { DOF_SECT_DIF, offsetof(dtrace_difo_t, dtdo_buf),
12556 offsetof(dtrace_difo_t, dtdo_len), sizeof (dif_instr_t),
12557 sizeof (dif_instr_t), "multiple DIF sections" },
12558
12559 { DOF_SECT_INTTAB, offsetof(dtrace_difo_t, dtdo_inttab),
12560 offsetof(dtrace_difo_t, dtdo_intlen), sizeof (uint64_t),
12561 sizeof (uint64_t), "multiple integer tables" },
12562
12563 { DOF_SECT_STRTAB, offsetof(dtrace_difo_t, dtdo_strtab),
12564 offsetof(dtrace_difo_t, dtdo_strlen), 0,
12565 sizeof (char), "multiple string tables" },
12566
12567 { DOF_SECT_VARTAB, offsetof(dtrace_difo_t, dtdo_vartab),
12568 offsetof(dtrace_difo_t, dtdo_varlen), sizeof (dtrace_difv_t),
12569 sizeof (uint_t), "multiple variable tables" },
12570
2d21ac55 12571 { DOF_SECT_NONE, 0, 0, 0, 0, NULL }
2d21ac55
A
12572 };
12573
12574 if (sec->dofs_type != DOF_SECT_DIFOHDR) {
12575 dtrace_dof_error(dof, "invalid DIFO header section");
12576 return (NULL);
12577 }
12578
12579 if (sec->dofs_align != sizeof (dof_secidx_t)) {
12580 dtrace_dof_error(dof, "bad alignment in DIFO header");
12581 return (NULL);
12582 }
12583
12584 if (sec->dofs_size < sizeof (dof_difohdr_t) ||
12585 sec->dofs_size % sizeof (dof_secidx_t)) {
12586 dtrace_dof_error(dof, "bad size in DIFO header");
12587 return (NULL);
12588 }
12589
12590 dofd = (dof_difohdr_t *)(uintptr_t)(daddr + sec->dofs_offset);
12591 n = (sec->dofs_size - sizeof (*dofd)) / sizeof (dof_secidx_t) + 1;
12592
12593 dp = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP);
12594 dp->dtdo_rtype = dofd->dofd_rtype;
12595
12596 for (l = 0; l < n; l++) {
12597 dof_sec_t *subsec;
12598 void **bufp;
12599 uint32_t *lenp;
12600
12601 if ((subsec = dtrace_dof_sect(dof, DOF_SECT_NONE,
12602 dofd->dofd_links[l])) == NULL)
12603 goto err; /* invalid section link */
12604
c910b4d9 12605 if (ttl + subsec->dofs_size > max_size) {
2d21ac55
A
12606 dtrace_dof_error(dof, "exceeds maximum size");
12607 goto err;
12608 }
12609
12610 ttl += subsec->dofs_size;
12611
12612 for (i = 0; difo[i].section != DOF_SECT_NONE; i++) {
b0d623f7 12613
b0d623f7
A
12614 if (subsec->dofs_type != (uint32_t)difo[i].section)
12615 continue;
2d21ac55
A
12616
12617 if (!(subsec->dofs_flags & DOF_SECF_LOAD)) {
12618 dtrace_dof_error(dof, "section not loaded");
12619 goto err;
12620 }
12621
b0d623f7
A
12622 if (subsec->dofs_align != (uint32_t)difo[i].align) {
12623 dtrace_dof_error(dof, "bad alignment");
12624 goto err;
12625 }
2d21ac55
A
12626
12627 bufp = (void **)((uintptr_t)dp + difo[i].bufoffs);
12628 lenp = (uint32_t *)((uintptr_t)dp + difo[i].lenoffs);
12629
12630 if (*bufp != NULL) {
12631 dtrace_dof_error(dof, difo[i].msg);
12632 goto err;
12633 }
12634
b0d623f7
A
12635 if ((uint32_t)difo[i].entsize != subsec->dofs_entsize) {
12636 dtrace_dof_error(dof, "entry size mismatch");
12637 goto err;
12638 }
2d21ac55
A
12639
12640 if (subsec->dofs_entsize != 0 &&
12641 (subsec->dofs_size % subsec->dofs_entsize) != 0) {
12642 dtrace_dof_error(dof, "corrupt entry size");
12643 goto err;
12644 }
12645
12646 *lenp = subsec->dofs_size;
12647 *bufp = kmem_alloc(subsec->dofs_size, KM_SLEEP);
12648 bcopy((char *)(uintptr_t)(daddr + subsec->dofs_offset),
12649 *bufp, subsec->dofs_size);
12650
12651 if (subsec->dofs_entsize != 0)
12652 *lenp /= subsec->dofs_entsize;
12653
12654 break;
12655 }
12656
12657 /*
12658 * If we encounter a loadable DIFO sub-section that is not
12659 * known to us, assume this is a broken program and fail.
12660 */
12661 if (difo[i].section == DOF_SECT_NONE &&
12662 (subsec->dofs_flags & DOF_SECF_LOAD)) {
12663 dtrace_dof_error(dof, "unrecognized DIFO subsection");
12664 goto err;
12665 }
12666 }
b0d623f7 12667
2d21ac55
A
12668 if (dp->dtdo_buf == NULL) {
12669 /*
12670 * We can't have a DIF object without DIF text.
12671 */
12672 dtrace_dof_error(dof, "missing DIF text");
12673 goto err;
12674 }
12675
12676 /*
12677 * Before we validate the DIF object, run through the variable table
12678 * looking for the strings -- if any of their size are under, we'll set
12679 * their size to be the system-wide default string size. Note that
12680 * this should _not_ happen if the "strsize" option has been set --
12681 * in this case, the compiler should have set the size to reflect the
12682 * setting of the option.
12683 */
12684 for (i = 0; i < dp->dtdo_varlen; i++) {
12685 dtrace_difv_t *v = &dp->dtdo_vartab[i];
12686 dtrace_diftype_t *t = &v->dtdv_type;
12687
12688 if (v->dtdv_id < DIF_VAR_OTHER_UBASE)
12689 continue;
12690
12691 if (t->dtdt_kind == DIF_TYPE_STRING && t->dtdt_size == 0)
12692 t->dtdt_size = dtrace_strsize_default;
12693 }
12694
12695 if (dtrace_difo_validate(dp, vstate, DIF_DIR_NREGS, cr) != 0)
12696 goto err;
12697
12698 dtrace_difo_init(dp, vstate);
12699 return (dp);
12700
12701err:
12702 kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t));
12703 kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t));
12704 kmem_free(dp->dtdo_strtab, dp->dtdo_strlen);
12705 kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t));
12706
12707 kmem_free(dp, sizeof (dtrace_difo_t));
12708 return (NULL);
12709}
12710
12711static dtrace_predicate_t *
12712dtrace_dof_predicate(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
12713 cred_t *cr)
12714{
12715 dtrace_difo_t *dp;
12716
12717 if ((dp = dtrace_dof_difo(dof, sec, vstate, cr)) == NULL)
12718 return (NULL);
12719
12720 return (dtrace_predicate_create(dp));
12721}
12722
12723static dtrace_actdesc_t *
12724dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
12725 cred_t *cr)
12726{
12727 dtrace_actdesc_t *act, *first = NULL, *last = NULL, *next;
12728 dof_actdesc_t *desc;
12729 dof_sec_t *difosec;
12730 size_t offs;
12731 uintptr_t daddr = (uintptr_t)dof;
12732 uint64_t arg;
12733 dtrace_actkind_t kind;
12734
12735 if (sec->dofs_type != DOF_SECT_ACTDESC) {
12736 dtrace_dof_error(dof, "invalid action section");
12737 return (NULL);
12738 }
12739
12740 if (sec->dofs_offset + sizeof (dof_actdesc_t) > dof->dofh_loadsz) {
12741 dtrace_dof_error(dof, "truncated action description");
12742 return (NULL);
12743 }
12744
12745 if (sec->dofs_align != sizeof (uint64_t)) {
12746 dtrace_dof_error(dof, "bad alignment in action description");
12747 return (NULL);
12748 }
12749
12750 if (sec->dofs_size < sec->dofs_entsize) {
12751 dtrace_dof_error(dof, "section entry size exceeds total size");
12752 return (NULL);
12753 }
12754
12755 if (sec->dofs_entsize != sizeof (dof_actdesc_t)) {
12756 dtrace_dof_error(dof, "bad entry size in action description");
12757 return (NULL);
12758 }
12759
12760 if (sec->dofs_size / sec->dofs_entsize > dtrace_actions_max) {
12761 dtrace_dof_error(dof, "actions exceed dtrace_actions_max");
12762 return (NULL);
12763 }
12764
12765 for (offs = 0; offs < sec->dofs_size; offs += sec->dofs_entsize) {
12766 desc = (dof_actdesc_t *)(daddr +
12767 (uintptr_t)sec->dofs_offset + offs);
12768 kind = (dtrace_actkind_t)desc->dofa_kind;
12769
3e170ce0
A
12770 if ((DTRACEACT_ISPRINTFLIKE(kind) &&
12771 (kind != DTRACEACT_PRINTA || desc->dofa_strtab != DOF_SECIDX_NONE)) ||
12772 (kind == DTRACEACT_DIFEXPR && desc->dofa_strtab != DOF_SECIDX_NONE))
12773 {
2d21ac55
A
12774 dof_sec_t *strtab;
12775 char *str, *fmt;
12776 uint64_t i;
12777
12778 /*
3e170ce0
A
12779 * The argument to these actions is an index into the
12780 * DOF string table. For printf()-like actions, this
12781 * is the format string. For print(), this is the
12782 * CTF type of the expression result.
2d21ac55
A
12783 */
12784 if ((strtab = dtrace_dof_sect(dof,
12785 DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL)
12786 goto err;
12787
12788 str = (char *)((uintptr_t)dof +
12789 (uintptr_t)strtab->dofs_offset);
12790
12791 for (i = desc->dofa_arg; i < strtab->dofs_size; i++) {
12792 if (str[i] == '\0')
12793 break;
12794 }
12795
12796 if (i >= strtab->dofs_size) {
12797 dtrace_dof_error(dof, "bogus format string");
12798 goto err;
12799 }
12800
12801 if (i == desc->dofa_arg) {
12802 dtrace_dof_error(dof, "empty format string");
12803 goto err;
12804 }
12805
12806 i -= desc->dofa_arg;
12807 fmt = kmem_alloc(i + 1, KM_SLEEP);
12808 bcopy(&str[desc->dofa_arg], fmt, i + 1);
12809 arg = (uint64_t)(uintptr_t)fmt;
12810 } else {
12811 if (kind == DTRACEACT_PRINTA) {
12812 ASSERT(desc->dofa_strtab == DOF_SECIDX_NONE);
12813 arg = 0;
12814 } else {
12815 arg = desc->dofa_arg;
12816 }
12817 }
12818
12819 act = dtrace_actdesc_create(kind, desc->dofa_ntuple,
12820 desc->dofa_uarg, arg);
12821
12822 if (last != NULL) {
12823 last->dtad_next = act;
12824 } else {
12825 first = act;
12826 }
12827
12828 last = act;
12829
12830 if (desc->dofa_difo == DOF_SECIDX_NONE)
12831 continue;
12832
12833 if ((difosec = dtrace_dof_sect(dof,
12834 DOF_SECT_DIFOHDR, desc->dofa_difo)) == NULL)
12835 goto err;
12836
12837 act->dtad_difo = dtrace_dof_difo(dof, difosec, vstate, cr);
12838
12839 if (act->dtad_difo == NULL)
12840 goto err;
12841 }
12842
12843 ASSERT(first != NULL);
12844 return (first);
12845
12846err:
12847 for (act = first; act != NULL; act = next) {
12848 next = act->dtad_next;
12849 dtrace_actdesc_release(act, vstate);
12850 }
12851
12852 return (NULL);
12853}
12854
12855static dtrace_ecbdesc_t *
12856dtrace_dof_ecbdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
12857 cred_t *cr)
12858{
12859 dtrace_ecbdesc_t *ep;
12860 dof_ecbdesc_t *ecb;
12861 dtrace_probedesc_t *desc;
12862 dtrace_predicate_t *pred = NULL;
12863
12864 if (sec->dofs_size < sizeof (dof_ecbdesc_t)) {
12865 dtrace_dof_error(dof, "truncated ECB description");
12866 return (NULL);
12867 }
12868
12869 if (sec->dofs_align != sizeof (uint64_t)) {
12870 dtrace_dof_error(dof, "bad alignment in ECB description");
12871 return (NULL);
12872 }
12873
12874 ecb = (dof_ecbdesc_t *)((uintptr_t)dof + (uintptr_t)sec->dofs_offset);
12875 sec = dtrace_dof_sect(dof, DOF_SECT_PROBEDESC, ecb->dofe_probes);
12876
12877 if (sec == NULL)
12878 return (NULL);
12879
12880 ep = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP);
12881 ep->dted_uarg = ecb->dofe_uarg;
12882 desc = &ep->dted_probe;
12883
12884 if (dtrace_dof_probedesc(dof, sec, desc) == NULL)
12885 goto err;
12886
12887 if (ecb->dofe_pred != DOF_SECIDX_NONE) {
12888 if ((sec = dtrace_dof_sect(dof,
12889 DOF_SECT_DIFOHDR, ecb->dofe_pred)) == NULL)
12890 goto err;
12891
12892 if ((pred = dtrace_dof_predicate(dof, sec, vstate, cr)) == NULL)
12893 goto err;
12894
12895 ep->dted_pred.dtpdd_predicate = pred;
12896 }
12897
12898 if (ecb->dofe_actions != DOF_SECIDX_NONE) {
12899 if ((sec = dtrace_dof_sect(dof,
12900 DOF_SECT_ACTDESC, ecb->dofe_actions)) == NULL)
12901 goto err;
12902
12903 ep->dted_action = dtrace_dof_actdesc(dof, sec, vstate, cr);
12904
12905 if (ep->dted_action == NULL)
12906 goto err;
12907 }
12908
12909 return (ep);
12910
12911err:
12912 if (pred != NULL)
12913 dtrace_predicate_release(pred, vstate);
12914 kmem_free(ep, sizeof (dtrace_ecbdesc_t));
12915 return (NULL);
12916}
12917
2d21ac55 12918/*
fe8ab488
A
12919 * APPLE NOTE: dyld handles dof relocation.
12920 * Darwin does not need dtrace_dof_relocate()
2d21ac55 12921 */
2d21ac55
A
12922
12923/*
12924 * The dof_hdr_t passed to dtrace_dof_slurp() should be a partially validated
12925 * header: it should be at the front of a memory region that is at least
12926 * sizeof (dof_hdr_t) in size -- and then at least dof_hdr.dofh_loadsz in
12927 * size. It need not be validated in any other way.
12928 */
12929static int
12930dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr,
12931 dtrace_enabling_t **enabp, uint64_t ubase, int noprobes)
12932{
b0d623f7 12933#pragma unused(ubase) /* __APPLE__ */
2d21ac55
A
12934 uint64_t len = dof->dofh_loadsz, seclen;
12935 uintptr_t daddr = (uintptr_t)dof;
12936 dtrace_ecbdesc_t *ep;
12937 dtrace_enabling_t *enab;
12938 uint_t i;
12939
5ba3f43e 12940 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
12941 ASSERT(dof->dofh_loadsz >= sizeof (dof_hdr_t));
12942
12943 /*
12944 * Check the DOF header identification bytes. In addition to checking
12945 * valid settings, we also verify that unused bits/bytes are zeroed so
12946 * we can use them later without fear of regressing existing binaries.
12947 */
12948 if (bcmp(&dof->dofh_ident[DOF_ID_MAG0],
12949 DOF_MAG_STRING, DOF_MAG_STRLEN) != 0) {
12950 dtrace_dof_error(dof, "DOF magic string mismatch");
12951 return (-1);
12952 }
12953
12954 if (dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_ILP32 &&
12955 dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_LP64) {
12956 dtrace_dof_error(dof, "DOF has invalid data model");
12957 return (-1);
12958 }
12959
12960 if (dof->dofh_ident[DOF_ID_ENCODING] != DOF_ENCODE_NATIVE) {
12961 dtrace_dof_error(dof, "DOF encoding mismatch");
12962 return (-1);
12963 }
12964
2d21ac55 12965 /*
fe8ab488 12966 * APPLE NOTE: Darwin only supports DOF_VERSION_3 for now.
2d21ac55
A
12967 */
12968 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_3) {
12969 dtrace_dof_error(dof, "DOF version mismatch");
12970 return (-1);
12971 }
2d21ac55
A
12972
12973 if (dof->dofh_ident[DOF_ID_DIFVERS] != DIF_VERSION_2) {
12974 dtrace_dof_error(dof, "DOF uses unsupported instruction set");
12975 return (-1);
12976 }
12977
12978 if (dof->dofh_ident[DOF_ID_DIFIREG] > DIF_DIR_NREGS) {
12979 dtrace_dof_error(dof, "DOF uses too many integer registers");
12980 return (-1);
12981 }
12982
12983 if (dof->dofh_ident[DOF_ID_DIFTREG] > DIF_DTR_NREGS) {
12984 dtrace_dof_error(dof, "DOF uses too many tuple registers");
12985 return (-1);
12986 }
12987
12988 for (i = DOF_ID_PAD; i < DOF_ID_SIZE; i++) {
12989 if (dof->dofh_ident[i] != 0) {
12990 dtrace_dof_error(dof, "DOF has invalid ident byte set");
12991 return (-1);
12992 }
12993 }
12994
12995 if (dof->dofh_flags & ~DOF_FL_VALID) {
12996 dtrace_dof_error(dof, "DOF has invalid flag bits set");
12997 return (-1);
12998 }
12999
d9a64523
A
13000 if (dof->dofh_secsize < sizeof(dof_sec_t)) {
13001 dtrace_dof_error(dof, "invalid section header size");
2d21ac55
A
13002 return (-1);
13003 }
13004
13005 /*
13006 * Check that the section headers don't exceed the amount of DOF
13007 * data. Note that we cast the section size and number of sections
13008 * to uint64_t's to prevent possible overflow in the multiplication.
13009 */
13010 seclen = (uint64_t)dof->dofh_secnum * (uint64_t)dof->dofh_secsize;
13011
13012 if (dof->dofh_secoff > len || seclen > len ||
13013 dof->dofh_secoff + seclen > len) {
13014 dtrace_dof_error(dof, "truncated section headers");
13015 return (-1);
13016 }
13017
13018 if (!IS_P2ALIGNED(dof->dofh_secoff, sizeof (uint64_t))) {
13019 dtrace_dof_error(dof, "misaligned section headers");
13020 return (-1);
13021 }
13022
13023 if (!IS_P2ALIGNED(dof->dofh_secsize, sizeof (uint64_t))) {
13024 dtrace_dof_error(dof, "misaligned section size");
13025 return (-1);
13026 }
13027
13028 /*
13029 * Take an initial pass through the section headers to be sure that
13030 * the headers don't have stray offsets. If the 'noprobes' flag is
13031 * set, do not permit sections relating to providers, probes, or args.
13032 */
13033 for (i = 0; i < dof->dofh_secnum; i++) {
13034 dof_sec_t *sec = (dof_sec_t *)(daddr +
13035 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);
13036
13037 if (noprobes) {
13038 switch (sec->dofs_type) {
13039 case DOF_SECT_PROVIDER:
13040 case DOF_SECT_PROBES:
13041 case DOF_SECT_PRARGS:
13042 case DOF_SECT_PROFFS:
13043 dtrace_dof_error(dof, "illegal sections "
13044 "for enabling");
13045 return (-1);
13046 }
13047 }
13048
13049 if (!(sec->dofs_flags & DOF_SECF_LOAD))
13050 continue; /* just ignore non-loadable sections */
13051
13052 if (sec->dofs_align & (sec->dofs_align - 1)) {
13053 dtrace_dof_error(dof, "bad section alignment");
13054 return (-1);
13055 }
13056
13057 if (sec->dofs_offset & (sec->dofs_align - 1)) {
13058 dtrace_dof_error(dof, "misaligned section");
13059 return (-1);
13060 }
13061
13062 if (sec->dofs_offset > len || sec->dofs_size > len ||
13063 sec->dofs_offset + sec->dofs_size > len) {
13064 dtrace_dof_error(dof, "corrupt section header");
13065 return (-1);
13066 }
13067
13068 if (sec->dofs_type == DOF_SECT_STRTAB && *((char *)daddr +
13069 sec->dofs_offset + sec->dofs_size - 1) != '\0') {
13070 dtrace_dof_error(dof, "non-terminating string table");
13071 return (-1);
13072 }
13073 }
13074
b0d623f7 13075 /*
fe8ab488
A
13076 * APPLE NOTE: We have no further relocation to perform.
13077 * All dof values are relative offsets.
b0d623f7 13078 */
2d21ac55
A
13079
13080 if ((enab = *enabp) == NULL)
13081 enab = *enabp = dtrace_enabling_create(vstate);
13082
13083 for (i = 0; i < dof->dofh_secnum; i++) {
13084 dof_sec_t *sec = (dof_sec_t *)(daddr +
13085 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);
13086
13087 if (sec->dofs_type != DOF_SECT_ECBDESC)
13088 continue;
13089
fe8ab488
A
13090 /*
13091 * APPLE NOTE: Defend against gcc 4.0 botch on x86.
13092 * not all paths out of inlined dtrace_dof_ecbdesc
13093 * are checked for the NULL return value.
13094 * Check for NULL explicitly here.
13095 */
2d21ac55
A
13096 ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr);
13097 if (ep == NULL) {
13098 dtrace_enabling_destroy(enab);
13099 *enabp = NULL;
13100 return (-1);
13101 }
2d21ac55
A
13102
13103 dtrace_enabling_add(enab, ep);
13104 }
13105
13106 return (0);
13107}
13108
13109/*
13110 * Process DOF for any options. This routine assumes that the DOF has been
13111 * at least processed by dtrace_dof_slurp().
13112 */
13113static int
13114dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state)
13115{
b0d623f7
A
13116 uint_t i;
13117 int rval;
2d21ac55
A
13118 uint32_t entsize;
13119 size_t offs;
13120 dof_optdesc_t *desc;
13121
13122 for (i = 0; i < dof->dofh_secnum; i++) {
13123 dof_sec_t *sec = (dof_sec_t *)((uintptr_t)dof +
13124 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);
13125
13126 if (sec->dofs_type != DOF_SECT_OPTDESC)
13127 continue;
13128
13129 if (sec->dofs_align != sizeof (uint64_t)) {
13130 dtrace_dof_error(dof, "bad alignment in "
13131 "option description");
13132 return (EINVAL);
13133 }
13134
13135 if ((entsize = sec->dofs_entsize) == 0) {
13136 dtrace_dof_error(dof, "zeroed option entry size");
13137 return (EINVAL);
13138 }
13139
13140 if (entsize < sizeof (dof_optdesc_t)) {
13141 dtrace_dof_error(dof, "bad option entry size");
13142 return (EINVAL);
13143 }
13144
13145 for (offs = 0; offs < sec->dofs_size; offs += entsize) {
13146 desc = (dof_optdesc_t *)((uintptr_t)dof +
13147 (uintptr_t)sec->dofs_offset + offs);
13148
13149 if (desc->dofo_strtab != DOF_SECIDX_NONE) {
13150 dtrace_dof_error(dof, "non-zero option string");
13151 return (EINVAL);
13152 }
13153
b0d623f7 13154 if (desc->dofo_value == (uint64_t)DTRACEOPT_UNSET) {
2d21ac55
A
13155 dtrace_dof_error(dof, "unset option");
13156 return (EINVAL);
13157 }
13158
13159 if ((rval = dtrace_state_option(state,
13160 desc->dofo_option, desc->dofo_value)) != 0) {
13161 dtrace_dof_error(dof, "rejected option");
13162 return (rval);
13163 }
13164 }
13165 }
13166
13167 return (0);
13168}
13169
13170/*
13171 * DTrace Consumer State Functions
13172 */
fe8ab488 13173static int
2d21ac55
A
13174dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size)
13175{
c910b4d9 13176 size_t hashsize, maxper, min_size, chunksize = dstate->dtds_chunksize;
2d21ac55
A
13177 void *base;
13178 uintptr_t limit;
13179 dtrace_dynvar_t *dvar, *next, *start;
b0d623f7 13180 size_t i;
2d21ac55 13181
5ba3f43e 13182 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
13183 ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL);
13184
13185 bzero(dstate, sizeof (dtrace_dstate_t));
13186
13187 if ((dstate->dtds_chunksize = chunksize) == 0)
13188 dstate->dtds_chunksize = DTRACE_DYNVAR_CHUNKSIZE;
13189
ecc0ceb4
A
13190 VERIFY(dstate->dtds_chunksize < (LONG_MAX - sizeof (dtrace_dynhash_t)));
13191
c910b4d9
A
13192 if (size < (min_size = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)))
13193 size = min_size;
2d21ac55
A
13194
13195 if ((base = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
13196 return (ENOMEM);
13197
13198 dstate->dtds_size = size;
13199 dstate->dtds_base = base;
13200 dstate->dtds_percpu = kmem_cache_alloc(dtrace_state_cache, KM_SLEEP);
c910b4d9 13201 bzero(dstate->dtds_percpu, (int)NCPU * sizeof (dtrace_dstate_percpu_t));
2d21ac55
A
13202
13203 hashsize = size / (dstate->dtds_chunksize + sizeof (dtrace_dynhash_t));
13204
13205 if (hashsize != 1 && (hashsize & 1))
13206 hashsize--;
13207
13208 dstate->dtds_hashsize = hashsize;
13209 dstate->dtds_hash = dstate->dtds_base;
13210
13211 /*
13212 * Set all of our hash buckets to point to the single sink, and (if
13213 * it hasn't already been set), set the sink's hash value to be the
13214 * sink sentinel value. The sink is needed for dynamic variable
13215 * lookups to know that they have iterated over an entire, valid hash
13216 * chain.
13217 */
13218 for (i = 0; i < hashsize; i++)
13219 dstate->dtds_hash[i].dtdh_chain = &dtrace_dynhash_sink;
13220
13221 if (dtrace_dynhash_sink.dtdv_hashval != DTRACE_DYNHASH_SINK)
13222 dtrace_dynhash_sink.dtdv_hashval = DTRACE_DYNHASH_SINK;
13223
13224 /*
13225 * Determine number of active CPUs. Divide free list evenly among
13226 * active CPUs.
13227 */
13228 start = (dtrace_dynvar_t *)
13229 ((uintptr_t)base + hashsize * sizeof (dtrace_dynhash_t));
13230 limit = (uintptr_t)base + size;
13231
ecc0ceb4
A
13232 VERIFY((uintptr_t)start < limit);
13233 VERIFY((uintptr_t)start >= (uintptr_t)base);
13234
c910b4d9 13235 maxper = (limit - (uintptr_t)start) / (int)NCPU;
2d21ac55
A
13236 maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize;
13237
b0d623f7 13238 for (i = 0; i < NCPU; i++) {
2d21ac55
A
13239 dstate->dtds_percpu[i].dtdsc_free = dvar = start;
13240
13241 /*
13242 * If we don't even have enough chunks to make it once through
13243 * NCPUs, we're just going to allocate everything to the first
13244 * CPU. And if we're on the last CPU, we're going to allocate
13245 * whatever is left over. In either case, we set the limit to
13246 * be the limit of the dynamic variable space.
13247 */
b0d623f7 13248 if (maxper == 0 || i == NCPU - 1) {
2d21ac55
A
13249 limit = (uintptr_t)base + size;
13250 start = NULL;
13251 } else {
13252 limit = (uintptr_t)start + maxper;
13253 start = (dtrace_dynvar_t *)limit;
13254 }
13255
ecc0ceb4 13256 VERIFY(limit <= (uintptr_t)base + size);
2d21ac55
A
13257
13258 for (;;) {
13259 next = (dtrace_dynvar_t *)((uintptr_t)dvar +
13260 dstate->dtds_chunksize);
13261
13262 if ((uintptr_t)next + dstate->dtds_chunksize >= limit)
13263 break;
13264
ecc0ceb4
A
13265 VERIFY((uintptr_t)dvar >= (uintptr_t)base &&
13266 (uintptr_t)dvar <= (uintptr_t)base + size);
2d21ac55
A
13267 dvar->dtdv_next = next;
13268 dvar = next;
13269 }
13270
13271 if (maxper == 0)
13272 break;
13273 }
13274
13275 return (0);
13276}
13277
fe8ab488 13278static void
2d21ac55
A
13279dtrace_dstate_fini(dtrace_dstate_t *dstate)
13280{
5ba3f43e 13281 LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
13282
13283 if (dstate->dtds_base == NULL)
13284 return;
13285
13286 kmem_free(dstate->dtds_base, dstate->dtds_size);
13287 kmem_cache_free(dtrace_state_cache, dstate->dtds_percpu);
13288}
13289
13290static void
13291dtrace_vstate_fini(dtrace_vstate_t *vstate)
13292{
13293 /*
13294 * Logical XOR, where are you?
13295 */
13296 ASSERT((vstate->dtvs_nglobals == 0) ^ (vstate->dtvs_globals != NULL));
13297
13298 if (vstate->dtvs_nglobals > 0) {
13299 kmem_free(vstate->dtvs_globals, vstate->dtvs_nglobals *
13300 sizeof (dtrace_statvar_t *));
13301 }
13302
13303 if (vstate->dtvs_ntlocals > 0) {
13304 kmem_free(vstate->dtvs_tlocals, vstate->dtvs_ntlocals *
13305 sizeof (dtrace_difv_t));
13306 }
13307
13308 ASSERT((vstate->dtvs_nlocals == 0) ^ (vstate->dtvs_locals != NULL));
13309
13310 if (vstate->dtvs_nlocals > 0) {
13311 kmem_free(vstate->dtvs_locals, vstate->dtvs_nlocals *
13312 sizeof (dtrace_statvar_t *));
13313 }
13314}
13315
13316static void
13317dtrace_state_clean(dtrace_state_t *state)
13318{
13319 if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE)
13320 return;
13321
13322 dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars);
13323 dtrace_speculation_clean(state);
13324}
13325
13326static void
13327dtrace_state_deadman(dtrace_state_t *state)
13328{
13329 hrtime_t now;
13330
13331 dtrace_sync();
13332
13333 now = dtrace_gethrtime();
13334
13335 if (state != dtrace_anon.dta_state &&
13336 now - state->dts_laststatus >= dtrace_deadman_user)
13337 return;
13338
13339 /*
13340 * We must be sure that dts_alive never appears to be less than the
13341 * value upon entry to dtrace_state_deadman(), and because we lack a
13342 * dtrace_cas64(), we cannot store to it atomically. We thus instead
13343 * store INT64_MAX to it, followed by a memory barrier, followed by
13344 * the new value. This assures that dts_alive never appears to be
13345 * less than its true value, regardless of the order in which the
13346 * stores to the underlying storage are issued.
13347 */
13348 state->dts_alive = INT64_MAX;
13349 dtrace_membar_producer();
13350 state->dts_alive = now;
13351}
13352
b0d623f7
A
13353static int
13354dtrace_state_create(dev_t *devp, cred_t *cr, dtrace_state_t **new_state)
2d21ac55
A
13355{
13356 minor_t minor;
13357 major_t major;
13358 char c[30];
13359 dtrace_state_t *state;
13360 dtrace_optval_t *opt;
c910b4d9 13361 int bufsize = (int)NCPU * sizeof (dtrace_buffer_t), i;
2d21ac55 13362
5ba3f43e
A
13363 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
13364 LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55 13365
b0d623f7
A
13366 /* Cause restart */
13367 *new_state = NULL;
13368
813fb2f6
A
13369 if (devp != NULL) {
13370 minor = getminor(*devp);
13371 }
13372 else {
13373 minor = DTRACE_NCLIENTS - 1;
13374 }
2d21ac55 13375
39037602
A
13376 state = dtrace_state_allocate(minor);
13377 if (NULL == state) {
13378 printf("dtrace_open: couldn't acquire minor number %d. This usually means that too many DTrace clients are in use at the moment", minor);
13379 return (ERESTART); /* can't reacquire */
2d21ac55
A
13380 }
13381
2d21ac55
A
13382 state->dts_epid = DTRACE_EPIDNONE + 1;
13383
13384 (void) snprintf(c, sizeof (c), "dtrace_aggid_%d", minor);
13385 state->dts_aggid_arena = vmem_create(c, (void *)1, UINT32_MAX, 1,
13386 NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
13387
13388 if (devp != NULL) {
13389 major = getemajor(*devp);
13390 } else {
13391 major = ddi_driver_major(dtrace_devi);
13392 }
13393
d9a64523 13394 state->dts_dev = makedev(major, minor);
2d21ac55
A
13395
13396 if (devp != NULL)
13397 *devp = state->dts_dev;
13398
13399 /*
13400 * We allocate NCPU buffers. On the one hand, this can be quite
13401 * a bit of memory per instance (nearly 36K on a Starcat). On the
13402 * other hand, it saves an additional memory reference in the probe
13403 * path.
13404 */
13405 state->dts_buffer = kmem_zalloc(bufsize, KM_SLEEP);
13406 state->dts_aggbuffer = kmem_zalloc(bufsize, KM_SLEEP);
39037602 13407 state->dts_buf_over_limit = 0;
2d21ac55
A
13408 state->dts_cleaner = CYCLIC_NONE;
13409 state->dts_deadman = CYCLIC_NONE;
13410 state->dts_vstate.dtvs_state = state;
13411
13412 for (i = 0; i < DTRACEOPT_MAX; i++)
13413 state->dts_options[i] = DTRACEOPT_UNSET;
13414
13415 /*
13416 * Set the default options.
13417 */
13418 opt = state->dts_options;
13419 opt[DTRACEOPT_BUFPOLICY] = DTRACEOPT_BUFPOLICY_SWITCH;
13420 opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_AUTO;
13421 opt[DTRACEOPT_NSPEC] = dtrace_nspec_default;
13422 opt[DTRACEOPT_SPECSIZE] = dtrace_specsize_default;
13423 opt[DTRACEOPT_CPU] = (dtrace_optval_t)DTRACE_CPUALL;
13424 opt[DTRACEOPT_STRSIZE] = dtrace_strsize_default;
13425 opt[DTRACEOPT_STACKFRAMES] = dtrace_stackframes_default;
13426 opt[DTRACEOPT_USTACKFRAMES] = dtrace_ustackframes_default;
13427 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_default;
13428 opt[DTRACEOPT_AGGRATE] = dtrace_aggrate_default;
13429 opt[DTRACEOPT_SWITCHRATE] = dtrace_switchrate_default;
13430 opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_default;
13431 opt[DTRACEOPT_JSTACKFRAMES] = dtrace_jstackframes_default;
13432 opt[DTRACEOPT_JSTACKSTRSIZE] = dtrace_jstackstrsize_default;
39037602 13433 opt[DTRACEOPT_BUFLIMIT] = dtrace_buflimit_default;
2d21ac55
A
13434
13435 /*
13436 * Depending on the user credentials, we set flag bits which alter probe
13437 * visibility or the amount of destructiveness allowed. In the case of
13438 * actual anonymous tracing, or the possession of all privileges, all of
13439 * the normal checks are bypassed.
13440 */
39037602 13441#if defined(__APPLE__)
d9a64523
A
13442 if (cr != NULL) {
13443 kauth_cred_ref(cr);
13444 state->dts_cred.dcr_cred = cr;
13445 }
39037602
A
13446 if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
13447 if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed()) {
13448 /*
13449 * Allow only proc credentials when DTrace is
13450 * restricted by the current security policy
13451 */
13452 state->dts_cred.dcr_visible = DTRACE_CRV_ALLPROC;
13453 state->dts_cred.dcr_action = DTRACE_CRA_PROC | DTRACE_CRA_PROC_CONTROL | DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
13454 }
13455 else {
13456 state->dts_cred.dcr_visible = DTRACE_CRV_ALL;
13457 state->dts_cred.dcr_action = DTRACE_CRA_ALL;
13458 }
13459 }
13460
13461#else
2d21ac55
A
13462 if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
13463 state->dts_cred.dcr_visible = DTRACE_CRV_ALL;
13464 state->dts_cred.dcr_action = DTRACE_CRA_ALL;
39037602
A
13465 }
13466 else {
2d21ac55
A
13467 /*
13468 * Set up the credentials for this instantiation. We take a
13469 * hold on the credential to prevent it from disappearing on
13470 * us; this in turn prevents the zone_t referenced by this
13471 * credential from disappearing. This means that we can
13472 * examine the credential and the zone from probe context.
13473 */
13474 crhold(cr);
13475 state->dts_cred.dcr_cred = cr;
13476
13477 /*
13478 * CRA_PROC means "we have *some* privilege for dtrace" and
13479 * unlocks the use of variables like pid, zonename, etc.
13480 */
13481 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE) ||
13482 PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
13483 state->dts_cred.dcr_action |= DTRACE_CRA_PROC;
13484 }
13485
13486 /*
13487 * dtrace_user allows use of syscall and profile providers.
13488 * If the user also has proc_owner and/or proc_zone, we
13489 * extend the scope to include additional visibility and
13490 * destructive power.
13491 */
13492 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE)) {
13493 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) {
13494 state->dts_cred.dcr_visible |=
13495 DTRACE_CRV_ALLPROC;
13496
13497 state->dts_cred.dcr_action |=
13498 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
13499 }
13500
13501 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) {
13502 state->dts_cred.dcr_visible |=
13503 DTRACE_CRV_ALLZONE;
13504
13505 state->dts_cred.dcr_action |=
13506 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
13507 }
13508
13509 /*
13510 * If we have all privs in whatever zone this is,
13511 * we can do destructive things to processes which
13512 * have altered credentials.
fe8ab488
A
13513 *
13514 * APPLE NOTE: Darwin doesn't do zones.
13515 * Behave as if zone always has destructive privs.
2d21ac55 13516 */
fe8ab488 13517
2d21ac55
A
13518 state->dts_cred.dcr_action |=
13519 DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
2d21ac55
A
13520 }
13521
13522 /*
13523 * Holding the dtrace_kernel privilege also implies that
13524 * the user has the dtrace_user privilege from a visibility
13525 * perspective. But without further privileges, some
13526 * destructive actions are not available.
13527 */
13528 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE)) {
13529 /*
13530 * Make all probes in all zones visible. However,
13531 * this doesn't mean that all actions become available
13532 * to all zones.
13533 */
13534 state->dts_cred.dcr_visible |= DTRACE_CRV_KERNEL |
13535 DTRACE_CRV_ALLPROC | DTRACE_CRV_ALLZONE;
13536
13537 state->dts_cred.dcr_action |= DTRACE_CRA_KERNEL |
13538 DTRACE_CRA_PROC;
13539 /*
13540 * Holding proc_owner means that destructive actions
13541 * for *this* zone are allowed.
13542 */
13543 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
13544 state->dts_cred.dcr_action |=
13545 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
13546
13547 /*
13548 * Holding proc_zone means that destructive actions
13549 * for this user/group ID in all zones is allowed.
13550 */
13551 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
13552 state->dts_cred.dcr_action |=
13553 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
13554
13555 /*
13556 * If we have all privs in whatever zone this is,
13557 * we can do destructive things to processes which
13558 * have altered credentials.
fe8ab488
A
13559 *
13560 * APPLE NOTE: Darwin doesn't do zones.
13561 * Behave as if zone always has destructive privs.
13562 */
2d21ac55
A
13563 state->dts_cred.dcr_action |=
13564 DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
2d21ac55
A
13565 }
13566
13567 /*
13568 * Holding the dtrace_proc privilege gives control over fasttrap
13569 * and pid providers. We need to grant wider destructive
13570 * privileges in the event that the user has proc_owner and/or
13571 * proc_zone.
13572 */
13573 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
13574 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
13575 state->dts_cred.dcr_action |=
13576 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
13577
13578 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
13579 state->dts_cred.dcr_action |=
13580 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
13581 }
13582 }
39037602 13583#endif
2d21ac55 13584
b0d623f7
A
13585 *new_state = state;
13586 return(0); /* Success */
2d21ac55
A
13587}
13588
13589static int
13590dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
13591{
13592 dtrace_optval_t *opt = state->dts_options, size;
c910b4d9 13593 processorid_t cpu = 0;
39037602 13594 size_t limit = buf->dtb_size;
2d21ac55
A
13595 int flags = 0, rval;
13596
5ba3f43e
A
13597 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
13598 LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
13599 ASSERT(which < DTRACEOPT_MAX);
13600 ASSERT(state->dts_activity == DTRACE_ACTIVITY_INACTIVE ||
13601 (state == dtrace_anon.dta_state &&
13602 state->dts_activity == DTRACE_ACTIVITY_ACTIVE));
13603
13604 if (opt[which] == DTRACEOPT_UNSET || opt[which] == 0)
13605 return (0);
13606
13607 if (opt[DTRACEOPT_CPU] != DTRACEOPT_UNSET)
13608 cpu = opt[DTRACEOPT_CPU];
13609
13610 if (which == DTRACEOPT_SPECSIZE)
13611 flags |= DTRACEBUF_NOSWITCH;
13612
13613 if (which == DTRACEOPT_BUFSIZE) {
13614 if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_RING)
13615 flags |= DTRACEBUF_RING;
13616
13617 if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_FILL)
13618 flags |= DTRACEBUF_FILL;
13619
13620 if (state != dtrace_anon.dta_state ||
13621 state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
13622 flags |= DTRACEBUF_INACTIVE;
13623 }
13624
b0d623f7 13625 for (size = opt[which]; (size_t)size >= sizeof (uint64_t); size >>= 1) {
2d21ac55
A
13626 /*
13627 * The size must be 8-byte aligned. If the size is not 8-byte
13628 * aligned, drop it down by the difference.
13629 */
13630 if (size & (sizeof (uint64_t) - 1))
13631 size -= size & (sizeof (uint64_t) - 1);
13632
13633 if (size < state->dts_reserve) {
13634 /*
13635 * Buffers always must be large enough to accommodate
13636 * their prereserved space. We return E2BIG instead
13637 * of ENOMEM in this case to allow for user-level
13638 * software to differentiate the cases.
13639 */
13640 return (E2BIG);
13641 }
39037602
A
13642 limit = opt[DTRACEOPT_BUFLIMIT] * size / 100;
13643 rval = dtrace_buffer_alloc(buf, limit, size, flags, cpu);
2d21ac55
A
13644
13645 if (rval != ENOMEM) {
13646 opt[which] = size;
13647 return (rval);
13648 }
13649
13650 if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
13651 return (rval);
13652 }
13653
13654 return (ENOMEM);
13655}
13656
13657static int
13658dtrace_state_buffers(dtrace_state_t *state)
13659{
13660 dtrace_speculation_t *spec = state->dts_speculations;
13661 int rval, i;
13662
13663 if ((rval = dtrace_state_buffer(state, state->dts_buffer,
13664 DTRACEOPT_BUFSIZE)) != 0)
13665 return (rval);
13666
13667 if ((rval = dtrace_state_buffer(state, state->dts_aggbuffer,
13668 DTRACEOPT_AGGSIZE)) != 0)
13669 return (rval);
13670
13671 for (i = 0; i < state->dts_nspeculations; i++) {
13672 if ((rval = dtrace_state_buffer(state,
13673 spec[i].dtsp_buffer, DTRACEOPT_SPECSIZE)) != 0)
13674 return (rval);
13675 }
13676
13677 return (0);
13678}
13679
13680static void
13681dtrace_state_prereserve(dtrace_state_t *state)
13682{
13683 dtrace_ecb_t *ecb;
13684 dtrace_probe_t *probe;
13685
13686 state->dts_reserve = 0;
13687
13688 if (state->dts_options[DTRACEOPT_BUFPOLICY] != DTRACEOPT_BUFPOLICY_FILL)
13689 return;
13690
13691 /*
13692 * If our buffer policy is a "fill" buffer policy, we need to set the
13693 * prereserved space to be the space required by the END probes.
13694 */
13695 probe = dtrace_probes[dtrace_probeid_end - 1];
13696 ASSERT(probe != NULL);
13697
13698 for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
13699 if (ecb->dte_state != state)
13700 continue;
13701
13702 state->dts_reserve += ecb->dte_needed + ecb->dte_alignment;
13703 }
13704}
13705
13706static int
13707dtrace_state_go(dtrace_state_t *state, processorid_t *cpu)
13708{
13709 dtrace_optval_t *opt = state->dts_options, sz, nspec;
13710 dtrace_speculation_t *spec;
13711 dtrace_buffer_t *buf;
13712 cyc_handler_t hdlr;
13713 cyc_time_t when;
c910b4d9 13714 int rval = 0, i, bufsize = (int)NCPU * sizeof (dtrace_buffer_t);
2d21ac55
A
13715 dtrace_icookie_t cookie;
13716
13717 lck_mtx_lock(&cpu_lock);
13718 lck_mtx_lock(&dtrace_lock);
13719
13720 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
13721 rval = EBUSY;
13722 goto out;
13723 }
13724
13725 /*
13726 * Before we can perform any checks, we must prime all of the
13727 * retained enablings that correspond to this state.
13728 */
13729 dtrace_enabling_prime(state);
13730
13731 if (state->dts_destructive && !state->dts_cred.dcr_destructive) {
13732 rval = EACCES;
13733 goto out;
13734 }
13735
13736 dtrace_state_prereserve(state);
13737
13738 /*
13739 * Now we want to do is try to allocate our speculations.
13740 * We do not automatically resize the number of speculations; if
13741 * this fails, we will fail the operation.
13742 */
13743 nspec = opt[DTRACEOPT_NSPEC];
13744 ASSERT(nspec != DTRACEOPT_UNSET);
13745
13746 if (nspec > INT_MAX) {
13747 rval = ENOMEM;
13748 goto out;
13749 }
13750
13751 spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t), KM_NOSLEEP);
13752
13753 if (spec == NULL) {
13754 rval = ENOMEM;
13755 goto out;
13756 }
13757
13758 state->dts_speculations = spec;
13759 state->dts_nspeculations = (int)nspec;
13760
13761 for (i = 0; i < nspec; i++) {
13762 if ((buf = kmem_zalloc(bufsize, KM_NOSLEEP)) == NULL) {
13763 rval = ENOMEM;
13764 goto err;
13765 }
13766
13767 spec[i].dtsp_buffer = buf;
13768 }
13769
13770 if (opt[DTRACEOPT_GRABANON] != DTRACEOPT_UNSET) {
13771 if (dtrace_anon.dta_state == NULL) {
13772 rval = ENOENT;
13773 goto out;
13774 }
13775
13776 if (state->dts_necbs != 0) {
13777 rval = EALREADY;
13778 goto out;
13779 }
13780
13781 state->dts_anon = dtrace_anon_grab();
13782 ASSERT(state->dts_anon != NULL);
13783 state = state->dts_anon;
13784
13785 /*
13786 * We want "grabanon" to be set in the grabbed state, so we'll
13787 * copy that option value from the grabbing state into the
13788 * grabbed state.
13789 */
13790 state->dts_options[DTRACEOPT_GRABANON] =
13791 opt[DTRACEOPT_GRABANON];
13792
13793 *cpu = dtrace_anon.dta_beganon;
13794
13795 /*
13796 * If the anonymous state is active (as it almost certainly
13797 * is if the anonymous enabling ultimately matched anything),
13798 * we don't allow any further option processing -- but we
13799 * don't return failure.
13800 */
13801 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
13802 goto out;
13803 }
13804
13805 if (opt[DTRACEOPT_AGGSIZE] != DTRACEOPT_UNSET &&
13806 opt[DTRACEOPT_AGGSIZE] != 0) {
13807 if (state->dts_aggregations == NULL) {
13808 /*
13809 * We're not going to create an aggregation buffer
13810 * because we don't have any ECBs that contain
13811 * aggregations -- set this option to 0.
13812 */
13813 opt[DTRACEOPT_AGGSIZE] = 0;
13814 } else {
13815 /*
13816 * If we have an aggregation buffer, we must also have
13817 * a buffer to use as scratch.
13818 */
b0d623f7
A
13819 if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET ||
13820 (size_t)opt[DTRACEOPT_BUFSIZE] < state->dts_needed) {
13821 opt[DTRACEOPT_BUFSIZE] = state->dts_needed;
13822 }
2d21ac55
A
13823 }
13824 }
13825
13826 if (opt[DTRACEOPT_SPECSIZE] != DTRACEOPT_UNSET &&
13827 opt[DTRACEOPT_SPECSIZE] != 0) {
13828 if (!state->dts_speculates) {
13829 /*
13830 * We're not going to create speculation buffers
13831 * because we don't have any ECBs that actually
13832 * speculate -- set the speculation size to 0.
13833 */
13834 opt[DTRACEOPT_SPECSIZE] = 0;
13835 }
13836 }
13837
13838 /*
13839 * The bare minimum size for any buffer that we're actually going to
13840 * do anything to is sizeof (uint64_t).
13841 */
13842 sz = sizeof (uint64_t);
13843
13844 if ((state->dts_needed != 0 && opt[DTRACEOPT_BUFSIZE] < sz) ||
13845 (state->dts_speculates && opt[DTRACEOPT_SPECSIZE] < sz) ||
13846 (state->dts_aggregations != NULL && opt[DTRACEOPT_AGGSIZE] < sz)) {
13847 /*
13848 * A buffer size has been explicitly set to 0 (or to a size
13849 * that will be adjusted to 0) and we need the space -- we
13850 * need to return failure. We return ENOSPC to differentiate
13851 * it from failing to allocate a buffer due to failure to meet
13852 * the reserve (for which we return E2BIG).
13853 */
13854 rval = ENOSPC;
13855 goto out;
13856 }
13857
13858 if ((rval = dtrace_state_buffers(state)) != 0)
13859 goto err;
13860
13861 if ((sz = opt[DTRACEOPT_DYNVARSIZE]) == DTRACEOPT_UNSET)
13862 sz = dtrace_dstate_defsize;
13863
13864 do {
13865 rval = dtrace_dstate_init(&state->dts_vstate.dtvs_dynvars, sz);
13866
13867 if (rval == 0)
13868 break;
13869
13870 if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
13871 goto err;
13872 } while (sz >>= 1);
13873
13874 opt[DTRACEOPT_DYNVARSIZE] = sz;
13875
13876 if (rval != 0)
13877 goto err;
13878
13879 if (opt[DTRACEOPT_STATUSRATE] > dtrace_statusrate_max)
13880 opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_max;
13881
13882 if (opt[DTRACEOPT_CLEANRATE] == 0)
13883 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;
13884
13885 if (opt[DTRACEOPT_CLEANRATE] < dtrace_cleanrate_min)
13886 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_min;
13887
13888 if (opt[DTRACEOPT_CLEANRATE] > dtrace_cleanrate_max)
13889 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;
13890
39037602
A
13891 if (opt[DTRACEOPT_STRSIZE] > dtrace_strsize_max)
13892 opt[DTRACEOPT_STRSIZE] = dtrace_strsize_max;
13893
13894 if (opt[DTRACEOPT_STRSIZE] < dtrace_strsize_min)
13895 opt[DTRACEOPT_STRSIZE] = dtrace_strsize_min;
13896
13897 if (opt[DTRACEOPT_BUFLIMIT] > dtrace_buflimit_max)
13898 opt[DTRACEOPT_BUFLIMIT] = dtrace_buflimit_max;
13899
13900 if (opt[DTRACEOPT_BUFLIMIT] < dtrace_buflimit_min)
13901 opt[DTRACEOPT_BUFLIMIT] = dtrace_buflimit_min;
13902
2d21ac55
A
13903 hdlr.cyh_func = (cyc_func_t)dtrace_state_clean;
13904 hdlr.cyh_arg = state;
13905 hdlr.cyh_level = CY_LOW_LEVEL;
13906
13907 when.cyt_when = 0;
13908 when.cyt_interval = opt[DTRACEOPT_CLEANRATE];
13909
13910 state->dts_cleaner = cyclic_add(&hdlr, &when);
13911
13912 hdlr.cyh_func = (cyc_func_t)dtrace_state_deadman;
13913 hdlr.cyh_arg = state;
13914 hdlr.cyh_level = CY_LOW_LEVEL;
13915
13916 when.cyt_when = 0;
13917 when.cyt_interval = dtrace_deadman_interval;
13918
13919 state->dts_alive = state->dts_laststatus = dtrace_gethrtime();
13920 state->dts_deadman = cyclic_add(&hdlr, &when);
13921
13922 state->dts_activity = DTRACE_ACTIVITY_WARMUP;
13923
13924 /*
13925 * Now it's time to actually fire the BEGIN probe. We need to disable
13926 * interrupts here both to record the CPU on which we fired the BEGIN
13927 * probe (the data from this CPU will be processed first at user
13928 * level) and to manually activate the buffer for this CPU.
13929 */
13930 cookie = dtrace_interrupt_disable();
13931 *cpu = CPU->cpu_id;
13932 ASSERT(state->dts_buffer[*cpu].dtb_flags & DTRACEBUF_INACTIVE);
13933 state->dts_buffer[*cpu].dtb_flags &= ~DTRACEBUF_INACTIVE;
13934
13935 dtrace_probe(dtrace_probeid_begin,
13936 (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
13937 dtrace_interrupt_enable(cookie);
13938 /*
13939 * We may have had an exit action from a BEGIN probe; only change our
13940 * state to ACTIVE if we're still in WARMUP.
13941 */
13942 ASSERT(state->dts_activity == DTRACE_ACTIVITY_WARMUP ||
13943 state->dts_activity == DTRACE_ACTIVITY_DRAINING);
13944
13945 if (state->dts_activity == DTRACE_ACTIVITY_WARMUP)
13946 state->dts_activity = DTRACE_ACTIVITY_ACTIVE;
13947
13948 /*
13949 * Regardless of whether or not now we're in ACTIVE or DRAINING, we
13950 * want each CPU to transition its principal buffer out of the
13951 * INACTIVE state. Doing this assures that no CPU will suddenly begin
13952 * processing an ECB halfway down a probe's ECB chain; all CPUs will
13953 * atomically transition from processing none of a state's ECBs to
13954 * processing all of them.
13955 */
13956 dtrace_xcall(DTRACE_CPUALL,
13957 (dtrace_xcall_t)dtrace_buffer_activate, state);
13958 goto out;
13959
13960err:
13961 dtrace_buffer_free(state->dts_buffer);
13962 dtrace_buffer_free(state->dts_aggbuffer);
13963
13964 if ((nspec = state->dts_nspeculations) == 0) {
13965 ASSERT(state->dts_speculations == NULL);
13966 goto out;
13967 }
13968
13969 spec = state->dts_speculations;
13970 ASSERT(spec != NULL);
13971
13972 for (i = 0; i < state->dts_nspeculations; i++) {
13973 if ((buf = spec[i].dtsp_buffer) == NULL)
13974 break;
13975
13976 dtrace_buffer_free(buf);
13977 kmem_free(buf, bufsize);
13978 }
13979
13980 kmem_free(spec, nspec * sizeof (dtrace_speculation_t));
13981 state->dts_nspeculations = 0;
13982 state->dts_speculations = NULL;
13983
13984out:
13985 lck_mtx_unlock(&dtrace_lock);
13986 lck_mtx_unlock(&cpu_lock);
13987
13988 return (rval);
13989}
13990
13991static int
13992dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu)
13993{
13994 dtrace_icookie_t cookie;
13995
5ba3f43e 13996 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
13997
13998 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE &&
13999 state->dts_activity != DTRACE_ACTIVITY_DRAINING)
14000 return (EINVAL);
14001
14002 /*
14003 * We'll set the activity to DTRACE_ACTIVITY_DRAINING, and issue a sync
14004 * to be sure that every CPU has seen it. See below for the details
14005 * on why this is done.
14006 */
14007 state->dts_activity = DTRACE_ACTIVITY_DRAINING;
14008 dtrace_sync();
14009
14010 /*
14011 * By this point, it is impossible for any CPU to be still processing
14012 * with DTRACE_ACTIVITY_ACTIVE. We can thus set our activity to
14013 * DTRACE_ACTIVITY_COOLDOWN and know that we're not racing with any
14014 * other CPU in dtrace_buffer_reserve(). This allows dtrace_probe()
14015 * and callees to know that the activity is DTRACE_ACTIVITY_COOLDOWN
14016 * iff we're in the END probe.
14017 */
14018 state->dts_activity = DTRACE_ACTIVITY_COOLDOWN;
14019 dtrace_sync();
14020 ASSERT(state->dts_activity == DTRACE_ACTIVITY_COOLDOWN);
14021
14022 /*
14023 * Finally, we can release the reserve and call the END probe. We
14024 * disable interrupts across calling the END probe to allow us to
14025 * return the CPU on which we actually called the END probe. This
14026 * allows user-land to be sure that this CPU's principal buffer is
14027 * processed last.
14028 */
14029 state->dts_reserve = 0;
14030
14031 cookie = dtrace_interrupt_disable();
14032 *cpu = CPU->cpu_id;
14033 dtrace_probe(dtrace_probeid_end,
14034 (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
14035 dtrace_interrupt_enable(cookie);
14036
14037 state->dts_activity = DTRACE_ACTIVITY_STOPPED;
14038 dtrace_sync();
14039
14040 return (0);
14041}
14042
14043static int
14044dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option,
14045 dtrace_optval_t val)
14046{
5ba3f43e 14047 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
14048
14049 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
14050 return (EBUSY);
14051
14052 if (option >= DTRACEOPT_MAX)
14053 return (EINVAL);
14054
14055 if (option != DTRACEOPT_CPU && val < 0)
14056 return (EINVAL);
14057
14058 switch (option) {
14059 case DTRACEOPT_DESTRUCTIVE:
fe8ab488
A
14060 /*
14061 * Prevent consumers from enabling destructive actions if DTrace
14062 * is running in a restricted environment, or if actions are
14063 * disallowed.
14064 */
14065 if (dtrace_is_restricted() || dtrace_destructive_disallow)
2d21ac55
A
14066 return (EACCES);
14067
14068 state->dts_cred.dcr_destructive = 1;
14069 break;
14070
14071 case DTRACEOPT_BUFSIZE:
14072 case DTRACEOPT_DYNVARSIZE:
14073 case DTRACEOPT_AGGSIZE:
14074 case DTRACEOPT_SPECSIZE:
14075 case DTRACEOPT_STRSIZE:
14076 if (val < 0)
14077 return (EINVAL);
14078
14079 if (val >= LONG_MAX) {
14080 /*
14081 * If this is an otherwise negative value, set it to
14082 * the highest multiple of 128m less than LONG_MAX.
14083 * Technically, we're adjusting the size without
14084 * regard to the buffer resizing policy, but in fact,
14085 * this has no effect -- if we set the buffer size to
14086 * ~LONG_MAX and the buffer policy is ultimately set to
14087 * be "manual", the buffer allocation is guaranteed to
14088 * fail, if only because the allocation requires two
14089 * buffers. (We set the the size to the highest
14090 * multiple of 128m because it ensures that the size
14091 * will remain a multiple of a megabyte when
14092 * repeatedly halved -- all the way down to 15m.)
14093 */
14094 val = LONG_MAX - (1 << 27) + 1;
14095 }
14096 }
14097
14098 state->dts_options[option] = val;
14099
14100 return (0);
14101}
14102
14103static void
14104dtrace_state_destroy(dtrace_state_t *state)
14105{
14106 dtrace_ecb_t *ecb;
14107 dtrace_vstate_t *vstate = &state->dts_vstate;
14108 minor_t minor = getminor(state->dts_dev);
c910b4d9 14109 int i, bufsize = (int)NCPU * sizeof (dtrace_buffer_t);
2d21ac55
A
14110 dtrace_speculation_t *spec = state->dts_speculations;
14111 int nspec = state->dts_nspeculations;
14112 uint32_t match;
14113
5ba3f43e
A
14114 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
14115 LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
14116
14117 /*
14118 * First, retract any retained enablings for this state.
14119 */
14120 dtrace_enabling_retract(state);
14121 ASSERT(state->dts_nretained == 0);
14122
14123 if (state->dts_activity == DTRACE_ACTIVITY_ACTIVE ||
14124 state->dts_activity == DTRACE_ACTIVITY_DRAINING) {
14125 /*
14126 * We have managed to come into dtrace_state_destroy() on a
14127 * hot enabling -- almost certainly because of a disorderly
14128 * shutdown of a consumer. (That is, a consumer that is
14129 * exiting without having called dtrace_stop().) In this case,
14130 * we're going to set our activity to be KILLED, and then
14131 * issue a sync to be sure that everyone is out of probe
14132 * context before we start blowing away ECBs.
14133 */
14134 state->dts_activity = DTRACE_ACTIVITY_KILLED;
14135 dtrace_sync();
14136 }
14137
14138 /*
14139 * Release the credential hold we took in dtrace_state_create().
14140 */
14141 if (state->dts_cred.dcr_cred != NULL)
d9a64523 14142 kauth_cred_unref(&state->dts_cred.dcr_cred);
2d21ac55
A
14143
14144 /*
14145 * Now we can safely disable and destroy any enabled probes. Because
14146 * any DTRACE_PRIV_KERNEL probes may actually be slowing our progress
14147 * (especially if they're all enabled), we take two passes through the
14148 * ECBs: in the first, we disable just DTRACE_PRIV_KERNEL probes, and
14149 * in the second we disable whatever is left over.
14150 */
14151 for (match = DTRACE_PRIV_KERNEL; ; match = 0) {
14152 for (i = 0; i < state->dts_necbs; i++) {
14153 if ((ecb = state->dts_ecbs[i]) == NULL)
14154 continue;
14155
14156 if (match && ecb->dte_probe != NULL) {
14157 dtrace_probe_t *probe = ecb->dte_probe;
14158 dtrace_provider_t *prov = probe->dtpr_provider;
14159
14160 if (!(prov->dtpv_priv.dtpp_flags & match))
14161 continue;
14162 }
14163
14164 dtrace_ecb_disable(ecb);
14165 dtrace_ecb_destroy(ecb);
14166 }
14167
14168 if (!match)
14169 break;
14170 }
14171
14172 /*
14173 * Before we free the buffers, perform one more sync to assure that
14174 * every CPU is out of probe context.
14175 */
14176 dtrace_sync();
14177
14178 dtrace_buffer_free(state->dts_buffer);
14179 dtrace_buffer_free(state->dts_aggbuffer);
14180
14181 for (i = 0; i < nspec; i++)
14182 dtrace_buffer_free(spec[i].dtsp_buffer);
14183
14184 if (state->dts_cleaner != CYCLIC_NONE)
14185 cyclic_remove(state->dts_cleaner);
14186
14187 if (state->dts_deadman != CYCLIC_NONE)
14188 cyclic_remove(state->dts_deadman);
14189
14190 dtrace_dstate_fini(&vstate->dtvs_dynvars);
14191 dtrace_vstate_fini(vstate);
14192 kmem_free(state->dts_ecbs, state->dts_necbs * sizeof (dtrace_ecb_t *));
14193
14194 if (state->dts_aggregations != NULL) {
b0d623f7 14195#if DEBUG
2d21ac55
A
14196 for (i = 0; i < state->dts_naggregations; i++)
14197 ASSERT(state->dts_aggregations[i] == NULL);
14198#endif
14199 ASSERT(state->dts_naggregations > 0);
14200 kmem_free(state->dts_aggregations,
14201 state->dts_naggregations * sizeof (dtrace_aggregation_t *));
14202 }
14203
14204 kmem_free(state->dts_buffer, bufsize);
14205 kmem_free(state->dts_aggbuffer, bufsize);
14206
14207 for (i = 0; i < nspec; i++)
14208 kmem_free(spec[i].dtsp_buffer, bufsize);
14209
14210 kmem_free(spec, nspec * sizeof (dtrace_speculation_t));
14211
14212 dtrace_format_destroy(state);
14213
14214 vmem_destroy(state->dts_aggid_arena);
39037602 14215 dtrace_state_free(minor);
2d21ac55
A
14216}
14217
14218/*
14219 * DTrace Anonymous Enabling Functions
14220 */
d9a64523
A
14221
14222int
14223dtrace_keep_kernel_symbols(void)
14224{
14225 if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed()) {
14226 return 0;
14227 }
14228
14229 if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL)
14230 return 1;
14231
14232 return 0;
14233}
14234
2d21ac55
A
14235static dtrace_state_t *
14236dtrace_anon_grab(void)
14237{
14238 dtrace_state_t *state;
14239
5ba3f43e 14240 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
14241
14242 if ((state = dtrace_anon.dta_state) == NULL) {
14243 ASSERT(dtrace_anon.dta_enabling == NULL);
14244 return (NULL);
14245 }
14246
14247 ASSERT(dtrace_anon.dta_enabling != NULL);
14248 ASSERT(dtrace_retained != NULL);
14249
14250 dtrace_enabling_destroy(dtrace_anon.dta_enabling);
14251 dtrace_anon.dta_enabling = NULL;
14252 dtrace_anon.dta_state = NULL;
14253
14254 return (state);
14255}
14256
14257static void
14258dtrace_anon_property(void)
14259{
14260 int i, rv;
14261 dtrace_state_t *state;
14262 dof_hdr_t *dof;
14263 char c[32]; /* enough for "dof-data-" + digits */
14264
5ba3f43e
A
14265 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
14266 LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
14267
14268 for (i = 0; ; i++) {
14269 (void) snprintf(c, sizeof (c), "dof-data-%d", i);
14270
14271 dtrace_err_verbose = 1;
14272
14273 if ((dof = dtrace_dof_property(c)) == NULL) {
14274 dtrace_err_verbose = 0;
14275 break;
14276 }
14277
d9a64523 14278#ifdef illumos
2d21ac55
A
14279 /*
14280 * We want to create anonymous state, so we need to transition
14281 * the kernel debugger to indicate that DTrace is active. If
14282 * this fails (e.g. because the debugger has modified text in
14283 * some way), we won't continue with the processing.
14284 */
14285 if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
14286 cmn_err(CE_NOTE, "kernel debugger active; anonymous "
14287 "enabling ignored.");
14288 dtrace_dof_destroy(dof);
14289 break;
14290 }
d9a64523 14291#endif
2d21ac55
A
14292
14293 /*
14294 * If we haven't allocated an anonymous state, we'll do so now.
14295 */
14296 if ((state = dtrace_anon.dta_state) == NULL) {
b0d623f7
A
14297 rv = dtrace_state_create(NULL, NULL, &state);
14298 dtrace_anon.dta_state = state;
14299 if (rv != 0 || state == NULL) {
2d21ac55
A
14300 /*
14301 * This basically shouldn't happen: the only
14302 * failure mode from dtrace_state_create() is a
14303 * failure of ddi_soft_state_zalloc() that
14304 * itself should never happen. Still, the
14305 * interface allows for a failure mode, and
14306 * we want to fail as gracefully as possible:
14307 * we'll emit an error message and cease
14308 * processing anonymous state in this case.
14309 */
14310 cmn_err(CE_WARN, "failed to create "
14311 "anonymous state");
14312 dtrace_dof_destroy(dof);
14313 break;
14314 }
14315 }
14316
14317 rv = dtrace_dof_slurp(dof, &state->dts_vstate, CRED(),
14318 &dtrace_anon.dta_enabling, 0, B_TRUE);
14319
14320 if (rv == 0)
14321 rv = dtrace_dof_options(dof, state);
14322
14323 dtrace_err_verbose = 0;
14324 dtrace_dof_destroy(dof);
14325
14326 if (rv != 0) {
14327 /*
14328 * This is malformed DOF; chuck any anonymous state
14329 * that we created.
14330 */
14331 ASSERT(dtrace_anon.dta_enabling == NULL);
14332 dtrace_state_destroy(state);
14333 dtrace_anon.dta_state = NULL;
14334 break;
14335 }
14336
14337 ASSERT(dtrace_anon.dta_enabling != NULL);
14338 }
14339
14340 if (dtrace_anon.dta_enabling != NULL) {
14341 int rval;
14342
14343 /*
14344 * dtrace_enabling_retain() can only fail because we are
14345 * trying to retain more enablings than are allowed -- but
14346 * we only have one anonymous enabling, and we are guaranteed
14347 * to be allowed at least one retained enabling; we assert
14348 * that dtrace_enabling_retain() returns success.
14349 */
14350 rval = dtrace_enabling_retain(dtrace_anon.dta_enabling);
14351 ASSERT(rval == 0);
14352
14353 dtrace_enabling_dump(dtrace_anon.dta_enabling);
14354 }
14355}
14356
14357/*
14358 * DTrace Helper Functions
14359 */
14360static void
14361dtrace_helper_trace(dtrace_helper_action_t *helper,
14362 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, int where)
14363{
b0d623f7
A
14364 uint32_t size, next, nnext;
14365 int i;
2d21ac55
A
14366 dtrace_helptrace_t *ent;
14367 uint16_t flags = cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
14368
14369 if (!dtrace_helptrace_enabled)
14370 return;
14371
b0d623f7 14372 ASSERT((uint32_t)vstate->dtvs_nlocals <= dtrace_helptrace_nlocals);
2d21ac55
A
14373
14374 /*
14375 * What would a tracing framework be without its own tracing
14376 * framework? (Well, a hell of a lot simpler, for starters...)
14377 */
14378 size = sizeof (dtrace_helptrace_t) + dtrace_helptrace_nlocals *
14379 sizeof (uint64_t) - sizeof (uint64_t);
14380
14381 /*
14382 * Iterate until we can allocate a slot in the trace buffer.
14383 */
14384 do {
14385 next = dtrace_helptrace_next;
14386
14387 if (next + size < dtrace_helptrace_bufsize) {
14388 nnext = next + size;
14389 } else {
14390 nnext = size;
14391 }
14392 } while (dtrace_cas32(&dtrace_helptrace_next, next, nnext) != next);
14393
14394 /*
14395 * We have our slot; fill it in.
14396 */
14397 if (nnext == size)
14398 next = 0;
14399
14400 ent = (dtrace_helptrace_t *)&dtrace_helptrace_buffer[next];
14401 ent->dtht_helper = helper;
14402 ent->dtht_where = where;
14403 ent->dtht_nlocals = vstate->dtvs_nlocals;
14404
14405 ent->dtht_fltoffs = (mstate->dtms_present & DTRACE_MSTATE_FLTOFFS) ?
14406 mstate->dtms_fltoffs : -1;
14407 ent->dtht_fault = DTRACE_FLAGS2FLT(flags);
14408 ent->dtht_illval = cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
14409
14410 for (i = 0; i < vstate->dtvs_nlocals; i++) {
14411 dtrace_statvar_t *svar;
14412
14413 if ((svar = vstate->dtvs_locals[i]) == NULL)
14414 continue;
14415
c910b4d9 14416 ASSERT(svar->dtsv_size >= (int)NCPU * sizeof (uint64_t));
2d21ac55
A
14417 ent->dtht_locals[i] =
14418 ((uint64_t *)(uintptr_t)svar->dtsv_data)[CPU->cpu_id];
14419 }
14420}
14421
14422static uint64_t
14423dtrace_helper(int which, dtrace_mstate_t *mstate,
14424 dtrace_state_t *state, uint64_t arg0, uint64_t arg1)
14425{
14426 uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
14427 uint64_t sarg0 = mstate->dtms_arg[0];
14428 uint64_t sarg1 = mstate->dtms_arg[1];
c910b4d9 14429 uint64_t rval = 0;
2d21ac55
A
14430 dtrace_helpers_t *helpers = curproc->p_dtrace_helpers;
14431 dtrace_helper_action_t *helper;
14432 dtrace_vstate_t *vstate;
14433 dtrace_difo_t *pred;
14434 int i, trace = dtrace_helptrace_enabled;
14435
14436 ASSERT(which >= 0 && which < DTRACE_NHELPER_ACTIONS);
14437
14438 if (helpers == NULL)
14439 return (0);
14440
14441 if ((helper = helpers->dthps_actions[which]) == NULL)
14442 return (0);
14443
14444 vstate = &helpers->dthps_vstate;
14445 mstate->dtms_arg[0] = arg0;
14446 mstate->dtms_arg[1] = arg1;
14447
14448 /*
14449 * Now iterate over each helper. If its predicate evaluates to 'true',
14450 * we'll call the corresponding actions. Note that the below calls
14451 * to dtrace_dif_emulate() may set faults in machine state. This is
14452 * okay: our caller (the outer dtrace_dif_emulate()) will simply plow
14453 * the stored DIF offset with its own (which is the desired behavior).
14454 * Also, note the calls to dtrace_dif_emulate() may allocate scratch
14455 * from machine state; this is okay, too.
14456 */
14457 for (; helper != NULL; helper = helper->dtha_next) {
14458 if ((pred = helper->dtha_predicate) != NULL) {
14459 if (trace)
14460 dtrace_helper_trace(helper, mstate, vstate, 0);
14461
14462 if (!dtrace_dif_emulate(pred, mstate, vstate, state))
14463 goto next;
14464
14465 if (*flags & CPU_DTRACE_FAULT)
14466 goto err;
14467 }
14468
14469 for (i = 0; i < helper->dtha_nactions; i++) {
14470 if (trace)
14471 dtrace_helper_trace(helper,
14472 mstate, vstate, i + 1);
14473
14474 rval = dtrace_dif_emulate(helper->dtha_actions[i],
14475 mstate, vstate, state);
14476
14477 if (*flags & CPU_DTRACE_FAULT)
14478 goto err;
14479 }
14480
14481next:
14482 if (trace)
14483 dtrace_helper_trace(helper, mstate, vstate,
14484 DTRACE_HELPTRACE_NEXT);
14485 }
14486
14487 if (trace)
14488 dtrace_helper_trace(helper, mstate, vstate,
14489 DTRACE_HELPTRACE_DONE);
14490
14491 /*
14492 * Restore the arg0 that we saved upon entry.
14493 */
14494 mstate->dtms_arg[0] = sarg0;
14495 mstate->dtms_arg[1] = sarg1;
14496
14497 return (rval);
14498
14499err:
14500 if (trace)
14501 dtrace_helper_trace(helper, mstate, vstate,
14502 DTRACE_HELPTRACE_ERR);
14503
14504 /*
14505 * Restore the arg0 that we saved upon entry.
14506 */
14507 mstate->dtms_arg[0] = sarg0;
14508 mstate->dtms_arg[1] = sarg1;
14509
fe8ab488 14510 return (0);
2d21ac55
A
14511}
14512
14513static void
14514dtrace_helper_action_destroy(dtrace_helper_action_t *helper,
14515 dtrace_vstate_t *vstate)
14516{
14517 int i;
14518
14519 if (helper->dtha_predicate != NULL)
14520 dtrace_difo_release(helper->dtha_predicate, vstate);
14521
14522 for (i = 0; i < helper->dtha_nactions; i++) {
14523 ASSERT(helper->dtha_actions[i] != NULL);
14524 dtrace_difo_release(helper->dtha_actions[i], vstate);
14525 }
14526
14527 kmem_free(helper->dtha_actions,
14528 helper->dtha_nactions * sizeof (dtrace_difo_t *));
14529 kmem_free(helper, sizeof (dtrace_helper_action_t));
14530}
14531
2d21ac55
A
14532static int
14533dtrace_helper_destroygen(proc_t* p, int gen)
14534{
2d21ac55
A
14535 dtrace_helpers_t *help = p->p_dtrace_helpers;
14536 dtrace_vstate_t *vstate;
b0d623f7 14537 uint_t i;
2d21ac55 14538
d9a64523 14539 LCK_MTX_ASSERT(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED);
5ba3f43e 14540 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
14541
14542 if (help == NULL || gen > help->dthps_generation)
14543 return (EINVAL);
14544
14545 vstate = &help->dthps_vstate;
14546
14547 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
14548 dtrace_helper_action_t *last = NULL, *h, *next;
14549
14550 for (h = help->dthps_actions[i]; h != NULL; h = next) {
14551 next = h->dtha_next;
14552
14553 if (h->dtha_generation == gen) {
14554 if (last != NULL) {
14555 last->dtha_next = next;
14556 } else {
14557 help->dthps_actions[i] = next;
14558 }
14559
14560 dtrace_helper_action_destroy(h, vstate);
14561 } else {
14562 last = h;
14563 }
14564 }
14565 }
14566
14567 /*
14568 * Interate until we've cleared out all helper providers with the
14569 * given generation number.
14570 */
14571 for (;;) {
c910b4d9 14572 dtrace_helper_provider_t *prov = NULL;
2d21ac55
A
14573
14574 /*
14575 * Look for a helper provider with the right generation. We
14576 * have to start back at the beginning of the list each time
14577 * because we drop dtrace_lock. It's unlikely that we'll make
14578 * more than two passes.
14579 */
14580 for (i = 0; i < help->dthps_nprovs; i++) {
14581 prov = help->dthps_provs[i];
14582
14583 if (prov->dthp_generation == gen)
14584 break;
14585 }
14586
14587 /*
14588 * If there were no matches, we're done.
14589 */
14590 if (i == help->dthps_nprovs)
14591 break;
14592
14593 /*
14594 * Move the last helper provider into this slot.
14595 */
14596 help->dthps_nprovs--;
14597 help->dthps_provs[i] = help->dthps_provs[help->dthps_nprovs];
14598 help->dthps_provs[help->dthps_nprovs] = NULL;
14599
14600 lck_mtx_unlock(&dtrace_lock);
14601
14602 /*
14603 * If we have a meta provider, remove this helper provider.
14604 */
2d21ac55
A
14605 if (dtrace_meta_pid != NULL) {
14606 ASSERT(dtrace_deferred_pid == NULL);
14607 dtrace_helper_provider_remove(&prov->dthp_prov,
d190cdc3 14608 p);
2d21ac55 14609 }
2d21ac55
A
14610
14611 dtrace_helper_provider_destroy(prov);
14612
14613 lck_mtx_lock(&dtrace_lock);
14614 }
14615
14616 return (0);
14617}
14618
14619static int
14620dtrace_helper_validate(dtrace_helper_action_t *helper)
14621{
14622 int err = 0, i;
14623 dtrace_difo_t *dp;
14624
14625 if ((dp = helper->dtha_predicate) != NULL)
14626 err += dtrace_difo_validate_helper(dp);
14627
14628 for (i = 0; i < helper->dtha_nactions; i++)
14629 err += dtrace_difo_validate_helper(helper->dtha_actions[i]);
14630
14631 return (err == 0);
14632}
14633
2d21ac55
A
14634static int
14635dtrace_helper_action_add(proc_t* p, int which, dtrace_ecbdesc_t *ep)
2d21ac55
A
14636{
14637 dtrace_helpers_t *help;
14638 dtrace_helper_action_t *helper, *last;
14639 dtrace_actdesc_t *act;
14640 dtrace_vstate_t *vstate;
14641 dtrace_predicate_t *pred;
14642 int count = 0, nactions = 0, i;
14643
14644 if (which < 0 || which >= DTRACE_NHELPER_ACTIONS)
14645 return (EINVAL);
14646
2d21ac55 14647 help = p->p_dtrace_helpers;
2d21ac55
A
14648 last = help->dthps_actions[which];
14649 vstate = &help->dthps_vstate;
14650
14651 for (count = 0; last != NULL; last = last->dtha_next) {
14652 count++;
14653 if (last->dtha_next == NULL)
14654 break;
14655 }
14656
14657 /*
14658 * If we already have dtrace_helper_actions_max helper actions for this
14659 * helper action type, we'll refuse to add a new one.
14660 */
14661 if (count >= dtrace_helper_actions_max)
14662 return (ENOSPC);
14663
14664 helper = kmem_zalloc(sizeof (dtrace_helper_action_t), KM_SLEEP);
14665 helper->dtha_generation = help->dthps_generation;
14666
14667 if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) {
14668 ASSERT(pred->dtp_difo != NULL);
14669 dtrace_difo_hold(pred->dtp_difo);
14670 helper->dtha_predicate = pred->dtp_difo;
14671 }
14672
14673 for (act = ep->dted_action; act != NULL; act = act->dtad_next) {
14674 if (act->dtad_kind != DTRACEACT_DIFEXPR)
14675 goto err;
14676
14677 if (act->dtad_difo == NULL)
14678 goto err;
14679
14680 nactions++;
14681 }
14682
14683 helper->dtha_actions = kmem_zalloc(sizeof (dtrace_difo_t *) *
14684 (helper->dtha_nactions = nactions), KM_SLEEP);
14685
14686 for (act = ep->dted_action, i = 0; act != NULL; act = act->dtad_next) {
14687 dtrace_difo_hold(act->dtad_difo);
14688 helper->dtha_actions[i++] = act->dtad_difo;
14689 }
14690
14691 if (!dtrace_helper_validate(helper))
14692 goto err;
14693
14694 if (last == NULL) {
14695 help->dthps_actions[which] = helper;
14696 } else {
14697 last->dtha_next = helper;
14698 }
14699
b0d623f7 14700 if ((uint32_t)vstate->dtvs_nlocals > dtrace_helptrace_nlocals) {
2d21ac55
A
14701 dtrace_helptrace_nlocals = vstate->dtvs_nlocals;
14702 dtrace_helptrace_next = 0;
14703 }
14704
14705 return (0);
14706err:
14707 dtrace_helper_action_destroy(helper, vstate);
14708 return (EINVAL);
14709}
14710
14711static void
14712dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help,
14713 dof_helper_t *dofhp)
14714{
d9a64523 14715 LCK_MTX_ASSERT(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED);
5ba3f43e 14716 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED);
2d21ac55 14717
2d21ac55
A
14718 lck_mtx_lock(&dtrace_lock);
14719
14720 if (!dtrace_attached() || dtrace_meta_pid == NULL) {
14721 /*
14722 * If the dtrace module is loaded but not attached, or if
14723 * there aren't isn't a meta provider registered to deal with
14724 * these provider descriptions, we need to postpone creating
14725 * the actual providers until later.
14726 */
14727
14728 if (help->dthps_next == NULL && help->dthps_prev == NULL &&
14729 dtrace_deferred_pid != help) {
14730 help->dthps_deferred = 1;
14731 help->dthps_pid = p->p_pid;
14732 help->dthps_next = dtrace_deferred_pid;
14733 help->dthps_prev = NULL;
14734 if (dtrace_deferred_pid != NULL)
14735 dtrace_deferred_pid->dthps_prev = help;
14736 dtrace_deferred_pid = help;
14737 }
14738
14739 lck_mtx_unlock(&dtrace_lock);
14740
14741 } else if (dofhp != NULL) {
14742 /*
14743 * If the dtrace module is loaded and we have a particular
14744 * helper provider description, pass that off to the
14745 * meta provider.
14746 */
14747
14748 lck_mtx_unlock(&dtrace_lock);
14749
d190cdc3 14750 dtrace_helper_provide(dofhp, p);
2d21ac55
A
14751
14752 } else {
14753 /*
14754 * Otherwise, just pass all the helper provider descriptions
14755 * off to the meta provider.
14756 */
14757
b0d623f7 14758 uint_t i;
2d21ac55
A
14759 lck_mtx_unlock(&dtrace_lock);
14760
14761 for (i = 0; i < help->dthps_nprovs; i++) {
14762 dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
d190cdc3 14763 p);
2d21ac55
A
14764 }
14765 }
2d21ac55
A
14766}
14767
2d21ac55
A
14768static int
14769dtrace_helper_provider_add(proc_t* p, dof_helper_t *dofhp, int gen)
2d21ac55
A
14770{
14771 dtrace_helpers_t *help;
14772 dtrace_helper_provider_t *hprov, **tmp_provs;
14773 uint_t tmp_maxprovs, i;
14774
5ba3f43e 14775 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55 14776 help = p->p_dtrace_helpers;
2d21ac55
A
14777 ASSERT(help != NULL);
14778
14779 /*
14780 * If we already have dtrace_helper_providers_max helper providers,
14781 * we're refuse to add a new one.
14782 */
14783 if (help->dthps_nprovs >= dtrace_helper_providers_max)
14784 return (ENOSPC);
14785
14786 /*
14787 * Check to make sure this isn't a duplicate.
14788 */
14789 for (i = 0; i < help->dthps_nprovs; i++) {
14790 if (dofhp->dofhp_addr ==
14791 help->dthps_provs[i]->dthp_prov.dofhp_addr)
14792 return (EALREADY);
14793 }
14794
14795 hprov = kmem_zalloc(sizeof (dtrace_helper_provider_t), KM_SLEEP);
14796 hprov->dthp_prov = *dofhp;
14797 hprov->dthp_ref = 1;
14798 hprov->dthp_generation = gen;
14799
14800 /*
14801 * Allocate a bigger table for helper providers if it's already full.
14802 */
14803 if (help->dthps_maxprovs == help->dthps_nprovs) {
14804 tmp_maxprovs = help->dthps_maxprovs;
14805 tmp_provs = help->dthps_provs;
14806
14807 if (help->dthps_maxprovs == 0)
14808 help->dthps_maxprovs = 2;
14809 else
14810 help->dthps_maxprovs *= 2;
14811 if (help->dthps_maxprovs > dtrace_helper_providers_max)
14812 help->dthps_maxprovs = dtrace_helper_providers_max;
14813
14814 ASSERT(tmp_maxprovs < help->dthps_maxprovs);
14815
14816 help->dthps_provs = kmem_zalloc(help->dthps_maxprovs *
14817 sizeof (dtrace_helper_provider_t *), KM_SLEEP);
14818
14819 if (tmp_provs != NULL) {
14820 bcopy(tmp_provs, help->dthps_provs, tmp_maxprovs *
14821 sizeof (dtrace_helper_provider_t *));
14822 kmem_free(tmp_provs, tmp_maxprovs *
14823 sizeof (dtrace_helper_provider_t *));
14824 }
14825 }
14826
14827 help->dthps_provs[help->dthps_nprovs] = hprov;
14828 help->dthps_nprovs++;
14829
14830 return (0);
14831}
14832
14833static void
14834dtrace_helper_provider_destroy(dtrace_helper_provider_t *hprov)
14835{
14836 lck_mtx_lock(&dtrace_lock);
14837
14838 if (--hprov->dthp_ref == 0) {
14839 dof_hdr_t *dof;
14840 lck_mtx_unlock(&dtrace_lock);
14841 dof = (dof_hdr_t *)(uintptr_t)hprov->dthp_prov.dofhp_dof;
14842 dtrace_dof_destroy(dof);
14843 kmem_free(hprov, sizeof (dtrace_helper_provider_t));
14844 } else {
14845 lck_mtx_unlock(&dtrace_lock);
14846 }
14847}
14848
14849static int
14850dtrace_helper_provider_validate(dof_hdr_t *dof, dof_sec_t *sec)
14851{
14852 uintptr_t daddr = (uintptr_t)dof;
14853 dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
14854 dof_provider_t *provider;
14855 dof_probe_t *probe;
14856 uint8_t *arg;
14857 char *strtab, *typestr;
14858 dof_stridx_t typeidx;
14859 size_t typesz;
14860 uint_t nprobes, j, k;
14861
14862 ASSERT(sec->dofs_type == DOF_SECT_PROVIDER);
14863
14864 if (sec->dofs_offset & (sizeof (uint_t) - 1)) {
14865 dtrace_dof_error(dof, "misaligned section offset");
14866 return (-1);
14867 }
14868
14869 /*
14870 * The section needs to be large enough to contain the DOF provider
14871 * structure appropriate for the given version.
14872 */
14873 if (sec->dofs_size <
14874 ((dof->dofh_ident[DOF_ID_VERSION] == DOF_VERSION_1) ?
14875 offsetof(dof_provider_t, dofpv_prenoffs) :
14876 sizeof (dof_provider_t))) {
14877 dtrace_dof_error(dof, "provider section too small");
14878 return (-1);
14879 }
14880
14881 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
14882 str_sec = dtrace_dof_sect(dof, DOF_SECT_STRTAB, provider->dofpv_strtab);
14883 prb_sec = dtrace_dof_sect(dof, DOF_SECT_PROBES, provider->dofpv_probes);
14884 arg_sec = dtrace_dof_sect(dof, DOF_SECT_PRARGS, provider->dofpv_prargs);
14885 off_sec = dtrace_dof_sect(dof, DOF_SECT_PROFFS, provider->dofpv_proffs);
14886
14887 if (str_sec == NULL || prb_sec == NULL ||
14888 arg_sec == NULL || off_sec == NULL)
14889 return (-1);
14890
14891 enoff_sec = NULL;
14892
14893 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
14894 provider->dofpv_prenoffs != DOF_SECT_NONE &&
14895 (enoff_sec = dtrace_dof_sect(dof, DOF_SECT_PRENOFFS,
14896 provider->dofpv_prenoffs)) == NULL)
14897 return (-1);
14898
14899 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
14900
14901 if (provider->dofpv_name >= str_sec->dofs_size ||
14902 strlen(strtab + provider->dofpv_name) >= DTRACE_PROVNAMELEN) {
14903 dtrace_dof_error(dof, "invalid provider name");
14904 return (-1);
14905 }
14906
14907 if (prb_sec->dofs_entsize == 0 ||
14908 prb_sec->dofs_entsize > prb_sec->dofs_size) {
14909 dtrace_dof_error(dof, "invalid entry size");
14910 return (-1);
14911 }
14912
14913 if (prb_sec->dofs_entsize & (sizeof (uintptr_t) - 1)) {
14914 dtrace_dof_error(dof, "misaligned entry size");
14915 return (-1);
14916 }
14917
14918 if (off_sec->dofs_entsize != sizeof (uint32_t)) {
14919 dtrace_dof_error(dof, "invalid entry size");
14920 return (-1);
14921 }
14922
14923 if (off_sec->dofs_offset & (sizeof (uint32_t) - 1)) {
14924 dtrace_dof_error(dof, "misaligned section offset");
14925 return (-1);
14926 }
14927
14928 if (arg_sec->dofs_entsize != sizeof (uint8_t)) {
14929 dtrace_dof_error(dof, "invalid entry size");
14930 return (-1);
14931 }
14932
14933 arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);
14934
14935 nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;
14936
14937 /*
14938 * Take a pass through the probes to check for errors.
14939 */
14940 for (j = 0; j < nprobes; j++) {
14941 probe = (dof_probe_t *)(uintptr_t)(daddr +
14942 prb_sec->dofs_offset + j * prb_sec->dofs_entsize);
14943
14944 if (probe->dofpr_func >= str_sec->dofs_size) {
14945 dtrace_dof_error(dof, "invalid function name");
14946 return (-1);
14947 }
14948
14949 if (strlen(strtab + probe->dofpr_func) >= DTRACE_FUNCNAMELEN) {
14950 dtrace_dof_error(dof, "function name too long");
14951 return (-1);
14952 }
14953
14954 if (probe->dofpr_name >= str_sec->dofs_size ||
14955 strlen(strtab + probe->dofpr_name) >= DTRACE_NAMELEN) {
14956 dtrace_dof_error(dof, "invalid probe name");
14957 return (-1);
14958 }
14959
14960 /*
14961 * The offset count must not wrap the index, and the offsets
14962 * must also not overflow the section's data.
14963 */
14964 if (probe->dofpr_offidx + probe->dofpr_noffs <
14965 probe->dofpr_offidx ||
14966 (probe->dofpr_offidx + probe->dofpr_noffs) *
14967 off_sec->dofs_entsize > off_sec->dofs_size) {
14968 dtrace_dof_error(dof, "invalid probe offset");
14969 return (-1);
14970 }
14971
14972 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1) {
14973 /*
14974 * If there's no is-enabled offset section, make sure
14975 * there aren't any is-enabled offsets. Otherwise
14976 * perform the same checks as for probe offsets
14977 * (immediately above).
14978 */
14979 if (enoff_sec == NULL) {
14980 if (probe->dofpr_enoffidx != 0 ||
14981 probe->dofpr_nenoffs != 0) {
14982 dtrace_dof_error(dof, "is-enabled "
14983 "offsets with null section");
14984 return (-1);
14985 }
14986 } else if (probe->dofpr_enoffidx +
14987 probe->dofpr_nenoffs < probe->dofpr_enoffidx ||
14988 (probe->dofpr_enoffidx + probe->dofpr_nenoffs) *
14989 enoff_sec->dofs_entsize > enoff_sec->dofs_size) {
14990 dtrace_dof_error(dof, "invalid is-enabled "
14991 "offset");
14992 return (-1);
14993 }
14994
14995 if (probe->dofpr_noffs + probe->dofpr_nenoffs == 0) {
14996 dtrace_dof_error(dof, "zero probe and "
14997 "is-enabled offsets");
14998 return (-1);
14999 }
15000 } else if (probe->dofpr_noffs == 0) {
15001 dtrace_dof_error(dof, "zero probe offsets");
15002 return (-1);
15003 }
15004
15005 if (probe->dofpr_argidx + probe->dofpr_xargc <
15006 probe->dofpr_argidx ||
15007 (probe->dofpr_argidx + probe->dofpr_xargc) *
15008 arg_sec->dofs_entsize > arg_sec->dofs_size) {
15009 dtrace_dof_error(dof, "invalid args");
15010 return (-1);
15011 }
15012
15013 typeidx = probe->dofpr_nargv;
15014 typestr = strtab + probe->dofpr_nargv;
15015 for (k = 0; k < probe->dofpr_nargc; k++) {
15016 if (typeidx >= str_sec->dofs_size) {
15017 dtrace_dof_error(dof, "bad "
15018 "native argument type");
15019 return (-1);
15020 }
15021
15022 typesz = strlen(typestr) + 1;
15023 if (typesz > DTRACE_ARGTYPELEN) {
15024 dtrace_dof_error(dof, "native "
15025 "argument type too long");
15026 return (-1);
15027 }
15028 typeidx += typesz;
15029 typestr += typesz;
15030 }
15031
15032 typeidx = probe->dofpr_xargv;
15033 typestr = strtab + probe->dofpr_xargv;
15034 for (k = 0; k < probe->dofpr_xargc; k++) {
15035 if (arg[probe->dofpr_argidx + k] > probe->dofpr_nargc) {
15036 dtrace_dof_error(dof, "bad "
15037 "native argument index");
15038 return (-1);
15039 }
15040
15041 if (typeidx >= str_sec->dofs_size) {
15042 dtrace_dof_error(dof, "bad "
15043 "translated argument type");
15044 return (-1);
15045 }
15046
15047 typesz = strlen(typestr) + 1;
15048 if (typesz > DTRACE_ARGTYPELEN) {
15049 dtrace_dof_error(dof, "translated argument "
15050 "type too long");
15051 return (-1);
15052 }
15053
15054 typeidx += typesz;
15055 typestr += typesz;
15056 }
15057 }
15058
15059 return (0);
15060}
15061
2d21ac55
A
15062static int
15063dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp)
2d21ac55
A
15064{
15065 dtrace_helpers_t *help;
15066 dtrace_vstate_t *vstate;
15067 dtrace_enabling_t *enab = NULL;
15068 int i, gen, rv, nhelpers = 0, nprovs = 0, destroy = 1;
15069 uintptr_t daddr = (uintptr_t)dof;
15070
d9a64523 15071 LCK_MTX_ASSERT(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED);
5ba3f43e 15072 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55 15073
2d21ac55
A
15074 if ((help = p->p_dtrace_helpers) == NULL)
15075 help = dtrace_helpers_create(p);
2d21ac55
A
15076
15077 vstate = &help->dthps_vstate;
15078
15079 if ((rv = dtrace_dof_slurp(dof, vstate, NULL, &enab,
15080 dhp != NULL ? dhp->dofhp_addr : 0, B_FALSE)) != 0) {
15081 dtrace_dof_destroy(dof);
15082 return (rv);
15083 }
15084
15085 /*
15086 * Look for helper providers and validate their descriptions.
15087 */
15088 if (dhp != NULL) {
b0d623f7 15089 for (i = 0; (uint32_t)i < dof->dofh_secnum; i++) {
2d21ac55
A
15090 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
15091 dof->dofh_secoff + i * dof->dofh_secsize);
15092
15093 if (sec->dofs_type != DOF_SECT_PROVIDER)
15094 continue;
15095
15096 if (dtrace_helper_provider_validate(dof, sec) != 0) {
15097 dtrace_enabling_destroy(enab);
15098 dtrace_dof_destroy(dof);
15099 return (-1);
15100 }
15101
15102 nprovs++;
15103 }
15104 }
15105
15106 /*
15107 * Now we need to walk through the ECB descriptions in the enabling.
15108 */
15109 for (i = 0; i < enab->dten_ndesc; i++) {
15110 dtrace_ecbdesc_t *ep = enab->dten_desc[i];
15111 dtrace_probedesc_t *desc = &ep->dted_probe;
15112
fe8ab488 15113 /* APPLE NOTE: Darwin employs size bounded string operation. */
b0d623f7
A
15114 if (!LIT_STRNEQL(desc->dtpd_provider, "dtrace"))
15115 continue;
2d21ac55 15116
b0d623f7
A
15117 if (!LIT_STRNEQL(desc->dtpd_mod, "helper"))
15118 continue;
15119
15120 if (!LIT_STRNEQL(desc->dtpd_func, "ustack"))
15121 continue;
b0d623f7 15122
b0d623f7
A
15123 if ((rv = dtrace_helper_action_add(p, DTRACE_HELPER_ACTION_USTACK,
15124 ep)) != 0) {
b0d623f7 15125 /*
2d21ac55
A
15126 * Adding this helper action failed -- we are now going
15127 * to rip out the entire generation and return failure.
15128 */
2d21ac55 15129 (void) dtrace_helper_destroygen(p, help->dthps_generation);
2d21ac55
A
15130 dtrace_enabling_destroy(enab);
15131 dtrace_dof_destroy(dof);
15132 return (-1);
15133 }
15134
15135 nhelpers++;
15136 }
15137
15138 if (nhelpers < enab->dten_ndesc)
15139 dtrace_dof_error(dof, "unmatched helpers");
15140
15141 gen = help->dthps_generation++;
15142 dtrace_enabling_destroy(enab);
15143
15144 if (dhp != NULL && nprovs > 0) {
15145 dhp->dofhp_dof = (uint64_t)(uintptr_t)dof;
2d21ac55 15146 if (dtrace_helper_provider_add(p, dhp, gen) == 0) {
2d21ac55 15147 lck_mtx_unlock(&dtrace_lock);
2d21ac55 15148 dtrace_helper_provider_register(p, help, dhp);
2d21ac55
A
15149 lck_mtx_lock(&dtrace_lock);
15150
15151 destroy = 0;
15152 }
15153 }
15154
15155 if (destroy)
15156 dtrace_dof_destroy(dof);
15157
15158 return (gen);
15159}
15160
2d21ac55 15161/*
fe8ab488 15162 * APPLE NOTE: DTrace lazy dof implementation
2d21ac55
A
15163 *
15164 * DTrace user static probes (USDT probes) and helper actions are loaded
15165 * in a process by processing dof sections. The dof sections are passed
15166 * into the kernel by dyld, in a dof_ioctl_data_t block. It is rather
15167 * expensive to process dof for a process that will never use it. There
15168 * is a memory cost (allocating the providers/probes), and a cpu cost
15169 * (creating the providers/probes).
15170 *
15171 * To reduce this cost, we use "lazy dof". The normal procedure for
15172 * dof processing is to copyin the dof(s) pointed to by the dof_ioctl_data_t
15173 * block, and invoke dtrace_helper_slurp() on them. When "lazy dof" is
15174 * used, each process retains the dof_ioctl_data_t block, instead of
15175 * copying in the data it points to.
15176 *
15177 * The dof_ioctl_data_t blocks are managed as if they were the actual
15178 * processed dof; on fork the block is copied to the child, on exec and
15179 * exit the block is freed.
15180 *
15181 * If the process loads library(s) containing additional dof, the
15182 * new dof_ioctl_data_t is merged with the existing block.
15183 *
15184 * There are a few catches that make this slightly more difficult.
15185 * When dyld registers dof_ioctl_data_t blocks, it expects a unique
15186 * identifier value for each dof in the block. In non-lazy dof terms,
15187 * this is the generation that dof was loaded in. If we hand back
15188 * a UID for a lazy dof, that same UID must be able to unload the
15189 * dof once it has become non-lazy. To meet this requirement, the
15190 * code that loads lazy dof requires that the UID's for dof(s) in
15191 * the lazy dof be sorted, and in ascending order. It is okay to skip
15192 * UID's, I.E., 1 -> 5 -> 6 is legal.
15193 *
15194 * Once a process has become non-lazy, it will stay non-lazy. All
15195 * future dof operations for that process will be non-lazy, even
15196 * if the dof mode transitions back to lazy.
15197 *
15198 * Always do lazy dof checks before non-lazy (I.E. In fork, exit, exec.).
15199 * That way if the lazy check fails due to transitioning to non-lazy, the
15200 * right thing is done with the newly faulted in dof.
15201 */
15202
15203/*
15204 * This method is a bit squicky. It must handle:
15205 *
15206 * dof should not be lazy.
15207 * dof should have been handled lazily, but there was an error
15208 * dof was handled lazily, and needs to be freed.
15209 * dof was handled lazily, and must not be freed.
15210 *
15211 *
15212 * Returns EACCESS if dof should be handled non-lazily.
15213 *
15214 * KERN_SUCCESS and all other return codes indicate lazy handling of dof.
15215 *
15216 * If the dofs data is claimed by this method, dofs_claimed will be set.
15217 * Callers should not free claimed dofs.
15218 */
b0d623f7 15219static int
2d21ac55
A
15220dtrace_lazy_dofs_add(proc_t *p, dof_ioctl_data_t* incoming_dofs, int *dofs_claimed)
15221{
15222 ASSERT(p);
15223 ASSERT(incoming_dofs && incoming_dofs->dofiod_count > 0);
15224
15225 int rval = 0;
15226 *dofs_claimed = 0;
15227
15228 lck_rw_lock_shared(&dtrace_dof_mode_lock);
15229
2d21ac55
A
15230 ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);
15231 ASSERT(dtrace_dof_mode != DTRACE_DOF_MODE_NEVER);
15232
15233 /*
15234 * Any existing helpers force non-lazy behavior.
15235 */
15236 if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON && (p->p_dtrace_helpers == NULL)) {
d9a64523 15237 dtrace_sprlock(p);
2d21ac55
A
15238
15239 dof_ioctl_data_t* existing_dofs = p->p_dtrace_lazy_dofs;
15240 unsigned int existing_dofs_count = (existing_dofs) ? existing_dofs->dofiod_count : 0;
15241 unsigned int i, merged_dofs_count = incoming_dofs->dofiod_count + existing_dofs_count;
15242
15243 /*
15244 * Range check...
15245 */
15246 if (merged_dofs_count == 0 || merged_dofs_count > 1024) {
15247 dtrace_dof_error(NULL, "lazy_dofs_add merged_dofs_count out of range");
15248 rval = EINVAL;
15249 goto unlock;
15250 }
15251
15252 /*
15253 * Each dof being added must be assigned a unique generation.
15254 */
15255 uint64_t generation = (existing_dofs) ? existing_dofs->dofiod_helpers[existing_dofs_count - 1].dofhp_dof + 1 : 1;
15256 for (i=0; i<incoming_dofs->dofiod_count; i++) {
15257 /*
15258 * We rely on these being the same so we can overwrite dofhp_dof and not lose info.
15259 */
15260 ASSERT(incoming_dofs->dofiod_helpers[i].dofhp_dof == incoming_dofs->dofiod_helpers[i].dofhp_addr);
15261 incoming_dofs->dofiod_helpers[i].dofhp_dof = generation++;
15262 }
15263
15264
15265 if (existing_dofs) {
15266 /*
15267 * Merge the existing and incoming dofs
15268 */
15269 size_t merged_dofs_size = DOF_IOCTL_DATA_T_SIZE(merged_dofs_count);
15270 dof_ioctl_data_t* merged_dofs = kmem_alloc(merged_dofs_size, KM_SLEEP);
15271
15272 bcopy(&existing_dofs->dofiod_helpers[0],
15273 &merged_dofs->dofiod_helpers[0],
15274 sizeof(dof_helper_t) * existing_dofs_count);
15275 bcopy(&incoming_dofs->dofiod_helpers[0],
15276 &merged_dofs->dofiod_helpers[existing_dofs_count],
15277 sizeof(dof_helper_t) * incoming_dofs->dofiod_count);
15278
15279 merged_dofs->dofiod_count = merged_dofs_count;
15280
15281 kmem_free(existing_dofs, DOF_IOCTL_DATA_T_SIZE(existing_dofs_count));
15282
15283 p->p_dtrace_lazy_dofs = merged_dofs;
15284 } else {
15285 /*
15286 * Claim the incoming dofs
15287 */
15288 *dofs_claimed = 1;
15289 p->p_dtrace_lazy_dofs = incoming_dofs;
15290 }
15291
15292#if DEBUG
15293 dof_ioctl_data_t* all_dofs = p->p_dtrace_lazy_dofs;
15294 for (i=0; i<all_dofs->dofiod_count-1; i++) {
15295 ASSERT(all_dofs->dofiod_helpers[i].dofhp_dof < all_dofs->dofiod_helpers[i+1].dofhp_dof);
15296 }
b0d623f7 15297#endif /* DEBUG */
2d21ac55
A
15298
15299unlock:
d9a64523 15300 dtrace_sprunlock(p);
2d21ac55
A
15301 } else {
15302 rval = EACCES;
15303 }
15304
15305 lck_rw_unlock_shared(&dtrace_dof_mode_lock);
15306
15307 return rval;
15308}
15309
15310/*
15311 * Returns:
15312 *
15313 * EINVAL: lazy dof is enabled, but the requested generation was not found.
15314 * EACCES: This removal needs to be handled non-lazily.
15315 */
b0d623f7 15316static int
2d21ac55
A
15317dtrace_lazy_dofs_remove(proc_t *p, int generation)
15318{
15319 int rval = EINVAL;
15320
15321 lck_rw_lock_shared(&dtrace_dof_mode_lock);
15322
2d21ac55
A
15323 ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);
15324 ASSERT(dtrace_dof_mode != DTRACE_DOF_MODE_NEVER);
15325
15326 /*
15327 * Any existing helpers force non-lazy behavior.
15328 */
15329 if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON && (p->p_dtrace_helpers == NULL)) {
d9a64523 15330 dtrace_sprlock(p);
2d21ac55
A
15331
15332 dof_ioctl_data_t* existing_dofs = p->p_dtrace_lazy_dofs;
15333
15334 if (existing_dofs) {
15335 int index, existing_dofs_count = existing_dofs->dofiod_count;
15336 for (index=0; index<existing_dofs_count; index++) {
15337 if ((int)existing_dofs->dofiod_helpers[index].dofhp_dof == generation) {
15338 dof_ioctl_data_t* removed_dofs = NULL;
15339
15340 /*
15341 * If there is only 1 dof, we'll delete it and swap in NULL.
15342 */
15343 if (existing_dofs_count > 1) {
15344 int removed_dofs_count = existing_dofs_count - 1;
15345 size_t removed_dofs_size = DOF_IOCTL_DATA_T_SIZE(removed_dofs_count);
15346
15347 removed_dofs = kmem_alloc(removed_dofs_size, KM_SLEEP);
15348 removed_dofs->dofiod_count = removed_dofs_count;
15349
15350 /*
15351 * copy the remaining data.
15352 */
15353 if (index > 0) {
15354 bcopy(&existing_dofs->dofiod_helpers[0],
15355 &removed_dofs->dofiod_helpers[0],
15356 index * sizeof(dof_helper_t));
15357 }
15358
15359 if (index < existing_dofs_count-1) {
15360 bcopy(&existing_dofs->dofiod_helpers[index+1],
15361 &removed_dofs->dofiod_helpers[index],
15362 (existing_dofs_count - index - 1) * sizeof(dof_helper_t));
15363 }
15364 }
15365
15366 kmem_free(existing_dofs, DOF_IOCTL_DATA_T_SIZE(existing_dofs_count));
15367
15368 p->p_dtrace_lazy_dofs = removed_dofs;
15369
15370 rval = KERN_SUCCESS;
15371
15372 break;
15373 }
15374 }
15375
15376#if DEBUG
15377 dof_ioctl_data_t* all_dofs = p->p_dtrace_lazy_dofs;
15378 if (all_dofs) {
15379 unsigned int i;
15380 for (i=0; i<all_dofs->dofiod_count-1; i++) {
15381 ASSERT(all_dofs->dofiod_helpers[i].dofhp_dof < all_dofs->dofiod_helpers[i+1].dofhp_dof);
15382 }
15383 }
15384#endif
15385
15386 }
d9a64523
A
15387 dtrace_sprunlock(p);
15388 } else {
2d21ac55
A
15389 rval = EACCES;
15390 }
15391
15392 lck_rw_unlock_shared(&dtrace_dof_mode_lock);
39037602 15393
2d21ac55
A
15394 return rval;
15395}
15396
15397void
15398dtrace_lazy_dofs_destroy(proc_t *p)
15399{
15400 lck_rw_lock_shared(&dtrace_dof_mode_lock);
d9a64523 15401 dtrace_sprlock(p);
2d21ac55 15402
2d21ac55
A
15403 ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);
15404
15405 dof_ioctl_data_t* lazy_dofs = p->p_dtrace_lazy_dofs;
15406 p->p_dtrace_lazy_dofs = NULL;
15407
d9a64523 15408 dtrace_sprunlock(p);
2d21ac55
A
15409 lck_rw_unlock_shared(&dtrace_dof_mode_lock);
15410
15411 if (lazy_dofs) {
15412 kmem_free(lazy_dofs, DOF_IOCTL_DATA_T_SIZE(lazy_dofs->dofiod_count));
15413 }
15414}
15415
2d21ac55
A
15416static int
15417dtrace_lazy_dofs_proc_iterate_filter(proc_t *p, void* ignored)
15418{
15419#pragma unused(ignored)
15420 /*
15421 * Okay to NULL test without taking the sprlock.
15422 */
15423 return p->p_dtrace_lazy_dofs != NULL;
15424}
15425
39037602
A
15426static void
15427dtrace_lazy_dofs_process(proc_t *p) {
2d21ac55
A
15428 /*
15429 * It is possible this process may exit during our attempt to
15430 * fault in the dof. We could fix this by holding locks longer,
15431 * but the errors are benign.
15432 */
d9a64523 15433 dtrace_sprlock(p);
2d21ac55 15434
39037602 15435
2d21ac55
A
15436 ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);
15437 ASSERT(dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF);
15438
2d21ac55
A
15439 dof_ioctl_data_t* lazy_dofs = p->p_dtrace_lazy_dofs;
15440 p->p_dtrace_lazy_dofs = NULL;
15441
d9a64523
A
15442 dtrace_sprunlock(p);
15443 lck_mtx_lock(&dtrace_meta_lock);
2d21ac55
A
15444 /*
15445 * Process each dof_helper_t
15446 */
15447 if (lazy_dofs != NULL) {
15448 unsigned int i;
15449 int rval;
15450
15451 for (i=0; i<lazy_dofs->dofiod_count; i++) {
15452 /*
15453 * When loading lazy dof, we depend on the generations being sorted in ascending order.
15454 */
15455 ASSERT(i >= (lazy_dofs->dofiod_count - 1) || lazy_dofs->dofiod_helpers[i].dofhp_dof < lazy_dofs->dofiod_helpers[i+1].dofhp_dof);
15456
15457 dof_helper_t *dhp = &lazy_dofs->dofiod_helpers[i];
15458
15459 /*
15460 * We stored the generation in dofhp_dof. Save it, and restore the original value.
15461 */
15462 int generation = dhp->dofhp_dof;
15463 dhp->dofhp_dof = dhp->dofhp_addr;
15464
15465 dof_hdr_t *dof = dtrace_dof_copyin_from_proc(p, dhp->dofhp_dof, &rval);
39037602 15466
2d21ac55
A
15467 if (dof != NULL) {
15468 dtrace_helpers_t *help;
15469
15470 lck_mtx_lock(&dtrace_lock);
15471
15472 /*
15473 * This must be done with the dtrace_lock held
15474 */
15475 if ((help = p->p_dtrace_helpers) == NULL)
15476 help = dtrace_helpers_create(p);
15477
15478 /*
15479 * If the generation value has been bumped, someone snuck in
15480 * when we released the dtrace lock. We have to dump this generation,
15481 * there is no safe way to load it.
15482 */
15483 if (help->dthps_generation <= generation) {
15484 help->dthps_generation = generation;
15485
15486 /*
15487 * dtrace_helper_slurp() takes responsibility for the dof --
15488 * it may free it now or it may save it and free it later.
15489 */
15490 if ((rval = dtrace_helper_slurp(p, dof, dhp)) != generation) {
15491 dtrace_dof_error(NULL, "returned value did not match expected generation");
15492 }
15493 }
15494
15495 lck_mtx_unlock(&dtrace_lock);
15496 }
15497 }
d9a64523 15498 lck_mtx_unlock(&dtrace_meta_lock);
2d21ac55 15499 kmem_free(lazy_dofs, DOF_IOCTL_DATA_T_SIZE(lazy_dofs->dofiod_count));
d9a64523
A
15500 } else {
15501 lck_mtx_unlock(&dtrace_meta_lock);
2d21ac55 15502 }
39037602
A
15503}
15504
15505static int
15506dtrace_lazy_dofs_proc_iterate_doit(proc_t *p, void* ignored)
15507{
15508#pragma unused(ignored)
15509
15510 dtrace_lazy_dofs_process(p);
2d21ac55
A
15511
15512 return PROC_RETURNED;
15513}
15514
39037602
A
15515#define DTRACE_LAZY_DOFS_DUPLICATED 1
15516
15517static int
15518dtrace_lazy_dofs_duplicate(proc_t *parent, proc_t *child)
15519{
5ba3f43e
A
15520 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED);
15521 LCK_MTX_ASSERT(&parent->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED);
15522 LCK_MTX_ASSERT(&child->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED);
39037602
A
15523
15524 lck_rw_lock_shared(&dtrace_dof_mode_lock);
d9a64523 15525 dtrace_sprlock(parent);
39037602
A
15526
15527 /*
15528 * We need to make sure that the transition to lazy dofs -> helpers
15529 * was atomic for our parent
15530 */
15531 ASSERT(parent->p_dtrace_lazy_dofs == NULL || parent->p_dtrace_helpers == NULL);
15532 /*
15533 * In theory we should hold the child sprlock, but this is safe...
15534 */
15535 ASSERT(child->p_dtrace_lazy_dofs == NULL && child->p_dtrace_helpers == NULL);
15536
15537 dof_ioctl_data_t* parent_dofs = parent->p_dtrace_lazy_dofs;
15538 dof_ioctl_data_t* child_dofs = NULL;
15539 if (parent_dofs) {
15540 size_t parent_dofs_size = DOF_IOCTL_DATA_T_SIZE(parent_dofs->dofiod_count);
15541 child_dofs = kmem_alloc(parent_dofs_size, KM_SLEEP);
15542 bcopy(parent_dofs, child_dofs, parent_dofs_size);
15543 }
15544
d9a64523 15545 dtrace_sprunlock(parent);
39037602
A
15546
15547 if (child_dofs) {
d9a64523 15548 dtrace_sprlock(child);
39037602 15549 child->p_dtrace_lazy_dofs = child_dofs;
d9a64523 15550 dtrace_sprunlock(child);
39037602
A
15551 /**
15552 * We process the DOF at this point if the mode is set to
15553 * LAZY_OFF. This can happen if DTrace is still processing the
15554 * DOF of other process (which can happen because the
15555 * protected pager can have a huge latency)
15556 * but has not processed our parent yet
15557 */
15558 if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF) {
15559 dtrace_lazy_dofs_process(child);
15560 }
15561 lck_rw_unlock_shared(&dtrace_dof_mode_lock);
15562
15563 return DTRACE_LAZY_DOFS_DUPLICATED;
15564 }
15565 lck_rw_unlock_shared(&dtrace_dof_mode_lock);
15566
15567 return 0;
15568}
15569
2d21ac55
A
15570static dtrace_helpers_t *
15571dtrace_helpers_create(proc_t *p)
15572{
15573 dtrace_helpers_t *help;
15574
5ba3f43e 15575 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
15576 ASSERT(p->p_dtrace_helpers == NULL);
15577
15578 help = kmem_zalloc(sizeof (dtrace_helpers_t), KM_SLEEP);
15579 help->dthps_actions = kmem_zalloc(sizeof (dtrace_helper_action_t *) *
15580 DTRACE_NHELPER_ACTIONS, KM_SLEEP);
15581
15582 p->p_dtrace_helpers = help;
15583 dtrace_helpers++;
15584
15585 return (help);
15586}
15587
2d21ac55
A
15588static void
15589dtrace_helpers_destroy(proc_t* p)
15590{
2d21ac55
A
15591 dtrace_helpers_t *help;
15592 dtrace_vstate_t *vstate;
b0d623f7 15593 uint_t i;
2d21ac55 15594
d9a64523 15595 lck_mtx_lock(&dtrace_meta_lock);
2d21ac55
A
15596 lck_mtx_lock(&dtrace_lock);
15597
15598 ASSERT(p->p_dtrace_helpers != NULL);
15599 ASSERT(dtrace_helpers > 0);
15600
15601 help = p->p_dtrace_helpers;
15602 vstate = &help->dthps_vstate;
15603
15604 /*
15605 * We're now going to lose the help from this process.
15606 */
15607 p->p_dtrace_helpers = NULL;
15608 dtrace_sync();
15609
15610 /*
15611 * Destory the helper actions.
15612 */
15613 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
15614 dtrace_helper_action_t *h, *next;
15615
15616 for (h = help->dthps_actions[i]; h != NULL; h = next) {
15617 next = h->dtha_next;
15618 dtrace_helper_action_destroy(h, vstate);
15619 h = next;
15620 }
15621 }
15622
15623 lck_mtx_unlock(&dtrace_lock);
15624
15625 /*
15626 * Destroy the helper providers.
15627 */
15628 if (help->dthps_maxprovs > 0) {
2d21ac55
A
15629 if (dtrace_meta_pid != NULL) {
15630 ASSERT(dtrace_deferred_pid == NULL);
15631
15632 for (i = 0; i < help->dthps_nprovs; i++) {
15633 dtrace_helper_provider_remove(
d190cdc3 15634 &help->dthps_provs[i]->dthp_prov, p);
2d21ac55
A
15635 }
15636 } else {
15637 lck_mtx_lock(&dtrace_lock);
15638 ASSERT(help->dthps_deferred == 0 ||
15639 help->dthps_next != NULL ||
15640 help->dthps_prev != NULL ||
15641 help == dtrace_deferred_pid);
15642
15643 /*
15644 * Remove the helper from the deferred list.
15645 */
15646 if (help->dthps_next != NULL)
15647 help->dthps_next->dthps_prev = help->dthps_prev;
15648 if (help->dthps_prev != NULL)
15649 help->dthps_prev->dthps_next = help->dthps_next;
15650 if (dtrace_deferred_pid == help) {
15651 dtrace_deferred_pid = help->dthps_next;
15652 ASSERT(help->dthps_prev == NULL);
15653 }
15654
15655 lck_mtx_unlock(&dtrace_lock);
15656 }
15657
2d21ac55
A
15658
15659 for (i = 0; i < help->dthps_nprovs; i++) {
15660 dtrace_helper_provider_destroy(help->dthps_provs[i]);
15661 }
15662
15663 kmem_free(help->dthps_provs, help->dthps_maxprovs *
15664 sizeof (dtrace_helper_provider_t *));
15665 }
15666
15667 lck_mtx_lock(&dtrace_lock);
15668
15669 dtrace_vstate_fini(&help->dthps_vstate);
15670 kmem_free(help->dthps_actions,
15671 sizeof (dtrace_helper_action_t *) * DTRACE_NHELPER_ACTIONS);
15672 kmem_free(help, sizeof (dtrace_helpers_t));
15673
15674 --dtrace_helpers;
15675 lck_mtx_unlock(&dtrace_lock);
d9a64523 15676 lck_mtx_unlock(&dtrace_meta_lock);
2d21ac55
A
15677}
15678
15679static void
15680dtrace_helpers_duplicate(proc_t *from, proc_t *to)
15681{
15682 dtrace_helpers_t *help, *newhelp;
15683 dtrace_helper_action_t *helper, *new, *last;
15684 dtrace_difo_t *dp;
15685 dtrace_vstate_t *vstate;
b0d623f7
A
15686 uint_t i;
15687 int j, sz, hasprovs = 0;
2d21ac55 15688
d9a64523 15689 lck_mtx_lock(&dtrace_meta_lock);
2d21ac55
A
15690 lck_mtx_lock(&dtrace_lock);
15691 ASSERT(from->p_dtrace_helpers != NULL);
15692 ASSERT(dtrace_helpers > 0);
15693
15694 help = from->p_dtrace_helpers;
15695 newhelp = dtrace_helpers_create(to);
15696 ASSERT(to->p_dtrace_helpers != NULL);
15697
15698 newhelp->dthps_generation = help->dthps_generation;
15699 vstate = &newhelp->dthps_vstate;
15700
15701 /*
15702 * Duplicate the helper actions.
15703 */
15704 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
15705 if ((helper = help->dthps_actions[i]) == NULL)
15706 continue;
15707
15708 for (last = NULL; helper != NULL; helper = helper->dtha_next) {
15709 new = kmem_zalloc(sizeof (dtrace_helper_action_t),
15710 KM_SLEEP);
15711 new->dtha_generation = helper->dtha_generation;
15712
15713 if ((dp = helper->dtha_predicate) != NULL) {
15714 dp = dtrace_difo_duplicate(dp, vstate);
15715 new->dtha_predicate = dp;
15716 }
15717
15718 new->dtha_nactions = helper->dtha_nactions;
15719 sz = sizeof (dtrace_difo_t *) * new->dtha_nactions;
15720 new->dtha_actions = kmem_alloc(sz, KM_SLEEP);
15721
b0d623f7
A
15722 for (j = 0; j < new->dtha_nactions; j++) {
15723 dtrace_difo_t *dpj = helper->dtha_actions[j];
15724
15725 ASSERT(dpj != NULL);
15726 dpj = dtrace_difo_duplicate(dpj, vstate);
15727 new->dtha_actions[j] = dpj;
15728 }
2d21ac55
A
15729
15730 if (last != NULL) {
15731 last->dtha_next = new;
15732 } else {
15733 newhelp->dthps_actions[i] = new;
15734 }
15735
15736 last = new;
15737 }
15738 }
15739
15740 /*
15741 * Duplicate the helper providers and register them with the
15742 * DTrace framework.
15743 */
15744 if (help->dthps_nprovs > 0) {
15745 newhelp->dthps_nprovs = help->dthps_nprovs;
15746 newhelp->dthps_maxprovs = help->dthps_nprovs;
15747 newhelp->dthps_provs = kmem_alloc(newhelp->dthps_nprovs *
15748 sizeof (dtrace_helper_provider_t *), KM_SLEEP);
15749 for (i = 0; i < newhelp->dthps_nprovs; i++) {
15750 newhelp->dthps_provs[i] = help->dthps_provs[i];
15751 newhelp->dthps_provs[i]->dthp_ref++;
15752 }
15753
15754 hasprovs = 1;
15755 }
15756
15757 lck_mtx_unlock(&dtrace_lock);
15758
15759 if (hasprovs)
15760 dtrace_helper_provider_register(to, newhelp, NULL);
d9a64523
A
15761
15762 lck_mtx_unlock(&dtrace_meta_lock);
2d21ac55
A
15763}
15764
39037602
A
15765/**
15766 * DTrace Process functions
15767 */
15768
15769void
15770dtrace_proc_fork(proc_t *parent_proc, proc_t *child_proc, int spawn)
15771{
15772 /*
15773 * This code applies to new processes who are copying the task
15774 * and thread state and address spaces of their parent process.
15775 */
15776 if (!spawn) {
15777 /*
15778 * APPLE NOTE: Solaris does a sprlock() and drops the
15779 * proc_lock here. We're cheating a bit and only taking
15780 * the p_dtrace_sprlock lock. A full sprlock would
15781 * task_suspend the parent.
15782 */
d9a64523 15783 dtrace_sprlock(parent_proc);
39037602
A
15784
15785 /*
15786 * Remove all DTrace tracepoints from the child process. We
15787 * need to do this _before_ duplicating USDT providers since
15788 * any associated probes may be immediately enabled.
15789 */
15790 if (parent_proc->p_dtrace_count > 0) {
15791 dtrace_fasttrap_fork(parent_proc, child_proc);
15792 }
15793
d9a64523 15794 dtrace_sprunlock(parent_proc);
39037602
A
15795
15796 /*
15797 * Duplicate any lazy dof(s). This must be done while NOT
15798 * holding the parent sprlock! Lock ordering is
15799 * dtrace_dof_mode_lock, then sprlock. It is imperative we
15800 * always call dtrace_lazy_dofs_duplicate, rather than null
15801 * check and call if !NULL. If we NULL test, during lazy dof
15802 * faulting we can race with the faulting code and proceed
15803 * from here to beyond the helpers copy. The lazy dof
15804 * faulting will then fail to copy the helpers to the child
15805 * process. We return if we duplicated lazy dofs as a process
15806 * can only have one at the same time to avoid a race between
15807 * a dtrace client and dtrace_proc_fork where a process would
15808 * end up with both lazy dofs and helpers.
15809 */
15810 if (dtrace_lazy_dofs_duplicate(parent_proc, child_proc) == DTRACE_LAZY_DOFS_DUPLICATED) {
15811 return;
15812 }
15813
15814 /*
15815 * Duplicate any helper actions and providers if they haven't
15816 * already.
15817 */
15818#if !defined(__APPLE__)
15819 /*
15820 * The SFORKING
15821 * we set above informs the code to enable USDT probes that
15822 * sprlock() may fail because the child is being forked.
15823 */
15824#endif
15825 /*
15826 * APPLE NOTE: As best I can tell, Apple's sprlock() equivalent
15827 * never fails to find the child. We do not set SFORKING.
15828 */
15829 if (parent_proc->p_dtrace_helpers != NULL && dtrace_helpers_fork) {
15830 (*dtrace_helpers_fork)(parent_proc, child_proc);
15831 }
15832 }
15833}
15834
15835void
15836dtrace_proc_exec(proc_t *p)
15837{
15838 /*
15839 * Invalidate any predicate evaluation already cached for this thread by DTrace.
15840 * That's because we've just stored to p_comm and DTrace refers to that when it
15841 * evaluates the "execname" special variable. uid and gid may have changed as well.
15842 */
15843 dtrace_set_thread_predcache(current_thread(), 0);
15844
15845 /*
15846 * Free any outstanding lazy dof entries. It is imperative we
15847 * always call dtrace_lazy_dofs_destroy, rather than null check
15848 * and call if !NULL. If we NULL test, during lazy dof faulting
15849 * we can race with the faulting code and proceed from here to
15850 * beyond the helpers cleanup. The lazy dof faulting will then
15851 * install new helpers which no longer belong to this process!
15852 */
15853 dtrace_lazy_dofs_destroy(p);
15854
15855
15856 /*
15857 * Clean up any DTrace helpers for the process.
15858 */
15859 if (p->p_dtrace_helpers != NULL && dtrace_helpers_cleanup) {
15860 (*dtrace_helpers_cleanup)(p);
15861 }
15862
15863 /*
15864 * Cleanup the DTrace provider associated with this process.
15865 */
15866 proc_lock(p);
15867 if (p->p_dtrace_probes && dtrace_fasttrap_exec_ptr) {
15868 (*dtrace_fasttrap_exec_ptr)(p);
15869 }
15870 proc_unlock(p);
15871}
15872
15873void
15874dtrace_proc_exit(proc_t *p)
15875{
15876 /*
15877 * Free any outstanding lazy dof entries. It is imperative we
15878 * always call dtrace_lazy_dofs_destroy, rather than null check
15879 * and call if !NULL. If we NULL test, during lazy dof faulting
15880 * we can race with the faulting code and proceed from here to
15881 * beyond the helpers cleanup. The lazy dof faulting will then
15882 * install new helpers which will never be cleaned up, and leak.
15883 */
15884 dtrace_lazy_dofs_destroy(p);
15885
15886 /*
15887 * Clean up any DTrace helper actions or probes for the process.
15888 */
15889 if (p->p_dtrace_helpers != NULL) {
15890 (*dtrace_helpers_cleanup)(p);
15891 }
15892
15893 /*
15894 * Clean up any DTrace probes associated with this process.
15895 */
15896 /*
15897 * APPLE NOTE: We release ptss pages/entries in dtrace_fasttrap_exit_ptr(),
15898 * call this after dtrace_helpers_cleanup()
15899 */
15900 proc_lock(p);
15901 if (p->p_dtrace_probes && dtrace_fasttrap_exit_ptr) {
15902 (*dtrace_fasttrap_exit_ptr)(p);
15903 }
15904 proc_unlock(p);
15905}
15906
2d21ac55
A
15907/*
15908 * DTrace Hook Functions
15909 */
6d2010ae 15910
6d2010ae 15911/*
fe8ab488
A
15912 * APPLE NOTE: dtrace_modctl_* routines for kext support.
15913 * Used to manipulate the modctl list within dtrace xnu.
6d2010ae
A
15914 */
15915
15916modctl_t *dtrace_modctl_list;
15917
15918static void
15919dtrace_modctl_add(struct modctl * newctl)
15920{
15921 struct modctl *nextp, *prevp;
15922
15923 ASSERT(newctl != NULL);
5ba3f43e 15924 LCK_MTX_ASSERT(&mod_lock, LCK_MTX_ASSERT_OWNED);
6d2010ae
A
15925
15926 // Insert new module at the front of the list,
15927
15928 newctl->mod_next = dtrace_modctl_list;
15929 dtrace_modctl_list = newctl;
15930
15931 /*
15932 * If a module exists with the same name, then that module
15933 * must have been unloaded with enabled probes. We will move
15934 * the unloaded module to the new module's stale chain and
15935 * then stop traversing the list.
15936 */
15937
15938 prevp = newctl;
15939 nextp = newctl->mod_next;
15940
15941 while (nextp != NULL) {
15942 if (nextp->mod_loaded) {
15943 /* This is a loaded module. Keep traversing. */
15944 prevp = nextp;
15945 nextp = nextp->mod_next;
15946 continue;
15947 }
15948 else {
15949 /* Found an unloaded module */
15950 if (strncmp (newctl->mod_modname, nextp->mod_modname, KMOD_MAX_NAME)) {
15951 /* Names don't match. Keep traversing. */
15952 prevp = nextp;
15953 nextp = nextp->mod_next;
15954 continue;
15955 }
15956 else {
15957 /* We found a stale entry, move it. We're done. */
15958 prevp->mod_next = nextp->mod_next;
15959 newctl->mod_stale = nextp;
15960 nextp->mod_next = NULL;
15961 break;
15962 }
15963 }
15964 }
15965}
15966
15967static modctl_t *
15968dtrace_modctl_lookup(struct kmod_info * kmod)
15969{
5ba3f43e 15970 LCK_MTX_ASSERT(&mod_lock, LCK_MTX_ASSERT_OWNED);
6d2010ae
A
15971
15972 struct modctl * ctl;
15973
15974 for (ctl = dtrace_modctl_list; ctl; ctl=ctl->mod_next) {
15975 if (ctl->mod_id == kmod->id)
15976 return(ctl);
15977 }
15978 return (NULL);
15979}
15980
15981/*
15982 * This routine is called from dtrace_module_unloaded().
15983 * It removes a modctl structure and its stale chain
15984 * from the kext shadow list.
15985 */
15986static void
15987dtrace_modctl_remove(struct modctl * ctl)
15988{
15989 ASSERT(ctl != NULL);
5ba3f43e 15990 LCK_MTX_ASSERT(&mod_lock, LCK_MTX_ASSERT_OWNED);
6d2010ae
A
15991 modctl_t *prevp, *nextp, *curp;
15992
15993 // Remove stale chain first
15994 for (curp=ctl->mod_stale; curp != NULL; curp=nextp) {
15995 nextp = curp->mod_stale;
15996 /* There should NEVER be user symbols allocated at this point */
15997 ASSERT(curp->mod_user_symbols == NULL);
15998 kmem_free(curp, sizeof(modctl_t));
15999 }
16000
16001 prevp = NULL;
16002 curp = dtrace_modctl_list;
16003
16004 while (curp != ctl) {
16005 prevp = curp;
16006 curp = curp->mod_next;
16007 }
16008
16009 if (prevp != NULL) {
16010 prevp->mod_next = ctl->mod_next;
16011 }
16012 else {
16013 dtrace_modctl_list = ctl->mod_next;
16014 }
16015
16016 /* There should NEVER be user symbols allocated at this point */
16017 ASSERT(ctl->mod_user_symbols == NULL);
16018
16019 kmem_free (ctl, sizeof(modctl_t));
16020}
16021
6d2010ae
A
16022/*
16023 * APPLE NOTE: The kext loader will call dtrace_module_loaded
16024 * when the kext is loaded in memory, but before calling the
16025 * kext's start routine.
16026 *
16027 * Return 0 on success
16028 * Return -1 on failure
16029 */
16030
6d2010ae 16031static int
316670eb 16032dtrace_module_loaded(struct kmod_info *kmod, uint32_t flag)
2d21ac55
A
16033{
16034 dtrace_provider_t *prv;
16035
6d2010ae
A
16036 /*
16037 * If kernel symbols have been disabled, return immediately
16038 * DTRACE_KERNEL_SYMBOLS_NEVER is a permanent mode, it is safe to test without holding locks
16039 */
16040 if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER)
16041 return 0;
16042
16043 struct modctl *ctl = NULL;
16044 if (!kmod || kmod->address == 0 || kmod->size == 0)
16045 return(-1);
16046
16047 lck_mtx_lock(&dtrace_provider_lock);
16048 lck_mtx_lock(&mod_lock);
16049
16050 /*
16051 * Have we seen this kext before?
16052 */
2d21ac55 16053
6d2010ae
A
16054 ctl = dtrace_modctl_lookup(kmod);
16055
16056 if (ctl != NULL) {
16057 /* bail... we already have this kext in the modctl list */
16058 lck_mtx_unlock(&mod_lock);
16059 lck_mtx_unlock(&dtrace_provider_lock);
16060 if (dtrace_err_verbose)
16061 cmn_err(CE_WARN, "dtrace load module already exists '%s %u' is failing against '%s %u'", kmod->name, (uint_t)kmod->id, ctl->mod_modname, ctl->mod_id);
16062 return(-1);
16063 }
16064 else {
16065 ctl = kmem_alloc(sizeof(struct modctl), KM_SLEEP);
16066 if (ctl == NULL) {
16067 if (dtrace_err_verbose)
16068 cmn_err(CE_WARN, "dtrace module load '%s %u' is failing ", kmod->name, (uint_t)kmod->id);
16069 lck_mtx_unlock(&mod_lock);
16070 lck_mtx_unlock(&dtrace_provider_lock);
16071 return (-1);
16072 }
16073 ctl->mod_next = NULL;
16074 ctl->mod_stale = NULL;
16075 strlcpy (ctl->mod_modname, kmod->name, sizeof(ctl->mod_modname));
16076 ctl->mod_loadcnt = kmod->id;
16077 ctl->mod_nenabled = 0;
16078 ctl->mod_address = kmod->address;
16079 ctl->mod_size = kmod->size;
16080 ctl->mod_id = kmod->id;
16081 ctl->mod_loaded = 1;
16082 ctl->mod_flags = 0;
16083 ctl->mod_user_symbols = NULL;
d9a64523 16084
6d2010ae
A
16085 /*
16086 * Find the UUID for this module, if it has one
16087 */
16088 kernel_mach_header_t* header = (kernel_mach_header_t *)ctl->mod_address;
16089 struct load_command* load_cmd = (struct load_command *)&header[1];
16090 uint32_t i;
16091 for (i = 0; i < header->ncmds; i++) {
16092 if (load_cmd->cmd == LC_UUID) {
16093 struct uuid_command* uuid_cmd = (struct uuid_command *)load_cmd;
16094 memcpy(ctl->mod_uuid, uuid_cmd->uuid, sizeof(uuid_cmd->uuid));
16095 ctl->mod_flags |= MODCTL_HAS_UUID;
16096 break;
16097 }
16098 load_cmd = (struct load_command *)((caddr_t)load_cmd + load_cmd->cmdsize);
16099 }
16100
16101 if (ctl->mod_address == g_kernel_kmod_info.address) {
16102 ctl->mod_flags |= MODCTL_IS_MACH_KERNEL;
d9a64523
A
16103 memcpy(dtrace_kerneluuid, ctl->mod_uuid, sizeof(dtrace_kerneluuid));
16104 }
16105 /*
16106 * Static kexts have a UUID that is not used for symbolication, as all their
16107 * symbols are in kernel
16108 */
16109 else if ((flag & KMOD_DTRACE_STATIC_KEXT) == KMOD_DTRACE_STATIC_KEXT) {
16110 memcpy(ctl->mod_uuid, dtrace_kerneluuid, sizeof(dtrace_kerneluuid));
16111 ctl->mod_flags |= MODCTL_IS_STATIC_KEXT;
6d2010ae
A
16112 }
16113 }
16114 dtrace_modctl_add(ctl);
16115
16116 /*
16117 * We must hold the dtrace_lock to safely test non permanent dtrace_fbt_symbol_mode(s)
16118 */
16119 lck_mtx_lock(&dtrace_lock);
16120
16121 /*
316670eb
A
16122 * DTrace must decide if it will instrument modules lazily via
16123 * userspace symbols (default mode), or instrument immediately via
16124 * kernel symbols (non-default mode)
16125 *
16126 * When in default/lazy mode, DTrace will only support modules
16127 * built with a valid UUID.
16128 *
16129 * Overriding the default can be done explicitly in one of
16130 * the following two ways.
16131 *
16132 * A module can force symbols from kernel space using the plist key,
16133 * OSBundleForceDTraceInit (see kmod.h). If this per kext state is set,
16134 * we fall through and instrument this module now.
16135 *
16136 * Or, the boot-arg, dtrace_kernel_symbol_mode, can be set to force symbols
16137 * from kernel space (see dtrace_impl.h). If this system state is set
16138 * to a non-userspace mode, we fall through and instrument the module now.
6d2010ae 16139 */
316670eb
A
16140
16141 if ((dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) &&
16142 (!(flag & KMOD_DTRACE_FORCE_INIT)))
16143 {
16144 /* We will instrument the module lazily -- this is the default */
6d2010ae
A
16145 lck_mtx_unlock(&dtrace_lock);
16146 lck_mtx_unlock(&mod_lock);
16147 lck_mtx_unlock(&dtrace_provider_lock);
16148 return 0;
16149 }
16150
316670eb 16151 /* We will instrument the module immediately using kernel symbols */
6d2010ae
A
16152 ctl->mod_flags |= MODCTL_HAS_KERNEL_SYMBOLS;
16153
16154 lck_mtx_unlock(&dtrace_lock);
6d2010ae 16155
2d21ac55
A
16156 /*
16157 * We're going to call each providers per-module provide operation
16158 * specifying only this module.
16159 */
16160 for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
6d2010ae
A
16161 prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);
16162
6d2010ae 16163 /*
fe8ab488
A
16164 * APPLE NOTE: The contract with the kext loader is that once this function
16165 * has completed, it may delete kernel symbols at will.
16166 * We must set this while still holding the mod_lock.
6d2010ae
A
16167 */
16168 ctl->mod_flags &= ~MODCTL_HAS_KERNEL_SYMBOLS;
6d2010ae 16169
2d21ac55
A
16170 lck_mtx_unlock(&mod_lock);
16171 lck_mtx_unlock(&dtrace_provider_lock);
6d2010ae 16172
2d21ac55
A
16173 /*
16174 * If we have any retained enablings, we need to match against them.
16175 * Enabling probes requires that cpu_lock be held, and we cannot hold
16176 * cpu_lock here -- it is legal for cpu_lock to be held when loading a
16177 * module. (In particular, this happens when loading scheduling
16178 * classes.) So if we have any retained enablings, we need to dispatch
16179 * our task queue to do the match for us.
16180 */
16181 lck_mtx_lock(&dtrace_lock);
6d2010ae 16182
2d21ac55
A
16183 if (dtrace_retained == NULL) {
16184 lck_mtx_unlock(&dtrace_lock);
6d2010ae 16185 return 0;
2d21ac55 16186 }
6d2010ae 16187
6d2010ae
A
16188 /* APPLE NOTE!
16189 *
16190 * The cpu_lock mentioned above is only held by dtrace code, Apple's xnu never actually
16191 * holds it for any reason. Thus the comment above is invalid, we can directly invoke
16192 * dtrace_enabling_matchall without jumping through all the hoops, and we can avoid
16193 * the delay call as well.
16194 */
16195 lck_mtx_unlock(&dtrace_lock);
16196
16197 dtrace_enabling_matchall();
16198
16199 return 0;
2d21ac55
A
16200}
16201
6d2010ae
A
16202/*
16203 * Return 0 on success
16204 * Return -1 on failure
16205 */
16206static int
16207dtrace_module_unloaded(struct kmod_info *kmod)
2d21ac55 16208{
6d2010ae
A
16209 dtrace_probe_t template, *probe, *first, *next;
16210 dtrace_provider_t *prov;
16211 struct modctl *ctl = NULL;
16212 struct modctl *syncctl = NULL;
16213 struct modctl *nextsyncctl = NULL;
16214 int syncmode = 0;
16215
16216 lck_mtx_lock(&dtrace_provider_lock);
16217 lck_mtx_lock(&mod_lock);
16218 lck_mtx_lock(&dtrace_lock);
2d21ac55 16219
6d2010ae
A
16220 if (kmod == NULL) {
16221 syncmode = 1;
16222 }
16223 else {
16224 ctl = dtrace_modctl_lookup(kmod);
16225 if (ctl == NULL)
16226 {
16227 lck_mtx_unlock(&dtrace_lock);
16228 lck_mtx_unlock(&mod_lock);
16229 lck_mtx_unlock(&dtrace_provider_lock);
16230 return (-1);
16231 }
16232 ctl->mod_loaded = 0;
16233 ctl->mod_address = 0;
16234 ctl->mod_size = 0;
16235 }
16236
16237 if (dtrace_bymod == NULL) {
16238 /*
16239 * The DTrace module is loaded (obviously) but not attached;
16240 * we don't have any work to do.
16241 */
16242 if (ctl != NULL)
16243 (void)dtrace_modctl_remove(ctl);
6d2010ae 16244 lck_mtx_unlock(&dtrace_lock);
fe8ab488
A
16245 lck_mtx_unlock(&mod_lock);
16246 lck_mtx_unlock(&dtrace_provider_lock);
6d2010ae
A
16247 return(0);
16248 }
16249
16250 /* Syncmode set means we target and traverse entire modctl list. */
16251 if (syncmode)
16252 nextsyncctl = dtrace_modctl_list;
16253
16254syncloop:
16255 if (syncmode)
16256 {
16257 /* find a stale modctl struct */
16258 for (syncctl = nextsyncctl; syncctl != NULL; syncctl=syncctl->mod_next) {
16259 if (syncctl->mod_address == 0)
16260 break;
16261 }
16262 if (syncctl==NULL)
16263 {
16264 /* We have no more work to do */
6d2010ae 16265 lck_mtx_unlock(&dtrace_lock);
fe8ab488
A
16266 lck_mtx_unlock(&mod_lock);
16267 lck_mtx_unlock(&dtrace_provider_lock);
6d2010ae
A
16268 return(0);
16269 }
16270 else {
16271 /* keep track of next syncctl in case this one is removed */
16272 nextsyncctl = syncctl->mod_next;
16273 ctl = syncctl;
16274 }
16275 }
16276
16277 template.dtpr_mod = ctl->mod_modname;
16278
16279 for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template);
16280 probe != NULL; probe = probe->dtpr_nextmod) {
16281 if (probe->dtpr_ecb != NULL) {
16282 /*
16283 * This shouldn't _actually_ be possible -- we're
16284 * unloading a module that has an enabled probe in it.
16285 * (It's normally up to the provider to make sure that
16286 * this can't happen.) However, because dtps_enable()
16287 * doesn't have a failure mode, there can be an
16288 * enable/unload race. Upshot: we don't want to
16289 * assert, but we're not going to disable the
16290 * probe, either.
16291 */
16292
16293
16294 if (syncmode) {
16295 /* We're syncing, let's look at next in list */
16296 goto syncloop;
16297 }
16298
6d2010ae 16299 lck_mtx_unlock(&dtrace_lock);
fe8ab488
A
16300 lck_mtx_unlock(&mod_lock);
16301 lck_mtx_unlock(&dtrace_provider_lock);
6d2010ae
A
16302
16303 if (dtrace_err_verbose) {
16304 cmn_err(CE_WARN, "unloaded module '%s' had "
16305 "enabled probes", ctl->mod_modname);
16306 }
16307 return(-1);
16308 }
16309 }
16310
16311 probe = first;
16312
16313 for (first = NULL; probe != NULL; probe = next) {
16314 ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe);
16315
16316 dtrace_probes[probe->dtpr_id - 1] = NULL;
fe8ab488 16317 probe->dtpr_provider->dtpv_probe_count--;
6d2010ae
A
16318
16319 next = probe->dtpr_nextmod;
d9a64523 16320 dtrace_hash_remove(dtrace_byprov, probe);
6d2010ae
A
16321 dtrace_hash_remove(dtrace_bymod, probe);
16322 dtrace_hash_remove(dtrace_byfunc, probe);
16323 dtrace_hash_remove(dtrace_byname, probe);
16324
16325 if (first == NULL) {
16326 first = probe;
16327 probe->dtpr_nextmod = NULL;
16328 } else {
16329 probe->dtpr_nextmod = first;
16330 first = probe;
16331 }
16332 }
16333
16334 /*
16335 * We've removed all of the module's probes from the hash chains and
16336 * from the probe array. Now issue a dtrace_sync() to be sure that
16337 * everyone has cleared out from any probe array processing.
16338 */
16339 dtrace_sync();
16340
16341 for (probe = first; probe != NULL; probe = first) {
16342 first = probe->dtpr_nextmod;
16343 prov = probe->dtpr_provider;
16344 prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id,
16345 probe->dtpr_arg);
d9a64523
A
16346 dtrace_strunref(probe->dtpr_mod);
16347 dtrace_strunref(probe->dtpr_func);
16348 dtrace_strunref(probe->dtpr_name);
6d2010ae
A
16349 vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1);
16350
16351 zfree(dtrace_probe_t_zone, probe);
16352 }
16353
16354 dtrace_modctl_remove(ctl);
16355
16356 if (syncmode)
16357 goto syncloop;
16358
16359 lck_mtx_unlock(&dtrace_lock);
16360 lck_mtx_unlock(&mod_lock);
16361 lck_mtx_unlock(&dtrace_provider_lock);
16362
16363 return(0);
16364}
6d2010ae
A
16365
16366void
16367dtrace_suspend(void)
16368{
16369 dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend));
16370}
16371
16372void
2d21ac55
A
16373dtrace_resume(void)
16374{
16375 dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_resume));
16376}
16377
16378static int
16379dtrace_cpu_setup(cpu_setup_t what, processorid_t cpu)
16380{
5ba3f43e 16381 LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
16382 lck_mtx_lock(&dtrace_lock);
16383
16384 switch (what) {
16385 case CPU_CONFIG: {
16386 dtrace_state_t *state;
16387 dtrace_optval_t *opt, rs, c;
16388
16389 /*
16390 * For now, we only allocate a new buffer for anonymous state.
16391 */
16392 if ((state = dtrace_anon.dta_state) == NULL)
16393 break;
16394
16395 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
16396 break;
16397
16398 opt = state->dts_options;
16399 c = opt[DTRACEOPT_CPU];
16400
16401 if (c != DTRACE_CPUALL && c != DTRACEOPT_UNSET && c != cpu)
16402 break;
16403
16404 /*
16405 * Regardless of what the actual policy is, we're going to
16406 * temporarily set our resize policy to be manual. We're
16407 * also going to temporarily set our CPU option to denote
16408 * the newly configured CPU.
16409 */
16410 rs = opt[DTRACEOPT_BUFRESIZE];
16411 opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_MANUAL;
16412 opt[DTRACEOPT_CPU] = (dtrace_optval_t)cpu;
16413
16414 (void) dtrace_state_buffers(state);
16415
16416 opt[DTRACEOPT_BUFRESIZE] = rs;
16417 opt[DTRACEOPT_CPU] = c;
16418
16419 break;
16420 }
16421
16422 case CPU_UNCONFIG:
16423 /*
16424 * We don't free the buffer in the CPU_UNCONFIG case. (The
16425 * buffer will be freed when the consumer exits.)
16426 */
16427 break;
16428
16429 default:
16430 break;
16431 }
16432
16433 lck_mtx_unlock(&dtrace_lock);
16434 return (0);
16435}
16436
16437static void
16438dtrace_cpu_setup_initial(processorid_t cpu)
16439{
16440 (void) dtrace_cpu_setup(CPU_CONFIG, cpu);
16441}
16442
16443static void
16444dtrace_toxrange_add(uintptr_t base, uintptr_t limit)
16445{
16446 if (dtrace_toxranges >= dtrace_toxranges_max) {
16447 int osize, nsize;
16448 dtrace_toxrange_t *range;
16449
16450 osize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
16451
16452 if (osize == 0) {
16453 ASSERT(dtrace_toxrange == NULL);
16454 ASSERT(dtrace_toxranges_max == 0);
16455 dtrace_toxranges_max = 1;
16456 } else {
16457 dtrace_toxranges_max <<= 1;
16458 }
16459
16460 nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
16461 range = kmem_zalloc(nsize, KM_SLEEP);
16462
16463 if (dtrace_toxrange != NULL) {
16464 ASSERT(osize != 0);
16465 bcopy(dtrace_toxrange, range, osize);
16466 kmem_free(dtrace_toxrange, osize);
16467 }
16468
16469 dtrace_toxrange = range;
16470 }
16471
fe8ab488
A
16472 ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == 0);
16473 ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == 0);
2d21ac55
A
16474
16475 dtrace_toxrange[dtrace_toxranges].dtt_base = base;
16476 dtrace_toxrange[dtrace_toxranges].dtt_limit = limit;
16477 dtrace_toxranges++;
16478}
16479
16480/*
16481 * DTrace Driver Cookbook Functions
16482 */
16483/*ARGSUSED*/
16484static int
d9a64523 16485dtrace_attach(dev_info_t *devi)
2d21ac55
A
16486{
16487 dtrace_provider_id_t id;
16488 dtrace_state_t *state = NULL;
16489 dtrace_enabling_t *enab;
16490
16491 lck_mtx_lock(&cpu_lock);
16492 lck_mtx_lock(&dtrace_provider_lock);
16493 lck_mtx_lock(&dtrace_lock);
16494
b0d623f7 16495 /* Darwin uses BSD cloning device driver to automagically obtain minor device number. */
2d21ac55
A
16496 dtrace_devi = devi;
16497
16498 dtrace_modload = dtrace_module_loaded;
16499 dtrace_modunload = dtrace_module_unloaded;
16500 dtrace_cpu_init = dtrace_cpu_setup_initial;
16501 dtrace_helpers_cleanup = dtrace_helpers_destroy;
16502 dtrace_helpers_fork = dtrace_helpers_duplicate;
16503 dtrace_cpustart_init = dtrace_suspend;
16504 dtrace_cpustart_fini = dtrace_resume;
16505 dtrace_debugger_init = dtrace_suspend;
16506 dtrace_debugger_fini = dtrace_resume;
2d21ac55
A
16507
16508 register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);
16509
5ba3f43e 16510 LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
16511
16512 dtrace_arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1,
16513 NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
2d21ac55
A
16514
16515 dtrace_state_cache = kmem_cache_create("dtrace_state_cache",
c910b4d9 16516 sizeof (dtrace_dstate_percpu_t) * (int)NCPU, DTRACE_STATE_ALIGN,
2d21ac55
A
16517 NULL, NULL, NULL, NULL, NULL, 0);
16518
5ba3f43e 16519 LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED);
39037602 16520
d9a64523
A
16521 dtrace_byprov = dtrace_hash_create(dtrace_strkey_probe_provider,
16522 0, /* unused */
16523 offsetof(dtrace_probe_t, dtpr_nextprov),
16524 offsetof(dtrace_probe_t, dtpr_prevprov));
16525
16526 dtrace_bymod = dtrace_hash_create(dtrace_strkey_deref_offset,
16527 offsetof(dtrace_probe_t, dtpr_mod),
2d21ac55
A
16528 offsetof(dtrace_probe_t, dtpr_nextmod),
16529 offsetof(dtrace_probe_t, dtpr_prevmod));
16530
d9a64523
A
16531 dtrace_byfunc = dtrace_hash_create(dtrace_strkey_deref_offset,
16532 offsetof(dtrace_probe_t, dtpr_func),
2d21ac55
A
16533 offsetof(dtrace_probe_t, dtpr_nextfunc),
16534 offsetof(dtrace_probe_t, dtpr_prevfunc));
16535
d9a64523
A
16536 dtrace_byname = dtrace_hash_create(dtrace_strkey_deref_offset,
16537 offsetof(dtrace_probe_t, dtpr_name),
2d21ac55
A
16538 offsetof(dtrace_probe_t, dtpr_nextname),
16539 offsetof(dtrace_probe_t, dtpr_prevname));
16540
16541 if (dtrace_retain_max < 1) {
16542 cmn_err(CE_WARN, "illegal value (%lu) for dtrace_retain_max; "
16543 "setting to 1", dtrace_retain_max);
16544 dtrace_retain_max = 1;
16545 }
16546
16547 /*
16548 * Now discover our toxic ranges.
16549 */
16550 dtrace_toxic_ranges(dtrace_toxrange_add);
16551
16552 /*
16553 * Before we register ourselves as a provider to our own framework,
16554 * we would like to assert that dtrace_provider is NULL -- but that's
16555 * not true if we were loaded as a dependency of a DTrace provider.
16556 * Once we've registered, we can assert that dtrace_provider is our
16557 * pseudo provider.
16558 */
16559 (void) dtrace_register("dtrace", &dtrace_provider_attr,
16560 DTRACE_PRIV_NONE, 0, &dtrace_provider_ops, NULL, &id);
16561
16562 ASSERT(dtrace_provider != NULL);
16563 ASSERT((dtrace_provider_id_t)dtrace_provider == id);
16564
fe8ab488 16565#if defined (__x86_64__)
2d21ac55
A
16566 dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
16567 dtrace_provider, NULL, NULL, "BEGIN", 1, NULL);
16568 dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
16569 dtrace_provider, NULL, NULL, "END", 0, NULL);
16570 dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
16571 dtrace_provider, NULL, NULL, "ERROR", 3, NULL);
5ba3f43e
A
16572#elif (defined(__arm__) || defined(__arm64__))
16573 dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
16574 dtrace_provider, NULL, NULL, "BEGIN", 2, NULL);
16575 dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
16576 dtrace_provider, NULL, NULL, "END", 1, NULL);
16577 dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
16578 dtrace_provider, NULL, NULL, "ERROR", 4, NULL);
2d21ac55
A
16579#else
16580#error Unknown Architecture
fe8ab488 16581#endif
2d21ac55
A
16582
16583 dtrace_anon_property();
16584 lck_mtx_unlock(&cpu_lock);
16585
16586 /*
16587 * If DTrace helper tracing is enabled, we need to allocate the
16588 * trace buffer and initialize the values.
16589 */
16590 if (dtrace_helptrace_enabled) {
16591 ASSERT(dtrace_helptrace_buffer == NULL);
16592 dtrace_helptrace_buffer =
16593 kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP);
16594 dtrace_helptrace_next = 0;
16595 }
16596
16597 /*
16598 * If there are already providers, we must ask them to provide their
16599 * probes, and then match any anonymous enabling against them. Note
16600 * that there should be no other retained enablings at this time:
16601 * the only retained enablings at this time should be the anonymous
16602 * enabling.
16603 */
16604 if (dtrace_anon.dta_enabling != NULL) {
16605 ASSERT(dtrace_retained == dtrace_anon.dta_enabling);
16606
6d2010ae 16607 /*
fe8ab488 16608 * APPLE NOTE: if handling anonymous dof, switch symbol modes.
6d2010ae
A
16609 */
16610 if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) {
16611 dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_KERNEL;
16612 }
6d2010ae 16613
2d21ac55
A
16614 dtrace_enabling_provide(NULL);
16615 state = dtrace_anon.dta_state;
16616
16617 /*
16618 * We couldn't hold cpu_lock across the above call to
16619 * dtrace_enabling_provide(), but we must hold it to actually
16620 * enable the probes. We have to drop all of our locks, pick
16621 * up cpu_lock, and regain our locks before matching the
16622 * retained anonymous enabling.
16623 */
16624 lck_mtx_unlock(&dtrace_lock);
16625 lck_mtx_unlock(&dtrace_provider_lock);
16626
16627 lck_mtx_lock(&cpu_lock);
16628 lck_mtx_lock(&dtrace_provider_lock);
16629 lck_mtx_lock(&dtrace_lock);
16630
16631 if ((enab = dtrace_anon.dta_enabling) != NULL)
39037602 16632 (void) dtrace_enabling_match(enab, NULL, NULL);
2d21ac55
A
16633
16634 lck_mtx_unlock(&cpu_lock);
16635 }
16636
16637 lck_mtx_unlock(&dtrace_lock);
16638 lck_mtx_unlock(&dtrace_provider_lock);
16639
16640 if (state != NULL) {
16641 /*
16642 * If we created any anonymous state, set it going now.
16643 */
16644 (void) dtrace_state_go(state, &dtrace_anon.dta_beganon);
16645 }
16646
16647 return (DDI_SUCCESS);
16648}
16649
2d21ac55
A
16650/*ARGSUSED*/
16651static int
16652dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
16653{
16654#pragma unused(flag, otyp)
16655 dtrace_state_t *state;
16656 uint32_t priv;
16657 uid_t uid;
16658 zoneid_t zoneid;
b0d623f7 16659 int rv;
2d21ac55 16660
fe8ab488 16661 /* APPLE: Darwin puts Helper on its own major device. */
2d21ac55
A
16662
16663 /*
16664 * If no DTRACE_PRIV_* bits are set in the credential, then the
16665 * caller lacks sufficient permission to do anything with DTrace.
16666 */
16667 dtrace_cred2priv(cred_p, &priv, &uid, &zoneid);
16668 if (priv == DTRACE_PRIV_NONE)
16669 return (EACCES);
16670
2d21ac55 16671 /*
fe8ab488 16672 * APPLE NOTE: We delay the initialization of fasttrap as late as possible.
2d21ac55
A
16673 * It certainly can't be later than now!
16674 */
16675 fasttrap_init();
2d21ac55
A
16676
16677 /*
16678 * Ask all providers to provide all their probes.
16679 */
16680 lck_mtx_lock(&dtrace_provider_lock);
16681 dtrace_probe_provide(NULL, NULL);
16682 lck_mtx_unlock(&dtrace_provider_lock);
16683
16684 lck_mtx_lock(&cpu_lock);
16685 lck_mtx_lock(&dtrace_lock);
16686 dtrace_opens++;
16687 dtrace_membar_producer();
16688
d9a64523 16689#ifdef illumos
2d21ac55
A
16690 /*
16691 * If the kernel debugger is active (that is, if the kernel debugger
16692 * modified text in some way), we won't allow the open.
16693 */
16694 if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
16695 dtrace_opens--;
b0d623f7 16696 lck_mtx_unlock(&dtrace_lock);
fe8ab488
A
16697 lck_mtx_unlock(&cpu_lock);
16698 return (EBUSY);
16699 }
d9a64523 16700#endif
2d21ac55 16701
fe8ab488
A
16702 rv = dtrace_state_create(devp, cred_p, &state);
16703 lck_mtx_unlock(&cpu_lock);
2d21ac55 16704
fe8ab488 16705 if (rv != 0 || state == NULL) {
d9a64523
A
16706 if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) {
16707#ifdef illumos
fe8ab488 16708 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
d9a64523
A
16709#endif
16710 }
fe8ab488
A
16711 lck_mtx_unlock(&dtrace_lock);
16712 /* propagate EAGAIN or ERESTART */
16713 return (rv);
16714 }
16715
16716 lck_mtx_unlock(&dtrace_lock);
2d21ac55 16717
fe8ab488 16718 lck_rw_lock_exclusive(&dtrace_dof_mode_lock);
2d21ac55 16719
fe8ab488
A
16720 /*
16721 * If we are currently lazy, transition states.
16722 *
16723 * Unlike dtrace_close, we do not need to check the
16724 * value of dtrace_opens, as any positive value (and
16725 * we count as 1) means we transition states.
16726 */
16727 if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON) {
16728 dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_OFF;
39037602
A
16729 /*
16730 * We do not need to hold the exclusive lock while processing
16731 * DOF on processes. We do need to make sure the mode does not get
16732 * changed to DTRACE_DOF_MODE_LAZY_ON during that stage though
16733 * (which should not happen anyway since it only happens in
16734 * dtrace_close). There is no way imcomplete USDT probes can be
16735 * activate by any DTrace clients here since they all have to
16736 * call dtrace_open and be blocked on dtrace_dof_mode_lock
16737 */
16738 lck_rw_lock_exclusive_to_shared(&dtrace_dof_mode_lock);
fe8ab488
A
16739 /*
16740 * Iterate all existing processes and load lazy dofs.
16741 */
16742 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS,
16743 dtrace_lazy_dofs_proc_iterate_doit,
16744 NULL,
16745 dtrace_lazy_dofs_proc_iterate_filter,
16746 NULL);
39037602
A
16747
16748 lck_rw_unlock_shared(&dtrace_dof_mode_lock);
16749 }
16750 else {
16751 lck_rw_unlock_exclusive(&dtrace_dof_mode_lock);
fe8ab488 16752 }
2d21ac55 16753
2d21ac55 16754
fe8ab488
A
16755 /*
16756 * Update kernel symbol state.
16757 *
16758 * We must own the provider and dtrace locks.
16759 *
16760 * NOTE! It may appear there is a race by setting this value so late
16761 * after dtrace_probe_provide. However, any kext loaded after the
16762 * call to probe provide and before we set LAZY_OFF will be marked as
16763 * eligible for symbols from userspace. The same dtrace that is currently
16764 * calling dtrace_open() (this call!) will get a list of kexts needing
16765 * symbols and fill them in, thus closing the race window.
16766 *
16767 * We want to set this value only after it certain it will succeed, as
16768 * this significantly reduces the complexity of error exits.
16769 */
16770 lck_mtx_lock(&dtrace_lock);
16771 if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) {
16772 dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_KERNEL;
2d21ac55 16773 }
fe8ab488 16774 lck_mtx_unlock(&dtrace_lock);
2d21ac55 16775
fe8ab488
A
16776 return (0);
16777}
2d21ac55 16778
fe8ab488
A
16779/*ARGSUSED*/
16780static int
16781dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
16782{
16783#pragma unused(flag, otyp, cred_p) /* __APPLE__ */
16784 minor_t minor = getminor(dev);
16785 dtrace_state_t *state;
2d21ac55 16786
fe8ab488 16787 /* APPLE NOTE: Darwin puts Helper on its own major device. */
39037602 16788 state = dtrace_state_get(minor);
fe8ab488
A
16789
16790 lck_mtx_lock(&cpu_lock);
16791 lck_mtx_lock(&dtrace_lock);
2d21ac55 16792
fe8ab488 16793 if (state->dts_anon) {
2d21ac55 16794 /*
fe8ab488 16795 * There is anonymous state. Destroy that first.
2d21ac55 16796 */
fe8ab488
A
16797 ASSERT(dtrace_anon.dta_state == NULL);
16798 dtrace_state_destroy(state->dts_anon);
16799 }
2d21ac55 16800
fe8ab488
A
16801 dtrace_state_destroy(state);
16802 ASSERT(dtrace_opens > 0);
2d21ac55 16803
fe8ab488
A
16804 /*
16805 * Only relinquish control of the kernel debugger interface when there
16806 * are no consumers and no anonymous enablings.
16807 */
d9a64523
A
16808 if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL) {
16809#ifdef illumos
fe8ab488 16810 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
d9a64523
A
16811#endif
16812 }
16813
fe8ab488
A
16814 lck_mtx_unlock(&dtrace_lock);
16815 lck_mtx_unlock(&cpu_lock);
2d21ac55 16816
fe8ab488
A
16817 /*
16818 * Lock ordering requires the dof mode lock be taken before
16819 * the dtrace_lock.
16820 */
16821 lck_rw_lock_exclusive(&dtrace_dof_mode_lock);
16822 lck_mtx_lock(&dtrace_lock);
16823
16824 if (dtrace_opens == 0) {
16825 /*
16826 * If we are currently lazy-off, and this is the last close, transition to
16827 * lazy state.
16828 */
16829 if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF) {
16830 dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
2d21ac55
A
16831 }
16832
fe8ab488
A
16833 /*
16834 * If we are the last dtrace client, switch back to lazy (from userspace) symbols
16835 */
16836 if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_KERNEL) {
16837 dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE;
16838 }
2d21ac55 16839 }
fe8ab488
A
16840
16841 lck_mtx_unlock(&dtrace_lock);
16842 lck_rw_unlock_exclusive(&dtrace_dof_mode_lock);
16843
16844 /*
16845 * Kext probes may be retained past the end of the kext's lifespan. The
16846 * probes are kept until the last reference to them has been removed.
16847 * Since closing an active dtrace context is likely to drop that last reference,
16848 * lets take a shot at cleaning out the orphaned probes now.
16849 */
16850 dtrace_module_unloaded(NULL);
2d21ac55 16851
fe8ab488 16852 return (0);
2d21ac55 16853}
fe8ab488 16854
2d21ac55
A
16855/*ARGSUSED*/
16856static int
b0d623f7 16857dtrace_ioctl_helper(u_long cmd, caddr_t arg, int *rv)
2d21ac55 16858{
b0d623f7
A
16859#pragma unused(rv)
16860 /*
16861 * Safe to check this outside the dof mode lock
16862 */
16863 if (dtrace_dof_mode == DTRACE_DOF_MODE_NEVER)
16864 return KERN_SUCCESS;
2d21ac55
A
16865
16866 switch (cmd) {
5ba3f43e
A
16867#if defined (__arm64__)
16868 case DTRACEHIOC_ADDDOF_U32:
16869 case DTRACEHIOC_ADDDOF_U64:
16870#else
39236c6e 16871 case DTRACEHIOC_ADDDOF:
5ba3f43e 16872#endif /* __arm64__*/
39236c6e 16873 {
b0d623f7
A
16874 dof_helper_t *dhp = NULL;
16875 size_t dof_ioctl_data_size;
16876 dof_ioctl_data_t* multi_dof;
16877 unsigned int i;
16878 int rval = 0;
16879 user_addr_t user_address = *(user_addr_t*)arg;
16880 uint64_t dof_count;
16881 int multi_dof_claimed = 0;
16882 proc_t* p = current_proc();
2d21ac55 16883
5ba3f43e
A
16884 /*
16885 * If this is a restricted process and dtrace is restricted,
16886 * do not allow DOFs to be registered
16887 */
16888 if (dtrace_is_restricted() &&
16889 !dtrace_are_restrictions_relaxed() &&
16890 !dtrace_can_attach_to_proc(current_proc())) {
16891 return (EACCES);
16892 }
16893
b0d623f7
A
16894 /*
16895 * Read the number of DOF sections being passed in.
16896 */
16897 if (copyin(user_address + offsetof(dof_ioctl_data_t, dofiod_count),
16898 &dof_count,
16899 sizeof(dof_count))) {
16900 dtrace_dof_error(NULL, "failed to copyin dofiod_count");
16901 return (EFAULT);
16902 }
5ba3f43e 16903
b0d623f7
A
16904 /*
16905 * Range check the count.
16906 */
16907 if (dof_count == 0 || dof_count > 1024) {
16908 dtrace_dof_error(NULL, "dofiod_count is not valid");
16909 return (EINVAL);
16910 }
16911
16912 /*
16913 * Allocate a correctly sized structure and copyin the data.
16914 */
16915 dof_ioctl_data_size = DOF_IOCTL_DATA_T_SIZE(dof_count);
16916 if ((multi_dof = kmem_alloc(dof_ioctl_data_size, KM_SLEEP)) == NULL)
16917 return (ENOMEM);
16918
16919 /* NOTE! We can no longer exit this method via return */
16920 if (copyin(user_address, multi_dof, dof_ioctl_data_size) != 0) {
16921 dtrace_dof_error(NULL, "failed copyin of dof_ioctl_data_t");
16922 rval = EFAULT;
16923 goto cleanup;
16924 }
16925
16926 /*
16927 * Check that the count didn't change between the first copyin and the second.
16928 */
16929 if (multi_dof->dofiod_count != dof_count) {
16930 rval = EINVAL;
16931 goto cleanup;
16932 }
16933
16934 /*
16935 * Try to process lazily first.
16936 */
16937 rval = dtrace_lazy_dofs_add(p, multi_dof, &multi_dof_claimed);
16938
16939 /*
16940 * If rval is EACCES, we must be non-lazy.
16941 */
16942 if (rval == EACCES) {
16943 rval = 0;
16944 /*
16945 * Process each dof_helper_t
16946 */
16947 i = 0;
16948 do {
16949 dhp = &multi_dof->dofiod_helpers[i];
16950
16951 dof_hdr_t *dof = dtrace_dof_copyin(dhp->dofhp_dof, &rval);
16952
16953 if (dof != NULL) {
d9a64523 16954 lck_mtx_lock(&dtrace_meta_lock);
b0d623f7
A
16955 lck_mtx_lock(&dtrace_lock);
16956
16957 /*
16958 * dtrace_helper_slurp() takes responsibility for the dof --
16959 * it may free it now or it may save it and free it later.
16960 */
16961 if ((dhp->dofhp_dof = (uint64_t)dtrace_helper_slurp(p, dof, dhp)) == -1ULL) {
16962 rval = EINVAL;
16963 }
16964
16965 lck_mtx_unlock(&dtrace_lock);
d9a64523 16966 lck_mtx_unlock(&dtrace_meta_lock);
b0d623f7
A
16967 }
16968 } while (++i < multi_dof->dofiod_count && rval == 0);
16969 }
16970
16971 /*
16972 * We need to copyout the multi_dof struct, because it contains
16973 * the generation (unique id) values needed to call DTRACEHIOC_REMOVE
16974 *
16975 * This could certainly be better optimized.
16976 */
16977 if (copyout(multi_dof, user_address, dof_ioctl_data_size) != 0) {
16978 dtrace_dof_error(NULL, "failed copyout of dof_ioctl_data_t");
16979 /* Don't overwrite pre-existing error code */
16980 if (rval == 0) rval = EFAULT;
16981 }
16982
16983 cleanup:
16984 /*
16985 * If we had to allocate struct memory, free it.
16986 */
16987 if (multi_dof != NULL && !multi_dof_claimed) {
16988 kmem_free(multi_dof, dof_ioctl_data_size);
16989 }
16990
16991 return rval;
16992 }
16993
16994 case DTRACEHIOC_REMOVE: {
16995 int generation = *(int*)arg;
16996 proc_t* p = current_proc();
16997
16998 /*
16999 * Try lazy first.
17000 */
17001 int rval = dtrace_lazy_dofs_remove(p, generation);
17002
17003 /*
17004 * EACCES means non-lazy
17005 */
17006 if (rval == EACCES) {
d9a64523 17007 lck_mtx_lock(&dtrace_meta_lock);
b0d623f7
A
17008 lck_mtx_lock(&dtrace_lock);
17009 rval = dtrace_helper_destroygen(p, generation);
17010 lck_mtx_unlock(&dtrace_lock);
d9a64523 17011 lck_mtx_unlock(&dtrace_meta_lock);
b0d623f7
A
17012 }
17013
17014 return (rval);
17015 }
17016
17017 default:
17018 break;
17019 }
17020
17021 return ENOTTY;
17022}
17023
17024/*ARGSUSED*/
17025static int
17026dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv)
17027{
17028#pragma unused(md)
17029 minor_t minor = getminor(dev);
17030 dtrace_state_t *state;
17031 int rval;
17032
17033 /* Darwin puts Helper on its own major device. */
17034
39037602 17035 state = dtrace_state_get(minor);
b0d623f7
A
17036
17037 if (state->dts_anon) {
17038 ASSERT(dtrace_anon.dta_state == NULL);
17039 state = state->dts_anon;
17040 }
17041
17042 switch (cmd) {
17043 case DTRACEIOC_PROVIDER: {
17044 dtrace_providerdesc_t pvd;
17045 dtrace_provider_t *pvp;
17046
17047 if (copyin(arg, &pvd, sizeof (pvd)) != 0)
17048 return (EFAULT);
17049
17050 pvd.dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0';
17051 lck_mtx_lock(&dtrace_provider_lock);
17052
17053 for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) {
17054 if (strncmp(pvp->dtpv_name, pvd.dtvd_name, DTRACE_PROVNAMELEN) == 0)
17055 break;
17056 }
17057
17058 lck_mtx_unlock(&dtrace_provider_lock);
17059
17060 if (pvp == NULL)
17061 return (ESRCH);
17062
17063 bcopy(&pvp->dtpv_priv, &pvd.dtvd_priv, sizeof (dtrace_ppriv_t));
17064 bcopy(&pvp->dtpv_attr, &pvd.dtvd_attr, sizeof (dtrace_pattr_t));
17065 if (copyout(&pvd, arg, sizeof (pvd)) != 0)
17066 return (EFAULT);
17067
17068 return (0);
17069 }
17070
17071 case DTRACEIOC_EPROBE: {
17072 dtrace_eprobedesc_t epdesc;
17073 dtrace_ecb_t *ecb;
17074 dtrace_action_t *act;
17075 void *buf;
17076 size_t size;
17077 uintptr_t dest;
17078 int nrecs;
17079
17080 if (copyin(arg, &epdesc, sizeof (epdesc)) != 0)
17081 return (EFAULT);
17082
17083 lck_mtx_lock(&dtrace_lock);
17084
17085 if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) {
17086 lck_mtx_unlock(&dtrace_lock);
17087 return (EINVAL);
17088 }
17089
17090 if (ecb->dte_probe == NULL) {
17091 lck_mtx_unlock(&dtrace_lock);
17092 return (EINVAL);
17093 }
17094
17095 epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id;
17096 epdesc.dtepd_uarg = ecb->dte_uarg;
17097 epdesc.dtepd_size = ecb->dte_size;
17098
17099 nrecs = epdesc.dtepd_nrecs;
17100 epdesc.dtepd_nrecs = 0;
17101 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
17102 if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
17103 continue;
17104
17105 epdesc.dtepd_nrecs++;
17106 }
17107
17108 /*
17109 * Now that we have the size, we need to allocate a temporary
17110 * buffer in which to store the complete description. We need
17111 * the temporary buffer to be able to drop dtrace_lock()
17112 * across the copyout(), below.
17113 */
17114 size = sizeof (dtrace_eprobedesc_t) +
17115 (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t));
17116
17117 buf = kmem_alloc(size, KM_SLEEP);
17118 dest = (uintptr_t)buf;
17119
17120 bcopy(&epdesc, (void *)dest, sizeof (epdesc));
17121 dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]);
17122
17123 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
17124 if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
17125 continue;
17126
17127 if (nrecs-- == 0)
17128 break;
17129
17130 bcopy(&act->dta_rec, (void *)dest,
17131 sizeof (dtrace_recdesc_t));
17132 dest += sizeof (dtrace_recdesc_t);
17133 }
17134
17135 lck_mtx_unlock(&dtrace_lock);
17136
17137 if (copyout(buf, arg, dest - (uintptr_t)buf) != 0) {
17138 kmem_free(buf, size);
17139 return (EFAULT);
17140 }
17141
17142 kmem_free(buf, size);
17143 return (0);
17144 }
17145
17146 case DTRACEIOC_AGGDESC: {
17147 dtrace_aggdesc_t aggdesc;
17148 dtrace_action_t *act;
17149 dtrace_aggregation_t *agg;
17150 int nrecs;
17151 uint32_t offs;
17152 dtrace_recdesc_t *lrec;
17153 void *buf;
17154 size_t size;
17155 uintptr_t dest;
17156
17157 if (copyin(arg, &aggdesc, sizeof (aggdesc)) != 0)
17158 return (EFAULT);
17159
17160 lck_mtx_lock(&dtrace_lock);
17161
17162 if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) {
17163 lck_mtx_unlock(&dtrace_lock);
17164 return (EINVAL);
17165 }
17166
17167 aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid;
17168
17169 nrecs = aggdesc.dtagd_nrecs;
17170 aggdesc.dtagd_nrecs = 0;
17171
17172 offs = agg->dtag_base;
17173 lrec = &agg->dtag_action.dta_rec;
17174 aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs;
17175
17176 for (act = agg->dtag_first; ; act = act->dta_next) {
17177 ASSERT(act->dta_intuple ||
17178 DTRACEACT_ISAGG(act->dta_kind));
17179
17180 /*
17181 * If this action has a record size of zero, it
17182 * denotes an argument to the aggregating action.
17183 * Because the presence of this record doesn't (or
17184 * shouldn't) affect the way the data is interpreted,
17185 * we don't copy it out to save user-level the
17186 * confusion of dealing with a zero-length record.
17187 */
17188 if (act->dta_rec.dtrd_size == 0) {
17189 ASSERT(agg->dtag_hasarg);
17190 continue;
17191 }
17192
17193 aggdesc.dtagd_nrecs++;
17194
17195 if (act == &agg->dtag_action)
17196 break;
17197 }
17198
17199 /*
17200 * Now that we have the size, we need to allocate a temporary
17201 * buffer in which to store the complete description. We need
17202 * the temporary buffer to be able to drop dtrace_lock()
17203 * across the copyout(), below.
17204 */
17205 size = sizeof (dtrace_aggdesc_t) +
17206 (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t));
17207
17208 buf = kmem_alloc(size, KM_SLEEP);
17209 dest = (uintptr_t)buf;
17210
17211 bcopy(&aggdesc, (void *)dest, sizeof (aggdesc));
17212 dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]);
17213
17214 for (act = agg->dtag_first; ; act = act->dta_next) {
17215 dtrace_recdesc_t rec = act->dta_rec;
17216
17217 /*
17218 * See the comment in the above loop for why we pass
17219 * over zero-length records.
17220 */
17221 if (rec.dtrd_size == 0) {
17222 ASSERT(agg->dtag_hasarg);
17223 continue;
17224 }
17225
17226 if (nrecs-- == 0)
17227 break;
17228
17229 rec.dtrd_offset -= offs;
17230 bcopy(&rec, (void *)dest, sizeof (rec));
17231 dest += sizeof (dtrace_recdesc_t);
17232
17233 if (act == &agg->dtag_action)
17234 break;
17235 }
17236
17237 lck_mtx_unlock(&dtrace_lock);
17238
17239 if (copyout(buf, arg, dest - (uintptr_t)buf) != 0) {
17240 kmem_free(buf, size);
17241 return (EFAULT);
17242 }
17243
17244 kmem_free(buf, size);
17245 return (0);
17246 }
17247
17248 case DTRACEIOC_ENABLE: {
17249 dof_hdr_t *dof;
17250 dtrace_enabling_t *enab = NULL;
17251 dtrace_vstate_t *vstate;
17252 int err = 0;
17253
17254 *rv = 0;
17255
17256 /*
17257 * If a NULL argument has been passed, we take this as our
17258 * cue to reevaluate our enablings.
17259 */
fe8ab488 17260 if (arg == 0) {
b0d623f7
A
17261 dtrace_enabling_matchall();
17262
17263 return (0);
17264 }
17265
17266 if ((dof = dtrace_dof_copyin(arg, &rval)) == NULL)
17267 return (rval);
17268
17269 lck_mtx_lock(&cpu_lock);
17270 lck_mtx_lock(&dtrace_lock);
17271 vstate = &state->dts_vstate;
17272
17273 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
17274 lck_mtx_unlock(&dtrace_lock);
17275 lck_mtx_unlock(&cpu_lock);
17276 dtrace_dof_destroy(dof);
17277 return (EBUSY);
17278 }
17279
17280 if (dtrace_dof_slurp(dof, vstate, cr, &enab, 0, B_TRUE) != 0) {
17281 lck_mtx_unlock(&dtrace_lock);
17282 lck_mtx_unlock(&cpu_lock);
17283 dtrace_dof_destroy(dof);
17284 return (EINVAL);
17285 }
17286
17287 if ((rval = dtrace_dof_options(dof, state)) != 0) {
17288 dtrace_enabling_destroy(enab);
17289 lck_mtx_unlock(&dtrace_lock);
17290 lck_mtx_unlock(&cpu_lock);
17291 dtrace_dof_destroy(dof);
17292 return (rval);
17293 }
17294
39037602 17295 if ((err = dtrace_enabling_match(enab, rv, NULL)) == 0) {
b0d623f7
A
17296 err = dtrace_enabling_retain(enab);
17297 } else {
17298 dtrace_enabling_destroy(enab);
17299 }
17300
b0d623f7 17301 lck_mtx_unlock(&dtrace_lock);
fe8ab488 17302 lck_mtx_unlock(&cpu_lock);
b0d623f7
A
17303 dtrace_dof_destroy(dof);
17304
17305 return (err);
17306 }
17307
17308 case DTRACEIOC_REPLICATE: {
17309 dtrace_repldesc_t desc;
17310 dtrace_probedesc_t *match = &desc.dtrpd_match;
17311 dtrace_probedesc_t *create = &desc.dtrpd_create;
17312 int err;
17313
17314 if (copyin(arg, &desc, sizeof (desc)) != 0)
17315 return (EFAULT);
17316
17317 match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
17318 match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
17319 match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
17320 match->dtpd_name[DTRACE_NAMELEN - 1] = '\0';
17321
17322 create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
17323 create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
17324 create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
17325 create->dtpd_name[DTRACE_NAMELEN - 1] = '\0';
17326
17327 lck_mtx_lock(&dtrace_lock);
17328 err = dtrace_enabling_replicate(state, match, create);
17329 lck_mtx_unlock(&dtrace_lock);
17330
17331 return (err);
17332 }
17333
17334 case DTRACEIOC_PROBEMATCH:
17335 case DTRACEIOC_PROBES: {
17336 dtrace_probe_t *probe = NULL;
17337 dtrace_probedesc_t desc;
17338 dtrace_probekey_t pkey;
17339 dtrace_id_t i;
17340 int m = 0;
17341 uint32_t priv;
17342 uid_t uid;
17343 zoneid_t zoneid;
17344
17345 if (copyin(arg, &desc, sizeof (desc)) != 0)
17346 return (EFAULT);
17347
17348 desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
17349 desc.dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
17350 desc.dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
17351 desc.dtpd_name[DTRACE_NAMELEN - 1] = '\0';
17352
17353 /*
17354 * Before we attempt to match this probe, we want to give
17355 * all providers the opportunity to provide it.
17356 */
17357 if (desc.dtpd_id == DTRACE_IDNONE) {
17358 lck_mtx_lock(&dtrace_provider_lock);
17359 dtrace_probe_provide(&desc, NULL);
17360 lck_mtx_unlock(&dtrace_provider_lock);
17361 desc.dtpd_id++;
17362 }
17363
b0d623f7
A
17364 dtrace_cred2priv(cr, &priv, &uid, &zoneid);
17365
17366 lck_mtx_lock(&dtrace_lock);
17367
d9a64523
A
17368 if (cmd == DTRACEIOC_PROBEMATCH) {
17369 dtrace_probekey(&desc, &pkey);
17370 pkey.dtpk_id = DTRACE_IDNONE;
17371
17372 /* Quiet compiler warning */
b0d623f7
A
17373 for (i = desc.dtpd_id; i <= (dtrace_id_t)dtrace_nprobes; i++) {
17374 if ((probe = dtrace_probes[i - 1]) != NULL &&
17375 (m = dtrace_match_probe(probe, &pkey,
17376 priv, uid, zoneid)) != 0)
17377 break;
17378 }
17379
17380 if (m < 0) {
17381 lck_mtx_unlock(&dtrace_lock);
17382 return (EINVAL);
17383 }
d9a64523 17384 dtrace_probekey_release(&pkey);
b0d623f7
A
17385
17386 } else {
17387 /* Quiet compiler warning */
17388 for (i = desc.dtpd_id; i <= (dtrace_id_t)dtrace_nprobes; i++) {
17389 if ((probe = dtrace_probes[i - 1]) != NULL &&
17390 dtrace_match_priv(probe, priv, uid, zoneid))
17391 break;
17392 }
17393 }
17394
17395 if (probe == NULL) {
17396 lck_mtx_unlock(&dtrace_lock);
17397 return (ESRCH);
17398 }
17399
17400 dtrace_probe_description(probe, &desc);
17401 lck_mtx_unlock(&dtrace_lock);
17402
17403 if (copyout(&desc, arg, sizeof (desc)) != 0)
17404 return (EFAULT);
17405
17406 return (0);
17407 }
17408
17409 case DTRACEIOC_PROBEARG: {
17410 dtrace_argdesc_t desc;
17411 dtrace_probe_t *probe;
17412 dtrace_provider_t *prov;
17413
17414 if (copyin(arg, &desc, sizeof (desc)) != 0)
17415 return (EFAULT);
17416
17417 if (desc.dtargd_id == DTRACE_IDNONE)
17418 return (EINVAL);
17419
17420 if (desc.dtargd_ndx == DTRACE_ARGNONE)
17421 return (EINVAL);
17422
17423 lck_mtx_lock(&dtrace_provider_lock);
17424 lck_mtx_lock(&mod_lock);
17425 lck_mtx_lock(&dtrace_lock);
17426
17427 /* Quiet compiler warning */
17428 if (desc.dtargd_id > (dtrace_id_t)dtrace_nprobes) {
17429 lck_mtx_unlock(&dtrace_lock);
17430 lck_mtx_unlock(&mod_lock);
17431 lck_mtx_unlock(&dtrace_provider_lock);
17432 return (EINVAL);
17433 }
17434
17435 if ((probe = dtrace_probes[desc.dtargd_id - 1]) == NULL) {
17436 lck_mtx_unlock(&dtrace_lock);
17437 lck_mtx_unlock(&mod_lock);
17438 lck_mtx_unlock(&dtrace_provider_lock);
17439 return (EINVAL);
17440 }
17441
17442 lck_mtx_unlock(&dtrace_lock);
17443
17444 prov = probe->dtpr_provider;
17445
17446 if (prov->dtpv_pops.dtps_getargdesc == NULL) {
17447 /*
17448 * There isn't any typed information for this probe.
17449 * Set the argument number to DTRACE_ARGNONE.
17450 */
17451 desc.dtargd_ndx = DTRACE_ARGNONE;
17452 } else {
17453 desc.dtargd_native[0] = '\0';
17454 desc.dtargd_xlate[0] = '\0';
17455 desc.dtargd_mapping = desc.dtargd_ndx;
17456
17457 prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg,
17458 probe->dtpr_id, probe->dtpr_arg, &desc);
17459 }
17460
17461 lck_mtx_unlock(&mod_lock);
17462 lck_mtx_unlock(&dtrace_provider_lock);
17463
17464 if (copyout(&desc, arg, sizeof (desc)) != 0)
17465 return (EFAULT);
17466
17467 return (0);
17468 }
17469
17470 case DTRACEIOC_GO: {
17471 processorid_t cpuid;
17472 rval = dtrace_state_go(state, &cpuid);
17473
17474 if (rval != 0)
17475 return (rval);
17476
17477 if (copyout(&cpuid, arg, sizeof (cpuid)) != 0)
17478 return (EFAULT);
17479
17480 return (0);
17481 }
17482
17483 case DTRACEIOC_STOP: {
17484 processorid_t cpuid;
17485
17486 lck_mtx_lock(&dtrace_lock);
17487 rval = dtrace_state_stop(state, &cpuid);
17488 lck_mtx_unlock(&dtrace_lock);
17489
17490 if (rval != 0)
17491 return (rval);
17492
17493 if (copyout(&cpuid, arg, sizeof (cpuid)) != 0)
17494 return (EFAULT);
17495
17496 return (0);
17497 }
17498
17499 case DTRACEIOC_DOFGET: {
17500 dof_hdr_t hdr, *dof;
17501 uint64_t len;
17502
17503 if (copyin(arg, &hdr, sizeof (hdr)) != 0)
17504 return (EFAULT);
17505
17506 lck_mtx_lock(&dtrace_lock);
17507 dof = dtrace_dof_create(state);
17508 lck_mtx_unlock(&dtrace_lock);
17509
17510 len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz);
17511 rval = copyout(dof, arg, len);
17512 dtrace_dof_destroy(dof);
17513
17514 return (rval == 0 ? 0 : EFAULT);
17515 }
17516
39037602
A
17517 case DTRACEIOC_SLEEP: {
17518 int64_t time;
17519 uint64_t abstime;
17520 uint64_t rvalue = DTRACE_WAKE_TIMEOUT;
17521
17522 if (copyin(arg, &time, sizeof(time)) != 0)
17523 return (EFAULT);
17524
17525 nanoseconds_to_absolutetime((uint64_t)time, &abstime);
17526 clock_absolutetime_interval_to_deadline(abstime, &abstime);
17527
17528 if (assert_wait_deadline(state, THREAD_ABORTSAFE, abstime) == THREAD_WAITING) {
17529 if (state->dts_buf_over_limit > 0) {
17530 clear_wait(current_thread(), THREAD_INTERRUPTED);
17531 rvalue = DTRACE_WAKE_BUF_LIMIT;
17532 } else {
17533 thread_block(THREAD_CONTINUE_NULL);
17534 if (state->dts_buf_over_limit > 0) {
17535 rvalue = DTRACE_WAKE_BUF_LIMIT;
17536 }
17537 }
17538 }
17539
17540 if (copyout(&rvalue, arg, sizeof(rvalue)) != 0)
17541 return (EFAULT);
17542
17543 return (0);
17544 }
17545
17546 case DTRACEIOC_SIGNAL: {
17547 wakeup(state);
17548 return (0);
17549 }
17550
b0d623f7
A
17551 case DTRACEIOC_AGGSNAP:
17552 case DTRACEIOC_BUFSNAP: {
17553 dtrace_bufdesc_t desc;
17554 caddr_t cached;
39037602 17555 boolean_t over_limit;
b0d623f7
A
17556 dtrace_buffer_t *buf;
17557
17558 if (copyin(arg, &desc, sizeof (desc)) != 0)
17559 return (EFAULT);
17560
17561 if ((int)desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU)
17562 return (EINVAL);
17563
17564 lck_mtx_lock(&dtrace_lock);
17565
17566 if (cmd == DTRACEIOC_BUFSNAP) {
17567 buf = &state->dts_buffer[desc.dtbd_cpu];
17568 } else {
17569 buf = &state->dts_aggbuffer[desc.dtbd_cpu];
17570 }
17571
17572 if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) {
17573 size_t sz = buf->dtb_offset;
17574
17575 if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) {
17576 lck_mtx_unlock(&dtrace_lock);
17577 return (EBUSY);
17578 }
17579
17580 /*
17581 * If this buffer has already been consumed, we're
17582 * going to indicate that there's nothing left here
17583 * to consume.
17584 */
17585 if (buf->dtb_flags & DTRACEBUF_CONSUMED) {
17586 lck_mtx_unlock(&dtrace_lock);
17587
17588 desc.dtbd_size = 0;
17589 desc.dtbd_drops = 0;
17590 desc.dtbd_errors = 0;
17591 desc.dtbd_oldest = 0;
17592 sz = sizeof (desc);
17593
17594 if (copyout(&desc, arg, sz) != 0)
17595 return (EFAULT);
17596
17597 return (0);
17598 }
17599
17600 /*
17601 * If this is a ring buffer that has wrapped, we want
17602 * to copy the whole thing out.
17603 */
17604 if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
17605 dtrace_buffer_polish(buf);
17606 sz = buf->dtb_size;
17607 }
17608
17609 if (copyout(buf->dtb_tomax, (user_addr_t)desc.dtbd_data, sz) != 0) {
17610 lck_mtx_unlock(&dtrace_lock);
17611 return (EFAULT);
17612 }
17613
17614 desc.dtbd_size = sz;
17615 desc.dtbd_drops = buf->dtb_drops;
17616 desc.dtbd_errors = buf->dtb_errors;
17617 desc.dtbd_oldest = buf->dtb_xamot_offset;
04b8595b 17618 desc.dtbd_timestamp = dtrace_gethrtime();
b0d623f7
A
17619
17620 lck_mtx_unlock(&dtrace_lock);
17621
17622 if (copyout(&desc, arg, sizeof (desc)) != 0)
17623 return (EFAULT);
17624
17625 buf->dtb_flags |= DTRACEBUF_CONSUMED;
17626
17627 return (0);
17628 }
17629
17630 if (buf->dtb_tomax == NULL) {
17631 ASSERT(buf->dtb_xamot == NULL);
17632 lck_mtx_unlock(&dtrace_lock);
17633 return (ENOENT);
17634 }
17635
17636 cached = buf->dtb_tomax;
39037602
A
17637 over_limit = buf->dtb_cur_limit == buf->dtb_size;
17638
b0d623f7
A
17639 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
17640
17641 dtrace_xcall(desc.dtbd_cpu,
17642 (dtrace_xcall_t)dtrace_buffer_switch, buf);
17643
17644 state->dts_errors += buf->dtb_xamot_errors;
17645
17646 /*
17647 * If the buffers did not actually switch, then the cross call
17648 * did not take place -- presumably because the given CPU is
17649 * not in the ready set. If this is the case, we'll return
17650 * ENOENT.
17651 */
17652 if (buf->dtb_tomax == cached) {
17653 ASSERT(buf->dtb_xamot != cached);
17654 lck_mtx_unlock(&dtrace_lock);
17655 return (ENOENT);
17656 }
17657
17658 ASSERT(cached == buf->dtb_xamot);
39037602
A
17659 /*
17660 * At this point we know the buffer have switched, so we
17661 * can decrement the over limit count if the buffer was over
17662 * its limit. The new buffer might already be over its limit
17663 * yet, but we don't care since we're guaranteed not to be
17664 * checking the buffer over limit count at this point.
17665 */
17666 if (over_limit) {
17667 uint32_t old = atomic_add_32(&state->dts_buf_over_limit, -1);
17668 #pragma unused(old)
17669
17670 /*
17671 * Verify that we didn't underflow the value
17672 */
17673 ASSERT(old != 0);
17674 }
b0d623f7
A
17675
17676 /*
17677 * We have our snapshot; now copy it out.
17678 */
5ba3f43e
A
17679 if (dtrace_buffer_copyout(buf->dtb_xamot,
17680 (user_addr_t)desc.dtbd_data,
b0d623f7
A
17681 buf->dtb_xamot_offset) != 0) {
17682 lck_mtx_unlock(&dtrace_lock);
17683 return (EFAULT);
17684 }
17685
17686 desc.dtbd_size = buf->dtb_xamot_offset;
17687 desc.dtbd_drops = buf->dtb_xamot_drops;
17688 desc.dtbd_errors = buf->dtb_xamot_errors;
17689 desc.dtbd_oldest = 0;
04b8595b 17690 desc.dtbd_timestamp = buf->dtb_switched;
b0d623f7
A
17691
17692 lck_mtx_unlock(&dtrace_lock);
17693
17694 /*
17695 * Finally, copy out the buffer description.
17696 */
17697 if (copyout(&desc, arg, sizeof (desc)) != 0)
17698 return (EFAULT);
17699
17700 return (0);
17701 }
17702
17703 case DTRACEIOC_CONF: {
17704 dtrace_conf_t conf;
17705
17706 bzero(&conf, sizeof (conf));
17707 conf.dtc_difversion = DIF_VERSION;
17708 conf.dtc_difintregs = DIF_DIR_NREGS;
17709 conf.dtc_diftupregs = DIF_DTR_NREGS;
17710 conf.dtc_ctfmodel = CTF_MODEL_NATIVE;
17711
17712 if (copyout(&conf, arg, sizeof (conf)) != 0)
17713 return (EFAULT);
17714
17715 return (0);
17716 }
17717
17718 case DTRACEIOC_STATUS: {
17719 dtrace_status_t stat;
17720 dtrace_dstate_t *dstate;
17721 int i, j;
17722 uint64_t nerrs;
17723
17724 /*
17725 * See the comment in dtrace_state_deadman() for the reason
17726 * for setting dts_laststatus to INT64_MAX before setting
17727 * it to the correct value.
17728 */
17729 state->dts_laststatus = INT64_MAX;
17730 dtrace_membar_producer();
17731 state->dts_laststatus = dtrace_gethrtime();
17732
17733 bzero(&stat, sizeof (stat));
17734
17735 lck_mtx_lock(&dtrace_lock);
17736
17737 if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) {
17738 lck_mtx_unlock(&dtrace_lock);
17739 return (ENOENT);
17740 }
17741
17742 if (state->dts_activity == DTRACE_ACTIVITY_DRAINING)
17743 stat.dtst_exiting = 1;
17744
17745 nerrs = state->dts_errors;
17746 dstate = &state->dts_vstate.dtvs_dynvars;
17747
17748 for (i = 0; i < (int)NCPU; i++) {
17749 dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i];
17750
17751 stat.dtst_dyndrops += dcpu->dtdsc_drops;
17752 stat.dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops;
17753 stat.dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops;
17754
17755 if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL)
17756 stat.dtst_filled++;
17757
17758 nerrs += state->dts_buffer[i].dtb_errors;
17759
17760 for (j = 0; j < state->dts_nspeculations; j++) {
17761 dtrace_speculation_t *spec;
17762 dtrace_buffer_t *buf;
17763
17764 spec = &state->dts_speculations[j];
17765 buf = &spec->dtsp_buffer[i];
17766 stat.dtst_specdrops += buf->dtb_xamot_drops;
17767 }
17768 }
17769
17770 stat.dtst_specdrops_busy = state->dts_speculations_busy;
17771 stat.dtst_specdrops_unavail = state->dts_speculations_unavail;
17772 stat.dtst_stkstroverflows = state->dts_stkstroverflows;
17773 stat.dtst_dblerrors = state->dts_dblerrors;
17774 stat.dtst_killed =
17775 (state->dts_activity == DTRACE_ACTIVITY_KILLED);
17776 stat.dtst_errors = nerrs;
17777
17778 lck_mtx_unlock(&dtrace_lock);
17779
17780 if (copyout(&stat, arg, sizeof (stat)) != 0)
17781 return (EFAULT);
17782
17783 return (0);
17784 }
17785
17786 case DTRACEIOC_FORMAT: {
17787 dtrace_fmtdesc_t fmt;
17788 char *str;
17789 int len;
17790
17791 if (copyin(arg, &fmt, sizeof (fmt)) != 0)
17792 return (EFAULT);
17793
17794 lck_mtx_lock(&dtrace_lock);
17795
17796 if (fmt.dtfd_format == 0 ||
17797 fmt.dtfd_format > state->dts_nformats) {
17798 lck_mtx_unlock(&dtrace_lock);
17799 return (EINVAL);
17800 }
17801
17802 /*
17803 * Format strings are allocated contiguously and they are
17804 * never freed; if a format index is less than the number
17805 * of formats, we can assert that the format map is non-NULL
17806 * and that the format for the specified index is non-NULL.
17807 */
17808 ASSERT(state->dts_formats != NULL);
17809 str = state->dts_formats[fmt.dtfd_format - 1];
17810 ASSERT(str != NULL);
17811
17812 len = strlen(str) + 1;
17813
17814 if (len > fmt.dtfd_length) {
17815 fmt.dtfd_length = len;
17816
17817 if (copyout(&fmt, arg, sizeof (fmt)) != 0) {
17818 lck_mtx_unlock(&dtrace_lock);
17819 return (EINVAL);
17820 }
17821 } else {
17822 if (copyout(str, (user_addr_t)fmt.dtfd_string, len) != 0) {
17823 lck_mtx_unlock(&dtrace_lock);
17824 return (EINVAL);
17825 }
17826 }
17827
17828 lck_mtx_unlock(&dtrace_lock);
17829 return (0);
17830 }
17831
6d2010ae
A
17832 case DTRACEIOC_MODUUIDSLIST: {
17833 size_t module_uuids_list_size;
17834 dtrace_module_uuids_list_t* uuids_list;
17835 uint64_t dtmul_count;
fe8ab488
A
17836
17837 /*
17838 * Security restrictions make this operation illegal, if this is enabled DTrace
17839 * must refuse to provide any fbt probes.
17840 */
3e170ce0 17841 if (dtrace_fbt_probes_restricted()) {
fe8ab488
A
17842 cmn_err(CE_WARN, "security restrictions disallow DTRACEIOC_MODUUIDSLIST");
17843 return (EPERM);
17844 }
17845
6d2010ae
A
17846 /*
17847 * Fail if the kernel symbol mode makes this operation illegal.
17848 * Both NEVER & ALWAYS_FROM_KERNEL are permanent states, it is legal to check
17849 * for them without holding the dtrace_lock.
17850 */
17851 if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER ||
17852 dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL) {
17853 cmn_err(CE_WARN, "dtrace_kernel_symbol_mode of %u disallows DTRACEIOC_MODUUIDSLIST", dtrace_kernel_symbol_mode);
17854 return (EPERM);
17855 }
17856
17857 /*
17858 * Read the number of symbolsdesc structs being passed in.
17859 */
17860 if (copyin(arg + offsetof(dtrace_module_uuids_list_t, dtmul_count),
17861 &dtmul_count,
17862 sizeof(dtmul_count))) {
17863 cmn_err(CE_WARN, "failed to copyin dtmul_count");
17864 return (EFAULT);
17865 }
17866
17867 /*
17868 * Range check the count. More than 2k kexts is probably an error.
17869 */
17870 if (dtmul_count > 2048) {
17871 cmn_err(CE_WARN, "dtmul_count is not valid");
17872 return (EINVAL);
17873 }
17874
17875 /*
17876 * For all queries, we return EINVAL when the user specified
17877 * count does not match the actual number of modules we find
17878 * available.
17879 *
17880 * If the user specified count is zero, then this serves as a
17881 * simple query to count the available modules in need of symbols.
17882 */
17883
17884 rval = 0;
17885
17886 if (dtmul_count == 0)
17887 {
17888 lck_mtx_lock(&mod_lock);
17889 struct modctl* ctl = dtrace_modctl_list;
17890 while (ctl) {
fe8ab488
A
17891 /* Update the private probes bit */
17892 if (dtrace_provide_private_probes)
17893 ctl->mod_flags |= MODCTL_FBT_PROVIDE_PRIVATE_PROBES;
17894
6d2010ae 17895 ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl));
d9a64523 17896 if (!MOD_SYMBOLS_DONE(ctl) && !MOD_IS_STATIC_KEXT(ctl)) {
6d2010ae
A
17897 dtmul_count++;
17898 rval = EINVAL;
17899 }
17900 ctl = ctl->mod_next;
17901 }
17902 lck_mtx_unlock(&mod_lock);
17903
17904 if (copyout(&dtmul_count, arg, sizeof (dtmul_count)) != 0)
17905 return (EFAULT);
17906 else
17907 return (rval);
17908 }
17909
17910 /*
17911 * If we reach this point, then we have a request for full list data.
17912 * Allocate a correctly sized structure and copyin the data.
17913 */
17914 module_uuids_list_size = DTRACE_MODULE_UUIDS_LIST_SIZE(dtmul_count);
17915 if ((uuids_list = kmem_alloc(module_uuids_list_size, KM_SLEEP)) == NULL)
17916 return (ENOMEM);
17917
17918 /* NOTE! We can no longer exit this method via return */
17919 if (copyin(arg, uuids_list, module_uuids_list_size) != 0) {
17920 cmn_err(CE_WARN, "failed copyin of dtrace_module_uuids_list_t");
17921 rval = EFAULT;
17922 goto moduuidslist_cleanup;
17923 }
17924
17925 /*
17926 * Check that the count didn't change between the first copyin and the second.
17927 */
17928 if (uuids_list->dtmul_count != dtmul_count) {
17929 rval = EINVAL;
17930 goto moduuidslist_cleanup;
17931 }
17932
17933 /*
17934 * Build the list of UUID's that need symbols
17935 */
17936 lck_mtx_lock(&mod_lock);
17937
17938 dtmul_count = 0;
17939
17940 struct modctl* ctl = dtrace_modctl_list;
17941 while (ctl) {
fe8ab488
A
17942 /* Update the private probes bit */
17943 if (dtrace_provide_private_probes)
17944 ctl->mod_flags |= MODCTL_FBT_PROVIDE_PRIVATE_PROBES;
17945
6d2010ae
A
17946 /*
17947 * We assume that userspace symbols will be "better" than kernel level symbols,
17948 * as userspace can search for dSYM(s) and symbol'd binaries. Even if kernel syms
17949 * are available, add user syms if the module might use them.
17950 */
17951 ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl));
d9a64523 17952 if (!MOD_SYMBOLS_DONE(ctl) && !MOD_IS_STATIC_KEXT(ctl)) {
6d2010ae
A
17953 UUID* uuid = &uuids_list->dtmul_uuid[dtmul_count];
17954 if (dtmul_count++ < uuids_list->dtmul_count) {
17955 memcpy(uuid, ctl->mod_uuid, sizeof(UUID));
17956 }
17957 }
17958 ctl = ctl->mod_next;
17959 }
17960
17961 lck_mtx_unlock(&mod_lock);
17962
17963 if (uuids_list->dtmul_count < dtmul_count)
17964 rval = EINVAL;
17965
17966 uuids_list->dtmul_count = dtmul_count;
17967
17968 /*
17969 * Copyout the symbols list (or at least the count!)
17970 */
17971 if (copyout(uuids_list, arg, module_uuids_list_size) != 0) {
17972 cmn_err(CE_WARN, "failed copyout of dtrace_symbolsdesc_list_t");
17973 rval = EFAULT;
17974 }
17975
17976 moduuidslist_cleanup:
17977 /*
17978 * If we had to allocate struct memory, free it.
17979 */
17980 if (uuids_list != NULL) {
17981 kmem_free(uuids_list, module_uuids_list_size);
17982 }
17983
17984 return rval;
17985 }
17986
17987 case DTRACEIOC_PROVMODSYMS: {
17988 size_t module_symbols_size;
17989 dtrace_module_symbols_t* module_symbols;
17990 uint64_t dtmodsyms_count;
fe8ab488
A
17991
17992 /*
17993 * Security restrictions make this operation illegal, if this is enabled DTrace
17994 * must refuse to provide any fbt probes.
17995 */
3e170ce0 17996 if (dtrace_fbt_probes_restricted()) {
fe8ab488
A
17997 cmn_err(CE_WARN, "security restrictions disallow DTRACEIOC_MODUUIDSLIST");
17998 return (EPERM);
17999 }
18000
6d2010ae
A
18001 /*
18002 * Fail if the kernel symbol mode makes this operation illegal.
18003 * Both NEVER & ALWAYS_FROM_KERNEL are permanent states, it is legal to check
18004 * for them without holding the dtrace_lock.
18005 */
18006 if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER ||
18007 dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL) {
18008 cmn_err(CE_WARN, "dtrace_kernel_symbol_mode of %u disallows DTRACEIOC_PROVMODSYMS", dtrace_kernel_symbol_mode);
18009 return (EPERM);
18010 }
18011
18012 /*
18013 * Read the number of module symbols structs being passed in.
18014 */
18015 if (copyin(arg + offsetof(dtrace_module_symbols_t, dtmodsyms_count),
18016 &dtmodsyms_count,
18017 sizeof(dtmodsyms_count))) {
18018 cmn_err(CE_WARN, "failed to copyin dtmodsyms_count");
18019 return (EFAULT);
18020 }
18021
18022 /*
18023 * Range check the count. How much data can we pass around?
18024 * FIX ME!
18025 */
18026 if (dtmodsyms_count == 0 || (dtmodsyms_count > 100 * 1024)) {
18027 cmn_err(CE_WARN, "dtmodsyms_count is not valid");
18028 return (EINVAL);
18029 }
18030
18031 /*
18032 * Allocate a correctly sized structure and copyin the data.
18033 */
18034 module_symbols_size = DTRACE_MODULE_SYMBOLS_SIZE(dtmodsyms_count);
18035 if ((module_symbols = kmem_alloc(module_symbols_size, KM_SLEEP)) == NULL)
18036 return (ENOMEM);
18037
18038 rval = 0;
18039
18040 /* NOTE! We can no longer exit this method via return */
18041 if (copyin(arg, module_symbols, module_symbols_size) != 0) {
39037602 18042 cmn_err(CE_WARN, "failed copyin of dtrace_module_symbols_t");
6d2010ae
A
18043 rval = EFAULT;
18044 goto module_symbols_cleanup;
18045 }
18046
18047 /*
18048 * Check that the count didn't change between the first copyin and the second.
18049 */
18050 if (module_symbols->dtmodsyms_count != dtmodsyms_count) {
18051 rval = EINVAL;
18052 goto module_symbols_cleanup;
18053 }
18054
18055 /*
18056 * Find the modctl to add symbols to.
18057 */
18058 lck_mtx_lock(&dtrace_provider_lock);
18059 lck_mtx_lock(&mod_lock);
18060
18061 struct modctl* ctl = dtrace_modctl_list;
18062 while (ctl) {
fe8ab488
A
18063 /* Update the private probes bit */
18064 if (dtrace_provide_private_probes)
18065 ctl->mod_flags |= MODCTL_FBT_PROVIDE_PRIVATE_PROBES;
18066
6d2010ae 18067 ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl));
d9a64523
A
18068 if (MOD_HAS_UUID(ctl) && !MOD_SYMBOLS_DONE(ctl) && memcmp(module_symbols->dtmodsyms_uuid, ctl->mod_uuid, sizeof(UUID)) == 0) {
18069 dtrace_provider_t *prv;
18070 ctl->mod_user_symbols = module_symbols;
18071
18072 /*
18073 * We're going to call each providers per-module provide operation
18074 * specifying only this module.
18075 */
18076 for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
18077 prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);
18078 /*
18079 * We gave every provider a chance to provide with the user syms, go ahead and clear them
18080 */
18081 ctl->mod_user_symbols = NULL; /* MUST reset this to clear HAS_USERSPACE_SYMBOLS */
6d2010ae
A
18082 }
18083 ctl = ctl->mod_next;
18084 }
18085
6d2010ae
A
18086 lck_mtx_unlock(&mod_lock);
18087 lck_mtx_unlock(&dtrace_provider_lock);
18088
18089 module_symbols_cleanup:
18090 /*
18091 * If we had to allocate struct memory, free it.
18092 */
18093 if (module_symbols != NULL) {
18094 kmem_free(module_symbols, module_symbols_size);
18095 }
18096
18097 return rval;
18098 }
fe8ab488
A
18099
18100 case DTRACEIOC_PROCWAITFOR: {
18101 dtrace_procdesc_t pdesc = {
3e170ce0 18102 .p_name = {0},
fe8ab488
A
18103 .p_pid = -1
18104 };
18105
18106 if ((rval = copyin(arg, &pdesc, sizeof(pdesc))) != 0)
18107 goto proc_waitfor_error;
18108
18109 if ((rval = dtrace_proc_waitfor(&pdesc)) != 0)
18110 goto proc_waitfor_error;
18111
18112 if ((rval = copyout(&pdesc, arg, sizeof(pdesc))) != 0)
18113 goto proc_waitfor_error;
18114
18115 return 0;
18116
18117 proc_waitfor_error:
18118 /* The process was suspended, revert this since the client will not do it. */
18119 if (pdesc.p_pid != -1) {
18120 proc_t *proc = proc_find(pdesc.p_pid);
18121 if (proc != PROC_NULL) {
18122 task_pidresume(proc->task);
18123 proc_rele(proc);
18124 }
18125 }
18126
18127 return rval;
18128 }
18129
18130 default:
18131 break;
b0d623f7
A
18132 }
18133
18134 return (ENOTTY);
18135}
b0d623f7 18136
fe8ab488
A
18137/*
18138 * APPLE NOTE: dtrace_detach not implemented
18139 */
b0d623f7
A
18140#if !defined(__APPLE__)
18141/*ARGSUSED*/
18142static int
18143dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
18144{
18145 dtrace_state_t *state;
18146
18147 switch (cmd) {
18148 case DDI_DETACH:
18149 break;
18150
18151 case DDI_SUSPEND:
18152 return (DDI_SUCCESS);
18153
18154 default:
18155 return (DDI_FAILURE);
18156 }
18157
18158 lck_mtx_lock(&cpu_lock);
18159 lck_mtx_lock(&dtrace_provider_lock);
18160 lck_mtx_lock(&dtrace_lock);
2d21ac55
A
18161
18162 ASSERT(dtrace_opens == 0);
18163
18164 if (dtrace_helpers > 0) {
2d21ac55 18165 lck_mtx_unlock(&dtrace_lock);
fe8ab488 18166 lck_mtx_unlock(&dtrace_provider_lock);
2d21ac55
A
18167 lck_mtx_unlock(&cpu_lock);
18168 return (DDI_FAILURE);
18169 }
18170
18171 if (dtrace_unregister((dtrace_provider_id_t)dtrace_provider) != 0) {
2d21ac55 18172 lck_mtx_unlock(&dtrace_lock);
fe8ab488 18173 lck_mtx_unlock(&dtrace_provider_lock);
2d21ac55
A
18174 lck_mtx_unlock(&cpu_lock);
18175 return (DDI_FAILURE);
18176 }
18177
18178 dtrace_provider = NULL;
18179
18180 if ((state = dtrace_anon_grab()) != NULL) {
18181 /*
18182 * If there were ECBs on this state, the provider should
18183 * have not been allowed to detach; assert that there is
18184 * none.
18185 */
18186 ASSERT(state->dts_necbs == 0);
18187 dtrace_state_destroy(state);
18188
18189 /*
18190 * If we're being detached with anonymous state, we need to
18191 * indicate to the kernel debugger that DTrace is now inactive.
18192 */
18193 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
18194 }
18195
18196 bzero(&dtrace_anon, sizeof (dtrace_anon_t));
18197 unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);
18198 dtrace_cpu_init = NULL;
18199 dtrace_helpers_cleanup = NULL;
18200 dtrace_helpers_fork = NULL;
18201 dtrace_cpustart_init = NULL;
18202 dtrace_cpustart_fini = NULL;
18203 dtrace_debugger_init = NULL;
18204 dtrace_debugger_fini = NULL;
18205 dtrace_kreloc_init = NULL;
18206 dtrace_kreloc_fini = NULL;
18207 dtrace_modload = NULL;
18208 dtrace_modunload = NULL;
18209
18210 lck_mtx_unlock(&cpu_lock);
18211
18212 if (dtrace_helptrace_enabled) {
18213 kmem_free(dtrace_helptrace_buffer, dtrace_helptrace_bufsize);
18214 dtrace_helptrace_buffer = NULL;
18215 }
18216
18217 kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *));
18218 dtrace_probes = NULL;
18219 dtrace_nprobes = 0;
18220
d9a64523
A
18221 dtrace_hash_destroy(dtrace_strings);
18222 dtrace_hash_destroy(dtrace_byprov);
2d21ac55
A
18223 dtrace_hash_destroy(dtrace_bymod);
18224 dtrace_hash_destroy(dtrace_byfunc);
18225 dtrace_hash_destroy(dtrace_byname);
d9a64523
A
18226 dtrace_strings = NULL;
18227 dtrace_byprov = NULL;
2d21ac55
A
18228 dtrace_bymod = NULL;
18229 dtrace_byfunc = NULL;
18230 dtrace_byname = NULL;
18231
18232 kmem_cache_destroy(dtrace_state_cache);
2d21ac55
A
18233 vmem_destroy(dtrace_arena);
18234
18235 if (dtrace_toxrange != NULL) {
18236 kmem_free(dtrace_toxrange,
18237 dtrace_toxranges_max * sizeof (dtrace_toxrange_t));
18238 dtrace_toxrange = NULL;
18239 dtrace_toxranges = 0;
18240 dtrace_toxranges_max = 0;
18241 }
18242
18243 ddi_remove_minor_node(dtrace_devi, NULL);
18244 dtrace_devi = NULL;
18245
18246 ddi_soft_state_fini(&dtrace_softstate);
18247
18248 ASSERT(dtrace_vtime_references == 0);
18249 ASSERT(dtrace_opens == 0);
18250 ASSERT(dtrace_retained == NULL);
18251
18252 lck_mtx_unlock(&dtrace_lock);
18253 lck_mtx_unlock(&dtrace_provider_lock);
18254
d9a64523 18255#ifdef illumos
2d21ac55
A
18256 /*
18257 * We don't destroy the task queue until after we have dropped our
18258 * locks (taskq_destroy() may block on running tasks). To prevent
18259 * attempting to do work after we have effectively detached but before
18260 * the task queue has been destroyed, all tasks dispatched via the
18261 * task queue must check that DTrace is still attached before
18262 * performing any operation.
18263 */
18264 taskq_destroy(dtrace_taskq);
18265 dtrace_taskq = NULL;
d9a64523 18266#endif
2d21ac55
A
18267
18268 return (DDI_SUCCESS);
18269}
fe8ab488 18270#endif /* __APPLE__ */
2d21ac55
A
18271
18272d_open_t _dtrace_open, helper_open;
18273d_close_t _dtrace_close, helper_close;
18274d_ioctl_t _dtrace_ioctl, helper_ioctl;
18275
/*
 * cdevsw open entry point for the dtrace device.  Forwards to dtrace_open(),
 * passing the address of a local copy of the device number together with
 * the caller's flags, device type and CRED().
 */
int
_dtrace_open(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(p)
	dev_t devno = dev;
	int error;

	error = dtrace_open(&devno, flags, devtype, CRED());
	return error;
}
18284
/*
 * cdevsw open entry point for the helper device.  Nothing needs to be set
 * up on open, so every argument is ignored and success (0) is returned.
 */
int
helper_open(dev_t dev, int flags, int devtype, struct proc *p)
{
	(void)dev;
	(void)flags;
	(void)devtype;
	(void)p;

	return 0;
}
18291
/*
 * cdevsw close entry point for the dtrace device.  Forwards to
 * dtrace_close() with the caller's arguments and CRED(), and returns its
 * result unchanged.
 */
int
_dtrace_close(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(p)
	int error;

	error = dtrace_close(dev, flags, devtype, CRED());
	return error;
}
18298
/*
 * cdevsw close entry point for the helper device.  There is nothing to
 * release on close, so every argument is ignored and success (0) is
 * returned.
 */
int
helper_close(dev_t dev, int flags, int devtype, struct proc *p)
{
	(void)dev;
	(void)flags;
	(void)devtype;
	(void)p;

	return 0;
}
18305
18306int
18307_dtrace_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p)
18308{
18309#pragma unused(p)
18310 int err, rv = 0;
b0d623f7
A
18311 user_addr_t uaddrp;
18312
18313 if (proc_is64bit(p))
18314 uaddrp = *(user_addr_t *)data;
18315 else
18316 uaddrp = (user_addr_t) *(uint32_t *)data;
2d21ac55 18317
b0d623f7 18318 err = dtrace_ioctl(dev, cmd, uaddrp, fflag, CRED(), &rv);
2d21ac55 18319
b0d623f7 18320 /* Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */
2d21ac55
A
18321 if (err != 0) {
18322 ASSERT( (err & 0xfffff000) == 0 );
b0d623f7 18323 return (err & 0xfff); /* ioctl will return -1 and will set errno to an error code < 4096 */
2d21ac55
A
18324 } else if (rv != 0) {
18325 ASSERT( (rv & 0xfff00000) == 0 );
b0d623f7 18326 return (((rv & 0xfffff) << 12)); /* ioctl will return -1 and will set errno to a value >= 4096 */
2d21ac55
A
18327 } else
18328 return 0;
18329}
18330
18331int
18332helper_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p)
18333{
18334#pragma unused(dev,fflag,p)
18335 int err, rv = 0;
18336
b0d623f7
A
18337 err = dtrace_ioctl_helper(cmd, data, &rv);
18338 /* Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */
2d21ac55
A
18339 if (err != 0) {
18340 ASSERT( (err & 0xfffff000) == 0 );
b0d623f7 18341 return (err & 0xfff); /* ioctl will return -1 and will set errno to an error code < 4096 */
2d21ac55
A
18342 } else if (rv != 0) {
18343 ASSERT( (rv & 0xfff00000) == 0 );
b0d623f7 18344 return (((rv & 0xfffff) << 12)); /* ioctl will return -1 and will set errno to a value >= 4096 */
2d21ac55
A
18345 } else
18346 return 0;
18347}
18348
18349#define HELPER_MAJOR -24 /* let the kernel pick the device number */
18350
18351/*
18352 * A struct describing which functions will get invoked for certain
18353 * actions.
18354 */
/*
 * Character-device switch for the helper device.  The initializer is
 * positional; the trailing comment on each line names the slot it fills.
 * Only open, close and ioctl are given helper_* handlers here.
 */
static struct cdevsw helper_cdevsw =
{
	helper_open,		/* open */
	helper_close,		/* close */
	eno_rdwrt,			/* read */
	eno_rdwrt,			/* write */
	helper_ioctl,		/* ioctl */
	(stop_fcn_t *)nulldev, /* stop */
	(reset_fcn_t *)nulldev, /* reset */
	NULL,				/* tty's */
	eno_select,			/* select */
	eno_mmap,			/* mmap */
	eno_strat,			/* strategy */
	eno_getc,			/* getc */
	eno_putc,			/* putc */
	0					/* type */
};
18372
18373static int helper_majdevno = 0;
18374
18375static int gDTraceInited = 0;
18376
18377void
18378helper_init( void )
18379{
18380 /*
18381 * Once the "helper" is initialized, it can take ioctl calls that use locks
18382 * and zones initialized in dtrace_init. Make certain dtrace_init was called
18383 * before us.
18384 */
18385
18386 if (!gDTraceInited) {
18387 panic("helper_init before dtrace_init\n");
18388 }
18389
18390 if (0 >= helper_majdevno)
18391 {
18392 helper_majdevno = cdevsw_add(HELPER_MAJOR, &helper_cdevsw);
18393
18394 if (helper_majdevno < 0) {
18395 printf("helper_init: failed to allocate a major number!\n");
18396 return;
18397 }
18398
18399 if (NULL == devfs_make_node( makedev(helper_majdevno, 0), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666,
18400 DTRACEMNR_HELPER, 0 )) {
18401 printf("dtrace_init: failed to devfs_make_node for helper!\n");
18402 return;
18403 }
18404 } else
18405 panic("helper_init: called twice!\n");
18406}
18407
18408#undef HELPER_MAJOR
18409
2d21ac55
A
18410static int
18411dtrace_clone_func(dev_t dev, int action)
18412{
18413#pragma unused(dev)
18414
18415 if (action == DEVFS_CLONE_ALLOC) {
39037602 18416 return dtrace_state_reserve();
2d21ac55
A
18417 }
18418 else if (action == DEVFS_CLONE_FREE) {
18419 return 0;
18420 }
18421 else return -1;
18422}
18423
39037602
A
18424void dtrace_ast(void);
18425
18426void
18427dtrace_ast(void)
18428{
18429 int i;
18430 uint32_t clients = atomic_and_32(&dtrace_wake_clients, 0);
18431 if (clients == 0)
18432 return;
18433 /**
18434 * We disable preemption here to be sure that we won't get
18435 * interrupted by a wakeup to a thread that is higher
18436 * priority than us, so that we do issue all wakeups
18437 */
18438 disable_preemption();
18439 for (i = 0; i < DTRACE_NCLIENTS; i++) {
18440 if (clients & (1 << i)) {
18441 dtrace_state_t *state = dtrace_state_get(i);
18442 if (state) {
18443 wakeup(state);
18444 }
18445
18446 }
18447 }
18448 enable_preemption();
18449}
18450
18451
2d21ac55
A
18452#define DTRACE_MAJOR -24 /* let the kernel pick the device number */
18453
/*
 * Character-device switch for the dtrace device.  The initializer is
 * positional; the trailing comment on each line names the slot it fills.
 * Only open, close and ioctl are given _dtrace_* handlers here.
 */
static struct cdevsw dtrace_cdevsw =
{
	_dtrace_open,		/* open */
	_dtrace_close,		/* close */
	eno_rdwrt,			/* read */
	eno_rdwrt,			/* write */
	_dtrace_ioctl,		/* ioctl */
	(stop_fcn_t *)nulldev, /* stop */
	(reset_fcn_t *)nulldev, /* reset */
	NULL,				/* tty's */
	eno_select,			/* select */
	eno_mmap,			/* mmap */
	eno_strat,			/* strategy */
	eno_getc,			/* getc */
	eno_putc,			/* putc */
	0					/* type */
};
18471
18472lck_attr_t* dtrace_lck_attr;
18473lck_grp_attr_t* dtrace_lck_grp_attr;
18474lck_grp_t* dtrace_lck_grp;
18475
18476static int gMajDevNo;
18477
d9a64523
A
18478void dtrace_early_init (void)
18479{
18480 dtrace_restriction_policy_load();
18481
18482 /*
18483 * See dtrace_impl.h for a description of kernel symbol modes.
18484 * The default is to wait for symbols from userspace (lazy symbols).
18485 */
18486 if (!PE_parse_boot_argn("dtrace_kernel_symbol_mode", &dtrace_kernel_symbol_mode, sizeof (dtrace_kernel_symbol_mode))) {
18487 dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE;
18488 }
18489}
18490
2d21ac55
A
18491void
18492dtrace_init( void )
18493{
18494 if (0 == gDTraceInited) {
39236c6e 18495 int i, ncpu;
fe8ab488 18496 size_t size = sizeof(dtrace_buffer_memory_maxsize);
2d21ac55 18497
39236c6e
A
18498 /*
18499 * DTrace allocates buffers based on the maximum number
18500 * of enabled cpus. This call avoids any race when finding
18501 * that count.
18502 */
18503 ASSERT(dtrace_max_cpus == 0);
18504 ncpu = dtrace_max_cpus = ml_get_max_cpus();
fe8ab488
A
18505
18506 /*
18507 * Retrieve the size of the physical memory in order to define
18508 * the state buffer memory maximal size. If we cannot retrieve
18509 * this value, we'll consider that we have 1Gb of memory per CPU, that's
18510 * still better than raising a kernel panic.
18511 */
18512 if (0 != kernel_sysctlbyname("hw.memsize", &dtrace_buffer_memory_maxsize,
18513 &size, NULL, 0))
18514 {
18515 dtrace_buffer_memory_maxsize = ncpu * 1024 * 1024 * 1024;
18516 printf("dtrace_init: failed to retrieve the hw.memsize, defaulted to %lld bytes\n",
18517 dtrace_buffer_memory_maxsize);
18518 }
18519
18520 /*
18521 * Finally, divide by three to prevent DTrace from eating too
18522 * much memory.
18523 */
18524 dtrace_buffer_memory_maxsize /= 3;
18525 ASSERT(dtrace_buffer_memory_maxsize > 0);
18526
2d21ac55
A
18527 gMajDevNo = cdevsw_add(DTRACE_MAJOR, &dtrace_cdevsw);
18528
18529 if (gMajDevNo < 0) {
18530 printf("dtrace_init: failed to allocate a major number!\n");
18531 gDTraceInited = 0;
18532 return;
18533 }
18534
18535 if (NULL == devfs_make_node_clone( makedev(gMajDevNo, 0), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666,
18536 dtrace_clone_func, DTRACEMNR_DTRACE, 0 )) {
18537 printf("dtrace_init: failed to devfs_make_node_clone for dtrace!\n");
18538 gDTraceInited = 0;
18539 return;
18540 }
18541
2d21ac55
A
18542 /*
18543 * Allocate the dtrace_probe_t zone
18544 */
18545 dtrace_probe_t_zone = zinit(sizeof(dtrace_probe_t),
18546 1024 * sizeof(dtrace_probe_t),
18547 sizeof(dtrace_probe_t),
18548 "dtrace.dtrace_probe_t");
18549
18550 /*
18551 * Create the dtrace lock group and attrs.
18552 */
18553 dtrace_lck_attr = lck_attr_alloc_init();
18554 dtrace_lck_grp_attr= lck_grp_attr_alloc_init();
18555 dtrace_lck_grp = lck_grp_alloc_init("dtrace", dtrace_lck_grp_attr);
18556
18557 /*
18558 * We have to initialize all locks explicitly
18559 */
18560 lck_mtx_init(&dtrace_lock, dtrace_lck_grp, dtrace_lck_attr);
18561 lck_mtx_init(&dtrace_provider_lock, dtrace_lck_grp, dtrace_lck_attr);
18562 lck_mtx_init(&dtrace_meta_lock, dtrace_lck_grp, dtrace_lck_attr);
fe8ab488 18563 lck_mtx_init(&dtrace_procwaitfor_lock, dtrace_lck_grp, dtrace_lck_attr);
b0d623f7 18564#if DEBUG
2d21ac55
A
18565 lck_mtx_init(&dtrace_errlock, dtrace_lck_grp, dtrace_lck_attr);
18566#endif
18567 lck_rw_init(&dtrace_dof_mode_lock, dtrace_lck_grp, dtrace_lck_attr);
18568
18569 /*
18570 * The cpu_core structure consists of per-CPU state available in any context.
18571 * On some architectures, this may mean that the page(s) containing the
18572 * NCPU-sized array of cpu_core structures must be locked in the TLB -- it
18573 * is up to the platform to assure that this is performed properly. Note that
18574 * the structure is sized to avoid false sharing.
18575 */
18576 lck_mtx_init(&cpu_lock, dtrace_lck_grp, dtrace_lck_attr);
fe8ab488 18577 lck_mtx_init(&cyc_lock, dtrace_lck_grp, dtrace_lck_attr);
2d21ac55
A
18578 lck_mtx_init(&mod_lock, dtrace_lck_grp, dtrace_lck_attr);
18579
fe8ab488
A
18580 /*
18581 * Initialize the CPU offline/online hooks.
18582 */
18583 dtrace_install_cpu_hooks();
18584
6d2010ae
A
18585 dtrace_modctl_list = NULL;
18586
2d21ac55
A
18587 cpu_core = (cpu_core_t *)kmem_zalloc( ncpu * sizeof(cpu_core_t), KM_SLEEP );
18588 for (i = 0; i < ncpu; ++i) {
18589 lck_mtx_init(&cpu_core[i].cpuc_pid_lock, dtrace_lck_grp, dtrace_lck_attr);
18590 }
18591
6d2010ae 18592 cpu_list = (dtrace_cpu_t *)kmem_zalloc( ncpu * sizeof(dtrace_cpu_t), KM_SLEEP );
2d21ac55
A
18593 for (i = 0; i < ncpu; ++i) {
18594 cpu_list[i].cpu_id = (processorid_t)i;
18595 cpu_list[i].cpu_next = &(cpu_list[(i+1) % ncpu]);
fe8ab488 18596 LIST_INIT(&cpu_list[i].cpu_cyc_list);
2d21ac55
A
18597 lck_rw_init(&cpu_list[i].cpu_ft_lock, dtrace_lck_grp, dtrace_lck_attr);
18598 }
18599
18600 lck_mtx_lock(&cpu_lock);
18601 for (i = 0; i < ncpu; ++i)
39037602 18602 /* FIXME: track CPU configuration */
2d21ac55
A
18603 dtrace_cpu_setup_initial( (processorid_t)i ); /* In lieu of register_cpu_setup_func() callback */
18604 lck_mtx_unlock(&cpu_lock);
18605
18606 (void)dtrace_abs_to_nano(0LL); /* Force once only call to clock_timebase_info (which can take a lock) */
18607
d9a64523
A
18608 dtrace_strings = dtrace_hash_create(dtrace_strkey_offset,
18609 offsetof(dtrace_string_t, dtst_str),
18610 offsetof(dtrace_string_t, dtst_next),
18611 offsetof(dtrace_string_t, dtst_prev));
18612
316670eb 18613 dtrace_isa_init();
2d21ac55
A
18614 /*
18615 * See dtrace_impl.h for a description of dof modes.
18616 * The default is lazy dof.
18617 *
b0d623f7 18618 * FIXME: Warn if state is LAZY_OFF? It won't break anything, but
2d21ac55
A
18619 * makes no sense...
18620 */
593a1d5f 18621 if (!PE_parse_boot_argn("dtrace_dof_mode", &dtrace_dof_mode, sizeof (dtrace_dof_mode))) {
5ba3f43e
A
18622#if CONFIG_EMBEDDED
18623 /* Disable DOF mode by default for performance reasons */
18624 dtrace_dof_mode = DTRACE_DOF_MODE_NEVER;
18625#else
2d21ac55 18626 dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
5ba3f43e 18627#endif
2d21ac55
A
18628 }
18629
18630 /*
18631 * Sanity check of dof mode value.
18632 */
18633 switch (dtrace_dof_mode) {
18634 case DTRACE_DOF_MODE_NEVER:
18635 case DTRACE_DOF_MODE_LAZY_ON:
18636 /* valid modes, but nothing else we need to do */
18637 break;
18638
18639 case DTRACE_DOF_MODE_LAZY_OFF:
18640 case DTRACE_DOF_MODE_NON_LAZY:
18641 /* Cannot wait for a dtrace_open to init fasttrap */
18642 fasttrap_init();
18643 break;
18644
18645 default:
18646 /* Invalid, clamp to non lazy */
18647 dtrace_dof_mode = DTRACE_DOF_MODE_NON_LAZY;
18648 fasttrap_init();
18649 break;
18650 }
18651
18652 gDTraceInited = 1;
18653
18654 } else
18655 panic("dtrace_init: called twice!\n");
18656}
18657
18658void
18659dtrace_postinit(void)
18660{
6d2010ae
A
18661 /*
18662 * Called from bsd_init after all provider's *_init() routines have been
18663 * run. That way, anonymous DOF enabled under dtrace_attach() is safe
18664 * to go.
18665 */
d9a64523 18666 dtrace_attach( (dev_info_t *)(uintptr_t)makedev(gMajDevNo, 0)); /* Punning a dev_t to a dev_info_t* */
6d2010ae
A
18667
18668 /*
18669 * Add the mach_kernel to the module list for lazy processing
18670 */
18671 struct kmod_info fake_kernel_kmod;
18672 memset(&fake_kernel_kmod, 0, sizeof(fake_kernel_kmod));
18673
18674 strlcpy(fake_kernel_kmod.name, "mach_kernel", sizeof(fake_kernel_kmod.name));
18675 fake_kernel_kmod.id = 1;
18676 fake_kernel_kmod.address = g_kernel_kmod_info.address;
18677 fake_kernel_kmod.size = g_kernel_kmod_info.size;
18678
316670eb 18679 if (dtrace_module_loaded(&fake_kernel_kmod, 0) != 0) {
6d2010ae
A
18680 printf("dtrace_postinit: Could not register mach_kernel modctl\n");
18681 }
5ba3f43e
A
18682
18683 if (!PE_parse_boot_argn("dtrace_provide_private_probes", &dtrace_provide_private_probes, sizeof (dtrace_provide_private_probes))) {
18684 dtrace_provide_private_probes = 0;
18685 }
6d2010ae
A
18686
18687 (void)OSKextRegisterKextsWithDTrace();
2d21ac55
A
18688}
18689#undef DTRACE_MAJOR
18690
18691/*
18692 * Routines used to register interest in cpu's being added to or removed
18693 * from the system.
18694 */
18695void
18696register_cpu_setup_func(cpu_setup_func_t *ignore1, void *ignore2)
18697{
18698#pragma unused(ignore1,ignore2)
18699}
18700
18701void
18702unregister_cpu_setup_func(cpu_setup_func_t *ignore1, void *ignore2)
18703{
18704#pragma unused(ignore1,ignore2)
18705}