/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Portions Copyright (c) 2013, 2016, Joyent, Inc. All rights reserved.
 * Portions Copyright (c) 2013 by Delphix. All rights reserved.
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/* #pragma ident	"@(#)dtrace.c	1.65	08/07/02 SMI" */

/*
 * DTrace - Dynamic Tracing for Solaris
 *
 * This is the implementation of the Solaris Dynamic Tracing framework
 * (DTrace). The user-visible interface to DTrace is described at length in
 * the "Solaris Dynamic Tracing Guide". The interfaces between the libdtrace
 * library, the in-kernel DTrace framework, and the DTrace providers are
 * described in the block comments in the <sys/dtrace.h> header file. The
 * internal architecture of DTrace is described in the block comments in the
 * <sys/dtrace_impl.h> header file. The comments contained within the DTrace
 * implementation very much assume mastery of all of these sources; if one has
 * an unanswered question about the implementation, one should consult them
 * first.
 *
 * The functions here are ordered roughly as follows:
 *
 *   - Probe context functions
 *   - Probe hashing functions
 *   - Non-probe context utility functions
 *   - Matching functions
 *   - Provider-to-Framework API functions
 *   - Probe management functions
 *   - DIF object functions
 *   - Format functions
 *   - Predicate functions
 *   - ECB functions
 *   - Buffer functions
 *   - Enabling functions
 *   - DOF functions
 *   - Anonymous enabling functions
 *   - Process functions
 *   - Consumer state functions
 *   - Helper functions
 *   - Hook functions
 *   - Driver cookbook functions
 *
 * Each group of functions begins with a block comment labelled the "DTrace
 * [Group] Functions", allowing one to find each block by searching forward
 * on capital-f functions.
 */
#include <sys/errno.h>
#include <sys/types.h>
#include <sys/stat.h>
#include <sys/conf.h>
#include <sys/systm.h>
#include <sys/dtrace_impl.h>
#include <sys/param.h>
#include <sys/proc_internal.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <miscfs/devfs/devfs.h>
#include <sys/malloc.h>
#include <sys/kernel_types.h>
#include <sys/proc_internal.h>
#include <sys/uio_internal.h>
#include <sys/kauth.h>
#include <vm/pmap.h>
#include <sys/user.h>
#include <mach/exception_types.h>
#include <sys/signalvar.h>
#include <mach/task.h>
#include <kern/zalloc.h>
#include <kern/ast.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <netinet/in.h>
#include <libkern/sysctl.h>
#include <sys/kdebug.h>

#include <kern/cpu_data.h>
extern uint32_t pmap_find_phys(void *, uint64_t);
extern boolean_t pmap_valid_page(uint32_t);
extern void OSKextRegisterKextsWithDTrace(void);
extern kmod_info_t g_kernel_kmod_info;

/* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. */
#define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */

#define t_predcache t_dtrace_predcache /* Cosmetic. Helps readability of thread.h */

extern void dtrace_suspend(void);
extern void dtrace_resume(void);
extern void dtrace_init(void);
extern void helper_init(void);
extern void fasttrap_init(void);

static int dtrace_lazy_dofs_duplicate(proc_t *, proc_t *);
extern void dtrace_lazy_dofs_destroy(proc_t *);
extern void dtrace_postinit(void);

extern void dtrace_proc_fork(proc_t*, proc_t*, int);
extern void dtrace_proc_exec(proc_t*);
extern void dtrace_proc_exit(proc_t*);
/*
 * DTrace Tunable Variables
 *
 * The following variables may be dynamically tuned by using sysctl(8), the
 * variables being stored in the kern.dtrace namespace. For example:
 *	sysctl kern.dtrace.dof_maxsize=1048575	# 1M
 *
 * In general, the only variables that one should be tuning this way are those
 * that affect system-wide DTrace behavior, and for which the default behavior
 * is undesirable. Most of these variables are tunable on a per-consumer
 * basis using DTrace options, and need not be tuned on a system-wide basis.
 * When tuning these variables, avoid pathological values; while some attempt
 * is made to verify the integrity of these variables, they are not considered
 * part of the supported interface to DTrace, and they are therefore not
 * checked comprehensively.
 */
uint64_t dtrace_buffer_memory_maxsize = 0;	/* initialized in dtrace_init */
uint64_t dtrace_buffer_memory_inuse = 0;
int dtrace_destructive_disallow = 0;
dtrace_optval_t dtrace_nonroot_maxsize = (16 * 1024 * 1024);
size_t dtrace_difo_maxsize = (256 * 1024);
dtrace_optval_t dtrace_dof_maxsize = (384 * 1024);
dtrace_optval_t dtrace_statvar_maxsize = (16 * 1024);
dtrace_optval_t dtrace_statvar_maxsize_max = (16 * 10 * 1024);
size_t dtrace_actions_max = (16 * 1024);
size_t dtrace_retain_max = 1024;
dtrace_optval_t dtrace_helper_actions_max = 32;
dtrace_optval_t dtrace_helper_providers_max = 64;
dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024);
size_t dtrace_strsize_default = 256;
dtrace_optval_t dtrace_strsize_min = 8;
dtrace_optval_t dtrace_strsize_max = 65536;
dtrace_optval_t dtrace_cleanrate_default = 990099000;		/* 1.1 hz */
dtrace_optval_t dtrace_cleanrate_min = 20000000;		/* 50 hz */
dtrace_optval_t dtrace_cleanrate_max = (uint64_t)60 * NANOSEC;	/* 1/minute */
dtrace_optval_t dtrace_aggrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t dtrace_statusrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t dtrace_statusrate_max = (hrtime_t)10 * NANOSEC;	/* 6/minute */
dtrace_optval_t dtrace_switchrate_default = NANOSEC;		/* 1 hz */
dtrace_optval_t dtrace_nspec_default = 1;
dtrace_optval_t dtrace_specsize_default = 32 * 1024;
dtrace_optval_t dtrace_stackframes_default = 20;
dtrace_optval_t dtrace_ustackframes_default = 20;
dtrace_optval_t dtrace_jstackframes_default = 50;
dtrace_optval_t dtrace_jstackstrsize_default = 512;
dtrace_optval_t dtrace_buflimit_default = 75;
dtrace_optval_t dtrace_buflimit_min = 1;
dtrace_optval_t dtrace_buflimit_max = 99;
int dtrace_msgdsize_max = 128;
hrtime_t dtrace_chill_max = 500 * (NANOSEC / MILLISEC);		/* 500 ms */
hrtime_t dtrace_chill_interval = NANOSEC;			/* 1000 ms */
int dtrace_devdepth_max = 32;
int dtrace_err_verbose;
int dtrace_provide_private_probes = 0;
hrtime_t dtrace_deadman_interval = NANOSEC;
hrtime_t dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC;
hrtime_t dtrace_deadman_user = (hrtime_t)30 * NANOSEC;

/*
 * DTrace External Variables
 *
 * As dtrace(7D) is a kernel module, any DTrace variables are obviously
 * available to DTrace consumers via the backtick (`) syntax. One of these,
 * dtrace_zero, is made deliberately so: it is provided as a source of
 * well-known, zero-filled memory. While this variable is not documented,
 * it is used by some translators as an implementation detail.
 */
const char dtrace_zero[256] = { 0 };	/* zero-filled memory */
unsigned int dtrace_max_cpus = 0;	/* number of enabled cpus */
/*
 * DTrace Internal Variables
 */
static dev_info_t *dtrace_devi;			/* device info */
static vmem_t *dtrace_arena;			/* probe ID arena */
static taskq_t *dtrace_taskq;			/* task queue */
static dtrace_probe_t **dtrace_probes;		/* array of all probes */
static int dtrace_nprobes;			/* number of probes */
static dtrace_provider_t *dtrace_provider;	/* provider list */
static dtrace_meta_t *dtrace_meta_pid;		/* user-land meta provider */
static int dtrace_opens;			/* number of opens */
static int dtrace_helpers;			/* number of helpers */
static dtrace_hash_t *dtrace_bymod;		/* probes hashed by module */
static dtrace_hash_t *dtrace_byfunc;		/* probes hashed by function */
static dtrace_hash_t *dtrace_byname;		/* probes hashed by name */
static dtrace_toxrange_t *dtrace_toxrange;	/* toxic range array */
static int dtrace_toxranges;			/* number of toxic ranges */
static int dtrace_toxranges_max;		/* size of toxic range array */
static dtrace_anon_t dtrace_anon;		/* anonymous enabling */
static kmem_cache_t *dtrace_state_cache;	/* cache for dynamic state */
static uint64_t dtrace_vtime_references;	/* number of vtimestamp refs */
static kthread_t *dtrace_panicked;		/* panicking thread */
static dtrace_ecb_t *dtrace_ecb_create_cache;	/* cached created ECB */
static dtrace_genid_t dtrace_probegen;		/* current probe generation */
static dtrace_helpers_t *dtrace_deferred_pid;	/* deferred helper list */
static dtrace_enabling_t *dtrace_retained;	/* list of retained enablings */
static dtrace_genid_t dtrace_retained_gen;	/* current retained enab gen */
static dtrace_dynvar_t dtrace_dynhash_sink;	/* end of dynamic hash chains */

static int dtrace_dof_mode;	/* See dtrace_impl.h for a description of Darwin's dof modes. */

/*
 * This doesn't quite fit as an internal variable, as it must be accessed in
 * fbt_provide and sdt_provide. It's clearly not a dtrace tunable variable either...
 */
int dtrace_kernel_symbol_mode;	/* See dtrace_impl.h for a description of Darwin's kernel symbol modes. */
static uint32_t dtrace_wake_clients;

/*
 * To save memory, some common memory allocations are given a
 * unique zone. For example, dtrace_probe_t is 72 bytes in size,
 * which means it would fall into the kalloc.128 bucket. With
 * 20k elements allocated, the space saved is substantial.
 */
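/* (Roughly 20,000 * (128 - 72) bytes, i.e. about 1 MB of kalloc.128 slack avoided.) */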

struct zone *dtrace_probe_t_zone;

static int dtrace_module_unloaded(struct kmod_info *kmod);

/*
 * DTrace Locking
 * DTrace is protected by three (relatively coarse-grained) locks:
 *
 * (1) dtrace_lock is required to manipulate essentially any DTrace state,
 *     including enabling state, probes, ECBs, consumer state, helper state,
 *     etc. Importantly, dtrace_lock is _not_ required when in probe context;
 *     probe context is lock-free -- synchronization is handled via the
 *     dtrace_sync() cross call mechanism.
 *
 * (2) dtrace_provider_lock is required when manipulating provider state, or
 *     when provider state must be held constant.
 *
 * (3) dtrace_meta_lock is required when manipulating meta provider state, or
 *     when meta provider state must be held constant.
 *
 * The lock ordering between these three locks is dtrace_meta_lock before
 * dtrace_provider_lock before dtrace_lock. (In particular, there are
 * several places where dtrace_provider_lock is held by the framework as it
 * calls into the providers -- which then call back into the framework,
 * grabbing dtrace_lock.)
 *
 * There are two other locks in the mix: mod_lock and cpu_lock. With respect
 * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical
 * role as a coarse-grained lock; it is acquired before both of these locks.
 * With respect to dtrace_meta_lock, its behavior is stranger: cpu_lock must
 * be acquired _between_ dtrace_meta_lock and any other DTrace locks.
 * mod_lock is similar with respect to dtrace_provider_lock in that it must be
 * acquired _between_ dtrace_provider_lock and dtrace_lock.
 */
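/*
 * A hypothetical call path that needed all five locks would thus acquire,
 * in order: dtrace_meta_lock, cpu_lock, dtrace_provider_lock, mod_lock,
 * dtrace_lock -- and release them in the reverse order.
 */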

/*
 * APPLE NOTE:
 *
 * For porting purposes, all kmutex_t vars have been changed
 * to lck_mtx_t, which require explicit initialization.
 *
 * kmutex_t becomes lck_mtx_t
 * mutex_enter() becomes lck_mtx_lock()
 * mutex_exit() becomes lck_mtx_unlock()
 *
 * Lock asserts are changed like this:
 *
 * ASSERT(MUTEX_HELD(&cpu_lock));
 *	becomes:
 * lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
 *
 */
static lck_mtx_t dtrace_lock;		/* probe state lock */
static lck_mtx_t dtrace_provider_lock;	/* provider state lock */
static lck_mtx_t dtrace_meta_lock;	/* meta-provider state lock */
static lck_rw_t dtrace_dof_mode_lock;	/* dof mode lock */

/*
 * DTrace Provider Variables
 *
 * These are the variables relating to DTrace as a provider (that is, the
 * provider of the BEGIN, END, and ERROR probes).
 */
static dtrace_pattr_t	dtrace_provider_attr = {
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
};

static void
dtrace_nullop(void)
{}

static int
dtrace_enable_nullop(void)
{
	return (0);
}

static dtrace_pops_t	dtrace_provider_ops = {
	(void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop,
	(void (*)(void *, struct modctl *))dtrace_nullop,
	(int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
	NULL,
	NULL,
	NULL,
	(void (*)(void *, dtrace_id_t, void *))dtrace_nullop
};

static dtrace_id_t	dtrace_probeid_begin;	/* special BEGIN probe */
static dtrace_id_t	dtrace_probeid_end;	/* special END probe */
dtrace_id_t		dtrace_probeid_error;	/* special ERROR probe */

/*
 * DTrace Helper Tracing Variables
 */
uint32_t dtrace_helptrace_next = 0;
uint32_t dtrace_helptrace_nlocals;
char	*dtrace_helptrace_buffer;
size_t	dtrace_helptrace_bufsize = 512 * 1024;

#if DEBUG
int	dtrace_helptrace_enabled = 1;
#else
int	dtrace_helptrace_enabled = 0;
#endif

/*
 * DTrace Error Hashing
 *
 * On DEBUG kernels, DTrace will track the errors that it has seen in a hash
 * table. This is very useful for checking coverage of tests that are
 * expected to induce DIF or DOF processing errors, and may be useful for
 * debugging problems in the DIF code generator or in DOF generation. The
 * error hash may be examined with the ::dtrace_errhash MDB dcmd.
 */
#if DEBUG
static dtrace_errhash_t	dtrace_errhash[DTRACE_ERRHASHSZ];
static const char *dtrace_errlast;
static kthread_t *dtrace_errthread;
static lck_mtx_t dtrace_errlock;
#endif

/*
 * DTrace Macros and Constants
 *
 * These are various macros that are useful in various spots in the
 * implementation, along with a few random constants that have no meaning
 * outside of the implementation. There is no real structure to this cpp
 * mishmash -- but is there ever?
 */
#define	DTRACE_HASHSTR(hash, probe)	\
	dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs)))

#define	DTRACE_HASHNEXT(hash, probe)	\
	(dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs)

#define	DTRACE_HASHPREV(hash, probe)	\
	(dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs)

#define	DTRACE_HASHEQ(hash, lhs, rhs)	\
	(strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \
	    *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0)

#define	DTRACE_AGGHASHSIZE_SLEW		17

#define	DTRACE_V4MAPPED_OFFSET		(sizeof (uint32_t) * 3)

/*
 * The key for a thread-local variable consists of the lower 61 bits of the
 * current_thread(), plus the 3 bits of the highest active interrupt above LOCK_LEVEL.
 * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never
 * equal to a variable identifier. This is necessary (but not sufficient) to
 * assure that global associative arrays never collide with thread-local
 * variables. To guarantee that they cannot collide, we must also define the
 * order for keying dynamic variables. That order is:
 *
 *   [ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ]
 *
 * Because the variable-key and the tls-key are in orthogonal spaces, there is
 * no way for a global variable key signature to match a thread-local key
 * signature.
 */
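/*
 * Concretely, per the macro below, the resulting 64-bit key is laid out as:
 *
 *	bits [63:61]	interrupt context bits (3 bits)
 *	bits [60:0]	(current_thread() + DIF_VARIABLE_MAX) & ((1 << 61) - 1)
 */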
#if defined (__x86_64__)
/* FIXME: two function calls!! */
#define	DTRACE_TLS_THRKEY(where) {					\
	uint_t intr = ml_at_interrupt_context(); /* Note: just one measly bit */ \
	uint64_t thr = (uintptr_t)current_thread();			\
	ASSERT(intr < (1 << 3));					\
	(where) = ((thr + DIF_VARIABLE_MAX) &				\
	    (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61);	\
}
#else
#error Unknown architecture
#endif

#define	DT_BSWAP_8(x)	((x) & 0xff)
#define	DT_BSWAP_16(x)	((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8))
#define	DT_BSWAP_32(x)	((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((x) >> 16))
#define	DT_BSWAP_64(x)	((DT_BSWAP_32(x) << 32) | DT_BSWAP_32((x) >> 32))
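/* Sanity check: DT_BSWAP_16(0x1234) == 0x3412; DT_BSWAP_32(0x12345678) == 0x78563412. */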

#define	DT_MASK_LO 0x00000000FFFFFFFFULL

#define	DTRACE_STORE(type, tomax, offset, what) \
	*((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what);

#define	DTRACE_ALIGNCHECK(addr, size, flags)				\
	if (addr & (MIN(size,4) - 1)) {					\
		*flags |= CPU_DTRACE_BADALIGN;				\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;	\
		return (0);						\
	}

#define	DTRACE_RANGE_REMAIN(remp, addr, baseaddr, basesz)		\
do {									\
	if ((remp) != NULL) {						\
		*(remp) = (uintptr_t)(baseaddr) + (basesz) - (addr);	\
	}								\
} while (0)

/*
 * Test whether a range of memory starting at testaddr of size testsz falls
 * within the range of memory described by addr, sz. We take care to avoid
 * problems with overflow and underflow of the unsigned quantities, and
 * disallow all negative sizes. Ranges of size 0 are allowed.
 */
#define	DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz)		\
	((testaddr) - (baseaddr) < (basesz) &&				\
	(testaddr) + (testsz) - (baseaddr) <= (basesz) &&		\
	(testaddr) + (testsz) >= (testaddr))
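/*
 * For example, with baseaddr = 0xFFF0 and basesz = 0x10: testaddr = 0xFFF8,
 * testsz = 8 passes all three clauses; testaddr = 0xFFF8, testsz = 9 fails
 * the second clause; and a testaddr + testsz that wraps fails the third.
 */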

/*
 * Test whether alloc_sz bytes will fit in the scratch region. We isolate
 * alloc_sz on the righthand side of the comparison in order to avoid overflow
 * or underflow in the comparison with it. This is simpler than the INRANGE
 * check above, because we know that the dtms_scratch_ptr is valid in the
 * range. Allocations of size zero are allowed.
 */
#define	DTRACE_INSCRATCH(mstate, alloc_sz)				\
	((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size -	\
	(mstate)->dtms_scratch_ptr >= (alloc_sz))

#define	RECOVER_LABEL(bits) dtraceLoadRecover##bits:

#if defined (__x86_64__) || (defined (__arm__) || defined (__arm64__))
#define	DTRACE_LOADFUNC(bits)						\
/*CSTYLED*/								\
uint##bits##_t dtrace_load##bits(uintptr_t addr);			\
									\
uint##bits##_t								\
dtrace_load##bits(uintptr_t addr)					\
{									\
	size_t size = bits / NBBY;					\
	/*CSTYLED*/							\
	uint##bits##_t rval = 0;					\
	int i;								\
	volatile uint16_t *flags = (volatile uint16_t *)		\
	    &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;			\
									\
	DTRACE_ALIGNCHECK(addr, size, flags);				\
									\
	for (i = 0; i < dtrace_toxranges; i++) {			\
		if (addr >= dtrace_toxrange[i].dtt_limit)		\
			continue;					\
									\
		if (addr + size <= dtrace_toxrange[i].dtt_base)		\
			continue;					\
									\
		/*							\
		 * This address falls within a toxic region; return 0.	\
		 */							\
		*flags |= CPU_DTRACE_BADADDR;				\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;	\
		return (0);						\
	}								\
									\
	{								\
	volatile vm_offset_t recover = (vm_offset_t)&&dtraceLoadRecover##bits; \
	*flags |= CPU_DTRACE_NOFAULT;					\
	recover = dtrace_set_thread_recover(current_thread(), recover);	\
	/*CSTYLED*/							\
	/*								\
	 * PR6394061 - avoid device memory that is unpredictably	\
	 * mapped and unmapped						\
	 */								\
	if (pmap_valid_page(pmap_find_phys(kernel_pmap, addr)))		\
		rval = *((volatile uint##bits##_t *)addr);		\
	else {								\
		*flags |= CPU_DTRACE_BADADDR;				\
		cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;	\
		return (0);						\
	}								\
									\
	RECOVER_LABEL(bits);						\
	(void)dtrace_set_thread_recover(current_thread(), recover);	\
	*flags &= ~CPU_DTRACE_NOFAULT;					\
	}								\
									\
	return (rval);							\
}
#else /* all other architectures */
#error Unknown Architecture
#endif

#ifdef __LP64__
#define	dtrace_loadptr	dtrace_load64
#else
#define	dtrace_loadptr	dtrace_load32
#endif

#define	DTRACE_DYNHASH_FREE	0
#define	DTRACE_DYNHASH_SINK	1
#define	DTRACE_DYNHASH_VALID	2

#define	DTRACE_MATCH_FAIL	-1
#define	DTRACE_MATCH_NEXT	0
#define	DTRACE_MATCH_DONE	1
#define	DTRACE_ANCHORED(probe)	((probe)->dtpr_func[0] != '\0')
#define	DTRACE_STATE_ALIGN	64

#define	DTRACE_FLAGS2FLT(flags)						\
	(((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR :		\
	((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP :		\
	((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO :		\
	((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV :		\
	((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV :		\
	((flags) & CPU_DTRACE_TUPOFLOW) ? DTRACEFLT_TUPOFLOW :		\
	((flags) & CPU_DTRACE_BADALIGN) ? DTRACEFLT_BADALIGN :		\
	((flags) & CPU_DTRACE_NOSCRATCH) ? DTRACEFLT_NOSCRATCH :	\
	((flags) & CPU_DTRACE_BADSTACK) ? DTRACEFLT_BADSTACK :		\
	DTRACEFLT_UNKNOWN)

#define	DTRACEACT_ISSTRING(act)						\
	((act)->dta_kind == DTRACEACT_DIFEXPR &&			\
	(act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING)

static size_t dtrace_strlen(const char *, size_t);
static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
static void dtrace_enabling_provide(dtrace_provider_t *);
static int dtrace_enabling_match(dtrace_enabling_t *, int *, dtrace_match_cond_t *cond);
static void dtrace_enabling_matchall_with_cond(dtrace_match_cond_t *cond);
static void dtrace_enabling_matchall(void);
static dtrace_state_t *dtrace_anon_grab(void);
static uint64_t dtrace_helper(int, dtrace_mstate_t *,
    dtrace_state_t *, uint64_t, uint64_t);
static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
static void dtrace_buffer_drop(dtrace_buffer_t *);
static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
    dtrace_state_t *, dtrace_mstate_t *);
static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
    dtrace_optval_t);
static int dtrace_ecb_create_enable(dtrace_probe_t *, void *, void *);
static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
static int dtrace_canload_remains(uint64_t, size_t, size_t *,
    dtrace_mstate_t *, dtrace_vstate_t *);
static int dtrace_canstore_remains(uint64_t, size_t, size_t *,
    dtrace_mstate_t *, dtrace_vstate_t *);

/*
 * DTrace sysctl handlers
 *
 * These declarations and functions are used for a deeper DTrace configuration.
 * Most of them are not set on a per-consumer basis and may impact the other
 * DTrace consumers. Not every value is fully validated, so be careful about
 * the values you use.
 */
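/*
 * For example, from a shell (using the sysctls registered below):
 *	sysctl kern.dtrace.err_verbose=1	# set
 *	sysctl kern.dtrace.dof_maxsize		# read
 */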

SYSCTL_DECL(_kern_dtrace);
SYSCTL_NODE(_kern, OID_AUTO, dtrace, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "dtrace");

static int
sysctl_dtrace_err_verbose SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2)
	int changed, error;
	int value = *(int *) arg1;

	error = sysctl_io_number(req, value, sizeof(value), &value, &changed);
	if (error || !changed)
		return (error);

	if (value != 0 && value != 1)
		return (ERANGE);

	lck_mtx_lock(&dtrace_lock);
	dtrace_err_verbose = value;
	lck_mtx_unlock(&dtrace_lock);

	return (0);
}

/*
 * kern.dtrace.err_verbose
 *
 * Set DTrace verbosity when an error occurs (0 = disabled, 1 = enabled).
 * Errors are reported when a DIFO or a DOF has been rejected by the kernel.
 */
SYSCTL_PROC(_kern_dtrace, OID_AUTO, err_verbose,
	CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
	&dtrace_err_verbose, 0,
	sysctl_dtrace_err_verbose, "I", "dtrace error verbose");

static int
sysctl_dtrace_buffer_memory_maxsize SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2, req)
	int changed, error;
	uint64_t value = *(uint64_t *) arg1;

	error = sysctl_io_number(req, value, sizeof(value), &value, &changed);
	if (error || !changed)
		return (error);

	if (value <= dtrace_buffer_memory_inuse)
		return (ERANGE);

	lck_mtx_lock(&dtrace_lock);
	dtrace_buffer_memory_maxsize = value;
	lck_mtx_unlock(&dtrace_lock);

	return (0);
}

/*
 * kern.dtrace.buffer_memory_maxsize
 *
 * Set the maximum size, in bytes, usable by all the consumers' state buffers.
 * By default the limit is PHYS_MEM / 3 for *all* consumers. Attempting to set
 * a null or negative value, or a value <= dtrace_buffer_memory_inuse, will
 * result in a failure.
 */
SYSCTL_PROC(_kern_dtrace, OID_AUTO, buffer_memory_maxsize,
	CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
	&dtrace_buffer_memory_maxsize, 0,
	sysctl_dtrace_buffer_memory_maxsize, "Q", "dtrace state buffer memory maxsize");

/*
 * kern.dtrace.buffer_memory_inuse
 *
 * Current state buffer memory used, in bytes, by all the DTrace consumers.
 * This value is read-only.
 */
SYSCTL_QUAD(_kern_dtrace, OID_AUTO, buffer_memory_inuse, CTLFLAG_RD | CTLFLAG_LOCKED,
	&dtrace_buffer_memory_inuse, "dtrace state buffer memory in-use");

static int
sysctl_dtrace_difo_maxsize SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2, req)
	int changed, error;
	size_t value = *(size_t*) arg1;

	error = sysctl_io_number(req, value, sizeof(value), &value, &changed);
	if (error || !changed)
		return (error);

	if (value <= 0)
		return (ERANGE);

	lck_mtx_lock(&dtrace_lock);
	dtrace_difo_maxsize = value;
	lck_mtx_unlock(&dtrace_lock);

	return (0);
}

/*
 * kern.dtrace.difo_maxsize
 *
 * Set the DIFO maximum size, in bytes; see the definition of
 * dtrace_difo_maxsize for the default value. Attempting to set a null or
 * negative size will result in a failure.
 */
SYSCTL_PROC(_kern_dtrace, OID_AUTO, difo_maxsize,
	CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
	&dtrace_difo_maxsize, 0,
	sysctl_dtrace_difo_maxsize, "Q", "dtrace difo maxsize");

static int
sysctl_dtrace_dof_maxsize SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2, req)
	int changed, error;
	dtrace_optval_t value = *(dtrace_optval_t *) arg1;

	error = sysctl_io_number(req, value, sizeof(value), &value, &changed);
	if (error || !changed)
		return (error);

	if (value <= 0)
		return (ERANGE);

	lck_mtx_lock(&dtrace_lock);
	dtrace_dof_maxsize = value;
	lck_mtx_unlock(&dtrace_lock);

	return (0);
}

/*
 * kern.dtrace.dof_maxsize
 *
 * Set the DOF maximum size, in bytes; see the definition of
 * dtrace_dof_maxsize for the default value. Attempting to set a null or
 * negative size will result in a failure.
 */
SYSCTL_PROC(_kern_dtrace, OID_AUTO, dof_maxsize,
	CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
	&dtrace_dof_maxsize, 0,
	sysctl_dtrace_dof_maxsize, "Q", "dtrace dof maxsize");

static int
sysctl_dtrace_statvar_maxsize SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2, req)
	int changed, error;
	dtrace_optval_t value = *(dtrace_optval_t*) arg1;

	error = sysctl_io_number(req, value, sizeof(value), &value, &changed);
	if (error || !changed)
		return (error);

	if (value <= 0)
		return (ERANGE);
	if (value > dtrace_statvar_maxsize_max)
		return (ERANGE);

	lck_mtx_lock(&dtrace_lock);
	dtrace_statvar_maxsize = value;
	lck_mtx_unlock(&dtrace_lock);

	return (0);
}

/*
 * kern.dtrace.global_maxsize
 *
 * Set the statically-scoped variable maximum size, in bytes; see the
 * definition of dtrace_statvar_maxsize for the default value. Attempting to
 * set a null, negative or too-high size will result in a failure.
 */
SYSCTL_PROC(_kern_dtrace, OID_AUTO, global_maxsize,
	CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
	&dtrace_statvar_maxsize, 0,
	sysctl_dtrace_statvar_maxsize, "Q", "dtrace statvar maxsize");

static int
sysctl_dtrace_provide_private_probes SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2)
	int error;
	int value = *(int *) arg1;

	error = sysctl_io_number(req, value, sizeof(value), &value, NULL);
	if (error)
		return (error);

	if (value != 0 && value != 1)
		return (ERANGE);

	lck_mtx_lock(&dtrace_lock);
	dtrace_provide_private_probes = value;
	lck_mtx_unlock(&dtrace_lock);

	return (0);
}

/*
 * kern.dtrace.provide_private_probes
 *
 * Set whether the providers must provide the private probes. This is
 * mainly used by the FBT provider to request probes for the private/static
 * symbols.
 */
SYSCTL_PROC(_kern_dtrace, OID_AUTO, provide_private_probes,
	CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
	&dtrace_provide_private_probes, 0,
	sysctl_dtrace_provide_private_probes, "I", "provider must provide the private probes");

/*
 * DTrace Probe Context Functions
 *
 * These functions are called from probe context. Because probe context is
 * any context in which C may be called, arbitrary locks may be held,
 * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
 * As a result, functions called from probe context may only call other DTrace
 * support functions -- they may not interact at all with the system at large.
 * (Note that the ASSERT macro is made probe-context safe by redefining it in
 * terms of dtrace_assfail(), a probe-context safe function.) If arbitrary
 * loads are to be performed from probe context, they _must_ be in terms of
 * the safe dtrace_load*() variants.
 *
 * Some functions in this block are not actually called from probe context;
 * for these functions, there will be a comment above the function reading
 * "Note: not called from probe context."
 */
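/*
 * For instance, a probe-context function must read foreign memory via the
 * safe variants, e.g.:
 *
 *	uint64_t v = dtrace_load64(addr);
 *
 * rather than dereferencing the address directly.
 */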

int
dtrace_assfail(const char *a, const char *f, int l)
{
	panic("dtrace: assertion failed: %s, file: %s, line: %d", a, f, l);

	/*
	 * We just need something here that even the most clever compiler
	 * cannot optimize away.
	 */
	return (a[(uintptr_t)f]);
}

/*
 * Atomically increment a specified error counter from probe context.
 */
static void
dtrace_error(uint32_t *counter)
{
	/*
	 * Most counters stored to in probe context are per-CPU counters.
	 * However, there are some error conditions that are sufficiently
	 * arcane that they don't merit per-CPU storage. If these counters
	 * are incremented concurrently on different CPUs, scalability will be
	 * adversely affected -- but we don't expect them to be white-hot in a
	 * correctly constructed enabling...
	 */
	uint32_t oval, nval;

	do {
		oval = *counter;

		if ((nval = oval + 1) == 0) {
			/*
			 * If the counter would wrap, set it to 1 -- assuring
			 * that the counter is never zero when we have seen
			 * errors. (The counter must be 32-bits because we
			 * aren't guaranteed a 64-bit compare&swap operation.)
			 * To save this code both the infamy of being fingered
			 * by a priggish news story and the indignity of being
			 * the target of a neo-puritan witch trial, we're
			 * carefully avoiding any colorful description of the
			 * likelihood of this condition -- but suffice it to
			 * say that it is only slightly more likely than the
			 * overflow of predicate cache IDs, as discussed in
			 * dtrace_predicate_create().
			 */
			nval = 1;
		}
	} while (dtrace_cas32(counter, oval, nval) != oval);
}

/*
 * Use the DTRACE_LOADFUNC macro to define functions for each of loading a
 * uint8_t, a uint16_t, a uint32_t and a uint64_t.
 */
DTRACE_LOADFUNC(8)
DTRACE_LOADFUNC(16)
DTRACE_LOADFUNC(32)
DTRACE_LOADFUNC(64)
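/* Each expands to a definition, e.g. DTRACE_LOADFUNC(8) defines uint8_t dtrace_load8(uintptr_t addr). */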

static int
dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate)
{
	if (dest < mstate->dtms_scratch_base)
		return (0);

	if (dest + size < dest)
		return (0);

	if (dest + size > mstate->dtms_scratch_ptr)
		return (0);

	return (1);
}

static int
dtrace_canstore_statvar(uint64_t addr, size_t sz, size_t *remain,
    dtrace_statvar_t **svars, int nsvars)
{
	int i;

	size_t maxglobalsize, maxlocalsize;

	maxglobalsize = dtrace_statvar_maxsize + sizeof (uint64_t);
	maxlocalsize = (maxglobalsize) * NCPU;

	if (nsvars == 0)
		return (0);

	for (i = 0; i < nsvars; i++) {
		dtrace_statvar_t *svar = svars[i];
		uint8_t scope;
		size_t size;

		if (svar == NULL || (size = svar->dtsv_size) == 0)
			continue;

		scope = svar->dtsv_var.dtdv_scope;

		/*
		 * We verify that our size is valid in the spirit of providing
		 * defense in depth: we want to prevent attackers from using
		 * DTrace to escalate an orthogonal kernel heap corruption bug
		 * into the ability to store to arbitrary locations in memory.
		 */
		VERIFY((scope == DIFV_SCOPE_GLOBAL && size <= maxglobalsize) ||
		    (scope == DIFV_SCOPE_LOCAL && size <= maxlocalsize));

		if (DTRACE_INRANGE(addr, sz, svar->dtsv_data, svar->dtsv_size)) {
			DTRACE_RANGE_REMAIN(remain, addr, svar->dtsv_data,
			    svar->dtsv_size);
			return (1);
		}
	}

	return (0);
}

/*
 * Check to see if the address is within a memory region to which a store may
 * be issued. This includes the DTrace scratch areas, and any DTrace variable
 * region. The caller of dtrace_canstore() is responsible for performing any
 * alignment checks that are needed before stores are actually executed.
 */
static int
dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate)
{
	return (dtrace_canstore_remains(addr, sz, NULL, mstate, vstate));
}

/*
 * Implementation of dtrace_canstore which communicates the upper bound of the
 * allowed memory region.
 */
static int
dtrace_canstore_remains(uint64_t addr, size_t sz, size_t *remain,
    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
	/*
	 * First, check to see if the address is in scratch space...
	 */
	if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base,
	    mstate->dtms_scratch_size)) {
		DTRACE_RANGE_REMAIN(remain, addr, mstate->dtms_scratch_base,
		    mstate->dtms_scratch_size);
		return (1);
	}
	/*
	 * Now check to see if it's a dynamic variable. This check will pick
	 * up both thread-local variables and any global dynamically-allocated
	 * variables.
	 */
	if (DTRACE_INRANGE(addr, sz, (uintptr_t)vstate->dtvs_dynvars.dtds_base,
	    vstate->dtvs_dynvars.dtds_size)) {
		dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
		uintptr_t base = (uintptr_t)dstate->dtds_base +
		    (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t));
		uintptr_t chunkoffs;
		dtrace_dynvar_t *dvar;

		/*
		 * Before we assume that we can store here, we need to make
		 * sure that it isn't in our metadata -- storing to our
		 * dynamic variable metadata would corrupt our state. For
		 * the range to not include any dynamic variable metadata,
		 * it must:
		 *
		 *	(1) Start above the hash table that is at the base of
		 *	the dynamic variable space
		 *
		 *	(2) Have a starting chunk offset that is beyond the
		 *	dtrace_dynvar_t that is at the base of every chunk
		 *
		 *	(3) Not span a chunk boundary
		 *
		 *	(4) Not be in the tuple space of a dynamic variable
		 *
		 */
		if (addr < base)
			return (0);

		chunkoffs = (addr - base) % dstate->dtds_chunksize;

		if (chunkoffs < sizeof (dtrace_dynvar_t))
			return (0);

		if (chunkoffs + sz > dstate->dtds_chunksize)
			return (0);

		dvar = (dtrace_dynvar_t *)((uintptr_t)addr - chunkoffs);

		if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE)
			return (0);

		if (chunkoffs < sizeof (dtrace_dynvar_t) +
		    ((dvar->dtdv_tuple.dtt_nkeys - 1) * sizeof (dtrace_key_t)))
			return (0);

		return (1);
	}

	/*
	 * Finally, check the static local and global variables. These checks
	 * take the longest, so we perform them last.
	 */
	if (dtrace_canstore_statvar(addr, sz, remain,
	    vstate->dtvs_locals, vstate->dtvs_nlocals))
		return (1);

	if (dtrace_canstore_statvar(addr, sz, remain,
	    vstate->dtvs_globals, vstate->dtvs_nglobals))
		return (1);

	return (0);
}

/*
 * Convenience routine to check to see if the address is within a memory
 * region in which a load may be issued given the user's privilege level;
 * if not, it sets the appropriate error flags and loads 'addr' into the
 * illegal value slot.
 *
 * DTrace subroutines (DIF_SUBR_*) should use this helper to implement
 * appropriate memory access protection.
 */
static int
dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate)
{
	return (dtrace_canload_remains(addr, sz, NULL, mstate, vstate));
}

/*
 * Implementation of dtrace_canload which communicates the upper bound of the
 * allowed memory region.
 */
static int
dtrace_canload_remains(uint64_t addr, size_t sz, size_t *remain,
    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
	volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;

	/*
	 * If we hold the privilege to read from kernel memory, then
	 * everything is readable.
	 */
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) {
		DTRACE_RANGE_REMAIN(remain, addr, addr, sz);
		return (1);
	}

	/*
	 * You can obviously read that which you can store.
	 */
	if (dtrace_canstore_remains(addr, sz, remain, mstate, vstate))
		return (1);

	/*
	 * We're allowed to read from our own string table.
	 */
	if (DTRACE_INRANGE(addr, sz, (uintptr_t)mstate->dtms_difo->dtdo_strtab,
	    mstate->dtms_difo->dtdo_strlen)) {
		DTRACE_RANGE_REMAIN(remain, addr,
		    mstate->dtms_difo->dtdo_strtab,
		    mstate->dtms_difo->dtdo_strlen);
		return (1);
	}

	DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV);
	*illval = addr;
	return (0);
}

/*
 * Convenience routine to check to see if a given string is within a memory
 * region in which a load may be issued given the user's privilege level;
 * this exists so that we don't need to issue unnecessary dtrace_strlen()
 * calls in the event that the user has all privileges.
 */
static int
dtrace_strcanload(uint64_t addr, size_t sz, size_t *remain,
    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
	size_t rsize;

	/*
	 * If we hold the privilege to read from kernel memory, then
	 * everything is readable.
	 */
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) {
		DTRACE_RANGE_REMAIN(remain, addr, addr, sz);
		return (1);
	}

	/*
	 * Even if the caller is uninterested in querying the remaining valid
	 * range, it is required to ensure that the access is allowed.
	 */
	if (remain == NULL) {
		remain = &rsize;
	}
	if (dtrace_canload_remains(addr, 0, remain, mstate, vstate)) {
		size_t strsz;
		/*
		 * Perform the strlen after determining the length of the
		 * memory region which is accessible. This prevents timing
		 * information from being used to find NULs in memory which is
		 * not accessible to the caller.
		 */
		strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr,
		    MIN(sz, *remain));
		if (strsz <= *remain) {
			return (1);
		}
	}

	return (0);
}

/*
 * Convenience routine to check to see if a given variable is within a memory
 * region in which a load may be issued given the user's privilege level.
 */
static int
dtrace_vcanload(void *src, dtrace_diftype_t *type, size_t *remain,
    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
	size_t sz;
	ASSERT(type->dtdt_flags & DIF_TF_BYREF);

	/*
	 * Calculate the max size before performing any checks since even
	 * DTRACE_ACCESS_KERNEL-credentialed callers expect that this function
	 * return the max length via 'remain'.
	 */
	if (type->dtdt_kind == DIF_TYPE_STRING) {
		dtrace_state_t *state = vstate->dtvs_state;

		if (state != NULL) {
			sz = state->dts_options[DTRACEOPT_STRSIZE];
		} else {
			/*
			 * In helper context, we have a NULL state; fall back
			 * to using the system-wide default for the string size
			 * in this case.
			 */
			sz = dtrace_strsize_default;
		}
	} else {
		sz = type->dtdt_size;
	}

	/*
	 * If we hold the privilege to read from kernel memory, then
	 * everything is readable.
	 */
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) {
		DTRACE_RANGE_REMAIN(remain, (uintptr_t)src, src, sz);
		return (1);
	}

	if (type->dtdt_kind == DIF_TYPE_STRING) {
		return (dtrace_strcanload((uintptr_t)src, sz, remain, mstate,
		    vstate));
	}
	return (dtrace_canload_remains((uintptr_t)src, sz, remain, mstate,
	    vstate));
}

/*
 * Compare two strings using safe loads.
 */
static int
dtrace_strncmp(char *s1, char *s2, size_t limit)
{
	uint8_t c1, c2;
	volatile uint16_t *flags;

	if (s1 == s2 || limit == 0)
		return (0);

	flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

	do {
		if (s1 == NULL) {
			c1 = '\0';
		} else {
			c1 = dtrace_load8((uintptr_t)s1++);
		}

		if (s2 == NULL) {
			c2 = '\0';
		} else {
			c2 = dtrace_load8((uintptr_t)s2++);
		}

		if (c1 != c2)
			return (c1 - c2);
	} while (--limit && c1 != '\0' && !(*flags & CPU_DTRACE_FAULT));

	return (0);
}

/*
 * Compute strlen(s) for a string using safe memory accesses. The additional
 * len parameter is used to specify a maximum length to ensure completion.
 */
static size_t
dtrace_strlen(const char *s, size_t lim)
{
	uint_t len;

	for (len = 0; len != lim; len++) {
		if (dtrace_load8((uintptr_t)s++) == '\0')
			break;
	}

	return (len);
}

/*
 * Check if an address falls within a toxic region.
 */
static int
dtrace_istoxic(uintptr_t kaddr, size_t size)
{
	uintptr_t taddr, tsize;
	int i;

	for (i = 0; i < dtrace_toxranges; i++) {
		taddr = dtrace_toxrange[i].dtt_base;
		tsize = dtrace_toxrange[i].dtt_limit - taddr;

		if (kaddr - taddr < tsize) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = kaddr;
			return (1);
		}

		if (taddr - kaddr < size) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
			cpu_core[CPU->cpu_id].cpuc_dtrace_illval = taddr;
			return (1);
		}
	}

	return (0);
}

/*
 * Copy src to dst using safe memory accesses. The src is assumed to be unsafe
 * memory specified by the DIF program. The dst is assumed to be safe memory
 * that we can store to directly because it is managed by DTrace. As with
 * standard bcopy, overlapping copies are handled properly.
 */
static void
dtrace_bcopy(const void *src, void *dst, size_t len)
{
	if (len != 0) {
		uint8_t *s1 = dst;
		const uint8_t *s2 = src;

		if (s1 <= s2) {
			do {
				*s1++ = dtrace_load8((uintptr_t)s2++);
			} while (--len != 0);
		} else {
			s2 += len;
			s1 += len;

			do {
				*--s1 = dtrace_load8((uintptr_t)--s2);
			} while (--len != 0);
		}
	}
}

/*
 * Copy src to dst using safe memory accesses, up to either the specified
 * length, or the point that a nul byte is encountered. The src is assumed to
 * be unsafe memory specified by the DIF program. The dst is assumed to be
 * safe memory that we can store to directly because it is managed by DTrace.
 * Unlike dtrace_bcopy(), overlapping regions are not handled.
 */
static void
dtrace_strcpy(const void *src, void *dst, size_t len)
{
	if (len != 0) {
		uint8_t *s1 = dst, c;
		const uint8_t *s2 = src;

		do {
			*s1++ = c = dtrace_load8((uintptr_t)s2++);
		} while (--len != 0 && c != '\0');
	}
}

/*
 * Copy src to dst, deriving the size and type from the specified (BYREF)
 * variable type. The src is assumed to be unsafe memory specified by the DIF
 * program. The dst is assumed to be DTrace variable memory that is of the
 * specified type; we assume that we can store to directly.
 */
static void
dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type, size_t limit)
{
	ASSERT(type->dtdt_flags & DIF_TF_BYREF);

	if (type->dtdt_kind == DIF_TYPE_STRING) {
		dtrace_strcpy(src, dst, MIN(type->dtdt_size, limit));
	} else {
		dtrace_bcopy(src, dst, MIN(type->dtdt_size, limit));
	}
}

/*
 * Compare s1 to s2 using safe memory accesses. The s1 data is assumed to be
 * unsafe memory specified by the DIF program. The s2 data is assumed to be
 * safe memory that we can access directly because it is managed by DTrace.
 */
static int
dtrace_bcmp(const void *s1, const void *s2, size_t len)
{
	volatile uint16_t *flags;

	flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

	if (s1 == s2)
		return (0);

	if (s1 == NULL || s2 == NULL)
		return (1);

	if (s1 != s2 && len != 0) {
		const uint8_t *ps1 = s1;
		const uint8_t *ps2 = s2;

		do {
			if (dtrace_load8((uintptr_t)ps1++) != *ps2++)
				return (1);
		} while (--len != 0 && !(*flags & CPU_DTRACE_FAULT));
	}
	return (0);
}

/*
 * Zero the specified region using a simple byte-by-byte loop. Note that this
 * is for safe DTrace-managed memory only.
 */
static void
dtrace_bzero(void *dst, size_t len)
{
	uchar_t *cp;

	for (cp = dst; len != 0; len--)
		*cp++ = 0;
}

static void
dtrace_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
{
	uint64_t result[2];

	result[0] = addend1[0] + addend2[0];
	result[1] = addend1[1] + addend2[1] +
	    (result[0] < addend1[0] || result[0] < addend2[0] ? 1 : 0);

	sum[0] = result[0];
	sum[1] = result[1];
}

/*
 * Shift the 128-bit value in a by b. If b is positive, shift left.
 * If b is negative, shift right.
 */
static void
dtrace_shift_128(uint64_t *a, int b)
{
	uint64_t mask;

	if (b == 0)
		return;

	if (b < 0) {
		b = -b;
		if (b >= 64) {
			a[0] = a[1] >> (b - 64);
			a[1] = 0;
		} else {
			a[0] >>= b;
			mask = 1LL << (64 - b);
			mask -= 1;
			a[0] |= ((a[1] & mask) << (64 - b));
			a[1] >>= b;
		}
	} else {
		if (b >= 64) {
			a[1] = a[0] << (b - 64);
			a[0] = 0;
		} else {
			a[1] <<= b;
			mask = a[0] >> (64 - b);
			a[1] |= mask;
			a[0] <<= b;
		}
	}
}

/*
 * The basic idea is to break the 2 64-bit values into 4 32-bit values,
 * use native multiplication on those, and then re-combine into the
 * resulting 128-bit value.
 *
 * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) =
 *     hi1 * hi2 << 64 +
 *     hi1 * lo2 << 32 +
 *     hi2 * lo1 << 32 +
 *     lo1 * lo2
 */
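/*
 * (Quick check: factor1 = factor2 = 1 << 32 gives hi1 = hi2 = 1 and
 * lo1 = lo2 = 0, so the result is product[1] = 1, product[0] = 0, i.e. 2^64.)
 */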
static void
dtrace_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)
{
	uint64_t hi1, hi2, lo1, lo2;
	uint64_t tmp[2];

	hi1 = factor1 >> 32;
	hi2 = factor2 >> 32;

	lo1 = factor1 & DT_MASK_LO;
	lo2 = factor2 & DT_MASK_LO;

	product[0] = lo1 * lo2;
	product[1] = hi1 * hi2;

	tmp[0] = hi1 * lo2;
	tmp[1] = 0;
	dtrace_shift_128(tmp, 32);
	dtrace_add_128(product, tmp, product);

	tmp[0] = hi2 * lo1;
	tmp[1] = 0;
	dtrace_shift_128(tmp, 32);
	dtrace_add_128(product, tmp, product);
}

/*
 * This privilege check should be used by actions and subroutines to
 * verify that the user credentials of the process that enabled the
 * invoking ECB match the target credentials
 */
static int
dtrace_priv_proc_common_user(dtrace_state_t *state)
{
	cred_t *cr, *s_cr = state->dts_cred.dcr_cred;

	/*
	 * We should always have a non-NULL state cred here, since if cred
	 * is null (anonymous tracing), we fast-path bypass this routine.
	 */
	ASSERT(s_cr != NULL);

	if ((cr = dtrace_CRED()) != NULL &&
	    posix_cred_get(s_cr)->cr_uid == posix_cred_get(cr)->cr_uid &&
	    posix_cred_get(s_cr)->cr_uid == posix_cred_get(cr)->cr_ruid &&
	    posix_cred_get(s_cr)->cr_uid == posix_cred_get(cr)->cr_suid &&
	    posix_cred_get(s_cr)->cr_gid == posix_cred_get(cr)->cr_gid &&
	    posix_cred_get(s_cr)->cr_gid == posix_cred_get(cr)->cr_rgid &&
	    posix_cred_get(s_cr)->cr_gid == posix_cred_get(cr)->cr_sgid)
		return (1);

	return (0);
}

/*
 * This privilege check should be used by actions and subroutines to
 * verify that the zone of the process that enabled the invoking ECB
 * matches the target credentials
 */
static int
dtrace_priv_proc_common_zone(dtrace_state_t *state)
{
	cred_t *cr, *s_cr = state->dts_cred.dcr_cred;
#pragma unused(cr, s_cr, state) /* __APPLE__ */

	/*
	 * We should always have a non-NULL state cred here, since if cred
	 * is null (anonymous tracing), we fast-path bypass this routine.
	 */
	ASSERT(s_cr != NULL);

	return 1; /* APPLE NOTE: Darwin doesn't do zones. */
}

/*
 * This privilege check should be used by actions and subroutines to
 * verify that the process has not setuid or changed credentials.
 */
static int
dtrace_priv_proc_common_nocd(void)
{
	return 1; /* Darwin omits "No Core Dump" flag. */
}

static int
dtrace_priv_proc_destructive(dtrace_state_t *state)
{
	int action = state->dts_cred.dcr_action;

	if (ISSET(current_proc()->p_lflag, P_LNOATTACH))
		goto bad;

	if (dtrace_is_restricted() && !dtrace_can_attach_to_proc(current_proc()))
		goto bad;

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) &&
	    dtrace_priv_proc_common_zone(state) == 0)
		goto bad;

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER) == 0) &&
	    dtrace_priv_proc_common_user(state) == 0)
		goto bad;

	if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG) == 0) &&
	    dtrace_priv_proc_common_nocd() == 0)
		goto bad;

	return (1);

bad:
	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}

static int
dtrace_priv_proc_control(dtrace_state_t *state)
{
	if (ISSET(current_proc()->p_lflag, P_LNOATTACH))
		goto bad;

	if (dtrace_is_restricted() && !dtrace_can_attach_to_proc(current_proc()))
		goto bad;

	if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL)
		return (1);

	if (dtrace_priv_proc_common_zone(state) &&
	    dtrace_priv_proc_common_user(state) &&
	    dtrace_priv_proc_common_nocd())
		return (1);

bad:
	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}

static int
dtrace_priv_proc(dtrace_state_t *state)
{
	if (ISSET(current_proc()->p_lflag, P_LNOATTACH))
		goto bad;

	if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed() && !dtrace_can_attach_to_proc(current_proc()))
		goto bad;

	if (state->dts_cred.dcr_action & DTRACE_CRA_PROC)
		return (1);

bad:
	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}

/*
 * The P_LNOATTACH check is an Apple-specific check.
 * We need a version of dtrace_priv_proc() that omits
 * that check for PID and EXECNAME accesses.
 */
static int
dtrace_priv_proc_relaxed(dtrace_state_t *state)
{
	if (state->dts_cred.dcr_action & DTRACE_CRA_PROC)
		return (1);

	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

	return (0);
}

static int
dtrace_priv_kernel(dtrace_state_t *state)
{
	if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed())
		goto bad;

	if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL)
		return (1);

bad:
	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;

	return (0);
}

1637static int
1638dtrace_priv_kernel_destructive(dtrace_state_t *state)
1639{
fe8ab488
A
1640 if (dtrace_is_restricted())
1641 goto bad;
1642
2d21ac55
A
1643 if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL_DESTRUCTIVE)
1644 return (1);
1645
fe8ab488 1646bad:
2d21ac55
A
1647 cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;
1648
1649 return (0);
1650}
1651
1652/*
1653 * Note: not called from probe context. This function is called
1654 * asynchronously (and at a regular interval) from outside of probe context to
1655 * clean the dirty dynamic variable lists on all CPUs. Dynamic variable
1656 * cleaning is explained in detail in <sys/dtrace_impl.h>.
1657 */
fe8ab488 1658static void
2d21ac55
A
1659dtrace_dynvar_clean(dtrace_dstate_t *dstate)
1660{
1661 dtrace_dynvar_t *dirty;
1662 dtrace_dstate_percpu_t *dcpu;
1663 int i, work = 0;
1664
c910b4d9 1665 for (i = 0; i < (int)NCPU; i++) {
2d21ac55
A
1666 dcpu = &dstate->dtds_percpu[i];
1667
1668 ASSERT(dcpu->dtdsc_rinsing == NULL);
1669
1670 /*
1671 * If the dirty list is NULL, there is no dirty work to do.
1672 */
1673 if (dcpu->dtdsc_dirty == NULL)
1674 continue;
1675
1676 /*
1677 * If the clean list is non-NULL, then we're not going to do
1678 * any work for this CPU -- it means that there has not been
1679 * a dtrace_dynvar() allocation on this CPU (or from this CPU)
1680 * since the last time we cleaned house.
1681 */
1682 if (dcpu->dtdsc_clean != NULL)
1683 continue;
1684
1685 work = 1;
1686
1687 /*
1688 * Atomically move the dirty list aside.
1689 */
1690 do {
1691 dirty = dcpu->dtdsc_dirty;
1692
1693 /*
1694 * Before we zap the dirty list, set the rinsing list.
1695 * (This allows for a potential assertion in
1696 * dtrace_dynvar(): if a free dynamic variable appears
1697 * on a hash chain, either the dirty list or the
1698 * rinsing list for some CPU must be non-NULL.)
1699 */
1700 dcpu->dtdsc_rinsing = dirty;
1701 dtrace_membar_producer();
1702 } while (dtrace_casptr(&dcpu->dtdsc_dirty,
1703 dirty, NULL) != dirty);
1704 }
1705
1706 if (!work) {
1707 /*
1708 * We have no work to do; we can simply return.
1709 */
1710 return;
1711 }
1712
1713 dtrace_sync();
1714
c910b4d9 1715 for (i = 0; i < (int)NCPU; i++) {
2d21ac55
A
1716 dcpu = &dstate->dtds_percpu[i];
1717
1718 if (dcpu->dtdsc_rinsing == NULL)
1719 continue;
1720
1721 /*
1722 * We are now guaranteed that no hash chain contains a pointer
1723 * into this dirty list; we can make it clean.
1724 */
1725 ASSERT(dcpu->dtdsc_clean == NULL);
1726 dcpu->dtdsc_clean = dcpu->dtdsc_rinsing;
1727 dcpu->dtdsc_rinsing = NULL;
1728 }
1729
1730 /*
1731 * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make
1732 * sure that all CPUs have seen all of the dtdsc_clean pointers.
1733 * This prevents a race whereby a CPU incorrectly decides that
1734 * the state should be something other than DTRACE_DSTATE_CLEAN
1735 * after dtrace_dynvar_clean() has completed.
1736 */
1737 dtrace_sync();
1738
1739 dstate->dtds_state = DTRACE_DSTATE_CLEAN;
1740}
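
/*
 * Illustrative sketch (not part of the original source): the "move the
 * dirty list aside" step above is a lock-free list detach. A minimal C11
 * rendering of the same idiom, assuming the hypothetical types below,
 * publishes the side ("rinsing") pointer before swinging the list head,
 * exactly as the kernel code does with dtrace_membar_producer() and
 * dtrace_casptr():
 */
#if 0 /* illustrative only */
#include <stdatomic.h>

struct node { struct node *next; };

static struct node *
detach_list(_Atomic(struct node *) *headp, struct node **rinsing)
{
	struct node *head;

	do {
		head = atomic_load(headp);
		*rinsing = head;	/* publish before detaching */
	} while (!atomic_compare_exchange_weak(headp, &head, NULL));

	return (head);
}
#endif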

/*
 * Depending on the value of the op parameter, this function looks up,
 * allocates or deallocates an arbitrarily-keyed dynamic variable. If an
 * allocation is requested, this function will return a pointer to a
 * dtrace_dynvar_t corresponding to the allocated variable -- or NULL if no
 * variable can be allocated. If NULL is returned, the appropriate counter
 * will be incremented.
 */
static dtrace_dynvar_t *
dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys,
    dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op,
    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
	uint64_t hashval = DTRACE_DYNHASH_VALID;
	dtrace_dynhash_t *hash = dstate->dtds_hash;
	dtrace_dynvar_t *free, *new_free, *next, *dvar, *start, *prev = NULL;
	processorid_t me = CPU->cpu_id, cpu = me;
	dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[me];
	size_t bucket, ksize;
	size_t chunksize = dstate->dtds_chunksize;
	uintptr_t kdata, lock, nstate;
	uint_t i;

	ASSERT(nkeys != 0);

	/*
	 * Hash the key. As with aggregations, we use Jenkins' "One-at-a-time"
	 * algorithm. For the by-value portions, we perform the algorithm in
	 * 16-bit chunks (as opposed to 8-bit chunks). This speeds things up a
	 * bit, and seems to have only a minute effect on distribution. For
	 * the by-reference data, we perform "One-at-a-time" iterating (safely)
	 * over each referenced byte. It's painful to do this, but it's much
	 * better than pathological hash distribution. The efficacy of the
	 * hashing algorithm (and a comparison with other algorithms) may be
	 * found by running the ::dtrace_dynstat MDB dcmd.
	 */
	for (i = 0; i < nkeys; i++) {
		if (key[i].dttk_size == 0) {
			uint64_t val = key[i].dttk_value;

			hashval += (val >> 48) & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			hashval += (val >> 32) & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			hashval += (val >> 16) & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			hashval += val & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);
		} else {
			/*
			 * This is incredibly painful, but it beats the hell
			 * out of the alternative.
			 */
			uint64_t j, size = key[i].dttk_size;
			uintptr_t base = (uintptr_t)key[i].dttk_value;

			if (!dtrace_canload(base, size, mstate, vstate))
				break;

			for (j = 0; j < size; j++) {
				hashval += dtrace_load8(base + j);
				hashval += (hashval << 10);
				hashval ^= (hashval >> 6);
			}
		}
	}

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
		return (NULL);

	hashval += (hashval << 3);
	hashval ^= (hashval >> 11);
	hashval += (hashval << 15);

	/*
	 * There is a remote chance (ideally, 1 in 2^31) that our hashval
	 * comes out to be one of our two sentinel hash values. If this
	 * actually happens, we set the hashval to be a value known to be a
	 * non-sentinel value.
	 */
	if (hashval == DTRACE_DYNHASH_FREE || hashval == DTRACE_DYNHASH_SINK)
		hashval = DTRACE_DYNHASH_VALID;

	/*
	 * Yes, it's painful to do a divide here. If the cycle count becomes
	 * important here, tricks can be pulled to reduce it. (However, it's
	 * critical that hash collisions be kept to an absolute minimum;
	 * they're much more painful than a divide.) It's better to have a
	 * solution that generates few collisions and still keeps things
	 * relatively simple.
	 */
	bucket = hashval % dstate->dtds_hashsize;

	if (op == DTRACE_DYNVAR_DEALLOC) {
		volatile uintptr_t *lockp = &hash[bucket].dtdh_lock;

		for (;;) {
			while ((lock = *lockp) & 1)
				continue;

			if (dtrace_casptr((void *)(uintptr_t)lockp,
			    (void *)lock, (void *)(lock + 1)) == (void *)lock)
				break;
		}

		dtrace_membar_producer();
	}

top:
	prev = NULL;
	lock = hash[bucket].dtdh_lock;

	dtrace_membar_consumer();

	start = hash[bucket].dtdh_chain;
	ASSERT(start != NULL && (start->dtdv_hashval == DTRACE_DYNHASH_SINK ||
	    start->dtdv_hashval != DTRACE_DYNHASH_FREE ||
	    op != DTRACE_DYNVAR_DEALLOC));

	for (dvar = start; dvar != NULL; dvar = dvar->dtdv_next) {
		dtrace_tuple_t *dtuple = &dvar->dtdv_tuple;
		dtrace_key_t *dkey = &dtuple->dtt_key[0];

		if (dvar->dtdv_hashval != hashval) {
			if (dvar->dtdv_hashval == DTRACE_DYNHASH_SINK) {
				/*
				 * We've reached the sink, and therefore the
				 * end of the hash chain; we can kick out of
				 * the loop knowing that we have seen a valid
				 * snapshot of state.
				 */
				ASSERT(dvar->dtdv_next == NULL);
				ASSERT(dvar == &dtrace_dynhash_sink);
				break;
			}

			if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) {
				/*
				 * We've gone off the rails: somewhere along
				 * the line, one of the members of this hash
				 * chain was deleted. Note that we could also
				 * detect this by simply letting this loop run
				 * to completion, as we would eventually hit
				 * the end of the dirty list. However, we
				 * want to avoid running the length of the
				 * dirty list unnecessarily (it might be quite
				 * long), so we catch this as early as
				 * possible by detecting the hash marker. In
				 * this case, we simply set dvar to NULL and
				 * break; the conditional after the loop will
				 * send us back to top.
				 */
				dvar = NULL;
				break;
			}

			goto next;
		}

		if (dtuple->dtt_nkeys != nkeys)
			goto next;

		for (i = 0; i < nkeys; i++, dkey++) {
			if (dkey->dttk_size != key[i].dttk_size)
				goto next; /* size or type mismatch */

			if (dkey->dttk_size != 0) {
				if (dtrace_bcmp(
				    (void *)(uintptr_t)key[i].dttk_value,
				    (void *)(uintptr_t)dkey->dttk_value,
				    dkey->dttk_size))
					goto next;
			} else {
				if (dkey->dttk_value != key[i].dttk_value)
					goto next;
			}
		}

		if (op != DTRACE_DYNVAR_DEALLOC)
			return (dvar);

		ASSERT(dvar->dtdv_next == NULL ||
		    dvar->dtdv_next->dtdv_hashval != DTRACE_DYNHASH_FREE);

		if (prev != NULL) {
			ASSERT(hash[bucket].dtdh_chain != dvar);
			ASSERT(start != dvar);
			ASSERT(prev->dtdv_next == dvar);
			prev->dtdv_next = dvar->dtdv_next;
		} else {
			if (dtrace_casptr(&hash[bucket].dtdh_chain,
			    start, dvar->dtdv_next) != start) {
				/*
				 * We have failed to atomically swing the
				 * hash table head pointer, presumably because
				 * of a conflicting allocation on another CPU.
				 * We need to reread the hash chain and try
				 * again.
				 */
				goto top;
			}
		}

		dtrace_membar_producer();

		/*
		 * Now set the hash value to indicate that it's free.
		 */
		ASSERT(hash[bucket].dtdh_chain != dvar);
		dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;

		dtrace_membar_producer();

		/*
		 * Set the next pointer to point at the dirty list, and
		 * atomically swing the dirty pointer to the newly freed dvar.
		 */
		do {
			next = dcpu->dtdsc_dirty;
			dvar->dtdv_next = next;
		} while (dtrace_casptr(&dcpu->dtdsc_dirty, next, dvar) != next);

		/*
		 * Finally, unlock this hash bucket.
		 */
		ASSERT(hash[bucket].dtdh_lock == lock);
		ASSERT(lock & 1);
		hash[bucket].dtdh_lock++;

		return (NULL);
next:
		prev = dvar;
		continue;
	}

	if (dvar == NULL) {
		/*
		 * If dvar is NULL, it is because we went off the rails:
		 * one of the elements that we traversed in the hash chain
		 * was deleted while we were traversing it. In this case,
		 * we assert that we aren't doing a dealloc (deallocs lock
		 * the hash bucket to prevent themselves from racing with
		 * one another), and retry the hash chain traversal.
		 */
		ASSERT(op != DTRACE_DYNVAR_DEALLOC);
		goto top;
	}

	if (op != DTRACE_DYNVAR_ALLOC) {
		/*
		 * If we are not to allocate a new variable, we want to
		 * return NULL now. Before we return, check that the value
		 * of the lock word hasn't changed. If it has, we may have
		 * seen an inconsistent snapshot.
		 */
		if (op == DTRACE_DYNVAR_NOALLOC) {
			if (hash[bucket].dtdh_lock != lock)
				goto top;
		} else {
			ASSERT(op == DTRACE_DYNVAR_DEALLOC);
			ASSERT(hash[bucket].dtdh_lock == lock);
			ASSERT(lock & 1);
			hash[bucket].dtdh_lock++;
		}

		return (NULL);
	}

	/*
	 * We need to allocate a new dynamic variable. The size we need is the
	 * size of dtrace_dynvar plus the size of nkeys dtrace_key_t's plus the
	 * size of any auxiliary key data (rounded up to 8-byte alignment) plus
	 * the size of any referred-to data (dsize). We then round the final
	 * size up to the chunksize for allocation.
	 */
	for (ksize = 0, i = 0; i < nkeys; i++)
		ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t));

	/*
	 * This should be pretty much impossible, but could happen if, say,
	 * strange DIF specified the tuple. Ideally, this should be an
	 * assertion and not an error condition -- but that requires that the
	 * chunksize calculation in dtrace_difo_chunksize() be absolutely
	 * bullet-proof. (That is, it must not be able to be fooled by
	 * malicious DIF.) Given the lack of backwards branches in DIF,
	 * solving this would presumably not amount to solving the Halting
	 * Problem -- but it still seems awfully hard.
	 */
	if (sizeof (dtrace_dynvar_t) + sizeof (dtrace_key_t) * (nkeys - 1) +
	    ksize + dsize > chunksize) {
		dcpu->dtdsc_drops++;
		return (NULL);
	}

	nstate = DTRACE_DSTATE_EMPTY;

	do {
retry:
		free = dcpu->dtdsc_free;

		if (free == NULL) {
			dtrace_dynvar_t *clean = dcpu->dtdsc_clean;
			void *rval;

			if (clean == NULL) {
				/*
				 * We're out of dynamic variable space on
				 * this CPU. Unless we have tried all CPUs,
				 * we'll try to allocate from a different
				 * CPU.
				 */
				switch (dstate->dtds_state) {
				case DTRACE_DSTATE_CLEAN: {
					void *sp = &dstate->dtds_state;

					if (++cpu >= (int)NCPU)
						cpu = 0;

					if (dcpu->dtdsc_dirty != NULL &&
					    nstate == DTRACE_DSTATE_EMPTY)
						nstate = DTRACE_DSTATE_DIRTY;

					if (dcpu->dtdsc_rinsing != NULL)
						nstate = DTRACE_DSTATE_RINSING;

					dcpu = &dstate->dtds_percpu[cpu];

					if (cpu != me)
						goto retry;

					(void) dtrace_cas32(sp,
					    DTRACE_DSTATE_CLEAN, nstate);

					/*
					 * To increment the correct bean
					 * counter, take another lap.
					 */
					goto retry;
				}

				case DTRACE_DSTATE_DIRTY:
					dcpu->dtdsc_dirty_drops++;
					break;

				case DTRACE_DSTATE_RINSING:
					dcpu->dtdsc_rinsing_drops++;
					break;

				case DTRACE_DSTATE_EMPTY:
					dcpu->dtdsc_drops++;
					break;
				}

				DTRACE_CPUFLAG_SET(CPU_DTRACE_DROP);
				return (NULL);
			}

			/*
			 * The clean list appears to be non-empty. We want to
			 * move the clean list to the free list; we start by
			 * moving the clean pointer aside.
			 */
			if (dtrace_casptr(&dcpu->dtdsc_clean,
			    clean, NULL) != clean) {
				/*
				 * We are in one of two situations:
				 *
				 * (a) The clean list was switched to the
				 *     free list by another CPU.
				 *
				 * (b) The clean list was added to by the
				 *     cleansing cyclic.
				 *
				 * In either of these situations, we can
				 * just reattempt the free list allocation.
				 */
				goto retry;
			}

			ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE);

			/*
			 * Now we'll move the clean list to the free list.
			 * It's impossible for this to fail: the only way
			 * the free list can be updated is through this
			 * code path, and only one CPU can own the clean list.
			 * Thus, it would only be possible for this to fail if
			 * this code were racing with dtrace_dynvar_clean().
			 * (That is, if dtrace_dynvar_clean() updated the clean
			 * list, and we ended up racing to update the free
			 * list.) This race is prevented by the dtrace_sync()
			 * in dtrace_dynvar_clean() -- which flushes the
			 * owners of the clean lists out before resetting
			 * the clean lists.
			 */
			rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean);
			ASSERT(rval == NULL);
			goto retry;
		}

		dvar = free;
		new_free = dvar->dtdv_next;
	} while (dtrace_casptr(&dcpu->dtdsc_free, free, new_free) != free);

	/*
	 * We have now allocated a new chunk. We copy the tuple keys into the
	 * tuple array and copy any referenced key data into the data space
	 * following the tuple array. As we do this, we relocate dttk_value
	 * in the final tuple to point to the key data address in the chunk.
	 */
	kdata = (uintptr_t)&dvar->dtdv_tuple.dtt_key[nkeys];
	dvar->dtdv_data = (void *)(kdata + ksize);
	dvar->dtdv_tuple.dtt_nkeys = nkeys;

	for (i = 0; i < nkeys; i++) {
		dtrace_key_t *dkey = &dvar->dtdv_tuple.dtt_key[i];
		size_t kesize = key[i].dttk_size;

		if (kesize != 0) {
			dtrace_bcopy(
			    (const void *)(uintptr_t)key[i].dttk_value,
			    (void *)kdata, kesize);
			dkey->dttk_value = kdata;
			kdata += P2ROUNDUP(kesize, sizeof (uint64_t));
		} else {
			dkey->dttk_value = key[i].dttk_value;
		}

		dkey->dttk_size = kesize;
	}

	ASSERT(dvar->dtdv_hashval == DTRACE_DYNHASH_FREE);
	dvar->dtdv_hashval = hashval;
	dvar->dtdv_next = start;

	if (dtrace_casptr(&hash[bucket].dtdh_chain, start, dvar) == start)
		return (dvar);

	/*
	 * The cas has failed. Either another CPU is adding an element to
	 * this hash chain, or another CPU is deleting an element from this
	 * hash chain. The simplest way to deal with both of these cases
	 * (though not necessarily the most efficient) is to free our
	 * allocated block and tail-call ourselves. Note that the free is
	 * to the dirty list and _not_ to the free list. This is to prevent
	 * races with allocators, above.
	 */
	dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;

	dtrace_membar_producer();

	do {
		free = dcpu->dtdsc_dirty;
		dvar->dtdv_next = free;
	} while (dtrace_casptr(&dcpu->dtdsc_dirty, free, dvar) != free);

	return (dtrace_dynvar(dstate, nkeys, key, dsize, op, mstate, vstate));
}
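
/*
 * Illustrative sketch (not part of the original source): the hash used
 * above is Bob Jenkins' "one-at-a-time" hash, fed 16 bits at a time for
 * by-value keys and a byte at a time for by-reference keys. Stripped of
 * the load-safety machinery, the core of the algorithm looks like this:
 */
#if 0 /* illustrative only */
#include <stdint.h>
#include <stddef.h>

static uint64_t
jenkins_one_at_a_time(const uint8_t *data, size_t len)
{
	uint64_t hashval = 0;
	size_t i;

	for (i = 0; i < len; i++) {
		hashval += data[i];		/* mix in one byte */
		hashval += (hashval << 10);
		hashval ^= (hashval >> 6);
	}

	/* Final avalanche, as in dtrace_dynvar() above. */
	hashval += (hashval << 3);
	hashval ^= (hashval >> 11);
	hashval += (hashval << 15);

	return (hashval);
}
#endif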

/*ARGSUSED*/
static void
dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg)
{
#pragma unused(arg) /* __APPLE__ */
	if ((int64_t)nval < (int64_t)*oval)
		*oval = nval;
}

/*ARGSUSED*/
static void
dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg)
{
#pragma unused(arg) /* __APPLE__ */
	if ((int64_t)nval > (int64_t)*oval)
		*oval = nval;
}

static void
dtrace_aggregate_quantize(uint64_t *quanta, uint64_t nval, uint64_t incr)
{
	int i, zero = DTRACE_QUANTIZE_ZEROBUCKET;
	int64_t val = (int64_t)nval;

	if (val < 0) {
		for (i = 0; i < zero; i++) {
			if (val <= DTRACE_QUANTIZE_BUCKETVAL(i)) {
				quanta[i] += incr;
				return;
			}
		}
	} else {
		for (i = zero + 1; i < DTRACE_QUANTIZE_NBUCKETS; i++) {
			if (val < DTRACE_QUANTIZE_BUCKETVAL(i)) {
				quanta[i - 1] += incr;
				return;
			}
		}

		quanta[DTRACE_QUANTIZE_NBUCKETS - 1] += incr;
		return;
	}

	ASSERT(0);
}
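
/*
 * Worked example (annotation, not in the original source): quantize()
 * buckets are labeled with signed powers of two,
 *
 *	..., -4, -2, -1, 0, 1, 2, 4, 8, ...
 *
 * with the zero bucket at index DTRACE_QUANTIZE_ZEROBUCKET. For a
 * positive value, the loop above walks the bucket labels upward until
 * DTRACE_QUANTIZE_BUCKETVAL(i) exceeds the value and then credits
 * bucket i - 1: a value of 7 is counted in the bucket labeled 4 (which
 * covers 4..7), while a value of 8 starts the bucket labeled 8.
 */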

static void
dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr)
{
	uint64_t arg = *lquanta++;
	int32_t base = DTRACE_LQUANTIZE_BASE(arg);
	uint16_t step = DTRACE_LQUANTIZE_STEP(arg);
	uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg);
	int32_t val = (int32_t)nval, level;

	ASSERT(step != 0);
	ASSERT(levels != 0);

	if (val < base) {
		/*
		 * This is an underflow.
		 */
		lquanta[0] += incr;
		return;
	}

	level = (val - base) / step;

	if (level < levels) {
		lquanta[level + 1] += incr;
		return;
	}

	/*
	 * This is an overflow.
	 */
	lquanta[levels + 1] += incr;
}
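
/*
 * Worked example (annotation, not in the original source): with
 * base = 0, step = 10 and levels = 10, a value of 37 computes
 * level = (37 - 0) / 10 = 3 and is counted in lquanta[4]; any value
 * below 0 lands in the underflow bucket lquanta[0], and any value of
 * 100 or more computes level >= levels and lands in the overflow
 * bucket lquanta[11].
 */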

static int
dtrace_aggregate_llquantize_bucket(int16_t factor, int16_t low, int16_t high,
    int16_t nsteps, int64_t value)
{
	int64_t this = 1, last, next;
	int base = 1, order;

	for (order = 0; order < low; ++order)
		this *= factor;

	/*
	 * If our value is less than our factor taken to the power of the
	 * low order of magnitude, it goes into the zeroth bucket.
	 */
	if (value < this)
		return 0;
	else
		last = this;

	for (this *= factor; order <= high; ++order) {
		int nbuckets = this > nsteps ? nsteps : this;

		/*
		 * We should not generally get log/linear quantizations
		 * with a high magnitude that allows 64-bits to
		 * overflow, but we nonetheless protect against this
		 * by explicitly checking for overflow, and clamping
		 * our value accordingly.
		 */
		next = this * factor;
		if (next < this) {
			value = this - 1;
		}

		/*
		 * If our value lies within this order of magnitude,
		 * determine its position by taking the offset within
		 * the order of magnitude, dividing by the bucket
		 * width, and adding to our (accumulated) base.
		 */
		if (value < this) {
			return (base + (value - last) / (this / nbuckets));
		}

		base += nbuckets - (nbuckets / factor);
		last = this;
		this = next;
	}

	/*
	 * Our value is greater than or equal to our factor taken to the
	 * power of one plus the high magnitude -- return the top bucket.
	 */
	return base;
}

static void
dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr)
{
	uint64_t arg = *llquanta++;
	uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg);
	uint16_t low = DTRACE_LLQUANTIZE_LOW(arg);
	uint16_t high = DTRACE_LLQUANTIZE_HIGH(arg);
	uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg);

	llquanta[dtrace_aggregate_llquantize_bucket(factor, low, high, nsteps, nval)] += incr;
}
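
/*
 * Worked example (annotation, not in the original source): with
 * factor = 10, low = 0, high = 2 and nsteps = 10, bucket 0 holds
 * values below 10^0 = 1; buckets 1-9 cover [1,10) in steps of 1;
 * buckets 10-18 cover [10,100) in steps of 10; buckets 19-27 cover
 * [100,1000) in steps of 100; and bucket 28 holds everything at or
 * above 10^3. A value of 37, for instance, falls in the [10,100)
 * order of magnitude and maps to bucket 10 + (37 - 10)/10 = 12.
 */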

/*ARGSUSED*/
static void
dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg)
{
#pragma unused(arg) /* __APPLE__ */
	data[0]++;
	data[1] += nval;
}

/*ARGSUSED*/
static void
dtrace_aggregate_stddev(uint64_t *data, uint64_t nval, uint64_t arg)
{
#pragma unused(arg) /* __APPLE__ */
	int64_t snval = (int64_t)nval;
	uint64_t tmp[2];

	data[0]++;
	data[1] += nval;

	/*
	 * What we want to say here is:
	 *
	 * data[2] += nval * nval;
	 *
	 * But given that nval is 64-bit, we could easily overflow, so
	 * we do this as 128-bit arithmetic.
	 */
	if (snval < 0)
		snval = -snval;

	dtrace_multiply_128((uint64_t)snval, (uint64_t)snval, tmp);
	dtrace_add_128(data + 2, tmp, data + 2);
}
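
/*
 * Annotation (not in the original source): nothing here divides or takes
 * a square root -- that folding is left to the consumer. With
 * n = data[0], sum = data[1], and sumsq = the 128-bit accumulator in
 * data[2..3], the usual formulation reported from these accumulators is
 *
 *	avg    = sum / n
 *	stddev = sqrt(sumsq / n - (sum / n)^2)
 *
 * i.e. the textbook E[x^2] - E[x]^2 form of the variance.
 */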

/*ARGSUSED*/
static void
dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg)
{
#pragma unused(nval, arg) /* __APPLE__ */
	*oval = *oval + 1;
}

/*ARGSUSED*/
static void
dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg)
{
#pragma unused(arg) /* __APPLE__ */
	*oval += nval;
}

/*
 * Aggregate given the tuple in the principal data buffer, and the aggregating
 * action denoted by the specified dtrace_aggregation_t. The aggregation
 * buffer is specified as the buf parameter. This routine does not return
 * failure; if there is no space in the aggregation buffer, the data will be
 * dropped, and a corresponding counter incremented.
 */
static void
dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf,
    intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg)
{
#pragma unused(arg)
	dtrace_recdesc_t *rec = &agg->dtag_action.dta_rec;
	uint32_t i, ndx, size, fsize;
	uint32_t align = sizeof (uint64_t) - 1;
	dtrace_aggbuffer_t *agb;
	dtrace_aggkey_t *key;
	uint32_t hashval = 0, limit, isstr;
	caddr_t tomax, data, kdata;
	dtrace_actkind_t action;
	dtrace_action_t *act;
	uintptr_t offs;

	if (buf == NULL)
		return;

	if (!agg->dtag_hasarg) {
		/*
		 * Currently, only quantize() and lquantize() take additional
		 * arguments, and they have the same semantics: an increment
		 * value that defaults to 1 when not present. If additional
		 * aggregating actions take arguments, the setting of the
		 * default argument value will presumably have to become more
		 * sophisticated...
		 */
		arg = 1;
	}

	action = agg->dtag_action.dta_kind - DTRACEACT_AGGREGATION;
	size = rec->dtrd_offset - agg->dtag_base;
	fsize = size + rec->dtrd_size;

	ASSERT(dbuf->dtb_tomax != NULL);
	data = dbuf->dtb_tomax + offset + agg->dtag_base;

	if ((tomax = buf->dtb_tomax) == NULL) {
		dtrace_buffer_drop(buf);
		return;
	}

	/*
	 * The metastructure is always at the bottom of the buffer.
	 */
	agb = (dtrace_aggbuffer_t *)(tomax + buf->dtb_size -
	    sizeof (dtrace_aggbuffer_t));

	if (buf->dtb_offset == 0) {
		/*
		 * We just kludge up approximately 1/8th of the size to be
		 * buckets. If this guess ends up being routinely
		 * off-the-mark, we may need to dynamically readjust this
		 * based on past performance.
		 */
		uintptr_t hashsize = (buf->dtb_size >> 3) / sizeof (uintptr_t);

		if ((uintptr_t)agb - hashsize * sizeof (dtrace_aggkey_t *) <
		    (uintptr_t)tomax || hashsize == 0) {
			/*
			 * We've been given a ludicrously small buffer;
			 * increment our drop count and leave.
			 */
			dtrace_buffer_drop(buf);
			return;
		}

		/*
		 * And now, a pathetic attempt to try to get an odd (or
		 * perchance, a prime) hash size for better hash distribution.
		 */
		if (hashsize > (DTRACE_AGGHASHSIZE_SLEW << 3))
			hashsize -= DTRACE_AGGHASHSIZE_SLEW;

		agb->dtagb_hashsize = hashsize;
		agb->dtagb_hash = (dtrace_aggkey_t **)((uintptr_t)agb -
		    agb->dtagb_hashsize * sizeof (dtrace_aggkey_t *));
		agb->dtagb_free = (uintptr_t)agb->dtagb_hash;

		for (i = 0; i < agb->dtagb_hashsize; i++)
			agb->dtagb_hash[i] = NULL;
	}

	ASSERT(agg->dtag_first != NULL);
	ASSERT(agg->dtag_first->dta_intuple);

	/*
	 * Calculate the hash value based on the key. Note that we _don't_
	 * include the aggid in the hashing (but we will store it as part of
	 * the key). The hashing algorithm is Bob Jenkins' "One-at-a-time"
	 * algorithm: a simple, quick algorithm that has no known funnels, and
	 * gets good distribution in practice. The efficacy of the hashing
	 * algorithm (and a comparison with other algorithms) may be found by
	 * running the ::dtrace_aggstat MDB dcmd.
	 */
	for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
		i = act->dta_rec.dtrd_offset - agg->dtag_base;
		limit = i + act->dta_rec.dtrd_size;
		ASSERT(limit <= size);
		isstr = DTRACEACT_ISSTRING(act);

		for (; i < limit; i++) {
			hashval += data[i];
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			if (isstr && data[i] == '\0')
				break;
		}
	}

	hashval += (hashval << 3);
	hashval ^= (hashval >> 11);
	hashval += (hashval << 15);

	/*
	 * Yes, the divide here is expensive -- but it's generally the least
	 * of the performance issues given the amount of data that we iterate
	 * over to compute hash values, compare data, etc.
	 */
	ndx = hashval % agb->dtagb_hashsize;

	for (key = agb->dtagb_hash[ndx]; key != NULL; key = key->dtak_next) {
		ASSERT((caddr_t)key >= tomax);
		ASSERT((caddr_t)key < tomax + buf->dtb_size);

		if (hashval != key->dtak_hashval || key->dtak_size != size)
			continue;

		kdata = key->dtak_data;
		ASSERT(kdata >= tomax && kdata < tomax + buf->dtb_size);

		for (act = agg->dtag_first; act->dta_intuple;
		    act = act->dta_next) {
			i = act->dta_rec.dtrd_offset - agg->dtag_base;
			limit = i + act->dta_rec.dtrd_size;
			ASSERT(limit <= size);
			isstr = DTRACEACT_ISSTRING(act);

			for (; i < limit; i++) {
				if (kdata[i] != data[i])
					goto next;

				if (isstr && data[i] == '\0')
					break;
			}
		}

		if (action != key->dtak_action) {
			/*
			 * We are aggregating on the same value in the same
			 * aggregation with two different aggregating actions.
			 * (This should have been picked up in the compiler,
			 * so we may be dealing with errant or devious DIF.)
			 * This is an error condition; we indicate as much,
			 * and return.
			 */
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			return;
		}

		/*
		 * This is a hit: we need to apply the aggregator to
		 * the value at this key.
		 */
		agg->dtag_aggregate((uint64_t *)(kdata + size), expr, arg);
		return;
next:
		continue;
	}

	/*
	 * We didn't find it. We need to allocate some zero-filled space,
	 * link it into the hash table appropriately, and apply the aggregator
	 * to the (zero-filled) value.
	 */
	offs = buf->dtb_offset;
	while (offs & (align - 1))
		offs += sizeof (uint32_t);

	/*
	 * If we don't have enough room to both allocate a new key _and_
	 * its associated data, increment the drop count and return.
	 */
	if ((uintptr_t)tomax + offs + fsize >
	    agb->dtagb_free - sizeof (dtrace_aggkey_t)) {
		dtrace_buffer_drop(buf);
		return;
	}

	/*CONSTCOND*/
	ASSERT(!(sizeof (dtrace_aggkey_t) & (sizeof (uintptr_t) - 1)));
	key = (dtrace_aggkey_t *)(agb->dtagb_free - sizeof (dtrace_aggkey_t));
	agb->dtagb_free -= sizeof (dtrace_aggkey_t);

	key->dtak_data = kdata = tomax + offs;
	buf->dtb_offset = offs + fsize;

	/*
	 * Now copy the data across.
	 */
	*((dtrace_aggid_t *)kdata) = agg->dtag_id;

	for (i = sizeof (dtrace_aggid_t); i < size; i++)
		kdata[i] = data[i];

	/*
	 * Because strings are not zeroed out by default, we need to iterate
	 * looking for actions that store strings, and we need to explicitly
	 * pad these strings out with zeroes.
	 */
	for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
		int nul;

		if (!DTRACEACT_ISSTRING(act))
			continue;

		i = act->dta_rec.dtrd_offset - agg->dtag_base;
		limit = i + act->dta_rec.dtrd_size;
		ASSERT(limit <= size);

		for (nul = 0; i < limit; i++) {
			if (nul) {
				kdata[i] = '\0';
				continue;
			}

			if (data[i] != '\0')
				continue;

			nul = 1;
		}
	}

	for (i = size; i < fsize; i++)
		kdata[i] = 0;

	key->dtak_hashval = hashval;
	key->dtak_size = size;
	key->dtak_action = action;
	key->dtak_next = agb->dtagb_hash[ndx];
	agb->dtagb_hash[ndx] = key;

	/*
	 * Finally, apply the aggregator.
	 */
	*((uint64_t *)(key->dtak_data + size)) = agg->dtag_initial;
	agg->dtag_aggregate((uint64_t *)(key->dtak_data + size), expr, arg);
}
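
/*
 * Annotation (not in the original source): dtrace_aggregate() manages the
 * aggregation buffer as two regions that grow toward each other, with the
 * dtrace_aggbuffer_t metastructure and its hash table sitting at the
 * highest addresses (the "bottom" in the comment's framing above):
 *
 *	tomax                                           tomax + dtb_size
 *	[ key/data records ->  ...free...  <- aggkey slots | hash[] | agb ]
 *	    (grow up via dtb_offset)    (grow down via dtagb_free)
 *
 * The space check above drops the record (bumping the drop count) rather
 * than letting the two regions collide.
 */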

/*
 * Given consumer state, this routine finds a speculation in the INACTIVE
 * state and transitions it into the ACTIVE state. If there is no speculation
 * in the INACTIVE state, 0 is returned. In this case, no error counter is
 * incremented -- it is up to the caller to take appropriate action.
 */
static int
dtrace_speculation(dtrace_state_t *state)
{
	int i = 0;
	dtrace_speculation_state_t current;
	uint32_t *stat = &state->dts_speculations_unavail, count;

	while (i < state->dts_nspeculations) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];

		current = spec->dtsp_state;

		if (current != DTRACESPEC_INACTIVE) {
			if (current == DTRACESPEC_COMMITTINGMANY ||
			    current == DTRACESPEC_COMMITTING ||
			    current == DTRACESPEC_DISCARDING)
				stat = &state->dts_speculations_busy;
			i++;
			continue;
		}

		if (dtrace_cas32((uint32_t *)&spec->dtsp_state,
		    current, DTRACESPEC_ACTIVE) == current)
			return (i + 1);
	}

	/*
	 * We couldn't find a speculation. If we found as much as a single
	 * busy speculation buffer, we'll attribute this failure as "busy"
	 * instead of "unavail".
	 */
	do {
		count = *stat;
	} while (dtrace_cas32(stat, count, count + 1) != count);

	return (0);
}
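
/*
 * Illustrative sketch (not part of the original source): both loops above
 * are uses of the compare-and-swap idiom -- claim a slot by swinging its
 * state word, or bump a statistic without taking a lock. A minimal C11
 * rendering of the claim pattern, with hypothetical types:
 */
#if 0 /* illustrative only */
#include <stdatomic.h>
#include <stdint.h>

enum { SLOT_FREE, SLOT_CLAIMED };

/* Claim the first free slot; returns its index + 1, or 0 if none. */
static int
claim_slot(_Atomic uint32_t *slots, int nslots)
{
	int i;

	for (i = 0; i < nslots; i++) {
		uint32_t expected = SLOT_FREE;

		if (atomic_compare_exchange_strong(&slots[i],
		    &expected, SLOT_CLAIMED))
			return (i + 1);	/* 1-based, 0 means failure */
	}
	return (0);
}
#endif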

/*
 * This routine commits an active speculation. If the specified speculation
 * is not in a valid state to perform a commit(), this routine will silently do
 * nothing. The state of the specified speculation is transitioned according
 * to the state transition diagram outlined in <sys/dtrace_impl.h>
 */
static void
dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_buffer_t *src, *dest;
	uintptr_t daddr, saddr, dlimit, slimit;
	dtrace_speculation_state_t current, new = DTRACESPEC_INACTIVE;
	intptr_t offs;
	uint64_t timestamp;

	if (which == 0)
		return;

	if (which > (dtrace_specid_t)state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}

	spec = &state->dts_speculations[which - 1];
	src = &spec->dtsp_buffer[cpu];
	dest = &state->dts_buffer[cpu];

	do {
		current = spec->dtsp_state;

		if (current == DTRACESPEC_COMMITTINGMANY)
			break;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_DISCARDING:
			return;

		case DTRACESPEC_COMMITTING:
			/*
			 * This is only possible if we are (a) commit()'ing
			 * without having done a prior speculate() on this CPU
			 * and (b) racing with another commit() on a different
			 * CPU. There's nothing to do -- we just assert that
			 * our offset is 0.
			 */
			ASSERT(src->dtb_offset == 0);
			return;

		case DTRACESPEC_ACTIVE:
			new = DTRACESPEC_COMMITTING;
			break;

		case DTRACESPEC_ACTIVEONE:
			/*
			 * This speculation is active on one CPU. If our
			 * buffer offset is non-zero, we know that the one CPU
			 * must be us. Otherwise, we are committing on a
			 * different CPU from the speculate(), and we must
			 * rely on being asynchronously cleaned.
			 */
			if (src->dtb_offset != 0) {
				new = DTRACESPEC_COMMITTING;
				break;
			}
			/*FALLTHROUGH*/

		case DTRACESPEC_ACTIVEMANY:
			new = DTRACESPEC_COMMITTINGMANY;
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	/*
	 * We have set the state to indicate that we are committing this
	 * speculation. Now reserve the necessary space in the destination
	 * buffer.
	 */
	if ((offs = dtrace_buffer_reserve(dest, src->dtb_offset,
	    sizeof (uint64_t), state, NULL)) < 0) {
		dtrace_buffer_drop(dest);
		goto out;
	}

	/*
	 * We have sufficient space to copy the speculative buffer into the
	 * primary buffer. First, modify the speculative buffer, filling
	 * in the timestamp of all entries with the current time. The data
	 * must have the commit() time rather than the time it was traced,
	 * so that all entries in the primary buffer are in timestamp order.
	 */
	timestamp = dtrace_gethrtime();
	saddr = (uintptr_t)src->dtb_tomax;
	slimit = saddr + src->dtb_offset;
	while (saddr < slimit) {
		size_t size;
		dtrace_rechdr_t *dtrh = (dtrace_rechdr_t *)saddr;

		if (dtrh->dtrh_epid == DTRACE_EPIDNONE) {
			saddr += sizeof (dtrace_epid_t);
			continue;
		}

		ASSERT(dtrh->dtrh_epid <= ((dtrace_epid_t) state->dts_necbs));
		size = state->dts_ecbs[dtrh->dtrh_epid - 1]->dte_size;

		ASSERT(saddr + size <= slimit);
		ASSERT(size >= sizeof(dtrace_rechdr_t));
		ASSERT(DTRACE_RECORD_LOAD_TIMESTAMP(dtrh) == UINT64_MAX);

		DTRACE_RECORD_STORE_TIMESTAMP(dtrh, timestamp);

		saddr += size;
	}

	/*
	 * Copy the buffer across. (Note that this is a
	 * highly suboptimal bcopy(); in the unlikely event that this becomes
	 * a serious performance issue, a high-performance DTrace-specific
	 * bcopy() should obviously be invented.)
	 */
	daddr = (uintptr_t)dest->dtb_tomax + offs;
	dlimit = daddr + src->dtb_offset;
	saddr = (uintptr_t)src->dtb_tomax;

	/*
	 * First, the aligned portion.
	 */
	while (dlimit - daddr >= sizeof (uint64_t)) {
		*((uint64_t *)daddr) = *((uint64_t *)saddr);

		daddr += sizeof (uint64_t);
		saddr += sizeof (uint64_t);
	}

	/*
	 * Now any left-over bit...
	 */
	while (dlimit - daddr)
		*((uint8_t *)daddr++) = *((uint8_t *)saddr++);

	/*
	 * Finally, commit the reserved space in the destination buffer.
	 */
	dest->dtb_offset = offs + src->dtb_offset;

out:
	/*
	 * If we're lucky enough to be the only active CPU on this speculation
	 * buffer, we can just set the state back to DTRACESPEC_INACTIVE.
	 */
	if (current == DTRACESPEC_ACTIVE ||
	    (current == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) {
		uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state,
		    DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE);
#pragma unused(rval) /* __APPLE__ */

		ASSERT(rval == DTRACESPEC_COMMITTING);
	}

	src->dtb_offset = 0;
	src->dtb_xamot_drops += src->dtb_drops;
	src->dtb_drops = 0;
}

/*
 * This routine discards an active speculation. If the specified speculation
 * is not in a valid state to perform a discard(), this routine will silently
 * do nothing. The state of the specified speculation is transitioned
 * according to the state transition diagram outlined in <sys/dtrace_impl.h>
 */
static void
dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_speculation_state_t current, new = DTRACESPEC_INACTIVE;
	dtrace_buffer_t *buf;

	if (which == 0)
		return;

	if (which > (dtrace_specid_t)state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}

	spec = &state->dts_speculations[which - 1];
	buf = &spec->dtsp_buffer[cpu];

	do {
		current = spec->dtsp_state;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_COMMITTINGMANY:
		case DTRACESPEC_COMMITTING:
		case DTRACESPEC_DISCARDING:
			return;

		case DTRACESPEC_ACTIVE:
		case DTRACESPEC_ACTIVEMANY:
			new = DTRACESPEC_DISCARDING;
			break;

		case DTRACESPEC_ACTIVEONE:
			if (buf->dtb_offset != 0) {
				new = DTRACESPEC_INACTIVE;
			} else {
				new = DTRACESPEC_DISCARDING;
			}
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	buf->dtb_offset = 0;
	buf->dtb_drops = 0;
}

/*
 * Note: not called from probe context. This function is called
 * asynchronously from cross call context to clean any speculations that are
 * in the COMMITTINGMANY or DISCARDING states. These speculations may not be
 * transitioned back to the INACTIVE state until all CPUs have cleaned the
 * speculation.
 */
static void
dtrace_speculation_clean_here(dtrace_state_t *state)
{
	dtrace_icookie_t cookie;
	processorid_t cpu = CPU->cpu_id;
	dtrace_buffer_t *dest = &state->dts_buffer[cpu];
	dtrace_specid_t i;

	cookie = dtrace_interrupt_disable();

	if (dest->dtb_tomax == NULL) {
		dtrace_interrupt_enable(cookie);
		return;
	}

	for (i = 0; i < (dtrace_specid_t)state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];
		dtrace_buffer_t *src = &spec->dtsp_buffer[cpu];

		if (src->dtb_tomax == NULL)
			continue;

		if (spec->dtsp_state == DTRACESPEC_DISCARDING) {
			src->dtb_offset = 0;
			continue;
		}

		if (spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
			continue;

		if (src->dtb_offset == 0)
			continue;

		dtrace_speculation_commit(state, cpu, i + 1);
	}

	dtrace_interrupt_enable(cookie);
}

/*
 * Note: not called from probe context. This function is called
 * asynchronously (and at a regular interval) to clean any speculations that
 * are in the COMMITTINGMANY or DISCARDING states. If it discovers that there
 * is work to be done, it cross calls all CPUs to perform that work;
 * COMMITTINGMANY and DISCARDING speculations may not be transitioned back to
 * the INACTIVE state until they have been cleaned by all CPUs.
 */
static void
dtrace_speculation_clean(dtrace_state_t *state)
{
	int work = 0;
	uint32_t rv;
	dtrace_specid_t i;

	for (i = 0; i < (dtrace_specid_t)state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];

		ASSERT(!spec->dtsp_cleaning);

		if (spec->dtsp_state != DTRACESPEC_DISCARDING &&
		    spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
			continue;

		work++;
		spec->dtsp_cleaning = 1;
	}

	if (!work)
		return;

	dtrace_xcall(DTRACE_CPUALL,
	    (dtrace_xcall_t)dtrace_speculation_clean_here, state);

	/*
	 * We now know that all CPUs have committed or discarded their
	 * speculation buffers, as appropriate. We can now set the state
	 * to inactive.
	 */
	for (i = 0; i < (dtrace_specid_t)state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];
		dtrace_speculation_state_t current, new;

		if (!spec->dtsp_cleaning)
			continue;

		current = spec->dtsp_state;
		ASSERT(current == DTRACESPEC_DISCARDING ||
		    current == DTRACESPEC_COMMITTINGMANY);

		new = DTRACESPEC_INACTIVE;

		rv = dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new);
		ASSERT(rv == current);
		spec->dtsp_cleaning = 0;
	}
}

/*
 * Called as part of a speculate() to get the speculative buffer associated
 * with a given speculation. Returns NULL if the specified speculation is not
 * in an ACTIVE state. If the speculation is in the ACTIVEONE state -- and
 * the active CPU is not the specified CPU -- the speculation will be
 * atomically transitioned into the ACTIVEMANY state.
 */
static dtrace_buffer_t *
dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_speculation_state_t current, new = DTRACESPEC_INACTIVE;
	dtrace_buffer_t *buf;

	if (which == 0)
		return (NULL);

	if (which > (dtrace_specid_t)state->dts_nspeculations) {
		cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return (NULL);
	}

	spec = &state->dts_speculations[which - 1];
	buf = &spec->dtsp_buffer[cpuid];

	do {
		current = spec->dtsp_state;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_COMMITTINGMANY:
		case DTRACESPEC_DISCARDING:
			return (NULL);

		case DTRACESPEC_COMMITTING:
			ASSERT(buf->dtb_offset == 0);
			return (NULL);

		case DTRACESPEC_ACTIVEONE:
			/*
			 * This speculation is currently active on one CPU.
			 * Check the offset in the buffer; if it's non-zero,
			 * that CPU must be us (and we leave the state alone).
			 * If it's zero, assume that we're starting on a new
			 * CPU -- and change the state to indicate that the
			 * speculation is active on more than one CPU.
			 */
			if (buf->dtb_offset != 0)
				return (buf);

			new = DTRACESPEC_ACTIVEMANY;
			break;

		case DTRACESPEC_ACTIVEMANY:
			return (buf);

		case DTRACESPEC_ACTIVE:
			new = DTRACESPEC_ACTIVEONE;
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	ASSERT(new == DTRACESPEC_ACTIVEONE || new == DTRACESPEC_ACTIVEMANY);
	return (buf);
}

/*
 * Return a string. In the event that the user lacks the privilege to access
 * arbitrary kernel memory, we copy the string out to scratch memory so that we
 * don't fail access checking.
 *
 * dtrace_dif_variable() uses this routine as a helper for various
 * builtin values such as 'execname' and 'probefunc.'
 */
static
uintptr_t
dtrace_dif_varstr(uintptr_t addr, dtrace_state_t *state,
    dtrace_mstate_t *mstate)
{
	uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
	uintptr_t ret;
	size_t strsz;

	/*
	 * The easy case: this probe is allowed to read all of memory, so
	 * we can just return this as a vanilla pointer.
	 */
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
		return (addr);

	/*
	 * This is the tougher case: we copy the string in question from
	 * kernel memory into scratch memory and return it that way: this
	 * ensures that we won't trip up when access checking tests the
	 * BYREF return value.
	 */
	strsz = dtrace_strlen((char *)addr, size) + 1;

	if (mstate->dtms_scratch_ptr + strsz >
	    mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
		return (0);
	}

	dtrace_strcpy((const void *)addr, (void *)mstate->dtms_scratch_ptr,
	    strsz);
	ret = mstate->dtms_scratch_ptr;
	mstate->dtms_scratch_ptr += strsz;
	return (ret);
}
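
/*
 * Illustrative sketch (not part of the original source): the scratch
 * discipline used above -- check capacity, fail with CPU_DTRACE_NOSCRATCH,
 * otherwise bump-allocate -- is the pattern every string-returning builtin
 * follows. Reduced to its essentials (scratch_alloc_example is a
 * hypothetical name):
 */
#if 0 /* illustrative only */
static uintptr_t
scratch_alloc_example(dtrace_mstate_t *mstate, size_t strsz)
{
	uintptr_t ret;

	/* Fail (returning 0/NULL) if the clause's scratch region is full. */
	if (mstate->dtms_scratch_ptr + strsz >
	    mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
		return (0);
	}

	/* Bump allocation: scratch lives until the clause completes. */
	ret = mstate->dtms_scratch_ptr;
	mstate->dtms_scratch_ptr += strsz;
	return (ret);
}
#endif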
3156
2d21ac55
A
3157/*
3158 * This function implements the DIF emulator's variable lookups. The emulator
3159 * passes a reserved variable identifier and optional built-in array index.
3160 */
3161static uint64_t
3162dtrace_dif_variable(dtrace_mstate_t *mstate, dtrace_state_t *state, uint64_t v,
3163 uint64_t ndx)
3164{
3165 /*
3166 * If we're accessing one of the uncached arguments, we'll turn this
3167 * into a reference in the args array.
3168 */
3169 if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9) {
3170 ndx = v - DIF_VAR_ARG0;
3171 v = DIF_VAR_ARGS;
3172 }
3173
3174 switch (v) {
3175 case DIF_VAR_ARGS:
3176 ASSERT(mstate->dtms_present & DTRACE_MSTATE_ARGS);
3177 if (ndx >= sizeof (mstate->dtms_arg) /
3178 sizeof (mstate->dtms_arg[0])) {
fe8ab488
A
3179 /*
3180 * APPLE NOTE: Account for introduction of __dtrace_probe()
3181 */
2d21ac55 3182 int aframes = mstate->dtms_probe->dtpr_aframes + 3;
2d21ac55
A
3183 dtrace_provider_t *pv;
3184 uint64_t val;
3185
3186 pv = mstate->dtms_probe->dtpr_provider;
3187 if (pv->dtpv_pops.dtps_getargval != NULL)
3188 val = pv->dtpv_pops.dtps_getargval(pv->dtpv_arg,
3189 mstate->dtms_probe->dtpr_id,
3190 mstate->dtms_probe->dtpr_arg, ndx, aframes);
b0d623f7 3191 /* Special case access of arg5 as passed to dtrace_probe_error() (which see.) */
2d21ac55 3192 else if (mstate->dtms_probe->dtpr_id == dtrace_probeid_error && ndx == 5) {
b0d623f7 3193 return ((dtrace_state_t *)(uintptr_t)(mstate->dtms_arg[0]))->dts_arg_error_illval;
2d21ac55 3194 }
fe8ab488 3195
2d21ac55
A
3196 else
3197 val = dtrace_getarg(ndx, aframes);
3198
3199 /*
3200 * This is regrettably required to keep the compiler
3201 * from tail-optimizing the call to dtrace_getarg().
3202 * The condition always evaluates to true, but the
3203 * compiler has no way of figuring that out a priori.
3204 * (None of this would be necessary if the compiler
3205 * could be relied upon to _always_ tail-optimize
3206 * the call to dtrace_getarg() -- but it can't.)
3207 */
3208 if (mstate->dtms_probe != NULL)
3209 return (val);
3210
3211 ASSERT(0);
3212 }
3213
3214 return (mstate->dtms_arg[ndx]);
3215
2d21ac55
A
3216 case DIF_VAR_UREGS: {
3217 thread_t thread;
3218
3219 if (!dtrace_priv_proc(state))
3220 return (0);
3221
3222 if ((thread = current_thread()) == NULL) {
3223 DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
3224 cpu_core[CPU->cpu_id].cpuc_dtrace_illval = 0;
3225 return (0);
3226 }
3227
3228 return (dtrace_getreg(find_user_regs(thread), ndx));
3229 }
2d21ac55 3230
fe8ab488 3231
2d21ac55
A
3232 case DIF_VAR_CURTHREAD:
3233 if (!dtrace_priv_kernel(state))
3234 return (0);
3235
3236 return ((uint64_t)(uintptr_t)current_thread());
2d21ac55
A
3237
3238 case DIF_VAR_TIMESTAMP:
3239 if (!(mstate->dtms_present & DTRACE_MSTATE_TIMESTAMP)) {
3240 mstate->dtms_timestamp = dtrace_gethrtime();
3241 mstate->dtms_present |= DTRACE_MSTATE_TIMESTAMP;
3242 }
3243 return (mstate->dtms_timestamp);
3244
2d21ac55
A
3245 case DIF_VAR_VTIMESTAMP:
3246 ASSERT(dtrace_vtime_references != 0);
3247 return (dtrace_get_thread_vtime(current_thread()));
2d21ac55
A
3248
3249 case DIF_VAR_WALLTIMESTAMP:
3250 if (!(mstate->dtms_present & DTRACE_MSTATE_WALLTIMESTAMP)) {
3251 mstate->dtms_walltimestamp = dtrace_gethrestime();
3252 mstate->dtms_present |= DTRACE_MSTATE_WALLTIMESTAMP;
3253 }
3254 return (mstate->dtms_walltimestamp);
3255
fe8ab488
A
3256 case DIF_VAR_MACHTIMESTAMP:
3257 if (!(mstate->dtms_present & DTRACE_MSTATE_MACHTIMESTAMP)) {
3258 mstate->dtms_machtimestamp = mach_absolute_time();
3259 mstate->dtms_present |= DTRACE_MSTATE_MACHTIMESTAMP;
3260 }
3261 return (mstate->dtms_machtimestamp);
3262
3e170ce0
A
3263 case DIF_VAR_CPU:
3264 return ((uint64_t) dtrace_get_thread_last_cpu_id(current_thread()));
3265
2d21ac55
A
3266 case DIF_VAR_IPL:
3267 if (!dtrace_priv_kernel(state))
3268 return (0);
3269 if (!(mstate->dtms_present & DTRACE_MSTATE_IPL)) {
3270 mstate->dtms_ipl = dtrace_getipl();
3271 mstate->dtms_present |= DTRACE_MSTATE_IPL;
3272 }
3273 return (mstate->dtms_ipl);
3274
3275 case DIF_VAR_EPID:
3276 ASSERT(mstate->dtms_present & DTRACE_MSTATE_EPID);
3277 return (mstate->dtms_epid);
3278
3279 case DIF_VAR_ID:
3280 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
3281 return (mstate->dtms_probe->dtpr_id);
3282
3283 case DIF_VAR_STACKDEPTH:
3284 if (!dtrace_priv_kernel(state))
3285 return (0);
3286 if (!(mstate->dtms_present & DTRACE_MSTATE_STACKDEPTH)) {
fe8ab488
A
3287 /*
3288 * APPLE NOTE: Account for introduction of __dtrace_probe()
3289 */
2d21ac55 3290 int aframes = mstate->dtms_probe->dtpr_aframes + 3;
2d21ac55
A
3291
3292 mstate->dtms_stackdepth = dtrace_getstackdepth(aframes);
3293 mstate->dtms_present |= DTRACE_MSTATE_STACKDEPTH;
3294 }
3295 return (mstate->dtms_stackdepth);
3296
3297 case DIF_VAR_USTACKDEPTH:
3298 if (!dtrace_priv_proc(state))
3299 return (0);
3300 if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) {
3301 /*
3302 * See comment in DIF_VAR_PID.
3303 */
3304 if (DTRACE_ANCHORED(mstate->dtms_probe) &&
3305 CPU_ON_INTR(CPU)) {
3306 mstate->dtms_ustackdepth = 0;
3307 } else {
3308 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
3309 mstate->dtms_ustackdepth =
3310 dtrace_getustackdepth();
3311 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3312 }
3313 mstate->dtms_present |= DTRACE_MSTATE_USTACKDEPTH;
3314 }
3315 return (mstate->dtms_ustackdepth);
3316
3317 case DIF_VAR_CALLER:
3318 if (!dtrace_priv_kernel(state))
3319 return (0);
3320 if (!(mstate->dtms_present & DTRACE_MSTATE_CALLER)) {
fe8ab488
A
3321 /*
3322 * APPLE NOTE: Account for introduction of __dtrace_probe()
3323 */
2d21ac55 3324 int aframes = mstate->dtms_probe->dtpr_aframes + 3;
2d21ac55
A
3325
3326 if (!DTRACE_ANCHORED(mstate->dtms_probe)) {
3327 /*
3328 * If this is an unanchored probe, we are
3329 * required to go through the slow path:
3330 * dtrace_caller() only guarantees correct
3331 * results for anchored probes.
3332 */
3333 pc_t caller[2];
3334
3335 dtrace_getpcstack(caller, 2, aframes,
3336 (uint32_t *)(uintptr_t)mstate->dtms_arg[0]);
3337 mstate->dtms_caller = caller[1];
3338 } else if ((mstate->dtms_caller =
fe8ab488 3339 dtrace_caller(aframes)) == (uintptr_t)-1) {
2d21ac55
A
3340 /*
3341 * We have failed to do this the quick way;
3342 * we must resort to the slower approach of
3343 * calling dtrace_getpcstack().
3344 */
3345 pc_t caller;
3346
3347 dtrace_getpcstack(&caller, 1, aframes, NULL);
3348 mstate->dtms_caller = caller;
3349 }
3350
3351 mstate->dtms_present |= DTRACE_MSTATE_CALLER;
3352 }
3353 return (mstate->dtms_caller);
3354
3355 case DIF_VAR_UCALLER:
3356 if (!dtrace_priv_proc(state))
3357 return (0);
3358
3359 if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) {
3360 uint64_t ustack[3];
3361
3362 /*
3363 * dtrace_getupcstack() fills in the first uint64_t
3364 * with the current PID. The second uint64_t will
3365 * be the program counter at user-level. The third
3366 * uint64_t will contain the caller, which is what
3367 * we're after.
3368 */
fe8ab488 3369 ustack[2] = 0;
b0d623f7 3370 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
2d21ac55 3371 dtrace_getupcstack(ustack, 3);
b0d623f7 3372 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
2d21ac55
A
3373 mstate->dtms_ucaller = ustack[2];
3374 mstate->dtms_present |= DTRACE_MSTATE_UCALLER;
3375 }
3376
3377 return (mstate->dtms_ucaller);
3378
3379 case DIF_VAR_PROBEPROV:
3380 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
b0d623f7
A
3381 return (dtrace_dif_varstr(
3382 (uintptr_t)mstate->dtms_probe->dtpr_provider->dtpv_name,
3383 state, mstate));
2d21ac55
A
3384
3385 case DIF_VAR_PROBEMOD:
3386 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
b0d623f7
A
3387 return (dtrace_dif_varstr(
3388 (uintptr_t)mstate->dtms_probe->dtpr_mod,
3389 state, mstate));
2d21ac55
A
3390
3391 case DIF_VAR_PROBEFUNC:
3392 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
b0d623f7
A
3393 return (dtrace_dif_varstr(
3394 (uintptr_t)mstate->dtms_probe->dtpr_func,
3395 state, mstate));
2d21ac55
A
3396
3397 case DIF_VAR_PROBENAME:
3398 ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
b0d623f7
A
3399 return (dtrace_dif_varstr(
3400 (uintptr_t)mstate->dtms_probe->dtpr_name,
3401 state, mstate));
2d21ac55 3402
2d21ac55 3403 case DIF_VAR_PID:
935ed37a 3404 if (!dtrace_priv_proc_relaxed(state))
2d21ac55
A
3405 return (0);
3406
3407 /*
3408 * Note that we are assuming that an unanchored probe is
3409 * always due to a high-level interrupt. (And we're assuming
3410 * that there is only a single high level interrupt.)
3411 */
3412 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3413 /* Anchored probe that fires while on an interrupt accrues to process 0 */
3414 return 0;
3415
39236c6e 3416 return ((uint64_t)dtrace_proc_selfpid());
2d21ac55 3417
2d21ac55 3418 case DIF_VAR_PPID:
935ed37a 3419 if (!dtrace_priv_proc_relaxed(state))
2d21ac55
A
3420 return (0);
3421
3422 /*
3423 * See comment in DIF_VAR_PID.
3424 */
3425 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3426 return (0);
3427
39236c6e 3428 return ((uint64_t)dtrace_proc_selfppid());
2d21ac55 3429
2d21ac55 3430 case DIF_VAR_TID:
b0d623f7
A
3431 /* We do not need to check for null current_thread() */
3432 return thread_tid(current_thread()); /* globally unique */
3433
3434 case DIF_VAR_PTHREAD_SELF:
3435 if (!dtrace_priv_proc(state))
3436 return (0);
3437
3438 /* Not currently supported, but we should be able to delta the dispatchqaddr and dispatchqoffset to get pthread_self */
3439 return 0;
3440
3441 case DIF_VAR_DISPATCHQADDR:
3442 if (!dtrace_priv_proc(state))
2d21ac55
A
3443 return (0);
3444
b0d623f7
A
3445 /* We do not need to check for null current_thread() */
3446 return thread_dispatchqaddr(current_thread());
2d21ac55 3447
2d21ac55
A
3448 case DIF_VAR_EXECNAME:
3449 {
3450 char *xname = (char *)mstate->dtms_scratch_ptr;
3451 size_t scratch_size = MAXCOMLEN+1;
3452
3453 /* The scratch allocation's lifetime is that of the clause. */
b0d623f7
A
3454 if (!DTRACE_INSCRATCH(mstate, scratch_size)) {
3455 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
2d21ac55 3456 return 0;
b0d623f7 3457 }
2d21ac55 3458
935ed37a 3459 if (!dtrace_priv_proc_relaxed(state))
2d21ac55
A
3460 return (0);
3461
3462 mstate->dtms_scratch_ptr += scratch_size;
3e170ce0 3463 proc_selfname( xname, scratch_size );
2d21ac55
A
3464
3465 return ((uint64_t)(uintptr_t)xname);
3466 }
2d21ac55 3467
2d21ac55 3468
2d21ac55 3469 case DIF_VAR_ZONENAME:
39236c6e
A
3470 {
3471 /* scratch_size is equal to length('global') + 1 for the null-terminator. */
3472 char *zname = (char *)mstate->dtms_scratch_ptr;
3473 size_t scratch_size = 6 + 1;
3474
2d21ac55
A
3475 if (!dtrace_priv_proc(state))
3476 return (0);
39236c6e
A
3477
3478 /* The scratch allocation's lifetime is that of the clause. */
3479 if (!DTRACE_INSCRATCH(mstate, scratch_size)) {
3480 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
3481 return 0;
3482 }
3483
3484 mstate->dtms_scratch_ptr += scratch_size;
3485
3486 /* The kernel does not provide zonename, it will always return 'global'. */
3487 strlcpy(zname, "global", scratch_size);
3488
3489 return ((uint64_t)(uintptr_t)zname);
3490 }
2d21ac55 3491
2d21ac55 3492 case DIF_VAR_UID:
39236c6e 3493 if (!dtrace_priv_proc_relaxed(state))
2d21ac55
A
3494 return (0);
3495
3496 /*
3497 * See comment in DIF_VAR_PID.
3498 */
3499 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3500 return (0);
3501
39236c6e 3502 return ((uint64_t) dtrace_proc_selfruid());
2d21ac55 3503
2d21ac55
A
3504 case DIF_VAR_GID:
3505 if (!dtrace_priv_proc(state))
3506 return (0);
3507
3508 /*
3509 * See comment in DIF_VAR_PID.
3510 */
3511 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3512 return (0);
3513
3514 if (dtrace_CRED() != NULL)
b0d623f7 3515 /* Credential does not require lazy initialization. */
2d21ac55 3516 return ((uint64_t)kauth_getgid());
b0d623f7
A
3517 else {
3518 /* proc_lock would be taken under kauth_cred_proc_ref() in kauth_cred_get(). */
3519 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
3520 return -1ULL;
3521 }
2d21ac55 3522
2d21ac55
A
3523 case DIF_VAR_ERRNO: {
3524 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
3525 if (!dtrace_priv_proc(state))
3526 return (0);
3527
3528 /*
3529 * See comment in DIF_VAR_PID.
3530 */
3531 if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
3532 return (0);
3533
b0d623f7
A
3534 if (uthread)
3535 return (uint64_t)uthread->t_dtrace_errno;
3536 else {
3537 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
3538 return -1ULL;
3539 }
2d21ac55 3540 }
2d21ac55
A
3541
3542 default:
3543 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
3544 return (0);
3545 }
3546}
3547
3548/*
3549 * Emulate the execution of DTrace ID subroutines invoked by the call opcode.
3550 * Notice that we don't bother validating the proper number of arguments or
 3551 * their types in the tuple stack. This isn't needed: all argument
 3552 * interpretation is safe thanks to our load safety -- the worst that can
3553 * happen is that a bogus program can obtain bogus results.
3554 */
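/*
 * Editor's illustration (not in the original source): the "load safety"
 * referred to above is the pattern, used throughout this file, of
 * bracketing probe-context loads so that a fault merely sets a per-CPU
 * fault flag rather than taking the kernel down:
 *
 *	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
 *	val = dtrace_load64(addr);    (a bad addr just sets CPU_DTRACE_FAULT)
 *	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
 */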
3555static void
3556dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
3557 dtrace_key_t *tupregs, int nargs,
3558 dtrace_mstate_t *mstate, dtrace_state_t *state)
3559{
3560 volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
2d21ac55 3561 volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
b0d623f7 3562 dtrace_vstate_t *vstate = &state->dts_vstate;
2d21ac55
A
3563
3564#if !defined(__APPLE__)
3565 union {
3566 mutex_impl_t mi;
3567 uint64_t mx;
3568 } m;
3569
3570 union {
3571 krwlock_t ri;
3572 uintptr_t rw;
3573 } r;
3574#else
b0d623f7 3575/* FIXME: awaits lock/mutex work */
2d21ac55
A
3576#endif /* __APPLE__ */
3577
3578 switch (subr) {
3579 case DIF_SUBR_RAND:
3580 regs[rd] = (dtrace_gethrtime() * 2416 + 374441) % 1771875;
3581 break;
3582
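/*
 * Editor's note (illustrative, not in the original source): the
 * expression above is a quick linear-congruential-style scramble of the
 * high-resolution timestamp, yielding a pseudo-random value in
 * [0, 1771875). It is emphatically not cryptographic; a D script doing
 * e.g. rand() % 100 gets a cheap (and slightly biased) value in [0, 100).
 */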
3583#if !defined(__APPLE__)
3584 case DIF_SUBR_MUTEX_OWNED:
b0d623f7
A
3585 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
3586 mstate, vstate)) {
fe8ab488 3587 regs[rd] = 0;
b0d623f7
A
3588 break;
3589 }
3590
2d21ac55
A
3591 m.mx = dtrace_load64(tupregs[0].dttk_value);
3592 if (MUTEX_TYPE_ADAPTIVE(&m.mi))
3593 regs[rd] = MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER;
3594 else
3595 regs[rd] = LOCK_HELD(&m.mi.m_spin.m_spinlock);
3596 break;
3597
3598 case DIF_SUBR_MUTEX_OWNER:
b0d623f7
A
3599 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
3600 mstate, vstate)) {
fe8ab488 3601 regs[rd] = 0;
b0d623f7
A
3602 break;
3603 }
3604
2d21ac55
A
3605 m.mx = dtrace_load64(tupregs[0].dttk_value);
3606 if (MUTEX_TYPE_ADAPTIVE(&m.mi) &&
3607 MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER)
3608 regs[rd] = (uintptr_t)MUTEX_OWNER(&m.mi);
3609 else
3610 regs[rd] = 0;
3611 break;
3612
3613 case DIF_SUBR_MUTEX_TYPE_ADAPTIVE:
b0d623f7
A
3614 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
3615 mstate, vstate)) {
fe8ab488 3616 regs[rd] = 0;
b0d623f7
A
3617 break;
3618 }
3619
2d21ac55
A
3620 m.mx = dtrace_load64(tupregs[0].dttk_value);
3621 regs[rd] = MUTEX_TYPE_ADAPTIVE(&m.mi);
3622 break;
3623
3624 case DIF_SUBR_MUTEX_TYPE_SPIN:
b0d623f7
A
3625 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
3626 mstate, vstate)) {
fe8ab488 3627 regs[rd] = 0;
b0d623f7
A
3628 break;
3629 }
3630
2d21ac55
A
3631 m.mx = dtrace_load64(tupregs[0].dttk_value);
3632 regs[rd] = MUTEX_TYPE_SPIN(&m.mi);
3633 break;
3634
3635 case DIF_SUBR_RW_READ_HELD: {
3636 uintptr_t tmp;
3637
b0d623f7
A
3638 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t),
3639 mstate, vstate)) {
fe8ab488 3640 regs[rd] = 0;
b0d623f7
A
3641 break;
3642 }
3643
2d21ac55
A
3644 r.rw = dtrace_loadptr(tupregs[0].dttk_value);
3645 regs[rd] = _RW_READ_HELD(&r.ri, tmp);
3646 break;
3647 }
3648
3649 case DIF_SUBR_RW_WRITE_HELD:
b0d623f7
A
3650 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t),
3651 mstate, vstate)) {
fe8ab488 3652 regs[rd] = 0;
b0d623f7
A
3653 break;
3654 }
3655
2d21ac55
A
3656 r.rw = dtrace_loadptr(tupregs[0].dttk_value);
3657 regs[rd] = _RW_WRITE_HELD(&r.ri);
3658 break;
3659
3660 case DIF_SUBR_RW_ISWRITER:
b0d623f7
A
3661 if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t),
3662 mstate, vstate)) {
fe8ab488 3663 regs[rd] = 0;
b0d623f7
A
3664 break;
3665 }
3666
2d21ac55
A
3667 r.rw = dtrace_loadptr(tupregs[0].dttk_value);
3668 regs[rd] = _RW_ISWRITER(&r.ri);
3669 break;
3670#else
b0d623f7 3671/* FIXME: awaits lock/mutex work */
2d21ac55
A
3672#endif /* __APPLE__ */
3673
3674 case DIF_SUBR_BCOPY: {
3675 /*
3676 * We need to be sure that the destination is in the scratch
3677 * region -- no other region is allowed.
3678 */
3679 uintptr_t src = tupregs[0].dttk_value;
3680 uintptr_t dest = tupregs[1].dttk_value;
3681 size_t size = tupregs[2].dttk_value;
3682
3683 if (!dtrace_inscratch(dest, size, mstate)) {
3684 *flags |= CPU_DTRACE_BADADDR;
3685 *illval = regs[rd];
3686 break;
3687 }
3688
b0d623f7 3689 if (!dtrace_canload(src, size, mstate, vstate)) {
fe8ab488 3690 regs[rd] = 0;
b0d623f7
A
3691 break;
3692 }
3693
2d21ac55
A
3694 dtrace_bcopy((void *)src, (void *)dest, size);
3695 break;
3696 }
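/*
 * Illustrative D usage (editorial; a sketch, not from the original
 * source): because the destination must lie in scratch, a well-formed
 * clause obtains it from alloca(), e.g.:
 *
 *	this->buf = (char *)alloca(16);
 *	bcopy((void *)arg0, this->buf, 16);
 */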
3697
3698 case DIF_SUBR_ALLOCA:
3699 case DIF_SUBR_COPYIN: {
3700 uintptr_t dest = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
3701 uint64_t size =
3702 tupregs[subr == DIF_SUBR_ALLOCA ? 0 : 1].dttk_value;
3703 size_t scratch_size = (dest - mstate->dtms_scratch_ptr) + size;
3704
39037602
A
3705 /*
3706 * Check whether the user can access kernel memory
3707 */
3708 if (dtrace_priv_kernel(state) == 0) {
3709 DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV);
3710 regs[rd] = 0;
3711 break;
3712 }
2d21ac55
A
3713 /*
3714 * This action doesn't require any credential checks since
3715 * probes will not activate in user contexts to which the
3716 * enabling user does not have permissions.
3717 */
b0d623f7
A
3718
3719 /*
3720 * Rounding up the user allocation size could have overflowed
3721 * a large, bogus allocation (like -1ULL) to 0.
3722 */
3723 if (scratch_size < size ||
3724 !DTRACE_INSCRATCH(mstate, scratch_size)) {
2d21ac55 3725 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
fe8ab488 3726 regs[rd] = 0;
2d21ac55
A
3727 break;
3728 }
3729
3730 if (subr == DIF_SUBR_COPYIN) {
3731 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
cf7d32b8 3732 if (dtrace_priv_proc(state))
b0d623f7 3733 dtrace_copyin(tupregs[0].dttk_value, dest, size, flags);
2d21ac55
A
3734 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3735 }
3736
3737 mstate->dtms_scratch_ptr += scratch_size;
3738 regs[rd] = dest;
3739 break;
3740 }
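/*
 * Editorial worked example of the overflow check above (not in the
 * original source): with dtms_scratch_ptr == p and a bogus size of
 * -1ULL,
 *
 *	dest         = P2ROUNDUP(p, 8)
 *	scratch_size = (dest - p) + (-1ULL)	(wraps to a small value)
 *
 * so the scratch_size < size test exposes the wrap and the request is
 * refused with CPU_DTRACE_NOSCRATCH.
 */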
3741
3742 case DIF_SUBR_COPYINTO: {
3743 uint64_t size = tupregs[1].dttk_value;
3744 uintptr_t dest = tupregs[2].dttk_value;
3745
3746 /*
3747 * This action doesn't require any credential checks since
3748 * probes will not activate in user contexts to which the
3749 * enabling user does not have permissions.
3750 */
3751 if (!dtrace_inscratch(dest, size, mstate)) {
3752 *flags |= CPU_DTRACE_BADADDR;
3753 *illval = regs[rd];
3754 break;
3755 }
3756
3757 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
cf7d32b8 3758 if (dtrace_priv_proc(state))
b0d623f7 3759 dtrace_copyin(tupregs[0].dttk_value, dest, size, flags);
2d21ac55
A
3760 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3761 break;
3762 }
3763
3764 case DIF_SUBR_COPYINSTR: {
3765 uintptr_t dest = mstate->dtms_scratch_ptr;
3766 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3767
3768 if (nargs > 1 && tupregs[1].dttk_value < size)
3769 size = tupregs[1].dttk_value + 1;
3770
3771 /*
3772 * This action doesn't require any credential checks since
3773 * probes will not activate in user contexts to which the
3774 * enabling user does not have permissions.
3775 */
b0d623f7 3776 if (!DTRACE_INSCRATCH(mstate, size)) {
2d21ac55 3777 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
fe8ab488 3778 regs[rd] = 0;
2d21ac55
A
3779 break;
3780 }
3781
3782 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
cf7d32b8 3783 if (dtrace_priv_proc(state))
b0d623f7 3784 dtrace_copyinstr(tupregs[0].dttk_value, dest, size, flags);
2d21ac55
A
3785 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3786
3787 ((char *)dest)[size - 1] = '\0';
3788 mstate->dtms_scratch_ptr += size;
3789 regs[rd] = dest;
3790 break;
3791 }
3792
2d21ac55
A
3793 case DIF_SUBR_MSGSIZE:
3794 case DIF_SUBR_MSGDSIZE: {
3795 /* Darwin does not implement SysV streams messages */
b0d623f7 3796 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
2d21ac55
A
3797 regs[rd] = 0;
3798 break;
3799 }
2d21ac55 3800
2d21ac55
A
3801 case DIF_SUBR_PROGENYOF: {
3802 pid_t pid = tupregs[0].dttk_value;
3803 struct proc *p = current_proc();
3804 int rval = 0, lim = nprocs;
3805
3806 while(p && (lim-- > 0)) {
3807 pid_t ppid;
3808
3809 ppid = (pid_t)dtrace_load32((uintptr_t)&(p->p_pid));
3810 if (*flags & CPU_DTRACE_FAULT)
3811 break;
3812
3813 if (ppid == pid) {
3814 rval = 1;
3815 break;
3816 }
3817
3818 if (ppid == 0)
3819 break; /* Can't climb process tree any further. */
3820
3821 p = (struct proc *)dtrace_loadptr((uintptr_t)&(p->p_pptr));
3822 if (*flags & CPU_DTRACE_FAULT)
3823 break;
3824 }
3825
3826 regs[rd] = rval;
3827 break;
3828 }
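/*
 * Illustrative D usage (editorial; hedged): progenyof() is typically
 * used as a predicate to restrict probes to a process subtree, e.g.:
 *
 *	syscall:::entry
 *	/progenyof($target)/
 *	{ @[probefunc] = count(); }
 */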
2d21ac55
A
3829
3830 case DIF_SUBR_SPECULATION:
3831 regs[rd] = dtrace_speculation(state);
3832 break;
3833
fe8ab488 3834
2d21ac55
A
3835 case DIF_SUBR_COPYOUT: {
3836 uintptr_t kaddr = tupregs[0].dttk_value;
fe8ab488 3837 user_addr_t uaddr = tupregs[1].dttk_value;
2d21ac55
A
3838 uint64_t size = tupregs[2].dttk_value;
3839
3840 if (!dtrace_destructive_disallow &&
3841 dtrace_priv_proc_control(state) &&
ecc0ceb4
A
3842 !dtrace_istoxic(kaddr, size) &&
3843 dtrace_canload(kaddr, size, mstate, vstate)) {
2d21ac55 3844 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
b0d623f7 3845 dtrace_copyout(kaddr, uaddr, size, flags);
2d21ac55
A
3846 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3847 }
3848 break;
3849 }
3850
3851 case DIF_SUBR_COPYOUTSTR: {
3852 uintptr_t kaddr = tupregs[0].dttk_value;
fe8ab488 3853 user_addr_t uaddr = tupregs[1].dttk_value;
2d21ac55 3854 uint64_t size = tupregs[2].dttk_value;
39037602 3855 size_t lim;
2d21ac55
A
3856
3857 if (!dtrace_destructive_disallow &&
3858 dtrace_priv_proc_control(state) &&
ecc0ceb4 3859 !dtrace_istoxic(kaddr, size) &&
39037602 3860 dtrace_strcanload(kaddr, size, &lim, mstate, vstate)) {
2d21ac55 3861 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
39037602 3862 dtrace_copyoutstr(kaddr, uaddr, lim, flags);
2d21ac55
A
3863 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
3864 }
3865 break;
3866 }
2d21ac55 3867
b0d623f7 3868 case DIF_SUBR_STRLEN: {
39037602 3869 size_t size = state->dts_options[DTRACEOPT_STRSIZE];
b0d623f7 3870 uintptr_t addr = (uintptr_t)tupregs[0].dttk_value;
39037602 3871 size_t lim;
b0d623f7 3872
39037602 3873 if (!dtrace_strcanload(addr, size, &lim, mstate, vstate)) {
fe8ab488 3874 regs[rd] = 0;
b0d623f7
A
3875 break;
3876 }
3877
39037602 3878 regs[rd] = dtrace_strlen((char *)addr, lim);
b0d623f7 3879
2d21ac55 3880 break;
b0d623f7 3881 }
2d21ac55
A
3882
3883 case DIF_SUBR_STRCHR:
3884 case DIF_SUBR_STRRCHR: {
3885 /*
3886 * We're going to iterate over the string looking for the
3887 * specified character. We will iterate until we have reached
3888 * the string length or we have found the character. If this
3889 * is DIF_SUBR_STRRCHR, we will look for the last occurrence
3890 * of the specified character instead of the first.
3891 */
3892 uintptr_t addr = tupregs[0].dttk_value;
39037602
A
3893 uintptr_t addr_limit;
3894 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3895 size_t lim;
2d21ac55
A
3896 char c, target = (char)tupregs[1].dttk_value;
3897
39037602
A
3898 if (!dtrace_strcanload(addr, size, &lim, mstate, vstate)) {
3899 regs[rd] = NULL;
3900 break;
3901 }
3902 addr_limit = addr + lim;
3903
3904 for (regs[rd] = 0; addr < addr_limit; addr++) {
2d21ac55
A
3905 if ((c = dtrace_load8(addr)) == target) {
3906 regs[rd] = addr;
3907
3908 if (subr == DIF_SUBR_STRCHR)
3909 break;
3910 }
3911
3912 if (c == '\0')
3913 break;
3914 }
3915
3916 break;
3917 }
3918
3919 case DIF_SUBR_STRSTR:
3920 case DIF_SUBR_INDEX:
3921 case DIF_SUBR_RINDEX: {
3922 /*
3923 * We're going to iterate over the string looking for the
3924 * specified string. We will iterate until we have reached
3925 * the string length or we have found the string. (Yes, this
3926 * is done in the most naive way possible -- but considering
3927 * that the string we're searching for is likely to be
3928 * relatively short, the complexity of Rabin-Karp or similar
3929 * hardly seems merited.)
3930 */
3931 char *addr = (char *)(uintptr_t)tupregs[0].dttk_value;
3932 char *substr = (char *)(uintptr_t)tupregs[1].dttk_value;
3933 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
3934 size_t len = dtrace_strlen(addr, size);
3935 size_t sublen = dtrace_strlen(substr, size);
3936 char *limit = addr + len, *orig = addr;
3937 int notfound = subr == DIF_SUBR_STRSTR ? 0 : -1;
3938 int inc = 1;
3939
3940 regs[rd] = notfound;
3941
b0d623f7 3942 if (!dtrace_canload((uintptr_t)addr, len + 1, mstate, vstate)) {
fe8ab488 3943 regs[rd] = 0;
b0d623f7
A
3944 break;
3945 }
3946
3947 if (!dtrace_canload((uintptr_t)substr, sublen + 1, mstate,
3948 vstate)) {
fe8ab488 3949 regs[rd] = 0;
b0d623f7
A
3950 break;
3951 }
3952
2d21ac55
A
3953 /*
3954 * strstr() and index()/rindex() have similar semantics if
3955 * both strings are the empty string: strstr() returns a
3956 * pointer to the (empty) string, and index() and rindex()
3957 * both return index 0 (regardless of any position argument).
3958 */
3959 if (sublen == 0 && len == 0) {
3960 if (subr == DIF_SUBR_STRSTR)
3961 regs[rd] = (uintptr_t)addr;
3962 else
3963 regs[rd] = 0;
3964 break;
3965 }
3966
3967 if (subr != DIF_SUBR_STRSTR) {
3968 if (subr == DIF_SUBR_RINDEX) {
3969 limit = orig - 1;
3970 addr += len;
3971 inc = -1;
3972 }
3973
3974 /*
3975 * Both index() and rindex() take an optional position
3976 * argument that denotes the starting position.
3977 */
3978 if (nargs == 3) {
3979 int64_t pos = (int64_t)tupregs[2].dttk_value;
3980
3981 /*
3982 * If the position argument to index() is
3983 * negative, Perl implicitly clamps it at
3984 * zero. This semantic is a little surprising
3985 * given the special meaning of negative
3986 * positions to similar Perl functions like
3987 * substr(), but it appears to reflect a
3988 * notion that index() can start from a
3989 * negative index and increment its way up to
3990 * the string. Given this notion, Perl's
3991 * rindex() is at least self-consistent in
3992 * that it implicitly clamps positions greater
3993 * than the string length to be the string
3994 * length. Where Perl completely loses
3995 * coherence, however, is when the specified
3996 * substring is the empty string (""). In
3997 * this case, even if the position is
3998 * negative, rindex() returns 0 -- and even if
3999 * the position is greater than the length,
4000 * index() returns the string length. These
4001 * semantics violate the notion that index()
4002 * should never return a value less than the
4003 * specified position and that rindex() should
4004 * never return a value greater than the
4005 * specified position. (One assumes that
4006 * these semantics are artifacts of Perl's
4007 * implementation and not the results of
4008 * deliberate design -- it beggars belief that
4009 * even Larry Wall could desire such oddness.)
4010 * While in the abstract one would wish for
4011 * consistent position semantics across
4012 * substr(), index() and rindex() -- or at the
4013 * very least self-consistent position
4014 * semantics for index() and rindex() -- we
4015 * instead opt to keep with the extant Perl
4016 * semantics, in all their broken glory. (Do
4017 * we have more desire to maintain Perl's
4018 * semantics than Perl does? Probably.)
4019 */
4020 if (subr == DIF_SUBR_RINDEX) {
4021 if (pos < 0) {
4022 if (sublen == 0)
4023 regs[rd] = 0;
4024 break;
4025 }
4026
b0d623f7 4027 if ((size_t)pos > len)
2d21ac55
A
4028 pos = len;
4029 } else {
4030 if (pos < 0)
4031 pos = 0;
4032
b0d623f7 4033 if ((size_t)pos >= len) {
2d21ac55
A
4034 if (sublen == 0)
4035 regs[rd] = len;
4036 break;
4037 }
4038 }
4039
4040 addr = orig + pos;
4041 }
4042 }
4043
4044 for (regs[rd] = notfound; addr != limit; addr += inc) {
4045 if (dtrace_strncmp(addr, substr, sublen) == 0) {
4046 if (subr != DIF_SUBR_STRSTR) {
4047 /*
4048 * As D index() and rindex() are
4049 * modeled on Perl (and not on awk),
4050 * we return a zero-based (and not a
4051 * one-based) index. (For you Perl
4052 * weenies: no, we're not going to add
4053 * $[ -- and shouldn't you be at a con
4054 * or something?)
4055 */
4056 regs[rd] = (uintptr_t)(addr - orig);
4057 break;
4058 }
4059
4060 ASSERT(subr == DIF_SUBR_STRSTR);
4061 regs[rd] = (uintptr_t)addr;
4062 break;
4063 }
4064 }
4065
4066 break;
4067 }
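/*
 * Editorial examples of the Perl-compatible semantics described above
 * (illustrative, not in the original source):
 *
 *	index("foobar", "ob")      => 2
 *	index("foobar", "", -5)    => 0	(negative pos clamps to 0)
 *	rindex("foobar", "", 100)  => 6	(pos > len clamps to len)
 *	strstr("foobar", "oba")    => pointer to "obar"
 */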
4068
4069 case DIF_SUBR_STRTOK: {
4070 uintptr_t addr = tupregs[0].dttk_value;
4071 uintptr_t tokaddr = tupregs[1].dttk_value;
4072 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
39037602
A
4073 uintptr_t limit, toklimit;
4074 size_t clim;
2d21ac55 4075 char *dest = (char *)mstate->dtms_scratch_ptr;
b0d623f7
A
4076 uint8_t c='\0', tokmap[32]; /* 256 / 8 */
4077 uint64_t i = 0;
b0d623f7
A
4078
4079 /*
4080 * Check both the token buffer and (later) the input buffer,
4081 * since both could be non-scratch addresses.
4082 */
39037602 4083 if (!dtrace_strcanload(tokaddr, size, &clim, mstate, vstate)) {
fe8ab488 4084 regs[rd] = 0;
b0d623f7
A
4085 break;
4086 }
39037602 4087 toklimit = tokaddr + clim;
2d21ac55 4088
b0d623f7 4089 if (!DTRACE_INSCRATCH(mstate, size)) {
2d21ac55 4090 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
fe8ab488 4091 regs[rd] = 0;
2d21ac55
A
4092 break;
4093 }
4094
fe8ab488 4095 if (addr == 0) {
2d21ac55
A
4096 /*
4097 * If the address specified is NULL, we use our saved
4098 * strtok pointer from the mstate. Note that this
4099 * means that the saved strtok pointer is _only_
4100 * valid within multiple enablings of the same probe --
4101 * it behaves like an implicit clause-local variable.
4102 */
4103 addr = mstate->dtms_strtok;
39037602 4104 limit = mstate->dtms_strtok_limit;
b0d623f7
A
4105 } else {
4106 /*
4107 * If the user-specified address is non-NULL we must
4108 * access check it. This is the only time we have
4109 * a chance to do so, since this address may reside
 4110 * in the string table of this clause -- future calls
4111 * (when we fetch addr from mstate->dtms_strtok)
4112 * would fail this access check.
4113 */
39037602
A
4114 if (!dtrace_strcanload(addr, size, &clim, mstate,
4115 vstate)) {
fe8ab488 4116 regs[rd] = 0;
b0d623f7 4117 break;
fe8ab488 4118 }
39037602 4119 limit = addr + clim;
2d21ac55
A
4120 }
4121
4122 /*
4123 * First, zero the token map, and then process the token
4124 * string -- setting a bit in the map for every character
4125 * found in the token string.
4126 */
c910b4d9 4127 for (i = 0; i < (int)sizeof (tokmap); i++)
2d21ac55
A
4128 tokmap[i] = 0;
4129
4130 for (; tokaddr < toklimit; tokaddr++) {
4131 if ((c = dtrace_load8(tokaddr)) == '\0')
4132 break;
4133
4134 ASSERT((c >> 3) < sizeof (tokmap));
4135 tokmap[c >> 3] |= (1 << (c & 0x7));
4136 }
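		/*
		 * Editorial note (illustrative, not in the original
		 * source): tokmap is a 256-bit membership set, one bit
		 * per possible byte value. For example, c == ':' (0x3a,
		 * i.e. 58) lands in tokmap[58 >> 3] == tokmap[7], at
		 * bit (58 & 0x7) == 2.
		 */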
4137
39037602 4138 for (; addr < limit; addr++) {
2d21ac55 4139 /*
39037602
A
4140 * We're looking for a character that is _not_
4141 * contained in the token string.
2d21ac55
A
4142 */
4143 if ((c = dtrace_load8(addr)) == '\0')
4144 break;
4145
4146 if (!(tokmap[c >> 3] & (1 << (c & 0x7))))
4147 break;
4148 }
4149
4150 if (c == '\0') {
4151 /*
4152 * We reached the end of the string without finding
4153 * any character that was not in the token string.
4154 * We return NULL in this case, and we set the saved
4155 * address to NULL as well.
4156 */
fe8ab488
A
4157 regs[rd] = 0;
4158 mstate->dtms_strtok = 0;
39037602 4159 mstate->dtms_strtok_limit = NULL;
2d21ac55
A
4160 break;
4161 }
4162
4163 /*
4164 * From here on, we're copying into the destination string.
4165 */
4166 for (i = 0; addr < limit && i < size - 1; addr++) {
4167 if ((c = dtrace_load8(addr)) == '\0')
4168 break;
4169
4170 if (tokmap[c >> 3] & (1 << (c & 0x7)))
4171 break;
4172
4173 ASSERT(i < size);
4174 dest[i++] = c;
4175 }
4176
4177 ASSERT(i < size);
4178 dest[i] = '\0';
4179 regs[rd] = (uintptr_t)dest;
4180 mstate->dtms_scratch_ptr += size;
4181 mstate->dtms_strtok = addr;
39037602 4182 mstate->dtms_strtok_limit = limit;
2d21ac55
A
4183 break;
4184 }
4185
4186 case DIF_SUBR_SUBSTR: {
4187 uintptr_t s = tupregs[0].dttk_value;
4188 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4189 char *d = (char *)mstate->dtms_scratch_ptr;
4190 int64_t index = (int64_t)tupregs[1].dttk_value;
4191 int64_t remaining = (int64_t)tupregs[2].dttk_value;
4192 size_t len = dtrace_strlen((char *)s, size);
4193 int64_t i = 0;
4194
b0d623f7 4195 if (!dtrace_canload(s, len + 1, mstate, vstate)) {
fe8ab488 4196 regs[rd] = 0;
b0d623f7
A
4197 break;
4198 }
2d21ac55 4199
b0d623f7 4200 if (!DTRACE_INSCRATCH(mstate, size)) {
2d21ac55 4201 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
fe8ab488 4202 regs[rd] = 0;
2d21ac55
A
4203 break;
4204 }
4205
b0d623f7
A
4206 if (nargs <= 2)
4207 remaining = (int64_t)size;
4208
2d21ac55
A
4209 if (index < 0) {
4210 index += len;
4211
4212 if (index < 0 && index + remaining > 0) {
4213 remaining += index;
4214 index = 0;
4215 }
4216 }
4217
b0d623f7
A
4218 if ((size_t)index >= len || index < 0) {
4219 remaining = 0;
4220 } else if (remaining < 0) {
4221 remaining += len - index;
4222 } else if ((uint64_t)index + (uint64_t)remaining > size) {
4223 remaining = size - index;
4224 }
fe8ab488 4225
b0d623f7
A
4226 for (i = 0; i < remaining; i++) {
4227 if ((d[i] = dtrace_load8(s + index + i)) == '\0')
2d21ac55
A
4228 break;
4229 }
b0d623f7
A
4230
4231 d[i] = '\0';
2d21ac55
A
4232
4233 mstate->dtms_scratch_ptr += size;
4234 regs[rd] = (uintptr_t)d;
4235 break;
4236 }
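/*
 * Editorial worked example (not in the original source): for
 * substr("hello", -3, 2), len == 5, so index == -3 + 5 == 2 and two
 * bytes are copied, yielding "ll".
 */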
4237
2d21ac55
A
4238 case DIF_SUBR_GETMAJOR:
4239 regs[rd] = (uintptr_t)major( (dev_t)tupregs[0].dttk_value );
4240 break;
2d21ac55 4241
2d21ac55
A
4242 case DIF_SUBR_GETMINOR:
4243 regs[rd] = (uintptr_t)minor( (dev_t)tupregs[0].dttk_value );
4244 break;
2d21ac55 4245
2d21ac55 4246 case DIF_SUBR_DDI_PATHNAME: {
fe8ab488 4247 /* APPLE NOTE: currently unsupported on Darwin */
b0d623f7 4248 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
fe8ab488 4249 regs[rd] = 0;
2d21ac55
A
4250 break;
4251 }
2d21ac55
A
4252
4253 case DIF_SUBR_STRJOIN: {
4254 char *d = (char *)mstate->dtms_scratch_ptr;
4255 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4256 uintptr_t s1 = tupregs[0].dttk_value;
4257 uintptr_t s2 = tupregs[1].dttk_value;
39037602
A
4258 uint64_t i = 0, j = 0;
4259 size_t lim1, lim2;
4260 char c;
b0d623f7 4261
39037602
A
4262 if (!dtrace_strcanload(s1, size, &lim1, mstate, vstate) ||
4263 !dtrace_strcanload(s2, size, &lim2, mstate, vstate)) {
fe8ab488 4264 regs[rd] = 0;
b0d623f7
A
4265 break;
4266 }
2d21ac55 4267
b0d623f7 4268 if (!DTRACE_INSCRATCH(mstate, size)) {
2d21ac55 4269 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
fe8ab488 4270 regs[rd] = 0;
2d21ac55
A
4271 break;
4272 }
4273
4274 for (;;) {
4275 if (i >= size) {
4276 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
fe8ab488 4277 regs[rd] = 0;
2d21ac55
A
4278 break;
4279 }
39037602
A
4280 c = (i >= lim1) ? '\0' : dtrace_load8(s1++);
4281 if ((d[i++] = c) == '\0') {
2d21ac55
A
4282 i--;
4283 break;
4284 }
4285 }
4286
4287 for (;;) {
4288 if (i >= size) {
4289 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
fe8ab488 4290 regs[rd] = 0;
2d21ac55
A
4291 break;
4292 }
39037602
A
4293 c = (j++ >= lim2) ? '\0' : dtrace_load8(s2++);
4294 if ((d[i++] = c) == '\0')
2d21ac55
A
4295 break;
4296 }
4297
4298 if (i < size) {
4299 mstate->dtms_scratch_ptr += i;
4300 regs[rd] = (uintptr_t)d;
4301 }
4302
4303 break;
4304 }
4305
4306 case DIF_SUBR_LLTOSTR: {
4307 int64_t i = (int64_t)tupregs[0].dttk_value;
4308 int64_t val = i < 0 ? i * -1 : i;
4309 uint64_t size = 22; /* enough room for 2^64 in decimal */
4310 char *end = (char *)mstate->dtms_scratch_ptr + size - 1;
4311
b0d623f7 4312 if (!DTRACE_INSCRATCH(mstate, size)) {
2d21ac55 4313 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
fe8ab488 4314 regs[rd] = 0;
2d21ac55
A
4315 break;
4316 }
4317
4318 for (*end-- = '\0'; val; val /= 10)
4319 *end-- = '0' + (val % 10);
4320
4321 if (i == 0)
4322 *end-- = '0';
4323
4324 if (i < 0)
4325 *end-- = '-';
4326
4327 regs[rd] = (uintptr_t)end + 1;
4328 mstate->dtms_scratch_ptr += size;
4329 break;
4330 }
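/*
 * Editorial note (illustrative): the digits are generated least-
 * significant first into the end of the buffer, so for i == -42 the
 * stores are '\0', '2', '4', '-' and regs[rd] points at "-42". The
 * 22-byte size covers the 20 digits of 2^64, a sign, and the NUL.
 */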
4331
b0d623f7
A
4332 case DIF_SUBR_HTONS:
4333 case DIF_SUBR_NTOHS:
4334#ifdef _BIG_ENDIAN
4335 regs[rd] = (uint16_t)tupregs[0].dttk_value;
4336#else
4337 regs[rd] = DT_BSWAP_16((uint16_t)tupregs[0].dttk_value);
4338#endif
4339 break;
4340
4341
4342 case DIF_SUBR_HTONL:
4343 case DIF_SUBR_NTOHL:
4344#ifdef _BIG_ENDIAN
4345 regs[rd] = (uint32_t)tupregs[0].dttk_value;
4346#else
4347 regs[rd] = DT_BSWAP_32((uint32_t)tupregs[0].dttk_value);
4348#endif
4349 break;
4350
4351
4352 case DIF_SUBR_HTONLL:
4353 case DIF_SUBR_NTOHLL:
4354#ifdef _BIG_ENDIAN
4355 regs[rd] = (uint64_t)tupregs[0].dttk_value;
4356#else
4357 regs[rd] = DT_BSWAP_64((uint64_t)tupregs[0].dttk_value);
4358#endif
4359 break;
4360
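/*
 * Editorial example (illustrative, not in the original source): on a
 * little-endian host such as x86, htons(0x1234) byte-swaps to 0x3412;
 * on a big-endian host the value is returned unchanged, as the #ifdef
 * blocks above show.
 */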
4361
2d21ac55
A
4362 case DIF_SUBR_DIRNAME:
4363 case DIF_SUBR_BASENAME: {
4364 char *dest = (char *)mstate->dtms_scratch_ptr;
4365 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4366 uintptr_t src = tupregs[0].dttk_value;
4367 int i, j, len = dtrace_strlen((char *)src, size);
4368 int lastbase = -1, firstbase = -1, lastdir = -1;
4369 int start, end;
4370
b0d623f7 4371 if (!dtrace_canload(src, len + 1, mstate, vstate)) {
fe8ab488 4372 regs[rd] = 0;
b0d623f7
A
4373 break;
4374 }
4375
4376 if (!DTRACE_INSCRATCH(mstate, size)) {
2d21ac55 4377 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
fe8ab488 4378 regs[rd] = 0;
2d21ac55
A
4379 break;
4380 }
4381
4382 /*
4383 * The basename and dirname for a zero-length string is
4384 * defined to be "."
4385 */
4386 if (len == 0) {
4387 len = 1;
4388 src = (uintptr_t)".";
4389 }
4390
4391 /*
4392 * Start from the back of the string, moving back toward the
4393 * front until we see a character that isn't a slash. That
4394 * character is the last character in the basename.
4395 */
4396 for (i = len - 1; i >= 0; i--) {
4397 if (dtrace_load8(src + i) != '/')
4398 break;
4399 }
4400
4401 if (i >= 0)
4402 lastbase = i;
4403
4404 /*
4405 * Starting from the last character in the basename, move
4406 * towards the front until we find a slash. The character
4407 * that we processed immediately before that is the first
4408 * character in the basename.
4409 */
4410 for (; i >= 0; i--) {
4411 if (dtrace_load8(src + i) == '/')
4412 break;
4413 }
4414
4415 if (i >= 0)
4416 firstbase = i + 1;
4417
4418 /*
4419 * Now keep going until we find a non-slash character. That
4420 * character is the last character in the dirname.
4421 */
4422 for (; i >= 0; i--) {
4423 if (dtrace_load8(src + i) != '/')
4424 break;
4425 }
4426
4427 if (i >= 0)
4428 lastdir = i;
4429
4430 ASSERT(!(lastbase == -1 && firstbase != -1));
4431 ASSERT(!(firstbase == -1 && lastdir != -1));
4432
4433 if (lastbase == -1) {
4434 /*
4435 * We didn't find a non-slash character. We know that
4436 * the length is non-zero, so the whole string must be
4437 * slashes. In either the dirname or the basename
4438 * case, we return '/'.
4439 */
4440 ASSERT(firstbase == -1);
4441 firstbase = lastbase = lastdir = 0;
4442 }
4443
4444 if (firstbase == -1) {
4445 /*
4446 * The entire string consists only of a basename
4447 * component. If we're looking for dirname, we need
4448 * to change our string to be just "."; if we're
4449 * looking for a basename, we'll just set the first
4450 * character of the basename to be 0.
4451 */
4452 if (subr == DIF_SUBR_DIRNAME) {
4453 ASSERT(lastdir == -1);
4454 src = (uintptr_t)".";
4455 lastdir = 0;
4456 } else {
4457 firstbase = 0;
4458 }
4459 }
4460
4461 if (subr == DIF_SUBR_DIRNAME) {
4462 if (lastdir == -1) {
4463 /*
4464 * We know that we have a slash in the name --
4465 * or lastdir would be set to 0, above. And
4466 * because lastdir is -1, we know that this
4467 * slash must be the first character. (That
4468 * is, the full string must be of the form
4469 * "/basename".) In this case, the last
4470 * character of the directory name is 0.
4471 */
4472 lastdir = 0;
4473 }
4474
4475 start = 0;
4476 end = lastdir;
4477 } else {
4478 ASSERT(subr == DIF_SUBR_BASENAME);
4479 ASSERT(firstbase != -1 && lastbase != -1);
4480 start = firstbase;
4481 end = lastbase;
4482 }
4483
b0d623f7
A
4484 for (i = start, j = 0; i <= end && (uint64_t)j < size - 1; i++, j++)
4485 dest[j] = dtrace_load8(src + i);
2d21ac55
A
4486
4487 dest[j] = '\0';
4488 regs[rd] = (uintptr_t)dest;
4489 mstate->dtms_scratch_ptr += size;
4490 break;
4491 }
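/*
 * Editorial worked example (not in the original source): for
 * src == "/usr/lib/", the backward scans find lastbase == 7 ('b'),
 * firstbase == 5 ('l') and lastdir == 3 ('r'), so basename() copies
 * "lib" and dirname() copies "/usr".
 */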
4492
4493 case DIF_SUBR_CLEANPATH: {
4494 char *dest = (char *)mstate->dtms_scratch_ptr, c;
4495 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4496 uintptr_t src = tupregs[0].dttk_value;
39037602
A
4497 size_t lim;
4498 size_t i = 0, j = 0;
2d21ac55 4499
39037602 4500 if (!dtrace_strcanload(src, size, &lim, mstate, vstate)) {
fe8ab488 4501 regs[rd] = 0;
b0d623f7
A
4502 break;
4503 }
4504
4505 if (!DTRACE_INSCRATCH(mstate, size)) {
2d21ac55 4506 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
fe8ab488 4507 regs[rd] = 0;
2d21ac55
A
4508 break;
4509 }
4510
4511 /*
4512 * Move forward, loading each character.
4513 */
4514 do {
39037602 4515 c = (i >= lim) ? '\0' : dtrace_load8(src + i++);
2d21ac55 4516next:
b0d623f7
A
4517 if ((uint64_t)(j + 5) >= size) /* 5 = strlen("/..c\0") */
4518 break;
2d21ac55
A
4519
4520 if (c != '/') {
4521 dest[j++] = c;
4522 continue;
4523 }
4524
39037602 4525 c = (i >= lim) ? '\0' : dtrace_load8(src + i++);
2d21ac55
A
4526
4527 if (c == '/') {
4528 /*
4529 * We have two slashes -- we can just advance
4530 * to the next character.
4531 */
4532 goto next;
4533 }
4534
4535 if (c != '.') {
4536 /*
4537 * This is not "." and it's not ".." -- we can
4538 * just store the "/" and this character and
4539 * drive on.
4540 */
4541 dest[j++] = '/';
4542 dest[j++] = c;
4543 continue;
4544 }
4545
39037602 4546 c = (i >= lim) ? '\0' : dtrace_load8(src + i++);
2d21ac55
A
4547
4548 if (c == '/') {
4549 /*
4550 * This is a "/./" component. We're not going
4551 * to store anything in the destination buffer;
4552 * we're just going to go to the next component.
4553 */
4554 goto next;
4555 }
4556
4557 if (c != '.') {
4558 /*
4559 * This is not ".." -- we can just store the
4560 * "/." and this character and continue
4561 * processing.
4562 */
4563 dest[j++] = '/';
4564 dest[j++] = '.';
4565 dest[j++] = c;
4566 continue;
4567 }
4568
39037602 4569 c = (i >= lim) ? '\0' : dtrace_load8(src + i++);
2d21ac55
A
4570
4571 if (c != '/' && c != '\0') {
4572 /*
4573 * This is not ".." -- it's "..[mumble]".
4574 * We'll store the "/.." and this character
4575 * and continue processing.
4576 */
4577 dest[j++] = '/';
4578 dest[j++] = '.';
4579 dest[j++] = '.';
4580 dest[j++] = c;
4581 continue;
4582 }
4583
4584 /*
4585 * This is "/../" or "/..\0". We need to back up
4586 * our destination pointer until we find a "/".
4587 */
4588 i--;
4589 while (j != 0 && dest[--j] != '/')
4590 continue;
4591
4592 if (c == '\0')
4593 dest[++j] = '/';
4594 } while (c != '\0');
4595
4596 dest[j] = '\0';
4597 regs[rd] = (uintptr_t)dest;
4598 mstate->dtms_scratch_ptr += size;
4599 break;
4600 }
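/*
 * Editorial worked example (not in the original source):
 * cleanpath("/a/./b//../c") drops the "/./" and the doubled slash, and
 * the "/../" backs the destination up past "b", producing "/a/c".
 */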
2d21ac55 4601
b0d623f7
A
4602 case DIF_SUBR_INET_NTOA:
4603 case DIF_SUBR_INET_NTOA6:
4604 case DIF_SUBR_INET_NTOP: {
4605 size_t size;
4606 int af, argi, i;
4607 char *base, *end;
2d21ac55 4608
b0d623f7
A
4609 if (subr == DIF_SUBR_INET_NTOP) {
4610 af = (int)tupregs[0].dttk_value;
4611 argi = 1;
4612 } else {
4613 af = subr == DIF_SUBR_INET_NTOA ? AF_INET: AF_INET6;
4614 argi = 0;
2d21ac55
A
4615 }
4616
b0d623f7
A
4617 if (af == AF_INET) {
4618#if !defined(__APPLE__)
4619 ipaddr_t ip4;
4620#else
6d2010ae 4621 uint32_t ip4;
b0d623f7
A
4622#endif /* __APPLE__ */
4623 uint8_t *ptr8, val;
4624
4625 /*
4626 * Safely load the IPv4 address.
4627 */
6d2010ae 4628#if !defined(__APPLE__)
b0d623f7 4629 ip4 = dtrace_load32(tupregs[argi].dttk_value);
6d2010ae 4630#else
39037602
A
4631 if (!dtrace_canload(tupregs[argi].dttk_value, sizeof(ip4),
4632 mstate, vstate)) {
4633 regs[rd] = 0;
4634 break;
4635 }
4636
6d2010ae
A
4637 dtrace_bcopy(
4638 (void *)(uintptr_t)tupregs[argi].dttk_value,
4639 (void *)(uintptr_t)&ip4, sizeof (ip4));
4640#endif /* __APPLE__ */
b0d623f7
A
4641 /*
4642 * Check an IPv4 string will fit in scratch.
4643 */
4644#if !defined(__APPLE__)
4645 size = INET_ADDRSTRLEN;
4646#else
4647 size = MAX_IPv4_STR_LEN;
4648#endif /* __APPLE__ */
4649 if (!DTRACE_INSCRATCH(mstate, size)) {
4650 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
fe8ab488 4651 regs[rd] = 0;
b0d623f7
A
4652 break;
4653 }
4654 base = (char *)mstate->dtms_scratch_ptr;
4655 end = (char *)mstate->dtms_scratch_ptr + size - 1;
4656
4657 /*
4658 * Stringify as a dotted decimal quad.
4659 */
4660 *end-- = '\0';
4661 ptr8 = (uint8_t *)&ip4;
4662 for (i = 3; i >= 0; i--) {
4663 val = ptr8[i];
4664
4665 if (val == 0) {
4666 *end-- = '0';
4667 } else {
4668 for (; val; val /= 10) {
4669 *end-- = '0' + (val % 10);
4670 }
4671 }
4672
4673 if (i > 0)
4674 *end-- = '.';
4675 }
4676 ASSERT(end + 1 >= base);
4677
4678 } else if (af == AF_INET6) {
4679#if defined(__APPLE__)
4680#define _S6_un __u6_addr
4681#define _S6_u8 __u6_addr8
4682#endif /* __APPLE__ */
4683 struct in6_addr ip6;
4684 int firstzero, tryzero, numzero, v6end;
4685 uint16_t val;
4686 const char digits[] = "0123456789abcdef";
4687
4688 /*
4689 * Stringify using RFC 1884 convention 2 - 16 bit
4690 * hexadecimal values with a zero-run compression.
4691 * Lower case hexadecimal digits are used.
 4692 * e.g., fe80::214:4fff:fe0b:76c8.
4693 * The IPv4 embedded form is returned for inet_ntop,
4694 * just the IPv4 string is returned for inet_ntoa6.
4695 */
4696
39037602
A
4697 if (!dtrace_canload(tupregs[argi].dttk_value,
4698 sizeof(struct in6_addr), mstate, vstate)) {
4699 regs[rd] = 0;
4700 break;
4701 }
4702
b0d623f7
A
4703 /*
4704 * Safely load the IPv6 address.
4705 */
4706 dtrace_bcopy(
4707 (void *)(uintptr_t)tupregs[argi].dttk_value,
4708 (void *)(uintptr_t)&ip6, sizeof (struct in6_addr));
4709
4710 /*
4711 * Check an IPv6 string will fit in scratch.
4712 */
4713 size = INET6_ADDRSTRLEN;
4714 if (!DTRACE_INSCRATCH(mstate, size)) {
4715 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
fe8ab488 4716 regs[rd] = 0;
b0d623f7
A
4717 break;
4718 }
4719 base = (char *)mstate->dtms_scratch_ptr;
4720 end = (char *)mstate->dtms_scratch_ptr + size - 1;
4721 *end-- = '\0';
4722
4723 /*
4724 * Find the longest run of 16 bit zero values
4725 * for the single allowed zero compression - "::".
4726 */
4727 firstzero = -1;
4728 tryzero = -1;
4729 numzero = 1;
b0d623f7 4730 for (i = 0; i < (int)sizeof (struct in6_addr); i++) {
b0d623f7
A
4731 if (ip6._S6_un._S6_u8[i] == 0 &&
4732 tryzero == -1 && i % 2 == 0) {
4733 tryzero = i;
4734 continue;
4735 }
4736
4737 if (tryzero != -1 &&
4738 (ip6._S6_un._S6_u8[i] != 0 ||
4739 i == sizeof (struct in6_addr) - 1)) {
4740
4741 if (i - tryzero <= numzero) {
4742 tryzero = -1;
4743 continue;
4744 }
4745
4746 firstzero = tryzero;
4747 numzero = i - i % 2 - tryzero;
4748 tryzero = -1;
4749
4750 if (ip6._S6_un._S6_u8[i] == 0 &&
4751 i == sizeof (struct in6_addr) - 1)
4752 numzero += 2;
4753 }
4754 }
b0d623f7 4755 ASSERT(firstzero + numzero <= (int)sizeof (struct in6_addr));
b0d623f7
A
4756
4757 /*
4758 * Check for an IPv4 embedded address.
4759 */
4760 v6end = sizeof (struct in6_addr) - 2;
4761 if (IN6_IS_ADDR_V4MAPPED(&ip6) ||
4762 IN6_IS_ADDR_V4COMPAT(&ip6)) {
b0d623f7
A
4763 for (i = sizeof (struct in6_addr) - 1;
4764 i >= (int)DTRACE_V4MAPPED_OFFSET; i--) {
b0d623f7
A
4765 ASSERT(end >= base);
4766
4767 val = ip6._S6_un._S6_u8[i];
4768
4769 if (val == 0) {
4770 *end-- = '0';
4771 } else {
4772 for (; val; val /= 10) {
4773 *end-- = '0' + val % 10;
4774 }
4775 }
4776
b0d623f7
A
4777 if (i > (int)DTRACE_V4MAPPED_OFFSET)
4778 *end-- = '.';
b0d623f7
A
4779 }
4780
4781 if (subr == DIF_SUBR_INET_NTOA6)
4782 goto inetout;
4783
4784 /*
4785 * Set v6end to skip the IPv4 address that
4786 * we have already stringified.
4787 */
4788 v6end = 10;
4789 }
4790
4791 /*
4792 * Build the IPv6 string by working through the
4793 * address in reverse.
4794 */
4795 for (i = v6end; i >= 0; i -= 2) {
4796 ASSERT(end >= base);
4797
4798 if (i == firstzero + numzero - 2) {
4799 *end-- = ':';
4800 *end-- = ':';
4801 i -= numzero - 2;
4802 continue;
4803 }
4804
4805 if (i < 14 && i != firstzero - 2)
4806 *end-- = ':';
4807
4808 val = (ip6._S6_un._S6_u8[i] << 8) +
4809 ip6._S6_un._S6_u8[i + 1];
4810
4811 if (val == 0) {
4812 *end-- = '0';
4813 } else {
4814 for (; val; val /= 16) {
4815 *end-- = digits[val % 16];
4816 }
4817 }
4818 }
4819 ASSERT(end + 1 >= base);
4820
4821#if defined(__APPLE__)
4822#undef _S6_un
4823#undef _S6_u8
4824#endif /* __APPLE__ */
4825 } else {
4826 /*
 4827 * The user didn't use AF_INET or AF_INET6.
4828 */
4829 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
fe8ab488 4830 regs[rd] = 0;
b0d623f7
A
4831 break;
4832 }
4833
4834inetout: regs[rd] = (uintptr_t)end + 1;
4835 mstate->dtms_scratch_ptr += size;
4836 break;
4837 }
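/*
 * Editorial examples (illustrative, not in the original source):
 *
 *	inet_ntoa(&ip4)                 => "10.1.2.3"
 *	inet_ntop(AF_INET6, &ip6)       => "fe80::214:4fff:fe0b:76c8"
 *	inet_ntoa6(&v4mapped)           => "10.1.2.3"   (embedded IPv4)
 *
 * The "::" appears at most once, at the longest run of zero words.
 */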
b0d623f7 4838
fe8ab488
A
4839 case DIF_SUBR_TOUPPER:
4840 case DIF_SUBR_TOLOWER: {
4841 uintptr_t src = tupregs[0].dttk_value;
4842 char *dest = (char *)mstate->dtms_scratch_ptr;
4843 char lower, upper, base, c;
4844 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4845 size_t len = dtrace_strlen((char*) src, size);
4846 size_t i = 0;
4847
4848 lower = (subr == DIF_SUBR_TOUPPER) ? 'a' : 'A';
4849 upper = (subr == DIF_SUBR_TOUPPER) ? 'z' : 'Z';
4850 base = (subr == DIF_SUBR_TOUPPER) ? 'A' : 'a';
4851
4852 if (!dtrace_canload(src, len + 1, mstate, vstate)) {
4853 regs[rd] = 0;
4854 break;
4855 }
4856
4857 if (!DTRACE_INSCRATCH(mstate, size)) {
4858 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
4859 regs[rd] = 0;
4860 break;
4861 }
4862
4863 for (i = 0; i < size - 1; ++i) {
4864 if ((c = dtrace_load8(src + i)) == '\0')
4865 break;
4866 if (c >= lower && c <= upper)
4867 c = base + (c - lower);
4868 dest[i] = c;
4869 }
4870
4871 ASSERT(i < size);
4872
4873 dest[i] = '\0';
4874 regs[rd] = (uintptr_t) dest;
4875 mstate->dtms_scratch_ptr += size;
4876
4877 break;
4878 }
4879
39037602 4880#if defined(__APPLE__)
3e170ce0
A
4881 case DIF_SUBR_VM_KERNEL_ADDRPERM: {
4882 if (!dtrace_priv_kernel(state)) {
4883 regs[rd] = 0;
4884 } else {
4885 regs[rd] = VM_KERNEL_ADDRPERM((vm_offset_t) tupregs[0].dttk_value);
4886 }
4887
4888 break;
4889 }
39037602
A
4890
4891 case DIF_SUBR_KDEBUG_TRACE: {
4892 uint32_t debugid;
4893 uintptr_t args[4] = {0};
4894 int i;
4895
4896 if (nargs < 2 || nargs > 5) {
4897 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
4898 break;
b0d623f7 4899 }
b0d623f7 4900
39037602
A
4901 if (dtrace_destructive_disallow)
4902 return;
4903
4904 debugid = tupregs[0].dttk_value;
4905 for (i = 0; i < nargs - 1; i++)
4906 args[i] = tupregs[i + 1].dttk_value;
4907
4908 kernel_debug(debugid, args[0], args[1], args[2], args[3], 0);
4909
4910 break;
4911 }
4912
4913 case DIF_SUBR_KDEBUG_TRACE_STRING: {
4914 if (nargs != 3) {
4915 break;
b0d623f7
A
4916 }
4917
39037602
A
4918 if (dtrace_destructive_disallow)
4919 return;
4920
4921 uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
4922 uint32_t debugid = tupregs[0].dttk_value;
4923 uint64_t str_id = tupregs[1].dttk_value;
4924 uintptr_t src = tupregs[2].dttk_value;
4925 size_t lim;
4926 char buf[size];
4927 char* str = NULL;
4928
4929 if (src != (uintptr_t)0) {
4930 str = buf;
4931 if (!dtrace_strcanload(src, size, &lim, mstate, vstate)) {
4932 break;
4933 }
4934 dtrace_strcpy((void*)src, buf, size);
2d21ac55 4935 }
b0d623f7 4936
39037602
A
4937 (void)kernel_debug_string(debugid, &str_id, str);
4938 regs[rd] = str_id;
4939
2d21ac55
A
4940 break;
4941 }
39037602
A
4942#endif
4943
2d21ac55
A
4944 }
4945}
4946
4947/*
4948 * Emulate the execution of DTrace IR instructions specified by the given
4949 * DIF object. This function is deliberately void of assertions as all of
4950 * the necessary checks are handled by a call to dtrace_difo_validate().
4951 */
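/*
 * Editor's sketch (hedged; see <sys/dtrace.h> for the authoritative
 * definitions): each dif_instr_t is a 32-bit word from which the
 * DIF_INSTR_* macros used below extract fields, conceptually:
 *
 *	op = (instr >> 24) & 0xff;	r1 = (instr >> 16) & 0xff;
 *	r2 = (instr >>  8) & 0xff;	rd = instr & 0xff;
 *
 * Variable, integer and string operands index the dtdo_* tables
 * captured at the top of this function.
 */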
4952static uint64_t
4953dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate,
4954 dtrace_vstate_t *vstate, dtrace_state_t *state)
4955{
4956 const dif_instr_t *text = difo->dtdo_buf;
4957 const uint_t textlen = difo->dtdo_len;
4958 const char *strtab = difo->dtdo_strtab;
4959 const uint64_t *inttab = difo->dtdo_inttab;
4960
4961 uint64_t rval = 0;
4962 dtrace_statvar_t *svar;
4963 dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
4964 dtrace_difv_t *v;
4965 volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
2d21ac55 4966 volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
2d21ac55
A
4967
4968 dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
4969 uint64_t regs[DIF_DIR_NREGS];
4970 uint64_t *tmp;
4971
4972 uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0;
4973 int64_t cc_r;
b0d623f7 4974 uint_t pc = 0, id, opc = 0;
2d21ac55
A
4975 uint8_t ttop = 0;
4976 dif_instr_t instr;
4977 uint_t r1, r2, rd;
4978
b0d623f7
A
4979 /*
4980 * We stash the current DIF object into the machine state: we need it
4981 * for subsequent access checking.
4982 */
4983 mstate->dtms_difo = difo;
4984
2d21ac55
A
4985 regs[DIF_REG_R0] = 0; /* %r0 is fixed at zero */
4986
4987 while (pc < textlen && !(*flags & CPU_DTRACE_FAULT)) {
4988 opc = pc;
4989
4990 instr = text[pc++];
4991 r1 = DIF_INSTR_R1(instr);
4992 r2 = DIF_INSTR_R2(instr);
4993 rd = DIF_INSTR_RD(instr);
4994
4995 switch (DIF_INSTR_OP(instr)) {
4996 case DIF_OP_OR:
4997 regs[rd] = regs[r1] | regs[r2];
4998 break;
4999 case DIF_OP_XOR:
5000 regs[rd] = regs[r1] ^ regs[r2];
5001 break;
5002 case DIF_OP_AND:
5003 regs[rd] = regs[r1] & regs[r2];
5004 break;
5005 case DIF_OP_SLL:
5006 regs[rd] = regs[r1] << regs[r2];
5007 break;
5008 case DIF_OP_SRL:
5009 regs[rd] = regs[r1] >> regs[r2];
5010 break;
5011 case DIF_OP_SUB:
5012 regs[rd] = regs[r1] - regs[r2];
5013 break;
5014 case DIF_OP_ADD:
5015 regs[rd] = regs[r1] + regs[r2];
5016 break;
5017 case DIF_OP_MUL:
5018 regs[rd] = regs[r1] * regs[r2];
5019 break;
5020 case DIF_OP_SDIV:
5021 if (regs[r2] == 0) {
5022 regs[rd] = 0;
5023 *flags |= CPU_DTRACE_DIVZERO;
5024 } else {
5025 regs[rd] = (int64_t)regs[r1] /
5026 (int64_t)regs[r2];
5027 }
5028 break;
5029
5030 case DIF_OP_UDIV:
5031 if (regs[r2] == 0) {
5032 regs[rd] = 0;
5033 *flags |= CPU_DTRACE_DIVZERO;
5034 } else {
5035 regs[rd] = regs[r1] / regs[r2];
5036 }
5037 break;
5038
5039 case DIF_OP_SREM:
5040 if (regs[r2] == 0) {
5041 regs[rd] = 0;
5042 *flags |= CPU_DTRACE_DIVZERO;
5043 } else {
5044 regs[rd] = (int64_t)regs[r1] %
5045 (int64_t)regs[r2];
5046 }
5047 break;
5048
5049 case DIF_OP_UREM:
5050 if (regs[r2] == 0) {
5051 regs[rd] = 0;
5052 *flags |= CPU_DTRACE_DIVZERO;
5053 } else {
5054 regs[rd] = regs[r1] % regs[r2];
5055 }
5056 break;
5057
5058 case DIF_OP_NOT:
5059 regs[rd] = ~regs[r1];
5060 break;
5061 case DIF_OP_MOV:
5062 regs[rd] = regs[r1];
5063 break;
5064 case DIF_OP_CMP:
5065 cc_r = regs[r1] - regs[r2];
5066 cc_n = cc_r < 0;
5067 cc_z = cc_r == 0;
5068 cc_v = 0;
5069 cc_c = regs[r1] < regs[r2];
5070 break;
5071 case DIF_OP_TST:
5072 cc_n = cc_v = cc_c = 0;
5073 cc_z = regs[r1] == 0;
5074 break;
5075 case DIF_OP_BA:
5076 pc = DIF_INSTR_LABEL(instr);
5077 break;
5078 case DIF_OP_BE:
5079 if (cc_z)
5080 pc = DIF_INSTR_LABEL(instr);
5081 break;
5082 case DIF_OP_BNE:
5083 if (cc_z == 0)
5084 pc = DIF_INSTR_LABEL(instr);
5085 break;
5086 case DIF_OP_BG:
5087 if ((cc_z | (cc_n ^ cc_v)) == 0)
5088 pc = DIF_INSTR_LABEL(instr);
5089 break;
5090 case DIF_OP_BGU:
5091 if ((cc_c | cc_z) == 0)
5092 pc = DIF_INSTR_LABEL(instr);
5093 break;
5094 case DIF_OP_BGE:
5095 if ((cc_n ^ cc_v) == 0)
5096 pc = DIF_INSTR_LABEL(instr);
5097 break;
5098 case DIF_OP_BGEU:
5099 if (cc_c == 0)
5100 pc = DIF_INSTR_LABEL(instr);
5101 break;
5102 case DIF_OP_BL:
5103 if (cc_n ^ cc_v)
5104 pc = DIF_INSTR_LABEL(instr);
5105 break;
5106 case DIF_OP_BLU:
5107 if (cc_c)
5108 pc = DIF_INSTR_LABEL(instr);
5109 break;
5110 case DIF_OP_BLE:
5111 if (cc_z | (cc_n ^ cc_v))
5112 pc = DIF_INSTR_LABEL(instr);
5113 break;
5114 case DIF_OP_BLEU:
5115 if (cc_c | cc_z)
5116 pc = DIF_INSTR_LABEL(instr);
5117 break;
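		/*
		 * Editorial worked example (not in the original source):
		 * after DIF_OP_CMP with regs[r1] == 1 and regs[r2] == 2,
		 * cc_r == -1, so cc_n == 1, cc_z == 0 and cc_c == 1
		 * (unsigned borrow); both BL (signed <) and BLU
		 * (unsigned <) would branch. With regs[r1] == -1, BL
		 * still branches but BLU does not -- BGU does, since
		 * 0xffff...ffff is huge when compared unsigned.
		 */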
5118 case DIF_OP_RLDSB:
5119 if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
5120 *flags |= CPU_DTRACE_KPRIV;
5121 *illval = regs[r1];
5122 break;
5123 }
5124 /*FALLTHROUGH*/
5125 case DIF_OP_LDSB:
5126 regs[rd] = (int8_t)dtrace_load8(regs[r1]);
5127 break;
5128 case DIF_OP_RLDSH:
5129 if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
5130 *flags |= CPU_DTRACE_KPRIV;
5131 *illval = regs[r1];
5132 break;
5133 }
5134 /*FALLTHROUGH*/
5135 case DIF_OP_LDSH:
5136 regs[rd] = (int16_t)dtrace_load16(regs[r1]);
5137 break;
5138 case DIF_OP_RLDSW:
5139 if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
5140 *flags |= CPU_DTRACE_KPRIV;
5141 *illval = regs[r1];
5142 break;
5143 }
5144 /*FALLTHROUGH*/
5145 case DIF_OP_LDSW:
5146 regs[rd] = (int32_t)dtrace_load32(regs[r1]);
5147 break;
5148 case DIF_OP_RLDUB:
5149 if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
5150 *flags |= CPU_DTRACE_KPRIV;
5151 *illval = regs[r1];
5152 break;
5153 }
5154 /*FALLTHROUGH*/
5155 case DIF_OP_LDUB:
5156 regs[rd] = dtrace_load8(regs[r1]);
5157 break;
5158 case DIF_OP_RLDUH:
5159 if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
5160 *flags |= CPU_DTRACE_KPRIV;
5161 *illval = regs[r1];
5162 break;
5163 }
5164 /*FALLTHROUGH*/
5165 case DIF_OP_LDUH:
5166 regs[rd] = dtrace_load16(regs[r1]);
5167 break;
5168 case DIF_OP_RLDUW:
5169 if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
5170 *flags |= CPU_DTRACE_KPRIV;
5171 *illval = regs[r1];
5172 break;
5173 }
5174 /*FALLTHROUGH*/
5175 case DIF_OP_LDUW:
5176 regs[rd] = dtrace_load32(regs[r1]);
5177 break;
5178 case DIF_OP_RLDX:
5179 if (!dtrace_canstore(regs[r1], 8, mstate, vstate)) {
5180 *flags |= CPU_DTRACE_KPRIV;
5181 *illval = regs[r1];
5182 break;
5183 }
5184 /*FALLTHROUGH*/
5185 case DIF_OP_LDX:
5186 regs[rd] = dtrace_load64(regs[r1]);
5187 break;
fe8ab488
A
5188/*
 5189 * A 32-bit Darwin kernel may fetch from a 64-bit user address space,
 5190 * so do not cast regs to uintptr_t in the user-load opcodes:
 5191 * DIF_OP_ULDSB, DIF_OP_ULDSH, DIF_OP_ULDSW, DIF_OP_ULDUB,
 5192 * DIF_OP_ULDUH, DIF_OP_ULDUW, DIF_OP_ULDX.
 5193 */
2d21ac55
A
5194 case DIF_OP_ULDSB:
5195 regs[rd] = (int8_t)
5196 dtrace_fuword8(regs[r1]);
5197 break;
5198 case DIF_OP_ULDSH:
5199 regs[rd] = (int16_t)
5200 dtrace_fuword16(regs[r1]);
5201 break;
5202 case DIF_OP_ULDSW:
5203 regs[rd] = (int32_t)
5204 dtrace_fuword32(regs[r1]);
5205 break;
5206 case DIF_OP_ULDUB:
5207 regs[rd] =
5208 dtrace_fuword8(regs[r1]);
5209 break;
5210 case DIF_OP_ULDUH:
5211 regs[rd] =
5212 dtrace_fuword16(regs[r1]);
5213 break;
5214 case DIF_OP_ULDUW:
5215 regs[rd] =
5216 dtrace_fuword32(regs[r1]);
5217 break;
5218 case DIF_OP_ULDX:
5219 regs[rd] =
5220 dtrace_fuword64(regs[r1]);
5221 break;
5222 case DIF_OP_RET:
5223 rval = regs[rd];
b0d623f7 5224 pc = textlen;
2d21ac55
A
5225 break;
5226 case DIF_OP_NOP:
5227 break;
5228 case DIF_OP_SETX:
5229 regs[rd] = inttab[DIF_INSTR_INTEGER(instr)];
5230 break;
5231 case DIF_OP_SETS:
5232 regs[rd] = (uint64_t)(uintptr_t)
5233 (strtab + DIF_INSTR_STRING(instr));
5234 break;
b0d623f7
A
5235 case DIF_OP_SCMP: {
5236 size_t sz = state->dts_options[DTRACEOPT_STRSIZE];
5237 uintptr_t s1 = regs[r1];
5238 uintptr_t s2 = regs[r2];
39037602 5239 size_t lim1 = sz, lim2 = sz;
b0d623f7 5240
fe8ab488 5241 if (s1 != 0 &&
39037602 5242 !dtrace_strcanload(s1, sz, &lim1, mstate, vstate))
b0d623f7 5243 break;
fe8ab488 5244 if (s2 != 0 &&
39037602 5245 !dtrace_strcanload(s2, sz, &lim2, mstate, vstate))
b0d623f7
A
5246 break;
5247
39037602
A
5248 cc_r = dtrace_strncmp((char *)s1, (char *)s2,
5249 MIN(lim1, lim2));
2d21ac55
A
5250
5251 cc_n = cc_r < 0;
5252 cc_z = cc_r == 0;
5253 cc_v = cc_c = 0;
5254 break;
b0d623f7 5255 }
2d21ac55
A
5256 case DIF_OP_LDGA:
5257 regs[rd] = dtrace_dif_variable(mstate, state,
5258 r1, regs[r2]);
5259 break;
5260 case DIF_OP_LDGS:
5261 id = DIF_INSTR_VAR(instr);
5262
5263 if (id >= DIF_VAR_OTHER_UBASE) {
5264 uintptr_t a;
5265
5266 id -= DIF_VAR_OTHER_UBASE;
5267 svar = vstate->dtvs_globals[id];
5268 ASSERT(svar != NULL);
5269 v = &svar->dtsv_var;
5270
5271 if (!(v->dtdv_type.dtdt_flags & DIF_TF_BYREF)) {
5272 regs[rd] = svar->dtsv_data;
5273 break;
5274 }
5275
5276 a = (uintptr_t)svar->dtsv_data;
5277
5278 if (*(uint8_t *)a == UINT8_MAX) {
5279 /*
5280 * If the 0th byte is set to UINT8_MAX
5281 * then this is to be treated as a
5282 * reference to a NULL variable.
5283 */
fe8ab488 5284 regs[rd] = 0;
2d21ac55
A
5285 } else {
5286 regs[rd] = a + sizeof (uint64_t);
5287 }
5288
5289 break;
5290 }
5291
5292 regs[rd] = dtrace_dif_variable(mstate, state, id, 0);
5293 break;
5294
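		/*
		 * Editorial note (illustrative, not in the original
		 * source): a by-ref global's storage is an 8-byte
		 * header followed by the data, i.e.
		 *
		 *	[ flag byte | 7 bytes pad ][ variable data ... ]
		 *
		 * Writing UINT8_MAX into the flag byte (see DIF_OP_STGS
		 * below) marks the variable as NULL, which is why the
		 * load above checks the 0th byte before returning
		 * a + sizeof (uint64_t).
		 */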
5295 case DIF_OP_STGS:
5296 id = DIF_INSTR_VAR(instr);
5297
5298 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5299 id -= DIF_VAR_OTHER_UBASE;
5300
39037602 5301 VERIFY(id < (uint_t)vstate->dtvs_nglobals);
2d21ac55
A
5302 svar = vstate->dtvs_globals[id];
5303 ASSERT(svar != NULL);
5304 v = &svar->dtsv_var;
5305
5306 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5307 uintptr_t a = (uintptr_t)svar->dtsv_data;
39037602 5308 size_t lim;
2d21ac55 5309
fe8ab488 5310 ASSERT(a != 0);
2d21ac55
A
5311 ASSERT(svar->dtsv_size != 0);
5312
fe8ab488 5313 if (regs[rd] == 0) {
2d21ac55
A
5314 *(uint8_t *)a = UINT8_MAX;
5315 break;
5316 } else {
5317 *(uint8_t *)a = 0;
5318 a += sizeof (uint64_t);
5319 }
b0d623f7
A
5320 if (!dtrace_vcanload(
5321 (void *)(uintptr_t)regs[rd], &v->dtdv_type,
39037602 5322 &lim, mstate, vstate))
b0d623f7 5323 break;
2d21ac55
A
5324
5325 dtrace_vcopy((void *)(uintptr_t)regs[rd],
39037602 5326 (void *)a, &v->dtdv_type, lim);
2d21ac55
A
5327 break;
5328 }
5329
5330 svar->dtsv_data = regs[rd];
5331 break;
5332
5333 case DIF_OP_LDTA:
5334 /*
5335 * There are no DTrace built-in thread-local arrays at
5336 * present. This opcode is saved for future work.
5337 */
5338 *flags |= CPU_DTRACE_ILLOP;
5339 regs[rd] = 0;
5340 break;
5341
5342 case DIF_OP_LDLS:
5343 id = DIF_INSTR_VAR(instr);
5344
5345 if (id < DIF_VAR_OTHER_UBASE) {
5346 /*
5347 * For now, this has no meaning.
5348 */
5349 regs[rd] = 0;
5350 break;
5351 }
5352
5353 id -= DIF_VAR_OTHER_UBASE;
5354
b0d623f7 5355 ASSERT(id < (uint_t)vstate->dtvs_nlocals);
2d21ac55 5356 ASSERT(vstate->dtvs_locals != NULL);
2d21ac55
A
5357 svar = vstate->dtvs_locals[id];
5358 ASSERT(svar != NULL);
5359 v = &svar->dtsv_var;
5360
5361 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5362 uintptr_t a = (uintptr_t)svar->dtsv_data;
5363 size_t sz = v->dtdv_type.dtdt_size;
5364
5365 sz += sizeof (uint64_t);
c910b4d9 5366 ASSERT(svar->dtsv_size == (int)NCPU * sz);
2d21ac55
A
5367 a += CPU->cpu_id * sz;
5368
5369 if (*(uint8_t *)a == UINT8_MAX) {
5370 /*
5371 * If the 0th byte is set to UINT8_MAX
5372 * then this is to be treated as a
5373 * reference to a NULL variable.
5374 */
fe8ab488 5375 regs[rd] = 0;
2d21ac55
A
5376 } else {
5377 regs[rd] = a + sizeof (uint64_t);
5378 }
5379
5380 break;
5381 }
5382
c910b4d9 5383 ASSERT(svar->dtsv_size == (int)NCPU * sizeof (uint64_t));
2d21ac55
A
5384 tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
5385 regs[rd] = tmp[CPU->cpu_id];
5386 break;
5387
5388 case DIF_OP_STLS:
5389 id = DIF_INSTR_VAR(instr);
5390
5391 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5392 id -= DIF_VAR_OTHER_UBASE;
39037602 5393 VERIFY(id < (uint_t)vstate->dtvs_nlocals);
2d21ac55
A
5394 ASSERT(vstate->dtvs_locals != NULL);
5395 svar = vstate->dtvs_locals[id];
5396 ASSERT(svar != NULL);
5397 v = &svar->dtsv_var;
5398
5399 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5400 uintptr_t a = (uintptr_t)svar->dtsv_data;
5401 size_t sz = v->dtdv_type.dtdt_size;
39037602 5402 size_t lim;
2d21ac55
A
5403
5404 sz += sizeof (uint64_t);
c910b4d9 5405 ASSERT(svar->dtsv_size == (int)NCPU * sz);
2d21ac55
A
5406 a += CPU->cpu_id * sz;
5407
fe8ab488 5408 if (regs[rd] == 0) {
2d21ac55
A
5409 *(uint8_t *)a = UINT8_MAX;
5410 break;
5411 } else {
5412 *(uint8_t *)a = 0;
5413 a += sizeof (uint64_t);
5414 }
5415
b0d623f7
A
5416 if (!dtrace_vcanload(
5417 (void *)(uintptr_t)regs[rd], &v->dtdv_type,
39037602 5418 &lim, mstate, vstate))
b0d623f7
A
5419 break;
5420
2d21ac55 5421 dtrace_vcopy((void *)(uintptr_t)regs[rd],
39037602 5422 (void *)a, &v->dtdv_type, lim);
2d21ac55
A
5423 break;
5424 }
5425
c910b4d9 5426 ASSERT(svar->dtsv_size == (int)NCPU * sizeof (uint64_t));
2d21ac55
A
5427 tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
5428 tmp[CPU->cpu_id] = regs[rd];
5429 break;
5430
5431 case DIF_OP_LDTS: {
5432 dtrace_dynvar_t *dvar;
5433 dtrace_key_t *key;
5434
5435 id = DIF_INSTR_VAR(instr);
5436 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5437 id -= DIF_VAR_OTHER_UBASE;
5438 v = &vstate->dtvs_tlocals[id];
5439
5440 key = &tupregs[DIF_DTR_NREGS];
5441 key[0].dttk_value = (uint64_t)id;
5442 key[0].dttk_size = 0;
5443 DTRACE_TLS_THRKEY(key[1].dttk_value);
5444 key[1].dttk_size = 0;
5445
5446 dvar = dtrace_dynvar(dstate, 2, key,
b0d623f7
A
5447 sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC,
5448 mstate, vstate);
2d21ac55
A
5449
5450 if (dvar == NULL) {
5451 regs[rd] = 0;
5452 break;
5453 }
5454
5455 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5456 regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;
5457 } else {
5458 regs[rd] = *((uint64_t *)dvar->dtdv_data);
5459 }
5460
5461 break;
5462 }
5463
5464 case DIF_OP_STTS: {
5465 dtrace_dynvar_t *dvar;
5466 dtrace_key_t *key;
5467
5468 id = DIF_INSTR_VAR(instr);
5469 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5470 id -= DIF_VAR_OTHER_UBASE;
39037602 5471 VERIFY(id < (uint_t)vstate->dtvs_ntlocals);
2d21ac55
A
5472
5473 key = &tupregs[DIF_DTR_NREGS];
5474 key[0].dttk_value = (uint64_t)id;
5475 key[0].dttk_size = 0;
5476 DTRACE_TLS_THRKEY(key[1].dttk_value);
5477 key[1].dttk_size = 0;
5478 v = &vstate->dtvs_tlocals[id];
5479
5480 dvar = dtrace_dynvar(dstate, 2, key,
5481 v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
5482 v->dtdv_type.dtdt_size : sizeof (uint64_t),
5483 regs[rd] ? DTRACE_DYNVAR_ALLOC :
b0d623f7 5484 DTRACE_DYNVAR_DEALLOC, mstate, vstate);
2d21ac55
A
5485
5486 /*
5487 * Given that we're storing to thread-local data,
5488 * we need to flush our predicate cache.
5489 */
2d21ac55 5490 dtrace_set_thread_predcache(current_thread(), 0);
2d21ac55 5491
2d21ac55
A
5492 if (dvar == NULL)
5493 break;
5494
5495 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
39037602
A
5496 size_t lim;
5497
b0d623f7
A
5498 if (!dtrace_vcanload(
5499 (void *)(uintptr_t)regs[rd],
39037602 5500 &v->dtdv_type, &lim, mstate, vstate))
b0d623f7
A
5501 break;
5502
2d21ac55 5503 dtrace_vcopy((void *)(uintptr_t)regs[rd],
39037602 5504 dvar->dtdv_data, &v->dtdv_type, lim);
2d21ac55
A
5505 } else {
5506 *((uint64_t *)dvar->dtdv_data) = regs[rd];
5507 }
5508
5509 break;
5510 }
5511
5512 case DIF_OP_SRA:
5513 regs[rd] = (int64_t)regs[r1] >> regs[r2];
5514 break;
5515
5516 case DIF_OP_CALL:
5517 dtrace_dif_subr(DIF_INSTR_SUBR(instr), rd,
5518 regs, tupregs, ttop, mstate, state);
5519 break;
5520
5521 case DIF_OP_PUSHTR:
5522 if (ttop == DIF_DTR_NREGS) {
5523 *flags |= CPU_DTRACE_TUPOFLOW;
5524 break;
5525 }
5526
5527 if (r1 == DIF_TYPE_STRING) {
5528 /*
5529 * If this is a string type and the size is 0,
5530 * we'll use the system-wide default string
5531 * size. Note that we are _not_ looking at
5532 * the value of the DTRACEOPT_STRSIZE option;
5533 * had this been set, we would expect to have
5534 * a non-zero size value in the "pushtr".
5535 */
5536 tupregs[ttop].dttk_size =
5537 dtrace_strlen((char *)(uintptr_t)regs[rd],
5538 regs[r2] ? regs[r2] :
5539 dtrace_strsize_default) + 1;
5540 } else {
ecc0ceb4
A
5541 if (regs[r2] > LONG_MAX) {
5542 *flags |= CPU_DTRACE_ILLOP;
5543 break;
5544 }
2d21ac55
A
5545 tupregs[ttop].dttk_size = regs[r2];
5546 }
5547
5548 tupregs[ttop++].dttk_value = regs[rd];
5549 break;
5550
5551 case DIF_OP_PUSHTV:
5552 if (ttop == DIF_DTR_NREGS) {
5553 *flags |= CPU_DTRACE_TUPOFLOW;
5554 break;
5555 }
5556
5557 tupregs[ttop].dttk_value = regs[rd];
5558 tupregs[ttop++].dttk_size = 0;
5559 break;
5560
5561 case DIF_OP_POPTS:
5562 if (ttop != 0)
5563 ttop--;
5564 break;
5565
5566 case DIF_OP_FLUSHTS:
5567 ttop = 0;
5568 break;
5569
5570 case DIF_OP_LDGAA:
5571 case DIF_OP_LDTAA: {
5572 dtrace_dynvar_t *dvar;
5573 dtrace_key_t *key = tupregs;
5574 uint_t nkeys = ttop;
5575
5576 id = DIF_INSTR_VAR(instr);
5577 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5578 id -= DIF_VAR_OTHER_UBASE;
5579
5580 key[nkeys].dttk_value = (uint64_t)id;
5581 key[nkeys++].dttk_size = 0;
5582
5583 if (DIF_INSTR_OP(instr) == DIF_OP_LDTAA) {
5584 DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
5585 key[nkeys++].dttk_size = 0;
39037602 5586 VERIFY(id < (uint_t)vstate->dtvs_ntlocals);
2d21ac55
A
5587 v = &vstate->dtvs_tlocals[id];
5588 } else {
39037602 5589 VERIFY(id < (uint_t)vstate->dtvs_nglobals);
2d21ac55
A
5590 v = &vstate->dtvs_globals[id]->dtsv_var;
5591 }
5592
5593 dvar = dtrace_dynvar(dstate, nkeys, key,
5594 v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
5595 v->dtdv_type.dtdt_size : sizeof (uint64_t),
b0d623f7 5596 DTRACE_DYNVAR_NOALLOC, mstate, vstate);
2d21ac55
A
5597
5598 if (dvar == NULL) {
5599 regs[rd] = 0;
5600 break;
5601 }
5602
5603 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
5604 regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;
5605 } else {
5606 regs[rd] = *((uint64_t *)dvar->dtdv_data);
5607 }
5608
5609 break;
5610 }
5611
5612 case DIF_OP_STGAA:
5613 case DIF_OP_STTAA: {
5614 dtrace_dynvar_t *dvar;
5615 dtrace_key_t *key = tupregs;
5616 uint_t nkeys = ttop;
5617
5618 id = DIF_INSTR_VAR(instr);
5619 ASSERT(id >= DIF_VAR_OTHER_UBASE);
5620 id -= DIF_VAR_OTHER_UBASE;
5621
5622 key[nkeys].dttk_value = (uint64_t)id;
5623 key[nkeys++].dttk_size = 0;
5624
5625 if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) {
5626 DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
5627 key[nkeys++].dttk_size = 0;
39037602 5628 VERIFY(id < (uint_t)vstate->dtvs_ntlocals);
2d21ac55
A
5629 v = &vstate->dtvs_tlocals[id];
5630 } else {
39037602 5631 VERIFY(id < (uint_t)vstate->dtvs_nglobals);
2d21ac55
A
5632 v = &vstate->dtvs_globals[id]->dtsv_var;
5633 }
5634
5635 dvar = dtrace_dynvar(dstate, nkeys, key,
5636 v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
5637 v->dtdv_type.dtdt_size : sizeof (uint64_t),
5638 regs[rd] ? DTRACE_DYNVAR_ALLOC :
b0d623f7 5639 DTRACE_DYNVAR_DEALLOC, mstate, vstate);
2d21ac55
A
5640
5641 if (dvar == NULL)
5642 break;
5643
5644 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
39037602
A
5645 size_t lim;
5646
b0d623f7
A
5647 if (!dtrace_vcanload(
5648 (void *)(uintptr_t)regs[rd], &v->dtdv_type,
39037602 5649 &lim, mstate, vstate))
b0d623f7
A
5650 break;
5651
2d21ac55 5652 dtrace_vcopy((void *)(uintptr_t)regs[rd],
39037602 5653 dvar->dtdv_data, &v->dtdv_type, lim);
2d21ac55
A
5654 } else {
5655 *((uint64_t *)dvar->dtdv_data) = regs[rd];
5656 }
5657
5658 break;
5659 }
5660
5661 case DIF_OP_ALLOCS: {
5662 uintptr_t ptr = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
5663 size_t size = ptr - mstate->dtms_scratch_ptr + regs[r1];
5664
b0d623f7
A
5665 /*
5666 * Rounding up the user allocation size could cause a
5667 * large, bogus allocation (like -1ULL) to overflow
5668 * to 0.
5669 */
5670 if (size < regs[r1] ||
5671 !DTRACE_INSCRATCH(mstate, size)) {
2d21ac55 5672 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
fe8ab488 5673 regs[rd] = 0;
b0d623f7
A
5674 break;
5675 }
5676
5677 dtrace_bzero((void *) mstate->dtms_scratch_ptr, size);
2d21ac55
A
5678 mstate->dtms_scratch_ptr += size;
5679 regs[rd] = ptr;
2d21ac55
A
5680 break;
5681 }
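		/*
		 * Illustrative note (not in the original source): the
		 * "size < regs[r1]" test above is the standard unsigned
		 * overflow idiom.  If P2ROUNDUP() wrapped past the top of
		 * the address space, the computed size comes out smaller
		 * than the size that was asked for:
		 *
		 *	size = rounded_base - scratch_ptr + request;
		 *	if (size < request)
		 *		-> the addition overflowed; reject it
		 */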
5682
5683 case DIF_OP_COPYS:
5684 if (!dtrace_canstore(regs[rd], regs[r2],
5685 mstate, vstate)) {
5686 *flags |= CPU_DTRACE_BADADDR;
5687 *illval = regs[rd];
5688 break;
5689 }
5690
b0d623f7
A
5691 if (!dtrace_canload(regs[r1], regs[r2], mstate, vstate))
5692 break;
5693
2d21ac55
A
5694 dtrace_bcopy((void *)(uintptr_t)regs[r1],
5695 (void *)(uintptr_t)regs[rd], (size_t)regs[r2]);
5696 break;
5697
5698 case DIF_OP_STB:
5699 if (!dtrace_canstore(regs[rd], 1, mstate, vstate)) {
5700 *flags |= CPU_DTRACE_BADADDR;
5701 *illval = regs[rd];
5702 break;
5703 }
5704 *((uint8_t *)(uintptr_t)regs[rd]) = (uint8_t)regs[r1];
5705 break;
5706
5707 case DIF_OP_STH:
5708 if (!dtrace_canstore(regs[rd], 2, mstate, vstate)) {
5709 *flags |= CPU_DTRACE_BADADDR;
5710 *illval = regs[rd];
5711 break;
5712 }
5713 if (regs[rd] & 1) {
5714 *flags |= CPU_DTRACE_BADALIGN;
5715 *illval = regs[rd];
5716 break;
5717 }
5718 *((uint16_t *)(uintptr_t)regs[rd]) = (uint16_t)regs[r1];
5719 break;
5720
5721 case DIF_OP_STW:
5722 if (!dtrace_canstore(regs[rd], 4, mstate, vstate)) {
5723 *flags |= CPU_DTRACE_BADADDR;
5724 *illval = regs[rd];
5725 break;
5726 }
5727 if (regs[rd] & 3) {
5728 *flags |= CPU_DTRACE_BADALIGN;
5729 *illval = regs[rd];
5730 break;
5731 }
5732 *((uint32_t *)(uintptr_t)regs[rd]) = (uint32_t)regs[r1];
5733 break;
5734
5735 case DIF_OP_STX:
5736 if (!dtrace_canstore(regs[rd], 8, mstate, vstate)) {
5737 *flags |= CPU_DTRACE_BADADDR;
5738 *illval = regs[rd];
5739 break;
5740 }
fe8ab488
A
5741
5742 /*
5743 * Memory returned by Darwin's kmem_zalloc(), as called
5744 * from dtrace_difo_init(), is only 4-byte aligned.
5745 */
5746 if (regs[rd] & 3) {
2d21ac55
A
5747 *flags |= CPU_DTRACE_BADALIGN;
5748 *illval = regs[rd];
5749 break;
5750 }
5751 *((uint64_t *)(uintptr_t)regs[rd]) = regs[r1];
5752 break;
5753 }
5754 }
5755
5756 if (!(*flags & CPU_DTRACE_FAULT))
5757 return (rval);
5758
5759 mstate->dtms_fltoffs = opc * sizeof (dif_instr_t);
5760 mstate->dtms_present |= DTRACE_MSTATE_FLTOFFS;
5761
5762 return (0);
5763}
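/*
 * Illustrative sketch (hypothetical helper, not part of the source): the
 * STH, STW and STX cases above all test alignment as "address & (size - 1)".
 * STX deliberately masks with 3 rather than 7 because, per the comment
 * there, Darwin's kmem_zalloc() guarantees only 4-byte alignment.  The
 * general form of the test:
 */
static int
dtrace_is_misaligned(uint64_t addr, uint64_t size)
{
	/* 'size' is assumed to be a power of two: 1, 2, 4 or 8. */
	return ((addr & (size - 1)) != 0);
}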
5764
5765static void
5766dtrace_action_breakpoint(dtrace_ecb_t *ecb)
5767{
5768 dtrace_probe_t *probe = ecb->dte_probe;
5769 dtrace_provider_t *prov = probe->dtpr_provider;
5770 char c[DTRACE_FULLNAMELEN + 80], *str;
b0d623f7
A
5771 const char *msg = "dtrace: breakpoint action at probe ";
5772 const char *ecbmsg = " (ecb ";
2d21ac55
A
5773 uintptr_t mask = (0xf << (sizeof (uintptr_t) * NBBY / 4));
5774 uintptr_t val = (uintptr_t)ecb;
5775 int shift = (sizeof (uintptr_t) * NBBY) - 4, i = 0;
5776
5777 if (dtrace_destructive_disallow)
5778 return;
5779
5780 /*
5781 * It's impossible to be taking action on the NULL probe.
5782 */
5783 ASSERT(probe != NULL);
5784
5785 /*
5786 * This is a poor man's (destitute man's?) sprintf(): we want to
5787 * print the provider name, module name, function name and name of
5788 * the probe, along with the hex address of the ECB with the breakpoint
5789 * action -- all of which we must place in the character buffer by
5790 * hand.
5791 */
5792 while (*msg != '\0')
5793 c[i++] = *msg++;
5794
5795 for (str = prov->dtpv_name; *str != '\0'; str++)
5796 c[i++] = *str;
5797 c[i++] = ':';
5798
5799 for (str = probe->dtpr_mod; *str != '\0'; str++)
5800 c[i++] = *str;
5801 c[i++] = ':';
5802
5803 for (str = probe->dtpr_func; *str != '\0'; str++)
5804 c[i++] = *str;
5805 c[i++] = ':';
5806
5807 for (str = probe->dtpr_name; *str != '\0'; str++)
5808 c[i++] = *str;
5809
5810 while (*ecbmsg != '\0')
5811 c[i++] = *ecbmsg++;
5812
5813 while (shift >= 0) {
5814 mask = (uintptr_t)0xf << shift;
5815
5816 if (val >= ((uintptr_t)1 << shift))
5817 c[i++] = "0123456789abcdef"[(val & mask) >> shift];
5818 shift -= 4;
5819 }
5820
5821 c[i++] = ')';
5822 c[i] = '\0';
5823
5824 debug_enter(c);
5825}
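/*
 * Illustrative sketch (hypothetical helper, not part of the source): the
 * hex loop above is a hand-rolled "%x" -- walk the value one nibble at a
 * time from the top, emitting digits only once the leading zeroes have
 * been passed.  Note that, as in the original, a value of zero emits no
 * digits at all.
 */
static int
dtrace_emit_hex(char *c, int i, uintptr_t val)
{
	int shift = (sizeof (uintptr_t) * NBBY) - 4;

	for (; shift >= 0; shift -= 4) {
		uintptr_t mask = (uintptr_t)0xf << shift;

		if (val >= ((uintptr_t)1 << shift))
			c[i++] = "0123456789abcdef"[(val & mask) >> shift];
	}

	return (i);
}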
5826
5827static void
5828dtrace_action_panic(dtrace_ecb_t *ecb)
5829{
5830 dtrace_probe_t *probe = ecb->dte_probe;
5831
5832 /*
5833 * It's impossible to be taking action on the NULL probe.
5834 */
5835 ASSERT(probe != NULL);
5836
5837 if (dtrace_destructive_disallow)
5838 return;
5839
5840 if (dtrace_panicked != NULL)
5841 return;
5842
2d21ac55
A
5843 if (dtrace_casptr(&dtrace_panicked, NULL, current_thread()) != NULL)
5844 return;
2d21ac55
A
5845
5846 /*
5847 * We won the right to panic. (We want to be sure that only one
5848 * thread calls panic() from dtrace_probe(), and that panic() is
5849 * called exactly once.)
5850 */
316670eb 5851 panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)",
2d21ac55
A
5852 probe->dtpr_provider->dtpv_name, probe->dtpr_mod,
5853 probe->dtpr_func, probe->dtpr_name, (void *)ecb);
5854
fe8ab488
A
5855 /*
5856 * APPLE NOTE: this was for an old Mac OS X debug feature
5857 * allowing a return from panic(). Revisit someday.
5858 */
2d21ac55 5859 dtrace_panicked = NULL;
2d21ac55
A
5860}
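/*
 * Illustrative sketch (hypothetical names, not part of the source): the
 * dtrace_casptr() gate above is the classic run-exactly-once pattern --
 * of N racing threads, only the one that swings the pointer from NULL to
 * itself proceeds to call panic().
 */
static void *dtrace_sketch_winner = NULL;	/* NULL until claimed */

static int
dtrace_sketch_try_claim(void *self)
{
	/* dtrace_casptr() returns the prior value; NULL means we won. */
	return (dtrace_casptr(&dtrace_sketch_winner, NULL, self) == NULL);
}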
5861
5862static void
5863dtrace_action_raise(uint64_t sig)
5864{
5865 if (dtrace_destructive_disallow)
5866 return;
5867
5868 if (sig >= NSIG) {
5869 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
5870 return;
5871 }
5872
2d21ac55
A
5873 /*
5874 * raise() has a queue depth of 1 -- we ignore all subsequent
5875 * invocations of the raise() action.
5876 */
2d21ac55 5877
2d21ac55
A
5878 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
5879
5880 if (uthread && uthread->t_dtrace_sig == 0) {
5881 uthread->t_dtrace_sig = sig;
6d2010ae 5882 act_set_astbsd(current_thread());
2d21ac55 5883 }
2d21ac55
A
5884}
5885
5886static void
5887dtrace_action_stop(void)
5888{
5889 if (dtrace_destructive_disallow)
5890 return;
5891
6d2010ae
A
5892 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
5893 if (uthread) {
5894 /*
5895 * The currently running process will be set to task_suspend
5896 * when it next leaves the kernel.
5897 */
b0d623f7 5898 uthread->t_dtrace_stop = 1;
6d2010ae 5899 act_set_astbsd(current_thread());
b0d623f7 5900 }
2d21ac55
A
5901}
5902
fe8ab488
A
5903
5904/*
5905 * APPLE NOTE: pidresume works in conjunction with the dtrace stop action.
5906 * Both activate only when the currently running process next leaves the
5907 * kernel.
5908 */
6d2010ae
A
5909static void
5910dtrace_action_pidresume(uint64_t pid)
5911{
5912 if (dtrace_destructive_disallow)
5913 return;
5914
5915 if (kauth_cred_issuser(kauth_cred_get()) == 0) {
5916 DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
5917 return;
5918 }
6d2010ae
A
5919 uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
5920
5921 /*
5922 * When the currently running process leaves the kernel, it attempts to
5923 * task_resume the process (denoted by pid), if that pid appears to have
5924 * been stopped by dtrace_action_stop().
5925 * The currently running process has a pidresume() queue depth of 1 --
5926 * subsequent invocations of the pidresume() action are ignored.
5927 */
5928
5929 if (pid != 0 && uthread && uthread->t_dtrace_resumepid == 0) {
5930 uthread->t_dtrace_resumepid = pid;
5931 act_set_astbsd(current_thread());
5932 }
5933}
6d2010ae 5934
2d21ac55
A
5935static void
5936dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val)
5937{
5938 hrtime_t now;
5939 volatile uint16_t *flags;
6d2010ae 5940 dtrace_cpu_t *cpu = CPU;
2d21ac55
A
5941
5942 if (dtrace_destructive_disallow)
5943 return;
5944
5945 flags = (volatile uint16_t *)&cpu_core[cpu->cpu_id].cpuc_dtrace_flags;
5946
5947 now = dtrace_gethrtime();
5948
5949 if (now - cpu->cpu_dtrace_chillmark > dtrace_chill_interval) {
5950 /*
5951 * We need to advance the mark to the current time.
5952 */
5953 cpu->cpu_dtrace_chillmark = now;
5954 cpu->cpu_dtrace_chilled = 0;
5955 }
5956
5957 /*
5958 * Now check to see if the requested chill time would take us over
5959 * the maximum amount of time allowed in the chill interval. (Or
5960 * worse, if the calculation itself induces overflow.)
5961 */
5962 if (cpu->cpu_dtrace_chilled + val > dtrace_chill_max ||
5963 cpu->cpu_dtrace_chilled + val < cpu->cpu_dtrace_chilled) {
5964 *flags |= CPU_DTRACE_ILLOP;
5965 return;
5966 }
5967
5968 while (dtrace_gethrtime() - now < val)
5969 continue;
5970
5971 /*
5972 * Normally, we assure that the value of the variable "timestamp" does
5973 * not change within an ECB. The presence of chill() represents an
5974 * exception to this rule, however.
5975 */
5976 mstate->dtms_present &= ~DTRACE_MSTATE_TIMESTAMP;
5977 cpu->cpu_dtrace_chilled += val;
5978}
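/*
 * Illustrative sketch (standalone, unsigned arithmetic assumed): the
 * budget test above rejects both a request that exceeds the per-interval
 * maximum and one whose addition wraps -- a sum that comes out smaller
 * than one of its addends has overflowed.
 */
static int
dtrace_chill_budget_exceeded(uint64_t chilled, uint64_t val, uint64_t max)
{
	return (chilled + val > max || chilled + val < chilled);
}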
5979
5980static void
5981dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state,
5982 uint64_t *buf, uint64_t arg)
5983{
5984 int nframes = DTRACE_USTACK_NFRAMES(arg);
5985 int strsize = DTRACE_USTACK_STRSIZE(arg);
5986 uint64_t *pcs = &buf[1], *fps;
5987 char *str = (char *)&pcs[nframes];
5988 int size, offs = 0, i, j;
5989 uintptr_t old = mstate->dtms_scratch_ptr, saved;
5990 uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
5991 char *sym;
5992
5993 /*
5994 * Should be taking a faster path if string space has not been
5995 * allocated.
5996 */
5997 ASSERT(strsize != 0);
5998
5999 /*
6000 * We will first allocate some temporary space for the frame pointers.
6001 */
6002 fps = (uint64_t *)P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
6003 size = (uintptr_t)fps - mstate->dtms_scratch_ptr +
6004 (nframes * sizeof (uint64_t));
6005
b0d623f7 6006 if (!DTRACE_INSCRATCH(mstate, (uintptr_t)size)) {
2d21ac55
A
6007 /*
6008 * Not enough room for our frame pointers -- need to indicate
6009 * that we ran out of scratch space.
6010 */
6011 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
6012 return;
6013 }
6014
6015 mstate->dtms_scratch_ptr += size;
6016 saved = mstate->dtms_scratch_ptr;
6017
6018 /*
6019 * Now get a stack with both program counters and frame pointers.
6020 */
6021 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
6022 dtrace_getufpstack(buf, fps, nframes + 1);
6023 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
6024
6025 /*
6026 * If that faulted, we're cooked.
6027 */
6028 if (*flags & CPU_DTRACE_FAULT)
6029 goto out;
6030
6031 /*
6032 * Now we want to walk up the stack, calling the USTACK helper. For
6033 * each iteration, we restore the scratch pointer.
6034 */
6035 for (i = 0; i < nframes; i++) {
6036 mstate->dtms_scratch_ptr = saved;
6037
6038 if (offs >= strsize)
6039 break;
6040
6041 sym = (char *)(uintptr_t)dtrace_helper(
6042 DTRACE_HELPER_ACTION_USTACK,
6043 mstate, state, pcs[i], fps[i]);
6044
6045 /*
6046 * If we faulted while running the helper, we're going to
6047 * clear the fault and null out the corresponding string.
6048 */
6049 if (*flags & CPU_DTRACE_FAULT) {
6050 *flags &= ~CPU_DTRACE_FAULT;
6051 str[offs++] = '\0';
6052 continue;
6053 }
6054
6055 if (sym == NULL) {
6056 str[offs++] = '\0';
6057 continue;
6058 }
6059
6060 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
6061
6062 /*
6063 * Now copy in the string that the helper returned to us.
6064 */
6065 for (j = 0; offs + j < strsize; j++) {
6066 if ((str[offs + j] = sym[j]) == '\0')
6067 break;
6068 }
6069
6070 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
6071
6072 offs += j + 1;
6073 }
6074
6075 if (offs >= strsize) {
6076 /*
6077 * If we didn't have room for all of the strings, we don't
6078 * abort processing -- this needn't be a fatal error -- but we
6079 * still want to increment a counter (dts_stkstroverflows) to
6080 * allow this condition to be warned about. (If this is from
6081 * a jstack() action, it is easily tuned via jstackstrsize.)
6082 */
6083 dtrace_error(&state->dts_stkstroverflows);
6084 }
6085
6086 while (offs < strsize)
6087 str[offs++] = '\0';
6088
6089out:
6090 mstate->dtms_scratch_ptr = old;
6091}
6092
3e170ce0
A
6093static void
6094dtrace_store_by_ref(dtrace_difo_t *dp, caddr_t tomax, size_t size,
6095 size_t *valoffsp, uint64_t *valp, uint64_t end, int intuple, int dtkind)
6096{
6097 volatile uint16_t *flags;
6098 uint64_t val = *valp;
6099 size_t valoffs = *valoffsp;
6100
6101 flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
6102 ASSERT(dtkind == DIF_TF_BYREF || dtkind == DIF_TF_BYUREF);
6103
6104 /*
6105 * If this is a string, we're going to only load until we find the zero
6106 * byte -- after which we'll store zero bytes.
6107 */
6108 if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) {
6109 char c = '\0' + 1;
6110 size_t s;
6111
6112 for (s = 0; s < size; s++) {
6113 if (c != '\0' && dtkind == DIF_TF_BYREF) {
6114 c = dtrace_load8(val++);
6115 } else if (c != '\0' && dtkind == DIF_TF_BYUREF) {
6116 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
6117 c = dtrace_fuword8((user_addr_t)(uintptr_t)val++);
6118 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
6119 if (*flags & CPU_DTRACE_FAULT)
6120 break;
6121 }
6122
6123 DTRACE_STORE(uint8_t, tomax, valoffs++, c);
6124
6125 if (c == '\0' && intuple)
6126 break;
6127 }
6128 } else {
6129 uint8_t c;
6130 while (valoffs < end) {
6131 if (dtkind == DIF_TF_BYREF) {
6132 c = dtrace_load8(val++);
6133 } else if (dtkind == DIF_TF_BYUREF) {
6134 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
6135 c = dtrace_fuword8((user_addr_t)(uintptr_t)val++);
6136 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
6137 if (*flags & CPU_DTRACE_FAULT)
6138 break;
6139 }
6140
6141 DTRACE_STORE(uint8_t, tomax,
6142 valoffs++, c);
6143 }
6144 }
6145
6146 *valp = val;
6147 *valoffsp = valoffs;
6148}
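/*
 * Illustrative sketch (hypothetical helper; bounds and fault handling
 * omitted): the string branch above reduces to "copy until the NUL, then
 * keep storing zero bytes", so the record written to the buffer always
 * has a fixed size:
 */
static void
dtrace_copy_str_padded(char *dst, const char *src, size_t size)
{
	size_t s;
	char c = '\0' + 1;		/* any non-zero starting value */

	for (s = 0; s < size; s++) {
		if (c != '\0')
			c = *src++;	/* stop loading past the NUL... */
		dst[s] = c;		/* ...but keep storing zeroes */
	}
}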
6149
2d21ac55
A
6150/*
6151 * If you're looking for the epicenter of DTrace, you just found it. This
6152 * is the function called by the provider to fire a probe -- from which all
6153 * subsequent probe-context DTrace activity emanates.
6154 */
2d21ac55
A
6155static void
6156__dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
6157 uint64_t arg2, uint64_t arg3, uint64_t arg4)
2d21ac55
A
6158{
6159 processorid_t cpuid;
6160 dtrace_icookie_t cookie;
6161 dtrace_probe_t *probe;
6162 dtrace_mstate_t mstate;
6163 dtrace_ecb_t *ecb;
6164 dtrace_action_t *act;
6165 intptr_t offs;
6166 size_t size;
6167 int vtime, onintr;
6168 volatile uint16_t *flags;
6169 hrtime_t now;
6170
2d21ac55
A
6171 cookie = dtrace_interrupt_disable();
6172 probe = dtrace_probes[id - 1];
6173 cpuid = CPU->cpu_id;
6174 onintr = CPU_ON_INTR(CPU);
6175
2d21ac55
A
6176 if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE &&
6177 probe->dtpr_predcache == dtrace_get_thread_predcache(current_thread())) {
2d21ac55
A
6178 /*
6179 * We have hit in the predicate cache; we know that
6180 * this predicate would evaluate to be false.
6181 */
6182 dtrace_interrupt_enable(cookie);
6183 return;
6184 }
6185
6186 if (panic_quiesce) {
6187 /*
6188 * We don't trace anything if we're panicking.
6189 */
6190 dtrace_interrupt_enable(cookie);
6191 return;
6192 }
6193
6194#if !defined(__APPLE__)
6195 now = dtrace_gethrtime();
6196 vtime = dtrace_vtime_references != 0;
6197
6198 if (vtime && curthread->t_dtrace_start)
6199 curthread->t_dtrace_vtime += now - curthread->t_dtrace_start;
6200#else
fe8ab488
A
6201 /*
6202 * APPLE NOTE: The time spent entering DTrace and arriving
6203 * at this point is attributed to the current thread.
6204 * Instead it should accrue to DTrace. FIXME
6205 */
2d21ac55
A
6206 vtime = dtrace_vtime_references != 0;
6207
6208 if (vtime)
6209 {
6210 int64_t dtrace_accum_time, recent_vtime;
6211 thread_t thread = current_thread();
6212
6213 dtrace_accum_time = dtrace_get_thread_tracing(thread); /* Time spent inside DTrace so far (nanoseconds) */
6214
6215 if (dtrace_accum_time >= 0) {
6216 recent_vtime = dtrace_abs_to_nano(dtrace_calc_thread_recent_vtime(thread)); /* up to the moment thread vtime */
6217
6218 recent_vtime = recent_vtime - dtrace_accum_time; /* Time without DTrace contribution */
6219
6220 dtrace_set_thread_vtime(thread, recent_vtime);
6221 }
6222 }
6223
6224 now = dtrace_gethrtime(); /* must not precede dtrace_calc_thread_recent_vtime() call! */
6225#endif /* __APPLE__ */
6226
cf7d32b8 6227 /*
fe8ab488
A
6228 * APPLE NOTE: A provider may call dtrace_probe_error() in lieu of
6229 * dtrace_probe() in some circumstances. See, e.g. fasttrap_isa.c.
6230 * However, the provider has no access to ECB context, so it passes
6231 * 0 through "arg0" and the probe_id of the overridden probe as arg1.
6232 * Detect that here and cons up a viable state (from the probe_id).
cf7d32b8 6233 */
b0d623f7 6234 if (dtrace_probeid_error == id && 0 == arg0) {
cf7d32b8
A
6235 dtrace_id_t ftp_id = (dtrace_id_t)arg1;
6236 dtrace_probe_t *ftp_probe = dtrace_probes[ftp_id - 1];
6237 dtrace_ecb_t *ftp_ecb = ftp_probe->dtpr_ecb;
6238
6239 if (NULL != ftp_ecb) {
6240 dtrace_state_t *ftp_state = ftp_ecb->dte_state;
6241
6242 arg0 = (uint64_t)(uintptr_t)ftp_state;
6243 arg1 = ftp_ecb->dte_epid;
6244 /*
6245 * args[2-4] established by caller.
6246 */
6247 ftp_state->dts_arg_error_illval = -1; /* arg5 */
6248 }
6249 }
cf7d32b8 6250
b0d623f7 6251 mstate.dtms_difo = NULL;
2d21ac55 6252 mstate.dtms_probe = probe;
fe8ab488 6253 mstate.dtms_strtok = 0;
2d21ac55
A
6254 mstate.dtms_arg[0] = arg0;
6255 mstate.dtms_arg[1] = arg1;
6256 mstate.dtms_arg[2] = arg2;
6257 mstate.dtms_arg[3] = arg3;
6258 mstate.dtms_arg[4] = arg4;
6259
6260 flags = (volatile uint16_t *)&cpu_core[cpuid].cpuc_dtrace_flags;
6261
6262 for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
6263 dtrace_predicate_t *pred = ecb->dte_predicate;
6264 dtrace_state_t *state = ecb->dte_state;
6265 dtrace_buffer_t *buf = &state->dts_buffer[cpuid];
6266 dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid];
6267 dtrace_vstate_t *vstate = &state->dts_vstate;
6268 dtrace_provider_t *prov = probe->dtpr_provider;
fe8ab488 6269 uint64_t tracememsize = 0;
2d21ac55
A
6270 int committed = 0;
6271 caddr_t tomax;
6272
6273 /*
6274 * A little subtlety with the following (seemingly innocuous)
6275 * declaration of the automatic 'val': by looking at the
6276 * code, you might think that it could be declared in the
6277 * action processing loop, below. (That is, it's only used in
6278 * the action processing loop.) However, it must be declared
6279 * out of that scope because in the case of DIF expression
6280 * arguments to aggregating actions, one iteration of the
6281 * action loop will use the last iteration's value.
6282 */
6283#ifdef lint
6284 uint64_t val = 0;
6285#else
c910b4d9 6286 uint64_t val = 0;
2d21ac55
A
6287#endif
6288
6289 mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
6290 *flags &= ~CPU_DTRACE_ERROR;
6291
6292 if (prov == dtrace_provider) {
6293 /*
6294 * If dtrace itself is the provider of this probe,
6295 * we're only going to continue processing the ECB if
6296 * arg0 (the dtrace_state_t) is equal to the ECB's
6297 * creating state. (This prevents disjoint consumers
6298 * from seeing one another's metaprobes.)
6299 */
6300 if (arg0 != (uint64_t)(uintptr_t)state)
6301 continue;
6302 }
6303
6304 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) {
6305 /*
6306 * We're not currently active. If our provider isn't
6307 * the dtrace pseudo provider, we're not interested.
6308 */
6309 if (prov != dtrace_provider)
6310 continue;
6311
6312 /*
6313 * Now we must further check if we are in the BEGIN
6314 * probe. If we are, we will only continue processing
6315 * if we're still in WARMUP -- if one BEGIN enabling
6316 * has invoked the exit() action, we don't want to
6317 * evaluate subsequent BEGIN enablings.
6318 */
6319 if (probe->dtpr_id == dtrace_probeid_begin &&
6320 state->dts_activity != DTRACE_ACTIVITY_WARMUP) {
6321 ASSERT(state->dts_activity ==
6322 DTRACE_ACTIVITY_DRAINING);
6323 continue;
6324 }
6325 }
6326
2d21ac55
A
6327 if (ecb->dte_cond) {
6328 /*
6329 * If the dte_cond bits indicate that this
6330 * consumer is only allowed to see user-mode firings
6331 * of this probe, call the provider's dtps_usermode()
6332 * entry point to check that the probe was fired
6333 * while in a user context. Skip this ECB if that's
6334 * not the case.
6335 */
6336 if ((ecb->dte_cond & DTRACE_COND_USERMODE) &&
39037602 6337 prov->dtpv_pops.dtps_usermode &&
2d21ac55
A
6338 prov->dtpv_pops.dtps_usermode(prov->dtpv_arg,
6339 probe->dtpr_id, probe->dtpr_arg) == 0)
6340 continue;
6341
6342 /*
6343 * This is more subtle than it looks. We have to be
6344 * absolutely certain that CRED() isn't going to
6345 * change out from under us, so it's only legit to
6346 * examine that structure if we're in constrained
6347 * situations. Currently, the only time we'll do this
6348 * check is if a non-super-user has enabled the
6349 * profile or syscall providers -- providers that
6350 * allow visibility of all processes. For the
6351 * profile case, the check above will ensure that
6352 * we're examining a user context.
6353 */
6354 if (ecb->dte_cond & DTRACE_COND_OWNER) {
6355 cred_t *cr;
6356 cred_t *s_cr =
6357 ecb->dte_state->dts_cred.dcr_cred;
6358 proc_t *proc;
b0d623f7 6359#pragma unused(proc) /* __APPLE__ */
2d21ac55
A
6360
6361 ASSERT(s_cr != NULL);
6362
6d2010ae
A
6363 /*
6364 * XXX this is hackish, but so is setting a variable
6365 * XXX in a McCarthy OR...
6366 */
2d21ac55 6367 if ((cr = dtrace_CRED()) == NULL ||
6d2010ae
A
6368 posix_cred_get(s_cr)->cr_uid != posix_cred_get(cr)->cr_uid ||
6369 posix_cred_get(s_cr)->cr_uid != posix_cred_get(cr)->cr_ruid ||
6370 posix_cred_get(s_cr)->cr_uid != posix_cred_get(cr)->cr_suid ||
6371 posix_cred_get(s_cr)->cr_gid != posix_cred_get(cr)->cr_gid ||
6372 posix_cred_get(s_cr)->cr_gid != posix_cred_get(cr)->cr_rgid ||
6373 posix_cred_get(s_cr)->cr_gid != posix_cred_get(cr)->cr_sgid ||
2d21ac55
A
6374#if !defined(__APPLE__)
6375 (proc = ttoproc(curthread)) == NULL ||
6376 (proc->p_flag & SNOCD))
6377#else
fe8ab488 6378 1) /* APPLE NOTE: Darwin omits "No Core Dump" flag */
2d21ac55
A
6379#endif /* __APPLE__ */
6380 continue;
6381 }
6382
6383 if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) {
6384 cred_t *cr;
6385 cred_t *s_cr =
6386 ecb->dte_state->dts_cred.dcr_cred;
b0d623f7 6387#pragma unused(cr, s_cr) /* __APPLE__ */
2d21ac55
A
6388
6389 ASSERT(s_cr != NULL);
6390
b0d623f7 6391#if !defined(__APPLE__)
2d21ac55
A
6392 if ((cr = CRED()) == NULL ||
6393 s_cr->cr_zone->zone_id !=
6394 cr->cr_zone->zone_id)
6395 continue;
b0d623f7 6396#else
fe8ab488 6397 /* APPLE NOTE: Darwin doesn't do zones. */
2d21ac55
A
6398#endif /* __APPLE__ */
6399 }
6400 }
6401
6402 if (now - state->dts_alive > dtrace_deadman_timeout) {
6403 /*
6404 * We seem to be dead. Unless we (a) have kernel
6405 * destructive permissions, (b) have explicitly enabled
6406 * destructive actions, and (c) destructive actions have
6407 * not been disabled, we're going to transition into
6408 * the KILLED state, from which no further processing
6409 * on this state will be performed.
6410 */
6411 if (!dtrace_priv_kernel_destructive(state) ||
6412 !state->dts_cred.dcr_destructive ||
6413 dtrace_destructive_disallow) {
6414 void *activity = &state->dts_activity;
6415 dtrace_activity_t current;
6416
6417 do {
6418 current = state->dts_activity;
6419 } while (dtrace_cas32(activity, current,
6420 DTRACE_ACTIVITY_KILLED) != current);
6421
6422 continue;
6423 }
6424 }
6425
6426 if ((offs = dtrace_buffer_reserve(buf, ecb->dte_needed,
6427 ecb->dte_alignment, state, &mstate)) < 0)
6428 continue;
6429
6430 tomax = buf->dtb_tomax;
6431 ASSERT(tomax != NULL);
6432
04b8595b
A
6433 /*
6434 * Build and store the record header corresponding to the ECB.
6435 */
6436 if (ecb->dte_size != 0) {
6437 dtrace_rechdr_t dtrh;
6438
6439 if (!(mstate.dtms_present & DTRACE_MSTATE_TIMESTAMP)) {
6440 mstate.dtms_timestamp = dtrace_gethrtime();
6441 mstate.dtms_present |= DTRACE_MSTATE_TIMESTAMP;
6442 }
6443
6444 ASSERT(ecb->dte_size >= sizeof(dtrace_rechdr_t));
6445
6446 dtrh.dtrh_epid = ecb->dte_epid;
6447 DTRACE_RECORD_STORE_TIMESTAMP(&dtrh, mstate.dtms_timestamp);
6448 DTRACE_STORE(dtrace_rechdr_t, tomax, offs, dtrh);
6449 }
2d21ac55
A
6450
6451 mstate.dtms_epid = ecb->dte_epid;
6452 mstate.dtms_present |= DTRACE_MSTATE_EPID;
6453
b0d623f7
A
6454 if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)
6455 mstate.dtms_access = DTRACE_ACCESS_KERNEL;
6456 else
6457 mstate.dtms_access = 0;
6458
2d21ac55
A
6459 if (pred != NULL) {
6460 dtrace_difo_t *dp = pred->dtp_difo;
6461 int rval;
6462
6463 rval = dtrace_dif_emulate(dp, &mstate, vstate, state);
6464
6465 if (!(*flags & CPU_DTRACE_ERROR) && !rval) {
6466 dtrace_cacheid_t cid = probe->dtpr_predcache;
6467
6468 if (cid != DTRACE_CACHEIDNONE && !onintr) {
6469 /*
6470 * Update the predicate cache...
6471 */
6472 ASSERT(cid == pred->dtp_cacheid);
fe8ab488 6473
2d21ac55 6474 dtrace_set_thread_predcache(current_thread(), cid);
2d21ac55
A
6475 }
6476
6477 continue;
6478 }
6479 }
6480
6481 for (act = ecb->dte_action; !(*flags & CPU_DTRACE_ERROR) &&
6482 act != NULL; act = act->dta_next) {
6483 size_t valoffs;
6484 dtrace_difo_t *dp;
6485 dtrace_recdesc_t *rec = &act->dta_rec;
6486
6487 size = rec->dtrd_size;
6488 valoffs = offs + rec->dtrd_offset;
6489
6490 if (DTRACEACT_ISAGG(act->dta_kind)) {
6491 uint64_t v = 0xbad;
6492 dtrace_aggregation_t *agg;
6493
6494 agg = (dtrace_aggregation_t *)act;
6495
6496 if ((dp = act->dta_difo) != NULL)
6497 v = dtrace_dif_emulate(dp,
6498 &mstate, vstate, state);
6499
6500 if (*flags & CPU_DTRACE_ERROR)
6501 continue;
6502
6503 /*
6504 * Note that we always pass the expression
6505 * value from the previous iteration of the
6506 * action loop. This value will only be used
6507 * if there is an expression argument to the
6508 * aggregating action, denoted by the
6509 * dtag_hasarg field.
6510 */
6511 dtrace_aggregate(agg, buf,
6512 offs, aggbuf, v, val);
6513 continue;
6514 }
6515
6516 switch (act->dta_kind) {
6517 case DTRACEACT_STOP:
6518 if (dtrace_priv_proc_destructive(state))
6519 dtrace_action_stop();
6520 continue;
6521
6522 case DTRACEACT_BREAKPOINT:
6523 if (dtrace_priv_kernel_destructive(state))
6524 dtrace_action_breakpoint(ecb);
6525 continue;
6526
6527 case DTRACEACT_PANIC:
6528 if (dtrace_priv_kernel_destructive(state))
6529 dtrace_action_panic(ecb);
6530 continue;
6531
6532 case DTRACEACT_STACK:
6533 if (!dtrace_priv_kernel(state))
6534 continue;
6535
b0d623f7
A
6536 dtrace_getpcstack((pc_t *)(tomax + valoffs),
6537 size / sizeof (pc_t), probe->dtpr_aframes,
6538 DTRACE_ANCHORED(probe) ? NULL :
6539 (uint32_t *)(uintptr_t)arg0);
2d21ac55
A
6540 continue;
6541
6542 case DTRACEACT_JSTACK:
6543 case DTRACEACT_USTACK:
6544 if (!dtrace_priv_proc(state))
6545 continue;
6546
6547 /*
6548 * See comment in DIF_VAR_PID.
6549 */
6550 if (DTRACE_ANCHORED(mstate.dtms_probe) &&
6551 CPU_ON_INTR(CPU)) {
6552 int depth = DTRACE_USTACK_NFRAMES(
6553 rec->dtrd_arg) + 1;
6554
6555 dtrace_bzero((void *)(tomax + valoffs),
6556 DTRACE_USTACK_STRSIZE(rec->dtrd_arg)
6557 + depth * sizeof (uint64_t));
6558
6559 continue;
6560 }
6561
6562 if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0 &&
6563 curproc->p_dtrace_helpers != NULL) {
6564 /*
6565 * This is the slow path -- we have
6566 * allocated string space, and we're
6567 * getting the stack of a process that
6568 * has helpers. Call into a separate
6569 * routine to perform this processing.
6570 */
6571 dtrace_action_ustack(&mstate, state,
6572 (uint64_t *)(tomax + valoffs),
6573 rec->dtrd_arg);
6574 continue;
6575 }
6576
6577 DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
6578 dtrace_getupcstack((uint64_t *)
6579 (tomax + valoffs),
6580 DTRACE_USTACK_NFRAMES(rec->dtrd_arg) + 1);
6581 DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
6582 continue;
6583
6584 default:
6585 break;
6586 }
6587
6588 dp = act->dta_difo;
6589 ASSERT(dp != NULL);
6590
6591 val = dtrace_dif_emulate(dp, &mstate, vstate, state);
6592
6593 if (*flags & CPU_DTRACE_ERROR)
6594 continue;
6595
6596 switch (act->dta_kind) {
04b8595b
A
6597 case DTRACEACT_SPECULATE: {
6598 dtrace_rechdr_t *dtrh = NULL;
6599
2d21ac55
A
6600 ASSERT(buf == &state->dts_buffer[cpuid]);
6601 buf = dtrace_speculation_buffer(state,
6602 cpuid, val);
6603
6604 if (buf == NULL) {
6605 *flags |= CPU_DTRACE_DROP;
6606 continue;
6607 }
6608
6609 offs = dtrace_buffer_reserve(buf,
6610 ecb->dte_needed, ecb->dte_alignment,
6611 state, NULL);
6612
6613 if (offs < 0) {
6614 *flags |= CPU_DTRACE_DROP;
6615 continue;
6616 }
6617
6618 tomax = buf->dtb_tomax;
6619 ASSERT(tomax != NULL);
6620
39037602 6621 if (ecb->dte_size == 0)
04b8595b
A
6622 continue;
6623
6624 ASSERT(ecb->dte_size >= sizeof(dtrace_rechdr_t));
6625 dtrh = ((void *)(tomax + offs));
6626 dtrh->dtrh_epid = ecb->dte_epid;
6627
6628 /*
6629 * When the speculation is committed, all of
6630 * the records in the speculative buffer will
6631 * have their timestamps set to the commit
6632 * time. Until then, it is set to a sentinel
6633 * value, for debugability.
6634 */
6635 DTRACE_RECORD_STORE_TIMESTAMP(dtrh, UINT64_MAX);
6636
6637 continue;
6638 }
2d21ac55
A
6639
6640 case DTRACEACT_CHILL:
6641 if (dtrace_priv_kernel_destructive(state))
6642 dtrace_action_chill(&mstate, val);
6643 continue;
6644
6645 case DTRACEACT_RAISE:
6646 if (dtrace_priv_proc_destructive(state))
6647 dtrace_action_raise(val);
6648 continue;
6649
fe8ab488 6650 case DTRACEACT_PIDRESUME: /* __APPLE__ */
6d2010ae
A
6651 if (dtrace_priv_proc_destructive(state))
6652 dtrace_action_pidresume(val);
6653 continue;
6d2010ae 6654
2d21ac55
A
6655 case DTRACEACT_COMMIT:
6656 ASSERT(!committed);
6657
6658 /*
6659 * We need to commit our buffer state.
6660 */
6661 if (ecb->dte_size)
6662 buf->dtb_offset = offs + ecb->dte_size;
6663 buf = &state->dts_buffer[cpuid];
6664 dtrace_speculation_commit(state, cpuid, val);
6665 committed = 1;
6666 continue;
6667
6668 case DTRACEACT_DISCARD:
6669 dtrace_speculation_discard(state, cpuid, val);
6670 continue;
6671
6672 case DTRACEACT_DIFEXPR:
6673 case DTRACEACT_LIBACT:
6674 case DTRACEACT_PRINTF:
6675 case DTRACEACT_PRINTA:
6676 case DTRACEACT_SYSTEM:
6677 case DTRACEACT_FREOPEN:
fe8ab488
A
6678 case DTRACEACT_APPLEBINARY: /* __APPLE__ */
6679 case DTRACEACT_TRACEMEM:
6680 break;
6681
6682 case DTRACEACT_TRACEMEM_DYNSIZE:
6683 tracememsize = val;
2d21ac55
A
6684 break;
6685
6686 case DTRACEACT_SYM:
6687 case DTRACEACT_MOD:
6688 if (!dtrace_priv_kernel(state))
6689 continue;
6690 break;
6691
2d21ac55
A
6692 case DTRACEACT_USYM:
6693 case DTRACEACT_UMOD:
6694 case DTRACEACT_UADDR: {
6695 if (!dtrace_priv_proc(state))
6696 continue;
6697
6698 DTRACE_STORE(uint64_t, tomax,
39236c6e 6699 valoffs, (uint64_t)dtrace_proc_selfpid());
2d21ac55
A
6700 DTRACE_STORE(uint64_t, tomax,
6701 valoffs + sizeof (uint64_t), val);
6702
6703 continue;
6704 }
2d21ac55
A
6705
6706 case DTRACEACT_EXIT: {
6707 /*
6708 * For the exit action, we are going to attempt
6709 * to atomically set our activity to be
6710 * draining. If this fails (either because
6711 * another CPU has beat us to the exit action,
6712 * or because our current activity is something
6713 * other than ACTIVE or WARMUP), we will
6714 * continue. This assures that the exit action
6715 * can be successfully recorded at most once
6716 * when we're in the ACTIVE state. If we're
6717 * encountering the exit() action while in
6718 * COOLDOWN, however, we want to honor the new
6719 * status code. (We know that we're the only
6720 * thread in COOLDOWN, so there is no race.)
6721 */
6722 void *activity = &state->dts_activity;
6723 dtrace_activity_t current = state->dts_activity;
6724
6725 if (current == DTRACE_ACTIVITY_COOLDOWN)
6726 break;
6727
6728 if (current != DTRACE_ACTIVITY_WARMUP)
6729 current = DTRACE_ACTIVITY_ACTIVE;
6730
6731 if (dtrace_cas32(activity, current,
6732 DTRACE_ACTIVITY_DRAINING) != current) {
6733 *flags |= CPU_DTRACE_DROP;
6734 continue;
6735 }
6736
6737 break;
6738 }
6739
6740 default:
6741 ASSERT(0);
6742 }
6743
3e170ce0 6744 if (dp->dtdo_rtype.dtdt_flags & (DIF_TF_BYREF | DIF_TF_BYUREF)) {
2d21ac55
A
6745 uintptr_t end = valoffs + size;
6746
fe8ab488
A
6747 if (tracememsize != 0 &&
6748 valoffs + tracememsize < end)
6749 {
6750 end = valoffs + tracememsize;
6751 tracememsize = 0;
6752 }
6753
3e170ce0
A
6754 if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF &&
6755 !dtrace_vcanload((void *)(uintptr_t)val,
39037602 6756 &dp->dtdo_rtype, NULL, &mstate, vstate))
3e170ce0 6757 {
2d21ac55
A
6758 continue;
6759 }
6760
3e170ce0
A
6761 dtrace_store_by_ref(dp, tomax, size, &valoffs,
6762 &val, end, act->dta_intuple,
6763 dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF ?
6764 DIF_TF_BYREF: DIF_TF_BYUREF);
2d21ac55
A
6765
6766 continue;
6767 }
6768
6769 switch (size) {
6770 case 0:
6771 break;
6772
6773 case sizeof (uint8_t):
6774 DTRACE_STORE(uint8_t, tomax, valoffs, val);
6775 break;
6776 case sizeof (uint16_t):
6777 DTRACE_STORE(uint16_t, tomax, valoffs, val);
6778 break;
6779 case sizeof (uint32_t):
6780 DTRACE_STORE(uint32_t, tomax, valoffs, val);
6781 break;
6782 case sizeof (uint64_t):
6783 DTRACE_STORE(uint64_t, tomax, valoffs, val);
6784 break;
6785 default:
6786 /*
6787 * Any other size should have been returned by
6788 * reference, not by value.
6789 */
6790 ASSERT(0);
6791 break;
6792 }
6793 }
6794
6795 if (*flags & CPU_DTRACE_DROP)
6796 continue;
6797
6798 if (*flags & CPU_DTRACE_FAULT) {
6799 int ndx;
6800 dtrace_action_t *err;
6801
6802 buf->dtb_errors++;
6803
6804 if (probe->dtpr_id == dtrace_probeid_error) {
6805 /*
6806 * There's nothing we can do -- we had an
6807 * error on the error probe. We bump an
6808 * error counter to at least indicate that
6809 * this condition happened.
6810 */
6811 dtrace_error(&state->dts_dblerrors);
6812 continue;
6813 }
6814
6815 if (vtime) {
6816 /*
6817 * Before recursing on dtrace_probe(), we
6818 * need to explicitly clear out our start
6819 * time to prevent it from being accumulated
6820 * into t_dtrace_vtime.
6821 */
fe8ab488
A
6822
6823 /*
6824 * Darwin sets the sign bit on t_dtrace_tracing
6825 * to suspend accumulation into it.
6826 */
2d21ac55 6827 dtrace_set_thread_tracing(current_thread(),
fe8ab488
A
6828 (1ULL<<63) | dtrace_get_thread_tracing(current_thread()));
6829
2d21ac55
A
6830 }
6831
6832 /*
6833 * Iterate over the actions to figure out which action
6834 * we were processing when we experienced the error.
6835 * Note that act points _past_ the faulting action; if
6836 * act is ecb->dte_action, the fault was in the
6837 * predicate; if it's ecb->dte_action->dta_next, it's
6838 * in action #1, and so on.
6839 */
6840 for (err = ecb->dte_action, ndx = 0;
6841 err != act; err = err->dta_next, ndx++)
6842 continue;
6843
6844 dtrace_probe_error(state, ecb->dte_epid, ndx,
6845 (mstate.dtms_present & DTRACE_MSTATE_FLTOFFS) ?
6846 mstate.dtms_fltoffs : -1, DTRACE_FLAGS2FLT(*flags),
6847 cpu_core[cpuid].cpuc_dtrace_illval);
6848
6849 continue;
6850 }
6851
6852 if (!committed)
6853 buf->dtb_offset = offs + ecb->dte_size;
6854 }
6855
fe8ab488 6856 /* FIXME: On Darwin, the time spent leaving DTrace from this point to the rti
b0d623f7 6857  * is attributed to the current thread. Instead it should accrue to DTrace. */
2d21ac55
A
6858 if (vtime) {
6859 thread_t thread = current_thread();
6860 int64_t t = dtrace_get_thread_tracing(thread);
6861
6862 if (t >= 0) {
6863 /* Usual case, accumulate time spent here into t_dtrace_tracing */
6864 dtrace_set_thread_tracing(thread, t + (dtrace_gethrtime() - now));
6865 } else {
6866 /* Return from error recursion. No accumulation, just clear the sign bit on t_dtrace_tracing. */
6867 dtrace_set_thread_tracing(thread, (~(1ULL<<63)) & t);
6868 }
6869 }
2d21ac55
A
6870
6871 dtrace_interrupt_enable(cookie);
6872}
6873
fe8ab488
A
6874/*
6875 * APPLE NOTE: Don't allow a thread to re-enter dtrace_probe().
6876 * This could occur if a probe is encountered on some function in the
6877 * transitive closure of the call to dtrace_probe().
6878 * Solaris has some strong guarantees that this won't happen.
6879 * The Darwin implementation is not so mature as to make those guarantees.
6880 * Hence, the introduction of __dtrace_probe() on xnu.
6881 */
6d2010ae 6882
2d21ac55
A
6883void
6884dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
6885 uint64_t arg2, uint64_t arg3, uint64_t arg4)
6886{
6887 thread_t thread = current_thread();
6d2010ae 6888 disable_preemption();
2d21ac55
A
6889 if (id == dtrace_probeid_error) {
6890 __dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
b0d623f7 6891 dtrace_getipl(); /* Defeat tail-call optimization of __dtrace_probe() */
2d21ac55
A
6892 } else if (!dtrace_get_thread_reentering(thread)) {
6893 dtrace_set_thread_reentering(thread, TRUE);
6894 __dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
6895 dtrace_set_thread_reentering(thread, FALSE);
6896 }
b0d623f7
A
6897#if DEBUG
6898 else __dtrace_probe(dtrace_probeid_error, 0, id, 1, -1, DTRACEFLT_UNKNOWN);
6899#endif
6d2010ae 6900 enable_preemption();
2d21ac55 6901}
2d21ac55
A
6902
6903/*
6904 * DTrace Probe Hashing Functions
6905 *
6906 * The functions in this section (and indeed, the functions in remaining
6907 * sections) are not _called_ from probe context. (Any exceptions to this are
6908 * marked with a "Note:".) Rather, they are called from elsewhere in the
6909 * DTrace framework to look up probes in, add probes to, and remove probes from
6910 * the DTrace probe hashes. (Each probe is hashed by each element of the
6911 * probe tuple -- allowing for fast lookups, regardless of what was
6912 * specified.)
6913 */
6914static uint_t
b0d623f7 6915dtrace_hash_str(const char *p)
2d21ac55
A
6916{
6917 unsigned int g;
6918 uint_t hval = 0;
6919
6920 while (*p) {
6921 hval = (hval << 4) + *p++;
6922 if ((g = (hval & 0xf0000000)) != 0)
6923 hval ^= g >> 24;
6924 hval &= ~g;
6925 }
6926 return (hval);
6927}
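/*
 * Illustrative note: this is the classic ELF/PJW string hash.  The probe
 * hashes below reduce it with a power-of-two mask rather than a modulus,
 * e.g.
 *
 *	ndx = DTRACE_HASHSTR(hash, probe) & hash->dth_mask;
 *
 * which is why dth_size is always kept a power of two, with dth_mask
 * maintained as dth_size - 1.
 */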
6928
6929static dtrace_hash_t *
6930dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs)
6931{
6932 dtrace_hash_t *hash = kmem_zalloc(sizeof (dtrace_hash_t), KM_SLEEP);
6933
6934 hash->dth_stroffs = stroffs;
6935 hash->dth_nextoffs = nextoffs;
6936 hash->dth_prevoffs = prevoffs;
6937
6938 hash->dth_size = 1;
6939 hash->dth_mask = hash->dth_size - 1;
6940
6941 hash->dth_tab = kmem_zalloc(hash->dth_size *
6942 sizeof (dtrace_hashbucket_t *), KM_SLEEP);
6943
6944 return (hash);
6945}
6946
fe8ab488
A
6947/*
6948 * APPLE NOTE: dtrace_hash_destroy is not used.
6949 * It is called by dtrace_detach which is not
6950 * currently implemented. Revisit someday.
6951 */
6952#if !defined(__APPLE__)
2d21ac55
A
6953static void
6954dtrace_hash_destroy(dtrace_hash_t *hash)
6955{
b0d623f7 6956#if DEBUG
2d21ac55
A
6957 int i;
6958
6959 for (i = 0; i < hash->dth_size; i++)
6960 ASSERT(hash->dth_tab[i] == NULL);
6961#endif
6962
6963 kmem_free(hash->dth_tab,
6964 hash->dth_size * sizeof (dtrace_hashbucket_t *));
6965 kmem_free(hash, sizeof (dtrace_hash_t));
6966}
6967#endif /* __APPLE__ */
6968
6969static void
6970dtrace_hash_resize(dtrace_hash_t *hash)
6971{
6972 int size = hash->dth_size, i, ndx;
6973 int new_size = hash->dth_size << 1;
6974 int new_mask = new_size - 1;
6975 dtrace_hashbucket_t **new_tab, *bucket, *next;
6976
6977 ASSERT((new_size & new_mask) == 0);
6978
6979 new_tab = kmem_zalloc(new_size * sizeof (void *), KM_SLEEP);
6980
6981 for (i = 0; i < size; i++) {
6982 for (bucket = hash->dth_tab[i]; bucket != NULL; bucket = next) {
6983 dtrace_probe_t *probe = bucket->dthb_chain;
6984
6985 ASSERT(probe != NULL);
6986 ndx = DTRACE_HASHSTR(hash, probe) & new_mask;
6987
6988 next = bucket->dthb_next;
6989 bucket->dthb_next = new_tab[ndx];
6990 new_tab[ndx] = bucket;
6991 }
6992 }
6993
6994 kmem_free(hash->dth_tab, hash->dth_size * sizeof (void *));
6995 hash->dth_tab = new_tab;
6996 hash->dth_size = new_size;
6997 hash->dth_mask = new_mask;
6998}
6999
7000static void
7001dtrace_hash_add(dtrace_hash_t *hash, dtrace_probe_t *new)
7002{
7003 int hashval = DTRACE_HASHSTR(hash, new);
7004 int ndx = hashval & hash->dth_mask;
7005 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
7006 dtrace_probe_t **nextp, **prevp;
7007
7008 for (; bucket != NULL; bucket = bucket->dthb_next) {
7009 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, new))
7010 goto add;
7011 }
7012
7013 if ((hash->dth_nbuckets >> 1) > hash->dth_size) {
7014 dtrace_hash_resize(hash);
7015 dtrace_hash_add(hash, new);
7016 return;
7017 }
7018
7019 bucket = kmem_zalloc(sizeof (dtrace_hashbucket_t), KM_SLEEP);
7020 bucket->dthb_next = hash->dth_tab[ndx];
7021 hash->dth_tab[ndx] = bucket;
7022 hash->dth_nbuckets++;
7023
7024add:
7025 nextp = DTRACE_HASHNEXT(hash, new);
7026 ASSERT(*nextp == NULL && *(DTRACE_HASHPREV(hash, new)) == NULL);
7027 *nextp = bucket->dthb_chain;
7028
7029 if (bucket->dthb_chain != NULL) {
7030 prevp = DTRACE_HASHPREV(hash, bucket->dthb_chain);
7031 ASSERT(*prevp == NULL);
7032 *prevp = new;
7033 }
7034
7035 bucket->dthb_chain = new;
7036 bucket->dthb_len++;
7037}
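/*
 * Illustrative note: the resize trigger above bounds the load factor --
 * once the number of distinct buckets exceeds twice the table size
 * (dth_nbuckets >> 1 > dth_size), dtrace_hash_resize() doubles the table
 * and the add is simply retried against the resized table.
 */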
7038
7039static dtrace_probe_t *
7040dtrace_hash_lookup(dtrace_hash_t *hash, dtrace_probe_t *template)
7041{
7042 int hashval = DTRACE_HASHSTR(hash, template);
7043 int ndx = hashval & hash->dth_mask;
7044 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
7045
7046 for (; bucket != NULL; bucket = bucket->dthb_next) {
7047 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
7048 return (bucket->dthb_chain);
7049 }
7050
7051 return (NULL);
7052}
7053
7054static int
7055dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template)
7056{
7057 int hashval = DTRACE_HASHSTR(hash, template);
7058 int ndx = hashval & hash->dth_mask;
7059 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
7060
7061 for (; bucket != NULL; bucket = bucket->dthb_next) {
7062 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
7063 return (bucket->dthb_len);
7064 }
7065
fe8ab488 7066 return (0);
2d21ac55
A
7067}
7068
7069static void
7070dtrace_hash_remove(dtrace_hash_t *hash, dtrace_probe_t *probe)
7071{
7072 int ndx = DTRACE_HASHSTR(hash, probe) & hash->dth_mask;
7073 dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
7074
7075 dtrace_probe_t **prevp = DTRACE_HASHPREV(hash, probe);
7076 dtrace_probe_t **nextp = DTRACE_HASHNEXT(hash, probe);
7077
7078 /*
7079 * Find the bucket that we're removing this probe from.
7080 */
7081 for (; bucket != NULL; bucket = bucket->dthb_next) {
7082 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, probe))
7083 break;
7084 }
7085
7086 ASSERT(bucket != NULL);
7087
7088 if (*prevp == NULL) {
7089 if (*nextp == NULL) {
7090 /*
7091 * The removed probe was the only probe on this
7092 * bucket; we need to remove the bucket.
7093 */
7094 dtrace_hashbucket_t *b = hash->dth_tab[ndx];
7095
7096 ASSERT(bucket->dthb_chain == probe);
7097 ASSERT(b != NULL);
7098
7099 if (b == bucket) {
7100 hash->dth_tab[ndx] = bucket->dthb_next;
7101 } else {
7102 while (b->dthb_next != bucket)
7103 b = b->dthb_next;
7104 b->dthb_next = bucket->dthb_next;
7105 }
7106
7107 ASSERT(hash->dth_nbuckets > 0);
7108 hash->dth_nbuckets--;
7109 kmem_free(bucket, sizeof (dtrace_hashbucket_t));
7110 return;
7111 }
7112
7113 bucket->dthb_chain = *nextp;
7114 } else {
7115 *(DTRACE_HASHNEXT(hash, *prevp)) = *nextp;
7116 }
7117
7118 if (*nextp != NULL)
7119 *(DTRACE_HASHPREV(hash, *nextp)) = *prevp;
7120}
7121
7122/*
7123 * DTrace Utility Functions
7124 *
7125 * These are random utility functions that are _not_ called from probe context.
7126 */
7127static int
7128dtrace_badattr(const dtrace_attribute_t *a)
7129{
7130 return (a->dtat_name > DTRACE_STABILITY_MAX ||
7131 a->dtat_data > DTRACE_STABILITY_MAX ||
7132 a->dtat_class > DTRACE_CLASS_MAX);
7133}
7134
7135/*
7136 * Return a duplicate copy of a string. If the specified string is NULL,
7137 * this function returns a zero-length string.
fe8ab488 7138 * APPLE NOTE: Darwin employs size-bounded string operations.
2d21ac55 7139 */
b0d623f7
A
7140static char *
7141dtrace_strdup(const char *str)
7142{
7143 size_t bufsize = (str != NULL ? strlen(str) : 0) + 1;
7144 char *new = kmem_zalloc(bufsize, KM_SLEEP);
7145
7146 if (str != NULL)
7147 (void) strlcpy(new, str, bufsize);
7148
7149 return (new);
7150}
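/*
 * Illustrative usage note: because dtrace_strdup(NULL) returns a
 * zero-length string rather than NULL, a caller may unconditionally
 * kmem_free(new, strlen(new) + 1) without first checking for NULL.
 */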
2d21ac55
A
7151
7152#define DTRACE_ISALPHA(c) \
7153 (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
7154
7155static int
7156dtrace_badname(const char *s)
7157{
7158 char c;
7159
7160 if (s == NULL || (c = *s++) == '\0')
7161 return (0);
7162
7163 if (!DTRACE_ISALPHA(c) && c != '-' && c != '_' && c != '.')
7164 return (1);
7165
7166 while ((c = *s++) != '\0') {
7167 if (!DTRACE_ISALPHA(c) && (c < '0' || c > '9') &&
7168 c != '-' && c != '_' && c != '.' && c != '`')
7169 return (1);
7170 }
7171
7172 return (0);
7173}
7174
7175static void
7176dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp)
7177{
7178 uint32_t priv;
7179
7180 if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
39037602
A
7181 if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed()) {
7182 priv = DTRACE_PRIV_USER | DTRACE_PRIV_PROC;
7183 }
7184 else {
7185 priv = DTRACE_PRIV_ALL;
7186 }
2d21ac55
A
7187 } else {
7188 *uidp = crgetuid(cr);
7189 *zoneidp = crgetzoneid(cr);
7190
7191 priv = 0;
7192 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE))
7193 priv |= DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER;
7194 else if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE))
7195 priv |= DTRACE_PRIV_USER;
7196 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE))
7197 priv |= DTRACE_PRIV_PROC;
7198 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
7199 priv |= DTRACE_PRIV_OWNER;
7200 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
7201 priv |= DTRACE_PRIV_ZONEOWNER;
7202 }
7203
7204 *privp = priv;
7205}
7206
7207#ifdef DTRACE_ERRDEBUG
7208static void
7209dtrace_errdebug(const char *str)
7210{
b0d623f7 7211 int hval = dtrace_hash_str(str) % DTRACE_ERRHASHSZ;
2d21ac55
A
7212 int occupied = 0;
7213
7214 lck_mtx_lock(&dtrace_errlock);
7215 dtrace_errlast = str;
b0d623f7 7216 dtrace_errthread = (kthread_t *)current_thread();
2d21ac55
A
7217
7218 while (occupied++ < DTRACE_ERRHASHSZ) {
7219 if (dtrace_errhash[hval].dter_msg == str) {
7220 dtrace_errhash[hval].dter_count++;
7221 goto out;
7222 }
7223
7224 if (dtrace_errhash[hval].dter_msg != NULL) {
7225 hval = (hval + 1) % DTRACE_ERRHASHSZ;
7226 continue;
7227 }
7228
7229 dtrace_errhash[hval].dter_msg = str;
7230 dtrace_errhash[hval].dter_count = 1;
7231 goto out;
7232 }
7233
7234 panic("dtrace: undersized error hash");
7235out:
7236 lck_mtx_unlock(&dtrace_errlock);
7237}
7238#endif
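/*
 * Illustrative note: dtrace_errhash is a small open-addressed table.  It
 * is keyed by the message *pointer* rather than its contents (which works
 * when callers pass string literals), collisions fall through to linear
 * probing -- (hval + 1) % DTRACE_ERRHASHSZ -- and the table panics rather
 * than evicts once all DTRACE_ERRHASHSZ slots are occupied.
 */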
7239
7240/*
7241 * DTrace Matching Functions
7242 *
7243 * These functions are used to match groups of probes, given some elements of
7244 * a probe tuple, or some globbed expressions for elements of a probe tuple.
7245 */
7246static int
7247dtrace_match_priv(const dtrace_probe_t *prp, uint32_t priv, uid_t uid,
7248 zoneid_t zoneid)
7249{
7250 if (priv != DTRACE_PRIV_ALL) {
7251 uint32_t ppriv = prp->dtpr_provider->dtpv_priv.dtpp_flags;
7252 uint32_t match = priv & ppriv;
7253
7254 /*
7255 * No PRIV_DTRACE_* privileges...
7256 */
7257 if ((priv & (DTRACE_PRIV_PROC | DTRACE_PRIV_USER |
7258 DTRACE_PRIV_KERNEL)) == 0)
7259 return (0);
7260
7261 /*
7262 * No matching bits, but there were bits to match...
7263 */
7264 if (match == 0 && ppriv != 0)
7265 return (0);
7266
7267 /*
7268 * Need to have permissions to the process, but don't...
7269 */
7270 if (((ppriv & ~match) & DTRACE_PRIV_OWNER) != 0 &&
7271 uid != prp->dtpr_provider->dtpv_priv.dtpp_uid) {
7272 return (0);
7273 }
7274
7275 /*
7276 * Need to be in the same zone unless we possess the
7277 * privilege to examine all zones.
7278 */
7279 if (((ppriv & ~match) & DTRACE_PRIV_ZONEOWNER) != 0 &&
7280 zoneid != prp->dtpr_provider->dtpv_priv.dtpp_zoneid) {
7281 return (0);
7282 }
7283 }
7284
7285 return (1);
7286}
7287
7288/*
7289 * dtrace_match_probe compares a dtrace_probe_t to a pre-compiled key, which
7290 * consists of input pattern strings and an ops-vector to evaluate them.
7291 * This function returns >0 for match, 0 for no match, and <0 for error.
7292 */
7293static int
7294dtrace_match_probe(const dtrace_probe_t *prp, const dtrace_probekey_t *pkp,
7295 uint32_t priv, uid_t uid, zoneid_t zoneid)
7296{
7297 dtrace_provider_t *pvp = prp->dtpr_provider;
7298 int rv;
7299
7300 if (pvp->dtpv_defunct)
7301 return (0);
7302
7303 if ((rv = pkp->dtpk_pmatch(pvp->dtpv_name, pkp->dtpk_prov, 0)) <= 0)
7304 return (rv);
7305
7306 if ((rv = pkp->dtpk_mmatch(prp->dtpr_mod, pkp->dtpk_mod, 0)) <= 0)
7307 return (rv);
7308
7309 if ((rv = pkp->dtpk_fmatch(prp->dtpr_func, pkp->dtpk_func, 0)) <= 0)
7310 return (rv);
7311
7312 if ((rv = pkp->dtpk_nmatch(prp->dtpr_name, pkp->dtpk_name, 0)) <= 0)
7313 return (rv);
7314
7315 if (dtrace_match_priv(prp, priv, uid, zoneid) == 0)
7316 return (0);
7317
7318 return (rv);
7319}
7320
7321/*
7322 * dtrace_match_glob() is a safe kernel implementation of the gmatch(3GEN)
7323 * interface for matching a glob pattern 'p' to an input string 's'. Unlike
7324 * libc's version, the kernel version only applies to 8-bit ASCII strings.
7325 * In addition, all of the recursion cases except for '*' matching have been
7326 * unwound. For '*', we still implement recursive evaluation, but a depth
7327 * counter is maintained and matching is aborted if we recurse too deep.
7328 * The function returns 0 if no match, >0 if match, and <0 if recursion error.
7329 */
7330static int
7331dtrace_match_glob(const char *s, const char *p, int depth)
7332{
7333 const char *olds;
7334 char s1, c;
7335 int gs;
7336
7337 if (depth > DTRACE_PROBEKEY_MAXDEPTH)
7338 return (-1);
7339
7340 if (s == NULL)
7341 s = ""; /* treat NULL as empty string */
7342
7343top:
7344 olds = s;
7345 s1 = *s++;
7346
7347 if (p == NULL)
7348 return (0);
7349
7350 if ((c = *p++) == '\0')
7351 return (s1 == '\0');
7352
7353 switch (c) {
7354 case '[': {
7355 int ok = 0, notflag = 0;
7356 char lc = '\0';
7357
7358 if (s1 == '\0')
7359 return (0);
7360
7361 if (*p == '!') {
7362 notflag = 1;
7363 p++;
7364 }
7365
7366 if ((c = *p++) == '\0')
7367 return (0);
7368
7369 do {
7370 if (c == '-' && lc != '\0' && *p != ']') {
7371 if ((c = *p++) == '\0')
7372 return (0);
7373 if (c == '\\' && (c = *p++) == '\0')
7374 return (0);
7375
7376 if (notflag) {
7377 if (s1 < lc || s1 > c)
7378 ok++;
7379 else
7380 return (0);
7381 } else if (lc <= s1 && s1 <= c)
7382 ok++;
7383
7384 } else if (c == '\\' && (c = *p++) == '\0')
7385 return (0);
7386
7387 lc = c; /* save left-hand 'c' for next iteration */
7388
7389 if (notflag) {
7390 if (s1 != c)
7391 ok++;
7392 else
7393 return (0);
7394 } else if (s1 == c)
7395 ok++;
7396
7397 if ((c = *p++) == '\0')
7398 return (0);
7399
7400 } while (c != ']');
7401
7402 if (ok)
7403 goto top;
7404
7405 return (0);
7406 }
7407
7408 case '\\':
7409 if ((c = *p++) == '\0')
7410 return (0);
7411 /*FALLTHRU*/
7412
7413 default:
7414 if (c != s1)
7415 return (0);
7416 /*FALLTHRU*/
7417
7418 case '?':
7419 if (s1 != '\0')
7420 goto top;
7421 return (0);
7422
7423 case '*':
7424 while (*p == '*')
7425 p++; /* consecutive *'s are identical to a single one */
7426
7427 if (*p == '\0')
7428 return (1);
7429
7430 for (s = olds; *s != '\0'; s++) {
7431 if ((gs = dtrace_match_glob(s, p, depth + 1)) != 0)
7432 return (gs);
7433 }
7434
7435 return (0);
7436 }
7437}
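/*
 * Illustrative examples (hypothetical inputs):
 *
 *	dtrace_match_glob("syscall", "sys*", 0)		-> 1 (match)
 *	dtrace_match_glob("read", "[rw]???", 0)		-> 1 (match)
 *	dtrace_match_glob("entry", "return", 0)		-> 0 (no match)
 *
 * A return of -1 indicates that '*' matching recursed past
 * DTRACE_PROBEKEY_MAXDEPTH.
 */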
7438
7439/*ARGSUSED*/
7440static int
7441dtrace_match_string(const char *s, const char *p, int depth)
7442{
b0d623f7 7443#pragma unused(depth) /* __APPLE__ */
fe8ab488
A
7444
7445 /* APPLE NOTE: Darwin employs size-bounded string operations. */
b0d623f7 7446 return (s != NULL && strncmp(s, p, strlen(s) + 1) == 0);
2d21ac55
A
7447}
7448
7449/*ARGSUSED*/
7450static int
7451dtrace_match_nul(const char *s, const char *p, int depth)
7452{
b0d623f7 7453#pragma unused(s, p, depth) /* __APPLE__ */
2d21ac55
A
7454 return (1); /* always match the empty pattern */
7455}
7456
7457/*ARGSUSED*/
7458static int
7459dtrace_match_nonzero(const char *s, const char *p, int depth)
7460{
b0d623f7 7461#pragma unused(p, depth) /* __APPLE__ */
2d21ac55
A
7462 return (s != NULL && s[0] != '\0');
7463}
7464
7465static int
7466dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
d190cdc3 7467 zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *, void *), void *arg1, void *arg2)
2d21ac55
A
7468{
7469 dtrace_probe_t template, *probe;
7470 dtrace_hash_t *hash = NULL;
6d2010ae 7471 int len, rc, best = INT_MAX, nmatched = 0;
2d21ac55
A
7472 dtrace_id_t i;
7473
7474 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
7475
7476 /*
7477 * If the probe ID is specified in the key, just look up by ID and
7478 * invoke the match callback once if a matching probe is found.
7479 */
7480 if (pkp->dtpk_id != DTRACE_IDNONE) {
7481 if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL &&
7482 dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) {
d190cdc3 7483 if ((*matched)(probe, arg1, arg2) == DTRACE_MATCH_FAIL)
6d2010ae 7484 return (DTRACE_MATCH_FAIL);
2d21ac55
A
7485 nmatched++;
7486 }
7487 return (nmatched);
7488 }
7489
b0d623f7
A
7490 template.dtpr_mod = (char *)(uintptr_t)pkp->dtpk_mod;
7491 template.dtpr_func = (char *)(uintptr_t)pkp->dtpk_func;
7492 template.dtpr_name = (char *)(uintptr_t)pkp->dtpk_name;
2d21ac55
A
7493
7494 /*
7495 * We want to find the most distinct of the module name, function
7496 * name, and name. So for each one that is not a glob pattern or
7497 * empty string, we perform a lookup in the corresponding hash and
7498 * use the hash table with the fewest collisions to do our search.
7499 */
7500 if (pkp->dtpk_mmatch == &dtrace_match_string &&
7501 (len = dtrace_hash_collisions(dtrace_bymod, &template)) < best) {
7502 best = len;
7503 hash = dtrace_bymod;
7504 }
7505
7506 if (pkp->dtpk_fmatch == &dtrace_match_string &&
7507 (len = dtrace_hash_collisions(dtrace_byfunc, &template)) < best) {
7508 best = len;
7509 hash = dtrace_byfunc;
7510 }
7511
7512 if (pkp->dtpk_nmatch == &dtrace_match_string &&
7513 (len = dtrace_hash_collisions(dtrace_byname, &template)) < best) {
7514 best = len;
7515 hash = dtrace_byname;
7516 }
7517
7518 /*
7519 * If we did not select a hash table, iterate over every probe and
7520 * invoke our callback for each one that matches our input probe key.
7521 */
7522 if (hash == NULL) {
b0d623f7 7523 for (i = 0; i < (dtrace_id_t)dtrace_nprobes; i++) {
2d21ac55
A
7524 if ((probe = dtrace_probes[i]) == NULL ||
7525 dtrace_match_probe(probe, pkp, priv, uid,
7526 zoneid) <= 0)
7527 continue;
7528
7529 nmatched++;
7530
d190cdc3 7531 if ((rc = (*matched)(probe, arg1, arg2)) != DTRACE_MATCH_NEXT) {
6d2010ae
A
7532 if (rc == DTRACE_MATCH_FAIL)
7533 return (DTRACE_MATCH_FAIL);
7534 break;
7535 }
2d21ac55
A
7536 }
7537
7538 return (nmatched);
7539 }
7540
7541 /*
7542 * If we selected a hash table, iterate over each probe of the same key
7543 * name and invoke the callback for every probe that matches the other
7544 * attributes of our input probe key.
7545 */
7546 for (probe = dtrace_hash_lookup(hash, &template); probe != NULL;
7547 probe = *(DTRACE_HASHNEXT(hash, probe))) {
7548
7549 if (dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0)
7550 continue;
7551
7552 nmatched++;
7553
d190cdc3 7554 if ((rc = (*matched)(probe, arg1, arg2)) != DTRACE_MATCH_NEXT) {
6d2010ae
A
7555 if (rc == DTRACE_MATCH_FAIL)
7556 return (DTRACE_MATCH_FAIL);
7557 break;
7558 }
2d21ac55
A
7559 }
7560
7561 return (nmatched);
7562}
7563
7564/*
7565 * Return the function pointer dtrace_match_probe() should use to compare the
7566 * specified pattern with a string. For NULL or empty patterns, we select
7567 * dtrace_match_nul(). For glob pattern strings, we use dtrace_match_glob().
7568 * For non-empty non-glob strings, we use dtrace_match_string().
7569 */
7570static dtrace_probekey_f *
7571dtrace_probekey_func(const char *p)
7572{
7573 char c;
7574
7575 if (p == NULL || *p == '\0')
7576 return (&dtrace_match_nul);
7577
7578 while ((c = *p++) != '\0') {
7579 if (c == '[' || c == '?' || c == '*' || c == '\\')
7580 return (&dtrace_match_glob);
7581 }
7582
7583 return (&dtrace_match_string);
7584}
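
/*
 * Illustrative mappings (following directly from the rules above):
 *
 *	dtrace_probekey_func(NULL)	== &dtrace_match_nul
 *	dtrace_probekey_func("")	== &dtrace_match_nul
 *	dtrace_probekey_func("syscall")	== &dtrace_match_string
 *	dtrace_probekey_func("sys*")	== &dtrace_match_glob
 */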
7585
7586/*
7587 * Build a probe comparison key for use with dtrace_match_probe() from the
7588 * given probe description. By convention, a null key only matches anchored
7589 * probes: if each field is the empty string, reset dtpk_fmatch to
7590 * dtrace_match_nonzero().
7591 */
7592static void
7593dtrace_probekey(const dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp)
7594{
7595 pkp->dtpk_prov = pdp->dtpd_provider;
7596 pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider);
7597
7598 pkp->dtpk_mod = pdp->dtpd_mod;
7599 pkp->dtpk_mmatch = dtrace_probekey_func(pdp->dtpd_mod);
7600
7601 pkp->dtpk_func = pdp->dtpd_func;
7602 pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func);
7603
7604 pkp->dtpk_name = pdp->dtpd_name;
7605 pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name);
7606
7607 pkp->dtpk_id = pdp->dtpd_id;
7608
7609 if (pkp->dtpk_id == DTRACE_IDNONE &&
7610 pkp->dtpk_pmatch == &dtrace_match_nul &&
7611 pkp->dtpk_mmatch == &dtrace_match_nul &&
7612 pkp->dtpk_fmatch == &dtrace_match_nul &&
7613 pkp->dtpk_nmatch == &dtrace_match_nul)
7614 pkp->dtpk_fmatch = &dtrace_match_nonzero;
7615}
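
/*
 * E.g., the empty description ":::" (every field empty, no probe ID)
 * produces a key whose matchers are all dtrace_match_nul; by the
 * convention above, dtpk_fmatch is then reset to dtrace_match_nonzero,
 * so only probes with a non-empty function name -- anchored probes --
 * will match.
 */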
7616
39037602
A
7617static int
7618dtrace_cond_provider_match(dtrace_probedesc_t *desc, void *data)
7619{
7620 if (desc == NULL)
7621 return 1;
7622
7623 dtrace_probekey_f *func = dtrace_probekey_func(desc->dtpd_provider);
7624
7625 return func(desc->dtpd_provider, (char*)data, 0);
7626}
7627
2d21ac55
A
7628/*
7629 * DTrace Provider-to-Framework API Functions
7630 *
7631 * These functions implement much of the Provider-to-Framework API, as
7632 * described in <sys/dtrace.h>. The parts of the API not in this section are
7633 * the functions in the API for probe management (found below), and
7634 * dtrace_probe() itself (found above).
7635 */
7636
7637/*
7638 * Register the calling provider with the DTrace framework. This should
7639 * generally be called by DTrace providers in their attach(9E) entry point.
7640 */
7641int
7642dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv,
7643 cred_t *cr, const dtrace_pops_t *pops, void *arg, dtrace_provider_id_t *idp)
7644{
7645 dtrace_provider_t *provider;
7646
7647 if (name == NULL || pap == NULL || pops == NULL || idp == NULL) {
7648 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7649 "arguments", name ? name : "<NULL>");
7650 return (EINVAL);
7651 }
7652
7653 if (name[0] == '\0' || dtrace_badname(name)) {
7654 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7655 "provider name", name);
7656 return (EINVAL);
7657 }
7658
7659 if ((pops->dtps_provide == NULL && pops->dtps_provide_module == NULL) ||
7660 pops->dtps_enable == NULL || pops->dtps_disable == NULL ||
7661 pops->dtps_destroy == NULL ||
7662 ((pops->dtps_resume == NULL) != (pops->dtps_suspend == NULL))) {
7663 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7664 "provider ops", name);
7665 return (EINVAL);
7666 }
7667
7668 if (dtrace_badattr(&pap->dtpa_provider) ||
7669 dtrace_badattr(&pap->dtpa_mod) ||
7670 dtrace_badattr(&pap->dtpa_func) ||
7671 dtrace_badattr(&pap->dtpa_name) ||
7672 dtrace_badattr(&pap->dtpa_args)) {
7673 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7674 "provider attributes", name);
7675 return (EINVAL);
7676 }
7677
7678 if (priv & ~DTRACE_PRIV_ALL) {
7679 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7680 "privilege attributes", name);
7681 return (EINVAL);
7682 }
7683
7684 if ((priv & DTRACE_PRIV_KERNEL) &&
7685 (priv & (DTRACE_PRIV_USER | DTRACE_PRIV_OWNER)) &&
7686 pops->dtps_usermode == NULL) {
7687 cmn_err(CE_WARN, "failed to register provider '%s': need "
7688 "dtps_usermode() op for given privilege attributes", name);
7689 return (EINVAL);
7690 }
7691
7692 provider = kmem_zalloc(sizeof (dtrace_provider_t), KM_SLEEP);
fe8ab488
A
7693
7694 /* APPLE NOTE: Darwin employs size bounded string operation. */
b0d623f7
A
7695 {
7696 size_t bufsize = strlen(name) + 1;
7697 provider->dtpv_name = kmem_alloc(bufsize, KM_SLEEP);
7698 (void) strlcpy(provider->dtpv_name, name, bufsize);
7699 }
2d21ac55
A
7700
7701 provider->dtpv_attr = *pap;
7702 provider->dtpv_priv.dtpp_flags = priv;
7703 if (cr != NULL) {
7704 provider->dtpv_priv.dtpp_uid = crgetuid(cr);
7705 provider->dtpv_priv.dtpp_zoneid = crgetzoneid(cr);
7706 }
7707 provider->dtpv_pops = *pops;
7708
7709 if (pops->dtps_provide == NULL) {
7710 ASSERT(pops->dtps_provide_module != NULL);
7711 provider->dtpv_pops.dtps_provide =
7712 (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop;
7713 }
7714
7715 if (pops->dtps_provide_module == NULL) {
7716 ASSERT(pops->dtps_provide != NULL);
7717 provider->dtpv_pops.dtps_provide_module =
7718 (void (*)(void *, struct modctl *))dtrace_nullop;
7719 }
7720
7721 if (pops->dtps_suspend == NULL) {
7722 ASSERT(pops->dtps_resume == NULL);
7723 provider->dtpv_pops.dtps_suspend =
7724 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
7725 provider->dtpv_pops.dtps_resume =
7726 (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
7727 }
7728
7729 provider->dtpv_arg = arg;
7730 *idp = (dtrace_provider_id_t)provider;
7731
7732 if (pops == &dtrace_provider_ops) {
7733 lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED);
7734 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
7735 ASSERT(dtrace_anon.dta_enabling == NULL);
7736
7737 /*
7738 * We make sure that the DTrace provider is at the head of
7739 * the provider chain.
7740 */
7741 provider->dtpv_next = dtrace_provider;
7742 dtrace_provider = provider;
7743 return (0);
7744 }
7745
7746 lck_mtx_lock(&dtrace_provider_lock);
7747 lck_mtx_lock(&dtrace_lock);
7748
7749 /*
7750 * If there is at least one provider registered, we'll add this
7751 * provider after the first provider.
7752 */
7753 if (dtrace_provider != NULL) {
7754 provider->dtpv_next = dtrace_provider->dtpv_next;
7755 dtrace_provider->dtpv_next = provider;
7756 } else {
7757 dtrace_provider = provider;
7758 }
7759
7760 if (dtrace_retained != NULL) {
7761 dtrace_enabling_provide(provider);
7762
7763 /*
39037602
A
7764 * Now we need to call dtrace_enabling_matchall_with_cond() --
7765 * with a condition matching the provider name we just added,
7766 * which will acquire cpu_lock and dtrace_lock. We therefore need
2d21ac55
A
7767 * to drop all of our locks before calling into it...
7768 */
7769 lck_mtx_unlock(&dtrace_lock);
7770 lck_mtx_unlock(&dtrace_provider_lock);
39037602
A
7771
7772 dtrace_match_cond_t cond = {dtrace_cond_provider_match, provider->dtpv_name};
7773 dtrace_enabling_matchall_with_cond(&cond);
2d21ac55
A
7774
7775 return (0);
7776 }
7777
7778 lck_mtx_unlock(&dtrace_lock);
7779 lck_mtx_unlock(&dtrace_provider_lock);
7780
7781 return (0);
7782}
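
/*
 * A minimal registration sketch for a hypothetical "example" provider;
 * the attribute table and ops vector named here are placeholders, not
 * defined in this file:
 *
 *	static dtrace_provider_id_t example_id;
 *
 *	if (dtrace_register("example", &example_attr, DTRACE_PRIV_KERNEL,
 *	    NULL, &example_pops, NULL, &example_id) != 0)
 *		cmn_err(CE_WARN, "example provider failed to register");
 */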
7783
7784/*
7785 * Unregister the specified provider from the DTrace framework. This should
7786 * generally be called by DTrace providers in their detach(9E) entry point.
7787 */
7788int
7789dtrace_unregister(dtrace_provider_id_t id)
7790{
7791 dtrace_provider_t *old = (dtrace_provider_t *)id;
7792 dtrace_provider_t *prev = NULL;
7793 int i, self = 0;
7794 dtrace_probe_t *probe, *first = NULL;
7795
7796 if (old->dtpv_pops.dtps_enable ==
6d2010ae 7797 (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop) {
2d21ac55
A
7798 /*
7799 * If DTrace itself is the provider, we're called with locks
7800 * already held.
7801 */
7802 ASSERT(old == dtrace_provider);
7803 ASSERT(dtrace_devi != NULL);
7804 lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED);
7805 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55
A
7806 self = 1;
7807
7808 if (dtrace_provider->dtpv_next != NULL) {
7809 /*
7810 * There's another provider here; return failure.
7811 */
7812 return (EBUSY);
7813 }
7814 } else {
7815 lck_mtx_lock(&dtrace_provider_lock);
7816 lck_mtx_lock(&mod_lock);
7817 lck_mtx_lock(&dtrace_lock);
7818 }
7819
7820 /*
7821 * If anyone has /dev/dtrace open, or if there are anonymous enabled
7822 * probes, we refuse to let providers slither away, unless this
7823 * provider has already been explicitly invalidated.
7824 */
7825 if (!old->dtpv_defunct &&
7826 (dtrace_opens || (dtrace_anon.dta_state != NULL &&
7827 dtrace_anon.dta_state->dts_necbs > 0))) {
7828 if (!self) {
7829 lck_mtx_unlock(&dtrace_lock);
7830 lck_mtx_unlock(&mod_lock);
7831 lck_mtx_unlock(&dtrace_provider_lock);
7832 }
7833 return (EBUSY);
7834 }
7835
7836 /*
7837 * Attempt to destroy the probes associated with this provider.
7838 */
fe8ab488 7839 if (old->dtpv_ecb_count != 0) {
2d21ac55
A
7840 /*
7841 * We have at least one ECB; we can't remove this provider.
7842 */
7843 if (!self) {
7844 lck_mtx_unlock(&dtrace_lock);
7845 lck_mtx_unlock(&mod_lock);
7846 lck_mtx_unlock(&dtrace_provider_lock);
7847 }
7848 return (EBUSY);
7849 }
7850
7851 /*
7852 * All of the probes for this provider are disabled; we can safely
7853 * remove all of them from their hash chains and from the probe array.
7854 */
fe8ab488 7855 for (i = 0; i < dtrace_nprobes && old->dtpv_probe_count != 0; i++) {
2d21ac55
A
7856 if ((probe = dtrace_probes[i]) == NULL)
7857 continue;
7858
7859 if (probe->dtpr_provider != old)
7860 continue;
7861
7862 dtrace_probes[i] = NULL;
fe8ab488 7863 old->dtpv_probe_count--;
2d21ac55
A
7864
7865 dtrace_hash_remove(dtrace_bymod, probe);
7866 dtrace_hash_remove(dtrace_byfunc, probe);
7867 dtrace_hash_remove(dtrace_byname, probe);
7868
7869 if (first == NULL) {
7870 first = probe;
7871 probe->dtpr_nextmod = NULL;
7872 } else {
7873 probe->dtpr_nextmod = first;
7874 first = probe;
7875 }
7876 }
7877
7878 /*
7879 * The provider's probes have been removed from the hash chains and
7880 * from the probe array. Now issue a dtrace_sync() to be sure that
7881 * everyone has cleared out from any probe array processing.
7882 */
7883 dtrace_sync();
7884
7885 for (probe = first; probe != NULL; probe = first) {
7886 first = probe->dtpr_nextmod;
7887
7888 old->dtpv_pops.dtps_destroy(old->dtpv_arg, probe->dtpr_id,
7889 probe->dtpr_arg);
7890 kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
7891 kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
7892 kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
7893 vmem_free(dtrace_arena, (void *)(uintptr_t)(probe->dtpr_id), 1);
2d21ac55 7894 zfree(dtrace_probe_t_zone, probe);
2d21ac55
A
7895 }
7896
7897 if ((prev = dtrace_provider) == old) {
7898 ASSERT(self || dtrace_devi == NULL);
7899 ASSERT(old->dtpv_next == NULL || dtrace_devi == NULL);
7900 dtrace_provider = old->dtpv_next;
7901 } else {
7902 while (prev != NULL && prev->dtpv_next != old)
7903 prev = prev->dtpv_next;
7904
7905 if (prev == NULL) {
7906 panic("attempt to unregister non-existent "
7907 "dtrace provider %p\n", (void *)id);
7908 }
7909
7910 prev->dtpv_next = old->dtpv_next;
7911 }
7912
7913 if (!self) {
7914 lck_mtx_unlock(&dtrace_lock);
7915 lck_mtx_unlock(&mod_lock);
7916 lck_mtx_unlock(&dtrace_provider_lock);
7917 }
7918
7919 kmem_free(old->dtpv_name, strlen(old->dtpv_name) + 1);
7920 kmem_free(old, sizeof (dtrace_provider_t));
7921
7922 return (0);
7923}
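
/*
 * A provider's detach path would typically propagate the EBUSY returned
 * above rather than tearing down its own state, e.g. (sketch, with
 * example_id as in the hypothetical registration sketch earlier):
 *
 *	if (dtrace_unregister(example_id) != 0)
 *		return (EBUSY);
 */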
7924
7925/*
7926 * Invalidate the specified provider. All subsequent probe lookups for the
7927 * specified provider will fail, but its probes will not be removed.
7928 */
7929void
7930dtrace_invalidate(dtrace_provider_id_t id)
7931{
7932 dtrace_provider_t *pvp = (dtrace_provider_t *)id;
7933
7934 ASSERT(pvp->dtpv_pops.dtps_enable !=
6d2010ae 7935 (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);
2d21ac55
A
7936
7937 lck_mtx_lock(&dtrace_provider_lock);
7938 lck_mtx_lock(&dtrace_lock);
7939
7940 pvp->dtpv_defunct = 1;
7941
7942 lck_mtx_unlock(&dtrace_lock);
7943 lck_mtx_unlock(&dtrace_provider_lock);
7944}
7945
7946/*
7947 * Indicate whether or not DTrace has attached.
7948 */
7949int
7950dtrace_attached(void)
7951{
7952 /*
7953 * dtrace_provider will be non-NULL iff the DTrace driver has
7954 * attached. (It's non-NULL because DTrace is always itself a
7955 * provider.)
7956 */
7957 return (dtrace_provider != NULL);
7958}
7959
7960/*
7961 * Remove all the unenabled probes for the given provider. This function is
7962 * not unlike dtrace_unregister(), except that it doesn't remove the provider
7963 * -- just as many of its associated probes as it can.
7964 */
7965int
7966dtrace_condense(dtrace_provider_id_t id)
7967{
7968 dtrace_provider_t *prov = (dtrace_provider_t *)id;
7969 int i;
7970 dtrace_probe_t *probe;
7971
7972 /*
7973 * Make sure this isn't the dtrace provider itself.
7974 */
7975 ASSERT(prov->dtpv_pops.dtps_enable !=
6d2010ae 7976 (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);
2d21ac55
A
7977
7978 lck_mtx_lock(&dtrace_provider_lock);
7979 lck_mtx_lock(&dtrace_lock);
7980
7981 /*
7982 * Attempt to destroy the probes associated with this provider.
7983 */
7984 for (i = 0; i < dtrace_nprobes; i++) {
7985 if ((probe = dtrace_probes[i]) == NULL)
7986 continue;
7987
7988 if (probe->dtpr_provider != prov)
7989 continue;
7990
7991 if (probe->dtpr_ecb != NULL)
7992 continue;
7993
7994 dtrace_probes[i] = NULL;
fe8ab488 7995 prov->dtpv_probe_count--;
2d21ac55
A
7996
7997 dtrace_hash_remove(dtrace_bymod, probe);
7998 dtrace_hash_remove(dtrace_byfunc, probe);
7999 dtrace_hash_remove(dtrace_byname, probe);
8000
8001 prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, i + 1,
8002 probe->dtpr_arg);
8003 kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
8004 kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
8005 kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
2d21ac55 8006 zfree(dtrace_probe_t_zone, probe);
2d21ac55
A
8007 vmem_free(dtrace_arena, (void *)((uintptr_t)i + 1), 1);
8008 }
8009
8010 lck_mtx_unlock(&dtrace_lock);
8011 lck_mtx_unlock(&dtrace_provider_lock);
8012
8013 return (0);
8014}
8015
8016/*
8017 * DTrace Probe Management Functions
8018 *
8019 * The functions in this section perform the DTrace probe management,
8020 * including functions to create probes, look-up probes, and call into the
8021 * providers to request that probes be provided. Some of these functions are
8022 * in the Provider-to-Framework API; these functions can be identified by the
8023 * fact that they are not declared "static".
8024 */
8025
8026/*
8027 * Create a probe with the specified module name, function name, and name.
8028 */
8029dtrace_id_t
8030dtrace_probe_create(dtrace_provider_id_t prov, const char *mod,
8031 const char *func, const char *name, int aframes, void *arg)
8032{
8033 dtrace_probe_t *probe, **probes;
8034 dtrace_provider_t *provider = (dtrace_provider_t *)prov;
8035 dtrace_id_t id;
8036
8037 if (provider == dtrace_provider) {
8038 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
8039 } else {
8040 lck_mtx_lock(&dtrace_lock);
8041 }
8042
8043 id = (dtrace_id_t)(uintptr_t)vmem_alloc(dtrace_arena, 1,
8044 VM_BESTFIT | VM_SLEEP);
fe8ab488 8045
2d21ac55
A
8046 probe = zalloc(dtrace_probe_t_zone);
8047 bzero(probe, sizeof (dtrace_probe_t));
2d21ac55
A
8048
8049 probe->dtpr_id = id;
8050 probe->dtpr_gen = dtrace_probegen++;
8051 probe->dtpr_mod = dtrace_strdup(mod);
8052 probe->dtpr_func = dtrace_strdup(func);
8053 probe->dtpr_name = dtrace_strdup(name);
8054 probe->dtpr_arg = arg;
8055 probe->dtpr_aframes = aframes;
8056 probe->dtpr_provider = provider;
8057
8058 dtrace_hash_add(dtrace_bymod, probe);
8059 dtrace_hash_add(dtrace_byfunc, probe);
8060 dtrace_hash_add(dtrace_byname, probe);
8061
b0d623f7 8062 if (id - 1 >= (dtrace_id_t)dtrace_nprobes) {
2d21ac55
A
8063 size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *);
8064 size_t nsize = osize << 1;
8065
8066 if (nsize == 0) {
8067 ASSERT(osize == 0);
8068 ASSERT(dtrace_probes == NULL);
8069 nsize = sizeof (dtrace_probe_t *);
8070 }
8071
8072 probes = kmem_zalloc(nsize, KM_SLEEP);
8073
8074 if (dtrace_probes == NULL) {
8075 ASSERT(osize == 0);
8076 dtrace_probes = probes;
8077 dtrace_nprobes = 1;
8078 } else {
8079 dtrace_probe_t **oprobes = dtrace_probes;
8080
8081 bcopy(oprobes, probes, osize);
8082 dtrace_membar_producer();
8083 dtrace_probes = probes;
8084
8085 dtrace_sync();
8086
8087 /*
8088 * All CPUs are now seeing the new probes array; we can
8089 * safely free the old array.
8090 */
8091 kmem_free(oprobes, osize);
8092 dtrace_nprobes <<= 1;
8093 }
8094
b0d623f7 8095 ASSERT(id - 1 < (dtrace_id_t)dtrace_nprobes);
2d21ac55
A
8096 }
8097
8098 ASSERT(dtrace_probes[id - 1] == NULL);
8099 dtrace_probes[id - 1] = probe;
fe8ab488 8100 provider->dtpv_probe_count++;
2d21ac55
A
8101
8102 if (provider != dtrace_provider)
8103 lck_mtx_unlock(&dtrace_lock);
8104
8105 return (id);
8106}
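
/*
 * Sketch of a provider creating a probe from its dtps_provide op
 * (identifiers illustrative; example_id as in the registration sketch):
 *
 *	(void) dtrace_probe_create(example_id, "mach_kernel",
 *	    "vm_fault", "entry", 0, NULL);
 */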
8107
8108static dtrace_probe_t *
8109dtrace_probe_lookup_id(dtrace_id_t id)
8110{
8111 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
8112
b0d623f7
A
8113 if (id == 0 || id > (dtrace_id_t)dtrace_nprobes)
8114 return (NULL);
2d21ac55
A
8115
8116 return (dtrace_probes[id - 1]);
8117}
8118
8119static int
d190cdc3 8120dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg1, void *arg2)
2d21ac55 8121{
d190cdc3
A
8122#pragma unused(arg2)
8123 *((dtrace_id_t *)arg1) = probe->dtpr_id;
2d21ac55
A
8124
8125 return (DTRACE_MATCH_DONE);
8126}
8127
8128/*
8129 * Look up a probe based on provider and one or more of module name, function
8130 * name and probe name.
8131 */
8132dtrace_id_t
8133dtrace_probe_lookup(dtrace_provider_id_t prid, const char *mod,
8134 const char *func, const char *name)
8135{
8136 dtrace_probekey_t pkey;
8137 dtrace_id_t id;
8138 int match;
8139
8140 pkey.dtpk_prov = ((dtrace_provider_t *)prid)->dtpv_name;
8141 pkey.dtpk_pmatch = &dtrace_match_string;
8142 pkey.dtpk_mod = mod;
8143 pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul;
8144 pkey.dtpk_func = func;
8145 pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul;
8146 pkey.dtpk_name = name;
8147 pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul;
8148 pkey.dtpk_id = DTRACE_IDNONE;
8149
8150 lck_mtx_lock(&dtrace_lock);
8151 match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0,
d190cdc3 8152 dtrace_probe_lookup_match, &id, NULL);
2d21ac55
A
8153 lck_mtx_unlock(&dtrace_lock);
8154
8155 ASSERT(match == 1 || match == 0);
8156 return (match ? id : 0);
8157}
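
/*
 * A typical use is a provider checking for an already-created probe
 * before recreating it in its dtps_provide op (sketch):
 *
 *	if (dtrace_probe_lookup(example_id, "mach_kernel",
 *	    "vm_fault", "entry") != 0)
 *		return;
 *
 * returning early because the probe already exists.
 */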
8158
8159/*
8160 * Returns the probe argument associated with the specified probe.
8161 */
8162void *
8163dtrace_probe_arg(dtrace_provider_id_t id, dtrace_id_t pid)
8164{
8165 dtrace_probe_t *probe;
8166 void *rval = NULL;
8167
8168 lck_mtx_lock(&dtrace_lock);
8169
8170 if ((probe = dtrace_probe_lookup_id(pid)) != NULL &&
8171 probe->dtpr_provider == (dtrace_provider_t *)id)
8172 rval = probe->dtpr_arg;
8173
8174 lck_mtx_unlock(&dtrace_lock);
8175
8176 return (rval);
8177}
8178
8179/*
8180 * Copy a probe into a probe description.
8181 */
8182static void
8183dtrace_probe_description(const dtrace_probe_t *prp, dtrace_probedesc_t *pdp)
8184{
8185 bzero(pdp, sizeof (dtrace_probedesc_t));
8186 pdp->dtpd_id = prp->dtpr_id;
8187
fe8ab488 8188 /* APPLE NOTE: Darwin employs size bounded string operation. */
2d21ac55
A
8189 (void) strlcpy(pdp->dtpd_provider,
8190 prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN);
8191
8192 (void) strlcpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN);
8193 (void) strlcpy(pdp->dtpd_func, prp->dtpr_func, DTRACE_FUNCNAMELEN);
8194 (void) strlcpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN);
8195}
8196
8197/*
8198 * Called to indicate that a probe -- or probes -- should be provided by a
8199 * specified provider. If the specified description is NULL, the provider will
8200 * be told to provide all of its probes. (This is done whenever a new
8201 * consumer comes along, or whenever a retained enabling is to be matched.) If
8202 * the specified description is non-NULL, the provider is given the
8203 * opportunity to dynamically provide the specified probe, allowing providers
8204 * to support the creation of probes on-the-fly. (So-called _autocreated_
8205 * probes.) If the provider is NULL, the operations will be applied to all
8206 * providers; if the provider is non-NULL the operations will only be applied
8207 * to the specified provider. The dtrace_provider_lock must be held, and the
8208 * dtrace_lock must _not_ be held -- the provider's dtps_provide() operation
8209 * will need to grab the dtrace_lock when it reenters the framework through
8210 * dtrace_probe_lookup(), dtrace_probe_create(), etc.
8211 */
8212static void
8213dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv)
8214{
8215 struct modctl *ctl;
8216 int all = 0;
8217
8218 lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED);
8219
8220 if (prv == NULL) {
8221 all = 1;
8222 prv = dtrace_provider;
8223 }
6d2010ae 8224
2d21ac55 8225 do {
2d21ac55
A
8226 /*
8227 * First, call the blanket provide operation.
8228 */
8229 prv->dtpv_pops.dtps_provide(prv->dtpv_arg, desc);
6d2010ae 8230
2d21ac55
A
8231 /*
8232 * Now call the per-module provide operation. We will grab
8233 * mod_lock to prevent the list from being modified. Note
8234 * that this also prevents the mod_busy bits from changing.
8235 * (mod_busy can only be changed with mod_lock held.)
8236 */
6d2010ae
A
8237 lck_mtx_lock(&mod_lock);
8238
6d2010ae
A
8239 ctl = dtrace_modctl_list;
8240 while (ctl) {
8241 prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);
8242 ctl = ctl->mod_next;
2d21ac55 8243 }
6d2010ae
A
8244
8245 lck_mtx_unlock(&mod_lock);
2d21ac55
A
8246 } while (all && (prv = prv->dtpv_next) != NULL);
8247}
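
/*
 * Concretely: when a new consumer comes along, the framework asks every
 * provider for all of its probes via
 *
 *	dtrace_probe_provide(NULL, NULL);
 *
 * while a retained enabling with a specific description is offered to
 * all providers via dtrace_probe_provide(&desc, NULL).
 */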
8248
8249/*
8250 * Iterate over each probe, and call the Framework-to-Provider API function
8251 * denoted by offs.
8252 */
8253static void
8254dtrace_probe_foreach(uintptr_t offs)
8255{
8256 dtrace_provider_t *prov;
8257 void (*func)(void *, dtrace_id_t, void *);
8258 dtrace_probe_t *probe;
8259 dtrace_icookie_t cookie;
8260 int i;
8261
8262 /*
8263 * We disable interrupts to walk through the probe array. This is
8264 * safe -- the dtrace_sync() in dtrace_unregister() assures that we
8265 * won't see stale data.
8266 */
8267 cookie = dtrace_interrupt_disable();
8268
8269 for (i = 0; i < dtrace_nprobes; i++) {
8270 if ((probe = dtrace_probes[i]) == NULL)
8271 continue;
8272
8273 if (probe->dtpr_ecb == NULL) {
8274 /*
8275 * This probe isn't enabled -- don't call the function.
8276 */
8277 continue;
8278 }
8279
8280 prov = probe->dtpr_provider;
8281 func = *((void(**)(void *, dtrace_id_t, void *))
8282 ((uintptr_t)&prov->dtpv_pops + offs));
8283
8284 func(prov->dtpv_arg, i + 1, probe->dtpr_arg);
8285 }
8286
8287 dtrace_interrupt_enable(cookie);
8288}
8289
8290static int
d190cdc3 8291dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab, dtrace_ecbdesc_t *ep)
2d21ac55
A
8292{
8293 dtrace_probekey_t pkey;
8294 uint32_t priv;
8295 uid_t uid;
8296 zoneid_t zoneid;
8297
8298 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
8299
8300 dtrace_ecb_create_cache = NULL;
8301
8302 if (desc == NULL) {
8303 /*
8304 * If we're passed a NULL description, we're being asked to
8305 * create an ECB with a NULL probe.
8306 */
d190cdc3 8307 (void) dtrace_ecb_create_enable(NULL, enab, ep);
2d21ac55
A
8308 return (0);
8309 }
8310
8311 dtrace_probekey(desc, &pkey);
8312 dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred,
8313 &priv, &uid, &zoneid);
8314
8315 return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable,
d190cdc3 8316 enab, ep));
2d21ac55
A
8317}
8318
8319/*
8320 * DTrace Helper Provider Functions
8321 */
8322static void
8323dtrace_dofattr2attr(dtrace_attribute_t *attr, const dof_attr_t dofattr)
8324{
8325 attr->dtat_name = DOF_ATTR_NAME(dofattr);
8326 attr->dtat_data = DOF_ATTR_DATA(dofattr);
8327 attr->dtat_class = DOF_ATTR_CLASS(dofattr);
8328}
8329
8330static void
8331dtrace_dofprov2hprov(dtrace_helper_provdesc_t *hprov,
8332 const dof_provider_t *dofprov, char *strtab)
8333{
8334 hprov->dthpv_provname = strtab + dofprov->dofpv_name;
8335 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_provider,
8336 dofprov->dofpv_provattr);
8337 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_mod,
8338 dofprov->dofpv_modattr);
8339 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_func,
8340 dofprov->dofpv_funcattr);
8341 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_name,
8342 dofprov->dofpv_nameattr);
8343 dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_args,
8344 dofprov->dofpv_argsattr);
8345}
8346
8347static void
d190cdc3 8348dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, proc_t *p)
2d21ac55
A
8349{
8350 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
8351 dof_hdr_t *dof = (dof_hdr_t *)daddr;
8352 dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
8353 dof_provider_t *provider;
8354 dof_probe_t *probe;
8355 uint32_t *off, *enoff;
8356 uint8_t *arg;
8357 char *strtab;
8358 uint_t i, nprobes;
8359 dtrace_helper_provdesc_t dhpv;
8360 dtrace_helper_probedesc_t dhpb;
8361 dtrace_meta_t *meta = dtrace_meta_pid;
8362 dtrace_mops_t *mops = &meta->dtm_mops;
8363 void *parg;
8364
8365 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
8366 str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
8367 provider->dofpv_strtab * dof->dofh_secsize);
8368 prb_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
8369 provider->dofpv_probes * dof->dofh_secsize);
8370 arg_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
8371 provider->dofpv_prargs * dof->dofh_secsize);
8372 off_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
8373 provider->dofpv_proffs * dof->dofh_secsize);
8374
8375 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
8376 off = (uint32_t *)(uintptr_t)(daddr + off_sec->dofs_offset);
8377 arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);
8378 enoff = NULL;
8379
8380 /*
8381 * See dtrace_helper_provider_validate().
8382 */
8383 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
8384 provider->dofpv_prenoffs != DOF_SECT_NONE) {
8385 enoff_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
8386 provider->dofpv_prenoffs * dof->dofh_secsize);
8387 enoff = (uint32_t *)(uintptr_t)(daddr + enoff_sec->dofs_offset);
8388 }
8389
8390 nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;
8391
8392 /*
8393 * Create the provider.
8394 */
8395 dtrace_dofprov2hprov(&dhpv, provider, strtab);
8396
d190cdc3 8397 if ((parg = mops->dtms_provide_proc(meta->dtm_arg, &dhpv, p)) == NULL)
2d21ac55
A
8398 return;
8399
8400 meta->dtm_count++;
8401
8402 /*
8403 * Create the probes.
8404 */
8405 for (i = 0; i < nprobes; i++) {
8406 probe = (dof_probe_t *)(uintptr_t)(daddr +
8407 prb_sec->dofs_offset + i * prb_sec->dofs_entsize);
8408
8409 dhpb.dthpb_mod = dhp->dofhp_mod;
8410 dhpb.dthpb_func = strtab + probe->dofpr_func;
8411 dhpb.dthpb_name = strtab + probe->dofpr_name;
b0d623f7 8412#if !defined(__APPLE__)
2d21ac55 8413 dhpb.dthpb_base = probe->dofpr_addr;
b0d623f7
A
8414#else
8415 dhpb.dthpb_base = dhp->dofhp_addr; /* FIXME: James, why? */
2d21ac55 8416#endif
b0d623f7 8417 dhpb.dthpb_offs = (int32_t *)(off + probe->dofpr_offidx);
2d21ac55
A
8418 dhpb.dthpb_noffs = probe->dofpr_noffs;
8419 if (enoff != NULL) {
b0d623f7 8420 dhpb.dthpb_enoffs = (int32_t *)(enoff + probe->dofpr_enoffidx);
2d21ac55
A
8421 dhpb.dthpb_nenoffs = probe->dofpr_nenoffs;
8422 } else {
8423 dhpb.dthpb_enoffs = NULL;
8424 dhpb.dthpb_nenoffs = 0;
8425 }
8426 dhpb.dthpb_args = arg + probe->dofpr_argidx;
8427 dhpb.dthpb_nargc = probe->dofpr_nargc;
8428 dhpb.dthpb_xargc = probe->dofpr_xargc;
8429 dhpb.dthpb_ntypes = strtab + probe->dofpr_nargv;
8430 dhpb.dthpb_xtypes = strtab + probe->dofpr_xargv;
8431
8432 mops->dtms_create_probe(meta->dtm_arg, parg, &dhpb);
8433 }
39037602
A
8434
8435 /*
8436 * Since we just created probes, we need to match our enablings
8437 * against those, with the precondition that we have only added
8438 * probes from this provider.
8439 */
8440 char *prov_name = mops->dtms_provider_name(parg);
8441 ASSERT(prov_name != NULL);
8442 dtrace_match_cond_t cond = {dtrace_cond_provider_match, (void*)prov_name};
8443
8444 dtrace_enabling_matchall_with_cond(&cond);
2d21ac55
A
8445}
8446
8447static void
d190cdc3 8448dtrace_helper_provide(dof_helper_t *dhp, proc_t *p)
2d21ac55
A
8449{
8450 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
8451 dof_hdr_t *dof = (dof_hdr_t *)daddr;
b0d623f7 8452 uint32_t i;
2d21ac55
A
8453
8454 lck_mtx_assert(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED);
8455
8456 for (i = 0; i < dof->dofh_secnum; i++) {
8457 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
8458 dof->dofh_secoff + i * dof->dofh_secsize);
8459
8460 if (sec->dofs_type != DOF_SECT_PROVIDER)
8461 continue;
8462
d190cdc3 8463 dtrace_helper_provide_one(dhp, sec, p);
2d21ac55 8464 }
2d21ac55
A
8465}
8466
8467static void
d190cdc3 8468dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, proc_t *p)
2d21ac55
A
8469{
8470 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
8471 dof_hdr_t *dof = (dof_hdr_t *)daddr;
8472 dof_sec_t *str_sec;
8473 dof_provider_t *provider;
8474 char *strtab;
8475 dtrace_helper_provdesc_t dhpv;
8476 dtrace_meta_t *meta = dtrace_meta_pid;
8477 dtrace_mops_t *mops = &meta->dtm_mops;
8478
8479 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
8480 str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
8481 provider->dofpv_strtab * dof->dofh_secsize);
8482
8483 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
8484
8485 /*
8486 * Create the provider.
8487 */
8488 dtrace_dofprov2hprov(&dhpv, provider, strtab);
8489
d190cdc3 8490 mops->dtms_remove_proc(meta->dtm_arg, &dhpv, p);
2d21ac55
A
8491
8492 meta->dtm_count--;
8493}
8494
8495static void
d190cdc3 8496dtrace_helper_provider_remove(dof_helper_t *dhp, proc_t *p)
2d21ac55
A
8497{
8498 uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
8499 dof_hdr_t *dof = (dof_hdr_t *)daddr;
b0d623f7 8500 uint32_t i;
2d21ac55
A
8501
8502 lck_mtx_assert(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED);
8503
8504 for (i = 0; i < dof->dofh_secnum; i++) {
8505 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
8506 dof->dofh_secoff + i * dof->dofh_secsize);
8507
8508 if (sec->dofs_type != DOF_SECT_PROVIDER)
8509 continue;
8510
d190cdc3 8511 dtrace_helper_provider_remove_one(dhp, sec, p);
2d21ac55
A
8512 }
8513}
8514
8515/*
8516 * DTrace Meta Provider-to-Framework API Functions
8517 *
8518 * These functions implement the Meta Provider-to-Framework API, as described
8519 * in <sys/dtrace.h>.
8520 */
8521int
8522dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg,
8523 dtrace_meta_provider_id_t *idp)
8524{
8525 dtrace_meta_t *meta;
8526 dtrace_helpers_t *help, *next;
b0d623f7 8527 uint_t i;
2d21ac55
A
8528
8529 *idp = DTRACE_METAPROVNONE;
8530
8531 /*
8532 * We strictly don't need the name, but we hold onto it for
8533 * debuggability. All hail error queues!
8534 */
8535 if (name == NULL) {
8536 cmn_err(CE_WARN, "failed to register meta-provider: "
8537 "invalid name");
8538 return (EINVAL);
8539 }
8540
8541 if (mops == NULL ||
8542 mops->dtms_create_probe == NULL ||
d190cdc3
A
8543 mops->dtms_provide_proc == NULL ||
8544 mops->dtms_remove_proc == NULL) {
2d21ac55
A
8545 cmn_err(CE_WARN, "failed to register meta-provider %s: "
8546 "invalid ops", name);
8547 return (EINVAL);
8548 }
8549
8550 meta = kmem_zalloc(sizeof (dtrace_meta_t), KM_SLEEP);
8551 meta->dtm_mops = *mops;
fe8ab488
A
8552
8553 /* APPLE NOTE: Darwin employs size bounded string operation. */
b0d623f7
A
8554 {
8555 size_t bufsize = strlen(name) + 1;
8556 meta->dtm_name = kmem_alloc(bufsize, KM_SLEEP);
8557 (void) strlcpy(meta->dtm_name, name, bufsize);
8558 }
fe8ab488 8559
2d21ac55
A
8560 meta->dtm_arg = arg;
8561
8562 lck_mtx_lock(&dtrace_meta_lock);
8563 lck_mtx_lock(&dtrace_lock);
8564
8565 if (dtrace_meta_pid != NULL) {
8566 lck_mtx_unlock(&dtrace_lock);
8567 lck_mtx_unlock(&dtrace_meta_lock);
8568 cmn_err(CE_WARN, "failed to register meta-provider %s: "
8569 "user-land meta-provider exists", name);
8570 kmem_free(meta->dtm_name, strlen(meta->dtm_name) + 1);
8571 kmem_free(meta, sizeof (dtrace_meta_t));
8572 return (EINVAL);
8573 }
8574
8575 dtrace_meta_pid = meta;
8576 *idp = (dtrace_meta_provider_id_t)meta;
8577
8578 /*
8579 * If there are providers and probes ready to go, pass them
8580 * off to the new meta provider now.
8581 */
8582
8583 help = dtrace_deferred_pid;
8584 dtrace_deferred_pid = NULL;
8585
8586 lck_mtx_unlock(&dtrace_lock);
8587
8588 while (help != NULL) {
8589 for (i = 0; i < help->dthps_nprovs; i++) {
d190cdc3
A
8590 proc_t *p = proc_find(help->dthps_pid);
8591 if (p == PROC_NULL)
8592 continue;
2d21ac55 8593 dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
d190cdc3
A
8594 p);
8595 proc_rele(p);
2d21ac55
A
8596 }
8597
8598 next = help->dthps_next;
8599 help->dthps_next = NULL;
8600 help->dthps_prev = NULL;
8601 help->dthps_deferred = 0;
8602 help = next;
8603 }
8604
8605 lck_mtx_unlock(&dtrace_meta_lock);
8606
8607 return (0);
8608}
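
/*
 * The user-land meta-provider is fasttrap; a registration of this shape
 * occurs in its attach path (sketch, identifiers illustrative):
 *
 *	static dtrace_meta_provider_id_t fasttrap_meta_id;
 *
 *	(void) dtrace_meta_register("fasttrap", &fasttrap_mops, NULL,
 *	    &fasttrap_meta_id);
 */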
8609
8610int
8611dtrace_meta_unregister(dtrace_meta_provider_id_t id)
8612{
8613 dtrace_meta_t **pp, *old = (dtrace_meta_t *)id;
8614
8615 lck_mtx_lock(&dtrace_meta_lock);
8616 lck_mtx_lock(&dtrace_lock);
8617
8618 if (old == dtrace_meta_pid) {
8619 pp = &dtrace_meta_pid;
8620 } else {
8621 panic("attempt to unregister non-existent "
8622 "dtrace meta-provider %p\n", (void *)old);
8623 }
8624
8625 if (old->dtm_count != 0) {
8626 lck_mtx_unlock(&dtrace_lock);
8627 lck_mtx_unlock(&dtrace_meta_lock);
8628 return (EBUSY);
8629 }
8630
8631 *pp = NULL;
8632
8633 lck_mtx_unlock(&dtrace_lock);
8634 lck_mtx_unlock(&dtrace_meta_lock);
8635
8636 kmem_free(old->dtm_name, strlen(old->dtm_name) + 1);
8637 kmem_free(old, sizeof (dtrace_meta_t));
8638
8639 return (0);
8640}
8641
8642
8643/*
8644 * DTrace DIF Object Functions
8645 */
8646static int
8647dtrace_difo_err(uint_t pc, const char *format, ...)
8648{
8649 if (dtrace_err_verbose) {
8650 va_list alist;
8651
8652 (void) uprintf("dtrace DIF object error: [%u]: ", pc);
8653 va_start(alist, format);
8654 (void) vuprintf(format, alist);
8655 va_end(alist);
8656 }
8657
8658#ifdef DTRACE_ERRDEBUG
8659 dtrace_errdebug(format);
8660#endif
8661 return (1);
8662}
8663
8664/*
8665 * Validate a DTrace DIF object by checking the IR instructions. The following
8666 * rules are currently enforced by dtrace_difo_validate():
8667 *
8668 * 1. Each instruction must have a valid opcode
8669 * 2. Each register, string, variable, or subroutine reference must be valid
8670 * 3. No instruction can modify register %r0 (must be zero)
8671 * 4. All instruction reserved bits must be set to zero
8672 * 5. The last instruction must be a "ret" instruction
8673 * 6. All branch targets must reference a valid instruction _after_ the branch
8674 */
8675static int
8676dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs,
8677 cred_t *cr)
8678{
b0d623f7
A
8679 int err = 0;
8680 uint_t i;
fe8ab488 8681
b0d623f7
A
8682 int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err;
8683 int kcheckload;
8684 uint_t pc;
39037602 8685 int maxglobal = -1, maxlocal = -1, maxtlocal = -1;
b0d623f7
A
8686
8687 kcheckload = cr == NULL ||
8688 (vstate->dtvs_state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) == 0;
2d21ac55
A
8689
8690 dp->dtdo_destructive = 0;
8691
8692 for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) {
8693 dif_instr_t instr = dp->dtdo_buf[pc];
8694
8695 uint_t r1 = DIF_INSTR_R1(instr);
8696 uint_t r2 = DIF_INSTR_R2(instr);
8697 uint_t rd = DIF_INSTR_RD(instr);
8698 uint_t rs = DIF_INSTR_RS(instr);
8699 uint_t label = DIF_INSTR_LABEL(instr);
8700 uint_t v = DIF_INSTR_VAR(instr);
8701 uint_t subr = DIF_INSTR_SUBR(instr);
8702 uint_t type = DIF_INSTR_TYPE(instr);
8703 uint_t op = DIF_INSTR_OP(instr);
8704
8705 switch (op) {
8706 case DIF_OP_OR:
8707 case DIF_OP_XOR:
8708 case DIF_OP_AND:
8709 case DIF_OP_SLL:
8710 case DIF_OP_SRL:
8711 case DIF_OP_SRA:
8712 case DIF_OP_SUB:
8713 case DIF_OP_ADD:
8714 case DIF_OP_MUL:
8715 case DIF_OP_SDIV:
8716 case DIF_OP_UDIV:
8717 case DIF_OP_SREM:
8718 case DIF_OP_UREM:
8719 case DIF_OP_COPYS:
8720 if (r1 >= nregs)
8721 err += efunc(pc, "invalid register %u\n", r1);
8722 if (r2 >= nregs)
8723 err += efunc(pc, "invalid register %u\n", r2);
8724 if (rd >= nregs)
8725 err += efunc(pc, "invalid register %u\n", rd);
8726 if (rd == 0)
8727 err += efunc(pc, "cannot write to %r0\n");
8728 break;
8729 case DIF_OP_NOT:
8730 case DIF_OP_MOV:
8731 case DIF_OP_ALLOCS:
8732 if (r1 >= nregs)
8733 err += efunc(pc, "invalid register %u\n", r1);
8734 if (r2 != 0)
8735 err += efunc(pc, "non-zero reserved bits\n");
8736 if (rd >= nregs)
8737 err += efunc(pc, "invalid register %u\n", rd);
8738 if (rd == 0)
8739 err += efunc(pc, "cannot write to %r0\n");
8740 break;
8741 case DIF_OP_LDSB:
8742 case DIF_OP_LDSH:
8743 case DIF_OP_LDSW:
8744 case DIF_OP_LDUB:
8745 case DIF_OP_LDUH:
8746 case DIF_OP_LDUW:
8747 case DIF_OP_LDX:
8748 if (r1 >= nregs)
8749 err += efunc(pc, "invalid register %u\n", r1);
8750 if (r2 != 0)
8751 err += efunc(pc, "non-zero reserved bits\n");
8752 if (rd >= nregs)
8753 err += efunc(pc, "invalid register %u\n", rd);
8754 if (rd == 0)
8755 err += efunc(pc, "cannot write to %r0\n");
b0d623f7 8756 if (kcheckload)
2d21ac55
A
8757 dp->dtdo_buf[pc] = DIF_INSTR_LOAD(op +
8758 DIF_OP_RLDSB - DIF_OP_LDSB, r1, rd);
8759 break;
8760 case DIF_OP_RLDSB:
8761 case DIF_OP_RLDSH:
8762 case DIF_OP_RLDSW:
8763 case DIF_OP_RLDUB:
8764 case DIF_OP_RLDUH:
8765 case DIF_OP_RLDUW:
8766 case DIF_OP_RLDX:
8767 if (r1 >= nregs)
8768 err += efunc(pc, "invalid register %u\n", r1);
8769 if (r2 != 0)
8770 err += efunc(pc, "non-zero reserved bits\n");
8771 if (rd >= nregs)
8772 err += efunc(pc, "invalid register %u\n", rd);
8773 if (rd == 0)
8774 err += efunc(pc, "cannot write to %r0\n");
8775 break;
8776 case DIF_OP_ULDSB:
8777 case DIF_OP_ULDSH:
8778 case DIF_OP_ULDSW:
8779 case DIF_OP_ULDUB:
8780 case DIF_OP_ULDUH:
8781 case DIF_OP_ULDUW:
8782 case DIF_OP_ULDX:
8783 if (r1 >= nregs)
8784 err += efunc(pc, "invalid register %u\n", r1);
8785 if (r2 != 0)
8786 err += efunc(pc, "non-zero reserved bits\n");
8787 if (rd >= nregs)
8788 err += efunc(pc, "invalid register %u\n", rd);
8789 if (rd == 0)
8790 err += efunc(pc, "cannot write to %r0\n");
8791 break;
8792 case DIF_OP_STB:
8793 case DIF_OP_STH:
8794 case DIF_OP_STW:
8795 case DIF_OP_STX:
8796 if (r1 >= nregs)
8797 err += efunc(pc, "invalid register %u\n", r1);
8798 if (r2 != 0)
8799 err += efunc(pc, "non-zero reserved bits\n");
8800 if (rd >= nregs)
8801 err += efunc(pc, "invalid register %u\n", rd);
8802 if (rd == 0)
8803 err += efunc(pc, "cannot write to 0 address\n");
8804 break;
8805 case DIF_OP_CMP:
8806 case DIF_OP_SCMP:
8807 if (r1 >= nregs)
8808 err += efunc(pc, "invalid register %u\n", r1);
8809 if (r2 >= nregs)
8810 err += efunc(pc, "invalid register %u\n", r2);
8811 if (rd != 0)
8812 err += efunc(pc, "non-zero reserved bits\n");
8813 break;
8814 case DIF_OP_TST:
8815 if (r1 >= nregs)
8816 err += efunc(pc, "invalid register %u\n", r1);
8817 if (r2 != 0 || rd != 0)
8818 err += efunc(pc, "non-zero reserved bits\n");
8819 break;
8820 case DIF_OP_BA:
8821 case DIF_OP_BE:
8822 case DIF_OP_BNE:
8823 case DIF_OP_BG:
8824 case DIF_OP_BGU:
8825 case DIF_OP_BGE:
8826 case DIF_OP_BGEU:
8827 case DIF_OP_BL:
8828 case DIF_OP_BLU:
8829 case DIF_OP_BLE:
8830 case DIF_OP_BLEU:
8831 if (label >= dp->dtdo_len) {
8832 err += efunc(pc, "invalid branch target %u\n",
8833 label);
8834 }
8835 if (label <= pc) {
8836 err += efunc(pc, "backward branch to %u\n",
8837 label);
8838 }
8839 break;
8840 case DIF_OP_RET:
8841 if (r1 != 0 || r2 != 0)
8842 err += efunc(pc, "non-zero reserved bits\n");
8843 if (rd >= nregs)
8844 err += efunc(pc, "invalid register %u\n", rd);
8845 break;
8846 case DIF_OP_NOP:
8847 case DIF_OP_POPTS:
8848 case DIF_OP_FLUSHTS:
8849 if (r1 != 0 || r2 != 0 || rd != 0)
8850 err += efunc(pc, "non-zero reserved bits\n");
8851 break;
8852 case DIF_OP_SETX:
8853 if (DIF_INSTR_INTEGER(instr) >= dp->dtdo_intlen) {
8854 err += efunc(pc, "invalid integer ref %u\n",
8855 DIF_INSTR_INTEGER(instr));
8856 }
8857 if (rd >= nregs)
8858 err += efunc(pc, "invalid register %u\n", rd);
8859 if (rd == 0)
8860 err += efunc(pc, "cannot write to %r0\n");
8861 break;
8862 case DIF_OP_SETS:
8863 if (DIF_INSTR_STRING(instr) >= dp->dtdo_strlen) {
8864 err += efunc(pc, "invalid string ref %u\n",
8865 DIF_INSTR_STRING(instr));
8866 }
8867 if (rd >= nregs)
8868 err += efunc(pc, "invalid register %u\n", rd);
8869 if (rd == 0)
8870 err += efunc(pc, "cannot write to %r0\n");
8871 break;
8872 case DIF_OP_LDGA:
8873 case DIF_OP_LDTA:
8874 if (r1 > DIF_VAR_ARRAY_MAX)
8875 err += efunc(pc, "invalid array %u\n", r1);
8876 if (r2 >= nregs)
8877 err += efunc(pc, "invalid register %u\n", r2);
8878 if (rd >= nregs)
8879 err += efunc(pc, "invalid register %u\n", rd);
8880 if (rd == 0)
8881 err += efunc(pc, "cannot write to %r0\n");
8882 break;
8883 case DIF_OP_LDGS:
8884 case DIF_OP_LDTS:
8885 case DIF_OP_LDLS:
8886 case DIF_OP_LDGAA:
8887 case DIF_OP_LDTAA:
8888 if (v < DIF_VAR_OTHER_MIN || v > DIF_VAR_OTHER_MAX)
8889 err += efunc(pc, "invalid variable %u\n", v);
8890 if (rd >= nregs)
8891 err += efunc(pc, "invalid register %u\n", rd);
8892 if (rd == 0)
8893 err += efunc(pc, "cannot write to %r0\n");
8894 break;
8895 case DIF_OP_STGS:
8896 case DIF_OP_STTS:
8897 case DIF_OP_STLS:
8898 case DIF_OP_STGAA:
8899 case DIF_OP_STTAA:
8900 if (v < DIF_VAR_OTHER_UBASE || v > DIF_VAR_OTHER_MAX)
8901 err += efunc(pc, "invalid variable %u\n", v);
8902 if (rs >= nregs)
8903 err += efunc(pc, "invalid register %u\n", rs);
8904 break;
8905 case DIF_OP_CALL:
39037602
A
8906 if (subr > DIF_SUBR_MAX &&
8907 !(subr >= DIF_SUBR_APPLE_MIN && subr <= DIF_SUBR_APPLE_MAX))
2d21ac55
A
8908 err += efunc(pc, "invalid subr %u\n", subr);
8909 if (rd >= nregs)
8910 err += efunc(pc, "invalid register %u\n", rd);
8911 if (rd == 0)
8912 err += efunc(pc, "cannot write to %r0\n");
8913
8914 if (subr == DIF_SUBR_COPYOUT ||
39037602
A
8915 subr == DIF_SUBR_COPYOUTSTR ||
8916 subr == DIF_SUBR_KDEBUG_TRACE ||
8917 subr == DIF_SUBR_KDEBUG_TRACE_STRING) {
2d21ac55
A
8918 dp->dtdo_destructive = 1;
8919 }
8920 break;
8921 case DIF_OP_PUSHTR:
8922 if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF)
8923 err += efunc(pc, "invalid ref type %u\n", type);
8924 if (r2 >= nregs)
8925 err += efunc(pc, "invalid register %u\n", r2);
8926 if (rs >= nregs)
8927 err += efunc(pc, "invalid register %u\n", rs);
8928 break;
8929 case DIF_OP_PUSHTV:
8930 if (type != DIF_TYPE_CTF)
8931 err += efunc(pc, "invalid val type %u\n", type);
8932 if (r2 >= nregs)
8933 err += efunc(pc, "invalid register %u\n", r2);
8934 if (rs >= nregs)
8935 err += efunc(pc, "invalid register %u\n", rs);
8936 break;
8937 default:
8938 err += efunc(pc, "invalid opcode %u\n",
8939 DIF_INSTR_OP(instr));
8940 }
8941 }
8942
8943 if (dp->dtdo_len != 0 &&
8944 DIF_INSTR_OP(dp->dtdo_buf[dp->dtdo_len - 1]) != DIF_OP_RET) {
8945 err += efunc(dp->dtdo_len - 1,
8946 "expected 'ret' as last DIF instruction\n");
8947 }
8948
3e170ce0 8949 if (!(dp->dtdo_rtype.dtdt_flags & (DIF_TF_BYREF | DIF_TF_BYUREF))) {
2d21ac55
A
8950 /*
8951 * If we're not returning by reference, the size must be either
8952 * 0 or the size of one of the base types.
8953 */
8954 switch (dp->dtdo_rtype.dtdt_size) {
8955 case 0:
8956 case sizeof (uint8_t):
8957 case sizeof (uint16_t):
8958 case sizeof (uint32_t):
8959 case sizeof (uint64_t):
8960 break;
8961
8962 default:
6d2010ae 8963 err += efunc(dp->dtdo_len - 1, "bad return size\n");
2d21ac55
A
8964 }
8965 }
8966
8967 for (i = 0; i < dp->dtdo_varlen && err == 0; i++) {
8968 dtrace_difv_t *v = &dp->dtdo_vartab[i], *existing = NULL;
8969 dtrace_diftype_t *vt, *et;
b0d623f7
A
8970 uint_t id;
8971 int ndx;
2d21ac55
A
8972
8973 if (v->dtdv_scope != DIFV_SCOPE_GLOBAL &&
8974 v->dtdv_scope != DIFV_SCOPE_THREAD &&
8975 v->dtdv_scope != DIFV_SCOPE_LOCAL) {
8976 err += efunc(i, "unrecognized variable scope %d\n",
8977 v->dtdv_scope);
8978 break;
8979 }
8980
8981 if (v->dtdv_kind != DIFV_KIND_ARRAY &&
8982 v->dtdv_kind != DIFV_KIND_SCALAR) {
8983 err += efunc(i, "unrecognized variable type %d\n",
8984 v->dtdv_kind);
8985 break;
8986 }
8987
8988 if ((id = v->dtdv_id) > DIF_VARIABLE_MAX) {
8989 err += efunc(i, "%d exceeds variable id limit\n", id);
8990 break;
8991 }
8992
8993 if (id < DIF_VAR_OTHER_UBASE)
8994 continue;
8995
8996 /*
8997 * For user-defined variables, we need to check that this
8998 * definition is identical to any previous definition that we
8999 * encountered.
9000 */
9001 ndx = id - DIF_VAR_OTHER_UBASE;
9002
9003 switch (v->dtdv_scope) {
9004 case DIFV_SCOPE_GLOBAL:
39037602
A
9005 if (maxglobal == -1 || ndx > maxglobal)
9006 maxglobal = ndx;
9007
2d21ac55
A
9008 if (ndx < vstate->dtvs_nglobals) {
9009 dtrace_statvar_t *svar;
9010
9011 if ((svar = vstate->dtvs_globals[ndx]) != NULL)
9012 existing = &svar->dtsv_var;
9013 }
9014
9015 break;
9016
9017 case DIFV_SCOPE_THREAD:
39037602
A
9018 if (maxtlocal == -1 || ndx > maxtlocal)
9019 maxtlocal = ndx;
9020
2d21ac55
A
9021 if (ndx < vstate->dtvs_ntlocals)
9022 existing = &vstate->dtvs_tlocals[ndx];
9023 break;
9024
9025 case DIFV_SCOPE_LOCAL:
39037602
A
9026 if (maxlocal == -1 || ndx > maxlocal)
9027 maxlocal = ndx;
2d21ac55
A
9028 if (ndx < vstate->dtvs_nlocals) {
9029 dtrace_statvar_t *svar;
9030
9031 if ((svar = vstate->dtvs_locals[ndx]) != NULL)
9032 existing = &svar->dtsv_var;
9033 }
9034
9035 break;
9036 }
9037
9038 vt = &v->dtdv_type;
9039
9040 if (vt->dtdt_flags & DIF_TF_BYREF) {
9041 if (vt->dtdt_size == 0) {
9042 err += efunc(i, "zero-sized variable\n");
9043 break;
9044 }
9045
ecc0ceb4
A
9046 if ((v->dtdv_scope == DIFV_SCOPE_GLOBAL ||
9047 v->dtdv_scope == DIFV_SCOPE_LOCAL) &&
9048 vt->dtdt_size > dtrace_statvar_maxsize) {
9049 err += efunc(i, "oversized by-ref static\n");
2d21ac55
A
9050 break;
9051 }
9052 }
9053
9054 if (existing == NULL || existing->dtdv_id == 0)
9055 continue;
9056
9057 ASSERT(existing->dtdv_id == v->dtdv_id);
9058 ASSERT(existing->dtdv_scope == v->dtdv_scope);
9059
9060 if (existing->dtdv_kind != v->dtdv_kind)
9061 err += efunc(i, "%d changed variable kind\n", id);
9062
9063 et = &existing->dtdv_type;
9064
9065 if (vt->dtdt_flags != et->dtdt_flags) {
9066 err += efunc(i, "%d changed variable type flags\n", id);
9067 break;
9068 }
9069
9070 if (vt->dtdt_size != 0 && vt->dtdt_size != et->dtdt_size) {
9071 err += efunc(i, "%d changed variable type size\n", id);
9072 break;
9073 }
9074 }
9075
39037602
A
9076 for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) {
9077 dif_instr_t instr = dp->dtdo_buf[pc];
9078
9079 uint_t v = DIF_INSTR_VAR(instr);
9080 uint_t op = DIF_INSTR_OP(instr);
9081
9082 switch (op) {
9083 case DIF_OP_LDGS:
9084 case DIF_OP_LDGAA:
9085 case DIF_OP_STGS:
9086 case DIF_OP_STGAA:
9087 if (v > (uint_t)(DIF_VAR_OTHER_UBASE + maxglobal))
9088 err += efunc(pc, "invalid variable %u\n", v);
9089 break;
9090 case DIF_OP_LDTS:
9091 case DIF_OP_LDTAA:
9092 case DIF_OP_STTS:
9093 case DIF_OP_STTAA:
9094 if (v > (uint_t)(DIF_VAR_OTHER_UBASE + maxtlocal))
9095 err += efunc(pc, "invalid variable %u\n", v);
9096 break;
9097 case DIF_OP_LDLS:
9098 case DIF_OP_STLS:
9099 if (v > (uint_t)(DIF_VAR_OTHER_UBASE + maxlocal))
9100 err += efunc(pc, "invalid variable %u\n", v);
9101 break;
9102 default:
9103 break;
9104 }
9105 }
9106
2d21ac55
A
9107 return (err);
9108}
9109
9110/*
9111 * Validate a DTrace DIF object that is to be used as a helper. Helpers
9112 * are much more constrained than normal DIFOs. Specifically, they may
9113 * not:
9114 *
9115 * 1. Make calls to subroutines other than copyin(), copyinstr() or
9116 * miscellaneous string routines
9117 * 2. Access DTrace variables other than the args[] array, and the
9118 * curthread, pid, ppid, tid, execname, zonename, uid and gid variables.
9119 * 3. Have thread-local variables.
9120 * 4. Have dynamic variables.
9121 */
9122static int
9123dtrace_difo_validate_helper(dtrace_difo_t *dp)
9124{
9125 int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err;
9126 int err = 0;
9127 uint_t pc;
9128
9129 for (pc = 0; pc < dp->dtdo_len; pc++) {
9130 dif_instr_t instr = dp->dtdo_buf[pc];
9131
9132 uint_t v = DIF_INSTR_VAR(instr);
9133 uint_t subr = DIF_INSTR_SUBR(instr);
9134 uint_t op = DIF_INSTR_OP(instr);
9135
9136 switch (op) {
9137 case DIF_OP_OR:
9138 case DIF_OP_XOR:
9139 case DIF_OP_AND:
9140 case DIF_OP_SLL:
9141 case DIF_OP_SRL:
9142 case DIF_OP_SRA:
9143 case DIF_OP_SUB:
9144 case DIF_OP_ADD:
9145 case DIF_OP_MUL:
9146 case DIF_OP_SDIV:
9147 case DIF_OP_UDIV:
9148 case DIF_OP_SREM:
9149 case DIF_OP_UREM:
9150 case DIF_OP_COPYS:
9151 case DIF_OP_NOT:
9152 case DIF_OP_MOV:
9153 case DIF_OP_RLDSB:
9154 case DIF_OP_RLDSH:
9155 case DIF_OP_RLDSW:
9156 case DIF_OP_RLDUB:
9157 case DIF_OP_RLDUH:
9158 case DIF_OP_RLDUW:
9159 case DIF_OP_RLDX:
9160 case DIF_OP_ULDSB:
9161 case DIF_OP_ULDSH:
9162 case DIF_OP_ULDSW:
9163 case DIF_OP_ULDUB:
9164 case DIF_OP_ULDUH:
9165 case DIF_OP_ULDUW:
9166 case DIF_OP_ULDX:
9167 case DIF_OP_STB:
9168 case DIF_OP_STH:
9169 case DIF_OP_STW:
9170 case DIF_OP_STX:
9171 case DIF_OP_ALLOCS:
9172 case DIF_OP_CMP:
9173 case DIF_OP_SCMP:
9174 case DIF_OP_TST:
9175 case DIF_OP_BA:
9176 case DIF_OP_BE:
9177 case DIF_OP_BNE:
9178 case DIF_OP_BG:
9179 case DIF_OP_BGU:
9180 case DIF_OP_BGE:
9181 case DIF_OP_BGEU:
9182 case DIF_OP_BL:
9183 case DIF_OP_BLU:
9184 case DIF_OP_BLE:
9185 case DIF_OP_BLEU:
9186 case DIF_OP_RET:
9187 case DIF_OP_NOP:
9188 case DIF_OP_POPTS:
9189 case DIF_OP_FLUSHTS:
9190 case DIF_OP_SETX:
9191 case DIF_OP_SETS:
9192 case DIF_OP_LDGA:
9193 case DIF_OP_LDLS:
9194 case DIF_OP_STGS:
9195 case DIF_OP_STLS:
9196 case DIF_OP_PUSHTR:
9197 case DIF_OP_PUSHTV:
9198 break;
9199
9200 case DIF_OP_LDGS:
9201 if (v >= DIF_VAR_OTHER_UBASE)
9202 break;
9203
9204 if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9)
9205 break;
9206
9207 if (v == DIF_VAR_CURTHREAD || v == DIF_VAR_PID ||
9208 v == DIF_VAR_PPID || v == DIF_VAR_TID ||
9209 v == DIF_VAR_EXECNAME || v == DIF_VAR_ZONENAME ||
9210 v == DIF_VAR_UID || v == DIF_VAR_GID)
9211 break;
9212
9213 err += efunc(pc, "illegal variable %u\n", v);
9214 break;
9215
9216 case DIF_OP_LDTA:
9217 case DIF_OP_LDTS:
9218 case DIF_OP_LDGAA:
9219 case DIF_OP_LDTAA:
9220 err += efunc(pc, "illegal dynamic variable load\n");
9221 break;
9222
9223 case DIF_OP_STTS:
9224 case DIF_OP_STGAA:
9225 case DIF_OP_STTAA:
9226 err += efunc(pc, "illegal dynamic variable store\n");
9227 break;
9228
9229 case DIF_OP_CALL:
9230 if (subr == DIF_SUBR_ALLOCA ||
9231 subr == DIF_SUBR_BCOPY ||
9232 subr == DIF_SUBR_COPYIN ||
9233 subr == DIF_SUBR_COPYINTO ||
9234 subr == DIF_SUBR_COPYINSTR ||
9235 subr == DIF_SUBR_INDEX ||
b0d623f7
A
9236 subr == DIF_SUBR_INET_NTOA ||
9237 subr == DIF_SUBR_INET_NTOA6 ||
9238 subr == DIF_SUBR_INET_NTOP ||
2d21ac55
A
9239 subr == DIF_SUBR_LLTOSTR ||
9240 subr == DIF_SUBR_RINDEX ||
9241 subr == DIF_SUBR_STRCHR ||
9242 subr == DIF_SUBR_STRJOIN ||
9243 subr == DIF_SUBR_STRRCHR ||
9244 subr == DIF_SUBR_STRSTR ||
39037602
A
9245 subr == DIF_SUBR_KDEBUG_TRACE ||
9246 subr == DIF_SUBR_KDEBUG_TRACE_STRING ||
b0d623f7
A
9247 subr == DIF_SUBR_HTONS ||
9248 subr == DIF_SUBR_HTONL ||
9249 subr == DIF_SUBR_HTONLL ||
9250 subr == DIF_SUBR_NTOHS ||
9251 subr == DIF_SUBR_NTOHL ||
9252 subr == DIF_SUBR_NTOHLL)
2d21ac55
A
9253 break;
9254
9255 err += efunc(pc, "invalid subr %u\n", subr);
9256 break;
9257
9258 default:
9259 err += efunc(pc, "invalid opcode %u\n",
9260 DIF_INSTR_OP(instr));
9261 }
9262 }
9263
9264 return (err);
9265}
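
/*
 * E.g., a helper expression that calls copyinstr() passes the
 * DIF_OP_CALL whitelist above, while one that calls a destructive
 * subroutine such as DIF_SUBR_COPYOUT falls through to the
 * "invalid subr" error and fails validation.
 */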
9266
9267/*
9268 * Returns 1 if the expression in the DIF object can be cached on a per-thread
9269 * basis; 0 if not.
9270 */
9271static int
9272dtrace_difo_cacheable(dtrace_difo_t *dp)
9273{
b0d623f7 9274 uint_t i;
2d21ac55
A
9275
9276 if (dp == NULL)
9277 return (0);
9278
9279 for (i = 0; i < dp->dtdo_varlen; i++) {
9280 dtrace_difv_t *v = &dp->dtdo_vartab[i];
9281
9282 if (v->dtdv_scope != DIFV_SCOPE_GLOBAL)
9283 continue;
9284
9285 switch (v->dtdv_id) {
9286 case DIF_VAR_CURTHREAD:
9287 case DIF_VAR_PID:
9288 case DIF_VAR_TID:
9289 case DIF_VAR_EXECNAME:
9290 case DIF_VAR_ZONENAME:
9291 break;
9292
9293 default:
9294 return (0);
9295 }
9296 }
9297
9298 /*
9299 * This DIF object may be cacheable. Now we need to look for any
9300 * array loading instructions, any memory loading instructions, or
9301 * any stores to thread-local variables.
9302 */
9303 for (i = 0; i < dp->dtdo_len; i++) {
9304 uint_t op = DIF_INSTR_OP(dp->dtdo_buf[i]);
9305
9306 if ((op >= DIF_OP_LDSB && op <= DIF_OP_LDX) ||
9307 (op >= DIF_OP_ULDSB && op <= DIF_OP_ULDX) ||
9308 (op >= DIF_OP_RLDSB && op <= DIF_OP_RLDX) ||
9309 op == DIF_OP_LDGA || op == DIF_OP_STTS)
9310 return (0);
9311 }
9312
9313 return (1);
9314}
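
/*
 * E.g., a predicate mentioning only execname (execname == "launchd",
 * say) references DIF_VAR_EXECNAME and performs no loads, so it is
 * cacheable; any expression that dereferences memory compiles to a
 * load opcode and is rejected by the second loop above.
 */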
9315
9316static void
9317dtrace_difo_hold(dtrace_difo_t *dp)
9318{
b0d623f7 9319 uint_t i;
2d21ac55
A
9320
9321 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
9322
9323 dp->dtdo_refcnt++;
9324 ASSERT(dp->dtdo_refcnt != 0);
9325
9326 /*
9327 * We need to check this DIF object for references to the variable
9328 * DIF_VAR_VTIMESTAMP.
9329 */
9330 for (i = 0; i < dp->dtdo_varlen; i++) {
9331 dtrace_difv_t *v = &dp->dtdo_vartab[i];
9332
9333 if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
9334 continue;
9335
9336 if (dtrace_vtime_references++ == 0)
9337 dtrace_vtime_enable();
9338 }
9339}
9340
9341/*
9342 * This routine calculates the dynamic variable chunksize for a given DIF
9343 * object. The calculation is not fool-proof, and can probably be tricked by
9344 * malicious DIF -- but it works for all compiler-generated DIF. Because this
9345 * calculation is likely imperfect, dtrace_dynvar() is able to gracefully fail
9346 * if a dynamic variable size exceeds the chunksize.
9347 */
9348static void
9349dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
9350{
b0d623f7 9351 uint64_t sval = 0;
2d21ac55
A
9352 dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
9353 const dif_instr_t *text = dp->dtdo_buf;
9354 uint_t pc, srd = 0;
9355 uint_t ttop = 0;
9356 size_t size, ksize;
9357 uint_t id, i;
9358
9359 for (pc = 0; pc < dp->dtdo_len; pc++) {
9360 dif_instr_t instr = text[pc];
9361 uint_t op = DIF_INSTR_OP(instr);
9362 uint_t rd = DIF_INSTR_RD(instr);
9363 uint_t r1 = DIF_INSTR_R1(instr);
9364 uint_t nkeys = 0;
9365 uchar_t scope;
9366
9367 dtrace_key_t *key = tupregs;
9368
9369 switch (op) {
9370 case DIF_OP_SETX:
9371 sval = dp->dtdo_inttab[DIF_INSTR_INTEGER(instr)];
9372 srd = rd;
9373 continue;
9374
9375 case DIF_OP_STTS:
9376 key = &tupregs[DIF_DTR_NREGS];
9377 key[0].dttk_size = 0;
9378 key[1].dttk_size = 0;
9379 nkeys = 2;
9380 scope = DIFV_SCOPE_THREAD;
9381 break;
9382
9383 case DIF_OP_STGAA:
9384 case DIF_OP_STTAA:
9385 nkeys = ttop;
9386
9387 if (DIF_INSTR_OP(instr) == DIF_OP_STTAA)
9388 key[nkeys++].dttk_size = 0;
9389
9390 key[nkeys++].dttk_size = 0;
9391
9392 if (op == DIF_OP_STTAA) {
9393 scope = DIFV_SCOPE_THREAD;
9394 } else {
9395 scope = DIFV_SCOPE_GLOBAL;
9396 }
9397
9398 break;
9399
9400 case DIF_OP_PUSHTR:
9401 if (ttop == DIF_DTR_NREGS)
9402 return;
9403
9404 if ((srd == 0 || sval == 0) && r1 == DIF_TYPE_STRING) {
9405 /*
9406 * If the register for the size of the "pushtr"
9407 * is %r0 (or the value is 0) and the type is
9408 * a string, we'll use the system-wide default
9409 * string size.
9410 */
9411 tupregs[ttop++].dttk_size =
9412 dtrace_strsize_default;
9413 } else {
9414 if (srd == 0)
9415 return;
9416
9417 if (sval > LONG_MAX)
9418 return;
9419
9420 tupregs[ttop++].dttk_size = sval;
9421 }
9422
9423 break;
9424
9425 case DIF_OP_PUSHTV:
9426 if (ttop == DIF_DTR_NREGS)
9427 return;
9428
9429 tupregs[ttop++].dttk_size = 0;
9430 break;
9431
9432 case DIF_OP_FLUSHTS:
9433 ttop = 0;
9434 break;
9435
9436 case DIF_OP_POPTS:
9437 if (ttop != 0)
9438 ttop--;
9439 break;
9440 }
9441
9442 sval = 0;
9443 srd = 0;
9444
9445 if (nkeys == 0)
9446 continue;
9447
9448 /*
9449 * We have a dynamic variable allocation; calculate its size.
9450 */
9451 for (ksize = 0, i = 0; i < nkeys; i++)
9452 ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t));
9453
9454 size = sizeof (dtrace_dynvar_t);
9455 size += sizeof (dtrace_key_t) * (nkeys - 1);
9456 size += ksize;
9457
9458 /*
9459 * Now we need to determine the size of the stored data.
9460 */
9461 id = DIF_INSTR_VAR(instr);
9462
9463 for (i = 0; i < dp->dtdo_varlen; i++) {
9464 dtrace_difv_t *v = &dp->dtdo_vartab[i];
9465
9466 if (v->dtdv_id == id && v->dtdv_scope == scope) {
9467 size += v->dtdv_type.dtdt_size;
9468 break;
9469 }
9470 }
9471
9472 if (i == dp->dtdo_varlen)
9473 return;
9474
9475 /*
9476 * We have the size. If this is larger than the chunk size
9477 * for our dynamic variable state, reset the chunk size.
9478 */
9479 size = P2ROUNDUP(size, sizeof (uint64_t));
9480
9481 /*
9482 * Before setting the chunk size, check that we're not going
9483 * to set it to a negative value...
9484 */
9485 if (size > LONG_MAX)
9486 return;
9487
9488 /*
9489 * ...and make certain that we didn't badly overflow.
9490 */
9491 if (size < ksize || size < sizeof (dtrace_dynvar_t))
9492 return;
9493
9494 if (size > vstate->dtvs_dynvars.dtds_chunksize)
9495 vstate->dtvs_dynvars.dtds_chunksize = size;
9496 }
9497}
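
/*
 * Worked example (illustrative, not in the original source): for a
 * thread-local store such as self->x = timestamp, the DIF_OP_STTS case
 * above yields nkeys = 2 with both key sizes 0, so ksize is 0 and
 *
 *	size = sizeof (dtrace_dynvar_t)
 *	    + sizeof (dtrace_key_t) * (nkeys - 1)
 *	    + the variable's dtdt_size,
 *
 * rounded up to a uint64_t boundary by P2ROUNDUP(). The chunk size
 * only ever grows: it is reset whenever a computed size exceeds it.
 */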
9498
9499static void
9500dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
9501{
9502 int oldsvars, osz, nsz, otlocals, ntlocals;
9503 uint_t i, id;
9504
9505 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
9506 ASSERT(dp->dtdo_buf != NULL && dp->dtdo_len != 0);
9507
9508 for (i = 0; i < dp->dtdo_varlen; i++) {
9509 dtrace_difv_t *v = &dp->dtdo_vartab[i];
9510 dtrace_statvar_t *svar;
9511 dtrace_statvar_t ***svarp = NULL;
9512 size_t dsize = 0;
9513 uint8_t scope = v->dtdv_scope;
b0d623f7 9514 int *np = (int *)NULL;
9515
9516 if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE)
9517 continue;
9518
9519 id -= DIF_VAR_OTHER_UBASE;
9520
9521 switch (scope) {
9522 case DIFV_SCOPE_THREAD:
b0d623f7 9523 while (id >= (uint_t)(otlocals = vstate->dtvs_ntlocals)) {
9524 dtrace_difv_t *tlocals;
9525
9526 if ((ntlocals = (otlocals << 1)) == 0)
9527 ntlocals = 1;
9528
9529 osz = otlocals * sizeof (dtrace_difv_t);
9530 nsz = ntlocals * sizeof (dtrace_difv_t);
9531
9532 tlocals = kmem_zalloc(nsz, KM_SLEEP);
9533
9534 if (osz != 0) {
9535 bcopy(vstate->dtvs_tlocals,
9536 tlocals, osz);
9537 kmem_free(vstate->dtvs_tlocals, osz);
9538 }
9539
9540 vstate->dtvs_tlocals = tlocals;
9541 vstate->dtvs_ntlocals = ntlocals;
9542 }
9543
9544 vstate->dtvs_tlocals[id] = *v;
9545 continue;
9546
9547 case DIFV_SCOPE_LOCAL:
9548 np = &vstate->dtvs_nlocals;
9549 svarp = &vstate->dtvs_locals;
9550
9551 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF)
c910b4d9 9552 dsize = (int)NCPU * (v->dtdv_type.dtdt_size +
9553 sizeof (uint64_t));
9554 else
c910b4d9 9555 dsize = (int)NCPU * sizeof (uint64_t);
9556
9557 break;
9558
9559 case DIFV_SCOPE_GLOBAL:
9560 np = &vstate->dtvs_nglobals;
9561 svarp = &vstate->dtvs_globals;
9562
9563 if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF)
9564 dsize = v->dtdv_type.dtdt_size +
9565 sizeof (uint64_t);
9566
9567 break;
9568
9569 default:
9570 ASSERT(0);
9571 }
9572
b0d623f7 9573 while (id >= (uint_t)(oldsvars = *np)) {
9574 dtrace_statvar_t **statics;
9575 int newsvars, oldsize, newsize;
9576
9577 if ((newsvars = (oldsvars << 1)) == 0)
9578 newsvars = 1;
9579
9580 oldsize = oldsvars * sizeof (dtrace_statvar_t *);
9581 newsize = newsvars * sizeof (dtrace_statvar_t *);
9582
9583 statics = kmem_zalloc(newsize, KM_SLEEP);
9584
9585 if (oldsize != 0) {
9586 bcopy(*svarp, statics, oldsize);
9587 kmem_free(*svarp, oldsize);
9588 }
9589
9590 *svarp = statics;
9591 *np = newsvars;
9592 }
9593
9594 if ((svar = (*svarp)[id]) == NULL) {
9595 svar = kmem_zalloc(sizeof (dtrace_statvar_t), KM_SLEEP);
9596 svar->dtsv_var = *v;
9597
9598 if ((svar->dtsv_size = dsize) != 0) {
9599 svar->dtsv_data = (uint64_t)(uintptr_t)
9600 kmem_zalloc(dsize, KM_SLEEP);
9601 }
9602
9603 (*svarp)[id] = svar;
9604 }
9605
9606 svar->dtsv_refcnt++;
9607 }
9608
9609 dtrace_difo_chunksize(dp, vstate);
9610 dtrace_difo_hold(dp);
9611}
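
/*
 * Illustrative note (not in the original source): both resize loops in
 * dtrace_difo_init() grow their arrays geometrically (1, 2, 4, 8, ...
 * slots), so a variable id of 5 after the DIF_VAR_OTHER_UBASE bias
 * forces doublings until the count reaches 8; the old array is copied
 * with bcopy() and freed, keeping amortized insertion cost constant.
 */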
9612
9613static dtrace_difo_t *
9614dtrace_difo_duplicate(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
9615{
9616 dtrace_difo_t *new;
9617 size_t sz;
9618
9619 ASSERT(dp->dtdo_buf != NULL);
9620 ASSERT(dp->dtdo_refcnt != 0);
9621
9622 new = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP);
9623
9624 ASSERT(dp->dtdo_buf != NULL);
9625 sz = dp->dtdo_len * sizeof (dif_instr_t);
9626 new->dtdo_buf = kmem_alloc(sz, KM_SLEEP);
9627 bcopy(dp->dtdo_buf, new->dtdo_buf, sz);
9628 new->dtdo_len = dp->dtdo_len;
9629
9630 if (dp->dtdo_strtab != NULL) {
9631 ASSERT(dp->dtdo_strlen != 0);
9632 new->dtdo_strtab = kmem_alloc(dp->dtdo_strlen, KM_SLEEP);
9633 bcopy(dp->dtdo_strtab, new->dtdo_strtab, dp->dtdo_strlen);
9634 new->dtdo_strlen = dp->dtdo_strlen;
9635 }
9636
9637 if (dp->dtdo_inttab != NULL) {
9638 ASSERT(dp->dtdo_intlen != 0);
9639 sz = dp->dtdo_intlen * sizeof (uint64_t);
9640 new->dtdo_inttab = kmem_alloc(sz, KM_SLEEP);
9641 bcopy(dp->dtdo_inttab, new->dtdo_inttab, sz);
9642 new->dtdo_intlen = dp->dtdo_intlen;
9643 }
9644
9645 if (dp->dtdo_vartab != NULL) {
9646 ASSERT(dp->dtdo_varlen != 0);
9647 sz = dp->dtdo_varlen * sizeof (dtrace_difv_t);
9648 new->dtdo_vartab = kmem_alloc(sz, KM_SLEEP);
9649 bcopy(dp->dtdo_vartab, new->dtdo_vartab, sz);
9650 new->dtdo_varlen = dp->dtdo_varlen;
9651 }
9652
9653 dtrace_difo_init(new, vstate);
9654 return (new);
9655}
9656
9657static void
9658dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
9659{
b0d623f7 9660 uint_t i;
9661
9662 ASSERT(dp->dtdo_refcnt == 0);
9663
9664 for (i = 0; i < dp->dtdo_varlen; i++) {
9665 dtrace_difv_t *v = &dp->dtdo_vartab[i];
9666 dtrace_statvar_t *svar;
9667 dtrace_statvar_t **svarp = NULL;
9668 uint_t id;
9669 uint8_t scope = v->dtdv_scope;
9670 int *np = NULL;
9671
9672 switch (scope) {
9673 case DIFV_SCOPE_THREAD:
9674 continue;
9675
9676 case DIFV_SCOPE_LOCAL:
9677 np = &vstate->dtvs_nlocals;
9678 svarp = vstate->dtvs_locals;
9679 break;
9680
9681 case DIFV_SCOPE_GLOBAL:
9682 np = &vstate->dtvs_nglobals;
9683 svarp = vstate->dtvs_globals;
9684 break;
9685
9686 default:
9687 ASSERT(0);
9688 }
9689
9690 if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE)
9691 continue;
9692
9693 id -= DIF_VAR_OTHER_UBASE;
b0d623f7 9694
b0d623f7 9695 ASSERT(id < (uint_t)*np);
9696
9697 svar = svarp[id];
9698 ASSERT(svar != NULL);
9699 ASSERT(svar->dtsv_refcnt > 0);
9700
9701 if (--svar->dtsv_refcnt > 0)
9702 continue;
9703
9704 if (svar->dtsv_size != 0) {
fe8ab488 9705 ASSERT(svar->dtsv_data != 0);
9706 kmem_free((void *)(uintptr_t)svar->dtsv_data,
9707 svar->dtsv_size);
9708 }
9709
9710 kmem_free(svar, sizeof (dtrace_statvar_t));
9711 svarp[id] = NULL;
9712 }
9713
9714 kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t));
9715 kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t));
9716 kmem_free(dp->dtdo_strtab, dp->dtdo_strlen);
9717 kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t));
9718
9719 kmem_free(dp, sizeof (dtrace_difo_t));
9720}
9721
9722static void
9723dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
9724{
b0d623f7 9725 uint_t i;
9726
9727 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
9728 ASSERT(dp->dtdo_refcnt != 0);
9729
9730 for (i = 0; i < dp->dtdo_varlen; i++) {
9731 dtrace_difv_t *v = &dp->dtdo_vartab[i];
9732
9733 if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
9734 continue;
9735
9736 ASSERT(dtrace_vtime_references > 0);
9737 if (--dtrace_vtime_references == 0)
9738 dtrace_vtime_disable();
9739 }
9740
9741 if (--dp->dtdo_refcnt == 0)
9742 dtrace_difo_destroy(dp, vstate);
9743}
9744
9745/*
9746 * DTrace Format Functions
9747 */
9748static uint16_t
9749dtrace_format_add(dtrace_state_t *state, char *str)
9750{
9751 char *fmt, **new;
9752 uint16_t ndx, len = strlen(str) + 1;
9753
9754 fmt = kmem_zalloc(len, KM_SLEEP);
9755 bcopy(str, fmt, len);
9756
9757 for (ndx = 0; ndx < state->dts_nformats; ndx++) {
9758 if (state->dts_formats[ndx] == NULL) {
9759 state->dts_formats[ndx] = fmt;
9760 return (ndx + 1);
9761 }
9762 }
9763
9764 if (state->dts_nformats == USHRT_MAX) {
9765 /*
9766 * This is only likely if a denial-of-service attack is being
9767 * attempted. As such, it's okay to fail silently here.
9768 */
9769 kmem_free(fmt, len);
9770 return (0);
9771 }
9772
9773 /*
9774 * For simplicity, we always resize the formats array to be exactly the
9775 * number of formats.
9776 */
9777 ndx = state->dts_nformats++;
9778 new = kmem_alloc((ndx + 1) * sizeof (char *), KM_SLEEP);
9779
9780 if (state->dts_formats != NULL) {
9781 ASSERT(ndx != 0);
9782 bcopy(state->dts_formats, new, ndx * sizeof (char *));
9783 kmem_free(state->dts_formats, ndx * sizeof (char *));
9784 }
9785
9786 state->dts_formats = new;
9787 state->dts_formats[ndx] = fmt;
9788
9789 return (ndx + 1);
9790}
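
/*
 * Illustrative note (not in the original source): format indices are
 * 1-based -- dtrace_format_add() returns ndx + 1, reserving 0 to mean
 * "no format" -- which is why dtrace_format_remove() below indexes
 * dts_formats[format - 1].
 */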
9791
9792static void
9793dtrace_format_remove(dtrace_state_t *state, uint16_t format)
9794{
9795 char *fmt;
9796
9797 ASSERT(state->dts_formats != NULL);
9798 ASSERT(format <= state->dts_nformats);
9799 ASSERT(state->dts_formats[format - 1] != NULL);
9800
9801 fmt = state->dts_formats[format - 1];
9802 kmem_free(fmt, strlen(fmt) + 1);
9803 state->dts_formats[format - 1] = NULL;
9804}
9805
9806static void
9807dtrace_format_destroy(dtrace_state_t *state)
9808{
9809 int i;
9810
9811 if (state->dts_nformats == 0) {
9812 ASSERT(state->dts_formats == NULL);
9813 return;
9814 }
9815
9816 ASSERT(state->dts_formats != NULL);
9817
9818 for (i = 0; i < state->dts_nformats; i++) {
9819 char *fmt = state->dts_formats[i];
9820
9821 if (fmt == NULL)
9822 continue;
9823
9824 kmem_free(fmt, strlen(fmt) + 1);
9825 }
9826
9827 kmem_free(state->dts_formats, state->dts_nformats * sizeof (char *));
9828 state->dts_nformats = 0;
9829 state->dts_formats = NULL;
9830}
9831
9832/*
9833 * DTrace Predicate Functions
9834 */
9835static dtrace_predicate_t *
9836dtrace_predicate_create(dtrace_difo_t *dp)
9837{
9838 dtrace_predicate_t *pred;
9839
9840 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
9841 ASSERT(dp->dtdo_refcnt != 0);
9842
9843 pred = kmem_zalloc(sizeof (dtrace_predicate_t), KM_SLEEP);
9844 pred->dtp_difo = dp;
9845 pred->dtp_refcnt = 1;
9846
9847 if (!dtrace_difo_cacheable(dp))
9848 return (pred);
9849
9850 if (dtrace_predcache_id == DTRACE_CACHEIDNONE) {
9851 /*
9852 * This is only theoretically possible -- we have had 2^32
9853 * cacheable predicates on this machine. We cannot allow any
9854 * more predicates to become cacheable: as unlikely as it is,
9855 * there may be a thread caching a (now stale) predicate cache
9856 * ID. (N.B.: the temptation is being successfully resisted to
9857 * have this cmn_err() "Holy shit -- we executed this code!")
9858 */
9859 return (pred);
9860 }
9861
9862 pred->dtp_cacheid = dtrace_predcache_id++;
9863
9864 return (pred);
9865}
9866
9867static void
9868dtrace_predicate_hold(dtrace_predicate_t *pred)
9869{
9870 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
9871 ASSERT(pred->dtp_difo != NULL && pred->dtp_difo->dtdo_refcnt != 0);
9872 ASSERT(pred->dtp_refcnt > 0);
9873
9874 pred->dtp_refcnt++;
9875}
9876
9877static void
9878dtrace_predicate_release(dtrace_predicate_t *pred, dtrace_vstate_t *vstate)
9879{
9880 dtrace_difo_t *dp = pred->dtp_difo;
b0d623f7 9881#pragma unused(dp) /* __APPLE__ */
9882
9883 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
9884 ASSERT(dp != NULL && dp->dtdo_refcnt != 0);
9885 ASSERT(pred->dtp_refcnt > 0);
9886
9887 if (--pred->dtp_refcnt == 0) {
9888 dtrace_difo_release(pred->dtp_difo, vstate);
9889 kmem_free(pred, sizeof (dtrace_predicate_t));
9890 }
9891}
9892
9893/*
9894 * DTrace Action Description Functions
9895 */
9896static dtrace_actdesc_t *
9897dtrace_actdesc_create(dtrace_actkind_t kind, uint32_t ntuple,
9898 uint64_t uarg, uint64_t arg)
9899{
9900 dtrace_actdesc_t *act;
9901
9902 ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != 0 &&
9903 arg >= KERNELBASE) || (arg == 0 && kind == DTRACEACT_PRINTA));
9904
9905 act = kmem_zalloc(sizeof (dtrace_actdesc_t), KM_SLEEP);
9906 act->dtad_kind = kind;
9907 act->dtad_ntuple = ntuple;
9908 act->dtad_uarg = uarg;
9909 act->dtad_arg = arg;
9910 act->dtad_refcnt = 1;
9911
9912 return (act);
9913}
9914
9915static void
9916dtrace_actdesc_hold(dtrace_actdesc_t *act)
9917{
9918 ASSERT(act->dtad_refcnt >= 1);
9919 act->dtad_refcnt++;
9920}
9921
9922static void
9923dtrace_actdesc_release(dtrace_actdesc_t *act, dtrace_vstate_t *vstate)
9924{
9925 dtrace_actkind_t kind = act->dtad_kind;
9926 dtrace_difo_t *dp;
9927
9928 ASSERT(act->dtad_refcnt >= 1);
9929
9930 if (--act->dtad_refcnt != 0)
9931 return;
9932
9933 if ((dp = act->dtad_difo) != NULL)
9934 dtrace_difo_release(dp, vstate);
9935
9936 if (DTRACEACT_ISPRINTFLIKE(kind)) {
9937 char *str = (char *)(uintptr_t)act->dtad_arg;
9938
9939 ASSERT((str != NULL && (uintptr_t)str >= KERNELBASE) ||
9940 (str == NULL && act->dtad_kind == DTRACEACT_PRINTA));
9941
9942 if (str != NULL)
9943 kmem_free(str, strlen(str) + 1);
9944 }
9945
9946 kmem_free(act, sizeof (dtrace_actdesc_t));
9947}
9948
9949/*
9950 * DTrace ECB Functions
9951 */
9952static dtrace_ecb_t *
9953dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe)
9954{
9955 dtrace_ecb_t *ecb;
9956 dtrace_epid_t epid;
9957
9958 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
9959
9960 ecb = kmem_zalloc(sizeof (dtrace_ecb_t), KM_SLEEP);
9961 ecb->dte_predicate = NULL;
9962 ecb->dte_probe = probe;
9963
9964 /*
9965 * The default size is the size of the default action: recording
04b8595b 9966 * the header.
2d21ac55 9967 */
04b8595b 9968 ecb->dte_size = ecb->dte_needed = sizeof (dtrace_rechdr_t);
9969 ecb->dte_alignment = sizeof (dtrace_epid_t);
9970
9971 epid = state->dts_epid++;
9972
b0d623f7 9973 if (epid - 1 >= (dtrace_epid_t)state->dts_necbs) {
9974 dtrace_ecb_t **oecbs = state->dts_ecbs, **ecbs;
9975 int necbs = state->dts_necbs << 1;
9976
b0d623f7 9977 ASSERT(epid == (dtrace_epid_t)state->dts_necbs + 1);
9978
9979 if (necbs == 0) {
9980 ASSERT(oecbs == NULL);
9981 necbs = 1;
9982 }
9983
9984 ecbs = kmem_zalloc(necbs * sizeof (*ecbs), KM_SLEEP);
9985
9986 if (oecbs != NULL)
9987 bcopy(oecbs, ecbs, state->dts_necbs * sizeof (*ecbs));
9988
9989 dtrace_membar_producer();
9990 state->dts_ecbs = ecbs;
9991
9992 if (oecbs != NULL) {
9993 /*
9994 * If this state is active, we must dtrace_sync()
9995 * before we can free the old dts_ecbs array: we're
9996 * coming in hot, and there may be active ring
9997 * buffer processing (which indexes into the dts_ecbs
9998 * array) on another CPU.
9999 */
10000 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
10001 dtrace_sync();
10002
10003 kmem_free(oecbs, state->dts_necbs * sizeof (*ecbs));
10004 }
10005
10006 dtrace_membar_producer();
10007 state->dts_necbs = necbs;
10008 }
10009
10010 ecb->dte_state = state;
10011
10012 ASSERT(state->dts_ecbs[epid - 1] == NULL);
10013 dtrace_membar_producer();
10014 state->dts_ecbs[(ecb->dte_epid = epid) - 1] = ecb;
10015
10016 return (ecb);
10017}
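
/*
 * Illustrative note (not in the original source): the ordering in
 * dtrace_ecb_add() matters to probe context. The producer memory
 * barriers publish the new dts_ecbs array before the enlarged
 * dts_necbs and before the ECB pointer itself, so a consumer that
 * reads dts_necbs never indexes slots the array does not yet cover;
 * the dtrace_sync() quiesces other CPUs before the old array is freed.
 */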
10018
6d2010ae 10019static int
10020dtrace_ecb_enable(dtrace_ecb_t *ecb)
10021{
10022 dtrace_probe_t *probe = ecb->dte_probe;
10023
10024 lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
10025 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
10026 ASSERT(ecb->dte_next == NULL);
10027
10028 if (probe == NULL) {
10029 /*
10030 * This is the NULL probe -- there's nothing to do.
10031 */
6d2010ae 10032 return(0);
10033 }
10034
fe8ab488 10035 probe->dtpr_provider->dtpv_ecb_count++;
10036 if (probe->dtpr_ecb == NULL) {
10037 dtrace_provider_t *prov = probe->dtpr_provider;
10038
10039 /*
10040 * We're the first ECB on this probe.
10041 */
10042 probe->dtpr_ecb = probe->dtpr_ecb_last = ecb;
10043
10044 if (ecb->dte_predicate != NULL)
10045 probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid;
10046
10047 return (prov->dtpv_pops.dtps_enable(prov->dtpv_arg,
10048 probe->dtpr_id, probe->dtpr_arg));
10049 } else {
10050 /*
10051 * This probe is already active. Swing the last pointer to
10052 * point to the new ECB, and issue a dtrace_sync() to assure
10053 * that all CPUs have seen the change.
10054 */
10055 ASSERT(probe->dtpr_ecb_last != NULL);
10056 probe->dtpr_ecb_last->dte_next = ecb;
10057 probe->dtpr_ecb_last = ecb;
10058 probe->dtpr_predcache = 0;
10059
10060 dtrace_sync();
6d2010ae 10061 return(0);
10062 }
10063}
10064
39037602 10065static int
10066dtrace_ecb_resize(dtrace_ecb_t *ecb)
10067{
2d21ac55 10068 dtrace_action_t *act;
04b8595b 10069 uint32_t curneeded = UINT32_MAX;
2d21ac55 10070 uint32_t aggbase = UINT32_MAX;
10071
10072 /*
10073 * If we record anything, we always record the dtrace_rechdr_t. (And
10074 * we always record it first.)
2d21ac55 10075 */
10076 ecb->dte_size = sizeof (dtrace_rechdr_t);
10077 ecb->dte_alignment = sizeof (dtrace_epid_t);
10078
10079 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
10080 dtrace_recdesc_t *rec = &act->dta_rec;
04b8595b 10081 ASSERT(rec->dtrd_size > 0 || rec->dtrd_alignment == 1);
2d21ac55 10082
04b8595b 10083 ecb->dte_alignment = MAX(ecb->dte_alignment, rec->dtrd_alignment);
10084
10085 if (DTRACEACT_ISAGG(act->dta_kind)) {
10086 dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
2d21ac55 10087
10088 ASSERT(rec->dtrd_size != 0);
10089 ASSERT(agg->dtag_first != NULL);
10090 ASSERT(act->dta_prev->dta_intuple);
2d21ac55 10091 ASSERT(aggbase != UINT32_MAX);
04b8595b 10092 ASSERT(curneeded != UINT32_MAX);
10093
10094 agg->dtag_base = aggbase;
10095 curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment);
10096 rec->dtrd_offset = curneeded;
10097 if (curneeded + rec->dtrd_size < curneeded)
10098 return (EINVAL);
10099 curneeded += rec->dtrd_size;
10100 ecb->dte_needed = MAX(ecb->dte_needed, curneeded);
2d21ac55 10101
10102 aggbase = UINT32_MAX;
10103 curneeded = UINT32_MAX;
10104 } else if (act->dta_intuple) {
10105 if (curneeded == UINT32_MAX) {
10106 /*
10107 * This is the first record in a tuple. Align
10108 * curneeded to be at offset 4 in an 8-byte
10109 * aligned block.
10110 */
10111 ASSERT(act->dta_prev == NULL || !act->dta_prev->dta_intuple);
10112 ASSERT(aggbase == UINT32_MAX);
10113
10114 curneeded = P2PHASEUP(ecb->dte_size,
10115 sizeof (uint64_t), sizeof (dtrace_aggid_t));
10116
10117 aggbase = curneeded - sizeof (dtrace_aggid_t);
10118 ASSERT(IS_P2ALIGNED(aggbase,
10119 sizeof (uint64_t)));
2d21ac55 10120 }
2d21ac55 10121
10122 curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment);
10123 rec->dtrd_offset = curneeded;
10124 curneeded += rec->dtrd_size;
10125 if (curneeded + rec->dtrd_size < curneeded)
10126 return (EINVAL);
10127 } else {
10128 /* tuples must be followed by an aggregation */
10129 ASSERT(act->dta_prev == NULL || !act->dta_prev->dta_intuple);
10130 ecb->dte_size = P2ROUNDUP(ecb->dte_size, rec->dtrd_alignment);
10131 rec->dtrd_offset = ecb->dte_size;
10132 if (ecb->dte_size + rec->dtrd_size < ecb->dte_size)
10133 return (EINVAL);
10134 ecb->dte_size += rec->dtrd_size;
10135 ecb->dte_needed = MAX(ecb->dte_needed, ecb->dte_size);
2d21ac55 10136 }
10137 }
10138
10139 if ((act = ecb->dte_action) != NULL &&
10140 !(act->dta_kind == DTRACEACT_SPECULATE && act->dta_next == NULL) &&
04b8595b 10141 ecb->dte_size == sizeof (dtrace_rechdr_t)) {
2d21ac55 10142 /*
04b8595b 10143 * If the size is still sizeof (dtrace_rechdr_t), then all
10144 * actions store no data; set the size to 0.
10145 */
2d21ac55 10146 ecb->dte_size = 0;
10147 }
10148
10149 ecb->dte_size = P2ROUNDUP(ecb->dte_size, sizeof (dtrace_epid_t));
10150 ecb->dte_needed = P2ROUNDUP(ecb->dte_needed, (sizeof (dtrace_epid_t)));
10151 ecb->dte_state->dts_needed = MAX(ecb->dte_state->dts_needed, ecb->dte_needed);
39037602 10152 return (0);
10153}
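
/*
 * A minimal sketch (illustrative, not part of dtrace.c), assuming the
 * illumos definition of P2ROUNDUP() for power-of-two alignments; the
 * EX_ name is hypothetical:
 */
#if 0	/* example only */
#define	EX_P2ROUNDUP(x, align)	(-(-(x) & -(align)))
/* EX_P2ROUNDUP(18, 4) == 20: a 4-byte record appended at dte_size 18
 * lands at offset 20 and leaves dte_size at 24. */
#endif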
10154
10155static dtrace_action_t *
10156dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
10157{
10158 dtrace_aggregation_t *agg;
10159 size_t size = sizeof (uint64_t);
10160 int ntuple = desc->dtad_ntuple;
10161 dtrace_action_t *act;
10162 dtrace_recdesc_t *frec;
10163 dtrace_aggid_t aggid;
10164 dtrace_state_t *state = ecb->dte_state;
10165
10166 agg = kmem_zalloc(sizeof (dtrace_aggregation_t), KM_SLEEP);
10167 agg->dtag_ecb = ecb;
10168
10169 ASSERT(DTRACEACT_ISAGG(desc->dtad_kind));
10170
10171 switch (desc->dtad_kind) {
10172 case DTRACEAGG_MIN:
b0d623f7 10173 agg->dtag_initial = INT64_MAX;
10174 agg->dtag_aggregate = dtrace_aggregate_min;
10175 break;
10176
10177 case DTRACEAGG_MAX:
b0d623f7 10178 agg->dtag_initial = INT64_MIN;
10179 agg->dtag_aggregate = dtrace_aggregate_max;
10180 break;
10181
10182 case DTRACEAGG_COUNT:
10183 agg->dtag_aggregate = dtrace_aggregate_count;
10184 break;
10185
10186 case DTRACEAGG_QUANTIZE:
10187 agg->dtag_aggregate = dtrace_aggregate_quantize;
10188 size = (((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) *
10189 sizeof (uint64_t);
10190 break;
10191
10192 case DTRACEAGG_LQUANTIZE: {
10193 uint16_t step = DTRACE_LQUANTIZE_STEP(desc->dtad_arg);
10194 uint16_t levels = DTRACE_LQUANTIZE_LEVELS(desc->dtad_arg);
10195
10196 agg->dtag_initial = desc->dtad_arg;
10197 agg->dtag_aggregate = dtrace_aggregate_lquantize;
10198
10199 if (step == 0 || levels == 0)
10200 goto err;
10201
10202 size = levels * sizeof (uint64_t) + 3 * sizeof (uint64_t);
10203 break;
10204 }
10205
10206 case DTRACEAGG_LLQUANTIZE: {
10207 uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg);
10208 uint16_t low = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg);
10209 uint16_t high = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg);
15129b1c 10210 uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg);
10211 int64_t v;
10212
10213 agg->dtag_initial = desc->dtad_arg;
10214 agg->dtag_aggregate = dtrace_aggregate_llquantize;
10215
10216 if (factor < 2 || low >= high || nsteps < factor)
10217 goto err;
10218
10219 /*
10220 * Now check that the number of steps evenly divides a power
10221 * of the factor. (This assures both integer bucket size and
10222 * linearity within each magnitude.)
10223 */
10224 for (v = factor; v < nsteps; v *= factor)
10225 continue;
10226
10227 if ((v % nsteps) || (nsteps % factor))
10228 goto err;
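		/*
		 * Worked example (illustrative, not in the original
		 * source): factor = 10, nsteps = 20 leaves v = 100
		 * after the loop; 100 % 20 == 0 and 20 % 10 == 0, so
		 * the parameters pass. factor = 10, nsteps = 15 fails,
		 * since 100 % 15 != 0.
		 */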
10229
10230 size = (dtrace_aggregate_llquantize_bucket(factor, low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t);
10231 break;
10232 }
10233
10234 case DTRACEAGG_AVG:
10235 agg->dtag_aggregate = dtrace_aggregate_avg;
10236 size = sizeof (uint64_t) * 2;
10237 break;
10238
10239 case DTRACEAGG_STDDEV:
10240 agg->dtag_aggregate = dtrace_aggregate_stddev;
10241 size = sizeof (uint64_t) * 4;
10242 break;
10243
10244 case DTRACEAGG_SUM:
10245 agg->dtag_aggregate = dtrace_aggregate_sum;
10246 break;
10247
10248 default:
10249 goto err;
10250 }
10251
10252 agg->dtag_action.dta_rec.dtrd_size = size;
10253
10254 if (ntuple == 0)
10255 goto err;
10256
10257 /*
10258 * We must make sure that we have enough actions for the n-tuple.
10259 */
10260 for (act = ecb->dte_action_last; act != NULL; act = act->dta_prev) {
10261 if (DTRACEACT_ISAGG(act->dta_kind))
10262 break;
10263
10264 if (--ntuple == 0) {
10265 /*
10266 * This is the action with which our n-tuple begins.
10267 */
10268 agg->dtag_first = act;
10269 goto success;
10270 }
10271 }
10272
10273 /*
10274 * This n-tuple is short by ntuple elements. Return failure.
10275 */
10276 ASSERT(ntuple != 0);
10277err:
10278 kmem_free(agg, sizeof (dtrace_aggregation_t));
10279 return (NULL);
10280
10281success:
10282 /*
10283 * If the last action in the tuple has a size of zero, it's actually
10284 * an expression argument for the aggregating action.
10285 */
10286 ASSERT(ecb->dte_action_last != NULL);
10287 act = ecb->dte_action_last;
10288
10289 if (act->dta_kind == DTRACEACT_DIFEXPR) {
10290 ASSERT(act->dta_difo != NULL);
10291
10292 if (act->dta_difo->dtdo_rtype.dtdt_size == 0)
10293 agg->dtag_hasarg = 1;
10294 }
10295
10296 /*
10297 * We need to allocate an id for this aggregation.
10298 */
10299 aggid = (dtrace_aggid_t)(uintptr_t)vmem_alloc(state->dts_aggid_arena, 1,
10300 VM_BESTFIT | VM_SLEEP);
10301
b0d623f7 10302 if (aggid - 1 >= (dtrace_aggid_t)state->dts_naggregations) {
10303 dtrace_aggregation_t **oaggs = state->dts_aggregations;
10304 dtrace_aggregation_t **aggs;
10305 int naggs = state->dts_naggregations << 1;
10306 int onaggs = state->dts_naggregations;
10307
b0d623f7 10308 ASSERT(aggid == (dtrace_aggid_t)state->dts_naggregations + 1);
10309
10310 if (naggs == 0) {
10311 ASSERT(oaggs == NULL);
10312 naggs = 1;
10313 }
10314
10315 aggs = kmem_zalloc(naggs * sizeof (*aggs), KM_SLEEP);
10316
10317 if (oaggs != NULL) {
10318 bcopy(oaggs, aggs, onaggs * sizeof (*aggs));
10319 kmem_free(oaggs, onaggs * sizeof (*aggs));
10320 }
10321
10322 state->dts_aggregations = aggs;
10323 state->dts_naggregations = naggs;
10324 }
10325
10326 ASSERT(state->dts_aggregations[aggid - 1] == NULL);
10327 state->dts_aggregations[(agg->dtag_id = aggid) - 1] = agg;
10328
10329 frec = &agg->dtag_first->dta_rec;
10330 if (frec->dtrd_alignment < sizeof (dtrace_aggid_t))
10331 frec->dtrd_alignment = sizeof (dtrace_aggid_t);
10332
10333 for (act = agg->dtag_first; act != NULL; act = act->dta_next) {
10334 ASSERT(!act->dta_intuple);
10335 act->dta_intuple = 1;
10336 }
10337
10338 return (&agg->dtag_action);
10339}
10340
10341static void
10342dtrace_ecb_aggregation_destroy(dtrace_ecb_t *ecb, dtrace_action_t *act)
10343{
10344 dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
10345 dtrace_state_t *state = ecb->dte_state;
10346 dtrace_aggid_t aggid = agg->dtag_id;
10347
10348 ASSERT(DTRACEACT_ISAGG(act->dta_kind));
10349 vmem_free(state->dts_aggid_arena, (void *)(uintptr_t)aggid, 1);
10350
10351 ASSERT(state->dts_aggregations[aggid - 1] == agg);
10352 state->dts_aggregations[aggid - 1] = NULL;
10353
10354 kmem_free(agg, sizeof (dtrace_aggregation_t));
10355}
10356
10357static int
10358dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
10359{
10360 dtrace_action_t *action, *last;
10361 dtrace_difo_t *dp = desc->dtad_difo;
10362 uint32_t size = 0, align = sizeof (uint8_t), mask;
10363 uint16_t format = 0;
10364 dtrace_recdesc_t *rec;
10365 dtrace_state_t *state = ecb->dte_state;
10366 dtrace_optval_t *opt = state->dts_options;
10367 dtrace_optval_t nframes=0, strsize;
10368 uint64_t arg = desc->dtad_arg;
10369
10370 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
10371 ASSERT(ecb->dte_action == NULL || ecb->dte_action->dta_refcnt == 1);
10372
10373 if (DTRACEACT_ISAGG(desc->dtad_kind)) {
10374 /*
10375 * If this is an aggregating action, there must be neither
10376 * a speculate nor a commit on the action chain.
10377 */
10378 dtrace_action_t *act;
10379
10380 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
10381 if (act->dta_kind == DTRACEACT_COMMIT)
10382 return (EINVAL);
10383
10384 if (act->dta_kind == DTRACEACT_SPECULATE)
10385 return (EINVAL);
10386 }
10387
10388 action = dtrace_ecb_aggregation_create(ecb, desc);
10389
10390 if (action == NULL)
10391 return (EINVAL);
10392 } else {
10393 if (DTRACEACT_ISDESTRUCTIVE(desc->dtad_kind) ||
10394 (desc->dtad_kind == DTRACEACT_DIFEXPR &&
10395 dp != NULL && dp->dtdo_destructive)) {
10396 state->dts_destructive = 1;
10397 }
10398
10399 switch (desc->dtad_kind) {
10400 case DTRACEACT_PRINTF:
10401 case DTRACEACT_PRINTA:
10402 case DTRACEACT_SYSTEM:
10403 case DTRACEACT_FREOPEN:
3e170ce0 10404 case DTRACEACT_DIFEXPR:
10405 /*
10406 * We know that our arg is a string -- turn it into a
10407 * format.
10408 */
fe8ab488 10409 if (arg == 0) {
10410 ASSERT(desc->dtad_kind == DTRACEACT_PRINTA ||
10411 desc->dtad_kind == DTRACEACT_DIFEXPR);
10412 format = 0;
10413 } else {
fe8ab488 10414 ASSERT(arg != 0);
b0d623f7 10415 ASSERT(arg > KERNELBASE);
10416 format = dtrace_format_add(state,
10417 (char *)(uintptr_t)arg);
10418 }
10419
10420 /*FALLTHROUGH*/
10421 case DTRACEACT_LIBACT:
10422 case DTRACEACT_TRACEMEM:
10423 case DTRACEACT_TRACEMEM_DYNSIZE:
10424 case DTRACEACT_APPLEBINARY: /* __APPLE__ */
10425 if (dp == NULL)
10426 return (EINVAL);
10427
10428 if ((size = dp->dtdo_rtype.dtdt_size) != 0)
10429 break;
10430
10431 if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) {
10432 if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
10433 return (EINVAL);
10434
10435 size = opt[DTRACEOPT_STRSIZE];
10436 }
10437
10438 break;
10439
10440 case DTRACEACT_STACK:
10441 if ((nframes = arg) == 0) {
10442 nframes = opt[DTRACEOPT_STACKFRAMES];
10443 ASSERT(nframes > 0);
10444 arg = nframes;
10445 }
10446
10447 size = nframes * sizeof (pc_t);
10448 break;
10449
10450 case DTRACEACT_JSTACK:
10451 if ((strsize = DTRACE_USTACK_STRSIZE(arg)) == 0)
10452 strsize = opt[DTRACEOPT_JSTACKSTRSIZE];
10453
10454 if ((nframes = DTRACE_USTACK_NFRAMES(arg)) == 0)
10455 nframes = opt[DTRACEOPT_JSTACKFRAMES];
10456
10457 arg = DTRACE_USTACK_ARG(nframes, strsize);
10458
10459 /*FALLTHROUGH*/
10460 case DTRACEACT_USTACK:
10461 if (desc->dtad_kind != DTRACEACT_JSTACK &&
10462 (nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) {
10463 strsize = DTRACE_USTACK_STRSIZE(arg);
10464 nframes = opt[DTRACEOPT_USTACKFRAMES];
10465 ASSERT(nframes > 0);
10466 arg = DTRACE_USTACK_ARG(nframes, strsize);
10467 }
10468
10469 /*
10470 * Save a slot for the pid.
10471 */
10472 size = (nframes + 1) * sizeof (uint64_t);
10473 size += DTRACE_USTACK_STRSIZE(arg);
10474 size = P2ROUNDUP(size, (uint32_t)(sizeof (uintptr_t)));
10475
10476 break;
10477
10478 case DTRACEACT_SYM:
10479 case DTRACEACT_MOD:
10480 if (dp == NULL || ((size = dp->dtdo_rtype.dtdt_size) !=
10481 sizeof (uint64_t)) ||
10482 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
10483 return (EINVAL);
10484 break;
10485
10486 case DTRACEACT_USYM:
10487 case DTRACEACT_UMOD:
10488 case DTRACEACT_UADDR:
10489 if (dp == NULL ||
10490 (dp->dtdo_rtype.dtdt_size != sizeof (uint64_t)) ||
10491 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
10492 return (EINVAL);
10493
10494 /*
10495 * We have a slot for the pid, plus a slot for the
10496 * argument. To keep things simple (aligned with
10497 * bitness-neutral sizing), we store each as a 64-bit
10498 * quantity.
10499 */
10500 size = 2 * sizeof (uint64_t);
10501 break;
10502
10503 case DTRACEACT_STOP:
10504 case DTRACEACT_BREAKPOINT:
10505 case DTRACEACT_PANIC:
10506 break;
10507
10508 case DTRACEACT_CHILL:
10509 case DTRACEACT_DISCARD:
10510 case DTRACEACT_RAISE:
fe8ab488 10511 case DTRACEACT_PIDRESUME: /* __APPLE__ */
10512 if (dp == NULL)
10513 return (EINVAL);
10514 break;
10515
10516 case DTRACEACT_EXIT:
10517 if (dp == NULL ||
10518 (size = dp->dtdo_rtype.dtdt_size) != sizeof (int) ||
10519 (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
10520 return (EINVAL);
10521 break;
10522
10523 case DTRACEACT_SPECULATE:
04b8595b 10524 if (ecb->dte_size > sizeof (dtrace_rechdr_t))
10525 return (EINVAL);
10526
10527 if (dp == NULL)
10528 return (EINVAL);
10529
10530 state->dts_speculates = 1;
10531 break;
10532
10533 case DTRACEACT_COMMIT: {
10534 dtrace_action_t *act = ecb->dte_action;
10535
10536 for (; act != NULL; act = act->dta_next) {
10537 if (act->dta_kind == DTRACEACT_COMMIT)
10538 return (EINVAL);
10539 }
10540
10541 if (dp == NULL)
10542 return (EINVAL);
10543 break;
10544 }
10545
10546 default:
10547 return (EINVAL);
10548 }
10549
10550 if (size != 0 || desc->dtad_kind == DTRACEACT_SPECULATE) {
10551 /*
10552 * If this is a data-storing action or a speculate,
10553 * we must be sure that there isn't a commit on the
10554 * action chain.
10555 */
10556 dtrace_action_t *act = ecb->dte_action;
10557
10558 for (; act != NULL; act = act->dta_next) {
10559 if (act->dta_kind == DTRACEACT_COMMIT)
10560 return (EINVAL);
10561 }
10562 }
10563
10564 action = kmem_zalloc(sizeof (dtrace_action_t), KM_SLEEP);
10565 action->dta_rec.dtrd_size = size;
10566 }
10567
10568 action->dta_refcnt = 1;
10569 rec = &action->dta_rec;
10570 size = rec->dtrd_size;
10571
10572 for (mask = sizeof (uint64_t) - 1; size != 0 && mask > 0; mask >>= 1) {
10573 if (!(size & mask)) {
10574 align = mask + 1;
10575 break;
10576 }
10577 }
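	/*
	 * Illustrative note (not in the original source): the loop above
	 * sets the alignment to the largest power of two, up to
	 * sizeof (uint64_t), that evenly divides the record size: size 24
	 * gives align 8, size 12 gives align 4, and an odd size keeps the
	 * initial byte alignment.
	 */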
10578
10579 action->dta_kind = desc->dtad_kind;
10580
10581 if ((action->dta_difo = dp) != NULL)
10582 dtrace_difo_hold(dp);
10583
10584 rec->dtrd_action = action->dta_kind;
10585 rec->dtrd_arg = arg;
10586 rec->dtrd_uarg = desc->dtad_uarg;
10587 rec->dtrd_alignment = (uint16_t)align;
10588 rec->dtrd_format = format;
10589
10590 if ((last = ecb->dte_action_last) != NULL) {
10591 ASSERT(ecb->dte_action != NULL);
10592 action->dta_prev = last;
10593 last->dta_next = action;
10594 } else {
10595 ASSERT(ecb->dte_action == NULL);
10596 ecb->dte_action = action;
10597 }
10598
10599 ecb->dte_action_last = action;
10600
10601 return (0);
10602}
10603
10604static void
10605dtrace_ecb_action_remove(dtrace_ecb_t *ecb)
10606{
10607 dtrace_action_t *act = ecb->dte_action, *next;
10608 dtrace_vstate_t *vstate = &ecb->dte_state->dts_vstate;
10609 dtrace_difo_t *dp;
10610 uint16_t format;
10611
10612 if (act != NULL && act->dta_refcnt > 1) {
10613 ASSERT(act->dta_next == NULL || act->dta_next->dta_refcnt == 1);
10614 act->dta_refcnt--;
10615 } else {
10616 for (; act != NULL; act = next) {
10617 next = act->dta_next;
10618 ASSERT(next != NULL || act == ecb->dte_action_last);
10619 ASSERT(act->dta_refcnt == 1);
10620
10621 if ((format = act->dta_rec.dtrd_format) != 0)
10622 dtrace_format_remove(ecb->dte_state, format);
10623
10624 if ((dp = act->dta_difo) != NULL)
10625 dtrace_difo_release(dp, vstate);
10626
10627 if (DTRACEACT_ISAGG(act->dta_kind)) {
10628 dtrace_ecb_aggregation_destroy(ecb, act);
10629 } else {
10630 kmem_free(act, sizeof (dtrace_action_t));
10631 }
10632 }
10633 }
10634
10635 ecb->dte_action = NULL;
10636 ecb->dte_action_last = NULL;
04b8595b 10637 ecb->dte_size = 0;
10638}
10639
10640static void
10641dtrace_ecb_disable(dtrace_ecb_t *ecb)
10642{
10643 /*
10644 * We disable the ECB by removing it from its probe.
10645 */
10646 dtrace_ecb_t *pecb, *prev = NULL;
10647 dtrace_probe_t *probe = ecb->dte_probe;
10648
10649 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
10650
10651 if (probe == NULL) {
10652 /*
10653 * This is the NULL probe; there is nothing to disable.
10654 */
10655 return;
10656 }
10657
10658 for (pecb = probe->dtpr_ecb; pecb != NULL; pecb = pecb->dte_next) {
10659 if (pecb == ecb)
10660 break;
10661 prev = pecb;
10662 }
10663
10664 ASSERT(pecb != NULL);
10665
10666 if (prev == NULL) {
10667 probe->dtpr_ecb = ecb->dte_next;
10668 } else {
10669 prev->dte_next = ecb->dte_next;
10670 }
10671
10672 if (ecb == probe->dtpr_ecb_last) {
10673 ASSERT(ecb->dte_next == NULL);
10674 probe->dtpr_ecb_last = prev;
10675 }
10676
fe8ab488 10677 probe->dtpr_provider->dtpv_ecb_count--;
10678 /*
10679 * The ECB has been disconnected from the probe; now sync to assure
10680 * that all CPUs have seen the change before returning.
10681 */
10682 dtrace_sync();
10683
10684 if (probe->dtpr_ecb == NULL) {
10685 /*
10686 * That was the last ECB on the probe; clear the predicate
10687 * cache ID for the probe, disable it and sync one more time
10688 * to assure that we'll never hit it again.
10689 */
10690 dtrace_provider_t *prov = probe->dtpr_provider;
10691
10692 ASSERT(ecb->dte_next == NULL);
10693 ASSERT(probe->dtpr_ecb_last == NULL);
10694 probe->dtpr_predcache = DTRACE_CACHEIDNONE;
10695 prov->dtpv_pops.dtps_disable(prov->dtpv_arg,
10696 probe->dtpr_id, probe->dtpr_arg);
10697 dtrace_sync();
10698 } else {
10699 /*
10700 * There is at least one ECB remaining on the probe. If there
10701 * is _exactly_ one, set the probe's predicate cache ID to be
10702 * the predicate cache ID of the remaining ECB.
10703 */
10704 ASSERT(probe->dtpr_ecb_last != NULL);
10705 ASSERT(probe->dtpr_predcache == DTRACE_CACHEIDNONE);
10706
10707 if (probe->dtpr_ecb == probe->dtpr_ecb_last) {
10708 dtrace_predicate_t *p = probe->dtpr_ecb->dte_predicate;
10709
10710 ASSERT(probe->dtpr_ecb->dte_next == NULL);
10711
10712 if (p != NULL)
10713 probe->dtpr_predcache = p->dtp_cacheid;
10714 }
10715
10716 ecb->dte_next = NULL;
10717 }
10718}
10719
10720static void
10721dtrace_ecb_destroy(dtrace_ecb_t *ecb)
10722{
10723 dtrace_state_t *state = ecb->dte_state;
10724 dtrace_vstate_t *vstate = &state->dts_vstate;
10725 dtrace_predicate_t *pred;
10726 dtrace_epid_t epid = ecb->dte_epid;
10727
10728 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
10729 ASSERT(ecb->dte_next == NULL);
10730 ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb);
10731
10732 if ((pred = ecb->dte_predicate) != NULL)
10733 dtrace_predicate_release(pred, vstate);
10734
10735 dtrace_ecb_action_remove(ecb);
10736
10737 ASSERT(state->dts_ecbs[epid - 1] == ecb);
10738 state->dts_ecbs[epid - 1] = NULL;
10739
10740 kmem_free(ecb, sizeof (dtrace_ecb_t));
10741}
10742
10743static dtrace_ecb_t *
10744dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe,
10745 dtrace_enabling_t *enab)
10746{
10747 dtrace_ecb_t *ecb;
10748 dtrace_predicate_t *pred;
10749 dtrace_actdesc_t *act;
10750 dtrace_provider_t *prov;
10751 dtrace_ecbdesc_t *desc = enab->dten_current;
10752
10753 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
10754 ASSERT(state != NULL);
10755
10756 ecb = dtrace_ecb_add(state, probe);
10757 ecb->dte_uarg = desc->dted_uarg;
10758
10759 if ((pred = desc->dted_pred.dtpdd_predicate) != NULL) {
10760 dtrace_predicate_hold(pred);
10761 ecb->dte_predicate = pred;
10762 }
10763
10764 if (probe != NULL) {
10765 /*
10766 * If the provider shows more leg than the consumer is old
10767 * enough to see, we need to enable the appropriate implicit
10768 * predicate bits to prevent the ecb from activating at
10769 * revealing times.
10770 *
10771 * Providers specifying DTRACE_PRIV_USER at register time
10772 * are stating that they need the /proc-style privilege
10773 * model to be enforced, and this is what DTRACE_COND_OWNER
10774 * and DTRACE_COND_ZONEOWNER will then do at probe time.
10775 */
10776 prov = probe->dtpr_provider;
10777 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLPROC) &&
10778 (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
10779 ecb->dte_cond |= DTRACE_COND_OWNER;
10780
10781 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLZONE) &&
10782 (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
10783 ecb->dte_cond |= DTRACE_COND_ZONEOWNER;
10784
10785 /*
10786 * If the provider shows us kernel innards and the user
10787 * is lacking sufficient privilege, enable the
10788 * DTRACE_COND_USERMODE implicit predicate.
10789 */
10790 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) &&
10791 (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_KERNEL))
10792 ecb->dte_cond |= DTRACE_COND_USERMODE;
10793 }
10794
10795 if (dtrace_ecb_create_cache != NULL) {
10796 /*
10797 * If we have a cached ecb, we'll use its action list instead
10798 * of creating our own (saving both time and space).
10799 */
10800 dtrace_ecb_t *cached = dtrace_ecb_create_cache;
c910b4d9 10801 dtrace_action_t *act_if = cached->dte_action;
2d21ac55 10802
10803 if (act_if != NULL) {
10804 ASSERT(act_if->dta_refcnt > 0);
10805 act_if->dta_refcnt++;
10806 ecb->dte_action = act_if;
10807 ecb->dte_action_last = cached->dte_action_last;
10808 ecb->dte_needed = cached->dte_needed;
10809 ecb->dte_size = cached->dte_size;
10810 ecb->dte_alignment = cached->dte_alignment;
10811 }
10812
10813 return (ecb);
10814 }
10815
10816 for (act = desc->dted_action; act != NULL; act = act->dtad_next) {
10817 if ((enab->dten_error = dtrace_ecb_action_add(ecb, act)) != 0) {
10818 dtrace_ecb_destroy(ecb);
10819 return (NULL);
10820 }
10821 }
10822
10823 if ((enab->dten_error = dtrace_ecb_resize(ecb)) != 0) {
10824 dtrace_ecb_destroy(ecb);
10825 return (NULL);
10826 }
10827
10828 return (dtrace_ecb_create_cache = ecb);
10829}
10830
10831static int
d190cdc3 10832dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg1, void *arg2)
10833{
10834 dtrace_ecb_t *ecb;
10835 dtrace_enabling_t *enab = arg1;
10836 dtrace_ecbdesc_t *ep = arg2;
10837 dtrace_state_t *state = enab->dten_vstate->dtvs_state;
10838
10839 ASSERT(state != NULL);
10840
d190cdc3 10841 if (probe != NULL && ep != NULL && probe->dtpr_gen < ep->dted_probegen) {
10842 /*
10843 * This probe was created in a generation for which this
10844 * enabling has previously created ECBs; we don't want to
10845 * enable it again, so just kick out.
10846 */
10847 return (DTRACE_MATCH_NEXT);
10848 }
10849
10850 if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL)
10851 return (DTRACE_MATCH_DONE);
10852
10853 if (dtrace_ecb_enable(ecb) < 0)
10854 return (DTRACE_MATCH_FAIL);
10855
10856 return (DTRACE_MATCH_NEXT);
10857}
10858
10859static dtrace_ecb_t *
10860dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id)
10861{
10862 dtrace_ecb_t *ecb;
b0d623f7 10863#pragma unused(ecb) /* __APPLE__ */
10864
10865 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
10866
fe8ab488 10867 if (id == 0 || id > (dtrace_epid_t)state->dts_necbs)
10868 return (NULL);
10869
10870 ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL);
10871 ASSERT((ecb = state->dts_ecbs[id - 1]) == NULL || ecb->dte_epid == id);
10872
10873 return (state->dts_ecbs[id - 1]);
10874}
10875
10876static dtrace_aggregation_t *
10877dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id)
10878{
10879 dtrace_aggregation_t *agg;
b0d623f7 10880#pragma unused(agg) /* __APPLE__ */
10881
10882 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
10883
b0d623f7 10884 if (id == 0 || id > (dtrace_aggid_t)state->dts_naggregations)
10885 return (NULL);
10886
10887 ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL);
10888 ASSERT((agg = state->dts_aggregations[id - 1]) == NULL ||
10889 agg->dtag_id == id);
10890
10891 return (state->dts_aggregations[id - 1]);
10892}
10893
10894/*
10895 * DTrace Buffer Functions
10896 *
10897 * The following functions manipulate DTrace buffers. Most of these functions
10898 * are called in the context of establishing or processing consumer state;
10899 * exceptions are explicitly noted.
10900 */
10901
10902/*
10903 * Note: called from cross call context. This function switches the two
10904 * buffers on a given CPU. The atomicity of this operation is assured by
10905 * disabling interrupts while the actual switch takes place; the disabling of
10906 * interrupts serializes the execution with any execution of dtrace_probe() on
10907 * the same CPU.
10908 */
10909static void
10910dtrace_buffer_switch(dtrace_buffer_t *buf)
10911{
10912 caddr_t tomax = buf->dtb_tomax;
10913 caddr_t xamot = buf->dtb_xamot;
10914 dtrace_icookie_t cookie;
04b8595b 10915 hrtime_t now;
10916
10917 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
10918 ASSERT(!(buf->dtb_flags & DTRACEBUF_RING));
10919
10920 cookie = dtrace_interrupt_disable();
04b8595b 10921 now = dtrace_gethrtime();
10922 buf->dtb_tomax = xamot;
10923 buf->dtb_xamot = tomax;
10924 buf->dtb_xamot_drops = buf->dtb_drops;
10925 buf->dtb_xamot_offset = buf->dtb_offset;
10926 buf->dtb_xamot_errors = buf->dtb_errors;
10927 buf->dtb_xamot_flags = buf->dtb_flags;
10928 buf->dtb_offset = 0;
10929 buf->dtb_drops = 0;
10930 buf->dtb_errors = 0;
10931 buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED);
10932 buf->dtb_interval = now - buf->dtb_switched;
10933 buf->dtb_switched = now;
10934 buf->dtb_cur_limit = buf->dtb_limit;
10935
10936 dtrace_interrupt_enable(cookie);
10937}
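
/*
 * Illustrative note (not in the original source): "tomax" is the buffer
 * probes are currently writing into and "xamot" (tomax reversed) is the
 * inactive buffer being drained by the consumer; the switch exchanges
 * the two pointers and snapshots drops, offset, errors and flags while
 * interrupts are disabled.
 */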
10938
10939/*
10940 * Note: called from cross call context. This function activates a buffer
10941 * on a CPU. As with dtrace_buffer_switch(), the atomicity of the operation
10942 * is guaranteed by the disabling of interrupts.
10943 */
10944static void
10945dtrace_buffer_activate(dtrace_state_t *state)
10946{
10947 dtrace_buffer_t *buf;
10948 dtrace_icookie_t cookie = dtrace_interrupt_disable();
10949
10950 buf = &state->dts_buffer[CPU->cpu_id];
10951
10952 if (buf->dtb_tomax != NULL) {
10953 /*
10954 * We might like to assert that the buffer is marked inactive,
10955 * but this isn't necessarily true: the CPU that processes
10956 * the BEGIN probe has its buffer activated manually. In this
10957 * case, we take the (harmless) action of re-clearing the
10958 * INACTIVE bit.
10959 */
10960 buf->dtb_flags &= ~DTRACEBUF_INACTIVE;
10961 }
10962
10963 dtrace_interrupt_enable(cookie);
10964}
10965
10966static int
10967dtrace_buffer_canalloc(size_t size)
10968{
10969 if (size > (UINT64_MAX - dtrace_buffer_memory_inuse))
10970 return (B_FALSE);
10971 if ((size + dtrace_buffer_memory_inuse) > dtrace_buffer_memory_maxsize)
10972 return (B_FALSE);
10973
10974 return (B_TRUE);
10975}
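
/*
 * Illustrative note (not in the original source): the first test above
 * guards the second against unsigned wrap-around. If size exceeded
 * UINT64_MAX - dtrace_buffer_memory_inuse, the sum in the second test
 * would overflow and could compare below dtrace_buffer_memory_maxsize,
 * silently defeating the limit.
 */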
10976
2d21ac55 10977static int
39037602 10978dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t limit, size_t size, int flags,
10979 processorid_t cpu)
10980{
6d2010ae 10981 dtrace_cpu_t *cp;
2d21ac55 10982 dtrace_buffer_t *buf;
fe8ab488 10983 size_t size_before_alloc = dtrace_buffer_memory_inuse;
10984
10985 lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
10986 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
10987
10988 if (size > (size_t)dtrace_nonroot_maxsize &&
10989 !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE))
10990 return (EFBIG);
10991
10992 cp = cpu_list;
10993
10994 do {
10995 if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
10996 continue;
10997
10998 buf = &bufs[cp->cpu_id];
10999
11000 /*
11001 * If there is already a buffer allocated for this CPU, it
11002 * is only possible that this is a DR event. In this case,
11003 * the buffer size must match our specified size.
11004 */
11005 if (buf->dtb_tomax != NULL) {
11006 ASSERT(buf->dtb_size == size);
11007 continue;
11008 }
11009
11010 ASSERT(buf->dtb_xamot == NULL);
11011
39037602 11012
11013 /* DTrace, please do not eat all the memory. */
11014 if (dtrace_buffer_canalloc(size) == B_FALSE)
11015 goto err;
2d21ac55
A
11016 if ((buf->dtb_tomax = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
11017 goto err;
fe8ab488 11018 dtrace_buffer_memory_inuse += size;
2d21ac55 11019
11020 /* Ensure that the limit is always lower than the size */
11021 limit = limit == size ? limit - 1 : limit;
11022 buf->dtb_cur_limit = limit;
11023 buf->dtb_limit = limit;
11024 buf->dtb_size = size;
11025 buf->dtb_flags = flags;
11026 buf->dtb_offset = 0;
11027 buf->dtb_drops = 0;
11028
11029 if (flags & DTRACEBUF_NOSWITCH)
11030 continue;
11031
11032 /* DTrace, please do not eat all the memory. */
11033 if (dtrace_buffer_canalloc(size) == B_FALSE)
11034 goto err;
11035 if ((buf->dtb_xamot = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
11036 goto err;
fe8ab488 11037 dtrace_buffer_memory_inuse += size;
11038 } while ((cp = cp->cpu_next) != cpu_list);
11039
11040 ASSERT(dtrace_buffer_memory_inuse <= dtrace_buffer_memory_maxsize);
11041
11042 return (0);
11043
11044err:
11045 cp = cpu_list;
11046
11047 do {
11048 if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
11049 continue;
11050
11051 buf = &bufs[cp->cpu_id];
11052
11053 if (buf->dtb_xamot != NULL) {
11054 ASSERT(buf->dtb_tomax != NULL);
11055 ASSERT(buf->dtb_size == size);
11056 kmem_free(buf->dtb_xamot, size);
11057 }
11058
11059 if (buf->dtb_tomax != NULL) {
11060 ASSERT(buf->dtb_size == size);
11061 kmem_free(buf->dtb_tomax, size);
11062 }
11063
11064 buf->dtb_tomax = NULL;
11065 buf->dtb_xamot = NULL;
11066 buf->dtb_size = 0;
11067 } while ((cp = cp->cpu_next) != cpu_list);
11068
11069 /* Restore the size saved before allocating memory */
11070 dtrace_buffer_memory_inuse = size_before_alloc;
11071
11072 return (ENOMEM);
11073}
11074
11075/*
11076 * Note: called from probe context. This function just increments the drop
11077 * count on a buffer. It has been made a function to allow for the
11078 * possibility of understanding the source of mysterious drop counts. (A
11079 * problem for which one may be particularly disappointed that DTrace cannot
11080 * be used to understand DTrace.)
11081 */
11082static void
11083dtrace_buffer_drop(dtrace_buffer_t *buf)
11084{
11085 buf->dtb_drops++;
11086}
11087
11088/*
11089 * Note: called from probe context. This function is called to reserve space
11090 * in a buffer. If mstate is non-NULL, sets the scratch base and size in the
11091 * mstate. Returns the new offset in the buffer, or a negative value if an
11092 * error has occurred.
11093 */
11094static intptr_t
11095dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align,
11096 dtrace_state_t *state, dtrace_mstate_t *mstate)
11097{
11098 intptr_t offs = buf->dtb_offset, soffs;
11099 intptr_t woffs;
11100 caddr_t tomax;
c910b4d9 11101 size_t total_off;
11102
11103 if (buf->dtb_flags & DTRACEBUF_INACTIVE)
11104 return (-1);
11105
11106 if ((tomax = buf->dtb_tomax) == NULL) {
11107 dtrace_buffer_drop(buf);
11108 return (-1);
11109 }
11110
11111 if (!(buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL))) {
11112 while (offs & (align - 1)) {
11113 /*
11114 * Assert that our alignment is off by a number which
11115 * is itself sizeof (uint32_t) aligned.
11116 */
11117 ASSERT(!((align - (offs & (align - 1))) &
11118 (sizeof (uint32_t) - 1)));
11119 DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
11120 offs += sizeof (uint32_t);
11121 }
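		/*
		 * Worked example (illustrative, not in the original
		 * source): with align = 8 and offs = 4, one
		 * DTRACE_EPIDNONE word is stored at offset 4 and offs
		 * advances to 8; consumers recognize such words as
		 * padding, so they are never mistaken for records.
		 */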
11122
11123 if ((uint64_t)(soffs = offs + needed) > buf->dtb_cur_limit) {
11124 if (buf->dtb_cur_limit == buf->dtb_limit) {
11125 buf->dtb_cur_limit = buf->dtb_size;
11126
11127 atomic_add_32(&state->dts_buf_over_limit, 1);
11128 /**
11129 * Set an AST on the current processor
11130 * so that we can wake up the process
11131 * outside of probe context, when we know
11132 * it is safe to do so
11133 */
11134 minor_t minor = getminor(state->dts_dev);
11135 ASSERT(minor < 32);
11136
11137 atomic_or_32(&dtrace_wake_clients, 1 << minor);
11138 ast_dtrace_on();
11139 }
11140 if ((uint64_t)soffs > buf->dtb_size) {
11141 dtrace_buffer_drop(buf);
11142 return (-1);
11143 }
11144 }
11145
11146 if (mstate == NULL)
11147 return (offs);
11148
11149 mstate->dtms_scratch_base = (uintptr_t)tomax + soffs;
11150 mstate->dtms_scratch_size = buf->dtb_size - soffs;
11151 mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;
11152
11153 return (offs);
11154 }
11155
11156 if (buf->dtb_flags & DTRACEBUF_FILL) {
11157 if (state->dts_activity != DTRACE_ACTIVITY_COOLDOWN &&
11158 (buf->dtb_flags & DTRACEBUF_FULL))
11159 return (-1);
11160 goto out;
11161 }
11162
c910b4d9 11163 total_off = needed + (offs & (align - 1));
11164
11165 /*
11166 * For a ring buffer, life is quite a bit more complicated. Before
11167 * we can store any padding, we need to adjust our wrapping offset.
11168 * (If we've never before wrapped or we're not about to, no adjustment
11169 * is required.)
11170 */
11171 if ((buf->dtb_flags & DTRACEBUF_WRAPPED) ||
c910b4d9 11172 offs + total_off > buf->dtb_size) {
11173 woffs = buf->dtb_xamot_offset;
11174
c910b4d9 11175 if (offs + total_off > buf->dtb_size) {
11176 /*
11177 * We can't fit in the end of the buffer. First, a
11178 * sanity check that we can fit in the buffer at all.
11179 */
c910b4d9 11180 if (total_off > buf->dtb_size) {
11181 dtrace_buffer_drop(buf);
11182 return (-1);
11183 }
11184
11185 /*
11186 * We're going to be storing at the top of the buffer,
11187 * so now we need to deal with the wrapped offset. We
11188 * only reset our wrapped offset to 0 if it is
11189 * currently greater than the current offset. If it
11190 * is less than the current offset, it is because a
11191 * previous allocation induced a wrap -- but the
11192 * allocation didn't subsequently take the space due
11193 * to an error or false predicate evaluation. In this
11194 * case, we'll just leave the wrapped offset alone: if
11195 * the wrapped offset hasn't been advanced far enough
11196 * for this allocation, it will be adjusted in the
11197 * lower loop.
11198 */
11199 if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
11200 if (woffs >= offs)
11201 woffs = 0;
11202 } else {
11203 woffs = 0;
11204 }
11205
11206 /*
11207 * Now we know that we're going to be storing to the
11208 * top of the buffer and that there is room for us
11209 * there. We need to clear the buffer from the current
11210 * offset to the end (there may be old gunk there).
11211 */
b0d623f7 11212 while ((uint64_t)offs < buf->dtb_size)
11213 tomax[offs++] = 0;
11214
11215 /*
11216 * We need to set our offset to zero. And because we
11217 * are wrapping, we need to set the bit indicating as
11218 * much. We can also adjust our needed space back
11219 * down to the space required by the ECB -- we know
11220 * that the top of the buffer is aligned.
11221 */
11222 offs = 0;
c910b4d9 11223 total_off = needed;
11224 buf->dtb_flags |= DTRACEBUF_WRAPPED;
11225 } else {
11226 /*
11227 * There is room for us in the buffer, so we simply
11228 * need to check the wrapped offset.
11229 */
11230 if (woffs < offs) {
11231 /*
11232 * The wrapped offset is less than the offset.
11233 * This can happen if we allocated buffer space
11234 * that induced a wrap, but then we didn't
11235 * subsequently take the space due to an error
11236 * or false predicate evaluation. This is
11237 * okay; we know that _this_ allocation isn't
11238 * going to induce a wrap. We still can't
11239 * reset the wrapped offset to be zero,
11240 * however: the space may have been trashed in
11241 * the previous failed probe attempt. But at
11242 * least the wrapped offset doesn't need to
11243 * be adjusted at all...
11244 */
11245 goto out;
11246 }
11247 }
11248
b0d623f7 11249 while (offs + total_off > (size_t)woffs) {
11250 dtrace_epid_t epid = *(uint32_t *)(tomax + woffs);
11251 size_t size;
11252
11253 if (epid == DTRACE_EPIDNONE) {
11254 size = sizeof (uint32_t);
11255 } else {
b0d623f7 11256 ASSERT(epid <= (dtrace_epid_t)state->dts_necbs);
11257 ASSERT(state->dts_ecbs[epid - 1] != NULL);
11258
11259 size = state->dts_ecbs[epid - 1]->dte_size;
11260 }
11261
11262 ASSERT(woffs + size <= buf->dtb_size);
11263 ASSERT(size != 0);
11264
11265 if (woffs + size == buf->dtb_size) {
11266 /*
11267 * We've reached the end of the buffer; we want
11268 * to set the wrapped offset to 0 and break
11269 * out. However, if the offs is 0, then we're
11270 * in a strange edge-condition: the amount of
11271 * space that we want to reserve plus the size
11272 * of the record that we're overwriting is
11273 * greater than the size of the buffer. This
11274 * is problematic because if we reserve the
11275 * space but subsequently don't consume it (due
11276 * to a failed predicate or error) the wrapped
11277 * offset will be 0 -- yet the EPID at offset 0
11278 * will not be committed. This situation is
11279 * relatively easy to deal with: if we're in
11280 * this case, the buffer is indistinguishable
11281 * from one that hasn't wrapped; we need only
11282 * finish the job by clearing the wrapped bit,
11283 * explicitly setting the offset to be 0, and
11284 * zero'ing out the old data in the buffer.
11285 */
11286 if (offs == 0) {
11287 buf->dtb_flags &= ~DTRACEBUF_WRAPPED;
11288 buf->dtb_offset = 0;
c910b4d9 11289 woffs = total_off;
2d21ac55 11290
b0d623f7 11291 while ((uint64_t)woffs < buf->dtb_size)
2d21ac55
A
11292 tomax[woffs++] = 0;
11293 }
11294
11295 woffs = 0;
11296 break;
11297 }
11298
11299 woffs += size;
11300 }
11301
11302 /*
11303 * We have a wrapped offset. It may be that the wrapped offset
11304 * has become zero -- that's okay.
11305 */
11306 buf->dtb_xamot_offset = woffs;
11307 }
11308
11309out:
11310 /*
11311 * Now we can plow the buffer with any necessary padding.
11312 */
11313 while (offs & (align - 1)) {
11314 /*
11315 * Assert that our alignment is off by a number which
11316 * is itself sizeof (uint32_t) aligned.
11317 */
11318 ASSERT(!((align - (offs & (align - 1))) &
11319 (sizeof (uint32_t) - 1)));
11320 DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
11321 offs += sizeof (uint32_t);
11322 }
11323
11324 if (buf->dtb_flags & DTRACEBUF_FILL) {
11325 if (offs + needed > buf->dtb_size - state->dts_reserve) {
11326 buf->dtb_flags |= DTRACEBUF_FULL;
11327 return (-1);
11328 }
11329 }
11330
11331 if (mstate == NULL)
11332 return (offs);
11333
11334 /*
11335 * For ring buffers and fill buffers, the scratch space is always
11336 * the inactive buffer.
11337 */
11338 mstate->dtms_scratch_base = (uintptr_t)buf->dtb_xamot;
11339 mstate->dtms_scratch_size = buf->dtb_size;
11340 mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;
11341
11342 return (offs);
11343}
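/*
 * EDITOR'S NOTE -- illustrative sketch, not part of the original source.
 * A minimal user-space model of the padding loop above: the reserved
 * offset is advanced to "align" by storing the 4-byte DTRACE_EPIDNONE
 * sentinel, which consumers recognize and skip. All demo_* names are
 * hypothetical.
 */
#include <assert.h>
#include <stdint.h>
#include <string.h>

#define	DEMO_EPIDNONE	0u

static size_t
demo_pad_to_align(unsigned char *tomax, size_t offs, size_t align)
{
	/* align is a power of two >= 4; offs is already 4-byte aligned */
	assert((align & (align - 1)) == 0 && align >= sizeof (uint32_t));
	assert((offs & (sizeof (uint32_t) - 1)) == 0);

	while (offs & (align - 1)) {
		uint32_t none = DEMO_EPIDNONE;

		/* each filler record is exactly one uint32_t */
		memcpy(tomax + offs, &none, sizeof (none));
		offs += sizeof (uint32_t);
	}
	return (offs);
}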
11344
11345static void
11346dtrace_buffer_polish(dtrace_buffer_t *buf)
11347{
11348 ASSERT(buf->dtb_flags & DTRACEBUF_RING);
11349 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
11350
11351 if (!(buf->dtb_flags & DTRACEBUF_WRAPPED))
11352 return;
11353
11354 /*
11355 * We need to polish the ring buffer. There are three cases:
11356 *
11357 * - The first (and presumably most common) is that there is no gap
11358 * between the buffer offset and the wrapped offset. In this case,
11359 * there is nothing in the buffer that isn't valid data; we can
11360 * mark the buffer as polished and return.
11361 *
11362 * - The second (less common than the first but still more common
11363 * than the third) is that there is a gap between the buffer offset
11364 * and the wrapped offset, and the wrapped offset is larger than the
11365 * buffer offset. This can happen because of an alignment issue, or
11366 * can happen because of a call to dtrace_buffer_reserve() that
11367 * didn't subsequently consume the buffer space. In this case,
11368 * we need to zero the data from the buffer offset to the wrapped
11369 * offset.
11370 *
11371 * - The third (and least common) is that there is a gap between the
11372 * buffer offset and the wrapped offset, but the wrapped offset is
11373 * _less_ than the buffer offset. This can only happen because a
11374 * call to dtrace_buffer_reserve() induced a wrap, but the space
11375 * was not subsequently consumed. In this case, we need to zero the
11376 * space from the offset to the end of the buffer _and_ from the
11377 * top of the buffer to the wrapped offset.
11378 */
11379 if (buf->dtb_offset < buf->dtb_xamot_offset) {
11380 bzero(buf->dtb_tomax + buf->dtb_offset,
11381 buf->dtb_xamot_offset - buf->dtb_offset);
11382 }
11383
11384 if (buf->dtb_offset > buf->dtb_xamot_offset) {
11385 bzero(buf->dtb_tomax + buf->dtb_offset,
11386 buf->dtb_size - buf->dtb_offset);
11387 bzero(buf->dtb_tomax, buf->dtb_xamot_offset);
11388 }
11389}
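/*
 * EDITOR'S NOTE -- illustrative sketch, not part of the original source.
 * A user-space model of the two bzero() cases above, using plain
 * memset(). demo_polish() assumes a wrapped ring buffer of "size"
 * bytes with current offset "offs" and wrapped offset "woffs".
 */
#include <string.h>

static void
demo_polish(unsigned char *buf, size_t size, size_t offs, size_t woffs)
{
	if (offs < woffs) {
		/* second case: gap is contiguous -- zero [offs, woffs) */
		memset(buf + offs, 0, woffs - offs);
	} else if (offs > woffs) {
		/*
		 * third case: gap straddles the end -- zero [offs, size)
		 * and then [0, woffs)
		 */
		memset(buf + offs, 0, size - offs);
		memset(buf, 0, woffs);
	}
	/* first case (offs == woffs): everything is valid data */
}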
11390
11391static void
11392dtrace_buffer_free(dtrace_buffer_t *bufs)
11393{
11394 int i;
11395
c910b4d9 11396 for (i = 0; i < (int)NCPU; i++) {
2d21ac55
A
11397 dtrace_buffer_t *buf = &bufs[i];
11398
11399 if (buf->dtb_tomax == NULL) {
11400 ASSERT(buf->dtb_xamot == NULL);
11401 ASSERT(buf->dtb_size == 0);
11402 continue;
11403 }
11404
11405 if (buf->dtb_xamot != NULL) {
11406 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
11407 kmem_free(buf->dtb_xamot, buf->dtb_size);
fe8ab488
A
11408
11409 ASSERT(dtrace_buffer_memory_inuse >= buf->dtb_size);
11410 dtrace_buffer_memory_inuse -= buf->dtb_size;
2d21ac55
A
11411 }
11412
11413 kmem_free(buf->dtb_tomax, buf->dtb_size);
fe8ab488
A
11414 ASSERT(dtrace_buffer_memory_inuse >= buf->dtb_size);
11415 dtrace_buffer_memory_inuse -= buf->dtb_size;
11416
2d21ac55
A
11417 buf->dtb_size = 0;
11418 buf->dtb_tomax = NULL;
11419 buf->dtb_xamot = NULL;
11420 }
11421}
11422
11423/*
11424 * DTrace Enabling Functions
11425 */
11426static dtrace_enabling_t *
11427dtrace_enabling_create(dtrace_vstate_t *vstate)
11428{
11429 dtrace_enabling_t *enab;
11430
11431 enab = kmem_zalloc(sizeof (dtrace_enabling_t), KM_SLEEP);
11432 enab->dten_vstate = vstate;
11433
11434 return (enab);
11435}
11436
11437static void
11438dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb)
11439{
11440 dtrace_ecbdesc_t **ndesc;
11441 size_t osize, nsize;
11442
11443 /*
11444 * We can't add to enablings after we've enabled them, or after we've
11445 * retained them.
11446 */
11447 ASSERT(enab->dten_probegen == 0);
11448 ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);
11449
fe8ab488
A
11450 /* APPLE NOTE: this protects against gcc 4.0 botch on x86 */
11451 if (ecb == NULL) return;
2d21ac55
A
11452
11453 if (enab->dten_ndesc < enab->dten_maxdesc) {
11454 enab->dten_desc[enab->dten_ndesc++] = ecb;
11455 return;
11456 }
11457
11458 osize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);
11459
11460 if (enab->dten_maxdesc == 0) {
11461 enab->dten_maxdesc = 1;
11462 } else {
11463 enab->dten_maxdesc <<= 1;
11464 }
11465
11466 ASSERT(enab->dten_ndesc < enab->dten_maxdesc);
11467
11468 nsize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);
11469 ndesc = kmem_zalloc(nsize, KM_SLEEP);
11470 bcopy(enab->dten_desc, ndesc, osize);
11471 kmem_free(enab->dten_desc, osize);
11472
11473 enab->dten_desc = ndesc;
11474 enab->dten_desc[enab->dten_ndesc++] = ecb;
11475}
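/*
 * EDITOR'S NOTE -- illustrative sketch, not part of the original source.
 * dtrace_enabling_add() grows dten_desc geometrically (capacity 1, 2,
 * 4, ...), giving amortized O(1) appends. (The osize/nsize expressions
 * above use sizeof (dtrace_enabling_t *); since all object pointers
 * have the same size this is equivalent to sizeof (dtrace_ecbdesc_t *).)
 * A generic user-space model of the same growth strategy, with
 * hypothetical demo_* names:
 */
#include <stdlib.h>
#include <string.h>

struct demo_vec {
	void	**v_items;
	int	v_nitems;
	int	v_max;
};

static int
demo_vec_append(struct demo_vec *vec, void *item)
{
	if (vec->v_nitems == vec->v_max) {
		int nmax = (vec->v_max == 0) ? 1 : vec->v_max << 1;
		void **nitems = calloc(nmax, sizeof (void *));

		if (nitems == NULL)
			return (-1);

		/* copy the old contents, then retire the old array */
		memcpy(nitems, vec->v_items, vec->v_max * sizeof (void *));
		free(vec->v_items);
		vec->v_items = nitems;
		vec->v_max = nmax;
	}
	vec->v_items[vec->v_nitems++] = item;
	return (0);
}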
11476
11477static void
11478dtrace_enabling_addlike(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb,
11479 dtrace_probedesc_t *pd)
11480{
11481 dtrace_ecbdesc_t *new;
11482 dtrace_predicate_t *pred;
11483 dtrace_actdesc_t *act;
11484
11485 /*
11486 * We're going to create a new ECB description that matches the
11487 * specified ECB in every way, but has the specified probe description.
11488 */
11489 new = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP);
11490
11491 if ((pred = ecb->dted_pred.dtpdd_predicate) != NULL)
11492 dtrace_predicate_hold(pred);
11493
11494 for (act = ecb->dted_action; act != NULL; act = act->dtad_next)
11495 dtrace_actdesc_hold(act);
11496
11497 new->dted_action = ecb->dted_action;
11498 new->dted_pred = ecb->dted_pred;
11499 new->dted_probe = *pd;
11500 new->dted_uarg = ecb->dted_uarg;
11501
11502 dtrace_enabling_add(enab, new);
11503}
11504
11505static void
11506dtrace_enabling_dump(dtrace_enabling_t *enab)
11507{
11508 int i;
11509
11510 for (i = 0; i < enab->dten_ndesc; i++) {
11511 dtrace_probedesc_t *desc = &enab->dten_desc[i]->dted_probe;
11512
11513 cmn_err(CE_NOTE, "enabling probe %d (%s:%s:%s:%s)", i,
11514 desc->dtpd_provider, desc->dtpd_mod,
11515 desc->dtpd_func, desc->dtpd_name);
11516 }
11517}
11518
11519static void
11520dtrace_enabling_destroy(dtrace_enabling_t *enab)
11521{
11522 int i;
11523 dtrace_ecbdesc_t *ep;
11524 dtrace_vstate_t *vstate = enab->dten_vstate;
11525
11526 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
11527
11528 for (i = 0; i < enab->dten_ndesc; i++) {
11529 dtrace_actdesc_t *act, *next;
11530 dtrace_predicate_t *pred;
11531
11532 ep = enab->dten_desc[i];
11533
11534 if ((pred = ep->dted_pred.dtpdd_predicate) != NULL)
11535 dtrace_predicate_release(pred, vstate);
11536
11537 for (act = ep->dted_action; act != NULL; act = next) {
11538 next = act->dtad_next;
11539 dtrace_actdesc_release(act, vstate);
11540 }
11541
11542 kmem_free(ep, sizeof (dtrace_ecbdesc_t));
11543 }
11544
11545 kmem_free(enab->dten_desc,
11546 enab->dten_maxdesc * sizeof (dtrace_enabling_t *));
11547
11548 /*
11549 * If this was a retained enabling, decrement the dts_nretained count
11550 * and take it off of the dtrace_retained list.
11551 */
11552 if (enab->dten_prev != NULL || enab->dten_next != NULL ||
11553 dtrace_retained == enab) {
11554 ASSERT(enab->dten_vstate->dtvs_state != NULL);
11555 ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0);
11556 enab->dten_vstate->dtvs_state->dts_nretained--;
b0d623f7 11557 dtrace_retained_gen++;
2d21ac55
A
11558 }
11559
11560 if (enab->dten_prev == NULL) {
11561 if (dtrace_retained == enab) {
11562 dtrace_retained = enab->dten_next;
11563
11564 if (dtrace_retained != NULL)
11565 dtrace_retained->dten_prev = NULL;
11566 }
11567 } else {
11568 ASSERT(enab != dtrace_retained);
11569 ASSERT(dtrace_retained != NULL);
11570 enab->dten_prev->dten_next = enab->dten_next;
11571 }
11572
11573 if (enab->dten_next != NULL) {
11574 ASSERT(dtrace_retained != NULL);
11575 enab->dten_next->dten_prev = enab->dten_prev;
11576 }
11577
11578 kmem_free(enab, sizeof (dtrace_enabling_t));
11579}
11580
11581static int
11582dtrace_enabling_retain(dtrace_enabling_t *enab)
11583{
11584 dtrace_state_t *state;
11585
11586 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
11587 ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);
11588 ASSERT(enab->dten_vstate != NULL);
11589
11590 state = enab->dten_vstate->dtvs_state;
11591 ASSERT(state != NULL);
11592
11593 /*
11594 * We only allow each state to retain dtrace_retain_max enablings.
11595 */
11596 if (state->dts_nretained >= dtrace_retain_max)
11597 return (ENOSPC);
11598
11599 state->dts_nretained++;
b0d623f7 11600 dtrace_retained_gen++;
2d21ac55
A
11601
11602 if (dtrace_retained == NULL) {
11603 dtrace_retained = enab;
11604 return (0);
11605 }
11606
11607 enab->dten_next = dtrace_retained;
11608 dtrace_retained->dten_prev = enab;
11609 dtrace_retained = enab;
11610
11611 return (0);
11612}
11613
11614static int
11615dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match,
11616 dtrace_probedesc_t *create)
11617{
11618 dtrace_enabling_t *new, *enab;
11619 int found = 0, err = ENOENT;
11620
11621 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
11622 ASSERT(strlen(match->dtpd_provider) < DTRACE_PROVNAMELEN);
11623 ASSERT(strlen(match->dtpd_mod) < DTRACE_MODNAMELEN);
11624 ASSERT(strlen(match->dtpd_func) < DTRACE_FUNCNAMELEN);
11625 ASSERT(strlen(match->dtpd_name) < DTRACE_NAMELEN);
11626
11627 new = dtrace_enabling_create(&state->dts_vstate);
11628
11629 /*
11630 * Iterate over all retained enablings, looking for enablings that
11631 * match the specified state.
11632 */
11633 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
11634 int i;
11635
11636 /*
11637 * dtvs_state can only be NULL for helper enablings -- and
11638 * helper enablings can't be retained.
11639 */
11640 ASSERT(enab->dten_vstate->dtvs_state != NULL);
11641
11642 if (enab->dten_vstate->dtvs_state != state)
11643 continue;
11644
11645 /*
11646 * Now iterate over each probe description; we're looking for
11647 * an exact match to the specified probe description.
11648 */
11649 for (i = 0; i < enab->dten_ndesc; i++) {
11650 dtrace_ecbdesc_t *ep = enab->dten_desc[i];
11651 dtrace_probedesc_t *pd = &ep->dted_probe;
11652
fe8ab488 11653			/* APPLE NOTE: Darwin employs size-bounded string operations. */
b0d623f7
A
11654 if (strncmp(pd->dtpd_provider, match->dtpd_provider, DTRACE_PROVNAMELEN))
11655 continue;
11656
11657 if (strncmp(pd->dtpd_mod, match->dtpd_mod, DTRACE_MODNAMELEN))
11658 continue;
11659
11660 if (strncmp(pd->dtpd_func, match->dtpd_func, DTRACE_FUNCNAMELEN))
11661 continue;
11662
11663 if (strncmp(pd->dtpd_name, match->dtpd_name, DTRACE_NAMELEN))
11664 continue;
2d21ac55
A
11665
11666 /*
11667 * We have a winning probe! Add it to our growing
11668 * enabling.
11669 */
11670 found = 1;
11671 dtrace_enabling_addlike(new, ep, create);
11672 }
11673 }
11674
11675 if (!found || (err = dtrace_enabling_retain(new)) != 0) {
11676 dtrace_enabling_destroy(new);
11677 return (err);
11678 }
11679
11680 return (0);
11681}
11682
11683static void
11684dtrace_enabling_retract(dtrace_state_t *state)
11685{
11686 dtrace_enabling_t *enab, *next;
11687
11688 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
11689
11690 /*
11691	 * Iterate over all retained enablings, destroying those retained
11692	 * for the specified state.
11693 */
11694 for (enab = dtrace_retained; enab != NULL; enab = next) {
11695 next = enab->dten_next;
11696
11697 /*
11698 * dtvs_state can only be NULL for helper enablings -- and
11699 * helper enablings can't be retained.
11700 */
11701 ASSERT(enab->dten_vstate->dtvs_state != NULL);
11702
11703 if (enab->dten_vstate->dtvs_state == state) {
11704 ASSERT(state->dts_nretained > 0);
11705 dtrace_enabling_destroy(enab);
11706 }
11707 }
11708
11709 ASSERT(state->dts_nretained == 0);
11710}
11711
11712static int
39037602 11713dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched, dtrace_match_cond_t *cond)
2d21ac55
A
11714{
11715 int i = 0;
6d2010ae 11716 int total_matched = 0, matched = 0;
2d21ac55
A
11717
11718 lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
11719 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
11720
11721 for (i = 0; i < enab->dten_ndesc; i++) {
11722 dtrace_ecbdesc_t *ep = enab->dten_desc[i];
11723
11724 enab->dten_current = ep;
11725 enab->dten_error = 0;
11726
39037602
A
11727		/*
11728		 * Before doing a dtrace_probe_enable(), which is really
11729		 * expensive, check that this enabling satisfies the match
11730		 * precondition, if one was supplied.
11731		 */
11732 if (cond && (cond->dmc_func(&ep->dted_probe, cond->dmc_data) == 0)) {
11733 continue;
11734 }
6d2010ae
A
11735 /*
11736 * If a provider failed to enable a probe then get out and
11737 * let the consumer know we failed.
11738 */
d190cdc3 11739 if ((matched = dtrace_probe_enable(&ep->dted_probe, enab, ep)) < 0)
6d2010ae
A
11740 return (EBUSY);
11741
11742 total_matched += matched;
2d21ac55
A
11743
11744 if (enab->dten_error != 0) {
11745 /*
11746 * If we get an error half-way through enabling the
11747 * probes, we kick out -- perhaps with some number of
11748 * them enabled. Leaving enabled probes enabled may
11749 * be slightly confusing for user-level, but we expect
11750 * that no one will attempt to actually drive on in
11751 * the face of such errors. If this is an anonymous
11752 * enabling (indicated with a NULL nmatched pointer),
11753 * we cmn_err() a message. We aren't expecting to
11754			 * get such an error -- insofar as it can exist at all,
11755 * it would be a result of corrupted DOF in the driver
11756 * properties.
11757 */
11758 if (nmatched == NULL) {
11759 cmn_err(CE_WARN, "dtrace_enabling_match() "
11760 "error on %p: %d", (void *)ep,
11761 enab->dten_error);
11762 }
11763
11764 return (enab->dten_error);
11765 }
d190cdc3
A
11766
11767 ep->dted_probegen = dtrace_probegen;
2d21ac55
A
11768 }
11769
2d21ac55 11770 if (nmatched != NULL)
6d2010ae 11771 *nmatched = total_matched;
2d21ac55
A
11772
11773 return (0);
11774}
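/*
 * EDITOR'S NOTE -- illustrative sketch, not part of the original source.
 * The dmc_func/dmc_data pair used above is the conventional C
 * callback-with-context filter: the caller packages a predicate and
 * its state, and the matcher invokes it before doing expensive work.
 * A hypothetical stand-alone model of the pattern:
 */
typedef struct demo_cond {
	int	(*dc_func)(int value, void *data);	/* predicate */
	void	*dc_data;				/* its context */
} demo_cond_t;

static int
demo_filtered_count(const int *vals, int nvals, demo_cond_t *cond)
{
	int i, matched = 0;

	for (i = 0; i < nvals; i++) {
		/* skip entries that fail the (optional) precondition */
		if (cond != NULL &&
		    cond->dc_func(vals[i], cond->dc_data) == 0)
			continue;

		matched++;
	}
	return (matched);
}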
11775
11776static void
39037602 11777dtrace_enabling_matchall_with_cond(dtrace_match_cond_t *cond)
2d21ac55
A
11778{
11779 dtrace_enabling_t *enab;
11780
11781 lck_mtx_lock(&cpu_lock);
11782 lck_mtx_lock(&dtrace_lock);
11783
11784 /*
b0d623f7
A
11785 * Iterate over all retained enablings to see if any probes match
11786 * against them. We only perform this operation on enablings for which
11787 * we have sufficient permissions by virtue of being in the global zone
11788 * or in the same zone as the DTrace client. Because we can be called
11789 * after dtrace_detach() has been called, we cannot assert that there
11790 * are retained enablings. We can safely load from dtrace_retained,
11791 * however: the taskq_destroy() at the end of dtrace_detach() will
11792 * block pending our completion.
2d21ac55 11793 */
2d21ac55 11794
fe8ab488
A
11795 /*
11796 * Darwin doesn't do zones.
11797 * Behave as if always in "global" zone."
11798 */
11799 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
39037602 11800 (void) dtrace_enabling_match(enab, NULL, cond);
2d21ac55
A
11801 }
11802
b0d623f7
A
11803 lck_mtx_unlock(&dtrace_lock);
11804 lck_mtx_unlock(&cpu_lock);
39037602
A
11805
11806}
11807
11808static void
11809dtrace_enabling_matchall(void)
11810{
11811 dtrace_enabling_matchall_with_cond(NULL);
2d21ac55
A
11812}
11813
39037602
A
11814
11815
2d21ac55
A
11816/*
11817 * If an enabling is to be enabled without having matched probes (that is, if
11818 * dtrace_state_go() is to be called on the underlying dtrace_state_t), the
11819 * enabling must be _primed_ by creating an ECB for every ECB description.
11820 * This must be done to assure that we know the number of speculations, the
11821 * number of aggregations, the minimum buffer size needed, etc. before we
11822 * transition out of DTRACE_ACTIVITY_INACTIVE. To do this without actually
11823 * enabling any probes, we create ECBs for every ECB description, but with a
11824 * NULL probe -- which is exactly what this function does.
11825 */
11826static void
11827dtrace_enabling_prime(dtrace_state_t *state)
11828{
11829 dtrace_enabling_t *enab;
11830 int i;
11831
11832 for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
11833 ASSERT(enab->dten_vstate->dtvs_state != NULL);
11834
11835 if (enab->dten_vstate->dtvs_state != state)
11836 continue;
11837
11838 /*
11839 * We don't want to prime an enabling more than once, lest
11840 * we allow a malicious user to induce resource exhaustion.
11841 * (The ECBs that result from priming an enabling aren't
11842 * leaked -- but they also aren't deallocated until the
11843 * consumer state is destroyed.)
11844 */
11845 if (enab->dten_primed)
11846 continue;
11847
11848 for (i = 0; i < enab->dten_ndesc; i++) {
11849 enab->dten_current = enab->dten_desc[i];
d190cdc3 11850 (void) dtrace_probe_enable(NULL, enab, NULL);
2d21ac55
A
11851 }
11852
11853 enab->dten_primed = 1;
11854 }
11855}
11856
11857/*
11858 * Called to indicate that probes should be provided due to retained
11859 * enablings. This is implemented in terms of dtrace_probe_provide(), but it
11860 * must take an initial lap through the enabling calling the dtps_provide()
11861 * entry point explicitly to allow for autocreated probes.
11862 */
11863static void
11864dtrace_enabling_provide(dtrace_provider_t *prv)
11865{
11866 int i, all = 0;
11867 dtrace_probedesc_t desc;
b0d623f7 11868 dtrace_genid_t gen;
2d21ac55
A
11869
11870 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
11871 lck_mtx_assert(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED);
11872
11873 if (prv == NULL) {
11874 all = 1;
11875 prv = dtrace_provider;
11876 }
11877
11878 do {
b0d623f7 11879 dtrace_enabling_t *enab;
2d21ac55
A
11880 void *parg = prv->dtpv_arg;
11881
b0d623f7
A
11882retry:
11883 gen = dtrace_retained_gen;
11884 for (enab = dtrace_retained; enab != NULL;
11885 enab = enab->dten_next) {
2d21ac55
A
11886 for (i = 0; i < enab->dten_ndesc; i++) {
11887 desc = enab->dten_desc[i]->dted_probe;
11888 lck_mtx_unlock(&dtrace_lock);
11889 prv->dtpv_pops.dtps_provide(parg, &desc);
11890 lck_mtx_lock(&dtrace_lock);
b0d623f7
A
11891 /*
11892 * Process the retained enablings again if
11893 * they have changed while we weren't holding
11894 * dtrace_lock.
11895 */
11896 if (gen != dtrace_retained_gen)
11897 goto retry;
2d21ac55
A
11898 }
11899 }
11900 } while (all && (prv = prv->dtpv_next) != NULL);
11901
11902 lck_mtx_unlock(&dtrace_lock);
11903 dtrace_probe_provide(NULL, all ? NULL : prv);
11904 lck_mtx_lock(&dtrace_lock);
11905}
11906
11907/*
11908 * DTrace DOF Functions
11909 */
11910/*ARGSUSED*/
11911static void
11912dtrace_dof_error(dof_hdr_t *dof, const char *str)
11913{
b0d623f7 11914#pragma unused(dof) /* __APPLE__ */
2d21ac55
A
11915 if (dtrace_err_verbose)
11916 cmn_err(CE_WARN, "failed to process DOF: %s", str);
11917
11918#ifdef DTRACE_ERRDEBUG
11919 dtrace_errdebug(str);
11920#endif
11921}
11922
11923/*
11924 * Create DOF out of a currently enabled state. Right now, we only create
11925 * DOF containing the run-time options -- but this could be expanded to create
11926 * complete DOF representing the enabled state.
11927 */
11928static dof_hdr_t *
11929dtrace_dof_create(dtrace_state_t *state)
11930{
11931 dof_hdr_t *dof;
11932 dof_sec_t *sec;
11933 dof_optdesc_t *opt;
11934 int i, len = sizeof (dof_hdr_t) +
11935 roundup(sizeof (dof_sec_t), sizeof (uint64_t)) +
11936 sizeof (dof_optdesc_t) * DTRACEOPT_MAX;
11937
11938 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
11939
11940 dof = dt_kmem_zalloc_aligned(len, 8, KM_SLEEP);
11941 dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0;
11942 dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1;
11943 dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2;
11944 dof->dofh_ident[DOF_ID_MAG3] = DOF_MAG_MAG3;
11945
11946 dof->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_NATIVE;
11947 dof->dofh_ident[DOF_ID_ENCODING] = DOF_ENCODE_NATIVE;
11948 dof->dofh_ident[DOF_ID_VERSION] = DOF_VERSION;
11949 dof->dofh_ident[DOF_ID_DIFVERS] = DIF_VERSION;
11950 dof->dofh_ident[DOF_ID_DIFIREG] = DIF_DIR_NREGS;
11951 dof->dofh_ident[DOF_ID_DIFTREG] = DIF_DTR_NREGS;
11952
11953 dof->dofh_flags = 0;
11954 dof->dofh_hdrsize = sizeof (dof_hdr_t);
11955 dof->dofh_secsize = sizeof (dof_sec_t);
11956 dof->dofh_secnum = 1; /* only DOF_SECT_OPTDESC */
11957 dof->dofh_secoff = sizeof (dof_hdr_t);
11958 dof->dofh_loadsz = len;
11959 dof->dofh_filesz = len;
11960 dof->dofh_pad = 0;
11961
11962 /*
11963 * Fill in the option section header...
11964 */
11965 sec = (dof_sec_t *)((uintptr_t)dof + sizeof (dof_hdr_t));
11966 sec->dofs_type = DOF_SECT_OPTDESC;
11967 sec->dofs_align = sizeof (uint64_t);
11968 sec->dofs_flags = DOF_SECF_LOAD;
11969 sec->dofs_entsize = sizeof (dof_optdesc_t);
11970
11971 opt = (dof_optdesc_t *)((uintptr_t)sec +
11972 roundup(sizeof (dof_sec_t), sizeof (uint64_t)));
11973
11974 sec->dofs_offset = (uintptr_t)opt - (uintptr_t)dof;
11975 sec->dofs_size = sizeof (dof_optdesc_t) * DTRACEOPT_MAX;
11976
11977 for (i = 0; i < DTRACEOPT_MAX; i++) {
11978 opt[i].dofo_option = i;
11979 opt[i].dofo_strtab = DOF_SECIDX_NONE;
11980 opt[i].dofo_value = state->dts_options[i];
11981 }
11982
11983 return (dof);
11984}
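/*
 * EDITOR'S NOTE -- illustrative sketch, not part of the original source.
 * The layout built above is: a dof_hdr_t, then one dof_sec_t rounded
 * up to 8-byte alignment, then DTRACEOPT_MAX dof_optdesc_t entries.
 * The same sizing arithmetic, spelled out in isolation (demo_* names
 * are hypothetical):
 */
#include <stddef.h>
#include <stdint.h>

#define	DEMO_ROUNDUP(x, a)	((((x) + (a) - 1) / (a)) * (a))

static size_t
demo_dof_opt_len(size_t hdrsize, size_t secsize, size_t optsize, int nopts)
{
	size_t sec_off = hdrsize;			/* dofh_secoff */
	size_t opt_off = sec_off +
	    DEMO_ROUNDUP(secsize, sizeof (uint64_t));	/* dofs_offset */

	/* this total corresponds to dofh_loadsz and dofh_filesz */
	return (opt_off + optsize * (size_t)nopts);
}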
11985
11986static dof_hdr_t *
b0d623f7 11987dtrace_dof_copyin(user_addr_t uarg, int *errp)
2d21ac55
A
11988{
11989 dof_hdr_t hdr, *dof;
11990
11991 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED);
11992
11993 /*
11994 * First, we're going to copyin() the sizeof (dof_hdr_t).
11995 */
b0d623f7 11996 if (copyin(uarg, &hdr, sizeof (hdr)) != 0) {
2d21ac55
A
11997 dtrace_dof_error(NULL, "failed to copyin DOF header");
11998 *errp = EFAULT;
11999 return (NULL);
12000 }
12001
12002 /*
12003 * Now we'll allocate the entire DOF and copy it in -- provided
12004 * that the length isn't outrageous.
12005 */
b0d623f7 12006 if (hdr.dofh_loadsz >= (uint64_t)dtrace_dof_maxsize) {
2d21ac55
A
12007 dtrace_dof_error(&hdr, "load size exceeds maximum");
12008 *errp = E2BIG;
12009 return (NULL);
12010 }
12011
12012 if (hdr.dofh_loadsz < sizeof (hdr)) {
12013 dtrace_dof_error(&hdr, "invalid load size");
12014 *errp = EINVAL;
12015 return (NULL);
12016 }
12017
12018 dof = dt_kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP);
12019
6d2010ae
A
12020 if (copyin(uarg, dof, hdr.dofh_loadsz) != 0 ||
12021 dof->dofh_loadsz != hdr.dofh_loadsz) {
12022 dt_kmem_free_aligned(dof, hdr.dofh_loadsz);
12023 *errp = EFAULT;
12024 return (NULL);
12025 }
2d21ac55
A
12026
12027 return (dof);
12028}
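/*
 * EDITOR'S NOTE -- illustrative sketch, not part of the original source.
 * Note the defensive re-check above: after the second copyin(), the
 * kernel verifies that dofh_loadsz still matches the header it already
 * validated, so a racing user thread cannot change the size field
 * between the two copies (a classic double-fetch/TOCTOU defense). A
 * user-space model of the same pattern, where "usrc" stands in for an
 * untrusted, concurrently-writable buffer and all demo_* names are
 * hypothetical:
 */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

struct demo_hdr {
	uint64_t	h_loadsz;	/* untrusted; may change under us */
};

static void *
demo_fetch_blob(const unsigned char *usrc, uint64_t maxsize)
{
	struct demo_hdr hdr;
	unsigned char *blob;

	memcpy(&hdr, usrc, sizeof (hdr));		/* first fetch */

	if (hdr.h_loadsz < sizeof (hdr) || hdr.h_loadsz >= maxsize)
		return (NULL);				/* reject bogus sizes */

	if ((blob = malloc(hdr.h_loadsz)) == NULL)
		return (NULL);

	memcpy(blob, usrc, hdr.h_loadsz);		/* second fetch */

	/* re-check: the source may have changed between the two fetches */
	if (((struct demo_hdr *)blob)->h_loadsz != hdr.h_loadsz) {
		free(blob);
		return (NULL);
	}
	return (blob);
}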
12029
2d21ac55
A
12030static dof_hdr_t *
12031dtrace_dof_copyin_from_proc(proc_t* p, user_addr_t uarg, int *errp)
12032{
12033 dof_hdr_t hdr, *dof;
12034
12035 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED);
12036
12037 /*
12038 * First, we're going to copyin() the sizeof (dof_hdr_t).
12039 */
12040 if (uread(p, &hdr, sizeof(hdr), uarg) != KERN_SUCCESS) {
12041 dtrace_dof_error(NULL, "failed to copyin DOF header");
12042 *errp = EFAULT;
12043 return (NULL);
12044 }
12045
12046 /*
12047 * Now we'll allocate the entire DOF and copy it in -- provided
12048 * that the length isn't outrageous.
12049 */
b0d623f7 12050 if (hdr.dofh_loadsz >= (uint64_t)dtrace_dof_maxsize) {
2d21ac55
A
12051 dtrace_dof_error(&hdr, "load size exceeds maximum");
12052 *errp = E2BIG;
12053 return (NULL);
12054 }
12055
12056 if (hdr.dofh_loadsz < sizeof (hdr)) {
12057 dtrace_dof_error(&hdr, "invalid load size");
12058 *errp = EINVAL;
12059 return (NULL);
12060 }
12061
12062 dof = dt_kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP);
12063
12064 if (uread(p, dof, hdr.dofh_loadsz, uarg) != KERN_SUCCESS) {
12065 dt_kmem_free_aligned(dof, hdr.dofh_loadsz);
12066 *errp = EFAULT;
12067 return (NULL);
12068 }
12069
12070 return (dof);
12071}
12072
2d21ac55
A
12073static dof_hdr_t *
12074dtrace_dof_property(const char *name)
12075{
12076 uchar_t *buf;
12077 uint64_t loadsz;
12078 unsigned int len, i;
12079 dof_hdr_t *dof;
12080
12081 /*
12082	 * Unfortunately, arrays of values in .conf files are always (and
12083	 * only) interpreted as integer arrays. We must read our DOF
12084 * as an integer array, and then squeeze it into a byte array.
12085 */
b0d623f7
A
12086 if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0,
12087 name, (int **)&buf, &len) != DDI_PROP_SUCCESS)
12088 return (NULL);
2d21ac55
A
12089
12090 for (i = 0; i < len; i++)
12091 buf[i] = (uchar_t)(((int *)buf)[i]);
12092
12093 if (len < sizeof (dof_hdr_t)) {
12094 ddi_prop_free(buf);
12095 dtrace_dof_error(NULL, "truncated header");
12096 return (NULL);
12097 }
12098
12099 if (len < (loadsz = ((dof_hdr_t *)buf)->dofh_loadsz)) {
12100 ddi_prop_free(buf);
12101 dtrace_dof_error(NULL, "truncated DOF");
12102 return (NULL);
12103 }
12104
b0d623f7 12105 if (loadsz >= (uint64_t)dtrace_dof_maxsize) {
2d21ac55
A
12106 ddi_prop_free(buf);
12107 dtrace_dof_error(NULL, "oversized DOF");
12108 return (NULL);
12109 }
12110
12111 dof = dt_kmem_alloc_aligned(loadsz, 8, KM_SLEEP);
12112 bcopy(buf, dof, loadsz);
12113 ddi_prop_free(buf);
12114
12115 return (dof);
12116}
12117
12118static void
12119dtrace_dof_destroy(dof_hdr_t *dof)
12120{
12121 dt_kmem_free_aligned(dof, dof->dofh_loadsz);
12122}
12123
12124/*
12125 * Return the dof_sec_t pointer corresponding to a given section index. If the
12126 * index is not valid, dtrace_dof_error() is called and NULL is returned. If
12127 * a type other than DOF_SECT_NONE is specified, the header is checked against
12128 * this type and NULL is returned if the types do not match.
12129 */
12130static dof_sec_t *
12131dtrace_dof_sect(dof_hdr_t *dof, uint32_t type, dof_secidx_t i)
12132{
12133 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)
12134 ((uintptr_t)dof + dof->dofh_secoff + i * dof->dofh_secsize);
12135
12136 if (i >= dof->dofh_secnum) {
12137 dtrace_dof_error(dof, "referenced section index is invalid");
12138 return (NULL);
12139 }
12140
12141 if (!(sec->dofs_flags & DOF_SECF_LOAD)) {
12142 dtrace_dof_error(dof, "referenced section is not loadable");
12143 return (NULL);
12144 }
12145
12146 if (type != DOF_SECT_NONE && type != sec->dofs_type) {
12147 dtrace_dof_error(dof, "referenced section is the wrong type");
12148 return (NULL);
12149 }
12150
12151 return (sec);
12152}
12153
12154static dtrace_probedesc_t *
12155dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc)
12156{
12157 dof_probedesc_t *probe;
12158 dof_sec_t *strtab;
12159 uintptr_t daddr = (uintptr_t)dof;
12160 uintptr_t str;
12161 size_t size;
12162
12163 if (sec->dofs_type != DOF_SECT_PROBEDESC) {
12164 dtrace_dof_error(dof, "invalid probe section");
12165 return (NULL);
12166 }
12167
12168 if (sec->dofs_align != sizeof (dof_secidx_t)) {
12169 dtrace_dof_error(dof, "bad alignment in probe description");
12170 return (NULL);
12171 }
12172
12173 if (sec->dofs_offset + sizeof (dof_probedesc_t) > dof->dofh_loadsz) {
12174 dtrace_dof_error(dof, "truncated probe description");
12175 return (NULL);
12176 }
12177
12178 probe = (dof_probedesc_t *)(uintptr_t)(daddr + sec->dofs_offset);
12179 strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, probe->dofp_strtab);
12180
12181 if (strtab == NULL)
12182 return (NULL);
12183
12184 str = daddr + strtab->dofs_offset;
12185 size = strtab->dofs_size;
12186
12187 if (probe->dofp_provider >= strtab->dofs_size) {
12188 dtrace_dof_error(dof, "corrupt probe provider");
12189 return (NULL);
12190 }
12191
12192 (void) strncpy(desc->dtpd_provider,
12193 (char *)(str + probe->dofp_provider),
12194 MIN(DTRACE_PROVNAMELEN - 1, size - probe->dofp_provider));
fe8ab488
A
12195
12196	/* APPLE NOTE: Darwin employs size-bounded string operations. */
b0d623f7 12197 desc->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
2d21ac55
A
12198
12199 if (probe->dofp_mod >= strtab->dofs_size) {
12200 dtrace_dof_error(dof, "corrupt probe module");
12201 return (NULL);
12202 }
12203
12204 (void) strncpy(desc->dtpd_mod, (char *)(str + probe->dofp_mod),
12205 MIN(DTRACE_MODNAMELEN - 1, size - probe->dofp_mod));
fe8ab488
A
12206
12207	/* APPLE NOTE: Darwin employs size-bounded string operations. */
b0d623f7 12208 desc->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
2d21ac55
A
12209
12210 if (probe->dofp_func >= strtab->dofs_size) {
12211 dtrace_dof_error(dof, "corrupt probe function");
12212 return (NULL);
12213 }
12214
12215 (void) strncpy(desc->dtpd_func, (char *)(str + probe->dofp_func),
12216 MIN(DTRACE_FUNCNAMELEN - 1, size - probe->dofp_func));
fe8ab488
A
12217
12218	/* APPLE NOTE: Darwin employs size-bounded string operations. */
b0d623f7 12219 desc->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
2d21ac55
A
12220
12221 if (probe->dofp_name >= strtab->dofs_size) {
12222 dtrace_dof_error(dof, "corrupt probe name");
12223 return (NULL);
12224 }
12225
12226 (void) strncpy(desc->dtpd_name, (char *)(str + probe->dofp_name),
12227 MIN(DTRACE_NAMELEN - 1, size - probe->dofp_name));
fe8ab488
A
12228
12229	/* APPLE NOTE: Darwin employs size-bounded string operations. */
b0d623f7 12230 desc->dtpd_name[DTRACE_NAMELEN - 1] = '\0';
2d21ac55
A
12231
12232 return (desc);
12233}
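/*
 * EDITOR'S NOTE -- illustrative sketch, not part of the original source.
 * Each field copy above follows the pattern the APPLE NOTEs call out:
 * strncpy() bounded by both the destination size and the bytes
 * remaining in the string table, then an explicit NUL in the last
 * destination byte (strncpy() does not terminate on truncation). A
 * hypothetical stand-alone helper:
 */
#include <string.h>

static void
demo_copy_field(char *dst, size_t dstlen, const char *strtab,
    size_t tabsize, size_t offs)
{
	size_t avail = tabsize - offs;	/* caller ensured offs < tabsize */
	size_t n = (dstlen - 1 < avail) ? dstlen - 1 : avail;

	(void) strncpy(dst, strtab + offs, n);
	dst[dstlen - 1] = '\0';		/* force termination */
}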
12234
12235static dtrace_difo_t *
12236dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
12237 cred_t *cr)
12238{
12239 dtrace_difo_t *dp;
12240 size_t ttl = 0;
12241 dof_difohdr_t *dofd;
12242 uintptr_t daddr = (uintptr_t)dof;
c910b4d9 12243 size_t max_size = dtrace_difo_maxsize;
b0d623f7
A
12244 uint_t i;
12245 int l, n;
b0d623f7 12246
2d21ac55
A
12247
12248 static const struct {
12249 int section;
12250 int bufoffs;
12251 int lenoffs;
12252 int entsize;
12253 int align;
12254 const char *msg;
12255 } difo[] = {
12256 { DOF_SECT_DIF, offsetof(dtrace_difo_t, dtdo_buf),
12257 offsetof(dtrace_difo_t, dtdo_len), sizeof (dif_instr_t),
12258 sizeof (dif_instr_t), "multiple DIF sections" },
12259
12260 { DOF_SECT_INTTAB, offsetof(dtrace_difo_t, dtdo_inttab),
12261 offsetof(dtrace_difo_t, dtdo_intlen), sizeof (uint64_t),
12262 sizeof (uint64_t), "multiple integer tables" },
12263
12264 { DOF_SECT_STRTAB, offsetof(dtrace_difo_t, dtdo_strtab),
12265 offsetof(dtrace_difo_t, dtdo_strlen), 0,
12266 sizeof (char), "multiple string tables" },
12267
12268 { DOF_SECT_VARTAB, offsetof(dtrace_difo_t, dtdo_vartab),
12269 offsetof(dtrace_difo_t, dtdo_varlen), sizeof (dtrace_difv_t),
12270 sizeof (uint_t), "multiple variable tables" },
12271
2d21ac55 12272 { DOF_SECT_NONE, 0, 0, 0, 0, NULL }
2d21ac55
A
12273 };
12274
12275 if (sec->dofs_type != DOF_SECT_DIFOHDR) {
12276 dtrace_dof_error(dof, "invalid DIFO header section");
12277 return (NULL);
12278 }
12279
12280 if (sec->dofs_align != sizeof (dof_secidx_t)) {
12281 dtrace_dof_error(dof, "bad alignment in DIFO header");
12282 return (NULL);
12283 }
12284
12285 if (sec->dofs_size < sizeof (dof_difohdr_t) ||
12286 sec->dofs_size % sizeof (dof_secidx_t)) {
12287 dtrace_dof_error(dof, "bad size in DIFO header");
12288 return (NULL);
12289 }
12290
12291 dofd = (dof_difohdr_t *)(uintptr_t)(daddr + sec->dofs_offset);
12292 n = (sec->dofs_size - sizeof (*dofd)) / sizeof (dof_secidx_t) + 1;
12293
12294 dp = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP);
12295 dp->dtdo_rtype = dofd->dofd_rtype;
12296
12297 for (l = 0; l < n; l++) {
12298 dof_sec_t *subsec;
12299 void **bufp;
12300 uint32_t *lenp;
12301
12302 if ((subsec = dtrace_dof_sect(dof, DOF_SECT_NONE,
12303 dofd->dofd_links[l])) == NULL)
12304 goto err; /* invalid section link */
12305
c910b4d9 12306 if (ttl + subsec->dofs_size > max_size) {
2d21ac55
A
12307 dtrace_dof_error(dof, "exceeds maximum size");
12308 goto err;
12309 }
12310
12311 ttl += subsec->dofs_size;
12312
12313 for (i = 0; difo[i].section != DOF_SECT_NONE; i++) {
b0d623f7 12314
b0d623f7
A
12315 if (subsec->dofs_type != (uint32_t)difo[i].section)
12316 continue;
2d21ac55
A
12317
12318 if (!(subsec->dofs_flags & DOF_SECF_LOAD)) {
12319 dtrace_dof_error(dof, "section not loaded");
12320 goto err;
12321 }
12322
b0d623f7
A
12323 if (subsec->dofs_align != (uint32_t)difo[i].align) {
12324 dtrace_dof_error(dof, "bad alignment");
12325 goto err;
12326 }
2d21ac55
A
12327
12328 bufp = (void **)((uintptr_t)dp + difo[i].bufoffs);
12329 lenp = (uint32_t *)((uintptr_t)dp + difo[i].lenoffs);
12330
12331 if (*bufp != NULL) {
12332 dtrace_dof_error(dof, difo[i].msg);
12333 goto err;
12334 }
12335
b0d623f7
A
12336 if ((uint32_t)difo[i].entsize != subsec->dofs_entsize) {
12337 dtrace_dof_error(dof, "entry size mismatch");
12338 goto err;
12339 }
2d21ac55
A
12340
12341 if (subsec->dofs_entsize != 0 &&
12342 (subsec->dofs_size % subsec->dofs_entsize) != 0) {
12343 dtrace_dof_error(dof, "corrupt entry size");
12344 goto err;
12345 }
12346
12347 *lenp = subsec->dofs_size;
12348 *bufp = kmem_alloc(subsec->dofs_size, KM_SLEEP);
12349 bcopy((char *)(uintptr_t)(daddr + subsec->dofs_offset),
12350 *bufp, subsec->dofs_size);
12351
12352 if (subsec->dofs_entsize != 0)
12353 *lenp /= subsec->dofs_entsize;
12354
12355 break;
12356 }
12357
12358 /*
12359 * If we encounter a loadable DIFO sub-section that is not
12360 * known to us, assume this is a broken program and fail.
12361 */
12362 if (difo[i].section == DOF_SECT_NONE &&
12363 (subsec->dofs_flags & DOF_SECF_LOAD)) {
12364 dtrace_dof_error(dof, "unrecognized DIFO subsection");
12365 goto err;
12366 }
12367 }
b0d623f7 12368
2d21ac55
A
12369 if (dp->dtdo_buf == NULL) {
12370 /*
12371 * We can't have a DIF object without DIF text.
12372 */
12373 dtrace_dof_error(dof, "missing DIF text");
12374 goto err;
12375 }
12376
12377 /*
12378 * Before we validate the DIF object, run through the variable table
12379	 * looking for the strings -- if any of their sizes are unset, we'll set
12380 * their size to be the system-wide default string size. Note that
12381 * this should _not_ happen if the "strsize" option has been set --
12382 * in this case, the compiler should have set the size to reflect the
12383 * setting of the option.
12384 */
12385 for (i = 0; i < dp->dtdo_varlen; i++) {
12386 dtrace_difv_t *v = &dp->dtdo_vartab[i];
12387 dtrace_diftype_t *t = &v->dtdv_type;
12388
12389 if (v->dtdv_id < DIF_VAR_OTHER_UBASE)
12390 continue;
12391
12392 if (t->dtdt_kind == DIF_TYPE_STRING && t->dtdt_size == 0)
12393 t->dtdt_size = dtrace_strsize_default;
12394 }
12395
12396 if (dtrace_difo_validate(dp, vstate, DIF_DIR_NREGS, cr) != 0)
12397 goto err;
12398
12399 dtrace_difo_init(dp, vstate);
12400 return (dp);
12401
12402err:
12403 kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t));
12404 kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t));
12405 kmem_free(dp->dtdo_strtab, dp->dtdo_strlen);
12406 kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t));
12407
12408 kmem_free(dp, sizeof (dtrace_difo_t));
12409 return (NULL);
12410}
12411
12412static dtrace_predicate_t *
12413dtrace_dof_predicate(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
12414 cred_t *cr)
12415{
12416 dtrace_difo_t *dp;
12417
12418 if ((dp = dtrace_dof_difo(dof, sec, vstate, cr)) == NULL)
12419 return (NULL);
12420
12421 return (dtrace_predicate_create(dp));
12422}
12423
12424static dtrace_actdesc_t *
12425dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
12426 cred_t *cr)
12427{
12428 dtrace_actdesc_t *act, *first = NULL, *last = NULL, *next;
12429 dof_actdesc_t *desc;
12430 dof_sec_t *difosec;
12431 size_t offs;
12432 uintptr_t daddr = (uintptr_t)dof;
12433 uint64_t arg;
12434 dtrace_actkind_t kind;
12435
12436 if (sec->dofs_type != DOF_SECT_ACTDESC) {
12437 dtrace_dof_error(dof, "invalid action section");
12438 return (NULL);
12439 }
12440
12441 if (sec->dofs_offset + sizeof (dof_actdesc_t) > dof->dofh_loadsz) {
12442 dtrace_dof_error(dof, "truncated action description");
12443 return (NULL);
12444 }
12445
12446 if (sec->dofs_align != sizeof (uint64_t)) {
12447 dtrace_dof_error(dof, "bad alignment in action description");
12448 return (NULL);
12449 }
12450
12451 if (sec->dofs_size < sec->dofs_entsize) {
12452 dtrace_dof_error(dof, "section entry size exceeds total size");
12453 return (NULL);
12454 }
12455
12456 if (sec->dofs_entsize != sizeof (dof_actdesc_t)) {
12457 dtrace_dof_error(dof, "bad entry size in action description");
12458 return (NULL);
12459 }
12460
12461 if (sec->dofs_size / sec->dofs_entsize > dtrace_actions_max) {
12462 dtrace_dof_error(dof, "actions exceed dtrace_actions_max");
12463 return (NULL);
12464 }
12465
12466 for (offs = 0; offs < sec->dofs_size; offs += sec->dofs_entsize) {
12467 desc = (dof_actdesc_t *)(daddr +
12468 (uintptr_t)sec->dofs_offset + offs);
12469 kind = (dtrace_actkind_t)desc->dofa_kind;
12470
3e170ce0
A
12471 if ((DTRACEACT_ISPRINTFLIKE(kind) &&
12472 (kind != DTRACEACT_PRINTA || desc->dofa_strtab != DOF_SECIDX_NONE)) ||
12473 (kind == DTRACEACT_DIFEXPR && desc->dofa_strtab != DOF_SECIDX_NONE))
12474 {
2d21ac55
A
12475 dof_sec_t *strtab;
12476 char *str, *fmt;
12477 uint64_t i;
12478
12479 /*
3e170ce0
A
12480 * The argument to these actions is an index into the
12481 * DOF string table. For printf()-like actions, this
12482 * is the format string. For print(), this is the
12483 * CTF type of the expression result.
2d21ac55
A
12484 */
12485 if ((strtab = dtrace_dof_sect(dof,
12486 DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL)
12487 goto err;
12488
12489 str = (char *)((uintptr_t)dof +
12490 (uintptr_t)strtab->dofs_offset);
12491
12492 for (i = desc->dofa_arg; i < strtab->dofs_size; i++) {
12493 if (str[i] == '\0')
12494 break;
12495 }
12496
12497 if (i >= strtab->dofs_size) {
12498 dtrace_dof_error(dof, "bogus format string");
12499 goto err;
12500 }
12501
12502 if (i == desc->dofa_arg) {
12503 dtrace_dof_error(dof, "empty format string");
12504 goto err;
12505 }
12506
12507 i -= desc->dofa_arg;
12508 fmt = kmem_alloc(i + 1, KM_SLEEP);
12509 bcopy(&str[desc->dofa_arg], fmt, i + 1);
12510 arg = (uint64_t)(uintptr_t)fmt;
12511 } else {
12512 if (kind == DTRACEACT_PRINTA) {
12513 ASSERT(desc->dofa_strtab == DOF_SECIDX_NONE);
12514 arg = 0;
12515 } else {
12516 arg = desc->dofa_arg;
12517 }
12518 }
12519
12520 act = dtrace_actdesc_create(kind, desc->dofa_ntuple,
12521 desc->dofa_uarg, arg);
12522
12523 if (last != NULL) {
12524 last->dtad_next = act;
12525 } else {
12526 first = act;
12527 }
12528
12529 last = act;
12530
12531 if (desc->dofa_difo == DOF_SECIDX_NONE)
12532 continue;
12533
12534 if ((difosec = dtrace_dof_sect(dof,
12535 DOF_SECT_DIFOHDR, desc->dofa_difo)) == NULL)
12536 goto err;
12537
12538 act->dtad_difo = dtrace_dof_difo(dof, difosec, vstate, cr);
12539
12540 if (act->dtad_difo == NULL)
12541 goto err;
12542 }
12543
12544 ASSERT(first != NULL);
12545 return (first);
12546
12547err:
12548 for (act = first; act != NULL; act = next) {
12549 next = act->dtad_next;
12550 dtrace_actdesc_release(act, vstate);
12551 }
12552
12553 return (NULL);
12554}
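/*
 * EDITOR'S NOTE -- illustrative sketch, not part of the original source.
 * The format-string handling above never trusts the DOF to be NUL-
 * terminated: it scans forward from the argument offset, stopping at
 * the string table's end, and rejects both unterminated and empty
 * strings before duplicating the bytes. A hypothetical user-space
 * model:
 */
#include <stdlib.h>
#include <string.h>

static char *
demo_dup_fmt(const char *strtab, size_t tabsize, size_t arg)
{
	size_t i;
	char *fmt;

	for (i = arg; i < tabsize; i++) {
		if (strtab[i] == '\0')
			break;
	}

	if (i >= tabsize || i == arg)	/* unterminated or empty */
		return (NULL);

	i -= arg;			/* length, excluding the NUL */
	if ((fmt = malloc(i + 1)) == NULL)
		return (NULL);

	memcpy(fmt, &strtab[arg], i + 1);	/* include the NUL */
	return (fmt);
}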
12555
12556static dtrace_ecbdesc_t *
12557dtrace_dof_ecbdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
12558 cred_t *cr)
12559{
12560 dtrace_ecbdesc_t *ep;
12561 dof_ecbdesc_t *ecb;
12562 dtrace_probedesc_t *desc;
12563 dtrace_predicate_t *pred = NULL;
12564
12565 if (sec->dofs_size < sizeof (dof_ecbdesc_t)) {
12566 dtrace_dof_error(dof, "truncated ECB description");
12567 return (NULL);
12568 }
12569
12570 if (sec->dofs_align != sizeof (uint64_t)) {
12571 dtrace_dof_error(dof, "bad alignment in ECB description");
12572 return (NULL);
12573 }
12574
12575 ecb = (dof_ecbdesc_t *)((uintptr_t)dof + (uintptr_t)sec->dofs_offset);
12576 sec = dtrace_dof_sect(dof, DOF_SECT_PROBEDESC, ecb->dofe_probes);
12577
12578 if (sec == NULL)
12579 return (NULL);
12580
12581 ep = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP);
12582 ep->dted_uarg = ecb->dofe_uarg;
12583 desc = &ep->dted_probe;
12584
12585 if (dtrace_dof_probedesc(dof, sec, desc) == NULL)
12586 goto err;
12587
12588 if (ecb->dofe_pred != DOF_SECIDX_NONE) {
12589 if ((sec = dtrace_dof_sect(dof,
12590 DOF_SECT_DIFOHDR, ecb->dofe_pred)) == NULL)
12591 goto err;
12592
12593 if ((pred = dtrace_dof_predicate(dof, sec, vstate, cr)) == NULL)
12594 goto err;
12595
12596 ep->dted_pred.dtpdd_predicate = pred;
12597 }
12598
12599 if (ecb->dofe_actions != DOF_SECIDX_NONE) {
12600 if ((sec = dtrace_dof_sect(dof,
12601 DOF_SECT_ACTDESC, ecb->dofe_actions)) == NULL)
12602 goto err;
12603
12604 ep->dted_action = dtrace_dof_actdesc(dof, sec, vstate, cr);
12605
12606 if (ep->dted_action == NULL)
12607 goto err;
12608 }
12609
12610 return (ep);
12611
12612err:
12613 if (pred != NULL)
12614 dtrace_predicate_release(pred, vstate);
12615 kmem_free(ep, sizeof (dtrace_ecbdesc_t));
12616 return (NULL);
12617}
12618
2d21ac55 12619/*
fe8ab488
A
12620 * APPLE NOTE: dyld handles dof relocation.
12621 * Darwin does not need dtrace_dof_relocate()
2d21ac55 12622 */
2d21ac55
A
12623
12624/*
12625 * The dof_hdr_t passed to dtrace_dof_slurp() should be a partially validated
12626 * header: it should be at the front of a memory region that is at least
12627 * sizeof (dof_hdr_t) in size -- and then at least dof_hdr.dofh_loadsz in
12628 * size. It need not be validated in any other way.
12629 */
12630static int
12631dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr,
12632 dtrace_enabling_t **enabp, uint64_t ubase, int noprobes)
12633{
b0d623f7 12634#pragma unused(ubase) /* __APPLE__ */
2d21ac55
A
12635 uint64_t len = dof->dofh_loadsz, seclen;
12636 uintptr_t daddr = (uintptr_t)dof;
12637 dtrace_ecbdesc_t *ep;
12638 dtrace_enabling_t *enab;
12639 uint_t i;
12640
12641 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
12642 ASSERT(dof->dofh_loadsz >= sizeof (dof_hdr_t));
12643
12644 /*
12645 * Check the DOF header identification bytes. In addition to checking
12646 * valid settings, we also verify that unused bits/bytes are zeroed so
12647 * we can use them later without fear of regressing existing binaries.
12648 */
12649 if (bcmp(&dof->dofh_ident[DOF_ID_MAG0],
12650 DOF_MAG_STRING, DOF_MAG_STRLEN) != 0) {
12651 dtrace_dof_error(dof, "DOF magic string mismatch");
12652 return (-1);
12653 }
12654
12655 if (dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_ILP32 &&
12656 dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_LP64) {
12657 dtrace_dof_error(dof, "DOF has invalid data model");
12658 return (-1);
12659 }
12660
12661 if (dof->dofh_ident[DOF_ID_ENCODING] != DOF_ENCODE_NATIVE) {
12662 dtrace_dof_error(dof, "DOF encoding mismatch");
12663 return (-1);
12664 }
12665
2d21ac55 12666 /*
fe8ab488 12667 * APPLE NOTE: Darwin only supports DOF_VERSION_3 for now.
2d21ac55
A
12668 */
12669 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_3) {
12670 dtrace_dof_error(dof, "DOF version mismatch");
12671 return (-1);
12672 }
2d21ac55
A
12673
12674 if (dof->dofh_ident[DOF_ID_DIFVERS] != DIF_VERSION_2) {
12675 dtrace_dof_error(dof, "DOF uses unsupported instruction set");
12676 return (-1);
12677 }
12678
12679 if (dof->dofh_ident[DOF_ID_DIFIREG] > DIF_DIR_NREGS) {
12680 dtrace_dof_error(dof, "DOF uses too many integer registers");
12681 return (-1);
12682 }
12683
12684 if (dof->dofh_ident[DOF_ID_DIFTREG] > DIF_DTR_NREGS) {
12685 dtrace_dof_error(dof, "DOF uses too many tuple registers");
12686 return (-1);
12687 }
12688
12689 for (i = DOF_ID_PAD; i < DOF_ID_SIZE; i++) {
12690 if (dof->dofh_ident[i] != 0) {
12691 dtrace_dof_error(dof, "DOF has invalid ident byte set");
12692 return (-1);
12693 }
12694 }
12695
12696 if (dof->dofh_flags & ~DOF_FL_VALID) {
12697 dtrace_dof_error(dof, "DOF has invalid flag bits set");
12698 return (-1);
12699 }
12700
12701 if (dof->dofh_secsize == 0) {
12702 dtrace_dof_error(dof, "zero section header size");
12703 return (-1);
12704 }
12705
12706 /*
12707 * Check that the section headers don't exceed the amount of DOF
12708 * data. Note that we cast the section size and number of sections
12709 * to uint64_t's to prevent possible overflow in the multiplication.
12710 */
12711 seclen = (uint64_t)dof->dofh_secnum * (uint64_t)dof->dofh_secsize;
12712
12713 if (dof->dofh_secoff > len || seclen > len ||
12714 dof->dofh_secoff + seclen > len) {
12715 dtrace_dof_error(dof, "truncated section headers");
12716 return (-1);
12717 }
12718
12719 if (!IS_P2ALIGNED(dof->dofh_secoff, sizeof (uint64_t))) {
12720 dtrace_dof_error(dof, "misaligned section headers");
12721 return (-1);
12722 }
12723
12724 if (!IS_P2ALIGNED(dof->dofh_secsize, sizeof (uint64_t))) {
12725 dtrace_dof_error(dof, "misaligned section size");
12726 return (-1);
12727 }
12728
12729 /*
12730 * Take an initial pass through the section headers to be sure that
12731 * the headers don't have stray offsets. If the 'noprobes' flag is
12732 * set, do not permit sections relating to providers, probes, or args.
12733 */
12734 for (i = 0; i < dof->dofh_secnum; i++) {
12735 dof_sec_t *sec = (dof_sec_t *)(daddr +
12736 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);
12737
12738 if (noprobes) {
12739 switch (sec->dofs_type) {
12740 case DOF_SECT_PROVIDER:
12741 case DOF_SECT_PROBES:
12742 case DOF_SECT_PRARGS:
12743 case DOF_SECT_PROFFS:
12744 dtrace_dof_error(dof, "illegal sections "
12745 "for enabling");
12746 return (-1);
12747 }
12748 }
12749
12750 if (!(sec->dofs_flags & DOF_SECF_LOAD))
12751 continue; /* just ignore non-loadable sections */
12752
12753 if (sec->dofs_align & (sec->dofs_align - 1)) {
12754 dtrace_dof_error(dof, "bad section alignment");
12755 return (-1);
12756 }
12757
12758 if (sec->dofs_offset & (sec->dofs_align - 1)) {
12759 dtrace_dof_error(dof, "misaligned section");
12760 return (-1);
12761 }
12762
12763 if (sec->dofs_offset > len || sec->dofs_size > len ||
12764 sec->dofs_offset + sec->dofs_size > len) {
12765 dtrace_dof_error(dof, "corrupt section header");
12766 return (-1);
12767 }
12768
12769 if (sec->dofs_type == DOF_SECT_STRTAB && *((char *)daddr +
12770 sec->dofs_offset + sec->dofs_size - 1) != '\0') {
12771 dtrace_dof_error(dof, "non-terminating string table");
12772 return (-1);
12773 }
12774 }
12775
b0d623f7 12776 /*
fe8ab488
A
12777 * APPLE NOTE: We have no further relocation to perform.
12778 * All dof values are relative offsets.
b0d623f7 12779 */
2d21ac55
A
12780
12781 if ((enab = *enabp) == NULL)
12782 enab = *enabp = dtrace_enabling_create(vstate);
12783
12784 for (i = 0; i < dof->dofh_secnum; i++) {
12785 dof_sec_t *sec = (dof_sec_t *)(daddr +
12786 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);
12787
12788 if (sec->dofs_type != DOF_SECT_ECBDESC)
12789 continue;
12790
fe8ab488
A
12791 /*
12792 * APPLE NOTE: Defend against gcc 4.0 botch on x86.
12793		 * Not all paths out of the inlined dtrace_dof_ecbdesc()
12794		 * are checked for the NULL return value.
12795 * Check for NULL explicitly here.
12796 */
2d21ac55
A
12797 ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr);
12798 if (ep == NULL) {
12799 dtrace_enabling_destroy(enab);
12800 *enabp = NULL;
12801 return (-1);
12802 }
2d21ac55
A
12803
12804 dtrace_enabling_add(enab, ep);
12805 }
12806
12807 return (0);
12808}
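/*
 * EDITOR'S NOTE -- illustrative sketch, not part of the original source.
 * Two validation idioms above are worth calling out: the section-
 * header extent is computed in uint64_t so that secnum * secsize
 * cannot overflow, and every range check tests offset, size, and
 * their sum separately so a wrapped sum cannot slip past. A
 * hypothetical stand-alone form:
 */
#include <stdint.h>

static int
demo_sections_ok(uint64_t len, uint64_t secoff, uint32_t secnum,
    uint32_t secsize)
{
	/* widen before multiplying so secnum * secsize cannot overflow */
	uint64_t seclen = (uint64_t)secnum * (uint64_t)secsize;

	/*
	 * All three comparisons are needed; because secoff and seclen
	 * are each checked against len first, their sum cannot wrap for
	 * any len below 2^63 (and DOF sizes are far smaller than that).
	 */
	if (secoff > len || seclen > len || secoff + seclen > len)
		return (0);
	return (1);
}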
12809
12810/*
12811 * Process DOF for any options. This routine assumes that the DOF has been
12812 * at least processed by dtrace_dof_slurp().
12813 */
12814static int
12815dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state)
12816{
b0d623f7
A
12817 uint_t i;
12818 int rval;
2d21ac55
A
12819 uint32_t entsize;
12820 size_t offs;
12821 dof_optdesc_t *desc;
12822
12823 for (i = 0; i < dof->dofh_secnum; i++) {
12824 dof_sec_t *sec = (dof_sec_t *)((uintptr_t)dof +
12825 (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);
12826
12827 if (sec->dofs_type != DOF_SECT_OPTDESC)
12828 continue;
12829
12830 if (sec->dofs_align != sizeof (uint64_t)) {
12831 dtrace_dof_error(dof, "bad alignment in "
12832 "option description");
12833 return (EINVAL);
12834 }
12835
12836 if ((entsize = sec->dofs_entsize) == 0) {
12837 dtrace_dof_error(dof, "zeroed option entry size");
12838 return (EINVAL);
12839 }
12840
12841 if (entsize < sizeof (dof_optdesc_t)) {
12842 dtrace_dof_error(dof, "bad option entry size");
12843 return (EINVAL);
12844 }
12845
12846 for (offs = 0; offs < sec->dofs_size; offs += entsize) {
12847 desc = (dof_optdesc_t *)((uintptr_t)dof +
12848 (uintptr_t)sec->dofs_offset + offs);
12849
12850 if (desc->dofo_strtab != DOF_SECIDX_NONE) {
12851 dtrace_dof_error(dof, "non-zero option string");
12852 return (EINVAL);
12853 }
12854
b0d623f7 12855 if (desc->dofo_value == (uint64_t)DTRACEOPT_UNSET) {
2d21ac55
A
12856 dtrace_dof_error(dof, "unset option");
12857 return (EINVAL);
12858 }
12859
12860 if ((rval = dtrace_state_option(state,
12861 desc->dofo_option, desc->dofo_value)) != 0) {
12862 dtrace_dof_error(dof, "rejected option");
12863 return (rval);
12864 }
12865 }
12866 }
12867
12868 return (0);
12869}
12870
12871/*
12872 * DTrace Consumer State Functions
12873 */
fe8ab488 12874static int
2d21ac55
A
12875dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size)
12876{
c910b4d9 12877 size_t hashsize, maxper, min_size, chunksize = dstate->dtds_chunksize;
2d21ac55
A
12878 void *base;
12879 uintptr_t limit;
12880 dtrace_dynvar_t *dvar, *next, *start;
b0d623f7 12881 size_t i;
2d21ac55
A
12882
12883 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
12884 ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL);
12885
12886 bzero(dstate, sizeof (dtrace_dstate_t));
12887
12888 if ((dstate->dtds_chunksize = chunksize) == 0)
12889 dstate->dtds_chunksize = DTRACE_DYNVAR_CHUNKSIZE;
12890
ecc0ceb4
A
12891 VERIFY(dstate->dtds_chunksize < (LONG_MAX - sizeof (dtrace_dynhash_t)));
12892
c910b4d9
A
12893 if (size < (min_size = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)))
12894 size = min_size;
2d21ac55
A
12895
12896 if ((base = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
12897 return (ENOMEM);
12898
12899 dstate->dtds_size = size;
12900 dstate->dtds_base = base;
12901 dstate->dtds_percpu = kmem_cache_alloc(dtrace_state_cache, KM_SLEEP);
c910b4d9 12902 bzero(dstate->dtds_percpu, (int)NCPU * sizeof (dtrace_dstate_percpu_t));
2d21ac55
A
12903
12904 hashsize = size / (dstate->dtds_chunksize + sizeof (dtrace_dynhash_t));
12905
12906 if (hashsize != 1 && (hashsize & 1))
12907 hashsize--;
12908
12909 dstate->dtds_hashsize = hashsize;
12910 dstate->dtds_hash = dstate->dtds_base;
12911
12912 /*
12913 * Set all of our hash buckets to point to the single sink, and (if
12914 * it hasn't already been set), set the sink's hash value to be the
12915 * sink sentinel value. The sink is needed for dynamic variable
12916 * lookups to know that they have iterated over an entire, valid hash
12917 * chain.
12918 */
12919 for (i = 0; i < hashsize; i++)
12920 dstate->dtds_hash[i].dtdh_chain = &dtrace_dynhash_sink;
12921
12922 if (dtrace_dynhash_sink.dtdv_hashval != DTRACE_DYNHASH_SINK)
12923 dtrace_dynhash_sink.dtdv_hashval = DTRACE_DYNHASH_SINK;
12924
12925 /*
12926 * Determine number of active CPUs. Divide free list evenly among
12927 * active CPUs.
12928 */
12929 start = (dtrace_dynvar_t *)
12930 ((uintptr_t)base + hashsize * sizeof (dtrace_dynhash_t));
12931 limit = (uintptr_t)base + size;
12932
ecc0ceb4
A
12933 VERIFY((uintptr_t)start < limit);
12934 VERIFY((uintptr_t)start >= (uintptr_t)base);
12935
c910b4d9 12936 maxper = (limit - (uintptr_t)start) / (int)NCPU;
2d21ac55
A
12937 maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize;
12938
b0d623f7 12939 for (i = 0; i < NCPU; i++) {
2d21ac55
A
12940 dstate->dtds_percpu[i].dtdsc_free = dvar = start;
12941
12942 /*
12943 * If we don't even have enough chunks to make it once through
12944 * NCPUs, we're just going to allocate everything to the first
12945 * CPU. And if we're on the last CPU, we're going to allocate
12946 * whatever is left over. In either case, we set the limit to
12947 * be the limit of the dynamic variable space.
12948 */
b0d623f7 12949 if (maxper == 0 || i == NCPU - 1) {
2d21ac55
A
12950 limit = (uintptr_t)base + size;
12951 start = NULL;
12952 } else {
12953 limit = (uintptr_t)start + maxper;
12954 start = (dtrace_dynvar_t *)limit;
12955 }
12956
ecc0ceb4 12957 VERIFY(limit <= (uintptr_t)base + size);
2d21ac55
A
12958
12959 for (;;) {
12960 next = (dtrace_dynvar_t *)((uintptr_t)dvar +
12961 dstate->dtds_chunksize);
12962
12963 if ((uintptr_t)next + dstate->dtds_chunksize >= limit)
12964 break;
12965
ecc0ceb4
A
12966 VERIFY((uintptr_t)dvar >= (uintptr_t)base &&
12967 (uintptr_t)dvar <= (uintptr_t)base + size);
2d21ac55
A
12968 dvar->dtdv_next = next;
12969 dvar = next;
12970 }
12971
12972 if (maxper == 0)
12973 break;
12974 }
12975
12976 return (0);
12977}
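/*
 * EDITOR'S NOTE -- illustrative sketch, not part of the original source.
 * dtrace_dstate_init() carves a single allocation into a hash-bucket
 * array followed by fixed-size chunks, and splits the chunks evenly
 * into per-CPU free lists. The sizing arithmetic above, in isolation
 * (demo_* names are hypothetical):
 */
#include <stddef.h>

struct demo_carve {
	size_t	dc_hashsize;	/* buckets at the front of the region */
	size_t	dc_maxper;	/* bytes of chunks handed to each CPU */
};

static struct demo_carve
demo_carve_dstate(size_t size, size_t chunksize, size_t bucketsize,
    unsigned ncpu)
{
	struct demo_carve c;
	size_t chunkspace;

	/* one bucket is budgeted per chunk */
	c.dc_hashsize = size / (chunksize + bucketsize);
	if (c.dc_hashsize != 1 && (c.dc_hashsize & 1))
		c.dc_hashsize--;

	/* chunks begin immediately after the bucket array */
	chunkspace = size - c.dc_hashsize * bucketsize;

	/* whole chunks only, divided evenly among the CPUs */
	c.dc_maxper = ((chunkspace / ncpu) / chunksize) * chunksize;
	return (c);
}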
12978
fe8ab488 12979static void
2d21ac55
A
12980dtrace_dstate_fini(dtrace_dstate_t *dstate)
12981{
12982 lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
12983
12984 if (dstate->dtds_base == NULL)
12985 return;
12986
12987 kmem_free(dstate->dtds_base, dstate->dtds_size);
12988 kmem_cache_free(dtrace_state_cache, dstate->dtds_percpu);
12989}
12990
12991static void
12992dtrace_vstate_fini(dtrace_vstate_t *vstate)
12993{
12994 /*
12995 * Logical XOR, where are you?
12996 */
12997 ASSERT((vstate->dtvs_nglobals == 0) ^ (vstate->dtvs_globals != NULL));
12998
12999 if (vstate->dtvs_nglobals > 0) {
13000 kmem_free(vstate->dtvs_globals, vstate->dtvs_nglobals *
13001 sizeof (dtrace_statvar_t *));
13002 }
13003
13004 if (vstate->dtvs_ntlocals > 0) {
13005 kmem_free(vstate->dtvs_tlocals, vstate->dtvs_ntlocals *
13006 sizeof (dtrace_difv_t));
13007 }
13008
13009 ASSERT((vstate->dtvs_nlocals == 0) ^ (vstate->dtvs_locals != NULL));
13010
13011 if (vstate->dtvs_nlocals > 0) {
13012 kmem_free(vstate->dtvs_locals, vstate->dtvs_nlocals *
13013 sizeof (dtrace_statvar_t *));
13014 }
13015}
13016
13017static void
13018dtrace_state_clean(dtrace_state_t *state)
13019{
13020 if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE)
13021 return;
13022
13023 dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars);
13024 dtrace_speculation_clean(state);
13025}
13026
13027static void
13028dtrace_state_deadman(dtrace_state_t *state)
13029{
13030 hrtime_t now;
13031
13032 dtrace_sync();
13033
13034 now = dtrace_gethrtime();
13035
13036 if (state != dtrace_anon.dta_state &&
13037 now - state->dts_laststatus >= dtrace_deadman_user)
13038 return;
13039
13040 /*
13041 * We must be sure that dts_alive never appears to be less than the
13042 * value upon entry to dtrace_state_deadman(), and because we lack a
13043 * dtrace_cas64(), we cannot store to it atomically. We thus instead
13044 * store INT64_MAX to it, followed by a memory barrier, followed by
13045 * the new value. This assures that dts_alive never appears to be
13046 * less than its true value, regardless of the order in which the
13047 * stores to the underlying storage are issued.
13048 */
13049 state->dts_alive = INT64_MAX;
13050 dtrace_membar_producer();
13051 state->dts_alive = now;
13052}
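/*
 * EDITOR'S NOTE -- illustrative sketch, not part of the original source.
 * The dts_alive update above is a lock-free idiom for platforms
 * lacking a 64-bit compare-and-swap: write a sentinel that can only
 * over-state the value, fence, then write the real value. Readers may
 * observe the sentinel or either value, but never one smaller than
 * the true value. Sketched with a C11 release fence standing in for
 * dtrace_membar_producer(); real code would rely on the platform's
 * store-ordering guarantees:
 */
#include <stdatomic.h>
#include <stdint.h>

static void
demo_monotone_store(volatile int64_t *alive, int64_t now)
{
	*alive = INT64_MAX;	/* can never appear "too old" */
	atomic_thread_fence(memory_order_release);
	*alive = now;
}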
13053
b0d623f7
A
13054static int
13055dtrace_state_create(dev_t *devp, cred_t *cr, dtrace_state_t **new_state)
2d21ac55
A
13056{
13057 minor_t minor;
13058 major_t major;
13059 char c[30];
13060 dtrace_state_t *state;
13061 dtrace_optval_t *opt;
c910b4d9 13062 int bufsize = (int)NCPU * sizeof (dtrace_buffer_t), i;
2d21ac55
A
13063
13064 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
13065 lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
13066
b0d623f7
A
13067 /* Cause restart */
13068 *new_state = NULL;
13069
39037602 13070 minor = getminor(*devp);
2d21ac55 13071
39037602
A
13072 state = dtrace_state_allocate(minor);
13073 if (NULL == state) {
13074 printf("dtrace_open: couldn't acquire minor number %d. This usually means that too many DTrace clients are in use at the moment", minor);
13075 return (ERESTART); /* can't reacquire */
2d21ac55
A
13076 }
13077
2d21ac55
A
13078 state->dts_epid = DTRACE_EPIDNONE + 1;
13079
13080 (void) snprintf(c, sizeof (c), "dtrace_aggid_%d", minor);
13081 state->dts_aggid_arena = vmem_create(c, (void *)1, UINT32_MAX, 1,
13082 NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
13083
13084 if (devp != NULL) {
13085 major = getemajor(*devp);
13086 } else {
13087 major = ddi_driver_major(dtrace_devi);
13088 }
13089
13090 state->dts_dev = makedevice(major, minor);
13091
13092 if (devp != NULL)
13093 *devp = state->dts_dev;
13094
13095 /*
13096 * We allocate NCPU buffers. On the one hand, this can be quite
13097 * a bit of memory per instance (nearly 36K on a Starcat). On the
13098 * other hand, it saves an additional memory reference in the probe
13099 * path.
13100 */
13101 state->dts_buffer = kmem_zalloc(bufsize, KM_SLEEP);
13102 state->dts_aggbuffer = kmem_zalloc(bufsize, KM_SLEEP);
39037602 13103 state->dts_buf_over_limit = 0;
2d21ac55
A
13104 state->dts_cleaner = CYCLIC_NONE;
13105 state->dts_deadman = CYCLIC_NONE;
13106 state->dts_vstate.dtvs_state = state;
13107
13108 for (i = 0; i < DTRACEOPT_MAX; i++)
13109 state->dts_options[i] = DTRACEOPT_UNSET;
13110
13111 /*
13112 * Set the default options.
13113 */
13114 opt = state->dts_options;
13115 opt[DTRACEOPT_BUFPOLICY] = DTRACEOPT_BUFPOLICY_SWITCH;
13116 opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_AUTO;
13117 opt[DTRACEOPT_NSPEC] = dtrace_nspec_default;
13118 opt[DTRACEOPT_SPECSIZE] = dtrace_specsize_default;
13119 opt[DTRACEOPT_CPU] = (dtrace_optval_t)DTRACE_CPUALL;
13120 opt[DTRACEOPT_STRSIZE] = dtrace_strsize_default;
13121 opt[DTRACEOPT_STACKFRAMES] = dtrace_stackframes_default;
13122 opt[DTRACEOPT_USTACKFRAMES] = dtrace_ustackframes_default;
13123 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_default;
13124 opt[DTRACEOPT_AGGRATE] = dtrace_aggrate_default;
13125 opt[DTRACEOPT_SWITCHRATE] = dtrace_switchrate_default;
13126 opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_default;
13127 opt[DTRACEOPT_JSTACKFRAMES] = dtrace_jstackframes_default;
13128 opt[DTRACEOPT_JSTACKSTRSIZE] = dtrace_jstackstrsize_default;
39037602 13129 opt[DTRACEOPT_BUFLIMIT] = dtrace_buflimit_default;
2d21ac55
A
13130
13131 /*
13132 * Depending on the user credentials, we set flag bits which alter probe
13133 * visibility or the amount of destructiveness allowed. In the case of
13134 * actual anonymous tracing, or the possession of all privileges, all of
13135 * the normal checks are bypassed.
13136 */
39037602
A
13137#if defined(__APPLE__)
13138 if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
13139 if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed()) {
13140 /*
13141 * Allow only proc credentials when DTrace is
13142 * restricted by the current security policy
13143 */
13144 state->dts_cred.dcr_visible = DTRACE_CRV_ALLPROC;
13145 state->dts_cred.dcr_action = DTRACE_CRA_PROC | DTRACE_CRA_PROC_CONTROL | DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
13146 }
13147 else {
13148 state->dts_cred.dcr_visible = DTRACE_CRV_ALL;
13149 state->dts_cred.dcr_action = DTRACE_CRA_ALL;
13150 }
13151 }
13152
13153#else
2d21ac55
A
13154 if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
13155 state->dts_cred.dcr_visible = DTRACE_CRV_ALL;
13156 state->dts_cred.dcr_action = DTRACE_CRA_ALL;
39037602
A
13157 }
13158 else {
2d21ac55
A
13159 /*
13160 * Set up the credentials for this instantiation. We take a
13161 * hold on the credential to prevent it from disappearing on
13162 * us; this in turn prevents the zone_t referenced by this
13163 * credential from disappearing. This means that we can
13164 * examine the credential and the zone from probe context.
13165 */
13166 crhold(cr);
13167 state->dts_cred.dcr_cred = cr;
13168
13169 /*
13170 * CRA_PROC means "we have *some* privilege for dtrace" and
13171 * unlocks the use of variables like pid, zonename, etc.
13172 */
13173 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE) ||
13174 PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
13175 state->dts_cred.dcr_action |= DTRACE_CRA_PROC;
13176 }
13177
13178 /*
13179 * dtrace_user allows use of syscall and profile providers.
13180 * If the user also has proc_owner and/or proc_zone, we
13181 * extend the scope to include additional visibility and
13182 * destructive power.
13183 */
13184 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE)) {
13185 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) {
13186 state->dts_cred.dcr_visible |=
13187 DTRACE_CRV_ALLPROC;
13188
13189 state->dts_cred.dcr_action |=
13190 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
13191 }
13192
13193 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) {
13194 state->dts_cred.dcr_visible |=
13195 DTRACE_CRV_ALLZONE;
13196
13197 state->dts_cred.dcr_action |=
13198 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
13199 }
13200
13201 /*
13202 * If we have all privs in whatever zone this is,
13203 * we can do destructive things to processes which
13204 * have altered credentials.
fe8ab488
A
13205 *
13206 * APPLE NOTE: Darwin doesn't do zones.
13207 * Behave as if zone always has destructive privs.
2d21ac55 13208 */
fe8ab488 13209
2d21ac55
A
13210 state->dts_cred.dcr_action |=
13211 DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
2d21ac55
A
13212 }
13213
13214 /*
13215 * Holding the dtrace_kernel privilege also implies that
13216 * the user has the dtrace_user privilege from a visibility
13217 * perspective. But without further privileges, some
13218 * destructive actions are not available.
13219 */
13220 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE)) {
13221 /*
13222 * Make all probes in all zones visible. However,
13223 * this doesn't mean that all actions become available
13224 * to all zones.
13225 */
13226 state->dts_cred.dcr_visible |= DTRACE_CRV_KERNEL |
13227 DTRACE_CRV_ALLPROC | DTRACE_CRV_ALLZONE;
13228
13229 state->dts_cred.dcr_action |= DTRACE_CRA_KERNEL |
13230 DTRACE_CRA_PROC;
13231 /*
13232 * Holding proc_owner means that destructive actions
13233 * for *this* zone are allowed.
13234 */
13235 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
13236 state->dts_cred.dcr_action |=
13237 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
13238
13239 /*
13240 * Holding proc_zone means that destructive actions
13241 * for this user/group ID in all zones is allowed.
13242 */
13243 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
13244 state->dts_cred.dcr_action |=
13245 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
13246
13247 /*
13248 * If we have all privs in whatever zone this is,
13249 * we can do destructive things to processes which
13250 * have altered credentials.
fe8ab488
A
13251 *
13252 * APPLE NOTE: Darwin doesn't do zones.
13253 * Behave as if zone always has destructive privs.
13254 */
2d21ac55
A
13255 state->dts_cred.dcr_action |=
13256 DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
2d21ac55
A
13257 }
13258
13259 /*
13260 * Holding the dtrace_proc privilege gives control over fasttrap
13261 * and pid providers. We need to grant wider destructive
13262 * privileges in the event that the user has proc_owner and/or
13263 * proc_zone.
13264 */
13265 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
13266 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
13267 state->dts_cred.dcr_action |=
13268 DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
13269
13270 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
13271 state->dts_cred.dcr_action |=
13272 DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
13273 }
13274 }
39037602 13275#endif
2d21ac55 13276
b0d623f7
A
13277 *new_state = state;
13278	return (0); /* Success */
2d21ac55
A
13279}
13280
13281static int
13282dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
13283{
13284 dtrace_optval_t *opt = state->dts_options, size;
c910b4d9 13285 processorid_t cpu = 0;
39037602 13286 size_t limit = buf->dtb_size;
2d21ac55
A
13287 int flags = 0, rval;
13288
13289 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
13290 lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
13291 ASSERT(which < DTRACEOPT_MAX);
13292 ASSERT(state->dts_activity == DTRACE_ACTIVITY_INACTIVE ||
13293 (state == dtrace_anon.dta_state &&
13294 state->dts_activity == DTRACE_ACTIVITY_ACTIVE));
13295
13296 if (opt[which] == DTRACEOPT_UNSET || opt[which] == 0)
13297 return (0);
13298
13299 if (opt[DTRACEOPT_CPU] != DTRACEOPT_UNSET)
13300 cpu = opt[DTRACEOPT_CPU];
13301
13302 if (which == DTRACEOPT_SPECSIZE)
13303 flags |= DTRACEBUF_NOSWITCH;
13304
13305 if (which == DTRACEOPT_BUFSIZE) {
13306 if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_RING)
13307 flags |= DTRACEBUF_RING;
13308
13309 if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_FILL)
13310 flags |= DTRACEBUF_FILL;
13311
13312 if (state != dtrace_anon.dta_state ||
13313 state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
13314 flags |= DTRACEBUF_INACTIVE;
13315 }
13316
b0d623f7 13317 for (size = opt[which]; (size_t)size >= sizeof (uint64_t); size >>= 1) {
2d21ac55
A
13318 /*
13319 * The size must be 8-byte aligned. If the size is not 8-byte
13320 * aligned, drop it down by the difference.
13321 */
13322 if (size & (sizeof (uint64_t) - 1))
13323 size -= size & (sizeof (uint64_t) - 1);
13324
13325 if (size < state->dts_reserve) {
13326 /*
13327			 * Buffers must always be large enough to accommodate
13328			 * their prereserved space.  We return E2BIG instead
13329			 * of ENOMEM in this case to allow user-level
13330			 * software to differentiate the cases.
13331 */
13332 return (E2BIG);
13333 }
39037602
A
13334 limit = opt[DTRACEOPT_BUFLIMIT] * size / 100;
13335 rval = dtrace_buffer_alloc(buf, limit, size, flags, cpu);
2d21ac55
A
13336
13337 if (rval != ENOMEM) {
13338 opt[which] = size;
13339 return (rval);
13340 }
13341
13342 if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
13343 return (rval);
13344 }
13345
13346 return (ENOMEM);
13347}
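/*
 * Illustrative sketch (not compiled): the sizing policy of
 * dtrace_state_buffer() above, in isolation.  Round the requested size
 * down to an 8-byte multiple (e.g. 1001 becomes 1000), fail with E2BIG
 * when the prereserved space no longer fits, and otherwise halve and
 * retry on ENOMEM unless resizing is manual.  try_alloc() is a
 * hypothetical stand-in for dtrace_buffer_alloc(), simplified to either
 * succeed or fail with ENOMEM.
 */
#if 0
#include <errno.h>
#include <stddef.h>
#include <stdint.h>

extern int try_alloc(size_t size);	/* 0 on success, else ENOMEM */

static int
size_with_retry(size_t size, size_t reserve, int manual_resize)
{
	for (; size >= sizeof (uint64_t); size >>= 1) {
		size -= size & (sizeof (uint64_t) - 1);

		if (size < reserve)
			return (E2BIG);	/* can't cover prereserved space */
		if (try_alloc(size) == 0)
			return (0);
		if (manual_resize)
			return (ENOMEM); /* caller forbade resizing */
	}
	return (ENOMEM);
}
#endif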
13348
13349static int
13350dtrace_state_buffers(dtrace_state_t *state)
13351{
13352 dtrace_speculation_t *spec = state->dts_speculations;
13353 int rval, i;
13354
13355 if ((rval = dtrace_state_buffer(state, state->dts_buffer,
13356 DTRACEOPT_BUFSIZE)) != 0)
13357 return (rval);
13358
13359 if ((rval = dtrace_state_buffer(state, state->dts_aggbuffer,
13360 DTRACEOPT_AGGSIZE)) != 0)
13361 return (rval);
13362
13363 for (i = 0; i < state->dts_nspeculations; i++) {
13364 if ((rval = dtrace_state_buffer(state,
13365 spec[i].dtsp_buffer, DTRACEOPT_SPECSIZE)) != 0)
13366 return (rval);
13367 }
13368
13369 return (0);
13370}
13371
13372static void
13373dtrace_state_prereserve(dtrace_state_t *state)
13374{
13375 dtrace_ecb_t *ecb;
13376 dtrace_probe_t *probe;
13377
13378 state->dts_reserve = 0;
13379
13380 if (state->dts_options[DTRACEOPT_BUFPOLICY] != DTRACEOPT_BUFPOLICY_FILL)
13381 return;
13382
13383 /*
13384 * If our buffer policy is a "fill" buffer policy, we need to set the
13385 * prereserved space to be the space required by the END probes.
13386 */
13387 probe = dtrace_probes[dtrace_probeid_end - 1];
13388 ASSERT(probe != NULL);
13389
13390 for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
13391 if (ecb->dte_state != state)
13392 continue;
13393
13394 state->dts_reserve += ecb->dte_needed + ecb->dte_alignment;
13395 }
13396}
13397
13398static int
13399dtrace_state_go(dtrace_state_t *state, processorid_t *cpu)
13400{
13401 dtrace_optval_t *opt = state->dts_options, sz, nspec;
13402 dtrace_speculation_t *spec;
13403 dtrace_buffer_t *buf;
13404 cyc_handler_t hdlr;
13405 cyc_time_t when;
c910b4d9 13406 int rval = 0, i, bufsize = (int)NCPU * sizeof (dtrace_buffer_t);
2d21ac55
A
13407 dtrace_icookie_t cookie;
13408
13409 lck_mtx_lock(&cpu_lock);
13410 lck_mtx_lock(&dtrace_lock);
13411
13412 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
13413 rval = EBUSY;
13414 goto out;
13415 }
13416
13417 /*
13418 * Before we can perform any checks, we must prime all of the
13419 * retained enablings that correspond to this state.
13420 */
13421 dtrace_enabling_prime(state);
13422
13423 if (state->dts_destructive && !state->dts_cred.dcr_destructive) {
13424 rval = EACCES;
13425 goto out;
13426 }
13427
13428 dtrace_state_prereserve(state);
13429
13430 /*
13431	 * Now what we want to do is try to allocate our speculations.
13432 * We do not automatically resize the number of speculations; if
13433 * this fails, we will fail the operation.
13434 */
13435 nspec = opt[DTRACEOPT_NSPEC];
13436 ASSERT(nspec != DTRACEOPT_UNSET);
13437
13438 if (nspec > INT_MAX) {
13439 rval = ENOMEM;
13440 goto out;
13441 }
13442
13443 spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t), KM_NOSLEEP);
13444
13445 if (spec == NULL) {
13446 rval = ENOMEM;
13447 goto out;
13448 }
13449
13450 state->dts_speculations = spec;
13451 state->dts_nspeculations = (int)nspec;
13452
13453 for (i = 0; i < nspec; i++) {
13454 if ((buf = kmem_zalloc(bufsize, KM_NOSLEEP)) == NULL) {
13455 rval = ENOMEM;
13456 goto err;
13457 }
13458
13459 spec[i].dtsp_buffer = buf;
13460 }
13461
13462 if (opt[DTRACEOPT_GRABANON] != DTRACEOPT_UNSET) {
13463 if (dtrace_anon.dta_state == NULL) {
13464 rval = ENOENT;
13465 goto out;
13466 }
13467
13468 if (state->dts_necbs != 0) {
13469 rval = EALREADY;
13470 goto out;
13471 }
13472
13473 state->dts_anon = dtrace_anon_grab();
13474 ASSERT(state->dts_anon != NULL);
13475 state = state->dts_anon;
13476
13477 /*
13478 * We want "grabanon" to be set in the grabbed state, so we'll
13479 * copy that option value from the grabbing state into the
13480 * grabbed state.
13481 */
13482 state->dts_options[DTRACEOPT_GRABANON] =
13483 opt[DTRACEOPT_GRABANON];
13484
13485 *cpu = dtrace_anon.dta_beganon;
13486
13487 /*
13488 * If the anonymous state is active (as it almost certainly
13489 * is if the anonymous enabling ultimately matched anything),
13490 * we don't allow any further option processing -- but we
13491 * don't return failure.
13492 */
13493 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
13494 goto out;
13495 }
13496
13497 if (opt[DTRACEOPT_AGGSIZE] != DTRACEOPT_UNSET &&
13498 opt[DTRACEOPT_AGGSIZE] != 0) {
13499 if (state->dts_aggregations == NULL) {
13500 /*
13501 * We're not going to create an aggregation buffer
13502 * because we don't have any ECBs that contain
13503 * aggregations -- set this option to 0.
13504 */
13505 opt[DTRACEOPT_AGGSIZE] = 0;
13506 } else {
13507 /*
13508 * If we have an aggregation buffer, we must also have
13509 * a buffer to use as scratch.
13510 */
b0d623f7
A
13511 if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET ||
13512 (size_t)opt[DTRACEOPT_BUFSIZE] < state->dts_needed) {
13513 opt[DTRACEOPT_BUFSIZE] = state->dts_needed;
13514 }
2d21ac55
A
13515 }
13516 }
13517
13518 if (opt[DTRACEOPT_SPECSIZE] != DTRACEOPT_UNSET &&
13519 opt[DTRACEOPT_SPECSIZE] != 0) {
13520 if (!state->dts_speculates) {
13521 /*
13522 * We're not going to create speculation buffers
13523 * because we don't have any ECBs that actually
13524 * speculate -- set the speculation size to 0.
13525 */
13526 opt[DTRACEOPT_SPECSIZE] = 0;
13527 }
13528 }
13529
13530 /*
13531 * The bare minimum size for any buffer that we're actually going to
13532 * do anything to is sizeof (uint64_t).
13533 */
13534 sz = sizeof (uint64_t);
13535
13536 if ((state->dts_needed != 0 && opt[DTRACEOPT_BUFSIZE] < sz) ||
13537 (state->dts_speculates && opt[DTRACEOPT_SPECSIZE] < sz) ||
13538 (state->dts_aggregations != NULL && opt[DTRACEOPT_AGGSIZE] < sz)) {
13539 /*
13540 * A buffer size has been explicitly set to 0 (or to a size
13541 * that will be adjusted to 0) and we need the space -- we
13542 * need to return failure. We return ENOSPC to differentiate
13543 * it from failing to allocate a buffer due to failure to meet
13544 * the reserve (for which we return E2BIG).
13545 */
13546 rval = ENOSPC;
13547 goto out;
13548 }
13549
13550 if ((rval = dtrace_state_buffers(state)) != 0)
13551 goto err;
13552
13553 if ((sz = opt[DTRACEOPT_DYNVARSIZE]) == DTRACEOPT_UNSET)
13554 sz = dtrace_dstate_defsize;
13555
13556 do {
13557 rval = dtrace_dstate_init(&state->dts_vstate.dtvs_dynvars, sz);
13558
13559 if (rval == 0)
13560 break;
13561
13562 if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
13563 goto err;
13564 } while (sz >>= 1);
13565
13566 opt[DTRACEOPT_DYNVARSIZE] = sz;
13567
13568 if (rval != 0)
13569 goto err;
13570
13571 if (opt[DTRACEOPT_STATUSRATE] > dtrace_statusrate_max)
13572 opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_max;
13573
13574 if (opt[DTRACEOPT_CLEANRATE] == 0)
13575 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;
13576
13577 if (opt[DTRACEOPT_CLEANRATE] < dtrace_cleanrate_min)
13578 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_min;
13579
13580 if (opt[DTRACEOPT_CLEANRATE] > dtrace_cleanrate_max)
13581 opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;
13582
39037602
A
13583 if (opt[DTRACEOPT_STRSIZE] > dtrace_strsize_max)
13584 opt[DTRACEOPT_STRSIZE] = dtrace_strsize_max;
13585
13586 if (opt[DTRACEOPT_STRSIZE] < dtrace_strsize_min)
13587 opt[DTRACEOPT_STRSIZE] = dtrace_strsize_min;
13588
13589 if (opt[DTRACEOPT_BUFLIMIT] > dtrace_buflimit_max)
13590 opt[DTRACEOPT_BUFLIMIT] = dtrace_buflimit_max;
13591
13592 if (opt[DTRACEOPT_BUFLIMIT] < dtrace_buflimit_min)
13593 opt[DTRACEOPT_BUFLIMIT] = dtrace_buflimit_min;
13594
2d21ac55
A
13595 hdlr.cyh_func = (cyc_func_t)dtrace_state_clean;
13596 hdlr.cyh_arg = state;
13597 hdlr.cyh_level = CY_LOW_LEVEL;
13598
13599 when.cyt_when = 0;
13600 when.cyt_interval = opt[DTRACEOPT_CLEANRATE];
13601
13602 state->dts_cleaner = cyclic_add(&hdlr, &when);
13603
13604 hdlr.cyh_func = (cyc_func_t)dtrace_state_deadman;
13605 hdlr.cyh_arg = state;
13606 hdlr.cyh_level = CY_LOW_LEVEL;
13607
13608 when.cyt_when = 0;
13609 when.cyt_interval = dtrace_deadman_interval;
13610
13611 state->dts_alive = state->dts_laststatus = dtrace_gethrtime();
13612 state->dts_deadman = cyclic_add(&hdlr, &when);
13613
13614 state->dts_activity = DTRACE_ACTIVITY_WARMUP;
13615
13616 /*
13617 * Now it's time to actually fire the BEGIN probe. We need to disable
13618 * interrupts here both to record the CPU on which we fired the BEGIN
13619 * probe (the data from this CPU will be processed first at user
13620 * level) and to manually activate the buffer for this CPU.
13621 */
13622 cookie = dtrace_interrupt_disable();
13623 *cpu = CPU->cpu_id;
13624 ASSERT(state->dts_buffer[*cpu].dtb_flags & DTRACEBUF_INACTIVE);
13625 state->dts_buffer[*cpu].dtb_flags &= ~DTRACEBUF_INACTIVE;
13626
13627 dtrace_probe(dtrace_probeid_begin,
13628 (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
13629 dtrace_interrupt_enable(cookie);
13630 /*
13631 * We may have had an exit action from a BEGIN probe; only change our
13632 * state to ACTIVE if we're still in WARMUP.
13633 */
13634 ASSERT(state->dts_activity == DTRACE_ACTIVITY_WARMUP ||
13635 state->dts_activity == DTRACE_ACTIVITY_DRAINING);
13636
13637 if (state->dts_activity == DTRACE_ACTIVITY_WARMUP)
13638 state->dts_activity = DTRACE_ACTIVITY_ACTIVE;
13639
13640 /*
13641	 * Regardless of whether we're now in ACTIVE or DRAINING, we
13642 * want each CPU to transition its principal buffer out of the
13643 * INACTIVE state. Doing this assures that no CPU will suddenly begin
13644 * processing an ECB halfway down a probe's ECB chain; all CPUs will
13645 * atomically transition from processing none of a state's ECBs to
13646 * processing all of them.
13647 */
13648 dtrace_xcall(DTRACE_CPUALL,
13649 (dtrace_xcall_t)dtrace_buffer_activate, state);
13650 goto out;
13651
13652err:
13653 dtrace_buffer_free(state->dts_buffer);
13654 dtrace_buffer_free(state->dts_aggbuffer);
13655
13656 if ((nspec = state->dts_nspeculations) == 0) {
13657 ASSERT(state->dts_speculations == NULL);
13658 goto out;
13659 }
13660
13661 spec = state->dts_speculations;
13662 ASSERT(spec != NULL);
13663
13664 for (i = 0; i < state->dts_nspeculations; i++) {
13665 if ((buf = spec[i].dtsp_buffer) == NULL)
13666 break;
13667
13668 dtrace_buffer_free(buf);
13669 kmem_free(buf, bufsize);
13670 }
13671
13672 kmem_free(spec, nspec * sizeof (dtrace_speculation_t));
13673 state->dts_nspeculations = 0;
13674 state->dts_speculations = NULL;
13675
13676out:
13677 lck_mtx_unlock(&dtrace_lock);
13678 lck_mtx_unlock(&cpu_lock);
13679
13680 return (rval);
13681}
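/*
 * Illustrative sketch (not compiled): why the BEGIN probe above fires
 * with interrupts disabled.  Keeping the thread on one CPU for the
 * duration guarantees that the recorded CPU id and the manually
 * activated buffer refer to the same CPU.  Every function below is a
 * hypothetical stand-in for its dtrace_*()/CPU counterpart.
 */
#if 0
typedef unsigned long icookie_t;

extern icookie_t intr_disable(void);
extern void intr_enable(icookie_t);
extern int cur_cpu(void);
extern void activate_buffer(int cpu);
extern void fire_begin_probe(void);

static int
fire_begin_pinned(void)
{
	icookie_t cookie = intr_disable();
	int cpu = cur_cpu();		/* can't migrate until enabled */

	activate_buffer(cpu);
	fire_begin_probe();		/* records into the buffer above */
	intr_enable(cookie);

	return (cpu);		/* user level drains this CPU first */
}
#endif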
13682
13683static int
13684dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu)
13685{
13686 dtrace_icookie_t cookie;
13687
13688 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
13689
13690 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE &&
13691 state->dts_activity != DTRACE_ACTIVITY_DRAINING)
13692 return (EINVAL);
13693
13694 /*
13695 * We'll set the activity to DTRACE_ACTIVITY_DRAINING, and issue a sync
13696 * to be sure that every CPU has seen it. See below for the details
13697 * on why this is done.
13698 */
13699 state->dts_activity = DTRACE_ACTIVITY_DRAINING;
13700 dtrace_sync();
13701
13702 /*
13703 * By this point, it is impossible for any CPU to be still processing
13704 * with DTRACE_ACTIVITY_ACTIVE. We can thus set our activity to
13705 * DTRACE_ACTIVITY_COOLDOWN and know that we're not racing with any
13706 * other CPU in dtrace_buffer_reserve(). This allows dtrace_probe()
13707 * and callees to know that the activity is DTRACE_ACTIVITY_COOLDOWN
13708 * iff we're in the END probe.
13709 */
13710 state->dts_activity = DTRACE_ACTIVITY_COOLDOWN;
13711 dtrace_sync();
13712 ASSERT(state->dts_activity == DTRACE_ACTIVITY_COOLDOWN);
13713
13714 /*
13715 * Finally, we can release the reserve and call the END probe. We
13716 * disable interrupts across calling the END probe to allow us to
13717 * return the CPU on which we actually called the END probe. This
13718 * allows user-land to be sure that this CPU's principal buffer is
13719 * processed last.
13720 */
13721 state->dts_reserve = 0;
13722
13723 cookie = dtrace_interrupt_disable();
13724 *cpu = CPU->cpu_id;
13725 dtrace_probe(dtrace_probeid_end,
13726 (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
13727 dtrace_interrupt_enable(cookie);
13728
13729 state->dts_activity = DTRACE_ACTIVITY_STOPPED;
13730 dtrace_sync();
13731
13732 return (0);
13733}
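/*
 * Illustrative sketch (not compiled): the two-phase shutdown performed
 * by dtrace_state_stop() above.  Each transition is followed by a sync
 * so no CPU can still be executing under the previous state; the names
 * are hypothetical stand-ins.
 */
#if 0
enum activity { ACTIVE, DRAINING, COOLDOWN, STOPPED };

extern void sync_all_cpus(void);	/* drains probe context */
extern void fire_end_probe(void);

static void
stop_two_phase(volatile enum activity *act)
{
	*act = DRAINING;
	sync_all_cpus();	/* no CPU still believes we're ACTIVE */

	*act = COOLDOWN;
	sync_all_cpus();	/* COOLDOWN now implies "in the END probe" */

	fire_end_probe();

	*act = STOPPED;
	sync_all_cpus();
}
#endif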
13734
13735static int
13736dtrace_state_option(dtrace_state_t *state, dtrace_optid_t option,
13737 dtrace_optval_t val)
13738{
13739 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
13740
13741 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
13742 return (EBUSY);
13743
13744 if (option >= DTRACEOPT_MAX)
13745 return (EINVAL);
13746
13747 if (option != DTRACEOPT_CPU && val < 0)
13748 return (EINVAL);
13749
13750 switch (option) {
13751 case DTRACEOPT_DESTRUCTIVE:
fe8ab488
A
13752 /*
13753 * Prevent consumers from enabling destructive actions if DTrace
13754 * is running in a restricted environment, or if actions are
13755 * disallowed.
13756 */
13757 if (dtrace_is_restricted() || dtrace_destructive_disallow)
2d21ac55
A
13758 return (EACCES);
13759
13760 state->dts_cred.dcr_destructive = 1;
13761 break;
13762
13763 case DTRACEOPT_BUFSIZE:
13764 case DTRACEOPT_DYNVARSIZE:
13765 case DTRACEOPT_AGGSIZE:
13766 case DTRACEOPT_SPECSIZE:
13767 case DTRACEOPT_STRSIZE:
13768 if (val < 0)
13769 return (EINVAL);
13770
13771 if (val >= LONG_MAX) {
13772 /*
13773 * If this is an otherwise negative value, set it to
13774 * the highest multiple of 128m less than LONG_MAX.
13775 * Technically, we're adjusting the size without
13776 * regard to the buffer resizing policy, but in fact,
13777 * this has no effect -- if we set the buffer size to
13778 * ~LONG_MAX and the buffer policy is ultimately set to
13779 * be "manual", the buffer allocation is guaranteed to
13780 * fail, if only because the allocation requires two
13781		 * buffers. (We set the size to the highest
13782 * multiple of 128m because it ensures that the size
13783 * will remain a multiple of a megabyte when
13784 * repeatedly halved -- all the way down to 15m.)
13785 */
13786 val = LONG_MAX - (1 << 27) + 1;
13787 }
13788 }
13789
13790 state->dts_options[option] = val;
13791
13792 return (0);
13793}
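/*
 * Illustrative sketch (not compiled, assumes LP64): the clamp
 * arithmetic above.  LONG_MAX is 2^63 - 1, so LONG_MAX - (1 << 27) + 1
 * equals 2^63 - 2^27 = (2^36 - 1) * 2^27 -- the highest multiple of
 * 128m below LONG_MAX, which stays megabyte-aligned across the first
 * several halvings of the buffer-retry loop.
 */
#if 0
#include <assert.h>
#include <limits.h>

static void
clamp_check(void)
{
	long val;

	assert(sizeof (long) == 8);		/* LP64 assumption */
	val = LONG_MAX - (1L << 27) + 1;

	assert(val == (long)0x7FFFFFFFF8000000L);
	assert(val % (1L << 27) == 0);		/* multiple of 128m */
	assert((val >> 7) % (1L << 20) == 0);	/* still megabyte-aligned after 7 halvings */
}
#endif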
13794
13795static void
13796dtrace_state_destroy(dtrace_state_t *state)
13797{
13798 dtrace_ecb_t *ecb;
13799 dtrace_vstate_t *vstate = &state->dts_vstate;
13800 minor_t minor = getminor(state->dts_dev);
c910b4d9 13801 int i, bufsize = (int)NCPU * sizeof (dtrace_buffer_t);
2d21ac55
A
13802 dtrace_speculation_t *spec = state->dts_speculations;
13803 int nspec = state->dts_nspeculations;
13804 uint32_t match;
13805
13806 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
13807 lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
13808
13809 /*
13810 * First, retract any retained enablings for this state.
13811 */
13812 dtrace_enabling_retract(state);
13813 ASSERT(state->dts_nretained == 0);
13814
13815 if (state->dts_activity == DTRACE_ACTIVITY_ACTIVE ||
13816 state->dts_activity == DTRACE_ACTIVITY_DRAINING) {
13817 /*
13818 * We have managed to come into dtrace_state_destroy() on a
13819 * hot enabling -- almost certainly because of a disorderly
13820 * shutdown of a consumer. (That is, a consumer that is
13821 * exiting without having called dtrace_stop().) In this case,
13822 * we're going to set our activity to be KILLED, and then
13823 * issue a sync to be sure that everyone is out of probe
13824 * context before we start blowing away ECBs.
13825 */
13826 state->dts_activity = DTRACE_ACTIVITY_KILLED;
13827 dtrace_sync();
13828 }
13829
13830 /*
13831 * Release the credential hold we took in dtrace_state_create().
13832 */
13833 if (state->dts_cred.dcr_cred != NULL)
13834 crfree(state->dts_cred.dcr_cred);
13835
13836 /*
13837 * Now we can safely disable and destroy any enabled probes. Because
13838 * any DTRACE_PRIV_KERNEL probes may actually be slowing our progress
13839 * (especially if they're all enabled), we take two passes through the
13840 * ECBs: in the first, we disable just DTRACE_PRIV_KERNEL probes, and
13841 * in the second we disable whatever is left over.
13842 */
13843 for (match = DTRACE_PRIV_KERNEL; ; match = 0) {
13844 for (i = 0; i < state->dts_necbs; i++) {
13845 if ((ecb = state->dts_ecbs[i]) == NULL)
13846 continue;
13847
13848 if (match && ecb->dte_probe != NULL) {
13849 dtrace_probe_t *probe = ecb->dte_probe;
13850 dtrace_provider_t *prov = probe->dtpr_provider;
13851
13852 if (!(prov->dtpv_priv.dtpp_flags & match))
13853 continue;
13854 }
13855
13856 dtrace_ecb_disable(ecb);
13857 dtrace_ecb_destroy(ecb);
13858 }
13859
13860 if (!match)
13861 break;
13862 }
13863
13864 /*
13865 * Before we free the buffers, perform one more sync to assure that
13866 * every CPU is out of probe context.
13867 */
13868 dtrace_sync();
13869
13870 dtrace_buffer_free(state->dts_buffer);
13871 dtrace_buffer_free(state->dts_aggbuffer);
13872
13873 for (i = 0; i < nspec; i++)
13874 dtrace_buffer_free(spec[i].dtsp_buffer);
13875
13876 if (state->dts_cleaner != CYCLIC_NONE)
13877 cyclic_remove(state->dts_cleaner);
13878
13879 if (state->dts_deadman != CYCLIC_NONE)
13880 cyclic_remove(state->dts_deadman);
13881
13882 dtrace_dstate_fini(&vstate->dtvs_dynvars);
13883 dtrace_vstate_fini(vstate);
13884 kmem_free(state->dts_ecbs, state->dts_necbs * sizeof (dtrace_ecb_t *));
13885
13886 if (state->dts_aggregations != NULL) {
b0d623f7 13887#if DEBUG
2d21ac55
A
13888 for (i = 0; i < state->dts_naggregations; i++)
13889 ASSERT(state->dts_aggregations[i] == NULL);
13890#endif
13891 ASSERT(state->dts_naggregations > 0);
13892 kmem_free(state->dts_aggregations,
13893 state->dts_naggregations * sizeof (dtrace_aggregation_t *));
13894 }
13895
13896 kmem_free(state->dts_buffer, bufsize);
13897 kmem_free(state->dts_aggbuffer, bufsize);
13898
13899 for (i = 0; i < nspec; i++)
13900 kmem_free(spec[i].dtsp_buffer, bufsize);
13901
13902 kmem_free(spec, nspec * sizeof (dtrace_speculation_t));
13903
13904 dtrace_format_destroy(state);
13905
13906 vmem_destroy(state->dts_aggid_arena);
39037602 13907 dtrace_state_free(minor);
2d21ac55
A
13908}
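/*
 * Illustrative sketch (not compiled): the two-pass teardown idiom used
 * above.  "for (match = M; ; match = 0)" runs the body exactly twice:
 * first over items matching M, then over everything left.  The names
 * are hypothetical.
 */
#if 0
#include <stddef.h>

#define	MATCH_KERNEL	0x1

struct item { unsigned flags; };

extern void destroy(struct item *);

static void
two_pass_destroy(struct item **items, int n)
{
	unsigned match;
	int i;

	for (match = MATCH_KERNEL; ; match = 0) {
		for (i = 0; i < n; i++) {
			if (items[i] == NULL)
				continue;
			if (match && !(items[i]->flags & match))
				continue;	/* not this pass */
			destroy(items[i]);
			items[i] = NULL;
		}
		if (!match)
			break;			/* second pass done */
	}
}
#endif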
13909
13910/*
13911 * DTrace Anonymous Enabling Functions
13912 */
13913static dtrace_state_t *
13914dtrace_anon_grab(void)
13915{
13916 dtrace_state_t *state;
13917
13918 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
13919
13920 if ((state = dtrace_anon.dta_state) == NULL) {
13921 ASSERT(dtrace_anon.dta_enabling == NULL);
13922 return (NULL);
13923 }
13924
13925 ASSERT(dtrace_anon.dta_enabling != NULL);
13926 ASSERT(dtrace_retained != NULL);
13927
13928 dtrace_enabling_destroy(dtrace_anon.dta_enabling);
13929 dtrace_anon.dta_enabling = NULL;
13930 dtrace_anon.dta_state = NULL;
13931
13932 return (state);
13933}
13934
13935static void
13936dtrace_anon_property(void)
13937{
13938 int i, rv;
13939 dtrace_state_t *state;
13940 dof_hdr_t *dof;
13941 char c[32]; /* enough for "dof-data-" + digits */
13942
13943 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
13944 lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
13945
13946 for (i = 0; ; i++) {
13947 (void) snprintf(c, sizeof (c), "dof-data-%d", i);
13948
13949 dtrace_err_verbose = 1;
13950
13951 if ((dof = dtrace_dof_property(c)) == NULL) {
13952 dtrace_err_verbose = 0;
13953 break;
13954 }
13955
13956 /*
13957 * We want to create anonymous state, so we need to transition
13958 * the kernel debugger to indicate that DTrace is active. If
13959 * this fails (e.g. because the debugger has modified text in
13960 * some way), we won't continue with the processing.
13961 */
13962 if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
13963 cmn_err(CE_NOTE, "kernel debugger active; anonymous "
13964 "enabling ignored.");
13965 dtrace_dof_destroy(dof);
13966 break;
13967 }
13968
13969 /*
13970 * If we haven't allocated an anonymous state, we'll do so now.
13971 */
13972 if ((state = dtrace_anon.dta_state) == NULL) {
b0d623f7
A
13973 rv = dtrace_state_create(NULL, NULL, &state);
13974 dtrace_anon.dta_state = state;
13975 if (rv != 0 || state == NULL) {
2d21ac55
A
13976 /*
13977 * This basically shouldn't happen: the only
13978 * failure mode from dtrace_state_create() is a
13979 * failure of ddi_soft_state_zalloc() that
13980 * itself should never happen. Still, the
13981 * interface allows for a failure mode, and
13982 * we want to fail as gracefully as possible:
13983 * we'll emit an error message and cease
13984 * processing anonymous state in this case.
13985 */
13986 cmn_err(CE_WARN, "failed to create "
13987 "anonymous state");
13988 dtrace_dof_destroy(dof);
13989 break;
13990 }
13991 }
13992
13993 rv = dtrace_dof_slurp(dof, &state->dts_vstate, CRED(),
13994 &dtrace_anon.dta_enabling, 0, B_TRUE);
13995
13996 if (rv == 0)
13997 rv = dtrace_dof_options(dof, state);
13998
13999 dtrace_err_verbose = 0;
14000 dtrace_dof_destroy(dof);
14001
14002 if (rv != 0) {
14003 /*
14004 * This is malformed DOF; chuck any anonymous state
14005 * that we created.
14006 */
14007 ASSERT(dtrace_anon.dta_enabling == NULL);
14008 dtrace_state_destroy(state);
14009 dtrace_anon.dta_state = NULL;
14010 break;
14011 }
14012
14013 ASSERT(dtrace_anon.dta_enabling != NULL);
14014 }
14015
14016 if (dtrace_anon.dta_enabling != NULL) {
14017 int rval;
14018
14019 /*
14020 * dtrace_enabling_retain() can only fail because we are
14021 * trying to retain more enablings than are allowed -- but
14022 * we only have one anonymous enabling, and we are guaranteed
14023 * to be allowed at least one retained enabling; we assert
14024 * that dtrace_enabling_retain() returns success.
14025 */
14026 rval = dtrace_enabling_retain(dtrace_anon.dta_enabling);
14027 ASSERT(rval == 0);
14028
14029 dtrace_enabling_dump(dtrace_anon.dta_enabling);
14030 }
14031}
14032
14033/*
14034 * DTrace Helper Functions
14035 */
14036static void
14037dtrace_helper_trace(dtrace_helper_action_t *helper,
14038 dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, int where)
14039{
b0d623f7
A
14040 uint32_t size, next, nnext;
14041 int i;
2d21ac55
A
14042 dtrace_helptrace_t *ent;
14043 uint16_t flags = cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
14044
14045 if (!dtrace_helptrace_enabled)
14046 return;
14047
b0d623f7 14048 ASSERT((uint32_t)vstate->dtvs_nlocals <= dtrace_helptrace_nlocals);
2d21ac55
A
14049
14050 /*
14051 * What would a tracing framework be without its own tracing
14052 * framework? (Well, a hell of a lot simpler, for starters...)
14053 */
14054 size = sizeof (dtrace_helptrace_t) + dtrace_helptrace_nlocals *
14055 sizeof (uint64_t) - sizeof (uint64_t);
14056
14057 /*
14058 * Iterate until we can allocate a slot in the trace buffer.
14059 */
14060 do {
14061 next = dtrace_helptrace_next;
14062
14063 if (next + size < dtrace_helptrace_bufsize) {
14064 nnext = next + size;
14065 } else {
14066 nnext = size;
14067 }
14068 } while (dtrace_cas32(&dtrace_helptrace_next, next, nnext) != next);
14069
14070 /*
14071 * We have our slot; fill it in.
14072 */
14073 if (nnext == size)
14074 next = 0;
14075
14076 ent = (dtrace_helptrace_t *)&dtrace_helptrace_buffer[next];
14077 ent->dtht_helper = helper;
14078 ent->dtht_where = where;
14079 ent->dtht_nlocals = vstate->dtvs_nlocals;
14080
14081 ent->dtht_fltoffs = (mstate->dtms_present & DTRACE_MSTATE_FLTOFFS) ?
14082 mstate->dtms_fltoffs : -1;
14083 ent->dtht_fault = DTRACE_FLAGS2FLT(flags);
14084 ent->dtht_illval = cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
14085
14086 for (i = 0; i < vstate->dtvs_nlocals; i++) {
14087 dtrace_statvar_t *svar;
14088
14089 if ((svar = vstate->dtvs_locals[i]) == NULL)
14090 continue;
14091
c910b4d9 14092 ASSERT(svar->dtsv_size >= (int)NCPU * sizeof (uint64_t));
2d21ac55
A
14093 ent->dtht_locals[i] =
14094 ((uint64_t *)(uintptr_t)svar->dtsv_data)[CPU->cpu_id];
14095 }
14096}
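/*
 * Illustrative sketch (not compiled): the lock-free slot reservation
 * performed by the dtrace_cas32() loop above.  Writers advance a shared
 * cursor with compare-and-swap, wrapping to offset 0 whenever the
 * record would not fit; the GCC/Clang __sync builtin is a stand-in for
 * dtrace_cas32().
 */
#if 0
#include <stdint.h>

static uint32_t cursor;		/* stands in for dtrace_helptrace_next */

static uint32_t
reserve_slot(uint32_t size, uint32_t bufsize)
{
	uint32_t next, nnext;

	do {
		next = cursor;
		nnext = (next + size < bufsize) ? next + size : size;
	} while (__sync_val_compare_and_swap(&cursor, next, nnext) != next);

	/* As above: if the cursor wrapped, our record starts at 0. */
	return ((nnext == size) ? 0 : next);
}
#endif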
14097
14098static uint64_t
14099dtrace_helper(int which, dtrace_mstate_t *mstate,
14100 dtrace_state_t *state, uint64_t arg0, uint64_t arg1)
14101{
14102 uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
14103 uint64_t sarg0 = mstate->dtms_arg[0];
14104 uint64_t sarg1 = mstate->dtms_arg[1];
c910b4d9 14105 uint64_t rval = 0;
2d21ac55
A
14106 dtrace_helpers_t *helpers = curproc->p_dtrace_helpers;
14107 dtrace_helper_action_t *helper;
14108 dtrace_vstate_t *vstate;
14109 dtrace_difo_t *pred;
14110 int i, trace = dtrace_helptrace_enabled;
14111
14112 ASSERT(which >= 0 && which < DTRACE_NHELPER_ACTIONS);
14113
14114 if (helpers == NULL)
14115 return (0);
14116
14117 if ((helper = helpers->dthps_actions[which]) == NULL)
14118 return (0);
14119
14120 vstate = &helpers->dthps_vstate;
14121 mstate->dtms_arg[0] = arg0;
14122 mstate->dtms_arg[1] = arg1;
14123
14124 /*
14125 * Now iterate over each helper. If its predicate evaluates to 'true',
14126 * we'll call the corresponding actions. Note that the below calls
14127 * to dtrace_dif_emulate() may set faults in machine state. This is
14128 * okay: our caller (the outer dtrace_dif_emulate()) will simply plow
14129 * the stored DIF offset with its own (which is the desired behavior).
14130 * Also, note the calls to dtrace_dif_emulate() may allocate scratch
14131 * from machine state; this is okay, too.
14132 */
14133 for (; helper != NULL; helper = helper->dtha_next) {
14134 if ((pred = helper->dtha_predicate) != NULL) {
14135 if (trace)
14136 dtrace_helper_trace(helper, mstate, vstate, 0);
14137
14138 if (!dtrace_dif_emulate(pred, mstate, vstate, state))
14139 goto next;
14140
14141 if (*flags & CPU_DTRACE_FAULT)
14142 goto err;
14143 }
14144
14145 for (i = 0; i < helper->dtha_nactions; i++) {
14146 if (trace)
14147 dtrace_helper_trace(helper,
14148 mstate, vstate, i + 1);
14149
14150 rval = dtrace_dif_emulate(helper->dtha_actions[i],
14151 mstate, vstate, state);
14152
14153 if (*flags & CPU_DTRACE_FAULT)
14154 goto err;
14155 }
14156
14157next:
14158 if (trace)
14159 dtrace_helper_trace(helper, mstate, vstate,
14160 DTRACE_HELPTRACE_NEXT);
14161 }
14162
14163 if (trace)
14164 dtrace_helper_trace(helper, mstate, vstate,
14165 DTRACE_HELPTRACE_DONE);
14166
14167 /*
14168 * Restore the arg0 that we saved upon entry.
14169 */
14170 mstate->dtms_arg[0] = sarg0;
14171 mstate->dtms_arg[1] = sarg1;
14172
14173 return (rval);
14174
14175err:
14176 if (trace)
14177 dtrace_helper_trace(helper, mstate, vstate,
14178 DTRACE_HELPTRACE_ERR);
14179
14180 /*
14181 * Restore the arg0 that we saved upon entry.
14182 */
14183 mstate->dtms_arg[0] = sarg0;
14184 mstate->dtms_arg[1] = sarg1;
14185
fe8ab488 14186 return (0);
2d21ac55
A
14187}
14188
14189static void
14190dtrace_helper_action_destroy(dtrace_helper_action_t *helper,
14191 dtrace_vstate_t *vstate)
14192{
14193 int i;
14194
14195 if (helper->dtha_predicate != NULL)
14196 dtrace_difo_release(helper->dtha_predicate, vstate);
14197
14198 for (i = 0; i < helper->dtha_nactions; i++) {
14199 ASSERT(helper->dtha_actions[i] != NULL);
14200 dtrace_difo_release(helper->dtha_actions[i], vstate);
14201 }
14202
14203 kmem_free(helper->dtha_actions,
14204 helper->dtha_nactions * sizeof (dtrace_difo_t *));
14205 kmem_free(helper, sizeof (dtrace_helper_action_t));
14206}
14207
2d21ac55
A
14208static int
14209dtrace_helper_destroygen(proc_t* p, int gen)
14210{
2d21ac55
A
14211 dtrace_helpers_t *help = p->p_dtrace_helpers;
14212 dtrace_vstate_t *vstate;
b0d623f7 14213 uint_t i;
2d21ac55
A
14214
14215 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
14216
14217 if (help == NULL || gen > help->dthps_generation)
14218 return (EINVAL);
14219
14220 vstate = &help->dthps_vstate;
14221
14222 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
14223 dtrace_helper_action_t *last = NULL, *h, *next;
14224
14225 for (h = help->dthps_actions[i]; h != NULL; h = next) {
14226 next = h->dtha_next;
14227
14228 if (h->dtha_generation == gen) {
14229 if (last != NULL) {
14230 last->dtha_next = next;
14231 } else {
14232 help->dthps_actions[i] = next;
14233 }
14234
14235 dtrace_helper_action_destroy(h, vstate);
14236 } else {
14237 last = h;
14238 }
14239 }
14240 }
14241
14242 /*
14243	 * Iterate until we've cleared out all helper providers with the
14244 * given generation number.
14245 */
14246 for (;;) {
c910b4d9 14247 dtrace_helper_provider_t *prov = NULL;
2d21ac55
A
14248
14249 /*
14250 * Look for a helper provider with the right generation. We
14251 * have to start back at the beginning of the list each time
14252 * because we drop dtrace_lock. It's unlikely that we'll make
14253 * more than two passes.
14254 */
14255 for (i = 0; i < help->dthps_nprovs; i++) {
14256 prov = help->dthps_provs[i];
14257
14258 if (prov->dthp_generation == gen)
14259 break;
14260 }
14261
14262 /*
14263 * If there were no matches, we're done.
14264 */
14265 if (i == help->dthps_nprovs)
14266 break;
14267
14268 /*
14269 * Move the last helper provider into this slot.
14270 */
14271 help->dthps_nprovs--;
14272 help->dthps_provs[i] = help->dthps_provs[help->dthps_nprovs];
14273 help->dthps_provs[help->dthps_nprovs] = NULL;
14274
14275 lck_mtx_unlock(&dtrace_lock);
14276
14277 /*
14278 * If we have a meta provider, remove this helper provider.
14279 */
14280 lck_mtx_lock(&dtrace_meta_lock);
14281 if (dtrace_meta_pid != NULL) {
14282 ASSERT(dtrace_deferred_pid == NULL);
14283 dtrace_helper_provider_remove(&prov->dthp_prov,
d190cdc3 14284 p);
2d21ac55
A
14285 }
14286 lck_mtx_unlock(&dtrace_meta_lock);
14287
14288 dtrace_helper_provider_destroy(prov);
14289
14290 lck_mtx_lock(&dtrace_lock);
14291 }
14292
14293 return (0);
14294}
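/*
 * Illustrative sketch (not compiled): the unordered-removal idiom used
 * above.  Because dtrace_lock is dropped inside the loop, the real code
 * rescans from the top; the removal itself is O(1) -- the last entry is
 * swapped into the vacated slot.  The names are hypothetical.
 */
#if 0
#include <stddef.h>

struct prov { int generation; };

extern void destroy_prov(struct prov *);

static unsigned
remove_generation(struct prov **provs, unsigned nprovs, int gen)
{
	unsigned i = 0;

	while (i < nprovs) {
		struct prov *victim;

		if (provs[i]->generation != gen) {
			i++;
			continue;
		}
		victim = provs[i];
		provs[i] = provs[--nprovs];	/* last entry fills the hole */
		provs[nprovs] = NULL;
		destroy_prov(victim);
		/* Don't advance i: the swapped-in entry must be checked. */
	}
	return (nprovs);		/* new count */
}
#endif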
14295
14296static int
14297dtrace_helper_validate(dtrace_helper_action_t *helper)
14298{
14299 int err = 0, i;
14300 dtrace_difo_t *dp;
14301
14302 if ((dp = helper->dtha_predicate) != NULL)
14303 err += dtrace_difo_validate_helper(dp);
14304
14305 for (i = 0; i < helper->dtha_nactions; i++)
14306 err += dtrace_difo_validate_helper(helper->dtha_actions[i]);
14307
14308 return (err == 0);
14309}
14310
2d21ac55
A
14311static int
14312dtrace_helper_action_add(proc_t* p, int which, dtrace_ecbdesc_t *ep)
2d21ac55
A
14313{
14314 dtrace_helpers_t *help;
14315 dtrace_helper_action_t *helper, *last;
14316 dtrace_actdesc_t *act;
14317 dtrace_vstate_t *vstate;
14318 dtrace_predicate_t *pred;
14319 int count = 0, nactions = 0, i;
14320
14321 if (which < 0 || which >= DTRACE_NHELPER_ACTIONS)
14322 return (EINVAL);
14323
2d21ac55 14324 help = p->p_dtrace_helpers;
2d21ac55
A
14325 last = help->dthps_actions[which];
14326 vstate = &help->dthps_vstate;
14327
14328 for (count = 0; last != NULL; last = last->dtha_next) {
14329 count++;
14330 if (last->dtha_next == NULL)
14331 break;
14332 }
14333
14334 /*
14335 * If we already have dtrace_helper_actions_max helper actions for this
14336 * helper action type, we'll refuse to add a new one.
14337 */
14338 if (count >= dtrace_helper_actions_max)
14339 return (ENOSPC);
14340
14341 helper = kmem_zalloc(sizeof (dtrace_helper_action_t), KM_SLEEP);
14342 helper->dtha_generation = help->dthps_generation;
14343
14344 if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) {
14345 ASSERT(pred->dtp_difo != NULL);
14346 dtrace_difo_hold(pred->dtp_difo);
14347 helper->dtha_predicate = pred->dtp_difo;
14348 }
14349
14350 for (act = ep->dted_action; act != NULL; act = act->dtad_next) {
14351 if (act->dtad_kind != DTRACEACT_DIFEXPR)
14352 goto err;
14353
14354 if (act->dtad_difo == NULL)
14355 goto err;
14356
14357 nactions++;
14358 }
14359
14360 helper->dtha_actions = kmem_zalloc(sizeof (dtrace_difo_t *) *
14361 (helper->dtha_nactions = nactions), KM_SLEEP);
14362
14363 for (act = ep->dted_action, i = 0; act != NULL; act = act->dtad_next) {
14364 dtrace_difo_hold(act->dtad_difo);
14365 helper->dtha_actions[i++] = act->dtad_difo;
14366 }
14367
14368 if (!dtrace_helper_validate(helper))
14369 goto err;
14370
14371 if (last == NULL) {
14372 help->dthps_actions[which] = helper;
14373 } else {
14374 last->dtha_next = helper;
14375 }
14376
b0d623f7 14377 if ((uint32_t)vstate->dtvs_nlocals > dtrace_helptrace_nlocals) {
2d21ac55
A
14378 dtrace_helptrace_nlocals = vstate->dtvs_nlocals;
14379 dtrace_helptrace_next = 0;
14380 }
14381
14382 return (0);
14383err:
14384 dtrace_helper_action_destroy(helper, vstate);
14385 return (EINVAL);
14386}
14387
14388static void
14389dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help,
14390 dof_helper_t *dofhp)
14391{
14392 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED);
14393
14394 lck_mtx_lock(&dtrace_meta_lock);
14395 lck_mtx_lock(&dtrace_lock);
14396
14397 if (!dtrace_attached() || dtrace_meta_pid == NULL) {
14398 /*
14399 * If the dtrace module is loaded but not attached, or if
14400	 * there isn't a meta provider registered to deal with
14401 * these provider descriptions, we need to postpone creating
14402 * the actual providers until later.
14403 */
14404
14405 if (help->dthps_next == NULL && help->dthps_prev == NULL &&
14406 dtrace_deferred_pid != help) {
14407 help->dthps_deferred = 1;
14408 help->dthps_pid = p->p_pid;
14409 help->dthps_next = dtrace_deferred_pid;
14410 help->dthps_prev = NULL;
14411 if (dtrace_deferred_pid != NULL)
14412 dtrace_deferred_pid->dthps_prev = help;
14413 dtrace_deferred_pid = help;
14414 }
14415
14416 lck_mtx_unlock(&dtrace_lock);
14417
14418 } else if (dofhp != NULL) {
14419 /*
14420 * If the dtrace module is loaded and we have a particular
14421 * helper provider description, pass that off to the
14422 * meta provider.
14423 */
14424
14425 lck_mtx_unlock(&dtrace_lock);
14426
d190cdc3 14427 dtrace_helper_provide(dofhp, p);
2d21ac55
A
14428
14429 } else {
14430 /*
14431 * Otherwise, just pass all the helper provider descriptions
14432 * off to the meta provider.
14433 */
14434
b0d623f7 14435 uint_t i;
2d21ac55
A
14436 lck_mtx_unlock(&dtrace_lock);
14437
14438 for (i = 0; i < help->dthps_nprovs; i++) {
14439 dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
d190cdc3 14440 p);
2d21ac55
A
14441 }
14442 }
14443
14444 lck_mtx_unlock(&dtrace_meta_lock);
14445}
14446
2d21ac55
A
14447static int
14448dtrace_helper_provider_add(proc_t* p, dof_helper_t *dofhp, int gen)
2d21ac55
A
14449{
14450 dtrace_helpers_t *help;
14451 dtrace_helper_provider_t *hprov, **tmp_provs;
14452 uint_t tmp_maxprovs, i;
14453
14454 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
2d21ac55 14455 help = p->p_dtrace_helpers;
2d21ac55
A
14456 ASSERT(help != NULL);
14457
14458 /*
14459 * If we already have dtrace_helper_providers_max helper providers,
14460	 * we'll refuse to add a new one.
14461 */
14462 if (help->dthps_nprovs >= dtrace_helper_providers_max)
14463 return (ENOSPC);
14464
14465 /*
14466 * Check to make sure this isn't a duplicate.
14467 */
14468 for (i = 0; i < help->dthps_nprovs; i++) {
14469 if (dofhp->dofhp_addr ==
14470 help->dthps_provs[i]->dthp_prov.dofhp_addr)
14471 return (EALREADY);
14472 }
14473
14474 hprov = kmem_zalloc(sizeof (dtrace_helper_provider_t), KM_SLEEP);
14475 hprov->dthp_prov = *dofhp;
14476 hprov->dthp_ref = 1;
14477 hprov->dthp_generation = gen;
14478
14479 /*
14480 * Allocate a bigger table for helper providers if it's already full.
14481 */
14482 if (help->dthps_maxprovs == help->dthps_nprovs) {
14483 tmp_maxprovs = help->dthps_maxprovs;
14484 tmp_provs = help->dthps_provs;
14485
14486 if (help->dthps_maxprovs == 0)
14487 help->dthps_maxprovs = 2;
14488 else
14489 help->dthps_maxprovs *= 2;
14490 if (help->dthps_maxprovs > dtrace_helper_providers_max)
14491 help->dthps_maxprovs = dtrace_helper_providers_max;
14492
14493 ASSERT(tmp_maxprovs < help->dthps_maxprovs);
14494
14495 help->dthps_provs = kmem_zalloc(help->dthps_maxprovs *
14496 sizeof (dtrace_helper_provider_t *), KM_SLEEP);
14497
14498 if (tmp_provs != NULL) {
14499 bcopy(tmp_provs, help->dthps_provs, tmp_maxprovs *
14500 sizeof (dtrace_helper_provider_t *));
14501 kmem_free(tmp_provs, tmp_maxprovs *
14502 sizeof (dtrace_helper_provider_t *));
14503 }
14504 }
14505
14506 help->dthps_provs[help->dthps_nprovs] = hprov;
14507 help->dthps_nprovs++;
14508
14509 return (0);
14510}
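/*
 * Illustrative sketch (not compiled): the capacity-doubling growth used
 * above, which keeps the amortized cost of appending a provider
 * constant.  alloc_table()/free_table() are hypothetical stand-ins for
 * the kmem_zalloc()/kmem_free() pair.
 */
#if 0
#include <string.h>

struct prov;

extern struct prov **alloc_table(unsigned n);	/* zero-filled */
extern void free_table(struct prov **t, unsigned n);

static void
grow_if_full(struct prov ***tablep, unsigned *maxp, unsigned n,
    unsigned hard_cap)
{
	struct prov **nt;
	unsigned newmax;

	if (*maxp != n)
		return;				/* room left */

	newmax = (*maxp == 0) ? 2 : *maxp * 2;
	if (newmax > hard_cap)
		newmax = hard_cap;		/* never exceed the limit */

	nt = alloc_table(newmax);
	if (*tablep != NULL) {
		memcpy(nt, *tablep, n * sizeof (struct prov *));
		free_table(*tablep, *maxp);
	}
	*tablep = nt;
	*maxp = newmax;
}
#endif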
14511
14512static void
14513dtrace_helper_provider_destroy(dtrace_helper_provider_t *hprov)
14514{
14515 lck_mtx_lock(&dtrace_lock);
14516
14517 if (--hprov->dthp_ref == 0) {
14518 dof_hdr_t *dof;
14519 lck_mtx_unlock(&dtrace_lock);
14520 dof = (dof_hdr_t *)(uintptr_t)hprov->dthp_prov.dofhp_dof;
14521 dtrace_dof_destroy(dof);
14522 kmem_free(hprov, sizeof (dtrace_helper_provider_t));
14523 } else {
14524 lck_mtx_unlock(&dtrace_lock);
14525 }
14526}
14527
14528static int
14529dtrace_helper_provider_validate(dof_hdr_t *dof, dof_sec_t *sec)
14530{
14531 uintptr_t daddr = (uintptr_t)dof;
14532 dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
14533 dof_provider_t *provider;
14534 dof_probe_t *probe;
14535 uint8_t *arg;
14536 char *strtab, *typestr;
14537 dof_stridx_t typeidx;
14538 size_t typesz;
14539 uint_t nprobes, j, k;
14540
14541 ASSERT(sec->dofs_type == DOF_SECT_PROVIDER);
14542
14543 if (sec->dofs_offset & (sizeof (uint_t) - 1)) {
14544 dtrace_dof_error(dof, "misaligned section offset");
14545 return (-1);
14546 }
14547
14548 /*
14549 * The section needs to be large enough to contain the DOF provider
14550 * structure appropriate for the given version.
14551 */
14552 if (sec->dofs_size <
14553 ((dof->dofh_ident[DOF_ID_VERSION] == DOF_VERSION_1) ?
14554 offsetof(dof_provider_t, dofpv_prenoffs) :
14555 sizeof (dof_provider_t))) {
14556 dtrace_dof_error(dof, "provider section too small");
14557 return (-1);
14558 }
14559
14560 provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
14561 str_sec = dtrace_dof_sect(dof, DOF_SECT_STRTAB, provider->dofpv_strtab);
14562 prb_sec = dtrace_dof_sect(dof, DOF_SECT_PROBES, provider->dofpv_probes);
14563 arg_sec = dtrace_dof_sect(dof, DOF_SECT_PRARGS, provider->dofpv_prargs);
14564 off_sec = dtrace_dof_sect(dof, DOF_SECT_PROFFS, provider->dofpv_proffs);
14565
14566 if (str_sec == NULL || prb_sec == NULL ||
14567 arg_sec == NULL || off_sec == NULL)
14568 return (-1);
14569
14570 enoff_sec = NULL;
14571
14572 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
14573 provider->dofpv_prenoffs != DOF_SECT_NONE &&
14574 (enoff_sec = dtrace_dof_sect(dof, DOF_SECT_PRENOFFS,
14575 provider->dofpv_prenoffs)) == NULL)
14576 return (-1);
14577
14578 strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
14579
14580 if (provider->dofpv_name >= str_sec->dofs_size ||
14581 strlen(strtab + provider->dofpv_name) >= DTRACE_PROVNAMELEN) {
14582 dtrace_dof_error(dof, "invalid provider name");
14583 return (-1);
14584 }
14585
14586 if (prb_sec->dofs_entsize == 0 ||
14587 prb_sec->dofs_entsize > prb_sec->dofs_size) {
14588 dtrace_dof_error(dof, "invalid entry size");
14589 return (-1);
14590 }
14591
14592 if (prb_sec->dofs_entsize & (sizeof (uintptr_t) - 1)) {
14593 dtrace_dof_error(dof, "misaligned entry size");
14594 return (-1);
14595 }
14596
14597 if (off_sec->dofs_entsize != sizeof (uint32_t)) {
14598 dtrace_dof_error(dof, "invalid entry size");
14599 return (-1);
14600 }
14601
14602 if (off_sec->dofs_offset & (sizeof (uint32_t) - 1)) {
14603 dtrace_dof_error(dof, "misaligned section offset");
14604 return (-1);
14605 }
14606
14607 if (arg_sec->dofs_entsize != sizeof (uint8_t)) {
14608 dtrace_dof_error(dof, "invalid entry size");
14609 return (-1);
14610 }
14611
14612 arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);
14613
14614 nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;
14615
14616 /*
14617 * Take a pass through the probes to check for errors.
14618 */
14619 for (j = 0; j < nprobes; j++) {
14620 probe = (dof_probe_t *)(uintptr_t)(daddr +
14621 prb_sec->dofs_offset + j * prb_sec->dofs_entsize);
14622
14623 if (probe->dofpr_func >= str_sec->dofs_size) {
14624 dtrace_dof_error(dof, "invalid function name");
14625 return (-1);
14626 }
14627
14628 if (strlen(strtab + probe->dofpr_func) >= DTRACE_FUNCNAMELEN) {
14629 dtrace_dof_error(dof, "function name too long");
14630 return (-1);
14631 }
14632
14633 if (probe->dofpr_name >= str_sec->dofs_size ||
14634 strlen(strtab + probe->dofpr_name) >= DTRACE_NAMELEN) {
14635 dtrace_dof_error(dof, "invalid probe name");
14636 return (-1);
14637 }
14638
14639 /*
14640 * The offset count must not wrap the index, and the offsets
14641 * must also not overflow the section's data.
14642 */
14643 if (probe->dofpr_offidx + probe->dofpr_noffs <
14644 probe->dofpr_offidx ||
14645 (probe->dofpr_offidx + probe->dofpr_noffs) *
14646 off_sec->dofs_entsize > off_sec->dofs_size) {
14647 dtrace_dof_error(dof, "invalid probe offset");
14648 return (-1);
14649 }
14650
14651 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1) {
14652 /*
14653 * If there's no is-enabled offset section, make sure
14654 * there aren't any is-enabled offsets. Otherwise
14655 * perform the same checks as for probe offsets
14656 * (immediately above).
14657 */
14658 if (enoff_sec == NULL) {
14659 if (probe->dofpr_enoffidx != 0 ||
14660 probe->dofpr_nenoffs != 0) {
14661 dtrace_dof_error(dof, "is-enabled "
14662 "offsets with null section");
14663 return (-1);
14664 }
14665 } else if (probe->dofpr_enoffidx +
14666 probe->dofpr_nenoffs < probe->dofpr_enoffidx ||
14667 (probe->dofpr_enoffidx + probe->dofpr_nenoffs) *
14668 enoff_sec->dofs_entsize > enoff_sec->dofs_size) {
14669 dtrace_dof_error(dof, "invalid is-enabled "
14670 "offset");
14671 return (-1);
14672 }
14673
14674 if (probe->dofpr_noffs + probe->dofpr_nenoffs == 0) {
14675 dtrace_dof_error(dof, "zero probe and "
14676 "is-enabled offsets");
14677 return (-1);
14678 }
14679 } else if (probe->dofpr_noffs == 0) {
14680 dtrace_dof_error(dof, "zero probe offsets");
14681 return (-1);
14682 }
14683
14684 if (probe->dofpr_argidx + probe->dofpr_xargc <
14685 probe->dofpr_argidx ||
14686 (probe->dofpr_argidx + probe->dofpr_xargc) *
14687 arg_sec->dofs_entsize > arg_sec->dofs_size) {
14688 dtrace_dof_error(dof, "invalid args");
14689 return (-1);
14690 }
14691
14692 typeidx = probe->dofpr_nargv;
14693 typestr = strtab + probe->dofpr_nargv;
14694 for (k = 0; k < probe->dofpr_nargc; k++) {
14695 if (typeidx >= str_sec->dofs_size) {
14696 dtrace_dof_error(dof, "bad "
14697 "native argument type");
14698 return (-1);
14699 }
14700
14701 typesz = strlen(typestr) + 1;
14702 if (typesz > DTRACE_ARGTYPELEN) {
14703 dtrace_dof_error(dof, "native "
14704 "argument type too long");
14705 return (-1);
14706 }
14707 typeidx += typesz;
14708 typestr += typesz;
14709 }
14710
14711 typeidx = probe->dofpr_xargv;
14712 typestr = strtab + probe->dofpr_xargv;
14713 for (k = 0; k < probe->dofpr_xargc; k++) {
14714 if (arg[probe->dofpr_argidx + k] > probe->dofpr_nargc) {
14715 dtrace_dof_error(dof, "bad "
14716 "native argument index");
14717 return (-1);
14718 }
14719
14720 if (typeidx >= str_sec->dofs_size) {
14721 dtrace_dof_error(dof, "bad "
14722 "translated argument type");
14723 return (-1);
14724 }
14725
14726 typesz = strlen(typestr) + 1;
14727 if (typesz > DTRACE_ARGTYPELEN) {
14728 dtrace_dof_error(dof, "translated argument "
14729 "type too long");
14730 return (-1);
14731 }
14732
14733 typeidx += typesz;
14734 typestr += typesz;
14735 }
14736 }
14737
14738 return (0);
14739}
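/*
 * Illustrative sketch (not compiled): the overflow-safe bounds check
 * used repeatedly above.  "idx + n < idx" catches unsigned wraparound
 * before any multiply, and the product is then compared against the
 * section size, so hostile DOF cannot pass validation by overflowing
 * the index arithmetic.
 */
#if 0
#include <stdint.h>

static int
range_ok(uint32_t idx, uint32_t n, uint32_t entsize, uint64_t secsize)
{
	if (idx + n < idx)
		return (0);	/* index arithmetic wrapped */
	if ((uint64_t)(idx + n) * entsize > secsize)
		return (0);	/* runs past the section's data */
	return (1);
}
#endif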
14740
2d21ac55
A
14741static int
14742dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp)
2d21ac55
A
14743{
14744 dtrace_helpers_t *help;
14745 dtrace_vstate_t *vstate;
14746 dtrace_enabling_t *enab = NULL;
14747 int i, gen, rv, nhelpers = 0, nprovs = 0, destroy = 1;
14748 uintptr_t daddr = (uintptr_t)dof;
14749
14750 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
14751
2d21ac55
A
14752 if ((help = p->p_dtrace_helpers) == NULL)
14753 help = dtrace_helpers_create(p);
2d21ac55
A
14754
14755 vstate = &help->dthps_vstate;
14756
14757 if ((rv = dtrace_dof_slurp(dof, vstate, NULL, &enab,
14758 dhp != NULL ? dhp->dofhp_addr : 0, B_FALSE)) != 0) {
14759 dtrace_dof_destroy(dof);
14760 return (rv);
14761 }
14762
14763 /*
14764 * Look for helper providers and validate their descriptions.
14765 */
14766 if (dhp != NULL) {
b0d623f7 14767 for (i = 0; (uint32_t)i < dof->dofh_secnum; i++) {
2d21ac55
A
14768 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
14769 dof->dofh_secoff + i * dof->dofh_secsize);
14770
14771 if (sec->dofs_type != DOF_SECT_PROVIDER)
14772 continue;
14773
14774 if (dtrace_helper_provider_validate(dof, sec) != 0) {
14775 dtrace_enabling_destroy(enab);
14776 dtrace_dof_destroy(dof);
14777 return (-1);
14778 }
14779
14780 nprovs++;
14781 }
14782 }
14783
14784 /*
14785 * Now we need to walk through the ECB descriptions in the enabling.
14786 */
14787 for (i = 0; i < enab->dten_ndesc; i++) {
14788 dtrace_ecbdesc_t *ep = enab->dten_desc[i];
14789 dtrace_probedesc_t *desc = &ep->dted_probe;
14790
fe8ab488 14791 /* APPLE NOTE: Darwin employs size bounded string operation. */
b0d623f7
A
14792 if (!LIT_STRNEQL(desc->dtpd_provider, "dtrace"))
14793 continue;
2d21ac55 14794
b0d623f7
A
14795 if (!LIT_STRNEQL(desc->dtpd_mod, "helper"))
14796 continue;
14797
14798 if (!LIT_STRNEQL(desc->dtpd_func, "ustack"))
14799 continue;
b0d623f7 14800
b0d623f7
A
14801 if ((rv = dtrace_helper_action_add(p, DTRACE_HELPER_ACTION_USTACK,
14802 ep)) != 0) {
b0d623f7 14803 /*
2d21ac55
A
14804 * Adding this helper action failed -- we are now going
14805 * to rip out the entire generation and return failure.
14806 */
2d21ac55 14807 (void) dtrace_helper_destroygen(p, help->dthps_generation);
2d21ac55
A
14808 dtrace_enabling_destroy(enab);
14809 dtrace_dof_destroy(dof);
14810 return (-1);
14811 }
14812
14813 nhelpers++;
14814 }
14815
14816 if (nhelpers < enab->dten_ndesc)
14817 dtrace_dof_error(dof, "unmatched helpers");
14818
14819 gen = help->dthps_generation++;
14820 dtrace_enabling_destroy(enab);
14821
14822 if (dhp != NULL && nprovs > 0) {
14823 dhp->dofhp_dof = (uint64_t)(uintptr_t)dof;
2d21ac55 14824 if (dtrace_helper_provider_add(p, dhp, gen) == 0) {
2d21ac55 14825 lck_mtx_unlock(&dtrace_lock);
2d21ac55 14826 dtrace_helper_provider_register(p, help, dhp);
2d21ac55
A
14827 lck_mtx_lock(&dtrace_lock);
14828
14829 destroy = 0;
14830 }
14831 }
14832
14833 if (destroy)
14834 dtrace_dof_destroy(dof);
14835
14836 return (gen);
14837}
14838
2d21ac55 14839/*
fe8ab488 14840 * APPLE NOTE: DTrace lazy dof implementation
2d21ac55
A
14841 *
14842 * DTrace user static probes (USDT probes) and helper actions are loaded
14843 * in a process by processing dof sections. The dof sections are passed
14844 * into the kernel by dyld, in a dof_ioctl_data_t block. It is rather
14845 * expensive to process dof for a process that will never use it. There
14846 * is a memory cost (allocating the providers/probes), and a cpu cost
14847 * (creating the providers/probes).
14848 *
14849 * To reduce this cost, we use "lazy dof". The normal procedure for
14850 * dof processing is to copyin the dof(s) pointed to by the dof_ioctl_data_t
14851 * block, and invoke dtrace_helper_slurp() on them. When "lazy dof" is
14852 * used, each process retains the dof_ioctl_data_t block, instead of
14853 * copying in the data it points to.
14854 *
14855 * The dof_ioctl_data_t blocks are managed as if they were the actual
14856 * processed dof; on fork the block is copied to the child, on exec and
14857 * exit the block is freed.
14858 *
14859 * If the process loads library(s) containing additional dof, the
14860 * new dof_ioctl_data_t is merged with the existing block.
14861 *
14862 * There are a few catches that make this slightly more difficult.
14863 * When dyld registers dof_ioctl_data_t blocks, it expects a unique
14864 * identifier value for each dof in the block. In non-lazy dof terms,
14865 * this is the generation that dof was loaded in. If we hand back
14866 * a UID for a lazy dof, that same UID must be able to unload the
14867 * dof once it has become non-lazy. To meet this requirement, the
14868 * code that loads lazy dof requires that the UID's for dof(s) in
14869 * the lazy dof be sorted, and in ascending order. It is okay to skip
14870 * UID's, e.g., 1 -> 5 -> 6 is legal.
14871 *
14872 * Once a process has become non-lazy, it will stay non-lazy. All
14873 * future dof operations for that process will be non-lazy, even
14874 * if the dof mode transitions back to lazy.
14875 *
14876 * Always do lazy dof checks before non-lazy (i.e., in fork, exit, and exec).
14877 * That way if the lazy check fails due to transitioning to non-lazy, the
14878 * right thing is done with the newly faulted in dof.
14879 */
14880
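To make the generation bookkeeping above concrete, here is a minimal user-space sketch of the merge-and-renumber step and the ascending-UID invariant. lazy_dof_t and merge_lazy_dofs are simplified stand-ins invented for illustration, not the kernel's dof_helper_t/dof_ioctl_data_t, and allocation error handling is elided.

#include <assert.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Simplified stand-in for dof_helper_t: only the generation matters here. */
typedef struct {
	uint64_t gen;		/* lives in dofhp_dof while the dof is lazy */
} lazy_dof_t;

/*
 * Merge 'icnt' incoming dofs after 'existing', numbering each incoming
 * entry from one past the last existing generation -- the same scheme
 * dtrace_lazy_dofs_add uses.
 */
static lazy_dof_t *
merge_lazy_dofs(const lazy_dof_t *existing, size_t ecnt, size_t icnt,
    size_t *mcnt)
{
	uint64_t next_gen = ecnt ? existing[ecnt - 1].gen + 1 : 1;
	lazy_dof_t *merged = malloc((ecnt + icnt) * sizeof(*merged));

	memcpy(merged, existing, ecnt * sizeof(*merged));
	for (size_t i = 0; i < icnt; i++)
		merged[ecnt + i].gen = next_gen++;
	*mcnt = ecnt + icnt;
	return merged;
}

int
main(void)
{
	lazy_dof_t a[] = {{1}, {5}, {6}};	/* skipped UIDs are legal */
	size_t n;
	lazy_dof_t *m = merge_lazy_dofs(a, 3, 2, &n);

	/* The invariant the DEBUG block below asserts: strictly ascending. */
	for (size_t i = 0; i + 1 < n; i++)
		assert(m[i].gen < m[i + 1].gen);
	printf("merged %zu dofs, last gen %llu\n", n,
	    (unsigned long long)m[n - 1].gen);
	free(m);
	return 0;
}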
14881/*
14882 * This method is a bit squicky. It must handle:
14883 *
14884 * dof should not be lazy.
14885 * dof should have been handled lazily, but there was an error.
14886 * dof was handled lazily, and needs to be freed.
14887 * dof was handled lazily, and must not be freed.
14888 *
14889 *
14890 * Returns EACCES if dof should be handled non-lazily.
14891 *
14892 * KERN_SUCCESS and all other return codes indicate lazy handling of dof.
14893 *
14894 * If the dofs' data is claimed by this method, dofs_claimed will be set.
14895 * Callers should not free claimed dofs.
14896 */
b0d623f7 14897static int
2d21ac55
A
14898dtrace_lazy_dofs_add(proc_t *p, dof_ioctl_data_t* incoming_dofs, int *dofs_claimed)
14899{
14900 ASSERT(p);
14901 ASSERT(incoming_dofs && incoming_dofs->dofiod_count > 0);
14902
14903 int rval = 0;
14904 *dofs_claimed = 0;
14905
14906 lck_rw_lock_shared(&dtrace_dof_mode_lock);
14907
2d21ac55
A
14908 ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);
14909 ASSERT(dtrace_dof_mode != DTRACE_DOF_MODE_NEVER);
14910
14911 /*
14912 * Any existing helpers force non-lazy behavior.
14913 */
14914 if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON && (p->p_dtrace_helpers == NULL)) {
14915 lck_mtx_lock(&p->p_dtrace_sprlock);
14916
14917 dof_ioctl_data_t* existing_dofs = p->p_dtrace_lazy_dofs;
14918 unsigned int existing_dofs_count = (existing_dofs) ? existing_dofs->dofiod_count : 0;
14919 unsigned int i, merged_dofs_count = incoming_dofs->dofiod_count + existing_dofs_count;
14920
14921 /*
14922 * Range check...
14923 */
14924 if (merged_dofs_count == 0 || merged_dofs_count > 1024) {
14925 dtrace_dof_error(NULL, "lazy_dofs_add merged_dofs_count out of range");
14926 rval = EINVAL;
14927 goto unlock;
14928 }
14929
14930 /*
14931 * Each dof being added must be assigned a unique generation.
14932 */
14933 uint64_t generation = (existing_dofs) ? existing_dofs->dofiod_helpers[existing_dofs_count - 1].dofhp_dof + 1 : 1;
14934 for (i=0; i<incoming_dofs->dofiod_count; i++) {
14935 /*
14936 * We rely on these being the same so we can overwrite dofhp_dof and not lose info.
14937 */
14938 ASSERT(incoming_dofs->dofiod_helpers[i].dofhp_dof == incoming_dofs->dofiod_helpers[i].dofhp_addr);
14939 incoming_dofs->dofiod_helpers[i].dofhp_dof = generation++;
14940 }
14941
14942
14943 if (existing_dofs) {
14944 /*
14945 * Merge the existing and incoming dofs
14946 */
14947 size_t merged_dofs_size = DOF_IOCTL_DATA_T_SIZE(merged_dofs_count);
14948 dof_ioctl_data_t* merged_dofs = kmem_alloc(merged_dofs_size, KM_SLEEP);
14949
14950 bcopy(&existing_dofs->dofiod_helpers[0],
14951 &merged_dofs->dofiod_helpers[0],
14952 sizeof(dof_helper_t) * existing_dofs_count);
14953 bcopy(&incoming_dofs->dofiod_helpers[0],
14954 &merged_dofs->dofiod_helpers[existing_dofs_count],
14955 sizeof(dof_helper_t) * incoming_dofs->dofiod_count);
14956
14957 merged_dofs->dofiod_count = merged_dofs_count;
14958
14959 kmem_free(existing_dofs, DOF_IOCTL_DATA_T_SIZE(existing_dofs_count));
14960
14961 p->p_dtrace_lazy_dofs = merged_dofs;
14962 } else {
14963 /*
14964 * Claim the incoming dofs
14965 */
14966 *dofs_claimed = 1;
14967 p->p_dtrace_lazy_dofs = incoming_dofs;
14968 }
14969
14970#if DEBUG
14971 dof_ioctl_data_t* all_dofs = p->p_dtrace_lazy_dofs;
14972 for (i=0; i<all_dofs->dofiod_count-1; i++) {
14973 ASSERT(all_dofs->dofiod_helpers[i].dofhp_dof < all_dofs->dofiod_helpers[i+1].dofhp_dof);
14974 }
b0d623f7 14975#endif /* DEBUG */
2d21ac55
A
14976
14977unlock:
14978 lck_mtx_unlock(&p->p_dtrace_sprlock);
14979 } else {
14980 rval = EACCES;
14981 }
14982
14983 lck_rw_unlock_shared(&dtrace_dof_mode_lock);
14984
14985 return rval;
14986}
14987
14988/*
14989 * Returns:
14990 *
14991 * EINVAL: lazy dof is enabled, but the requested generation was not found.
14992 * EACCES: This removal needs to be handled non-lazily.
14993 */
b0d623f7 14994static int
2d21ac55
A
14995dtrace_lazy_dofs_remove(proc_t *p, int generation)
14996{
14997 int rval = EINVAL;
14998
14999 lck_rw_lock_shared(&dtrace_dof_mode_lock);
15000
2d21ac55
A
15001 ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);
15002 ASSERT(dtrace_dof_mode != DTRACE_DOF_MODE_NEVER);
15003
15004 /*
15005 * Any existing helpers force non-lazy behavior.
15006 */
15007 if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON && (p->p_dtrace_helpers == NULL)) {
15008 lck_mtx_lock(&p->p_dtrace_sprlock);
15009
15010 dof_ioctl_data_t* existing_dofs = p->p_dtrace_lazy_dofs;
15011
15012 if (existing_dofs) {
15013 int index, existing_dofs_count = existing_dofs->dofiod_count;
15014 for (index=0; index<existing_dofs_count; index++) {
15015 if ((int)existing_dofs->dofiod_helpers[index].dofhp_dof == generation) {
15016 dof_ioctl_data_t* removed_dofs = NULL;
15017
15018 /*
15019 * If this is the only remaining dof, we free the whole block and swap in NULL.
15020 */
15021 if (existing_dofs_count > 1) {
15022 int removed_dofs_count = existing_dofs_count - 1;
15023 size_t removed_dofs_size = DOF_IOCTL_DATA_T_SIZE(removed_dofs_count);
15024
15025 removed_dofs = kmem_alloc(removed_dofs_size, KM_SLEEP);
15026 removed_dofs->dofiod_count = removed_dofs_count;
15027
15028 /*
15029 * copy the remaining data.
15030 */
15031 if (index > 0) {
15032 bcopy(&existing_dofs->dofiod_helpers[0],
15033 &removed_dofs->dofiod_helpers[0],
15034 index * sizeof(dof_helper_t));
15035 }
15036
15037 if (index < existing_dofs_count-1) {
15038 bcopy(&existing_dofs->dofiod_helpers[index+1],
15039 &removed_dofs->dofiod_helpers[index],
15040 (existing_dofs_count - index - 1) * sizeof(dof_helper_t));
15041 }
15042 }
15043
15044 kmem_free(existing_dofs, DOF_IOCTL_DATA_T_SIZE(existing_dofs_count));
15045
15046 p->p_dtrace_lazy_dofs = removed_dofs;
15047
15048 rval = KERN_SUCCESS;
15049
15050 break;
15051 }
15052 }
15053
15054#if DEBUG
15055 dof_ioctl_data_t* all_dofs = p->p_dtrace_lazy_dofs;
15056 if (all_dofs) {
15057 unsigned int i;
15058 for (i=0; i<all_dofs->dofiod_count-1; i++) {
15059 ASSERT(all_dofs->dofiod_helpers[i].dofhp_dof < all_dofs->dofiod_helpers[i+1].dofhp_dof);
15060 }
15061 }
15062#endif
15063
15064 }
15065
15066 lck_mtx_unlock(&p->p_dtrace_sprlock);
15067 } else {
15068 rval = EACCES;
15069 }
15070
15071 lck_rw_unlock_shared(&dtrace_dof_mode_lock);
39037602 15072
2d21ac55
A
15073 return rval;
15074}
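The compaction above is the classic copy-before/copy-after array removal. A stand-alone sketch of just that pattern (generic element type; remove_at is a hypothetical name, not kernel code):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Compact out element 'index' the way dtrace_lazy_dofs_remove does:
 * copy everything before it, then everything after it. */
static int *
remove_at(const int *src, size_t count, size_t index, size_t *newcount)
{
	if (count <= 1) {		/* last element: swap in NULL */
		*newcount = 0;
		return NULL;
	}
	int *dst = malloc((count - 1) * sizeof(*dst));
	if (index > 0)
		memcpy(dst, src, index * sizeof(*dst));
	if (index < count - 1)
		memcpy(dst + index, src + index + 1,
		    (count - index - 1) * sizeof(*dst));
	*newcount = count - 1;
	return dst;
}

int
main(void)
{
	int v[] = {10, 20, 30, 40};
	size_t n;
	int *w = remove_at(v, 4, 1, &n);	/* drop the 20 */
	for (size_t i = 0; i < n; i++)
		printf("%d ", w[i]);		/* prints: 10 30 40 */
	printf("\n");
	free(w);
	return 0;
}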
15075
15076void
15077dtrace_lazy_dofs_destroy(proc_t *p)
15078{
15079 lck_rw_lock_shared(&dtrace_dof_mode_lock);
15080 lck_mtx_lock(&p->p_dtrace_sprlock);
15081
2d21ac55
A
15082 ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);
15083
15084 dof_ioctl_data_t* lazy_dofs = p->p_dtrace_lazy_dofs;
15085 p->p_dtrace_lazy_dofs = NULL;
15086
15087 lck_mtx_unlock(&p->p_dtrace_sprlock);
15088 lck_rw_unlock_shared(&dtrace_dof_mode_lock);
15089
15090 if (lazy_dofs) {
15091 kmem_free(lazy_dofs, DOF_IOCTL_DATA_T_SIZE(lazy_dofs->dofiod_count));
15092 }
15093}
15094
2d21ac55
A
15095static int
15096dtrace_lazy_dofs_proc_iterate_filter(proc_t *p, void* ignored)
15097{
15098#pragma unused(ignored)
15099 /*
15100 * Okay to NULL test without taking the sprlock.
15101 */
15102 return p->p_dtrace_lazy_dofs != NULL;
15103}
15104
39037602
A
15105static void
15106 dtrace_lazy_dofs_process(proc_t *p) {
2d21ac55
A
15107 /*
15108 * It is possible this process may exit during our attempt to
15109 * fault in the dof. We could fix this by holding locks longer,
15110 * but the errors are benign.
15111 */
15112 lck_mtx_lock(&p->p_dtrace_sprlock);
15113
39037602 15114
2d21ac55
A
15115 ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);
15116 ASSERT(dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF);
15117
2d21ac55
A
15118 dof_ioctl_data_t* lazy_dofs = p->p_dtrace_lazy_dofs;
15119 p->p_dtrace_lazy_dofs = NULL;
15120
15121 lck_mtx_unlock(&p->p_dtrace_sprlock);
15122
15123 /*
15124 * Process each dof_helper_t
15125 */
15126 if (lazy_dofs != NULL) {
15127 unsigned int i;
15128 int rval;
15129
15130 for (i=0; i<lazy_dofs->dofiod_count; i++) {
15131 /*
15132 * When loading lazy dof, we depend on the generations being sorted in ascending order.
15133 */
15134 ASSERT(i >= (lazy_dofs->dofiod_count - 1) || lazy_dofs->dofiod_helpers[i].dofhp_dof < lazy_dofs->dofiod_helpers[i+1].dofhp_dof);
15135
15136 dof_helper_t *dhp = &lazy_dofs->dofiod_helpers[i];
15137
15138 /*
15139 * We stored the generation in dofhp_dof. Save it, and restore the original value.
15140 */
15141 int generation = dhp->dofhp_dof;
15142 dhp->dofhp_dof = dhp->dofhp_addr;
15143
15144 dof_hdr_t *dof = dtrace_dof_copyin_from_proc(p, dhp->dofhp_dof, &rval);
39037602 15145
2d21ac55
A
15146 if (dof != NULL) {
15147 dtrace_helpers_t *help;
15148
15149 lck_mtx_lock(&dtrace_lock);
15150
15151 /*
15152 * This must be done with the dtrace_lock held
15153 */
15154 if ((help = p->p_dtrace_helpers) == NULL)
15155 help = dtrace_helpers_create(p);
15156
15157 /*
15158 * If the generation value has been bumped, someone snuck in
15159 * when we released the dtrace lock. We have to dump this generation;
15160 * there is no safe way to load it.
15161 */
15162 if (help->dthps_generation <= generation) {
15163 help->dthps_generation = generation;
15164
15165 /*
15166 * dtrace_helper_slurp() takes responsibility for the dof --
15167 * it may free it now or it may save it and free it later.
15168 */
15169 if ((rval = dtrace_helper_slurp(p, dof, dhp)) != generation) {
15170 dtrace_dof_error(NULL, "returned value did not match expected generation");
15171 }
15172 }
15173
15174 lck_mtx_unlock(&dtrace_lock);
15175 }
15176 }
15177
15178 kmem_free(lazy_dofs, DOF_IOCTL_DATA_T_SIZE(lazy_dofs->dofiod_count));
15179 }
39037602
A
15180}
15181
15182static int
15183dtrace_lazy_dofs_proc_iterate_doit(proc_t *p, void* ignored)
15184{
15185#pragma unused(ignored)
15186
15187 dtrace_lazy_dofs_process(p);
2d21ac55
A
15188
15189 return PROC_RETURNED;
15190}
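dtrace_lazy_dofs_proc_iterate_filter/_doit form the filter/callback pair that proc_iterate() later walks over all processes (see the call in dtrace_open below). A minimal user-space model of that contract, with fproc_t and iterate() as invented stand-ins for the kernel's proc list machinery:

#include <stdio.h>

typedef struct { int id; int has_lazy; } fproc_t;

typedef int  (*filter_fn)(fproc_t *, void *);
typedef void (*doit_fn)(fproc_t *, void *);

/* Visit every proc, act only on those the filter picks -- the shape of
 * the proc_iterate(filter, doit) contract used by dtrace_open. */
static void
iterate(fproc_t *procs, int n, doit_fn doit, filter_fn filter, void *arg)
{
	for (int i = 0; i < n; i++)
		if (filter(&procs[i], arg))
			doit(&procs[i], arg);
}

static int
has_lazy(fproc_t *p, void *ignored)
{
	(void)ignored;
	return p->has_lazy;	/* cheap pre-test, like the NULL check above */
}

static void
process_one(fproc_t *p, void *ignored)
{
	(void)ignored;
	printf("processing lazy dofs for proc %d\n", p->id);
	p->has_lazy = 0;
}

int
main(void)
{
	fproc_t procs[] = {{1, 0}, {2, 1}, {3, 1}};
	iterate(procs, 3, process_one, has_lazy, NULL);	/* visits 2 and 3 */
	return 0;
}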
15191
39037602
A
15192#define DTRACE_LAZY_DOFS_DUPLICATED 1
15193
15194static int
15195dtrace_lazy_dofs_duplicate(proc_t *parent, proc_t *child)
15196{
15197 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED);
15198 lck_mtx_assert(&parent->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED);
15199 lck_mtx_assert(&child->p_dtrace_sprlock, LCK_MTX_ASSERT_NOTOWNED);
15200
15201 lck_rw_lock_shared(&dtrace_dof_mode_lock);
15202 lck_mtx_lock(&parent->p_dtrace_sprlock);
15203
15204 /*
15205 * We need to make sure that the transition to lazy dofs -> helpers
15206 * was atomic for our parent
15207 */
15208 ASSERT(parent->p_dtrace_lazy_dofs == NULL || parent->p_dtrace_helpers == NULL);
15209 /*
15210 * In theory we should hold the child sprlock, but this is safe...
15211 */
15212 ASSERT(child->p_dtrace_lazy_dofs == NULL && child->p_dtrace_helpers == NULL);
15213
15214 dof_ioctl_data_t* parent_dofs = parent->p_dtrace_lazy_dofs;
15215 dof_ioctl_data_t* child_dofs = NULL;
15216 if (parent_dofs) {
15217 size_t parent_dofs_size = DOF_IOCTL_DATA_T_SIZE(parent_dofs->dofiod_count);
15218 child_dofs = kmem_alloc(parent_dofs_size, KM_SLEEP);
15219 bcopy(parent_dofs, child_dofs, parent_dofs_size);
15220 }
15221
15222 lck_mtx_unlock(&parent->p_dtrace_sprlock);
15223
15224 if (child_dofs) {
15225 lck_mtx_lock(&child->p_dtrace_sprlock);
15226 child->p_dtrace_lazy_dofs = child_dofs;
15227 lck_mtx_unlock(&child->p_dtrace_sprlock);
15228 /**
15229 * We process the DOF at this point if the mode is set to
15230 * LAZY_OFF. This can happen if DTrace is still processing the
15231 * DOF of another process (which can happen because the
15232 * protected pager can have a huge latency)
15233 * but has not processed our parent yet.
15234 */
15235 if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF) {
15236 dtrace_lazy_dofs_process(child);
15237 }
15238 lck_rw_unlock_shared(&dtrace_dof_mode_lock);
15239
15240 return DTRACE_LAZY_DOFS_DUPLICATED;
15241 }
15242 lck_rw_unlock_shared(&dtrace_dof_mode_lock);
15243
15244 return 0;
15245}
15246
2d21ac55
A
15247static dtrace_helpers_t *
15248dtrace_helpers_create(proc_t *p)
15249{
15250 dtrace_helpers_t *help;
15251
15252 lck_mtx_assert(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
15253 ASSERT(p->p_dtrace_helpers == NULL);
15254
15255 help = kmem_zalloc(sizeof (dtrace_helpers_t), KM_SLEEP);
15256 help->dthps_actions = kmem_zalloc(sizeof (dtrace_helper_action_t *) *
15257 DTRACE_NHELPER_ACTIONS, KM_SLEEP);
15258
15259 p->p_dtrace_helpers = help;
15260 dtrace_helpers++;
15261
15262 return (help);
15263}
15264
2d21ac55
A
15265static void
15266dtrace_helpers_destroy(proc_t* p)
15267{
2d21ac55
A
15268 dtrace_helpers_t *help;
15269 dtrace_vstate_t *vstate;
b0d623f7 15270 uint_t i;
2d21ac55
A
15271
15272 lck_mtx_lock(&dtrace_lock);
15273
15274 ASSERT(p->p_dtrace_helpers != NULL);
15275 ASSERT(dtrace_helpers > 0);
15276
15277 help = p->p_dtrace_helpers;
15278 vstate = &help->dthps_vstate;
15279
15280 /*
15281 * We're now going to lose the help from this process.
15282 */
15283 p->p_dtrace_helpers = NULL;
15284 dtrace_sync();
15285
15286 /*
15287 * Destroy the helper actions.
15288 */
15289 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
15290 dtrace_helper_action_t *h, *next;
15291
15292 for (h = help->dthps_actions[i]; h != NULL; h = next) {
15293 next = h->dtha_next;
15294 dtrace_helper_action_destroy(h, vstate);
15295 h = next;
15296 }
15297 }
15298
15299 lck_mtx_unlock(&dtrace_lock);
15300
15301 /*
15302 * Destroy the helper providers.
15303 */
15304 if (help->dthps_maxprovs > 0) {
15305 lck_mtx_lock(&dtrace_meta_lock);
15306 if (dtrace_meta_pid != NULL) {
15307 ASSERT(dtrace_deferred_pid == NULL);
15308
15309 for (i = 0; i < help->dthps_nprovs; i++) {
15310 dtrace_helper_provider_remove(
d190cdc3 15311 &help->dthps_provs[i]->dthp_prov, p);
2d21ac55
A
15312 }
15313 } else {
15314 lck_mtx_lock(&dtrace_lock);
15315 ASSERT(help->dthps_deferred == 0 ||
15316 help->dthps_next != NULL ||
15317 help->dthps_prev != NULL ||
15318 help == dtrace_deferred_pid);
15319
15320 /*
15321 * Remove the helper from the deferred list.
15322 */
15323 if (help->dthps_next != NULL)
15324 help->dthps_next->dthps_prev = help->dthps_prev;
15325 if (help->dthps_prev != NULL)
15326 help->dthps_prev->dthps_next = help->dthps_next;
15327 if (dtrace_deferred_pid == help) {
15328 dtrace_deferred_pid = help->dthps_next;
15329 ASSERT(help->dthps_prev == NULL);
15330 }
15331
15332 lck_mtx_unlock(&dtrace_lock);
15333 }
15334
15335 lck_mtx_unlock(&dtrace_meta_lock);
15336
15337 for (i = 0; i < help->dthps_nprovs; i++) {
15338 dtrace_helper_provider_destroy(help->dthps_provs[i]);
15339 }
15340
15341 kmem_free(help->dthps_provs, help->dthps_maxprovs *
15342 sizeof (dtrace_helper_provider_t *));
15343 }
15344
15345 lck_mtx_lock(&dtrace_lock);
15346
15347 dtrace_vstate_fini(&help->dthps_vstate);
15348 kmem_free(help->dthps_actions,
15349 sizeof (dtrace_helper_action_t *) * DTRACE_NHELPER_ACTIONS);
15350 kmem_free(help, sizeof (dtrace_helpers_t));
15351
15352 --dtrace_helpers;
15353 lck_mtx_unlock(&dtrace_lock);
15354}
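The deferred-list manipulation above is a plain doubly-linked unlink with a head fixup. A stand-alone sketch of those three steps (node_t and unlink_node are hypothetical names, not the kernel's dtrace_helpers_t chain):

#include <assert.h>
#include <stddef.h>

typedef struct node {
	struct node *next, *prev;
} node_t;

/* Unlink 'n', updating the list head if 'n' was first -- the same three
 * updates dtrace_helpers_destroy applies to the deferred-pid list. */
static void
unlink_node(node_t **head, node_t *n)
{
	if (n->next != NULL)
		n->next->prev = n->prev;
	if (n->prev != NULL)
		n->prev->next = n->next;
	if (*head == n) {
		*head = n->next;
		assert(n->prev == NULL);	/* head node has no prev */
	}
	n->next = n->prev = NULL;
}

int
main(void)
{
	node_t a = {0}, b = {0};
	node_t *head = &a;
	a.next = &b;
	b.prev = &a;
	unlink_node(&head, &a);
	assert(head == &b && b.prev == NULL);
	return 0;
}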
15355
15356static void
15357dtrace_helpers_duplicate(proc_t *from, proc_t *to)
15358{
15359 dtrace_helpers_t *help, *newhelp;
15360 dtrace_helper_action_t *helper, *new, *last;
15361 dtrace_difo_t *dp;
15362 dtrace_vstate_t *vstate;
b0d623f7
A
15363 uint_t i;
15364 int j, sz, hasprovs = 0;
2d21ac55
A
15365
15366 lck_mtx_lock(&dtrace_lock);
15367 ASSERT(from->p_dtrace_helpers != NULL);
15368 ASSERT(dtrace_helpers > 0);
15369
15370 help = from->p_dtrace_helpers;
15371 newhelp = dtrace_helpers_create(to);
15372 ASSERT(to->p_dtrace_helpers != NULL);
15373
15374 newhelp->dthps_generation = help->dthps_generation;
15375 vstate = &newhelp->dthps_vstate;
15376
15377 /*
15378 * Duplicate the helper actions.
15379 */
15380 for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
15381 if ((helper = help->dthps_actions[i]) == NULL)
15382 continue;
15383
15384 for (last = NULL; helper != NULL; helper = helper->dtha_next) {
15385 new = kmem_zalloc(sizeof (dtrace_helper_action_t),
15386 KM_SLEEP);
15387 new->dtha_generation = helper->dtha_generation;
15388
15389 if ((dp = helper->dtha_predicate) != NULL) {
15390 dp = dtrace_difo_duplicate(dp, vstate);
15391 new->dtha_predicate = dp;
15392 }
15393
15394 new->dtha_nactions = helper->dtha_nactions;
15395 sz = sizeof (dtrace_difo_t *) * new->dtha_nactions;
15396 new->dtha_actions = kmem_alloc(sz, KM_SLEEP);
15397
b0d623f7
A
15398 for (j = 0; j < new->dtha_nactions; j++) {
15399 dtrace_difo_t *dpj = helper->dtha_actions[j];
15400
15401 ASSERT(dpj != NULL);
15402 dpj = dtrace_difo_duplicate(dpj, vstate);
15403 new->dtha_actions[j] = dpj;
15404 }
2d21ac55
A
15405
15406 if (last != NULL) {
15407 last->dtha_next = new;
15408 } else {
15409 newhelp->dthps_actions[i] = new;
15410 }
15411
15412 last = new;
15413 }
15414 }
15415
15416 /*
15417 * Duplicate the helper providers and register them with the
15418 * DTrace framework.
15419 */
15420 if (help->dthps_nprovs > 0) {
15421 newhelp->dthps_nprovs = help->dthps_nprovs;
15422 newhelp->dthps_maxprovs = help->dthps_nprovs;
15423 newhelp->dthps_provs = kmem_alloc(newhelp->dthps_nprovs *
15424 sizeof (dtrace_helper_provider_t *), KM_SLEEP);
15425 for (i = 0; i < newhelp->dthps_nprovs; i++) {
15426 newhelp->dthps_provs[i] = help->dthps_provs[i];
15427 newhelp->dthps_provs[i]->dthp_ref++;
15428 }
15429
15430 hasprovs = 1;
15431 }
15432
15433 lck_mtx_unlock(&dtrace_lock);
15434
15435 if (hasprovs)
15436 dtrace_helper_provider_register(to, newhelp, NULL);
15437}
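The action-duplication loop above preserves chain order by carrying a trailing 'last' pointer. The same shape in a self-contained sketch (act_t/dup_chain are invented names; the kernel version also duplicates DIFOs via dtrace_difo_duplicate, which this omits):

#include <stdio.h>
#include <stdlib.h>

typedef struct act {
	int gen;
	struct act *next;
} act_t;

/* Copy a singly-linked chain preserving order, chaining each new node
 * onto a trailing 'last' pointer as dtrace_helpers_duplicate does. */
static act_t *
dup_chain(const act_t *src)
{
	act_t *head = NULL, *last = NULL;

	for (; src != NULL; src = src->next) {
		act_t *n = calloc(1, sizeof(*n));
		n->gen = src->gen;
		if (last != NULL)
			last->next = n;
		else
			head = n;
		last = n;
	}
	return head;
}

int
main(void)
{
	act_t c = {3, NULL}, b = {2, &c}, a = {1, &b};
	act_t *copy = dup_chain(&a);
	for (act_t *p = copy; p != NULL; p = p->next)
		printf("%d ", p->gen);	/* prints: 1 2 3 */
	printf("\n");
	for (act_t *p = copy; p != NULL; ) {
		act_t *n = p->next;
		free(p);
		p = n;
	}
	return 0;
}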
15438
39037602
A
15439/**
15440 * DTrace Process functions
15441 */
15442
15443void
15444dtrace_proc_fork(proc_t *parent_proc, proc_t *child_proc, int spawn)
15445{
15446 /*
15447 * This code applies to new processes that are copying the task
15448 * and thread state and address spaces of their parent process.
15449 */
15450 if (!spawn) {
15451 /*
15452 * APPLE NOTE: Solaris does a sprlock() and drops the
15453 * proc_lock here. We're cheating a bit and only taking
15454 * the p_dtrace_sprlock lock. A full sprlock would
15455 * task_suspend the parent.
15456 */
15457 lck_mtx_lock(&parent_proc->p_dtrace_sprlock);
15458
15459 /*
15460 * Remove all DTrace tracepoints from the child process. We
15461 * need to do this _before_ duplicating USDT providers since
15462 * any associated probes may be immediately enabled.
15463 */
15464 if (parent_proc->p_dtrace_count > 0) {
15465 dtrace_fasttrap_fork(parent_proc, child_proc);
15466 }
15467
15468 lck_mtx_unlock(&parent_proc->p_dtrace_sprlock);
15469
15470 /*
15471 * Duplicate any lazy dof(s). This must be done while NOT
15472 * holding the parent sprlock! Lock ordering is
15473 * dtrace_dof_mode_lock, then sprlock. It is imperative we
15474 * always call dtrace_lazy_dofs_duplicate, rather than null
15475 * check and call if !NULL. If we NULL test, during lazy dof
15476 * faulting we can race with the faulting code and proceed
15477 * from here to beyond the helpers copy. The lazy dof
15478 * faulting will then fail to copy the helpers to the child
15479 * process. We return if we duplicated lazy dofs, as a process
15480 * can only have one set at a time; this avoids a race between
15481 * a dtrace client and dtrace_proc_fork where a process would
15482 * end up with both lazy dofs and helpers.
15483 */
15484 if (dtrace_lazy_dofs_duplicate(parent_proc, child_proc) == DTRACE_LAZY_DOFS_DUPLICATED) {
15485 return;
15486 }
15487
15488 /*
15489 * Duplicate any helper actions and providers if they haven't
15490 * already.
15491 */
15492#if !defined(__APPLE__)
15493 /*
15494 * The SFORKING flag
15495 * we set above informs the code that enables USDT probes that
15496 * sprlock() may fail because the child is being forked.
15497 */
15498#endif
15499 /*
15500 * APPLE NOTE: As best I can tell, Apple's sprlock() equivalent
15501 * never fails to find the child. We do not set SFORKING.
15502 */
15503 if (parent_proc->p_dtrace_helpers != NULL && dtrace_helpers_fork) {
15504 (*dtrace_helpers_fork)(parent_proc, child_proc);
15505 }
15506 }
15507}
15508
15509void
15510dtrace_proc_exec(proc_t *p)
15511{
15512 /*
15513 * Invalidate any predicate evaluation already cached for this thread by DTrace.
15514 * That's because we've just stored to p_comm and DTrace refers to that when it
15515 * evaluates the "execname" special variable. uid and gid may have changed as well.
15516 */
15517 dtrace_set_thread_predcache(current_thread(), 0);
15518
15519 /*
15520 * Free any outstanding lazy dof entries. It is imperative we
15521 * always call dtrace_lazy_dofs_destroy, rather than null check
15522 * and call if !NULL. If we NULL test, during lazy dof faulting
15523 * we can race with the faulting code and proceed from here to
15524 * beyond the helpers cleanup. The lazy dof faulting will then
15525 * install new helpers which no longer belong to this process!
15526 */
15527 dtrace_lazy_dofs_destroy(p);
15528
15529
15530 /*
15531 * Clean up any DTrace helpers for the process.
15532 */
15533 if (p->p_dtrace_helpers != NULL && dtrace_helpers_cleanup) {
15534 (*dtrace_helpers_cleanup)(p);
15535 }
15536
15537 /*
15538 * Cleanup the DTrace provider associated with this process.
15539 */
15540 proc_lock(p);
15541 if (p->p_dtrace_probes && dtrace_fasttrap_exec_ptr) {
15542 (*dtrace_fasttrap_exec_ptr)(p);
15543 }
15544 proc_unlock(p);
15545}
15546
15547void
15548dtrace_proc_exit(proc_t *p)
15549{
15550 /*
15551 * Free any outstanding lazy dof entries. It is imperative we
15552 * always call dtrace_lazy_dofs_destroy, rather than null check
15553 * and call if !NULL. If we NULL test, during lazy dof faulting
15554 * we can race with the faulting code and proceed from here to
15555 * beyond the helpers cleanup. The lazy dof faulting will then
15556 * install new helpers which will never be cleaned up, and leak.
15557 */
15558 dtrace_lazy_dofs_destroy(p);
15559
15560 /*
15561 * Clean up any DTrace helper actions or probes for the process.
15562 */
15563 if (p->p_dtrace_helpers != NULL) {
15564 (*dtrace_helpers_cleanup)(p);
15565 }
15566
15567 /*
15568 * Clean up any DTrace probes associated with this process.
15569 */
15570 /*
15571 * APPLE NOTE: We release ptss pages/entries in dtrace_fasttrap_exit_ptr(),
15572 * call this after dtrace_helpers_cleanup()
15573 */
15574 proc_lock(p);
15575 if (p->p_dtrace_probes && dtrace_fasttrap_exit_ptr) {
15576 (*dtrace_fasttrap_exit_ptr)(p);
15577 }
15578 proc_unlock(p);
15579}
15580
2d21ac55
A
15581/*
15582 * DTrace Hook Functions
15583 */
6d2010ae 15584
6d2010ae 15585/*
fe8ab488
A
15586 * APPLE NOTE: dtrace_modctl_* routines for kext support.
15587 * Used to manipulate the modctl list within dtrace xnu.
6d2010ae
A
15588 */
15589
15590modctl_t *dtrace_modctl_list;
15591
15592static void
15593dtrace_modctl_add(struct modctl * newctl)
15594{
15595 struct modctl *nextp, *prevp;
15596
15597 ASSERT(newctl != NULL);
15598 lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED);
15599
15600 // Insert the new module at the front of the list.
15601
15602 newctl->mod_next = dtrace_modctl_list;
15603 dtrace_modctl_list = newctl;
15604
15605 /*
15606 * If a module exists with the same name, then that module
15607 * must have been unloaded with enabled probes. We will move
15608 * the unloaded module to the new module's stale chain and
15609 * then stop traversing the list.
15610 */
15611
15612 prevp = newctl;
15613 nextp = newctl->mod_next;
15614
15615 while (nextp != NULL) {
15616 if (nextp->mod_loaded) {
15617 /* This is a loaded module. Keep traversing. */
15618 prevp = nextp;
15619 nextp = nextp->mod_next;
15620 continue;
15621 }
15622 else {
15623 /* Found an unloaded module */
15624 if (strncmp (newctl->mod_modname, nextp->mod_modname, KMOD_MAX_NAME)) {
15625 /* Names don't match. Keep traversing. */
15626 prevp = nextp;
15627 nextp = nextp->mod_next;
15628 continue;
15629 }
15630 else {
15631 /* We found a stale entry, move it. We're done. */
15632 prevp->mod_next = nextp->mod_next;
15633 newctl->mod_stale = nextp;
15634 nextp->mod_next = NULL;
15635 break;
15636 }
15637 }
15638 }
15639}
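The stale-chain behavior is easiest to see in isolation: re-adding a kext whose old, unloaded entry is still on the list parks that old entry under the new one. A user-space sketch, assuming a simplified mod_t with only the fields the list logic touches (not the real modctl_t):

#include <assert.h>
#include <stdio.h>
#include <string.h>

/* Simplified stand-in for modctl_t. */
typedef struct mod {
	char name[32];
	int loaded;
	struct mod *next, *stale;
} mod_t;

static mod_t *list;

/* Front-insert, then move any unloaded same-named entry onto the new
 * entry's stale chain -- the shape of dtrace_modctl_add. */
static void
mod_add(mod_t *m)
{
	m->next = list;
	list = m;
	for (mod_t *prev = m, *cur = m->next; cur != NULL;
	    prev = cur, cur = cur->next) {
		if (!cur->loaded && strcmp(cur->name, m->name) == 0) {
			prev->next = cur->next;	/* unlink the stale entry */
			m->stale = cur;		/* park it under the new one */
			cur->next = NULL;
			break;
		}
	}
}

int
main(void)
{
	mod_t old   = { "com.example.kext", 0, NULL, NULL };	/* unloaded */
	mod_t fresh = { "com.example.kext", 1, NULL, NULL };	/* reloaded */
	mod_add(&old);
	mod_add(&fresh);
	assert(list == &fresh && fresh.stale == &old && list->next == NULL);
	printf("stale entry parked under the reloaded kext\n");
	return 0;
}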
15640
15641static modctl_t *
15642dtrace_modctl_lookup(struct kmod_info * kmod)
15643{
15644 lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED);
15645
15646 struct modctl * ctl;
15647
15648 for (ctl = dtrace_modctl_list; ctl; ctl=ctl->mod_next) {
15649 if (ctl->mod_id == kmod->id)
15650 return(ctl);
15651 }
15652 return (NULL);
15653}
15654
15655/*
15656 * This routine is called from dtrace_module_unloaded().
15657 * It removes a modctl structure and its stale chain
15658 * from the kext shadow list.
15659 */
15660static void
15661dtrace_modctl_remove(struct modctl * ctl)
15662{
15663 ASSERT(ctl != NULL);
15664 lck_mtx_assert(&mod_lock, LCK_MTX_ASSERT_OWNED);
15665 modctl_t *prevp, *nextp, *curp;
15666
15667 // Remove stale chain first
15668 for (curp=ctl->mod_stale; curp != NULL; curp=nextp) {
15669 nextp = curp->mod_stale;
15670 /* There should NEVER be user symbols allocated at this point */
15671 ASSERT(curp->mod_user_symbols == NULL);
15672 kmem_free(curp, sizeof(modctl_t));
15673 }
15674
15675 prevp = NULL;
15676 curp = dtrace_modctl_list;
15677
15678 while (curp != ctl) {
15679 prevp = curp;
15680 curp = curp->mod_next;
15681 }
15682
15683 if (prevp != NULL) {
15684 prevp->mod_next = ctl->mod_next;
15685 }
15686 else {
15687 dtrace_modctl_list = ctl->mod_next;
15688 }
15689
15690 /* There should NEVER be user symbols allocated at this point */
15691 ASSERT(ctl->mod_user_symbols == NULL);
15692
15693 kmem_free (ctl, sizeof(modctl_t));
15694}
15695
6d2010ae
A
15696/*
15697 * APPLE NOTE: The kext loader will call dtrace_module_loaded
15698 * when the kext is loaded in memory, but before calling the
15699 * kext's start routine.
15700 *
15701 * Return 0 on success
15702 * Return -1 on failure
15703 */
15704
6d2010ae 15705static int
316670eb 15706dtrace_module_loaded(struct kmod_info *kmod, uint32_t flag)
2d21ac55
A
15707{
15708 dtrace_provider_t *prv;
15709
6d2010ae
A
15710 /*
15711 * If kernel symbols have been disabled, return immediately
15712 * DTRACE_KERNEL_SYMBOLS_NEVER is a permanent mode, it is safe to test without holding locks
15713 */
15714 if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER)
15715 return 0;
15716
15717 struct modctl *ctl = NULL;
15718 if (!kmod || kmod->address == 0 || kmod->size == 0)
15719 return(-1);
15720
15721 lck_mtx_lock(&dtrace_provider_lock);
15722 lck_mtx_lock(&mod_lock);
15723
15724 /*
15725 * Have we seen this kext before?
15726 */
2d21ac55 15727
6d2010ae
A
15728 ctl = dtrace_modctl_lookup(kmod);
15729
15730 if (ctl != NULL) {
15731 /* bail... we already have this kext in the modctl list */
15732 lck_mtx_unlock(&mod_lock);
15733 lck_mtx_unlock(&dtrace_provider_lock);
15734 if (dtrace_err_verbose)
15735 cmn_err(CE_WARN, "dtrace load module already exists '%s %u' is failing against '%s %u'", kmod->name, (uint_t)kmod->id, ctl->mod_modname, ctl->mod_id);
15736 return(-1);
15737 }
15738 else {
15739 ctl = kmem_alloc(sizeof(struct modctl), KM_SLEEP);
15740 if (ctl == NULL) {
15741 if (dtrace_err_verbose)
15742 cmn_err(CE_WARN, "dtrace module load '%s %u' is failing ", kmod->name, (uint_t)kmod->id);
15743 lck_mtx_unlock(&mod_lock);
15744 lck_mtx_unlock(&dtrace_provider_lock);
15745 return (-1);
15746 }
15747 ctl->mod_next = NULL;
15748 ctl->mod_stale = NULL;
15749 strlcpy (ctl->mod_modname, kmod->name, sizeof(ctl->mod_modname));
15750 ctl->mod_loadcnt = kmod->id;
15751 ctl->mod_nenabled = 0;
15752 ctl->mod_address = kmod->address;
15753 ctl->mod_size = kmod->size;
15754 ctl->mod_id = kmod->id;
15755 ctl->mod_loaded = 1;
15756 ctl->mod_flags = 0;
15757 ctl->mod_user_symbols = NULL;
15758
15759 /*
15760 * Find the UUID for this module, if it has one
15761 */
15762 kernel_mach_header_t* header = (kernel_mach_header_t *)ctl->mod_address;
15763 struct load_command* load_cmd = (struct load_command *)&header[1];
15764 uint32_t i;
15765 for (i = 0; i < header->ncmds; i++) {
15766 if (load_cmd->cmd == LC_UUID) {
15767 struct uuid_command* uuid_cmd = (struct uuid_command *)load_cmd;
15768 memcpy(ctl->mod_uuid, uuid_cmd->uuid, sizeof(uuid_cmd->uuid));
15769 ctl->mod_flags |= MODCTL_HAS_UUID;
15770 break;
15771 }
15772 load_cmd = (struct load_command *)((caddr_t)load_cmd + load_cmd->cmdsize);
15773 }
15774
15775 if (ctl->mod_address == g_kernel_kmod_info.address) {
15776 ctl->mod_flags |= MODCTL_IS_MACH_KERNEL;
15777 }
15778 }
15779 dtrace_modctl_add(ctl);
15780
15781 /*
15782 * We must hold the dtrace_lock to safely test non permanent dtrace_fbt_symbol_mode(s)
15783 */
15784 lck_mtx_lock(&dtrace_lock);
15785
15786 /*
316670eb
A
15787 * DTrace must decide if it will instrument modules lazily via
15788 * userspace symbols (default mode), or instrument immediately via
15789 * kernel symbols (non-default mode)
15790 *
15791 * When in default/lazy mode, DTrace will only support modules
15792 * built with a valid UUID.
15793 *
15794 * Overriding the default can be done explicitly in one of
15795 * the following two ways.
15796 *
15797 * A module can force symbols from kernel space using the plist key,
15798 * OSBundleForceDTraceInit (see kmod.h). If this per kext state is set,
15799 * we fall through and instrument this module now.
15800 *
15801 * Or, the boot-arg, dtrace_kernel_symbol_mode, can be set to force symbols
15802 * from kernel space (see dtrace_impl.h). If this system state is set
15803 * to a non-userspace mode, we fall through and instrument the module now.
6d2010ae 15804 */
316670eb
A
15805
15806 if ((dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) &&
15807 (!(flag & KMOD_DTRACE_FORCE_INIT)))
15808 {
15809 /* We will instrument the module lazily -- this is the default */
6d2010ae
A
15810 lck_mtx_unlock(&dtrace_lock);
15811 lck_mtx_unlock(&mod_lock);
15812 lck_mtx_unlock(&dtrace_provider_lock);
15813 return 0;
15814 }
15815
316670eb 15816 /* We will instrument the module immediately using kernel symbols */
6d2010ae
A
15817 ctl->mod_flags |= MODCTL_HAS_KERNEL_SYMBOLS;
15818
15819 lck_mtx_unlock(&dtrace_lock);
6d2010ae 15820
2d21ac55
A
15821 /*
15822 * We're going to call each providers per-module provide operation
15823 * specifying only this module.
15824 */
15825 for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
6d2010ae
A
15826 prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);
15827
6d2010ae 15828 /*
fe8ab488
A
15829 * APPLE NOTE: The contract with the kext loader is that once this function
15830 * has completed, it may delete kernel symbols at will.
15831 * We must set this while still holding the mod_lock.
6d2010ae
A
15832 */
15833 ctl->mod_flags &= ~MODCTL_HAS_KERNEL_SYMBOLS;
6d2010ae 15834
2d21ac55
A
15835 lck_mtx_unlock(&mod_lock);
15836 lck_mtx_unlock(&dtrace_provider_lock);
6d2010ae 15837
2d21ac55
A
15838 /*
15839 * If we have any retained enablings, we need to match against them.
15840 * Enabling probes requires that cpu_lock be held, and we cannot hold
15841 * cpu_lock here -- it is legal for cpu_lock to be held when loading a
15842 * module. (In particular, this happens when loading scheduling
15843 * classes.) So if we have any retained enablings, we need to dispatch
15844 * our task queue to do the match for us.
15845 */
15846 lck_mtx_lock(&dtrace_lock);
6d2010ae 15847
2d21ac55
A
15848 if (dtrace_retained == NULL) {
15849 lck_mtx_unlock(&dtrace_lock);
6d2010ae 15850 return 0;
2d21ac55 15851 }
6d2010ae 15852
6d2010ae
A
15853 /* APPLE NOTE!
15854 *
15855 * The cpu_lock mentioned above is only held by dtrace code; Apple's xnu never actually
15856 * holds it for any reason. Thus the comment above is invalid: we can directly invoke
15857 * dtrace_enabling_matchall without jumping through all the hoops, and we can avoid
15858 * the delay call as well.
15859 */
15860 lck_mtx_unlock(&dtrace_lock);
15861
15862 dtrace_enabling_matchall();
15863
15864 return 0;
2d21ac55
A
15865}
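The LC_UUID scan in dtrace_module_loaded is a standard Mach-O load-command walk. The same walk can be tried from user space against a process's own main image; this sketch assumes a 64-bit macOS process and uses the dyld query APIs from <mach-o/dyld.h>:

#include <mach-o/dyld.h>
#include <mach-o/loader.h>
#include <stdint.h>
#include <stdio.h>

/* Walk the load commands of this process's main image (image index 0)
 * and print its LC_UUID -- the same scan the kernel performs on a kext. */
int
main(void)
{
	const struct mach_header_64 *hdr =
	    (const struct mach_header_64 *)_dyld_get_image_header(0);
	const struct load_command *lc =
	    (const struct load_command *)(hdr + 1);

	for (uint32_t i = 0; i < hdr->ncmds; i++) {
		if (lc->cmd == LC_UUID) {
			const struct uuid_command *uc =
			    (const struct uuid_command *)lc;
			for (int j = 0; j < 16; j++)
				printf("%02x", uc->uuid[j]);
			printf("\n");
			return 0;
		}
		/* Advance by this command's size to reach the next one. */
		lc = (const struct load_command *)
		    ((const char *)lc + lc->cmdsize);
	}
	printf("no LC_UUID\n");
	return 1;
}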
15866
6d2010ae
A
15867/*
15868 * Return 0 on success
15869 * Return -1 on failure
15870 */
15871static int
15872dtrace_module_unloaded(struct kmod_info *kmod)
2d21ac55 15873{
6d2010ae
A
15874 dtrace_probe_t template, *probe, *first, *next;
15875 dtrace_provider_t *prov;
15876 struct modctl *ctl = NULL;
15877 struct modctl *syncctl = NULL;
15878 struct modctl *nextsyncctl = NULL;
15879 int syncmode = 0;
15880
15881 lck_mtx_lock(&dtrace_provider_lock);
15882 lck_mtx_lock(&mod_lock);
15883 lck_mtx_lock(&dtrace_lock);
2d21ac55 15884
6d2010ae
A
15885 if (kmod == NULL) {
15886 syncmode = 1;
15887 }
15888 else {
15889 ctl = dtrace_modctl_lookup(kmod);
15890 if (ctl == NULL)
15891 {
15892 lck_mtx_unlock(&dtrace_lock);
15893 lck_mtx_unlock(&mod_lock);
15894 lck_mtx_unlock(&dtrace_provider_lock);
15895 return (-1);
15896 }
15897 ctl->mod_loaded = 0;
15898 ctl->mod_address = 0;
15899 ctl->mod_size = 0;
15900 }
15901
15902 if (dtrace_bymod == NULL) {
15903 /*
15904 * The DTrace module is loaded (obviously) but not attached;
15905 * we don't have any work to do.
15906 */
15907 if (ctl != NULL)
15908 (void)dtrace_modctl_remove(ctl);
6d2010ae 15909 lck_mtx_unlock(&dtrace_lock);
fe8ab488
A
15910 lck_mtx_unlock(&mod_lock);
15911 lck_mtx_unlock(&dtrace_provider_lock);
6d2010ae
A
15912 return(0);
15913 }
15914
15915 /* Syncmode set means we target and traverse entire modctl list. */
15916 if (syncmode)
15917 nextsyncctl = dtrace_modctl_list;
15918
15919syncloop:
15920 if (syncmode)
15921 {
15922 /* find a stale modctl struct */
15923 for (syncctl = nextsyncctl; syncctl != NULL; syncctl=syncctl->mod_next) {
15924 if (syncctl->mod_address == 0)
15925 break;
15926 }
15927 if (syncctl==NULL)
15928 {
15929 /* We have no more work to do */
6d2010ae 15930 lck_mtx_unlock(&dtrace_lock);
fe8ab488
A
15931 lck_mtx_unlock(&mod_lock);
15932 lck_mtx_unlock(&dtrace_provider_lock);
6d2010ae
A
15933 return(0);
15934 }
15935 else {
15936 /* keep track of next syncctl in case this one is removed */
15937 nextsyncctl = syncctl->mod_next;
15938 ctl = syncctl;
15939 }
15940 }
15941
15942 template.dtpr_mod = ctl->mod_modname;
15943
15944 for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template);
15945 probe != NULL; probe = probe->dtpr_nextmod) {
15946 if (probe->dtpr_ecb != NULL) {
15947 /*
15948 * This shouldn't _actually_ be possible -- we're
15949 * unloading a module that has an enabled probe in it.
15950 * (It's normally up to the provider to make sure that
15951 * this can't happen.) However, because dtps_enable()
15952 * doesn't have a failure mode, there can be an
15953 * enable/unload race. Upshot: we don't want to
15954 * assert, but we're not going to disable the
15955 * probe, either.
15956 */
15957
15958
15959 if (syncmode) {
15960 /* We're syncing, let's look at next in list */
15961 goto syncloop;
15962 }
15963
6d2010ae 15964 lck_mtx_unlock(&dtrace_lock);
fe8ab488
A
15965 lck_mtx_unlock(&mod_lock);
15966 lck_mtx_unlock(&dtrace_provider_lock);
6d2010ae
A
15967
15968 if (dtrace_err_verbose) {
15969 cmn_err(CE_WARN, "unloaded module '%s' had "
15970 "enabled probes", ctl->mod_modname);
15971 }
15972 return(-1);
15973 }
15974 }
15975
15976 probe = first;
15977
15978 for (first = NULL; probe != NULL; probe = next) {
15979 ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe);
15980
15981 dtrace_probes[probe->dtpr_id - 1] = NULL;
fe8ab488 15982 probe->dtpr_provider->dtpv_probe_count--;
6d2010ae
A
15983
15984 next = probe->dtpr_nextmod;
15985 dtrace_hash_remove(dtrace_bymod, probe);
15986 dtrace_hash_remove(dtrace_byfunc, probe);
15987 dtrace_hash_remove(dtrace_byname, probe);
15988
15989 if (first == NULL) {
15990 first = probe;
15991 probe->dtpr_nextmod = NULL;
15992 } else {
15993 probe->dtpr_nextmod = first;
15994 first = probe;
15995 }
15996 }
15997
15998 /*
15999 * We've removed all of the module's probes from the hash chains and
16000 * from the probe array. Now issue a dtrace_sync() to be sure that
16001 * everyone has cleared out from any probe array processing.
16002 */
16003 dtrace_sync();
16004
16005 for (probe = first; probe != NULL; probe = first) {
16006 first = probe->dtpr_nextmod;
16007 prov = probe->dtpr_provider;
16008 prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id,
16009 probe->dtpr_arg);
16010 kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
16011 kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
16012 kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
16013 vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1);
16014
16015 zfree(dtrace_probe_t_zone, probe);
16016 }
16017
16018 dtrace_modctl_remove(ctl);
16019
16020 if (syncmode)
16021 goto syncloop;
16022
16023 lck_mtx_unlock(&dtrace_lock);
16024 lck_mtx_unlock(&mod_lock);
16025 lck_mtx_unlock(&dtrace_provider_lock);
16026
16027 return(0);
16028}
6d2010ae
A
16029
16030void
16031dtrace_suspend(void)
16032{
16033 dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend));
16034}
16035
16036void
2d21ac55
A
16037dtrace_resume(void)
16038{
16039 dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_resume));
16040}
16041
16042static int
16043dtrace_cpu_setup(cpu_setup_t what, processorid_t cpu)
16044{
16045 lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
16046 lck_mtx_lock(&dtrace_lock);
16047
16048 switch (what) {
16049 case CPU_CONFIG: {
16050 dtrace_state_t *state;
16051 dtrace_optval_t *opt, rs, c;
16052
16053 /*
16054 * For now, we only allocate a new buffer for anonymous state.
16055 */
16056 if ((state = dtrace_anon.dta_state) == NULL)
16057 break;
16058
16059 if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
16060 break;
16061
16062 opt = state->dts_options;
16063 c = opt[DTRACEOPT_CPU];
16064
16065 if (c != DTRACE_CPUALL && c != DTRACEOPT_UNSET && c != cpu)
16066 break;
16067
16068 /*
16069 * Regardless of what the actual policy is, we're going to
16070 * temporarily set our resize policy to be manual. We're
16071 * also going to temporarily set our CPU option to denote
16072 * the newly configured CPU.
16073 */
16074 rs = opt[DTRACEOPT_BUFRESIZE];
16075 opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_MANUAL;
16076 opt[DTRACEOPT_CPU] = (dtrace_optval_t)cpu;
16077
16078 (void) dtrace_state_buffers(state);
16079
16080 opt[DTRACEOPT_BUFRESIZE] = rs;
16081 opt[DTRACEOPT_CPU] = c;
16082
16083 break;
16084 }
16085
16086 case CPU_UNCONFIG:
16087 /*
16088 * We don't free the buffer in the CPU_UNCONFIG case. (The
16089 * buffer will be freed when the consumer exits.)
16090 */
16091 break;
16092
16093 default:
16094 break;
16095 }
16096
16097 lck_mtx_unlock(&dtrace_lock);
16098 return (0);
16099}
16100
16101static void
16102dtrace_cpu_setup_initial(processorid_t cpu)
16103{
16104 (void) dtrace_cpu_setup(CPU_CONFIG, cpu);
16105}
16106
16107static void
16108dtrace_toxrange_add(uintptr_t base, uintptr_t limit)
16109{
16110 if (dtrace_toxranges >= dtrace_toxranges_max) {
16111 int osize, nsize;
16112 dtrace_toxrange_t *range;
16113
16114 osize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
16115
16116 if (osize == 0) {
16117 ASSERT(dtrace_toxrange == NULL);
16118 ASSERT(dtrace_toxranges_max == 0);
16119 dtrace_toxranges_max = 1;
16120 } else {
16121 dtrace_toxranges_max <<= 1;
16122 }
16123
16124 nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
16125 range = kmem_zalloc(nsize, KM_SLEEP);
16126
16127 if (dtrace_toxrange != NULL) {
16128 ASSERT(osize != 0);
16129 bcopy(dtrace_toxrange, range, osize);
16130 kmem_free(dtrace_toxrange, osize);
16131 }
16132
16133 dtrace_toxrange = range;
16134 }
16135
fe8ab488
A
16136 ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == 0);
16137 ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == 0);
2d21ac55
A
16138
16139 dtrace_toxrange[dtrace_toxranges].dtt_base = base;
16140 dtrace_toxrange[dtrace_toxranges].dtt_limit = limit;
16141 dtrace_toxranges++;
16142}
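dtrace_toxrange_add grows its array by doubling (capacity 1, 2, 4, ...) and copying the old contents forward, giving amortized O(1) appends. The same pattern in a self-contained sketch (vec_t/vec_push are illustrative names, not kernel code):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
	long *v;
	size_t len, cap;
} vec_t;

/* Grow-by-doubling append: start at capacity 1, double when full,
 * copy the old contents over, then store the new element. */
static void
vec_push(vec_t *vec, long x)
{
	if (vec->len >= vec->cap) {
		size_t ncap = vec->cap ? vec->cap << 1 : 1;
		long *nv = calloc(ncap, sizeof(*nv));
		if (vec->v) {
			memcpy(nv, vec->v, vec->len * sizeof(*nv));
			free(vec->v);
		}
		vec->v = nv;
		vec->cap = ncap;
	}
	vec->v[vec->len++] = x;
}

int
main(void)
{
	vec_t vec = {0};
	for (long i = 0; i < 5; i++)
		vec_push(&vec, i);
	printf("len=%zu cap=%zu\n", vec.len, vec.cap);	/* len=5 cap=8 */
	free(vec.v);
	return 0;
}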
16143
16144/*
16145 * DTrace Driver Cookbook Functions
16146 */
16147/*ARGSUSED*/
16148static int
16149dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
16150{
b0d623f7 16151#pragma unused(cmd) /* __APPLE__ */
2d21ac55
A
16152 dtrace_provider_id_t id;
16153 dtrace_state_t *state = NULL;
16154 dtrace_enabling_t *enab;
16155
16156 lck_mtx_lock(&cpu_lock);
16157 lck_mtx_lock(&dtrace_provider_lock);
16158 lck_mtx_lock(&dtrace_lock);
16159
b0d623f7 16160 /* Darwin uses BSD cloning device driver to automagically obtain minor device number. */
2d21ac55
A
16161
16162 ddi_report_dev(devi);
16163 dtrace_devi = devi;
16164
16165 dtrace_modload = dtrace_module_loaded;
16166 dtrace_modunload = dtrace_module_unloaded;
16167 dtrace_cpu_init = dtrace_cpu_setup_initial;
16168 dtrace_helpers_cleanup = dtrace_helpers_destroy;
16169 dtrace_helpers_fork = dtrace_helpers_duplicate;
16170 dtrace_cpustart_init = dtrace_suspend;
16171 dtrace_cpustart_fini = dtrace_resume;
16172 dtrace_debugger_init = dtrace_suspend;
16173 dtrace_debugger_fini = dtrace_resume;
2d21ac55
A
16174
16175 register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);
16176
16177 lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
16178
16179 dtrace_arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1,
16180 NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
2d21ac55
A
16181 dtrace_taskq = taskq_create("dtrace_taskq", 1, maxclsyspri,
16182 1, INT_MAX, 0);
16183
16184 dtrace_state_cache = kmem_cache_create("dtrace_state_cache",
c910b4d9 16185 sizeof (dtrace_dstate_percpu_t) * (int)NCPU, DTRACE_STATE_ALIGN,
2d21ac55
A
16186 NULL, NULL, NULL, NULL, NULL, 0);
16187
16188 lck_mtx_assert(&cpu_lock, LCK_MTX_ASSERT_OWNED);
39037602 16189
2d21ac55
A
16190 dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod),
16191 offsetof(dtrace_probe_t, dtpr_nextmod),
16192 offsetof(dtrace_probe_t, dtpr_prevmod));
16193
16194 dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func),
16195 offsetof(dtrace_probe_t, dtpr_nextfunc),
16196 offsetof(dtrace_probe_t, dtpr_prevfunc));
16197
16198 dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name),
16199 offsetof(dtrace_probe_t, dtpr_nextname),
16200 offsetof(dtrace_probe_t, dtpr_prevname));
16201
16202 if (dtrace_retain_max < 1) {
16203 cmn_err(CE_WARN, "illegal value (%lu) for dtrace_retain_max; "
16204 "setting to 1", dtrace_retain_max);
16205 dtrace_retain_max = 1;
16206 }
16207
16208 /*
16209 * Now discover our toxic ranges.
16210 */
16211 dtrace_toxic_ranges(dtrace_toxrange_add);
16212
16213 /*
16214 * Before we register ourselves as a provider to our own framework,
16215 * we would like to assert that dtrace_provider is NULL -- but that's
16216 * not true if we were loaded as a dependency of a DTrace provider.
16217 * Once we've registered, we can assert that dtrace_provider is our
16218 * pseudo provider.
16219 */
16220 (void) dtrace_register("dtrace", &dtrace_provider_attr,
16221 DTRACE_PRIV_NONE, 0, &dtrace_provider_ops, NULL, &id);
16222
16223 ASSERT(dtrace_provider != NULL);
16224 ASSERT((dtrace_provider_id_t)dtrace_provider == id);
16225
fe8ab488 16226#if defined (__x86_64__)
2d21ac55
A
16227 dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
16228 dtrace_provider, NULL, NULL, "BEGIN", 1, NULL);
16229 dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
16230 dtrace_provider, NULL, NULL, "END", 0, NULL);
16231 dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
16232 dtrace_provider, NULL, NULL, "ERROR", 3, NULL);
2d21ac55
A
16233#else
16234#error Unknown Architecture
fe8ab488 16235#endif
2d21ac55
A
16236
16237 dtrace_anon_property();
16238 lck_mtx_unlock(&cpu_lock);
16239
16240 /*
16241 * If DTrace helper tracing is enabled, we need to allocate the
16242 * trace buffer and initialize the values.
16243 */
16244 if (dtrace_helptrace_enabled) {
16245 ASSERT(dtrace_helptrace_buffer == NULL);
16246 dtrace_helptrace_buffer =
16247 kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP);
16248 dtrace_helptrace_next = 0;
16249 }
16250
16251 /*
16252 * If there are already providers, we must ask them to provide their
16253 * probes, and then match any anonymous enabling against them. Note
16254 * that there should be no other retained enablings at this time:
16255 * the only retained enablings at this time should be the anonymous
16256 * enabling.
16257 */
16258 if (dtrace_anon.dta_enabling != NULL) {
16259 ASSERT(dtrace_retained == dtrace_anon.dta_enabling);
16260
6d2010ae 16261 /*
fe8ab488 16262 * APPLE NOTE: if handling anonymous dof, switch symbol modes.
6d2010ae
A
16263 */
16264 if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) {
16265 dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_KERNEL;
16266 }
6d2010ae 16267
2d21ac55
A
16268 dtrace_enabling_provide(NULL);
16269 state = dtrace_anon.dta_state;
16270
16271 /*
16272 * We couldn't hold cpu_lock across the above call to
16273 * dtrace_enabling_provide(), but we must hold it to actually
16274 * enable the probes. We have to drop all of our locks, pick
16275 * up cpu_lock, and regain our locks before matching the
16276 * retained anonymous enabling.
16277 */
16278 lck_mtx_unlock(&dtrace_lock);
16279 lck_mtx_unlock(&dtrace_provider_lock);
16280
16281 lck_mtx_lock(&cpu_lock);
16282 lck_mtx_lock(&dtrace_provider_lock);
16283 lck_mtx_lock(&dtrace_lock);
16284
16285 if ((enab = dtrace_anon.dta_enabling) != NULL)
39037602 16286 (void) dtrace_enabling_match(enab, NULL, NULL);
2d21ac55
A
16287
16288 lck_mtx_unlock(&cpu_lock);
16289 }
16290
16291 lck_mtx_unlock(&dtrace_lock);
16292 lck_mtx_unlock(&dtrace_provider_lock);
16293
16294 if (state != NULL) {
16295 /*
16296 * If we created any anonymous state, set it going now.
16297 */
16298 (void) dtrace_state_go(state, &dtrace_anon.dta_beganon);
16299 }
16300
16301 return (DDI_SUCCESS);
16302}
16303
2d21ac55
A
16304/*ARGSUSED*/
16305static int
16306dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
16307{
16308#pragma unused(flag, otyp)
16309 dtrace_state_t *state;
16310 uint32_t priv;
16311 uid_t uid;
16312 zoneid_t zoneid;
b0d623f7 16313 int rv;
2d21ac55 16314
fe8ab488 16315 /* APPLE: Darwin puts Helper on its own major device. */
2d21ac55
A
16316
16317 /*
16318 * If no DTRACE_PRIV_* bits are set in the credential, then the
16319 * caller lacks sufficient permission to do anything with DTrace.
16320 */
16321 dtrace_cred2priv(cred_p, &priv, &uid, &zoneid);
16322 if (priv == DTRACE_PRIV_NONE)
16323 return (EACCES);
16324
2d21ac55 16325 /*
fe8ab488 16326 * APPLE NOTE: We delay the initialization of fasttrap as late as possible.
2d21ac55
A
16327 * It certainly can't be later than now!
16328 */
16329 fasttrap_init();
2d21ac55
A
16330
16331 /*
16332 * Ask all providers to provide all their probes.
16333 */
16334 lck_mtx_lock(&dtrace_provider_lock);
16335 dtrace_probe_provide(NULL, NULL);
16336 lck_mtx_unlock(&dtrace_provider_lock);
16337
16338 lck_mtx_lock(&cpu_lock);
16339 lck_mtx_lock(&dtrace_lock);
16340 dtrace_opens++;
16341 dtrace_membar_producer();
16342
16343 /*
16344 * If the kernel debugger is active (that is, if the kernel debugger
16345 * modified text in some way), we won't allow the open.
16346 */
16347 if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
16348 dtrace_opens--;
b0d623f7 16349 lck_mtx_unlock(&dtrace_lock);
fe8ab488
A
16350 lck_mtx_unlock(&cpu_lock);
16351 return (EBUSY);
16352 }
2d21ac55 16353
fe8ab488
A
16354 rv = dtrace_state_create(devp, cred_p, &state);
16355 lck_mtx_unlock(&cpu_lock);
2d21ac55 16356
fe8ab488
A
16357 if (rv != 0 || state == NULL) {
16358 if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
16359 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
16360 lck_mtx_unlock(&dtrace_lock);
16361 /* propagate EAGAIN or ERESTART */
16362 return (rv);
16363 }
16364
16365 lck_mtx_unlock(&dtrace_lock);
2d21ac55 16366
fe8ab488 16367 lck_rw_lock_exclusive(&dtrace_dof_mode_lock);
2d21ac55 16368
fe8ab488
A
16369 /*
16370 * If we are currently lazy, transition states.
16371 *
16372 * Unlike dtrace_close, we do not need to check the
16373 * value of dtrace_opens, as any positive value (and
16374 * we count as 1) means we transition states.
16375 */
16376 if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON) {
16377 dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_OFF;
39037602
A
16378 /*
16379 * We do not need to hold the exclusive lock while processing
16380 * DOF on processes. We do need to make sure the mode does not get
16381 * changed to DTRACE_DOF_MODE_LAZY_ON during that stage though
16382 * (which should not happen anyway since it only happens in
16383 * dtrace_close). There is no way incomplete USDT probes can be
16384 * activated by any DTrace clients here, since they all have to
16385 * call dtrace_open and be blocked on dtrace_dof_mode_lock.
16386 */
16387 lck_rw_lock_exclusive_to_shared(&dtrace_dof_mode_lock);
fe8ab488
A
16388 /*
16389 * Iterate all existing processes and load lazy dofs.
16390 */
16391 proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS,
16392 dtrace_lazy_dofs_proc_iterate_doit,
16393 NULL,
16394 dtrace_lazy_dofs_proc_iterate_filter,
16395 NULL);
39037602
A
16396
16397 lck_rw_unlock_shared(&dtrace_dof_mode_lock);
16398 }
16399 else {
16400 lck_rw_unlock_exclusive(&dtrace_dof_mode_lock);
fe8ab488 16401 }
2d21ac55 16402
2d21ac55 16403
fe8ab488
A
16404 /*
16405 * Update kernel symbol state.
16406 *
16407 * We must own the provider and dtrace locks.
16408 *
16409 * NOTE! It may appear there is a race by setting this value so late
16410 * after dtrace_probe_provide. However, any kext loaded after the
16411 * call to probe provide and before we set LAZY_OFF will be marked as
16412 * eligible for symbols from userspace. The same dtrace that is currently
16413 * calling dtrace_open() (this call!) will get a list of kexts needing
16414 * symbols and fill them in, thus closing the race window.
16415 *
16416 * We want to set this value only after it is certain it will succeed, as
16417 * this significantly reduces the complexity of error exits.
16418 */
16419 lck_mtx_lock(&dtrace_lock);
16420 if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) {
16421 dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_KERNEL;
2d21ac55 16422 }
fe8ab488 16423 lck_mtx_unlock(&dtrace_lock);
2d21ac55 16424
fe8ab488
A
16425 return (0);
16426}
2d21ac55 16427
fe8ab488
A
16428/*ARGSUSED*/
16429static int
16430dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
16431{
16432#pragma unused(flag, otyp, cred_p) /* __APPLE__ */
16433 minor_t minor = getminor(dev);
16434 dtrace_state_t *state;
2d21ac55 16435
fe8ab488 16436 /* APPLE NOTE: Darwin puts Helper on its own major device. */
39037602 16437 state = dtrace_state_get(minor);
fe8ab488
A
16438
16439 lck_mtx_lock(&cpu_lock);
16440 lck_mtx_lock(&dtrace_lock);
2d21ac55 16441
fe8ab488 16442 if (state->dts_anon) {
2d21ac55 16443 /*
fe8ab488 16444 * There is anonymous state. Destroy that first.
2d21ac55 16445 */
fe8ab488
A
16446 ASSERT(dtrace_anon.dta_state == NULL);
16447 dtrace_state_destroy(state->dts_anon);
16448 }
2d21ac55 16449
fe8ab488
A
16450 dtrace_state_destroy(state);
16451 ASSERT(dtrace_opens > 0);
2d21ac55 16452
fe8ab488
A
16453 /*
16454 * Only relinquish control of the kernel debugger interface when there
16455 * are no consumers and no anonymous enablings.
16456 */
16457 if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
16458 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
16459
16460 lck_mtx_unlock(&dtrace_lock);
16461 lck_mtx_unlock(&cpu_lock);
2d21ac55 16462
fe8ab488
A
16463 /*
16464 * Lock ordering requires the dof mode lock be taken before
16465 * the dtrace_lock.
16466 */
16467 lck_rw_lock_exclusive(&dtrace_dof_mode_lock);
16468 lck_mtx_lock(&dtrace_lock);
16469
16470 if (dtrace_opens == 0) {
16471 /*
16472 * If we are currently lazy-off, and this is the last close, transition to
16473 * lazy state.
16474 */
16475 if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF) {
16476 dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
2d21ac55
A
16477 }
16478
fe8ab488
A
16479 /*
16480 * If we are the last dtrace client, switch back to lazy (from userspace) symbols
16481 */
16482 if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_KERNEL) {
16483 dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE;
16484 }
2d21ac55 16485 }
fe8ab488
A
16486
16487 lck_mtx_unlock(&dtrace_lock);
16488 lck_rw_unlock_exclusive(&dtrace_dof_mode_lock);
16489
16490 /*
16491 * Kext probes may be retained past the end of the kext's lifespan. The
16492 * probes are kept until the last reference to them has been removed.
16493 * Since closing an active dtrace context is likely to drop that last reference,
16494 * let's take a shot at cleaning out the orphaned probes now.
16495 */
16496 dtrace_module_unloaded(NULL);
2d21ac55 16497
fe8ab488 16498 return (0);
2d21ac55 16499}
fe8ab488 16500
2d21ac55
A
16501/*ARGSUSED*/
16502static int
b0d623f7 16503dtrace_ioctl_helper(u_long cmd, caddr_t arg, int *rv)
2d21ac55 16504{
b0d623f7
A
16505#pragma unused(rv)
16506 /*
16507 * Safe to check this outside the dof mode lock
16508 */
16509 if (dtrace_dof_mode == DTRACE_DOF_MODE_NEVER)
16510 return KERN_SUCCESS;
2d21ac55
A
16511
16512 switch (cmd) {
39236c6e
A
16513 case DTRACEHIOC_ADDDOF:
16514 {
b0d623f7
A
16515 dof_helper_t *dhp = NULL;
16516 size_t dof_ioctl_data_size;
16517 dof_ioctl_data_t* multi_dof;
16518 unsigned int i;
16519 int rval = 0;
16520 user_addr_t user_address = *(user_addr_t*)arg;
16521 uint64_t dof_count;
16522 int multi_dof_claimed = 0;
16523 proc_t* p = current_proc();
2d21ac55 16524
b0d623f7
A
16525 /*
16526 * Read the number of DOF sections being passed in.
16527 */
16528 if (copyin(user_address + offsetof(dof_ioctl_data_t, dofiod_count),
16529 &dof_count,
16530 sizeof(dof_count))) {
16531 dtrace_dof_error(NULL, "failed to copyin dofiod_count");
16532 return (EFAULT);
16533 }
16534
16535 /*
16536 * Range check the count.
16537 */
16538 if (dof_count == 0 || dof_count > 1024) {
16539 dtrace_dof_error(NULL, "dofiod_count is not valid");
16540 return (EINVAL);
16541 }
16542
16543 /*
16544 * Allocate a correctly sized structure and copyin the data.
16545 */
16546 dof_ioctl_data_size = DOF_IOCTL_DATA_T_SIZE(dof_count);
16547 if ((multi_dof = kmem_alloc(dof_ioctl_data_size, KM_SLEEP)) == NULL)
16548 return (ENOMEM);
16549
16550 /* NOTE! We can no longer exit this method via return */
16551 if (copyin(user_address, multi_dof, dof_ioctl_data_size) != 0) {
16552 dtrace_dof_error(NULL, "failed copyin of dof_ioctl_data_t");
16553 rval = EFAULT;
16554 goto cleanup;
16555 }
16556
16557 /*
16558 * Check that the count didn't change between the first copyin and the second.
16559 */
16560 if (multi_dof->dofiod_count != dof_count) {
16561 rval = EINVAL;
16562 goto cleanup;
16563 }
16564
16565 /*
16566 * Try to process lazily first.
16567 */
16568 rval = dtrace_lazy_dofs_add(p, multi_dof, &multi_dof_claimed);
16569
16570 /*
16571 * If rval is EACCES, we must be non-lazy.
16572 */
16573 if (rval == EACCES) {
16574 rval = 0;
16575 /*
16576 * Process each dof_helper_t
16577 */
16578 i = 0;
16579 do {
16580 dhp = &multi_dof->dofiod_helpers[i];
16581
16582 dof_hdr_t *dof = dtrace_dof_copyin(dhp->dofhp_dof, &rval);
16583
16584 if (dof != NULL) {
16585 lck_mtx_lock(&dtrace_lock);
16586
16587 /*
16588 * dtrace_helper_slurp() takes responsibility for the dof --
16589 * it may free it now or it may save it and free it later.
16590 */
16591 if ((dhp->dofhp_dof = (uint64_t)dtrace_helper_slurp(p, dof, dhp)) == -1ULL) {
16592 rval = EINVAL;
16593 }
16594
16595 lck_mtx_unlock(&dtrace_lock);
16596 }
16597 } while (++i < multi_dof->dofiod_count && rval == 0);
16598 }
16599
16600 /*
16601 * We need to copyout the multi_dof struct, because it contains
16602 * the generation (unique id) values needed to call DTRACEHIOC_REMOVE
16603 *
16604 * This could certainly be better optimized.
16605 */
16606 if (copyout(multi_dof, user_address, dof_ioctl_data_size) != 0) {
16607 dtrace_dof_error(NULL, "failed copyout of dof_ioctl_data_t");
16608 /* Don't overwrite pre-existing error code */
16609 if (rval == 0) rval = EFAULT;
16610 }
16611
16612 cleanup:
16613 /*
16614 * If we had to allocate struct memory, free it.
16615 */
16616 if (multi_dof != NULL && !multi_dof_claimed) {
16617 kmem_free(multi_dof, dof_ioctl_data_size);
16618 }
16619
16620 return rval;
16621 }
16622
16623 case DTRACEHIOC_REMOVE: {
16624 int generation = *(int*)arg;
16625 proc_t* p = current_proc();
16626
16627 /*
16628 * Try lazy first.
16629 */
16630 int rval = dtrace_lazy_dofs_remove(p, generation);
16631
16632 /*
16633 * EACCES means non-lazy
16634 */
16635 if (rval == EACCES) {
16636 lck_mtx_lock(&dtrace_lock);
16637 rval = dtrace_helper_destroygen(p, generation);
16638 lck_mtx_unlock(&dtrace_lock);
16639 }
16640
16641 return (rval);
16642 }
16643
16644 default:
16645 break;
16646 }
16647
16648 return ENOTTY;
16649}
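The ADDDOF path above defends against a caller racing the two copyins: it fetches dofiod_count, range-checks it, sizes and copies the full block, then re-checks that the count did not change between the two fetches. A user-space model of that double-fetch defense, with fake_copyin standing in for the kernel's copyin and blob_t as a hypothetical stand-in for dof_ioctl_data_t:

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
	uint64_t count;
	uint64_t entries[];	/* 'count' entries follow */
} blob_t;

/* Stand-in for copyin(9): pretend this crosses the user/kernel boundary. */
static int
fake_copyin(const void *uaddr, void *kaddr, size_t len)
{
	memcpy(kaddr, uaddr, len);
	return 0;
}

static int
slurp_blob(const blob_t *uaddr, blob_t **out)
{
	uint64_t count;

	if (fake_copyin(&uaddr->count, &count, sizeof(count)))
		return EFAULT;
	if (count == 0 || count > 1024)		/* range check first fetch */
		return EINVAL;

	size_t size = sizeof(blob_t) + count * sizeof(uint64_t);
	blob_t *kb = malloc(size);
	if (kb == NULL)
		return ENOMEM;
	/* Full copy, then verify the count seen by both fetches matches. */
	if (fake_copyin(uaddr, kb, size) || kb->count != count) {
		free(kb);
		return EINVAL;
	}
	*out = kb;
	return 0;
}

int
main(void)
{
	blob_t *src = malloc(sizeof(blob_t) + 2 * sizeof(uint64_t));
	src->count = 2;
	src->entries[0] = 7;
	src->entries[1] = 9;
	blob_t *kb = NULL;
	int rv = slurp_blob(src, &kb);
	printf("rv=%d count=%llu\n", rv,
	    kb ? (unsigned long long)kb->count : 0ULL);
	free(kb);
	free(src);
	return 0;
}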
16650
16651/*ARGSUSED*/
16652static int
16653dtrace_ioctl(dev_t dev, u_long cmd, user_addr_t arg, int md, cred_t *cr, int *rv)
16654{
16655#pragma unused(md)
16656 minor_t minor = getminor(dev);
16657 dtrace_state_t *state;
16658 int rval;
16659
16660 /* Darwin puts Helper on its own major device. */
16661
39037602 16662 state = dtrace_state_get(minor);
16663
16664 if (state->dts_anon) {
16665 ASSERT(dtrace_anon.dta_state == NULL);
16666 state = state->dts_anon;
16667 }
16668
16669 switch (cmd) {
16670 case DTRACEIOC_PROVIDER: {
16671 dtrace_providerdesc_t pvd;
16672 dtrace_provider_t *pvp;
16673
16674 if (copyin(arg, &pvd, sizeof (pvd)) != 0)
16675 return (EFAULT);
16676
16677 pvd.dtvd_name[DTRACE_PROVNAMELEN - 1] = '\0';
16678 lck_mtx_lock(&dtrace_provider_lock);
16679
16680 for (pvp = dtrace_provider; pvp != NULL; pvp = pvp->dtpv_next) {
16681 if (strncmp(pvp->dtpv_name, pvd.dtvd_name, DTRACE_PROVNAMELEN) == 0)
16682 break;
16683 }
16684
16685 lck_mtx_unlock(&dtrace_provider_lock);
16686
16687 if (pvp == NULL)
16688 return (ESRCH);
16689
16690 bcopy(&pvp->dtpv_priv, &pvd.dtvd_priv, sizeof (dtrace_ppriv_t));
16691 bcopy(&pvp->dtpv_attr, &pvd.dtvd_attr, sizeof (dtrace_pattr_t));
16692 if (copyout(&pvd, arg, sizeof (pvd)) != 0)
16693 return (EFAULT);
16694
16695 return (0);
16696 }
16697
16698 case DTRACEIOC_EPROBE: {
16699 dtrace_eprobedesc_t epdesc;
16700 dtrace_ecb_t *ecb;
16701 dtrace_action_t *act;
16702 void *buf;
16703 size_t size;
16704 uintptr_t dest;
16705 int nrecs;
16706
16707 if (copyin(arg, &epdesc, sizeof (epdesc)) != 0)
16708 return (EFAULT);
16709
16710 lck_mtx_lock(&dtrace_lock);
16711
16712 if ((ecb = dtrace_epid2ecb(state, epdesc.dtepd_epid)) == NULL) {
16713 lck_mtx_unlock(&dtrace_lock);
16714 return (EINVAL);
16715 }
16716
16717 if (ecb->dte_probe == NULL) {
16718 lck_mtx_unlock(&dtrace_lock);
16719 return (EINVAL);
16720 }
16721
16722 epdesc.dtepd_probeid = ecb->dte_probe->dtpr_id;
16723 epdesc.dtepd_uarg = ecb->dte_uarg;
16724 epdesc.dtepd_size = ecb->dte_size;
16725
16726 nrecs = epdesc.dtepd_nrecs;
16727 epdesc.dtepd_nrecs = 0;
16728 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
16729 if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
16730 continue;
16731
16732 epdesc.dtepd_nrecs++;
16733 }
16734
16735 /*
16736 * Now that we have the size, we need to allocate a temporary
16737 * buffer in which to store the complete description. We need
16738 * the temporary buffer to be able to drop dtrace_lock()
16739 * across the copyout(), below.
16740 */
16741 size = sizeof (dtrace_eprobedesc_t) +
16742 (epdesc.dtepd_nrecs * sizeof (dtrace_recdesc_t));
16743
16744 buf = kmem_alloc(size, KM_SLEEP);
16745 dest = (uintptr_t)buf;
16746
16747 bcopy(&epdesc, (void *)dest, sizeof (epdesc));
16748 dest += offsetof(dtrace_eprobedesc_t, dtepd_rec[0]);
16749
16750 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
16751 if (DTRACEACT_ISAGG(act->dta_kind) || act->dta_intuple)
16752 continue;
16753
16754 if (nrecs-- == 0)
16755 break;
16756
16757 bcopy(&act->dta_rec, (void *)dest,
16758 sizeof (dtrace_recdesc_t));
16759 dest += sizeof (dtrace_recdesc_t);
16760 }
16761
16762 lck_mtx_unlock(&dtrace_lock);
16763
16764 if (copyout(buf, arg, dest - (uintptr_t)buf) != 0) {
16765 kmem_free(buf, size);
16766 return (EFAULT);
16767 }
16768
16769 kmem_free(buf, size);
16770 return (0);
16771 }
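/*
 * A hedged consumer-side sketch of how the variable-length description
 * built above is typically consumed: ask with some capacity, and if the
 * kernel reports more records than fit, grow the buffer and retry.
 * dt_ioctl() is a hypothetical wrapper that hands the descriptor's user
 * address to the driver the way _dtrace_ioctl() (at the end of this file)
 * expects; `epid' and `fd' are assumed. Not part of this file.
 */
#if 0
	dtrace_eprobedesc_t *epd = NULL;
	size_t nrecs = 1;

	for (;;) {
		size_t size = sizeof (dtrace_eprobedesc_t) +
		    (nrecs - 1) * sizeof (dtrace_recdesc_t);

		epd = calloc(1, size);
		epd->dtepd_epid = epid;
		epd->dtepd_nrecs = nrecs;

		if (dt_ioctl(fd, DTRACEIOC_EPROBE, epd) != 0)
			break;			/* EINVAL, EFAULT, ... */

		if (epd->dtepd_nrecs <= nrecs)
			break;			/* all records fit; epd is complete */

		nrecs = epd->dtepd_nrecs;	/* kernel reported the real count */
		free(epd);			/* grow and retry */
	}
#endif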
16772
16773 case DTRACEIOC_AGGDESC: {
16774 dtrace_aggdesc_t aggdesc;
16775 dtrace_action_t *act;
16776 dtrace_aggregation_t *agg;
16777 int nrecs;
16778 uint32_t offs;
16779 dtrace_recdesc_t *lrec;
16780 void *buf;
16781 size_t size;
16782 uintptr_t dest;
16783
16784 if (copyin(arg, &aggdesc, sizeof (aggdesc)) != 0)
16785 return (EFAULT);
16786
16787 lck_mtx_lock(&dtrace_lock);
16788
16789 if ((agg = dtrace_aggid2agg(state, aggdesc.dtagd_id)) == NULL) {
16790 lck_mtx_unlock(&dtrace_lock);
16791 return (EINVAL);
16792 }
16793
16794 aggdesc.dtagd_epid = agg->dtag_ecb->dte_epid;
16795
16796 nrecs = aggdesc.dtagd_nrecs;
16797 aggdesc.dtagd_nrecs = 0;
16798
16799 offs = agg->dtag_base;
16800 lrec = &agg->dtag_action.dta_rec;
16801 aggdesc.dtagd_size = lrec->dtrd_offset + lrec->dtrd_size - offs;
16802
16803 for (act = agg->dtag_first; ; act = act->dta_next) {
16804 ASSERT(act->dta_intuple ||
16805 DTRACEACT_ISAGG(act->dta_kind));
16806
16807 /*
16808 * If this action has a record size of zero, it
16809 * denotes an argument to the aggregating action.
16810 * Because the presence of this record doesn't (or
16811 * shouldn't) affect the way the data is interpreted,
 16812			 * we don't copy it out; this spares user level the
 16813			 * confusion of dealing with a zero-length record.
16814 */
16815 if (act->dta_rec.dtrd_size == 0) {
16816 ASSERT(agg->dtag_hasarg);
16817 continue;
16818 }
16819
16820 aggdesc.dtagd_nrecs++;
16821
16822 if (act == &agg->dtag_action)
16823 break;
16824 }
16825
16826 /*
16827 * Now that we have the size, we need to allocate a temporary
16828 * buffer in which to store the complete description. We need
16829 * the temporary buffer to be able to drop dtrace_lock()
16830 * across the copyout(), below.
16831 */
16832 size = sizeof (dtrace_aggdesc_t) +
16833 (aggdesc.dtagd_nrecs * sizeof (dtrace_recdesc_t));
16834
16835 buf = kmem_alloc(size, KM_SLEEP);
16836 dest = (uintptr_t)buf;
16837
16838 bcopy(&aggdesc, (void *)dest, sizeof (aggdesc));
16839 dest += offsetof(dtrace_aggdesc_t, dtagd_rec[0]);
16840
16841 for (act = agg->dtag_first; ; act = act->dta_next) {
16842 dtrace_recdesc_t rec = act->dta_rec;
16843
16844 /*
16845 * See the comment in the above loop for why we pass
16846 * over zero-length records.
16847 */
16848 if (rec.dtrd_size == 0) {
16849 ASSERT(agg->dtag_hasarg);
16850 continue;
16851 }
16852
16853 if (nrecs-- == 0)
16854 break;
16855
16856 rec.dtrd_offset -= offs;
16857 bcopy(&rec, (void *)dest, sizeof (rec));
16858 dest += sizeof (dtrace_recdesc_t);
16859
16860 if (act == &agg->dtag_action)
16861 break;
16862 }
16863
16864 lck_mtx_unlock(&dtrace_lock);
16865
16866 if (copyout(buf, arg, dest - (uintptr_t)buf) != 0) {
16867 kmem_free(buf, size);
16868 return (EFAULT);
16869 }
16870
16871 kmem_free(buf, size);
16872 return (0);
16873 }
16874
16875 case DTRACEIOC_ENABLE: {
16876 dof_hdr_t *dof;
16877 dtrace_enabling_t *enab = NULL;
16878 dtrace_vstate_t *vstate;
16879 int err = 0;
16880
16881 *rv = 0;
16882
16883 /*
16884 * If a NULL argument has been passed, we take this as our
16885 * cue to reevaluate our enablings.
16886 */
fe8ab488 16887 if (arg == 0) {
16888 dtrace_enabling_matchall();
16889
16890 return (0);
16891 }
16892
16893 if ((dof = dtrace_dof_copyin(arg, &rval)) == NULL)
16894 return (rval);
16895
16896 lck_mtx_lock(&cpu_lock);
16897 lck_mtx_lock(&dtrace_lock);
16898 vstate = &state->dts_vstate;
16899
16900 if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
16901 lck_mtx_unlock(&dtrace_lock);
16902 lck_mtx_unlock(&cpu_lock);
16903 dtrace_dof_destroy(dof);
16904 return (EBUSY);
16905 }
16906
16907 if (dtrace_dof_slurp(dof, vstate, cr, &enab, 0, B_TRUE) != 0) {
16908 lck_mtx_unlock(&dtrace_lock);
16909 lck_mtx_unlock(&cpu_lock);
16910 dtrace_dof_destroy(dof);
16911 return (EINVAL);
16912 }
16913
16914 if ((rval = dtrace_dof_options(dof, state)) != 0) {
16915 dtrace_enabling_destroy(enab);
16916 lck_mtx_unlock(&dtrace_lock);
16917 lck_mtx_unlock(&cpu_lock);
16918 dtrace_dof_destroy(dof);
16919 return (rval);
16920 }
16921
39037602 16922 if ((err = dtrace_enabling_match(enab, rv, NULL)) == 0) {
16923 err = dtrace_enabling_retain(enab);
16924 } else {
16925 dtrace_enabling_destroy(enab);
16926 }
16927
b0d623f7 16928 lck_mtx_unlock(&dtrace_lock);
fe8ab488 16929 lck_mtx_unlock(&cpu_lock);
16930 dtrace_dof_destroy(dof);
16931
16932 return (err);
16933 }
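/*
 * A hedged consumer-side sketch of this enabling path. dt_ioctl_rv() is a
 * hypothetical wrapper that passes the DOF image's user address and decodes
 * the Darwin rv channel of _dtrace_ioctl() (see the end of this file) back
 * into a count; `dof' and `fd' are assumed. Not part of this file.
 */
#if 0
	int matched = 0;

	/* A NULL/zero argument just re-evaluates the existing enablings. */
	if (dt_ioctl_rv(fd, DTRACEIOC_ENABLE, dof, &matched) == 0)
		printf("enabled %d probes\n", matched);
#endif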
16934
16935 case DTRACEIOC_REPLICATE: {
16936 dtrace_repldesc_t desc;
16937 dtrace_probedesc_t *match = &desc.dtrpd_match;
16938 dtrace_probedesc_t *create = &desc.dtrpd_create;
16939 int err;
16940
16941 if (copyin(arg, &desc, sizeof (desc)) != 0)
16942 return (EFAULT);
16943
16944 match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
16945 match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
16946 match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
16947 match->dtpd_name[DTRACE_NAMELEN - 1] = '\0';
16948
16949 create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
16950 create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
16951 create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
16952 create->dtpd_name[DTRACE_NAMELEN - 1] = '\0';
16953
16954 lck_mtx_lock(&dtrace_lock);
16955 err = dtrace_enabling_replicate(state, match, create);
16956 lck_mtx_unlock(&dtrace_lock);
16957
16958 return (err);
16959 }
16960
16961 case DTRACEIOC_PROBEMATCH:
16962 case DTRACEIOC_PROBES: {
16963 dtrace_probe_t *probe = NULL;
16964 dtrace_probedesc_t desc;
16965 dtrace_probekey_t pkey;
16966 dtrace_id_t i;
16967 int m = 0;
16968 uint32_t priv;
16969 uid_t uid;
16970 zoneid_t zoneid;
16971
16972 if (copyin(arg, &desc, sizeof (desc)) != 0)
16973 return (EFAULT);
16974
16975 desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
16976 desc.dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
16977 desc.dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
16978 desc.dtpd_name[DTRACE_NAMELEN - 1] = '\0';
16979
16980 /*
16981 * Before we attempt to match this probe, we want to give
16982 * all providers the opportunity to provide it.
16983 */
16984 if (desc.dtpd_id == DTRACE_IDNONE) {
16985 lck_mtx_lock(&dtrace_provider_lock);
16986 dtrace_probe_provide(&desc, NULL);
16987 lck_mtx_unlock(&dtrace_provider_lock);
16988 desc.dtpd_id++;
16989 }
16990
16991 if (cmd == DTRACEIOC_PROBEMATCH) {
16992 dtrace_probekey(&desc, &pkey);
16993 pkey.dtpk_id = DTRACE_IDNONE;
16994 }
16995
16996 dtrace_cred2priv(cr, &priv, &uid, &zoneid);
16997
16998 lck_mtx_lock(&dtrace_lock);
16999
17000 if (cmd == DTRACEIOC_PROBEMATCH) {
17001 /* Quiet compiler warning */
17002 for (i = desc.dtpd_id; i <= (dtrace_id_t)dtrace_nprobes; i++) {
17003 if ((probe = dtrace_probes[i - 1]) != NULL &&
17004 (m = dtrace_match_probe(probe, &pkey,
17005 priv, uid, zoneid)) != 0)
17006 break;
17007 }
17008
17009 if (m < 0) {
17010 lck_mtx_unlock(&dtrace_lock);
17011 return (EINVAL);
17012 }
17013
17014 } else {
17015 /* Quiet compiler warning */
17016 for (i = desc.dtpd_id; i <= (dtrace_id_t)dtrace_nprobes; i++) {
17017 if ((probe = dtrace_probes[i - 1]) != NULL &&
17018 dtrace_match_priv(probe, priv, uid, zoneid))
17019 break;
17020 }
17021 }
17022
17023 if (probe == NULL) {
17024 lck_mtx_unlock(&dtrace_lock);
17025 return (ESRCH);
17026 }
17027
17028 dtrace_probe_description(probe, &desc);
17029 lck_mtx_unlock(&dtrace_lock);
17030
17031 if (copyout(&desc, arg, sizeof (desc)) != 0)
17032 return (EFAULT);
17033
17034 return (0);
17035 }
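/*
 * A hedged consumer-side sketch of walking all visible probes with the
 * case above. dt_ioctl() is a hypothetical wrapper that hands the
 * descriptor's user address to the driver (see _dtrace_ioctl() at the end
 * of this file); `fd' is assumed. Each call returns the first matching
 * probe at or after dtpd_id, so bumping dtpd_id resumes the walk.
 */
#if 0
	dtrace_probedesc_t pd;

	bzero(&pd, sizeof (pd));
	pd.dtpd_id = DTRACE_IDNONE;

	while (dt_ioctl(fd, DTRACEIOC_PROBES, &pd) == 0) {
		printf("%5u %s:%s:%s:%s\n", pd.dtpd_id, pd.dtpd_provider,
		    pd.dtpd_mod, pd.dtpd_func, pd.dtpd_name);
		pd.dtpd_id++;		/* continue after this probe */
	}
	/* The loop ends with ESRCH once no probe remains. */
#endif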
17036
17037 case DTRACEIOC_PROBEARG: {
17038 dtrace_argdesc_t desc;
17039 dtrace_probe_t *probe;
17040 dtrace_provider_t *prov;
17041
17042 if (copyin(arg, &desc, sizeof (desc)) != 0)
17043 return (EFAULT);
17044
17045 if (desc.dtargd_id == DTRACE_IDNONE)
17046 return (EINVAL);
17047
17048 if (desc.dtargd_ndx == DTRACE_ARGNONE)
17049 return (EINVAL);
17050
17051 lck_mtx_lock(&dtrace_provider_lock);
17052 lck_mtx_lock(&mod_lock);
17053 lck_mtx_lock(&dtrace_lock);
17054
17055 /* Quiet compiler warning */
17056 if (desc.dtargd_id > (dtrace_id_t)dtrace_nprobes) {
17057 lck_mtx_unlock(&dtrace_lock);
17058 lck_mtx_unlock(&mod_lock);
17059 lck_mtx_unlock(&dtrace_provider_lock);
17060 return (EINVAL);
17061 }
17062
17063 if ((probe = dtrace_probes[desc.dtargd_id - 1]) == NULL) {
17064 lck_mtx_unlock(&dtrace_lock);
17065 lck_mtx_unlock(&mod_lock);
17066 lck_mtx_unlock(&dtrace_provider_lock);
17067 return (EINVAL);
17068 }
17069
17070 lck_mtx_unlock(&dtrace_lock);
17071
17072 prov = probe->dtpr_provider;
17073
17074 if (prov->dtpv_pops.dtps_getargdesc == NULL) {
17075 /*
17076 * There isn't any typed information for this probe.
17077 * Set the argument number to DTRACE_ARGNONE.
17078 */
17079 desc.dtargd_ndx = DTRACE_ARGNONE;
17080 } else {
17081 desc.dtargd_native[0] = '\0';
17082 desc.dtargd_xlate[0] = '\0';
17083 desc.dtargd_mapping = desc.dtargd_ndx;
17084
17085 prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg,
17086 probe->dtpr_id, probe->dtpr_arg, &desc);
17087 }
17088
17089 lck_mtx_unlock(&mod_lock);
17090 lck_mtx_unlock(&dtrace_provider_lock);
17091
17092 if (copyout(&desc, arg, sizeof (desc)) != 0)
17093 return (EFAULT);
17094
17095 return (0);
17096 }
17097
17098 case DTRACEIOC_GO: {
17099 processorid_t cpuid;
17100 rval = dtrace_state_go(state, &cpuid);
17101
17102 if (rval != 0)
17103 return (rval);
17104
17105 if (copyout(&cpuid, arg, sizeof (cpuid)) != 0)
17106 return (EFAULT);
17107
17108 return (0);
17109 }
17110
17111 case DTRACEIOC_STOP: {
17112 processorid_t cpuid;
17113
17114 lck_mtx_lock(&dtrace_lock);
17115 rval = dtrace_state_stop(state, &cpuid);
17116 lck_mtx_unlock(&dtrace_lock);
17117
17118 if (rval != 0)
17119 return (rval);
17120
17121 if (copyout(&cpuid, arg, sizeof (cpuid)) != 0)
17122 return (EFAULT);
17123
17124 return (0);
17125 }
17126
17127 case DTRACEIOC_DOFGET: {
17128 dof_hdr_t hdr, *dof;
17129 uint64_t len;
17130
17131 if (copyin(arg, &hdr, sizeof (hdr)) != 0)
17132 return (EFAULT);
17133
17134 lck_mtx_lock(&dtrace_lock);
17135 dof = dtrace_dof_create(state);
17136 lck_mtx_unlock(&dtrace_lock);
17137
17138 len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz);
17139 rval = copyout(dof, arg, len);
17140 dtrace_dof_destroy(dof);
17141
17142 return (rval == 0 ? 0 : EFAULT);
17143 }
17144
17145 case DTRACEIOC_SLEEP: {
17146 int64_t time;
17147 uint64_t abstime;
17148 uint64_t rvalue = DTRACE_WAKE_TIMEOUT;
17149
17150 if (copyin(arg, &time, sizeof(time)) != 0)
17151 return (EFAULT);
17152
17153 nanoseconds_to_absolutetime((uint64_t)time, &abstime);
17154 clock_absolutetime_interval_to_deadline(abstime, &abstime);
17155
17156 if (assert_wait_deadline(state, THREAD_ABORTSAFE, abstime) == THREAD_WAITING) {
17157 if (state->dts_buf_over_limit > 0) {
17158 clear_wait(current_thread(), THREAD_INTERRUPTED);
17159 rvalue = DTRACE_WAKE_BUF_LIMIT;
17160 } else {
17161 thread_block(THREAD_CONTINUE_NULL);
17162 if (state->dts_buf_over_limit > 0) {
17163 rvalue = DTRACE_WAKE_BUF_LIMIT;
17164 }
17165 }
17166 }
17167
17168 if (copyout(&rvalue, arg, sizeof(rvalue)) != 0)
17169 return (EFAULT);
17170
17171 return (0);
17172 }
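/*
 * A hedged consumer-side sketch of the sleep/wake protocol above: the
 * caller passes a timeout in nanoseconds and the kernel overwrites the
 * same eight-byte slot with the wakeup reason. dt_ioctl() is a
 * hypothetical wrapper (as in the earlier sketches); `fd' is assumed.
 * Not part of this file.
 */
#if 0
	uint64_t arg = 500000000ULL;	/* 500ms timeout, in nanoseconds */

	if (dt_ioctl(fd, DTRACEIOC_SLEEP, &arg) == 0) {
		if (arg == DTRACE_WAKE_BUF_LIMIT) {
			/* a buffer crossed its limit: snapshot it promptly */
		} else {
			/* DTRACE_WAKE_TIMEOUT: routine periodic work */
		}
	}
#endif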
17173
17174 case DTRACEIOC_SIGNAL: {
17175 wakeup(state);
17176 return (0);
17177 }
17178
17179 case DTRACEIOC_AGGSNAP:
17180 case DTRACEIOC_BUFSNAP: {
17181 dtrace_bufdesc_t desc;
17182 caddr_t cached;
39037602 17183 boolean_t over_limit;
17184 dtrace_buffer_t *buf;
17185
17186 if (copyin(arg, &desc, sizeof (desc)) != 0)
17187 return (EFAULT);
17188
17189 if ((int)desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU)
17190 return (EINVAL);
17191
17192 lck_mtx_lock(&dtrace_lock);
17193
17194 if (cmd == DTRACEIOC_BUFSNAP) {
17195 buf = &state->dts_buffer[desc.dtbd_cpu];
17196 } else {
17197 buf = &state->dts_aggbuffer[desc.dtbd_cpu];
17198 }
17199
17200 if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) {
17201 size_t sz = buf->dtb_offset;
17202
17203 if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) {
17204 lck_mtx_unlock(&dtrace_lock);
17205 return (EBUSY);
17206 }
17207
17208 /*
17209 * If this buffer has already been consumed, we're
17210 * going to indicate that there's nothing left here
17211 * to consume.
17212 */
17213 if (buf->dtb_flags & DTRACEBUF_CONSUMED) {
17214 lck_mtx_unlock(&dtrace_lock);
17215
17216 desc.dtbd_size = 0;
17217 desc.dtbd_drops = 0;
17218 desc.dtbd_errors = 0;
17219 desc.dtbd_oldest = 0;
17220 sz = sizeof (desc);
17221
17222 if (copyout(&desc, arg, sz) != 0)
17223 return (EFAULT);
17224
17225 return (0);
17226 }
17227
17228 /*
17229 * If this is a ring buffer that has wrapped, we want
17230 * to copy the whole thing out.
17231 */
17232 if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
17233 dtrace_buffer_polish(buf);
17234 sz = buf->dtb_size;
17235 }
17236
17237 if (copyout(buf->dtb_tomax, (user_addr_t)desc.dtbd_data, sz) != 0) {
17238 lck_mtx_unlock(&dtrace_lock);
17239 return (EFAULT);
17240 }
17241
17242 desc.dtbd_size = sz;
17243 desc.dtbd_drops = buf->dtb_drops;
17244 desc.dtbd_errors = buf->dtb_errors;
17245 desc.dtbd_oldest = buf->dtb_xamot_offset;
04b8595b 17246 desc.dtbd_timestamp = dtrace_gethrtime();
17247
17248 lck_mtx_unlock(&dtrace_lock);
17249
17250 if (copyout(&desc, arg, sizeof (desc)) != 0)
17251 return (EFAULT);
17252
17253 buf->dtb_flags |= DTRACEBUF_CONSUMED;
17254
17255 return (0);
17256 }
17257
17258 if (buf->dtb_tomax == NULL) {
17259 ASSERT(buf->dtb_xamot == NULL);
17260 lck_mtx_unlock(&dtrace_lock);
17261 return (ENOENT);
17262 }
17263
17264 cached = buf->dtb_tomax;
17265 over_limit = buf->dtb_cur_limit == buf->dtb_size;
17266
17267 ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
17268
17269 dtrace_xcall(desc.dtbd_cpu,
17270 (dtrace_xcall_t)dtrace_buffer_switch, buf);
17271
17272 state->dts_errors += buf->dtb_xamot_errors;
17273
17274 /*
17275 * If the buffers did not actually switch, then the cross call
17276 * did not take place -- presumably because the given CPU is
17277 * not in the ready set. If this is the case, we'll return
17278 * ENOENT.
17279 */
17280 if (buf->dtb_tomax == cached) {
17281 ASSERT(buf->dtb_xamot != cached);
17282 lck_mtx_unlock(&dtrace_lock);
17283 return (ENOENT);
17284 }
17285
17286 ASSERT(cached == buf->dtb_xamot);
 17287			/*
 17288			 * At this point we know the buffers have switched, so we
 17289			 * can decrement the over-limit count if the old buffer was
 17290			 * over its limit. The new buffer may already be over its
 17291			 * limit, but we don't care: nothing is checking the
 17292			 * over-limit count at this point.
 17293			 */
17294 if (over_limit) {
17295 uint32_t old = atomic_add_32(&state->dts_buf_over_limit, -1);
17296 #pragma unused(old)
17297
17298 /*
17299 * Verify that we didn't underflow the value
17300 */
17301 ASSERT(old != 0);
17302 }
17303
17304 /*
17305 * We have our snapshot; now copy it out.
17306 */
17307 if (copyout(buf->dtb_xamot, (user_addr_t)desc.dtbd_data,
17308 buf->dtb_xamot_offset) != 0) {
17309 lck_mtx_unlock(&dtrace_lock);
17310 return (EFAULT);
17311 }
17312
17313 desc.dtbd_size = buf->dtb_xamot_offset;
17314 desc.dtbd_drops = buf->dtb_xamot_drops;
17315 desc.dtbd_errors = buf->dtb_xamot_errors;
17316 desc.dtbd_oldest = 0;
04b8595b 17317 desc.dtbd_timestamp = buf->dtb_switched;
17318
17319 lck_mtx_unlock(&dtrace_lock);
17320
17321 /*
17322 * Finally, copy out the buffer description.
17323 */
17324 if (copyout(&desc, arg, sizeof (desc)) != 0)
17325 return (EFAULT);
17326
17327 return (0);
17328 }
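/*
 * A hedged consumer-side sketch of snapshotting one CPU's principal buffer
 * through the case above. dt_ioctl() is a hypothetical wrapper (as in the
 * earlier sketches); `fd', `cpu' and `bufsize' (the configured per-CPU
 * buffer size) are assumed, as is the userspace DTRACE_PTR definition that
 * makes dtbd_data a plain pointer. Not part of this file.
 */
#if 0
	dtrace_bufdesc_t db;

	bzero(&db, sizeof (db));
	db.dtbd_cpu = cpu;
	db.dtbd_data = malloc(bufsize);

	if (dt_ioctl(fd, DTRACEIOC_BUFSNAP, &db) == 0) {
		/*
		 * db.dtbd_size bytes of records were copied out; the kernel
		 * switched the CPU to the other half of the buffer pair.
		 */
	} else if (errno == ENOENT) {
		/*
		 * No buffer for this CPU, or the switch did not happen
		 * (e.g. the CPU is offline): simply skip it.
		 */
	}
#endif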
17329
17330 case DTRACEIOC_CONF: {
17331 dtrace_conf_t conf;
17332
17333 bzero(&conf, sizeof (conf));
17334 conf.dtc_difversion = DIF_VERSION;
17335 conf.dtc_difintregs = DIF_DIR_NREGS;
17336 conf.dtc_diftupregs = DIF_DTR_NREGS;
17337 conf.dtc_ctfmodel = CTF_MODEL_NATIVE;
17338
17339 if (copyout(&conf, arg, sizeof (conf)) != 0)
17340 return (EFAULT);
17341
17342 return (0);
17343 }
17344
17345 case DTRACEIOC_STATUS: {
17346 dtrace_status_t stat;
17347 dtrace_dstate_t *dstate;
17348 int i, j;
17349 uint64_t nerrs;
17350
17351 /*
17352 * See the comment in dtrace_state_deadman() for the reason
17353 * for setting dts_laststatus to INT64_MAX before setting
17354 * it to the correct value.
17355 */
17356 state->dts_laststatus = INT64_MAX;
17357 dtrace_membar_producer();
17358 state->dts_laststatus = dtrace_gethrtime();
17359
17360 bzero(&stat, sizeof (stat));
17361
17362 lck_mtx_lock(&dtrace_lock);
17363
17364 if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) {
17365 lck_mtx_unlock(&dtrace_lock);
17366 return (ENOENT);
17367 }
17368
17369 if (state->dts_activity == DTRACE_ACTIVITY_DRAINING)
17370 stat.dtst_exiting = 1;
17371
17372 nerrs = state->dts_errors;
17373 dstate = &state->dts_vstate.dtvs_dynvars;
17374
17375 for (i = 0; i < (int)NCPU; i++) {
17376 dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i];
17377
17378 stat.dtst_dyndrops += dcpu->dtdsc_drops;
17379 stat.dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops;
17380 stat.dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops;
17381
17382 if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL)
17383 stat.dtst_filled++;
17384
17385 nerrs += state->dts_buffer[i].dtb_errors;
17386
17387 for (j = 0; j < state->dts_nspeculations; j++) {
17388 dtrace_speculation_t *spec;
17389 dtrace_buffer_t *buf;
17390
17391 spec = &state->dts_speculations[j];
17392 buf = &spec->dtsp_buffer[i];
17393 stat.dtst_specdrops += buf->dtb_xamot_drops;
17394 }
17395 }
17396
17397 stat.dtst_specdrops_busy = state->dts_speculations_busy;
17398 stat.dtst_specdrops_unavail = state->dts_speculations_unavail;
17399 stat.dtst_stkstroverflows = state->dts_stkstroverflows;
17400 stat.dtst_dblerrors = state->dts_dblerrors;
17401 stat.dtst_killed =
17402 (state->dts_activity == DTRACE_ACTIVITY_KILLED);
17403 stat.dtst_errors = nerrs;
17404
17405 lck_mtx_unlock(&dtrace_lock);
17406
17407 if (copyout(&stat, arg, sizeof (stat)) != 0)
17408 return (EFAULT);
17409
17410 return (0);
17411 }
17412
17413 case DTRACEIOC_FORMAT: {
17414 dtrace_fmtdesc_t fmt;
17415 char *str;
17416 int len;
17417
17418 if (copyin(arg, &fmt, sizeof (fmt)) != 0)
17419 return (EFAULT);
17420
17421 lck_mtx_lock(&dtrace_lock);
17422
17423 if (fmt.dtfd_format == 0 ||
17424 fmt.dtfd_format > state->dts_nformats) {
17425 lck_mtx_unlock(&dtrace_lock);
17426 return (EINVAL);
17427 }
17428
17429 /*
17430 * Format strings are allocated contiguously and they are
17431 * never freed; if a format index is less than the number
17432 * of formats, we can assert that the format map is non-NULL
17433 * and that the format for the specified index is non-NULL.
17434 */
17435 ASSERT(state->dts_formats != NULL);
17436 str = state->dts_formats[fmt.dtfd_format - 1];
17437 ASSERT(str != NULL);
17438
17439 len = strlen(str) + 1;
17440
17441 if (len > fmt.dtfd_length) {
17442 fmt.dtfd_length = len;
17443
17444 if (copyout(&fmt, arg, sizeof (fmt)) != 0) {
17445 lck_mtx_unlock(&dtrace_lock);
17446 return (EINVAL);
17447 }
17448 } else {
17449 if (copyout(str, (user_addr_t)fmt.dtfd_string, len) != 0) {
17450 lck_mtx_unlock(&dtrace_lock);
17451 return (EINVAL);
17452 }
17453 }
17454
17455 lck_mtx_unlock(&dtrace_lock);
17456 return (0);
17457 }
17458
17459 case DTRACEIOC_MODUUIDSLIST: {
17460 size_t module_uuids_list_size;
17461 dtrace_module_uuids_list_t* uuids_list;
17462 uint64_t dtmul_count;
17463
17464 /*
 17465		 * Security restrictions can make this operation illegal; when they are
 17466		 * in force, DTrace must refuse to provide any fbt probes.
17467 */
3e170ce0 17468 if (dtrace_fbt_probes_restricted()) {
17469 cmn_err(CE_WARN, "security restrictions disallow DTRACEIOC_MODUUIDSLIST");
17470 return (EPERM);
17471 }
17472
17473 /*
17474 * Fail if the kernel symbol mode makes this operation illegal.
17475 * Both NEVER & ALWAYS_FROM_KERNEL are permanent states, it is legal to check
17476 * for them without holding the dtrace_lock.
17477 */
17478 if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER ||
17479 dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL) {
17480 cmn_err(CE_WARN, "dtrace_kernel_symbol_mode of %u disallows DTRACEIOC_MODUUIDSLIST", dtrace_kernel_symbol_mode);
17481 return (EPERM);
17482 }
17483
17484 /*
 17485		 * Read the number of module UUIDs being passed in.
17486 */
17487 if (copyin(arg + offsetof(dtrace_module_uuids_list_t, dtmul_count),
17488 &dtmul_count,
17489 sizeof(dtmul_count))) {
17490 cmn_err(CE_WARN, "failed to copyin dtmul_count");
17491 return (EFAULT);
17492 }
17493
17494 /*
17495 * Range check the count. More than 2k kexts is probably an error.
17496 */
17497 if (dtmul_count > 2048) {
17498 cmn_err(CE_WARN, "dtmul_count is not valid");
17499 return (EINVAL);
17500 }
17501
17502 /*
 17503		 * For all queries, we return EINVAL when the user-specified
17504 * count does not match the actual number of modules we find
17505 * available.
17506 *
17507 * If the user specified count is zero, then this serves as a
17508 * simple query to count the available modules in need of symbols.
17509 */
17510
17511 rval = 0;
17512
17513 if (dtmul_count == 0)
17514 {
17515 lck_mtx_lock(&mod_lock);
17516 struct modctl* ctl = dtrace_modctl_list;
17517 while (ctl) {
17518 /* Update the private probes bit */
17519 if (dtrace_provide_private_probes)
17520 ctl->mod_flags |= MODCTL_FBT_PROVIDE_PRIVATE_PROBES;
17521
17522 ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl));
17523 if (!MOD_SYMBOLS_DONE(ctl)) {
17524 dtmul_count++;
17525 rval = EINVAL;
17526 }
17527 ctl = ctl->mod_next;
17528 }
17529 lck_mtx_unlock(&mod_lock);
17530
17531 if (copyout(&dtmul_count, arg, sizeof (dtmul_count)) != 0)
17532 return (EFAULT);
17533 else
17534 return (rval);
17535 }
17536
17537 /*
17538 * If we reach this point, then we have a request for full list data.
17539 * Allocate a correctly sized structure and copyin the data.
17540 */
17541 module_uuids_list_size = DTRACE_MODULE_UUIDS_LIST_SIZE(dtmul_count);
17542 if ((uuids_list = kmem_alloc(module_uuids_list_size, KM_SLEEP)) == NULL)
17543 return (ENOMEM);
17544
 17545		/* NOTE! From here on, exit via goto moduuidslist_cleanup so the allocation is freed. */
17546 if (copyin(arg, uuids_list, module_uuids_list_size) != 0) {
17547 cmn_err(CE_WARN, "failed copyin of dtrace_module_uuids_list_t");
17548 rval = EFAULT;
17549 goto moduuidslist_cleanup;
17550 }
17551
17552 /*
17553 * Check that the count didn't change between the first copyin and the second.
17554 */
17555 if (uuids_list->dtmul_count != dtmul_count) {
17556 rval = EINVAL;
17557 goto moduuidslist_cleanup;
17558 }
17559
17560 /*
 17561		 * Build the list of UUIDs that need symbols.
17562 */
17563 lck_mtx_lock(&mod_lock);
17564
17565 dtmul_count = 0;
17566
17567 struct modctl* ctl = dtrace_modctl_list;
17568 while (ctl) {
17569 /* Update the private probes bit */
17570 if (dtrace_provide_private_probes)
17571 ctl->mod_flags |= MODCTL_FBT_PROVIDE_PRIVATE_PROBES;
17572
17573 /*
17574 * We assume that userspace symbols will be "better" than kernel level symbols,
17575 * as userspace can search for dSYM(s) and symbol'd binaries. Even if kernel syms
17576 * are available, add user syms if the module might use them.
17577 */
17578 ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl));
17579 if (!MOD_SYMBOLS_DONE(ctl)) {
17580 UUID* uuid = &uuids_list->dtmul_uuid[dtmul_count];
17581 if (dtmul_count++ < uuids_list->dtmul_count) {
17582 memcpy(uuid, ctl->mod_uuid, sizeof(UUID));
17583 }
17584 }
17585 ctl = ctl->mod_next;
17586 }
17587
17588 lck_mtx_unlock(&mod_lock);
17589
17590 if (uuids_list->dtmul_count < dtmul_count)
17591 rval = EINVAL;
17592
17593 uuids_list->dtmul_count = dtmul_count;
17594
17595 /*
17596 * Copyout the symbols list (or at least the count!)
17597 */
17598 if (copyout(uuids_list, arg, module_uuids_list_size) != 0) {
 17599			cmn_err(CE_WARN, "failed copyout of dtrace_module_uuids_list_t");
17600 rval = EFAULT;
17601 }
17602
17603 moduuidslist_cleanup:
17604 /*
17605 * If we had to allocate struct memory, free it.
17606 */
17607 if (uuids_list != NULL) {
17608 kmem_free(uuids_list, module_uuids_list_size);
17609 }
17610
17611 return rval;
17612 }
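/*
 * A hedged consumer-side sketch of the two-phase query above. Phase one
 * passes dtmul_count == 0 and learns how many modules still need symbols
 * (the kernel answers EINVAL and writes the count back); phase two sizes
 * the list accordingly. dt_ioctl() is a hypothetical wrapper (as in the
 * earlier sketches), and this assumes dtmul_count is the leading field of
 * dtrace_module_uuids_list_t. Not part of this file.
 */
#if 0
	uint64_t count = 0;
	dtrace_module_uuids_list_t *list;

	(void) dt_ioctl(fd, DTRACEIOC_MODUUIDSLIST, &count);	/* phase one */

	if (count != 0) {
		list = calloc(1, DTRACE_MODULE_UUIDS_LIST_SIZE(count));
		list->dtmul_count = count;

		if (dt_ioctl(fd, DTRACEIOC_MODUUIDSLIST, list) == 0) {
			/*
			 * list->dtmul_uuid[0 .. count - 1] identify the
			 * modules to feed to DTRACEIOC_PROVMODSYMS.
			 */
		}
	}
#endif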
17613
17614 case DTRACEIOC_PROVMODSYMS: {
17615 size_t module_symbols_size;
17616 dtrace_module_symbols_t* module_symbols;
17617 uint64_t dtmodsyms_count;
17618
17619 /*
 17620		 * Security restrictions can make this operation illegal; when they are
 17621		 * in force, DTrace must refuse to provide any fbt probes.
17622 */
3e170ce0 17623 if (dtrace_fbt_probes_restricted()) {
 17624			cmn_err(CE_WARN, "security restrictions disallow DTRACEIOC_PROVMODSYMS");
17625 return (EPERM);
17626 }
17627
17628 /*
17629 * Fail if the kernel symbol mode makes this operation illegal.
17630 * Both NEVER & ALWAYS_FROM_KERNEL are permanent states, it is legal to check
17631 * for them without holding the dtrace_lock.
17632 */
17633 if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER ||
17634 dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL) {
17635 cmn_err(CE_WARN, "dtrace_kernel_symbol_mode of %u disallows DTRACEIOC_PROVMODSYMS", dtrace_kernel_symbol_mode);
17636 return (EPERM);
17637 }
17638
17639 /*
17640 * Read the number of module symbols structs being passed in.
17641 */
17642 if (copyin(arg + offsetof(dtrace_module_symbols_t, dtmodsyms_count),
17643 &dtmodsyms_count,
17644 sizeof(dtmodsyms_count))) {
17645 cmn_err(CE_WARN, "failed to copyin dtmodsyms_count");
17646 return (EFAULT);
17647 }
17648
17649 /*
17650 * Range check the count. How much data can we pass around?
17651 * FIX ME!
17652 */
17653 if (dtmodsyms_count == 0 || (dtmodsyms_count > 100 * 1024)) {
17654 cmn_err(CE_WARN, "dtmodsyms_count is not valid");
17655 return (EINVAL);
17656 }
17657
17658 /*
17659 * Allocate a correctly sized structure and copyin the data.
17660 */
17661 module_symbols_size = DTRACE_MODULE_SYMBOLS_SIZE(dtmodsyms_count);
17662 if ((module_symbols = kmem_alloc(module_symbols_size, KM_SLEEP)) == NULL)
17663 return (ENOMEM);
17664
17665 rval = 0;
17666
 17667		/* NOTE! From here on, exit via goto module_symbols_cleanup so the allocation is freed. */
17668 if (copyin(arg, module_symbols, module_symbols_size) != 0) {
39037602 17669 cmn_err(CE_WARN, "failed copyin of dtrace_module_symbols_t");
17670 rval = EFAULT;
17671 goto module_symbols_cleanup;
17672 }
17673
17674 /*
17675 * Check that the count didn't change between the first copyin and the second.
17676 */
17677 if (module_symbols->dtmodsyms_count != dtmodsyms_count) {
17678 rval = EINVAL;
17679 goto module_symbols_cleanup;
17680 }
17681
17682 /*
17683 * Find the modctl to add symbols to.
17684 */
17685 lck_mtx_lock(&dtrace_provider_lock);
17686 lck_mtx_lock(&mod_lock);
17687
17688 struct modctl* ctl = dtrace_modctl_list;
17689 while (ctl) {
17690 /* Update the private probes bit */
17691 if (dtrace_provide_private_probes)
17692 ctl->mod_flags |= MODCTL_FBT_PROVIDE_PRIVATE_PROBES;
17693
17694 ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl));
17695 if (MOD_HAS_UUID(ctl) && !MOD_SYMBOLS_DONE(ctl)) {
17696 if (memcmp(module_symbols->dtmodsyms_uuid, ctl->mod_uuid, sizeof(UUID)) == 0) {
17697 /* BINGO! */
17698 ctl->mod_user_symbols = module_symbols;
17699 break;
17700 }
17701 }
17702 ctl = ctl->mod_next;
17703 }
17704
17705 if (ctl) {
17706 dtrace_provider_t *prv;
17707
17708 /*
 17709		 * We're going to call each provider's per-module provide operation,
17710 * specifying only this module.
17711 */
17712 for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
17713 prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);
17714
17715 /*
 17716		 * Every provider has had a chance to provide with the user syms; go ahead and clear them.
17717 */
17718 ctl->mod_user_symbols = NULL; /* MUST reset this to clear HAS_USERSPACE_SYMBOLS */
17719 }
17720
17721 lck_mtx_unlock(&mod_lock);
17722 lck_mtx_unlock(&dtrace_provider_lock);
17723
17724 module_symbols_cleanup:
17725 /*
17726 * If we had to allocate struct memory, free it.
17727 */
17728 if (module_symbols != NULL) {
17729 kmem_free(module_symbols, module_symbols_size);
17730 }
17731
17732 return rval;
17733 }
17734
17735 case DTRACEIOC_PROCWAITFOR: {
17736 dtrace_procdesc_t pdesc = {
3e170ce0 17737 .p_name = {0},
17738 .p_pid = -1
17739 };
17740
17741 if ((rval = copyin(arg, &pdesc, sizeof(pdesc))) != 0)
17742 goto proc_waitfor_error;
17743
17744 if ((rval = dtrace_proc_waitfor(&pdesc)) != 0)
17745 goto proc_waitfor_error;
17746
17747 if ((rval = copyout(&pdesc, arg, sizeof(pdesc))) != 0)
17748 goto proc_waitfor_error;
17749
17750 return 0;
17751
17752 proc_waitfor_error:
17753 /* The process was suspended, revert this since the client will not do it. */
17754 if (pdesc.p_pid != -1) {
17755 proc_t *proc = proc_find(pdesc.p_pid);
17756 if (proc != PROC_NULL) {
17757 task_pidresume(proc->task);
17758 proc_rele(proc);
17759 }
17760 }
17761
17762 return rval;
17763 }
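/*
 * A hedged consumer-side sketch of the waitfor contract above: the caller
 * names a process, the kernel reports its pid once it launches and leaves
 * it suspended, and the client owns the resume. dt_ioctl() is a
 * hypothetical wrapper (as in the earlier sketches); `fd' is assumed.
 * Not part of this file.
 */
#if 0
	dtrace_procdesc_t pd;

	bzero(&pd, sizeof (pd));
	strlcpy(pd.p_name, "MyApp", sizeof (pd.p_name));

	if (dt_ioctl(fd, DTRACEIOC_PROCWAITFOR, &pd) == 0) {
		/*
		 * pd.p_pid is valid and the task is suspended; instrument
		 * it, then resume it (e.g. via pid_resume()), since the
		 * kernel only reverts the suspension on its error path.
		 */
	}
#endif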
17764
17765 default:
17766 break;
17767 }
17768
17769 return (ENOTTY);
17770}
b0d623f7 17771
17772/*
17773 * APPLE NOTE: dtrace_detach not implemented
17774 */
17775#if !defined(__APPLE__)
17776/*ARGSUSED*/
17777static int
17778dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
17779{
17780 dtrace_state_t *state;
17781
17782 switch (cmd) {
17783 case DDI_DETACH:
17784 break;
17785
17786 case DDI_SUSPEND:
17787 return (DDI_SUCCESS);
17788
17789 default:
17790 return (DDI_FAILURE);
17791 }
17792
17793 lck_mtx_lock(&cpu_lock);
17794 lck_mtx_lock(&dtrace_provider_lock);
17795 lck_mtx_lock(&dtrace_lock);
17796
17797 ASSERT(dtrace_opens == 0);
17798
17799 if (dtrace_helpers > 0) {
2d21ac55 17800 lck_mtx_unlock(&dtrace_lock);
fe8ab488 17801 lck_mtx_unlock(&dtrace_provider_lock);
17802 lck_mtx_unlock(&cpu_lock);
17803 return (DDI_FAILURE);
17804 }
17805
17806 if (dtrace_unregister((dtrace_provider_id_t)dtrace_provider) != 0) {
2d21ac55 17807 lck_mtx_unlock(&dtrace_lock);
fe8ab488 17808 lck_mtx_unlock(&dtrace_provider_lock);
17809 lck_mtx_unlock(&cpu_lock);
17810 return (DDI_FAILURE);
17811 }
17812
17813 dtrace_provider = NULL;
17814
17815 if ((state = dtrace_anon_grab()) != NULL) {
17816 /*
17817 * If there were ECBs on this state, the provider should
17818 * have not been allowed to detach; assert that there is
17819 * none.
17820 */
17821 ASSERT(state->dts_necbs == 0);
17822 dtrace_state_destroy(state);
17823
17824 /*
17825 * If we're being detached with anonymous state, we need to
17826 * indicate to the kernel debugger that DTrace is now inactive.
17827 */
17828 (void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
17829 }
17830
17831 bzero(&dtrace_anon, sizeof (dtrace_anon_t));
17832 unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);
17833 dtrace_cpu_init = NULL;
17834 dtrace_helpers_cleanup = NULL;
17835 dtrace_helpers_fork = NULL;
17836 dtrace_cpustart_init = NULL;
17837 dtrace_cpustart_fini = NULL;
17838 dtrace_debugger_init = NULL;
17839 dtrace_debugger_fini = NULL;
17840 dtrace_kreloc_init = NULL;
17841 dtrace_kreloc_fini = NULL;
17842 dtrace_modload = NULL;
17843 dtrace_modunload = NULL;
17844
17845 lck_mtx_unlock(&cpu_lock);
17846
17847 if (dtrace_helptrace_enabled) {
17848 kmem_free(dtrace_helptrace_buffer, dtrace_helptrace_bufsize);
17849 dtrace_helptrace_buffer = NULL;
17850 }
17851
17852 kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *));
17853 dtrace_probes = NULL;
17854 dtrace_nprobes = 0;
17855
17856 dtrace_hash_destroy(dtrace_bymod);
17857 dtrace_hash_destroy(dtrace_byfunc);
17858 dtrace_hash_destroy(dtrace_byname);
17859 dtrace_bymod = NULL;
17860 dtrace_byfunc = NULL;
17861 dtrace_byname = NULL;
17862
17863 kmem_cache_destroy(dtrace_state_cache);
17864 vmem_destroy(dtrace_arena);
17865
17866 if (dtrace_toxrange != NULL) {
17867 kmem_free(dtrace_toxrange,
17868 dtrace_toxranges_max * sizeof (dtrace_toxrange_t));
17869 dtrace_toxrange = NULL;
17870 dtrace_toxranges = 0;
17871 dtrace_toxranges_max = 0;
17872 }
17873
17874 ddi_remove_minor_node(dtrace_devi, NULL);
17875 dtrace_devi = NULL;
17876
17877 ddi_soft_state_fini(&dtrace_softstate);
17878
17879 ASSERT(dtrace_vtime_references == 0);
17880 ASSERT(dtrace_opens == 0);
17881 ASSERT(dtrace_retained == NULL);
17882
17883 lck_mtx_unlock(&dtrace_lock);
17884 lck_mtx_unlock(&dtrace_provider_lock);
17885
17886 /*
17887 * We don't destroy the task queue until after we have dropped our
17888 * locks (taskq_destroy() may block on running tasks). To prevent
17889 * attempting to do work after we have effectively detached but before
17890 * the task queue has been destroyed, all tasks dispatched via the
17891 * task queue must check that DTrace is still attached before
17892 * performing any operation.
17893 */
17894 taskq_destroy(dtrace_taskq);
17895 dtrace_taskq = NULL;
17896
17897 return (DDI_SUCCESS);
17898}
fe8ab488 17899#endif /* __APPLE__ */
17900
17901d_open_t _dtrace_open, helper_open;
17902d_close_t _dtrace_close, helper_close;
17903d_ioctl_t _dtrace_ioctl, helper_ioctl;
17904
17905int
17906_dtrace_open(dev_t dev, int flags, int devtype, struct proc *p)
17907{
17908#pragma unused(p)
17909 dev_t locdev = dev;
17910
17911 return dtrace_open( &locdev, flags, devtype, CRED());
17912}
17913
17914int
17915helper_open(dev_t dev, int flags, int devtype, struct proc *p)
17916{
17917#pragma unused(dev,flags,devtype,p)
17918 return 0;
17919}
17920
17921int
17922_dtrace_close(dev_t dev, int flags, int devtype, struct proc *p)
17923{
17924#pragma unused(p)
17925 return dtrace_close( dev, flags, devtype, CRED());
17926}
17927
17928int
17929helper_close(dev_t dev, int flags, int devtype, struct proc *p)
17930{
17931#pragma unused(dev,flags,devtype,p)
17932 return 0;
17933}
17934
17935int
17936_dtrace_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p)
17937{
17938#pragma unused(p)
17939 int err, rv = 0;
17940 user_addr_t uaddrp;
17941
17942 if (proc_is64bit(p))
17943 uaddrp = *(user_addr_t *)data;
17944 else
17945 uaddrp = (user_addr_t) *(uint32_t *)data;
2d21ac55 17946
b0d623f7 17947 err = dtrace_ioctl(dev, cmd, uaddrp, fflag, CRED(), &rv);
2d21ac55 17948
b0d623f7 17949 /* Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */
17950 if (err != 0) {
17951 ASSERT( (err & 0xfffff000) == 0 );
b0d623f7 17952 return (err & 0xfff); /* ioctl will return -1 and will set errno to an error code < 4096 */
17953 } else if (rv != 0) {
17954 ASSERT( (rv & 0xfff00000) == 0 );
b0d623f7 17955 return (((rv & 0xfffff) << 12)); /* ioctl will return -1 and will set errno to a value >= 4096 */
17956 } else
17957 return 0;
17958}
17959
17960int
17961helper_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p)
17962{
17963#pragma unused(dev,fflag,p)
17964 int err, rv = 0;
17965
17966 err = dtrace_ioctl_helper(cmd, data, &rv);
17967 /* Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */
17968 if (err != 0) {
17969 ASSERT( (err & 0xfffff000) == 0 );
b0d623f7 17970 return (err & 0xfff); /* ioctl will return -1 and will set errno to an error code < 4096 */
17971 } else if (rv != 0) {
17972 ASSERT( (rv & 0xfff00000) == 0 );
b0d623f7 17973 return (((rv & 0xfffff) << 12)); /* ioctl will return -1 and will set errno to a value >= 4096 */
17974 } else
17975 return 0;
17976}
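/*
 * A hedged userspace-side sketch of undoing the errno overloading performed
 * by the two ioctl shims above. This helper is hypothetical, not part of
 * this file: a caller saves errno after a failed ioctl and splits it back
 * into a genuine error code (< 4096) or an encoded rv (>= 4096).
 */
#if 0
static int
dtrace_ioctl_decode(int saved_errno, int *rvp)
{
	if (saved_errno >= 4096) {
		*rvp = saved_errno >> 12;	/* recover the rv channel */
		return (0);			/* the call itself succeeded */
	}
	*rvp = 0;
	return (saved_errno);			/* an ordinary error code */
}
#endif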
17977
17978#define HELPER_MAJOR -24 /* let the kernel pick the device number */
17979
17980/*
17981 * A struct describing which functions will get invoked for certain
17982 * actions.
17983 */
17984static struct cdevsw helper_cdevsw =
17985{
17986 helper_open, /* open */
17987 helper_close, /* close */
17988 eno_rdwrt, /* read */
17989 eno_rdwrt, /* write */
17990 helper_ioctl, /* ioctl */
17991 (stop_fcn_t *)nulldev, /* stop */
17992 (reset_fcn_t *)nulldev, /* reset */
17993 NULL, /* tty's */
17994 eno_select, /* select */
17995 eno_mmap, /* mmap */
17996 eno_strat, /* strategy */
17997 eno_getc, /* getc */
17998 eno_putc, /* putc */
17999 0 /* type */
18000};
18001
18002static int helper_majdevno = 0;
18003
18004static int gDTraceInited = 0;
18005
18006void
18007helper_init( void )
18008{
18009 /*
18010 * Once the "helper" is initialized, it can take ioctl calls that use locks
18011 * and zones initialized in dtrace_init. Make certain dtrace_init was called
18012 * before us.
18013 */
18014
18015 if (!gDTraceInited) {
18016 panic("helper_init before dtrace_init\n");
18017 }
18018
18019 if (0 >= helper_majdevno)
18020 {
18021 helper_majdevno = cdevsw_add(HELPER_MAJOR, &helper_cdevsw);
18022
18023 if (helper_majdevno < 0) {
18024 printf("helper_init: failed to allocate a major number!\n");
18025 return;
18026 }
18027
18028 if (NULL == devfs_make_node( makedev(helper_majdevno, 0), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666,
18029 DTRACEMNR_HELPER, 0 )) {
 18030			printf("helper_init: failed to devfs_make_node for helper!\n");
18031 return;
18032 }
18033 } else
18034 panic("helper_init: called twice!\n");
18035}
18036
18037#undef HELPER_MAJOR
18038
18039static int
18040dtrace_clone_func(dev_t dev, int action)
18041{
18042#pragma unused(dev)
18043
18044 if (action == DEVFS_CLONE_ALLOC) {
39037602 18045 return dtrace_state_reserve();
18046 }
18047 else if (action == DEVFS_CLONE_FREE) {
18048 return 0;
18049 }
18050 else return -1;
18051}
18052
18053void dtrace_ast(void);
18054
18055void
18056dtrace_ast(void)
18057{
18058 int i;
18059 uint32_t clients = atomic_and_32(&dtrace_wake_clients, 0);
18060 if (clients == 0)
18061 return;
 18062	/*
 18063	 * Disable preemption so that a wakeup of a higher-priority
 18064	 * thread cannot preempt us before we have issued all of the
 18065	 * wakeups.
 18066	 */
18067 disable_preemption();
18068 for (i = 0; i < DTRACE_NCLIENTS; i++) {
18069 if (clients & (1 << i)) {
18070 dtrace_state_t *state = dtrace_state_get(i);
18071 if (state) {
18072 wakeup(state);
18073 }
18074
18075 }
18076 }
18077 enable_preemption();
18078}
18079
18080
18081#define DTRACE_MAJOR -24 /* let the kernel pick the device number */
18082
18083static struct cdevsw dtrace_cdevsw =
18084{
18085 _dtrace_open, /* open */
18086 _dtrace_close, /* close */
18087 eno_rdwrt, /* read */
18088 eno_rdwrt, /* write */
18089 _dtrace_ioctl, /* ioctl */
18090 (stop_fcn_t *)nulldev, /* stop */
18091 (reset_fcn_t *)nulldev, /* reset */
18092 NULL, /* tty's */
18093 eno_select, /* select */
18094 eno_mmap, /* mmap */
18095 eno_strat, /* strategy */
18096 eno_getc, /* getc */
18097 eno_putc, /* putc */
18098 0 /* type */
18099};
18100
18101lck_attr_t* dtrace_lck_attr;
18102lck_grp_attr_t* dtrace_lck_grp_attr;
18103lck_grp_t* dtrace_lck_grp;
18104
18105static int gMajDevNo;
18106
18107void
18108dtrace_init( void )
18109{
18110 if (0 == gDTraceInited) {
39236c6e 18111 int i, ncpu;
fe8ab488 18112 size_t size = sizeof(dtrace_buffer_memory_maxsize);
2d21ac55 18113
18114 /*
18115 * DTrace allocates buffers based on the maximum number
18116 * of enabled cpus. This call avoids any race when finding
18117 * that count.
18118 */
18119 ASSERT(dtrace_max_cpus == 0);
18120 ncpu = dtrace_max_cpus = ml_get_max_cpus();
18121
18122 /*
 18123		 * Retrieve the size of physical memory in order to define the
 18124		 * maximum size of the state buffer memory. If we cannot
 18125		 * retrieve this value, assume 1GB of memory per CPU; that's
 18126		 * still better than raising a kernel panic.
18127 */
18128 if (0 != kernel_sysctlbyname("hw.memsize", &dtrace_buffer_memory_maxsize,
18129 &size, NULL, 0))
18130 {
 18131			dtrace_buffer_memory_maxsize = (uint64_t)ncpu * 1024 * 1024 * 1024;
 18132			printf("dtrace_init: failed to retrieve hw.memsize, defaulted to %llu bytes\n",
18133 dtrace_buffer_memory_maxsize);
18134 }
18135
18136 /*
18137 * Finally, divide by three to prevent DTrace from eating too
18138 * much memory.
18139 */
18140 dtrace_buffer_memory_maxsize /= 3;
18141 ASSERT(dtrace_buffer_memory_maxsize > 0);
18142
18143 gMajDevNo = cdevsw_add(DTRACE_MAJOR, &dtrace_cdevsw);
18144
18145 if (gMajDevNo < 0) {
18146 printf("dtrace_init: failed to allocate a major number!\n");
18147 gDTraceInited = 0;
18148 return;
18149 }
18150
18151 if (NULL == devfs_make_node_clone( makedev(gMajDevNo, 0), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666,
18152 dtrace_clone_func, DTRACEMNR_DTRACE, 0 )) {
18153 printf("dtrace_init: failed to devfs_make_node_clone for dtrace!\n");
18154 gDTraceInited = 0;
18155 return;
18156 }
18157
18158#if defined(DTRACE_MEMORY_ZONES)
18159 /*
18160 * Initialize the dtrace kalloc-emulation zones.
18161 */
18162 dtrace_alloc_init();
18163#endif /* DTRACE_MEMORY_ZONES */
18164
18165 /*
18166 * Allocate the dtrace_probe_t zone
18167 */
18168 dtrace_probe_t_zone = zinit(sizeof(dtrace_probe_t),
18169 1024 * sizeof(dtrace_probe_t),
18170 sizeof(dtrace_probe_t),
18171 "dtrace.dtrace_probe_t");
18172
18173 /*
18174 * Create the dtrace lock group and attrs.
18175 */
18176 dtrace_lck_attr = lck_attr_alloc_init();
18177 dtrace_lck_grp_attr= lck_grp_attr_alloc_init();
18178 dtrace_lck_grp = lck_grp_alloc_init("dtrace", dtrace_lck_grp_attr);
18179
18180 /*
18181 * We have to initialize all locks explicitly
18182 */
18183 lck_mtx_init(&dtrace_lock, dtrace_lck_grp, dtrace_lck_attr);
18184 lck_mtx_init(&dtrace_provider_lock, dtrace_lck_grp, dtrace_lck_attr);
18185 lck_mtx_init(&dtrace_meta_lock, dtrace_lck_grp, dtrace_lck_attr);
fe8ab488 18186 lck_mtx_init(&dtrace_procwaitfor_lock, dtrace_lck_grp, dtrace_lck_attr);
b0d623f7 18187#if DEBUG
18188 lck_mtx_init(&dtrace_errlock, dtrace_lck_grp, dtrace_lck_attr);
18189#endif
18190 lck_rw_init(&dtrace_dof_mode_lock, dtrace_lck_grp, dtrace_lck_attr);
18191
18192 /*
18193 * The cpu_core structure consists of per-CPU state available in any context.
18194 * On some architectures, this may mean that the page(s) containing the
18195 * NCPU-sized array of cpu_core structures must be locked in the TLB -- it
18196 * is up to the platform to assure that this is performed properly. Note that
18197 * the structure is sized to avoid false sharing.
18198 */
18199 lck_mtx_init(&cpu_lock, dtrace_lck_grp, dtrace_lck_attr);
fe8ab488 18200 lck_mtx_init(&cyc_lock, dtrace_lck_grp, dtrace_lck_attr);
18201 lck_mtx_init(&mod_lock, dtrace_lck_grp, dtrace_lck_attr);
18202
18203 /*
18204 * Initialize the CPU offline/online hooks.
18205 */
18206 dtrace_install_cpu_hooks();
18207
18208 dtrace_modctl_list = NULL;
18209
18210 cpu_core = (cpu_core_t *)kmem_zalloc( ncpu * sizeof(cpu_core_t), KM_SLEEP );
18211 for (i = 0; i < ncpu; ++i) {
18212 lck_mtx_init(&cpu_core[i].cpuc_pid_lock, dtrace_lck_grp, dtrace_lck_attr);
18213 }
18214
6d2010ae 18215 cpu_list = (dtrace_cpu_t *)kmem_zalloc( ncpu * sizeof(dtrace_cpu_t), KM_SLEEP );
18216 for (i = 0; i < ncpu; ++i) {
18217 cpu_list[i].cpu_id = (processorid_t)i;
18218 cpu_list[i].cpu_next = &(cpu_list[(i+1) % ncpu]);
fe8ab488 18219 LIST_INIT(&cpu_list[i].cpu_cyc_list);
18220 lck_rw_init(&cpu_list[i].cpu_ft_lock, dtrace_lck_grp, dtrace_lck_attr);
18221 }
18222
18223 lck_mtx_lock(&cpu_lock);
18224 for (i = 0; i < ncpu; ++i)
39037602 18225 /* FIXME: track CPU configuration */
18226 dtrace_cpu_setup_initial( (processorid_t)i ); /* In lieu of register_cpu_setup_func() callback */
18227 lck_mtx_unlock(&cpu_lock);
18228
18229 (void)dtrace_abs_to_nano(0LL); /* Force once only call to clock_timebase_info (which can take a lock) */
18230
316670eb 18231 dtrace_isa_init();
18232 /*
18233 * See dtrace_impl.h for a description of dof modes.
18234 * The default is lazy dof.
18235 *
b0d623f7 18236 * FIXME: Warn if state is LAZY_OFF? It won't break anything, but
18237 * makes no sense...
18238 */
593a1d5f 18239 if (!PE_parse_boot_argn("dtrace_dof_mode", &dtrace_dof_mode, sizeof (dtrace_dof_mode))) {
18240 dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
18241 }
18242
18243 /*
18244 * Sanity check of dof mode value.
18245 */
18246 switch (dtrace_dof_mode) {
18247 case DTRACE_DOF_MODE_NEVER:
18248 case DTRACE_DOF_MODE_LAZY_ON:
18249 /* valid modes, but nothing else we need to do */
18250 break;
18251
18252 case DTRACE_DOF_MODE_LAZY_OFF:
18253 case DTRACE_DOF_MODE_NON_LAZY:
18254 /* Cannot wait for a dtrace_open to init fasttrap */
18255 fasttrap_init();
18256 break;
18257
18258 default:
18259 /* Invalid, clamp to non lazy */
18260 dtrace_dof_mode = DTRACE_DOF_MODE_NON_LAZY;
18261 fasttrap_init();
18262 break;
18263 }
18264
18265 /*
18266 * See dtrace_impl.h for a description of kernel symbol modes.
18267 * The default is to wait for symbols from userspace (lazy symbols).
18268 */
18269 if (!PE_parse_boot_argn("dtrace_kernel_symbol_mode", &dtrace_kernel_symbol_mode, sizeof (dtrace_kernel_symbol_mode))) {
18270 dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE;
18271 }
18272
18273 dtrace_restriction_policy_load();
18274
18275 gDTraceInited = 1;
18276
18277 } else
18278 panic("dtrace_init: called twice!\n");
18279}
18280
18281void
18282dtrace_postinit(void)
18283{
18284 /*
18285 * Called from bsd_init after all provider's *_init() routines have been
18286 * run. That way, anonymous DOF enabled under dtrace_attach() is safe
18287 * to go.
18288 */
18289 dtrace_attach( (dev_info_t *)(uintptr_t)makedev(gMajDevNo, 0), 0 ); /* Punning a dev_t to a dev_info_t* */
18290
18291 /*
18292 * Add the mach_kernel to the module list for lazy processing
18293 */
18294 struct kmod_info fake_kernel_kmod;
18295 memset(&fake_kernel_kmod, 0, sizeof(fake_kernel_kmod));
18296
18297 strlcpy(fake_kernel_kmod.name, "mach_kernel", sizeof(fake_kernel_kmod.name));
18298 fake_kernel_kmod.id = 1;
18299 fake_kernel_kmod.address = g_kernel_kmod_info.address;
18300 fake_kernel_kmod.size = g_kernel_kmod_info.size;
18301
316670eb 18302 if (dtrace_module_loaded(&fake_kernel_kmod, 0) != 0) {
18303 printf("dtrace_postinit: Could not register mach_kernel modctl\n");
18304 }
18305
18306 (void)OSKextRegisterKextsWithDTrace();
18307}
18308#undef DTRACE_MAJOR
18309
18310/*
18311 * Routines used to register interest in cpu's being added to or removed
18312 * from the system.
18313 */
18314void
18315register_cpu_setup_func(cpu_setup_func_t *ignore1, void *ignore2)
18316{
18317#pragma unused(ignore1,ignore2)
18318}
18319
18320void
18321unregister_cpu_setup_func(cpu_setup_func_t *ignore1, void *ignore2)
18322{
18323#pragma unused(ignore1,ignore2)
18324}