/*
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * Portions Copyright (c) 2013, 2016, Joyent, Inc. All rights reserved.
 * Portions Copyright (c) 2013 by Delphix. All rights reserved.
 *
 * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */
/* #pragma ident        "@(#)dtrace.c   1.65    08/07/02 SMI" */
/*
 * DTrace - Dynamic Tracing for Solaris
 *
 * This is the implementation of the Solaris Dynamic Tracing framework
 * (DTrace).  The user-visible interface to DTrace is described at length in
 * the "Solaris Dynamic Tracing Guide".  The interfaces between the libdtrace
 * library, the in-kernel DTrace framework, and the DTrace providers are
 * described in the block comments in the <sys/dtrace.h> header file.  The
 * internal architecture of DTrace is described in the block comments in the
 * <sys/dtrace_impl.h> header file.  The comments contained within the DTrace
 * implementation very much assume mastery of all of these sources; if one has
 * an unanswered question about the implementation, one should consult them
 * first.
 *
 * The functions here are ordered roughly as follows:
 *
 *   - Probe context functions
 *   - Probe hashing functions
 *   - Non-probe context utility functions
 *   - Matching functions
 *   - Provider-to-Framework API functions
 *   - Probe management functions
 *   - DIF object functions
 *   - Format functions
 *   - Predicate functions
 *   - ECB functions
 *   - Buffer functions
 *   - Enabling functions
 *   - DOF functions
 *   - Anonymous enabling functions
 *   - Process functions
 *   - Consumer state functions
 *   - Helper functions
 *   - Hook functions
 *   - Driver cookbook functions
 *
 * Each group of functions begins with a block comment labelled the "DTrace
 * [Group] Functions", allowing one to find each block by searching forward
 * on capital-f functions.
 */
#include <sys/errno.h>
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/dtrace_impl.h>
#include <sys/param.h>
#include <sys/proc_internal.h>
#include <sys/ioctl.h>
#include <sys/fcntl.h>
#include <miscfs/devfs/devfs.h>
#include <sys/malloc.h>
#include <sys/kernel_types.h>
#include <sys/proc_internal.h>
#include <sys/uio_internal.h>
#include <sys/kauth.h>

#include <mach/exception_types.h>
#include <sys/signalvar.h>
#include <mach/task.h>
#include <kern/zalloc.h>
#include <kern/sched_prim.h>
#include <kern/task.h>
#include <netinet/in.h>
#include <libkern/sysctl.h>
#include <sys/kdebug.h>

#if MONOTONIC
#include <kern/monotonic.h>
#include <machine/monotonic.h>
#endif /* MONOTONIC */

#include <kern/cpu_data.h>

extern uint32_t pmap_find_phys(void *, uint64_t);
extern boolean_t pmap_valid_page(uint32_t);
extern void OSKextRegisterKextsWithDTrace(void);
extern kmod_info_t g_kernel_kmod_info;
/* Solaris proc_t is the struct. Darwin's proc_t is a pointer to it. */
#define proc_t struct proc /* Steer clear of the Darwin typedef for proc_t */

#define t_predcache t_dtrace_predcache /* Cosmetic. Helps readability of thread.h */

extern void dtrace_suspend(void);
extern void dtrace_resume(void);
extern void dtrace_init(void);
extern void helper_init(void);
extern void fasttrap_init(void);

static int  dtrace_lazy_dofs_duplicate(proc_t *, proc_t *);
extern void dtrace_lazy_dofs_destroy(proc_t *);
extern void dtrace_postinit(void);

extern void dtrace_proc_fork(proc_t *, proc_t *, int);
extern void dtrace_proc_exec(proc_t *);
extern void dtrace_proc_exit(proc_t *);
/*
 * DTrace Tunable Variables
 *
 * The following variables may be dynamically tuned by using sysctl(8), the
 * variables being stored in the kern.dtrace namespace.  For example:
 *      sysctl kern.dtrace.dof_maxsize = 1048575        # 1M
 *
 * In general, the only variables that one should be tuning this way are those
 * that affect system-wide DTrace behavior, and for which the default behavior
 * is undesirable.  Most of these variables are tunable on a per-consumer
 * basis using DTrace options, and need not be tuned on a system-wide basis.
 * When tuning these variables, avoid pathological values; while some attempt
 * is made to verify the integrity of these variables, they are not considered
 * part of the supported interface to DTrace, and they are therefore not
 * checked comprehensively.
 */
uint64_t        dtrace_buffer_memory_maxsize = 0;               /* initialized in dtrace_init */
uint64_t        dtrace_buffer_memory_inuse = 0;
int             dtrace_destructive_disallow = 0;
dtrace_optval_t dtrace_nonroot_maxsize = (16 * 1024 * 1024);
size_t          dtrace_difo_maxsize = (256 * 1024);
dtrace_optval_t dtrace_dof_maxsize = (512 * 1024);
dtrace_optval_t dtrace_statvar_maxsize = (16 * 1024);
dtrace_optval_t dtrace_statvar_maxsize_max = (16 * 10 * 1024);
size_t          dtrace_actions_max = (16 * 1024);
size_t          dtrace_retain_max = 1024;
dtrace_optval_t dtrace_helper_actions_max = 32;
dtrace_optval_t dtrace_helper_providers_max = 64;
dtrace_optval_t dtrace_dstate_defsize = (1 * 1024 * 1024);
size_t          dtrace_strsize_default = 256;
dtrace_optval_t dtrace_strsize_min = 8;
dtrace_optval_t dtrace_strsize_max = 65536;
dtrace_optval_t dtrace_cleanrate_default = 990099000;           /* 1.1 hz */
dtrace_optval_t dtrace_cleanrate_min = 20000000;                /* 50 hz */
dtrace_optval_t dtrace_cleanrate_max = (uint64_t)60 * NANOSEC;  /* 1/minute */
dtrace_optval_t dtrace_aggrate_default = NANOSEC;               /* 1 hz */
dtrace_optval_t dtrace_statusrate_default = NANOSEC;            /* 1 hz */
dtrace_optval_t dtrace_statusrate_max = (hrtime_t)10 * NANOSEC; /* 6/minute */
dtrace_optval_t dtrace_switchrate_default = NANOSEC;            /* 1 hz */
dtrace_optval_t dtrace_nspec_default = 1;
dtrace_optval_t dtrace_specsize_default = 32 * 1024;
dtrace_optval_t dtrace_stackframes_default = 20;
dtrace_optval_t dtrace_ustackframes_default = 20;
dtrace_optval_t dtrace_jstackframes_default = 50;
dtrace_optval_t dtrace_jstackstrsize_default = 512;
dtrace_optval_t dtrace_buflimit_default = 75;
dtrace_optval_t dtrace_buflimit_min = 1;
dtrace_optval_t dtrace_buflimit_max = 99;
int             dtrace_msgdsize_max = 128;
hrtime_t        dtrace_chill_max = 500 * (NANOSEC / MILLISEC);  /* 500 ms */
hrtime_t        dtrace_chill_interval = NANOSEC;                /* 1000 ms */
int             dtrace_devdepth_max = 32;
int             dtrace_err_verbose;
int             dtrace_provide_private_probes = 0;
hrtime_t        dtrace_deadman_interval = NANOSEC;
hrtime_t        dtrace_deadman_timeout = (hrtime_t)10 * NANOSEC;
hrtime_t        dtrace_deadman_user = (hrtime_t)30 * NANOSEC;
/*
 * DTrace External Variables
 *
 * As dtrace(7D) is a kernel module, any DTrace variables are obviously
 * available to DTrace consumers via the backtick (`) syntax.  One of these,
 * dtrace_zero, is made deliberately so:  it is provided as a source of
 * well-known, zero-filled memory.  While this variable is not documented,
 * it is used by some translators as an implementation detail.
 */
const char      dtrace_zero[256] = { 0 };       /* zero-filled memory */
unsigned int    dtrace_max_cpus = 0;            /* number of enabled cpus */
/*
 * DTrace Internal Variables
 */
static dev_info_t       *dtrace_devi;           /* device info */
static vmem_t           *dtrace_arena;          /* probe ID arena */
static taskq_t          *dtrace_taskq;          /* task queue */
static dtrace_probe_t   **dtrace_probes;        /* array of all probes */
static int              dtrace_nprobes;         /* number of probes */
static dtrace_provider_t *dtrace_provider;      /* provider list */
static dtrace_meta_t    *dtrace_meta_pid;       /* user-land meta provider */
static int              dtrace_opens;           /* number of opens */
static int              dtrace_helpers;         /* number of helpers */
static dtrace_hash_t    *dtrace_bymod;          /* probes hashed by module */
static dtrace_hash_t    *dtrace_byfunc;         /* probes hashed by function */
static dtrace_hash_t    *dtrace_byname;         /* probes hashed by name */
static dtrace_toxrange_t *dtrace_toxrange;      /* toxic range array */
static int              dtrace_toxranges;       /* number of toxic ranges */
static int              dtrace_toxranges_max;   /* size of toxic range array */
static dtrace_anon_t    dtrace_anon;            /* anonymous enabling */
static kmem_cache_t     *dtrace_state_cache;    /* cache for dynamic state */
static uint64_t         dtrace_vtime_references; /* number of vtimestamp refs */
static kthread_t        *dtrace_panicked;       /* panicking thread */
static dtrace_ecb_t     *dtrace_ecb_create_cache; /* cached created ECB */
static dtrace_genid_t   dtrace_probegen;        /* current probe generation */
static dtrace_helpers_t *dtrace_deferred_pid;   /* deferred helper list */
static dtrace_enabling_t *dtrace_retained;      /* list of retained enablings */
static dtrace_genid_t   dtrace_retained_gen;    /* current retained enab gen */
static dtrace_dynvar_t  dtrace_dynhash_sink;    /* end of dynamic hash chains */

static int              dtrace_dof_mode;        /* See dtrace_impl.h for a description of Darwin's dof modes. */

                        /*
                         * This doesn't quite fit as an internal variable, as it must be accessed in
                         * fbt_provide and sdt_provide.  It's clearly not a dtrace tunable variable either...
                         */
int                     dtrace_kernel_symbol_mode;      /* See dtrace_impl.h for a description of Darwin's kernel symbol modes. */
static uint32_t         dtrace_wake_clients;
/*
 * To save memory, some common memory allocations are given a
 * unique zone. For example, dtrace_probe_t is 72 bytes in size,
 * which means it would fall into the kalloc.128 bucket. With
 * 20k elements allocated, the space saved is substantial.
 */

struct zone *dtrace_probe_t_zone;
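
/*
 * Editorial sketch (an assumption, not copied from this file):  the zone
 * above is typically created during initialization with something like
 *
 *      dtrace_probe_t_zone = zinit(sizeof (dtrace_probe_t),
 *          1024 * sizeof (dtrace_probe_t), sizeof (dtrace_probe_t),
 *          "dtrace.dtrace_probe_t");
 *
 * so that probe allocations come from a dedicated zone rather than the
 * general kalloc buckets.
 */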

static int dtrace_module_unloaded(struct kmod_info *kmod);
/*
 * DTrace Locking
 * DTrace is protected by three (relatively coarse-grained) locks:
 *
 * (1) dtrace_lock is required to manipulate essentially any DTrace state,
 *     including enabling state, probes, ECBs, consumer state, helper state,
 *     etc.  Importantly, dtrace_lock is _not_ required when in probe context;
 *     probe context is lock-free -- synchronization is handled via the
 *     dtrace_sync() cross call mechanism.
 *
 * (2) dtrace_provider_lock is required when manipulating provider state, or
 *     when provider state must be held constant.
 *
 * (3) dtrace_meta_lock is required when manipulating meta provider state, or
 *     when meta provider state must be held constant.
 *
 * The lock ordering between these three locks is dtrace_meta_lock before
 * dtrace_provider_lock before dtrace_lock.  (In particular, there are
 * several places where dtrace_provider_lock is held by the framework as it
 * calls into the providers -- which then call back into the framework,
 * grabbing dtrace_lock.)
 *
 * There are two other locks in the mix:  mod_lock and cpu_lock.  With respect
 * to dtrace_provider_lock and dtrace_lock, cpu_lock continues its historical
 * role as a coarse-grained lock; it is acquired before both of these locks.
 * With respect to dtrace_meta_lock, its behavior is stranger:  cpu_lock must
 * be acquired _between_ dtrace_meta_lock and any other DTrace locks.
 * mod_lock is similar with respect to dtrace_provider_lock in that it must be
 * acquired _between_ dtrace_provider_lock and dtrace_lock.
 */
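
/*
 * Editorial illustration (not from the original source):  a code path that
 * must take all three framework locks would acquire and release them in the
 * documented order:
 *
 *      lck_mtx_lock(&dtrace_meta_lock);
 *      lck_mtx_lock(&dtrace_provider_lock);
 *      lck_mtx_lock(&dtrace_lock);
 *      ...
 *      lck_mtx_unlock(&dtrace_lock);
 *      lck_mtx_unlock(&dtrace_provider_lock);
 *      lck_mtx_unlock(&dtrace_meta_lock);
 */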
/*
 * APPLE NOTE:
 *
 * For porting purposes, all kmutex_t vars have been changed
 * to lck_mtx_t, which require explicit initialization.
 *
 * kmutex_t becomes lck_mtx_t
 * mutex_enter() becomes lck_mtx_lock()
 * mutex_exit() becomes lck_mtx_unlock()
 *
 * Lock asserts are changed like this:
 *
 * ASSERT(MUTEX_HELD(&cpu_lock));
 *      becomes:
 * LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED);
 */
static lck_mtx_t        dtrace_lock;            /* probe state lock */
static lck_mtx_t        dtrace_provider_lock;   /* provider state lock */
static lck_mtx_t        dtrace_meta_lock;       /* meta-provider state lock */
static lck_rw_t         dtrace_dof_mode_lock;   /* dof mode lock */
/*
 * DTrace Provider Variables
 *
 * These are the variables relating to DTrace as a provider (that is, the
 * provider of the BEGIN, END, and ERROR probes).
 */
static dtrace_pattr_t   dtrace_provider_attr = {
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_PRIVATE, DTRACE_STABILITY_PRIVATE, DTRACE_CLASS_UNKNOWN },
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
{ DTRACE_STABILITY_STABLE, DTRACE_STABILITY_STABLE, DTRACE_CLASS_COMMON },
};

static void
dtrace_nullop(void)
{}

static int
dtrace_enable_nullop(void)
{
        return (0);
}

static dtrace_pops_t    dtrace_provider_ops = {
        (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop,
        (void (*)(void *, struct modctl *))dtrace_nullop,
        (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop,
        (void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
        (void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
        (void (*)(void *, dtrace_id_t, void *))dtrace_nullop,
        NULL,
        NULL,
        NULL,
        (void (*)(void *, dtrace_id_t, void *))dtrace_nullop
};

static dtrace_id_t      dtrace_probeid_begin;   /* special BEGIN probe */
static dtrace_id_t      dtrace_probeid_end;     /* special END probe */
dtrace_id_t             dtrace_probeid_error;   /* special ERROR probe */

/*
 * DTrace Helper Tracing Variables
 */
uint32_t dtrace_helptrace_next = 0;
uint32_t dtrace_helptrace_nlocals;
char    *dtrace_helptrace_buffer;
size_t  dtrace_helptrace_bufsize = 512 * 1024;

#if DEBUG
int     dtrace_helptrace_enabled = 1;
#else
int     dtrace_helptrace_enabled = 0;
#endif

#if defined (__arm64__)
/*
 * The ioctl for adding helper DOF is based on the
 * size of a user_addr_t.  We need to recognize both
 * U32 and U64 as the same action.
 */
#define DTRACEHIOC_ADDDOF_U32       _IOW('h', 4, user32_addr_t)
#define DTRACEHIOC_ADDDOF_U64       _IOW('h', 4, user64_addr_t)
#endif  /* __arm64__ */

/*
 * DTrace Error Hashing
 *
 * On DEBUG kernels, DTrace will track the errors that it has seen in a hash
 * table.  This is very useful for checking coverage of tests that are
 * expected to induce DIF or DOF processing errors, and may be useful for
 * debugging problems in the DIF code generator or in DOF generation.  The
 * error hash may be examined with the ::dtrace_errhash MDB dcmd.
 */
#if DEBUG
static dtrace_errhash_t dtrace_errhash[DTRACE_ERRHASHSZ];
static const char *dtrace_errlast;
static kthread_t *dtrace_errthread;
static lck_mtx_t dtrace_errlock;
#endif

/*
 * DTrace Macros and Constants
 *
 * These are various macros that are useful in various spots in the
 * implementation, along with a few random constants that have no meaning
 * outside of the implementation.  There is no real structure to this cpp
 * mishmash -- but is there ever?
 */
#define DTRACE_HASHSTR(hash, probe)     \
        dtrace_hash_str(*((char **)((uintptr_t)(probe) + (hash)->dth_stroffs)))

#define DTRACE_HASHNEXT(hash, probe)    \
        (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_nextoffs)

#define DTRACE_HASHPREV(hash, probe)    \
        (dtrace_probe_t **)((uintptr_t)(probe) + (hash)->dth_prevoffs)

#define DTRACE_HASHEQ(hash, lhs, rhs)   \
        (strcmp(*((char **)((uintptr_t)(lhs) + (hash)->dth_stroffs)), \
            *((char **)((uintptr_t)(rhs) + (hash)->dth_stroffs))) == 0)

#define DTRACE_AGGHASHSIZE_SLEW         17

#define DTRACE_V4MAPPED_OFFSET          (sizeof (uint32_t) * 3)

/*
 * The key for a thread-local variable consists of the lower 61 bits of the
 * current_thread(), plus the 3 bits of the highest active interrupt above LOCK_LEVEL.
 * We add DIF_VARIABLE_MAX to t_did to assure that the thread key is never
 * equal to a variable identifier.  This is necessary (but not sufficient) to
 * assure that global associative arrays never collide with thread-local
 * variables.  To guarantee that they cannot collide, we must also define the
 * order for keying dynamic variables.  That order is:
 *
 *   [ key0 ] ... [ keyn ] [ variable-key ] [ tls-key ]
 *
 * Because the variable-key and the tls-key are in orthogonal spaces, there is
 * no way for a global variable key signature to match a thread-local key
 * signature.
 */
#if defined (__x86_64__)
/* FIXME: two function calls!! */
#define DTRACE_TLS_THRKEY(where) { \
        uint_t intr = ml_at_interrupt_context(); /* Note: just one measly bit */ \
        uint64_t thr = (uintptr_t)current_thread(); \
        ASSERT(intr < (1 << 3)); \
        (where) = ((thr + DIF_VARIABLE_MAX) & \
            (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
}
#elif defined(__arm__)
/* FIXME: three function calls!!! */
#define DTRACE_TLS_THRKEY(where) { \
        uint_t intr = ml_at_interrupt_context(); /* Note: just one measly bit */ \
        uint64_t thr = (uintptr_t)current_thread(); \
        uint_t pid = (uint_t)dtrace_proc_selfpid(); \
        ASSERT(intr < (1 << 3)); \
        (where) = (((thr << 32 | pid) + DIF_VARIABLE_MAX) & \
            (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
}
#elif defined (__arm64__)
/* FIXME: two function calls!! */
#define DTRACE_TLS_THRKEY(where) { \
        uint_t intr = ml_at_interrupt_context(); /* Note: just one measly bit */ \
        uint64_t thr = (uintptr_t)current_thread(); \
        ASSERT(intr < (1 << 3)); \
        (where) = ((thr + DIF_VARIABLE_MAX) & \
            (((uint64_t)1 << 61) - 1)) | ((uint64_t)intr << 61); \
}
#else
#error Unknown architecture
#endif
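
/*
 * Editorial illustration (not from the original source):  the resulting
 * 64-bit key packs the interrupt nesting level into the top three bits and
 * the (biased) thread pointer into the low 61 bits:
 *
 *      63   61 60                                      0
 *      +------+----------------------------------------+
 *      | intr | (current_thread() + DIF_VARIABLE_MAX)  |
 *      +------+----------------------------------------+
 *
 * so two probes firing on the same thread at different interrupt levels
 * derive distinct thread-local variable keys.
 */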

#define DT_BSWAP_8(x)   ((x) & 0xff)
#define DT_BSWAP_16(x)  ((DT_BSWAP_8(x) << 8) | DT_BSWAP_8((x) >> 8))
#define DT_BSWAP_32(x)  ((DT_BSWAP_16(x) << 16) | DT_BSWAP_16((x) >> 16))
#define DT_BSWAP_64(x)  ((DT_BSWAP_32(x) << 32) | DT_BSWAP_32((x) >> 32))
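
/*
 * Editorial worked example:  DT_BSWAP_16(0x1234) == ((0x34 << 8) | 0x12)
 * == 0x3412; building on that, DT_BSWAP_32(0x12345678) ==
 * ((DT_BSWAP_16(0x12345678) << 16) | DT_BSWAP_16(0x1234)) ==
 * ((0x7856 << 16) | 0x3412) == 0x78563412.
 */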

#define DT_MASK_LO 0x00000000FFFFFFFFULL

#define DTRACE_STORE(type, tomax, offset, what) \
        *((type *)((uintptr_t)(tomax) + (uintptr_t)offset)) = (type)(what);

#define DTRACE_ALIGNCHECK(addr, size, flags)                            \
        if (addr & (MIN(size,4) - 1)) {                                 \
                *flags |= CPU_DTRACE_BADALIGN;                          \
                cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;        \
                return (0);                                             \
        }

#define DTRACE_RANGE_REMAIN(remp, addr, baseaddr, basesz)               \
do {                                                                    \
        if ((remp) != NULL) {                                           \
                *(remp) = (uintptr_t)(baseaddr) + (basesz) - (addr);    \
        }                                                               \
} while (0)

/*
 * Test whether a range of memory starting at testaddr of size testsz falls
 * within the range of memory described by addr, sz.  We take care to avoid
 * problems with overflow and underflow of the unsigned quantities, and
 * disallow all negative sizes.  Ranges of size 0 are allowed.
 */
#define DTRACE_INRANGE(testaddr, testsz, baseaddr, basesz) \
        ((testaddr) - (baseaddr) < (basesz) && \
        (testaddr) + (testsz) - (baseaddr) <= (basesz) && \
        (testaddr) + (testsz) >= (testaddr))
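
/*
 * Editorial worked example:  with baseaddr == 0x1000 and basesz == 0x100,
 * a probe of testaddr == 0xff0, testsz == 0x20 fails the first clause
 * (0xff0 - 0x1000 underflows to a huge unsigned value), and a probe of
 * testaddr == ~0ULL - 8, testsz == 0x10 fails the last clause because
 * testaddr + testsz wraps around; both are correctly rejected without any
 * signed arithmetic.
 */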

/*
 * Test whether alloc_sz bytes will fit in the scratch region.  We isolate
 * alloc_sz on the righthand side of the comparison in order to avoid overflow
 * or underflow in the comparison with it.  This is simpler than the INRANGE
 * check above, because we know that the dtms_scratch_ptr is valid in the
 * range.  Allocations of size zero are allowed.
 */
#define DTRACE_INSCRATCH(mstate, alloc_sz) \
        ((mstate)->dtms_scratch_base + (mstate)->dtms_scratch_size - \
        (mstate)->dtms_scratch_ptr >= (alloc_sz))
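
/*
 * Editorial worked example:  with dtms_scratch_base == 0x1000,
 * dtms_scratch_size == 0x200 and dtms_scratch_ptr == 0x1100, there are
 * 0x100 bytes of scratch remaining, so DTRACE_INSCRATCH(mstate, 0x80)
 * holds while DTRACE_INSCRATCH(mstate, 0x180) does not.
 */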

#define RECOVER_LABEL(bits) dtraceLoadRecover##bits:

#if defined (__x86_64__) || (defined (__arm__) || defined (__arm64__))
#define DTRACE_LOADFUNC(bits)                                           \
uint##bits##_t dtrace_load##bits(uintptr_t addr);                       \
                                                                        \
uint##bits##_t                                                          \
dtrace_load##bits(uintptr_t addr)                                       \
{                                                                       \
        size_t size = bits / NBBY;                                      \
        uint##bits##_t rval = 0;                                        \
        int i;                                                          \
        volatile uint16_t *flags = (volatile uint16_t *)                \
            &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;                   \
                                                                        \
        DTRACE_ALIGNCHECK(addr, size, flags);                           \
                                                                        \
        for (i = 0; i < dtrace_toxranges; i++) {                        \
                if (addr >= dtrace_toxrange[i].dtt_limit)               \
                        continue;                                       \
                                                                        \
                if (addr + size <= dtrace_toxrange[i].dtt_base)         \
                        continue;                                       \
                                                                        \
                /*                                                      \
                 * This address falls within a toxic region; return 0.  \
                 */                                                     \
                *flags |= CPU_DTRACE_BADADDR;                           \
                cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;        \
                return (0);                                             \
        }                                                               \
                                                                        \
        {                                                               \
        volatile vm_offset_t recover = (vm_offset_t)&&dtraceLoadRecover##bits;          \
        *flags |= CPU_DTRACE_NOFAULT;                                   \
        recover = dtrace_set_thread_recover(current_thread(), recover); \
        /*                                                              \
        * PR6394061 - avoid device memory that is unpredictably         \
        * mapped and unmapped                                           \
        */                                                              \
        if (pmap_valid_page(pmap_find_phys(kernel_pmap, addr)))         \
            rval = *((volatile uint##bits##_t *)addr);                  \
        else {                                                          \
                *flags |= CPU_DTRACE_BADADDR;                           \
                cpu_core[CPU->cpu_id].cpuc_dtrace_illval = addr;        \
        }                                                               \
                                                                        \
        RECOVER_LABEL(bits);                                            \
        (void)dtrace_set_thread_recover(current_thread(), recover);     \
        *flags &= ~CPU_DTRACE_NOFAULT;                                  \
        }                                                               \
                                                                        \
        return (rval);                                                  \
}
#else /* all other architectures */
#error Unknown Architecture
#endif
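
/*
 * Editorial note:  the generated dtrace_load##bits() functions rely on the
 * CPU_DTRACE_NOFAULT flag together with the per-thread recover address:
 * should the load fault anyway, the trap handler is expected to resume
 * execution at RECOVER_LABEL(bits), where the previous recover address is
 * restored and CPU_DTRACE_NOFAULT is cleared, leaving rval at its initial
 * value of 0.
 */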

#ifdef __LP64__
#define dtrace_loadptr  dtrace_load64
#else
#define dtrace_loadptr  dtrace_load32
#endif

#define DTRACE_DYNHASH_FREE     0
#define DTRACE_DYNHASH_SINK     1
#define DTRACE_DYNHASH_VALID    2

#define DTRACE_MATCH_FAIL       -1
#define DTRACE_MATCH_NEXT       0
#define DTRACE_MATCH_DONE       1
#define DTRACE_ANCHORED(probe)  ((probe)->dtpr_func[0] != '\0')
#define DTRACE_STATE_ALIGN      64

#define DTRACE_FLAGS2FLT(flags)                                         \
        (((flags) & CPU_DTRACE_BADADDR) ? DTRACEFLT_BADADDR :           \
        ((flags) & CPU_DTRACE_ILLOP) ? DTRACEFLT_ILLOP :                \
        ((flags) & CPU_DTRACE_DIVZERO) ? DTRACEFLT_DIVZERO :            \
        ((flags) & CPU_DTRACE_KPRIV) ? DTRACEFLT_KPRIV :                \
        ((flags) & CPU_DTRACE_UPRIV) ? DTRACEFLT_UPRIV :                \
        ((flags) & CPU_DTRACE_TUPOFLOW) ?  DTRACEFLT_TUPOFLOW :         \
        ((flags) & CPU_DTRACE_BADALIGN) ?  DTRACEFLT_BADALIGN :         \
        ((flags) & CPU_DTRACE_NOSCRATCH) ?  DTRACEFLT_NOSCRATCH :       \
        ((flags) & CPU_DTRACE_BADSTACK) ?  DTRACEFLT_BADSTACK :         \
        DTRACEFLT_UNKNOWN)

#define DTRACEACT_ISSTRING(act)                                         \
        ((act)->dta_kind == DTRACEACT_DIFEXPR &&                        \
        (act)->dta_difo->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING)

static size_t dtrace_strlen(const char *, size_t);
static dtrace_probe_t *dtrace_probe_lookup_id(dtrace_id_t id);
static void dtrace_enabling_provide(dtrace_provider_t *);
static int dtrace_enabling_match(dtrace_enabling_t *, int *, dtrace_match_cond_t *cond);
static void dtrace_enabling_matchall_with_cond(dtrace_match_cond_t *cond);
static void dtrace_enabling_matchall(void);
static dtrace_state_t *dtrace_anon_grab(void);
static uint64_t dtrace_helper(int, dtrace_mstate_t *,
    dtrace_state_t *, uint64_t, uint64_t);
static dtrace_helpers_t *dtrace_helpers_create(proc_t *);
static void dtrace_buffer_drop(dtrace_buffer_t *);
static intptr_t dtrace_buffer_reserve(dtrace_buffer_t *, size_t, size_t,
    dtrace_state_t *, dtrace_mstate_t *);
static int dtrace_state_option(dtrace_state_t *, dtrace_optid_t,
    dtrace_optval_t);
static int dtrace_ecb_create_enable(dtrace_probe_t *, void *, void *);
static void dtrace_helper_provider_destroy(dtrace_helper_provider_t *);
static int dtrace_canload_remains(uint64_t, size_t, size_t *,
        dtrace_mstate_t *, dtrace_vstate_t *);
static int dtrace_canstore_remains(uint64_t, size_t, size_t *,
        dtrace_mstate_t *, dtrace_vstate_t *);

/*
 * DTrace sysctl handlers
 *
 * These declarations and functions are used for a deeper DTrace configuration.
 * Most of them are not set on a per-consumer basis and may impact other DTrace
 * consumers.  Correctness may not be supported for all the variables, so you
 * should be careful about what values you are using.
 */

SYSCTL_DECL(_kern_dtrace);
SYSCTL_NODE(_kern, OID_AUTO, dtrace, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "dtrace");

static int
sysctl_dtrace_err_verbose SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2)
        int changed, error;
        int value = *(int *) arg1;

        error = sysctl_io_number(req, value, sizeof(value), &value, &changed);
        if (error || !changed)
                return (error);

        if (value != 0 && value != 1)
                return (ERANGE);

        lck_mtx_lock(&dtrace_lock);
                dtrace_err_verbose = value;
        lck_mtx_unlock(&dtrace_lock);

        return (0);
}

/*
 * kern.dtrace.err_verbose
 *
 * Set DTrace verbosity when an error occurred (0 = disabled, 1 = enabled).
 * Errors are reported when a DIFO or a DOF has been rejected by the kernel.
 */
SYSCTL_PROC(_kern_dtrace, OID_AUTO, err_verbose,
        CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
        &dtrace_err_verbose, 0,
        sysctl_dtrace_err_verbose, "I", "dtrace error verbose");

static int
sysctl_dtrace_buffer_memory_maxsize SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2, req)
        int changed, error;
        uint64_t value = *(uint64_t *) arg1;

        error = sysctl_io_number(req, value, sizeof(value), &value, &changed);
        if (error || !changed)
                return (error);

        if (value <= dtrace_buffer_memory_inuse)
                return (ERANGE);

        lck_mtx_lock(&dtrace_lock);
                dtrace_buffer_memory_maxsize = value;
        lck_mtx_unlock(&dtrace_lock);

        return (0);
}

/*
 * kern.dtrace.buffer_memory_maxsize
 *
 * Set the maximum size, in bytes, used by all the consumers' state buffers.
 * By default the limit is PHYS_MEM / 3 for *all* consumers.  Attempting to
 * set a null, a negative value or a value less than or equal to
 * dtrace_buffer_memory_inuse will result in a failure.
 */
SYSCTL_PROC(_kern_dtrace, OID_AUTO, buffer_memory_maxsize,
        CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
        &dtrace_buffer_memory_maxsize, 0,
        sysctl_dtrace_buffer_memory_maxsize, "Q", "dtrace state buffer memory maxsize");

/*
 * kern.dtrace.buffer_memory_inuse
 *
 * Current state buffer memory used, in bytes, by all the DTrace consumers.
 * This value is read-only.
 */
SYSCTL_QUAD(_kern_dtrace, OID_AUTO, buffer_memory_inuse, CTLFLAG_RD | CTLFLAG_LOCKED,
        &dtrace_buffer_memory_inuse, "dtrace state buffer memory in-use");

static int
sysctl_dtrace_difo_maxsize SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2, req)
        int changed, error;
        size_t value = *(size_t*) arg1;

        error = sysctl_io_number(req, value, sizeof(value), &value, &changed);
        if (error || !changed)
                return (error);

        if (value <= 0)
                return (ERANGE);

        lck_mtx_lock(&dtrace_lock);
                dtrace_difo_maxsize = value;
        lck_mtx_unlock(&dtrace_lock);

        return (0);
}

/*
 * kern.dtrace.difo_maxsize
 *
 * Set the DIFO max size in bytes, check the definition of dtrace_difo_maxsize
 * to get the default value.  Attempting to set a null or negative size will
 * result in a failure.
 */
SYSCTL_PROC(_kern_dtrace, OID_AUTO, difo_maxsize,
        CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
        &dtrace_difo_maxsize, 0,
        sysctl_dtrace_difo_maxsize, "Q", "dtrace difo maxsize");

static int
sysctl_dtrace_dof_maxsize SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2, req)
        int changed, error;
        dtrace_optval_t value = *(dtrace_optval_t *) arg1;

        error = sysctl_io_number(req, value, sizeof(value), &value, &changed);
        if (error || !changed)
                return (error);

        if (value <= 0)
                return (ERANGE);

        lck_mtx_lock(&dtrace_lock);
                dtrace_dof_maxsize = value;
        lck_mtx_unlock(&dtrace_lock);

        return (0);
}

/*
 * kern.dtrace.dof_maxsize
 *
 * Set the DOF max size in bytes, check the definition of dtrace_dof_maxsize to
 * get the default value.  Attempting to set a null or negative size will result
 * in a failure.
 */
SYSCTL_PROC(_kern_dtrace, OID_AUTO, dof_maxsize,
        CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
        &dtrace_dof_maxsize, 0,
        sysctl_dtrace_dof_maxsize, "Q", "dtrace dof maxsize");

static int
sysctl_dtrace_statvar_maxsize SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2, req)
        int changed, error;
        dtrace_optval_t value = *(dtrace_optval_t*) arg1;

        error = sysctl_io_number(req, value, sizeof(value), &value, &changed);
        if (error || !changed)
                return (error);

        if (value <= 0)
                return (ERANGE);

        if (value > dtrace_statvar_maxsize_max)
                return (ERANGE);

        lck_mtx_lock(&dtrace_lock);
                dtrace_statvar_maxsize = value;
        lck_mtx_unlock(&dtrace_lock);

        return (0);
}

/*
 * kern.dtrace.global_maxsize
 *
 * Set the variable max size in bytes, check the definition of
 * dtrace_statvar_maxsize to get the default value.  Attempting to set a null,
 * too high or negative size will result in a failure.
 */
SYSCTL_PROC(_kern_dtrace, OID_AUTO, global_maxsize,
        CTLTYPE_QUAD | CTLFLAG_RW | CTLFLAG_LOCKED,
        &dtrace_statvar_maxsize, 0,
        sysctl_dtrace_statvar_maxsize, "Q", "dtrace statvar maxsize");

static int
sysctl_dtrace_provide_private_probes SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg2)
        int error;
        int value = *(int *) arg1;

        error = sysctl_io_number(req, value, sizeof(value), &value, NULL);
        if (error)
                return (error);

        if (req->newptr) {
                if (value != 0 && value != 1)
                        return (ERANGE);

                /*
                 * We do not allow changing this back to zero, as private probes
                 * would still be left registered
                 */
                if (value != 1)
                        return (EPERM);

                lck_mtx_lock(&dtrace_lock);
                dtrace_provide_private_probes = value;
                lck_mtx_unlock(&dtrace_lock);
        }
        return (0);
}

/*
 * kern.dtrace.provide_private_probes
 *
 * Set whether the providers must provide the private probes.  This is
 * mainly used by the FBT provider to request probes for the private/static
 * symbols.
 */
SYSCTL_PROC(_kern_dtrace, OID_AUTO, provide_private_probes,
        CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
        &dtrace_provide_private_probes, 0,
        sysctl_dtrace_provide_private_probes, "I", "provider must provide the private probes");

/*
 * DTrace Probe Context Functions
 *
 * These functions are called from probe context.  Because probe context is
 * any context in which C may be called, arbitrary locks may be held,
 * interrupts may be disabled, we may be in arbitrary dispatched state, etc.
 * As a result, functions called from probe context may only call other DTrace
 * support functions -- they may not interact at all with the system at large.
 * (Note that the ASSERT macro is made probe-context safe by redefining it in
 * terms of dtrace_assfail(), a probe-context safe function.) If arbitrary
 * loads are to be performed from probe context, they _must_ be in terms of
 * the safe dtrace_load*() variants.
 *
 * Some functions in this block are not actually called from probe context;
 * for these functions, there will be a comment above the function reading
 * "Note:  not called from probe context."
 */
int
dtrace_assfail(const char *a, const char *f, int l)
{
        panic("dtrace: assertion failed: %s, file: %s, line: %d", a, f, l);

        /*
         * We just need something here that even the most clever compiler
         * cannot optimize away.
         */
        return (a[(uintptr_t)f]);
}

/*
 * Atomically increment a specified error counter from probe context.
 */
static void
dtrace_error(uint32_t *counter)
{
        /*
         * Most counters stored to in probe context are per-CPU counters.
         * However, there are some error conditions that are sufficiently
         * arcane that they don't merit per-CPU storage.  If these counters
         * are incremented concurrently on different CPUs, scalability will be
         * adversely affected -- but we don't expect them to be white-hot in a
         * correctly constructed enabling...
         */
        uint32_t oval, nval;

        do {
                oval = *counter;

                if ((nval = oval + 1) == 0) {
                        /*
                         * If the counter would wrap, set it to 1 -- assuring
                         * that the counter is never zero when we have seen
                         * errors.  (The counter must be 32-bits because we
                         * aren't guaranteed a 64-bit compare&swap operation.)
                         * To save this code both the infamy of being fingered
                         * by a priggish news story and the indignity of being
                         * the target of a neo-puritan witch trial, we're
                         * carefully avoiding any colorful description of the
                         * likelihood of this condition -- but suffice it to
                         * say that it is only slightly more likely than the
                         * overflow of predicate cache IDs, as discussed in
                         * dtrace_predicate_create().
                         */
                        nval = 1;
                }
        } while (dtrace_cas32(counter, oval, nval) != oval);
}

/*
 * Use the DTRACE_LOADFUNC macro to define functions for each of loading a
 * uint8_t, a uint16_t, a uint32_t and a uint64_t.
 */
DTRACE_LOADFUNC(8)
DTRACE_LOADFUNC(16)
DTRACE_LOADFUNC(32)
DTRACE_LOADFUNC(64)

static int
dtrace_inscratch(uintptr_t dest, size_t size, dtrace_mstate_t *mstate)
{
        if (dest < mstate->dtms_scratch_base)
                return (0);

        if (dest + size < dest)
                return (0);

        if (dest + size > mstate->dtms_scratch_ptr)
                return (0);

        return (1);
}

static int
dtrace_canstore_statvar(uint64_t addr, size_t sz, size_t *remain,
    dtrace_statvar_t **svars, int nsvars)
{
        int i;
        size_t maxglobalsize, maxlocalsize;

        maxglobalsize = dtrace_statvar_maxsize + sizeof (uint64_t);
        maxlocalsize = (maxglobalsize) * NCPU;

        if (nsvars == 0)
                return (0);

        for (i = 0; i < nsvars; i++) {
                dtrace_statvar_t *svar = svars[i];
                uint8_t scope;
                size_t size;

                if (svar == NULL || (size = svar->dtsv_size) == 0)
                        continue;

                scope = svar->dtsv_var.dtdv_scope;

                /*
                 * We verify that our size is valid in the spirit of providing
                 * defense in depth:  we want to prevent attackers from using
                 * DTrace to escalate an orthogonal kernel heap corruption bug
                 * into the ability to store to arbitrary locations in memory.
                 */
                VERIFY((scope == DIFV_SCOPE_GLOBAL && size <= maxglobalsize) ||
                        (scope == DIFV_SCOPE_LOCAL && size <= maxlocalsize));

                if (DTRACE_INRANGE(addr, sz, svar->dtsv_data, svar->dtsv_size)) {
                        DTRACE_RANGE_REMAIN(remain, addr, svar->dtsv_data,
                                svar->dtsv_size);
                        return (1);
                }
        }

        return (0);
}

/*
 * Check to see if the address is within a memory region to which a store may
 * be issued.  This includes the DTrace scratch areas, and any DTrace variable
 * region.  The caller of dtrace_canstore() is responsible for performing any
 * alignment checks that are needed before stores are actually executed.
 */
static int
dtrace_canstore(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate)
{
        return (dtrace_canstore_remains(addr, sz, NULL, mstate, vstate));
}

/*
 * Implementation of dtrace_canstore which communicates the upper bound of the
 * allowed memory region.
 */
static int
dtrace_canstore_remains(uint64_t addr, size_t sz, size_t *remain,
        dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
        /*
         * First, check to see if the address is in scratch space...
         */
        if (DTRACE_INRANGE(addr, sz, mstate->dtms_scratch_base,
            mstate->dtms_scratch_size)) {
                DTRACE_RANGE_REMAIN(remain, addr, mstate->dtms_scratch_base,
                        mstate->dtms_scratch_size);
                return (1);
        }
        /*
         * Now check to see if it's a dynamic variable.  This check will pick
         * up both thread-local variables and any global dynamically-allocated
         * variables.
         */
        if (DTRACE_INRANGE(addr, sz, (uintptr_t)vstate->dtvs_dynvars.dtds_base,
            vstate->dtvs_dynvars.dtds_size)) {
                dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
                uintptr_t base = (uintptr_t)dstate->dtds_base +
                    (dstate->dtds_hashsize * sizeof (dtrace_dynhash_t));
                uintptr_t chunkoffs;
                dtrace_dynvar_t *dvar;

                /*
                 * Before we assume that we can store here, we need to make
                 * sure that it isn't in our metadata -- storing to our
                 * dynamic variable metadata would corrupt our state.  For
                 * the range to not include any dynamic variable metadata,
                 * it must:
                 *
                 *      (1) Start above the hash table that is at the base of
                 *      the dynamic variable space
                 *
                 *      (2) Have a starting chunk offset that is beyond the
                 *      dtrace_dynvar_t that is at the base of every chunk
                 *
                 *      (3) Not span a chunk boundary
                 *
                 *      (4) Not be in the tuple space of a dynamic variable
                 *
                 */
                if (addr < base)
                        return (0);

                chunkoffs = (addr - base) % dstate->dtds_chunksize;

                if (chunkoffs < sizeof (dtrace_dynvar_t))
                        return (0);

                if (chunkoffs + sz > dstate->dtds_chunksize)
                        return (0);

                dvar = (dtrace_dynvar_t *)((uintptr_t)addr - chunkoffs);

                if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE)
                        return (0);

                if (chunkoffs < sizeof (dtrace_dynvar_t) +
                        ((dvar->dtdv_tuple.dtt_nkeys - 1) * sizeof (dtrace_key_t)))
                        return (0);

                return (1);
        }

        /*
         * Finally, check the static local and global variables.  These checks
         * take the longest, so we perform them last.
         */
        if (dtrace_canstore_statvar(addr, sz, remain,
            vstate->dtvs_locals, vstate->dtvs_nlocals))
                return (1);

        if (dtrace_canstore_statvar(addr, sz, remain,
            vstate->dtvs_globals, vstate->dtvs_nglobals))
                return (1);

        return (0);
}

/*
 * Convenience routine to check to see if the address is within a memory
 * region in which a load may be issued given the user's privilege level;
 * if not, it sets the appropriate error flags and loads 'addr' into the
 * illegal value slot.
 *
 * DTrace subroutines (DIF_SUBR_*) should use this helper to implement
 * appropriate memory access protection.
 */
int
dtrace_canload(uint64_t addr, size_t sz, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate)
{
        return (dtrace_canload_remains(addr, sz, NULL, mstate, vstate));
}
, size_t sz
, size_t *remain
, 
1109         dtrace_mstate_t 
*mstate
, dtrace_vstate_t 
*vstate
) 
1111         volatile uint64_t *illval 
= &cpu_core
[CPU
->cpu_id
].cpuc_dtrace_illval
; 
1114          * If we hold the privilege to read from kernel memory, then 
1115          * everything is readable. 
1117         if ((mstate
->dtms_access 
& DTRACE_ACCESS_KERNEL
) != 0) { 
1118                 DTRACE_RANGE_REMAIN(remain
, addr
, addr
, sz
); 
1123          * You can obviously read that which you can store. 
1125         if (dtrace_canstore_remains(addr
, sz
, remain
, mstate
, vstate
)) 
1129          * We're allowed to read from our own string table. 
1131         if (DTRACE_INRANGE(addr
, sz
, (uintptr_t)mstate
->dtms_difo
->dtdo_strtab
, 
1132             mstate
->dtms_difo
->dtdo_strlen
)) { 
1133                 DTRACE_RANGE_REMAIN(remain
, addr
, 
1134                         mstate
->dtms_difo
->dtdo_strtab
, 
1135                         mstate
->dtms_difo
->dtdo_strlen
); 
1139         DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV
); 

/*
 * Convenience routine to check to see if a given string is within a memory
 * region in which a load may be issued given the user's privilege level;
 * this exists so that we don't need to issue unnecessary dtrace_strlen()
 * calls in the event that the user has all privileges.
 */
static int
dtrace_strcanload(uint64_t addr, size_t sz, size_t *remain,
        dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
        size_t rsize;

        /*
         * If we hold the privilege to read from kernel memory, then
         * everything is readable.
         */
        if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) {
                DTRACE_RANGE_REMAIN(remain, addr, addr, sz);
                return (1);
        }

        /*
         * Even if the caller is uninterested in querying the remaining valid
         * range, it is required to ensure that the access is allowed.
         */
        if (remain == NULL) {
                remain = &rsize;
        }
        if (dtrace_canload_remains(addr, 0, remain, mstate, vstate)) {
                size_t strsz;
                /*
                 * Perform the strlen after determining the length of the
                 * memory region which is accessible.  This prevents timing
                 * information from being used to find NULs in memory which is
                 * not accessible to the caller.
                 */
                strsz = 1 + dtrace_strlen((char *)(uintptr_t)addr,
                        MIN(sz, *remain));
                if (strsz <= *remain) {
                        return (1);
                }
        }

        return (0);
}

/*
 * Convenience routine to check to see if a given variable is within a memory
 * region in which a load may be issued given the user's privilege level.
 */
static int
dtrace_vcanload(void *src, dtrace_diftype_t *type, size_t *remain,
        dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
        size_t sz;
        ASSERT(type->dtdt_flags & DIF_TF_BYREF);

        /*
         * Calculate the max size before performing any checks since even
         * DTRACE_ACCESS_KERNEL-credentialed callers expect that this function
         * return the max length via 'remain'.
         */
        if (type->dtdt_kind == DIF_TYPE_STRING) {
                dtrace_state_t *state = vstate->dtvs_state;

                if (state != NULL) {
                        sz = state->dts_options[DTRACEOPT_STRSIZE];
                } else {
                        /*
                         * In helper context, we have a NULL state; fall back
                         * to using the system-wide default for the string size
                         * in this case.
                         */
                        sz = dtrace_strsize_default;
                }
        } else {
                sz = type->dtdt_size;
        }

        /*
         * If we hold the privilege to read from kernel memory, then
         * everything is readable.
         */
        if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0) {
                DTRACE_RANGE_REMAIN(remain, (uintptr_t)src, src, sz);
                return (1);
        }

        if (type->dtdt_kind == DIF_TYPE_STRING) {
                return (dtrace_strcanload((uintptr_t)src, sz, remain, mstate,
                        vstate));
        }
        return (dtrace_canload_remains((uintptr_t)src, sz, remain, mstate,
                vstate));
}

/*
 * Compare two strings using safe loads.
 */
static int
dtrace_strncmp(char *s1, char *s2, size_t limit)
{
        uint8_t c1, c2;
        volatile uint16_t *flags;

        if (s1 == s2 || limit == 0)
                return (0);

        flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

        do {
                if (s1 == NULL) {
                        c1 = '\0';
                } else {
                        c1 = dtrace_load8((uintptr_t)s1++);
                }

                if (s2 == NULL) {
                        c2 = '\0';
                } else {
                        c2 = dtrace_load8((uintptr_t)s2++);
                }

                if (c1 != c2)
                        return (c1 - c2);
        } while (--limit && c1 != '\0' && !(*flags & CPU_DTRACE_FAULT));

        return (0);
}

/*
 * Compute strlen(s) for a string using safe memory accesses.  The additional
 * len parameter is used to specify a maximum length to ensure completion.
 */
static size_t
dtrace_strlen(const char *s, size_t lim)
{
        uint_t len;

        for (len = 0; len != lim; len++) {
                if (dtrace_load8((uintptr_t)s++) == '\0')
                        break;
        }

        return (len);
}

/*
 * Check if an address falls within a toxic region.
 */
static int
dtrace_istoxic(uintptr_t kaddr, size_t size)
{
        uintptr_t taddr, tsize;
        int i;

        for (i = 0; i < dtrace_toxranges; i++) {
                taddr = dtrace_toxrange[i].dtt_base;
                tsize = dtrace_toxrange[i].dtt_limit - taddr;

                if (kaddr - taddr < tsize) {
                        DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
                        cpu_core[CPU->cpu_id].cpuc_dtrace_illval = kaddr;
                        return (1);
                }

                if (taddr - kaddr < size) {
                        DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR);
                        cpu_core[CPU->cpu_id].cpuc_dtrace_illval = taddr;
                        return (1);
                }
        }

        return (0);
}

/*
 * Copy src to dst using safe memory accesses.  The src is assumed to be unsafe
 * memory specified by the DIF program.  The dst is assumed to be safe memory
 * that we can store to directly because it is managed by DTrace.  As with
 * standard bcopy, overlapping copies are handled properly.
 */
static void
dtrace_bcopy(const void *src, void *dst, size_t len)
{
        if (len != 0) {
                uint8_t *s1 = dst;
                const uint8_t *s2 = src;

                if (s1 <= s2) {
                        do {
                                *s1++ = dtrace_load8((uintptr_t)s2++);
                        } while (--len != 0);
                } else {
                        s2 += len;
                        s1 += len;

                        do {
                                *--s1 = dtrace_load8((uintptr_t)--s2);
                        } while (--len != 0);
                }
        }
}

/*
 * Copy src to dst using safe memory accesses, up to either the specified
 * length, or the point that a nul byte is encountered.  The src is assumed to
 * be unsafe memory specified by the DIF program.  The dst is assumed to be
 * safe memory that we can store to directly because it is managed by DTrace.
 * Unlike dtrace_bcopy(), overlapping regions are not handled.
 */
static void
dtrace_strcpy(const void *src, void *dst, size_t len)
{
        if (len != 0) {
                uint8_t *s1 = dst, c;
                const uint8_t *s2 = src;

                do {
                        *s1++ = c = dtrace_load8((uintptr_t)s2++);
                } while (--len != 0 && c != '\0');
        }
}

/*
 * Copy src to dst, deriving the size and type from the specified (BYREF)
 * variable type.  The src is assumed to be unsafe memory specified by the DIF
 * program.  The dst is assumed to be DTrace variable memory that is of the
 * specified type; we assume that we can store to directly.
 */
static void
dtrace_vcopy(void *src, void *dst, dtrace_diftype_t *type, size_t limit)
{
        ASSERT(type->dtdt_flags & DIF_TF_BYREF);

        if (type->dtdt_kind == DIF_TYPE_STRING) {
                dtrace_strcpy(src, dst, MIN(type->dtdt_size, limit));
        } else {
                dtrace_bcopy(src, dst, MIN(type->dtdt_size, limit));
        }
}

/*
 * Compare s1 to s2 using safe memory accesses.  The s1 data is assumed to be
 * unsafe memory specified by the DIF program.  The s2 data is assumed to be
 * safe memory that we can access directly because it is managed by DTrace.
 */
static int
dtrace_bcmp(const void *s1, const void *s2, size_t len)
{
        volatile uint16_t *flags;

        flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;

        if (s1 == s2)
                return (0);

        if (s1 == NULL || s2 == NULL)
                return (1);

        if (s1 != s2 && len != 0) {
                const uint8_t *ps1 = s1;
                const uint8_t *ps2 = s2;

                do {
                        if (dtrace_load8((uintptr_t)ps1++) != *ps2++)
                                return (1);
                } while (--len != 0 && !(*flags & CPU_DTRACE_FAULT));
        }
        return (0);
}

/*
 * Zero the specified region using a simple byte-by-byte loop.  Note that this
 * is for safe DTrace-managed memory only.
 */
static void
dtrace_bzero(void *dst, size_t len)
{
        uchar_t *cp;

        for (cp = dst; len != 0; len--)
                *cp++ = 0;
}

static void
dtrace_add_128(uint64_t *addend1, uint64_t *addend2, uint64_t *sum)
{
        uint64_t result[2];

        result[0] = addend1[0] + addend2[0];
        result[1] = addend1[1] + addend2[1] +
            (result[0] < addend1[0] || result[0] < addend2[0] ? 1 : 0);

        sum[0] = result[0];
        sum[1] = result[1];
}

/*
 * Shift the 128-bit value in a by b. If b is positive, shift left.
 * If b is negative, shift right.
 */
static void
dtrace_shift_128(uint64_t *a, int b)
{
        uint64_t mask;

        if (b == 0)
                return;

        if (b < 0) {
                b = -b;
                if (b >= 64) {
                        a[0] = a[1] >> (b - 64);
                        a[1] = 0;
                } else {
                        a[0] >>= b;
                        mask = 1LL << (64 - b);
                        mask -= 1;
                        a[0] |= ((a[1] & mask) << (64 - b));
                        a[1] >>= b;
                }
        } else {
                if (b >= 64) {
                        a[1] = a[0] << (b - 64);
                        a[0] = 0;
                } else {
                        a[1] <<= b;
                        mask = a[0] >> (64 - b);
                        a[1] |= mask;
                        a[0] <<= b;
                }
        }
}

/*
 * The basic idea is to break the 2 64-bit values into 4 32-bit values,
 * use native multiplication on those, and then re-combine into the
 * resulting 128-bit value.
 *
 * (hi1 << 32 + lo1) * (hi2 << 32 + lo2) =
 *     hi1 * hi2 << 64 +
 *     hi1 * lo2 << 32 +
 *     hi2 * lo1 << 32 +
 *     lo1 * lo2
 */
static void
dtrace_multiply_128(uint64_t factor1, uint64_t factor2, uint64_t *product)
{
        uint64_t hi1, hi2, lo1, lo2;
        uint64_t tmp[2];

        hi1 = factor1 >> 32;
        hi2 = factor2 >> 32;

        lo1 = factor1 & DT_MASK_LO;
        lo2 = factor2 & DT_MASK_LO;

        product[0] = lo1 * lo2;
        product[1] = hi1 * hi2;

        tmp[0] = hi1 * lo2;
        tmp[1] = 0;
        dtrace_shift_128(tmp, 32);
        dtrace_add_128(product, tmp, product);

        tmp[0] = hi2 * lo1;
        tmp[1] = 0;
        dtrace_shift_128(tmp, 32);
        dtrace_add_128(product, tmp, product);
}

/*
 * This privilege check should be used by actions and subroutines to
 * verify that the user credentials of the process that enabled the
 * invoking ECB match the target credentials
 */
static int
dtrace_priv_proc_common_user(dtrace_state_t *state)
{
        cred_t *cr, *s_cr = state->dts_cred.dcr_cred;

        /*
         * We should always have a non-NULL state cred here, since if cred
         * is null (anonymous tracing), we fast-path bypass this routine.
         */
        ASSERT(s_cr != NULL);

        if ((cr = dtrace_CRED()) != NULL &&
            posix_cred_get(s_cr)->cr_uid == posix_cred_get(cr)->cr_uid &&
            posix_cred_get(s_cr)->cr_uid == posix_cred_get(cr)->cr_ruid &&
            posix_cred_get(s_cr)->cr_uid == posix_cred_get(cr)->cr_suid &&
            posix_cred_get(s_cr)->cr_gid == posix_cred_get(cr)->cr_gid &&
            posix_cred_get(s_cr)->cr_gid == posix_cred_get(cr)->cr_rgid &&
            posix_cred_get(s_cr)->cr_gid == posix_cred_get(cr)->cr_sgid)
                return (1);

        return (0);
}

/*
 * This privilege check should be used by actions and subroutines to
 * verify that the zone of the process that enabled the invoking ECB
 * matches the target credentials
 */
static int
dtrace_priv_proc_common_zone(dtrace_state_t *state)
{
        cred_t *cr, *s_cr = state->dts_cred.dcr_cred;
#pragma unused(cr, s_cr, state) /* __APPLE__ */

        /*
         * We should always have a non-NULL state cred here, since if cred
         * is null (anonymous tracing), we fast-path bypass this routine.
         */
        ASSERT(s_cr != NULL);

        return 1; /* APPLE NOTE: Darwin doesn't do zones. */
}

/*
 * This privilege check should be used by actions and subroutines to
 * verify that the process has not setuid or changed credentials.
 */
static int
dtrace_priv_proc_common_nocd(void)
{
        return 1; /* Darwin omits "No Core Dump" flag. */
}

static int
dtrace_priv_proc_destructive(dtrace_state_t *state)
{
        int action = state->dts_cred.dcr_action;

        if (ISSET(current_proc()->p_lflag, P_LNOATTACH))
                goto bad;

        if (dtrace_is_restricted() && !dtrace_can_attach_to_proc(current_proc()))
                goto bad;

        if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE) == 0) &&
            dtrace_priv_proc_common_zone(state) == 0)
                goto bad;

        if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER) == 0) &&
            dtrace_priv_proc_common_user(state) == 0)
                goto bad;

        if (((action & DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG) == 0) &&
            dtrace_priv_proc_common_nocd() == 0)
                goto bad;

        return (1);

bad:
        cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

        return (0);
}

static int
dtrace_priv_proc_control(dtrace_state_t *state)
{
        if (ISSET(current_proc()->p_lflag, P_LNOATTACH))
                goto bad;

        if (dtrace_is_restricted() && !dtrace_can_attach_to_proc(current_proc()))
                goto bad;

        if (state->dts_cred.dcr_action & DTRACE_CRA_PROC_CONTROL)
                return (1);

        if (dtrace_priv_proc_common_zone(state) &&
            dtrace_priv_proc_common_user(state) &&
            dtrace_priv_proc_common_nocd())
                return (1);

bad:
        cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

        return (0);
}

static int
dtrace_priv_proc(dtrace_state_t *state)
{
        if (ISSET(current_proc()->p_lflag, P_LNOATTACH))
                goto bad;

        if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed() && !dtrace_can_attach_to_proc(current_proc()))
                goto bad;

        if (state->dts_cred.dcr_action & DTRACE_CRA_PROC)
                return (1);

bad:
        cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

        return (0);
}

/*
 * The P_LNOATTACH check is an Apple specific check.
 * We need a version of dtrace_priv_proc() that omits
 * that check for PID and EXECNAME accesses
 */
static int
dtrace_priv_proc_relaxed(dtrace_state_t *state)
{
        if (state->dts_cred.dcr_action & DTRACE_CRA_PROC)
                return (1);

        cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_UPRIV;

        return (0);
}
static int
dtrace_priv_kernel(dtrace_state_t *state)
{
	if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed())
		goto bad;

	if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL)
		return (1);

bad:
	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;

	return (0);
}
static int
dtrace_priv_kernel_destructive(dtrace_state_t *state)
{
	if (dtrace_is_restricted())
		goto bad;

	if (state->dts_cred.dcr_action & DTRACE_CRA_KERNEL_DESTRUCTIVE)
		return (1);

bad:
	cpu_core[CPU->cpu_id].cpuc_dtrace_flags |= CPU_DTRACE_KPRIV;

	return (0);
}
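
/*
 * Editorial sketch (not part of the original source):  all of the privilege
 * checks above share one shape -- consult the enabling's dcr_action grant
 * bits, fall back to the finer-grained common checks where applicable, and
 * on failure set a per-CPU fault flag (CPU_DTRACE_UPRIV/KPRIV) rather than
 * returning an error code, because they run in probe context.  A caller
 * therefore treats them as simple boolean gates, as in this hypothetical
 * helper:
 */
static int
example_priv_gate(dtrace_state_t *state)
{
	/*
	 * Hypothetical: abort emulation of a proc-privileged action; the
	 * fault flags have already been set by the failed check.
	 */
	if (!dtrace_priv_proc(state))
		return (0);

	return (1);
}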
/*
 * Note:  not called from probe context.  This function is called
 * asynchronously (and at a regular interval) from outside of probe context to
 * clean the dirty dynamic variable lists on all CPUs.  Dynamic variable
 * cleaning is explained in detail in <sys/dtrace_impl.h>.
 */
static void
dtrace_dynvar_clean(dtrace_dstate_t *dstate)
{
	dtrace_dynvar_t *dirty;
	dtrace_dstate_percpu_t *dcpu;
	int i, work = 0;

	for (i = 0; i < (int)NCPU; i++) {
		dcpu = &dstate->dtds_percpu[i];

		ASSERT(dcpu->dtdsc_rinsing == NULL);

		/*
		 * If the dirty list is NULL, there is no dirty work to do.
		 */
		if (dcpu->dtdsc_dirty == NULL)
			continue;

		/*
		 * If the clean list is non-NULL, then we're not going to do
		 * any work for this CPU -- it means that there has not been
		 * a dtrace_dynvar() allocation on this CPU (or from this CPU)
		 * since the last time we cleaned house.
		 */
		if (dcpu->dtdsc_clean != NULL)
			continue;

		work = 1;

		/*
		 * Atomically move the dirty list aside.
		 */
		do {
			dirty = dcpu->dtdsc_dirty;

			/*
			 * Before we zap the dirty list, set the rinsing list.
			 * (This allows for a potential assertion in
			 * dtrace_dynvar():  if a free dynamic variable appears
			 * on a hash chain, either the dirty list or the
			 * rinsing list for some CPU must be non-NULL.)
			 */
			dcpu->dtdsc_rinsing = dirty;
			dtrace_membar_producer();
		} while (dtrace_casptr(&dcpu->dtdsc_dirty,
		    dirty, NULL) != dirty);
	}

	if (!work) {
		/*
		 * We have no work to do; we can simply return.
		 */
		return;
	}

	dtrace_sync();

	for (i = 0; i < (int)NCPU; i++) {
		dcpu = &dstate->dtds_percpu[i];

		if (dcpu->dtdsc_rinsing == NULL)
			continue;

		/*
		 * We are now guaranteed that no hash chain contains a pointer
		 * into this dirty list; we can make it clean.
		 */
		ASSERT(dcpu->dtdsc_clean == NULL);
		dcpu->dtdsc_clean = dcpu->dtdsc_rinsing;
		dcpu->dtdsc_rinsing = NULL;
	}

	/*
	 * Before we actually set the state to be DTRACE_DSTATE_CLEAN, make
	 * sure that all CPUs have seen all of the dtdsc_clean pointers.
	 * This prevents a race whereby a CPU incorrectly decides that
	 * the state should be something other than DTRACE_DSTATE_CLEAN
	 * after dtrace_dynvar_clean() has completed.
	 */
	dtrace_sync();

	dstate->dtds_state = DTRACE_DSTATE_CLEAN;
}
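
/*
 * Editorial sketch (not part of the original source):  the "move the dirty
 * list aside" step above is a lock-free list steal.  Reduced to its
 * essentials on a hypothetical singly-linked node type (example_node_t is
 * not a real DTrace type):
 */
typedef struct example_node {
	struct example_node *en_next;
} example_node_t;

static example_node_t *
example_steal_list(example_node_t * volatile *headp)
{
	example_node_t *head;

	/*
	 * Snapshot the head and compare-and-swap it to NULL; on success the
	 * caller owns the entire chain that hung off the snapshotted head.
	 */
	do {
		head = *headp;
	} while (dtrace_casptr((void *)headp, head, NULL) != head);

	return (head);
}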
/*
 * Depending on the value of the op parameter, this function looks-up,
 * allocates or deallocates an arbitrarily-keyed dynamic variable.  If an
 * allocation is requested, this function will return a pointer to a
 * dtrace_dynvar_t corresponding to the allocated variable -- or NULL if no
 * variable can be allocated.  If NULL is returned, the appropriate counter
 * will be incremented.
 */
static dtrace_dynvar_t *
dtrace_dynvar(dtrace_dstate_t *dstate, uint_t nkeys,
    dtrace_key_t *key, size_t dsize, dtrace_dynvar_op_t op,
    dtrace_mstate_t *mstate, dtrace_vstate_t *vstate)
{
	uint64_t hashval = DTRACE_DYNHASH_VALID;
	dtrace_dynhash_t *hash = dstate->dtds_hash;
	dtrace_dynvar_t *free, *new_free, *next, *dvar, *start, *prev = NULL;
	processorid_t me = CPU->cpu_id, cpu = me;
	dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[me];
	size_t bucket, ksize;
	size_t chunksize = dstate->dtds_chunksize;
	uintptr_t kdata, lock, nstate;
	uint_t i;

	/*
	 * Hash the key.  As with aggregations, we use Jenkins' "One-at-a-time"
	 * algorithm.  For the by-value portions, we perform the algorithm in
	 * 16-bit chunks (as opposed to 8-bit chunks).  This speeds things up a
	 * bit, and seems to have only a minute effect on distribution.  For
	 * the by-reference data, we perform "One-at-a-time" iterating (safely)
	 * over each referenced byte.  It's painful to do this, but it's much
	 * better than pathological hash distribution.  The efficacy of the
	 * hashing algorithm (and a comparison with other algorithms) may be
	 * found by running the ::dtrace_dynstat MDB dcmd.
	 */
	for (i = 0; i < nkeys; i++) {
		if (key[i].dttk_size == 0) {
			uint64_t val = key[i].dttk_value;

			hashval += (val >> 48) & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			hashval += (val >> 32) & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			hashval += (val >> 16) & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			hashval += val & 0xffff;
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);
		} else {
			/*
			 * This is incredibly painful, but it beats the hell
			 * out of the alternative.
			 */
			uint64_t j, size = key[i].dttk_size;
			uintptr_t base = (uintptr_t)key[i].dttk_value;

			if (!dtrace_canload(base, size, mstate, vstate))
				break;

			for (j = 0; j < size; j++) {
				hashval += dtrace_load8(base + j);
				hashval += (hashval << 10);
				hashval ^= (hashval >> 6);
			}
		}
	}

	if (DTRACE_CPUFLAG_ISSET(CPU_DTRACE_FAULT))
		return (NULL);

	hashval += (hashval << 3);
	hashval ^= (hashval >> 11);
	hashval += (hashval << 15);

	/*
	 * There is a remote chance (ideally, 1 in 2^31) that our hashval
	 * comes out to be one of our two sentinel hash values.  If this
	 * actually happens, we set the hashval to be a value known to be a
	 * non-sentinel value.
	 */
	if (hashval == DTRACE_DYNHASH_FREE || hashval == DTRACE_DYNHASH_SINK)
		hashval = DTRACE_DYNHASH_VALID;

	/*
	 * Yes, it's painful to do a divide here.  If the cycle count becomes
	 * important here, tricks can be pulled to reduce it.  (However, it's
	 * critical that hash collisions be kept to an absolute minimum;
	 * they're much more painful than a divide.)  It's better to have a
	 * solution that generates few collisions and still keeps things
	 * relatively simple.
	 */
	bucket = hashval % dstate->dtds_hashsize;

	if (op == DTRACE_DYNVAR_DEALLOC) {
		volatile uintptr_t *lockp = &hash[bucket].dtdh_lock;

		for (;;) {
			while ((lock = *lockp) & 1)
				continue;

			if (dtrace_casptr((void *)(uintptr_t)lockp,
			    (void *)lock, (void *)(lock + 1)) == (void *)lock)
				break;
		}

		dtrace_membar_producer();
	}

top:
	prev = NULL;
	lock = hash[bucket].dtdh_lock;

	dtrace_membar_consumer();

	start = hash[bucket].dtdh_chain;
	ASSERT(start != NULL && (start->dtdv_hashval == DTRACE_DYNHASH_SINK ||
	    start->dtdv_hashval != DTRACE_DYNHASH_FREE ||
	    op != DTRACE_DYNVAR_DEALLOC));

	for (dvar = start; dvar != NULL; dvar = dvar->dtdv_next) {
		dtrace_tuple_t *dtuple = &dvar->dtdv_tuple;
		dtrace_key_t *dkey = &dtuple->dtt_key[0];

		if (dvar->dtdv_hashval != hashval) {
			if (dvar->dtdv_hashval == DTRACE_DYNHASH_SINK) {
				/*
				 * We've reached the sink, and therefore the
				 * end of the hash chain; we can kick out of
				 * the loop knowing that we have seen a valid
				 * snapshot of state.
				 */
				ASSERT(dvar->dtdv_next == NULL);
				ASSERT(dvar == &dtrace_dynhash_sink);
				break;
			}

			if (dvar->dtdv_hashval == DTRACE_DYNHASH_FREE) {
				/*
				 * We've gone off the rails:  somewhere along
				 * the line, one of the members of this hash
				 * chain was deleted.  Note that we could also
				 * detect this by simply letting this loop run
				 * to completion, as we would eventually hit
				 * the end of the dirty list.  However, we
				 * want to avoid running the length of the
				 * dirty list unnecessarily (it might be quite
				 * long), so we catch this as early as
				 * possible by detecting the hash marker.  In
				 * this case, we simply set dvar to NULL and
				 * break; the conditional after the loop will
				 * send us back to top.
				 */
				dvar = NULL;
				break;
			}

			goto next;
		}

		if (dtuple->dtt_nkeys != nkeys)
			goto next;

		for (i = 0; i < nkeys; i++, dkey++) {
			if (dkey->dttk_size != key[i].dttk_size)
				goto next; /* size or type mismatch */

			if (dkey->dttk_size != 0) {
				if (dtrace_bcmp(
				    (void *)(uintptr_t)key[i].dttk_value,
				    (void *)(uintptr_t)dkey->dttk_value,
				    dkey->dttk_size))
					goto next;
			} else {
				if (dkey->dttk_value != key[i].dttk_value)
					goto next;
			}
		}

		if (op != DTRACE_DYNVAR_DEALLOC)
			return (dvar);

		ASSERT(dvar->dtdv_next == NULL ||
		    dvar->dtdv_next->dtdv_hashval != DTRACE_DYNHASH_FREE);

		if (prev != NULL) {
			ASSERT(hash[bucket].dtdh_chain != dvar);
			ASSERT(start != dvar);
			ASSERT(prev->dtdv_next == dvar);
			prev->dtdv_next = dvar->dtdv_next;
		} else {
			if (dtrace_casptr(&hash[bucket].dtdh_chain,
			    start, dvar->dtdv_next) != start) {
				/*
				 * We have failed to atomically swing the
				 * hash table head pointer, presumably because
				 * of a conflicting allocation on another CPU.
				 * We need to reread the hash chain and try
				 * again.
				 */
				goto top;
			}
		}

		dtrace_membar_producer();

		/*
		 * Now set the hash value to indicate that it's free.
		 */
		ASSERT(hash[bucket].dtdh_chain != dvar);
		dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;

		dtrace_membar_producer();

		/*
		 * Set the next pointer to point at the dirty list, and
		 * atomically swing the dirty pointer to the newly freed dvar.
		 */
		do {
			next = dcpu->dtdsc_dirty;
			dvar->dtdv_next = next;
		} while (dtrace_casptr(&dcpu->dtdsc_dirty, next, dvar) != next);

		/*
		 * Finally, unlock this hash bucket.
		 */
		ASSERT(hash[bucket].dtdh_lock == lock);
		ASSERT(lock & 1);
		hash[bucket].dtdh_lock++;

		return (NULL);
next:
		prev = dvar;
		continue;
	}

	if (dvar == NULL) {
		/*
		 * If dvar is NULL, it is because we went off the rails:
		 * one of the elements that we traversed in the hash chain
		 * was deleted while we were traversing it.  In this case,
		 * we assert that we aren't doing a dealloc (deallocs lock
		 * the hash bucket to prevent themselves from racing with
		 * one another), and retry the hash chain traversal.
		 */
		ASSERT(op != DTRACE_DYNVAR_DEALLOC);
		goto top;
	}

	if (op != DTRACE_DYNVAR_ALLOC) {
		/*
		 * If we are not to allocate a new variable, we want to
		 * return NULL now.  Before we return, check that the value
		 * of the lock word hasn't changed.  If it has, we may have
		 * seen an inconsistent snapshot.
		 */
		if (op == DTRACE_DYNVAR_NOALLOC) {
			if (hash[bucket].dtdh_lock != lock)
				goto top;
		} else {
			ASSERT(op == DTRACE_DYNVAR_DEALLOC);
			ASSERT(hash[bucket].dtdh_lock == lock);
			ASSERT(lock & 1);
			hash[bucket].dtdh_lock++;
		}

		return (NULL);
	}

	/*
	 * We need to allocate a new dynamic variable.  The size we need is the
	 * size of dtrace_dynvar plus the size of nkeys dtrace_key_t's plus the
	 * size of any auxiliary key data (rounded up to 8-byte alignment) plus
	 * the size of any referred-to data (dsize).  We then round the final
	 * size up to the chunksize for allocation.
	 */
	for (ksize = 0, i = 0; i < nkeys; i++)
		ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t));

	/*
	 * This should be pretty much impossible, but could happen if, say,
	 * strange DIF specified the tuple.  Ideally, this should be an
	 * assertion and not an error condition -- but that requires that the
	 * chunksize calculation in dtrace_difo_chunksize() be absolutely
	 * bullet-proof.  (That is, it must not be able to be fooled by
	 * malicious DIF.)  Given the lack of backwards branches in DIF,
	 * solving this would presumably not amount to solving the Halting
	 * Problem -- but it still seems awfully hard.
	 */
	if (sizeof (dtrace_dynvar_t) + sizeof (dtrace_key_t) * (nkeys - 1) +
	    ksize + dsize > chunksize) {
		dcpu->dtdsc_drops++;
		return (NULL);
	}

	nstate = DTRACE_DSTATE_EMPTY;

	do {
retry:
		free = dcpu->dtdsc_free;

		if (free == NULL) {
			dtrace_dynvar_t *clean = dcpu->dtdsc_clean;
			void *rval;

			if (clean == NULL) {
				/*
				 * We're out of dynamic variable space on
				 * this CPU.  Unless we have tried all CPUs,
				 * we'll try to allocate from a different
				 * CPU.
				 */
				switch (dstate->dtds_state) {
				case DTRACE_DSTATE_CLEAN: {
					void *sp = &dstate->dtds_state;

					if (++cpu >= (int)NCPU)
						cpu = 0;

					if (dcpu->dtdsc_dirty != NULL &&
					    nstate == DTRACE_DSTATE_EMPTY)
						nstate = DTRACE_DSTATE_DIRTY;

					if (dcpu->dtdsc_rinsing != NULL)
						nstate = DTRACE_DSTATE_RINSING;

					dcpu = &dstate->dtds_percpu[cpu];

					if (cpu != me)
						goto retry;

					(void) dtrace_cas32(sp,
					    DTRACE_DSTATE_CLEAN, nstate);

					/*
					 * To increment the correct bean
					 * counter, take another lap.
					 */
					goto retry;
				}

				case DTRACE_DSTATE_DIRTY:
					dcpu->dtdsc_dirty_drops++;
					break;

				case DTRACE_DSTATE_RINSING:
					dcpu->dtdsc_rinsing_drops++;
					break;

				case DTRACE_DSTATE_EMPTY:
					dcpu->dtdsc_drops++;
					break;
				}

				DTRACE_CPUFLAG_SET(CPU_DTRACE_DROP);
				return (NULL);
			}

			/*
			 * The clean list appears to be non-empty.  We want to
			 * move the clean list to the free list; we start by
			 * moving the clean pointer aside.
			 */
			if (dtrace_casptr(&dcpu->dtdsc_clean,
			    clean, NULL) != clean) {
				/*
				 * We are in one of two situations:
				 *
				 *  (a) The clean list was switched to the
				 *      free list by another CPU.
				 *
				 *  (b) The clean list was added to by the
				 *      cleansing cyclic.
				 *
				 * In either of these situations, we can
				 * just reattempt the free list allocation.
				 */
				goto retry;
			}

			ASSERT(clean->dtdv_hashval == DTRACE_DYNHASH_FREE);

			/*
			 * Now we'll move the clean list to the free list.
			 * It's impossible for this to fail:  the only way
			 * the free list can be updated is through this
			 * code path, and only one CPU can own the clean list.
			 * Thus, it would only be possible for this to fail if
			 * this code were racing with dtrace_dynvar_clean().
			 * (That is, if dtrace_dynvar_clean() updated the clean
			 * list, and we ended up racing to update the free
			 * list.)  This race is prevented by the dtrace_sync()
			 * in dtrace_dynvar_clean() -- which flushes the
			 * owners of the clean lists out before resetting
			 * the clean lists.
			 */
			rval = dtrace_casptr(&dcpu->dtdsc_free, NULL, clean);
			ASSERT(rval == NULL);
			goto retry;
		}

		dvar = free;
		new_free = dvar->dtdv_next;
	} while (dtrace_casptr(&dcpu->dtdsc_free, free, new_free) != free);

	/*
	 * We have now allocated a new chunk.  We copy the tuple keys into the
	 * tuple array and copy any referenced key data into the data space
	 * following the tuple array.  As we do this, we relocate dttk_value
	 * in the final tuple to point to the key data address in the chunk.
	 */
	kdata = (uintptr_t)&dvar->dtdv_tuple.dtt_key[nkeys];
	dvar->dtdv_data = (void *)(kdata + ksize);
	dvar->dtdv_tuple.dtt_nkeys = nkeys;

	for (i = 0; i < nkeys; i++) {
		dtrace_key_t *dkey = &dvar->dtdv_tuple.dtt_key[i];
		size_t kesize = key[i].dttk_size;

		if (kesize != 0) {
			dtrace_bcopy(
			    (const void *)(uintptr_t)key[i].dttk_value,
			    (void *)kdata, kesize);
			dkey->dttk_value = kdata;
			kdata += P2ROUNDUP(kesize, sizeof (uint64_t));
		} else {
			dkey->dttk_value = key[i].dttk_value;
		}

		dkey->dttk_size = kesize;
	}

	ASSERT(dvar->dtdv_hashval == DTRACE_DYNHASH_FREE);
	dvar->dtdv_hashval = hashval;
	dvar->dtdv_next = start;

	if (dtrace_casptr(&hash[bucket].dtdh_chain, start, dvar) == start)
		return (dvar);

	/*
	 * The cas has failed.  Either another CPU is adding an element to
	 * this hash chain, or another CPU is deleting an element from this
	 * hash chain.  The simplest way to deal with both of these cases
	 * (though not necessarily the most efficient) is to free our
	 * allocated block and tail-call ourselves.  Note that the free is
	 * to the dirty list and _not_ to the free list.  This is to prevent
	 * races with allocators, above.
	 */
	dvar->dtdv_hashval = DTRACE_DYNHASH_FREE;

	dtrace_membar_producer();

	do {
		free = dcpu->dtdsc_dirty;
		dvar->dtdv_next = free;
	} while (dtrace_casptr(&dcpu->dtdsc_dirty, free, dvar) != free);

	return (dtrace_dynvar(dstate, nkeys, key, dsize, op, mstate, vstate));
}
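
/*
 * Editorial sketch (not part of the original source):  for comparison with
 * the 16-bit-chunked variant used above, this is the textbook byte-at-a-time
 * form of Jenkins' "One-at-a-time" hash.  The mixing steps are identical;
 * only the granularity of the input differs.
 */
static uint32_t
example_jenkins_one_at_a_time(const uint8_t *data, size_t len)
{
	uint32_t hash = 0;
	size_t i;

	for (i = 0; i < len; i++) {
		hash += data[i];
		hash += (hash << 10);
		hash ^= (hash >> 6);
	}

	/* Final avalanche, exactly as in the function above. */
	hash += (hash << 3);
	hash ^= (hash >> 11);
	hash += (hash << 15);

	return (hash);
}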
static void
dtrace_aggregate_min(uint64_t *oval, uint64_t nval, uint64_t arg)
{
#pragma unused(arg) /* __APPLE__ */
	if ((int64_t)nval < (int64_t)*oval)
		*oval = nval;
}
static void
dtrace_aggregate_max(uint64_t *oval, uint64_t nval, uint64_t arg)
{
#pragma unused(arg) /* __APPLE__ */
	if ((int64_t)nval > (int64_t)*oval)
		*oval = nval;
}
static void
dtrace_aggregate_quantize(uint64_t *quanta, uint64_t nval, uint64_t incr)
{
	int i, zero = DTRACE_QUANTIZE_ZEROBUCKET;
	int64_t val = (int64_t)nval;

	if (val < 0) {
		for (i = 0; i < zero; i++) {
			if (val <= DTRACE_QUANTIZE_BUCKETVAL(i)) {
				quanta[i] += incr;
				return;
			}
		}
	} else {
		for (i = zero + 1; i < DTRACE_QUANTIZE_NBUCKETS; i++) {
			if (val < DTRACE_QUANTIZE_BUCKETVAL(i)) {
				quanta[i - 1] += incr;
				return;
			}
		}

		quanta[DTRACE_QUANTIZE_NBUCKETS - 1] += incr;
		return;
	}

	ASSERT(0);
}
static void
dtrace_aggregate_lquantize(uint64_t *lquanta, uint64_t nval, uint64_t incr)
{
	uint64_t arg = *lquanta++;
	int32_t base = DTRACE_LQUANTIZE_BASE(arg);
	uint16_t step = DTRACE_LQUANTIZE_STEP(arg);
	uint16_t levels = DTRACE_LQUANTIZE_LEVELS(arg);
	int32_t val = (int32_t)nval, level;

	ASSERT(step != 0);
	ASSERT(levels != 0);

	if (val < base) {
		/*
		 * This is an underflow.
		 */
		lquanta[0] += incr;
		return;
	}

	level = (val - base) / step;

	if (level < levels) {
		lquanta[level + 1] += incr;
		return;
	}

	/*
	 * This is an overflow.
	 */
	lquanta[levels + 1] += incr;
}
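
/*
 * Editorial worked example (not part of the original source):  the bucket
 * selection above reduces to the arithmetic below.  With base = 0, step = 10,
 * levels = 10, a value of 37 lands in level (37 - 0) / 10 = 3, i.e. in
 * lquanta[4]; anything below base goes to lquanta[0], anything at or above
 * base + step * levels goes to lquanta[levels + 1].
 */
static int
example_lquantize_bucket(int32_t val, int32_t base, uint16_t step,
    uint16_t levels)
{
	if (val < base)
		return (0);				/* underflow bucket */

	if ((uint32_t)((val - base) / step) < levels)
		return ((val - base) / step + 1);	/* linear region */

	return (levels + 1);				/* overflow bucket */
}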
static int
dtrace_aggregate_llquantize_bucket(int16_t factor, int16_t low, int16_t high,
                                   int16_t nsteps, int64_t value)
{
	int64_t this = 1, last, next;
	int base = 1, order;

	for (order = 0; order < low; ++order)
		this *= factor;

	/*
	 * If our value is less than our factor taken to the power of the
	 * low order of magnitude, it goes into the zeroth bucket.
	 */
	if (value < this)
		return (0);

	last = this;

	for (this *= factor; order <= high; ++order) {
		int nbuckets = this > nsteps ? nsteps : this;

		/*
		 * We should not generally get log/linear quantizations
		 * with a high magnitude that allows 64-bits to
		 * overflow, but we nonetheless protect against this
		 * by explicitly checking for overflow, and clamping
		 * our value accordingly.
		 */
		next = this * factor;
		if (next < this)
			value = this - 1;

		/*
		 * If our value lies within this order of magnitude,
		 * determine its position by taking the offset within
		 * the order of magnitude, dividing by the bucket
		 * width, and adding to our (accumulated) base.
		 */
		if (value < this)
			return (base + (value - last) / (this / nbuckets));

		base += nbuckets - (nbuckets / factor);
		last = this;
		this = next;
	}

	/*
	 * Our value is greater than or equal to our factor taken to the
	 * power of one plus the high magnitude -- return the top bucket.
	 */
	return (base);
}
static void
dtrace_aggregate_llquantize(uint64_t *llquanta, uint64_t nval, uint64_t incr)
{
	uint64_t arg    = *llquanta++;
	uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(arg);
	uint16_t low    = DTRACE_LLQUANTIZE_LOW(arg);
	uint16_t high   = DTRACE_LLQUANTIZE_HIGH(arg);
	uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(arg);

	llquanta[dtrace_aggregate_llquantize_bucket(factor, low, high, nsteps, nval)] += incr;
}
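
/*
 * Editorial worked example (not part of the original source; derived from
 * the bucket logic above):  with factor = 10, low = 0, high = 2, nsteps = 10,
 * each order of magnitude contributes nsteps - nsteps/factor = 9 buckets
 * beyond those shared with the order below:
 *
 *	value < 1            -> bucket 0
 *	1    <= value < 10   -> buckets 1..9   (width 1)
 *	10   <= value < 100  -> buckets 10..18 (width 10)
 *	100  <= value < 1000 -> buckets 19..27 (width 100)
 *	value >= 1000        -> bucket 28      (top bucket)
 */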
static void
dtrace_aggregate_avg(uint64_t *data, uint64_t nval, uint64_t arg)
{
#pragma unused(arg) /* __APPLE__ */
	data[0]++;
	data[1] += nval;
}
static void
dtrace_aggregate_stddev(uint64_t *data, uint64_t nval, uint64_t arg)
{
#pragma unused(arg) /* __APPLE__ */
	int64_t snval = (int64_t)nval;
	uint64_t tmp[2];

	data[0]++;
	data[1] += nval;

	/*
	 * What we want to say here is:
	 *
	 * data[2] += nval * nval;
	 *
	 * But given that nval is 64-bit, we could easily overflow, so
	 * we do this as 128-bit arithmetic.
	 */
	if (snval < 0)
		snval = -snval;

	dtrace_multiply_128((uint64_t)snval, (uint64_t)snval, tmp);
	dtrace_add_128(data + 2, tmp, data + 2);
}
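
/*
 * Editorial sketch (not part of the original source):  given the running
 * totals kept above -- count n in data[0], sum in data[1], and a 128-bit sum
 * of squares in data[2..3] -- a consumer can recover the standard deviation
 * as sqrt(E[x^2] - E[x]^2).  The helper below is an illustrative 64-bit
 * narrowing of that arithmetic (the real consumer works on the full 128-bit
 * sum of squares); the square root is left to the consumer.
 */
static uint64_t
example_variance(uint64_t n, uint64_t sum, uint64_t sumsq)
{
	/* variance = E[x^2] - E[x]^2; assumes sumsq fits in 64 bits */
	uint64_t mean = sum / n;

	return (sumsq / n - mean * mean);
}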
static void
dtrace_aggregate_count(uint64_t *oval, uint64_t nval, uint64_t arg)
{
#pragma unused(nval, arg) /* __APPLE__ */
	*oval = *oval + 1;
}
static void
dtrace_aggregate_sum(uint64_t *oval, uint64_t nval, uint64_t arg)
{
#pragma unused(arg) /* __APPLE__ */
	*oval += nval;
}
/*
 * Aggregate given the tuple in the principal data buffer, and the aggregating
 * action denoted by the specified dtrace_aggregation_t.  The aggregation
 * buffer is specified as the buf parameter.  This routine does not return
 * failure; if there is no space in the aggregation buffer, the data will be
 * dropped, and a corresponding counter incremented.
 */
static void
dtrace_aggregate(dtrace_aggregation_t *agg, dtrace_buffer_t *dbuf,
    intptr_t offset, dtrace_buffer_t *buf, uint64_t expr, uint64_t arg)
{
	dtrace_recdesc_t *rec = &agg->dtag_action.dta_rec;
	uint32_t i, ndx, size, fsize;
	uint32_t align = sizeof (uint64_t) - 1;
	dtrace_aggbuffer_t *agb;
	dtrace_aggkey_t *key;
	uint32_t hashval = 0, limit, isstr;
	caddr_t tomax, data, kdata;
	dtrace_actkind_t action;
	dtrace_action_t *act;
	intptr_t offs;

	if (!agg->dtag_hasarg) {
		/*
		 * Currently, only quantize() and lquantize() take additional
		 * arguments, and they have the same semantics:  an increment
		 * value that defaults to 1 when not present.  If additional
		 * aggregating actions take arguments, the setting of the
		 * default argument value will presumably have to become more
		 * sophisticated...
		 */
		arg = 1;
	}

	action = agg->dtag_action.dta_kind - DTRACEACT_AGGREGATION;
	size = rec->dtrd_offset - agg->dtag_base;
	fsize = size + rec->dtrd_size;

	ASSERT(dbuf->dtb_tomax != NULL);
	data = dbuf->dtb_tomax + offset + agg->dtag_base;

	if ((tomax = buf->dtb_tomax) == NULL) {
		dtrace_buffer_drop(buf);
		return;
	}

	/*
	 * The metastructure is always at the bottom of the buffer.
	 */
	agb = (dtrace_aggbuffer_t *)(tomax + buf->dtb_size -
	    sizeof (dtrace_aggbuffer_t));

	if (buf->dtb_offset == 0) {
		/*
		 * We just kludge up approximately 1/8th of the size to be
		 * buckets.  If this guess ends up being routinely
		 * off-the-mark, we may need to dynamically readjust this
		 * based on past performance.
		 */
		uintptr_t hashsize = (buf->dtb_size >> 3) / sizeof (uintptr_t);

		if ((uintptr_t)agb - hashsize * sizeof (dtrace_aggkey_t *) <
		    (uintptr_t)tomax || hashsize == 0) {
			/*
			 * We've been given a ludicrously small buffer;
			 * increment our drop count and leave.
			 */
			dtrace_buffer_drop(buf);
			return;
		}

		/*
		 * And now, a pathetic attempt to try to get an odd (or
		 * perchance, a prime) hash size for better hash distribution.
		 */
		if (hashsize > (DTRACE_AGGHASHSIZE_SLEW << 3))
			hashsize -= DTRACE_AGGHASHSIZE_SLEW;

		agb->dtagb_hashsize = hashsize;
		agb->dtagb_hash = (dtrace_aggkey_t **)((uintptr_t)agb -
		    agb->dtagb_hashsize * sizeof (dtrace_aggkey_t *));
		agb->dtagb_free = (uintptr_t)agb->dtagb_hash;

		for (i = 0; i < agb->dtagb_hashsize; i++)
			agb->dtagb_hash[i] = NULL;
	}

	ASSERT(agg->dtag_first != NULL);
	ASSERT(agg->dtag_first->dta_intuple);

	/*
	 * Calculate the hash value based on the key.  Note that we _don't_
	 * include the aggid in the hashing (but we will store it as part of
	 * the key).  The hashing algorithm is Bob Jenkins' "One-at-a-time"
	 * algorithm: a simple, quick algorithm that has no known funnels, and
	 * gets good distribution in practice.  The efficacy of the hashing
	 * algorithm (and a comparison with other algorithms) may be found by
	 * running the ::dtrace_aggstat MDB dcmd.
	 */
	for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
		i = act->dta_rec.dtrd_offset - agg->dtag_base;
		limit = i + act->dta_rec.dtrd_size;
		ASSERT(limit <= size);
		isstr = DTRACEACT_ISSTRING(act);

		for (; i < limit; i++) {
			hashval += data[i];
			hashval += (hashval << 10);
			hashval ^= (hashval >> 6);

			if (isstr && data[i] == '\0')
				break;
		}
	}

	hashval += (hashval << 3);
	hashval ^= (hashval >> 11);
	hashval += (hashval << 15);

	/*
	 * Yes, the divide here is expensive -- but it's generally the least
	 * of the performance issues given the amount of data that we iterate
	 * over to compute hash values, compare data, etc.
	 */
	ndx = hashval % agb->dtagb_hashsize;

	for (key = agb->dtagb_hash[ndx]; key != NULL; key = key->dtak_next) {
		ASSERT((caddr_t)key >= tomax);
		ASSERT((caddr_t)key < tomax + buf->dtb_size);

		if (hashval != key->dtak_hashval || key->dtak_size != size)
			continue;

		kdata = key->dtak_data;
		ASSERT(kdata >= tomax && kdata < tomax + buf->dtb_size);

		for (act = agg->dtag_first; act->dta_intuple;
		    act = act->dta_next) {
			i = act->dta_rec.dtrd_offset - agg->dtag_base;
			limit = i + act->dta_rec.dtrd_size;
			ASSERT(limit <= size);
			isstr = DTRACEACT_ISSTRING(act);

			for (; i < limit; i++) {
				if (kdata[i] != data[i])
					goto next;

				if (isstr && data[i] == '\0')
					break;
			}
		}

		if (action != key->dtak_action) {
			/*
			 * We are aggregating on the same value in the same
			 * aggregation with two different aggregating actions.
			 * (This should have been picked up in the compiler,
			 * so we may be dealing with errant or devious DIF.)
			 * This is an error condition; we indicate as much,
			 * and return.
			 */
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			return;
		}

		/*
		 * This is a hit:  we need to apply the aggregator to
		 * the value at this key.
		 */
		agg->dtag_aggregate((uint64_t *)(kdata + size), expr, arg);
		return;
next:
		continue;
	}

	/*
	 * We didn't find it.  We need to allocate some zero-filled space,
	 * link it into the hash table appropriately, and apply the aggregator
	 * to the (zero-filled) value.
	 */
	offs = buf->dtb_offset;
	while (offs & (align - 1))
		offs += sizeof (uint32_t);

	/*
	 * If we don't have enough room to both allocate a new key _and_
	 * its associated data, increment the drop count and return.
	 */
	if ((uintptr_t)tomax + offs + fsize >
	    agb->dtagb_free - sizeof (dtrace_aggkey_t)) {
		dtrace_buffer_drop(buf);
		return;
	}

	ASSERT(!(sizeof (dtrace_aggkey_t) & (sizeof (uintptr_t) - 1)));
	key = (dtrace_aggkey_t *)(agb->dtagb_free - sizeof (dtrace_aggkey_t));
	agb->dtagb_free -= sizeof (dtrace_aggkey_t);

	key->dtak_data = kdata = tomax + offs;
	buf->dtb_offset = offs + fsize;

	/*
	 * Now copy the data across.
	 */
	*((dtrace_aggid_t *)kdata) = agg->dtag_id;

	for (i = sizeof (dtrace_aggid_t); i < size; i++)
		kdata[i] = data[i];

	/*
	 * Because strings are not zeroed out by default, we need to iterate
	 * looking for actions that store strings, and we need to explicitly
	 * pad these strings out with zeroes.
	 */
	for (act = agg->dtag_first; act->dta_intuple; act = act->dta_next) {
		int nul;

		if (!DTRACEACT_ISSTRING(act))
			continue;

		i = act->dta_rec.dtrd_offset - agg->dtag_base;
		limit = i + act->dta_rec.dtrd_size;
		ASSERT(limit <= size);

		for (nul = 0; i < limit; i++) {
			if (nul) {
				kdata[i] = '\0';
				continue;
			}

			if (data[i] != '\0')
				continue;

			nul = 1;
		}
	}

	for (i = size; i < fsize; i++)
		kdata[i] = 0;

	key->dtak_hashval = hashval;
	key->dtak_size = size;
	key->dtak_action = action;
	key->dtak_next = agb->dtagb_hash[ndx];
	agb->dtagb_hash[ndx] = key;

	/*
	 * Finally, apply the aggregator.
	 */
	*((uint64_t *)(key->dtak_data + size)) = agg->dtag_initial;
	agg->dtag_aggregate((uint64_t *)(key->dtak_data + size), expr, arg);
}
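
/*
 * Editorial sketch of the aggregation buffer layout implied by the code
 * above (not part of the original source; low addresses at the top):
 *
 *	tomax                -> key/data records, allocated upward
 *	  ...                   (buf->dtb_offset advances this way)
 *	agb->dtagb_free      -> dtrace_aggkey_t's, allocated downward
 *	agb->dtagb_hash      -> array of dtagb_hashsize bucket pointers
 *	agb                  =  tomax + buf->dtb_size
 *	                        - sizeof (dtrace_aggbuffer_t)
 *
 * A new key thus fails to allocate when the upward-growing record area
 * would collide with the downward-growing key area -- the
 * dtrace_buffer_drop() case above.
 */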
/*
 * Given consumer state, this routine finds a speculation in the INACTIVE
 * state and transitions it into the ACTIVE state.  If there is no speculation
 * in the INACTIVE state, 0 is returned.  In this case, no error counter is
 * incremented -- it is up to the caller to take appropriate action.
 */
static int
dtrace_speculation(dtrace_state_t *state)
{
	int i = 0;
	dtrace_speculation_state_t current;
	uint32_t *stat = &state->dts_speculations_unavail, count;

	while (i < state->dts_nspeculations) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];

		current = spec->dtsp_state;

		if (current != DTRACESPEC_INACTIVE) {
			if (current == DTRACESPEC_COMMITTINGMANY ||
			    current == DTRACESPEC_COMMITTING ||
			    current == DTRACESPEC_DISCARDING)
				stat = &state->dts_speculations_busy;

			i++;
			continue;
		}

		if (dtrace_cas32((uint32_t *)&spec->dtsp_state,
		    current, DTRACESPEC_ACTIVE) == current)
			return (i + 1);
	}

	/*
	 * We couldn't find a speculation.  If we found as much as a single
	 * busy speculation buffer, we'll attribute this failure as "busy"
	 * instead of "unavail".
	 */
	do {
		count = *stat;
	} while (dtrace_cas32(stat, count, count + 1) != count);

	return (0);
}
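
/*
 * Editorial sketch (not part of the original source):  both the counter
 * bump above and every speculation state change below use the same
 * lock-free shape -- read the current value, compute its successor, and
 * retry the compare-and-swap until it lands.  Isolated into a hypothetical
 * helper:
 */
static uint32_t
example_spin_transition(uint32_t volatile *statep,
    uint32_t (*successor)(uint32_t))
{
	uint32_t current, new;

	/* Retry until our CAS observes the same value we computed from. */
	do {
		current = *statep;
		new = successor(current);
	} while (dtrace_cas32((uint32_t *)statep, current, new) != current);

	return (new);
}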
/*
 * This routine commits an active speculation.  If the specified speculation
 * is not in a valid state to perform a commit(), this routine will silently do
 * nothing.  The state of the specified speculation is transitioned according
 * to the state transition diagram outlined in <sys/dtrace_impl.h>.
 */
static void
dtrace_speculation_commit(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_buffer_t *src, *dest;
	uintptr_t daddr, saddr, dlimit, slimit;
	dtrace_speculation_state_t current, new = DTRACESPEC_INACTIVE;
	intptr_t offs;
	uint64_t timestamp;

	if (which == 0)
		return;

	if (which > (dtrace_specid_t)state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}

	spec = &state->dts_speculations[which - 1];
	src = &spec->dtsp_buffer[cpu];
	dest = &state->dts_buffer[cpu];

	do {
		current = spec->dtsp_state;

		if (current == DTRACESPEC_COMMITTINGMANY)
			break;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_DISCARDING:
			return;

		case DTRACESPEC_COMMITTING:
			/*
			 * This is only possible if we are (a) commit()'ing
			 * without having done a prior speculate() on this CPU
			 * and (b) racing with another commit() on a different
			 * CPU.  There's nothing to do -- we just assert that
			 * our offset is 0.
			 */
			ASSERT(src->dtb_offset == 0);
			return;

		case DTRACESPEC_ACTIVE:
			new = DTRACESPEC_COMMITTING;
			break;

		case DTRACESPEC_ACTIVEONE:
			/*
			 * This speculation is active on one CPU.  If our
			 * buffer offset is non-zero, we know that the one CPU
			 * must be us.  Otherwise, we are committing on a
			 * different CPU from the speculate(), and we must
			 * rely on being asynchronously cleaned.
			 */
			if (src->dtb_offset != 0) {
				new = DTRACESPEC_COMMITTING;
				break;
			}
			/*FALLTHROUGH*/

		case DTRACESPEC_ACTIVEMANY:
			new = DTRACESPEC_COMMITTINGMANY;
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	/*
	 * We have set the state to indicate that we are committing this
	 * speculation.  Now reserve the necessary space in the destination
	 * buffer.
	 */
	if ((offs = dtrace_buffer_reserve(dest, src->dtb_offset,
	    sizeof (uint64_t), state, NULL)) < 0) {
		dtrace_buffer_drop(dest);
		goto out;
	}

	/*
	 * We have sufficient space to copy the speculative buffer into the
	 * primary buffer.  First, modify the speculative buffer, filling
	 * in the timestamp of all entries with the current time.  The data
	 * must have the commit() time rather than the time it was traced,
	 * so that all entries in the primary buffer are in timestamp order.
	 */
	timestamp = dtrace_gethrtime();
	saddr = (uintptr_t)src->dtb_tomax;
	slimit = saddr + src->dtb_offset;
	while (saddr < slimit) {
		size_t size;
		dtrace_rechdr_t *dtrh = (dtrace_rechdr_t *)saddr;

		if (dtrh->dtrh_epid == DTRACE_EPIDNONE) {
			saddr += sizeof (dtrace_epid_t);
			continue;
		}

		ASSERT(dtrh->dtrh_epid <= ((dtrace_epid_t) state->dts_necbs));
		size = state->dts_ecbs[dtrh->dtrh_epid - 1]->dte_size;

		ASSERT(saddr + size <= slimit);
		ASSERT(size >= sizeof(dtrace_rechdr_t));
		ASSERT(DTRACE_RECORD_LOAD_TIMESTAMP(dtrh) == UINT64_MAX);

		DTRACE_RECORD_STORE_TIMESTAMP(dtrh, timestamp);

		saddr += size;
	}

	/*
	 * Copy the buffer across.  (Note that this is a
	 * highly suboptimal bcopy(); in the unlikely event that this becomes
	 * a serious performance issue, a high-performance DTrace-specific
	 * bcopy() should obviously be invented.)
	 */
	daddr = (uintptr_t)dest->dtb_tomax + offs;
	dlimit = daddr + src->dtb_offset;
	saddr = (uintptr_t)src->dtb_tomax;

	/*
	 * First, the aligned portion.
	 */
	while (dlimit - daddr >= sizeof (uint64_t)) {
		*((uint64_t *)daddr) = *((uint64_t *)saddr);

		daddr += sizeof (uint64_t);
		saddr += sizeof (uint64_t);
	}

	/*
	 * Now any left-over bit...
	 */
	while (dlimit - daddr)
		*((uint8_t *)daddr++) = *((uint8_t *)saddr++);

	/*
	 * Finally, commit the reserved space in the destination buffer.
	 */
	dest->dtb_offset = offs + src->dtb_offset;

out:
	/*
	 * If we're lucky enough to be the only active CPU on this speculation
	 * buffer, we can just set the state back to DTRACESPEC_INACTIVE.
	 */
	if (current == DTRACESPEC_ACTIVE ||
	    (current == DTRACESPEC_ACTIVEONE && new == DTRACESPEC_COMMITTING)) {
		uint32_t rval = dtrace_cas32((uint32_t *)&spec->dtsp_state,
		    DTRACESPEC_COMMITTING, DTRACESPEC_INACTIVE);
#pragma unused(rval) /* __APPLE__ */

		ASSERT(rval == DTRACESPEC_COMMITTING);
	}

	src->dtb_offset = 0;
	src->dtb_xamot_drops += src->dtb_drops;
	src->dtb_drops = 0;
}
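
/*
 * Editorial sketch (not part of the original source):  the copy loop above
 * -- an aligned 64-bit-word pass followed by a byte-at-a-time tail --
 * isolated into a hypothetical helper.  It assumes, as the caller above
 * does, that both addresses share 8-byte alignment.
 */
static void
example_copy(uintptr_t daddr, uintptr_t saddr, size_t len)
{
	uintptr_t dlimit = daddr + len;

	/* First, the aligned portion, eight bytes at a time... */
	while (dlimit - daddr >= sizeof (uint64_t)) {
		*((uint64_t *)daddr) = *((uint64_t *)saddr);
		daddr += sizeof (uint64_t);
		saddr += sizeof (uint64_t);
	}

	/* ...then any left-over bytes. */
	while (dlimit - daddr)
		*((uint8_t *)daddr++) = *((uint8_t *)saddr++);
}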
/*
 * This routine discards an active speculation.  If the specified speculation
 * is not in a valid state to perform a discard(), this routine will silently
 * do nothing.  The state of the specified speculation is transitioned
 * according to the state transition diagram outlined in <sys/dtrace_impl.h>.
 */
static void
dtrace_speculation_discard(dtrace_state_t *state, processorid_t cpu,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_speculation_state_t current, new = DTRACESPEC_INACTIVE;
	dtrace_buffer_t *buf;

	if (which == 0)
		return;

	if (which > (dtrace_specid_t)state->dts_nspeculations) {
		cpu_core[cpu].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return;
	}

	spec = &state->dts_speculations[which - 1];
	buf = &spec->dtsp_buffer[cpu];

	do {
		current = spec->dtsp_state;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_COMMITTINGMANY:
		case DTRACESPEC_COMMITTING:
		case DTRACESPEC_DISCARDING:
			return;

		case DTRACESPEC_ACTIVE:
		case DTRACESPEC_ACTIVEMANY:
			new = DTRACESPEC_DISCARDING;
			break;

		case DTRACESPEC_ACTIVEONE:
			if (buf->dtb_offset != 0) {
				new = DTRACESPEC_INACTIVE;
			} else {
				new = DTRACESPEC_DISCARDING;
			}
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	buf->dtb_offset = 0;
	buf->dtb_drops = 0;
}
/*
 * Note:  not called from probe context.  This function is called
 * asynchronously from cross call context to clean any speculations that are
 * in the COMMITTINGMANY or DISCARDING states.  These speculations may not be
 * transitioned back to the INACTIVE state until all CPUs have cleaned the
 * speculation.
 */
static void
dtrace_speculation_clean_here(dtrace_state_t *state)
{
	dtrace_icookie_t cookie;
	processorid_t cpu = CPU->cpu_id;
	dtrace_buffer_t *dest = &state->dts_buffer[cpu];
	dtrace_specid_t i;

	cookie = dtrace_interrupt_disable();

	if (dest->dtb_tomax == NULL) {
		dtrace_interrupt_enable(cookie);
		return;
	}

	for (i = 0; i < (dtrace_specid_t)state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];
		dtrace_buffer_t *src = &spec->dtsp_buffer[cpu];

		if (src->dtb_tomax == NULL)
			continue;

		if (spec->dtsp_state == DTRACESPEC_DISCARDING) {
			src->dtb_offset = 0;
			continue;
		}

		if (spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
			continue;

		if (src->dtb_offset == 0)
			continue;

		dtrace_speculation_commit(state, cpu, i + 1);
	}

	dtrace_interrupt_enable(cookie);
}
/*
 * Note:  not called from probe context.  This function is called
 * asynchronously (and at a regular interval) to clean any speculations that
 * are in the COMMITTINGMANY or DISCARDING states.  If it discovers that there
 * is work to be done, it cross calls all CPUs to perform that work;
 * COMMITTINGMANY and DISCARDING speculations may not be transitioned back to
 * the INACTIVE state until they have been cleaned by all CPUs.
 */
static void
dtrace_speculation_clean(dtrace_state_t *state)
{
	int work = 0;
	uint32_t rv;
	dtrace_specid_t i;

	for (i = 0; i < (dtrace_specid_t)state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];

		ASSERT(!spec->dtsp_cleaning);

		if (spec->dtsp_state != DTRACESPEC_DISCARDING &&
		    spec->dtsp_state != DTRACESPEC_COMMITTINGMANY)
			continue;

		work++;
		spec->dtsp_cleaning = 1;
	}

	if (!work)
		return;

	dtrace_xcall(DTRACE_CPUALL,
	    (dtrace_xcall_t)dtrace_speculation_clean_here, state);

	/*
	 * We now know that all CPUs have committed or discarded their
	 * speculation buffers, as appropriate.  We can now set the state
	 * to inactive.
	 */
	for (i = 0; i < (dtrace_specid_t)state->dts_nspeculations; i++) {
		dtrace_speculation_t *spec = &state->dts_speculations[i];
		dtrace_speculation_state_t current, new;

		if (!spec->dtsp_cleaning)
			continue;

		current = spec->dtsp_state;
		ASSERT(current == DTRACESPEC_DISCARDING ||
		    current == DTRACESPEC_COMMITTINGMANY);

		new = DTRACESPEC_INACTIVE;

		rv = dtrace_cas32((uint32_t *)&spec->dtsp_state, current, new);
		ASSERT(rv == current);
		spec->dtsp_cleaning = 0;
	}
}
/*
 * Called as part of a speculate() to get the speculative buffer associated
 * with a given speculation.  Returns NULL if the specified speculation is not
 * in an ACTIVE state.  If the speculation is in the ACTIVEONE state -- and
 * the active CPU is not the specified CPU -- the speculation will be
 * atomically transitioned into the ACTIVEMANY state.
 */
static dtrace_buffer_t *
dtrace_speculation_buffer(dtrace_state_t *state, processorid_t cpuid,
    dtrace_specid_t which)
{
	dtrace_speculation_t *spec;
	dtrace_speculation_state_t current, new = DTRACESPEC_INACTIVE;
	dtrace_buffer_t *buf;

	if (which == 0)
		return (NULL);

	if (which > (dtrace_specid_t)state->dts_nspeculations) {
		cpu_core[cpuid].cpuc_dtrace_flags |= CPU_DTRACE_ILLOP;
		return (NULL);
	}

	spec = &state->dts_speculations[which - 1];
	buf = &spec->dtsp_buffer[cpuid];

	do {
		current = spec->dtsp_state;

		switch (current) {
		case DTRACESPEC_INACTIVE:
		case DTRACESPEC_COMMITTINGMANY:
		case DTRACESPEC_DISCARDING:
			return (NULL);

		case DTRACESPEC_COMMITTING:
			ASSERT(buf->dtb_offset == 0);
			return (NULL);

		case DTRACESPEC_ACTIVEONE:
			/*
			 * This speculation is currently active on one CPU.
			 * Check the offset in the buffer; if it's non-zero,
			 * that CPU must be us (and we leave the state alone).
			 * If it's zero, assume that we're starting on a new
			 * CPU -- and change the state to indicate that the
			 * speculation is active on more than one CPU.
			 */
			if (buf->dtb_offset != 0)
				return (buf);

			new = DTRACESPEC_ACTIVEMANY;
			break;

		case DTRACESPEC_ACTIVEMANY:
			return (buf);

		case DTRACESPEC_ACTIVE:
			new = DTRACESPEC_ACTIVEONE;
			break;

		default:
			ASSERT(0);
		}
	} while (dtrace_cas32((uint32_t *)&spec->dtsp_state,
	    current, new) != current);

	ASSERT(new == DTRACESPEC_ACTIVEONE || new == DTRACESPEC_ACTIVEMANY);
	return (buf);
}
/*
 * Return a string.  In the event that the user lacks the privilege to access
 * arbitrary kernel memory, we copy the string out to scratch memory so that we
 * don't fail access checking.
 *
 * dtrace_dif_variable() uses this routine as a helper for various
 * builtin values such as 'execname' and 'probefunc.'
 */
static uintptr_t
dtrace_dif_varstr(uintptr_t addr, dtrace_state_t *state,
    dtrace_mstate_t *mstate)
{
	uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
	uintptr_t ret;
	size_t strsz;

	/*
	 * The easy case: this probe is allowed to read all of memory, so
	 * we can just return this as a vanilla pointer.
	 */
	if ((mstate->dtms_access & DTRACE_ACCESS_KERNEL) != 0)
		return (addr);

	/*
	 * This is the tougher case: we copy the string in question from
	 * kernel memory into scratch memory and return it that way: this
	 * ensures that we won't trip up when access checking tests the
	 * BYREF return value.
	 */
	strsz = dtrace_strlen((char *)addr, size) + 1;

	if (mstate->dtms_scratch_ptr + strsz >
	    mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
		return (0);
	}

	dtrace_strcpy((const void *)addr, (void *)mstate->dtms_scratch_ptr,
	    strsz);
	ret = mstate->dtms_scratch_ptr;
	mstate->dtms_scratch_ptr += strsz;

	return (ret);
}
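
/*
 * Editorial sketch (not part of the original source):  the scratch-space
 * bump allocation used above, isolated into a hypothetical helper.  Scratch
 * memory is per-probe-firing, so no synchronization is needed; exhaustion
 * is reported through the NOSCRATCH fault flag rather than an error return.
 */
static uintptr_t
example_scratch_alloc(dtrace_mstate_t *mstate, size_t size)
{
	uintptr_t ptr = mstate->dtms_scratch_ptr;

	if (ptr + size >
	    mstate->dtms_scratch_base + mstate->dtms_scratch_size) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
		return (0);
	}

	mstate->dtms_scratch_ptr += size;
	return (ptr);
}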
3199  * This function implements the DIF emulator's variable lookups.  The emulator 
3200  * passes a reserved variable identifier and optional built-in array index. 
3203 dtrace_dif_variable(dtrace_mstate_t 
*mstate
, dtrace_state_t 
*state
, uint64_t v
, 
3207          * If we're accessing one of the uncached arguments, we'll turn this 
3208          * into a reference in the args array. 
3210         if (v 
>= DIF_VAR_ARG0 
&& v 
<= DIF_VAR_ARG9
) { 
3211                 ndx 
= v 
- DIF_VAR_ARG0
; 
3217                 ASSERT(mstate
->dtms_present 
& DTRACE_MSTATE_ARGS
); 
3218                 if (ndx 
>= sizeof (mstate
->dtms_arg
) / 
3219                     sizeof (mstate
->dtms_arg
[0])) { 
3221                          * APPLE NOTE: Account for introduction of __dtrace_probe() 
3223                         int aframes 
= mstate
->dtms_probe
->dtpr_aframes 
+ 3; 
3224                         dtrace_vstate_t 
*vstate 
= &state
->dts_vstate
; 
3225                         dtrace_provider_t 
*pv
; 
3228                         pv 
= mstate
->dtms_probe
->dtpr_provider
; 
3229                         if (pv
->dtpv_pops
.dtps_getargval 
!= NULL
) 
3230                                 val 
= pv
->dtpv_pops
.dtps_getargval(pv
->dtpv_arg
, 
3231                                     mstate
->dtms_probe
->dtpr_id
, 
3232                                     mstate
->dtms_probe
->dtpr_arg
, ndx
, aframes
); 
3233                         /* Special case access of arg5 as passed to dtrace_probe_error() (which see.) */ 
3234                         else if (mstate
->dtms_probe
->dtpr_id 
== dtrace_probeid_error 
&& ndx 
== 5) { 
3235                                 return ((dtrace_state_t 
*)(uintptr_t)(mstate
->dtms_arg
[0]))->dts_arg_error_illval
; 
3239                                 val 
= dtrace_getarg(ndx
, aframes
, mstate
, vstate
); 
3242                          * This is regrettably required to keep the compiler 
3243                          * from tail-optimizing the call to dtrace_getarg(). 
3244                          * The condition always evaluates to true, but the 
3245                          * compiler has no way of figuring that out a priori. 
3246                          * (None of this would be necessary if the compiler 
3247                          * could be relied upon to _always_ tail-optimize 
3248                          * the call to dtrace_getarg() -- but it can't.) 
3250                         if (mstate
->dtms_probe 
!= NULL
) 
3256                 return (mstate
->dtms_arg
[ndx
]); 
3258         case DIF_VAR_UREGS
: { 
3261                 if (!dtrace_priv_proc(state
)) 
3264                 if ((thread 
= current_thread()) == NULL
) { 
3265                         DTRACE_CPUFLAG_SET(CPU_DTRACE_BADADDR
); 
3266                         cpu_core
[CPU
->cpu_id
].cpuc_dtrace_illval 
= 0; 
3270                 return (dtrace_getreg(find_user_regs(thread
), ndx
)); 
3274         case DIF_VAR_CURTHREAD
: 
3275                 if (!dtrace_priv_kernel(state
)) 
3278                 return ((uint64_t)(uintptr_t)current_thread()); 
3280         case DIF_VAR_TIMESTAMP
: 
3281                 if (!(mstate
->dtms_present 
& DTRACE_MSTATE_TIMESTAMP
)) { 
3282                         mstate
->dtms_timestamp 
= dtrace_gethrtime(); 
3283                         mstate
->dtms_present 
|= DTRACE_MSTATE_TIMESTAMP
; 
3285                 return (mstate
->dtms_timestamp
); 
3287         case DIF_VAR_VTIMESTAMP
: 
3288                 ASSERT(dtrace_vtime_references 
!= 0); 
3289                 return (dtrace_get_thread_vtime(current_thread())); 
3291         case DIF_VAR_WALLTIMESTAMP
: 
3292                 if (!(mstate
->dtms_present 
& DTRACE_MSTATE_WALLTIMESTAMP
)) { 
3293                         mstate
->dtms_walltimestamp 
= dtrace_gethrestime(); 
3294                         mstate
->dtms_present 
|= DTRACE_MSTATE_WALLTIMESTAMP
; 
3296                 return (mstate
->dtms_walltimestamp
); 
3298         case DIF_VAR_MACHTIMESTAMP
: 
3299                 if (!(mstate
->dtms_present 
& DTRACE_MSTATE_MACHTIMESTAMP
)) { 
3300                         mstate
->dtms_machtimestamp 
= mach_absolute_time(); 
3301                         mstate
->dtms_present 
|= DTRACE_MSTATE_MACHTIMESTAMP
; 
3303                 return (mstate
->dtms_machtimestamp
); 
	case DIF_VAR_CPU:
		return ((uint64_t) dtrace_get_thread_last_cpu_id(current_thread()));

	case DIF_VAR_IPL:
		if (!dtrace_priv_kernel(state))
			return (0);
		if (!(mstate->dtms_present & DTRACE_MSTATE_IPL)) {
			mstate->dtms_ipl = dtrace_getipl();
			mstate->dtms_present |= DTRACE_MSTATE_IPL;
		}
		return (mstate->dtms_ipl);

	case DIF_VAR_EPID:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_EPID);
		return (mstate->dtms_epid);

	case DIF_VAR_ID:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
		return (mstate->dtms_probe->dtpr_id);
	case DIF_VAR_STACKDEPTH:
		if (!dtrace_priv_kernel(state))
			return (0);
		if (!(mstate->dtms_present & DTRACE_MSTATE_STACKDEPTH)) {
			/*
			 * APPLE NOTE: Account for introduction of __dtrace_probe()
			 */
			int aframes = mstate->dtms_probe->dtpr_aframes + 3;

			mstate->dtms_stackdepth = dtrace_getstackdepth(aframes);
			mstate->dtms_present |= DTRACE_MSTATE_STACKDEPTH;
		}
		return (mstate->dtms_stackdepth);

	case DIF_VAR_USTACKDEPTH:
		if (!dtrace_priv_proc(state))
			return (0);
		if (!(mstate->dtms_present & DTRACE_MSTATE_USTACKDEPTH)) {
			/*
			 * See comment in DIF_VAR_PID.
			 */
			if (DTRACE_ANCHORED(mstate->dtms_probe) &&
			    CPU_ON_INTR(CPU)) {
				mstate->dtms_ustackdepth = 0;
			} else {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
				mstate->dtms_ustackdepth =
				    dtrace_getustackdepth();
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
			}
			mstate->dtms_present |= DTRACE_MSTATE_USTACKDEPTH;
		}
		return (mstate->dtms_ustackdepth);
	case DIF_VAR_CALLER:
		if (!dtrace_priv_kernel(state))
			return (0);
		if (!(mstate->dtms_present & DTRACE_MSTATE_CALLER)) {
			/*
			 * APPLE NOTE: Account for introduction of __dtrace_probe()
			 */
			int aframes = mstate->dtms_probe->dtpr_aframes + 3;

			if (!DTRACE_ANCHORED(mstate->dtms_probe)) {
				/*
				 * If this is an unanchored probe, we are
				 * required to go through the slow path:
				 * dtrace_caller() only guarantees correct
				 * results for anchored probes.
				 */
				pc_t caller[2];

				dtrace_getpcstack(caller, 2, aframes,
				    (uint32_t *)(uintptr_t)mstate->dtms_arg[0]);
				mstate->dtms_caller = caller[1];
			} else if ((mstate->dtms_caller =
			    dtrace_caller(aframes)) == (uintptr_t)-1) {
				/*
				 * We have failed to do this the quick way;
				 * we must resort to the slower approach of
				 * calling dtrace_getpcstack().
				 */
				pc_t caller;

				dtrace_getpcstack(&caller, 1, aframes, NULL);
				mstate->dtms_caller = caller;
			}

			mstate->dtms_present |= DTRACE_MSTATE_CALLER;
		}
		return (mstate->dtms_caller);
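	/*
	 * Usage sketch (editorial): DIF_VAR_CALLER is the D built-in caller,
	 * typically aggregated or printed with the kernel-symbol format:
	 *
	 *	dtrace -n 'fbt::*malloc*:entry { @[caller] = count(); }'
	 */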
	case DIF_VAR_UCALLER:
		if (!dtrace_priv_proc(state))
			return (0);
		if (!(mstate->dtms_present & DTRACE_MSTATE_UCALLER)) {
			uint64_t ustack[3];

			/*
			 * dtrace_getupcstack() fills in the first uint64_t
			 * with the current PID.  The second uint64_t will
			 * be the program counter at user-level.  The third
			 * uint64_t will contain the caller, which is what
			 * we're after.
			 */
			ustack[2] = 0;
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			dtrace_getupcstack(ustack, 3);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
			mstate->dtms_ucaller = ustack[2];
			mstate->dtms_present |= DTRACE_MSTATE_UCALLER;
		}
		return (mstate->dtms_ucaller);
	case DIF_VAR_PROBEPROV:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
		return (dtrace_dif_varstr(
		    (uintptr_t)mstate->dtms_probe->dtpr_provider->dtpv_name,
		    state, mstate));

	case DIF_VAR_PROBEMOD:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
		return (dtrace_dif_varstr(
		    (uintptr_t)mstate->dtms_probe->dtpr_mod,
		    state, mstate));

	case DIF_VAR_PROBEFUNC:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
		return (dtrace_dif_varstr(
		    (uintptr_t)mstate->dtms_probe->dtpr_func,
		    state, mstate));

	case DIF_VAR_PROBENAME:
		ASSERT(mstate->dtms_present & DTRACE_MSTATE_PROBE);
		return (dtrace_dif_varstr(
		    (uintptr_t)mstate->dtms_probe->dtpr_name,
		    state, mstate));
	case DIF_VAR_PID:
		if (!dtrace_priv_proc_relaxed(state))
			return (0);

		/*
		 * Note that we are assuming that an unanchored probe is
		 * always due to a high-level interrupt.  (And we're assuming
		 * that there is only a single high level interrupt.)
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			/* Anchored probe that fires while on an interrupt accrues to process 0 */
			return 0;

		return ((uint64_t)dtrace_proc_selfpid());

	case DIF_VAR_PPID:
		if (!dtrace_priv_proc_relaxed(state))
			return (0);

		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return (0);

		return ((uint64_t)dtrace_proc_selfppid());

	case DIF_VAR_TID:
		/* We do not need to check for null current_thread() */
		return thread_tid(current_thread()); /* globally unique */

	case DIF_VAR_PTHREAD_SELF:
		if (!dtrace_priv_proc(state))
			return (0);

		/* Not currently supported, but we should be able to delta the dispatchqaddr and dispatchqoffset to get pthread_self */
		return 0;

	case DIF_VAR_DISPATCHQADDR:
		if (!dtrace_priv_proc(state))
			return (0);

		/* We do not need to check for null current_thread() */
		return thread_dispatchqaddr(current_thread());
	case DIF_VAR_EXECNAME:
	{
		char *xname = (char *)mstate->dtms_scratch_ptr;
		size_t scratch_size = MAXCOMLEN+1;

		/* The scratch allocation's lifetime is that of the clause. */
		if (!DTRACE_INSCRATCH(mstate, scratch_size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			return 0;
		}

		if (!dtrace_priv_proc_relaxed(state))
			return (0);

		mstate->dtms_scratch_ptr += scratch_size;
		proc_selfname( xname, scratch_size );

		return ((uint64_t)(uintptr_t)xname);
	}
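	/*
	 * Usage sketch (editorial): the scratch-backed string above is the D
	 * built-in execname:
	 *
	 *	dtrace -n 'syscall:::entry { @[execname, probefunc] = count(); }'
	 */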
	case DIF_VAR_ZONENAME:
	{
		/* scratch_size is equal to length('global') + 1 for the null-terminator. */
		char *zname = (char *)mstate->dtms_scratch_ptr;
		size_t scratch_size = 6 + 1;

		if (!dtrace_priv_proc(state))
			return (0);

		/* The scratch allocation's lifetime is that of the clause. */
		if (!DTRACE_INSCRATCH(mstate, scratch_size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			return 0;
		}

		mstate->dtms_scratch_ptr += scratch_size;

		/* The kernel does not provide zonename, it will always return 'global'. */
		strlcpy(zname, "global", scratch_size);

		return ((uint64_t)(uintptr_t)zname);
	}
#if MONOTONIC
	case DIF_VAR_CPUINSTRS:
		return mt_cur_cpu_instrs();

	case DIF_VAR_CPUCYCLES:
		return mt_cur_cpu_cycles();

	case DIF_VAR_VINSTRS:
		return mt_cur_thread_instrs();

	case DIF_VAR_VCYCLES:
		return mt_cur_thread_cycles();
#else /* MONOTONIC */
	case DIF_VAR_CPUINSTRS: /* FALLTHROUGH */
	case DIF_VAR_CPUCYCLES: /* FALLTHROUGH */
	case DIF_VAR_VINSTRS: /* FALLTHROUGH */
	case DIF_VAR_VCYCLES: /* FALLTHROUGH */
		return 0;
#endif /* !MONOTONIC */
	case DIF_VAR_UID:
		if (!dtrace_priv_proc_relaxed(state))
			return (0);

		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return (0);

		return ((uint64_t) dtrace_proc_selfruid());

	case DIF_VAR_GID:
		if (!dtrace_priv_proc(state))
			return (0);

		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return (0);

		if (dtrace_CRED() != NULL)
			/* Credential does not require lazy initialization. */
			return ((uint64_t)kauth_getgid());
		else {
			/* proc_lock would be taken under kauth_cred_proc_ref() in kauth_cred_get(). */
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			return -1ULL;
		}

	case DIF_VAR_ERRNO: {
		uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
		if (!dtrace_priv_proc(state))
			return (0);

		/*
		 * See comment in DIF_VAR_PID.
		 */
		if (DTRACE_ANCHORED(mstate->dtms_probe) && CPU_ON_INTR(CPU))
			return (0);

		if (uthread)
			return (uint64_t)uthread->t_dtrace_errno;
		else {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			return -1ULL;
		}
	}

	default:
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return (0);
	}
}
/*
 * Emulate the execution of DTrace DIF subroutines invoked by the call opcode.
 * Notice that we don't bother validating the proper number of arguments or
 * their types in the tuple stack.  This isn't needed because all argument
 * interpretation is safe because of our load safety -- the worst that can
 * happen is that a bogus program can obtain bogus results.
 */
static void
dtrace_dif_subr(uint_t subr, uint_t rd, uint64_t *regs,
    dtrace_key_t *tupregs, int nargs,
    dtrace_mstate_t *mstate, dtrace_state_t *state)
{
	volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
	volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
	dtrace_vstate_t *vstate = &state->dts_vstate;
#if !defined(__APPLE__)
	union {
		mutex_impl_t mi;
		uint64_t mx;
	} m;

	union {
		krwlock_t ri;
		uintptr_t rw;
	} r;
#else
/* FIXME: awaits lock/mutex work */
#endif /* __APPLE__ */

	switch (subr) {
	case DIF_SUBR_RAND:
		regs[rd] = (dtrace_gethrtime() * 2416 + 374441) % 1771875;
		break;
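	/*
	 * Editorial note: the expression above is a small linear congruential
	 * generator seeded from the high-resolution clock -- cheap, repeatable
	 * pseudo-randomness for D's rand(), with no cryptographic strength.
	 */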
#if !defined(__APPLE__)
	case DIF_SUBR_MUTEX_OWNED:
		if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
		    mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		m.mx = dtrace_load64(tupregs[0].dttk_value);
		if (MUTEX_TYPE_ADAPTIVE(&m.mi))
			regs[rd] = MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER;
		else
			regs[rd] = LOCK_HELD(&m.mi.m_spin.m_spinlock);
		break;

	case DIF_SUBR_MUTEX_OWNER:
		if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
		    mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		m.mx = dtrace_load64(tupregs[0].dttk_value);
		if (MUTEX_TYPE_ADAPTIVE(&m.mi) &&
		    MUTEX_OWNER(&m.mi) != MUTEX_NO_OWNER)
			regs[rd] = (uintptr_t)MUTEX_OWNER(&m.mi);
		else
			regs[rd] = 0;
		break;

	case DIF_SUBR_MUTEX_TYPE_ADAPTIVE:
		if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
		    mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		m.mx = dtrace_load64(tupregs[0].dttk_value);
		regs[rd] = MUTEX_TYPE_ADAPTIVE(&m.mi);
		break;

	case DIF_SUBR_MUTEX_TYPE_SPIN:
		if (!dtrace_canload(tupregs[0].dttk_value, sizeof (kmutex_t),
		    mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		m.mx = dtrace_load64(tupregs[0].dttk_value);
		regs[rd] = MUTEX_TYPE_SPIN(&m.mi);
		break;

	case DIF_SUBR_RW_READ_HELD: {
		uintptr_t tmp;

		if (!dtrace_canload(tupregs[0].dttk_value, sizeof (uintptr_t),
		    mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		r.rw = dtrace_loadptr(tupregs[0].dttk_value);
		regs[rd] = _RW_READ_HELD(&r.ri, tmp);
		break;
	}

	case DIF_SUBR_RW_WRITE_HELD:
		if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t),
		    mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		r.rw = dtrace_loadptr(tupregs[0].dttk_value);
		regs[rd] = _RW_WRITE_HELD(&r.ri);
		break;

	case DIF_SUBR_RW_ISWRITER:
		if (!dtrace_canload(tupregs[0].dttk_value, sizeof (krwlock_t),
		    mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		r.rw = dtrace_loadptr(tupregs[0].dttk_value);
		regs[rd] = _RW_ISWRITER(&r.ri);
		break;
#else
/* FIXME: awaits lock/mutex work */
#endif /* __APPLE__ */
	case DIF_SUBR_BCOPY: {
		/*
		 * We need to be sure that the destination is in the scratch
		 * region -- no other region is allowed.
		 */
		uintptr_t src = tupregs[0].dttk_value;
		uintptr_t dest = tupregs[1].dttk_value;
		size_t size = tupregs[2].dttk_value;

		if (!dtrace_inscratch(dest, size, mstate)) {
			*flags |= CPU_DTRACE_BADADDR;
			*illval = regs[rd];
			break;
		}

		if (!dtrace_canload(src, size, mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		dtrace_bcopy((void *)src, (void *)dest, size);
		break;
	}
	case DIF_SUBR_ALLOCA:
	case DIF_SUBR_COPYIN: {
		uintptr_t dest = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
		uint64_t size =
		    tupregs[subr == DIF_SUBR_ALLOCA ? 0 : 1].dttk_value;
		size_t scratch_size = (dest - mstate->dtms_scratch_ptr) + size;

		/*
		 * Check whether the user can access kernel memory
		 */
		if (dtrace_priv_kernel(state) == 0) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_KPRIV);
			regs[rd] = 0;
			break;
		}
		/*
		 * This action doesn't require any credential checks since
		 * probes will not activate in user contexts to which the
		 * enabling user does not have permissions.
		 */

		/*
		 * Rounding up the user allocation size could have overflowed
		 * a large, bogus allocation (like -1ULL) to 0.
		 */
		if (scratch_size < size ||
		    !DTRACE_INSCRATCH(mstate, scratch_size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = 0;
			break;
		}

		if (subr == DIF_SUBR_COPYIN) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			if (dtrace_priv_proc(state))
				dtrace_copyin(tupregs[0].dttk_value, dest, size, flags);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
		}

		mstate->dtms_scratch_ptr += scratch_size;
		regs[rd] = dest;
		break;
	}
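	/*
	 * Usage sketch (editorial): both subroutines return scratch-backed
	 * memory whose lifetime is the clause, e.g.:
	 *
	 *	dtrace -n 'syscall::write:entry
	 *	    { this->b = copyin(arg1, 8); tracemem(this->b, 8); }'
	 */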
	case DIF_SUBR_COPYINTO: {
		uint64_t size = tupregs[1].dttk_value;
		uintptr_t dest = tupregs[2].dttk_value;

		/*
		 * This action doesn't require any credential checks since
		 * probes will not activate in user contexts to which the
		 * enabling user does not have permissions.
		 */
		if (!dtrace_inscratch(dest, size, mstate)) {
			*flags |= CPU_DTRACE_BADADDR;
			*illval = regs[rd];
			break;
		}

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		if (dtrace_priv_proc(state))
			dtrace_copyin(tupregs[0].dttk_value, dest, size, flags);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
		break;
	}
	case DIF_SUBR_COPYINSTR: {
		uintptr_t dest = mstate->dtms_scratch_ptr;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];

		if (nargs > 1 && tupregs[1].dttk_value < size)
			size = tupregs[1].dttk_value + 1;

		/*
		 * This action doesn't require any credential checks since
		 * probes will not activate in user contexts to which the
		 * enabling user does not have permissions.
		 */
		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = 0;
			break;
		}

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
		if (dtrace_priv_proc(state))
			dtrace_copyinstr(tupregs[0].dttk_value, dest, size, flags);
		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

		((char *)dest)[size - 1] = '\0';
		mstate->dtms_scratch_ptr += size;
		regs[rd] = dest;
		break;
	}
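	/*
	 * Usage sketch (editorial): copyinstr() copies at most strsize bytes
	 * and, as enforced above, always NUL-terminates:
	 *
	 *	dtrace -n 'syscall::open*:entry { printf("%s", copyinstr(arg0)); }'
	 */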
	case DIF_SUBR_MSGSIZE:
	case DIF_SUBR_MSGDSIZE: {
		/* Darwin does not implement SysV streams messages */
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		regs[rd] = 0;
		break;
	}
	case DIF_SUBR_PROGENYOF: {
		pid_t pid = tupregs[0].dttk_value;
		struct proc *p = current_proc();
		int rval = 0, lim = nprocs;

		while (p && (lim-- > 0)) {
			pid_t ppid;

			ppid = (pid_t)dtrace_load32((uintptr_t)&(p->p_pid));
			if (*flags & CPU_DTRACE_FAULT)
				break;

			if (ppid == pid) {
				rval = 1;
				break;
			}

			if (ppid == 0)
				break; /* Can't climb process tree any further. */

			p = (struct proc *)dtrace_loadptr((uintptr_t)&(p->p_pptr));
			if (*flags & CPU_DTRACE_FAULT)
				break;
		}

		regs[rd] = rval;
		break;
	}
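	/*
	 * Usage sketch (editorial): progenyof() lets a predicate match an
	 * entire process subtree, e.g. everything descended from PID 1:
	 *
	 *	dtrace -n 'syscall:::entry /progenyof(1)/ { @[execname] = count(); }'
	 */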
	case DIF_SUBR_SPECULATION:
		regs[rd] = dtrace_speculation(state);
		break;
	case DIF_SUBR_COPYOUT: {
		uintptr_t kaddr = tupregs[0].dttk_value;
		user_addr_t uaddr = tupregs[1].dttk_value;
		uint64_t size = tupregs[2].dttk_value;

		if (!dtrace_destructive_disallow &&
		    dtrace_priv_proc_control(state) &&
		    !dtrace_istoxic(kaddr, size) &&
		    dtrace_canload(kaddr, size, mstate, vstate)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			dtrace_copyout(kaddr, uaddr, size, flags);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
		}
		break;
	}

	case DIF_SUBR_COPYOUTSTR: {
		uintptr_t kaddr = tupregs[0].dttk_value;
		user_addr_t uaddr = tupregs[1].dttk_value;
		uint64_t size = tupregs[2].dttk_value;
		size_t lim;

		if (!dtrace_destructive_disallow &&
		    dtrace_priv_proc_control(state) &&
		    !dtrace_istoxic(kaddr, size) &&
		    dtrace_strcanload(kaddr, size, &lim, mstate, vstate)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
			dtrace_copyoutstr(kaddr, uaddr, lim, flags);
			DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
		}
		break;
	}
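	/*
	 * Usage sketch (editorial): these are destructive actions, gated above
	 * on dtrace_destructive_disallow; a consumer must also enable
	 * destructive actions explicitly (dtrace -w).  The names below are
	 * placeholders, not working code:
	 *
	 *	dtrace -w -n 'pid$target::somefunc:entry
	 *	    { copyoutstr("x", uaddr, 2); }'
	 */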
	case DIF_SUBR_STRLEN: {
		size_t size = state->dts_options[DTRACEOPT_STRSIZE];
		uintptr_t addr = (uintptr_t)tupregs[0].dttk_value;
		size_t lim;

		if (!dtrace_strcanload(addr, size, &lim, mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		regs[rd] = dtrace_strlen((char *)addr, lim);
		break;
	}
	case DIF_SUBR_STRCHR:
	case DIF_SUBR_STRRCHR: {
		/*
		 * We're going to iterate over the string looking for the
		 * specified character.  We will iterate until we have reached
		 * the string length or we have found the character.  If this
		 * is DIF_SUBR_STRRCHR, we will look for the last occurrence
		 * of the specified character instead of the first.
		 */
		uintptr_t addr = tupregs[0].dttk_value;
		uintptr_t addr_limit;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		size_t lim;
		char c, target = (char)tupregs[1].dttk_value;

		if (!dtrace_strcanload(addr, size, &lim, mstate, vstate)) {
			regs[rd] = 0;
			break;
		}
		addr_limit = addr + lim;

		for (regs[rd] = 0; addr < addr_limit; addr++) {
			if ((c = dtrace_load8(addr)) == target) {
				regs[rd] = addr;

				if (subr == DIF_SUBR_STRCHR)
					break;
			}

			if (c == '\0')
				break;
		}

		break;
	}
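	/*
	 * Worked examples (editorial):
	 *
	 *	strchr("/usr/bin/dtrace", '/')   => "/usr/bin/dtrace"
	 *	strrchr("/usr/bin/dtrace", '/')  => "/dtrace"
	 */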
	case DIF_SUBR_STRSTR:
	case DIF_SUBR_INDEX:
	case DIF_SUBR_RINDEX: {
		/*
		 * We're going to iterate over the string looking for the
		 * specified string.  We will iterate until we have reached
		 * the string length or we have found the string.  (Yes, this
		 * is done in the most naive way possible -- but considering
		 * that the string we're searching for is likely to be
		 * relatively short, the complexity of Rabin-Karp or similar
		 * hardly seems merited.)
		 */
		char *addr = (char *)(uintptr_t)tupregs[0].dttk_value;
		char *substr = (char *)(uintptr_t)tupregs[1].dttk_value;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		size_t len = dtrace_strlen(addr, size);
		size_t sublen = dtrace_strlen(substr, size);
		char *limit = addr + len, *orig = addr;
		int notfound = subr == DIF_SUBR_STRSTR ? 0 : -1;
		int inc = 1;

		regs[rd] = notfound;

		if (!dtrace_canload((uintptr_t)addr, len + 1, mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		if (!dtrace_canload((uintptr_t)substr, sublen + 1, mstate,
		    vstate)) {
			regs[rd] = 0;
			break;
		}

		/*
		 * strstr() and index()/rindex() have similar semantics if
		 * both strings are the empty string: strstr() returns a
		 * pointer to the (empty) string, and index() and rindex()
		 * both return index 0 (regardless of any position argument).
		 */
		if (sublen == 0 && len == 0) {
			if (subr == DIF_SUBR_STRSTR)
				regs[rd] = (uintptr_t)addr;
			else
				regs[rd] = 0;
			break;
		}

		if (subr != DIF_SUBR_STRSTR) {
			if (subr == DIF_SUBR_RINDEX) {
				limit = orig - 1;
				addr += len;
				inc = -1;
			}

			/*
			 * Both index() and rindex() take an optional position
			 * argument that denotes the starting position.
			 */
			if (nargs == 3) {
				int64_t pos = (int64_t)tupregs[2].dttk_value;

				/*
				 * If the position argument to index() is
				 * negative, Perl implicitly clamps it at
				 * zero.  This semantic is a little surprising
				 * given the special meaning of negative
				 * positions to similar Perl functions like
				 * substr(), but it appears to reflect a
				 * notion that index() can start from a
				 * negative index and increment its way up to
				 * the string.  Given this notion, Perl's
				 * rindex() is at least self-consistent in
				 * that it implicitly clamps positions greater
				 * than the string length to be the string
				 * length.  Where Perl completely loses
				 * coherence, however, is when the specified
				 * substring is the empty string ("").  In
				 * this case, even if the position is
				 * negative, rindex() returns 0 -- and even if
				 * the position is greater than the length,
				 * index() returns the string length.  These
				 * semantics violate the notion that index()
				 * should never return a value less than the
				 * specified position and that rindex() should
				 * never return a value greater than the
				 * specified position.  (One assumes that
				 * these semantics are artifacts of Perl's
				 * implementation and not the results of
				 * deliberate design -- it beggars belief that
				 * even Larry Wall could desire such oddness.)
				 * While in the abstract one would wish for
				 * consistent position semantics across
				 * substr(), index() and rindex() -- or at the
				 * very least self-consistent position
				 * semantics for index() and rindex() -- we
				 * instead opt to keep with the extant Perl
				 * semantics, in all their broken glory.  (Do
				 * we have more desire to maintain Perl's
				 * semantics than Perl does?  Probably.)
				 */
				if (subr == DIF_SUBR_RINDEX) {
					if (pos < 0) {
						if (sublen == 0)
							regs[rd] = 0;
						break;
					}

					if ((size_t)pos > len)
						pos = len;
				} else {
					if (pos < 0)
						pos = 0;

					if ((size_t)pos >= len) {
						if (sublen == 0)
							regs[rd] = len;
						break;
					}
				}

				addr = orig + pos;
			}
		}

		for (regs[rd] = notfound; addr != limit; addr += inc) {
			if (dtrace_strncmp(addr, substr, sublen) == 0) {
				if (subr != DIF_SUBR_STRSTR) {
					/*
					 * As D index() and rindex() are
					 * modeled on Perl (and not on awk),
					 * we return a zero-based (and not a
					 * one-based) index.  (For you Perl
					 * weenies: no, we're not going to add
					 * $[ -- and shouldn't you be at a con
					 * or something?)
					 */
					regs[rd] = (uintptr_t)(addr - orig);
					break;
				}

				ASSERT(subr == DIF_SUBR_STRSTR);
				regs[rd] = (uintptr_t)addr;
				break;
			}
		}

		break;
	}
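	/*
	 * Worked examples (editorial) of the Perl-compatible semantics above:
	 *
	 *	index("foobarbaz", "bar")     => 3
	 *	index("foobarbaz", "bar", 4)  => -1
	 *	rindex("foobarbaz", "ba")     => 6
	 *	rindex("foobarbaz", "ba", 5)  => 3
	 */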
	case DIF_SUBR_STRTOK: {
		uintptr_t addr = tupregs[0].dttk_value;
		uintptr_t tokaddr = tupregs[1].dttk_value;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		uintptr_t limit, toklimit;
		size_t clim;
		char *dest = (char *)mstate->dtms_scratch_ptr;
		uint8_t c='\0', tokmap[32];	/* 256 / 8 */
		uint64_t i;

		/*
		 * Check both the token buffer and (later) the input buffer,
		 * since both could be non-scratch addresses.
		 */
		if (!dtrace_strcanload(tokaddr, size, &clim, mstate, vstate)) {
			regs[rd] = 0;
			break;
		}
		toklimit = tokaddr + clim;

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = 0;
			break;
		}

		if (addr == 0) {
			/*
			 * If the address specified is NULL, we use our saved
			 * strtok pointer from the mstate.  Note that this
			 * means that the saved strtok pointer is _only_
			 * valid within multiple enablings of the same probe --
			 * it behaves like an implicit clause-local variable.
			 */
			addr = mstate->dtms_strtok;
			limit = mstate->dtms_strtok_limit;
		} else {
			/*
			 * If the user-specified address is non-NULL we must
			 * access check it.  This is the only time we have
			 * a chance to do so, since this address may reside
			 * in the string table of this clause-- future calls
			 * (when we fetch addr from mstate->dtms_strtok)
			 * would fail this access check.
			 */
			if (!dtrace_strcanload(addr, size, &clim, mstate,
			    vstate)) {
				regs[rd] = 0;
				break;
			}
			limit = addr + clim;
		}

		/*
		 * First, zero the token map, and then process the token
		 * string -- setting a bit in the map for every character
		 * found in the token string.
		 */
		for (i = 0; i < (int)sizeof (tokmap); i++)
			tokmap[i] = 0;

		for (; tokaddr < toklimit; tokaddr++) {
			if ((c = dtrace_load8(tokaddr)) == '\0')
				break;

			ASSERT((c >> 3) < sizeof (tokmap));
			tokmap[c >> 3] |= (1 << (c & 0x7));
		}

		for (; addr < limit; addr++) {
			/*
			 * We're looking for a character that is _not_
			 * contained in the token string.
			 */
			if ((c = dtrace_load8(addr)) == '\0')
				break;

			if (!(tokmap[c >> 3] & (1 << (c & 0x7))))
				break;
		}

		if (c == '\0') {
			/*
			 * We reached the end of the string without finding
			 * any character that was not in the token string.
			 * We return NULL in this case, and we set the saved
			 * address to NULL as well.
			 */
			regs[rd] = 0;
			mstate->dtms_strtok = 0;
			mstate->dtms_strtok_limit = 0;
			break;
		}

		/*
		 * From here on, we're copying into the destination string.
		 */
		for (i = 0; addr < limit && i < size - 1; addr++) {
			if ((c = dtrace_load8(addr)) == '\0')
				break;

			if (tokmap[c >> 3] & (1 << (c & 0x7)))
				break;

			dest[i++] = c;
		}

		dest[i] = '\0';
		regs[rd] = (uintptr_t)dest;
		mstate->dtms_scratch_ptr += size;
		mstate->dtms_strtok = addr;
		mstate->dtms_strtok_limit = limit;
		break;
	}
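	/*
	 * Usage sketch (editorial): like libc strtok(), a NULL first argument
	 * resumes from the pointer saved above -- valid, per the comment, only
	 * across enablings of the same probe:
	 *
	 *	strtok("/usr/bin/dtrace", "/")  => "usr"
	 *	strtok(NULL, "/")               => "bin"
	 *	strtok(NULL, "/")               => "dtrace"
	 */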
	case DIF_SUBR_SUBSTR: {
		uintptr_t s = tupregs[0].dttk_value;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		char *d = (char *)mstate->dtms_scratch_ptr;
		int64_t index = (int64_t)tupregs[1].dttk_value;
		int64_t remaining = (int64_t)tupregs[2].dttk_value;
		size_t len = dtrace_strlen((char *)s, size);
		int64_t i;

		if (!dtrace_canload(s, len + 1, mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = 0;
			break;
		}

		if (nargs <= 2)
			remaining = (int64_t)size;

		if (index < 0) {
			index += len;

			if (index < 0 && index + remaining > 0) {
				remaining += index;
				index = 0;
			}
		}

		if ((size_t)index >= len || index < 0) {
			remaining = 0;
		} else if (remaining < 0) {
			remaining += len - index;
		} else if ((uint64_t)index + (uint64_t)remaining > size) {
			remaining = size - index;
		}

		for (i = 0; i < remaining; i++) {
			if ((d[i] = dtrace_load8(s + index + i)) == '\0')
				break;
		}

		d[i] = '\0';

		mstate->dtms_scratch_ptr += size;
		regs[rd] = (uintptr_t)d;
		break;
	}
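	/*
	 * Worked examples (editorial) of the index/remaining clamping above:
	 *
	 *	substr("dtrace", 1)      => "trace"
	 *	substr("dtrace", -5, 3)  => "tra"
	 *	substr("dtrace", 2, -1)  => "rac"
	 */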
	case DIF_SUBR_GETMAJOR:
		regs[rd] = (uintptr_t)major( (dev_t)tupregs[0].dttk_value );
		break;

	case DIF_SUBR_GETMINOR:
		regs[rd] = (uintptr_t)minor( (dev_t)tupregs[0].dttk_value );
		break;

	case DIF_SUBR_DDI_PATHNAME: {
		/* APPLE NOTE: currently unsupported on Darwin */
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		regs[rd] = 0;
		break;
	}
	case DIF_SUBR_STRJOIN: {
		char *d = (char *)mstate->dtms_scratch_ptr;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		uintptr_t s1 = tupregs[0].dttk_value;
		uintptr_t s2 = tupregs[1].dttk_value;
		uint64_t i = 0, j = 0;
		size_t lim1, lim2;
		char c;

		if (!dtrace_strcanload(s1, size, &lim1, mstate, vstate) ||
		    !dtrace_strcanload(s2, size, &lim2, mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = 0;
			break;
		}

		for (;;) {
			if (i >= size) {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
				regs[rd] = 0;
				break;
			}
			c = (i >= lim1) ? '\0' : dtrace_load8(s1++);
			if ((d[i++] = c) == '\0') {
				i--;
				break;
			}
		}

		for (;;) {
			if (i >= size) {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
				regs[rd] = 0;
				break;
			}

			c = (j++ >= lim2) ? '\0' : dtrace_load8(s2++);
			if ((d[i++] = c) == '\0')
				break;
		}

		if (i < size) {
			mstate->dtms_scratch_ptr += i;
			regs[rd] = (uintptr_t)d;
		}

		break;
	}
	case DIF_SUBR_LLTOSTR: {
		int64_t i = (int64_t)tupregs[0].dttk_value;
		uint64_t val, digit;
		uint64_t size = 65;	/* enough room for 2^64 in binary */
		char *end = (char *)mstate->dtms_scratch_ptr + size - 1;
		int base = 10;

		if (nargs > 1) {
			if ((base = tupregs[1].dttk_value) <= 1 ||
			    base > ('z' - 'a' + 1) + ('9' - '0' + 1)) {
				*flags |= CPU_DTRACE_ILLOP;
				break;
			}
		}

		val = (base == 10 && i < 0) ? i * -1 : i;

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = 0;
			break;
		}

		for (*end-- = '\0'; val; val /= base) {
			if ((digit = val % base) <= '9' - '0') {
				*end-- = '0' + digit;
			} else {
				*end-- = 'a' + (digit - ('9' - '0') - 1);
			}
		}

		if (i == 0 && base == 16)
			*end-- = '0';

		if (base == 16)
			*end-- = 'x';

		if (i == 0 || base == 8 || base == 16)
			*end-- = '0';

		if (i < 0 && base == 10)
			*end-- = '-';

		regs[rd] = (uintptr_t)end + 1;
		mstate->dtms_scratch_ptr += size;
		break;
	}
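	/*
	 * Worked examples (editorial):
	 *
	 *	lltostr(65261)      => "65261"
	 *	lltostr(65261, 16)  => "0xfeed"
	 *	lltostr(-10)        => "-10"
	 */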
	case DIF_SUBR_HTONS:
	case DIF_SUBR_NTOHS:
#ifdef _BIG_ENDIAN
		regs[rd] = (uint16_t)tupregs[0].dttk_value;
#else
		regs[rd] = DT_BSWAP_16((uint16_t)tupregs[0].dttk_value);
#endif
		break;

	case DIF_SUBR_HTONL:
	case DIF_SUBR_NTOHL:
#ifdef _BIG_ENDIAN
		regs[rd] = (uint32_t)tupregs[0].dttk_value;
#else
		regs[rd] = DT_BSWAP_32((uint32_t)tupregs[0].dttk_value);
#endif
		break;

	case DIF_SUBR_HTONLL:
	case DIF_SUBR_NTOHLL:
#ifdef _BIG_ENDIAN
		regs[rd] = (uint64_t)tupregs[0].dttk_value;
#else
		regs[rd] = DT_BSWAP_64((uint64_t)tupregs[0].dttk_value);
#endif
		break;
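	/*
	 * Worked example (editorial): on little-endian Darwin these byte-swap,
	 * e.g. ntohl(0x0a000001) == 0x0100000a; on a big-endian build they
	 * reduce to plain truncating casts.
	 */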
	case DIF_SUBR_DIRNAME:
	case DIF_SUBR_BASENAME: {
		char *dest = (char *)mstate->dtms_scratch_ptr;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		uintptr_t src = tupregs[0].dttk_value;
		int i, j, len = dtrace_strlen((char *)src, size);
		int lastbase = -1, firstbase = -1, lastdir = -1;
		int start, end;

		if (!dtrace_canload(src, len + 1, mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = 0;
			break;
		}

		/*
		 * The basename and dirname for a zero-length string is
		 * defined to be "."
		 */
		if (len == 0) {
			len = 1;
			src = (uintptr_t)".";
		}

		/*
		 * Start from the back of the string, moving back toward the
		 * front until we see a character that isn't a slash.  That
		 * character is the last character in the basename.
		 */
		for (i = len - 1; i >= 0; i--) {
			if (dtrace_load8(src + i) != '/')
				break;
		}

		if (i >= 0)
			lastbase = i;

		/*
		 * Starting from the last character in the basename, move
		 * towards the front until we find a slash.  The character
		 * that we processed immediately before that is the first
		 * character in the basename.
		 */
		for (; i >= 0; i--) {
			if (dtrace_load8(src + i) == '/')
				break;
		}

		if (i >= 0)
			firstbase = i + 1;

		/*
		 * Now keep going until we find a non-slash character.  That
		 * character is the last character in the dirname.
		 */
		for (; i >= 0; i--) {
			if (dtrace_load8(src + i) != '/')
				break;
		}

		if (i >= 0)
			lastdir = i;

		ASSERT(!(lastbase == -1 && firstbase != -1));
		ASSERT(!(firstbase == -1 && lastdir != -1));

		if (lastbase == -1) {
			/*
			 * We didn't find a non-slash character.  We know that
			 * the length is non-zero, so the whole string must be
			 * slashes.  In either the dirname or the basename
			 * case, we return '/'.
			 */
			ASSERT(firstbase == -1);
			firstbase = lastbase = lastdir = 0;
		}

		if (firstbase == -1) {
			/*
			 * The entire string consists only of a basename
			 * component.  If we're looking for dirname, we need
			 * to change our string to be just "."; if we're
			 * looking for a basename, we'll just set the first
			 * character of the basename to be 0.
			 */
			if (subr == DIF_SUBR_DIRNAME) {
				ASSERT(lastdir == -1);
				src = (uintptr_t)".";
				lastdir = 0;
			} else {
				firstbase = 0;
			}
		}

		if (subr == DIF_SUBR_DIRNAME) {
			if (lastdir == -1) {
				/*
				 * We know that we have a slash in the name --
				 * or lastdir would be set to 0, above.  And
				 * because lastdir is -1, we know that this
				 * slash must be the first character.  (That
				 * is, the full string must be of the form
				 * "/basename".)  In this case, the last
				 * character of the directory name is 0.
				 */
				lastdir = 0;
			}

			start = 0;
			end = lastdir;
		} else {
			ASSERT(subr == DIF_SUBR_BASENAME);
			ASSERT(firstbase != -1 && lastbase != -1);
			start = firstbase;
			end = lastbase;
		}

		for (i = start, j = 0; i <= end && (uint64_t)j < size - 1; i++, j++)
			dest[j] = dtrace_load8(src + i);

		dest[j] = '\0';
		regs[rd] = (uintptr_t)dest;
		mstate->dtms_scratch_ptr += size;
		break;
	}
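	/*
	 * Worked examples (editorial), matching the slash handling above:
	 *
	 *	basename("/usr/bin/dtrace")  => "dtrace"
	 *	dirname("/usr/bin/dtrace")   => "/usr/bin"
	 *	basename("///")              => "/"
	 *	dirname("dtrace")            => "."
	 */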
	case DIF_SUBR_CLEANPATH: {
		char *dest = (char *)mstate->dtms_scratch_ptr, c;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		uintptr_t src = tupregs[0].dttk_value;
		size_t lim;
		size_t i = 0, j = 0;

		if (!dtrace_strcanload(src, size, &lim, mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = 0;
			break;
		}

		/*
		 * Move forward, loading each character.
		 */
		do {
			c = (i >= lim) ? '\0' : dtrace_load8(src + i++);
next:
			if ((uint64_t)(j + 5) >= size)	/* 5 = strlen("/..c\0") */
				break;

			if (c != '/') {
				dest[j++] = c;
				continue;
			}

			c = (i >= lim) ? '\0' : dtrace_load8(src + i++);

			if (c == '/') {
				/*
				 * We have two slashes -- we can just advance
				 * to the next character.
				 */
				goto next;
			}

			if (c != '.') {
				/*
				 * This is not "." and it's not ".." -- we can
				 * just store the "/" and this character and
				 * continue processing.
				 */
				dest[j++] = '/';
				dest[j++] = c;
				continue;
			}

			c = (i >= lim) ? '\0' : dtrace_load8(src + i++);

			if (c == '/') {
				/*
				 * This is a "/./" component.  We're not going
				 * to store anything in the destination buffer;
				 * we're just going to go to the next component.
				 */
				goto next;
			}

			if (c != '.') {
				/*
				 * This is not ".." -- we can just store the
				 * "/." and this character and continue
				 * processing.
				 */
				dest[j++] = '/';
				dest[j++] = '.';
				dest[j++] = c;
				continue;
			}

			c = (i >= lim) ? '\0' : dtrace_load8(src + i++);

			if (c != '/' && c != '\0') {
				/*
				 * This is not ".." -- it's "..[mumble]".
				 * We'll store the "/.." and this character
				 * and continue processing.
				 */
				dest[j++] = '/';
				dest[j++] = '.';
				dest[j++] = '.';
				dest[j++] = c;
				continue;
			}

			/*
			 * This is "/../" or "/..\0".  We need to back up
			 * our destination pointer until we find a "/".
			 */
			i--;
			while (j != 0 && dest[--j] != '/')
				continue;

			if (c == '\0')
				dest[++j] = '/';
		} while (c != '\0');

		dest[j] = '\0';
		regs[rd] = (uintptr_t)dest;
		mstate->dtms_scratch_ptr += size;
		break;
	}
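	/*
	 * Worked examples (editorial) of the purely lexical cleanup above:
	 *
	 *	cleanpath("/usr//bin/./dtrace")  => "/usr/bin/dtrace"
	 *	cleanpath("/usr/bin/../lib")     => "/usr/lib"
	 */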
	case DIF_SUBR_INET_NTOA:
	case DIF_SUBR_INET_NTOA6:
	case DIF_SUBR_INET_NTOP: {
		size_t size;
		int af, argi, i;
		char *base, *end;

		if (subr == DIF_SUBR_INET_NTOP) {
			af = (int)tupregs[0].dttk_value;
			argi = 1;
		} else {
			af = subr == DIF_SUBR_INET_NTOA ? AF_INET: AF_INET6;
			argi = 0;
		}

		if (af == AF_INET) {
#if !defined(__APPLE__)
			ipaddr_t ip4;
#else
			uint32_t ip4;
#endif /* __APPLE__ */
			uint8_t *ptr8, val;

			/*
			 * Safely load the IPv4 address.
			 */
#if !defined(__APPLE__)
			ip4 = dtrace_load32(tupregs[argi].dttk_value);
#else
			if (!dtrace_canload(tupregs[argi].dttk_value, sizeof(ip4),
				mstate, vstate)) {
				regs[rd] = 0;
				break;
			}

			dtrace_bcopy(
			    (void *)(uintptr_t)tupregs[argi].dttk_value,
			    (void *)(uintptr_t)&ip4, sizeof (ip4));
#endif /* __APPLE__ */
			/*
			 * Check an IPv4 string will fit in scratch.
			 */
#if !defined(__APPLE__)
			size = INET_ADDRSTRLEN;
#else
			size = MAX_IPv4_STR_LEN;
#endif /* __APPLE__ */
			if (!DTRACE_INSCRATCH(mstate, size)) {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
				regs[rd] = 0;
				break;
			}
			base = (char *)mstate->dtms_scratch_ptr;
			end = (char *)mstate->dtms_scratch_ptr + size - 1;

			/*
			 * Stringify as a dotted decimal quad.
			 */
			*end-- = '\0';
			ptr8 = (uint8_t *)&ip4;
			for (i = 3; i >= 0; i--) {
				val = ptr8[i];

				if (val == 0) {
					*end-- = '0';
				} else {
					for (; val; val /= 10) {
						*end-- = '0' + (val % 10);
					}
				}

				if (i > 0)
					*end-- = '.';
			}
			ASSERT(end + 1 >= base);

		} else if (af == AF_INET6) {
#if defined(__APPLE__)
#define _S6_un __u6_addr
#define _S6_u8 __u6_addr8
#endif /* __APPLE__ */
			struct in6_addr ip6;
			int firstzero, tryzero, numzero, v6end;
			uint16_t val;
			const char digits[] = "0123456789abcdef";

			/*
			 * Stringify using RFC 1884 convention 2 - 16 bit
			 * hexadecimal values with a zero-run compression.
			 * Lower case hexadecimal digits are used.
			 *	eg, fe80::214:4fff:fe0b:76c8.
			 * The IPv4 embedded form is returned for inet_ntop,
			 * just the IPv4 string is returned for inet_ntoa6.
			 */

			if (!dtrace_canload(tupregs[argi].dttk_value,
				sizeof(struct in6_addr), mstate, vstate)) {
				regs[rd] = 0;
				break;
			}

			/*
			 * Safely load the IPv6 address.
			 */
			dtrace_bcopy(
			    (void *)(uintptr_t)tupregs[argi].dttk_value,
			    (void *)(uintptr_t)&ip6, sizeof (struct in6_addr));

			/*
			 * Check an IPv6 string will fit in scratch.
			 */
			size = INET6_ADDRSTRLEN;
			if (!DTRACE_INSCRATCH(mstate, size)) {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
				regs[rd] = 0;
				break;
			}
			base = (char *)mstate->dtms_scratch_ptr;
			end = (char *)mstate->dtms_scratch_ptr + size - 1;
			*end-- = '\0';

			/*
			 * Find the longest run of 16 bit zero values
			 * for the single allowed zero compression - "::".
			 */
			firstzero = -1;
			tryzero = -1;
			numzero = 1;
			for (i = 0; i < (int)sizeof (struct in6_addr); i++) {
				if (ip6._S6_un._S6_u8[i] == 0 &&
				    tryzero == -1 && i % 2 == 0) {
					tryzero = i;
					continue;
				}

				if (tryzero != -1 &&
				    (ip6._S6_un._S6_u8[i] != 0 ||
				    i == sizeof (struct in6_addr) - 1)) {

					if (i - tryzero <= numzero) {
						tryzero = -1;
						continue;
					}

					firstzero = tryzero;
					numzero = i - i % 2 - tryzero;
					tryzero = -1;

					if (ip6._S6_un._S6_u8[i] == 0 &&
					    i == sizeof (struct in6_addr) - 1)
						numzero += 2;
				}
			}
			ASSERT(firstzero + numzero <= (int)sizeof (struct in6_addr));

			/*
			 * Check for an IPv4 embedded address.
			 */
			v6end = sizeof (struct in6_addr) - 2;
			if (IN6_IS_ADDR_V4MAPPED(&ip6) ||
			    IN6_IS_ADDR_V4COMPAT(&ip6)) {
				for (i = sizeof (struct in6_addr) - 1;
				     i >= (int)DTRACE_V4MAPPED_OFFSET; i--) {
					ASSERT(end >= base);

					val = ip6._S6_un._S6_u8[i];

					if (val == 0) {
						*end-- = '0';
					} else {
						for (; val; val /= 10) {
							*end-- = '0' + val % 10;
						}
					}

					if (i > (int)DTRACE_V4MAPPED_OFFSET)
						*end-- = '.';
				}

				if (subr == DIF_SUBR_INET_NTOA6)
					goto inetout;

				/*
				 * Set v6end to skip the IPv4 address that
				 * we have already stringified.
				 */
				v6end = 10;
			}

			/*
			 * Build the IPv6 string by working through the
			 * address in reverse.
			 */
			for (i = v6end; i >= 0; i -= 2) {
				ASSERT(end >= base);

				if (i == firstzero + numzero - 2) {
					*end-- = ':';
					*end-- = ':';
					i -= numzero - 2;
					continue;
				}

				if (i < 14 && i != firstzero - 2)
					*end-- = ':';

				val = (ip6._S6_un._S6_u8[i] << 8) +
				    ip6._S6_un._S6_u8[i + 1];

				if (val == 0) {
					*end-- = '0';
				} else {
					for (; val; val /= 16) {
						*end-- = digits[val % 16];
					}
				}
			}
			ASSERT(end + 1 >= base);

#if defined(__APPLE__)
#undef _S6_un
#undef _S6_u8
#endif /* __APPLE__ */
		} else {
			/*
			 * The user didn't use AF_INET or AF_INET6.
			 */
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			regs[rd] = 0;
			break;
		}

inetout:	regs[rd] = (uintptr_t)end + 1;
		mstate->dtms_scratch_ptr += size;
		break;
	}
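	/*
	 * Usage sketch (editorial), with the zero-run compression above:
	 *
	 *	inet_ntoa(&ip4)            => "10.0.0.1"
	 *	inet_ntop(AF_INET6, &ip6)  => "fe80::214:4fff:fe0b:76c8"
	 *	inet_ntoa6(&v4mapped)      => "192.0.2.1"   (embedded IPv4)
	 *
	 * The pointer arguments are illustrative D variables, not real code.
	 */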
	case DIF_SUBR_TOUPPER:
	case DIF_SUBR_TOLOWER: {
		uintptr_t src = tupregs[0].dttk_value;
		char *dest = (char *)mstate->dtms_scratch_ptr;
		char lower, upper, base, c;
		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		size_t len = dtrace_strlen((char*) src, size);
		size_t i = 0;

		lower = (subr == DIF_SUBR_TOUPPER) ? 'a' : 'A';
		upper = (subr == DIF_SUBR_TOUPPER) ? 'z' : 'Z';
		base  = (subr == DIF_SUBR_TOUPPER) ? 'A' : 'a';

		if (!dtrace_canload(src, len + 1, mstate, vstate)) {
			regs[rd] = 0;
			break;
		}

		if (!DTRACE_INSCRATCH(mstate, size)) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
			regs[rd] = 0;
			break;
		}

		for (i = 0; i < size - 1; ++i) {
			if ((c = dtrace_load8(src + i)) == '\0')
				break;

			if (c >= lower && c <= upper)
				c = base + (c - lower);

			dest[i] = c;
		}

		dest[i] = '\0';
		regs[rd] = (uintptr_t) dest;
		mstate->dtms_scratch_ptr += size;
		break;
	}
#if defined(__APPLE__)
	case DIF_SUBR_VM_KERNEL_ADDRPERM: {
		if (!dtrace_priv_kernel(state)) {
			regs[rd] = 0;
		} else {
			regs[rd] = VM_KERNEL_ADDRPERM((vm_offset_t) tupregs[0].dttk_value);
		}

		break;
	}
	case DIF_SUBR_KDEBUG_TRACE: {
		uint32_t debugid;
		uintptr_t args[4] = {0};
		int i;

		if (nargs < 2 || nargs > 5) {
			DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
			break;
		}

		if (dtrace_destructive_disallow)
			return;

		debugid = tupregs[0].dttk_value;
		for (i = 0; i < nargs - 1; i++)
			args[i] = tupregs[i + 1].dttk_value;

		kernel_debug(debugid, args[0], args[1], args[2], args[3], 0);

		break;
	}

	case DIF_SUBR_KDEBUG_TRACE_STRING: {
		if (nargs != 3) {
			break;
		}

		if (dtrace_destructive_disallow)
			return;

		uint64_t size = state->dts_options[DTRACEOPT_STRSIZE];
		uint32_t debugid = tupregs[0].dttk_value;
		uint64_t str_id = tupregs[1].dttk_value;
		uintptr_t src = tupregs[2].dttk_value;
		size_t lim;
		char buf[size];
		char* str = NULL;

		if (src != (uintptr_t)0) {
			str = buf;
			if (!dtrace_strcanload(src, size, &lim, mstate, vstate)) {
				break;
			}
			dtrace_strcpy((void*)src, buf, size);
		}

		(void)kernel_debug_string(debugid, &str_id, str);
		regs[rd] = str_id;

		break;
	}
#endif /* defined(__APPLE__) */
	}
}
/*
 * Emulate the execution of DTrace IR instructions specified by the given
 * DIF object.  This function is deliberately void of assertions as all of
 * the necessary checks are handled by a call to dtrace_difo_validate().
 */
static uint64_t
dtrace_dif_emulate(dtrace_difo_t *difo, dtrace_mstate_t *mstate,
    dtrace_vstate_t *vstate, dtrace_state_t *state)
{
	const dif_instr_t *text = difo->dtdo_buf;
	const uint_t textlen = difo->dtdo_len;
	const char *strtab = difo->dtdo_strtab;
	const uint64_t *inttab = difo->dtdo_inttab;

	uint64_t rval = 0;
	dtrace_statvar_t *svar;
	dtrace_dstate_t *dstate = &vstate->dtvs_dynvars;
	dtrace_difv_t *v;
	volatile uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
	volatile uint64_t *illval = &cpu_core[CPU->cpu_id].cpuc_dtrace_illval;

	dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
	uint64_t regs[DIF_DIR_NREGS];
	uint64_t *tmp;

	uint8_t cc_n = 0, cc_z = 0, cc_v = 0, cc_c = 0;
	int64_t cc_r;
	uint_t pc = 0, id, opc = 0;
	uint8_t ttop = 0;
	dif_instr_t instr;
	uint_t r1, r2, rd;

	/*
	 * We stash the current DIF object into the machine state: we need it
	 * for subsequent access checking.
	 */
	mstate->dtms_difo = difo;

	regs[DIF_REG_R0] = 0;		/* %r0 is fixed at zero */
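	/*
	 * Editorial note: cc_n/cc_z/cc_v/cc_c model the negative, zero,
	 * overflow and carry condition codes of a conventional CPU.
	 * DIF_OP_CMP and DIF_OP_TST set them; the conditional branches below
	 * test them, e.g. "branch greater, unsigned" (DIF_OP_BGU) is taken
	 * when neither carry nor zero is set:
	 *
	 *	if ((cc_c | cc_z) == 0)
	 *		pc = DIF_INSTR_LABEL(instr);
	 */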
5071         while (pc 
< textlen 
&& !(*flags 
& CPU_DTRACE_FAULT
)) { 
5075                 r1 
= DIF_INSTR_R1(instr
); 
5076                 r2 
= DIF_INSTR_R2(instr
); 
5077                 rd 
= DIF_INSTR_RD(instr
); 
5079                 switch (DIF_INSTR_OP(instr
)) { 
5081                         regs
[rd
] = regs
[r1
] | regs
[r2
]; 
5084                         regs
[rd
] = regs
[r1
] ^ regs
[r2
]; 
5087                         regs
[rd
] = regs
[r1
] & regs
[r2
]; 
5090                         regs
[rd
] = regs
[r1
] << regs
[r2
]; 
5093                         regs
[rd
] = regs
[r1
] >> regs
[r2
]; 
5096                         regs
[rd
] = regs
[r1
] - regs
[r2
]; 
5099                         regs
[rd
] = regs
[r1
] + regs
[r2
]; 
5102                         regs
[rd
] = regs
[r1
] * regs
[r2
]; 
5105                         if (regs
[r2
] == 0) { 
5107                                 *flags 
|= CPU_DTRACE_DIVZERO
; 
5109                                 regs
[rd
] = (int64_t)regs
[r1
] / 
5115                         if (regs
[r2
] == 0) { 
5117                                 *flags 
|= CPU_DTRACE_DIVZERO
; 
5119                                 regs
[rd
] = regs
[r1
] / regs
[r2
]; 
5124                         if (regs
[r2
] == 0) { 
5126                                 *flags 
|= CPU_DTRACE_DIVZERO
; 
5128                                 regs
[rd
] = (int64_t)regs
[r1
] % 
5134                         if (regs
[r2
] == 0) { 
5136                                 *flags 
|= CPU_DTRACE_DIVZERO
; 
5138                                 regs
[rd
] = regs
[r1
] % regs
[r2
]; 
5143                         regs
[rd
] = ~regs
[r1
]; 
5146                         regs
[rd
] = regs
[r1
]; 
5149                         cc_r 
= regs
[r1
] - regs
[r2
]; 
5153                         cc_c 
= regs
[r1
] < regs
[r2
]; 
5156                         cc_n 
= cc_v 
= cc_c 
= 0; 
5157                         cc_z 
= regs
[r1
] == 0; 
		case DIF_OP_BA:
			pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BE:
			if (cc_z)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BNE:
			if (cc_z == 0)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BG:
			if ((cc_z | (cc_n ^ cc_v)) == 0)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BGU:
			if ((cc_c | cc_z) == 0)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BGE:
			if ((cc_n ^ cc_v) == 0)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BGEU:
			if (cc_c == 0)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BL:
			if (cc_n ^ cc_v)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BLU:
			if (cc_c)
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BLE:
			if (cc_z | (cc_n ^ cc_v))
				pc = DIF_INSTR_LABEL(instr);
			break;
		case DIF_OP_BLEU:
			if (cc_c | cc_z)
				pc = DIF_INSTR_LABEL(instr);
			break;

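		/*
		 * The branches above follow the usual condition-code
		 * algebra:  after a "cmp", (cc_n ^ cc_v) is signed
		 * less-than, cc_c is unsigned less-than, and cc_z is
		 * equality.  A predicate like /arg0 > 100/ thus becomes,
		 * roughly, a cmp against the constant followed by a "ble"
		 * that branches past the clause body when the test fails.
		 */
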
		case DIF_OP_LDSB:
			if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
				*flags |= CPU_DTRACE_KPRIV;
				*illval = regs[r1];
				break;
			}
			regs[rd] = (int8_t)dtrace_load8(regs[r1]);
			break;
		case DIF_OP_LDSH:
			if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
				*flags |= CPU_DTRACE_KPRIV;
				*illval = regs[r1];
				break;
			}
			regs[rd] = (int16_t)dtrace_load16(regs[r1]);
			break;
		case DIF_OP_LDSW:
			if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
				*flags |= CPU_DTRACE_KPRIV;
				*illval = regs[r1];
				break;
			}
			regs[rd] = (int32_t)dtrace_load32(regs[r1]);
			break;
		case DIF_OP_LDUB:
			if (!dtrace_canstore(regs[r1], 1, mstate, vstate)) {
				*flags |= CPU_DTRACE_KPRIV;
				*illval = regs[r1];
				break;
			}
			regs[rd] = dtrace_load8(regs[r1]);
			break;
		case DIF_OP_LDUH:
			if (!dtrace_canstore(regs[r1], 2, mstate, vstate)) {
				*flags |= CPU_DTRACE_KPRIV;
				*illval = regs[r1];
				break;
			}
			regs[rd] = dtrace_load16(regs[r1]);
			break;
		case DIF_OP_LDUW:
			if (!dtrace_canstore(regs[r1], 4, mstate, vstate)) {
				*flags |= CPU_DTRACE_KPRIV;
				*illval = regs[r1];
				break;
			}
			regs[rd] = dtrace_load32(regs[r1]);
			break;
		case DIF_OP_LDX:
			if (!dtrace_canstore(regs[r1], 8, mstate, vstate)) {
				*flags |= CPU_DTRACE_KPRIV;
				*illval = regs[r1];
				break;
			}
			regs[rd] = dtrace_load64(regs[r1]);
			break;

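		/*
		 * The pattern above is uniform across the kernel loads:
		 * validate the source address first, and on failure set
		 * CPU_DTRACE_KPRIV, record the offending address in
		 * *illval, and leave rd unwritten -- the fault is reported
		 * against the firing rather than taken as a real kernel
		 * fault.
		 */
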
		/*
		 * Darwin 32-bit kernel may fetch from 64-bit user.
		 * Do not cast regs to uintptr_t
		 * DIF_OP_ULDSB, DIF_OP_ULDSH, DIF_OP_ULDSW, DIF_OP_ULDUB
		 * DIF_OP_ULDUH, DIF_OP_ULDUW, DIF_OP_ULDX
		 */
		case DIF_OP_ULDSB:
			regs[rd] = (int8_t)
			    dtrace_fuword8(regs[r1]);
			break;
		case DIF_OP_ULDSH:
			regs[rd] = (int16_t)
			    dtrace_fuword16(regs[r1]);
			break;
		case DIF_OP_ULDSW:
			regs[rd] = (int32_t)
			    dtrace_fuword32(regs[r1]);
			break;
		case DIF_OP_ULDUB:
			regs[rd] =
			    dtrace_fuword8(regs[r1]);
			break;
		case DIF_OP_ULDUH:
			regs[rd] =
			    dtrace_fuword16(regs[r1]);
			break;
		case DIF_OP_ULDUW:
			regs[rd] =
			    dtrace_fuword32(regs[r1]);
			break;
		case DIF_OP_ULDX:
			regs[rd] =
			    dtrace_fuword64(regs[r1]);
			break;
		case DIF_OP_SETX:
			regs[rd] = inttab[DIF_INSTR_INTEGER(instr)];
			break;
		case DIF_OP_SETS:
			regs[rd] = (uint64_t)(uintptr_t)
			    (strtab + DIF_INSTR_STRING(instr));
			break;
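		/*
		 * Constants live out of line in the DIF object:  setx
		 * indexes the integer table and sets yields a pointer into
		 * the string table.  A D literal such as "tcp" thus costs
		 * a single sets producing strtab + offset; the bytes are
		 * never copied into a register.
		 */
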
		case DIF_OP_SCMP: {
			size_t sz = state->dts_options[DTRACEOPT_STRSIZE];
			uintptr_t s1 = regs[r1];
			uintptr_t s2 = regs[r2];
			size_t lim1 = sz, lim2 = sz;

			if (s1 != 0 &&
			    !dtrace_strcanload(s1, sz, &lim1, mstate, vstate))
				break;
			if (s2 != 0 &&
			    !dtrace_strcanload(s2, sz, &lim2, mstate, vstate))
				break;

			cc_r = dtrace_strncmp((char *)s1, (char *)s2,
			    MIN(lim1, lim2));

			cc_n = cc_r < 0;
			cc_z = cc_r == 0;
			cc_v = cc_c = 0;
			break;
		}
		case DIF_OP_LDGA:
			regs[rd] = dtrace_dif_variable(mstate, state,
			    r1, regs[r2]);
			break;
		case DIF_OP_LDGS:
			id = DIF_INSTR_VAR(instr);

			if (id >= DIF_VAR_OTHER_UBASE) {
				uintptr_t a;

				id -= DIF_VAR_OTHER_UBASE;
				svar = vstate->dtvs_globals[id];
				ASSERT(svar != NULL);
				v = &svar->dtsv_var;

				if (!(v->dtdv_type.dtdt_flags & DIF_TF_BYREF)) {
					regs[rd] = svar->dtsv_data;
					break;
				}

				a = (uintptr_t)svar->dtsv_data;

				if (*(uint8_t *)a == UINT8_MAX) {
					/*
					 * If the 0th byte is set to UINT8_MAX
					 * then this is to be treated as a
					 * reference to a NULL variable.
					 */
					regs[rd] = 0;
				} else {
					regs[rd] = a + sizeof (uint64_t);
				}

				break;
			}

			regs[rd] = dtrace_dif_variable(mstate, state, id, 0);
			break;

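		/*
		 * By-ref statics carry one extra machine word ahead of the
		 * data proper, of which only the 0th byte is meaningful:
		 *
		 *	+-----+---------------+----------------- ...
		 *	| tag |     (pad)     |  variable data
		 *	+-----+---------------+----------------- ...
		 *
		 * tag == UINT8_MAX denotes a NULL variable, tag == 0 valid
		 * data -- hence the loads above return either 0 or the
		 * tagged address plus sizeof (uint64_t).
		 */
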
		case DIF_OP_STGS:
			id = DIF_INSTR_VAR(instr);

			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;

			VERIFY(id < (uint_t)vstate->dtvs_nglobals);
			svar = vstate->dtvs_globals[id];
			ASSERT(svar != NULL);
			v = &svar->dtsv_var;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				uintptr_t a = (uintptr_t)svar->dtsv_data;
				size_t lim;

				ASSERT(a != 0);
				ASSERT(svar->dtsv_size != 0);

				if (regs[rd] == 0) {
					*(uint8_t *)a = UINT8_MAX;
					break;
				} else {
					*(uint8_t *)a = 0;
					a += sizeof (uint64_t);
				}

				if (!dtrace_vcanload(
				    (void *)(uintptr_t)regs[rd], &v->dtdv_type,
				    &lim, mstate, vstate))
					break;

				dtrace_vcopy((void *)(uintptr_t)regs[rd],
				    (void *)a, &v->dtdv_type, lim);
				break;
			}

			svar->dtsv_data = regs[rd];
			break;

		case DIF_OP_LDTA:
			/*
			 * There are no DTrace built-in thread-local arrays at
			 * present.  This opcode is saved for future work.
			 */
			*flags |= CPU_DTRACE_ILLOP;
			regs[rd] = 0;
			break;

		case DIF_OP_LDLS:
			id = DIF_INSTR_VAR(instr);

			if (id < DIF_VAR_OTHER_UBASE) {
				/*
				 * For now, this has no meaning.
				 */
				regs[rd] = 0;
				break;
			}

			id -= DIF_VAR_OTHER_UBASE;

			ASSERT(id < (uint_t)vstate->dtvs_nlocals);
			ASSERT(vstate->dtvs_locals != NULL);
			svar = vstate->dtvs_locals[id];
			ASSERT(svar != NULL);
			v = &svar->dtsv_var;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				uintptr_t a = (uintptr_t)svar->dtsv_data;
				size_t sz = v->dtdv_type.dtdt_size;

				sz += sizeof (uint64_t);
				ASSERT(svar->dtsv_size == (int)NCPU * sz);
				a += CPU->cpu_id * sz;

				if (*(uint8_t *)a == UINT8_MAX) {
					/*
					 * If the 0th byte is set to UINT8_MAX
					 * then this is to be treated as a
					 * reference to a NULL variable.
					 */
					regs[rd] = 0;
				} else {
					regs[rd] = a + sizeof (uint64_t);
				}

				break;
			}

			ASSERT(svar->dtsv_size == (int)NCPU * sizeof (uint64_t));
			tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
			regs[rd] = tmp[CPU->cpu_id];
			break;

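		/*
		 * Clause-local ("this->") storage is replicated per CPU:
		 * a by-ref local occupies NCPU slots of dtdt_size +
		 * sizeof (uint64_t) bytes each (data plus the NULL-tag
		 * word), so the firing CPU's slot is just base + cpu_id *
		 * slot size.  Probe context never migrates CPUs mid-clause,
		 * which is what makes this lock-free indexing safe.
		 */
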
		case DIF_OP_STLS:
			id = DIF_INSTR_VAR(instr);

			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;
			VERIFY(id < (uint_t)vstate->dtvs_nlocals);
			ASSERT(vstate->dtvs_locals != NULL);
			svar = vstate->dtvs_locals[id];
			ASSERT(svar != NULL);
			v = &svar->dtsv_var;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				uintptr_t a = (uintptr_t)svar->dtsv_data;
				size_t sz = v->dtdv_type.dtdt_size;
				size_t lim;

				sz += sizeof (uint64_t);
				ASSERT(svar->dtsv_size == (int)NCPU * sz);
				a += CPU->cpu_id * sz;

				if (regs[rd] == 0) {
					*(uint8_t *)a = UINT8_MAX;
					break;
				} else {
					*(uint8_t *)a = 0;
					a += sizeof (uint64_t);
				}

				if (!dtrace_vcanload(
				    (void *)(uintptr_t)regs[rd], &v->dtdv_type,
				    &lim, mstate, vstate))
					break;

				dtrace_vcopy((void *)(uintptr_t)regs[rd],
				    (void *)a, &v->dtdv_type, lim);
				break;
			}

			ASSERT(svar->dtsv_size == (int)NCPU * sizeof (uint64_t));
			tmp = (uint64_t *)(uintptr_t)svar->dtsv_data;
			tmp[CPU->cpu_id] = regs[rd];
			break;

		case DIF_OP_LDTS: {
			dtrace_dynvar_t *dvar;
			dtrace_key_t *key;

			id = DIF_INSTR_VAR(instr);
			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;
			v = &vstate->dtvs_tlocals[id];

			key = &tupregs[DIF_DTR_NREGS];
			key[0].dttk_value = (uint64_t)id;
			key[0].dttk_size = 0;
			DTRACE_TLS_THRKEY(key[1].dttk_value);
			key[1].dttk_size = 0;

			dvar = dtrace_dynvar(dstate, 2, key,
			    sizeof (uint64_t), DTRACE_DYNVAR_NOALLOC,
			    mstate, vstate);

			if (dvar == NULL) {
				regs[rd] = 0;
				break;
			}

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;
			} else {
				regs[rd] = *((uint64_t *)dvar->dtdv_data);
			}

			break;
		}

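		/*
		 * A thread-local such as self->traced is addressed by a
		 * two-element key:  the variable id plus a per-thread value
		 * cooked up by DTRACE_TLS_THRKEY() from the current thread.
		 * NOALLOC above means that loading a never-stored
		 * thread-local yields 0 without allocating a
		 * dynamic-variable slot.
		 */
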
		case DIF_OP_STTS: {
			dtrace_dynvar_t *dvar;
			dtrace_key_t *key;

			id = DIF_INSTR_VAR(instr);
			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;
			VERIFY(id < (uint_t)vstate->dtvs_ntlocals);

			key = &tupregs[DIF_DTR_NREGS];
			key[0].dttk_value = (uint64_t)id;
			key[0].dttk_size = 0;
			DTRACE_TLS_THRKEY(key[1].dttk_value);
			key[1].dttk_size = 0;
			v = &vstate->dtvs_tlocals[id];

			dvar = dtrace_dynvar(dstate, 2, key,
			    v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
			    v->dtdv_type.dtdt_size : sizeof (uint64_t),
			    regs[rd] ? DTRACE_DYNVAR_ALLOC :
			    DTRACE_DYNVAR_DEALLOC, mstate, vstate);

			/*
			 * Given that we're storing to thread-local data,
			 * we need to flush our predicate cache.
			 */
			dtrace_set_thread_predcache(current_thread(), 0);

			if (dvar == NULL)
				break;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				size_t lim;

				if (!dtrace_vcanload(
				    (void *)(uintptr_t)regs[rd],
				    &v->dtdv_type, &lim, mstate, vstate))
					break;

				dtrace_vcopy((void *)(uintptr_t)regs[rd],
				    dvar->dtdv_data, &v->dtdv_type, lim);
			} else {
				*((uint64_t *)dvar->dtdv_data) = regs[rd];
			}

			break;
		}

		case DIF_OP_SRA:
			regs[rd] = (int64_t)regs[r1] >> regs[r2];
			break;

		case DIF_OP_CALL:
			dtrace_dif_subr(DIF_INSTR_SUBR(instr), rd,
			    regs, tupregs, ttop, mstate, state);
			break;

		case DIF_OP_PUSHTR:
			if (ttop == DIF_DTR_NREGS) {
				*flags |= CPU_DTRACE_TUPOFLOW;
				break;
			}

			if (r1 == DIF_TYPE_STRING) {
				/*
				 * If this is a string type and the size is 0,
				 * we'll use the system-wide default string
				 * size.  Note that we are _not_ looking at
				 * the value of the DTRACEOPT_STRSIZE option;
				 * had this been set, we would expect to have
				 * a non-zero size value in the "pushtr".
				 */
				tupregs[ttop].dttk_size =
				    dtrace_strlen((char *)(uintptr_t)regs[rd],
				    regs[r2] ? regs[r2] :
				    dtrace_strsize_default) + 1;
			} else {
				if (regs[r2] > LONG_MAX) {
					*flags |= CPU_DTRACE_ILLOP;
					break;
				}
				tupregs[ttop].dttk_size = regs[r2];
			}

			tupregs[ttop++].dttk_value = regs[rd];
			break;

		case DIF_OP_PUSHTV:
			if (ttop == DIF_DTR_NREGS) {
				*flags |= CPU_DTRACE_TUPOFLOW;
				break;
			}

			tupregs[ttop].dttk_value = regs[rd];
			tupregs[ttop++].dttk_size = 0;
			break;

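		/*
		 * The tuple stack feeds both subroutine calls and
		 * associative-array keys.  An update such as
		 *
		 *	@a[pid, execname] = count();
		 *
		 * pushes the integer key with pushtv and the string key
		 * with pushtr (hence the string-length computation above)
		 * before the array op consumes tupregs[0 .. ttop - 1].
		 */
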
		case DIF_OP_POPTS:
			if (ttop != 0)
				ttop--;
			break;

		case DIF_OP_FLUSHTS:
			ttop = 0;
			break;

		case DIF_OP_LDGAA:
		case DIF_OP_LDTAA: {
			dtrace_dynvar_t *dvar;
			dtrace_key_t *key = tupregs;
			uint_t nkeys = ttop;

			id = DIF_INSTR_VAR(instr);
			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;

			key[nkeys].dttk_value = (uint64_t)id;
			key[nkeys++].dttk_size = 0;

			if (DIF_INSTR_OP(instr) == DIF_OP_LDTAA) {
				DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
				key[nkeys++].dttk_size = 0;
				VERIFY(id < (uint_t)vstate->dtvs_ntlocals);
				v = &vstate->dtvs_tlocals[id];
			} else {
				VERIFY(id < (uint_t)vstate->dtvs_nglobals);
				v = &vstate->dtvs_globals[id]->dtsv_var;
			}

			dvar = dtrace_dynvar(dstate, nkeys, key,
			    v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
			    v->dtdv_type.dtdt_size : sizeof (uint64_t),
			    DTRACE_DYNVAR_NOALLOC, mstate, vstate);

			if (dvar == NULL) {
				regs[rd] = 0;
				break;
			}

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				regs[rd] = (uint64_t)(uintptr_t)dvar->dtdv_data;
			} else {
				regs[rd] = *((uint64_t *)dvar->dtdv_data);
			}

			break;
		}

		case DIF_OP_STGAA:
		case DIF_OP_STTAA: {
			dtrace_dynvar_t *dvar;
			dtrace_key_t *key = tupregs;
			uint_t nkeys = ttop;

			id = DIF_INSTR_VAR(instr);
			ASSERT(id >= DIF_VAR_OTHER_UBASE);
			id -= DIF_VAR_OTHER_UBASE;

			key[nkeys].dttk_value = (uint64_t)id;
			key[nkeys++].dttk_size = 0;

			if (DIF_INSTR_OP(instr) == DIF_OP_STTAA) {
				DTRACE_TLS_THRKEY(key[nkeys].dttk_value);
				key[nkeys++].dttk_size = 0;
				VERIFY(id < (uint_t)vstate->dtvs_ntlocals);
				v = &vstate->dtvs_tlocals[id];
			} else {
				VERIFY(id < (uint_t)vstate->dtvs_nglobals);
				v = &vstate->dtvs_globals[id]->dtsv_var;
			}

			dvar = dtrace_dynvar(dstate, nkeys, key,
			    v->dtdv_type.dtdt_size > sizeof (uint64_t) ?
			    v->dtdv_type.dtdt_size : sizeof (uint64_t),
			    regs[rd] ? DTRACE_DYNVAR_ALLOC :
			    DTRACE_DYNVAR_DEALLOC, mstate, vstate);

			if (dvar == NULL)
				break;

			if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF) {
				size_t lim;

				if (!dtrace_vcanload(
				    (void *)(uintptr_t)regs[rd], &v->dtdv_type,
				    &lim, mstate, vstate))
					break;

				dtrace_vcopy((void *)(uintptr_t)regs[rd],
				    dvar->dtdv_data, &v->dtdv_type, lim);
			} else {
				*((uint64_t *)dvar->dtdv_data) = regs[rd];
			}

			break;
		}

		case DIF_OP_ALLOCS: {
			uintptr_t ptr = P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
			size_t size = ptr - mstate->dtms_scratch_ptr + regs[r1];

			/*
			 * Rounding up the user allocation size could have
			 * overflowed large, bogus allocations (like -1ULL) to
			 * 0.
			 */
			if (size < regs[r1] ||
			    !DTRACE_INSCRATCH(mstate, size)) {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
				regs[rd] = 0;
			} else {
				dtrace_bzero((void *) mstate->dtms_scratch_ptr, size);
				mstate->dtms_scratch_ptr += size;
				regs[rd] = ptr;
			}
			break;
		}

		case DIF_OP_COPYS:
			if (!dtrace_canstore(regs[rd], regs[r2],
			    mstate, vstate)) {
				*flags |= CPU_DTRACE_BADADDR;
				*illval = regs[rd];
				break;
			}

			if (!dtrace_canload(regs[r1], regs[r2], mstate, vstate))
				break;

			dtrace_bcopy((void *)(uintptr_t)regs[r1],
			    (void *)(uintptr_t)regs[rd], (size_t)regs[r2]);
			break;

		case DIF_OP_STB:
			if (!dtrace_canstore(regs[rd], 1, mstate, vstate)) {
				*flags |= CPU_DTRACE_BADADDR;
				*illval = regs[rd];
				break;
			}
			*((uint8_t *)(uintptr_t)regs[rd]) = (uint8_t)regs[r1];
			break;

		case DIF_OP_STH:
			if (!dtrace_canstore(regs[rd], 2, mstate, vstate)) {
				*flags |= CPU_DTRACE_BADADDR;
				*illval = regs[rd];
				break;
			}
			if (regs[rd] & 1) {
				*flags |= CPU_DTRACE_BADALIGN;
				*illval = regs[rd];
				break;
			}
			*((uint16_t *)(uintptr_t)regs[rd]) = (uint16_t)regs[r1];
			break;

		case DIF_OP_STW:
			if (!dtrace_canstore(regs[rd], 4, mstate, vstate)) {
				*flags |= CPU_DTRACE_BADADDR;
				*illval = regs[rd];
				break;
			}
			if (regs[rd] & 3) {
				*flags |= CPU_DTRACE_BADALIGN;
				*illval = regs[rd];
				break;
			}
			*((uint32_t *)(uintptr_t)regs[rd]) = (uint32_t)regs[r1];
			break;

		case DIF_OP_STX:
			if (!dtrace_canstore(regs[rd], 8, mstate, vstate)) {
				*flags |= CPU_DTRACE_BADADDR;
				*illval = regs[rd];
				break;
			}

			/*
			 * Darwin kmem_zalloc() called from
			 * dtrace_difo_init() is 4-byte aligned.
			 */
			if (regs[rd] & 3) {
				*flags |= CPU_DTRACE_BADALIGN;
				*illval = regs[rd];
				break;
			}
			*((uint64_t *)(uintptr_t)regs[rd]) = regs[r1];
			break;

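		/*
		 * Stores mirror the loads:  dtrace_canstore() confines the
		 * destination to memory the enabling owns (otherwise
		 * CPU_DTRACE_BADADDR), and the wider stores additionally
		 * demand alignment (CPU_DTRACE_BADALIGN).  Note that stx
		 * checks 4-byte -- not 8-byte -- alignment, per the Darwin
		 * kmem_zalloc() note above.
		 */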
		}
	}

	if (!(*flags & CPU_DTRACE_FAULT))
		return (rval);

	mstate->dtms_fltoffs = opc * sizeof (dif_instr_t);
	mstate->dtms_present |= DTRACE_MSTATE_FLTOFFS;

	return (0);
}

static void
dtrace_action_breakpoint(dtrace_ecb_t *ecb)
{
	dtrace_probe_t *probe = ecb->dte_probe;
	dtrace_provider_t *prov = probe->dtpr_provider;
	char c[DTRACE_FULLNAMELEN + 80], *str;
	const char *msg = "dtrace: breakpoint action at probe ";
	const char *ecbmsg = " (ecb ";
	uintptr_t mask = (0xf << (sizeof (uintptr_t) * NBBY / 4));
	uintptr_t val = (uintptr_t)ecb;
	int shift = (sizeof (uintptr_t) * NBBY) - 4, i = 0;

	if (dtrace_destructive_disallow)
		return;

	/*
	 * It's impossible to be taking action on the NULL probe.
	 */
	ASSERT(probe != NULL);

	/*
	 * This is a poor man's (destitute man's?) sprintf():  we want to
	 * print the provider name, module name, function name and name of
	 * the probe, along with the hex address of the ECB with the breakpoint
	 * action -- all of which we must place in the character buffer by
	 * hand.
	 */
	while (*msg != '\0')
		c[i++] = *msg++;

	for (str = prov->dtpv_name; *str != '\0'; str++)
		c[i++] = *str;
	c[i++] = ':';

	for (str = probe->dtpr_mod; *str != '\0'; str++)
		c[i++] = *str;
	c[i++] = ':';

	for (str = probe->dtpr_func; *str != '\0'; str++)
		c[i++] = *str;
	c[i++] = ':';

	for (str = probe->dtpr_name; *str != '\0'; str++)
		c[i++] = *str;

	while (*ecbmsg != '\0')
		c[i++] = *ecbmsg++;

	while (shift >= 0) {
		mask = (uintptr_t)0xf << shift;

		if (val >= ((uintptr_t)1 << shift))
			c[i++] = "0123456789abcdef"[(val & mask) >> shift];
		shift -= 4;
	}

	c[i++] = ')';
	c[i] = '\0';

	debug_enter(c);
}

static void
dtrace_action_panic(dtrace_ecb_t *ecb)
{
	dtrace_probe_t *probe = ecb->dte_probe;

	/*
	 * It's impossible to be taking action on the NULL probe.
	 */
	ASSERT(probe != NULL);

	if (dtrace_destructive_disallow)
		return;

	if (dtrace_panicked != NULL)
		return;

	if (dtrace_casptr(&dtrace_panicked, NULL, current_thread()) != NULL)
		return;

	/*
	 * We won the right to panic.  (We want to be sure that only one
	 * thread calls panic() from dtrace_probe(), and that panic() is
	 * called exactly once.)
	 */
	panic("dtrace: panic action at probe %s:%s:%s:%s (ecb %p)",
	    probe->dtpr_provider->dtpv_name, probe->dtpr_mod,
	    probe->dtpr_func, probe->dtpr_name, (void *)ecb);

	/*
	 * APPLE NOTE: this was for an old Mac OS X debug feature
	 * allowing a return from panic().  Revisit someday.
	 */
	dtrace_panicked = NULL;
}

static void
dtrace_action_raise(uint64_t sig)
{
	if (dtrace_destructive_disallow)
		return;

	if (sig >= NSIG) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return;
	}

	/*
	 * raise() has a queue depth of 1 -- we ignore all subsequent
	 * invocations of the raise() action.
	 */

	uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());

	if (uthread && uthread->t_dtrace_sig == 0) {
		uthread->t_dtrace_sig = sig;
		act_set_astbsd(current_thread());
	}
}

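/*
 * The queue depth of 1 means that a clause such as
 *
 *	syscall::write:entry /execname == "victim"/ { raise(SIGTERM); }
 *
 * posts at most one signal per trip through the kernel no matter how many
 * probes fire before the thread returns to user space -- and the signal
 * itself is delivered from the AST, never from probe context.
 */
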
static void
dtrace_action_stop(void)
{
	if (dtrace_destructive_disallow)
		return;

	uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());
	if (uthread) {
		/*
		 * The currently running process will be set to task_suspend
		 * when it next leaves the kernel.
		 */
		uthread->t_dtrace_stop = 1;
		act_set_astbsd(current_thread());
	}
}

/*
 * APPLE NOTE: pidresume works in conjunction with the dtrace stop action.
 * Both activate only when the currently running process next leaves the
 * kernel.
 */
static void
dtrace_action_pidresume(uint64_t pid)
{
	if (dtrace_destructive_disallow)
		return;

	if (kauth_cred_issuser(kauth_cred_get()) == 0) {
		DTRACE_CPUFLAG_SET(CPU_DTRACE_ILLOP);
		return;
	}

	uthread_t uthread = (uthread_t)get_bsdthread_info(current_thread());

	/*
	 * When the currently running process leaves the kernel, it attempts to
	 * task_resume the process (denoted by pid), if that pid appears to have
	 * been stopped by dtrace_action_stop().
	 * The currently running process has a pidresume() queue depth of 1 --
	 * subsequent invocations of the pidresume() action are ignored.
	 */
	if (pid != 0 && uthread && uthread->t_dtrace_resumepid == 0) {
		uthread->t_dtrace_resumepid = pid;
		act_set_astbsd(current_thread());
	}
}

static void
dtrace_action_chill(dtrace_mstate_t *mstate, hrtime_t val)
{
	hrtime_t now;
	volatile uint16_t *flags;
	dtrace_cpu_t *cpu = CPU;

	if (dtrace_destructive_disallow)
		return;

	flags = (volatile uint16_t *)&cpu_core[cpu->cpu_id].cpuc_dtrace_flags;

	now = dtrace_gethrtime();

	if (now - cpu->cpu_dtrace_chillmark > dtrace_chill_interval) {
		/*
		 * We need to advance the mark to the current time.
		 */
		cpu->cpu_dtrace_chillmark = now;
		cpu->cpu_dtrace_chilled = 0;
	}

	/*
	 * Now check to see if the requested chill time would take us over
	 * the maximum amount of time allowed in the chill interval.  (Or
	 * worse, if the calculation itself induces overflow.)
	 */
	if (cpu->cpu_dtrace_chilled + val > dtrace_chill_max ||
	    cpu->cpu_dtrace_chilled + val < cpu->cpu_dtrace_chilled) {
		*flags |= CPU_DTRACE_ILLOP;
		return;
	}

	while (dtrace_gethrtime() - now < val)
		continue;

	/*
	 * Normally, we assure that the value of the variable "timestamp" does
	 * not change within an ECB.  The presence of chill() represents an
	 * exception to this rule, however.
	 */
	mstate->dtms_present &= ~DTRACE_MSTATE_TIMESTAMP;
	cpu->cpu_dtrace_chilled += val;
}

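/*
 * The effect of the bookkeeping above:  within any dtrace_chill_interval
 * window, a CPU will spin for at most dtrace_chill_max nanoseconds in
 * total on behalf of chill() actions.  A hot probe calling chill()
 * repeatedly therefore starts taking CPU_DTRACE_ILLOP faults once the
 * interval's budget is spent, rather than wedging the CPU.
 */
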
static void
dtrace_action_ustack(dtrace_mstate_t *mstate, dtrace_state_t *state,
    uint64_t *buf, uint64_t arg)
{
	int nframes = DTRACE_USTACK_NFRAMES(arg);
	int strsize = DTRACE_USTACK_STRSIZE(arg);
	uint64_t *pcs = &buf[1], *fps;
	char *str = (char *)&pcs[nframes];
	int size, offs = 0, i, j;
	uintptr_t old = mstate->dtms_scratch_ptr, saved;
	uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
	char *sym;

	/*
	 * Should be taking a faster path if string space has not been
	 * allocated.
	 */
	ASSERT(strsize != 0);

	/*
	 * We will first allocate some temporary space for the frame pointers.
	 */
	fps = (uint64_t *)P2ROUNDUP(mstate->dtms_scratch_ptr, 8);
	size = (uintptr_t)fps - mstate->dtms_scratch_ptr +
	    (nframes * sizeof (uint64_t));

	if (!DTRACE_INSCRATCH(mstate, (uintptr_t)size)) {
		/*
		 * Not enough room for our frame pointers -- need to indicate
		 * that we ran out of scratch space.
		 */
		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOSCRATCH);
		return;
	}

	mstate->dtms_scratch_ptr += size;
	saved = mstate->dtms_scratch_ptr;

	/*
	 * Now get a stack with both program counters and frame pointers.
	 */
	DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
	dtrace_getufpstack(buf, fps, nframes + 1);
	DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

	/*
	 * If that faulted, we're cooked.
	 */
	if (*flags & CPU_DTRACE_FAULT)
		goto out;

	/*
	 * Now we want to walk up the stack, calling the USTACK helper.  For
	 * each iteration, we restore the scratch pointer.
	 */
	for (i = 0; i < nframes; i++) {
		mstate->dtms_scratch_ptr = saved;

		if (offs >= strsize)
			break;

		sym = (char *)(uintptr_t)dtrace_helper(
		    DTRACE_HELPER_ACTION_USTACK,
		    mstate, state, pcs[i], fps[i]);

		/*
		 * If we faulted while running the helper, we're going to
		 * clear the fault and null out the corresponding string.
		 */
		if (*flags & CPU_DTRACE_FAULT) {
			*flags &= ~CPU_DTRACE_FAULT;
			str[offs++] = '\0';
			continue;
		}

		if (sym == NULL) {
			str[offs++] = '\0';
			continue;
		}

		DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);

		/*
		 * Now copy in the string that the helper returned to us.
		 */
		for (j = 0; offs + j < strsize; j++) {
			if ((str[offs + j] = sym[j]) == '\0')
				break;
		}

		DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);

		offs += j + 1;
	}

	if (offs >= strsize) {
		/*
		 * If we didn't have room for all of the strings, we don't
		 * abort processing -- this needn't be a fatal error -- but we
		 * still want to increment a counter (dts_stkstroverflows) to
		 * allow this condition to be warned about.  (If this is from
		 * a jstack() action, it is easily tuned via jstackstrsize.)
		 */
		dtrace_error(&state->dts_stkstroverflows);
	}

	while (offs < strsize)
		str[offs++] = '\0';

out:
	mstate->dtms_scratch_ptr = old;
}

static void
dtrace_store_by_ref(dtrace_difo_t *dp, caddr_t tomax, size_t size,
    size_t *valoffsp, uint64_t *valp, uint64_t end, int intuple, int dtkind)
{
	volatile uint16_t *flags;
	uint64_t val = *valp;
	size_t valoffs = *valoffsp;

	flags = (volatile uint16_t *)&cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
	ASSERT(dtkind == DIF_TF_BYREF || dtkind == DIF_TF_BYUREF);

	/*
	 * If this is a string, we're going to only load until we find the zero
	 * byte -- after which we'll store zero bytes.
	 */
	if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) {
		char c = '\0' + 1;
		size_t s;

		for (s = 0; s < size; s++) {
			if (c != '\0' && dtkind == DIF_TF_BYREF) {
				c = dtrace_load8(val++);
			} else if (c != '\0' && dtkind == DIF_TF_BYUREF) {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
				c = dtrace_fuword8((user_addr_t)(uintptr_t)val++);
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
				if (*flags & CPU_DTRACE_FAULT)
					break;
			}

			DTRACE_STORE(uint8_t, tomax, valoffs++, c);

			if (c == '\0' && intuple)
				break;
		}
	} else {
		uint8_t c;
		while (valoffs < end) {
			if (dtkind == DIF_TF_BYREF) {
				c = dtrace_load8(val++);
			} else if (dtkind == DIF_TF_BYUREF) {
				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
				c = dtrace_fuword8((user_addr_t)(uintptr_t)val++);
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
				if (*flags & CPU_DTRACE_FAULT)
					break;
			}

			DTRACE_STORE(uint8_t, tomax,
			    valoffs++, c);
		}
	}

	*valp = val;
	*valoffsp = valoffs;
}

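/*
 * For string records the loop above copies through the terminating NUL
 * and then stores zeros out to the full record size, so consumers always
 * see fully defined bytes; for tuple members (intuple) it stops at the
 * NUL so the next key packs in immediately behind it.  By-uref sources
 * are fetched with fuword under CPU_DTRACE_NOFAULT, since they
 * dereference user addresses.
 */
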
/*
 * If you're looking for the epicenter of DTrace, you just found it.  This
 * is the function called by the provider to fire a probe -- from which all
 * subsequent probe-context DTrace activity emanates.
 */
static void
__dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
    uint64_t arg2, uint64_t arg3, uint64_t arg4)
{
	processorid_t cpuid;
	dtrace_icookie_t cookie;
	dtrace_probe_t *probe;
	dtrace_mstate_t mstate;
	dtrace_ecb_t *ecb;
	dtrace_action_t *act;
	intptr_t offs;
	size_t size;
	int vtime, onintr;
	volatile uint16_t *flags;
	hrtime_t now;

	cookie = dtrace_interrupt_disable();
	probe = dtrace_probes[id - 1];
	cpuid = CPU->cpu_id;
	onintr = CPU_ON_INTR(CPU);

	if (!onintr && probe->dtpr_predcache != DTRACE_CACHEIDNONE &&
	    probe->dtpr_predcache == dtrace_get_thread_predcache(current_thread())) {
		/*
		 * We have hit in the predicate cache; we know that
		 * this predicate would evaluate to be false.
		 */
		dtrace_interrupt_enable(cookie);
		return;
	}

	if (panic_quiesce) {
		/*
		 * We don't trace anything if we're panicking.
		 */
		dtrace_interrupt_enable(cookie);
		return;
	}

#if !defined(__APPLE__)
	now = dtrace_gethrtime();
	vtime = dtrace_vtime_references != 0;

	if (vtime && curthread->t_dtrace_start)
		curthread->t_dtrace_vtime += now - curthread->t_dtrace_start;
#else
	/*
	 * APPLE NOTE:  The time spent entering DTrace and arriving
	 * to this point, is attributed to the current thread.
	 * Instead it should accrue to DTrace.  FIXME
	 */
	vtime = dtrace_vtime_references != 0;

	if (vtime) {
		int64_t dtrace_accum_time, recent_vtime;
		thread_t thread = current_thread();

		dtrace_accum_time = dtrace_get_thread_tracing(thread); /* Time spent inside DTrace so far (nanoseconds) */

		if (dtrace_accum_time >= 0) {
			recent_vtime = dtrace_abs_to_nano(dtrace_calc_thread_recent_vtime(thread)); /* up to the moment thread vtime */

			recent_vtime = recent_vtime - dtrace_accum_time; /* Time without DTrace contribution */

			dtrace_set_thread_vtime(thread, recent_vtime);
		}
	}

	now = dtrace_gethrtime(); /* must not precede dtrace_calc_thread_recent_vtime() call! */
#endif /* __APPLE__ */

	/*
	 * APPLE NOTE: A provider may call dtrace_probe_error() in lieu of
	 * dtrace_probe() in some circumstances.  See, e.g. fasttrap_isa.c.
	 * However the provider has no access to ECB context, so passes
	 * 0 through "arg0" and the probe_id of the overridden probe as arg1.
	 * Detect that here and cons up a viable state (from the probe_id).
	 */
	if (dtrace_probeid_error == id && 0 == arg0) {
		dtrace_id_t ftp_id = (dtrace_id_t)arg1;
		dtrace_probe_t *ftp_probe = dtrace_probes[ftp_id - 1];
		dtrace_ecb_t *ftp_ecb = ftp_probe->dtpr_ecb;

		if (NULL != ftp_ecb) {
			dtrace_state_t *ftp_state = ftp_ecb->dte_state;

			arg0 = (uint64_t)(uintptr_t)ftp_state;
			arg1 = ftp_ecb->dte_epid;
			/*
			 * args[2-4] established by caller.
			 */
			ftp_state->dts_arg_error_illval = -1; /* arg5 */
		}
	}

	mstate.dtms_difo = NULL;
	mstate.dtms_probe = probe;
	mstate.dtms_strtok = 0;
	mstate.dtms_arg[0] = arg0;
	mstate.dtms_arg[1] = arg1;
	mstate.dtms_arg[2] = arg2;
	mstate.dtms_arg[3] = arg3;
	mstate.dtms_arg[4] = arg4;

	flags = (volatile uint16_t *)&cpu_core[cpuid].cpuc_dtrace_flags;

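	/*
	 * Everything from here on happens once per ECB enabled on this
	 * probe:  each consumer's enabling gets its own predicate
	 * evaluation, its own buffer reservation and its own action walk,
	 * so concurrent consumers tracing the same probe share nothing
	 * beyond the probe structure itself.
	 */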
	for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
		dtrace_predicate_t *pred = ecb->dte_predicate;
		dtrace_state_t *state = ecb->dte_state;
		dtrace_buffer_t *buf = &state->dts_buffer[cpuid];
		dtrace_buffer_t *aggbuf = &state->dts_aggbuffer[cpuid];
		dtrace_vstate_t *vstate = &state->dts_vstate;
		dtrace_provider_t *prov = probe->dtpr_provider;
		uint64_t tracememsize = 0;
		int committed = 0;
		caddr_t tomax;

		/*
		 * A little subtlety with the following (seemingly innocuous)
		 * declaration of the automatic 'val':  by looking at the
		 * code, you might think that it could be declared in the
		 * action processing loop, below.  (That is, it's only used in
		 * the action processing loop.)  However, it must be declared
		 * out of that scope because in the case of DIF expression
		 * arguments to aggregating actions, one iteration of the
		 * action loop will use the last iteration's value.
		 */
		uint64_t val = 0;

		mstate.dtms_present = DTRACE_MSTATE_ARGS | DTRACE_MSTATE_PROBE;
		*flags &= ~CPU_DTRACE_ERROR;

		if (prov == dtrace_provider) {
			/*
			 * If dtrace itself is the provider of this probe,
			 * we're only going to continue processing the ECB if
			 * arg0 (the dtrace_state_t) is equal to the ECB's
			 * creating state.  (This prevents disjoint consumers
			 * from seeing one another's metaprobes.)
			 */
			if (arg0 != (uint64_t)(uintptr_t)state)
				continue;
		}

		if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE) {
			/*
			 * We're not currently active.  If our provider isn't
			 * the dtrace pseudo provider, we're not interested.
			 */
			if (prov != dtrace_provider)
				continue;

			/*
			 * Now we must further check if we are in the BEGIN
			 * probe.  If we are, we will only continue processing
			 * if we're still in WARMUP -- if one BEGIN enabling
			 * has invoked the exit() action, we don't want to
			 * evaluate subsequent BEGIN enablings.
			 */
			if (probe->dtpr_id == dtrace_probeid_begin &&
			    state->dts_activity != DTRACE_ACTIVITY_WARMUP) {
				ASSERT(state->dts_activity ==
				    DTRACE_ACTIVITY_DRAINING);
				continue;
			}
		}

		if (ecb->dte_cond) {
			/*
			 * If the dte_cond bits indicate that this
			 * consumer is only allowed to see user-mode firings
			 * of this probe, call the provider's dtps_usermode()
			 * entry point to check that the probe was fired
			 * while in a user context. Skip this ECB if that's
			 * not the case.
			 */
			if ((ecb->dte_cond & DTRACE_COND_USERMODE) &&
			    prov->dtpv_pops.dtps_usermode &&
			    prov->dtpv_pops.dtps_usermode(prov->dtpv_arg,
			    probe->dtpr_id, probe->dtpr_arg) == 0)
				continue;

			/*
			 * This is more subtle than it looks. We have to be
			 * absolutely certain that CRED() isn't going to
			 * change out from under us so it's only legit to
			 * examine that structure if we're in constrained
			 * situations. Currently, the only times we'll do this
			 * check is if a non-super-user has enabled the
			 * profile or syscall providers -- providers that
			 * allow visibility of all processes. For the
			 * profile case, the check above will ensure that
			 * we're examining a user context.
			 */
			if (ecb->dte_cond & DTRACE_COND_OWNER) {
				cred_t *cr;
				cred_t *s_cr =
				    ecb->dte_state->dts_cred.dcr_cred;
				proc_t *proc;
#pragma unused(proc) /* __APPLE__ */

				ASSERT(s_cr != NULL);

				/*
				 * XXX this is hackish, but so is setting a variable
				 * XXX in a McCarthy OR...
				 */
				if ((cr = dtrace_CRED()) == NULL ||
				    posix_cred_get(s_cr)->cr_uid != posix_cred_get(cr)->cr_uid ||
				    posix_cred_get(s_cr)->cr_uid != posix_cred_get(cr)->cr_ruid ||
				    posix_cred_get(s_cr)->cr_uid != posix_cred_get(cr)->cr_suid ||
				    posix_cred_get(s_cr)->cr_gid != posix_cred_get(cr)->cr_gid ||
				    posix_cred_get(s_cr)->cr_gid != posix_cred_get(cr)->cr_rgid ||
				    posix_cred_get(s_cr)->cr_gid != posix_cred_get(cr)->cr_sgid ||
#if !defined(__APPLE__)
				    (proc = ttoproc(curthread)) == NULL ||
				    (proc->p_flag & SNOCD))
#else
				    1) /* APPLE NOTE: Darwin omits "No Core Dump" flag */
#endif /* __APPLE__ */
					continue;
			}

			if (ecb->dte_cond & DTRACE_COND_ZONEOWNER) {
				cred_t *cr;
				cred_t *s_cr =
				    ecb->dte_state->dts_cred.dcr_cred;
#pragma unused(cr, s_cr) /* __APPLE__ */

				ASSERT(s_cr != NULL);

#if !defined(__APPLE__)
				if ((cr = CRED()) == NULL ||
				    s_cr->cr_zone->zone_id !=
				    cr->cr_zone->zone_id)
					continue;
#else
				/* APPLE NOTE: Darwin doesn't do zones. */
#endif /* __APPLE__ */
			}
		}

		if (now - state->dts_alive > dtrace_deadman_timeout) {
			/*
			 * We seem to be dead.  Unless we (a) have kernel
			 * destructive permissions (b) have explicitly enabled
			 * destructive actions and (c) destructive actions have
			 * not been disabled, we're going to transition into
			 * the KILLED state, from which no further processing
			 * on this state will be performed.
			 */
			if (!dtrace_priv_kernel_destructive(state) ||
			    !state->dts_cred.dcr_destructive ||
			    dtrace_destructive_disallow) {
				void *activity = &state->dts_activity;
				dtrace_activity_t current;

				do {
					current = state->dts_activity;
				} while (dtrace_cas32(activity, current,
				    DTRACE_ACTIVITY_KILLED) != current);

				continue;
			}
		}

		if ((offs = dtrace_buffer_reserve(buf, ecb->dte_needed,
		    ecb->dte_alignment, state, &mstate)) < 0)
			continue;

		tomax = buf->dtb_tomax;
		ASSERT(tomax != NULL);

		/*
		 * Build and store the record header corresponding to the ECB.
		 */
		if (ecb->dte_size != 0) {
			dtrace_rechdr_t dtrh;

			if (!(mstate.dtms_present & DTRACE_MSTATE_TIMESTAMP)) {
				mstate.dtms_timestamp = dtrace_gethrtime();
				mstate.dtms_present |= DTRACE_MSTATE_TIMESTAMP;
			}

			ASSERT(ecb->dte_size >= sizeof(dtrace_rechdr_t));

			dtrh.dtrh_epid = ecb->dte_epid;
			DTRACE_RECORD_STORE_TIMESTAMP(&dtrh, mstate.dtms_timestamp);
			DTRACE_STORE(dtrace_rechdr_t, tomax, offs, dtrh);
		}

		mstate.dtms_epid = ecb->dte_epid;
		mstate.dtms_present |= DTRACE_MSTATE_EPID;

		if (state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL)
			mstate.dtms_access = DTRACE_ACCESS_KERNEL;
		else
			mstate.dtms_access = 0;

		if (pred != NULL) {
			dtrace_difo_t *dp = pred->dtp_difo;
			uint64_t rval;

			rval = dtrace_dif_emulate(dp, &mstate, vstate, state);

			if (!(*flags & CPU_DTRACE_ERROR) && !rval) {
				dtrace_cacheid_t cid = probe->dtpr_predcache;

				if (cid != DTRACE_CACHEIDNONE && !onintr) {
					/*
					 * Update the predicate cache...
					 */
					ASSERT(cid == pred->dtp_cacheid);

					dtrace_set_thread_predcache(current_thread(), cid);
				}

				continue;
			}
		}

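		/*
		 * The cache updated above is what makes thread-local
		 * predicates like /self->traced/ cheap in the common case:
		 * once a predicate evaluates false for a thread, its cache
		 * id is stashed on that thread, and subsequent firings bail
		 * at the top of this function without running any DIF at
		 * all.
		 */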
		for (act = ecb->dte_action; !(*flags & CPU_DTRACE_ERROR) &&
		    act != NULL; act = act->dta_next) {
			size_t valoffs;
			dtrace_difo_t *dp;
			dtrace_recdesc_t *rec = &act->dta_rec;

			size = rec->dtrd_size;
			valoffs = offs + rec->dtrd_offset;

			if (DTRACEACT_ISAGG(act->dta_kind)) {
				uint64_t v = 0xbad;
				dtrace_aggregation_t *agg;

				agg = (dtrace_aggregation_t *)act;

				if ((dp = act->dta_difo) != NULL)
					v = dtrace_dif_emulate(dp,
					    &mstate, vstate, state);

				if (*flags & CPU_DTRACE_ERROR)
					continue;

				/*
				 * Note that we always pass the expression
				 * value from the previous iteration of the
				 * action loop.  This value will only be used
				 * if there is an expression argument to the
				 * aggregating action, denoted by the
				 * dtag_hasarg field.
				 */
				dtrace_aggregate(agg, buf,
				    offs, aggbuf, v, val);
				continue;
			}

			switch (act->dta_kind) {
			case DTRACEACT_STOP:
				if (dtrace_priv_proc_destructive(state))
					dtrace_action_stop();
				continue;

			case DTRACEACT_BREAKPOINT:
				if (dtrace_priv_kernel_destructive(state))
					dtrace_action_breakpoint(ecb);
				continue;

			case DTRACEACT_PANIC:
				if (dtrace_priv_kernel_destructive(state))
					dtrace_action_panic(ecb);
				continue;

			case DTRACEACT_STACK:
				if (!dtrace_priv_kernel(state))
					continue;

				dtrace_getpcstack((pc_t *)(tomax + valoffs),
				    size / sizeof (pc_t), probe->dtpr_aframes,
				    DTRACE_ANCHORED(probe) ? NULL :
				    (uint32_t *)(uintptr_t)arg0);
				continue;

			case DTRACEACT_JSTACK:
			case DTRACEACT_USTACK:
				if (!dtrace_priv_proc(state))
					continue;

				/*
				 * See comment in DIF_VAR_PID.
				 */
				if (DTRACE_ANCHORED(mstate.dtms_probe) &&
				    CPU_ON_INTR(CPU)) {
					int depth = DTRACE_USTACK_NFRAMES(
					    rec->dtrd_arg) + 1;

					dtrace_bzero((void *)(tomax + valoffs),
					    DTRACE_USTACK_STRSIZE(rec->dtrd_arg)
					    + depth * sizeof (uint64_t));

					continue;
				}

				if (DTRACE_USTACK_STRSIZE(rec->dtrd_arg) != 0 &&
				    curproc->p_dtrace_helpers != NULL) {
					/*
					 * This is the slow path -- we have
					 * allocated string space, and we're
					 * getting the stack of a process that
					 * has helpers.  Call into a separate
					 * routine to perform this processing.
					 */
					dtrace_action_ustack(&mstate, state,
					    (uint64_t *)(tomax + valoffs),
					    rec->dtrd_arg);
					continue;
				}

				DTRACE_CPUFLAG_SET(CPU_DTRACE_NOFAULT);
				dtrace_getupcstack((uint64_t *)
				    (tomax + valoffs),
				    DTRACE_USTACK_NFRAMES(rec->dtrd_arg) + 1);
				DTRACE_CPUFLAG_CLEAR(CPU_DTRACE_NOFAULT);
				continue;

			default:
				break;
			}

			dp = act->dta_difo;
			ASSERT(dp != NULL);

			val = dtrace_dif_emulate(dp, &mstate, vstate, state);

			if (*flags & CPU_DTRACE_ERROR)
				continue;

			switch (act->dta_kind) {
			case DTRACEACT_SPECULATE: {
				dtrace_rechdr_t *dtrh = NULL;

				ASSERT(buf == &state->dts_buffer[cpuid]);
				buf = dtrace_speculation_buffer(state,
				    cpuid, val);

				if (buf == NULL) {
					*flags |= CPU_DTRACE_DROP;
					continue;
				}

				offs = dtrace_buffer_reserve(buf,
				    ecb->dte_needed, ecb->dte_alignment,
				    state, NULL);

				if (offs < 0) {
					*flags |= CPU_DTRACE_DROP;
					continue;
				}

				tomax = buf->dtb_tomax;
				ASSERT(tomax != NULL);

				if (ecb->dte_size == 0)
					continue;

				ASSERT(ecb->dte_size >= sizeof(dtrace_rechdr_t));
				dtrh = ((void *)(tomax + offs));
				dtrh->dtrh_epid = ecb->dte_epid;

				/*
				 * When the speculation is committed, all of
				 * the records in the speculative buffer will
				 * have their timestamps set to the commit
				 * time.  Until then, it is set to a sentinel
				 * value, for debugability.
				 */
				DTRACE_RECORD_STORE_TIMESTAMP(dtrh, UINT64_MAX);

				continue;
			}

			case DTRACEACT_CHILL:
				if (dtrace_priv_kernel_destructive(state))
					dtrace_action_chill(&mstate, val);
				continue;

			case DTRACEACT_RAISE:
				if (dtrace_priv_proc_destructive(state))
					dtrace_action_raise(val);
				continue;

			case DTRACEACT_PIDRESUME:   /* __APPLE__ */
				if (dtrace_priv_proc_destructive(state))
					dtrace_action_pidresume(val);
				continue;

			case DTRACEACT_COMMIT:
				ASSERT(!committed);

				/*
				 * We need to commit our buffer state.
				 */
				if (ecb->dte_size)
					buf->dtb_offset = offs + ecb->dte_size;
				buf = &state->dts_buffer[cpuid];
				dtrace_speculation_commit(state, cpuid, val);
				committed = 1;
				continue;

			case DTRACEACT_DISCARD:
				dtrace_speculation_discard(state, cpuid, val);
				continue;

			case DTRACEACT_DIFEXPR:
			case DTRACEACT_LIBACT:
			case DTRACEACT_PRINTF:
			case DTRACEACT_PRINTA:
			case DTRACEACT_SYSTEM:
			case DTRACEACT_FREOPEN:
			case DTRACEACT_APPLEBINARY:   /* __APPLE__ */
			case DTRACEACT_TRACEMEM:
				break;

			case DTRACEACT_TRACEMEM_DYNSIZE:
				tracememsize = val;
				break;

			case DTRACEACT_SYM:
			case DTRACEACT_MOD:
				if (!dtrace_priv_kernel(state))
					continue;
				break;

			case DTRACEACT_USYM:
			case DTRACEACT_UMOD:
			case DTRACEACT_UADDR: {
				if (!dtrace_priv_proc(state))
					continue;

				DTRACE_STORE(uint64_t, tomax,
				    valoffs, (uint64_t)dtrace_proc_selfpid());
				DTRACE_STORE(uint64_t, tomax,
				    valoffs + sizeof (uint64_t), val);

				continue;
			}

			case DTRACEACT_EXIT: {
				/*
				 * For the exit action, we are going to attempt
				 * to atomically set our activity to be
				 * draining.  If this fails (either because
				 * another CPU has beat us to the exit action,
				 * or because our current activity is something
				 * other than ACTIVE or WARMUP), we will
				 * continue.  This assures that the exit action
				 * can be successfully recorded at most once
				 * when we're in the ACTIVE state.  If we're
				 * encountering the exit() action while in
				 * COOLDOWN, however, we want to honor the new
				 * status code.  (We know that we're the only
				 * thread in COOLDOWN, so there is no race.)
				 */
				void *activity = &state->dts_activity;
				dtrace_activity_t current = state->dts_activity;

				if (current == DTRACE_ACTIVITY_COOLDOWN)
					break;

				if (current != DTRACE_ACTIVITY_WARMUP)
					current = DTRACE_ACTIVITY_ACTIVE;

				if (dtrace_cas32(activity, current,
				    DTRACE_ACTIVITY_DRAINING) != current) {
					*flags |= CPU_DTRACE_DROP;
					continue;
				}

				break;
			}

			default:
				ASSERT(0);
			}

			if (dp->dtdo_rtype.dtdt_flags & (DIF_TF_BYREF | DIF_TF_BYUREF)) {
				uintptr_t end = valoffs + size;

				if (tracememsize != 0 &&
				    valoffs + tracememsize < end) {
					end = valoffs + tracememsize;
					tracememsize = 0;
				}

				if (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF &&
				    !dtrace_vcanload((void *)(uintptr_t)val,
				    &dp->dtdo_rtype, NULL, &mstate, vstate))
					continue;

				dtrace_store_by_ref(dp, tomax, size, &valoffs,
				    &val, end, act->dta_intuple,
				    dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF ?
				    DIF_TF_BYREF : DIF_TF_BYUREF);

				continue;
			}

			switch (size) {
			case 0:
				break;

			case sizeof (uint8_t):
				DTRACE_STORE(uint8_t, tomax, valoffs, val);
				break;
			case sizeof (uint16_t):
				DTRACE_STORE(uint16_t, tomax, valoffs, val);
				break;
			case sizeof (uint32_t):
				DTRACE_STORE(uint32_t, tomax, valoffs, val);
				break;
			case sizeof (uint64_t):
				DTRACE_STORE(uint64_t, tomax, valoffs, val);
				break;
			default:
				/*
				 * Any other size should have been returned by
				 * reference, not by value.
				 */
				ASSERT(0);
				break;
			}
		}

6879                 if (*flags 
& CPU_DTRACE_DROP
) 
6882                 if (*flags 
& CPU_DTRACE_FAULT
) { 
6884                         dtrace_action_t 
*err
; 
6888                         if (probe
->dtpr_id 
== dtrace_probeid_error
) { 
6890                                  * There's nothing we can do -- we had an 
6891                                  * error on the error probe.  We bump an 
6892                                  * error counter to at least indicate that 
6893                                  * this condition happened. 
6895                                 dtrace_error(&state
->dts_dblerrors
); 
6901                                  * Before recursing on dtrace_probe(), we 
6902                                  * need to explicitly clear out our start 
6903                                  * time to prevent it from being accumulated 
6904                                  * into t_dtrace_vtime. 
6908                                  * Darwin sets the sign bit on t_dtrace_tracing 
6909                                  * to suspend accumulation to it. 
6911                                 dtrace_set_thread_tracing(current_thread(),  
6912                                     (1ULL<<63) | dtrace_get_thread_tracing(current_thread())); 
6917                          * Iterate over the actions to figure out which action 
6918                          * we were processing when we experienced the error. 
6919                          * Note that act points _past_ the faulting action; if 
6920                          * act is ecb->dte_action, the fault was in the 
6921                          * predicate, if it's ecb->dte_action->dta_next it's 
6922                          * in action #1, and so on. 
6924                         for (err 
= ecb
->dte_action
, ndx 
= 0; 
6925                             err 
!= act
; err 
= err
->dta_next
, ndx
++) 
6928                         dtrace_probe_error(state
, ecb
->dte_epid
, ndx
, 
6929                             (mstate
.dtms_present 
& DTRACE_MSTATE_FLTOFFS
) ? 
6930                             mstate
.dtms_fltoffs 
: -1, DTRACE_FLAGS2FLT(*flags
), 
6931                             cpu_core
[cpuid
].cpuc_dtrace_illval
); 
6937                         buf
->dtb_offset 
= offs 
+ ecb
->dte_size
; 
6940         /* FIXME: On Darwin the time spent leaving DTrace from this point to the rti is attributed 
6941            to the current thread. Instead it should accrue to DTrace. */ 
6943                 thread_t thread 
= current_thread(); 
6944                 int64_t t 
= dtrace_get_thread_tracing(thread
); 
6947                         /* Usual case, accumulate time spent here into t_dtrace_tracing */ 
6948                         dtrace_set_thread_tracing(thread
, t 
+ (dtrace_gethrtime() - now
)); 
6950                         /* Return from error recursion. No accumulation, just clear the sign bit on t_dtrace_tracing. */ 
6951                         dtrace_set_thread_tracing(thread
, (~(1ULL<<63)) & t
);  
6955         dtrace_interrupt_enable(cookie
); 
6959  * APPLE NOTE:  Don't allow a thread to re-enter dtrace_probe().
6960  * This could occur if a probe is encountered on some function in the
6961  * transitive closure of the call to dtrace_probe().
6962  * Solaris has some strong guarantees that this won't happen.
6963  * The Darwin implementation is not so mature as to make those guarantees.
6964  * Hence, the introduction of __dtrace_probe() on xnu.
6968 dtrace_probe(dtrace_id_t id, uint64_t arg0, uint64_t arg1,
6969     uint64_t arg2, uint64_t arg3, uint64_t arg4)
6971         thread_t thread = current_thread();
6972         disable_preemption();
6973         if (id == dtrace_probeid_error) {
6974                 __dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
6975                 dtrace_getipl(); /* Defeat tail-call optimization of __dtrace_probe() */
6976         } else if (!dtrace_get_thread_reentering(thread)) {
6977                 dtrace_set_thread_reentering(thread, TRUE);
6978                 __dtrace_probe(id, arg0, arg1, arg2, arg3, arg4);
6979                 dtrace_set_thread_reentering(thread, FALSE);
6982         else __dtrace_probe(dtrace_probeid_error, 0, id, 1, -1, DTRACEFLT_UNKNOWN);
6984         enable_preemption();
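/*
 * Editorial sketch (not part of this file): the per-thread re-entrancy
 * latch used above, in a self-contained form.  C11 thread-local storage
 * stands in for the thread_t flag accessors; all names are hypothetical.
 * Note the real code does not silently return on re-entry -- it reports
 * the condition through the error probe (DTRACEFLT_UNKNOWN).
 */
#include <stdbool.h>

static _Thread_local bool my_reentering;

static void
my_probe(int id)
{
        if (my_reentering)
                return;         /* already inside; refuse to recurse */

        my_reentering = true;
        /*
         * ... fire the probe; anything called from here that re-enters
         * my_probe() falls out at the check above ...
         */
        my_reentering = false;
}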
6988  * DTrace Probe Hashing Functions
6990  * The functions in this section (and indeed, the functions in remaining
6991  * sections) are not _called_ from probe context.  (Any exceptions to this are
6992  * marked with a "Note:".)  Rather, they are called from elsewhere in the
6993  * DTrace framework to look-up probes in, add probes to and remove probes from
6994  * the DTrace probe hashes.  (Each probe is hashed by each element of the
6995  * probe tuple -- allowing for fast lookups, regardless of what was
6996  * specified.)
6999 dtrace_hash_str(const char *p)
7005                 hval = (hval << 4) + *p++;
7006                 if ((g = (hval & 0xf0000000)) != 0)
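/*
 * Editorial sketch (not part of this file): dtrace_hash_str() above is
 * the classic ELF/PJW string hash; the listing has dropped its fold and
 * mask lines, so here is the complete shape of that algorithm as a
 * standalone function for reference.
 */
#include <stdint.h>

static uint32_t
my_hash_str(const char *p)
{
        uint32_t g, hval = 0;

        while (*p != '\0') {
                hval = (hval << 4) + *p++;
                if ((g = (hval & 0xf0000000)) != 0)
                        hval ^= g >> 24;        /* fold the high nibble back in */
                hval &= ~g;                     /* and clear it */
        }
        return (hval);
}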
7013 static dtrace_hash_t *
7014 dtrace_hash_create(uintptr_t stroffs, uintptr_t nextoffs, uintptr_t prevoffs)
7016         dtrace_hash_t *hash = kmem_zalloc(sizeof (dtrace_hash_t), KM_SLEEP);
7018         hash->dth_stroffs = stroffs;
7019         hash->dth_nextoffs = nextoffs;
7020         hash->dth_prevoffs = prevoffs;
7023         hash->dth_mask = hash->dth_size - 1;
7025         hash->dth_tab = kmem_zalloc(hash->dth_size *
7026             sizeof (dtrace_hashbucket_t *), KM_SLEEP);
7032  * APPLE NOTE: dtrace_hash_destroy is not used.
7033  * It is called by dtrace_detach which is not
7034  * currently implemented.  Revisit someday.
7036 #if !defined(__APPLE__)
7038 dtrace_hash_destroy(dtrace_hash_t *hash)
7043         for (i = 0; i < hash->dth_size; i++)
7044                 ASSERT(hash->dth_tab[i] == NULL);
7047         kmem_free(hash->dth_tab,
7048             hash->dth_size * sizeof (dtrace_hashbucket_t *));
7049         kmem_free(hash, sizeof (dtrace_hash_t));
7051 #endif /* __APPLE__ */
7054 dtrace_hash_resize(dtrace_hash_t *hash)
7056         int size = hash->dth_size, i, ndx;
7057         int new_size = hash->dth_size << 1;
7058         int new_mask = new_size - 1;
7059         dtrace_hashbucket_t **new_tab, *bucket, *next;
7061         ASSERT((new_size & new_mask) == 0);
7063         new_tab = kmem_zalloc(new_size * sizeof (void *), KM_SLEEP);
7065         for (i = 0; i < size; i++) {
7066                 for (bucket = hash->dth_tab[i]; bucket != NULL; bucket = next) {
7067                         dtrace_probe_t *probe = bucket->dthb_chain;
7069                         ASSERT(probe != NULL);
7070                         ndx = DTRACE_HASHSTR(hash, probe) & new_mask;
7072                         next = bucket->dthb_next;
7073                         bucket->dthb_next = new_tab[ndx];
7074                         new_tab[ndx] = bucket;
7078         kmem_free(hash->dth_tab, hash->dth_size * sizeof (void *));
7079         hash->dth_tab = new_tab;
7080         hash->dth_size = new_size;
7081         hash->dth_mask = new_mask;
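/*
 * Editorial sketch (not part of this file): the resize strategy above in
 * miniature -- double the table, keep the size a power of two so a mask
 * replaces a modulo, and relink the existing buckets rather than copying
 * them.  Types and names here are hypothetical.
 */
#include <stdint.h>
#include <stdlib.h>

struct my_bucket {
        struct my_bucket *next;
        uint32_t hashval;               /* cached hash of the chained items */
};

static void
my_resize(struct my_bucket ***tabp, int *sizep)
{
        int i, old_size = *sizep, new_size = old_size << 1;
        uint32_t new_mask = new_size - 1;
        struct my_bucket **new_tab = calloc(new_size, sizeof (*new_tab));
        struct my_bucket *b, *next;

        for (i = 0; i < old_size; i++) {
                for (b = (*tabp)[i]; b != NULL; b = next) {
                        next = b->next;
                        b->next = new_tab[b->hashval & new_mask];
                        new_tab[b->hashval & new_mask] = b;     /* relink, no copy */
                }
        }
        free(*tabp);
        *tabp = new_tab;
        *sizep = new_size;
}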
7085 dtrace_hash_add(dtrace_hash_t *hash, dtrace_probe_t *new)
7087         int hashval = DTRACE_HASHSTR(hash, new);
7088         int ndx = hashval & hash->dth_mask;
7089         dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
7090         dtrace_probe_t **nextp, **prevp;
7092         for (; bucket != NULL; bucket = bucket->dthb_next) {
7093                 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, new))
7097         if ((hash->dth_nbuckets >> 1) > hash->dth_size) {
7098                 dtrace_hash_resize(hash);
7099                 dtrace_hash_add(hash, new);
7103         bucket = kmem_zalloc(sizeof (dtrace_hashbucket_t), KM_SLEEP);
7104         bucket->dthb_next = hash->dth_tab[ndx];
7105         hash->dth_tab[ndx] = bucket;
7106         hash->dth_nbuckets++;
7109         nextp = DTRACE_HASHNEXT(hash, new);
7110         ASSERT(*nextp == NULL && *(DTRACE_HASHPREV(hash, new)) == NULL);
7111         *nextp = bucket->dthb_chain;
7113         if (bucket->dthb_chain != NULL) {
7114                 prevp = DTRACE_HASHPREV(hash, bucket->dthb_chain);
7115                 ASSERT(*prevp == NULL);
7119         bucket->dthb_chain = new;
7123 static dtrace_probe_t *
7124 dtrace_hash_lookup(dtrace_hash_t *hash, dtrace_probe_t *template)
7126         int hashval = DTRACE_HASHSTR(hash, template);
7127         int ndx = hashval & hash->dth_mask;
7128         dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
7130         for (; bucket != NULL; bucket = bucket->dthb_next) {
7131                 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
7132                         return (bucket->dthb_chain);
7139 dtrace_hash_collisions(dtrace_hash_t *hash, dtrace_probe_t *template)
7141         int hashval = DTRACE_HASHSTR(hash, template);
7142         int ndx = hashval & hash->dth_mask;
7143         dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
7145         for (; bucket != NULL; bucket = bucket->dthb_next) {
7146                 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, template))
7147                         return (bucket->dthb_len);
7154 dtrace_hash_remove(dtrace_hash_t *hash, dtrace_probe_t *probe)
7156         int ndx = DTRACE_HASHSTR(hash, probe) & hash->dth_mask;
7157         dtrace_hashbucket_t *bucket = hash->dth_tab[ndx];
7159         dtrace_probe_t **prevp = DTRACE_HASHPREV(hash, probe);
7160         dtrace_probe_t **nextp = DTRACE_HASHNEXT(hash, probe);
7163          * Find the bucket that we're removing this probe from.
7165         for (; bucket != NULL; bucket = bucket->dthb_next) {
7166                 if (DTRACE_HASHEQ(hash, bucket->dthb_chain, probe))
7170         ASSERT(bucket != NULL);
7172         if (*prevp == NULL) {
7173                 if (*nextp == NULL) {
7175                          * The removed probe was the only probe on this
7176                          * bucket; we need to remove the bucket.
7178                         dtrace_hashbucket_t *b = hash->dth_tab[ndx];
7180                         ASSERT(bucket->dthb_chain == probe);
7184                                 hash->dth_tab[ndx] = bucket->dthb_next;
7186                                 while (b->dthb_next != bucket)
7188                                 b->dthb_next = bucket->dthb_next;
7191                         ASSERT(hash->dth_nbuckets > 0);
7192                         hash->dth_nbuckets--;
7193                         kmem_free(bucket, sizeof (dtrace_hashbucket_t));
7197                 bucket->dthb_chain = *nextp;
7199                 *(DTRACE_HASHNEXT(hash, *prevp)) = *nextp;
7203                 *(DTRACE_HASHPREV(hash, *nextp)) = *prevp;
7207  * DTrace Utility Functions
7209  * These are random utility functions that are _not_ called from probe context.
7212 dtrace_badattr(const dtrace_attribute_t *a)
7214         return (a->dtat_name > DTRACE_STABILITY_MAX ||
7215             a->dtat_data > DTRACE_STABILITY_MAX ||
7216             a->dtat_class > DTRACE_CLASS_MAX);
7220  * Return a duplicate copy of a string.  If the specified string is NULL,
7221  * this function returns a zero-length string.
7222  * APPLE NOTE: Darwin employs size bounded string operation.
7225 dtrace_strdup(const char *str)
7227         size_t bufsize = (str != NULL ? strlen(str) : 0) + 1;
7228         char *new = kmem_zalloc(bufsize, KM_SLEEP);
7231                 (void) strlcpy(new, str, bufsize);
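/*
 * Editorial sketch (not part of this file): the same size-bounded
 * duplication in userland terms.  strlcpy() is assumed available (it is
 * on Darwin/BSD); the NULL-in, empty-string-out contract matches the
 * comment above.
 */
#include <stdlib.h>
#include <string.h>

static char *
my_strdup(const char *str)
{
        size_t bufsize = (str != NULL ? strlen(str) : 0) + 1;
        char *new = calloc(1, bufsize);     /* zeroed, so "" when str is NULL */

        if (new != NULL && str != NULL)
                (void) strlcpy(new, str, bufsize);
        return (new);
}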
7236 #define DTRACE_ISALPHA(c)       \
7237         (((c) >= 'a' && (c) <= 'z') || ((c) >= 'A' && (c) <= 'Z'))
7240 dtrace_badname(const char *s)
7244         if (s == NULL || (c = *s++) == '\0')
7247         if (!DTRACE_ISALPHA(c) && c != '-' && c != '_' && c != '.')
7250         while ((c = *s++) != '\0') {
7251                 if (!DTRACE_ISALPHA(c) && (c < '0' || c > '9') &&
7252                     c != '-' && c != '_' && c != '.' && c != '`')
7260 dtrace_cred2priv(cred_t *cr, uint32_t *privp, uid_t *uidp, zoneid_t *zoneidp)
7264         if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
7265                 if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed()) {
7266                         priv = DTRACE_PRIV_USER | DTRACE_PRIV_PROC | DTRACE_PRIV_OWNER;
7269                         priv = DTRACE_PRIV_ALL;
7274                 *uidp = crgetuid(cr);
7275                 *zoneidp = crgetzoneid(cr);
7278                 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE))
7279                         priv |= DTRACE_PRIV_KERNEL | DTRACE_PRIV_USER;
7280                 else if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE))
7281                         priv |= DTRACE_PRIV_USER;
7282                 if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE))
7283                         priv |= DTRACE_PRIV_PROC;
7284                 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
7285                         priv |= DTRACE_PRIV_OWNER;
7286                 if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
7287                         priv |= DTRACE_PRIV_ZONEOWNER;
7293 #ifdef DTRACE_ERRDEBUG
7295 dtrace_errdebug(const char *str)
7297         int hval = dtrace_hash_str(str) % DTRACE_ERRHASHSZ;
7300         lck_mtx_lock(&dtrace_errlock);
7301         dtrace_errlast = str;
7302         dtrace_errthread = (kthread_t *)current_thread();
7304         while (occupied++ < DTRACE_ERRHASHSZ) {
7305                 if (dtrace_errhash[hval].dter_msg == str) {
7306                         dtrace_errhash[hval].dter_count++;
7310                 if (dtrace_errhash[hval].dter_msg != NULL) {
7311                         hval = (hval + 1) % DTRACE_ERRHASHSZ;
7315                 dtrace_errhash[hval].dter_msg = str;
7316                 dtrace_errhash[hval].dter_count = 1;
7320         panic("dtrace: undersized error hash");
7322         lck_mtx_unlock(&dtrace_errlock);
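/*
 * Editorial sketch (not part of this file): dtrace_errdebug() above is a
 * bounded open-addressing table with linear probing, keyed by the string
 * pointer itself (error messages are compile-time constants, so pointer
 * identity suffices).  The same scheme, self-contained and with
 * hypothetical names:
 */
#include <assert.h>
#include <stddef.h>

#define MY_HASHSZ       64

struct my_slot { const char *msg; unsigned count; };
static struct my_slot my_hash[MY_HASHSZ];

static void
my_count(const char *msg, unsigned hval)
{
        unsigned probes = 0;

        hval %= MY_HASHSZ;
        while (probes++ < MY_HASHSZ) {
                if (my_hash[hval].msg == msg) {         /* pointer identity, not strcmp */
                        my_hash[hval].count++;
                        return;
                }
                if (my_hash[hval].msg != NULL) {        /* occupied: probe linearly */
                        hval = (hval + 1) % MY_HASHSZ;
                        continue;
                }
                my_hash[hval].msg = msg;                /* empty slot: claim it */
                my_hash[hval].count = 1;
                return;
        }
        assert(!"undersized error hash");
}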
7327  * DTrace Matching Functions
7329  * These functions are used to match groups of probes, given some elements of
7330  * a probe tuple, or some globbed expressions for elements of a probe tuple.
7333 dtrace_match_priv(const dtrace_probe_t *prp, uint32_t priv, uid_t uid,
7336         if (priv != DTRACE_PRIV_ALL) {
7337                 uint32_t ppriv = prp->dtpr_provider->dtpv_priv.dtpp_flags;
7338                 uint32_t match = priv & ppriv;
7341                  * No PRIV_DTRACE_* privileges...
7343                 if ((priv & (DTRACE_PRIV_PROC | DTRACE_PRIV_USER |
7344                     DTRACE_PRIV_KERNEL)) == 0)
7348                  * No matching bits, but there were bits to match...
7350                 if (match == 0 && ppriv != 0)
7354                  * Need to have permissions to the process, but don't...
7356                 if (((ppriv & ~match) & DTRACE_PRIV_OWNER) != 0 &&
7357                     uid != prp->dtpr_provider->dtpv_priv.dtpp_uid) {
7362                  * Need to be in the same zone unless we possess the
7363                  * privilege to examine all zones.
7365                 if (((ppriv & ~match) & DTRACE_PRIV_ZONEOWNER) != 0 &&
7366                     zoneid != prp->dtpr_provider->dtpv_priv.dtpp_zoneid) {
7375  * dtrace_match_probe compares a dtrace_probe_t to a pre-compiled key, which
7376  * consists of input pattern strings and an ops-vector to evaluate them.
7377  * This function returns >0 for match, 0 for no match, and <0 for error.
7380 dtrace_match_probe(const dtrace_probe_t *prp, const dtrace_probekey_t *pkp,
7381     uint32_t priv, uid_t uid, zoneid_t zoneid)
7383         dtrace_provider_t *pvp = prp->dtpr_provider;
7386         if (pvp->dtpv_defunct)
7389         if ((rv = pkp->dtpk_pmatch(pvp->dtpv_name, pkp->dtpk_prov, 0)) <= 0)
7392         if ((rv = pkp->dtpk_mmatch(prp->dtpr_mod, pkp->dtpk_mod, 0)) <= 0)
7395         if ((rv = pkp->dtpk_fmatch(prp->dtpr_func, pkp->dtpk_func, 0)) <= 0)
7398         if ((rv = pkp->dtpk_nmatch(prp->dtpr_name, pkp->dtpk_name, 0)) <= 0)
7401         if (dtrace_match_priv(prp, priv, uid, zoneid) == 0)
7408  * dtrace_match_glob() is a safe kernel implementation of the gmatch(3GEN)
7409  * interface for matching a glob pattern 'p' to an input string 's'.  Unlike
7410  * libc's version, the kernel version only applies to 8-bit ASCII strings.
7411  * In addition, all of the recursion cases except for '*' matching have been
7412  * unwound.  For '*', we still implement recursive evaluation, but a depth
7413  * counter is maintained and matching is aborted if we recurse too deep.
7414  * The function returns 0 if no match, >0 if match, and <0 if recursion error.
7417 dtrace_match_glob(const char *s, const char *p, int depth)
7423         if (depth > DTRACE_PROBEKEY_MAXDEPTH)
7427                 s = ""; /* treat NULL as empty string */
7436         if ((c = *p++) == '\0')
7437                 return (s1 == '\0');
7441                 int ok = 0, notflag = 0;
7452                 if ((c = *p++) == '\0')
7456                         if (c == '-' && lc != '\0' && *p != ']') {
7457                                 if ((c = *p++) == '\0')
7459                                 if (c == '\\' && (c = *p++) == '\0')
7463                                         if (s1 < lc || s1 > c)
7467                                 } else if (lc <= s1 && s1 <= c)
7470                         } else if (c == '\\' && (c = *p++) == '\0')
7473                         lc = c; /* save left-hand 'c' for next iteration */
7483                         if ((c = *p++) == '\0')
7495                 if ((c = *p++) == '\0')
7511                         p++; /* consecutive *'s are identical to a single one */
7516                 for (s = olds; *s != '\0'; s++) {
7517                         if ((gs = dtrace_match_glob(s, p, depth + 1)) != 0)
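/*
 * Editorial sketch (not part of this file): the pattern language handled
 * above is the shell glob subset -- '*', '?', '[...]' with ranges and
 * '!' negation, and '\' escapes.  In userland the same semantics can be
 * exercised with fnmatch(3); the expected results below illustrate what
 * dtrace_match_glob() accepts, with the caveat that the kernel version
 * is 8-bit ASCII only and bounds its '*' recursion depth.
 */
#include <assert.h>
#include <fnmatch.h>

static void
my_glob_examples(void)
{
        assert(fnmatch("sys*", "syscall", 0) == 0);     /* '*' matches a run */
        assert(fnmatch("read?", "read5", 0) == 0);      /* '?' matches one char */
        assert(fnmatch("[a-c]at", "bat", 0) == 0);      /* ranges in brackets */
        assert(fnmatch("[!a-c]at", "rat", 0) == 0);     /* '!' negates the class */
        assert(fnmatch("sys*", "dtrace", 0) != 0);      /* and non-matches fail */
}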
7527 dtrace_match_string(const char *s, const char *p, int depth)
7529 #pragma unused(depth) /* __APPLE__ */
7531         /* APPLE NOTE: Darwin employs size bounded string operation. */
7532         return (s != NULL && strncmp(s, p, strlen(s) + 1) == 0);
7537 dtrace_match_nul(const char *s, const char *p, int depth)
7539 #pragma unused(s, p, depth) /* __APPLE__ */
7540         return (1); /* always match the empty pattern */
7545 dtrace_match_nonzero(const char *s, const char *p, int depth)
7547 #pragma unused(p, depth) /* __APPLE__ */
7548         return (s != NULL && s[0] != '\0');
7552 dtrace_match(const dtrace_probekey_t *pkp, uint32_t priv, uid_t uid,
7553     zoneid_t zoneid, int (*matched)(dtrace_probe_t *, void *, void *), void *arg1, void *arg2)
7555         dtrace_probe_t template, *probe;
7556         dtrace_hash_t *hash = NULL;
7557         int len, rc, best = INT_MAX, nmatched = 0;
7560         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
7563          * If the probe ID is specified in the key, just lookup by ID and
7564          * invoke the match callback once if a matching probe is found.
7566         if (pkp->dtpk_id != DTRACE_IDNONE) {
7567                 if ((probe = dtrace_probe_lookup_id(pkp->dtpk_id)) != NULL &&
7568                     dtrace_match_probe(probe, pkp, priv, uid, zoneid) > 0) {
7569                         if ((*matched)(probe, arg1, arg2) == DTRACE_MATCH_FAIL)
7570                                 return (DTRACE_MATCH_FAIL);
7576         template.dtpr_mod = (char *)(uintptr_t)pkp->dtpk_mod;
7577         template.dtpr_func = (char *)(uintptr_t)pkp->dtpk_func;
7578         template.dtpr_name = (char *)(uintptr_t)pkp->dtpk_name;
7581          * We want to find the most distinct of the module name, function
7582          * name, and name.  So for each one that is not a glob pattern or
7583          * empty string, we perform a lookup in the corresponding hash and
7584          * use the hash table with the fewest collisions to do our search.
7586         if (pkp->dtpk_mmatch == &dtrace_match_string &&
7587             (len = dtrace_hash_collisions(dtrace_bymod, &template)) < best) {
7589                 hash = dtrace_bymod;
7592         if (pkp->dtpk_fmatch == &dtrace_match_string &&
7593             (len = dtrace_hash_collisions(dtrace_byfunc, &template)) < best) {
7595                 hash = dtrace_byfunc;
7598         if (pkp->dtpk_nmatch == &dtrace_match_string &&
7599             (len = dtrace_hash_collisions(dtrace_byname, &template)) < best) {
7601                 hash = dtrace_byname;
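/*
 * Editorial sketch (not part of this file): the "most distinct element"
 * heuristic above, reduced to its core -- probe each candidate index and
 * keep whichever chain is shortest.  Names are hypothetical.
 */
#include <limits.h>

struct my_index {
        const char *name;
        int (*collisions)(const char *key);     /* chain length for key */
};

static const struct my_index *
my_pick_index(const struct my_index *candidates, int n, const char *key)
{
        const struct my_index *pick = NULL;
        int i, len, best = INT_MAX;

        for (i = 0; i < n; i++) {
                if ((len = candidates[i].collisions(key)) < best) {
                        best = len;             /* fewest collisions wins */
                        pick = &candidates[i];
                }
        }
        return (pick);          /* NULL means no usable index: scan everything */
}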
7605          * If we did not select a hash table, iterate over every probe and
7606          * invoke our callback for each one that matches our input probe key.
7609                 for (i = 0; i < (dtrace_id_t)dtrace_nprobes; i++) {
7610                         if ((probe = dtrace_probes[i]) == NULL ||
7611                             dtrace_match_probe(probe, pkp, priv, uid,
7617                         if ((rc = (*matched)(probe, arg1, arg2)) != DTRACE_MATCH_NEXT) {
7618                                 if (rc == DTRACE_MATCH_FAIL)
7619                                         return (DTRACE_MATCH_FAIL);
7628          * If we selected a hash table, iterate over each probe of the same key
7629          * name and invoke the callback for every probe that matches the other
7630          * attributes of our input probe key.
7632         for (probe = dtrace_hash_lookup(hash, &template); probe != NULL;
7633             probe = *(DTRACE_HASHNEXT(hash, probe))) {
7635                 if (dtrace_match_probe(probe, pkp, priv, uid, zoneid) <= 0)
7640                 if ((rc = (*matched)(probe, arg1, arg2)) != DTRACE_MATCH_NEXT) {
7641                         if (rc == DTRACE_MATCH_FAIL)
7642                                 return (DTRACE_MATCH_FAIL);
7651  * Return the function pointer dtrace_probecmp() should use to compare the
7652  * specified pattern with a string.  For NULL or empty patterns, we select
7653  * dtrace_match_nul().  For glob pattern strings, we use dtrace_match_glob().
7654  * For non-empty non-glob strings, we use dtrace_match_string().
7656 static dtrace_probekey_f *
7657 dtrace_probekey_func(const char *p)
7661         if (p == NULL || *p == '\0')
7662                 return (&dtrace_match_nul);
7664         while ((c = *p++) != '\0') {
7665                 if (c == '[' || c == '?' || c == '*' || c == '\\')
7666                         return (&dtrace_match_glob);
7669         return (&dtrace_match_string);
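/*
 * Editorial sketch (not part of this file): dtrace_probekey_func() above
 * picks the matcher by scanning for glob metacharacters -- NULL or empty
 * patterns get the always-match function, literal strings the exact
 * comparator.  The predicate below isolates that scan, with hypothetical
 * naming.
 */
#include <stdbool.h>

static bool
my_is_glob(const char *p)
{
        char c;

        while ((c = *p++) != '\0') {
                if (c == '[' || c == '?' || c == '*' || c == '\\')
                        return (true);          /* metacharacter: needs glob */
        }
        return (false);                         /* literal: exact compare */
}

/* my_is_glob("read*") -> true; my_is_glob("syscall") -> false */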
7673  * Build a probe comparison key for use with dtrace_match_probe() from the
7674  * given probe description.  By convention, a null key only matches anchored
7675  * probes: if each field is the empty string, reset dtpk_fmatch to
7676  * dtrace_match_nonzero().
7679 dtrace_probekey(const dtrace_probedesc_t *pdp, dtrace_probekey_t *pkp)
7681         pkp->dtpk_prov = pdp->dtpd_provider;
7682         pkp->dtpk_pmatch = dtrace_probekey_func(pdp->dtpd_provider);
7684         pkp->dtpk_mod = pdp->dtpd_mod;
7685         pkp->dtpk_mmatch = dtrace_probekey_func(pdp->dtpd_mod);
7687         pkp->dtpk_func = pdp->dtpd_func;
7688         pkp->dtpk_fmatch = dtrace_probekey_func(pdp->dtpd_func);
7690         pkp->dtpk_name = pdp->dtpd_name;
7691         pkp->dtpk_nmatch = dtrace_probekey_func(pdp->dtpd_name);
7693         pkp->dtpk_id = pdp->dtpd_id;
7695         if (pkp->dtpk_id == DTRACE_IDNONE &&
7696             pkp->dtpk_pmatch == &dtrace_match_nul &&
7697             pkp->dtpk_mmatch == &dtrace_match_nul &&
7698             pkp->dtpk_fmatch == &dtrace_match_nul &&
7699             pkp->dtpk_nmatch == &dtrace_match_nul)
7700                 pkp->dtpk_fmatch = &dtrace_match_nonzero;
7704 dtrace_cond_provider_match(dtrace_probedesc_t *desc, void *data)
7709         dtrace_probekey_f *func = dtrace_probekey_func(desc->dtpd_provider);
7711         return func((char*)data, desc->dtpd_provider, 0);
7715  * DTrace Provider-to-Framework API Functions
7717  * These functions implement much of the Provider-to-Framework API, as
7718  * described in <sys/dtrace.h>.  The parts of the API not in this section are
7719  * the functions in the API for probe management (found below), and
7720  * dtrace_probe() itself (found above).
7724  * Register the calling provider with the DTrace framework.  This should
7725  * generally be called by DTrace providers in their attach(9E) entry point.
7728 dtrace_register(const char *name, const dtrace_pattr_t *pap, uint32_t priv,
7729     cred_t *cr, const dtrace_pops_t *pops, void *arg, dtrace_provider_id_t *idp)
7731         dtrace_provider_t *provider;
7733         if (name == NULL || pap == NULL || pops == NULL || idp == NULL) {
7734                 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7735                     "arguments", name ? name : "<NULL>");
7739         if (name[0] == '\0' || dtrace_badname(name)) {
7740                 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7741                     "provider name", name);
7745         if ((pops->dtps_provide == NULL && pops->dtps_provide_module == NULL) ||
7746             pops->dtps_enable == NULL || pops->dtps_disable == NULL ||
7747             pops->dtps_destroy == NULL ||
7748             ((pops->dtps_resume == NULL) != (pops->dtps_suspend == NULL))) {
7749                 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7750                     "provider ops", name);
7754         if (dtrace_badattr(&pap->dtpa_provider) ||
7755             dtrace_badattr(&pap->dtpa_mod) ||
7756             dtrace_badattr(&pap->dtpa_func) ||
7757             dtrace_badattr(&pap->dtpa_name) ||
7758             dtrace_badattr(&pap->dtpa_args)) {
7759                 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7760                     "provider attributes", name);
7764         if (priv & ~DTRACE_PRIV_ALL) {
7765                 cmn_err(CE_WARN, "failed to register provider '%s': invalid "
7766                     "privilege attributes", name);
7770         if ((priv & DTRACE_PRIV_KERNEL) &&
7771             (priv & (DTRACE_PRIV_USER | DTRACE_PRIV_OWNER)) &&
7772             pops->dtps_usermode == NULL) {
7773                 cmn_err(CE_WARN, "failed to register provider '%s': need "
7774                     "dtps_usermode() op for given privilege attributes", name);
7778         provider = kmem_zalloc(sizeof (dtrace_provider_t), KM_SLEEP);
7780         /* APPLE NOTE: Darwin employs size bounded string operation. */
7782         size_t bufsize = strlen(name) + 1;
7783         provider->dtpv_name = kmem_alloc(bufsize, KM_SLEEP);
7784         (void) strlcpy(provider->dtpv_name, name, bufsize);
7787         provider->dtpv_attr = *pap;
7788         provider->dtpv_priv.dtpp_flags = priv;
7790                 provider->dtpv_priv.dtpp_uid = crgetuid(cr);
7791                 provider->dtpv_priv.dtpp_zoneid = crgetzoneid(cr);
7793         provider->dtpv_pops = *pops;
7795         if (pops->dtps_provide == NULL) {
7796                 ASSERT(pops->dtps_provide_module != NULL);
7797                 provider->dtpv_pops.dtps_provide =
7798                     (void (*)(void *, const dtrace_probedesc_t *))dtrace_nullop;
7801         if (pops->dtps_provide_module == NULL) {
7802                 ASSERT(pops->dtps_provide != NULL);
7803                 provider->dtpv_pops.dtps_provide_module =
7804                     (void (*)(void *, struct modctl *))dtrace_nullop;
7807         if (pops->dtps_suspend == NULL) {
7808                 ASSERT(pops->dtps_resume == NULL);
7809                 provider->dtpv_pops.dtps_suspend =
7810                     (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
7811                 provider->dtpv_pops.dtps_resume =
7812                     (void (*)(void *, dtrace_id_t, void *))dtrace_nullop;
7815         provider->dtpv_arg = arg;
7816         *idp = (dtrace_provider_id_t)provider;
7818         if (pops == &dtrace_provider_ops) {
7819                 LCK_MTX_ASSERT(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED);
7820                 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
7821                 ASSERT(dtrace_anon.dta_enabling == NULL);
7824                  * We make sure that the DTrace provider is at the head of
7825                  * the provider chain.
7827                 provider->dtpv_next = dtrace_provider;
7828                 dtrace_provider = provider;
7832         lck_mtx_lock(&dtrace_provider_lock);
7833         lck_mtx_lock(&dtrace_lock);
7836          * If there is at least one provider registered, we'll add this
7837          * provider after the first provider.
7839         if (dtrace_provider != NULL) {
7840                 provider->dtpv_next = dtrace_provider->dtpv_next;
7841                 dtrace_provider->dtpv_next = provider;
7843                 dtrace_provider = provider;
7846         if (dtrace_retained != NULL) {
7847                 dtrace_enabling_provide(provider);
7850                  * Now we need to call dtrace_enabling_matchall_with_cond() --
7851                  * with a condition matching the provider name we just added,
7852                  * which will acquire cpu_lock and dtrace_lock.  We therefore need
7853                  * to drop all of our locks before calling into it...
7855                 lck_mtx_unlock(&dtrace_lock);
7856                 lck_mtx_unlock(&dtrace_provider_lock);
7858                 dtrace_match_cond_t cond = {dtrace_cond_provider_match, provider->dtpv_name};
7859                 dtrace_enabling_matchall_with_cond(&cond);
7864         lck_mtx_unlock(&dtrace_lock);
7865         lck_mtx_unlock(&dtrace_provider_lock);
7871  * Unregister the specified provider from the DTrace framework.  This should
7872  * generally be called by DTrace providers in their detach(9E) entry point.
7875 dtrace_unregister(dtrace_provider_id_t id)
7877         dtrace_provider_t *old = (dtrace_provider_t *)id;
7878         dtrace_provider_t *prev = NULL;
7880         dtrace_probe_t *probe, *first = NULL;
7882         if (old->dtpv_pops.dtps_enable ==
7883             (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop) {
7885                  * If DTrace itself is the provider, we're called with locks
7888                 ASSERT(old == dtrace_provider);
7889                 ASSERT(dtrace_devi != NULL);
7890                 LCK_MTX_ASSERT(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED);
7891                 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
7894                 if (dtrace_provider->dtpv_next != NULL) {
7896                          * There's another provider here; return failure.
7901                 lck_mtx_lock(&dtrace_provider_lock);
7902                 lck_mtx_lock(&mod_lock);
7903                 lck_mtx_lock(&dtrace_lock);
7907          * If anyone has /dev/dtrace open, or if there are anonymous enabled
7908          * probes, we refuse to let providers slither away, unless this
7909          * provider has already been explicitly invalidated.
7911         if (!old->dtpv_defunct &&
7912             (dtrace_opens || (dtrace_anon.dta_state != NULL &&
7913             dtrace_anon.dta_state->dts_necbs > 0))) {
7915                         lck_mtx_unlock(&dtrace_lock);
7916                         lck_mtx_unlock(&mod_lock);
7917                         lck_mtx_unlock(&dtrace_provider_lock);
7923          * Attempt to destroy the probes associated with this provider.
7925         if (old->dtpv_ecb_count != 0) {
7927                  * We have at least one ECB; we can't remove this provider.
7930                         lck_mtx_unlock(&dtrace_lock);
7931                         lck_mtx_unlock(&mod_lock);
7932                         lck_mtx_unlock(&dtrace_provider_lock);
7938          * All of the probes for this provider are disabled; we can safely
7939          * remove all of them from their hash chains and from the probe array.
7941         for (i = 0; i < dtrace_nprobes && old->dtpv_probe_count != 0; i++) {
7942                 if ((probe = dtrace_probes[i]) == NULL)
7945                 if (probe->dtpr_provider != old)
7948                 dtrace_probes[i] = NULL;
7949                 old->dtpv_probe_count--;
7951                 dtrace_hash_remove(dtrace_bymod, probe);
7952                 dtrace_hash_remove(dtrace_byfunc, probe);
7953                 dtrace_hash_remove(dtrace_byname, probe);
7955                 if (first == NULL) {
7957                         probe->dtpr_nextmod = NULL;
7959                         probe->dtpr_nextmod = first;
7965          * The provider's probes have been removed from the hash chains and
7966          * from the probe array.  Now issue a dtrace_sync() to be sure that
7967          * everyone has cleared out from any probe array processing.
7971         for (probe = first; probe != NULL; probe = first) {
7972                 first = probe->dtpr_nextmod;
7974                 old->dtpv_pops.dtps_destroy(old->dtpv_arg, probe->dtpr_id,
7976                 kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
7977                 kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
7978                 kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
7979                 vmem_free(dtrace_arena, (void *)(uintptr_t)(probe->dtpr_id), 1);
7980                 zfree(dtrace_probe_t_zone, probe);
7983         if ((prev = dtrace_provider) == old) {
7984                 ASSERT(self || dtrace_devi == NULL);
7985                 ASSERT(old->dtpv_next == NULL || dtrace_devi == NULL);
7986                 dtrace_provider = old->dtpv_next;
7988                 while (prev != NULL && prev->dtpv_next != old)
7989                         prev = prev->dtpv_next;
7992                         panic("attempt to unregister non-existent "
7993                             "dtrace provider %p\n", (void *)id);
7996                 prev->dtpv_next = old->dtpv_next;
8000                 lck_mtx_unlock(&dtrace_lock);
8001                 lck_mtx_unlock(&mod_lock);
8002                 lck_mtx_unlock(&dtrace_provider_lock);
8005         kmem_free(old->dtpv_name, strlen(old->dtpv_name) + 1);
8006         kmem_free(old, sizeof (dtrace_provider_t));
8012  * Invalidate the specified provider.  All subsequent probe lookups for the
8013  * specified provider will fail, but its probes will not be removed.
8016 dtrace_invalidate(dtrace_provider_id_t id)
8018         dtrace_provider_t *pvp = (dtrace_provider_t *)id;
8020         ASSERT(pvp->dtpv_pops.dtps_enable !=
8021             (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);
8023         lck_mtx_lock(&dtrace_provider_lock);
8024         lck_mtx_lock(&dtrace_lock);
8026         pvp->dtpv_defunct = 1;
8028         lck_mtx_unlock(&dtrace_lock);
8029         lck_mtx_unlock(&dtrace_provider_lock);
8033  * Indicate whether or not DTrace has attached.
8036 dtrace_attached(void)
8039          * dtrace_provider will be non-NULL iff the DTrace driver has
8040          * attached.  (It's non-NULL because DTrace is always itself a
8041          * provider.)
8043         return (dtrace_provider != NULL);
8047  * Remove all the unenabled probes for the given provider.  This function is
8048  * not unlike dtrace_unregister(), except that it doesn't remove the provider
8049  * -- just as many of its associated probes as it can.
8052 dtrace_condense(dtrace_provider_id_t id)
8054         dtrace_provider_t *prov = (dtrace_provider_t *)id;
8056         dtrace_probe_t *probe;
8059          * Make sure this isn't the dtrace provider itself.
8061         ASSERT(prov->dtpv_pops.dtps_enable !=
8062             (int (*)(void *, dtrace_id_t, void *))dtrace_enable_nullop);
8064         lck_mtx_lock(&dtrace_provider_lock);
8065         lck_mtx_lock(&dtrace_lock);
8068          * Attempt to destroy the probes associated with this provider.
8070         for (i = 0; i < dtrace_nprobes; i++) {
8071                 if ((probe = dtrace_probes[i]) == NULL)
8074                 if (probe->dtpr_provider != prov)
8077                 if (probe->dtpr_ecb != NULL)
8080                 dtrace_probes[i] = NULL;
8081                 prov->dtpv_probe_count--;
8083                 dtrace_hash_remove(dtrace_bymod, probe);
8084                 dtrace_hash_remove(dtrace_byfunc, probe);
8085                 dtrace_hash_remove(dtrace_byname, probe);
8087                 prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, i + 1,
8089                 kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
8090                 kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
8091                 kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
8092                 zfree(dtrace_probe_t_zone, probe);
8093                 vmem_free(dtrace_arena, (void *)((uintptr_t)i + 1), 1);
8096         lck_mtx_unlock(&dtrace_lock);
8097         lck_mtx_unlock(&dtrace_provider_lock);
8103  * DTrace Probe Management Functions
8105  * The functions in this section perform the DTrace probe management,
8106  * including functions to create probes, look-up probes, and call into the
8107  * providers to request that probes be provided.  Some of these functions are
8108  * in the Provider-to-Framework API; these functions can be identified by the
8109  * fact that they are not declared "static".
8113  * Create a probe with the specified module name, function name, and name.
8116 dtrace_probe_create(dtrace_provider_id_t prov, const char *mod,
8117     const char *func, const char *name, int aframes, void *arg)
8119         dtrace_probe_t *probe, **probes;
8120         dtrace_provider_t *provider = (dtrace_provider_t *)prov;
8123         if (provider == dtrace_provider) {
8124                 LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
8126                 lck_mtx_lock(&dtrace_lock);
8129         id = (dtrace_id_t)(uintptr_t)vmem_alloc(dtrace_arena, 1,
8130             VM_BESTFIT | VM_SLEEP);
8132         probe = zalloc(dtrace_probe_t_zone);
8133         bzero(probe, sizeof (dtrace_probe_t));
8135         probe->dtpr_id = id;
8136         probe->dtpr_gen = dtrace_probegen++;
8137         probe->dtpr_mod = dtrace_strdup(mod);
8138         probe->dtpr_func = dtrace_strdup(func);
8139         probe->dtpr_name = dtrace_strdup(name);
8140         probe->dtpr_arg = arg;
8141         probe->dtpr_aframes = aframes;
8142         probe->dtpr_provider = provider;
8144         dtrace_hash_add(dtrace_bymod, probe);
8145         dtrace_hash_add(dtrace_byfunc, probe);
8146         dtrace_hash_add(dtrace_byname, probe);
8148         if (id - 1 >= (dtrace_id_t)dtrace_nprobes) {
8149                 size_t osize = dtrace_nprobes * sizeof (dtrace_probe_t *);
8150                 size_t nsize = osize << 1;
8154                         ASSERT(dtrace_probes == NULL);
8155                         nsize = sizeof (dtrace_probe_t *);
8158                 probes = kmem_zalloc(nsize, KM_SLEEP);
8160                 if (dtrace_probes == NULL) {
8162                         dtrace_probes = probes;
8165                         dtrace_probe_t **oprobes = dtrace_probes;
8167                         bcopy(oprobes, probes, osize);
8168                         dtrace_membar_producer();
8169                         dtrace_probes = probes;
8174                          * All CPUs are now seeing the new probes array; we can
8175                          * safely free the old array.
8177                         kmem_free(oprobes, osize);
8178                         dtrace_nprobes <<= 1;
8181                 ASSERT(id - 1 < (dtrace_id_t)dtrace_nprobes);
8184         ASSERT(dtrace_probes[id - 1] == NULL);
8185         dtrace_probes[id - 1] = probe;
8186         provider->dtpv_probe_count++;
8188         if (provider != dtrace_provider)
8189                 lck_mtx_unlock(&dtrace_lock);
8194 static dtrace_probe_t *
8195 dtrace_probe_lookup_id(dtrace_id_t id)
8197         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
8199         if (id == 0 || id > (dtrace_id_t)dtrace_nprobes)
8202         return (dtrace_probes[id - 1]);
8206 dtrace_probe_lookup_match(dtrace_probe_t *probe, void *arg1, void *arg2)
8208 #pragma unused(arg2)
8209         *((dtrace_id_t *)arg1) = probe->dtpr_id;
8211         return (DTRACE_MATCH_DONE);
8215  * Look up a probe based on provider and one or more of module name, function
8216  * name and probe name.
8219 dtrace_probe_lookup(dtrace_provider_id_t prid, const char *mod,
8220     const char *func, const char *name)
8222         dtrace_probekey_t pkey;
8226         pkey.dtpk_prov = ((dtrace_provider_t *)prid)->dtpv_name;
8227         pkey.dtpk_pmatch = &dtrace_match_string;
8228         pkey.dtpk_mod = mod;
8229         pkey.dtpk_mmatch = mod ? &dtrace_match_string : &dtrace_match_nul;
8230         pkey.dtpk_func = func;
8231         pkey.dtpk_fmatch = func ? &dtrace_match_string : &dtrace_match_nul;
8232         pkey.dtpk_name = name;
8233         pkey.dtpk_nmatch = name ? &dtrace_match_string : &dtrace_match_nul;
8234         pkey.dtpk_id = DTRACE_IDNONE;
8236         lck_mtx_lock(&dtrace_lock);
8237         match = dtrace_match(&pkey, DTRACE_PRIV_ALL, 0, 0,
8238             dtrace_probe_lookup_match, &id, NULL);
8239         lck_mtx_unlock(&dtrace_lock);
8241         ASSERT(match == 1 || match == 0);
8242         return (match ? id : 0);
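/*
 * Editorial sketch (not part of this file): the lookup/create pair as a
 * provider's dtps_provide callback typically uses it -- create the probe
 * only if it does not already exist.  The provider id and probe tuple
 * below are hypothetical; the two framework calls are the ones defined
 * above.
 */
static dtrace_provider_id_t my_provider_id;     /* hypothetical: from dtrace_register() */

static void
my_provide(void *arg, const dtrace_probedesc_t *desc)
{
#pragma unused(arg, desc)
        if (dtrace_probe_lookup(my_provider_id, "my_module",
            "my_function", "entry") != 0)
                return;         /* probe already exists */

        (void) dtrace_probe_create(my_provider_id, "my_module",
            "my_function", "entry", 0 /* aframes */, NULL);
}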
8246  * Returns the probe argument associated with the specified probe.
8249 dtrace_probe_arg(dtrace_provider_id_t id, dtrace_id_t pid)
8251         dtrace_probe_t *probe;
8254         lck_mtx_lock(&dtrace_lock);
8256         if ((probe = dtrace_probe_lookup_id(pid)) != NULL &&
8257             probe->dtpr_provider == (dtrace_provider_t *)id)
8258                 rval = probe->dtpr_arg;
8260         lck_mtx_unlock(&dtrace_lock);
8266  * Copy a probe into a probe description.
8269 dtrace_probe_description(const dtrace_probe_t *prp, dtrace_probedesc_t *pdp)
8271         bzero(pdp, sizeof (dtrace_probedesc_t));
8272         pdp->dtpd_id = prp->dtpr_id;
8274         /* APPLE NOTE: Darwin employs size bounded string operation. */
8275         (void) strlcpy(pdp->dtpd_provider,
8276             prp->dtpr_provider->dtpv_name, DTRACE_PROVNAMELEN);
8278         (void) strlcpy(pdp->dtpd_mod, prp->dtpr_mod, DTRACE_MODNAMELEN);
8279         (void) strlcpy(pdp->dtpd_func, prp->dtpr_func, DTRACE_FUNCNAMELEN);
8280         (void) strlcpy(pdp->dtpd_name, prp->dtpr_name, DTRACE_NAMELEN);
8284  * Called to indicate that a probe -- or probes -- should be provided by a
8285  * specified provider.  If the specified description is NULL, the provider will
8286  * be told to provide all of its probes.  (This is done whenever a new
8287  * consumer comes along, or whenever a retained enabling is to be matched.) If
8288  * the specified description is non-NULL, the provider is given the
8289  * opportunity to dynamically provide the specified probe, allowing providers
8290  * to support the creation of probes on-the-fly.  (So-called _autocreated_
8291  * probes.)  If the provider is NULL, the operations will be applied to all
8292  * providers; if the provider is non-NULL the operations will only be applied
8293  * to the specified provider.  The dtrace_provider_lock must be held, and the
8294  * dtrace_lock must _not_ be held -- the provider's dtps_provide() operation
8295  * will need to grab the dtrace_lock when it reenters the framework through
8296  * dtrace_probe_lookup(), dtrace_probe_create(), etc.
8299 dtrace_probe_provide(dtrace_probedesc_t *desc, dtrace_provider_t *prv)
8304         LCK_MTX_ASSERT(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED);
8308                 prv = dtrace_provider;
8313                  * First, call the blanket provide operation.
8315                 prv->dtpv_pops.dtps_provide(prv->dtpv_arg, desc);
8318                  * Now call the per-module provide operation.  We will grab
8319                  * mod_lock to prevent the list from being modified.  Note
8320                  * that this also prevents the mod_busy bits from changing.
8321                  * (mod_busy can only be changed with mod_lock held.)
8323                 lck_mtx_lock(&mod_lock);
8325                 ctl = dtrace_modctl_list;
8327                         prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);
8328                         ctl = ctl->mod_next;
8331                 lck_mtx_unlock(&mod_lock);
8332         } while (all && (prv = prv->dtpv_next) != NULL);
8336  * Iterate over each probe, and call the Framework-to-Provider API function
8337  * indicated by offs.
8340 dtrace_probe_foreach(uintptr_t offs)
8342         dtrace_provider_t *prov;
8343         void (*func)(void *, dtrace_id_t, void *);
8344         dtrace_probe_t *probe;
8345         dtrace_icookie_t cookie;
8349          * We disable interrupts to walk through the probe array.  This is
8350          * safe -- the dtrace_sync() in dtrace_unregister() assures that we
8351          * won't see stale data.
8353         cookie = dtrace_interrupt_disable();
8355         for (i = 0; i < dtrace_nprobes; i++) {
8356                 if ((probe = dtrace_probes[i]) == NULL)
8359                 if (probe->dtpr_ecb == NULL) {
8361                          * This probe isn't enabled -- don't call the function.
8366                 prov = probe->dtpr_provider;
8367                 func = *((void(**)(void *, dtrace_id_t, void *))
8368                     ((uintptr_t)&prov->dtpv_pops + offs));
8370                 func(prov->dtpv_arg, i + 1, probe->dtpr_arg);
8373         dtrace_interrupt_enable(cookie);
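/*
 * Editorial sketch (not part of this file): the offs parameter above is a
 * byte offset into the provider's ops vector, so one walker can invoke
 * any per-probe op.  In the real framework the callers pass something
 * like offsetof(dtrace_pops_t, dtps_suspend); the miniature below shows
 * the same dispatch trick with hypothetical types.
 */
#include <stddef.h>
#include <stdint.h>

struct my_ops {
        void (*op_suspend)(void *, int);
        void (*op_resume)(void *, int);
};

static void
my_call_op(struct my_ops *ops, uintptr_t offs, void *arg, int id)
{
        /* Fetch the function pointer stored offs bytes into the vector. */
        void (*fn)(void *, int) =
            *((void (**)(void *, int))((uintptr_t)ops + offs));

        fn(arg, id);
}

/* Usage: my_call_op(&ops, offsetof(struct my_ops, op_resume), arg, 1); */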
8377 dtrace_probe_enable(const dtrace_probedesc_t *desc, dtrace_enabling_t *enab, dtrace_ecbdesc_t *ep)
8379         dtrace_probekey_t pkey;
8384         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
8386         dtrace_ecb_create_cache = NULL;
8390                  * If we're passed a NULL description, we're being asked to
8391                  * create an ECB with a NULL probe.
8393                 (void) dtrace_ecb_create_enable(NULL, enab, ep);
8397         dtrace_probekey(desc, &pkey);
8398         dtrace_cred2priv(enab->dten_vstate->dtvs_state->dts_cred.dcr_cred,
8399             &priv, &uid, &zoneid);
8401         return (dtrace_match(&pkey, priv, uid, zoneid, dtrace_ecb_create_enable,
8406  * DTrace Helper Provider Functions
8409 dtrace_dofattr2attr(dtrace_attribute_t *attr, const dof_attr_t dofattr)
8411         attr->dtat_name = DOF_ATTR_NAME(dofattr);
8412         attr->dtat_data = DOF_ATTR_DATA(dofattr);
8413         attr->dtat_class = DOF_ATTR_CLASS(dofattr);
8417 dtrace_dofprov2hprov(dtrace_helper_provdesc_t *hprov,
8418     const dof_provider_t *dofprov, char *strtab)
8420         hprov->dthpv_provname = strtab + dofprov->dofpv_name;
8421         dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_provider,
8422             dofprov->dofpv_provattr);
8423         dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_mod,
8424             dofprov->dofpv_modattr);
8425         dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_func,
8426             dofprov->dofpv_funcattr);
8427         dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_name,
8428             dofprov->dofpv_nameattr);
8429         dtrace_dofattr2attr(&hprov->dthpv_pattr.dtpa_args,
8430             dofprov->dofpv_argsattr);
8434 dtrace_helper_provide_one(dof_helper_t *dhp, dof_sec_t *sec, proc_t *p)
8436         uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
8437         dof_hdr_t *dof = (dof_hdr_t *)daddr;
8438         dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
8439         dof_provider_t *provider;
8441         uint32_t *off, *enoff;
8445         dtrace_helper_provdesc_t dhpv;
8446         dtrace_helper_probedesc_t dhpb;
8447         dtrace_meta_t *meta = dtrace_meta_pid;
8448         dtrace_mops_t *mops = &meta->dtm_mops;
8451         provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
8452         str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
8453             provider->dofpv_strtab * dof->dofh_secsize);
8454         prb_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
8455             provider->dofpv_probes * dof->dofh_secsize);
8456         arg_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
8457             provider->dofpv_prargs * dof->dofh_secsize);
8458         off_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
8459             provider->dofpv_proffs * dof->dofh_secsize);
8461         strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
8462         off = (uint32_t *)(uintptr_t)(daddr + off_sec->dofs_offset);
8463         arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);
8467          * See dtrace_helper_provider_validate().
8469         if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
8470             provider->dofpv_prenoffs != DOF_SECT_NONE) {
8471                 enoff_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
8472                     provider->dofpv_prenoffs * dof->dofh_secsize);
8473                 enoff = (uint32_t *)(uintptr_t)(daddr + enoff_sec->dofs_offset);
8476         nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;
8479          * Create the provider.
8481         dtrace_dofprov2hprov(&dhpv, provider, strtab);
8483         if ((parg = mops->dtms_provide_proc(meta->dtm_arg, &dhpv, p)) == NULL)
8489          * Create the probes.
8491         for (i = 0; i < nprobes; i++) {
8492                 probe = (dof_probe_t *)(uintptr_t)(daddr +
8493                     prb_sec->dofs_offset + i * prb_sec->dofs_entsize);
8495                 dhpb.dthpb_mod = dhp->dofhp_mod;
8496                 dhpb.dthpb_func = strtab + probe->dofpr_func;
8497                 dhpb.dthpb_name = strtab + probe->dofpr_name;
8498 #if !defined(__APPLE__)
8499                 dhpb.dthpb_base = probe->dofpr_addr;
8501                 dhpb.dthpb_base = dhp->dofhp_addr; /* FIXME: James, why? */
8503                 dhpb.dthpb_offs = (int32_t *)(off + probe->dofpr_offidx);
8504                 dhpb.dthpb_noffs = probe->dofpr_noffs;
8505                 if (enoff != NULL) {
8506                         dhpb.dthpb_enoffs = (int32_t *)(enoff + probe->dofpr_enoffidx);
8507                         dhpb.dthpb_nenoffs = probe->dofpr_nenoffs;
8509                         dhpb.dthpb_enoffs = NULL;
8510                         dhpb.dthpb_nenoffs = 0;
8512                 dhpb.dthpb_args = arg + probe->dofpr_argidx;
8513                 dhpb.dthpb_nargc = probe->dofpr_nargc;
8514                 dhpb.dthpb_xargc = probe->dofpr_xargc;
8515                 dhpb.dthpb_ntypes = strtab + probe->dofpr_nargv;
8516                 dhpb.dthpb_xtypes = strtab + probe->dofpr_xargv;
8518                 mops->dtms_create_probe(meta->dtm_arg, parg, &dhpb);
8522          * Since we just created probes, we need to match our enablings
8523          * against those, with a precondition knowing that we have only
8524          * added probes from this provider
8526         char *prov_name = mops->dtms_provider_name(parg);
8527         ASSERT(prov_name != NULL);
8528         dtrace_match_cond_t cond = {dtrace_cond_provider_match, (void*)prov_name};
8530         dtrace_enabling_matchall_with_cond(&cond);
8534 dtrace_helper_provide(dof_helper_t *dhp, proc_t *p)
8536         uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
8537         dof_hdr_t *dof = (dof_hdr_t *)daddr;
8540         LCK_MTX_ASSERT(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED);
8542         for (i = 0; i < dof->dofh_secnum; i++) {
8543                 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
8544                     dof->dofh_secoff + i * dof->dofh_secsize);
8546                 if (sec->dofs_type != DOF_SECT_PROVIDER)
8549                 dtrace_helper_provide_one(dhp, sec, p);
8554 dtrace_helper_provider_remove_one(dof_helper_t *dhp, dof_sec_t *sec, proc_t *p)
8556         uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
8557         dof_hdr_t *dof = (dof_hdr_t *)daddr;
8559         dof_provider_t *provider;
8561         dtrace_helper_provdesc_t dhpv;
8562         dtrace_meta_t *meta = dtrace_meta_pid;
8563         dtrace_mops_t *mops = &meta->dtm_mops;
8565         provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
8566         str_sec = (dof_sec_t *)(uintptr_t)(daddr + dof->dofh_secoff +
8567             provider->dofpv_strtab * dof->dofh_secsize);
8569         strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
8572          * Create the provider.
8574         dtrace_dofprov2hprov(&dhpv, provider, strtab);
8576         mops->dtms_remove_proc(meta->dtm_arg, &dhpv, p);
8582 dtrace_helper_provider_remove(dof_helper_t *dhp, proc_t *p)
8584         uintptr_t daddr = (uintptr_t)dhp->dofhp_dof;
8585         dof_hdr_t *dof = (dof_hdr_t *)daddr;
8588         LCK_MTX_ASSERT(&dtrace_meta_lock, LCK_MTX_ASSERT_OWNED);
8590         for (i = 0; i < dof->dofh_secnum; i++) {
8591                 dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
8592                     dof->dofh_secoff + i * dof->dofh_secsize);
8594                 if (sec->dofs_type != DOF_SECT_PROVIDER)
8597                 dtrace_helper_provider_remove_one(dhp, sec, p);
8602  * DTrace Meta Provider-to-Framework API Functions
8604  * These functions implement the Meta Provider-to-Framework API, as described
8605  * in <sys/dtrace.h>.
8608 dtrace_meta_register(const char *name, const dtrace_mops_t *mops, void *arg,
8609     dtrace_meta_provider_id_t *idp)
8611         dtrace_meta_t *meta;
8612         dtrace_helpers_t *help, *next;
8615         *idp = DTRACE_METAPROVNONE;
8618          * We strictly don't need the name, but we hold onto it for
8619          * debuggability. All hail error queues!
8622                 cmn_err(CE_WARN, "failed to register meta-provider: "
8628             mops->dtms_create_probe == NULL ||
8629             mops->dtms_provide_proc == NULL ||
8630             mops->dtms_remove_proc == NULL) {
8631                 cmn_err(CE_WARN
, "failed to register meta-register %s: " 
8632                     "invalid ops", name
); 
8636         meta 
= kmem_zalloc(sizeof (dtrace_meta_t
), KM_SLEEP
); 
8637         meta
->dtm_mops 
= *mops
; 
8639         /* APPLE NOTE: Darwin employs size bounded string operation. */ 
8641         size_t bufsize 
= strlen(name
) + 1; 
8642         meta
->dtm_name 
= kmem_alloc(bufsize
, KM_SLEEP
); 
8643         (void) strlcpy(meta
->dtm_name
, name
, bufsize
); 
8646         meta
->dtm_arg 
= arg
; 
8648         lck_mtx_lock(&dtrace_meta_lock
); 
8649         lck_mtx_lock(&dtrace_lock
); 
8651         if (dtrace_meta_pid 
!= NULL
) { 
8652                 lck_mtx_unlock(&dtrace_lock
); 
8653                 lck_mtx_unlock(&dtrace_meta_lock
); 
8654                 cmn_err(CE_WARN
, "failed to register meta-register %s: " 
8655                     "user-land meta-provider exists", name
); 
8656                 kmem_free(meta
->dtm_name
, strlen(meta
->dtm_name
) + 1); 
8657                 kmem_free(meta
, sizeof (dtrace_meta_t
)); 
8661         dtrace_meta_pid 
= meta
; 
8662         *idp 
= (dtrace_meta_provider_id_t
)meta
; 
8665          * If there are providers and probes ready to go, pass them 
8666          * off to the new meta provider now. 
8669         help 
= dtrace_deferred_pid
; 
8670         dtrace_deferred_pid 
= NULL
; 
8672         lck_mtx_unlock(&dtrace_lock
); 
8674         while (help 
!= NULL
) { 
8675                 for (i 
= 0; i 
< help
->dthps_nprovs
; i
++) { 
8676                         proc_t 
*p 
= proc_find(help
->dthps_pid
); 
8679                         dtrace_helper_provide(&help
->dthps_provs
[i
]->dthp_prov
, 
8684                 next 
= help
->dthps_next
; 
8685                 help
->dthps_next 
= NULL
; 
8686                 help
->dthps_prev 
= NULL
; 
8687                 help
->dthps_deferred 
= 0; 
8691         lck_mtx_unlock(&dtrace_meta_lock
); 
8697 dtrace_meta_unregister(dtrace_meta_provider_id_t id
) 
8699         dtrace_meta_t 
**pp
, *old 
= (dtrace_meta_t 
*)id
; 
8701         lck_mtx_lock(&dtrace_meta_lock
); 
8702         lck_mtx_lock(&dtrace_lock
); 
8704         if (old 
== dtrace_meta_pid
) { 
8705                 pp 
= &dtrace_meta_pid
; 
8707                 panic("attempt to unregister non-existent " 
8708                     "dtrace meta-provider %p\n", (void *)old
); 
8711         if (old
->dtm_count 
!= 0) { 
8712                 lck_mtx_unlock(&dtrace_lock
); 
8713                 lck_mtx_unlock(&dtrace_meta_lock
); 
8719         lck_mtx_unlock(&dtrace_lock
); 
8720         lck_mtx_unlock(&dtrace_meta_lock
); 
8722         kmem_free(old
->dtm_name
, strlen(old
->dtm_name
) + 1); 
8723         kmem_free(old
, sizeof (dtrace_meta_t
)); 
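/*
 * Editorial sketch (not part of the original source): how a user-land
 * meta-provider would register itself through the API above.  The names
 * and callbacks below are hypothetical stand-ins; in practice fasttrap
 * is the lone registrant of dtrace_meta_register().
 */
static dtrace_meta_provider_id_t example_meta_id;

static void
example_meta_attach(void *arg)
{
	/*
	 * Assumed to be defined elsewhere with the dtms_create_probe,
	 * dtms_provide_proc and dtms_remove_proc callbacks that
	 * dtrace_meta_register() checks for above.
	 */
	extern dtrace_mops_t example_mops;

	if (dtrace_meta_register("example", &example_mops, arg,
	    &example_meta_id) != 0)
		cmn_err(CE_WARN, "example meta-provider failed to register");
}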
8730  * DTrace DIF Object Functions
8733 dtrace_difo_err(uint_t pc, const char *format, ...)
8735         if (dtrace_err_verbose) {
8738                 (void) uprintf("dtrace DIF object error: [%u]: ", pc);
8739                 va_start(alist, format);
8740                 (void) vuprintf(format, alist);
8744 #ifdef DTRACE_ERRDEBUG
8745         dtrace_errdebug(format);
8751  * Validate a DTrace DIF object by checking the IR instructions.  The following
8752  * rules are currently enforced by dtrace_difo_validate():
8754  * 1. Each instruction must have a valid opcode
8755  * 2. Each register, string, variable, or subroutine reference must be valid
8756  * 3. No instruction can modify register %r0 (must be zero)
8757  * 4. All instruction reserved bits must be set to zero
8758  * 5. The last instruction must be a "ret" instruction
8759  * 6. All branch targets must reference a valid instruction _after_ the branch
 *    (an editorial sketch of a minimal object passing these rules follows the
 *    function below)
8762 dtrace_difo_validate(dtrace_difo_t *dp, dtrace_vstate_t *vstate, uint_t nregs,
8768         int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err;
8771         int maxglobal = -1, maxlocal = -1, maxtlocal = -1;
8773         kcheckload = cr == NULL ||
8774             (vstate->dtvs_state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) == 0;
8776         dp->dtdo_destructive = 0;
8778         for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) {
8779                 dif_instr_t instr = dp->dtdo_buf[pc];
8781                 uint_t r1 = DIF_INSTR_R1(instr);
8782                 uint_t r2 = DIF_INSTR_R2(instr);
8783                 uint_t rd = DIF_INSTR_RD(instr);
8784                 uint_t rs = DIF_INSTR_RS(instr);
8785                 uint_t label = DIF_INSTR_LABEL(instr);
8786                 uint_t v = DIF_INSTR_VAR(instr);
8787                 uint_t subr = DIF_INSTR_SUBR(instr);
8788                 uint_t type = DIF_INSTR_TYPE(instr);
8789                 uint_t op = DIF_INSTR_OP(instr);
8807                                 err += efunc(pc, "invalid register %u\n", r1);
8809                                 err += efunc(pc, "invalid register %u\n", r2);
8811                                 err += efunc(pc, "invalid register %u\n", rd);
8813                                 err += efunc(pc, "cannot write to %r0\n");
8819                                 err += efunc(pc, "invalid register %u\n", r1);
8821                                 err += efunc(pc, "non-zero reserved bits\n");
8823                                 err += efunc(pc, "invalid register %u\n", rd);
8825                                 err += efunc(pc, "cannot write to %r0\n");
8835                                 err += efunc(pc, "invalid register %u\n", r1);
8837                                 err += efunc(pc, "non-zero reserved bits\n");
8839                                 err += efunc(pc, "invalid register %u\n", rd);
8841                                 err += efunc(pc, "cannot write to %r0\n");
8843                                 dp->dtdo_buf[pc] = DIF_INSTR_LOAD(op +
8844                                     DIF_OP_RLDSB - DIF_OP_LDSB, r1, rd);
8854                                 err += efunc(pc, "invalid register %u\n", r1);
8856                                 err += efunc(pc, "non-zero reserved bits\n");
8858                                 err += efunc(pc, "invalid register %u\n", rd);
8860                                 err += efunc(pc, "cannot write to %r0\n");
8870                                 err += efunc(pc, "invalid register %u\n", r1);
8872                                 err += efunc(pc, "non-zero reserved bits\n");
8874                                 err += efunc(pc, "invalid register %u\n", rd);
8876                                 err += efunc(pc, "cannot write to %r0\n");
8883                                 err += efunc(pc, "invalid register %u\n", r1);
8885                                 err += efunc(pc, "non-zero reserved bits\n");
8887                                 err += efunc(pc, "invalid register %u\n", rd);
8889                                 err += efunc(pc, "cannot write to 0 address\n");
8894                                 err += efunc(pc, "invalid register %u\n", r1);
8896                                 err += efunc(pc, "invalid register %u\n", r2);
8898                                 err += efunc(pc, "non-zero reserved bits\n");
8902                                 err += efunc(pc, "invalid register %u\n", r1);
8903                         if (r2 != 0 || rd != 0)
8904                                 err += efunc(pc, "non-zero reserved bits\n");
8917                         if (label >= dp->dtdo_len) {
8918                                 err += efunc(pc, "invalid branch target %u\n",
8922                                 err += efunc(pc, "backward branch to %u\n",
8927                         if (r1 != 0 || r2 != 0)
8928                                 err += efunc(pc, "non-zero reserved bits\n");
8930                                 err += efunc(pc, "invalid register %u\n", rd);
8934                 case DIF_OP_FLUSHTS:
8935                         if (r1 != 0 || r2 != 0 || rd != 0)
8936                                 err += efunc(pc, "non-zero reserved bits\n");
8939                         if (DIF_INSTR_INTEGER(instr) >= dp->dtdo_intlen) {
8940                                 err += efunc(pc, "invalid integer ref %u\n",
8941                                     DIF_INSTR_INTEGER(instr));
8944                                 err += efunc(pc, "invalid register %u\n", rd);
8946                                 err += efunc(pc, "cannot write to %r0\n");
8949                         if (DIF_INSTR_STRING(instr) >= dp->dtdo_strlen) {
8950                                 err += efunc(pc, "invalid string ref %u\n",
8951                                     DIF_INSTR_STRING(instr));
8954                                 err += efunc(pc, "invalid register %u\n", rd);
8956                                 err += efunc(pc, "cannot write to %r0\n");
8960                         if (r1 > DIF_VAR_ARRAY_MAX)
8961                                 err += efunc(pc, "invalid array %u\n", r1);
8963                                 err += efunc(pc, "invalid register %u\n", r2);
8965                                 err += efunc(pc, "invalid register %u\n", rd);
8967                                 err += efunc(pc, "cannot write to %r0\n");
8974                         if (v < DIF_VAR_OTHER_MIN || v > DIF_VAR_OTHER_MAX)
8975                                 err += efunc(pc, "invalid variable %u\n", v);
8977                                 err += efunc(pc, "invalid register %u\n", rd);
8979                                 err += efunc(pc, "cannot write to %r0\n");
8986                         if (v < DIF_VAR_OTHER_UBASE || v > DIF_VAR_OTHER_MAX)
8987                                 err += efunc(pc, "invalid variable %u\n", v);
8989                                 err += efunc(pc, "invalid register %u\n", rd);
8992                         if (subr > DIF_SUBR_MAX &&
8993                            !(subr >= DIF_SUBR_APPLE_MIN && subr <= DIF_SUBR_APPLE_MAX))
8994                                 err += efunc(pc, "invalid subr %u\n", subr);
8996                                 err += efunc(pc, "invalid register %u\n", rd);
8998                                 err += efunc(pc, "cannot write to %r0\n");
9000                         if (subr == DIF_SUBR_COPYOUT ||
9001                             subr == DIF_SUBR_COPYOUTSTR ||
9002                             subr == DIF_SUBR_KDEBUG_TRACE ||
9003                             subr == DIF_SUBR_KDEBUG_TRACE_STRING) {
9004                                 dp->dtdo_destructive = 1;
9008                         if (type != DIF_TYPE_STRING && type != DIF_TYPE_CTF)
9009                                 err += efunc(pc, "invalid ref type %u\n", type);
9011                                 err += efunc(pc, "invalid register %u\n", r2);
9013                                 err += efunc(pc, "invalid register %u\n", rs);
9016                         if (type != DIF_TYPE_CTF)
9017                                 err += efunc(pc, "invalid val type %u\n", type);
9019                                 err += efunc(pc, "invalid register %u\n", r2);
9021                                 err += efunc(pc, "invalid register %u\n", rs);
9024                         err += efunc(pc, "invalid opcode %u\n",
9025                             DIF_INSTR_OP(instr));
9029         if (dp->dtdo_len != 0 &&
9030             DIF_INSTR_OP(dp->dtdo_buf[dp->dtdo_len - 1]) != DIF_OP_RET) {
9031                 err += efunc(dp->dtdo_len - 1,
9032                     "expected 'ret' as last DIF instruction\n");
9035         if (!(dp->dtdo_rtype.dtdt_flags & (DIF_TF_BYREF | DIF_TF_BYUREF))) {
9037                  * If we're not returning by reference, the size must be either
9038                  * 0 or the size of one of the base types.
9040                 switch (dp->dtdo_rtype.dtdt_size) {
9042                 case sizeof (uint8_t):
9043                 case sizeof (uint16_t):
9044                 case sizeof (uint32_t):
9045                 case sizeof (uint64_t):
9049                         err += efunc(dp->dtdo_len - 1, "bad return size\n");
9053         for (i = 0; i < dp->dtdo_varlen && err == 0; i++) {
9054                 dtrace_difv_t *v = &dp->dtdo_vartab[i], *existing = NULL;
9055                 dtrace_diftype_t *vt, *et;
9059                 if (v->dtdv_scope != DIFV_SCOPE_GLOBAL &&
9060                     v->dtdv_scope != DIFV_SCOPE_THREAD &&
9061                     v->dtdv_scope != DIFV_SCOPE_LOCAL) {
9062                         err += efunc(i, "unrecognized variable scope %d\n",
9067                 if (v->dtdv_kind != DIFV_KIND_ARRAY &&
9068                     v->dtdv_kind != DIFV_KIND_SCALAR) {
9069                         err += efunc(i, "unrecognized variable type %d\n",
9074                 if ((id = v->dtdv_id) > DIF_VARIABLE_MAX) {
9075                         err += efunc(i, "%d exceeds variable id limit\n", id);
9079                 if (id < DIF_VAR_OTHER_UBASE)
9083                  * For user-defined variables, we need to check that this
9084                  * definition is identical to any previous definition that we
9087                 ndx = id - DIF_VAR_OTHER_UBASE;
9089                 switch (v->dtdv_scope) {
9090                 case DIFV_SCOPE_GLOBAL:
9091                         if (maxglobal == -1 || ndx > maxglobal)
9094                         if (ndx < vstate->dtvs_nglobals) {
9095                                 dtrace_statvar_t *svar;
9097                                 if ((svar = vstate->dtvs_globals[ndx]) != NULL)
9098                                         existing = &svar->dtsv_var;
9103                 case DIFV_SCOPE_THREAD:
9104                         if (maxtlocal == -1 || ndx > maxtlocal)
9107                         if (ndx < vstate->dtvs_ntlocals)
9108                                 existing = &vstate->dtvs_tlocals[ndx];
9111                 case DIFV_SCOPE_LOCAL:
9112                         if (maxlocal == -1 || ndx > maxlocal)
9114                         if (ndx < vstate->dtvs_nlocals) {
9115                                 dtrace_statvar_t *svar;
9117                                 if ((svar = vstate->dtvs_locals[ndx]) != NULL)
9118                                         existing = &svar->dtsv_var;
9126                 if (vt->dtdt_flags & DIF_TF_BYREF) {
9127                         if (vt->dtdt_size == 0) {
9128                                 err += efunc(i, "zero-sized variable\n");
9132                         if ((v->dtdv_scope == DIFV_SCOPE_GLOBAL ||
9133                             v->dtdv_scope == DIFV_SCOPE_LOCAL) &&
9134                             vt->dtdt_size > dtrace_statvar_maxsize) {
9135                                 err += efunc(i, "oversized by-ref static\n");
9140                 if (existing == NULL || existing->dtdv_id == 0)
9143                 ASSERT(existing->dtdv_id == v->dtdv_id);
9144                 ASSERT(existing->dtdv_scope == v->dtdv_scope);
9146                 if (existing->dtdv_kind != v->dtdv_kind)
9147                         err += efunc(i, "%d changed variable kind\n", id);
9149                 et = &existing->dtdv_type;
9151                 if (vt->dtdt_flags != et->dtdt_flags) {
9152                         err += efunc(i, "%d changed variable type flags\n", id);
9156                 if (vt->dtdt_size != 0 && vt->dtdt_size != et->dtdt_size) {
9157                         err += efunc(i, "%d changed variable type size\n", id);
9162         for (pc = 0; pc < dp->dtdo_len && err == 0; pc++) {
9163                 dif_instr_t instr = dp->dtdo_buf[pc];
9165                 uint_t v = DIF_INSTR_VAR(instr);
9166                 uint_t op = DIF_INSTR_OP(instr);
9173                         if (v > (uint_t)(DIF_VAR_OTHER_UBASE + maxglobal))
9174                                 err += efunc(pc, "invalid variable %u\n", v);
9180                         if (v > (uint_t)(DIF_VAR_OTHER_UBASE + maxtlocal))
9181                                 err += efunc(pc, "invalid variable %u\n", v);
9185                         if (v > (uint_t)(DIF_VAR_OTHER_UBASE + maxlocal))
9186                                 err += efunc(pc, "invalid variable %u\n", v);
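/*
 * Editorial sketch (not part of the original source): a minimal DIF object
 * that satisfies the six validation rules above.  It assumes the
 * instruction-encoding macros declared in <sys/dtrace.h>.
 */
static const dif_instr_t example_text[] = {
	DIF_INSTR_SETX(0, 3),	/* setx DT_INTEGER[0], %r3             */
	DIF_INSTR_RET(3),	/* ret %r3 (rule 5: last insn is ret)  */
};
/*
 * A dtrace_difo_t whose dtdo_buf is example_text, with dtdo_len = 2 and a
 * one-entry integer table, passes dtrace_difo_validate() with err == 0.
 */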
9197  * Validate a DTrace DIF object that is to be used as a helper.  Helpers
9198  * are much more constrained than normal DIFOs.  Specifically, they may not:
9201  * 1. Make calls to subroutines other than copyin(), copyinstr() or
9202  *    miscellaneous string routines
9203  * 2. Access DTrace variables other than the args[] array, and the
9204  *    curthread, pid, ppid, tid, execname, zonename, uid and gid variables.
9205  * 3. Have thread-local variables.
9206  * 4. Have dynamic variables.
9209 dtrace_difo_validate_helper(dtrace_difo_t *dp)
9211         int (*efunc)(uint_t pc, const char *, ...) = dtrace_difo_err;
9215         for (pc = 0; pc < dp->dtdo_len; pc++) {
9216                 dif_instr_t instr = dp->dtdo_buf[pc];
9218                 uint_t v = DIF_INSTR_VAR(instr);
9219                 uint_t subr = DIF_INSTR_SUBR(instr);
9220                 uint_t op = DIF_INSTR_OP(instr);
9275                 case DIF_OP_FLUSHTS:
9287                         if (v >= DIF_VAR_OTHER_UBASE)
9290                         if (v >= DIF_VAR_ARG0 && v <= DIF_VAR_ARG9)
9293                         if (v == DIF_VAR_CURTHREAD || v == DIF_VAR_PID ||
9294                             v == DIF_VAR_PPID || v == DIF_VAR_TID ||
9295                             v == DIF_VAR_EXECNAME || v == DIF_VAR_ZONENAME ||
9296                             v == DIF_VAR_UID || v == DIF_VAR_GID)
9299                         err += efunc(pc, "illegal variable %u\n", v);
9306                         err += efunc(pc, "illegal dynamic variable load\n");
9312                         err += efunc(pc, "illegal dynamic variable store\n");
9316                         if (subr == DIF_SUBR_ALLOCA ||
9317                             subr == DIF_SUBR_BCOPY ||
9318                             subr == DIF_SUBR_COPYIN ||
9319                             subr == DIF_SUBR_COPYINTO ||
9320                             subr == DIF_SUBR_COPYINSTR ||
9321                             subr == DIF_SUBR_INDEX ||
9322                             subr == DIF_SUBR_INET_NTOA ||
9323                             subr == DIF_SUBR_INET_NTOA6 ||
9324                             subr == DIF_SUBR_INET_NTOP ||
9325                             subr == DIF_SUBR_LLTOSTR ||
9326                             subr == DIF_SUBR_RINDEX ||
9327                             subr == DIF_SUBR_STRCHR ||
9328                             subr == DIF_SUBR_STRJOIN ||
9329                             subr == DIF_SUBR_STRRCHR ||
9330                             subr == DIF_SUBR_STRSTR ||
9331                             subr == DIF_SUBR_KDEBUG_TRACE ||
9332                             subr == DIF_SUBR_KDEBUG_TRACE_STRING ||
9333                             subr == DIF_SUBR_HTONS ||
9334                             subr == DIF_SUBR_HTONL ||
9335                             subr == DIF_SUBR_HTONLL ||
9336                             subr == DIF_SUBR_NTOHS ||
9337                             subr == DIF_SUBR_NTOHL ||
9338                             subr == DIF_SUBR_NTOHLL)
9341                         err += efunc(pc, "invalid subr %u\n", subr);
9345                         err += efunc(pc, "invalid opcode %u\n",
9346                             DIF_INSTR_OP(instr));
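/*
 * Editorial example (not part of the original source): a helper whose DIF
 * calls DIF_SUBR_COPYOUT fails this function with "invalid subr" -- it is
 * destructive and deliberately absent from the subroutine list above, even
 * though dtrace_difo_validate() would accept it in a normal enabling.
 */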
9354  * Returns 1 if the expression in the DIF object can be cached on a per-thread basis.
9358 dtrace_difo_cacheable(dtrace_difo_t *dp)
9365         for (i = 0; i < dp->dtdo_varlen; i++) {
9366                 dtrace_difv_t *v = &dp->dtdo_vartab[i];
9368                 if (v->dtdv_scope != DIFV_SCOPE_GLOBAL)
9371                 switch (v->dtdv_id) {
9372                 case DIF_VAR_CURTHREAD:
9375                 case DIF_VAR_EXECNAME:
9376                 case DIF_VAR_ZONENAME:
9385          * This DIF object may be cacheable.  Now we need to look for any
9386          * array loading instructions, any memory loading instructions, or
9387          * any stores to thread-local variables.
9389         for (i = 0; i < dp->dtdo_len; i++) {
9390                 uint_t op = DIF_INSTR_OP(dp->dtdo_buf[i]);
9392                 if ((op >= DIF_OP_LDSB && op <= DIF_OP_LDX) ||
9393                     (op >= DIF_OP_ULDSB && op <= DIF_OP_ULDX) ||
9394                     (op >= DIF_OP_RLDSB && op <= DIF_OP_RLDX) ||
9395                     op == DIF_OP_LDGA || op == DIF_OP_STTS)
9403 dtrace_difo_hold(dtrace_difo_t *dp)
9407         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
9410         ASSERT(dp->dtdo_refcnt != 0);
9413          * We need to check this DIF object for references to the variable
9414          * DIF_VAR_VTIMESTAMP.
9416         for (i = 0; i < dp->dtdo_varlen; i++) {
9417                 dtrace_difv_t *v = &dp->dtdo_vartab[i];
9419                 if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
9422                 if (dtrace_vtime_references++ == 0)
9423                         dtrace_vtime_enable();
9428  * This routine calculates the dynamic variable chunksize for a given DIF
9429  * object.  The calculation is not fool-proof, and can probably be tricked by
9430  * malicious DIF -- but it works for all compiler-generated DIF.  Because this
9431  * calculation is likely imperfect, dtrace_dynvar() is able to gracefully fail
9432  * if a dynamic variable size exceeds the chunksize.
9435 dtrace_difo_chunksize(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
9438         dtrace_key_t tupregs[DIF_DTR_NREGS + 2]; /* +2 for thread and id */
9439         const dif_instr_t *text = dp->dtdo_buf;
9445         for (pc = 0; pc < dp->dtdo_len; pc++) {
9446                 dif_instr_t instr = text[pc];
9447                 uint_t op = DIF_INSTR_OP(instr);
9448                 uint_t rd = DIF_INSTR_RD(instr);
9449                 uint_t r1 = DIF_INSTR_R1(instr);
9453                 dtrace_key_t *key = tupregs;
9457                         sval = dp->dtdo_inttab[DIF_INSTR_INTEGER(instr)];
9462                         key = &tupregs[DIF_DTR_NREGS];
9463                         key[0].dttk_size = 0;
9464                         key[1].dttk_size = 0;
9466                         scope = DIFV_SCOPE_THREAD;
9473                         if (DIF_INSTR_OP(instr) == DIF_OP_STTAA)
9474                                 key[nkeys++].dttk_size = 0;
9476                         key[nkeys++].dttk_size = 0;
9478                         if (op == DIF_OP_STTAA) {
9479                                 scope = DIFV_SCOPE_THREAD;
9481                                 scope = DIFV_SCOPE_GLOBAL;
9487                         if (ttop == DIF_DTR_NREGS)
9490                         if ((srd == 0 || sval == 0) && r1 == DIF_TYPE_STRING) {
9492                                  * If the register for the size of the "pushtr"
9493                                  * is %r0 (or the value is 0) and the type is
9494                                  * a string, we'll use the system-wide default
9497                                 tupregs[ttop++].dttk_size =
9498                                     dtrace_strsize_default;
9503                                 if (sval > LONG_MAX)
9506                                 tupregs[ttop++].dttk_size = sval;
9512                         if (ttop == DIF_DTR_NREGS)
9515                         tupregs[ttop++].dttk_size = 0;
9518                 case DIF_OP_FLUSHTS:
9535                  * We have a dynamic variable allocation; calculate its size.
9537                 for (ksize = 0, i = 0; i < nkeys; i++)
9538                         ksize += P2ROUNDUP(key[i].dttk_size, sizeof (uint64_t));
9540                 size = sizeof (dtrace_dynvar_t);
9541                 size += sizeof (dtrace_key_t) * (nkeys - 1);
9545                  * Now we need to determine the size of the stored data.
9547                 id = DIF_INSTR_VAR(instr);
9549                 for (i = 0; i < dp->dtdo_varlen; i++) {
9550                         dtrace_difv_t *v = &dp->dtdo_vartab[i];
9552                         if (v->dtdv_id == id && v->dtdv_scope == scope) {
9553                                 size += v->dtdv_type.dtdt_size;
9558                 if (i == dp->dtdo_varlen)
9562                  * We have the size.  If this is larger than the chunk size
9563                  * for our dynamic variable state, reset the chunk size.
9565                 size = P2ROUNDUP(size, sizeof (uint64_t));
9568                  * Before setting the chunk size, check that we're not going
9569                  * to set it to a negative value...
9571                 if (size > LONG_MAX)
9575                  * ...and make certain that we didn't badly overflow.
9577                 if (size < ksize || size < sizeof (dtrace_dynvar_t))
9580                 if (size > vstate->dtvs_dynvars.dtds_chunksize)
9581                         vstate->dtvs_dynvars.dtds_chunksize = size;
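/*
 * Editorial worked example (not part of the original source): for a
 * thread-local variable self->x storing a uint64_t, the DIF_OP_LDTS/STTS
 * case above sets up two zero-sized keys, so ksize rounds up to 0 and the
 * candidate chunk size is
 *
 *	sizeof (dtrace_dynvar_t) + (2 - 1) * sizeof (dtrace_key_t)
 *	    + sizeof (uint64_t)
 *
 * rounded up to a multiple of sizeof (uint64_t) by P2ROUNDUP() before it
 * is compared against the state's current dtds_chunksize.
 */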
9586 dtrace_difo_init(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
9588         int oldsvars, osz, nsz, otlocals, ntlocals;
9591         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
9592         ASSERT(dp->dtdo_buf != NULL && dp->dtdo_len != 0);
9594         for (i = 0; i < dp->dtdo_varlen; i++) {
9595                 dtrace_difv_t *v = &dp->dtdo_vartab[i];
9596                 dtrace_statvar_t *svar;
9597                 dtrace_statvar_t ***svarp = NULL;
9599                 uint8_t scope = v->dtdv_scope;
9600                 int *np = (int *)NULL;
9602                 if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE)
9605                 id -= DIF_VAR_OTHER_UBASE;
9608                 case DIFV_SCOPE_THREAD:
9609                         while (id >= (uint_t)(otlocals = vstate->dtvs_ntlocals)) {
9610                                 dtrace_difv_t *tlocals;
9612                                 if ((ntlocals = (otlocals << 1)) == 0)
9615                                 osz = otlocals * sizeof (dtrace_difv_t);
9616                                 nsz = ntlocals * sizeof (dtrace_difv_t);
9618                                 tlocals = kmem_zalloc(nsz, KM_SLEEP);
9621                                         bcopy(vstate->dtvs_tlocals,
9623                                         kmem_free(vstate->dtvs_tlocals, osz);
9626                                 vstate->dtvs_tlocals = tlocals;
9627                                 vstate->dtvs_ntlocals = ntlocals;
9630                         vstate->dtvs_tlocals[id] = *v;
9633                 case DIFV_SCOPE_LOCAL:
9634                         np = &vstate->dtvs_nlocals;
9635                         svarp = &vstate->dtvs_locals;
9637                         if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF)
9638                                 dsize = (int)NCPU * (v->dtdv_type.dtdt_size +
9641                                 dsize = (int)NCPU * sizeof (uint64_t);
9645                 case DIFV_SCOPE_GLOBAL:
9646                         np = &vstate->dtvs_nglobals;
9647                         svarp = &vstate->dtvs_globals;
9649                         if (v->dtdv_type.dtdt_flags & DIF_TF_BYREF)
9650                                 dsize = v->dtdv_type.dtdt_size +
9659                 while (id >= (uint_t)(oldsvars = *np)) {
9660                         dtrace_statvar_t **statics;
9661                         int newsvars, oldsize, newsize;
9663                         if ((newsvars = (oldsvars << 1)) == 0)
9666                         oldsize = oldsvars * sizeof (dtrace_statvar_t *);
9667                         newsize = newsvars * sizeof (dtrace_statvar_t *);
9669                         statics = kmem_zalloc(newsize, KM_SLEEP);
9672                                 bcopy(*svarp, statics, oldsize);
9673                                 kmem_free(*svarp, oldsize);
9680                 if ((svar = (*svarp)[id]) == NULL) {
9681                         svar = kmem_zalloc(sizeof (dtrace_statvar_t), KM_SLEEP);
9682                         svar->dtsv_var = *v;
9684                         if ((svar->dtsv_size = dsize) != 0) {
9685                                 svar->dtsv_data = (uint64_t)(uintptr_t)
9686                                     kmem_zalloc(dsize, KM_SLEEP);
9689                         (*svarp)[id] = svar;
9692                 svar->dtsv_refcnt++;
9695         dtrace_difo_chunksize(dp, vstate);
9696         dtrace_difo_hold(dp);
9699 static dtrace_difo_t *
9700 dtrace_difo_duplicate(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
9705         ASSERT(dp->dtdo_buf != NULL);
9706         ASSERT(dp->dtdo_refcnt != 0);
9708         new = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP);
9710         ASSERT(dp->dtdo_buf != NULL);
9711         sz = dp->dtdo_len * sizeof (dif_instr_t);
9712         new->dtdo_buf = kmem_alloc(sz, KM_SLEEP);
9713         bcopy(dp->dtdo_buf, new->dtdo_buf, sz);
9714         new->dtdo_len = dp->dtdo_len;
9716         if (dp->dtdo_strtab != NULL) {
9717                 ASSERT(dp->dtdo_strlen != 0);
9718                 new->dtdo_strtab = kmem_alloc(dp->dtdo_strlen, KM_SLEEP);
9719                 bcopy(dp->dtdo_strtab, new->dtdo_strtab, dp->dtdo_strlen);
9720                 new->dtdo_strlen = dp->dtdo_strlen;
9723         if (dp->dtdo_inttab != NULL) {
9724                 ASSERT(dp->dtdo_intlen != 0);
9725                 sz = dp->dtdo_intlen * sizeof (uint64_t);
9726                 new->dtdo_inttab = kmem_alloc(sz, KM_SLEEP);
9727                 bcopy(dp->dtdo_inttab, new->dtdo_inttab, sz);
9728                 new->dtdo_intlen = dp->dtdo_intlen;
9731         if (dp->dtdo_vartab != NULL) {
9732                 ASSERT(dp->dtdo_varlen != 0);
9733                 sz = dp->dtdo_varlen * sizeof (dtrace_difv_t);
9734                 new->dtdo_vartab = kmem_alloc(sz, KM_SLEEP);
9735                 bcopy(dp->dtdo_vartab, new->dtdo_vartab, sz);
9736                 new->dtdo_varlen = dp->dtdo_varlen;
9739         dtrace_difo_init(new, vstate);
9744 dtrace_difo_destroy(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
9748         ASSERT(dp->dtdo_refcnt == 0);
9750         for (i = 0; i < dp->dtdo_varlen; i++) {
9751                 dtrace_difv_t *v = &dp->dtdo_vartab[i];
9752                 dtrace_statvar_t *svar;
9753                 dtrace_statvar_t **svarp = NULL;
9755                 uint8_t scope = v->dtdv_scope;
9759                 case DIFV_SCOPE_THREAD:
9762                 case DIFV_SCOPE_LOCAL:
9763                         np = &vstate->dtvs_nlocals;
9764                         svarp = vstate->dtvs_locals;
9767                 case DIFV_SCOPE_GLOBAL:
9768                         np = &vstate->dtvs_nglobals;
9769                         svarp = vstate->dtvs_globals;
9776                 if ((id = v->dtdv_id) < DIF_VAR_OTHER_UBASE)
9779                 id -= DIF_VAR_OTHER_UBASE;
9781                 ASSERT(id < (uint_t)*np);
9784                 ASSERT(svar != NULL);
9785                 ASSERT(svar->dtsv_refcnt > 0);
9787                 if (--svar->dtsv_refcnt > 0)
9790                 if (svar->dtsv_size != 0) {
9791                         ASSERT(svar->dtsv_data != 0);
9792                         kmem_free((void *)(uintptr_t)svar->dtsv_data,
9796                 kmem_free(svar, sizeof (dtrace_statvar_t));
9800         kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t));
9801         kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t));
9802         kmem_free(dp->dtdo_strtab, dp->dtdo_strlen);
9803         kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t));
9805         kmem_free(dp, sizeof (dtrace_difo_t));
9809 dtrace_difo_release(dtrace_difo_t *dp, dtrace_vstate_t *vstate)
9813         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
9814         ASSERT(dp->dtdo_refcnt != 0);
9816         for (i = 0; i < dp->dtdo_varlen; i++) {
9817                 dtrace_difv_t *v = &dp->dtdo_vartab[i];
9819                 if (v->dtdv_id != DIF_VAR_VTIMESTAMP)
9822                 ASSERT(dtrace_vtime_references > 0);
9823                 if (--dtrace_vtime_references == 0)
9824                         dtrace_vtime_disable();
9827         if (--dp->dtdo_refcnt == 0)
9828                 dtrace_difo_destroy(dp, vstate);
9832  * DTrace Format Functions
9835 dtrace_format_add(dtrace_state_t *state, char *str)
9838         uint16_t ndx, len = strlen(str) + 1;
9840         fmt = kmem_zalloc(len, KM_SLEEP);
9841         bcopy(str, fmt, len);
9843         for (ndx = 0; ndx < state->dts_nformats; ndx++) {
9844                 if (state->dts_formats[ndx] == NULL) {
9845                         state->dts_formats[ndx] = fmt;
9850         if (state->dts_nformats == USHRT_MAX) {
9852                  * This is only likely if a denial-of-service attack is being
9853                  * attempted.  As such, it's okay to fail silently here.
9855                 kmem_free(fmt, len);
9860          * For simplicity, we always resize the formats array to be exactly the
9861          * number of formats.
9863         ndx = state->dts_nformats++;
9864         new = kmem_alloc((ndx + 1) * sizeof (char *), KM_SLEEP);
9866         if (state->dts_formats != NULL) {
9868                 bcopy(state->dts_formats, new, ndx * sizeof (char *));
9869                 kmem_free(state->dts_formats, ndx * sizeof (char *));
9872         state->dts_formats = new;
9873         state->dts_formats[ndx] = fmt;
9879 dtrace_format_remove(dtrace_state_t *state, uint16_t format)
9883         ASSERT(state->dts_formats != NULL);
9884         ASSERT(format <= state->dts_nformats);
9885         ASSERT(state->dts_formats[format - 1] != NULL);
9887         fmt = state->dts_formats[format - 1];
9888         kmem_free(fmt, strlen(fmt) + 1);
9889         state->dts_formats[format - 1] = NULL;
9893 dtrace_format_destroy(dtrace_state_t *state)
9897         if (state->dts_nformats == 0) {
9898                 ASSERT(state->dts_formats == NULL);
9902         ASSERT(state->dts_formats != NULL);
9904         for (i = 0; i < state->dts_nformats; i++) {
9905                 char *fmt = state->dts_formats[i];
9910                 kmem_free(fmt, strlen(fmt) + 1);
9913         kmem_free(state->dts_formats, state->dts_nformats * sizeof (char *));
9914         state->dts_nformats = 0;
9915         state->dts_formats = NULL;
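/*
 * Editorial note (not part of the original source): the format IDs handed
 * out by dtrace_format_add() are 1-based -- slot 0 of dts_formats holds
 * format 1 -- which is why dtrace_format_remove() asserts
 * format <= dts_nformats and indexes dts_formats[format - 1].
 */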
9919  * DTrace Predicate Functions
9921 static dtrace_predicate_t *
9922 dtrace_predicate_create(dtrace_difo_t *dp)
9924         dtrace_predicate_t *pred;
9926         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
9927         ASSERT(dp->dtdo_refcnt != 0);
9929         pred = kmem_zalloc(sizeof (dtrace_predicate_t), KM_SLEEP);
9930         pred->dtp_difo = dp;
9931         pred->dtp_refcnt = 1;
9933         if (!dtrace_difo_cacheable(dp))
9936         if (dtrace_predcache_id == DTRACE_CACHEIDNONE) {
9938                  * This is only theoretically possible -- we have had 2^32
9939                  * cacheable predicates on this machine.  We cannot allow any
9940                  * more predicates to become cacheable:  as unlikely as it is,
9941                  * there may be a thread caching a (now stale) predicate cache
9942                  * ID. (N.B.: the temptation is being successfully resisted to
9943                  * have this cmn_err() "Holy shit -- we executed this code!")
9948         pred->dtp_cacheid = dtrace_predcache_id++;
9954 dtrace_predicate_hold(dtrace_predicate_t *pred)
9956         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
9957         ASSERT(pred->dtp_difo != NULL && pred->dtp_difo->dtdo_refcnt != 0);
9958         ASSERT(pred->dtp_refcnt > 0);
9964 dtrace_predicate_release(dtrace_predicate_t *pred, dtrace_vstate_t *vstate)
9966         dtrace_difo_t *dp = pred->dtp_difo;
9967 #pragma unused(dp) /* __APPLE__ */
9969         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
9970         ASSERT(dp != NULL && dp->dtdo_refcnt != 0);
9971         ASSERT(pred->dtp_refcnt > 0);
9973         if (--pred->dtp_refcnt == 0) {
9974                 dtrace_difo_release(pred->dtp_difo, vstate);
9975                 kmem_free(pred, sizeof (dtrace_predicate_t));
9980  * DTrace Action Description Functions
9982 static dtrace_actdesc_t *
9983 dtrace_actdesc_create(dtrace_actkind_t kind, uint32_t ntuple,
9984     uint64_t uarg, uint64_t arg)
9986         dtrace_actdesc_t *act;
9988         ASSERT(!DTRACEACT_ISPRINTFLIKE(kind) || (arg != 0 &&
9989             arg >= KERNELBASE) || (arg == 0 && kind == DTRACEACT_PRINTA));
9991         act = kmem_zalloc(sizeof (dtrace_actdesc_t), KM_SLEEP);
9992         act->dtad_kind = kind;
9993         act->dtad_ntuple = ntuple;
9994         act->dtad_uarg = uarg;
9995         act->dtad_arg = arg;
9996         act->dtad_refcnt = 1;
10002 dtrace_actdesc_hold(dtrace_actdesc_t *act)
10004         ASSERT(act->dtad_refcnt >= 1);
10005         act->dtad_refcnt++;
10009 dtrace_actdesc_release(dtrace_actdesc_t *act, dtrace_vstate_t *vstate)
10011         dtrace_actkind_t kind = act->dtad_kind;
10014         ASSERT(act->dtad_refcnt >= 1);
10016         if (--act->dtad_refcnt != 0)
10019         if ((dp = act->dtad_difo) != NULL)
10020                 dtrace_difo_release(dp, vstate);
10022         if (DTRACEACT_ISPRINTFLIKE(kind)) {
10023                 char *str = (char *)(uintptr_t)act->dtad_arg;
10025                 ASSERT((str != NULL && (uintptr_t)str >= KERNELBASE) ||
10026                     (str == NULL && act->dtad_kind == DTRACEACT_PRINTA));
10029                         kmem_free(str, strlen(str) + 1);
10032         kmem_free(act, sizeof (dtrace_actdesc_t));
10036  * DTrace ECB Functions
10038 static dtrace_ecb_t *
10039 dtrace_ecb_add(dtrace_state_t *state, dtrace_probe_t *probe)
10042         dtrace_epid_t epid;
10044         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
10046         ecb = kmem_zalloc(sizeof (dtrace_ecb_t), KM_SLEEP);
10047         ecb->dte_predicate = NULL;
10048         ecb->dte_probe = probe;
10051          * The default size is the size of the default action: recording
10052          * the header.
10054         ecb->dte_size = ecb->dte_needed = sizeof (dtrace_rechdr_t);
10055         ecb->dte_alignment = sizeof (dtrace_epid_t);
10057         epid = state->dts_epid++;
10059         if (epid - 1 >= (dtrace_epid_t)state->dts_necbs) {
10060                 dtrace_ecb_t **oecbs = state->dts_ecbs, **ecbs;
10061                 int necbs = state->dts_necbs << 1;
10063                 ASSERT(epid == (dtrace_epid_t)state->dts_necbs + 1);
10066                         ASSERT(oecbs == NULL);
10070                 ecbs = kmem_zalloc(necbs * sizeof (*ecbs), KM_SLEEP);
10073                         bcopy(oecbs, ecbs, state->dts_necbs * sizeof (*ecbs));
10075                 dtrace_membar_producer();
10076                 state->dts_ecbs = ecbs;
10078                 if (oecbs != NULL) {
10080                          * If this state is active, we must dtrace_sync()
10081                          * before we can free the old dts_ecbs array:  we're
10082                          * coming in hot, and there may be active ring
10083                          * buffer processing (which indexes into the dts_ecbs
10084                          * array) on another CPU.
10086                         if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
10089                         kmem_free(oecbs, state->dts_necbs * sizeof (*ecbs));
10092                 dtrace_membar_producer();
10093                 state->dts_necbs = necbs;
10096         ecb->dte_state = state;
10098         ASSERT(state->dts_ecbs[epid - 1] == NULL);
10099         dtrace_membar_producer();
10100         state->dts_ecbs[(ecb->dte_epid = epid) - 1] = ecb;
10106 dtrace_ecb_enable(dtrace_ecb_t *ecb)
10108         dtrace_probe_t *probe = ecb->dte_probe;
10110         LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED);
10111         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
10112         ASSERT(ecb->dte_next == NULL);
10114         if (probe == NULL) {
10116                  * This is the NULL probe -- there's nothing to do.
10121         probe->dtpr_provider->dtpv_ecb_count++;
10122         if (probe->dtpr_ecb == NULL) {
10123                 dtrace_provider_t *prov = probe->dtpr_provider;
10126                  * We're the first ECB on this probe.
10128                 probe->dtpr_ecb = probe->dtpr_ecb_last = ecb;
10130                 if (ecb->dte_predicate != NULL)
10131                         probe->dtpr_predcache = ecb->dte_predicate->dtp_cacheid;
10133                 return (prov->dtpv_pops.dtps_enable(prov->dtpv_arg,
10134                     probe->dtpr_id, probe->dtpr_arg));
10137                  * This probe is already active.  Swing the last pointer to
10138                  * point to the new ECB, and issue a dtrace_sync() to assure
10139                  * that all CPUs have seen the change.
10141                 ASSERT(probe->dtpr_ecb_last != NULL);
10142                 probe->dtpr_ecb_last->dte_next = ecb;
10143                 probe->dtpr_ecb_last = ecb;
10144                 probe->dtpr_predcache = 0;
10152 dtrace_ecb_resize(dtrace_ecb_t *ecb)
10154         dtrace_action_t *act;
10155         uint32_t curneeded = UINT32_MAX;
10156         uint32_t aggbase = UINT32_MAX;
10159          * If we record anything, we always record the dtrace_rechdr_t.  (And
10160          * we always record it first.)
10162         ecb->dte_size = sizeof (dtrace_rechdr_t);
10163         ecb->dte_alignment = sizeof (dtrace_epid_t);
10165         for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
10166                 dtrace_recdesc_t *rec = &act->dta_rec;
10167                 ASSERT(rec->dtrd_size > 0 || rec->dtrd_alignment == 1);
10169                 ecb->dte_alignment = MAX(ecb->dte_alignment, rec->dtrd_alignment);
10171                 if (DTRACEACT_ISAGG(act->dta_kind)) {
10172                         dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
10174                         ASSERT(rec->dtrd_size != 0);
10175                         ASSERT(agg->dtag_first != NULL);
10176                         ASSERT(act->dta_prev->dta_intuple);
10177                         ASSERT(aggbase != UINT32_MAX);
10178                         ASSERT(curneeded != UINT32_MAX);
10180                         agg->dtag_base = aggbase;
10181                         curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment);
10182                         rec->dtrd_offset = curneeded;
10183                         if (curneeded + rec->dtrd_size < curneeded)
10185                         curneeded += rec->dtrd_size;
10186                         ecb->dte_needed = MAX(ecb->dte_needed, curneeded);
10188                         aggbase = UINT32_MAX;
10189                         curneeded = UINT32_MAX;
10190                 } else if (act->dta_intuple) {
10191                         if (curneeded == UINT32_MAX) {
10193                                  * This is the first record in a tuple.  Align
10194                                  * curneeded to be at offset 4 in an 8-byte
10195                                  * aligned block.
10197                                 ASSERT(act->dta_prev == NULL || !act->dta_prev->dta_intuple);
10198                                 ASSERT(aggbase == UINT32_MAX);
10200                                 curneeded = P2PHASEUP(ecb->dte_size,
10201                                     sizeof (uint64_t), sizeof (dtrace_aggid_t));
10203                                 aggbase = curneeded - sizeof (dtrace_aggid_t);
10204                                 ASSERT(IS_P2ALIGNED(aggbase,
10205                                     sizeof (uint64_t)));
10208                         curneeded = P2ROUNDUP(curneeded, rec->dtrd_alignment);
10209                         rec->dtrd_offset = curneeded;
10210                         curneeded += rec->dtrd_size;
10211                         if (curneeded + rec->dtrd_size < curneeded)
10214                         /* tuples must be followed by an aggregation */
10215                         ASSERT(act->dta_prev == NULL || !act->dta_prev->dta_intuple);
10216                         ecb->dte_size = P2ROUNDUP(ecb->dte_size, rec->dtrd_alignment);
10217                         rec->dtrd_offset = ecb->dte_size;
10218                         if (ecb->dte_size + rec->dtrd_size < ecb->dte_size)
10220                         ecb->dte_size += rec->dtrd_size;
10221                         ecb->dte_needed = MAX(ecb->dte_needed, ecb->dte_size);
10225         if ((act = ecb->dte_action) != NULL &&
10226             !(act->dta_kind == DTRACEACT_SPECULATE && act->dta_next == NULL) &&
10227             ecb->dte_size == sizeof (dtrace_rechdr_t)) {
10229                  * If the size is still sizeof (dtrace_rechdr_t), then all
10230                  * actions store no data; set the size to 0.
10235         ecb->dte_size = P2ROUNDUP(ecb->dte_size, sizeof (dtrace_epid_t));
10236         ecb->dte_needed = P2ROUNDUP(ecb->dte_needed, (sizeof (dtrace_epid_t)));
10237         ecb->dte_state->dts_needed = MAX(ecb->dte_state->dts_needed, ecb->dte_needed);
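/*
 * Editorial sketch (not part of the original source) of the alignment
 * helpers used above, assuming the usual <sys/sysmacros.h> definitions:
 * P2ROUNDUP(x, a) rounds x up to the next multiple of the power-of-two a,
 * and P2PHASEUP(x, a, p) rounds x up to the next value congruent to p
 * modulo a.  For example:
 *
 *	P2ROUNDUP(12, 8)    == 16
 *	P2PHASEUP(13, 8, 4) == 20	(20 is the next value that is 4 mod 8)
 *
 * which is how a tuple that leads with a 4-byte dtrace_aggid_t lands its
 * 8-byte records on 8-byte boundaries.
 */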
10241 static dtrace_action_t *
10242 dtrace_ecb_aggregation_create(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
10244         dtrace_aggregation_t *agg;
10245         size_t size = sizeof (uint64_t);
10246         int ntuple = desc->dtad_ntuple;
10247         dtrace_action_t *act;
10248         dtrace_recdesc_t *frec;
10249         dtrace_aggid_t aggid;
10250         dtrace_state_t *state = ecb->dte_state;
10252         agg = kmem_zalloc(sizeof (dtrace_aggregation_t), KM_SLEEP);
10253         agg->dtag_ecb = ecb;
10255         ASSERT(DTRACEACT_ISAGG(desc->dtad_kind));
10257         switch (desc->dtad_kind) {
10258         case DTRACEAGG_MIN:
10259                 agg->dtag_initial = INT64_MAX;
10260                 agg->dtag_aggregate = dtrace_aggregate_min;
10263         case DTRACEAGG_MAX:
10264                 agg->dtag_initial = INT64_MIN;
10265                 agg->dtag_aggregate = dtrace_aggregate_max;
10268         case DTRACEAGG_COUNT:
10269                 agg->dtag_aggregate = dtrace_aggregate_count;
10272         case DTRACEAGG_QUANTIZE:
10273                 agg->dtag_aggregate = dtrace_aggregate_quantize;
10274                 size = (((sizeof (uint64_t) * NBBY) - 1) * 2 + 1) *
10278         case DTRACEAGG_LQUANTIZE: {
10279                 uint16_t step = DTRACE_LQUANTIZE_STEP(desc->dtad_arg);
10280                 uint16_t levels = DTRACE_LQUANTIZE_LEVELS(desc->dtad_arg);
10282                 agg->dtag_initial = desc->dtad_arg;
10283                 agg->dtag_aggregate = dtrace_aggregate_lquantize;
10285                 if (step == 0 || levels == 0)
10288                 size = levels * sizeof (uint64_t) + 3 * sizeof (uint64_t);
10292         case DTRACEAGG_LLQUANTIZE: {
10293                 uint16_t factor = DTRACE_LLQUANTIZE_FACTOR(desc->dtad_arg);
10294                 uint16_t low    = DTRACE_LLQUANTIZE_LOW(desc->dtad_arg);
10295                 uint16_t high   = DTRACE_LLQUANTIZE_HIGH(desc->dtad_arg);
10296                 uint16_t nsteps = DTRACE_LLQUANTIZE_NSTEP(desc->dtad_arg);
10299                 agg->dtag_initial = desc->dtad_arg;
10300                 agg->dtag_aggregate = dtrace_aggregate_llquantize;
10302                 if (factor < 2 || low >= high || nsteps < factor)
10306                  * Now check that the number of steps evenly divides a power
10307                  * of the factor.  (This assures both integer bucket size and
10308                  * linearity within each magnitude.)
10310                 for (v = factor; v < nsteps; v *= factor)
10313                 if ((v % nsteps) || (nsteps % factor))
10316                 size = (dtrace_aggregate_llquantize_bucket(factor, low, high, nsteps, INT64_MAX) + 2) * sizeof (uint64_t);
10320         case DTRACEAGG_AVG:
10321                 agg->dtag_aggregate = dtrace_aggregate_avg;
10322                 size = sizeof (uint64_t) * 2;
10325         case DTRACEAGG_STDDEV:
10326                 agg->dtag_aggregate = dtrace_aggregate_stddev;
10327                 size = sizeof (uint64_t) * 4;
10330         case DTRACEAGG_SUM:
10331                 agg->dtag_aggregate = dtrace_aggregate_sum;
10338         agg->dtag_action.dta_rec.dtrd_size = size;
10344          * We must make sure that we have enough actions for the n-tuple.
10346         for (act = ecb->dte_action_last; act != NULL; act = act->dta_prev) {
10347                 if (DTRACEACT_ISAGG(act->dta_kind))
10350                 if (--ntuple == 0) {
10352                          * This is the action with which our n-tuple begins.
10354                         agg->dtag_first = act;
10360          * This n-tuple is short by ntuple elements.  Return failure.
10362         ASSERT(ntuple != 0);
10364         kmem_free(agg, sizeof (dtrace_aggregation_t));
10369          * If the last action in the tuple has a size of zero, it's actually
10370          * an expression argument for the aggregating action.
10372         ASSERT(ecb->dte_action_last != NULL);
10373         act = ecb->dte_action_last;
10375         if (act->dta_kind == DTRACEACT_DIFEXPR) {
10376                 ASSERT(act->dta_difo != NULL);
10378                 if (act->dta_difo->dtdo_rtype.dtdt_size == 0)
10379                         agg->dtag_hasarg = 1;
10383          * We need to allocate an id for this aggregation.
10385         aggid = (dtrace_aggid_t)(uintptr_t)vmem_alloc(state->dts_aggid_arena, 1,
10386             VM_BESTFIT | VM_SLEEP);
10388         if (aggid - 1 >= (dtrace_aggid_t)state->dts_naggregations) {
10389                 dtrace_aggregation_t **oaggs = state->dts_aggregations;
10390                 dtrace_aggregation_t **aggs;
10391                 int naggs = state->dts_naggregations << 1;
10392                 int onaggs = state->dts_naggregations;
10394                 ASSERT(aggid == (dtrace_aggid_t)state->dts_naggregations + 1);
10397                         ASSERT(oaggs == NULL);
10401                 aggs = kmem_zalloc(naggs * sizeof (*aggs), KM_SLEEP);
10403                 if (oaggs != NULL) {
10404                         bcopy(oaggs, aggs, onaggs * sizeof (*aggs));
10405                         kmem_free(oaggs, onaggs * sizeof (*aggs));
10408                 state->dts_aggregations = aggs;
10409                 state->dts_naggregations = naggs;
10412         ASSERT(state->dts_aggregations[aggid - 1] == NULL);
10413         state->dts_aggregations[(agg->dtag_id = aggid) - 1] = agg;
10415         frec = &agg->dtag_first->dta_rec;
10416         if (frec->dtrd_alignment < sizeof (dtrace_aggid_t))
10417                 frec->dtrd_alignment = sizeof (dtrace_aggid_t);
10419         for (act = agg->dtag_first; act != NULL; act = act->dta_next) {
10420                 ASSERT(!act->dta_intuple);
10421                 act->dta_intuple = 1;
10424         return (&agg->dtag_action);
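/*
 * Editorial worked examples (not part of the original source):
 *
 * - DTRACEAGG_QUANTIZE: (sizeof (uint64_t) * NBBY - 1) * 2 + 1 == 127
 *   power-of-two buckets (63 negative, one zero, 63 positive); with each
 *   bucket presumably a uint64_t, the record size above is 1016 bytes.
 *
 * - DTRACEAGG_LLQUANTIZE: factor 10 with nsteps 20 passes the divisibility
 *   check above, since the loop stops at v == 100 and both 100 % 20 and
 *   20 % 10 are zero; factor 10 with nsteps 15 fails (100 % 15 != 0).
 */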
10428 dtrace_ecb_aggregation_destroy(dtrace_ecb_t *ecb, dtrace_action_t *act)
10430         dtrace_aggregation_t *agg = (dtrace_aggregation_t *)act;
10431         dtrace_state_t *state = ecb->dte_state;
10432         dtrace_aggid_t aggid = agg->dtag_id;
10434         ASSERT(DTRACEACT_ISAGG(act->dta_kind));
10435         vmem_free(state->dts_aggid_arena, (void *)(uintptr_t)aggid, 1);
10437         ASSERT(state->dts_aggregations[aggid - 1] == agg);
10438         state->dts_aggregations[aggid - 1] = NULL;
10440         kmem_free(agg, sizeof (dtrace_aggregation_t));
10444 dtrace_ecb_action_add(dtrace_ecb_t *ecb, dtrace_actdesc_t *desc)
10446         dtrace_action_t *action, *last;
10447         dtrace_difo_t *dp = desc->dtad_difo;
10448         uint32_t size = 0, align = sizeof (uint8_t), mask;
10449         uint16_t format = 0;
10450         dtrace_recdesc_t *rec;
10451         dtrace_state_t *state = ecb->dte_state;
10452         dtrace_optval_t *opt = state->dts_options;
10453         dtrace_optval_t nframes = 0, strsize;
10454         uint64_t arg = desc->dtad_arg;
10456         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
10457         ASSERT(ecb->dte_action == NULL || ecb->dte_action->dta_refcnt == 1);
10459         if (DTRACEACT_ISAGG(desc->dtad_kind)) {
10461                  * If this is an aggregating action, there must be neither
10462                  * a speculate nor a commit on the action chain.
10464                 dtrace_action_t *act;
10466                 for (act = ecb->dte_action; act != NULL; act = act->dta_next) {
10467                         if (act->dta_kind == DTRACEACT_COMMIT)
10470                         if (act->dta_kind == DTRACEACT_SPECULATE)
10474                 action = dtrace_ecb_aggregation_create(ecb, desc);
10476                 if (action == NULL)
10479                 if (DTRACEACT_ISDESTRUCTIVE(desc->dtad_kind) ||
10480                     (desc->dtad_kind == DTRACEACT_DIFEXPR &&
10481                     dp != NULL && dp->dtdo_destructive)) {
10482                         state->dts_destructive = 1;
10485                 switch (desc->dtad_kind) {
10486                 case DTRACEACT_PRINTF:
10487                 case DTRACEACT_PRINTA:
10488                 case DTRACEACT_SYSTEM:
10489                 case DTRACEACT_FREOPEN:
10490                 case DTRACEACT_DIFEXPR:
10492                          * We know that our arg is a string -- turn it into a
10496                                 ASSERT(desc->dtad_kind == DTRACEACT_PRINTA ||
10497                                        desc->dtad_kind == DTRACEACT_DIFEXPR);
10501                                 ASSERT(arg > KERNELBASE);
10502                                 format = dtrace_format_add(state,
10503                                     (char *)(uintptr_t)arg);
10507                 case DTRACEACT_LIBACT:
10508                 case DTRACEACT_TRACEMEM:
10509                 case DTRACEACT_TRACEMEM_DYNSIZE:
10510                 case DTRACEACT_APPLEBINARY:     /* __APPLE__ */
10514                         if ((size = dp->dtdo_rtype.dtdt_size) != 0)
10517                         if (dp->dtdo_rtype.dtdt_kind == DIF_TYPE_STRING) {
10518                                 if (!(dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
10521                                 size = opt[DTRACEOPT_STRSIZE];
10526                 case DTRACEACT_STACK:
10527                         if ((nframes = arg) == 0) {
10528                                 nframes = opt[DTRACEOPT_STACKFRAMES];
10529                                 ASSERT(nframes > 0);
10533                         size = nframes * sizeof (pc_t);
10536                 case DTRACEACT_JSTACK:
10537                         if ((strsize = DTRACE_USTACK_STRSIZE(arg)) == 0)
10538                                 strsize = opt[DTRACEOPT_JSTACKSTRSIZE];
10540                         if ((nframes = DTRACE_USTACK_NFRAMES(arg)) == 0)
10541                                 nframes = opt[DTRACEOPT_JSTACKFRAMES];
10543                         arg = DTRACE_USTACK_ARG(nframes, strsize);
10546                 case DTRACEACT_USTACK:
10547                         if (desc->dtad_kind != DTRACEACT_JSTACK &&
10548                             (nframes = DTRACE_USTACK_NFRAMES(arg)) == 0) {
10549                                 strsize = DTRACE_USTACK_STRSIZE(arg);
10550                                 nframes = opt[DTRACEOPT_USTACKFRAMES];
10551                                 ASSERT(nframes > 0);
10552                                 arg = DTRACE_USTACK_ARG(nframes, strsize);
10556                          * Save a slot for the pid.
10558                         size = (nframes + 1) * sizeof (uint64_t);
10559                         size += DTRACE_USTACK_STRSIZE(arg);
10560                         size = P2ROUNDUP(size, (uint32_t)(sizeof (uintptr_t)));
10564                 case DTRACEACT_SYM:
10565                 case DTRACEACT_MOD:
10566                         if (dp == NULL || ((size = dp->dtdo_rtype.dtdt_size) !=
10567                             sizeof (uint64_t)) ||
10568                             (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
10572                 case DTRACEACT_USYM:
10573                 case DTRACEACT_UMOD:
10574                 case DTRACEACT_UADDR:
10576                             (dp->dtdo_rtype.dtdt_size != sizeof (uint64_t)) ||
10577                             (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
10581                          * We have a slot for the pid, plus a slot for the
10582                          * argument.  To keep things simple (aligned with
10583                          * bitness-neutral sizing), we store each as a 64-bit
10586                         size = 2 * sizeof (uint64_t);
10589                 case DTRACEACT_STOP:
10590                 case DTRACEACT_BREAKPOINT:
10591                 case DTRACEACT_PANIC:
10594                 case DTRACEACT_CHILL:
10595                 case DTRACEACT_DISCARD:
10596                 case DTRACEACT_RAISE:
10597                 case DTRACEACT_PIDRESUME:       /* __APPLE__ */
10602                 case DTRACEACT_EXIT:
10604                             (size = dp->dtdo_rtype.dtdt_size) != sizeof (int) ||
10605                             (dp->dtdo_rtype.dtdt_flags & DIF_TF_BYREF))
10609                 case DTRACEACT_SPECULATE:
10610                         if (ecb->dte_size > sizeof (dtrace_rechdr_t))
10616                         state->dts_speculates = 1;
10619                 case DTRACEACT_COMMIT: {
10620                         dtrace_action_t *act = ecb->dte_action;
10622                         for (; act != NULL; act = act->dta_next) {
10623                                 if (act->dta_kind == DTRACEACT_COMMIT)
10636                 if (size != 0 || desc->dtad_kind == DTRACEACT_SPECULATE) {
10638                          * If this is a data-storing action or a speculate,
10639                          * we must be sure that there isn't a commit on the
10642                         dtrace_action_t *act = ecb->dte_action;
10644                         for (; act != NULL; act = act->dta_next) {
10645                                 if (act->dta_kind == DTRACEACT_COMMIT)
10650                 action = kmem_zalloc(sizeof (dtrace_action_t), KM_SLEEP);
10651                 action->dta_rec.dtrd_size = size;
10654         action->dta_refcnt = 1;
10655         rec = &action->dta_rec;
10656         size = rec->dtrd_size;
10658         for (mask = sizeof (uint64_t) - 1; size != 0 && mask > 0; mask >>= 1) {
10659                 if (!(size & mask)) {
10665         action->dta_kind = desc->dtad_kind;
10667         if ((action->dta_difo = dp) != NULL)
10668                 dtrace_difo_hold(dp);
10670         rec->dtrd_action = action->dta_kind;
10671         rec->dtrd_arg = arg;
10672         rec->dtrd_uarg = desc->dtad_uarg;
10673         rec->dtrd_alignment = (uint16_t)align;
10674         rec->dtrd_format = format;
10676         if ((last = ecb->dte_action_last) != NULL) {
10677                 ASSERT(ecb->dte_action != NULL);
10678                 action->dta_prev = last;
10679                 last->dta_next = action;
10681                 ASSERT(ecb->dte_action == NULL);
10682                 ecb->dte_action = action;
10685         ecb->dte_action_last = action;
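/*
 * An illustrative note on the alignment loop above (example values are
 * hypothetical): the loop scans mask values 7, 3, 1 looking for the largest
 * power of two, up to sizeof (uint64_t), that evenly divides the record
 * size; in the elided loop body (per the upstream illumos implementation)
 * align becomes mask + 1.  For example, a 24-byte record passes immediately
 * (24 & 7 == 0), giving align = 8; a 12-byte record fails at mask = 7
 * (12 & 7 == 4) but passes at mask = 3 (12 & 3 == 0), giving align = 4.
 */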
10691 dtrace_ecb_action_remove(dtrace_ecb_t *ecb)
10693         dtrace_action_t *act = ecb->dte_action, *next;
10694         dtrace_vstate_t *vstate = &ecb->dte_state->dts_vstate;
10698         if (act != NULL && act->dta_refcnt > 1) {
10699                 ASSERT(act->dta_next == NULL || act->dta_next->dta_refcnt == 1);
10702                 for (; act != NULL; act = next) {
10703                         next = act->dta_next;
10704                         ASSERT(next != NULL || act == ecb->dte_action_last);
10705                         ASSERT(act->dta_refcnt == 1);
10707                         if ((format = act->dta_rec.dtrd_format) != 0)
10708                                 dtrace_format_remove(ecb->dte_state, format);
10710                         if ((dp = act->dta_difo) != NULL)
10711                                 dtrace_difo_release(dp, vstate);
10713                         if (DTRACEACT_ISAGG(act->dta_kind)) {
10714                                 dtrace_ecb_aggregation_destroy(ecb, act);
10716                                 kmem_free(act, sizeof (dtrace_action_t));
10721         ecb->dte_action = NULL;
10722         ecb->dte_action_last = NULL;
10727 dtrace_ecb_disable(dtrace_ecb_t *ecb)
10730          * We disable the ECB by removing it from its probe.
10732         dtrace_ecb_t *pecb, *prev = NULL;
10733         dtrace_probe_t *probe = ecb->dte_probe;
10735         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
10737         if (probe == NULL) {
10739                  * This is the NULL probe; there is nothing to disable.
10744         for (pecb = probe->dtpr_ecb; pecb != NULL; pecb = pecb->dte_next) {
10750         ASSERT(pecb != NULL);
10752         if (prev == NULL) {
10753                 probe->dtpr_ecb = ecb->dte_next;
10755                 prev->dte_next = ecb->dte_next;
10758         if (ecb == probe->dtpr_ecb_last) {
10759                 ASSERT(ecb->dte_next == NULL);
10760                 probe->dtpr_ecb_last = prev;
10763         probe->dtpr_provider->dtpv_ecb_count--;
10765          * The ECB has been disconnected from the probe; now sync to assure
10766          * that all CPUs have seen the change before returning.
10770         if (probe->dtpr_ecb == NULL) {
10772                  * That was the last ECB on the probe; clear the predicate
10773                  * cache ID for the probe, disable it and sync one more time
10774                  * to assure that we'll never hit it again.
10776                 dtrace_provider_t *prov = probe->dtpr_provider;
10778                 ASSERT(ecb->dte_next == NULL);
10779                 ASSERT(probe->dtpr_ecb_last == NULL);
10780                 probe->dtpr_predcache = DTRACE_CACHEIDNONE;
10781                 prov->dtpv_pops.dtps_disable(prov->dtpv_arg,
10782                     probe->dtpr_id, probe->dtpr_arg);
10786                  * There is at least one ECB remaining on the probe.  If there
10787                  * is _exactly_ one, set the probe's predicate cache ID to be
10788                  * the predicate cache ID of the remaining ECB.
10790                 ASSERT(probe->dtpr_ecb_last != NULL);
10791                 ASSERT(probe->dtpr_predcache == DTRACE_CACHEIDNONE);
10793                 if (probe->dtpr_ecb == probe->dtpr_ecb_last) {
10794                         dtrace_predicate_t *p = probe->dtpr_ecb->dte_predicate;
10796                         ASSERT(probe->dtpr_ecb->dte_next == NULL);
10799                                 probe->dtpr_predcache = p->dtp_cacheid;
10802                 ecb->dte_next = NULL;
10807 dtrace_ecb_destroy(dtrace_ecb_t *ecb)
10809         dtrace_state_t *state = ecb->dte_state;
10810         dtrace_vstate_t *vstate = &state->dts_vstate;
10811         dtrace_predicate_t *pred;
10812         dtrace_epid_t epid = ecb->dte_epid;
10814         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
10815         ASSERT(ecb->dte_next == NULL);
10816         ASSERT(ecb->dte_probe == NULL || ecb->dte_probe->dtpr_ecb != ecb);
10818         if ((pred = ecb->dte_predicate) != NULL)
10819                 dtrace_predicate_release(pred, vstate);
10821         dtrace_ecb_action_remove(ecb);
10823         ASSERT(state->dts_ecbs[epid - 1] == ecb);
10824         state->dts_ecbs[epid - 1] = NULL;
10826         kmem_free(ecb, sizeof (dtrace_ecb_t));
10829 static dtrace_ecb_t *
10830 dtrace_ecb_create(dtrace_state_t *state, dtrace_probe_t *probe,
10831     dtrace_enabling_t *enab)
10834         dtrace_predicate_t *pred;
10835         dtrace_actdesc_t *act;
10836         dtrace_provider_t *prov;
10837         dtrace_ecbdesc_t *desc = enab->dten_current;
10839         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
10840         ASSERT(state != NULL);
10842         ecb = dtrace_ecb_add(state, probe);
10843         ecb->dte_uarg = desc->dted_uarg;
10845         if ((pred = desc->dted_pred.dtpdd_predicate) != NULL) {
10846                 dtrace_predicate_hold(pred);
10847                 ecb->dte_predicate = pred;
10850         if (probe != NULL) {
10852                  * If the provider shows more leg than the consumer is old
10853                  * enough to see, we need to enable the appropriate implicit
10854                  * predicate bits to prevent the ecb from activating at
10857                  * Providers specifying DTRACE_PRIV_USER at register time
10858                  * are stating that they need the /proc-style privilege
10859                  * model to be enforced, and this is what DTRACE_COND_OWNER
10860                  * and DTRACE_COND_ZONEOWNER will then do at probe time.
10862                 prov = probe->dtpr_provider;
10863                 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLPROC) &&
10864                     (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
10865                         ecb->dte_cond |= DTRACE_COND_OWNER;
10867                 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_ALLZONE) &&
10868                     (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_USER))
10869                         ecb->dte_cond |= DTRACE_COND_ZONEOWNER;
10872                  * If the provider shows us kernel innards and the user
10873                  * is lacking sufficient privilege, enable the
10874                  * DTRACE_COND_USERMODE implicit predicate.
10876                 if (!(state->dts_cred.dcr_visible & DTRACE_CRV_KERNEL) &&
10877                     (prov->dtpv_priv.dtpp_flags & DTRACE_PRIV_KERNEL))
10878                         ecb->dte_cond |= DTRACE_COND_USERMODE;
10881         if (dtrace_ecb_create_cache != NULL) {
10883                  * If we have a cached ecb, we'll use its action list instead
10884                  * of creating our own (saving both time and space).
10886                 dtrace_ecb_t *cached = dtrace_ecb_create_cache;
10887                 dtrace_action_t *act_if = cached->dte_action;
10889                 if (act_if != NULL) {
10890                         ASSERT(act_if->dta_refcnt > 0);
10891                         act_if->dta_refcnt++;
10892                         ecb->dte_action = act_if;
10893                         ecb->dte_action_last = cached->dte_action_last;
10894                         ecb->dte_needed = cached->dte_needed;
10895                         ecb->dte_size = cached->dte_size;
10896                         ecb->dte_alignment = cached->dte_alignment;
10902         for (act = desc->dted_action; act != NULL; act = act->dtad_next) {
10903                 if ((enab->dten_error = dtrace_ecb_action_add(ecb, act)) != 0) {
10904                         dtrace_ecb_destroy(ecb);
10909         if ((enab->dten_error = dtrace_ecb_resize(ecb)) != 0) {
10910                 dtrace_ecb_destroy(ecb);
10914         return (dtrace_ecb_create_cache = ecb);
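/*
 * For example (illustrative, per the upstream flow): when one ECB
 * description matches many probes, the second and subsequent
 * dtrace_ecb_create() calls find dtrace_ecb_create_cache set, bump
 * dta_refcnt on the cached action list and share it, rather than rebuilding
 * an identical action list for every matched probe.
 */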
10918 dtrace_ecb_create_enable(dtrace_probe_t *probe, void *arg1, void *arg2)
10921         dtrace_enabling_t *enab = arg1;
10922         dtrace_ecbdesc_t *ep = arg2;
10923         dtrace_state_t *state = enab->dten_vstate->dtvs_state;
10925         ASSERT(state != NULL);
10927         if (probe != NULL && ep != NULL && probe->dtpr_gen < ep->dted_probegen) {
10929                  * This probe was created in a generation for which this
10930                  * enabling has previously created ECBs; we don't want to
10931                  * enable it again, so just kick out.
10933                 return (DTRACE_MATCH_NEXT);
10936         if ((ecb = dtrace_ecb_create(state, probe, enab)) == NULL)
10937                 return (DTRACE_MATCH_DONE);
10939         if (dtrace_ecb_enable(ecb) < 0)
10940                 return (DTRACE_MATCH_FAIL);
10942         return (DTRACE_MATCH_NEXT);
10945 static dtrace_ecb_t *
10946 dtrace_epid2ecb(dtrace_state_t *state, dtrace_epid_t id)
10949 #pragma unused(ecb) /* __APPLE__ */
10951         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
10953         if (id == 0 || id > (dtrace_epid_t)state->dts_necbs)
10956         ASSERT(state->dts_necbs > 0 && state->dts_ecbs != NULL);
10957         ASSERT((ecb = state->dts_ecbs[id - 1]) == NULL || ecb->dte_epid == id);
10959         return (state->dts_ecbs[id - 1]);
10962 static dtrace_aggregation_t *
10963 dtrace_aggid2agg(dtrace_state_t *state, dtrace_aggid_t id)
10965         dtrace_aggregation_t *agg;
10966 #pragma unused(agg) /* __APPLE__ */
10968         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
10970         if (id == 0 || id > (dtrace_aggid_t)state->dts_naggregations)
10973         ASSERT(state->dts_naggregations > 0 && state->dts_aggregations != NULL);
10974         ASSERT((agg = state->dts_aggregations[id - 1]) == NULL ||
10975             agg->dtag_id == id);
10977         return (state->dts_aggregations[id - 1]);
10981  * DTrace Buffer Functions
10983  * The following functions manipulate DTrace buffers.  Most of these functions
10984  * are called in the context of establishing or processing consumer state;
10985  * exceptions are explicitly noted.
10989  * Note:  called from cross call context.  This function switches the two
10990  * buffers on a given CPU.  The atomicity of this operation is assured by
10991  * disabling interrupts while the actual switch takes place; the disabling of
10992  * interrupts serializes the execution with any execution of dtrace_probe() on
10996 dtrace_buffer_switch(dtrace_buffer_t *buf)
10998         caddr_t tomax = buf->dtb_tomax;
10999         caddr_t xamot = buf->dtb_xamot;
11000         dtrace_icookie_t cookie;
11003         ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
11004         ASSERT(!(buf->dtb_flags & DTRACEBUF_RING));
11006         cookie = dtrace_interrupt_disable();
11007         now = dtrace_gethrtime();
11008         buf->dtb_tomax = xamot;
11009         buf->dtb_xamot = tomax;
11010         buf->dtb_xamot_drops = buf->dtb_drops;
11011         buf->dtb_xamot_offset = buf->dtb_offset;
11012         buf->dtb_xamot_errors = buf->dtb_errors;
11013         buf->dtb_xamot_flags = buf->dtb_flags;
11014         buf->dtb_offset = 0;
11015         buf->dtb_drops = 0;
11016         buf->dtb_errors = 0;
11017         buf->dtb_flags &= ~(DTRACEBUF_ERROR | DTRACEBUF_DROPPED);
11018         buf->dtb_interval = now - buf->dtb_switched;
11019         buf->dtb_switched = now;
11020         buf->dtb_cur_limit = buf->dtb_limit;
11022         dtrace_interrupt_enable(cookie);
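/*
 * For example (illustrative values): if before the switch dtb_tomax = A,
 * dtb_xamot = B and dtb_offset = 512, then afterwards dtb_tomax = B,
 * dtb_xamot = A, dtb_xamot_offset = 512 and dtb_offset = 0 -- the consumer
 * can then read buffer A through the xamot fields while probes continue to
 * write into B.
 */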
11026  * Note:  called from cross call context.  This function activates a buffer
11027  * on a CPU.  As with dtrace_buffer_switch(), the atomicity of the operation
11028  * is guaranteed by the disabling of interrupts.
11031 dtrace_buffer_activate(dtrace_state_t *state)
11033         dtrace_buffer_t *buf;
11034         dtrace_icookie_t cookie = dtrace_interrupt_disable();
11036         buf = &state->dts_buffer[CPU->cpu_id];
11038         if (buf->dtb_tomax != NULL) {
11040                  * We might like to assert that the buffer is marked inactive,
11041                  * but this isn't necessarily true:  the buffer for the CPU
11042                  * that processes the BEGIN probe has its buffer activated
11043                  * manually.  In this case, we take the (harmless) action of
11044                  * re-clearing the INACTIVE bit.
11046                 buf->dtb_flags &= ~DTRACEBUF_INACTIVE;
11049         dtrace_interrupt_enable(cookie);
11053 dtrace_buffer_canalloc(size_t size)
11055         if (size > (UINT64_MAX - dtrace_buffer_memory_inuse))
11057         if ((size + dtrace_buffer_memory_inuse) > dtrace_buffer_memory_maxsize)
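/*
 * The first comparison above guards the addition in the second against
 * unsigned wraparound: with dtrace_buffer_memory_inuse near UINT64_MAX,
 * (size + dtrace_buffer_memory_inuse) could overflow and falsely pass the
 * maxsize test.  (The B_FALSE returns on both checks are elided in this
 * listing.)
 */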
11064 dtrace_buffer_alloc(dtrace_buffer_t *bufs, size_t limit, size_t size, int flags,
11068         dtrace_buffer_t *buf;
11069         size_t size_before_alloc = dtrace_buffer_memory_inuse;
11071         LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED);
11072         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
11074         if (size > (size_t)dtrace_nonroot_maxsize &&
11075             !PRIV_POLICY_CHOICE(CRED(), PRIV_ALL, B_FALSE))
11081                 if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
11084                 buf = &bufs[cp->cpu_id];
11087                  * If there is already a buffer allocated for this CPU, it
11088                  * is only possible that this is a DR event.  In this case,
11089                  * the buffer size must match our specified size.
11091                 if (buf->dtb_tomax != NULL) {
11092                         ASSERT(buf->dtb_size == size);
11096                 ASSERT(buf->dtb_xamot == NULL);
11098                 /* DTrace, please do not eat all the memory. */
11099                 if (dtrace_buffer_canalloc(size) == B_FALSE)
11101                 if ((buf->dtb_tomax = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
11103                 dtrace_buffer_memory_inuse += size;
11105                 /* Ensure that limit is always lower than size */
11106                 limit = limit == size ? limit - 1 : limit;
11107                 buf->dtb_cur_limit = limit;
11108                 buf->dtb_limit = limit;
11109                 buf->dtb_size = size;
11110                 buf->dtb_flags = flags;
11111                 buf->dtb_offset = 0;
11112                 buf->dtb_drops = 0;
11114                 if (flags & DTRACEBUF_NOSWITCH)
11117                 /* DTrace, please do not eat all the memory. */
11118                 if (dtrace_buffer_canalloc(size) == B_FALSE)
11120                 if ((buf->dtb_xamot = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
11122                 dtrace_buffer_memory_inuse += size;
11123         } while ((cp = cp->cpu_next) != cpu_list);
11125         ASSERT(dtrace_buffer_memory_inuse <= dtrace_buffer_memory_maxsize);
11133                 if (cpu != DTRACE_CPUALL && cpu != cp->cpu_id)
11136                 buf = &bufs[cp->cpu_id];
11138                 if (buf->dtb_xamot != NULL) {
11139                         ASSERT(buf->dtb_tomax != NULL);
11140                         ASSERT(buf->dtb_size == size);
11141                         kmem_free(buf->dtb_xamot, size);
11144                 if (buf->dtb_tomax != NULL) {
11145                         ASSERT(buf->dtb_size == size);
11146                         kmem_free(buf->dtb_tomax, size);
11149                 buf->dtb_tomax = NULL;
11150                 buf->dtb_xamot = NULL;
11152         } while ((cp = cp->cpu_next) != cpu_list);
11154         /* Restore the size saved before allocating memory */
11155         dtrace_buffer_memory_inuse = size_before_alloc;
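/*
 * A note on the error path above (a sketch of the intent): on any
 * allocation failure, the second do/while loop walks every CPU again,
 * freeing whichever of dtb_tomax/dtb_xamot were already allocated, and
 * dtrace_buffer_memory_inuse is restored from size_before_alloc so the
 * accounting again matches the (now empty) buffers.
 */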
11161  * Note:  called from probe context.  This function just increments the drop
11162  * count on a buffer.  It has been made a function to allow for the
11163  * possibility of understanding the source of mysterious drop counts.  (A
11164  * problem for which one may be particularly disappointed that DTrace cannot
11165  * be used to understand DTrace.)
11168 dtrace_buffer_drop(dtrace_buffer_t *buf)
11174  * Note:  called from probe context.  This function is called to reserve space
11175  * in a buffer.  If mstate is non-NULL, sets the scratch base and size in the
11176  * mstate.  Returns the new offset in the buffer, or a negative value if an
11177  * error has occurred.
11180 dtrace_buffer_reserve(dtrace_buffer_t *buf, size_t needed, size_t align,
11181     dtrace_state_t *state, dtrace_mstate_t *mstate)
11183         intptr_t offs = buf->dtb_offset, soffs;
11188         if (buf->dtb_flags & DTRACEBUF_INACTIVE)
11191         if ((tomax = buf->dtb_tomax) == NULL) {
11192                 dtrace_buffer_drop(buf);
11196         if (!(buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL))) {
11197                 while (offs & (align - 1)) {
11199                          * Assert that our alignment is off by a number which
11200                          * is itself sizeof (uint32_t) aligned.
11202                         ASSERT(!((align - (offs & (align - 1))) &
11203                             (sizeof (uint32_t) - 1)));
11204                         DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
11205                         offs += sizeof (uint32_t);
11208                 if ((uint64_t)(soffs = offs + needed) > buf->dtb_cur_limit) {
11209                         if (buf->dtb_cur_limit == buf->dtb_limit) {
11210                                 buf->dtb_cur_limit = buf->dtb_size;
11212                                 atomic_add_32(&state->dts_buf_over_limit, 1);
11214                                  * Set an AST on the current processor
11215                                  * so that we can wake up the process
11216                                  * outside of probe context, when we know
11217                                  * it is safe to do so
11219                                 minor_t minor = getminor(state->dts_dev);
11220                                 ASSERT(minor < 32);
11222                                 atomic_or_32(&dtrace_wake_clients, 1 << minor);
11225                         if ((uint64_t)soffs > buf->dtb_size) {
11226                                 dtrace_buffer_drop(buf);
11231                 if (mstate == NULL)
11234                 mstate->dtms_scratch_base = (uintptr_t)tomax + soffs;
11235                 mstate->dtms_scratch_size = buf->dtb_size - soffs;
11236                 mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;
11241         if (buf->dtb_flags & DTRACEBUF_FILL) {
11242                 if (state->dts_activity != DTRACE_ACTIVITY_COOLDOWN &&
11243                     (buf->dtb_flags & DTRACEBUF_FULL))
11248         total_off = needed + (offs & (align - 1));
11251          * For a ring buffer, life is quite a bit more complicated.  Before
11252          * we can store any padding, we need to adjust our wrapping offset.
11253          * (If we've never before wrapped or we're not about to, no adjustment
11256         if ((buf->dtb_flags & DTRACEBUF_WRAPPED) ||
11257             offs + total_off > buf->dtb_size) {
11258                 woffs = buf->dtb_xamot_offset;
11260                 if (offs + total_off > buf->dtb_size) {
11262                          * We can't fit in the end of the buffer.  First, a
11263                          * sanity check that we can fit in the buffer at all.
11265                         if (total_off > buf->dtb_size) {
11266                                 dtrace_buffer_drop(buf);
11271                          * We're going to be storing at the top of the buffer,
11272                          * so now we need to deal with the wrapped offset.  We
11273                          * only reset our wrapped offset to 0 if it is
11274                          * currently greater than the current offset.  If it
11275                          * is less than the current offset, it is because a
11276                          * previous allocation induced a wrap -- but the
11277                          * allocation didn't subsequently take the space due
11278                          * to an error or false predicate evaluation.  In this
11279                          * case, we'll just leave the wrapped offset alone: if
11280                          * the wrapped offset hasn't been advanced far enough
11281                          * for this allocation, it will be adjusted in the
11284                         if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
11292                          * Now we know that we're going to be storing to the
11293                          * top of the buffer and that there is room for us
11294                          * there.  We need to clear the buffer from the current
11295                          * offset to the end (there may be old gunk there).
11297                         while ((uint64_t)offs < buf->dtb_size)
11301                          * We need to set our offset to zero.  And because we
11302                          * are wrapping, we need to set the bit indicating as
11303                          * much.  We can also adjust our needed space back
11304                          * down to the space required by the ECB -- we know
11305                          * that the top of the buffer is aligned.
11308                         total_off = needed;
11309                         buf->dtb_flags |= DTRACEBUF_WRAPPED;
11312                          * There is room for us in the buffer, so we simply
11313                          * need to check the wrapped offset.
11315                         if (woffs < offs) {
11317                                  * The wrapped offset is less than the offset.
11318                                  * This can happen if we allocated buffer space
11319                                  * that induced a wrap, but then we didn't
11320                                  * subsequently take the space due to an error
11321                                  * or false predicate evaluation.  This is
11322                                  * okay; we know that _this_ allocation isn't
11323                                  * going to induce a wrap.  We still can't
11324                                  * reset the wrapped offset to be zero,
11325                                  * however: the space may have been trashed in
11326                                  * the previous failed probe attempt.  But at
11327                                  * least the wrapped offset doesn't need to
11328                                  * be adjusted at all...
11334                 while (offs + total_off > (size_t)woffs) {
11335                         dtrace_epid_t epid = *(uint32_t *)(tomax + woffs);
11338                         if (epid == DTRACE_EPIDNONE) {
11339                                 size = sizeof (uint32_t);
11341                                 ASSERT(epid <= (dtrace_epid_t)state->dts_necbs);
11342                                 ASSERT(state->dts_ecbs[epid - 1] != NULL);
11344                                 size = state->dts_ecbs[epid - 1]->dte_size;
11347                         ASSERT(woffs + size <= buf->dtb_size);
11350                         if (woffs + size == buf->dtb_size) {
11352                                  * We've reached the end of the buffer; we want
11353                                  * to set the wrapped offset to 0 and break
11354                                  * out.  However, if the offs is 0, then we're
11355                                  * in a strange edge-condition:  the amount of
11356                                  * space that we want to reserve plus the size
11357                                  * of the record that we're overwriting is
11358                                  * greater than the size of the buffer.  This
11359                                  * is problematic because if we reserve the
11360                                  * space but subsequently don't consume it (due
11361                                  * to a failed predicate or error) the wrapped
11362                                  * offset will be 0 -- yet the EPID at offset 0
11363                                  * will not be committed.  This situation is
11364                                  * relatively easy to deal with:  if we're in
11365                                  * this case, the buffer is indistinguishable
11366                                  * from one that hasn't wrapped; we need only
11367                                  * finish the job by clearing the wrapped bit,
11368                                  * explicitly setting the offset to be 0, and
11369                                  * zero'ing out the old data in the buffer.
11372                                         buf->dtb_flags &= ~DTRACEBUF_WRAPPED;
11373                                         buf->dtb_offset = 0;
11376                                         while ((uint64_t)woffs < buf->dtb_size)
11377                                                 tomax[woffs++] = 0;
11388                  * We have a wrapped offset.  It may be that the wrapped offset
11389                  * has become zero -- that's okay.
11391                 buf->dtb_xamot_offset = woffs;
11396          * Now we can plow the buffer with any necessary padding.
11398         while (offs & (align - 1)) {
11400                  * Assert that our alignment is off by a number which
11401                  * is itself sizeof (uint32_t) aligned.
11403                 ASSERT(!((align - (offs & (align - 1))) &
11404                     (sizeof (uint32_t) - 1)));
11405                 DTRACE_STORE(uint32_t, tomax, offs, DTRACE_EPIDNONE);
11406                 offs += sizeof (uint32_t);
11409         if (buf->dtb_flags & DTRACEBUF_FILL) {
11410                 if (offs + needed > buf->dtb_size - state->dts_reserve) {
11411                         buf->dtb_flags |= DTRACEBUF_FULL;
11416         if (mstate == NULL)
11420          * For ring buffers and fill buffers, the scratch space is always
11421          * the inactive buffer.
11423         mstate->dtms_scratch_base = (uintptr_t)buf->dtb_xamot;
11424         mstate->dtms_scratch_size = buf->dtb_size;
11425         mstate->dtms_scratch_ptr = mstate->dtms_scratch_base;
11431 dtrace_buffer_polish(dtrace_buffer_t *buf)
11433         ASSERT(buf->dtb_flags & DTRACEBUF_RING);
11434         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
11436         if (!(buf->dtb_flags & DTRACEBUF_WRAPPED))
11440          * We need to polish the ring buffer.  There are three cases:
11442          * - The first (and presumably most common) is that there is no gap
11443          *   between the buffer offset and the wrapped offset.  In this case,
11444          *   there is nothing in the buffer that isn't valid data; we can
11445          *   mark the buffer as polished and return.
11447          * - The second (less common than the first but still more common
11448          *   than the third) is that there is a gap between the buffer offset
11449          *   and the wrapped offset, and the wrapped offset is larger than the
11450          *   buffer offset.  This can happen because of an alignment issue, or
11451          *   can happen because of a call to dtrace_buffer_reserve() that
11452          *   didn't subsequently consume the buffer space.  In this case,
11453          *   we need to zero the data from the buffer offset to the wrapped
11456          * - The third (and least common) is that there is a gap between the
11457          *   buffer offset and the wrapped offset, but the wrapped offset is
11458          *   _less_ than the buffer offset.  This can only happen because a
11459          *   call to dtrace_buffer_reserve() induced a wrap, but the space
11460          *   was not subsequently consumed.  In this case, we need to zero the
11461          *   space from the offset to the end of the buffer _and_ from the
11462          *   top of the buffer to the wrapped offset.
11464         if (buf->dtb_offset < buf->dtb_xamot_offset) {
11465                 bzero(buf->dtb_tomax + buf->dtb_offset,
11466                     buf->dtb_xamot_offset - buf->dtb_offset);
11469         if (buf->dtb_offset > buf->dtb_xamot_offset) {
11470                 bzero(buf->dtb_tomax + buf->dtb_offset,
11471                     buf->dtb_size - buf->dtb_offset);
11472                 bzero(buf->dtb_tomax, buf->dtb_xamot_offset);
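/*
 * For example (illustrative values): in a 1024-byte ring buffer with
 * dtb_offset = 100 and dtb_xamot_offset = 300 (the second case above),
 * bytes [100, 300) are zeroed; with dtb_offset = 300 and
 * dtb_xamot_offset = 100 (the third case), bytes [300, 1024) and [0, 100)
 * are zeroed instead.
 */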
11477 dtrace_buffer_free(dtrace_buffer_t *bufs)
11481         for (i = 0; i < (int)NCPU; i++) {
11482                 dtrace_buffer_t *buf = &bufs[i];
11484                 if (buf->dtb_tomax == NULL) {
11485                         ASSERT(buf->dtb_xamot == NULL);
11486                         ASSERT(buf->dtb_size == 0);
11490                 if (buf->dtb_xamot != NULL) {
11491                         ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));
11492                         kmem_free(buf->dtb_xamot, buf->dtb_size);
11494                         ASSERT(dtrace_buffer_memory_inuse >= buf->dtb_size);
11495                         dtrace_buffer_memory_inuse -= buf->dtb_size;
11498                 kmem_free(buf->dtb_tomax, buf->dtb_size);
11499                 ASSERT(dtrace_buffer_memory_inuse >= buf->dtb_size);
11500                 dtrace_buffer_memory_inuse -= buf->dtb_size;
11503                 buf->dtb_tomax = NULL;
11504                 buf->dtb_xamot = NULL;
11509  * DTrace Enabling Functions
11511 static dtrace_enabling_t *
11512 dtrace_enabling_create(dtrace_vstate_t *vstate)
11514         dtrace_enabling_t *enab;
11516         enab = kmem_zalloc(sizeof (dtrace_enabling_t), KM_SLEEP);
11517         enab->dten_vstate = vstate;
11523 dtrace_enabling_add(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb)
11525         dtrace_ecbdesc_t **ndesc;
11526         size_t osize, nsize;
11529          * We can't add to enablings after we've enabled them, or after we've
11532         ASSERT(enab->dten_probegen == 0);
11533         ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);
11535         /* APPLE NOTE: this protects against gcc 4.0 botch on x86 */
11536         if (ecb == NULL) return;
11538         if (enab->dten_ndesc < enab->dten_maxdesc) {
11539                 enab->dten_desc[enab->dten_ndesc++] = ecb;
11543         osize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);
11545         if (enab->dten_maxdesc == 0) {
11546                 enab->dten_maxdesc = 1;
11548                 enab->dten_maxdesc <<= 1;
11551         ASSERT(enab->dten_ndesc < enab->dten_maxdesc);
11553         nsize = enab->dten_maxdesc * sizeof (dtrace_enabling_t *);
11554         ndesc = kmem_zalloc(nsize, KM_SLEEP);
11555         bcopy(enab->dten_desc, ndesc, osize);
11556         kmem_free(enab->dten_desc, osize);
11558         enab->dten_desc = ndesc;
11559         enab->dten_desc[enab->dten_ndesc++] = ecb;
11563 dtrace_enabling_addlike(dtrace_enabling_t *enab, dtrace_ecbdesc_t *ecb,
11564     dtrace_probedesc_t *pd)
11566         dtrace_ecbdesc_t *new;
11567         dtrace_predicate_t *pred;
11568         dtrace_actdesc_t *act;
11571          * We're going to create a new ECB description that matches the
11572          * specified ECB in every way, but has the specified probe description.
11574         new = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP);
11576         if ((pred = ecb->dted_pred.dtpdd_predicate) != NULL)
11577                 dtrace_predicate_hold(pred);
11579         for (act = ecb->dted_action; act != NULL; act = act->dtad_next)
11580                 dtrace_actdesc_hold(act);
11582         new->dted_action = ecb->dted_action;
11583         new->dted_pred = ecb->dted_pred;
11584         new->dted_probe = *pd;
11585         new->dted_uarg = ecb->dted_uarg;
11587         dtrace_enabling_add(enab, new);
11591 dtrace_enabling_dump(dtrace_enabling_t *enab)
11595         for (i = 0; i < enab->dten_ndesc; i++) {
11596                 dtrace_probedesc_t *desc = &enab->dten_desc[i]->dted_probe;
11598                 cmn_err(CE_NOTE, "enabling probe %d (%s:%s:%s:%s)", i,
11599                     desc->dtpd_provider, desc->dtpd_mod,
11600                     desc->dtpd_func, desc->dtpd_name);
11605 dtrace_enabling_destroy(dtrace_enabling_t *enab)
11608         dtrace_ecbdesc_t *ep;
11609         dtrace_vstate_t *vstate = enab->dten_vstate;
11611         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
11613         for (i = 0; i < enab->dten_ndesc; i++) {
11614                 dtrace_actdesc_t *act, *next;
11615                 dtrace_predicate_t *pred;
11617                 ep = enab->dten_desc[i];
11619                 if ((pred = ep->dted_pred.dtpdd_predicate) != NULL)
11620                         dtrace_predicate_release(pred, vstate);
11622                 for (act = ep->dted_action; act != NULL; act = next) {
11623                         next = act->dtad_next;
11624                         dtrace_actdesc_release(act, vstate);
11627                 kmem_free(ep, sizeof (dtrace_ecbdesc_t));
11630         kmem_free(enab->dten_desc,
11631             enab->dten_maxdesc * sizeof (dtrace_enabling_t *));
11634          * If this was a retained enabling, decrement the dts_nretained count
11635          * and take it off of the dtrace_retained list.
11637         if (enab->dten_prev != NULL || enab->dten_next != NULL ||
11638             dtrace_retained == enab) {
11639                 ASSERT(enab->dten_vstate->dtvs_state != NULL);
11640                 ASSERT(enab->dten_vstate->dtvs_state->dts_nretained > 0);
11641                 enab->dten_vstate->dtvs_state->dts_nretained--;
11642                 dtrace_retained_gen++;
11645         if (enab->dten_prev == NULL) {
11646                 if (dtrace_retained == enab) {
11647                         dtrace_retained = enab->dten_next;
11649                         if (dtrace_retained != NULL)
11650                                 dtrace_retained->dten_prev = NULL;
11653                 ASSERT(enab != dtrace_retained);
11654                 ASSERT(dtrace_retained != NULL);
11655                 enab->dten_prev->dten_next = enab->dten_next;
11658         if (enab->dten_next != NULL) {
11659                 ASSERT(dtrace_retained != NULL);
11660                 enab->dten_next->dten_prev = enab->dten_prev;
11663         kmem_free(enab, sizeof (dtrace_enabling_t));
11667 dtrace_enabling_retain(dtrace_enabling_t *enab)
11669         dtrace_state_t *state;
11671         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
11672         ASSERT(enab->dten_next == NULL && enab->dten_prev == NULL);
11673         ASSERT(enab->dten_vstate != NULL);
11675         state = enab->dten_vstate->dtvs_state;
11676         ASSERT(state != NULL);
11679          * We only allow each state to retain dtrace_retain_max enablings.
11681         if (state->dts_nretained >= dtrace_retain_max)
11684         state->dts_nretained++;
11685         dtrace_retained_gen++;
11687         if (dtrace_retained == NULL) {
11688                 dtrace_retained = enab;
11692         enab->dten_next = dtrace_retained;
11693         dtrace_retained->dten_prev = enab;
11694         dtrace_retained = enab;
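/*
 * The list manipulation above is a plain head insertion into the doubly
 * linked dtrace_retained list; e.g. (illustrative), retaining C into
 * A <-> B yields C <-> A <-> B with dtrace_retained = C.
 */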
11700 dtrace_enabling_replicate(dtrace_state_t *state, dtrace_probedesc_t *match,
11701     dtrace_probedesc_t *create)
11703         dtrace_enabling_t *new, *enab;
11704         int found = 0, err = ENOENT;
11706         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
11707         ASSERT(strlen(match->dtpd_provider) < DTRACE_PROVNAMELEN);
11708         ASSERT(strlen(match->dtpd_mod) < DTRACE_MODNAMELEN);
11709         ASSERT(strlen(match->dtpd_func) < DTRACE_FUNCNAMELEN);
11710         ASSERT(strlen(match->dtpd_name) < DTRACE_NAMELEN);
11712         new = dtrace_enabling_create(&state->dts_vstate);
11715          * Iterate over all retained enablings, looking for enablings that
11716          * match the specified state.
11718         for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
11722                  * dtvs_state can only be NULL for helper enablings -- and
11723                  * helper enablings can't be retained.
11725                 ASSERT(enab->dten_vstate->dtvs_state != NULL);
11727                 if (enab->dten_vstate->dtvs_state != state)
11731                  * Now iterate over each probe description; we're looking for
11732                  * an exact match to the specified probe description.
11734                 for (i = 0; i < enab->dten_ndesc; i++) {
11735                         dtrace_ecbdesc_t *ep = enab->dten_desc[i];
11736                         dtrace_probedesc_t *pd = &ep->dted_probe;
11738                         /* APPLE NOTE: Darwin employs size bounded string operation. */
11739                         if (strncmp(pd->dtpd_provider, match->dtpd_provider, DTRACE_PROVNAMELEN))
11742                         if (strncmp(pd->dtpd_mod, match->dtpd_mod, DTRACE_MODNAMELEN))
11745                         if (strncmp(pd->dtpd_func, match->dtpd_func, DTRACE_FUNCNAMELEN))
11748                         if (strncmp(pd->dtpd_name, match->dtpd_name, DTRACE_NAMELEN))
11752                          * We have a winning probe!  Add it to our growing
11756                         dtrace_enabling_addlike(new, ep, create);
11760         if (!found || (err = dtrace_enabling_retain(new)) != 0) {
11761                 dtrace_enabling_destroy(new);
11769 dtrace_enabling_retract(dtrace_state_t *state)
11771         dtrace_enabling_t *enab, *next;
11773         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
11776          * Iterate over all retained enablings, destroying the enablings
11777          * retained for the specified state.
11779         for (enab = dtrace_retained; enab != NULL; enab = next) {
11780                 next = enab->dten_next;
11783                  * dtvs_state can only be NULL for helper enablings -- and
11784                  * helper enablings can't be retained.
11786                 ASSERT(enab->dten_vstate->dtvs_state != NULL);
11788                 if (enab->dten_vstate->dtvs_state == state) {
11789                         ASSERT(state->dts_nretained > 0);
11790                         dtrace_enabling_destroy(enab);
11794         ASSERT(state->dts_nretained == 0);
11798 dtrace_enabling_match(dtrace_enabling_t *enab, int *nmatched, dtrace_match_cond_t *cond)
11801         int total_matched = 0, matched = 0;
11803         LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED);
11804         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
11806         for (i = 0; i < enab->dten_ndesc; i++) {
11807                 dtrace_ecbdesc_t *ep = enab->dten_desc[i];
11809                 enab->dten_current = ep;
11810                 enab->dten_error = 0;
11813                  * Before doing a dtrace_probe_enable, which is really
11814                  * expensive, check that this enabling satisfies the matching
11815                  * precondition.
11817                 if (cond && (cond->dmc_func(&ep->dted_probe, cond->dmc_data) == 0)) {
11821                  * If a provider failed to enable a probe then get out and
11822                  * let the consumer know we failed.
11824                 if ((matched = dtrace_probe_enable(&ep->dted_probe, enab, ep)) < 0)
11827                 total_matched += matched;
11829                 if (enab->dten_error != 0) {
11831                          * If we get an error half-way through enabling the
11832                          * probes, we kick out -- perhaps with some number of
11833                          * them enabled.  Leaving enabled probes enabled may
11834                          * be slightly confusing for user-level, but we expect
11835                          * that no one will attempt to actually drive on in
11836                          * the face of such errors.  If this is an anonymous
11837                          * enabling (indicated with a NULL nmatched pointer),
11838                          * we cmn_err() a message.  We aren't expecting to
11839                          * get such an error -- insofar as it can exist at all,
11840                          * it would be a result of corrupted DOF in the driver
11843                         if (nmatched == NULL) {
11844                                 cmn_err(CE_WARN, "dtrace_enabling_match() "
11845                                     "error on %p: %d", (void *)ep,
11849                         return (enab->dten_error);
11852                 ep->dted_probegen = dtrace_probegen;
11855         if (nmatched != NULL)
11856                 *nmatched = total_matched;
11862 dtrace_enabling_matchall_with_cond(dtrace_match_cond_t *cond)
11864         dtrace_enabling_t *enab;
11866         lck_mtx_lock(&cpu_lock);
11867         lck_mtx_lock(&dtrace_lock);
11870          * Iterate over all retained enablings to see if any probes match
11871          * against them.  We only perform this operation on enablings for which
11872          * we have sufficient permissions by virtue of being in the global zone
11873          * or in the same zone as the DTrace client.  Because we can be called
11874          * after dtrace_detach() has been called, we cannot assert that there
11875          * are retained enablings.  We can safely load from dtrace_retained,
11876          * however:  the taskq_destroy() at the end of dtrace_detach() will
11877          * block pending our completion.
11881          * Darwin doesn't do zones.
11882          * Behave as if always in the "global" zone.
11884         for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
11885                 (void) dtrace_enabling_match(enab, NULL, cond);
11888         lck_mtx_unlock(&dtrace_lock);
11889         lck_mtx_unlock(&cpu_lock);
11894 dtrace_enabling_matchall(void)
11896         dtrace_enabling_matchall_with_cond(NULL);
11902  * If an enabling is to be enabled without having matched probes (that is, if
11903  * dtrace_state_go() is to be called on the underlying dtrace_state_t), the
11904  * enabling must be _primed_ by creating an ECB for every ECB description.
11905  * This must be done to assure that we know the number of speculations, the
11906  * number of aggregations, the minimum buffer size needed, etc. before we
11907  * transition out of DTRACE_ACTIVITY_INACTIVE.  To do this without actually
11908  * enabling any probes, we create ECBs for every ECB description, but with a
11909  * NULL probe -- which is exactly what this function does.
11912 dtrace_enabling_prime(dtrace_state_t *state)
11914         dtrace_enabling_t *enab;
11917         for (enab = dtrace_retained; enab != NULL; enab = enab->dten_next) {
11918                 ASSERT(enab->dten_vstate->dtvs_state != NULL);
11920                 if (enab->dten_vstate->dtvs_state != state)
11924                  * We don't want to prime an enabling more than once, lest
11925                  * we allow a malicious user to induce resource exhaustion.
11926                  * (The ECBs that result from priming an enabling aren't
11927                  * leaked -- but they also aren't deallocated until the
11928                  * consumer state is destroyed.)
11930                 if (enab->dten_primed)
11933                 for (i = 0; i < enab->dten_ndesc; i++) {
11934                         enab->dten_current = enab->dten_desc[i];
11935                         (void) dtrace_probe_enable(NULL, enab, NULL);
11938                 enab->dten_primed = 1;
11943  * Called to indicate that probes should be provided due to retained
11944  * enablings.  This is implemented in terms of dtrace_probe_provide(), but it
11945  * must take an initial lap through the enabling calling the dtps_provide()
11946  * entry point explicitly to allow for autocreated probes.
11949 dtrace_enabling_provide(dtrace_provider_t *prv)
11952         dtrace_probedesc_t desc;
11953         dtrace_genid_t gen;
11955         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
11956         LCK_MTX_ASSERT(&dtrace_provider_lock, LCK_MTX_ASSERT_OWNED);
11960                 prv = dtrace_provider;
11964                 dtrace_enabling_t *enab;
11965                 void *parg = prv->dtpv_arg;
11968                 gen = dtrace_retained_gen;
11969                 for (enab = dtrace_retained; enab != NULL;
11970                     enab = enab->dten_next) {
11971                         for (i = 0; i < enab->dten_ndesc; i++) {
11972                                 desc = enab->dten_desc[i]->dted_probe;
11973                                 lck_mtx_unlock(&dtrace_lock);
11974                                 prv->dtpv_pops.dtps_provide(parg, &desc);
11975                                 lck_mtx_lock(&dtrace_lock);
11977                                  * Process the retained enablings again if
11978                                  * they have changed while we weren't holding
11981                                 if (gen != dtrace_retained_gen)
11985         } while (all && (prv = prv->dtpv_next) != NULL);
11987         lck_mtx_unlock(&dtrace_lock);
11988         dtrace_probe_provide(NULL, all ? NULL : prv);
11989         lck_mtx_lock(&dtrace_lock);
11993  * DTrace DOF Functions
11997 dtrace_dof_error(dof_hdr_t *dof, const char *str)
11999 #pragma unused(dof) /* __APPLE__ */
12000         if (dtrace_err_verbose)
12001                 cmn_err(CE_WARN, "failed to process DOF: %s", str);
12003 #ifdef DTRACE_ERRDEBUG
12004         dtrace_errdebug(str);
12009  * Create DOF out of a currently enabled state.  Right now, we only create
12010  * DOF containing the run-time options -- but this could be expanded to create
12011  * complete DOF representing the enabled state.
12014 dtrace_dof_create(dtrace_state_t *state)
12018         dof_optdesc_t *opt;
12019         int i, len = sizeof (dof_hdr_t) +
12020             roundup(sizeof (dof_sec_t), sizeof (uint64_t)) +
12021             sizeof (dof_optdesc_t) * DTRACEOPT_MAX;
12023         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
12025         dof = dt_kmem_zalloc_aligned(len, 8, KM_SLEEP);
12026         dof->dofh_ident[DOF_ID_MAG0] = DOF_MAG_MAG0;
12027         dof->dofh_ident[DOF_ID_MAG1] = DOF_MAG_MAG1;
12028         dof->dofh_ident[DOF_ID_MAG2] = DOF_MAG_MAG2;
12029         dof->dofh_ident[DOF_ID_MAG3] = DOF_MAG_MAG3;
12031         dof->dofh_ident[DOF_ID_MODEL] = DOF_MODEL_NATIVE;
12032         dof->dofh_ident[DOF_ID_ENCODING] = DOF_ENCODE_NATIVE;
12033         dof->dofh_ident[DOF_ID_VERSION] = DOF_VERSION;
12034         dof->dofh_ident[DOF_ID_DIFVERS] = DIF_VERSION;
12035         dof->dofh_ident[DOF_ID_DIFIREG] = DIF_DIR_NREGS;
12036         dof->dofh_ident[DOF_ID_DIFTREG] = DIF_DTR_NREGS;
12038         dof->dofh_flags = 0;
12039         dof->dofh_hdrsize = sizeof (dof_hdr_t);
12040         dof->dofh_secsize = sizeof (dof_sec_t);
12041         dof->dofh_secnum = 1;   /* only DOF_SECT_OPTDESC */
12042         dof->dofh_secoff = sizeof (dof_hdr_t);
12043         dof->dofh_loadsz = len;
12044         dof->dofh_filesz = len;
12048          * Fill in the option section header...
12050         sec = (dof_sec_t *)((uintptr_t)dof + sizeof (dof_hdr_t));
12051         sec->dofs_type = DOF_SECT_OPTDESC;
12052         sec->dofs_align = sizeof (uint64_t);
12053         sec->dofs_flags = DOF_SECF_LOAD;
12054         sec->dofs_entsize = sizeof (dof_optdesc_t);
12056         opt = (dof_optdesc_t *)((uintptr_t)sec +
12057             roundup(sizeof (dof_sec_t), sizeof (uint64_t)));
12059         sec->dofs_offset = (uintptr_t)opt - (uintptr_t)dof;
12060         sec->dofs_size = sizeof (dof_optdesc_t) * DTRACEOPT_MAX;
12062         for (i = 0; i < DTRACEOPT_MAX; i++) {
12063                 opt[i].dofo_option = i;
12064                 opt[i].dofo_strtab = DOF_SECIDX_NONE;
12065                 opt[i].dofo_value = state->dts_options[i];
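/*
 * The resulting layout (illustrative, assuming the structure sizes used in
 * this file):
 *
 *      offset 0:                            dof_hdr_t
 *      sizeof (dof_hdr_t):                  dof_sec_t (DOF_SECT_OPTDESC)
 *      next uint64_t-rounded boundary:      dof_optdesc_t[DTRACEOPT_MAX]
 *
 * dofs_offset is computed from the actual opt pointer, so the option array
 * always begins on a uint64_t boundary regardless of sizeof (dof_sec_t).
 */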
12072 dtrace_dof_copyin(user_addr_t uarg, int *errp)
12074         dof_hdr_t hdr, *dof;
12076         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED);
12079          * First, we're going to copyin() the sizeof (dof_hdr_t).
12081         if (copyin(uarg, &hdr, sizeof (hdr)) != 0) {
12082                 dtrace_dof_error(NULL, "failed to copyin DOF header");
12088          * Now we'll allocate the entire DOF and copy it in -- provided
12089          * that the length isn't outrageous.
12091         if (hdr.dofh_loadsz >= (uint64_t)dtrace_dof_maxsize) {
12092                 dtrace_dof_error(&hdr, "load size exceeds maximum");
12097         if (hdr.dofh_loadsz < sizeof (hdr)) {
12098                 dtrace_dof_error(&hdr, "invalid load size");
12103         dof = dt_kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP);
12105         if (copyin(uarg, dof, hdr.dofh_loadsz) != 0 ||
12106             dof->dofh_loadsz != hdr.dofh_loadsz) {
12107                 dt_kmem_free_aligned(dof, hdr.dofh_loadsz);
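/*
 * Note the recheck of dofh_loadsz after the second copyin above: the header
 * was validated from a first snapshot, and a racing writer in user space
 * could change it between the two copies.  Re-comparing against the
 * originally validated hdr.dofh_loadsz keeps the allocation size and the
 * copied length consistent (a classic time-of-check/time-of-use guard).
 */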
12116 dtrace_dof_copyin_from_proc(proc_t* p, user_addr_t uarg, int *errp)
12118         dof_hdr_t hdr, *dof;
12120         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED);
12123          * First, we're going to copyin() the sizeof (dof_hdr_t).
12125         if (uread(p, &hdr, sizeof(hdr), uarg) != KERN_SUCCESS) {
12126                 dtrace_dof_error(NULL, "failed to copyin DOF header");
12132          * Now we'll allocate the entire DOF and copy it in -- provided
12133          * that the length isn't outrageous.
12135         if (hdr.dofh_loadsz >= (uint64_t)dtrace_dof_maxsize) {
12136                 dtrace_dof_error(&hdr, "load size exceeds maximum");
12141         if (hdr.dofh_loadsz < sizeof (hdr)) {
12142                 dtrace_dof_error(&hdr, "invalid load size");
12147         dof = dt_kmem_alloc_aligned(hdr.dofh_loadsz, 8, KM_SLEEP);
12149         if (uread(p, dof, hdr.dofh_loadsz, uarg) != KERN_SUCCESS) {
12150                 dt_kmem_free_aligned(dof, hdr.dofh_loadsz);
static dof_hdr_t *
dtrace_dof_property(const char *name)
{
	uchar_t *buf;
	uint64_t loadsz;
	unsigned int len, i;
	dof_hdr_t *dof;

	/*
	 * Unfortunately, arrays of values in .conf files are always (and
	 * only) interpreted to be integer arrays.  We must read our DOF
	 * as an integer array, and then squeeze it into a byte array.
	 */
	if (ddi_prop_lookup_int_array(DDI_DEV_T_ANY, dtrace_devi, 0,
	    name, (int **)&buf, &len) != DDI_PROP_SUCCESS)
		return (NULL);

	for (i = 0; i < len; i++)
		buf[i] = (uchar_t)(((int *)buf)[i]);

	if (len < sizeof (dof_hdr_t)) {
		ddi_prop_free(buf);
		dtrace_dof_error(NULL, "truncated header");
		return (NULL);
	}

	if (len < (loadsz = ((dof_hdr_t *)buf)->dofh_loadsz)) {
		ddi_prop_free(buf);
		dtrace_dof_error(NULL, "truncated DOF");
		return (NULL);
	}

	if (loadsz >= (uint64_t)dtrace_dof_maxsize) {
		ddi_prop_free(buf);
		dtrace_dof_error(NULL, "oversized DOF");
		return (NULL);
	}

	dof = dt_kmem_alloc_aligned(loadsz, 8, KM_SLEEP);
	bcopy(buf, dof, loadsz);
	ddi_prop_free(buf);

	return (dof);
}

static void
dtrace_dof_destroy(dof_hdr_t *dof)
{
	dt_kmem_free_aligned(dof, dof->dofh_loadsz);
}
/*
 * Return the dof_sec_t pointer corresponding to a given section index.  If the
 * index is not valid, dtrace_dof_error() is called and NULL is returned.  If
 * a type other than DOF_SECT_NONE is specified, the header is checked against
 * this type and NULL is returned if the types do not match.
 */
static dof_sec_t *
dtrace_dof_sect(dof_hdr_t *dof, uint32_t type, dof_secidx_t i)
{
	dof_sec_t *sec = (dof_sec_t *)(uintptr_t)
	    ((uintptr_t)dof + dof->dofh_secoff + i * dof->dofh_secsize);

	if (i >= dof->dofh_secnum) {
		dtrace_dof_error(dof, "referenced section index is invalid");
		return (NULL);
	}

	if (!(sec->dofs_flags & DOF_SECF_LOAD)) {
		dtrace_dof_error(dof, "referenced section is not loadable");
		return (NULL);
	}

	if (type != DOF_SECT_NONE && type != sec->dofs_type) {
		dtrace_dof_error(dof, "referenced section is the wrong type");
		return (NULL);
	}

	return (sec);
}
static dtrace_probedesc_t *
dtrace_dof_probedesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_probedesc_t *desc)
{
	dof_probedesc_t *probe;
	dof_sec_t *strtab;
	uintptr_t daddr = (uintptr_t)dof;
	uintptr_t str;
	size_t size;

	if (sec->dofs_type != DOF_SECT_PROBEDESC) {
		dtrace_dof_error(dof, "invalid probe section");
		return (NULL);
	}

	if (sec->dofs_align != sizeof (dof_secidx_t)) {
		dtrace_dof_error(dof, "bad alignment in probe description");
		return (NULL);
	}

	if (sec->dofs_offset + sizeof (dof_probedesc_t) > dof->dofh_loadsz) {
		dtrace_dof_error(dof, "truncated probe description");
		return (NULL);
	}

	probe = (dof_probedesc_t *)(uintptr_t)(daddr + sec->dofs_offset);
	strtab = dtrace_dof_sect(dof, DOF_SECT_STRTAB, probe->dofp_strtab);

	if (strtab == NULL)
		return (NULL);

	str = daddr + strtab->dofs_offset;
	size = strtab->dofs_size;

	if (probe->dofp_provider >= strtab->dofs_size) {
		dtrace_dof_error(dof, "corrupt probe provider");
		return (NULL);
	}

	(void) strncpy(desc->dtpd_provider,
	    (char *)(str + probe->dofp_provider),
	    MIN(DTRACE_PROVNAMELEN - 1, size - probe->dofp_provider));

	/* APPLE NOTE: Darwin employs size bounded string operation. */
	desc->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';

	if (probe->dofp_mod >= strtab->dofs_size) {
		dtrace_dof_error(dof, "corrupt probe module");
		return (NULL);
	}

	(void) strncpy(desc->dtpd_mod, (char *)(str + probe->dofp_mod),
	    MIN(DTRACE_MODNAMELEN - 1, size - probe->dofp_mod));

	/* APPLE NOTE: Darwin employs size bounded string operation. */
	desc->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';

	if (probe->dofp_func >= strtab->dofs_size) {
		dtrace_dof_error(dof, "corrupt probe function");
		return (NULL);
	}

	(void) strncpy(desc->dtpd_func, (char *)(str + probe->dofp_func),
	    MIN(DTRACE_FUNCNAMELEN - 1, size - probe->dofp_func));

	/* APPLE NOTE: Darwin employs size bounded string operation. */
	desc->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';

	if (probe->dofp_name >= strtab->dofs_size) {
		dtrace_dof_error(dof, "corrupt probe name");
		return (NULL);
	}

	(void) strncpy(desc->dtpd_name, (char *)(str + probe->dofp_name),
	    MIN(DTRACE_NAMELEN - 1, size - probe->dofp_name));

	/* APPLE NOTE: Darwin employs size bounded string operation. */
	desc->dtpd_name[DTRACE_NAMELEN - 1] = '\0';

	return (desc);
}
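
/*
 * The "size bounded string operation" notes above refer to one idiom: each
 * strncpy() is bounded by both the destination capacity and the bytes that
 * remain in the string table, and is followed by an explicit NUL store,
 * since strncpy() does not guarantee termination.  A compiled-out sketch of
 * that idiom (dof_copy_name is illustrative; callers are assumed to have
 * verified offs < strtab_size, as the code above does):
 */
#if 0
static void
dof_copy_name(char *dst, size_t dstlen, const char *strtab,
    size_t strtab_size, uint64_t offs)
{
	/* never read past the string table, never overfill dst */
	(void) strncpy(dst, strtab + offs,
	    MIN(dstlen - 1, strtab_size - offs));
	dst[dstlen - 1] = '\0';	/* force termination */
}
#endif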
static dtrace_difo_t *
dtrace_dof_difo(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
    cred_t *cr)
{
	dtrace_difo_t *dp;
	dof_difohdr_t *dofd;
	uintptr_t daddr = (uintptr_t)dof;
	size_t max_size = dtrace_difo_maxsize;
	uint_t i;
	int l, n;
	uint64_t ttl = 0;

	static const struct {
		int section;
		int bufoffs;
		int lenoffs;
		int entsize;
		int align;
		const char *msg;
	} difo[] = {
		{ DOF_SECT_DIF, offsetof(dtrace_difo_t, dtdo_buf),
		offsetof(dtrace_difo_t, dtdo_len), sizeof (dif_instr_t),
		sizeof (dif_instr_t), "multiple DIF sections" },

		{ DOF_SECT_INTTAB, offsetof(dtrace_difo_t, dtdo_inttab),
		offsetof(dtrace_difo_t, dtdo_intlen), sizeof (uint64_t),
		sizeof (uint64_t), "multiple integer tables" },

		{ DOF_SECT_STRTAB, offsetof(dtrace_difo_t, dtdo_strtab),
		offsetof(dtrace_difo_t, dtdo_strlen), 0,
		sizeof (char), "multiple string tables" },

		{ DOF_SECT_VARTAB, offsetof(dtrace_difo_t, dtdo_vartab),
		offsetof(dtrace_difo_t, dtdo_varlen), sizeof (dtrace_difv_t),
		sizeof (uint_t), "multiple variable tables" },

		{ DOF_SECT_NONE, 0, 0, 0, 0, NULL }
	};

	if (sec->dofs_type != DOF_SECT_DIFOHDR) {
		dtrace_dof_error(dof, "invalid DIFO header section");
		return (NULL);
	}

	if (sec->dofs_align != sizeof (dof_secidx_t)) {
		dtrace_dof_error(dof, "bad alignment in DIFO header");
		return (NULL);
	}

	if (sec->dofs_size < sizeof (dof_difohdr_t) ||
	    sec->dofs_size % sizeof (dof_secidx_t)) {
		dtrace_dof_error(dof, "bad size in DIFO header");
		return (NULL);
	}

	dofd = (dof_difohdr_t *)(uintptr_t)(daddr + sec->dofs_offset);
	n = (sec->dofs_size - sizeof (*dofd)) / sizeof (dof_secidx_t) + 1;

	dp = kmem_zalloc(sizeof (dtrace_difo_t), KM_SLEEP);
	dp->dtdo_rtype = dofd->dofd_rtype;

	for (l = 0; l < n; l++) {
		dof_sec_t *subsec;
		void **bufp;
		uint32_t *lenp;

		if ((subsec = dtrace_dof_sect(dof, DOF_SECT_NONE,
		    dofd->dofd_links[l])) == NULL)
			goto err; /* invalid section link */

		if (ttl + subsec->dofs_size > max_size) {
			dtrace_dof_error(dof, "exceeds maximum size");
			goto err;
		}

		ttl += subsec->dofs_size;

		for (i = 0; difo[i].section != DOF_SECT_NONE; i++) {

			if (subsec->dofs_type != (uint32_t)difo[i].section)
				continue;

			if (!(subsec->dofs_flags & DOF_SECF_LOAD)) {
				dtrace_dof_error(dof, "section not loaded");
				goto err;
			}

			if (subsec->dofs_align != (uint32_t)difo[i].align) {
				dtrace_dof_error(dof, "bad alignment");
				goto err;
			}

			bufp = (void **)((uintptr_t)dp + difo[i].bufoffs);
			lenp = (uint32_t *)((uintptr_t)dp + difo[i].lenoffs);

			if (*bufp != NULL) {
				dtrace_dof_error(dof, difo[i].msg);
				goto err;
			}

			if ((uint32_t)difo[i].entsize != subsec->dofs_entsize) {
				dtrace_dof_error(dof, "entry size mismatch");
				goto err;
			}

			if (subsec->dofs_entsize != 0 &&
			    (subsec->dofs_size % subsec->dofs_entsize) != 0) {
				dtrace_dof_error(dof, "corrupt entry size");
				goto err;
			}

			*lenp = subsec->dofs_size;
			*bufp = kmem_alloc(subsec->dofs_size, KM_SLEEP);
			bcopy((char *)(uintptr_t)(daddr + subsec->dofs_offset),
			    *bufp, subsec->dofs_size);

			if (subsec->dofs_entsize != 0)
				*lenp /= subsec->dofs_entsize;

			break;
		}

		/*
		 * If we encounter a loadable DIFO sub-section that is not
		 * known to us, assume this is a broken program and fail.
		 */
		if (difo[i].section == DOF_SECT_NONE &&
		    (subsec->dofs_flags & DOF_SECF_LOAD)) {
			dtrace_dof_error(dof, "unrecognized DIFO subsection");
			goto err;
		}
	}

	if (dp->dtdo_buf == NULL) {
		/*
		 * We can't have a DIF object without DIF text.
		 */
		dtrace_dof_error(dof, "missing DIF text");
		goto err;
	}

	/*
	 * Before we validate the DIF object, run through the variable table
	 * looking for the strings -- if any string variable has a size of
	 * zero, we'll set its size to the system-wide default string size.
	 * Note that this should _not_ happen if the "strsize" option has been
	 * set -- in this case, the compiler should have set the size to
	 * reflect the setting of the option.
	 */
	for (i = 0; i < dp->dtdo_varlen; i++) {
		dtrace_difv_t *v = &dp->dtdo_vartab[i];
		dtrace_diftype_t *t = &v->dtdv_type;

		if (v->dtdv_id < DIF_VAR_OTHER_UBASE)
			continue;

		if (t->dtdt_kind == DIF_TYPE_STRING && t->dtdt_size == 0)
			t->dtdt_size = dtrace_strsize_default;
	}

	if (dtrace_difo_validate(dp, vstate, DIF_DIR_NREGS, cr) != 0)
		goto err;

	dtrace_difo_init(dp, vstate);
	return (dp);

err:
	kmem_free(dp->dtdo_buf, dp->dtdo_len * sizeof (dif_instr_t));
	kmem_free(dp->dtdo_inttab, dp->dtdo_intlen * sizeof (uint64_t));
	kmem_free(dp->dtdo_strtab, dp->dtdo_strlen);
	kmem_free(dp->dtdo_vartab, dp->dtdo_varlen * sizeof (dtrace_difv_t));

	kmem_free(dp, sizeof (dtrace_difo_t));
	return (NULL);
}
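
/*
 * The difo[] table above drives a generic "copy section into struct member"
 * loop: each row names a section type, the offsetof() of the destination
 * buffer pointer and length field within dtrace_difo_t, the expected entry
 * size, and the required alignment.  A compiled-out miniature of the idiom,
 * with hypothetical names, is below; the point is that adding a new section
 * kind means adding a table row, not another copy of the loop body.
 */
#if 0
struct blob { void *b_data; uint32_t b_len; };

static const struct secmap {
	int	sm_sect;	/* section tag to match */
	size_t	sm_bufoffs;	/* offsetof() destination pointer */
	size_t	sm_lenoffs;	/* offsetof() destination length */
} secmap[] = {
	{ 1, offsetof(struct blob, b_data), offsetof(struct blob, b_len) },
};

static void
store_section(struct blob *bp, const struct secmap *m, void *data,
    uint32_t len)
{
	void **bufp = (void **)((uintptr_t)bp + m->sm_bufoffs);
	uint32_t *lenp = (uint32_t *)((uintptr_t)bp + m->sm_lenoffs);

	*bufp = data;
	*lenp = len;
}
#endif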
static dtrace_predicate_t *
dtrace_dof_predicate(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
    cred_t *cr)
{
	dtrace_difo_t *dp;

	if ((dp = dtrace_dof_difo(dof, sec, vstate, cr)) == NULL)
		return (NULL);

	return (dtrace_predicate_create(dp));
}
static dtrace_actdesc_t *
dtrace_dof_actdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
    cred_t *cr)
{
	dtrace_actdesc_t *act, *first = NULL, *last = NULL, *next;
	dof_actdesc_t *desc;
	dof_sec_t *difosec;
	size_t offs;
	uintptr_t daddr = (uintptr_t)dof;
	uint64_t arg;
	dtrace_actkind_t kind;

	if (sec->dofs_type != DOF_SECT_ACTDESC) {
		dtrace_dof_error(dof, "invalid action section");
		return (NULL);
	}

	if (sec->dofs_offset + sizeof (dof_actdesc_t) > dof->dofh_loadsz) {
		dtrace_dof_error(dof, "truncated action description");
		return (NULL);
	}

	if (sec->dofs_align != sizeof (uint64_t)) {
		dtrace_dof_error(dof, "bad alignment in action description");
		return (NULL);
	}

	if (sec->dofs_size < sec->dofs_entsize) {
		dtrace_dof_error(dof, "section entry size exceeds total size");
		return (NULL);
	}

	if (sec->dofs_entsize != sizeof (dof_actdesc_t)) {
		dtrace_dof_error(dof, "bad entry size in action description");
		return (NULL);
	}

	if (sec->dofs_size / sec->dofs_entsize > dtrace_actions_max) {
		dtrace_dof_error(dof, "actions exceed dtrace_actions_max");
		return (NULL);
	}

	for (offs = 0; offs < sec->dofs_size; offs += sec->dofs_entsize) {
		desc = (dof_actdesc_t *)(daddr +
		    (uintptr_t)sec->dofs_offset + offs);
		kind = (dtrace_actkind_t)desc->dofa_kind;

		if ((DTRACEACT_ISPRINTFLIKE(kind) &&
		    (kind != DTRACEACT_PRINTA || desc->dofa_strtab != DOF_SECIDX_NONE)) ||
		    (kind == DTRACEACT_DIFEXPR && desc->dofa_strtab != DOF_SECIDX_NONE)) {
			dof_sec_t *strtab;
			char *str, *fmt;
			uint64_t i;

			/*
			 * The argument to these actions is an index into the
			 * DOF string table.  For printf()-like actions, this
			 * is the format string.  For print(), this is the
			 * CTF type of the expression result.
			 */
			if ((strtab = dtrace_dof_sect(dof,
			    DOF_SECT_STRTAB, desc->dofa_strtab)) == NULL)
				goto err;

			str = (char *)((uintptr_t)dof +
			    (uintptr_t)strtab->dofs_offset);

			for (i = desc->dofa_arg; i < strtab->dofs_size; i++) {
				if (str[i] == '\0')
					break;
			}

			if (i >= strtab->dofs_size) {
				dtrace_dof_error(dof, "bogus format string");
				goto err;
			}

			if (i == desc->dofa_arg) {
				dtrace_dof_error(dof, "empty format string");
				goto err;
			}

			i -= desc->dofa_arg;
			fmt = kmem_alloc(i + 1, KM_SLEEP);
			bcopy(&str[desc->dofa_arg], fmt, i + 1);
			arg = (uint64_t)(uintptr_t)fmt;
		} else {
			if (kind == DTRACEACT_PRINTA) {
				ASSERT(desc->dofa_strtab == DOF_SECIDX_NONE);
				arg = 0;
			} else {
				arg = desc->dofa_arg;
			}
		}

		act = dtrace_actdesc_create(kind, desc->dofa_ntuple,
		    desc->dofa_uarg, arg);

		if (last != NULL) {
			last->dtad_next = act;
		} else {
			first = act;
		}

		last = act;

		if (desc->dofa_difo == DOF_SECIDX_NONE)
			continue;

		if ((difosec = dtrace_dof_sect(dof,
		    DOF_SECT_DIFOHDR, desc->dofa_difo)) == NULL)
			goto err;

		act->dtad_difo = dtrace_dof_difo(dof, difosec, vstate, cr);

		if (act->dtad_difo == NULL)
			goto err;
	}

	ASSERT(first != NULL);
	return (first);

err:
	for (act = first; act != NULL; act = next) {
		next = act->dtad_next;
		dtrace_actdesc_release(act, vstate);
	}

	return (NULL);
}
static dtrace_ecbdesc_t *
dtrace_dof_ecbdesc(dof_hdr_t *dof, dof_sec_t *sec, dtrace_vstate_t *vstate,
    cred_t *cr)
{
	dtrace_ecbdesc_t *ep;
	dof_ecbdesc_t *ecb;
	dtrace_probedesc_t *desc;
	dtrace_predicate_t *pred = NULL;

	if (sec->dofs_size < sizeof (dof_ecbdesc_t)) {
		dtrace_dof_error(dof, "truncated ECB description");
		return (NULL);
	}

	if (sec->dofs_align != sizeof (uint64_t)) {
		dtrace_dof_error(dof, "bad alignment in ECB description");
		return (NULL);
	}

	ecb = (dof_ecbdesc_t *)((uintptr_t)dof + (uintptr_t)sec->dofs_offset);
	sec = dtrace_dof_sect(dof, DOF_SECT_PROBEDESC, ecb->dofe_probes);

	if (sec == NULL)
		return (NULL);

	ep = kmem_zalloc(sizeof (dtrace_ecbdesc_t), KM_SLEEP);
	ep->dted_uarg = ecb->dofe_uarg;
	desc = &ep->dted_probe;

	if (dtrace_dof_probedesc(dof, sec, desc) == NULL)
		goto err;

	if (ecb->dofe_pred != DOF_SECIDX_NONE) {
		if ((sec = dtrace_dof_sect(dof,
		    DOF_SECT_DIFOHDR, ecb->dofe_pred)) == NULL)
			goto err;

		if ((pred = dtrace_dof_predicate(dof, sec, vstate, cr)) == NULL)
			goto err;

		ep->dted_pred.dtpdd_predicate = pred;
	}

	if (ecb->dofe_actions != DOF_SECIDX_NONE) {
		if ((sec = dtrace_dof_sect(dof,
		    DOF_SECT_ACTDESC, ecb->dofe_actions)) == NULL)
			goto err;

		ep->dted_action = dtrace_dof_actdesc(dof, sec, vstate, cr);

		if (ep->dted_action == NULL)
			goto err;
	}

	return (ep);

err:
	if (pred != NULL)
		dtrace_predicate_release(pred, vstate);
	kmem_free(ep, sizeof (dtrace_ecbdesc_t));
	return (NULL);
}
/*
 * APPLE NOTE: dyld handles dof relocation.
 * Darwin does not need dtrace_dof_relocate().
 */

/*
 * The dof_hdr_t passed to dtrace_dof_slurp() should be a partially validated
 * header:  it should be at the front of a memory region that is at least
 * sizeof (dof_hdr_t) in size -- and then at least dof_hdr.dofh_loadsz in
 * size.  It need not be validated in any other way.
 */
static int
dtrace_dof_slurp(dof_hdr_t *dof, dtrace_vstate_t *vstate, cred_t *cr,
    dtrace_enabling_t **enabp, uint64_t ubase, int noprobes)
{
#pragma unused(ubase) /* __APPLE__ */
	uint64_t len = dof->dofh_loadsz, seclen;
	uintptr_t daddr = (uintptr_t)dof;
	dtrace_ecbdesc_t *ep;
	dtrace_enabling_t *enab;
	uint_t i;

	LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(dof->dofh_loadsz >= sizeof (dof_hdr_t));

	/*
	 * Check the DOF header identification bytes.  In addition to checking
	 * valid settings, we also verify that unused bits/bytes are zeroed so
	 * we can use them later without fear of regressing existing binaries.
	 */
	if (bcmp(&dof->dofh_ident[DOF_ID_MAG0],
	    DOF_MAG_STRING, DOF_MAG_STRLEN) != 0) {
		dtrace_dof_error(dof, "DOF magic string mismatch");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_ILP32 &&
	    dof->dofh_ident[DOF_ID_MODEL] != DOF_MODEL_LP64) {
		dtrace_dof_error(dof, "DOF has invalid data model");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_ENCODING] != DOF_ENCODE_NATIVE) {
		dtrace_dof_error(dof, "DOF encoding mismatch");
		return (-1);
	}

	/*
	 * APPLE NOTE: Darwin only supports DOF_VERSION_3 for now.
	 */
	if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_3) {
		dtrace_dof_error(dof, "DOF version mismatch");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_DIFVERS] != DIF_VERSION_2) {
		dtrace_dof_error(dof, "DOF uses unsupported instruction set");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_DIFIREG] > DIF_DIR_NREGS) {
		dtrace_dof_error(dof, "DOF uses too many integer registers");
		return (-1);
	}

	if (dof->dofh_ident[DOF_ID_DIFTREG] > DIF_DTR_NREGS) {
		dtrace_dof_error(dof, "DOF uses too many tuple registers");
		return (-1);
	}

	for (i = DOF_ID_PAD; i < DOF_ID_SIZE; i++) {
		if (dof->dofh_ident[i] != 0) {
			dtrace_dof_error(dof, "DOF has invalid ident byte set");
			return (-1);
		}
	}

	if (dof->dofh_flags & ~DOF_FL_VALID) {
		dtrace_dof_error(dof, "DOF has invalid flag bits set");
		return (-1);
	}

	if (dof->dofh_secsize == 0) {
		dtrace_dof_error(dof, "zero section header size");
		return (-1);
	}

	/*
	 * Check that the section headers don't exceed the amount of DOF
	 * data.  Note that we cast the section size and number of sections
	 * to uint64_t's to prevent possible overflow in the multiplication.
	 */
	seclen = (uint64_t)dof->dofh_secnum * (uint64_t)dof->dofh_secsize;

	if (dof->dofh_secoff > len || seclen > len ||
	    dof->dofh_secoff + seclen > len) {
		dtrace_dof_error(dof, "truncated section headers");
		return (-1);
	}

	if (!IS_P2ALIGNED(dof->dofh_secoff, sizeof (uint64_t))) {
		dtrace_dof_error(dof, "misaligned section headers");
		return (-1);
	}

	if (!IS_P2ALIGNED(dof->dofh_secsize, sizeof (uint64_t))) {
		dtrace_dof_error(dof, "misaligned section size");
		return (-1);
	}

	/*
	 * Take an initial pass through the section headers to be sure that
	 * the headers don't have stray offsets.  If the 'noprobes' flag is
	 * set, do not permit sections relating to providers, probes, or args.
	 */
	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(daddr +
		    (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);

		if (noprobes) {
			switch (sec->dofs_type) {
			case DOF_SECT_PROVIDER:
			case DOF_SECT_PROBES:
			case DOF_SECT_PRARGS:
			case DOF_SECT_PROFFS:
				dtrace_dof_error(dof, "illegal sections "
				    "for enabling");
				return (-1);
			}
		}

		if (!(sec->dofs_flags & DOF_SECF_LOAD))
			continue; /* just ignore non-loadable sections */

		if (sec->dofs_align & (sec->dofs_align - 1)) {
			dtrace_dof_error(dof, "bad section alignment");
			return (-1);
		}

		if (sec->dofs_offset & (sec->dofs_align - 1)) {
			dtrace_dof_error(dof, "misaligned section");
			return (-1);
		}

		if (sec->dofs_offset > len || sec->dofs_size > len ||
		    sec->dofs_offset + sec->dofs_size > len) {
			dtrace_dof_error(dof, "corrupt section header");
			return (-1);
		}

		if (sec->dofs_type == DOF_SECT_STRTAB && *((char *)daddr +
		    sec->dofs_offset + sec->dofs_size - 1) != '\0') {
			dtrace_dof_error(dof, "non-terminating string table");
			return (-1);
		}
	}

	/*
	 * APPLE NOTE: We have no further relocation to perform.
	 * All dof values are relative offsets.
	 */

	if ((enab = *enabp) == NULL)
		enab = *enabp = dtrace_enabling_create(vstate);

	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)(daddr +
		    (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_ECBDESC)
			continue;

		/*
		 * APPLE NOTE: Defend against gcc 4.0 botch on x86:
		 * not all paths out of inlined dtrace_dof_ecbdesc()
		 * are checked for the NULL return value.
		 * Check for NULL explicitly here.
		 */
		ep = dtrace_dof_ecbdesc(dof, sec, vstate, cr);
		if (ep == NULL) {
			dtrace_enabling_destroy(enab);
			*enabp = NULL;
			return (-1);
		}

		dtrace_enabling_add(enab, ep);
	}

	return (0);
}
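
/*
 * The bounds checks in the section pass above all share one shape:
 *
 *	offset > len || size > len || offset + size > len
 *
 * Checking offset and size against len individually first bounds their sum
 * by 2 * len, and since len here is dofh_loadsz (already capped well below
 * 2^63 by dtrace_dof_maxsize), the addition cannot wrap a uint64_t.  A
 * compiled-out sketch of the idiom:
 */
#if 0
static int
dof_range_ok(uint64_t off, uint64_t size, uint64_t len)
{
	return (off <= len && size <= len && off + size <= len);
}
#endif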
/*
 * Process DOF for any options.  This routine assumes that the DOF has been
 * at least processed by dtrace_dof_slurp().
 */
static int
dtrace_dof_options(dof_hdr_t *dof, dtrace_state_t *state)
{
	uint_t i;
	int rval;
	uint32_t entsize;
	size_t offs;
	dof_optdesc_t *desc;

	for (i = 0; i < dof->dofh_secnum; i++) {
		dof_sec_t *sec = (dof_sec_t *)((uintptr_t)dof +
		    (uintptr_t)dof->dofh_secoff + i * dof->dofh_secsize);

		if (sec->dofs_type != DOF_SECT_OPTDESC)
			continue;

		if (sec->dofs_align != sizeof (uint64_t)) {
			dtrace_dof_error(dof, "bad alignment in "
			    "option description");
			return (EINVAL);
		}

		if ((entsize = sec->dofs_entsize) == 0) {
			dtrace_dof_error(dof, "zeroed option entry size");
			return (EINVAL);
		}

		if (entsize < sizeof (dof_optdesc_t)) {
			dtrace_dof_error(dof, "bad option entry size");
			return (EINVAL);
		}

		for (offs = 0; offs < sec->dofs_size; offs += entsize) {
			desc = (dof_optdesc_t *)((uintptr_t)dof +
			    (uintptr_t)sec->dofs_offset + offs);

			if (desc->dofo_strtab != DOF_SECIDX_NONE) {
				dtrace_dof_error(dof, "non-zero option string");
				return (EINVAL);
			}

			if (desc->dofo_value == (uint64_t)DTRACEOPT_UNSET) {
				dtrace_dof_error(dof, "unset option");
				return (EINVAL);
			}

			if ((rval = dtrace_state_option(state,
			    desc->dofo_option, desc->dofo_value)) != 0) {
				dtrace_dof_error(dof, "rejected option");
				return (rval);
			}
		}
	}

	return (0);
}
/*
 * DTrace Consumer State Functions
 */
static int
dtrace_dstate_init(dtrace_dstate_t *dstate, size_t size)
{
	size_t hashsize, maxper, min_size, chunksize = dstate->dtds_chunksize;
	void *base;
	uintptr_t limit;
	dtrace_dynvar_t *dvar, *next, *start;
	size_t i;

	LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(dstate->dtds_base == NULL && dstate->dtds_percpu == NULL);

	bzero(dstate, sizeof (dtrace_dstate_t));

	if ((dstate->dtds_chunksize = chunksize) == 0)
		dstate->dtds_chunksize = DTRACE_DYNVAR_CHUNKSIZE;

	VERIFY(dstate->dtds_chunksize < (LONG_MAX - sizeof (dtrace_dynhash_t)));

	if (size < (min_size = dstate->dtds_chunksize + sizeof (dtrace_dynhash_t)))
		size = min_size;

	if ((base = kmem_zalloc(size, KM_NOSLEEP)) == NULL)
		return (ENOMEM);

	dstate->dtds_size = size;
	dstate->dtds_base = base;
	dstate->dtds_percpu = kmem_cache_alloc(dtrace_state_cache, KM_SLEEP);
	bzero(dstate->dtds_percpu, (int)NCPU * sizeof (dtrace_dstate_percpu_t));

	hashsize = size / (dstate->dtds_chunksize + sizeof (dtrace_dynhash_t));

	if (hashsize != 1 && (hashsize & 1))
		hashsize--;

	dstate->dtds_hashsize = hashsize;
	dstate->dtds_hash = dstate->dtds_base;

	/*
	 * Set all of our hash buckets to point to the single sink, and (if
	 * it hasn't already been set), set the sink's hash value to be the
	 * sink sentinel value.  The sink is needed for dynamic variable
	 * lookups to know that they have iterated over an entire, valid hash
	 * chain.
	 */
	for (i = 0; i < hashsize; i++)
		dstate->dtds_hash[i].dtdh_chain = &dtrace_dynhash_sink;

	if (dtrace_dynhash_sink.dtdv_hashval != DTRACE_DYNHASH_SINK)
		dtrace_dynhash_sink.dtdv_hashval = DTRACE_DYNHASH_SINK;

	/*
	 * Determine number of active CPUs.  Divide free list evenly among
	 * active CPUs.
	 */
	start = (dtrace_dynvar_t *)
	    ((uintptr_t)base + hashsize * sizeof (dtrace_dynhash_t));
	limit = (uintptr_t)base + size;

	VERIFY((uintptr_t)start < limit);
	VERIFY((uintptr_t)start >= (uintptr_t)base);

	maxper = (limit - (uintptr_t)start) / (int)NCPU;
	maxper = (maxper / dstate->dtds_chunksize) * dstate->dtds_chunksize;

	for (i = 0; i < NCPU; i++) {
		dstate->dtds_percpu[i].dtdsc_free = dvar = start;

		/*
		 * If we don't even have enough chunks to make it once through
		 * NCPUs, we're just going to allocate everything to the first
		 * CPU.  And if we're on the last CPU, we're going to allocate
		 * whatever is left over.  In either case, we set the limit to
		 * be the limit of the dynamic variable space.
		 */
		if (maxper == 0 || i == NCPU - 1) {
			limit = (uintptr_t)base + size;
			start = NULL;
		} else {
			limit = (uintptr_t)start + maxper;
			start = (dtrace_dynvar_t *)limit;
		}

		VERIFY(limit <= (uintptr_t)base + size);

		for (;;) {
			next = (dtrace_dynvar_t *)((uintptr_t)dvar +
			    dstate->dtds_chunksize);

			if ((uintptr_t)next + dstate->dtds_chunksize >= limit)
				break;

			VERIFY((uintptr_t)dvar >= (uintptr_t)base &&
			    (uintptr_t)dvar <= (uintptr_t)base + size);
			dvar->dtdv_next = next;
			dvar = next;
		}

		if (maxper == 0)
			break;
	}

	return (0);
}
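
/*
 * A compiled-out, single-list miniature of the free-list carving performed
 * above (the real code repeats this per CPU, maxper bytes at a time): a flat
 * arena is divided into fixed-size chunks, each chunk's first word linking
 * to the next.  This sketch assumes arena_size is a positive multiple of
 * chunksize and that the arena is pointer-aligned; the names are
 * illustrative.
 */
#if 0
static void
carve_freelist(char *arena, size_t arena_size, size_t chunksize, void **headp)
{
	char *p;

	*headp = arena;
	for (p = arena; p + 2 * chunksize <= arena + arena_size; p += chunksize)
		*(void **)p = p + chunksize;	/* first word links to next chunk */
	*(void **)p = NULL;			/* final chunk ends the list */
}
#endif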
void
dtrace_dstate_fini(dtrace_dstate_t *dstate)
{
	LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED);

	if (dstate->dtds_base == NULL)
		return;

	kmem_free(dstate->dtds_base, dstate->dtds_size);
	kmem_cache_free(dtrace_state_cache, dstate->dtds_percpu);
}
void
dtrace_vstate_fini(dtrace_vstate_t *vstate)
{
	/*
	 * Logical XOR, where are you?
	 */
	ASSERT((vstate->dtvs_nglobals == 0) ^ (vstate->dtvs_globals != NULL));

	if (vstate->dtvs_nglobals > 0) {
		kmem_free(vstate->dtvs_globals, vstate->dtvs_nglobals *
		    sizeof (dtrace_statvar_t *));
	}

	if (vstate->dtvs_ntlocals > 0) {
		kmem_free(vstate->dtvs_tlocals, vstate->dtvs_ntlocals *
		    sizeof (dtrace_difv_t));
	}

	ASSERT((vstate->dtvs_nlocals == 0) ^ (vstate->dtvs_locals != NULL));

	if (vstate->dtvs_nlocals > 0) {
		kmem_free(vstate->dtvs_locals, vstate->dtvs_nlocals *
		    sizeof (dtrace_statvar_t *));
	}
}
static void
dtrace_state_clean(dtrace_state_t *state)
{
	if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE)
		return;

	dtrace_dynvar_clean(&state->dts_vstate.dtvs_dynvars);
	dtrace_speculation_clean(state);
}
static void
dtrace_state_deadman(dtrace_state_t *state)
{
	hrtime_t now;

	now = dtrace_gethrtime();

	if (state != dtrace_anon.dta_state &&
	    now - state->dts_laststatus >= dtrace_deadman_user)
		return;

	/*
	 * We must be sure that dts_alive never appears to be less than the
	 * value upon entry to dtrace_state_deadman(), and because we lack a
	 * dtrace_cas64(), we cannot store to it atomically.  We thus instead
	 * store INT64_MAX to it, followed by a memory barrier, followed by
	 * the new value.  This assures that dts_alive never appears to be
	 * less than its true value, regardless of the order in which the
	 * stores to the underlying storage are issued.
	 */
	state->dts_alive = INT64_MAX;
	dtrace_membar_producer();
	state->dts_alive = now;
}
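
/*
 * A compiled-out sketch of the "monotone publish" idiom used above for a
 * 64-bit value whose readers may observe the two component stores in either
 * order: park the variable at INT64_MAX, fence, then store the real value,
 * so a racing reader can never observe a value lower than the true one.
 * The function name is illustrative.
 */
#if 0
static void
publish_monotone(volatile int64_t *var, int64_t now)
{
	*var = INT64_MAX;		/* a racing read is "too big", never too small */
	dtrace_membar_producer();	/* order the two stores */
	*var = now;
}
#endif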
static int
dtrace_state_create(dev_t *devp, cred_t *cr, dtrace_state_t **new_state)
{
	minor_t minor;
	major_t major;
	char c[30];
	dtrace_state_t *state;
	dtrace_optval_t *opt;
	int bufsize = (int)NCPU * sizeof (dtrace_buffer_t), i;

	LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED);

	/* Cause restart */
	*new_state = NULL;

	if (devp != NULL) {
		minor = getminor(*devp);
	} else {
		minor = DTRACE_NCLIENTS - 1;
	}

	state = dtrace_state_allocate(minor);
	if (NULL == state) {
		printf("dtrace_open: couldn't acquire minor number %d. This usually means that too many DTrace clients are in use at the moment", minor);
		return (ERESTART);	/* can't reacquire */
	}

	state->dts_epid = DTRACE_EPIDNONE + 1;

	(void) snprintf(c, sizeof (c), "dtrace_aggid_%d", minor);
	state->dts_aggid_arena = vmem_create(c, (void *)1, UINT32_MAX, 1,
	    NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);

	if (devp != NULL) {
		major = getemajor(*devp);
	} else {
		major = ddi_driver_major(dtrace_devi);
	}

	state->dts_dev = makedevice(major, minor);

	if (devp != NULL)
		*devp = state->dts_dev;

	/*
	 * We allocate NCPU buffers.  On the one hand, this can be quite
	 * a bit of memory per instance (nearly 36K on a Starcat).  On the
	 * other hand, it saves an additional memory reference in the probe
	 * path.
	 */
	state->dts_buffer = kmem_zalloc(bufsize, KM_SLEEP);
	state->dts_aggbuffer = kmem_zalloc(bufsize, KM_SLEEP);
	state->dts_buf_over_limit = 0;
	state->dts_cleaner = CYCLIC_NONE;
	state->dts_deadman = CYCLIC_NONE;
	state->dts_vstate.dtvs_state = state;

	for (i = 0; i < DTRACEOPT_MAX; i++)
		state->dts_options[i] = DTRACEOPT_UNSET;

	/*
	 * Set the default options.
	 */
	opt = state->dts_options;
	opt[DTRACEOPT_BUFPOLICY] = DTRACEOPT_BUFPOLICY_SWITCH;
	opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_AUTO;
	opt[DTRACEOPT_NSPEC] = dtrace_nspec_default;
	opt[DTRACEOPT_SPECSIZE] = dtrace_specsize_default;
	opt[DTRACEOPT_CPU] = (dtrace_optval_t)DTRACE_CPUALL;
	opt[DTRACEOPT_STRSIZE] = dtrace_strsize_default;
	opt[DTRACEOPT_STACKFRAMES] = dtrace_stackframes_default;
	opt[DTRACEOPT_USTACKFRAMES] = dtrace_ustackframes_default;
	opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_default;
	opt[DTRACEOPT_AGGRATE] = dtrace_aggrate_default;
	opt[DTRACEOPT_SWITCHRATE] = dtrace_switchrate_default;
	opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_default;
	opt[DTRACEOPT_JSTACKFRAMES] = dtrace_jstackframes_default;
	opt[DTRACEOPT_JSTACKSTRSIZE] = dtrace_jstackstrsize_default;
	opt[DTRACEOPT_BUFLIMIT] = dtrace_buflimit_default;

	/*
	 * Depending on the user credentials, we set flag bits which alter probe
	 * visibility or the amount of destructiveness allowed.  In the case of
	 * actual anonymous tracing, or the possession of all privileges, all of
	 * the normal checks are bypassed.
	 */
#if defined(__APPLE__)
	if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
		if (dtrace_is_restricted() && !dtrace_are_restrictions_relaxed()) {
			/*
			 * Allow only proc credentials when DTrace is
			 * restricted by the current security policy.
			 */
			state->dts_cred.dcr_visible = DTRACE_CRV_ALLPROC;
			state->dts_cred.dcr_action = DTRACE_CRA_PROC |
			    DTRACE_CRA_PROC_CONTROL |
			    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
		} else {
			state->dts_cred.dcr_visible = DTRACE_CRV_ALL;
			state->dts_cred.dcr_action = DTRACE_CRA_ALL;
		}
	}
#else
	if (cr == NULL || PRIV_POLICY_ONLY(cr, PRIV_ALL, B_FALSE)) {
		state->dts_cred.dcr_visible = DTRACE_CRV_ALL;
		state->dts_cred.dcr_action = DTRACE_CRA_ALL;
	} else {
		/*
		 * Set up the credentials for this instantiation.  We take a
		 * hold on the credential to prevent it from disappearing on
		 * us; this in turn prevents the zone_t referenced by this
		 * credential from disappearing.  This means that we can
		 * examine the credential and the zone from probe context.
		 */
		crhold(cr);
		state->dts_cred.dcr_cred = cr;

		/*
		 * CRA_PROC means "we have *some* privilege for dtrace" and
		 * unlocks the use of variables like pid, zonename, etc.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE) ||
		    PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
			state->dts_cred.dcr_action |= DTRACE_CRA_PROC;
		}

		/*
		 * dtrace_user allows use of syscall and profile providers.
		 * If the user also has proc_owner and/or proc_zone, we
		 * extend the scope to include additional visibility and
		 * destructive power.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_USER, B_FALSE)) {
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE)) {
				state->dts_cred.dcr_visible |=
				    DTRACE_CRV_ALLPROC;

				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;
			}

			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE)) {
				state->dts_cred.dcr_visible |=
				    DTRACE_CRV_ALLZONE;

				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
			}

			/*
			 * If we have all privs in whatever zone this is,
			 * we can do destructive things to processes which
			 * have altered credentials.
			 *
			 * APPLE NOTE: Darwin doesn't do zones.
			 * Behave as if zone always has destructive privs.
			 */
			state->dts_cred.dcr_action |=
			    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
		}

		/*
		 * Holding the dtrace_kernel privilege also implies that
		 * the user has the dtrace_user privilege from a visibility
		 * perspective.  But without further privileges, some
		 * destructive actions are not available.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_KERNEL, B_FALSE)) {
			/*
			 * Make all probes in all zones visible.  However,
			 * this doesn't mean that all actions become available
			 * to all zones.
			 */
			state->dts_cred.dcr_visible |= DTRACE_CRV_KERNEL |
			    DTRACE_CRV_ALLPROC | DTRACE_CRV_ALLZONE;

			state->dts_cred.dcr_action |= DTRACE_CRA_KERNEL |
			    DTRACE_CRA_PROC;

			/*
			 * Holding proc_owner means that destructive actions
			 * for *this* zone are allowed.
			 */
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;

			/*
			 * Holding proc_zone means that destructive actions
			 * for this user/group ID in all zones is allowed.
			 */
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;

			/*
			 * If we have all privs in whatever zone this is,
			 * we can do destructive things to processes which
			 * have altered credentials.
			 *
			 * APPLE NOTE: Darwin doesn't do zones.
			 * Behave as if zone always has destructive privs.
			 */
			state->dts_cred.dcr_action |=
			    DTRACE_CRA_PROC_DESTRUCTIVE_CREDCHG;
		}

		/*
		 * Holding the dtrace_proc privilege gives control over fasttrap
		 * and pid providers.  We need to grant wider destructive
		 * privileges in the event that the user has proc_owner and/or
		 * proc_zone.
		 */
		if (PRIV_POLICY_ONLY(cr, PRIV_DTRACE_PROC, B_FALSE)) {
			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_OWNER, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLUSER;

			if (PRIV_POLICY_ONLY(cr, PRIV_PROC_ZONE, B_FALSE))
				state->dts_cred.dcr_action |=
				    DTRACE_CRA_PROC_DESTRUCTIVE_ALLZONE;
		}
	}
#endif

	*new_state = state;
	return(0);  /* Success */
}
static int
dtrace_state_buffer(dtrace_state_t *state, dtrace_buffer_t *buf, int which)
{
	dtrace_optval_t *opt = state->dts_options, size;
	processorid_t cpu = 0;
	size_t limit = buf->dtb_size;
	int flags = 0, rval;

	LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
	LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED);
	ASSERT(which < DTRACEOPT_MAX);
	ASSERT(state->dts_activity == DTRACE_ACTIVITY_INACTIVE ||
	    (state == dtrace_anon.dta_state &&
	    state->dts_activity == DTRACE_ACTIVITY_ACTIVE));

	if (opt[which] == DTRACEOPT_UNSET || opt[which] == 0)
		return (0);

	if (opt[DTRACEOPT_CPU] != DTRACEOPT_UNSET)
		cpu = opt[DTRACEOPT_CPU];

	if (which == DTRACEOPT_SPECSIZE)
		flags |= DTRACEBUF_NOSWITCH;

	if (which == DTRACEOPT_BUFSIZE) {
		if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_RING)
			flags |= DTRACEBUF_RING;

		if (opt[DTRACEOPT_BUFPOLICY] == DTRACEOPT_BUFPOLICY_FILL)
			flags |= DTRACEBUF_FILL;

		if (state != dtrace_anon.dta_state ||
		    state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
			flags |= DTRACEBUF_INACTIVE;
	}

	for (size = opt[which]; (size_t)size >= sizeof (uint64_t); size >>= 1) {
		/*
		 * The size must be 8-byte aligned.  If the size is not 8-byte
		 * aligned, drop it down by the difference.
		 */
		if (size & (sizeof (uint64_t) - 1))
			size -= size & (sizeof (uint64_t) - 1);

		if (size < state->dts_reserve) {
			/*
			 * Buffers always must be large enough to accommodate
			 * their prereserved space.  We return E2BIG instead
			 * of ENOMEM in this case to allow for user-level
			 * software to differentiate the cases.
			 */
			return (E2BIG);
		}

		limit = opt[DTRACEOPT_BUFLIMIT] * size / 100;
		rval = dtrace_buffer_alloc(buf, limit, size, flags, cpu);

		if (rval != ENOMEM) {
			opt[which] = size;
			return (rval);
		}

		if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
			return (rval);
	}

	return (ENOMEM);
}
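
/*
 * A compiled-out sketch of the auto-resize policy implemented above: try the
 * requested size, and on ENOMEM halve it (keeping 8-byte alignment) until
 * the allocation succeeds or the size drops below a floor.  The "manual"
 * resize policy instead fails on the first miss.  Names are illustrative.
 */
#if 0
static int
alloc_halving(size_t want, size_t floor, void **out)
{
	size_t size;

	for (size = want; size >= floor; size >>= 1) {
		size &= ~(sizeof (uint64_t) - 1);	/* keep 8-byte aligned */
		if ((*out = kmem_alloc(size, KM_NOSLEEP)) != NULL)
			return (0);
	}

	return (ENOMEM);
}
#endif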
static int
dtrace_state_buffers(dtrace_state_t *state)
{
	dtrace_speculation_t *spec = state->dts_speculations;
	int rval, i;

	if ((rval = dtrace_state_buffer(state, state->dts_buffer,
	    DTRACEOPT_BUFSIZE)) != 0)
		return (rval);

	if ((rval = dtrace_state_buffer(state, state->dts_aggbuffer,
	    DTRACEOPT_AGGSIZE)) != 0)
		return (rval);

	for (i = 0; i < state->dts_nspeculations; i++) {
		if ((rval = dtrace_state_buffer(state,
		    spec[i].dtsp_buffer, DTRACEOPT_SPECSIZE)) != 0)
			return (rval);
	}

	return (0);
}
static void
dtrace_state_prereserve(dtrace_state_t *state)
{
	dtrace_ecb_t *ecb;
	dtrace_probe_t *probe;

	state->dts_reserve = 0;

	if (state->dts_options[DTRACEOPT_BUFPOLICY] != DTRACEOPT_BUFPOLICY_FILL)
		return;

	/*
	 * If our buffer policy is a "fill" buffer policy, we need to set the
	 * prereserved space to be the space required by the END probes.
	 */
	probe = dtrace_probes[dtrace_probeid_end - 1];
	ASSERT(probe != NULL);

	for (ecb = probe->dtpr_ecb; ecb != NULL; ecb = ecb->dte_next) {
		if (ecb->dte_state != state)
			continue;

		state->dts_reserve += ecb->dte_needed + ecb->dte_alignment;
	}
}
static int
dtrace_state_go(dtrace_state_t *state, processorid_t *cpu)
{
	dtrace_optval_t *opt = state->dts_options, sz, nspec;
	dtrace_speculation_t *spec;
	dtrace_buffer_t *buf;
	cyc_handler_t hdlr;
	cyc_time_t when;
	int rval = 0, i, bufsize = (int)NCPU * sizeof (dtrace_buffer_t);
	dtrace_icookie_t cookie;

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_lock);

	if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE) {
		rval = EBUSY;
		goto out;
	}

	/*
	 * Before we can perform any checks, we must prime all of the
	 * retained enablings that correspond to this state.
	 */
	dtrace_enabling_prime(state);

	if (state->dts_destructive && !state->dts_cred.dcr_destructive) {
		rval = EACCES;
		goto out;
	}

	dtrace_state_prereserve(state);

	/*
	 * Now we try to allocate our speculations.
	 * We do not automatically resize the number of speculations; if
	 * this fails, we will fail the operation.
	 */
	nspec = opt[DTRACEOPT_NSPEC];
	ASSERT(nspec != DTRACEOPT_UNSET);

	if (nspec > INT_MAX) {
		rval = ENOMEM;
		goto out;
	}

	spec = kmem_zalloc(nspec * sizeof (dtrace_speculation_t), KM_NOSLEEP);

	if (spec == NULL) {
		rval = ENOMEM;
		goto out;
	}

	state->dts_speculations = spec;
	state->dts_nspeculations = (int)nspec;

	for (i = 0; i < nspec; i++) {
		if ((buf = kmem_zalloc(bufsize, KM_NOSLEEP)) == NULL) {
			rval = ENOMEM;
			goto err;
		}

		spec[i].dtsp_buffer = buf;
	}

	if (opt[DTRACEOPT_GRABANON] != DTRACEOPT_UNSET) {
		if (dtrace_anon.dta_state == NULL) {
			rval = ENOENT;
			goto out;
		}

		if (state->dts_necbs != 0) {
			rval = EALREADY;
			goto out;
		}

		state->dts_anon = dtrace_anon_grab();
		ASSERT(state->dts_anon != NULL);
		state = state->dts_anon;

		/*
		 * We want "grabanon" to be set in the grabbed state, so we'll
		 * copy that option value from the grabbing state into the
		 * grabbed state.
		 */
		state->dts_options[DTRACEOPT_GRABANON] =
		    opt[DTRACEOPT_GRABANON];

		*cpu = dtrace_anon.dta_beganon;

		/*
		 * If the anonymous state is active (as it almost certainly
		 * is if the anonymous enabling ultimately matched anything),
		 * we don't allow any further option processing -- but we
		 * don't return failure.
		 */
		if (state->dts_activity != DTRACE_ACTIVITY_INACTIVE)
			goto out;
	}

	if (opt[DTRACEOPT_AGGSIZE] != DTRACEOPT_UNSET &&
	    opt[DTRACEOPT_AGGSIZE] != 0) {
		if (state->dts_aggregations == NULL) {
			/*
			 * We're not going to create an aggregation buffer
			 * because we don't have any ECBs that contain
			 * aggregations -- set this option to 0.
			 */
			opt[DTRACEOPT_AGGSIZE] = 0;
		} else {
			/*
			 * If we have an aggregation buffer, we must also have
			 * a buffer to use as scratch.
			 */
			if (opt[DTRACEOPT_BUFSIZE] == DTRACEOPT_UNSET ||
			    (size_t)opt[DTRACEOPT_BUFSIZE] < state->dts_needed) {
				opt[DTRACEOPT_BUFSIZE] = state->dts_needed;
			}
		}
	}

	if (opt[DTRACEOPT_SPECSIZE] != DTRACEOPT_UNSET &&
	    opt[DTRACEOPT_SPECSIZE] != 0) {
		if (!state->dts_speculates) {
			/*
			 * We're not going to create speculation buffers
			 * because we don't have any ECBs that actually
			 * speculate -- set the speculation size to 0.
			 */
			opt[DTRACEOPT_SPECSIZE] = 0;
		}
	}

	/*
	 * The bare minimum size for any buffer that we're actually going to
	 * do anything to is sizeof (uint64_t).
	 */
	sz = sizeof (uint64_t);

	if ((state->dts_needed != 0 && opt[DTRACEOPT_BUFSIZE] < sz) ||
	    (state->dts_speculates && opt[DTRACEOPT_SPECSIZE] < sz) ||
	    (state->dts_aggregations != NULL && opt[DTRACEOPT_AGGSIZE] < sz)) {
		/*
		 * A buffer size has been explicitly set to 0 (or to a size
		 * that will be adjusted to 0) and we need the space -- we
		 * need to return failure.  We return ENOSPC to differentiate
		 * it from failing to allocate a buffer due to failure to meet
		 * the reserve (for which we return E2BIG).
		 */
		rval = ENOSPC;
		goto out;
	}

	if ((rval = dtrace_state_buffers(state)) != 0)
		goto err;

	if ((sz = opt[DTRACEOPT_DYNVARSIZE]) == DTRACEOPT_UNSET)
		sz = dtrace_dstate_defsize;

	do {
		rval = dtrace_dstate_init(&state->dts_vstate.dtvs_dynvars, sz);

		if (rval == 0)
			break;

		if (opt[DTRACEOPT_BUFRESIZE] == DTRACEOPT_BUFRESIZE_MANUAL)
			goto err;
	} while (sz >>= 1);

	opt[DTRACEOPT_DYNVARSIZE] = sz;

	if (rval != 0)
		goto err;

	if (opt[DTRACEOPT_STATUSRATE] > dtrace_statusrate_max)
		opt[DTRACEOPT_STATUSRATE] = dtrace_statusrate_max;

	if (opt[DTRACEOPT_CLEANRATE] == 0)
		opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;

	if (opt[DTRACEOPT_CLEANRATE] < dtrace_cleanrate_min)
		opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_min;

	if (opt[DTRACEOPT_CLEANRATE] > dtrace_cleanrate_max)
		opt[DTRACEOPT_CLEANRATE] = dtrace_cleanrate_max;

	if (opt[DTRACEOPT_STRSIZE] > dtrace_strsize_max)
		opt[DTRACEOPT_STRSIZE] = dtrace_strsize_max;

	if (opt[DTRACEOPT_STRSIZE] < dtrace_strsize_min)
		opt[DTRACEOPT_STRSIZE] = dtrace_strsize_min;

	if (opt[DTRACEOPT_BUFLIMIT] > dtrace_buflimit_max)
		opt[DTRACEOPT_BUFLIMIT] = dtrace_buflimit_max;

	if (opt[DTRACEOPT_BUFLIMIT] < dtrace_buflimit_min)
		opt[DTRACEOPT_BUFLIMIT] = dtrace_buflimit_min;

	hdlr.cyh_func = (cyc_func_t)dtrace_state_clean;
	hdlr.cyh_arg = state;
	hdlr.cyh_level = CY_LOW_LEVEL;

	when.cyt_when = 0;
	when.cyt_interval = opt[DTRACEOPT_CLEANRATE];

	state->dts_cleaner = cyclic_add(&hdlr, &when);

	hdlr.cyh_func = (cyc_func_t)dtrace_state_deadman;
	hdlr.cyh_arg = state;
	hdlr.cyh_level = CY_LOW_LEVEL;

	when.cyt_when = 0;
	when.cyt_interval = dtrace_deadman_interval;

	state->dts_alive = state->dts_laststatus = dtrace_gethrtime();
	state->dts_deadman = cyclic_add(&hdlr, &when);

	state->dts_activity = DTRACE_ACTIVITY_WARMUP;

	/*
	 * Now it's time to actually fire the BEGIN probe.  We need to disable
	 * interrupts here both to record the CPU on which we fired the BEGIN
	 * probe (the data from this CPU will be processed first at user
	 * level) and to manually activate the buffer for this CPU.
	 */
	cookie = dtrace_interrupt_disable();
	*cpu = CPU->cpu_id;
	ASSERT(state->dts_buffer[*cpu].dtb_flags & DTRACEBUF_INACTIVE);
	state->dts_buffer[*cpu].dtb_flags &= ~DTRACEBUF_INACTIVE;

	dtrace_probe(dtrace_probeid_begin,
	    (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
	dtrace_interrupt_enable(cookie);

	/*
	 * We may have had an exit action from a BEGIN probe; only change our
	 * state to ACTIVE if we're still in WARMUP.
	 */
	ASSERT(state->dts_activity == DTRACE_ACTIVITY_WARMUP ||
	    state->dts_activity == DTRACE_ACTIVITY_DRAINING);

	if (state->dts_activity == DTRACE_ACTIVITY_WARMUP)
		state->dts_activity = DTRACE_ACTIVITY_ACTIVE;

	/*
	 * Regardless of whether we're now in ACTIVE or DRAINING, we
	 * want each CPU to transition its principal buffer out of the
	 * INACTIVE state.  Doing this assures that no CPU will suddenly begin
	 * processing an ECB halfway down a probe's ECB chain; all CPUs will
	 * atomically transition from processing none of a state's ECBs to
	 * processing all of them.
	 */
	dtrace_xcall(DTRACE_CPUALL,
	    (dtrace_xcall_t)dtrace_buffer_activate, state);
	goto out;

err:
	dtrace_buffer_free(state->dts_buffer);
	dtrace_buffer_free(state->dts_aggbuffer);

	if ((nspec = state->dts_nspeculations) == 0) {
		ASSERT(state->dts_speculations == NULL);
		goto out;
	}

	spec = state->dts_speculations;
	ASSERT(spec != NULL);

	for (i = 0; i < state->dts_nspeculations; i++) {
		if ((buf = spec[i].dtsp_buffer) == NULL)
			break;

		dtrace_buffer_free(buf);
		kmem_free(buf, bufsize);
	}

	kmem_free(spec, nspec * sizeof (dtrace_speculation_t));
	state->dts_nspeculations = 0;
	state->dts_speculations = NULL;

out:
	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&cpu_lock);

	return (rval);
}
static int
dtrace_state_stop(dtrace_state_t *state, processorid_t *cpu)
{
	dtrace_icookie_t cookie;

	LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);

	if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE &&
	    state->dts_activity != DTRACE_ACTIVITY_DRAINING)
		return (EINVAL);

	/*
	 * We'll set the activity to DTRACE_ACTIVITY_DRAINING, and issue a sync
	 * to be sure that every CPU has seen it.  See below for the details
	 * on why this is done.
	 */
	state->dts_activity = DTRACE_ACTIVITY_DRAINING;
	dtrace_sync();

	/*
	 * By this point, it is impossible for any CPU to be still processing
	 * with DTRACE_ACTIVITY_ACTIVE.  We can thus set our activity to
	 * DTRACE_ACTIVITY_COOLDOWN and know that we're not racing with any
	 * other CPU in dtrace_buffer_reserve().  This allows dtrace_probe()
	 * and callees to know that the activity is DTRACE_ACTIVITY_COOLDOWN
	 * iff we're in the END probe.
	 */
	state->dts_activity = DTRACE_ACTIVITY_COOLDOWN;
	dtrace_sync();
	ASSERT(state->dts_activity == DTRACE_ACTIVITY_COOLDOWN);

	/*
	 * Finally, we can release the reserve and call the END probe.  We
	 * disable interrupts across calling the END probe to allow us to
	 * return the CPU on which we actually called the END probe.  This
	 * allows user-land to be sure that this CPU's principal buffer is
	 * processed last.
	 */
	state->dts_reserve = 0;

	cookie = dtrace_interrupt_disable();
	*cpu = CPU->cpu_id;
	dtrace_probe(dtrace_probeid_end,
	    (uint64_t)(uintptr_t)state, 0, 0, 0, 0);
	dtrace_interrupt_enable(cookie);

	state->dts_activity = DTRACE_ACTIVITY_STOPPED;
	dtrace_sync();

	return (0);
}
13826 dtrace_state_option(dtrace_state_t 
*state
, dtrace_optid_t option
, 
13827     dtrace_optval_t val
) 
13829         LCK_MTX_ASSERT(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
); 
13831         if (state
->dts_activity 
!= DTRACE_ACTIVITY_INACTIVE
) 
13834         if (option 
>= DTRACEOPT_MAX
) 
13837         if (option 
!= DTRACEOPT_CPU 
&& val 
< 0) 
13841         case DTRACEOPT_DESTRUCTIVE
: 
13843                  * Prevent consumers from enabling destructive actions if DTrace 
13844                  * is running in a restricted environment, or if actions are 
13847                 if (dtrace_is_restricted() || dtrace_destructive_disallow
) 
13850                 state
->dts_cred
.dcr_destructive 
= 1; 
13853         case DTRACEOPT_BUFSIZE
: 
13854         case DTRACEOPT_DYNVARSIZE
: 
13855         case DTRACEOPT_AGGSIZE
: 
13856         case DTRACEOPT_SPECSIZE
: 
13857         case DTRACEOPT_STRSIZE
: 
13861                 if (val 
>= LONG_MAX
) { 
13863                          * If this is an otherwise negative value, set it to 
13864                          * the highest multiple of 128m less than LONG_MAX. 
13865                          * Technically, we're adjusting the size without 
13866                          * regard to the buffer resizing policy, but in fact, 
13867                          * this has no effect -- if we set the buffer size to 
13868                          * ~LONG_MAX and the buffer policy is ultimately set to 
13869                          * be "manual", the buffer allocation is guaranteed to 
13870                          * fail, if only because the allocation requires two 
13871                          * buffers.  (We set the the size to the highest 
13872                          * multiple of 128m because it ensures that the size 
13873                          * will remain a multiple of a megabyte when 
13874                          * repeatedly halved -- all the way down to 15m.) 
13876                         val 
= LONG_MAX 
- (1 << 27) + 1; 
13880         state
->dts_options
[option
] = val
; 
13886 dtrace_state_destroy(dtrace_state_t *state)
13889         dtrace_vstate_t *vstate = &state->dts_vstate;
13890         minor_t minor = getminor(state->dts_dev);
13891         int i, bufsize = (int)NCPU * sizeof (dtrace_buffer_t);
13892         dtrace_speculation_t *spec = state->dts_speculations;
13893         int nspec = state->dts_nspeculations;
13896         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
13897         LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED);
13900          * First, retract any retained enablings for this state.
13902         dtrace_enabling_retract(state);
13903         ASSERT(state->dts_nretained == 0);
13905         if (state->dts_activity == DTRACE_ACTIVITY_ACTIVE ||
13906             state->dts_activity == DTRACE_ACTIVITY_DRAINING) {
13908                  * We have managed to come into dtrace_state_destroy() on a
13909                  * hot enabling -- almost certainly because of a disorderly
13910                  * shutdown of a consumer.  (That is, a consumer that is
13911                  * exiting without having called dtrace_stop().) In this case,
13912                  * we're going to set our activity to be KILLED, and then
13913                  * issue a sync to be sure that everyone is out of probe
13914                  * context before we start blowing away ECBs.
13916                 state->dts_activity = DTRACE_ACTIVITY_KILLED;
13921          * Release the credential hold we took in dtrace_state_create().
13923         if (state->dts_cred.dcr_cred != NULL)
13924                 crfree(state->dts_cred.dcr_cred);
13927          * Now we can safely disable and destroy any enabled probes.  Because
13928          * any DTRACE_PRIV_KERNEL probes may actually be slowing our progress
13929          * (especially if they're all enabled), we take two passes through the
13930          * ECBs:  in the first, we disable just DTRACE_PRIV_KERNEL probes, and
13931          * in the second we disable whatever is left over.
13933         for (match = DTRACE_PRIV_KERNEL; ; match = 0) {
13934                 for (i = 0; i < state->dts_necbs; i++) {
13935                         if ((ecb = state->dts_ecbs[i]) == NULL)
13938                         if (match && ecb->dte_probe != NULL) {
13939                                 dtrace_probe_t *probe = ecb->dte_probe;
13940                                 dtrace_provider_t *prov = probe->dtpr_provider;
13942                                 if (!(prov->dtpv_priv.dtpp_flags & match))
13946                         dtrace_ecb_disable(ecb);
13947                         dtrace_ecb_destroy(ecb);
13955          * Before we free the buffers, perform one more sync to assure that
13956          * every CPU is out of probe context.
13960         dtrace_buffer_free(state->dts_buffer);
13961         dtrace_buffer_free(state->dts_aggbuffer);
13963         for (i = 0; i < nspec; i++)
13964                 dtrace_buffer_free(spec[i].dtsp_buffer);
13966         if (state->dts_cleaner != CYCLIC_NONE)
13967                 cyclic_remove(state->dts_cleaner);
13969         if (state->dts_deadman != CYCLIC_NONE)
13970                 cyclic_remove(state->dts_deadman);
13972         dtrace_dstate_fini(&vstate->dtvs_dynvars);
13973         dtrace_vstate_fini(vstate);
13974         kmem_free(state->dts_ecbs, state->dts_necbs * sizeof (dtrace_ecb_t *));
13976         if (state->dts_aggregations != NULL) {
13978                 for (i = 0; i < state->dts_naggregations; i++)
13979                         ASSERT(state->dts_aggregations[i] == NULL);
13981                 ASSERT(state->dts_naggregations > 0);
13982                 kmem_free(state->dts_aggregations,
13983                     state->dts_naggregations * sizeof (dtrace_aggregation_t *));
13986         kmem_free(state->dts_buffer, bufsize);
13987         kmem_free(state->dts_aggbuffer, bufsize);
13989         for (i = 0; i < nspec; i++)
13990                 kmem_free(spec[i].dtsp_buffer, bufsize);
13992         kmem_free(spec, nspec * sizeof (dtrace_speculation_t));
13994         dtrace_format_destroy(state);
13996         vmem_destroy(state->dts_aggid_arena);
13997         dtrace_state_free(minor);
14001  * DTrace Anonymous Enabling Functions
14003 static dtrace_state_t *
14004 dtrace_anon_grab(void)
14006         dtrace_state_t *state;
14008         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
14010         if ((state = dtrace_anon.dta_state) == NULL) {
14011                 ASSERT(dtrace_anon.dta_enabling == NULL);
14015         ASSERT(dtrace_anon.dta_enabling != NULL);
14016         ASSERT(dtrace_retained != NULL);
14018         dtrace_enabling_destroy(dtrace_anon.dta_enabling);
14019         dtrace_anon.dta_enabling = NULL;
14020         dtrace_anon.dta_state = NULL;
14026 dtrace_anon_property(void)
14029         dtrace_state_t *state;
14031         char c[32];             /* enough for "dof-data-" + digits */
14033         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
14034         LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED);
14036         for (i = 0; ; i++) {
14037                 (void) snprintf(c, sizeof (c), "dof-data-%d", i);
14039                 dtrace_err_verbose = 1;
14041                 if ((dof = dtrace_dof_property(c)) == NULL) {
14042                         dtrace_err_verbose = 0;
14047                  * We want to create anonymous state, so we need to transition
14048                  * the kernel debugger to indicate that DTrace is active.  If
14049                  * this fails (e.g. because the debugger has modified text in
14050                  * some way), we won't continue with the processing.
14052                 if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
14053                         cmn_err(CE_NOTE, "kernel debugger active; anonymous "
14054                             "enabling ignored.");
14055                         dtrace_dof_destroy(dof);
14060                  * If we haven't allocated an anonymous state, we'll do so now.
14062                 if ((state = dtrace_anon.dta_state) == NULL) {
14063                         rv = dtrace_state_create(NULL, NULL, &state);
14064                         dtrace_anon.dta_state = state;
14065                         if (rv != 0 || state == NULL) {
14067                                  * This basically shouldn't happen:  the only
14068                                  * failure mode from dtrace_state_create() is a
14069                                  * failure of ddi_soft_state_zalloc() that
14070                                  * itself should never happen.  Still, the
14071                                  * interface allows for a failure mode, and
14072                                  * we want to fail as gracefully as possible:
14073                                  * we'll emit an error message and cease
14074                                  * processing anonymous state in this case.
14076                                 cmn_err(CE_WARN, "failed to create "
14077                                     "anonymous state");
14078                                 dtrace_dof_destroy(dof);
14083                 rv = dtrace_dof_slurp(dof, &state->dts_vstate, CRED(),
14084                     &dtrace_anon.dta_enabling, 0, B_TRUE);
14087                         rv = dtrace_dof_options(dof, state);
14089                 dtrace_err_verbose = 0;
14090                 dtrace_dof_destroy(dof);
14094                          * This is malformed DOF; chuck any anonymous state
14097                         ASSERT(dtrace_anon.dta_enabling == NULL);
14098                         dtrace_state_destroy(state);
14099                         dtrace_anon.dta_state = NULL;
14103                 ASSERT(dtrace_anon.dta_enabling != NULL);
14106         if (dtrace_anon.dta_enabling != NULL) {
14110                  * dtrace_enabling_retain() can only fail because we are
14111                  * trying to retain more enablings than are allowed -- but
14112                  * we only have one anonymous enabling, and we are guaranteed
14113                  * to be allowed at least one retained enabling; we assert
14114                  * that dtrace_enabling_retain() returns success.
14116                 rval = dtrace_enabling_retain(dtrace_anon.dta_enabling);
14119                 dtrace_enabling_dump(dtrace_anon.dta_enabling);
14124  * DTrace Helper Functions
14127 dtrace_helper_trace(dtrace_helper_action_t *helper,
14128     dtrace_mstate_t *mstate, dtrace_vstate_t *vstate, int where)
14130         uint32_t size, next, nnext;
14132         dtrace_helptrace_t *ent;
14133         uint16_t flags = cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
14135         if (!dtrace_helptrace_enabled)
14138         ASSERT((uint32_t)vstate->dtvs_nlocals <= dtrace_helptrace_nlocals);
14141          * What would a tracing framework be without its own tracing
14142          * framework?  (Well, a hell of a lot simpler, for starters...)
14144         size = sizeof (dtrace_helptrace_t) + dtrace_helptrace_nlocals *
14145             sizeof (uint64_t) - sizeof (uint64_t);
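The size computation above is the usual idiom for a record whose type declares a one-element trailing array: the one declared element is subtracted, then nlocals real elements are added back. A sketch under that assumption (struct rec below is a stand-in, not the real dtrace_helptrace_t):

    #include <stdint.h>
    #include <stdlib.h>

    struct rec {
            uint32_t where;
            uint64_t locals[1];     /* declared as one entry, used as nlocals */
    };

    int
    main(void)
    {
            uint32_t nlocals = 8;
            size_t size = sizeof (struct rec) +
                nlocals * sizeof (uint64_t) - sizeof (uint64_t);
            struct rec *r = malloc(size);

            if (r != NULL) {
                    r->locals[nlocals - 1] = 0;     /* last slot is in bounds */
                    free(r);
            }
            return (0);
    }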
14148          * Iterate until we can allocate a slot in the trace buffer.
14151                 next = dtrace_helptrace_next;
14153                 if (next + size < dtrace_helptrace_bufsize) {
14154                         nnext = next + size;
14158         } while (dtrace_cas32(&dtrace_helptrace_next, next, nnext) != next);
14161          * We have our slot; fill it in.
14166         ent = (dtrace_helptrace_t *)&dtrace_helptrace_buffer[next];
14167         ent->dtht_helper = helper;
14168         ent->dtht_where = where;
14169         ent->dtht_nlocals = vstate->dtvs_nlocals;
14171         ent->dtht_fltoffs = (mstate->dtms_present & DTRACE_MSTATE_FLTOFFS) ?
14172             mstate->dtms_fltoffs : -1;
14173         ent->dtht_fault = DTRACE_FLAGS2FLT(flags);
14174         ent->dtht_illval = cpu_core[CPU->cpu_id].cpuc_dtrace_illval;
14176         for (i = 0; i < vstate->dtvs_nlocals; i++) {
14177                 dtrace_statvar_t *svar;
14179                 if ((svar = vstate->dtvs_locals[i]) == NULL)
14182                 ASSERT(svar->dtsv_size >= (int)NCPU * sizeof (uint64_t));
14183                 ent->dtht_locals[i] =
14184                     ((uint64_t *)(uintptr_t)svar->dtsv_data)[CPU->cpu_id];
14189 dtrace_helper(int which, dtrace_mstate_t *mstate,
14190     dtrace_state_t *state, uint64_t arg0, uint64_t arg1)
14192         uint16_t *flags = &cpu_core[CPU->cpu_id].cpuc_dtrace_flags;
14193         uint64_t sarg0 = mstate->dtms_arg[0];
14194         uint64_t sarg1 = mstate->dtms_arg[1];
14196         dtrace_helpers_t *helpers = curproc->p_dtrace_helpers;
14197         dtrace_helper_action_t *helper;
14198         dtrace_vstate_t *vstate;
14199         dtrace_difo_t *pred;
14200         int i, trace = dtrace_helptrace_enabled;
14202         ASSERT(which >= 0 && which < DTRACE_NHELPER_ACTIONS);
14204         if (helpers == NULL)
14207         if ((helper = helpers->dthps_actions[which]) == NULL)
14210         vstate = &helpers->dthps_vstate;
14211         mstate->dtms_arg[0] = arg0;
14212         mstate->dtms_arg[1] = arg1;
14215          * Now iterate over each helper.  If its predicate evaluates to 'true',
14216          * we'll call the corresponding actions.  Note that the below calls
14217          * to dtrace_dif_emulate() may set faults in machine state.  This is
14218          * okay:  our caller (the outer dtrace_dif_emulate()) will simply plow
14219          * the stored DIF offset with its own (which is the desired behavior).
14220          * Also, note the calls to dtrace_dif_emulate() may allocate scratch
14221          * from machine state; this is okay, too.
14223         for (; helper != NULL; helper = helper->dtha_next) {
14224                 if ((pred = helper->dtha_predicate) != NULL) {
14226                                 dtrace_helper_trace(helper, mstate, vstate, 0);
14228                         if (!dtrace_dif_emulate(pred, mstate, vstate, state))
14231                         if (*flags & CPU_DTRACE_FAULT)
14235                 for (i = 0; i < helper->dtha_nactions; i++) {
14237                                 dtrace_helper_trace(helper,
14238                                     mstate, vstate, i + 1);
14240                         rval = dtrace_dif_emulate(helper->dtha_actions[i],
14241                             mstate, vstate, state);
14243                         if (*flags & CPU_DTRACE_FAULT)
14249                         dtrace_helper_trace(helper, mstate, vstate,
14250                             DTRACE_HELPTRACE_NEXT);
14254                 dtrace_helper_trace(helper, mstate, vstate,
14255                     DTRACE_HELPTRACE_DONE);
14258          * Restore the arg0 that we saved upon entry.
14260         mstate->dtms_arg[0] = sarg0;
14261         mstate->dtms_arg[1] = sarg1;
14267                 dtrace_helper_trace(helper, mstate, vstate,
14268                     DTRACE_HELPTRACE_ERR);
14271          * Restore the arg0 that we saved upon entry.
14273         mstate->dtms_arg[0] = sarg0;
14274         mstate->dtms_arg[1] = sarg1;
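Stripped of the tracing and fault handling, the loop above walks a singly linked list of {predicate, actions[]} nodes, skipping a node whenever its predicate evaluates false. A minimal standalone sketch of that shape (hypothetical types; this is not the DIF emulator):

    #include <stdio.h>

    struct helper_node {
            int (*predicate)(void);         /* NULL means unconditional */
            void (*actions[4])(void);
            int nactions;
            struct helper_node *next;
    };

    static void
    run_chain(struct helper_node *h)
    {
            int i;

            for (; h != NULL; h = h->next) {
                    if (h->predicate != NULL && !h->predicate())
                            continue;       /* predicate false: skip node */
                    for (i = 0; i < h->nactions; i++)
                            h->actions[i]();
            }
    }

    static int always(void) { return (1); }
    static void say(void) { printf("action fired\n"); }

    int
    main(void)
    {
            struct helper_node n = { always, { say }, 1, NULL };

            run_chain(&n);
            return (0);
    }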
14280 dtrace_helper_action_destroy(dtrace_helper_action_t *helper,
14281     dtrace_vstate_t *vstate)
14285         if (helper->dtha_predicate != NULL)
14286                 dtrace_difo_release(helper->dtha_predicate, vstate);
14288         for (i = 0; i < helper->dtha_nactions; i++) {
14289                 ASSERT(helper->dtha_actions[i] != NULL);
14290                 dtrace_difo_release(helper->dtha_actions[i], vstate);
14293         kmem_free(helper->dtha_actions,
14294             helper->dtha_nactions * sizeof (dtrace_difo_t *));
14295         kmem_free(helper, sizeof (dtrace_helper_action_t));
14299 dtrace_helper_destroygen(proc_t* p, int gen)
14301         dtrace_helpers_t *help = p->p_dtrace_helpers;
14302         dtrace_vstate_t *vstate;
14305         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
14307         if (help == NULL || gen > help->dthps_generation)
14310         vstate = &help->dthps_vstate;
14312         for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
14313                 dtrace_helper_action_t *last = NULL, *h, *next;
14315                 for (h = help->dthps_actions[i]; h != NULL; h = next) {
14316                         next = h->dtha_next;
14318                         if (h->dtha_generation == gen) {
14319                                 if (last != NULL) {
14320                                         last->dtha_next = next;
14322                                         help->dthps_actions[i] = next;
14325                                 dtrace_helper_action_destroy(h, vstate);
14333          * Iterate until we've cleared out all helper providers with the
14334          * given generation number.
14337                 dtrace_helper_provider_t *prov = NULL;
14340                  * Look for a helper provider with the right generation. We
14341                  * have to start back at the beginning of the list each time
14342                  * because we drop dtrace_lock. It's unlikely that we'll make
14343                  * more than two passes.
14345                 for (i = 0; i < help->dthps_nprovs; i++) {
14346                         prov = help->dthps_provs[i];
14348                         if (prov->dthp_generation == gen)
14353                  * If there were no matches, we're done.
14355                 if (i == help->dthps_nprovs)
14359                  * Move the last helper provider into this slot.
14361                 help->dthps_nprovs--;
14362                 help->dthps_provs[i] = help->dthps_provs[help->dthps_nprovs];
14363                 help->dthps_provs[help->dthps_nprovs] = NULL;
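The three statements above are the classic unordered removal: the last entry is swapped into the vacated slot, O(1) because provider order is not significant here. The same move in isolation:

    #include <stdio.h>

    int
    main(void)
    {
            const char *provs[4] = { "a", "b", "c", "d" };
            int nprovs = 4, i = 1;          /* remove slot 1 ("b") */

            nprovs--;
            provs[i] = provs[nprovs];       /* the last entry fills the hole */
            provs[nprovs] = NULL;

            for (i = 0; i < nprovs; i++)
                    printf("%s\n", provs[i]);       /* prints: a d c */
            return (0);
    }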
14365                 lck_mtx_unlock(&dtrace_lock);
14368                  * If we have a meta provider, remove this helper provider.
14370                 lck_mtx_lock(&dtrace_meta_lock);
14371                 if (dtrace_meta_pid != NULL) {
14372                         ASSERT(dtrace_deferred_pid == NULL);
14373                         dtrace_helper_provider_remove(&prov->dthp_prov,
14376                 lck_mtx_unlock(&dtrace_meta_lock);
14378                 dtrace_helper_provider_destroy(prov);
14380                 lck_mtx_lock(&dtrace_lock);
14387 dtrace_helper_validate(dtrace_helper_action_t *helper)
14392         if ((dp = helper->dtha_predicate) != NULL)
14393                 err += dtrace_difo_validate_helper(dp);
14395         for (i = 0; i < helper->dtha_nactions; i++)
14396                 err += dtrace_difo_validate_helper(helper->dtha_actions[i]);
14402 dtrace_helper_action_add(proc_t* p, int which, dtrace_ecbdesc_t *ep)
14404         dtrace_helpers_t *help;
14405         dtrace_helper_action_t *helper, *last;
14406         dtrace_actdesc_t *act;
14407         dtrace_vstate_t *vstate;
14408         dtrace_predicate_t *pred;
14409         int count = 0, nactions = 0, i;
14411         if (which < 0 || which >= DTRACE_NHELPER_ACTIONS)
14414         help = p->p_dtrace_helpers;
14415         last = help->dthps_actions[which];
14416         vstate = &help->dthps_vstate;
14418         for (count = 0; last != NULL; last = last->dtha_next) {
14420                 if (last->dtha_next == NULL)
14425          * If we already have dtrace_helper_actions_max helper actions for this
14426          * helper action type, we'll refuse to add a new one.
14428         if (count >= dtrace_helper_actions_max)
14431         helper = kmem_zalloc(sizeof (dtrace_helper_action_t), KM_SLEEP);
14432         helper->dtha_generation = help->dthps_generation;
14434         if ((pred = ep->dted_pred.dtpdd_predicate) != NULL) {
14435                 ASSERT(pred->dtp_difo != NULL);
14436                 dtrace_difo_hold(pred->dtp_difo);
14437                 helper->dtha_predicate = pred->dtp_difo;
14440         for (act = ep->dted_action; act != NULL; act = act->dtad_next) {
14441                 if (act->dtad_kind != DTRACEACT_DIFEXPR)
14444                 if (act->dtad_difo == NULL)
14450         helper->dtha_actions = kmem_zalloc(sizeof (dtrace_difo_t *) *
14451             (helper->dtha_nactions = nactions), KM_SLEEP);
14453         for (act = ep->dted_action, i = 0; act != NULL; act = act->dtad_next) {
14454                 dtrace_difo_hold(act->dtad_difo);
14455                 helper->dtha_actions[i++] = act->dtad_difo;
14458         if (!dtrace_helper_validate(helper))
14461         if (last == NULL) {
14462                 help->dthps_actions[which] = helper;
14464                 last->dtha_next = helper;
14467         if ((uint32_t)vstate->dtvs_nlocals > dtrace_helptrace_nlocals) {
14468                 dtrace_helptrace_nlocals = vstate->dtvs_nlocals;
14469                 dtrace_helptrace_next = 0;
14474         dtrace_helper_action_destroy(helper, vstate);
14479 dtrace_helper_provider_register(proc_t *p, dtrace_helpers_t *help,
14480     dof_helper_t *dofhp)
14482         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_NOTOWNED);
14484         lck_mtx_lock(&dtrace_meta_lock);
14485         lck_mtx_lock(&dtrace_lock);
14487         if (!dtrace_attached() || dtrace_meta_pid == NULL) {
14489                  * If the dtrace module is loaded but not attached, or if
14490                  * there isn't a meta provider registered to deal with
14491                  * these provider descriptions, we need to postpone creating
14492                  * the actual providers until later.
14495                 if (help->dthps_next == NULL && help->dthps_prev == NULL &&
14496                     dtrace_deferred_pid != help) {
14497                         help->dthps_deferred = 1;
14498                         help->dthps_pid = p->p_pid;
14499                         help->dthps_next = dtrace_deferred_pid;
14500                         help->dthps_prev = NULL;
14501                         if (dtrace_deferred_pid != NULL)
14502                                 dtrace_deferred_pid->dthps_prev = help;
14503                         dtrace_deferred_pid = help;
14506                 lck_mtx_unlock(&dtrace_lock);
14508         } else if (dofhp != NULL) {
14510                  * If the dtrace module is loaded and we have a particular
14511                  * helper provider description, pass that off to the
14515                 lck_mtx_unlock(&dtrace_lock);
14517                 dtrace_helper_provide(dofhp, p);
14521                  * Otherwise, just pass all the helper provider descriptions
14522                  * off to the meta provider.
14526                 lck_mtx_unlock(&dtrace_lock);
14528                 for (i = 0; i < help->dthps_nprovs; i++) {
14529                         dtrace_helper_provide(&help->dthps_provs[i]->dthp_prov,
14534         lck_mtx_unlock(&dtrace_meta_lock);
14538 dtrace_helper_provider_add(proc_t* p, dof_helper_t *dofhp, int gen)
14540         dtrace_helpers_t *help;
14541         dtrace_helper_provider_t *hprov, **tmp_provs;
14542         uint_t tmp_maxprovs, i;
14544         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
14545         help = p->p_dtrace_helpers;
14546         ASSERT(help != NULL);
14549          * If we already have dtrace_helper_providers_max helper providers,
14550          * we'll refuse to add a new one.
14552         if (help->dthps_nprovs >= dtrace_helper_providers_max)
14556          * Check to make sure this isn't a duplicate.
14558         for (i = 0; i < help->dthps_nprovs; i++) {
14559                 if (dofhp->dofhp_addr ==
14560                     help->dthps_provs[i]->dthp_prov.dofhp_addr)
14564         hprov = kmem_zalloc(sizeof (dtrace_helper_provider_t), KM_SLEEP);
14565         hprov->dthp_prov = *dofhp;
14566         hprov->dthp_ref = 1;
14567         hprov->dthp_generation = gen;
14570          * Allocate a bigger table for helper providers if it's already full.
14572         if (help->dthps_maxprovs == help->dthps_nprovs) {
14573                 tmp_maxprovs = help->dthps_maxprovs;
14574                 tmp_provs = help->dthps_provs;
14576                 if (help->dthps_maxprovs == 0)
14577                         help->dthps_maxprovs = 2;
14579                         help->dthps_maxprovs *= 2;
14580                 if (help->dthps_maxprovs > dtrace_helper_providers_max)
14581                         help->dthps_maxprovs = dtrace_helper_providers_max;
14583                 ASSERT(tmp_maxprovs < help->dthps_maxprovs);
14585                 help->dthps_provs = kmem_zalloc(help->dthps_maxprovs *
14586                     sizeof (dtrace_helper_provider_t *), KM_SLEEP);
14588                 if (tmp_provs != NULL) {
14589                         bcopy(tmp_provs, help->dthps_provs, tmp_maxprovs *
14590                             sizeof (dtrace_helper_provider_t *));
14591                         kmem_free(tmp_provs, tmp_maxprovs *
14592                             sizeof (dtrace_helper_provider_t *));
14596         help->dthps_provs[help->dthps_nprovs] = hprov;
14597         help->dthps_nprovs++;
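The growth policy above is amortized doubling with a hard cap, copying and then freeing the old pointer table on each growth step. A compact user-space sketch of the same policy (illustrative cap; calloc/memcpy stand in for kmem_zalloc/bcopy):

    #include <stdlib.h>
    #include <string.h>

    #define PROVS_MAX       32      /* stand-in for dtrace_helper_providers_max */

    struct table {
            void **provs;
            unsigned int nprovs, maxprovs;
    };

    static int
    table_append(struct table *t, void *p)
    {
            if (t->nprovs >= PROVS_MAX)
                    return (-1);            /* refuse to grow past the cap */

            if (t->maxprovs == t->nprovs) {
                    unsigned int newmax = (t->maxprovs == 0) ? 2 :
                        t->maxprovs * 2;
                    void **np;

                    if (newmax > PROVS_MAX)
                            newmax = PROVS_MAX;
                    if ((np = calloc(newmax, sizeof (void *))) == NULL)
                            return (-1);
                    if (t->provs != NULL) {
                            memcpy(np, t->provs,
                                t->maxprovs * sizeof (void *));
                            free(t->provs);
                    }
                    t->provs = np;
                    t->maxprovs = newmax;
            }
            t->provs[t->nprovs++] = p;
            return (0);
    }

    int
    main(void)
    {
            struct table t = { NULL, 0, 0 };
            int dummy;

            return (table_append(&t, &dummy));
    }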
14603 dtrace_helper_provider_destroy(dtrace_helper_provider_t *hprov)
14605         lck_mtx_lock(&dtrace_lock);
14607         if (--hprov->dthp_ref == 0) {
14609                 lck_mtx_unlock(&dtrace_lock);
14610                 dof = (dof_hdr_t *)(uintptr_t)hprov->dthp_prov.dofhp_dof;
14611                 dtrace_dof_destroy(dof);
14612                 kmem_free(hprov, sizeof (dtrace_helper_provider_t));
14614                 lck_mtx_unlock(&dtrace_lock);
14619 dtrace_helper_provider_validate(dof_hdr_t *dof, dof_sec_t *sec)
14621         uintptr_t daddr = (uintptr_t)dof;
14622         dof_sec_t *str_sec, *prb_sec, *arg_sec, *off_sec, *enoff_sec;
14623         dof_provider_t *provider;
14624         dof_probe_t *probe;
14626         char *strtab, *typestr;
14627         dof_stridx_t typeidx;
14629         uint_t nprobes, j, k;
14631         ASSERT(sec->dofs_type == DOF_SECT_PROVIDER);
14633         if (sec->dofs_offset & (sizeof (uint_t) - 1)) {
14634                 dtrace_dof_error(dof, "misaligned section offset");
14639          * The section needs to be large enough to contain the DOF provider
14640          * structure appropriate for the given version.
14642         if (sec->dofs_size <
14643             ((dof->dofh_ident[DOF_ID_VERSION] == DOF_VERSION_1) ?
14644             offsetof(dof_provider_t, dofpv_prenoffs) :
14645             sizeof (dof_provider_t))) {
14646                 dtrace_dof_error(dof, "provider section too small");
14650         provider = (dof_provider_t *)(uintptr_t)(daddr + sec->dofs_offset);
14651         str_sec = dtrace_dof_sect(dof, DOF_SECT_STRTAB, provider->dofpv_strtab);
14652         prb_sec = dtrace_dof_sect(dof, DOF_SECT_PROBES, provider->dofpv_probes);
14653         arg_sec = dtrace_dof_sect(dof, DOF_SECT_PRARGS, provider->dofpv_prargs);
14654         off_sec = dtrace_dof_sect(dof, DOF_SECT_PROFFS, provider->dofpv_proffs);
14656         if (str_sec == NULL || prb_sec == NULL ||
14657             arg_sec == NULL || off_sec == NULL)
14662         if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1 &&
14663             provider->dofpv_prenoffs != DOF_SECT_NONE &&
14664             (enoff_sec = dtrace_dof_sect(dof, DOF_SECT_PRENOFFS,
14665             provider->dofpv_prenoffs)) == NULL)
14668         strtab = (char *)(uintptr_t)(daddr + str_sec->dofs_offset);
14670         if (provider->dofpv_name >= str_sec->dofs_size ||
14671             strlen(strtab + provider->dofpv_name) >= DTRACE_PROVNAMELEN) {
14672                 dtrace_dof_error(dof, "invalid provider name");
14676         if (prb_sec->dofs_entsize == 0 ||
14677             prb_sec->dofs_entsize > prb_sec->dofs_size) {
14678                 dtrace_dof_error(dof, "invalid entry size");
14682         if (prb_sec->dofs_entsize & (sizeof (uintptr_t) - 1)) {
14683                 dtrace_dof_error(dof, "misaligned entry size");
14687         if (off_sec->dofs_entsize != sizeof (uint32_t)) {
14688                 dtrace_dof_error(dof, "invalid entry size");
14692         if (off_sec->dofs_offset & (sizeof (uint32_t) - 1)) {
14693                 dtrace_dof_error(dof, "misaligned section offset");
14697         if (arg_sec->dofs_entsize != sizeof (uint8_t)) {
14698                 dtrace_dof_error(dof, "invalid entry size");
14702         arg = (uint8_t *)(uintptr_t)(daddr + arg_sec->dofs_offset);
14704         nprobes = prb_sec->dofs_size / prb_sec->dofs_entsize;
14707          * Take a pass through the probes to check for errors.
14709         for (j = 0; j < nprobes; j++) {
14710                 probe = (dof_probe_t *)(uintptr_t)(daddr +
14711                     prb_sec->dofs_offset + j * prb_sec->dofs_entsize);
14713                 if (probe->dofpr_func >= str_sec->dofs_size) {
14714                         dtrace_dof_error(dof, "invalid function name");
14718                 if (strlen(strtab + probe->dofpr_func) >= DTRACE_FUNCNAMELEN) {
14719                         dtrace_dof_error(dof, "function name too long");
14723                 if (probe->dofpr_name >= str_sec->dofs_size ||
14724                     strlen(strtab + probe->dofpr_name) >= DTRACE_NAMELEN) {
14725                         dtrace_dof_error(dof, "invalid probe name");
14730                  * The offset count must not wrap the index, and the offsets
14731                  * must also not overflow the section's data.
14733                 if (probe->dofpr_offidx + probe->dofpr_noffs <
14734                     probe->dofpr_offidx ||
14735                     (probe->dofpr_offidx + probe->dofpr_noffs) *
14736                     off_sec->dofs_entsize > off_sec->dofs_size) {
14737                         dtrace_dof_error(dof, "invalid probe offset");
14741                 if (dof->dofh_ident[DOF_ID_VERSION] != DOF_VERSION_1) {
14743                          * If there's no is-enabled offset section, make sure
14744                          * there aren't any is-enabled offsets. Otherwise
14745                          * perform the same checks as for probe offsets
14746                          * (immediately above).
14748                         if (enoff_sec == NULL) {
14749                                 if (probe->dofpr_enoffidx != 0 ||
14750                                     probe->dofpr_nenoffs != 0) {
14751                                         dtrace_dof_error(dof, "is-enabled "
14752                                             "offsets with null section");
14755                         } else if (probe->dofpr_enoffidx +
14756                             probe->dofpr_nenoffs < probe->dofpr_enoffidx ||
14757                             (probe->dofpr_enoffidx + probe->dofpr_nenoffs) *
14758                             enoff_sec->dofs_entsize > enoff_sec->dofs_size) {
14759                                 dtrace_dof_error(dof, "invalid is-enabled "
14764                         if (probe->dofpr_noffs + probe->dofpr_nenoffs == 0) {
14765                                 dtrace_dof_error(dof, "zero probe and "
14766                                     "is-enabled offsets");
14769                 } else if (probe->dofpr_noffs == 0) {
14770                         dtrace_dof_error(dof, "zero probe offsets");
14774                 if (probe->dofpr_argidx + probe->dofpr_xargc <
14775                     probe->dofpr_argidx ||
14776                     (probe->dofpr_argidx + probe->dofpr_xargc) *
14777                     arg_sec->dofs_entsize > arg_sec->dofs_size) {
14778                         dtrace_dof_error(dof, "invalid args");
14782                 typeidx = probe->dofpr_nargv;
14783                 typestr = strtab + probe->dofpr_nargv;
14784                 for (k = 0; k < probe->dofpr_nargc; k++) {
14785                         if (typeidx >= str_sec->dofs_size) {
14786                                 dtrace_dof_error(dof, "bad "
14787                                     "native argument type");
14791                         typesz = strlen(typestr) + 1;
14792                         if (typesz > DTRACE_ARGTYPELEN) {
14793                                 dtrace_dof_error(dof, "native "
14794                                     "argument type too long");
14801                 typeidx = probe->dofpr_xargv;
14802                 typestr = strtab + probe->dofpr_xargv;
14803                 for (k = 0; k < probe->dofpr_xargc; k++) {
14804                         if (arg[probe->dofpr_argidx + k] > probe->dofpr_nargc) {
14805                                 dtrace_dof_error(dof, "bad "
14806                                     "native argument index");
14810                         if (typeidx >= str_sec->dofs_size) {
14811                                 dtrace_dof_error(dof, "bad "
14812                                     "translated argument type");
14816                         typesz = strlen(typestr) + 1;
14817                         if (typesz > DTRACE_ARGTYPELEN) {
14818                                 dtrace_dof_error(dof, "translated argument "
14832 dtrace_helper_slurp(proc_t* p, dof_hdr_t *dof, dof_helper_t *dhp)
14834         dtrace_helpers_t *help;
14835         dtrace_vstate_t *vstate;
14836         dtrace_enabling_t *enab = NULL;
14837         int i, gen, rv, nhelpers = 0, nprovs = 0, destroy = 1;
14838         uintptr_t daddr = (uintptr_t)dof;
14840         LCK_MTX_ASSERT(&dtrace_lock, LCK_MTX_ASSERT_OWNED);
14842         if ((help = p->p_dtrace_helpers) == NULL)
14843                 help = dtrace_helpers_create(p);
14845         vstate = &help->dthps_vstate;
14847         if ((rv = dtrace_dof_slurp(dof, vstate, NULL, &enab,
14848             dhp != NULL ? dhp->dofhp_addr : 0, B_FALSE)) != 0) {
14849                 dtrace_dof_destroy(dof);
14854          * Look for helper providers and validate their descriptions.
14857                 for (i = 0; (uint32_t)i < dof->dofh_secnum; i++) {
14858                         dof_sec_t *sec = (dof_sec_t *)(uintptr_t)(daddr +
14859                             dof->dofh_secoff + i * dof->dofh_secsize);
14861                         if (sec->dofs_type != DOF_SECT_PROVIDER)
14864                         if (dtrace_helper_provider_validate(dof, sec) != 0) {
14865                                 dtrace_enabling_destroy(enab);
14866                                 dtrace_dof_destroy(dof);
14875          * Now we need to walk through the ECB descriptions in the enabling.
14877         for (i = 0; i < enab->dten_ndesc; i++) {
14878                 dtrace_ecbdesc_t *ep = enab->dten_desc[i];
14879                 dtrace_probedesc_t *desc = &ep->dted_probe;
14881                 /* APPLE NOTE: Darwin employs size bounded string operation. */
14882                 if (!LIT_STRNEQL(desc->dtpd_provider, "dtrace"))
14885                 if (!LIT_STRNEQL(desc->dtpd_mod, "helper"))
14888                 if (!LIT_STRNEQL(desc->dtpd_func, "ustack"))
14891                 if ((rv = dtrace_helper_action_add(p, DTRACE_HELPER_ACTION_USTACK,
14894                          * Adding this helper action failed -- we are now going
14895                          * to rip out the entire generation and return failure.
14897                         (void) dtrace_helper_destroygen(p, help->dthps_generation);
14898                         dtrace_enabling_destroy(enab);
14899                         dtrace_dof_destroy(dof);
14906         if (nhelpers < enab->dten_ndesc)
14907                 dtrace_dof_error(dof, "unmatched helpers");
14909         gen = help->dthps_generation++;
14910         dtrace_enabling_destroy(enab);
14912         if (dhp != NULL && nprovs > 0) {
14913                 dhp->dofhp_dof = (uint64_t)(uintptr_t)dof;
14914                 if (dtrace_helper_provider_add(p, dhp, gen) == 0) {
14915                         lck_mtx_unlock(&dtrace_lock);
14916                         dtrace_helper_provider_register(p, help, dhp);
14917                         lck_mtx_lock(&dtrace_lock);
14924                 dtrace_dof_destroy(dof);
14930  * APPLE NOTE:  DTrace lazy dof implementation
14932  * DTrace user static probes (USDT probes) and helper actions are loaded
14933  * in a process by processing dof sections. The dof sections are passed
14934  * into the kernel by dyld, in a dof_ioctl_data_t block. It is rather
14935  * expensive to process dof for a process that will never use it. There
14936  * is a memory cost (allocating the providers/probes), and a cpu cost
14937  * (creating the providers/probes).
14939  * To reduce this cost, we use "lazy dof". The normal procedure for
14940  * dof processing is to copyin the dof(s) pointed to by the dof_ioctl_data_t
14941  * block, and invoke dof_slurp_helper() on them. When "lazy dof" is
14942  * used, each process retains the dof_ioctl_data_t block, instead of
14943  * copying in the data it points to.
14945  * The dof_ioctl_data_t blocks are managed as if they were the actual
14946  * processed dof; on fork the block is copied to the child, on exec and
14947  * exit the block is freed.
14949  * If the process loads library(s) containing additional dof, the
14950  * new dof_ioctl_data_t is merged with the existing block.
14952  * There are a few catches that make this slightly more difficult.
14953  * When dyld registers dof_ioctl_data_t blocks, it expects a unique
14954  * identifier value for each dof in the block. In non-lazy dof terms,
14955  * this is the generation that dof was loaded in. If we hand back
14956  * a UID for a lazy dof, that same UID must be able to unload the
14957  * dof once it has become non-lazy. To meet this requirement, the
14958  * code that loads lazy dof requires that the UID's for dof(s) in
14959  * the lazy dof be sorted, and in ascending order. It is okay to skip
14960  * UID's, e.g. 1 -> 5 -> 6 is legal.
14962  * Once a process has become non-lazy, it will stay non-lazy. All
14963  * future dof operations for that process will be non-lazy, even
14964  * if the dof mode transitions back to lazy.
14966  * Always do lazy dof checks before non-lazy (i.e. in fork, exit, exec).
14967  * That way if the lazy check fails due to transitioning to non-lazy, the
14968  * right thing is done with the newly faulted in dof.
14972  * This method is a bit squicky. It must handle:
14974  * dof should not be lazy.
14975  * dof should have been handled lazily, but there was an error.
14976  * dof was handled lazily, and needs to be freed.
14977  * dof was handled lazily, and must not be freed.
14980  * Returns EACCES if dof should be handled non-lazily.
14982  * KERN_SUCCESS and all other return codes indicate lazy handling of dof.
14984  * If the dofs data is claimed by this method, dofs_claimed will be set.
14985  * Callers should not free claimed dofs.
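Concretely, the UID rule described above means an incoming block is stamped starting one past the highest existing generation, keeping the merged array strictly ascending even when numbers are skipped. A user-space sketch with simplified stand-ins for dof_helper_t (the field names here are hypothetical; the kernel reuses dofhp_dof to carry the generation):

    #include <assert.h>
    #include <stdint.h>

    struct helper {
            uint64_t addr;          /* stand-in for dofhp_addr */
            uint64_t gen;           /* stand-in for dofhp_dof, reused as UID */
    };

    int
    main(void)
    {
            struct helper existing[2] = { { 0x1000, 1 }, { 0x2000, 5 } };
            struct helper incoming[2] = { { 0x3000, 0 }, { 0x4000, 0 } };
            uint64_t generation = existing[1].gen + 1;      /* next UID: 6 */
            unsigned int i;

            for (i = 0; i < 2; i++)
                    incoming[i].gen = generation++;         /* 6, then 7 */

            /* 1 -> 5 -> 6 -> 7: ascending, with gaps allowed. */
            assert(existing[0].gen < existing[1].gen);
            assert(existing[1].gen < incoming[0].gen);
            assert(incoming[0].gen < incoming[1].gen);
            return (0);
    }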
14988 dtrace_lazy_dofs_add(proc_t *p, dof_ioctl_data_t* incoming_dofs, int *dofs_claimed)
14991         ASSERT(incoming_dofs && incoming_dofs->dofiod_count > 0);
14996         lck_rw_lock_shared(&dtrace_dof_mode_lock);
14998         ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);
14999         ASSERT(dtrace_dof_mode != DTRACE_DOF_MODE_NEVER);
15002          * Any existing helpers force non-lazy behavior.
15004         if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON && (p->p_dtrace_helpers == NULL)) {
15005                 lck_mtx_lock(&p->p_dtrace_sprlock);
15007                 dof_ioctl_data_t* existing_dofs = p->p_dtrace_lazy_dofs;
15008                 unsigned int existing_dofs_count = (existing_dofs) ? existing_dofs->dofiod_count : 0;
15009                 unsigned int i, merged_dofs_count = incoming_dofs->dofiod_count + existing_dofs_count;
15014                 if (merged_dofs_count == 0 || merged_dofs_count > 1024) {
15015                         dtrace_dof_error(NULL, "lazy_dofs_add merged_dofs_count out of range");
15021                  * Each dof being added must be assigned a unique generation.
15023                 uint64_t generation = (existing_dofs) ? existing_dofs->dofiod_helpers[existing_dofs_count - 1].dofhp_dof + 1 : 1;
15024                 for (i=0; i<incoming_dofs->dofiod_count; i++) {
15026                          * We rely on these being the same so we can overwrite dofhp_dof and not lose info.
15028                         ASSERT(incoming_dofs->dofiod_helpers[i].dofhp_dof == incoming_dofs->dofiod_helpers[i].dofhp_addr);
15029                         incoming_dofs->dofiod_helpers[i].dofhp_dof = generation++;
15033                 if (existing_dofs) {
15035                          * Merge the existing and incoming dofs
15037                         size_t merged_dofs_size = DOF_IOCTL_DATA_T_SIZE(merged_dofs_count);
15038                         dof_ioctl_data_t* merged_dofs = kmem_alloc(merged_dofs_size, KM_SLEEP);
15040                         bcopy(&existing_dofs->dofiod_helpers[0],
15041                               &merged_dofs->dofiod_helpers[0],
15042                               sizeof(dof_helper_t) * existing_dofs_count);
15043                         bcopy(&incoming_dofs->dofiod_helpers[0],
15044                               &merged_dofs->dofiod_helpers[existing_dofs_count],
15045                               sizeof(dof_helper_t) * incoming_dofs->dofiod_count);
15047                         merged_dofs->dofiod_count = merged_dofs_count;
15049                         kmem_free(existing_dofs, DOF_IOCTL_DATA_T_SIZE(existing_dofs_count));
15051                         p->p_dtrace_lazy_dofs = merged_dofs;
15054                          * Claim the incoming dofs
15057                         p->p_dtrace_lazy_dofs = incoming_dofs;
15061                 dof_ioctl_data_t* all_dofs = p->p_dtrace_lazy_dofs;
15062                 for (i=0; i<all_dofs->dofiod_count-1; i++) {
15063                         ASSERT(all_dofs->dofiod_helpers[i].dofhp_dof < all_dofs->dofiod_helpers[i+1].dofhp_dof);
15068                 lck_mtx_unlock(&p->p_dtrace_sprlock);
15073         lck_rw_unlock_shared(&dtrace_dof_mode_lock);
15081  * EINVAL: lazy dof is enabled, but the requested generation was not found.
15082  * EACCES: This removal needs to be handled non-lazily.
15085 dtrace_lazy_dofs_remove(proc_t *p, int generation)
15089         lck_rw_lock_shared(&dtrace_dof_mode_lock);
15091         ASSERT(p->p_dtrace_lazy_dofs == NULL || p->p_dtrace_helpers == NULL);
15092         ASSERT(dtrace_dof_mode != DTRACE_DOF_MODE_NEVER);
15095          * Any existing helpers force non-lazy behavior.
15097         if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON && (p->p_dtrace_helpers == NULL)) {
15098                 lck_mtx_lock(&p->p_dtrace_sprlock);
15100                 dof_ioctl_data_t* existing_dofs = p->p_dtrace_lazy_dofs;
15102                 if (existing_dofs) {
15103                         int index, existing_dofs_count = existing_dofs->dofiod_count;
15104                         for (index=0; index<existing_dofs_count; index++) {
15105                                 if ((int)existing_dofs->dofiod_helpers[index].dofhp_dof == generation) {
15106                                         dof_ioctl_data_t* removed_dofs = NULL;
15109                                          * If there is only 1 dof, we'll delete it and swap in NULL.
15111                                         if (existing_dofs_count > 1) {
15112                                                 int removed_dofs_count = existing_dofs_count - 1;
15113                                                 size_t removed_dofs_size = DOF_IOCTL_DATA_T_SIZE(removed_dofs_count);
15115                                                 removed_dofs = kmem_alloc(removed_dofs_size, KM_SLEEP);
15116                                                 removed_dofs->dofiod_count = removed_dofs_count;
15119                                                  * copy the remaining data.
15122                                                         bcopy(&existing_dofs->dofiod_helpers[0],
15123                                                               &removed_dofs->dofiod_helpers[0],
15124                                                               index * sizeof(dof_helper_t));
15127                                                 if (index < existing_dofs_count-1) {
15128                                                         bcopy(&existing_dofs->dofiod_helpers[index+1],
15129                                                               &removed_dofs->dofiod_helpers[index],
15130                                                               (existing_dofs_count - index - 1) * sizeof(dof_helper_t));
15134                                         kmem_free(existing_dofs, DOF_IOCTL_DATA_T_SIZE(existing_dofs_count));
15136                                         p->p_dtrace_lazy_dofs = removed_dofs;
15138                                         rval = KERN_SUCCESS;
15145                         dof_ioctl_data_t* all_dofs = p->p_dtrace_lazy_dofs;
15148                                 for (i=0; i<all_dofs->dofiod_count-1; i++) {
15149                                         ASSERT(all_dofs->dofiod_helpers[i].dofhp_dof < all_dofs->dofiod_helpers[i+1].dofhp_dof);
15156                 lck_mtx_unlock(&p->p_dtrace_sprlock);
15161         lck_rw_unlock_shared(&dtrace_dof_mode_lock);
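Unlike the swap-with-last trick used for helper providers, the removal above preserves order: the prefix before the matched index is copied as-is, and the suffix after it is copied down one slot. The same two-copy move in isolation:

    #include <assert.h>
    #include <string.h>

    int
    main(void)
    {
            int existing[5] = { 1, 2, 3, 4, 5 };
            int removed[4];
            int count = 5, index = 2;       /* drop the element '3' */

            /* Copy the prefix [0, index). */
            memcpy(&removed[0], &existing[0], index * sizeof (int));

            /* Copy the suffix (index, count) down one slot. */
            if (index < count - 1)
                    memcpy(&removed[index], &existing[index + 1],
                        (count - index - 1) * sizeof (int));

            assert(removed[0] == 1 && removed[1] == 2 &&
                removed[2] == 4 && removed[3] == 5);
            return (0);
    }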
15167 dtrace_lazy_dofs_destroy(proc_t 
*p
) 
15169         lck_rw_lock_shared(&dtrace_dof_mode_lock
); 
15170         lck_mtx_lock(&p
->p_dtrace_sprlock
); 
15172         ASSERT(p
->p_dtrace_lazy_dofs 
== NULL 
|| p
->p_dtrace_helpers 
== NULL
); 
15174         dof_ioctl_data_t
* lazy_dofs 
= p
->p_dtrace_lazy_dofs
; 
15175         p
->p_dtrace_lazy_dofs 
= NULL
; 
15177         lck_mtx_unlock(&p
->p_dtrace_sprlock
); 
15178         lck_rw_unlock_shared(&dtrace_dof_mode_lock
); 
15181                 kmem_free(lazy_dofs
, DOF_IOCTL_DATA_T_SIZE(lazy_dofs
->dofiod_count
)); 
15186 dtrace_lazy_dofs_proc_iterate_filter(proc_t 
*p
, void* ignored
) 
15188 #pragma unused(ignored) 
15190          * Okay to NULL test without taking the sprlock. 
15192         return p
->p_dtrace_lazy_dofs 
!= NULL
; 
15196 dtrace_lazy_dofs_process(proc_t 
*p
) { 
15198          * It is possible this process may exit during our attempt to 
15199          * fault in the dof. We could fix this by holding locks longer, 
15200          * but the errors are benign. 
15202         lck_mtx_lock(&p
->p_dtrace_sprlock
); 
15205         ASSERT(p
->p_dtrace_lazy_dofs 
== NULL 
|| p
->p_dtrace_helpers 
== NULL
); 
15206         ASSERT(dtrace_dof_mode 
== DTRACE_DOF_MODE_LAZY_OFF
); 
15208         dof_ioctl_data_t
* lazy_dofs 
= p
->p_dtrace_lazy_dofs
; 
15209         p
->p_dtrace_lazy_dofs 
= NULL
; 
15211         lck_mtx_unlock(&p
->p_dtrace_sprlock
); 
15214          * Process each dof_helper_t 
15216         if (lazy_dofs 
!= NULL
) { 
15220                 for (i
=0; i
<lazy_dofs
->dofiod_count
; i
++) { 
15222                          * When loading lazy dof, we depend on the generations being sorted in ascending order. 
15224                         ASSERT(i 
>= (lazy_dofs
->dofiod_count 
- 1) || lazy_dofs
->dofiod_helpers
[i
].dofhp_dof 
< lazy_dofs
->dofiod_helpers
[i
+1].dofhp_dof
); 
15226                         dof_helper_t 
*dhp 
= &lazy_dofs
->dofiod_helpers
[i
]; 
15229                          * We stored the generation in dofhp_dof. Save it, and restore the original value. 
15231                         int generation 
= dhp
->dofhp_dof
; 
15232                         dhp
->dofhp_dof 
= dhp
->dofhp_addr
; 
15234                         dof_hdr_t 
*dof 
= dtrace_dof_copyin_from_proc(p
, dhp
->dofhp_dof
, &rval
); 
15237                                 dtrace_helpers_t 
*help
; 
15239                                 lck_mtx_lock(&dtrace_lock
); 
15242                                  * This must be done with the dtrace_lock held 
15244                                 if ((help 
= p
->p_dtrace_helpers
) == NULL
) 
15245                                         help 
= dtrace_helpers_create(p
); 
15248                                  * If the generation value has been bumped, someone snuck in 
15249                                  * when we released the dtrace lock. We have to dump this generation, 
15250                                  * there is no safe way to load it. 
15252                                 if (help
->dthps_generation 
<= generation
) { 
15253                                         help
->dthps_generation 
= generation
; 
15256                                          * dtrace_helper_slurp() takes responsibility for the dof -- 
15257                                          * it may free it now or it may save it and free it later. 
15259                                         if ((rval 
= dtrace_helper_slurp(p
, dof
, dhp
)) != generation
) { 
15260                                                 dtrace_dof_error(NULL
, "returned value did not match expected generation"); 
15264                                 lck_mtx_unlock(&dtrace_lock
); 
15268                 kmem_free(lazy_dofs
, DOF_IOCTL_DATA_T_SIZE(lazy_dofs
->dofiod_count
)); 
15273 dtrace_lazy_dofs_proc_iterate_doit(proc_t 
*p
, void* ignored
) 
15275 #pragma unused(ignored) 
15277         dtrace_lazy_dofs_process(p
); 
15279         return PROC_RETURNED
; 
15282 #define DTRACE_LAZY_DOFS_DUPLICATED 1 
15285 dtrace_lazy_dofs_duplicate(proc_t 
*parent
, proc_t 
*child
) 
15287         LCK_MTX_ASSERT(&dtrace_lock
, LCK_MTX_ASSERT_NOTOWNED
); 
15288         LCK_MTX_ASSERT(&parent
->p_dtrace_sprlock
, LCK_MTX_ASSERT_NOTOWNED
); 
15289         LCK_MTX_ASSERT(&child
->p_dtrace_sprlock
, LCK_MTX_ASSERT_NOTOWNED
); 
15291         lck_rw_lock_shared(&dtrace_dof_mode_lock
); 
15292         lck_mtx_lock(&parent
->p_dtrace_sprlock
); 
15295          * We need to make sure that the transition to lazy dofs -> helpers 
15296          * was atomic for our parent 
15298         ASSERT(parent
->p_dtrace_lazy_dofs 
== NULL 
|| parent
->p_dtrace_helpers 
== NULL
); 
15300          * In theory we should hold the child sprlock, but this is safe... 
15302         ASSERT(child
->p_dtrace_lazy_dofs 
== NULL 
&& child
->p_dtrace_helpers 
== NULL
); 
15304         dof_ioctl_data_t
* parent_dofs 
= parent
->p_dtrace_lazy_dofs
; 
15305         dof_ioctl_data_t
* child_dofs 
= NULL
; 
15307                 size_t parent_dofs_size 
= DOF_IOCTL_DATA_T_SIZE(parent_dofs
->dofiod_count
); 
15308                 child_dofs 
= kmem_alloc(parent_dofs_size
, KM_SLEEP
); 
15309                 bcopy(parent_dofs
, child_dofs
, parent_dofs_size
); 
15312         lck_mtx_unlock(&parent
->p_dtrace_sprlock
); 
15315                 lck_mtx_lock(&child
->p_dtrace_sprlock
); 
15316                 child
->p_dtrace_lazy_dofs 
= child_dofs
; 
15317                 lck_mtx_unlock(&child
->p_dtrace_sprlock
); 
15319                  * We process the DOF at this point if the mode is set to 
15320                  * LAZY_OFF. This can happen if DTrace is still processing the 
15321                  * DOF of other process (which can happen because the 
15322                  * protected pager can have a huge latency) 
15323                  * but has not processed our parent yet 
15325                 if (dtrace_dof_mode 
== DTRACE_DOF_MODE_LAZY_OFF
) { 
15326                         dtrace_lazy_dofs_process(child
); 
15328                 lck_rw_unlock_shared(&dtrace_dof_mode_lock
); 
15330                 return DTRACE_LAZY_DOFS_DUPLICATED
; 
15332         lck_rw_unlock_shared(&dtrace_dof_mode_lock
); 
15337 static dtrace_helpers_t 
* 
15338 dtrace_helpers_create(proc_t 
*p
) 
15340         dtrace_helpers_t 
*help
; 
15342         LCK_MTX_ASSERT(&dtrace_lock
, LCK_MTX_ASSERT_OWNED
); 
15343         ASSERT(p
->p_dtrace_helpers 
== NULL
); 
15345         help 
= kmem_zalloc(sizeof (dtrace_helpers_t
), KM_SLEEP
); 
15346         help
->dthps_actions 
= kmem_zalloc(sizeof (dtrace_helper_action_t 
*) * 
15347             DTRACE_NHELPER_ACTIONS
, KM_SLEEP
); 
15349         p
->p_dtrace_helpers 
= help
; 
static void
dtrace_helpers_destroy(proc_t* p)
{
	dtrace_helpers_t *help;
	dtrace_vstate_t *vstate;
	uint_t i;

	lck_mtx_lock(&dtrace_lock);

	ASSERT(p->p_dtrace_helpers != NULL);
	ASSERT(dtrace_helpers > 0);

	help = p->p_dtrace_helpers;
	vstate = &help->dthps_vstate;

	/*
	 * We're now going to lose the help from this process.
	 */
	p->p_dtrace_helpers = NULL;
	dtrace_sync();

	/*
	 * Destroy the helper actions.
	 */
	for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
		dtrace_helper_action_t *h, *next;

		for (h = help->dthps_actions[i]; h != NULL; h = next) {
			next = h->dtha_next;
			dtrace_helper_action_destroy(h, vstate);
		}
	}

	lck_mtx_unlock(&dtrace_lock);

	/*
	 * Destroy the helper providers.
	 */
	if (help->dthps_maxprovs > 0) {
		lck_mtx_lock(&dtrace_meta_lock);
		if (dtrace_meta_pid != NULL) {
			ASSERT(dtrace_deferred_pid == NULL);

			for (i = 0; i < help->dthps_nprovs; i++) {
				dtrace_helper_provider_remove(
				    &help->dthps_provs[i]->dthp_prov, p);
			}
		} else {
			lck_mtx_lock(&dtrace_lock);
			ASSERT(help->dthps_deferred == 0 ||
			    help->dthps_next != NULL ||
			    help->dthps_prev != NULL ||
			    help == dtrace_deferred_pid);

			/*
			 * Remove the helper from the deferred list.
			 */
			if (help->dthps_next != NULL)
				help->dthps_next->dthps_prev = help->dthps_prev;
			if (help->dthps_prev != NULL)
				help->dthps_prev->dthps_next = help->dthps_next;
			if (dtrace_deferred_pid == help) {
				dtrace_deferred_pid = help->dthps_next;
				ASSERT(help->dthps_prev == NULL);
			}

			lck_mtx_unlock(&dtrace_lock);
		}

		lck_mtx_unlock(&dtrace_meta_lock);

		for (i = 0; i < help->dthps_nprovs; i++) {
			dtrace_helper_provider_destroy(help->dthps_provs[i]);
		}

		kmem_free(help->dthps_provs, help->dthps_maxprovs *
		    sizeof (dtrace_helper_provider_t *));
	}

	lck_mtx_lock(&dtrace_lock);

	dtrace_vstate_fini(&help->dthps_vstate);
	kmem_free(help->dthps_actions,
	    sizeof (dtrace_helper_action_t *) * DTRACE_NHELPER_ACTIONS);
	kmem_free(help, sizeof (dtrace_helpers_t));

	--dtrace_helpers;
	lck_mtx_unlock(&dtrace_lock);
}
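/*
 * The teardown above is ordered deliberately: helper actions go first (under
 * dtrace_lock), then helper providers (under dtrace_meta_lock, either removed
 * from the meta-provider or unlinked from the deferred registration list),
 * and only then is the vstate torn down and the dtrace_helpers_t itself
 * freed.
 */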
static void
dtrace_helpers_duplicate(proc_t *from, proc_t *to)
{
	dtrace_helpers_t *help, *newhelp;
	dtrace_helper_action_t *helper, *new, *last;
	dtrace_difo_t *dp;
	dtrace_vstate_t *vstate;
	uint_t i;
	int j, sz, hasprovs = 0;

	lck_mtx_lock(&dtrace_lock);
	ASSERT(from->p_dtrace_helpers != NULL);
	ASSERT(dtrace_helpers > 0);

	help = from->p_dtrace_helpers;
	newhelp = dtrace_helpers_create(to);
	ASSERT(to->p_dtrace_helpers != NULL);

	newhelp->dthps_generation = help->dthps_generation;
	vstate = &newhelp->dthps_vstate;

	/*
	 * Duplicate the helper actions.
	 */
	for (i = 0; i < DTRACE_NHELPER_ACTIONS; i++) {
		if ((helper = help->dthps_actions[i]) == NULL)
			continue;

		for (last = NULL; helper != NULL; helper = helper->dtha_next) {
			new = kmem_zalloc(sizeof (dtrace_helper_action_t),
			    KM_SLEEP);
			new->dtha_generation = helper->dtha_generation;

			if ((dp = helper->dtha_predicate) != NULL) {
				dp = dtrace_difo_duplicate(dp, vstate);
				new->dtha_predicate = dp;
			}

			new->dtha_nactions = helper->dtha_nactions;
			sz = sizeof (dtrace_difo_t *) * new->dtha_nactions;
			new->dtha_actions = kmem_alloc(sz, KM_SLEEP);

			for (j = 0; j < new->dtha_nactions; j++) {
				dtrace_difo_t *dpj = helper->dtha_actions[j];

				ASSERT(dpj != NULL);
				dpj = dtrace_difo_duplicate(dpj, vstate);
				new->dtha_actions[j] = dpj;
			}

			if (last != NULL) {
				last->dtha_next = new;
			} else {
				newhelp->dthps_actions[i] = new;
			}

			last = new;
		}
	}

	/*
	 * Duplicate the helper providers and register them with the
	 * DTrace framework.
	 */
	if (help->dthps_nprovs > 0) {
		newhelp->dthps_nprovs = help->dthps_nprovs;
		newhelp->dthps_maxprovs = help->dthps_nprovs;
		newhelp->dthps_provs = kmem_alloc(newhelp->dthps_nprovs *
		    sizeof (dtrace_helper_provider_t *), KM_SLEEP);
		for (i = 0; i < newhelp->dthps_nprovs; i++) {
			newhelp->dthps_provs[i] = help->dthps_provs[i];
			newhelp->dthps_provs[i]->dthp_ref++;
		}

		hasprovs = 1;
	}

	lck_mtx_unlock(&dtrace_lock);

	if (hasprovs)
		dtrace_helper_provider_register(to, newhelp, NULL);
}
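/*
 * Note the asymmetry above: helper actions (their predicate and action
 * DIFOs) are deep-copied via dtrace_difo_duplicate() into the child's own
 * vstate, while helper providers are merely shared by bumping dthp_ref,
 * since a generated provider description is immutable once created.
 */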
/*
 * DTrace Process functions
 */

void
dtrace_proc_fork(proc_t *parent_proc, proc_t *child_proc, int spawn)
{
	/*
	 * This code applies to new processes who are copying the task
	 * and thread state and address spaces of their parent process.
	 */
	if (!spawn) {
		/*
		 * APPLE NOTE: Solaris does a sprlock() and drops the
		 * proc_lock here. We're cheating a bit and only taking
		 * the p_dtrace_sprlock lock. A full sprlock would
		 * task_suspend the parent.
		 */
		lck_mtx_lock(&parent_proc->p_dtrace_sprlock);

		/*
		 * Remove all DTrace tracepoints from the child process. We
		 * need to do this _before_ duplicating USDT providers since
		 * any associated probes may be immediately enabled.
		 */
		if (parent_proc->p_dtrace_count > 0) {
			dtrace_fasttrap_fork(parent_proc, child_proc);
		}

		lck_mtx_unlock(&parent_proc->p_dtrace_sprlock);

		/*
		 * Duplicate any lazy dof(s). This must be done while NOT
		 * holding the parent sprlock! Lock ordering is
		 * dtrace_dof_mode_lock, then sprlock.  It is imperative we
		 * always call dtrace_lazy_dofs_duplicate, rather than null
		 * check and call if !NULL. If we NULL test, during lazy dof
		 * faulting we can race with the faulting code and proceed
		 * from here to beyond the helpers copy. The lazy dof
		 * faulting will then fail to copy the helpers to the child
		 * process. We return if we duplicated lazy dofs as a process
		 * can only have one at the same time to avoid a race between
		 * a dtrace client and dtrace_proc_fork where a process would
		 * end up with both lazy dofs and helpers.
		 */
		if (dtrace_lazy_dofs_duplicate(parent_proc, child_proc) == DTRACE_LAZY_DOFS_DUPLICATED) {
			return;
		}

		/*
		 * Duplicate any helper actions and providers if they haven't
		 * already been duplicated.
		 */
#if !defined(__APPLE__)
		/*
		 * The SFORKING flag we set above informs the code that enables
		 * USDT probes that sprlock() may fail because the child is
		 * being forked.
		 */
#endif
		/*
		 * APPLE NOTE: As best I can tell, Apple's sprlock() equivalent
		 * never fails to find the child. We do not set SFORKING.
		 */
		if (parent_proc->p_dtrace_helpers != NULL && dtrace_helpers_fork) {
			(*dtrace_helpers_fork)(parent_proc, child_proc);
		}
	}
}
void
dtrace_proc_exec(proc_t *p)
{
	/*
	 * Invalidate any predicate evaluation already cached for this thread by DTrace.
	 * That's because we've just stored to p_comm and DTrace refers to that when it
	 * evaluates the "execname" special variable. uid and gid may have changed as well.
	 */
	dtrace_set_thread_predcache(current_thread(), 0);

	/*
	 * Free any outstanding lazy dof entries. It is imperative we
	 * always call dtrace_lazy_dofs_destroy, rather than null check
	 * and call if !NULL. If we NULL test, during lazy dof faulting
	 * we can race with the faulting code and proceed from here to
	 * beyond the helpers cleanup. The lazy dof faulting will then
	 * install new helpers which no longer belong to this process!
	 */
	dtrace_lazy_dofs_destroy(p);

	/*
	 * Clean up any DTrace helpers for the process.
	 */
	if (p->p_dtrace_helpers != NULL && dtrace_helpers_cleanup) {
		(*dtrace_helpers_cleanup)(p);
	}

	/*
	 * Cleanup the DTrace provider associated with this process.
	 */
	if (p->p_dtrace_probes && dtrace_fasttrap_exec_ptr) {
		(*dtrace_fasttrap_exec_ptr)(p);
	}
}
void
dtrace_proc_exit(proc_t *p)
{
	/*
	 * Free any outstanding lazy dof entries. It is imperative we
	 * always call dtrace_lazy_dofs_destroy, rather than null check
	 * and call if !NULL. If we NULL test, during lazy dof faulting
	 * we can race with the faulting code and proceed from here to
	 * beyond the helpers cleanup. The lazy dof faulting will then
	 * install new helpers which will never be cleaned up, and leak.
	 */
	dtrace_lazy_dofs_destroy(p);

	/*
	 * Clean up any DTrace helper actions or probes for the process.
	 */
	if (p->p_dtrace_helpers != NULL) {
		(*dtrace_helpers_cleanup)(p);
	}

	/*
	 * Clean up any DTrace probes associated with this process.
	 *
	 * APPLE NOTE: We release ptss pages/entries in dtrace_fasttrap_exit_ptr(),
	 * call this after dtrace_helpers_cleanup().
	 */
	if (p->p_dtrace_probes && dtrace_fasttrap_exit_ptr) {
		(*dtrace_fasttrap_exit_ptr)(p);
	}
}
/*
 * DTrace Hook Functions
 */

/*
 * APPLE NOTE:  dtrace_modctl_* routines for kext support.
 * Used to manipulate the modctl list within dtrace xnu.
 */

modctl_t *dtrace_modctl_list;

static void
dtrace_modctl_add(struct modctl * newctl)
{
	struct modctl *nextp, *prevp;

	ASSERT(newctl != NULL);
	LCK_MTX_ASSERT(&mod_lock, LCK_MTX_ASSERT_OWNED);

	// Insert new module at the front of the list,
	newctl->mod_next = dtrace_modctl_list;
	dtrace_modctl_list = newctl;

	/*
	 * If a module exists with the same name, then that module
	 * must have been unloaded with enabled probes. We will move
	 * the unloaded module to the new module's stale chain and
	 * then stop traversing the list.
	 */
	prevp = newctl;
	nextp = newctl->mod_next;

	while (nextp != NULL) {
		if (nextp->mod_loaded) {
			/* This is a loaded module. Keep traversing. */
			prevp = nextp;
			nextp = nextp->mod_next;
			continue;
		}

		/* Found an unloaded module */
		if (strncmp(newctl->mod_modname, nextp->mod_modname, KMOD_MAX_NAME)) {
			/* Names don't match. Keep traversing. */
			prevp = nextp;
			nextp = nextp->mod_next;
			continue;
		}

		/* We found a stale entry, move it. We're done. */
		prevp->mod_next = nextp->mod_next;
		newctl->mod_stale = nextp;
		nextp->mod_next = NULL;
		break;
	}
}
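/*
 * In effect the head of dtrace_modctl_list is always the newest modctl; if a
 * stale entry (unloaded, but with probes still referenced) shares the new
 * kext's name, it is unlinked from the main list and parked on the
 * newcomer's mod_stale chain:
 *
 *   dtrace_modctl_list --> foo (new) --> bar --> ...
 *                           |
 *                           mod_stale --> foo (old, unloaded)
 */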
static modctl_t *
dtrace_modctl_lookup(struct kmod_info * kmod)
{
	LCK_MTX_ASSERT(&mod_lock, LCK_MTX_ASSERT_OWNED);

	struct modctl * ctl;

	for (ctl = dtrace_modctl_list; ctl; ctl = ctl->mod_next) {
		if (ctl->mod_id == kmod->id)
			return (ctl);
	}
	return (NULL);
}
/*
 * This routine is called from dtrace_module_unloaded().
 * It removes a modctl structure and its stale chain
 * from the kext shadow list.
 */
static void
dtrace_modctl_remove(struct modctl * ctl)
{
	ASSERT(ctl != NULL);
	LCK_MTX_ASSERT(&mod_lock, LCK_MTX_ASSERT_OWNED);
	modctl_t *prevp, *nextp, *curp;

	// Remove stale chain first
	for (curp = ctl->mod_stale; curp != NULL; curp = nextp) {
		nextp = curp->mod_stale;
		/* There should NEVER be user symbols allocated at this point */
		ASSERT(curp->mod_user_symbols == NULL);
		kmem_free(curp, sizeof(modctl_t));
	}

	prevp = NULL;
	curp = dtrace_modctl_list;

	while (curp != ctl) {
		prevp = curp;
		curp = curp->mod_next;
	}

	if (prevp != NULL) {
		prevp->mod_next = ctl->mod_next;
	}
	else {
		dtrace_modctl_list = ctl->mod_next;
	}

	/* There should NEVER be user symbols allocated at this point */
	ASSERT(ctl->mod_user_symbols == NULL);

	kmem_free(ctl, sizeof(modctl_t));
}
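/*
 * The unlink loop above assumes ctl is present on dtrace_modctl_list; callers
 * can guarantee this because every modctl handed to dtrace_modctl_remove()
 * was previously inserted by dtrace_modctl_add(), and both insertion and
 * removal happen under the same mod_lock.
 */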
/*
 * APPLE NOTE: The kext loader will call dtrace_module_loaded
 * when the kext is loaded in memory, but before calling the
 * kext's start routine.
 *
 * Return 0 on success
 * Return -1 on failure
 */

static int
dtrace_module_loaded(struct kmod_info *kmod, uint32_t flag)
{
	dtrace_provider_t *prv;

	/*
	 * If kernel symbols have been disabled, return immediately.
	 * DTRACE_KERNEL_SYMBOLS_NEVER is a permanent mode, it is safe to test without holding locks.
	 */
	if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER)
		return 0;

	struct modctl *ctl = NULL;
	if (!kmod || kmod->address == 0 || kmod->size == 0)
		return (-1);

	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&mod_lock);

	/*
	 * Have we seen this kext before?
	 */
	ctl = dtrace_modctl_lookup(kmod);

	if (ctl != NULL) {
		/* bail... we already have this kext in the modctl list */
		lck_mtx_unlock(&mod_lock);
		lck_mtx_unlock(&dtrace_provider_lock);
		if (dtrace_err_verbose)
			cmn_err(CE_WARN, "dtrace load module already exists '%s %u' is failing against '%s %u'", kmod->name, (uint_t)kmod->id, ctl->mod_modname, ctl->mod_id);
		return (-1);
	}
	else {
		ctl = kmem_alloc(sizeof(struct modctl), KM_SLEEP);
		if (ctl == NULL) {
			if (dtrace_err_verbose)
				cmn_err(CE_WARN, "dtrace module load '%s %u' is failing ", kmod->name, (uint_t)kmod->id);
			lck_mtx_unlock(&mod_lock);
			lck_mtx_unlock(&dtrace_provider_lock);
			return (-1);
		}
		ctl->mod_next = NULL;
		ctl->mod_stale = NULL;
		strlcpy(ctl->mod_modname, kmod->name, sizeof(ctl->mod_modname));
		ctl->mod_loadcnt = kmod->id;
		ctl->mod_nenabled = 0;
		ctl->mod_address = kmod->address;
		ctl->mod_size = kmod->size;
		ctl->mod_id = kmod->id;
		ctl->mod_loaded = 1;
		ctl->mod_flags = 0;
		ctl->mod_user_symbols = NULL;

		/*
		 * Find the UUID for this module, if it has one
		 */
		kernel_mach_header_t* header = (kernel_mach_header_t *)ctl->mod_address;
		struct load_command* load_cmd = (struct load_command *)&header[1];
		uint32_t i;
		for (i = 0; i < header->ncmds; i++) {
			if (load_cmd->cmd == LC_UUID) {
				struct uuid_command* uuid_cmd = (struct uuid_command *)load_cmd;
				memcpy(ctl->mod_uuid, uuid_cmd->uuid, sizeof(uuid_cmd->uuid));
				ctl->mod_flags |= MODCTL_HAS_UUID;
				break;
			}
			load_cmd = (struct load_command *)((caddr_t)load_cmd + load_cmd->cmdsize);
		}

		if (ctl->mod_address == g_kernel_kmod_info.address) {
			ctl->mod_flags |= MODCTL_IS_MACH_KERNEL;
		}
	}
	dtrace_modctl_add(ctl);

	/*
	 * We must hold the dtrace_lock to safely test non permanent dtrace_fbt_symbol_mode(s)
	 */
	lck_mtx_lock(&dtrace_lock);

	/*
	 * DTrace must decide if it will instrument modules lazily via
	 * userspace symbols (default mode), or instrument immediately via
	 * kernel symbols (non-default mode).
	 *
	 * When in default/lazy mode, DTrace will only support modules
	 * built with a valid UUID.
	 *
	 * Overriding the default can be done explicitly in one of
	 * the following two ways.
	 *
	 * A module can force symbols from kernel space using the plist key,
	 * OSBundleForceDTraceInit (see kmod.h).  If this per kext state is set,
	 * we fall through and instrument this module now.
	 *
	 * Or, the boot-arg, dtrace_kernel_symbol_mode, can be set to force symbols
	 * from kernel space (see dtrace_impl.h).  If this system state is set
	 * to a non-userspace mode, we fall through and instrument the module now.
	 */
	if ((dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) &&
	    (!(flag & KMOD_DTRACE_FORCE_INIT))) {
		/* We will instrument the module lazily -- this is the default */
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&mod_lock);
		lck_mtx_unlock(&dtrace_provider_lock);
		return 0;
	}

	/* We will instrument the module immediately using kernel symbols */
	ctl->mod_flags |= MODCTL_HAS_KERNEL_SYMBOLS;

	lck_mtx_unlock(&dtrace_lock);

	/*
	 * We're going to call each provider's per-module provide operation
	 * specifying only this module.
	 */
	for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
		prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);

	/*
	 * APPLE NOTE: The contract with the kext loader is that once this function
	 * has completed, it may delete kernel symbols at will.
	 * We must set this while still holding the mod_lock.
	 */
	ctl->mod_flags &= ~MODCTL_HAS_KERNEL_SYMBOLS;

	lck_mtx_unlock(&mod_lock);
	lck_mtx_unlock(&dtrace_provider_lock);

	/*
	 * If we have any retained enablings, we need to match against them.
	 * Enabling probes requires that cpu_lock be held, and we cannot hold
	 * cpu_lock here -- it is legal for cpu_lock to be held when loading a
	 * module.  (In particular, this happens when loading scheduling
	 * classes.)  So if we have any retained enablings, we need to dispatch
	 * our task queue to do the match for us.
	 */
	lck_mtx_lock(&dtrace_lock);

	if (dtrace_retained == NULL) {
		lck_mtx_unlock(&dtrace_lock);
		return 0;
	}

	/*
	 * APPLE NOTE: The cpu_lock mentioned above is only held by dtrace code,
	 * Apple's xnu never actually holds it for any reason. Thus the comment
	 * above is invalid, we can directly invoke dtrace_enabling_matchall
	 * without jumping through all the hoops, and we can avoid the delay
	 * call as well.
	 */
	lck_mtx_unlock(&dtrace_lock);

	dtrace_enabling_matchall();

	return 0;
}
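/*
 * Summary of the symbol-mode decision above: in the default
 * DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE mode a newly loaded kext is only
 * recorded (with its UUID) for later instrumentation, once a userspace
 * dtrace supplies symbols. On the eager path (forced via
 * OSBundleForceDTraceInit or a non-userspace dtrace_kernel_symbol_mode),
 * MODCTL_HAS_KERNEL_SYMBOLS is set, every provider is walked immediately,
 * and the flag is cleared again before the kext loader is allowed to
 * discard its symbols.
 */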
/*
 * Return 0 on success
 * Return -1 on failure
 */
static int
dtrace_module_unloaded(struct kmod_info *kmod)
{
	dtrace_probe_t template, *probe, *first, *next;
	dtrace_provider_t *prov;
	struct modctl *ctl = NULL;
	struct modctl *syncctl = NULL;
	struct modctl *nextsyncctl = NULL;
	int syncmode = 0;

	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&mod_lock);
	lck_mtx_lock(&dtrace_lock);

	if (kmod == NULL) {
		syncmode = 1;
	}
	else {
		ctl = dtrace_modctl_lookup(kmod);
		if (ctl == NULL) {
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&mod_lock);
			lck_mtx_unlock(&dtrace_provider_lock);
			return (-1);
		}
		ctl->mod_loaded = 0;
		ctl->mod_address = 0;
		ctl->mod_size = 0;
	}

	if (dtrace_bymod == NULL) {
		/*
		 * The DTrace module is loaded (obviously) but not attached;
		 * we don't have any work to do.
		 */
		if (ctl != NULL)
			(void)dtrace_modctl_remove(ctl);
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&mod_lock);
		lck_mtx_unlock(&dtrace_provider_lock);
		return (0);
	}

	/* Syncmode set means we target and traverse entire modctl list. */
	if (syncmode)
		nextsyncctl = dtrace_modctl_list;

syncloop:
	if (syncmode) {
		/* find a stale modctl struct */
		for (syncctl = nextsyncctl; syncctl != NULL; syncctl = syncctl->mod_next) {
			if (syncctl->mod_address == 0)
				break;
		}
		if (syncctl == NULL) {
			/* We have no more work to do */
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&mod_lock);
			lck_mtx_unlock(&dtrace_provider_lock);
			return (0);
		}
		else {
			/* keep track of next syncctl in case this one is removed */
			nextsyncctl = syncctl->mod_next;
			ctl = syncctl;
		}
	}

	template.dtpr_mod = ctl->mod_modname;

	for (probe = first = dtrace_hash_lookup(dtrace_bymod, &template);
	    probe != NULL; probe = probe->dtpr_nextmod) {
		if (probe->dtpr_ecb != NULL) {
			/*
			 * This shouldn't _actually_ be possible -- we're
			 * unloading a module that has an enabled probe in it.
			 * (It's normally up to the provider to make sure that
			 * this can't happen.)  However, because dtps_enable()
			 * doesn't have a failure mode, there can be an
			 * enable/unload race.  Upshot:  we don't want to
			 * assert, but we're not going to disable the
			 * probe, either.
			 */
			if (syncmode) {
				/* We're syncing, let's look at next in list */
				goto syncloop;
			}

			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&mod_lock);
			lck_mtx_unlock(&dtrace_provider_lock);

			if (dtrace_err_verbose) {
				cmn_err(CE_WARN, "unloaded module '%s' had "
				    "enabled probes", ctl->mod_modname);
			}
			return (-1);
		}
	}

	probe = first;

	for (first = NULL; probe != NULL; probe = next) {
		ASSERT(dtrace_probes[probe->dtpr_id - 1] == probe);

		dtrace_probes[probe->dtpr_id - 1] = NULL;
		probe->dtpr_provider->dtpv_probe_count--;

		next = probe->dtpr_nextmod;
		dtrace_hash_remove(dtrace_bymod, probe);
		dtrace_hash_remove(dtrace_byfunc, probe);
		dtrace_hash_remove(dtrace_byname, probe);

		if (first == NULL) {
			first = probe;
			probe->dtpr_nextmod = NULL;
		} else {
			probe->dtpr_nextmod = first;
			first = probe;
		}
	}

	/*
	 * We've removed all of the module's probes from the hash chains and
	 * from the probe array.  Now issue a dtrace_sync() to be sure that
	 * everyone has cleared out from any probe array processing.
	 */
	dtrace_sync();

	for (probe = first; probe != NULL; probe = first) {
		first = probe->dtpr_nextmod;
		prov = probe->dtpr_provider;
		prov->dtpv_pops.dtps_destroy(prov->dtpv_arg, probe->dtpr_id,
		    probe->dtpr_arg);
		kmem_free(probe->dtpr_mod, strlen(probe->dtpr_mod) + 1);
		kmem_free(probe->dtpr_func, strlen(probe->dtpr_func) + 1);
		kmem_free(probe->dtpr_name, strlen(probe->dtpr_name) + 1);
		vmem_free(dtrace_arena, (void *)(uintptr_t)probe->dtpr_id, 1);

		zfree(dtrace_probe_t_zone, probe);
	}

	dtrace_modctl_remove(ctl);

	if (syncmode)
		goto syncloop;

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&mod_lock);
	lck_mtx_unlock(&dtrace_provider_lock);

	return (0);
}
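/*
 * When called with kmod == NULL (syncmode, as dtrace_close does), the routine
 * sweeps the whole shadow list looking for stale entries (mod_address == 0)
 * and retires each one whose probes are no longer enabled; a single-kext
 * call instead marks just that kext unloaded and retires its probes.
 */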
void
dtrace_suspend(void)
{
	dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_suspend));
}

void
dtrace_resume(void)
{
	dtrace_probe_foreach(offsetof(dtrace_pops_t, dtps_resume));
}
static int
dtrace_cpu_setup(cpu_setup_t what, processorid_t cpu)
{
	LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED);
	lck_mtx_lock(&dtrace_lock);

	switch (what) {
	case CPU_CONFIG: {
		dtrace_state_t *state;
		dtrace_optval_t *opt, rs, c;

		/*
		 * For now, we only allocate a new buffer for anonymous state.
		 */
		if ((state = dtrace_anon.dta_state) == NULL)
			break;

		if (state->dts_activity != DTRACE_ACTIVITY_ACTIVE)
			break;

		opt = state->dts_options;
		c = opt[DTRACEOPT_CPU];

		if (c != DTRACE_CPUALL && c != DTRACEOPT_UNSET && c != cpu)
			break;

		/*
		 * Regardless of what the actual policy is, we're going to
		 * temporarily set our resize policy to be manual.  We're
		 * also going to temporarily set our CPU option to denote
		 * the newly configured CPU.
		 */
		rs = opt[DTRACEOPT_BUFRESIZE];
		opt[DTRACEOPT_BUFRESIZE] = DTRACEOPT_BUFRESIZE_MANUAL;
		opt[DTRACEOPT_CPU] = (dtrace_optval_t)cpu;

		(void) dtrace_state_buffers(state);

		opt[DTRACEOPT_BUFRESIZE] = rs;
		opt[DTRACEOPT_CPU] = c;

		break;
	}

	case CPU_UNCONFIG:
		/*
		 * We don't free the buffer in the CPU_UNCONFIG case.  (The
		 * buffer will be freed when the consumer exits.)
		 */
		break;

	default:
		break;
	}

	lck_mtx_unlock(&dtrace_lock);
	return (0);
}

static void
dtrace_cpu_setup_initial(processorid_t cpu)
{
	(void) dtrace_cpu_setup(CPU_CONFIG, cpu);
}
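/*
 * Only anonymous state gets a buffer on CPU_CONFIG: ordinary consumers
 * (re)allocate their buffers through dtrace_state_buffers() themselves,
 * whereas the boot-time anonymous enabling has no consumer to react to a
 * late-arriving CPU, so the framework does it here on its behalf.
 */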
static void
dtrace_toxrange_add(uintptr_t base, uintptr_t limit)
{
	if (dtrace_toxranges >= dtrace_toxranges_max) {
		size_t osize, nsize;
		dtrace_toxrange_t *range;

		osize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);

		if (osize == 0) {
			ASSERT(dtrace_toxrange == NULL);
			ASSERT(dtrace_toxranges_max == 0);
			dtrace_toxranges_max = 1;
		} else {
			dtrace_toxranges_max <<= 1;
		}

		nsize = dtrace_toxranges_max * sizeof (dtrace_toxrange_t);
		range = kmem_zalloc(nsize, KM_SLEEP);

		if (dtrace_toxrange != NULL) {
			ASSERT(osize != 0);
			bcopy(dtrace_toxrange, range, osize);
			kmem_free(dtrace_toxrange, osize);
		}

		dtrace_toxrange = range;
	}

	ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_base == 0);
	ASSERT(dtrace_toxrange[dtrace_toxranges].dtt_limit == 0);

	dtrace_toxrange[dtrace_toxranges].dtt_base = base;
	dtrace_toxrange[dtrace_toxranges].dtt_limit = limit;
	dtrace_toxranges++;
}
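/*
 * The toxic-range array grows by doubling (1, 2, 4, 8, ...), so N insertions
 * cost O(N) entry copies amortized; e.g. adding a 5th range grows
 * dtrace_toxranges_max from 4 to 8, bcopy()s the 4 existing entries into the
 * new array, and frees the old one.
 */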
/*
 * DTrace Driver Cookbook Functions
 */
/*ARGSUSED*/
static int
dtrace_attach(dev_info_t *devi, ddi_attach_cmd_t cmd)
{
#pragma unused(cmd) /* __APPLE__ */
	dtrace_provider_id_t id;
	dtrace_state_t *state = NULL;
	dtrace_enabling_t *enab;

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&dtrace_lock);

	/* Darwin uses BSD cloning device driver to automagically obtain minor device number. */

	ddi_report_dev(devi);
	dtrace_devi = devi;

	dtrace_modload = dtrace_module_loaded;
	dtrace_modunload = dtrace_module_unloaded;
	dtrace_cpu_init = dtrace_cpu_setup_initial;
	dtrace_helpers_cleanup = dtrace_helpers_destroy;
	dtrace_helpers_fork = dtrace_helpers_duplicate;
	dtrace_cpustart_init = dtrace_suspend;
	dtrace_cpustart_fini = dtrace_resume;
	dtrace_debugger_init = dtrace_suspend;
	dtrace_debugger_fini = dtrace_resume;

	register_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);

	LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED);

	dtrace_arena = vmem_create("dtrace", (void *)1, UINT32_MAX, 1,
	    NULL, NULL, NULL, 0, VM_SLEEP | VMC_IDENTIFIER);
	dtrace_taskq = taskq_create("dtrace_taskq", 1, maxclsyspri,
	    1, INT_MAX, 0);

	dtrace_state_cache = kmem_cache_create("dtrace_state_cache",
	    sizeof (dtrace_dstate_percpu_t) * (int)NCPU, DTRACE_STATE_ALIGN,
	    NULL, NULL, NULL, NULL, NULL, 0);

	LCK_MTX_ASSERT(&cpu_lock, LCK_MTX_ASSERT_OWNED);

	dtrace_bymod = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_mod),
	    offsetof(dtrace_probe_t, dtpr_nextmod),
	    offsetof(dtrace_probe_t, dtpr_prevmod));

	dtrace_byfunc = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_func),
	    offsetof(dtrace_probe_t, dtpr_nextfunc),
	    offsetof(dtrace_probe_t, dtpr_prevfunc));

	dtrace_byname = dtrace_hash_create(offsetof(dtrace_probe_t, dtpr_name),
	    offsetof(dtrace_probe_t, dtpr_nextname),
	    offsetof(dtrace_probe_t, dtpr_prevname));

	if (dtrace_retain_max < 1) {
		cmn_err(CE_WARN, "illegal value (%lu) for dtrace_retain_max; "
		    "setting to 1", dtrace_retain_max);
		dtrace_retain_max = 1;
	}

	/*
	 * Now discover our toxic ranges.
	 */
	dtrace_toxic_ranges(dtrace_toxrange_add);

	/*
	 * Before we register ourselves as a provider to our own framework,
	 * we would like to assert that dtrace_provider is NULL -- but that's
	 * not true if we were loaded as a dependency of a DTrace provider.
	 * Once we've registered, we can assert that dtrace_provider is our
	 * pseudo-provider.
	 */
	(void) dtrace_register("dtrace", &dtrace_provider_attr,
	    DTRACE_PRIV_NONE, 0, &dtrace_provider_ops, NULL, &id);

	ASSERT(dtrace_provider != NULL);
	ASSERT((dtrace_provider_id_t)dtrace_provider == id);

#if defined (__x86_64__)
	dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "BEGIN", 1, NULL);
	dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "END", 0, NULL);
	dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "ERROR", 3, NULL);
#elif (defined(__arm__) || defined(__arm64__))
	dtrace_probeid_begin = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "BEGIN", 2, NULL);
	dtrace_probeid_end = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "END", 1, NULL);
	dtrace_probeid_error = dtrace_probe_create((dtrace_provider_id_t)
	    dtrace_provider, NULL, NULL, "ERROR", 4, NULL);
#else
#error Unknown Architecture
#endif

	dtrace_anon_property();
	lck_mtx_unlock(&cpu_lock);

	/*
	 * If DTrace helper tracing is enabled, we need to allocate the
	 * trace buffer and initialize the values.
	 */
	if (dtrace_helptrace_enabled) {
		ASSERT(dtrace_helptrace_buffer == NULL);
		dtrace_helptrace_buffer =
		    kmem_zalloc(dtrace_helptrace_bufsize, KM_SLEEP);
		dtrace_helptrace_next = 0;
	}

	/*
	 * If there are already providers, we must ask them to provide their
	 * probes, and then match any anonymous enabling against them.  Note
	 * that there should be no other retained enablings at this time:
	 * the only retained enablings at this time should be the anonymous
	 * enabling.
	 */
	if (dtrace_anon.dta_enabling != NULL) {
		ASSERT(dtrace_retained == dtrace_anon.dta_enabling);

		/*
		 * APPLE NOTE: if handling anonymous dof, switch symbol modes.
		 */
		if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) {
			dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_KERNEL;
		}

		dtrace_enabling_provide(NULL);
		state = dtrace_anon.dta_state;

		/*
		 * We couldn't hold cpu_lock across the above call to
		 * dtrace_enabling_provide(), but we must hold it to actually
		 * enable the probes.  We have to drop all of our locks, pick
		 * up cpu_lock, and regain our locks before matching the
		 * retained anonymous enabling.
		 */
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&dtrace_provider_lock);

		lck_mtx_lock(&cpu_lock);
		lck_mtx_lock(&dtrace_provider_lock);
		lck_mtx_lock(&dtrace_lock);

		if ((enab = dtrace_anon.dta_enabling) != NULL)
			(void) dtrace_enabling_match(enab, NULL, NULL);

		lck_mtx_unlock(&cpu_lock);
	}

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_provider_lock);

	if (state != NULL) {
		/*
		 * If we created any anonymous state, set it going now.
		 */
		(void) dtrace_state_go(state, &dtrace_anon.dta_beganon);
	}

	return (DDI_SUCCESS);
}
/*ARGSUSED*/
static int
dtrace_open(dev_t *devp, int flag, int otyp, cred_t *cred_p)
{
#pragma unused(flag, otyp)
	dtrace_state_t *state;
	uint32_t priv;
	uid_t uid;
	zoneid_t zoneid;
	int rv;

	/* APPLE: Darwin puts Helper on its own major device. */

	/*
	 * If no DTRACE_PRIV_* bits are set in the credential, then the
	 * caller lacks sufficient permission to do anything with DTrace.
	 */
	dtrace_cred2priv(cred_p, &priv, &uid, &zoneid);
	if (priv == DTRACE_PRIV_NONE)
		return (EACCES);

	/*
	 * APPLE NOTE: We delay the initialization of fasttrap as late as possible.
	 * It certainly can't be later than now!
	 */
	fasttrap_init();

	/*
	 * Ask all providers to provide all their probes.
	 */
	lck_mtx_lock(&dtrace_provider_lock);
	dtrace_probe_provide(NULL, NULL);
	lck_mtx_unlock(&dtrace_provider_lock);

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_lock);
	dtrace_opens++;
	dtrace_membar_producer();

	/*
	 * If the kernel debugger is active (that is, if the kernel debugger
	 * modified text in some way), we won't allow the open.
	 */
	if (kdi_dtrace_set(KDI_DTSET_DTRACE_ACTIVATE) != 0) {
		dtrace_opens--;
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&cpu_lock);
		return (EBUSY);
	}

	rv = dtrace_state_create(devp, cred_p, &state);
	lck_mtx_unlock(&cpu_lock);

	if (rv != 0 || state == NULL) {
		if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
			(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
		lck_mtx_unlock(&dtrace_lock);
		/* propagate EAGAIN or ERESTART */
		return (rv);
	}

	lck_mtx_unlock(&dtrace_lock);

	lck_rw_lock_exclusive(&dtrace_dof_mode_lock);

	/*
	 * If we are currently lazy, transition states.
	 *
	 * Unlike dtrace_close, we do not need to check the
	 * value of dtrace_opens, as any positive value (and
	 * we count as 1) means we transition states.
	 */
	if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_ON) {
		dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_OFF;

		/*
		 * We do not need to hold the exclusive lock while processing
		 * DOF on processes. We do need to make sure the mode does not get
		 * changed to DTRACE_DOF_MODE_LAZY_ON during that stage though
		 * (which should not happen anyway since it only happens in
		 * dtrace_close). There is no way incomplete USDT probes can be
		 * activated by any DTrace clients here since they all have to
		 * call dtrace_open and be blocked on dtrace_dof_mode_lock.
		 */
		lck_rw_lock_exclusive_to_shared(&dtrace_dof_mode_lock);

		/*
		 * Iterate all existing processes and load lazy dofs.
		 */
		proc_iterate(PROC_ALLPROCLIST | PROC_NOWAITTRANS,
			     dtrace_lazy_dofs_proc_iterate_doit,
			     NULL,
			     dtrace_lazy_dofs_proc_iterate_filter,
			     NULL);

		lck_rw_unlock_shared(&dtrace_dof_mode_lock);
	}
	else {
		lck_rw_unlock_exclusive(&dtrace_dof_mode_lock);
	}

	/*
	 * Update kernel symbol state.
	 *
	 * We must own the provider and dtrace locks.
	 *
	 * NOTE! It may appear there is a race by setting this value so late
	 * after dtrace_probe_provide. However, any kext loaded after the
	 * call to probe provide and before we set LAZY_OFF will be marked as
	 * eligible for symbols from userspace. The same dtrace that is currently
	 * calling dtrace_open() (this call!) will get a list of kexts needing
	 * symbols and fill them in, thus closing the race window.
	 *
	 * We want to set this value only after it is certain it will succeed, as
	 * this significantly reduces the complexity of error exits.
	 */
	lck_mtx_lock(&dtrace_lock);
	if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE) {
		dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_KERNEL;
	}
	lck_mtx_unlock(&dtrace_lock);
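/*
 * Every successful open thus leaves the system in LAZY_OFF dof mode and (if
 * it was lazy) kernel-symbol mode FROM_KERNEL; dtrace_close performs the
 * mirror-image transitions once dtrace_opens drops back to zero.
 */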
/*ARGSUSED*/
static int
dtrace_close(dev_t dev, int flag, int otyp, cred_t *cred_p)
{
#pragma unused(flag, otyp, cred_p) /* __APPLE__ */
	minor_t minor = getminor(dev);
	dtrace_state_t *state;

	/* APPLE NOTE: Darwin puts Helper on its own major device. */
	state = dtrace_state_get(minor);

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_lock);

	if (state->dts_anon) {
		/*
		 * There is anonymous state. Destroy that first.
		 */
		ASSERT(dtrace_anon.dta_state == NULL);
		dtrace_state_destroy(state->dts_anon);
	}

	dtrace_state_destroy(state);
	ASSERT(dtrace_opens > 0);

	/*
	 * Only relinquish control of the kernel debugger interface when there
	 * are no consumers and no anonymous enablings.
	 */
	if (--dtrace_opens == 0 && dtrace_anon.dta_enabling == NULL)
		(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&cpu_lock);

	/*
	 * Lock ordering requires the dof mode lock be taken before
	 * the dtrace_lock.
	 */
	lck_rw_lock_exclusive(&dtrace_dof_mode_lock);
	lck_mtx_lock(&dtrace_lock);

	if (dtrace_opens == 0) {
		/*
		 * If we are currently lazy-off, and this is the last close, transition to
		 * lazy mode.
		 */
		if (dtrace_dof_mode == DTRACE_DOF_MODE_LAZY_OFF) {
			dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
		}

		/*
		 * If we are the last dtrace client, switch back to lazy (from userspace) symbols.
		 */
		if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_FROM_KERNEL) {
			dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE;
		}
	}

	lck_mtx_unlock(&dtrace_lock);
	lck_rw_unlock_exclusive(&dtrace_dof_mode_lock);

	/*
	 * Kext probes may be retained past the end of the kext's lifespan. The
	 * probes are kept until the last reference to them has been removed.
	 * Since closing an active dtrace context is likely to drop that last reference,
	 * let's take a shot at cleaning out the orphaned probes now.
	 */
	dtrace_module_unloaded(NULL);
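/*
 * The dtrace_module_unloaded(NULL) call above is opportunistic: it is the
 * syncmode sweep described earlier, and if another consumer still holds
 * references, the stale modctl entries simply survive until a later close.
 */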
static int
dtrace_ioctl_helper(u_long cmd, caddr_t arg, int *rv)
{
#pragma unused(rv)
	/*
	 * Safe to check this outside the dof mode lock
	 */
	if (dtrace_dof_mode == DTRACE_DOF_MODE_NEVER)
		return KERN_SUCCESS;

	switch (cmd) {
#if defined (__arm64__)
	case DTRACEHIOC_ADDDOF_U32:
	case DTRACEHIOC_ADDDOF_U64:
#else
	case DTRACEHIOC_ADDDOF:
#endif /* __arm64__ */
		{
			dof_helper_t *dhp = NULL;
			size_t dof_ioctl_data_size;
			dof_ioctl_data_t* multi_dof;
			unsigned int i;
			int rval = 0;
			user_addr_t user_address = *(user_addr_t*)arg;
			uint64_t dof_count;
			int multi_dof_claimed = 0;
			proc_t* p = current_proc();

			/*
			 * If this is a restricted process and dtrace is restricted,
			 * do not allow DOFs to be registered
			 */
			if (dtrace_is_restricted() &&
				!dtrace_are_restrictions_relaxed() &&
				!dtrace_can_attach_to_proc(current_proc())) {
				return (EACCES);
			}

			/*
			 * Read the number of DOF sections being passed in.
			 */
			if (copyin(user_address + offsetof(dof_ioctl_data_t, dofiod_count),
				   &dof_count,
				   sizeof(dof_count))) {
				dtrace_dof_error(NULL, "failed to copyin dofiod_count");
				return (EFAULT);
			}

			/*
			 * Range check the count.
			 */
			if (dof_count == 0 || dof_count > 1024) {
				dtrace_dof_error(NULL, "dofiod_count is not valid");
				return (EINVAL);
			}

			/*
			 * Allocate a correctly sized structure and copyin the data.
			 */
			dof_ioctl_data_size = DOF_IOCTL_DATA_T_SIZE(dof_count);
			if ((multi_dof = kmem_alloc(dof_ioctl_data_size, KM_SLEEP)) == NULL)
				return (ENOMEM);

			/* NOTE! We can no longer exit this method via return */
			if (copyin(user_address, multi_dof, dof_ioctl_data_size) != 0) {
				dtrace_dof_error(NULL, "failed copyin of dof_ioctl_data_t");
				rval = EFAULT;
				goto cleanup;
			}

			/*
			 * Check that the count didn't change between the first copyin and the second.
			 */
			if (multi_dof->dofiod_count != dof_count) {
				rval = EINVAL;
				goto cleanup;
			}

			/*
			 * Try to process lazily first.
			 */
			rval = dtrace_lazy_dofs_add(p, multi_dof, &multi_dof_claimed);

			/*
			 * If rval is EACCES, we must be non-lazy.
			 */
			if (rval == EACCES) {
				rval = 0;
				/*
				 * Process each dof_helper_t
				 */
				i = 0;
				do {
					dhp = &multi_dof->dofiod_helpers[i];

					dof_hdr_t *dof = dtrace_dof_copyin(dhp->dofhp_dof, &rval);

					if (dof != NULL) {
						lck_mtx_lock(&dtrace_lock);

						/*
						 * dtrace_helper_slurp() takes responsibility for the dof --
						 * it may free it now or it may save it and free it later.
						 */
						if ((dhp->dofhp_dof = (uint64_t)dtrace_helper_slurp(p, dof, dhp)) == -1ULL) {
							rval = EINVAL;
						}

						lck_mtx_unlock(&dtrace_lock);
					}
				} while (++i < multi_dof->dofiod_count && rval == 0);
			}

			/*
			 * We need to copyout the multi_dof struct, because it contains
			 * the generation (unique id) values needed to call DTRACEHIOC_REMOVE.
			 *
			 * This could certainly be better optimized.
			 */
			if (copyout(multi_dof, user_address, dof_ioctl_data_size) != 0) {
				dtrace_dof_error(NULL, "failed copyout of dof_ioctl_data_t");
				/* Don't overwrite pre-existing error code */
				if (rval == 0) rval = EFAULT;
			}

		cleanup:
			/*
			 * If we had to allocate struct memory, free it.
			 */
			if (multi_dof != NULL && !multi_dof_claimed) {
				kmem_free(multi_dof, dof_ioctl_data_size);
			}

			return rval;
		}

		case DTRACEHIOC_REMOVE: {
			int generation = *(int*)arg;
			proc_t* p = current_proc();

			/*
			 * Try lazy first.
			 */
			int rval = dtrace_lazy_dofs_remove(p, generation);

			/*
			 * EACCES means non-lazy
			 */
			if (rval == EACCES) {
				lck_mtx_lock(&dtrace_lock);
				rval = dtrace_helper_destroygen(p, generation);
				lck_mtx_unlock(&dtrace_lock);
			}

			return (rval);
		}

		default:
			break;
	}

	return ENOTTY;
}
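/*
 * The double copyin above (dofiod_count first, then the full structure) is
 * why the count is re-checked afterwards: a racing thread could rewrite the
 * user buffer between the two copies, so the kernel validates that the size
 * it allocated still matches what actually arrived.
 */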
16765 dtrace_ioctl(dev_t dev
, u_long cmd
, user_addr_t arg
, int md
, cred_t 
*cr
, int *rv
) 
16768         minor_t minor 
= getminor(dev
); 
16769         dtrace_state_t 
*state
; 
16772         /* Darwin puts Helper on its own major device. */ 
16774         state 
= dtrace_state_get(minor
); 
16776         if (state
->dts_anon
) { 
16777            ASSERT(dtrace_anon
.dta_state 
== NULL
); 
16778            state 
= state
->dts_anon
; 
16782         case DTRACEIOC_PROVIDER
: { 
16783                 dtrace_providerdesc_t pvd
; 
16784                 dtrace_provider_t 
*pvp
; 
16786                 if (copyin(arg
, &pvd
, sizeof (pvd
)) != 0) 
16789                 pvd
.dtvd_name
[DTRACE_PROVNAMELEN 
- 1] = '\0'; 
16790                 lck_mtx_lock(&dtrace_provider_lock
); 
16792                 for (pvp 
= dtrace_provider
; pvp 
!= NULL
; pvp 
= pvp
->dtpv_next
) { 
16793                         if (strncmp(pvp
->dtpv_name
, pvd
.dtvd_name
, DTRACE_PROVNAMELEN
) == 0) 
16797                 lck_mtx_unlock(&dtrace_provider_lock
); 
16802                 bcopy(&pvp
->dtpv_priv
, &pvd
.dtvd_priv
, sizeof (dtrace_ppriv_t
)); 
16803                 bcopy(&pvp
->dtpv_attr
, &pvd
.dtvd_attr
, sizeof (dtrace_pattr_t
)); 
16804                 if (copyout(&pvd
, arg
, sizeof (pvd
)) != 0) 
16810         case DTRACEIOC_EPROBE
: { 
16811                 dtrace_eprobedesc_t epdesc
; 
16813                 dtrace_action_t 
*act
; 
16819                 if (copyin(arg
, &epdesc
, sizeof (epdesc
)) != 0) 
16822                 lck_mtx_lock(&dtrace_lock
); 
16824                 if ((ecb 
= dtrace_epid2ecb(state
, epdesc
.dtepd_epid
)) == NULL
) { 
16825                         lck_mtx_unlock(&dtrace_lock
); 
16829                 if (ecb
->dte_probe 
== NULL
) { 
16830                         lck_mtx_unlock(&dtrace_lock
); 
16834                 epdesc
.dtepd_probeid 
= ecb
->dte_probe
->dtpr_id
; 
16835                 epdesc
.dtepd_uarg 
= ecb
->dte_uarg
; 
16836                 epdesc
.dtepd_size 
= ecb
->dte_size
; 
16838                 nrecs 
= epdesc
.dtepd_nrecs
; 
16839                 epdesc
.dtepd_nrecs 
= 0; 
16840                 for (act 
= ecb
->dte_action
; act 
!= NULL
; act 
= act
->dta_next
) { 
16841                         if (DTRACEACT_ISAGG(act
->dta_kind
) || act
->dta_intuple
) 
16844                         epdesc
.dtepd_nrecs
++; 
16848                  * Now that we have the size, we need to allocate a temporary 
16849                  * buffer in which to store the complete description.  We need 
16850                  * the temporary buffer to be able to drop dtrace_lock() 
16851                  * across the copyout(), below. 
16853                 size 
= sizeof (dtrace_eprobedesc_t
) + 
16854                         (epdesc
.dtepd_nrecs 
* sizeof (dtrace_recdesc_t
)); 
16856                 buf 
= kmem_alloc(size
, KM_SLEEP
); 
16857                 dest 
= (uintptr_t)buf
; 
16859                 bcopy(&epdesc
, (void *)dest
, sizeof (epdesc
)); 
16860                 dest 
+= offsetof(dtrace_eprobedesc_t
, dtepd_rec
[0]); 
16862                 for (act 
= ecb
->dte_action
; act 
!= NULL
; act 
= act
->dta_next
) { 
16863                         if (DTRACEACT_ISAGG(act
->dta_kind
) || act
->dta_intuple
) 
16869                         bcopy(&act
->dta_rec
, (void *)dest
, 
16870                         sizeof (dtrace_recdesc_t
)); 
16871                         dest 
+= sizeof (dtrace_recdesc_t
); 
16874                 lck_mtx_unlock(&dtrace_lock
); 
16876                 if (copyout(buf
, arg
, dest 
- (uintptr_t)buf
) != 0) { 
16877                         kmem_free(buf
, size
); 
16881                 kmem_free(buf
, size
); 
16885         case DTRACEIOC_AGGDESC
: { 
16886                 dtrace_aggdesc_t aggdesc
; 
16887                 dtrace_action_t 
*act
; 
16888                 dtrace_aggregation_t 
*agg
; 
16891                 dtrace_recdesc_t 
*lrec
; 
16896                 if (copyin(arg
, &aggdesc
, sizeof (aggdesc
)) != 0) 
16899                 lck_mtx_lock(&dtrace_lock
); 
16901                 if ((agg 
= dtrace_aggid2agg(state
, aggdesc
.dtagd_id
)) == NULL
) { 
16902                         lck_mtx_unlock(&dtrace_lock
); 
16906                 aggdesc
.dtagd_epid 
= agg
->dtag_ecb
->dte_epid
; 
16908                 nrecs 
= aggdesc
.dtagd_nrecs
; 
16909                 aggdesc
.dtagd_nrecs 
= 0; 
16911                 offs 
= agg
->dtag_base
; 
16912                 lrec 
= &agg
->dtag_action
.dta_rec
; 
16913                 aggdesc
.dtagd_size 
= lrec
->dtrd_offset 
+ lrec
->dtrd_size 
- offs
; 
16915                 for (act 
= agg
->dtag_first
; ; act 
= act
->dta_next
) { 
16916                         ASSERT(act
->dta_intuple 
|| 
16917                         DTRACEACT_ISAGG(act
->dta_kind
)); 
16920                          * If this action has a record size of zero, it 
16921                          * denotes an argument to the aggregating action. 
16922                          * Because the presence of this record doesn't (or 
16923                          * shouldn't) affect the way the data is interpreted, 
16924                          * we don't copy it out to save user-level the 
16925                          * confusion of dealing with a zero-length record. 
16927                         if (act
->dta_rec
.dtrd_size 
== 0) { 
16928                                 ASSERT(agg
->dtag_hasarg
); 
16932                         aggdesc
.dtagd_nrecs
++; 
16934                         if (act 
== &agg
->dtag_action
) 
16939                  * Now that we have the size, we need to allocate a temporary 
16940                  * buffer in which to store the complete description.  We need 
16941                  * the temporary buffer to be able to drop dtrace_lock() 
16942                  * across the copyout(), below. 
16944                 size 
= sizeof (dtrace_aggdesc_t
) + 
16945                         (aggdesc
.dtagd_nrecs 
* sizeof (dtrace_recdesc_t
)); 
16947                 buf 
= kmem_alloc(size
, KM_SLEEP
); 
16948                 dest 
= (uintptr_t)buf
; 
16950                 bcopy(&aggdesc
, (void *)dest
, sizeof (aggdesc
)); 
16951                 dest 
+= offsetof(dtrace_aggdesc_t
, dtagd_rec
[0]); 
16953                 for (act 
= agg
->dtag_first
; ; act 
= act
->dta_next
) { 
16954                         dtrace_recdesc_t rec 
= act
->dta_rec
; 
16957                          * See the comment in the above loop for why we pass 
16958                          * over zero-length records. 
16960                         if (rec
.dtrd_size 
== 0) { 
16961                                 ASSERT(agg
->dtag_hasarg
); 
16968                         rec
.dtrd_offset 
-= offs
; 
16969                         bcopy(&rec
, (void *)dest
, sizeof (rec
)); 
16970                         dest 
+= sizeof (dtrace_recdesc_t
); 
16972                         if (act 
== &agg
->dtag_action
) 
16976                 lck_mtx_unlock(&dtrace_lock
); 
16978                 if (copyout(buf
, arg
, dest 
- (uintptr_t)buf
) != 0) { 
16979                         kmem_free(buf
, size
); 
16983                 kmem_free(buf
, size
); 
16987         case DTRACEIOC_ENABLE
: { 
16989                 dtrace_enabling_t 
*enab 
= NULL
; 
16990                 dtrace_vstate_t 
*vstate
; 
16996                  * If a NULL argument has been passed, we take this as our 
16997                  * cue to reevaluate our enablings. 
17000                         dtrace_enabling_matchall(); 
17005                 if ((dof 
= dtrace_dof_copyin(arg
, &rval
)) == NULL
) 
17008                 lck_mtx_lock(&cpu_lock
); 
17009                 lck_mtx_lock(&dtrace_lock
); 
17010                 vstate 
= &state
->dts_vstate
; 
17012                 if (state
->dts_activity 
!= DTRACE_ACTIVITY_INACTIVE
) { 
17013                         lck_mtx_unlock(&dtrace_lock
); 
17014                         lck_mtx_unlock(&cpu_lock
); 
17015                         dtrace_dof_destroy(dof
); 
17019                 if (dtrace_dof_slurp(dof
, vstate
, cr
, &enab
, 0, B_TRUE
) != 0) { 
17020                         lck_mtx_unlock(&dtrace_lock
); 
17021                         lck_mtx_unlock(&cpu_lock
); 
17022                         dtrace_dof_destroy(dof
); 
17026                 if ((rval 
= dtrace_dof_options(dof
, state
)) != 0) { 
17027                         dtrace_enabling_destroy(enab
); 
17028                         lck_mtx_unlock(&dtrace_lock
); 
17029                         lck_mtx_unlock(&cpu_lock
); 
17030                         dtrace_dof_destroy(dof
); 
17034                 if ((err 
= dtrace_enabling_match(enab
, rv
, NULL
)) == 0) { 
17035                         err 
= dtrace_enabling_retain(enab
); 
17037                         dtrace_enabling_destroy(enab
); 
17040                 lck_mtx_unlock(&dtrace_lock
); 
17041                 lck_mtx_unlock(&cpu_lock
); 
17042                 dtrace_dof_destroy(dof
); 
	case DTRACEIOC_REPLICATE: {
		dtrace_repldesc_t desc;
		dtrace_probedesc_t *match = &desc.dtrpd_match;
		dtrace_probedesc_t *create = &desc.dtrpd_create;
		int err;

		if (copyin(arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		match->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		match->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		match->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		match->dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		create->dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		create->dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		create->dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		create->dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		lck_mtx_lock(&dtrace_lock);
		err = dtrace_enabling_replicate(state, match, create);
		lck_mtx_unlock(&dtrace_lock);

		return (err);
	}
	case DTRACEIOC_PROBEMATCH:
	case DTRACEIOC_PROBES: {
		dtrace_probe_t *probe = NULL;
		dtrace_probedesc_t desc;
		dtrace_probekey_t pkey;
		dtrace_id_t i;
		int m = 0;
		uint32_t priv;
		uid_t uid;
		zoneid_t zoneid;

		if (copyin(arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		desc.dtpd_provider[DTRACE_PROVNAMELEN - 1] = '\0';
		desc.dtpd_mod[DTRACE_MODNAMELEN - 1] = '\0';
		desc.dtpd_func[DTRACE_FUNCNAMELEN - 1] = '\0';
		desc.dtpd_name[DTRACE_NAMELEN - 1] = '\0';

		/*
		 * Before we attempt to match this probe, we want to give
		 * all providers the opportunity to provide it.
		 */
		if (desc.dtpd_id == DTRACE_IDNONE) {
			lck_mtx_lock(&dtrace_provider_lock);
			dtrace_probe_provide(&desc, NULL);
			lck_mtx_unlock(&dtrace_provider_lock);
			desc.dtpd_id++;
		}

		if (cmd == DTRACEIOC_PROBEMATCH)  {
			dtrace_probekey(&desc, &pkey);
			pkey.dtpk_id = DTRACE_IDNONE;
		}

		dtrace_cred2priv(cr, &priv, &uid, &zoneid);

		lck_mtx_lock(&dtrace_lock);

		if (cmd == DTRACEIOC_PROBEMATCH) {
			/* Quiet compiler warning */
			for (i = desc.dtpd_id; i <= (dtrace_id_t)dtrace_nprobes; i++) {
				if ((probe = dtrace_probes[i - 1]) != NULL &&
					(m = dtrace_match_probe(probe, &pkey,
					priv, uid, zoneid)) != 0)
					break;
			}

			if (m < 0) {
				lck_mtx_unlock(&dtrace_lock);
				return (EINVAL);
			}

		} else {
			/* Quiet compiler warning */
			for (i = desc.dtpd_id; i <= (dtrace_id_t)dtrace_nprobes; i++) {
				if ((probe = dtrace_probes[i - 1]) != NULL &&
					dtrace_match_priv(probe, priv, uid, zoneid))
					break;
			}
		}

		if (probe == NULL) {
			lck_mtx_unlock(&dtrace_lock);
			return (ESRCH);
		}

		dtrace_probe_description(probe, &desc);
		lck_mtx_unlock(&dtrace_lock);

		if (copyout(&desc, arg, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}
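	/*
	 * Illustrative sketch, not compiled: walking the probe table from
	 * userspace with DTRACEIOC_PROBES, in the style of libdtrace's probe
	 * iterator.  walk_probes() is a made-up name; dt_dev_ioctl() is the
	 * hypothetical wrapper sketched after DTRACEIOC_ENABLE above.
	 */
#if 0
static void
walk_probes(int fd)
{
	dtrace_probedesc_t desc;

	bzero(&desc, sizeof (desc));
	desc.dtpd_id = DTRACE_IDNONE;		/* start at the beginning */

	for (;;) {
		/*
		 * The kernel returns the first probe at or above dtpd_id
		 * that we are privileged to see, or fails with ESRCH when
		 * the table is exhausted.
		 */
		if (dt_dev_ioctl(fd, DTRACEIOC_PROBES, &desc) != 0)
			break;

		printf("%5u %s:%s:%s:%s\n", desc.dtpd_id, desc.dtpd_provider,
		    desc.dtpd_mod, desc.dtpd_func, desc.dtpd_name);

		desc.dtpd_id++;			/* resume past the last match */
	}
}
#endif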
	case DTRACEIOC_PROBEARG: {
		dtrace_argdesc_t desc;
		dtrace_probe_t *probe;
		dtrace_provider_t *prov;

		if (copyin(arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		if (desc.dtargd_id == DTRACE_IDNONE)
			return (EINVAL);

		if (desc.dtargd_ndx == DTRACE_ARGNONE)
			return (EINVAL);

		lck_mtx_lock(&dtrace_provider_lock);
		lck_mtx_lock(&mod_lock);
		lck_mtx_lock(&dtrace_lock);

		/* Quiet compiler warning */
		if (desc.dtargd_id > (dtrace_id_t)dtrace_nprobes) {
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&mod_lock);
			lck_mtx_unlock(&dtrace_provider_lock);
			return (EINVAL);
		}

		if ((probe = dtrace_probes[desc.dtargd_id - 1]) == NULL) {
			lck_mtx_unlock(&dtrace_lock);
			lck_mtx_unlock(&mod_lock);
			lck_mtx_unlock(&dtrace_provider_lock);
			return (EINVAL);
		}

		lck_mtx_unlock(&dtrace_lock);

		prov = probe->dtpr_provider;

		if (prov->dtpv_pops.dtps_getargdesc == NULL) {
			/*
			 * There isn't any typed information for this probe.
			 * Set the argument number to DTRACE_ARGNONE.
			 */
			desc.dtargd_ndx = DTRACE_ARGNONE;
		} else {
			desc.dtargd_native[0] = '\0';
			desc.dtargd_xlate[0] = '\0';
			desc.dtargd_mapping = desc.dtargd_ndx;

			prov->dtpv_pops.dtps_getargdesc(prov->dtpv_arg,
			    probe->dtpr_id, probe->dtpr_arg, &desc);
		}

		lck_mtx_unlock(&mod_lock);
		lck_mtx_unlock(&dtrace_provider_lock);

		if (copyout(&desc, arg, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}
	case DTRACEIOC_GO: {
		processorid_t cpuid;
		rval = dtrace_state_go(state, &cpuid);

		if (rval != 0)
			return (rval);

		if (copyout(&cpuid, arg, sizeof (cpuid)) != 0)
			return (EFAULT);

		return (0);
	}
	case DTRACEIOC_STOP: {
		processorid_t cpuid;

		lck_mtx_lock(&dtrace_lock);
		rval = dtrace_state_stop(state, &cpuid);
		lck_mtx_unlock(&dtrace_lock);

		if (rval != 0)
			return (rval);

		if (copyout(&cpuid, arg, sizeof (cpuid)) != 0)
			return (EFAULT);

		return (0);
	}
	case DTRACEIOC_DOFGET: {
		dof_hdr_t hdr, *dof;
		uint64_t len;

		if (copyin(arg, &hdr, sizeof (hdr)) != 0)
			return (EFAULT);

		lck_mtx_lock(&dtrace_lock);
		dof = dtrace_dof_create(state);
		lck_mtx_unlock(&dtrace_lock);

		len = MIN(hdr.dofh_loadsz, dof->dofh_loadsz);
		rval = copyout(dof, arg, len);
		dtrace_dof_destroy(dof);

		return (rval == 0 ? 0 : EFAULT);
	}
	case DTRACEIOC_SLEEP: {
		int64_t time;
		uint64_t abstime;
		uint64_t rvalue = DTRACE_WAKE_TIMEOUT;

		if (copyin(arg, &time, sizeof(time)) != 0)
			return (EFAULT);

		nanoseconds_to_absolutetime((uint64_t)time, &abstime);
		clock_absolutetime_interval_to_deadline(abstime, &abstime);

		if (assert_wait_deadline(state, THREAD_ABORTSAFE, abstime) == THREAD_WAITING) {
			if (state->dts_buf_over_limit > 0) {
				clear_wait(current_thread(), THREAD_INTERRUPTED);
				rvalue = DTRACE_WAKE_BUF_LIMIT;
			} else {
				thread_block(THREAD_CONTINUE_NULL);
				if (state->dts_buf_over_limit > 0) {
					rvalue = DTRACE_WAKE_BUF_LIMIT;
				}
			}
		}

		if (copyout(&rvalue, arg, sizeof(rvalue)) != 0)
			return (EFAULT);

		return (0);
	}
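	/*
	 * Illustrative sketch, not compiled: a hypothetical consumer loop
	 * built on DTRACEIOC_SLEEP.  The same word is used for input (the
	 * timeout, in nanoseconds) and output (the wake reason): the kernel
	 * reports DTRACE_WAKE_BUF_LIMIT when a per-CPU buffer has filled past
	 * its limit and DTRACE_WAKE_TIMEOUT when the interval simply elapsed.
	 * consume_loop() is a made-up name; dt_dev_ioctl() is the wrapper
	 * sketched after DTRACEIOC_ENABLE above.
	 */
#if 0
static void
consume_loop(int fd, uint64_t interval_ns)
{
	for (;;) {
		uint64_t word = interval_ns;	/* in: timeout; out: reason */

		if (dt_dev_ioctl(fd, DTRACEIOC_SLEEP, &word) != 0)
			break;

		/*
		 * Whether we woke for DTRACE_WAKE_BUF_LIMIT or for the
		 * timeout, this is the point to snapshot and process the
		 * per-CPU buffers (see DTRACEIOC_BUFSNAP below).
		 */
	}
}
#endif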
	case DTRACEIOC_SIGNAL: {
		wakeup(state);
		return (0);
	}
	case DTRACEIOC_AGGSNAP:
	case DTRACEIOC_BUFSNAP: {
		dtrace_bufdesc_t desc;
		caddr_t cached;
		boolean_t over_limit;
		dtrace_buffer_t *buf;

		if (copyin(arg, &desc, sizeof (desc)) != 0)
			return (EFAULT);

		if ((int)desc.dtbd_cpu < 0 || desc.dtbd_cpu >= NCPU)
			return (EINVAL);

		lck_mtx_lock(&dtrace_lock);

		if (cmd == DTRACEIOC_BUFSNAP) {
			buf = &state->dts_buffer[desc.dtbd_cpu];
		} else {
			buf = &state->dts_aggbuffer[desc.dtbd_cpu];
		}

		if (buf->dtb_flags & (DTRACEBUF_RING | DTRACEBUF_FILL)) {
			size_t sz = buf->dtb_offset;

			if (state->dts_activity != DTRACE_ACTIVITY_STOPPED) {
				lck_mtx_unlock(&dtrace_lock);
				return (EBUSY);
			}

			/*
			 * If this buffer has already been consumed, we're
			 * going to indicate that there's nothing left here
			 * to consume.
			 */
			if (buf->dtb_flags & DTRACEBUF_CONSUMED) {
				lck_mtx_unlock(&dtrace_lock);

				desc.dtbd_size = 0;
				desc.dtbd_drops = 0;
				desc.dtbd_errors = 0;
				desc.dtbd_oldest = 0;
				sz = sizeof (desc);

				if (copyout(&desc, arg, sz) != 0)
					return (EFAULT);

				return (0);
			}

			/*
			 * If this is a ring buffer that has wrapped, we want
			 * to copy the whole thing out.
			 */
			if (buf->dtb_flags & DTRACEBUF_WRAPPED) {
				dtrace_buffer_polish(buf);
				sz = buf->dtb_size;
			}

			if (copyout(buf->dtb_tomax, (user_addr_t)desc.dtbd_data, sz) != 0) {
				lck_mtx_unlock(&dtrace_lock);
				return (EFAULT);
			}

			desc.dtbd_size = sz;
			desc.dtbd_drops = buf->dtb_drops;
			desc.dtbd_errors = buf->dtb_errors;
			desc.dtbd_oldest = buf->dtb_xamot_offset;
			desc.dtbd_timestamp = dtrace_gethrtime();

			lck_mtx_unlock(&dtrace_lock);

			if (copyout(&desc, arg, sizeof (desc)) != 0)
				return (EFAULT);

			buf->dtb_flags |= DTRACEBUF_CONSUMED;

			return (0);
		}

		if (buf->dtb_tomax == NULL) {
			ASSERT(buf->dtb_xamot == NULL);
			lck_mtx_unlock(&dtrace_lock);
			return (ENOENT);
		}

		cached = buf->dtb_tomax;
		over_limit = buf->dtb_cur_limit == buf->dtb_size;

		ASSERT(!(buf->dtb_flags & DTRACEBUF_NOSWITCH));

		dtrace_xcall(desc.dtbd_cpu,
			(dtrace_xcall_t)dtrace_buffer_switch, buf);

		state->dts_errors += buf->dtb_xamot_errors;

		/*
		 * If the buffers did not actually switch, then the cross call
		 * did not take place -- presumably because the given CPU is
		 * not in the ready set.  If this is the case, we'll return
		 * ENOENT.
		 */
		if (buf->dtb_tomax == cached) {
			ASSERT(buf->dtb_xamot != cached);
			lck_mtx_unlock(&dtrace_lock);
			return (ENOENT);
		}

		ASSERT(cached == buf->dtb_xamot);
		/*
		 * At this point we know the buffers have switched, so we can
		 * decrement the over-limit count if the old buffer was over
		 * its limit.  The new buffer might already be over its limit,
		 * but we don't care since we're guaranteed not to be checking
		 * the buffer over-limit count at this point.
		 */
		if (over_limit) {
			uint32_t old = atomic_add_32(&state->dts_buf_over_limit, -1);
			#pragma unused(old)

			/*
			 * Verify that we didn't underflow the value
			 */
			ASSERT(old != 0);
		}

		/*
		 * We have our snapshot; now copy it out.
		 */
		if (dtrace_buffer_copyout(buf->dtb_xamot,
					(user_addr_t)desc.dtbd_data,
					buf->dtb_xamot_offset) != 0) {
			lck_mtx_unlock(&dtrace_lock);
			return (EFAULT);
		}

		desc.dtbd_size = buf->dtb_xamot_offset;
		desc.dtbd_drops = buf->dtb_xamot_drops;
		desc.dtbd_errors = buf->dtb_xamot_errors;
		desc.dtbd_oldest = 0;
		desc.dtbd_timestamp = buf->dtb_switched;

		lck_mtx_unlock(&dtrace_lock);

		/*
		 * Finally, copy out the buffer description.
		 */
		if (copyout(&desc, arg, sizeof (desc)) != 0)
			return (EFAULT);

		return (0);
	}
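	/*
	 * Illustrative sketch, not compiled: snapshotting one CPU's principal
	 * buffer with DTRACEIOC_BUFSNAP.  The destination must be at least as
	 * large as the in-kernel buffer; on success the kernel fills in the
	 * number of valid bytes, the drop and error counts, and the switch
	 * timestamp.  snapshot_cpu() is a made-up name; dt_dev_ioctl() is the
	 * wrapper sketched after DTRACEIOC_ENABLE above.
	 */
#if 0
static int
snapshot_cpu(int fd, int cpu, void *dest, size_t *validp)
{
	dtrace_bufdesc_t desc;

	bzero(&desc, sizeof (desc));
	desc.dtbd_cpu = cpu;
	desc.dtbd_data = (uintptr_t)dest;

	/* ENOENT here means the CPU had no buffer or didn't switch. */
	if (dt_dev_ioctl(fd, DTRACEIOC_BUFSNAP, &desc) != 0)
		return (-1);

	*validp = desc.dtbd_size;	/* bytes actually written to dest */
	return (0);
}
#endif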
	case DTRACEIOC_CONF: {
		dtrace_conf_t conf;

		bzero(&conf, sizeof (conf));
		conf.dtc_difversion = DIF_VERSION;
		conf.dtc_difintregs = DIF_DIR_NREGS;
		conf.dtc_diftupregs = DIF_DTR_NREGS;
		conf.dtc_ctfmodel = CTF_MODEL_NATIVE;

		if (copyout(&conf, arg, sizeof (conf)) != 0)
			return (EFAULT);

		return (0);
	}
	case DTRACEIOC_STATUS: {
		dtrace_status_t stat;
		dtrace_dstate_t *dstate;
		int i, j;
		uint64_t nerrs;

		/*
		 * See the comment in dtrace_state_deadman() for the reason
		 * for setting dts_laststatus to INT64_MAX before setting
		 * it to the correct value.
		 */
		state->dts_laststatus = INT64_MAX;
		dtrace_membar_producer();
		state->dts_laststatus = dtrace_gethrtime();

		bzero(&stat, sizeof (stat));

		lck_mtx_lock(&dtrace_lock);

		if (state->dts_activity == DTRACE_ACTIVITY_INACTIVE) {
			lck_mtx_unlock(&dtrace_lock);
			return (ENOENT);
		}

		if (state->dts_activity == DTRACE_ACTIVITY_DRAINING)
			stat.dtst_exiting = 1;

		nerrs = state->dts_errors;
		dstate = &state->dts_vstate.dtvs_dynvars;

		for (i = 0; i < (int)NCPU; i++) {
			dtrace_dstate_percpu_t *dcpu = &dstate->dtds_percpu[i];

			stat.dtst_dyndrops += dcpu->dtdsc_drops;
			stat.dtst_dyndrops_dirty += dcpu->dtdsc_dirty_drops;
			stat.dtst_dyndrops_rinsing += dcpu->dtdsc_rinsing_drops;

			if (state->dts_buffer[i].dtb_flags & DTRACEBUF_FULL)
				stat.dtst_filled++;

			nerrs += state->dts_buffer[i].dtb_errors;

			for (j = 0; j < state->dts_nspeculations; j++) {
				dtrace_speculation_t *spec;
				dtrace_buffer_t *buf;

				spec = &state->dts_speculations[j];
				buf = &spec->dtsp_buffer[i];
				stat.dtst_specdrops += buf->dtb_xamot_drops;
			}
		}

		stat.dtst_specdrops_busy = state->dts_speculations_busy;
		stat.dtst_specdrops_unavail = state->dts_speculations_unavail;
		stat.dtst_stkstroverflows = state->dts_stkstroverflows;
		stat.dtst_dblerrors = state->dts_dblerrors;
		stat.dtst_killed =
		    (state->dts_activity == DTRACE_ACTIVITY_KILLED);
		stat.dtst_errors = nerrs;

		lck_mtx_unlock(&dtrace_lock);

		if (copyout(&stat, arg, sizeof (stat)) != 0)
			return (EFAULT);

		return (0);
	}
	case DTRACEIOC_FORMAT: {
		dtrace_fmtdesc_t fmt;
		char *str;
		int len;

		if (copyin(arg, &fmt, sizeof (fmt)) != 0)
			return (EFAULT);

		lck_mtx_lock(&dtrace_lock);

		if (fmt.dtfd_format == 0 ||
			fmt.dtfd_format > state->dts_nformats) {
			lck_mtx_unlock(&dtrace_lock);
			return (EINVAL);
		}

		/*
		 * Format strings are allocated contiguously and they are
		 * never freed; if a format index is less than the number
		 * of formats, we can assert that the format map is non-NULL
		 * and that the format for the specified index is non-NULL.
		 */
		ASSERT(state->dts_formats != NULL);
		str = state->dts_formats[fmt.dtfd_format - 1];
		ASSERT(str != NULL);

		len = strlen(str) + 1;

		if (len > fmt.dtfd_length) {
			fmt.dtfd_length = len;

			if (copyout(&fmt, arg, sizeof (fmt)) != 0) {
				lck_mtx_unlock(&dtrace_lock);
				return (EINVAL);
			}
		} else {
			if (copyout(str, (user_addr_t)fmt.dtfd_string, len) != 0) {
				lck_mtx_unlock(&dtrace_lock);
				return (EINVAL);
			}
		}

		lck_mtx_unlock(&dtrace_lock);
		return (0);
	}
	case DTRACEIOC_MODUUIDSLIST: {
		size_t module_uuids_list_size;
		dtrace_module_uuids_list_t *uuids_list;
		uint64_t dtmul_count;

		/*
		 * Security restrictions can make this operation illegal; when
		 * they are enabled, DTrace must refuse to provide any fbt probes.
		 */
		if (dtrace_fbt_probes_restricted()) {
			cmn_err(CE_WARN, "security restrictions disallow DTRACEIOC_MODUUIDSLIST");
			return (EPERM);
		}

		/*
		 * Fail if the kernel symbol mode makes this operation illegal.
		 * Both NEVER & ALWAYS_FROM_KERNEL are permanent states; it is
		 * legal to check for them without holding the dtrace_lock.
		 */
		if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER ||
		    dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL) {
			cmn_err(CE_WARN, "dtrace_kernel_symbol_mode of %u disallows DTRACEIOC_MODUUIDSLIST", dtrace_kernel_symbol_mode);
			return (EPERM);
		}

		/*
		 * Read the number of symbolsdesc structs being passed in.
		 */
		if (copyin(arg + offsetof(dtrace_module_uuids_list_t, dtmul_count),
			   &dtmul_count,
			   sizeof(dtmul_count))) {
			cmn_err(CE_WARN, "failed to copyin dtmul_count");
			return (EFAULT);
		}

		/*
		 * Range check the count. More than 2k kexts is probably an error.
		 */
		if (dtmul_count > 2048) {
			cmn_err(CE_WARN, "dtmul_count is not valid");
			return (EINVAL);
		}

		/*
		 * For all queries, we return EINVAL when the user specified
		 * count does not match the actual number of modules we find
		 * available.
		 *
		 * If the user specified count is zero, then this serves as a
		 * simple query to count the available modules in need of symbols.
		 */

		rval = 0;

		if (dtmul_count == 0)
		{
			lck_mtx_lock(&mod_lock);
			struct modctl *ctl = dtrace_modctl_list;
			while (ctl) {
				/* Update the private probes bit */
				if (dtrace_provide_private_probes)
					ctl->mod_flags |= MODCTL_FBT_PROVIDE_PRIVATE_PROBES;

				ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl));
				if (!MOD_SYMBOLS_DONE(ctl)) {
					dtmul_count++;
					rval = EINVAL;
				}
				ctl = ctl->mod_next;
			}
			lck_mtx_unlock(&mod_lock);

			if (copyout(&dtmul_count, arg, sizeof (dtmul_count)) != 0)
				return (EFAULT);

			return (rval);
		}

		/*
		 * If we reach this point, then we have a request for full list data.
		 * Allocate a correctly sized structure and copyin the data.
		 */
		module_uuids_list_size = DTRACE_MODULE_UUIDS_LIST_SIZE(dtmul_count);
		if ((uuids_list = kmem_alloc(module_uuids_list_size, KM_SLEEP)) == NULL)
			return (ENOMEM);

		/* NOTE! We can no longer exit this method via return */
		if (copyin(arg, uuids_list, module_uuids_list_size) != 0) {
			cmn_err(CE_WARN, "failed copyin of dtrace_module_uuids_list_t");
			rval = EFAULT;
			goto moduuidslist_cleanup;
		}

		/*
		 * Check that the count didn't change between the first copyin and the second.
		 */
		if (uuids_list->dtmul_count != dtmul_count) {
			rval = EINVAL;
			goto moduuidslist_cleanup;
		}

		/*
		 * Build the list of UUID's that need symbols
		 */
		lck_mtx_lock(&mod_lock);

		dtmul_count = 0;

		struct modctl *ctl = dtrace_modctl_list;
		while (ctl) {
			/* Update the private probes bit */
			if (dtrace_provide_private_probes)
				ctl->mod_flags |= MODCTL_FBT_PROVIDE_PRIVATE_PROBES;

			/*
			 * We assume that userspace symbols will be "better" than kernel level symbols,
			 * as userspace can search for dSYM(s) and symbol'd binaries. Even if kernel syms
			 * are available, add user syms if the module might use them.
			 */
			ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl));
			if (!MOD_SYMBOLS_DONE(ctl)) {
				UUID *uuid = &uuids_list->dtmul_uuid[dtmul_count];
				if (dtmul_count++ < uuids_list->dtmul_count) {
					memcpy(uuid, ctl->mod_uuid, sizeof(UUID));
				}
			}
			ctl = ctl->mod_next;
		}

		lck_mtx_unlock(&mod_lock);

		if (uuids_list->dtmul_count < dtmul_count)
			rval = EINVAL;

		uuids_list->dtmul_count = dtmul_count;

		/*
		 * Copyout the symbols list (or at least the count!)
		 */
		if (copyout(uuids_list, arg, module_uuids_list_size) != 0) {
			cmn_err(CE_WARN, "failed copyout of dtrace_module_uuids_list_t");
			rval = EFAULT;
		}

	moduuidslist_cleanup:
		/*
		 * If we had to allocate struct memory, free it.
		 */
		if (uuids_list != NULL) {
			kmem_free(uuids_list, module_uuids_list_size);
		}

		return rval;
	}
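	/*
	 * Illustrative sketch, not compiled: the two-pass DTRACEIOC_MODUUIDSLIST
	 * protocol.  A first call with dtmul_count == 0 counts the modules
	 * still in need of symbols (failing with EINVAL by design); the caller
	 * then sizes a real list and repeats the call to fetch the UUIDs.
	 * fetch_module_uuids() is a made-up name; dt_dev_ioctl() is the
	 * wrapper sketched after DTRACEIOC_ENABLE above.
	 */
#if 0
static dtrace_module_uuids_list_t *
fetch_module_uuids(int fd)
{
	dtrace_module_uuids_list_t stub = { .dtmul_count = 0 };
	dtrace_module_uuids_list_t *list;
	uint64_t count;

	/* Pass 1: a zero count is a query; EINVAL is the expected "error". */
	(void) dt_dev_ioctl(fd, DTRACEIOC_MODUUIDSLIST, &stub);
	if ((count = stub.dtmul_count) == 0)
		return (NULL);		/* nothing needs symbols */

	/* Pass 2: fetch the UUIDs themselves. */
	if ((list = malloc(DTRACE_MODULE_UUIDS_LIST_SIZE(count))) == NULL)
		return (NULL);
	list->dtmul_count = count;

	if (dt_dev_ioctl(fd, DTRACEIOC_MODUUIDSLIST, list) != 0 &&
	    list->dtmul_count != count) {
		/* The module list changed underneath us; caller may retry. */
		free(list);
		return (NULL);
	}
	return (list);
}
#endif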
	case DTRACEIOC_PROVMODSYMS: {
		size_t module_symbols_size;
		dtrace_module_symbols_t *module_symbols;
		uint64_t dtmodsyms_count;

		/*
		 * Security restrictions can make this operation illegal; when
		 * they are enabled, DTrace must refuse to provide any fbt probes.
		 */
		if (dtrace_fbt_probes_restricted()) {
			cmn_err(CE_WARN, "security restrictions disallow DTRACEIOC_PROVMODSYMS");
			return (EPERM);
		}

		/*
		 * Fail if the kernel symbol mode makes this operation illegal.
		 * Both NEVER & ALWAYS_FROM_KERNEL are permanent states; it is
		 * legal to check for them without holding the dtrace_lock.
		 */
		if (dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_NEVER ||
		    dtrace_kernel_symbol_mode == DTRACE_KERNEL_SYMBOLS_ALWAYS_FROM_KERNEL) {
			cmn_err(CE_WARN, "dtrace_kernel_symbol_mode of %u disallows DTRACEIOC_PROVMODSYMS", dtrace_kernel_symbol_mode);
			return (EPERM);
		}

		/*
		 * Read the number of module symbols structs being passed in.
		 */
		if (copyin(arg + offsetof(dtrace_module_symbols_t, dtmodsyms_count),
			   &dtmodsyms_count,
			   sizeof(dtmodsyms_count))) {
			cmn_err(CE_WARN, "failed to copyin dtmodsyms_count");
			return (EFAULT);
		}

		/*
		 * Range check the count. How much data can we pass around?
		 */
		if (dtmodsyms_count == 0 || (dtmodsyms_count > 100 * 1024)) {
			cmn_err(CE_WARN, "dtmodsyms_count is not valid");
			return (EINVAL);
		}

		/*
		 * Allocate a correctly sized structure and copyin the data.
		 */
		module_symbols_size = DTRACE_MODULE_SYMBOLS_SIZE(dtmodsyms_count);
		if ((module_symbols = kmem_alloc(module_symbols_size, KM_SLEEP)) == NULL)
			return (ENOMEM);

		rval = 0;

		/* NOTE! We can no longer exit this method via return */
		if (copyin(arg, module_symbols, module_symbols_size) != 0) {
			cmn_err(CE_WARN, "failed copyin of dtrace_module_symbols_t");
			rval = EFAULT;
			goto module_symbols_cleanup;
		}

		/*
		 * Check that the count didn't change between the first copyin and the second.
		 */
		if (module_symbols->dtmodsyms_count != dtmodsyms_count) {
			rval = EINVAL;
			goto module_symbols_cleanup;
		}

		/*
		 * Find the modctl to add symbols to.
		 */
		lck_mtx_lock(&dtrace_provider_lock);
		lck_mtx_lock(&mod_lock);

		struct modctl *ctl = dtrace_modctl_list;
		while (ctl) {
			/* Update the private probes bit */
			if (dtrace_provide_private_probes)
				ctl->mod_flags |= MODCTL_FBT_PROVIDE_PRIVATE_PROBES;

			ASSERT(!MOD_HAS_USERSPACE_SYMBOLS(ctl));
			if (MOD_HAS_UUID(ctl) && !MOD_SYMBOLS_DONE(ctl)) {
				if (memcmp(module_symbols->dtmodsyms_uuid, ctl->mod_uuid, sizeof(UUID)) == 0) {
					ctl->mod_user_symbols = module_symbols;
					break;
				}
			}
			ctl = ctl->mod_next;
		}

		if (ctl) {
			dtrace_provider_t *prv;

			/*
			 * We're going to call each provider's per-module provide operation
			 * specifying only this module.
			 */
			for (prv = dtrace_provider; prv != NULL; prv = prv->dtpv_next)
				prv->dtpv_pops.dtps_provide_module(prv->dtpv_arg, ctl);

			/*
			 * We gave every provider a chance to provide with the user syms, go ahead and clear them
			 */
			ctl->mod_user_symbols = NULL; /* MUST reset this to clear HAS_USERSPACE_SYMBOLS */
		}

		lck_mtx_unlock(&mod_lock);
		lck_mtx_unlock(&dtrace_provider_lock);

	module_symbols_cleanup:
		/*
		 * If we had to allocate struct memory, free it.
		 */
		if (module_symbols != NULL) {
			kmem_free(module_symbols, module_symbols_size);
		}

		return rval;
	}
	case DTRACEIOC_PROCWAITFOR: {
		dtrace_procdesc_t pdesc = {
			.p_name = {0},
			.p_pid  = -1
		};

		if ((rval = copyin(arg, &pdesc, sizeof(pdesc))) != 0)
			goto proc_waitfor_error;

		if ((rval = dtrace_proc_waitfor(&pdesc)) != 0)
			goto proc_waitfor_error;

		if ((rval = copyout(&pdesc, arg, sizeof(pdesc))) != 0)
			goto proc_waitfor_error;

		return 0;

	proc_waitfor_error:
		/* The process was suspended, revert this since the client will not do it. */
		if (pdesc.p_pid != -1) {
			proc_t *proc = proc_find(pdesc.p_pid);
			if (proc != PROC_NULL) {
				task_pidresume(proc->task);
				proc_rele(proc);
			}
		}

		return rval;
	}

	default:
		break;
	}

	return (ENOTTY);
}
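/*
 * Illustrative sketch, not compiled: waiting for a process by name with
 * DTRACEIOC_PROCWAITFOR.  The matched process arrives suspended; the caller
 * is expected to resume it once its probes are in place, which is why the
 * error path above must undo the suspension itself.  waitfor_proc() is a
 * made-up name, and p_name is assumed to be the descriptor's name field;
 * dt_dev_ioctl() is the wrapper sketched after DTRACEIOC_ENABLE above.
 */
#if 0
static pid_t
waitfor_proc(int fd, const char *name)
{
	dtrace_procdesc_t pdesc = { .p_name = {0}, .p_pid = -1 };

	strlcpy(pdesc.p_name, name, sizeof (pdesc.p_name));

	if (dt_dev_ioctl(fd, DTRACEIOC_PROCWAITFOR, &pdesc) != 0)
		return (-1);

	return (pdesc.p_pid);	/* target is suspended at this point */
}
#endif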
/*
 * APPLE NOTE:  dtrace_detach not implemented
 */
#if !defined(__APPLE__)
/*ARGSUSED*/
static int
dtrace_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	dtrace_state_t *state;

	switch (cmd) {
	case DDI_DETACH:
		break;

	case DDI_SUSPEND:
		return (DDI_SUCCESS);

	default:
		return (DDI_FAILURE);
	}

	lck_mtx_lock(&cpu_lock);
	lck_mtx_lock(&dtrace_provider_lock);
	lck_mtx_lock(&dtrace_lock);

	ASSERT(dtrace_opens == 0);

	if (dtrace_helpers > 0) {
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&dtrace_provider_lock);
		lck_mtx_unlock(&cpu_lock);
		return (DDI_FAILURE);
	}

	if (dtrace_unregister((dtrace_provider_id_t)dtrace_provider) != 0) {
		lck_mtx_unlock(&dtrace_lock);
		lck_mtx_unlock(&dtrace_provider_lock);
		lck_mtx_unlock(&cpu_lock);
		return (DDI_FAILURE);
	}

	dtrace_provider = NULL;

	if ((state = dtrace_anon_grab()) != NULL) {
		/*
		 * If there were ECBs on this state, the provider should
		 * have not been allowed to detach; assert that there are
		 * none.
		 */
		ASSERT(state->dts_necbs == 0);
		dtrace_state_destroy(state);

		/*
		 * If we're being detached with anonymous state, we need to
		 * indicate to the kernel debugger that DTrace is now inactive.
		 */
		(void) kdi_dtrace_set(KDI_DTSET_DTRACE_DEACTIVATE);
	}

	bzero(&dtrace_anon, sizeof (dtrace_anon_t));
	unregister_cpu_setup_func((cpu_setup_func_t *)dtrace_cpu_setup, NULL);
	dtrace_cpu_init = NULL;
	dtrace_helpers_cleanup = NULL;
	dtrace_helpers_fork = NULL;
	dtrace_cpustart_init = NULL;
	dtrace_cpustart_fini = NULL;
	dtrace_debugger_init = NULL;
	dtrace_debugger_fini = NULL;
	dtrace_kreloc_init = NULL;
	dtrace_kreloc_fini = NULL;
	dtrace_modload = NULL;
	dtrace_modunload = NULL;

	lck_mtx_unlock(&cpu_lock);

	if (dtrace_helptrace_enabled) {
		kmem_free(dtrace_helptrace_buffer, dtrace_helptrace_bufsize);
		dtrace_helptrace_buffer = NULL;
	}

	kmem_free(dtrace_probes, dtrace_nprobes * sizeof (dtrace_probe_t *));
	dtrace_probes = NULL;
	dtrace_nprobes = 0;

	dtrace_hash_destroy(dtrace_bymod);
	dtrace_hash_destroy(dtrace_byfunc);
	dtrace_hash_destroy(dtrace_byname);
	dtrace_bymod = NULL;
	dtrace_byfunc = NULL;
	dtrace_byname = NULL;

	kmem_cache_destroy(dtrace_state_cache);
	vmem_destroy(dtrace_arena);

	if (dtrace_toxrange != NULL) {
		kmem_free(dtrace_toxrange,
		    dtrace_toxranges_max * sizeof (dtrace_toxrange_t));
		dtrace_toxrange = NULL;
		dtrace_toxranges = 0;
		dtrace_toxranges_max = 0;
	}

	ddi_remove_minor_node(dtrace_devi, NULL);
	dtrace_devi = NULL;

	ddi_soft_state_fini(&dtrace_softstate);

	ASSERT(dtrace_vtime_references == 0);
	ASSERT(dtrace_opens == 0);
	ASSERT(dtrace_retained == NULL);

	lck_mtx_unlock(&dtrace_lock);
	lck_mtx_unlock(&dtrace_provider_lock);

	/*
	 * We don't destroy the task queue until after we have dropped our
	 * locks (taskq_destroy() may block on running tasks).  To prevent
	 * attempting to do work after we have effectively detached but before
	 * the task queue has been destroyed, all tasks dispatched via the
	 * task queue must check that DTrace is still attached before
	 * performing any operation.
	 */
	taskq_destroy(dtrace_taskq);
	dtrace_taskq = NULL;

	return (DDI_SUCCESS);
}
#endif  /* __APPLE__ */
d_open_t _dtrace_open, helper_open;
d_close_t _dtrace_close, helper_close;
d_ioctl_t _dtrace_ioctl, helper_ioctl;

int
_dtrace_open(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(p)
	dev_t locdev = dev;

	return dtrace_open( &locdev, flags, devtype, CRED());
}

int
helper_open(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(dev,flags,devtype,p)
	return 0;
}

int
_dtrace_close(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(p)
	return dtrace_close( dev, flags, devtype, CRED());
}

int
helper_close(dev_t dev, int flags, int devtype, struct proc *p)
{
#pragma unused(dev,flags,devtype,p)
	return 0;
}

int
_dtrace_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p)
{
	int err, rv = 0;
	user_addr_t uaddrp;

	if (proc_is64bit(p))
		uaddrp = *(user_addr_t *)data;
	else
		uaddrp = (user_addr_t) *(uint32_t *)data;

	err = dtrace_ioctl(dev, cmd, uaddrp, fflag, CRED(), &rv);

	/* Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */
	if (err != 0) {
		ASSERT( (err & 0xfffff000) == 0 );
		return (err & 0xfff); /* ioctl will return -1 and will set errno to an error code < 4096 */
	} else if (rv != 0) {
		ASSERT( (rv & 0xfff00000) == 0 );
		return (((rv & 0xfffff) << 12)); /* ioctl will return -1 and will set errno to a value >= 4096 */
	} else
		return 0;
}

int
helper_ioctl(dev_t dev, u_long cmd, caddr_t data, int fflag, struct proc *p)
{
#pragma unused(dev,fflag,p)
	int err, rv = 0;

	err = dtrace_ioctl_helper(cmd, data, &rv);
	/* Darwin's BSD ioctls only return -1 or zero. Overload errno to mimic Solaris. 20 bits suffice. */
	if (err != 0) {
		ASSERT( (err & 0xfffff000) == 0 );
		return (err & 0xfff); /* ioctl will return -1 and will set errno to an error code < 4096 */
	} else if (rv != 0) {
		ASSERT( (rv & 0xfff00000) == 0 );
		return (((rv & 0xfffff) << 12)); /* ioctl will return -1 and will set errno to a value >= 4096 */
	} else
		return 0;
}
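/*
 * Illustrative sketch, not compiled: undoing the errno overloading performed
 * by _dtrace_ioctl()/helper_ioctl() above.  An errno below 4096 is a genuine
 * error code; a value at or above 4096 carries the Solaris-style ioctl return
 * value in its upper 20 bits.  decode_dtrace_errno() is a made-up name.
 */
#if 0
static int
decode_dtrace_errno(int saved_errno, int *rvp)
{
	if (saved_errno < 4096) {
		*rvp = 0;
		return (saved_errno);	/* a real error code */
	}

	*rvp = saved_errno >> 12;	/* the Solaris-style rv */
	return (0);
}
#endif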
#define HELPER_MAJOR  -24 /* let the kernel pick the device number */

/*
 * A struct describing which functions will get invoked for certain
 * actions.
 */
static struct cdevsw helper_cdevsw =
{
	helper_open,		/* open */
	helper_close,		/* close */
	eno_rdwrt,		/* read */
	eno_rdwrt,		/* write */
	helper_ioctl,		/* ioctl */
	(stop_fcn_t *)nulldev,	/* stop */
	(reset_fcn_t *)nulldev,	/* reset */
	NULL,			/* tty's */
	eno_select,		/* select */
	eno_mmap,		/* mmap */
	eno_strat,		/* strategy */
	eno_getc,		/* getc */
	eno_putc,		/* putc */
	0			/* type */
};

static int helper_majdevno = 0;

static int gDTraceInited = 0;

void
helper_init( void )
{
	/*
	 * Once the "helper" is initialized, it can take ioctl calls that use locks
	 * and zones initialized in dtrace_init. Make certain dtrace_init was called
	 * before us.
	 */

	if (!gDTraceInited) {
		panic("helper_init before dtrace_init\n");
	}

	if (0 >= helper_majdevno)
	{
		helper_majdevno = cdevsw_add(HELPER_MAJOR, &helper_cdevsw);

		if (helper_majdevno < 0) {
			printf("helper_init: failed to allocate a major number!\n");
			return;
		}

		if (NULL == devfs_make_node( makedev(helper_majdevno, 0), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666,
					DTRACEMNR_HELPER, 0 )) {
			printf("dtrace_init: failed to devfs_make_node for helper!\n");
			return;
		}
	} else
		panic("helper_init: called twice!\n");
}

#undef HELPER_MAJOR
static int
dtrace_clone_func(dev_t dev, int action)
{
#pragma unused(dev)

	if (action == DEVFS_CLONE_ALLOC) {
		return dtrace_state_reserve();
	}
	else if (action == DEVFS_CLONE_FREE) {
		return 0;
	}
	else return -1;
}
void dtrace_ast(void);

void
dtrace_ast(void)
{
	int i;
	uint32_t clients = atomic_and_32(&dtrace_wake_clients, 0);
	if (clients == 0)
		return;
	/*
	 * We disable preemption here to be sure that we won't get
	 * interrupted by a wakeup to a thread that is higher
	 * priority than us, so that we do issue all wakeups.
	 */
	disable_preemption();
	for (i = 0; i < DTRACE_NCLIENTS; i++) {
		if (clients & (1 << i)) {
			dtrace_state_t *state = dtrace_state_get(i);
			if (state) {
				wakeup(state);
			}
		}
	}
	enable_preemption();
}
#define DTRACE_MAJOR  -24 /* let the kernel pick the device number */

static struct cdevsw dtrace_cdevsw =
{
	_dtrace_open,		/* open */
	_dtrace_close,		/* close */
	eno_rdwrt,		/* read */
	eno_rdwrt,		/* write */
	_dtrace_ioctl,		/* ioctl */
	(stop_fcn_t *)nulldev,	/* stop */
	(reset_fcn_t *)nulldev,	/* reset */
	NULL,			/* tty's */
	eno_select,		/* select */
	eno_mmap,		/* mmap */
	eno_strat,		/* strategy */
	eno_getc,		/* getc */
	eno_putc,		/* putc */
	0			/* type */
};

lck_attr_t *dtrace_lck_attr;
lck_grp_attr_t *dtrace_lck_grp_attr;
lck_grp_t *dtrace_lck_grp;

static int gMajDevNo;
void
dtrace_init( void )
{
	if (0 == gDTraceInited) {
		int i, ncpu;
		size_t size = sizeof(dtrace_buffer_memory_maxsize);

		/*
		 * DTrace allocates buffers based on the maximum number
		 * of enabled cpus. This call avoids any race when finding
		 * the number of cpus.
		 */
		ASSERT(dtrace_max_cpus == 0);
		ncpu = dtrace_max_cpus = ml_get_max_cpus();

		/*
		 * Retrieve the size of the physical memory in order to define
		 * the state buffer memory maximal size.  If we cannot retrieve
		 * this value, we'll consider that we have 1Gb of memory per CPU; that's
		 * still better than raising a kernel panic.
		 */
		if (0 != kernel_sysctlbyname("hw.memsize", &dtrace_buffer_memory_maxsize,
		                             &size, NULL, 0))
		{
			dtrace_buffer_memory_maxsize = (uint64_t)ncpu * 1024 * 1024 * 1024;
			printf("dtrace_init: failed to retrieve the hw.memsize, defaulted to %lld bytes\n",
			       dtrace_buffer_memory_maxsize);
		}

		/*
		 * Finally, divide by three to prevent DTrace from eating too
		 * much memory.
		 */
		dtrace_buffer_memory_maxsize /= 3;
		ASSERT(dtrace_buffer_memory_maxsize > 0);

		gMajDevNo = cdevsw_add(DTRACE_MAJOR, &dtrace_cdevsw);

		if (gMajDevNo < 0) {
			printf("dtrace_init: failed to allocate a major number!\n");
			gDTraceInited = 0;
			return;
		}

		if (NULL == devfs_make_node_clone( makedev(gMajDevNo, 0), DEVFS_CHAR, UID_ROOT, GID_WHEEL, 0666,
					dtrace_clone_func, DTRACEMNR_DTRACE, 0 )) {
			printf("dtrace_init: failed to devfs_make_node_clone for dtrace!\n");
			gDTraceInited = 0;
			return;
		}

#if defined(DTRACE_MEMORY_ZONES)
		/*
		 * Initialize the dtrace kalloc-emulation zones.
		 */
		dtrace_alloc_init();
#endif /* DTRACE_MEMORY_ZONES */

		/*
		 * Allocate the dtrace_probe_t zone
		 */
		dtrace_probe_t_zone = zinit(sizeof(dtrace_probe_t),
					    1024 * sizeof(dtrace_probe_t),
					    sizeof(dtrace_probe_t),
					    "dtrace.dtrace_probe_t");

		/*
		 * Create the dtrace lock group and attrs.
		 */
		dtrace_lck_attr = lck_attr_alloc_init();
		dtrace_lck_grp_attr = lck_grp_attr_alloc_init();
		dtrace_lck_grp = lck_grp_alloc_init("dtrace", dtrace_lck_grp_attr);

		/*
		 * We have to initialize all locks explicitly
		 */
		lck_mtx_init(&dtrace_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&dtrace_provider_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&dtrace_meta_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&dtrace_procwaitfor_lock, dtrace_lck_grp, dtrace_lck_attr);
#if DEBUG
		lck_mtx_init(&dtrace_errlock, dtrace_lck_grp, dtrace_lck_attr);
#endif
		lck_rw_init(&dtrace_dof_mode_lock, dtrace_lck_grp, dtrace_lck_attr);

		/*
		 * The cpu_core structure consists of per-CPU state available in any context.
		 * On some architectures, this may mean that the page(s) containing the
		 * NCPU-sized array of cpu_core structures must be locked in the TLB -- it
		 * is up to the platform to assure that this is performed properly.  Note that
		 * the structure is sized to avoid false sharing.
		 */
		lck_mtx_init(&cpu_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&cyc_lock, dtrace_lck_grp, dtrace_lck_attr);
		lck_mtx_init(&mod_lock, dtrace_lck_grp, dtrace_lck_attr);

		/*
		 * Initialize the CPU offline/online hooks.
		 */
		dtrace_install_cpu_hooks();

		dtrace_modctl_list = NULL;

		cpu_core = (cpu_core_t *)kmem_zalloc( ncpu * sizeof(cpu_core_t), KM_SLEEP );
		for (i = 0; i < ncpu; ++i) {
			lck_mtx_init(&cpu_core[i].cpuc_pid_lock, dtrace_lck_grp, dtrace_lck_attr);
		}

		cpu_list = (dtrace_cpu_t *)kmem_zalloc( ncpu * sizeof(dtrace_cpu_t), KM_SLEEP );
		for (i = 0; i < ncpu; ++i) {
			cpu_list[i].cpu_id = (processorid_t)i;
			cpu_list[i].cpu_next = &(cpu_list[(i+1) % ncpu]);
			LIST_INIT(&cpu_list[i].cpu_cyc_list);
			lck_rw_init(&cpu_list[i].cpu_ft_lock, dtrace_lck_grp, dtrace_lck_attr);
		}

		lck_mtx_lock(&cpu_lock);
		for (i = 0; i < ncpu; ++i)
			/* FIXME: track CPU configuration */
			dtrace_cpu_setup_initial( (processorid_t)i ); /* In lieu of register_cpu_setup_func() callback */
		lck_mtx_unlock(&cpu_lock);

		(void)dtrace_abs_to_nano(0LL); /* Force once only call to clock_timebase_info (which can take a lock) */

		/*
		 * See dtrace_impl.h for a description of dof modes.
		 * The default is lazy dof.
		 *
		 * FIXME: Warn if state is LAZY_OFF? It won't break anything, but
		 * makes no sense...
		 */
		if (!PE_parse_boot_argn("dtrace_dof_mode", &dtrace_dof_mode, sizeof (dtrace_dof_mode))) {
#if CONFIG_EMBEDDED
			/* Disable DOF mode by default for performance reasons */
			dtrace_dof_mode = DTRACE_DOF_MODE_NEVER;
#else
			dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
#endif
		}

		/*
		 * Sanity check of dof mode value.
		 */
		switch (dtrace_dof_mode) {
			case DTRACE_DOF_MODE_NEVER:
			case DTRACE_DOF_MODE_LAZY_ON:
				/* valid modes, but nothing else we need to do */
				break;

			case DTRACE_DOF_MODE_LAZY_OFF:
			case DTRACE_DOF_MODE_NON_LAZY:
				/* Cannot wait for a dtrace_open to init fasttrap */
				fasttrap_init();
				break;

			default:
				/* Invalid, clamp to non lazy */
				dtrace_dof_mode = DTRACE_DOF_MODE_NON_LAZY;
				fasttrap_init();
				break;
		}

		/*
		 * See dtrace_impl.h for a description of kernel symbol modes.
		 * The default is to wait for symbols from userspace (lazy symbols).
		 */
		if (!PE_parse_boot_argn("dtrace_kernel_symbol_mode", &dtrace_kernel_symbol_mode, sizeof (dtrace_kernel_symbol_mode))) {
			dtrace_kernel_symbol_mode = DTRACE_KERNEL_SYMBOLS_FROM_USERSPACE;
		}

		dtrace_restriction_policy_load();

		gDTraceInited = 1;
	} else
		panic("dtrace_init: called twice!\n");
}
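/*
 * Note: both of the modes parsed above can be forced from the boot arguments;
 * the values correspond to the DTRACE_DOF_MODE_* and DTRACE_KERNEL_SYMBOLS_*
 * constants in dtrace_impl.h.  An illustrative (not prescriptive) example:
 *
 *	nvram boot-args="dtrace_dof_mode=0 dtrace_kernel_symbol_mode=2"
 */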
void
dtrace_postinit(void)
{
	/*
	 * Called from bsd_init after all providers' *_init() routines have been
	 * run. That way, anonymous DOF enabled under dtrace_attach() is safe
	 * to go.
	 */
	dtrace_attach( (dev_info_t *)(uintptr_t)makedev(gMajDevNo, 0), 0 ); /* Punning a dev_t to a dev_info_t* */

	/*
	 * Add the mach_kernel to the module list for lazy processing
	 */
	struct kmod_info fake_kernel_kmod;
	memset(&fake_kernel_kmod, 0, sizeof(fake_kernel_kmod));

	strlcpy(fake_kernel_kmod.name, "mach_kernel", sizeof(fake_kernel_kmod.name));
	fake_kernel_kmod.id = 1;
	fake_kernel_kmod.address = g_kernel_kmod_info.address;
	fake_kernel_kmod.size = g_kernel_kmod_info.size;

	if (dtrace_module_loaded(&fake_kernel_kmod, 0) != 0) {
		printf("dtrace_postinit: Could not register mach_kernel modctl\n");
	}

	if (!PE_parse_boot_argn("dtrace_provide_private_probes", &dtrace_provide_private_probes, sizeof (dtrace_provide_private_probes))) {
		dtrace_provide_private_probes = 0;
	}

	(void)OSKextRegisterKextsWithDTrace();
}
#undef DTRACE_MAJOR
/*
 * Routines used to register interest in cpu's being added to or removed
 * from the system.
 */
void
register_cpu_setup_func(cpu_setup_func_t *ignore1, void *ignore2)
{
#pragma unused(ignore1,ignore2)
}

void
unregister_cpu_setup_func(cpu_setup_func_t *ignore1, void *ignore2)
{
#pragma unused(ignore1,ignore2)
}